├── .gitignore
├── LICENSE
├── README.md
├── docs
    ├── Makefile
    ├── blob_funcs.html
    ├── blob_funcs.md
    ├── bloom_filter1.html
    ├── bloom_filter1.md
    ├── json_funcs.html
    ├── json_funcs.md
    ├── math_funcs.html
    ├── math_funcs.md
    ├── pcre2_funcs.html
    ├── pcre2_funcs.md
    ├── posix_re_funcs.html
    ├── posix_re_funcs.md
    ├── string_funcs.html
    └── string_funcs.md
├── src
    ├── CMakeLists.txt
    ├── blob_funcs.c
    ├── bloom_filter.c
    ├── cJSON.c
    ├── cJSON.h
    ├── cmake
    │   └── FindPCRE2.cmake
    ├── config.h.in
    ├── egc_str_funcs.c
    ├── group.c
    ├── icu_extras.c
    ├── json_funcs.c
    ├── math_funcs.c
    ├── more_str_funcs.c
    ├── passwd.c
    ├── pcre2_funcs.c
    ├── posix_re_funcs.c
    └── stripaccents.c
└── tools
    ├── README.md
    ├── csv2sqlite
    ├── demo_csvimport.sh
    └── table2sql


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Backups
 2 | *~
 3 | 
 4 | # Prerequisites
 5 | *.d
 6 | 
 7 | # Object files
 8 | *.o
 9 | *.ko
10 | *.obj
11 | *.elf
12 | 
13 | # Linker output
14 | *.ilk
15 | *.map
16 | *.exp
17 | 
18 | # Precompiled Headers
19 | *.gch
20 | *.pch
21 | 
22 | # Libraries
23 | *.lib
24 | *.a
25 | *.la
26 | *.lo
27 | 
28 | # Shared objects (inc. Windows DLLs)
29 | *.dll
30 | *.so
31 | *.so.*
32 | *.dylib
33 | 
34 | # Executables
35 | *.exe
36 | *.out
37 | *.app
38 | *.i*86
39 | *.x86_64
40 | *.hex
41 | 
42 | # Debug files
43 | *.dSYM/
44 | *.su
45 | *.idb
46 | *.pdb
47 | 
48 | # Kernel Module Compile Results
49 | *.mod*
50 | *.cmd
51 | .tmp_versions/
52 | modules.order
53 | Module.symvers
54 | Mkfile.old
55 | dkms.conf
56 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2019 Shawn Wagner <shawnw.mobile@gmail.com>
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining
 4 | a copy of this software and associated documentation files (the
 5 | "Software"), to deal in the Software without restriction, including
 6 | without limitation the rights to use, copy, modify, merge, publish,
 7 | distribute, sublicense, and/or sell copies of the Software, and to
 8 | permit persons to whom the Software is furnished to do so, subject to
 9 | the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Useful (?) Sqlite3 Stuff
 2 | ========================
 3 | 
 4 | Extensions
 5 | ----------
 6 | 
 7 | A handful of hopefully useful Sqlite3 extensions. Written mostly as a
 8 | way to get more familiar with the extension frameworks. See the files
 9 | in the *docs/* directory for details on individual extensions.
10 | 
11 | * **string_funcs** - Lots of extra Unicode-aware text handling functions
12 |   and more. Includes all of the standard ICU module as well.
13 | * **math_funcs** - Assorted floating-point functions.
14 | * **blob_funcs** - Assorted functions that act on blobs.
15 | * **pcre2_funcs** - Regular expressions using PCRE2
16 | * **posix_re_funcs** - Regular expressions using POSIX Extended and Basic syntax.
17 | * **json_funcs** - Extra JSON functions.
18 | * **bloom_filter1** - Bloom filter indexes.
19 | 
20 | ### Build Instructions ###
21 | 
22 |     % mkdir build
23 |     % cd build
24 |     % cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo ../src
25 |     % make
26 |     % sqlite3
27 |     sqlite3> .load ./libstring_funcs
28 |     sqlite3> SELECT regexp_substr('abc def ghi', '[a-z]{3}', 1, 2);
29 |     etc.
30 | 
31 | cmake will only build the extensions whose dependencies are installed
32 | (without the ICU dev package there is no libstring_funcs, for example).
33 | 
34 | A C99 compiler and standard library are required.
35 | 
36 | Tools
37 | -----
38 | 
39 | Assorted scripts in the `tools/` directory. See the
40 | [README](tools/README.md) there for details.
41 | 
42 | * **csv2sqlite** - Import CSV files into SQLite databases.
43 | * **table2sql** - Convert ASCII art tables to SQL statements.
44 | 
45 | To-Do
46 | -----
47 | 
48 | * Some of the string functions do a lot of reallocation and could
49 |   stand to be improved.
50 | * Test cases!
51 | * Add more stuff.
52 | 
53 | License
54 | -------
55 | 
56 | MIT.
57 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: all
2 | 
3 | all: string_funcs.html math_funcs.html bloom_filter1.html \
4 | 	blob_funcs.html pcre2_funcs.html posix_re_funcs.html json_funcs.html
5 | 
6 | %.html:  %.md
7 | 	pandoc -f markdown -t html5 --mathjax -s --toc --toc-depth=2 -o $@ $<
8 | 


--------------------------------------------------------------------------------
/docs/blob_funcs.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
  3 | <head>
  4 |   <meta charset="utf-8" />
  5 |   <meta name="generator" content="pandoc" />
  6 |   <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
  7 |   <title>Extra blob functions</title>
  8 |   <style type="text/css">
  9 |       code{white-space: pre-wrap;}
 10 |       span.smallcaps{font-variant: small-caps;}
 11 |       span.underline{text-decoration: underline;}
 12 |       div.column{display: inline-block; vertical-align: top; width: 50%;}
 13 |   </style>
 14 |   <!--[if lt IE 9]>
 15 |     <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
 16 |   <![endif]-->
 17 | </head>
 18 | <body>
 19 | <header>
 20 | <h1 class="title">Extra blob functions</h1>
 21 | </header>
 22 | <nav id="TOC">
 23 | <ul>
 24 | <li><a href="#introduction">Introduction</a></li>
 25 | <li><a href="#scalar-functions">Scalar Functions</a><ul>
 26 | <li><a href="#formatting">Formatting</a></li>
 27 | <li><a href="#message-digests">Message Digests</a></li>
 28 | <li><a href="#encryption">Encryption</a></li>
 29 | <li><a href="#compression">Compression</a></li>
 30 | <li><a href="#uuids">UUIDs</a></li>
 31 | </ul></li>
 32 | <li><a href="#aggregate-functions">Aggregate Functions</a></li>
 33 | </ul>
 34 | </nav>
 35 | <h1 id="introduction">Introduction</h1>
 36 | <p>This Sqlite3 extension module adds assorted functions for working with BLOB types (And strings). Requires OpenSSL.</p>
 37 | <p>Largely influenced by MySQL functions in features and names.</p>
 38 | <h1 id="scalar-functions">Scalar Functions</h1>
 39 | <h2 id="formatting">Formatting</h2>
 40 | <h3 id="unhex">UNHEX()</h3>
 41 | <ul>
 42 | <li>UNHEX(str)</li>
 43 | </ul>
 44 | <p>Does the opposite of the built in <code>HEX()</code> function - given a string that’s Base16 encoded, returns the decoded BLOB. Returns <code>NULL</code> if the string is not Base16 encoded or isn’t an even length, or if passed a <code>NULL</code>.</p>
 45 | <h3 id="to_base64">TO_BASE64()</h3>
 46 | <ul>
 47 | <li>TO_BASE64(blob)</li>
 48 | </ul>
 49 | <p>Returns <code>blob</code> encoded as a Base64 string. To match MySQL, the result is broken up into multiple lines if long enough.</p>
 50 | <h3 id="from_base64">FROM_BASE64()</h3>
 51 | <ul>
 52 | <li>FROM_BASE64(str)</li>
 53 | </ul>
 54 | <p>Returns <code>str</code> decoded from Base64 into a BLOB. If the argument is <code>NULL</code> or an invalid Base64 string, returns <code>NULL</code>.</p>
 55 | <h2 id="message-digests">Message Digests</h2>
 56 | <p>If any of these functions is passed a <code>NULL</code> argument, it returns <code>NULL</code>.</p>
 57 | <h3 id="md5">MD5()</h3>
 58 | <ul>
 59 | <li>MD5(b)</li>
 60 | </ul>
 61 | <p>Returns the MD5 digest of its blob argument as a Base16 encoded string.</p>
 62 | <h3 id="sha1">SHA1()</h3>
 63 | <ul>
 64 | <li>SHA1(b)</li>
 65 | </ul>
 66 | <p>Returns the SHA1 digest of its blob argument as a Base16 encoded string.</p>
 67 | <h3 id="sha2">SHA2()</h3>
 68 | <ul>
 69 | <li>SHA2(b)</li>
 70 | <li>SHA2(b, i)</li>
 71 | </ul>
 72 | <p>Returns the <code>i</code>-bit SHA2 digest of its blob argument as a Base16 encoded string. <code>i</code> can be 224, 256, 384, 512, or 0 (Which is treated as 256). Defaults to 256 if omitted.</p>
 73 | <h3 id="sha3">SHA3()</h3>
 74 | <ul>
 75 | <li>SHA3(b)</li>
 76 | <li>SHA3(b, i)</li>
 77 | </ul>
 78 | <p>Returns the <code>i</code>-bit SHA3 digest of its blob argument as a Base16 encoded string. <code>i</code> can be 224, 256, 384, or 512. Defaults to 256 if omitted.</p>
 79 | <p>Implementation taken from the <code>shathree</code> extension in the sqlite3 source tree. This version returns a Base16 string, not blob, though.</p>
 80 | <h3 id="create_digest">CREATE_DIGEST()</h3>
 81 | <ul>
 82 | <li>CREATE_DIGEST(algo, b)</li>
 83 | </ul>
 84 | <p>Returns the <code>algo</code> digest of <code>b</code> as a blob. <code>algo</code> can be any message digest supported by OpenSSL, including but not limited to, ‘md5’, ‘rmd160’, ‘sha1’, ‘sha256’, ‘sha512’, etc.</p>
 85 | <p>You can get the complete list with <code>openssl list --digest-commands</code> at a shell.</p>
 86 | <h3 id="hmac">HMAC()</h3>
 87 | <ul>
 88 | <li>HMAC(algo, secret, b)</li>
 89 | </ul>
 90 | <p>Returns the HMAC of <code>b</code>, using the <code>algo</code> message digest algorithm, and secret <code>secret</code> as a Base16 encoded string.</p>
 91 | <h3 id="crc32">CRC32()</h3>
 92 | <ul>
 93 | <li>CRC32(b)</li>
 94 | </ul>
 95 | <p>Computes the <a href="https://en.wikipedia.org/wiki/Cyclic_redundancy_check">CRC-32</a> checksum of its blob argument and returns the result as an integer.</p>
 96 | <h3 id="crc32c">CRC32C()</h3>
 97 | <ul>
 98 | <li>CRC32C(b)</li>
 99 | </ul>
100 | <p>Computes the CRC-32C checksum of its blob argument and returns the result as an integer. Currently only available on x86 processors.</p>
101 | <h2 id="encryption">Encryption</h2>
102 | <h3 id="aes_encrypt">AES_ENCRYPT()</h3>
103 | <ul>
104 | <li>AES_ENCRYPT(str, key)</li>
105 | </ul>
106 | <p>Returns a BLOB of <code>str</code> encrypted using the <strong>AES-128-ECB</strong> algorithm with key <code>key</code>. The key should be a 128 bit (16 byte) or larger blob or string.</p>
107 | <h3 id="aes_decrypt">AES_DECRYPT()</h3>
108 | <ul>
109 | <li>AES_DECRYPT(aes, key)</li>
110 | </ul>
111 | <p>Returns the decrypted <code>aes</code>, which is a BLOB holding padded, <strong>AES-128-ECB</strong> encrypted data.</p>
112 | <h2 id="compression">Compression</h2>
113 | <p>These functions require the zlib library and work with arbitrary blobs of data.</p>
114 | <p>For compressing non-ASCII text, consider the Unicode-specific compression functions in the <code>string_funcs</code> module.</p>
115 | <h3 id="compress">COMPRESS()</h3>
116 | <ul>
117 | <li>COMPRESS(b)</li>
118 | </ul>
119 | <p>Returns its blob argument as a zlib-compressed blob. The first four bytes are the length of the uncompressed text, in big-endian format. Note: This makes it incompatible with the <a href="https://www3.sqlite.org/cgi/src/artifact/dd4f8a6d0baccff3">compress module</a> in the sqlite source tree, which uses a variable-width encoding.</p>
120 | <h3 id="uncompress">UNCOMPRESS()</h3>
121 | <ul>
122 | <li>UNCOMPRESS(b)</li>
123 | </ul>
124 | <p>Returns its decompressed argument as a blob. <code>b</code> must be a blob that was compressed with <code>COMPRESS()</code>.</p>
125 | <h2 id="uuids">UUIDs</h2>
126 | <h3 id="uuid">UUID()</h3>
127 | <ul>
128 | <li>UUID()</li>
129 | </ul>
130 | <p>Generate a new type 4 (Random) UUID and return it as a blob.</p>
131 | <h3 id="bin_to_uuid">BIN_TO_UUID()</h3>
132 | <ul>
133 | <li>BIN_TO_UUID(b)</li>
134 | </ul>
135 | <p>Convert a UUID blob to a string representation.</p>
136 | <h3 id="uuid_to_bin">UUID_TO_BIN()</h3>
137 | <ul>
138 | <li>UUID_TO_BIN(s)</li>
139 | </ul>
140 | <p>Convert a UUID string to a blob representation.</p>
141 | <h3 id="is_uuid">IS_UUID()</h3>
142 | <ul>
143 | <li>IS_UUID(s/b)</li>
144 | </ul>
145 | <p>Returns 1 if <code>s</code> is a valid string or blob representation of a UUID, 0 if not, and NULL if NULL.</p>
146 | <h1 id="aggregate-functions">Aggregate Functions</h1>
147 | <p>TODO: Aggregate versions of the digest functions?</p>
148 | </body>
149 | </html>
150 | 


--------------------------------------------------------------------------------
/docs/blob_funcs.md:
--------------------------------------------------------------------------------
  1 | % Extra blob functions
  2 | 
  3 | Introduction
  4 | ============
  5 | 
  6 | This Sqlite3 extension module adds assorted functions for working with
  7 | BLOB types (And strings). Requires OpenSSL.
  8 | 
  9 | Largely influenced by MySQL functions in features and names.
 10 | 
 11 | Scalar Functions
 12 | ================
 13 | 
 14 | Formatting
 15 | ----------
 16 | 
 17 | ### UNHEX()
 18 | 
 19 | * UNHEX(str)
 20 | 
 21 | Does the opposite of the built in `HEX()` function - given a string
 22 | that's Base16 encoded, returns the decoded BLOB. Returns `NULL` if
 23 | the string is not Base16 encoded or isn't an even length, or if
 24 | passed a `NULL`.
 25 | 
 26 | ### TO_BASE64()
 27 | 
 28 | * TO_BASE64(blob)
 29 | 
 30 | Returns `blob` encoded as a Base64 string. To match MySQL, the result
 31 | is broken up into multiple lines if long enough.
 32 | 
 33 | ### FROM_BASE64()
 34 | 
 35 | * FROM_BASE64(str)
 36 | 
 37 | Returns `str` decoded from Base64 into a BLOB. If the argument is
 38 | `NULL` or an invalid Base64 string, returns `NULL`.
 39 | 
 40 | Message Digests
 41 | ---------------
 42 | 
 43 | If any of these functions is passed a `NULL` argument, it returns `NULL`.
 44 | 
 45 | ### MD5() ###
 46 | 
 47 | * MD5(b)
 48 | 
 49 | Returns the MD5 digest of its blob argument as a Base16 encoded string.
 50 | 
 51 | ### SHA1() ####
 52 | 
 53 | * SHA1(b)
 54 | 
 55 | Returns the SHA1 digest of its blob argument as a Base16 encoded string.
 56 | 
 57 | ### SHA2() ####
 58 | 
 59 | * SHA2(b)
 60 | * SHA2(b, i)
 61 | 
 62 | Returns the `i`-bit SHA2 digest of its blob argument as a Base16
 63 | encoded string. `i` can be 224, 256, 384, 512, or 0 (Which is treated
 64 | as 256). Defaults to 256 if omitted.
 65 | 
 66 | ### SHA3() ###
 67 | 
 68 | * SHA3(b)
 69 | * SHA3(b, i)
 70 | 
 71 | Returns the `i`-bit SHA3 digest of its blob argument as a Base16
 72 | encoded string.  `i` can be 224, 256, 384, or 512. Defaults to 256 if
 73 | omitted.
 74 | 
 75 | Implementation taken from the `shathree` extension in the sqlite3
 76 | source tree. This version returns a Base16 string, not blob, though.
 77 | 
 78 | ### CREATE_DIGEST()
 79 | 
 80 | * CREATE_DIGEST(algo, b)
 81 | 
 82 | Returns the `algo` digest of `b` as a blob. `algo` can be any message
 83 | digest supported by OpenSSL, including but not limited to, 'md5', 'rmd160',
 84 | 'sha1', 'sha256', 'sha512', etc.
 85 | 
 86 | You can get the complete list with `openssl list --digest-commands` at
 87 | a shell.
 88 | 
 89 | ### HMAC()
 90 | 
 91 | * HMAC(algo, secret, b)
 92 | 
 93 | Returns the HMAC of `b`, using the `algo` message digest algorithm,
 94 | and secret `secret` as a Base16 encoded string.
 95 | 
 96 | ### CRC32()
 97 | 
 98 | * CRC32(b)
 99 | 
100 | Computes the [CRC-32] checksum of its blob argument and returns the
101 | result as an integer.
102 | 
103 | [CRC-32]: https://en.wikipedia.org/wiki/Cyclic_redundancy_check
104 | 
105 | ### CRC32C()
106 | 
107 | * CRC32C(b)
108 | 
109 | Computes the CRC-32C checksum of its blob argument and returns the
110 | result as an integer. Currently only available on x86 processors.
111 | 
112 | Encryption
113 | ----------
114 | 
115 | ### AES_ENCRYPT()
116 | 
117 | * AES_ENCRYPT(str, key)
118 | 
119 | Returns a BLOB of `str` encrypted using the **AES-128-ECB** algorithm
120 | with key `key`. The key should be a 128 bit (16 byte) or larger blob
121 | or string.
122 | 
123 | ### AES_DECRYPT()
124 | 
125 | * AES_DECRYPT(aes, key)
126 | 
127 | Returns the decrypted `aes`, which is a BLOB holding padded,
128 | **AES-128-ECB** encrypted data.
129 | 
130 | Compression
131 | -----------
132 | 
133 | These functions require the zlib library and work with arbitrary blobs
134 | of data.
135 | 
136 | For compressing non-ASCII text, consider the Unicode-specific
137 | compression functions in the `string_funcs` module.
138 | 
139 | ### COMPRESS()
140 | 
141 | * COMPRESS(b)
142 | 
143 | Returns its blob argument as a zlib-compressed blob. The first four
144 | bytes are the length of the uncompressed text, in big-endian
145 | format. Note: This makes it incompatible with the [compress module] in
146 | the sqlite source tree, which uses a variable-width encoding.
147 | 
148 | [compress module]: https://www3.sqlite.org/cgi/src/artifact/dd4f8a6d0baccff3
149 | 
150 | ### UNCOMPRESS()
151 | 
152 | * UNCOMPRESS(b)
153 | 
154 | Returns its decompressed argument as a blob. `b` must be a blob that
155 | was compressed with `COMPRESS()`.
156 | 
157 | UUIDs
158 | -----
159 | 
160 | ### UUID()
161 | 
162 | * UUID()
163 | 
164 | Generate a new type 4 (Random) UUID and return it as a blob.
165 | 
166 | ### BIN_TO_UUID()
167 | 
168 | * BIN\_TO\_UUID(b)
169 | 
170 | Convert a UUID blob to a string representation.
171 | 
172 | ### UUID_TO_BIN()
173 | 
174 | * UUID\_TO\_BIN(s)
175 | 
176 | Convert a UUID string to a blob representation.
177 | 
178 | ### IS_UUID()
179 | 
180 | * IS_UUID(s/b)
181 | 
182 | Returns 1 if `s` is a valid string or blob representation of a UUID, 0 if not,
183 | and NULL if NULL.
184 | 
185 | Aggregate Functions
186 | ===================
187 | 
188 | TODO: Aggregate versions of the digest functions?
189 | 


--------------------------------------------------------------------------------
/docs/bloom_filter1.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
 3 | <head>
 4 |   <meta charset="utf-8" />
 5 |   <meta name="generator" content="pandoc" />
 6 |   <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
 7 |   <title>Bloom Filter Virtual Table</title>
 8 |   <style type="text/css">
 9 |       code{white-space: pre-wrap;}
10 |       span.smallcaps{font-variant: small-caps;}
11 |       span.underline{text-decoration: underline;}
12 |       div.column{display: inline-block; vertical-align: top; width: 50%;}
13 |   </style>
14 |   <!--[if lt IE 9]>
15 |     <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
16 |   <![endif]-->
17 | </head>
18 | <body>
19 | <header>
20 | <h1 class="title">Bloom Filter Virtual Table</h1>
21 | </header>
22 | <nav id="TOC">
23 | <ul>
24 | <li><a href="#introduction">Introduction</a></li>
25 | <li><a href="#usage">Usage</a><ul>
26 | <li><a href="#creating-a-table">Creating a table</a></li>
27 | <li><a href="#populating-the-table">Populating the table</a></li>
28 | <li><a href="#querying-the-table">Querying the table</a></li>
29 | </ul></li>
30 | <li><a href="#example">Example</a></li>
31 | <li><a href="#implementation-notes">Implementation notes</a></li>
32 | </ul>
33 | </nav>
34 | <h1 id="introduction">Introduction</h1>
35 | <p><a href="https://en.wikipedia.org/wiki/Bloom_filter">Bloom filters</a> are a classic data structure for telling, in a fast and compact manner, if a given value has a chance of being present in a set. This extension module provides virtual tables that act as an interface to a bloom filter. They can be useful as a fast index to tell if a value is probably in a table or certainly isn’t.</p>
36 | <h1 id="usage">Usage</h1>
37 | <h2 id="creating-a-table">Creating a table</h2>
38 | <pre><code>CREATE VIRTUAL TABLE foo USING bloom_filter1(n, p, k);</code></pre>
39 | <p>The constructor takes three arguments:</p>
40 | <ul>
41 | <li><code>n</code> – the expected number of elements that will be stored in the filter.</li>
42 | <li><code>p</code> – the probability of false positives, as a number between 0 and 1.0. The higher the number, the higher the chance of a false positive.</li>
43 | <li><code>k</code> – the number of times to hash each value into the filter.</li>
44 | </ul>
45 | <p>The only mandatory one is <code>n</code>. If omitted, <code>p</code> defaults to <strong>0.01</strong> (1%), and an ideal value of <code>k</code> is calculated.</p>
46 | <h2 id="populating-the-table">Populating the table</h2>
47 | <pre><code>INSERT INTO foo VALUES (&#39;key 1&#39;), (&#39;key 2&#39;), ...;</code></pre>
48 | <p>All values are first converted to <code>BLOB</code>s and the resulting bytes are hashed. This means that integers and floating point numbers can produce funny results. If you insert <code>1.0</code>, looking for <code>1</code> won’t find it, but looking for <code>'1.0'</code> will.</p>
49 | <h2 id="querying-the-table">Querying the table</h2>
50 | <pre><code>SELECT * from foo(&#39;key 1&#39;);</code></pre>
51 | <p>Returns a single row with a single column set to <strong>1</strong> if the key is found in the filter. No rows are returned if the key is not present. This can be used with <code>EXISTS</code> and <code>IN</code> in queries.</p>
52 | <p>I’ve thought about making <code>MATCH</code> work too, but it has some issues - you can’t have multiple modules loaded at once that use it, and syntax would be ugly in its own way - you’d have to provide the table name as a string.</p>
53 | <h1 id="example">Example</h1>
54 | <pre><code>sqlite&gt; CREATE TABLE plants(name);
55 | sqlite&gt; CREATE VIRTUAL TABLE vegetables USING bloom_filter1(20);
56 | sqlite&gt; INSERT INTO plants VALUES (&#39;apple&#39;), (&#39;asparagus&#39;), (&#39;cabbage&#39;), (&#39;grass&#39;);
57 | sqlite&gt; INSERT INTO vegetables VALUES (&#39;asparagus&#39;), (&#39;cabbage&#39;);
58 | sqlite&gt; SELECT * FROM plants WHERE EXISTS (SELECT * FROM vegetables(name));
59 | asparagus
60 | cabbage
61 | sqlite&gt; SELECT * FROM plants WHERE 1 NOT IN vegetables(name);
62 | apple
63 | grass</code></pre>
64 | <h1 id="implementation-notes">Implementation notes</h1>
65 | <p>The underlying hash function is <a href="https://en.wikipedia.org/wiki/SipHash">SipHash</a>.</p>
66 | <p>For each virtual table <code>foo</code>, a backing table <code>foo_storage</code> is created, with one row. It has a blob holding the bloom filter, and some informational columns:</p>
67 | <ul>
68 | <li><code>n</code> – how many elements the table is intended for. The actual number of elements inserted into it is not tracked.</li>
69 | <li><code>m</code> – the number of bits used in the filter.</li>
70 | <li><code>p</code> – the false positive chance.</li>
71 | <li><code>k</code> – the number of hash functions used.</li>
72 | </ul>
73 | <p>The Sqlite3 incremental blob API is used to avoid reading and writing large amounts of data at once.</p>
74 | </body>
75 | </html>
76 | 


--------------------------------------------------------------------------------
/docs/bloom_filter1.md:
--------------------------------------------------------------------------------
 1 | % Bloom Filter Virtual Table
 2 | 
 3 | Introduction
 4 | ============
 5 | 
 6 | [Bloom filters] are a classic data structure for telling, in a fast
 7 | and compact manner, if a given value has a chance of being present in
 8 | a set. This extension module provides virtual tables that act as an
 9 | interface to a bloom filter. They can be useful as a fast index to
10 | tell if a value is probably in a table or certainly isn't.
11 | 
12 | [Bloom filters]: https://en.wikipedia.org/wiki/Bloom_filter
13 | 
14 | Usage
15 | =====
16 | 
17 | Creating a table
18 | ----------------
19 | 
20 |     CREATE VIRTUAL TABLE foo USING bloom_filter1(n, p, k);
21 |     
22 | The constructor takes three arguments:
23 | 
24 | * `n` -- the expected number of elements that will be stored in the filter.
25 | * `p` -- the probability of false positives, as a number between 0 and
26 |   1.0. The higher the number, the higher the chance of a false
27 |   positive.
28 | * `k` -- the number of times to hash each value into the filter.
29 | 
30 | The only mandatory one is `n`. If omitted, `p` defaults to **0.01**
31 | (1%), and an ideal value of `k` is calculated.
32 | 
33 | Populating the table
34 | --------------------
35 | 
36 |     INSERT INTO foo VALUES ('key 1'), ('key 2'), ...;
37 | 
38 | All values are first converted to `BLOB`s and the resulting bytes are
39 | hashed. This means that integers and floating point numbers can
40 | produce funny results. If you insert `1.0`, looking for `1` won't find
41 | it, but looking for `'1.0'` will.
42 | 
43 | Querying the table
44 | ------------------
45 | 
46 |     SELECT * from foo('key 1');
47 | 
48 | Returns a single row with a single column set to **1** if the key is
49 | found in the filter. No rows are returned if the key is not
50 | present. This can be used with `EXISTS` and `IN` in queries.
51 | 
52 | I've thought about making `MATCH` work too, but it has some issues -
53 | you can't have multiple modules loaded at once that use it, and syntax
54 | would be ugly in its own way - you'd have to provide the table name as
55 | a string.
56 | 
57 | Example
58 | =======
59 | 
60 |     sqlite> CREATE TABLE plants(name);
61 |     sqlite> CREATE VIRTUAL TABLE vegetables USING bloom_filter1(20);
62 |     sqlite> INSERT INTO plants VALUES ('apple'), ('asparagus'), ('cabbage'), ('grass');
63 |     sqlite> INSERT INTO vegetables VALUES ('asparagus'), ('cabbage');
64 |     sqlite> SELECT * FROM plants WHERE EXISTS (SELECT * FROM vegetables(name));
65 |     asparagus
66 |     cabbage
67 |     sqlite> SELECT * FROM plants WHERE 1 NOT IN vegetables(name);
68 |     apple
69 |     grass
70 |     
71 | Implementation notes
72 | ====================
73 | 
74 | The underlying hash function is [SipHash].
75 | 
76 | For each virtual table `foo`, a backing table `foo_storage` is
77 | created, with one row. It has a blob holding the bloom filter, and
78 | some informational columns:
79 | 
80 | * `n` -- how many elements the table is intended for. The actual number
81 |   of elements inserted into it is not tracked.
82 | * `m` -- the number of bits used in the filter.
83 | * `p` -- the false positive chance.
84 | * `k` -- the number of hash functions used.
85 | 
86 | The Sqlite3 incremental blob API is used to avoid reading and writing
87 | large amounts of data at once.
88 | 
89 | [SipHash]: https://en.wikipedia.org/wiki/SipHash
90 | 


--------------------------------------------------------------------------------
/docs/json_funcs.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
 3 | <head>
 4 |   <meta charset="utf-8" />
 5 |   <meta name="generator" content="pandoc" />
 6 |   <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
 7 |   <title>Extra JSON functions</title>
 8 |   <style type="text/css">
 9 |       code{white-space: pre-wrap;}
10 |       span.smallcaps{font-variant: small-caps;}
11 |       span.underline{text-decoration: underline;}
12 |       div.column{display: inline-block; vertical-align: top; width: 50%;}
13 |   </style>
14 |   <!--[if lt IE 9]>
15 |     <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
16 |   <![endif]-->
17 | </head>
18 | <body>
19 | <header>
20 | <h1 class="title">Extra JSON functions</h1>
21 | </header>
22 | <nav id="TOC">
23 | <ul>
24 | <li><a href="#introduction">Introduction</a></li>
25 | <li><a href="#functions">Functions</a></li>
26 | </ul>
27 | </nav>
28 | <h1 id="introduction">Introduction</h1>
29 | <p>This Sqlite3 extension adds some extra JSON handling functions to supplement the standard <a href="https://www.sqlite.org/json1.html">JSON1</a> extension. They’re mostly inspired by <a href="https://dev.mysql.com/doc/refman/8.0/en/json-function-reference.html">MySQL JSON functions</a>.</p>
30 | <h1 id="functions">Functions</h1>
31 | <h3 id="json_equal">JSON_EQUAL()</h3>
32 | <ul>
33 | <li>JSON_EQUAL(json, json)</li>
34 | </ul>
35 | <p>Returns 1 if the two JSON values are equivalent, 0 if not.</p>
36 | <h3 id="json_length">JSON_LENGTH()</h3>
37 | <ul>
38 | <li>JSON_LENGTH(json)</li>
39 | <li>JSON_LENGTH(json, path)</li>
40 | </ul>
41 | <p>If <code>json</code> (Either the entire object, or the part of it at <code>path</code>) is an object or array, return the number of elements in it. Otherwise, returns 1.</p>
42 | <h3 id="json_pretty">JSON_PRETTY()</h3>
43 | <ul>
44 | <li>JSON_PRETTY(json)</li>
45 | </ul>
46 | <p>Returns <code>json</code>, pretty-printed for human readability. The exact format depends on the version of sqlite3 that the extension was compiled against; 3.24 and newer produce MySQL style output, older produces <a href="https://github.com/DaveGamble/cJSON">cJSON</a> style.</p>
47 | <h3 id="json_keys">JSON_KEYS()</h3>
48 | <ul>
49 | <li>JSON_KEYS(json)</li>
50 | <li>JSON_KEYS(json, path)</li>
51 | </ul>
52 | <p>Returns a JSON array of the keys of the given JSON object.</p>
53 | <h3 id="json_contains_path">JSON_CONTAINS_PATH()</h3>
54 | <ul>
55 | <li>JSON_CONTAINS_PATH(json, path)</li>
56 | </ul>
57 | <p>Returns 1 if the json object has data at the given path, 0 otherwise.</p>
58 | </body>
59 | </html>
60 | 


--------------------------------------------------------------------------------
/docs/json_funcs.md:
--------------------------------------------------------------------------------
 1 | % Extra JSON functions
 2 | 
 3 | Introduction
 4 | ============
 5 | 
 6 | This Sqlite3 extension adds some extra JSON handling functions to
 7 | supplement the standard [JSON1] extension. They're mostly inspired by
 8 | [MySQL JSON functions].
 9 | 
10 | [JSON1]: https://www.sqlite.org/json1.html
11 | [MySQL JSON functions]: https://dev.mysql.com/doc/refman/8.0/en/json-function-reference.html
12 | 
13 | Functions
14 | =========
15 | 
16 | ### JSON_EQUAL()
17 | 
18 | * JSON_EQUAL(json, json)
19 | 
20 | Returns 1 if the two JSON values are equivalent, 0 if not.
21 | 
22 | ### JSON_LENGTH()
23 | 
24 | * JSON_LENGTH(json)
25 | * JSON_LENGTH(json, path)
26 | 
27 | If `json` (Either the entire object, or the part of it at `path`) is
28 | an object or array, return the number of elements in it. Otherwise,
29 | returns 1.
30 | 
31 | ### JSON_PRETTY()
32 | 
33 | * JSON_PRETTY(json)
34 | 
35 | Returns `json`, pretty-printed for human readability. The exact format
36 | depends on the version of sqlite3 that the extension was compiled
37 | against; 3.24 and newer produce MySQL style output, older produces
38 | [cJSON] style.
39 | 
40 | [cJSON]: https://github.com/DaveGamble/cJSON
41 | 
42 | ### JSON_KEYS()
43 | 
44 | * JSON_KEYS(json)
45 | * JSON_KEYS(json, path)
46 | 
47 | Returns a JSON array of the keys of the given JSON object.
48 | 
49 | ### JSON_CONTAINS_PATH()
50 | 
51 | * JSON_CONTAINS_PATH(json, path)
52 | 
53 | Returns 1 if the json object has data at the given path, 0 otherwise.
54 | 


--------------------------------------------------------------------------------
/docs/math_funcs.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
  3 | <head>
  4 |   <meta charset="utf-8" />
  5 |   <meta name="generator" content="pandoc" />
  6 |   <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
  7 |   <title>Math and Numeric Functions</title>
  8 |   <style type="text/css">
  9 |       code{white-space: pre-wrap;}
 10 |       span.smallcaps{font-variant: small-caps;}
 11 |       span.underline{text-decoration: underline;}
 12 |       div.column{display: inline-block; vertical-align: top; width: 50%;}
 13 |   </style>
 14 |   <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/MathJax.js?config=TeX-AMS_CHTML-full" type="text/javascript"></script>
 15 |   <!--[if lt IE 9]>
 16 |     <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
 17 |   <![endif]-->
 18 | </head>
 19 | <body>
 20 | <header>
 21 | <h1 class="title">Math and Numeric Functions</h1>
 22 | </header>
 23 | <nav id="TOC">
 24 | <ul>
 25 | <li><a href="#introduction">Introduction</a></li>
 26 | <li><a href="#scalar-functions">Scalar Functions</a><ul>
 27 | <li><a href="#trigonometric">Trigonometric</a></li>
 28 | <li><a href="#roots-powers-and-logs">Roots, Powers and Logs</a></li>
 29 | <li><a href="#rounding">Rounding</a></li>
 30 | <li><a href="#math">Math</a></li>
 31 | <li><a href="#other">Other</a></li>
 32 | </ul></li>
 33 | <li><a href="#aggregate-functions">Aggregate Functions</a><ul>
 34 | <li><a href="#math-1">Math</a></li>
 35 | <li><a href="#bitwise">Bitwise</a></li>
 36 | <li><a href="#statistics">Statistics</a></li>
 37 | </ul></li>
 38 | </ul>
 39 | </nav>
 40 | <h1 id="introduction">Introduction</h1>
 41 | <p>Mostly lifted from MySQL and Postgres. Unless otherwise documented, they do the same thing as the C function of the same name. Pass them <code>NULL</code>, get <code>NULL</code> back. Do something that would make them return a <code>NaN</code>, get <code>NULL</code> back. In the arguments, <em>d</em> means a floating point number, <em>i</em> means an integer. <em>d/i</em> means either.</p>
 42 | <h1 id="scalar-functions">Scalar Functions</h1>
 43 | <h2 id="trigonometric">Trigonometric</h2>
 44 | <ul>
 45 | <li>ACOS(d)</li>
 46 | <li>ACOSH(d)</li>
 47 | <li>ASIN(d)</li>
 48 | <li>ASINH(d)</li>
 49 | <li>ATAN(d)</li>
 50 | <li>ATAN2(d,d)</li>
 51 | <li>ATANH(d)</li>
 52 | <li>COS(d)</li>
 53 | <li>COSH(d)</li>
 54 | <li>COT(d) – Cotangent</li>
 55 | <li>SIN(d)</li>
 56 | <li>SINH(d)</li>
 57 | <li>TAN(d)</li>
 58 | <li>TANH(d)</li>
 59 | <li>DEGREES(d) – Radians to degrees</li>
 60 | <li>RADIANS(d) – Degrees to radians</li>
 61 | </ul>
 62 | <h2 id="roots-powers-and-logs">Roots, Powers and Logs</h2>
 63 | <ul>
 64 | <li>CBRT(d) – <span class="math inline">\(\sqrt[3]{x}\)</span></li>
 65 | <li>EXP(d) – <span class="math inline">\(e^{x}\)</span></li>
 66 | <li>EXP2(d) – <span class="math inline">\(2^{x}\)</span></li>
 67 | <li>EXPM1(d) – <span class="math inline">\(e^{x} - 1\)</span></li>
 68 | <li>HYPOT(d, d) – <span class="math inline">\(\sqrt{x^{2} + y^{2}}\)</span></li>
 69 | <li>LN(d) – <span class="math inline">\(\log_{e} x\)</span></li>
 70 | <li>LOG(d) – Base ℯ like MySQL, not base 10 like Postgresql.</li>
 71 | <li>LOG(d,d) – <span class="math inline">\(\log_{x} y\)</span></li>
 72 | <li>LOG1P(d) – <span class="math inline">\(\log_{e} (x+1)\)</span></li>
 73 | <li>LOG10(d) – <span class="math inline">\(\log_{10} x\)</span></li>
 74 | <li>LOG2(d) – <span class="math inline">\(\log_{2} x\)</span></li>
 75 | <li>POWER(d,d) – <span class="math inline">\(x^{y}\)</span></li>
 76 | <li>SQRT(d) – <span class="math inline">\(\sqrt{x}\)</span></li>
 77 | </ul>
 78 | <h2 id="rounding">Rounding</h2>
 79 | <ul>
 80 | <li>CEIL(d)</li>
 81 | <li>FLOOR(d)</li>
 82 | <li>ROUND(d) – Overrides the standard one-argument <code>ROUND()</code>. Rounds halfway cases away from zero.</li>
 83 | <li>TRUNC(d) – Rounds towards zero.</li>
 84 | </ul>
 85 | <h2 id="math">Math</h2>
 86 | <ul>
 87 | <li>DIV(i, i) – Integer division</li>
 88 | <li>MOD(i, i) – Integer remainder</li>
 89 | <li>SIGN(d/i)</li>
 90 | <li>PI() – π</li>
 91 | </ul>
 92 | <h2 id="other">Other</h2>
 93 | <ul>
 94 | <li>BIT_COUNT(i) – Returns the number of set bits in its integer argument.</li>
 95 | </ul>
 96 | <h1 id="aggregate-functions">Aggregate Functions</h1>
 97 | <h2 id="math-1">Math</h2>
 98 | <ul>
 99 | <li>PRODUCT(d) – compute the product of a group. Can be used as a window function.</li>
100 | </ul>
101 | <h2 id="bitwise">Bitwise</h2>
102 | <ul>
103 | <li>BIT_OR(i)</li>
104 | <li>BIT_XOR(i) – can be used as a window function</li>
105 | <li>BIT_AND(i)</li>
106 | </ul>
107 | <h2 id="statistics">Statistics</h2>
108 | <p>All of these can be used as window functions.</p>
109 | <ul>
110 | <li>CORR(d, d) – Correlation coefficient.</li>
111 | <li>COVAR_POP(d, d) – Population covariance.</li>
112 | <li>COVAR_SAMP(d, d) – Sample covariance.</li>
113 | <li>STDDEV_POP(d) – Population standard deviation.</li>
114 | <li>STDDEV_SAMP(d) – Sample standard deviation.</li>
115 | <li>VAR_POP(d) – Population variance.</li>
116 | <li>VAR_SAMP(d) – Sample variance.</li>
117 | <li>GEO_MEAN(d) – Geometric mean.</li>
118 | <li>HARM_MEAN(d) – Harmonic mean.</li>
119 | <li>MEDIAN(d)</li>
120 | <li>MODE(d)</li>
121 | <li>Q1(d) – 1st Quartile Value.</li>
122 | <li>Q3(d) – 3rd Quartile Value.</li>
123 | <li>IQR(d) – Interquartile Range.</li>
124 | </ul>
125 | <h3 id="linear-regression">Linear Regression</h3>
126 | <ul>
127 | <li>REGR_SLOPE(d,d) – The slope of a line.</li>
128 | <li>REGR_INTERCEPT(d, d) – The y-intercept of the regression line.</li>
129 | <li>REGR_COUNT(d, d) – The number of pairs used in regression calculations.</li>
130 | <li>REGR_R2(d, d) – The coefficient of determination for the regression.</li>
131 | <li>REGR_AVGX(d, d) – The average of the second values of the pairs.</li>
132 | <li>REGR_AVGY(d, d) – The average of the first values of the pairs.</li>
133 | <li>REGR_SXX(d, d)</li>
134 | <li>REGR_SYY(d, d)</li>
135 | <li>REGR_SXY(d, d)</li>
136 | </ul>
137 | </body>
138 | </html>
139 | 


--------------------------------------------------------------------------------
/docs/math_funcs.md:
--------------------------------------------------------------------------------
  1 | % Math and Numeric Functions
  2 | 
  3 | Introduction
  4 | ============
  5 | 
  6 | Mostly lifted from MySQL and Postgres. Unless otherwise documented,
  7 | they do the same thing as the C function of the same name. Pass them
  8 | `NULL`, get `NULL` back. Do something that would make them return a
  9 | `NaN`, get `NULL` back. In the arguments, *d* means a floating point
 10 | number, *i* means an integer. *d/i* means either.
 11 | 
 12 | Scalar Functions
 13 | ================
 14 | 
 15 | Trigonometric
 16 | -------------
 17 | 
 18 | * ACOS(d)
 19 | * ACOSH(d)
 20 | * ASIN(d)
 21 | * ASINH(d)
 22 | * ATAN(d)
 23 | * ATAN2(d,d)
 24 | * ATANH(d)
 25 | * COS(d)
 26 | * COSH(d)
 27 | * COT(d) -- Cotangent
 28 | * SIN(d)
 29 | * SINH(d)
 30 | * TAN(d)
 31 | * TANH(d)
 32 | * DEGREES(d) -- Radians to degrees 
 33 | * RADIANS(d) -- Degrees to radians
 34 | 
 35 | Roots, Powers and Logs
 36 | ----------------------
 37 | 
 38 | * CBRT(d) -- $\sqrt[3]{x}$
 39 | * EXP(d) -- $e^{x}$
 40 | * EXP2(d) -- $2^{x}$
 41 | * EXPM1(d) -- $e^{x} - 1$
 42 | * HYPOT(d, d) -- $\sqrt{x^{2} + y^{2}}$
 43 | * LN(d) -- $\log_{e} x$
 44 | * LOG(d) -- Base ℯ like MySQL, not base 10 like Postgresql.
 45 | * LOG(d,d) -- $\log_{x} y$
 46 | * LOG1P(d) -- $\log_{e} (x+1)$
 47 | * LOG10(d) -- $\log_{10} x$
 48 | * LOG2(d) -- $\log_{2} x$
 49 | * POWER(d,d) -- $x^{y}$
 50 | * SQRT(d) -- $\sqrt{x}$
 51 | 
 52 | Rounding
 53 | --------
 54 | 
 55 | * CEIL(d)
 56 | * FLOOR(d)
 57 | * ROUND(d) -- Overrides the standard one-argument `ROUND()`. Rounds
 58 |   halfway cases away from zero.
 59 | * TRUNC(d) -- Rounds towards zero.
 60 | 
 61 | Math
 62 | ----
 63 | 
 64 | * DIV(i, i) -- Integer division
 65 | * MOD(i, i) -- Integer remainder
 66 | * SIGN(d/i)
 67 | * PI() -- π
 68 | 
 69 | Other
 70 | -----
 71 | 
 72 | * BIT_COUNT(i) -- Returns the number of set bits in its integer argument.
 73 | 
 74 | Aggregate Functions
 75 | ===================
 76 | 
 77 | Math
 78 | ----
 79 | 
 80 | * PRODUCT(d) -- compute the product of a group. Can be used as a
 81 |   window function.
 82 | 
 83 | Bitwise
 84 | -------
 85 | 
 86 | * BIT_OR(i)
 87 | * BIT_XOR(i) -- can be used as a window function
 88 | * BIT_AND(i)
 89 | 
 90 | Statistics
 91 | ----------
 92 | 
 93 | All of these can be used as window functions.
 94 | 
 95 | * CORR(d, d) -- Correlation coefficient.
 96 | * COVAR_POP(d, d) -- Population covariance. 
 97 | * COVAR_SAMP(d, d) -- Sample covariance. 
 98 | * STDDEV_POP(d) -- Population standard deviation.
 99 | * STDDEV_SAMP(d) -- Sample standard deviation.
100 | * VAR_POP(d) -- Population variance.
101 | * VAR_SAMP(d) -- Sample variance.
102 | * GEO_MEAN(d) -- Geometric mean.
103 | * HARM_MEAN(d) -- Harmonic mean.
104 | * MEDIAN(d)
105 | * MODE(d)
106 | * Q1(d) -- 1st Quartile Value.
107 | * Q3(d) -- 3rd Quartile Value.
108 | * IQR(d) -- Interquartile Range.
109 | 
110 | ### Linear Regression
111 | 
112 | * REGR_SLOPE(d,d) -- The slope of a line.
113 | * REGR_INTERCEPT(d, d) -- The y-intercept of the regression line.
114 | * REGR_COUNT(d, d) -- The number of pairs used in regression calculations.
115 | * REGR_R2(d, d) -- The coefficient of determination for the regression.
116 | * REGR_AVGX(d, d) -- The average of the second values of the pairs.
117 | * REGR_AVGY(d, d) -- The average of the first values of the pairs.
118 | * REGR_SXX(d, d)
119 | * REGR_SYY(d, d)
120 | * REGR_SXY(d, d)
121 | 


--------------------------------------------------------------------------------
/docs/pcre2_funcs.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
 3 | <head>
 4 |   <meta charset="utf-8" />
 5 |   <meta name="generator" content="pandoc" />
 6 |   <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
 7 |   <title>PCRE2 regular expression functions</title>
 8 |   <style type="text/css">
 9 |       code{white-space: pre-wrap;}
10 |       span.smallcaps{font-variant: small-caps;}
11 |       span.underline{text-decoration: underline;}
12 |       div.column{display: inline-block; vertical-align: top; width: 50%;}
13 |   </style>
14 |   <!--[if lt IE 9]>
15 |     <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
16 |   <![endif]-->
17 | </head>
18 | <body>
19 | <header>
20 | <h1 class="title">PCRE2 regular expression functions</h1>
21 | </header>
22 | <nav id="TOC">
23 | <ul>
24 | <li><a href="#introduction">Introduction</a></li>
25 | <li><a href="#functions">Functions</a><ul>
26 | <li><a href="#matching">Matching</a></li>
27 | <li><a href="#informational">Informational</a></li>
28 | </ul></li>
29 | <li><a href="#to-do">To-Do</a></li>
30 | </ul>
31 | </nav>
32 | <h1 id="introduction">Introduction</h1>
33 | <p>This Sqlite3 extension module provides MySQL inspired regular expression functions using the <a href="https://www.pcre.org">PCRE2</a> engine. Every plain function is also present with <strong>PCRE_</strong> prepended to the name, so multiple modules that provide RE functions can coexist portably. (See the <code>string_funcs</code> module for versions using <a href="http://site.icu-project.org">ICU</a> regular expressions.) The default names are used by whichever module was loaded last.</p>
34 | <h1 id="functions">Functions</h1>
35 | <h2 id="matching">Matching</h2>
36 | <h3 id="regexp">REGEXP()</h3>
37 | <ul>
38 | <li>REGEXP(re, string)</li>
39 | <li>string REGEXP re</li>
40 | </ul>
41 | <p>Returns 1 if <code>string</code> matches against <code>re</code>. The entire string must match.</p>
42 | <h2 id="informational">Informational</h2>
43 | <h3 id="pcre_version">PCRE_VERSION()</h3>
44 | <p>Returns the version of PCRE2 being used.</p>
45 | <h3 id="pcre_unicode_version">PCRE_UNICODE_VERSION()</h3>
46 | <p>Returns the version of Unicode that PCRE2 is using.</p>
47 | <h1 id="to-do">To-Do</h1>
48 | <ul>
49 | <li>Add the rest of the functions.</li>
50 | </ul>
51 | </body>
52 | </html>
53 | 


--------------------------------------------------------------------------------
/docs/pcre2_funcs.md:
--------------------------------------------------------------------------------
 1 | % PCRE2 regular expression functions
 2 | 
 3 | Introduction
 4 | ============
 5 | 
 6 | This Sqlite3 extension module provides MySQL inspired regular
 7 | expression functions using the [PCRE2] engine. Every plain function is
 8 | also present with **PCRE_** prepended to the name, so multiple modules
 9 | that provide RE functions can coexist portably. (See the
10 | `string_funcs` module for versions using [ICU] regular expressions.)
11 | The default names are used by whichever module was loaded last.
12 | 
13 | [PCRE2]: https://www.pcre.org
14 | [ICU]: http://site.icu-project.org
15 | 
16 | Functions
17 | =========
18 | 
19 | Matching
20 | --------
21 | 
22 | ### REGEXP()
23 | 
24 | * REGEXP(re, string)
25 | * string REGEXP re
26 | 
27 | Returns 1 if `string` matches against `re`. The entire string must match.
28 | 
29 | Informational
30 | -------------
31 | 
32 | ### PCRE_VERSION()
33 | 
34 | Returns the version of PCRE2 being used.
35 | 
36 | ### PCRE_UNICODE_VERSION()
37 | 
38 | Returns the version of Unicode that PCRE2 is using.
39 | 
40 | To-Do
41 | =====
42 | 
43 | * Add the rest of the functions.
44 | 


--------------------------------------------------------------------------------
/docs/posix_re_funcs.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
 3 | <head>
 4 |   <meta charset="utf-8" />
 5 |   <meta name="generator" content="pandoc" />
 6 |   <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
 7 |   <title>POSIX regular expression functions</title>
 8 |   <style type="text/css">
 9 |       code{white-space: pre-wrap;}
10 |       span.smallcaps{font-variant: small-caps;}
11 |       span.underline{text-decoration: underline;}
12 |       div.column{display: inline-block; vertical-align: top; width: 50%;}
13 |   </style>
14 |   <!--[if lt IE 9]>
15 |     <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
16 |   <![endif]-->
17 | </head>
18 | <body>
19 | <header>
20 | <h1 class="title">POSIX regular expression functions</h1>
21 | </header>
22 | <nav id="TOC">
23 | <ul>
24 | <li><a href="#introduction">Introduction</a></li>
25 | <li><a href="#functions">Functions</a><ul>
26 | <li><a href="#matching">Matching</a></li>
27 | </ul></li>
28 | <li><a href="#to-do">To-Do</a></li>
29 | </ul>
30 | </nav>
31 | <h1 id="introduction">Introduction</h1>
32 | <p>This Sqlite3 extension module provides regular expression functions using POSIX Extended and Basic syntax.</p>
33 | <p>See the <code>string_funcs</code> module for versions using <a href="http://site.icu-project.org">ICU</a> regular expressions and <code>pcre2_funcs</code> module for <a href="https://www.pcre.org">PCRE2</a> regular expressions.</p>
34 | <h1 id="functions">Functions</h1>
35 | <h2 id="matching">Matching</h2>
36 | <h3 id="regexp">REGEXP()</h3>
37 | <ul>
38 | <li>REGEXP(re, string)</li>
39 | <li>EXT_REGEXP(re, string)</li>
40 | <li>string REGEXP re</li>
41 | </ul>
42 | <p>Returns 1 if <code>string</code> matches against the Extended Regular Expression <code>re</code>.</p>
43 | <h3 id="basic_regexp">BASIC_REGEXP()</h3>
44 | <ul>
45 | <li>BASIC_REGEXP(re, string)</li>
46 | </ul>
47 | <p>Returns 1 if <code>string</code> matches against the Basic Regular Expression <code>re</code>.</p>
48 | <h1 id="to-do">To-Do</h1>
49 | <ul>
50 | <li>Add the rest of the MySQL RE functions.</li>
51 | </ul>
52 | </body>
53 | </html>
54 | 


--------------------------------------------------------------------------------
/docs/posix_re_funcs.md:
--------------------------------------------------------------------------------
 1 | % POSIX regular expression functions
 2 | 
 3 | Introduction
 4 | ============
 5 | 
 6 | This Sqlite3 extension module provides regular expression functions
 7 | using POSIX Extended and Basic syntax.
 8 | 
 9 | See the `string_funcs` module for versions using [ICU] regular
10 | expressions and `pcre2_funcs` module for [PCRE2] regular expressions.
11 | 
12 | [PCRE2]: https://www.pcre.org
13 | [ICU]: http://site.icu-project.org
14 | 
15 | Functions
16 | =========
17 | 
18 | Matching
19 | --------
20 | 
21 | ### REGEXP()
22 | 
23 | * REGEXP(re, string)
24 | * EXT_REGEXP(re, string)
25 | * string REGEXP re
26 | 
27 | Returns 1 if `string` matches against the Extended Regular Expression `re`. 
28 | 
29 | ### BASIC_REGEXP()
30 | 
31 | * BASIC_REGEXP(re, string)
32 | 
33 | Returns 1 if `string` matches against the Basic Regular Expression `re`.
34 | 
35 | To-Do
36 | =====
37 | 
38 | * Add the rest of the MySQL RE functions.
39 | 


--------------------------------------------------------------------------------
/docs/string_funcs.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
  3 | <head>
  4 |   <meta charset="utf-8" />
  5 |   <meta name="generator" content="pandoc" />
  6 |   <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
  7 |   <title>Extra String Functions</title>
  8 |   <style type="text/css">
  9 |       code{white-space: pre-wrap;}
 10 |       span.smallcaps{font-variant: small-caps;}
 11 |       span.underline{text-decoration: underline;}
 12 |       div.column{display: inline-block; vertical-align: top; width: 50%;}
 13 |   </style>
 14 |   <!--[if lt IE 9]>
 15 |     <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
 16 |   <![endif]-->
 17 | </head>
 18 | <body>
 19 | <header>
 20 | <h1 class="title">Extra String Functions</h1>
 21 | </header>
 22 | <nav id="TOC">
 23 | <ul>
 24 | <li><a href="#introduction">Introduction</a></li>
 25 | <li><a href="#scalar-functions">Scalar Functions</a><ul>
 26 | <li><a href="#informative">Informative</a></li>
 27 | <li><a href="#case-mapping">Case Mapping</a></li>
 28 | <li><a href="#text-extraction">Text Extraction</a></li>
 29 | <li><a href="#normalization">Normalization</a></li>
 30 | <li><a href="#other-conversions">Other conversions</a></li>
 31 | <li><a href="#unicode-text-compression">Unicode Text Compression</a></li>
 32 | <li><a href="#regular-expressions">Regular Expressions</a></li>
 33 | <li><a href="#other-functions">Other functions</a></li>
 34 | </ul></li>
 35 | <li><a href="#collations">Collations</a><ul>
 36 | <li><a href="#functions">Functions</a></li>
 37 | <li><a href="#predefined-collation-types">Predefined collation types</a></li>
 38 | <li><a href="#examples">Examples</a></li>
 39 | </ul></li>
 40 | </ul>
 41 | </nav>
 42 | <h1 id="introduction">Introduction</h1>
 43 | <p>This Sqlite3 extension module adds extra functionality for dealing with text in SQL queries, with an emphasis on Unicode. It depends on <a href="http://site.icu-project.org/">ICU</a> for most of its features. Many functions are inspired by MySQL and Postgresql string functions.</p>
 44 | <p>Since the standard <a href="https://www.sqlite.org/src/dir?ci=cdb68d2c64e453fd&amp;name=ext/icu">ICU extension</a> itself doesn’t seem to be compiled into or otherwise provided by many OS’s sqlite3 packages, it is included as part of this one. See that documentation for details about what it provides.</p>
 45 | <p>If you do a lot of things in your queries with Unicode text, or even just use Unicode-aware collations on index columns, consider setting the encoding of your databases to UTF-16 when creating them. Most <a href="http://site.icu-project.org/">ICU</a> functions work on UTF-16 strings, so this reduces the amount of converting to and from UTF-8.</p>
 46 | <h1 id="scalar-functions">Scalar Functions</h1>
 47 | <h2 id="informative">Informative</h2>
 48 | <h3 id="icu_version">ICU_VERSION()</h3>
 49 | <ul>
 50 | <li>ICU_VERSION()</li>
 51 | </ul>
 52 | <p>Returns the version of the ICU library being used.</p>
 53 | <h3 id="unicode_version">UNICODE_VERSION()</h3>
 54 | <ul>
 55 | <li>UNICODE_VERSION()</li>
 56 | </ul>
 57 | <p>Returns the version of Unicode understood by ICU.</p>
 58 | <h3 id="char_name">CHAR_NAME()</h3>
 59 | <ul>
 60 | <li>CHAR_NAME(c)</li>
 61 | </ul>
 62 | <p>Returns the Unicode name of the first codepoint in <code>c</code>, which can be an integer or string.</p>
 63 | <h3 id="script_name">SCRIPT_NAME()</h3>
 64 | <ul>
 65 | <li>SCRIPT_NAME(c)</li>
 66 | </ul>
 67 | <p>Returns the Unicode script of the first codepoint in <code>c</code>, which can be an integer or string.</p>
 68 | <h3 id="gclength">GCLENGTH()</h3>
 69 | <ul>
 70 | <li>GCLENGTH(string)</li>
 71 | <li>GCLENGTH(string, locale)</li>
 72 | </ul>
 73 | <p>Returns the number of <a href="http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries">extended grapheme clusters</a> in <code>string</code>. This will be less than or equal to <code>LENGTH(string)</code>, which returns the number of <em>code points</em>.</p>
 74 | <h2 id="case-mapping">Case Mapping</h2>
 75 | <h3 id="upper">UPPER()</h3>
 76 | <ul>
 77 | <li>UPPER(string)</li>
 78 | <li>UPPER(string, locale)</li>
 79 | </ul>
 80 | <p>Returns <code>string</code> converted to uppercase, with an optional <code>locale</code> for specific rules.</p>
 81 | <h3 id="lower">LOWER()</h3>
 82 | <ul>
 83 | <li>LOWER(string)</li>
 84 | <li>LOWER(string, locale)</li>
 85 | </ul>
 86 | <p>Returns <code>string</code> converted to lowercase, with an optional <code>locale</code> for specific rules.</p>
 87 | <h3 id="title">TITLE()</h3>
 88 | <ul>
 89 | <li>TITLE(string)</li>
 90 | <li>TITLE(string, locale)</li>
 91 | <li>INITCAP(string)</li>
 92 | </ul>
 93 | <p>Returns <code>string</code> converted to lowercase, and the first letter of each word titlecased. The optional <code>locale</code> argument uses specific casing rules, like with <code>UPPER()</code> and <code>LOWER()</code>.</p>
 94 | <p>If <code>string</code> is <code>NULL</code>, returns <code>NULL</code>.</p>
 95 | <h3 id="casefold">CASEFOLD()</h3>
 96 | <ul>
 97 | <li>CASEFOLD(string)</li>
 98 | </ul>
 99 | <p>Returns a case-folded version of <code>string</code>.</p>
100 | <p>If <code>string</code> is <code>NULL</code>, returns <code>NULL</code>.</p>
101 | <h2 id="text-extraction">Text Extraction</h2>
102 | <p>Sqlite3 provides one function, <code>SUBSTR()</code>, for extracting text from a string. It has the major drawback that it treats one Unicode code point as one character. As soon as you start getting outside of the Latin characters (And even in them if dealing with text in NFD format), that’s not true. It’s very easy to cut off a base character’s following combining characters, for example, with undesirable results.</p>
103 | <p>The entire question of “What is a character?” gets very complicated fast when it comes to Unicode. The following functions consider characters to be <a href="http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries">extended grapheme clusters</a>, which means they <em>usually</em> do what people expect.</p>
104 | <h3 id="scalar-functions-1">Scalar Functions</h3>
105 | <h4 id="gcleft">GCLEFT()</h4>
106 | <ul>
107 | <li>GCLEFT(string, len)</li>
108 | <li>GCLEFT(string, len, locale)</li>
109 | </ul>
110 | <p>Returns the first <code>len</code> <a href="http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries">extended grapheme clusters</a> from <code>string</code>.</p>
111 | <p>If <code>len</code> is negative, returns all but the last <code>abs(len)</code> clusters.</p>
112 | <h4 id="gcright">GCRIGHT()</h4>
113 | <ul>
114 | <li>GCRIGHT(string, len)</li>
115 | <li>GCRIGHT(string, len, locale)</li>
116 | </ul>
117 | <p>Returns the last <code>len</code> <a href="http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries">extended grapheme clusters</a> from <code>string</code>.</p>
118 | <p>If <code>len</code> is negative, returns all but the first <code>abs(len)</code> clusters.</p>
119 | <h4 id="gcsubstr">GCSUBSTR()</h4>
120 | <ul>
121 | <li>GCSUBSTR(string, start, len)</li>
122 | <li>GCSUBSTR(string, start)</li>
123 | <li>GCSUBSTR(string, start, len, locale)</li>
124 | </ul>
125 | <p>The <code>GCSUBSTR(string, start, len)</code> function returns a substring of input <code>string</code> that begins with the <code>start</code>-th extended grapheme cluster and which is <code>len</code> clusters long. If <code>len</code> is omitted or -1 then <code>GCSUBSTR(string, start)</code> returns all clusters through the end of the string beginning with the <code>start</code>-th. The left-most cluster of <code>string</code> is number 1.</p>
126 | <h3 id="table-valued-functions">Table Valued Functions</h3>
127 | <p>These functions use Unicode breaking algorithms from UAX#29 to split a string into its component tokens, one row per token. Each row has three columns: <code>value</code>, a string holding the token, <code>start</code>, the offset of the starting code point in the original string (The first codepoint is index 1), and <code>len</code>, the length of the token in code points. <code>substr(original, start, len)</code> will thus equal <code>value</code>.</p>
128 | <h4 id="graphemes">GRAPHEMES()</h4>
129 | <ul>
130 | <li>GRAPHEMES(string)</li>
131 | <li>GRAPHEMES(string, locale)</li>
132 | </ul>
133 | <p>Splits its argument up into individual extended grapheme clusters, optionally using a specific locale’s rules.</p>
134 | <h4 id="words">WORDS()</h4>
135 | <ul>
136 | <li>WORDS(string)</li>
137 | <li>WORDS(string, locale)</li>
138 | </ul>
139 | <p>Splits its argument into words, and the gaps between words, optionally using a specific locale’s rules.</p>
140 | <p>To get only words, filter for only odd <code>rowid</code>s (Assuming the string starts with a word and not whitespace).</p>
141 | <h4 id="sentences">SENTENCES()</h4>
142 | <ul>
143 | <li>SENTENCES(string)</li>
144 | <li>SENTENCES(string, locale)</li>
145 | </ul>
146 | <p>Splits its argument into sentences.</p>
147 | <h4 id="lines">LINES()</h4>
148 | <ul>
149 | <li>LINES(string)</li>
150 | <li>LINES(string, locale)</li>
151 | </ul>
152 | <p>Splits its argument up into good spots for line breaks per UAX#14.</p>
153 | <h2 id="normalization">Normalization</h2>
154 | <p>Functions for normalizing Unicode text, and normalized concatenation (Since naive joining of two normalized Unicode strings can produce a non-normalized string. Yay Unicode!).</p>
155 | <p>To-Do: Aggregate versions?</p>
156 | <h3 id="normalize">NORMALIZE()</h3>
157 | <ul>
158 | <li>NORMALIZE(string, form)</li>
159 | </ul>
160 | <p>Returns <code>string</code> normalized according to <code>form</code>, which can be one of <code>'NFC'</code>, <code>'NFD'</code>, <code>'NFKC'</code>, <code>'NFKD'</code>, or <code>'NFKCCaseFold'</code>.</p>
161 | <p>If <code>string</code> is <code>NULL</code>, returns <code>NULL</code>.</p>
162 | <h3 id="nfc">NFC()</h3>
163 | <ul>
164 | <li>NFC(string, …)</li>
165 | <li>NFC_WS(sep, string, …)</li>
166 | </ul>
167 | <p>Concatenates its non-NULL arguments together and returns the result in NFC. With one argument is equivalent to <code>NORMALIZE(string, 'NFC')</code>.</p>
168 | <p>The <code>_WS</code> version intersperses <code>sep</code> between strings.</p>
169 | <h3 id="nfd">NFD()</h3>
170 | <ul>
171 | <li>NFD(string, …)</li>
172 | <li>NFD_WS(sep, string, …)</li>
173 | </ul>
174 | <p>Concatenates its non-NULL arguments together and returns the result in NFD. With one argument is equivalent to <code>NORMALIZE(string, 'NFD')</code>.</p>
175 | <p>The <code>_WS</code> version intersperses <code>sep</code> between strings.</p>
176 | <h3 id="nfkc">NFKC()</h3>
177 | <ul>
178 | <li>NFKC(string, …)</li>
179 | <li>NFKC_WS(sep, string, …)</li>
180 | </ul>
181 | <p>Concatenates its non-NULL arguments together and returns the result in NFKC. With one argument is equivalent to <code>NORMALIZE(string, 'NFKC')</code>.</p>
182 | <p>The <code>_WS</code> version intersperses <code>sep</code> between strings.</p>
183 | <h3 id="nfkd">NFKD()</h3>
184 | <ul>
185 | <li>NFKD(string, …)</li>
186 | <li>NFKD_WS(sep, string, …)</li>
187 | </ul>
188 | <p>Concatenates its non-NULL arguments together and returns the result in NFKD. With one argument is equivalent to <code>NORMALIZE(string, 'NFKD')</code>.</p>
189 | <p>The <code>_WS</code> version intersperses <code>sep</code> between strings.</p>
190 | <h2 id="other-conversions">Other conversions</h2>
191 | <h3 id="to_ascii">TO_ASCII()</h3>
192 | <ul>
193 | <li>TO_ASCII(string)</li>
194 | </ul>
195 | <p>An enhanced version of <code>SPELLFIX1_TRANSLIT()</code> from the <em>spellfix1</em> extension. It converts Unicode text to ASCII, trying to gracefully downgrade many Latin accented characters and ligatures, transliterate Greek and Cyrillic characters, smart quotes, smart dashes, etc. It knows about more conversions than its inspiration, can handle characters outside the BMP, and deals with combining characters in a more intelligent way.</p>
196 | <h3 id="convert_to">CONVERT_TO()</h3>
197 | <ul>
198 | <li>CONVERT_TO(string, charset)</li>
199 | <li>CONVERT_TO(string, charset, substitution)</li>
200 | </ul>
201 | <p>Convert a Unicode string to the given character encoding, and return the result as a blob. The optional <code>substitution</code> string is used to replace characters that can’t be represented in the target encoding.</p>
202 | <h3 id="convert_from">CONVERT_FROM()</h3>
203 | <ul>
204 | <li>CONVERT_FROM(blob, charset)</li>
205 | </ul>
206 | <p>Treats <code>blob</code> as being encoded in the given character encoding, and returns it converted to a Unicode string.</p>
207 | <h3 id="strptime">STRPTIME()</h3>
208 | <ul>
209 | <li>STRPTIME(time-format, time-string)</li>
210 | </ul>
211 | <p>Wrapper for the C <code>strptime()</code> function. Returns a unixepoch time, or <code>null</code> on errors.</p>
212 | <p>Example:</p>
213 | <pre><code>SELECT date(strptime(&#39;%m/%d/%Y&#39;, &#39;10/08/2019&#39;), &#39;unixepoch&#39;);
214 | -&gt; 2019-10-08</code></pre>
215 | <h2 id="unicode-text-compression">Unicode Text Compression</h2>
216 | <p>There are a few Unicode-specific text compression algorithms. They don’t have as good a compression ratio as more general purpose ones, but they have low overhead for compressing short strings. Could come in handy if you have a table with many short to medium length strings and are trying to save some space.</p>
217 | <h3 id="scsu_compress">SCSU_COMPRESS()</h3>
218 | <ul>
219 | <li>SCSU_COMPRESS(string)</li>
220 | </ul>
221 | <p>Returns a blob representing <code>string</code> compressed with <a href="https://en.wikipedia.org/wiki/Standard_Compression_Scheme_for_Unicode">SCSU</a>.</p>
222 | <h3 id="scsu_decompress">SCSU_DECOMPRESS()</h3>
223 | <ul>
224 | <li>SCSU_DECOMPRESS(blob)</li>
225 | </ul>
226 | <p>Decompresses <code>blob</code>, which should be <a href="https://en.wikipedia.org/wiki/Standard_Compression_Scheme_for_Unicode">SCSU</a> compressed Unicode text.</p>
227 | <h3 id="bocu_compress">BOCU_COMPRESS()</h3>
228 | <ul>
229 | <li>BOCU_COMPRESS(string)</li>
230 | </ul>
231 | <p>Returns a blob representing <code>string</code> compressed with <a href="https://en.wikipedia.org/wiki/Binary_Ordered_Compression_for_Unicode">BOCU-1</a>.</p>
232 | <h3 id="bocu_decompress">BOCU_DECOMPRESS()</h3>
233 | <ul>
234 | <li>BOCU_DECOMPRESS(blob)</li>
235 | </ul>
236 | <p>Decompresses <code>blob</code>, which should be <a href="https://en.wikipedia.org/wiki/Binary_Ordered_Compression_for_Unicode">BOCU-1</a> compressed Unicode text.</p>
237 | <h2 id="regular-expressions">Regular Expressions</h2>
238 | <p>MySQL-compatible regular expression functions. All also work when the name is prefixed by <strong>ICU_</strong>, to support coexisting with future extensions that use different engines - PCRE, RE2, etc. being loaded at the same time.</p>
239 | <p>The <code>match_type</code> string argument supports some extra options over MySQL:</p>
240 | <ul>
241 | <li><em>w</em> means to use Unicode word breaks instead of traditional ones.</li>
242 | <li><em>x</em> means that the regexp can have comments and whitespace.</li>
243 | <li><em>l</em> means to treat the regexp as a literal string to search for and not a regular expression.</li>
244 | </ul>
245 | <h3 id="regexp">REGEXP()</h3>
246 | <ul>
247 | <li>REGEXP(re, string)</li>
248 | <li>REGEXP(re, string, match_type)</li>
249 | <li>string REGEXP re</li>
250 | </ul>
251 | <p>Returns 1 if <code>string</code> matches <code>re</code>. The entire string must match. The three argument version is an extension over the normal ICU extension implementation.</p>
252 | <h3 id="regexp_instr">REGEXP_INSTR()</h3>
253 | <p>See <a href="https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-instr">MySQL REGEXP_INSTR()</a> documentation.</p>
254 | <p>If the <code>match_type</code> option has a digit in the range 0-9 in it, the position of that capturing group is returned instead of the complete match. 0 is the full match.</p>
255 | <h3 id="regexp_like">REGEXP_LIKE()</h3>
256 | <p>See <a href="https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-like">MySQL REGEXP_LIKE()</a> documentation.</p>
257 | <h3 id="regexp_replace">REGEXP_REPLACE()</h3>
258 | <p>See <a href="https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-replace">MySQL REGEXP_REPLACE()</a> documentation.</p>
259 | <p>This implementation currently only supports a <code>pos</code> argument of 1 and <code>occurrence</code> of 0 or 1. It also replaces tokens like <code>$N</code> in the replacement string with the N-th capture group.</p>
260 | <h3 id="regexp_substr">REGEXP_SUBSTR()</h3>
261 | <p>See <a href="https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-substr">MySQL REGEXP_SUBSTR()</a> documentation.</p>
262 | <p>If the <code>match_type</code> option has a digit in the range 0-9 in it, that capturing group is returned instead of the complete match. 0 is the full match.</p>
263 | <h2 id="other-functions">Other functions</h2>
264 | <h3 id="concat">CONCAT()</h3>
265 | <ul>
266 | <li>CONCAT(string, …)</li>
267 | <li>MYSQL_CONCAT(string, …)</li>
268 | <li>CONCAT_WS(sep, string, …)</li>
269 | </ul>
270 | <p>Returns a string concatenating its arguments together. If <code>MYSQL_CONCAT()</code> gets a <code>NULL</code> argument, it returns <code>NULL</code>. The other versions just skip those arguments. The <code>_WS</code> version puts <code>sep</code> between strings.</p>
271 | <h3 id="repeat">REPEAT()</h3>
272 | <ul>
273 | <li>REPEAT(string, count)</li>
274 | <li>REPEAT(string, count, form)</li>
275 | </ul>
276 | <p>Returns a new string created by repeating <code>string</code> <code>count</code> times. If a third argument is given, it’s the normalization form to use for the result.</p>
277 | <h3 id="confusable">CONFUSABLE()</h3>
278 | <ul>
279 | <li>CONFUSABLE(string1, string2)</li>
280 | </ul>
281 | <p>Returns 1 if its two arguments are the same or can easily be visually mistaken to be the same, 0 if they’re distinct.</p>
282 | <p>(If only one of the arguments is a string literal or bound to a placeholder, it should be the first one.)</p>
283 | <h1 id="collations">Collations</h1>
284 | <p>For when <code>BINARY</code> and <code>NOCASE</code> aren’t good enough.</p>
285 | <h2 id="functions">Functions</h2>
286 | <h3 id="icu_load_collationlocale-name">ICU_LOAD_COLLATION(locale, name)</h3>
287 | <p>See the <a href="https://www.sqlite.org/src/dir?ci=cdb68d2c64e453fd&amp;name=ext/icu">ICU extension</a> documentation.</p>
288 | <h2 id="predefined-collation-types">Predefined collation types</h2>
289 | <h3 id="codepoint">CODEPOINT</h3>
290 | <p>Compares code points instead of code units like <code>BINARY</code> does. Makes a difference when comparing UTF-16 text with surrogate pairs.</p>
291 | <h3 id="unocase">UNOCASE</h3>
292 | <p>Unicode-aware case-insensitive ordering. Compares case-folded code points without any locale-specific rules. If doing lots of comparisons, it’s better to use precomputed casefolded strings.</p>
293 | <h3 id="equiv">EQUIV</h3>
294 | <p>Unicode equivalence. The same string normalized in two different forms is equivalent. If comparing a lot of strings, it’s best to canonize them with the same normalization form.</p>
295 | <h3 id="enocase">ENOCASE</h3>
296 | <p>Case-insensitive Unicode equivalence. If comparing a lot of strings, it’s better to use precomputed case folded and normalized ones.</p>
297 | <h2 id="examples">Examples</h2>
298 | <pre><code>char(0x0122) = char(0x0123) COLLATE BINARY       =&gt; 0
299 | char(0x0122) = char(0x0123) COLLATE NOCASE       =&gt; 0
300 | char(0x0122) = char(0x0123) COLLATE UNOCASE      =&gt; 1
301 | char(0x0122) = nfd(char(0x0122)) COLLATE BINARY  =&gt; 0
302 | char(0x0122) = nfd(char(0x0122)) COLLATE EQUIV   =&gt; 1
303 | char(0x0122) = nfd(char(0x0123)) COLLATE EQUIV   =&gt; 0
304 | char(0x0122) = nfd(char(0x0123)) COLLATE ENOCASE =&gt; 1</code></pre>
305 | </body>
306 | </html>
307 | 


--------------------------------------------------------------------------------
/docs/string_funcs.md:
--------------------------------------------------------------------------------
  1 | % Extra String Functions
  2 | 
  3 | Introduction
  4 | ============
  5 | 
  6 | This Sqlite3 extension module adds extra functionality for dealing
  7 | with text in SQL queries, with an emphasis on Unicode. It depends on
  8 | [ICU] for most of its features. Many functions are inspired by MySQL
  9 | and Postgresql string functions.
 10 | 
 11 | Since the standard [ICU extension] itself doesn't seem to be compiled
 12 | into or otherwise provided by many OS's sqlite3 packages, it is
 13 | included as part of this one. See that documentation for details about
 14 | what it provides.
 15 | 
 16 | If you do a lot of things in your queries with Unicode text, or even
 17 | just use Unicode-aware collations on index columns, consider setting
 18 | the encoding of your databases to UTF-16 when creating them. Most
 19 | [ICU] functions work on UTF-16 strings, so this reduces the amount of
 20 | converting to and from UTF-8.
 21 | 
 22 | [ICU]: http://site.icu-project.org/
 23 | [ICU extension]: https://www.sqlite.org/src/dir?ci=cdb68d2c64e453fd&name=ext/icu
 24 | 
 25 | Scalar Functions
 26 | ================
 27 | 
 28 | Informative
 29 | -----------
 30 | 
 31 | ### ICU_VERSION()
 32 | 
 33 | * ICU_VERSION()
 34 | 
 35 | Returns the version of the ICU library being used.
 36 | 
 37 | ### UNICODE_VERSION()
 38 | 
 39 | * UNICODE_VERSION()
 40 | 
 41 | Returns the version of Unicode understood by ICU.
 42 | 
 43 | ### CHAR_NAME()
 44 | 
 45 | * CHAR_NAME(c)
 46 | 
 47 | Returns the Unicode name of the first codepoint in `c`, which can be
 48 | an integer or string.
 49 | 
 50 | ### SCRIPT_NAME()
 51 | 
 52 | * SCRIPT_NAME(c)
 53 | 
 54 | Returns the Unicode script of the first codepoint in `c`, which can be
 55 | an integer or string.
 56 | 
 57 | ### GCLENGTH()
 58 | 
 59 | * GCLENGTH(string)
 60 | * GCLENGTH(string, locale)
 61 | 
 62 | Returns the number of [extended grapheme clusters] in `string`. This
 63 | will be less than or equal to `LENGTH(string)`, which returns the
 64 | number of *code points*.
 65 | 
 66 | [extended grapheme clusters]: http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
 67 | 
 68 | Case Mapping
 69 | ------------
 70 | 
 71 | ### UPPER()
 72 | 
 73 | * UPPER(string)
 74 | * UPPER(string, locale)
 75 | 
 76 | Returns `string` converted to uppercase, with an optional `locale` for
 77 | specific rules.
 78 | 
 79 | ### LOWER()
 80 | 
 81 | * LOWER(string)
 82 | * LOWER(string, locale)
 83 | 
 84 | Returns `string` converted to lowercase, with an optional `locale` for
 85 | specific rules.
 86 | 
 87 | ### TITLE()
 88 | 
 89 | * TITLE(string)
 90 | * TITLE(string, locale)
 91 | * INITCAP(string)
 92 | 
 93 | Returns `string` converted to lowercase, and the first letter of each
 94 | word titlecased. The optional `locale` argument uses specific casing
 95 | rules, like with `UPPER()` and `LOWER()`.
 96 | 
 97 | If `string` is `NULL`, returns `NULL`.
 98 | 
 99 | ### CASEFOLD()
100 | 
101 | * CASEFOLD(string)
102 | 
103 | Returns a case-folded version of `string`.
104 | 
105 | If `string` is `NULL`, returns `NULL`.
106 | 
107 | Text Extraction
108 | ---------------
109 | 
110 | Sqlite3 provides one function, `SUBSTR()`, for extracting text from a
111 | string. It has the major drawback that it treats one Unicode code
112 | point as one character. As soon as you start getting outside of the
113 | Latin characters (And even in them if dealing with text in NFD
114 | format), that's not true. It's very easy to cut off a base character's
115 | following combining characters, for example, with undesirable results.
116 | 
117 | The entire question of "What is a character?" gets very complicated
118 | fast when it comes to Unicode. The following functions consider
119 | characters to be [extended grapheme clusters], which means they
120 | *usually* do what people expect.
121 | 
122 | ### Scalar Functions
123 | 
124 | #### GCLEFT()
125 | 
126 | * GCLEFT(string, len)
127 | * GCLEFT(string, len, locale)
128 | 
129 | Returns the first `len` [extended grapheme clusters] from `string`.
130 | 
131 | If `len` is negative, returns all but the last `abs(len)` clusters.
132 | 
133 | #### GCRIGHT()
134 | 
135 | * GCRIGHT(string, len)
136 | * GCRIGHT(string, len, locale)
137 | 
138 | Returns the last `len` [extended grapheme clusters] from `string`.
139 | 
140 | If `len` is negative, returns all but the first `abs(len)` clusters.
141 | 
142 | #### GCSUBSTR()
143 | 
144 | * GCSUBSTR(string, start, len)
145 | * GCSUBSTR(string, start)
146 | * GCSUBSTR(string, start, len, locale)
147 | 
148 | The `GCSUBSTR(string, start, len)` function returns a substring of
149 | input `string` that begins with the `start`-th extended grapheme
150 | cluster and which is `len` clusters long. If `len` is omitted or -1 then
151 | `GCSUBSTR(string, start)` returns all clusters through the end of the
152 | string beginning with the `start`-th. The left-most cluster of
153 | `string` is number 1.
154 | 
155 | ### Table Valued Functions
156 | 
157 | These functions use Unicode breaking algorithms from UAX#29 to split a
158 | string into its component tokens, one row per token. Each row has
159 | three columns: `value`, a string holding the token, `start`, the
160 | offset of the starting code point in the original string (The first
161 | codepoint is index 1), and `len`, the length of the token in code
162 | points. `substr(original, start, len)` will thus equal `value`.
163 | 
164 | #### GRAPHEMES()
165 | 
166 | * GRAPHEMES(string)
167 | * GRAPHEMES(string, locale)
168 | 
169 | Splits its argument up into individual extended grapheme clusters,
170 | optionally using a specific locale's rules.
171 | 
172 | #### WORDS()
173 | 
174 | * WORDS(string)
175 | * WORDS(string, locale)
176 | 
177 | Splits its argument into words, and the gaps between words,
178 | optionally using a specific locale's rules.
179 | 
180 | To get only words, filter for only odd `rowid`s (Assuming the string
181 | starts with a word and not whitespace).
182 | 
183 | #### SENTENCES()
184 | 
185 | * SENTENCES(string)
186 | * SENTENCES(string, locale)
187 | 
188 | Splits its argument into sentences.
189 | 
190 | #### LINES()
191 | 
192 | * LINES(string)
193 | * LINES(string, locale)
194 | 
195 | Splits its argument up into good spots for line breaks per UAX#14.
196 | 
197 | 
198 | Normalization
199 | -------------
200 | 
201 | Functions for normalizing Unicode text, and normalized concatenation
202 | (Since naive joining of two normalized Unicode strings can produce a
203 | non-normalized string. Yay Unicode!).
204 | 
205 | To-Do: Aggregate versions?
206 | 
207 | ### NORMALIZE()
208 | 
209 | * NORMALIZE(string, form)
210 | 
211 | Returns `string` normalized according to `form`, which can be one of
212 | `'NFC'`, `'NFD'`, `'NFKC'`, `'NFKD'`, or `'NFKCCaseFold'`.
213 | 
214 | If `string` is `NULL`, returns `NULL`.
215 | 
216 | ### NFC()
217 | 
218 | * NFC(string, ...)
219 | * NFC_WS(sep, string, ...)
220 | 
221 | Concatenates its non-NULL arguments together and returns the result in
222 | NFC. With one argument, it is equivalent to `NORMALIZE(string, 'NFC')`.
223 | 
224 | The `_WS` version intersperses `sep` between strings.
225 | 
226 | ### NFD()
227 | 
228 | * NFD(string, ...)
229 | * NFD_WS(sep, string, ...)
230 | 
231 | Concatenates its non-NULL arguments together and returns the result in
232 | NFD. With one argument, it is equivalent to `NORMALIZE(string, 'NFD')`.
233 | 
234 | The `_WS` version intersperses `sep` between strings.
235 | 
236 | ### NFKC()
237 | 
238 | * NFKC(string, ...)
239 | * NFKC_WS(sep, string, ...)
240 | 
241 | Concatenates its non-NULL arguments together and returns the result in
242 | NFKC. With one argument, it is equivalent to `NORMALIZE(string, 'NFKC')`.
243 | 
244 | The `_WS` version intersperses `sep` between strings.
245 | 
246 | ### NFKD()
247 | 
248 | * NFKD(string, ...)
249 | * NFKD_WS(sep, string, ...)
250 | 
251 | Concatenates its non-NULL arguments together and returns the result in
252 | NFKD. With one argument, it is equivalent to `NORMALIZE(string, 'NFKD')`.
253 | 
254 | The `_WS` version intersperses `sep` between strings.
255 | 
256 | Other conversions
257 | -----------------
258 | 
259 | ### TO_ASCII()
260 | 
261 | * TO_ASCII(string)
262 | 
263 | An enhanced version of `SPELLFIX1_TRANSLIT()` from the *spellfix1*
264 | extension. It converts Unicode text to ASCII, trying to gracefully
265 | downgrade many Latin accented characters and ligatures, transliterate
266 | Greek and Cyrillic characters, smart quotes, smart dashes, etc. It
267 | knows about more conversions than its inspiration, can handle
268 | characters outside the BMP, and deals with combining characters in a
269 | more intelligent way.
270 | 
271 | ### CONVERT_TO()
272 | 
273 | * CONVERT_TO(string, charset)
274 | * CONVERT_TO(string, charset, substitution)
275 | 
276 | Convert a Unicode string to the given character encoding, and return
277 | the result as a blob. The optional `substitution` string is used to
278 | replace characters that can't be represented in the target encoding.
279 | 
280 | ### CONVERT_FROM()
281 | 
282 | * CONVERT_FROM(blob, charset)
283 | 
284 | Treats `blob` as being encoded in the given character encoding, and
285 | returns it converted to a Unicode string.
286 | 
287 | ### STRPTIME()
288 | 
289 | * STRPTIME(time-format, time-string)
290 | 
291 | Wrapper for the C `strptime()` function. Returns a unixepoch time, or
292 | `null` on errors.
293 | 
294 | Example:
295 | 
296 |     SELECT date(strptime('%m/%d/%Y', '10/08/2019'), 'unixepoch');
297 |     -> 2019-10-08
298 |     
299 | 
300 | Unicode Text Compression
301 | ------------------------
302 | 
303 | There are a few Unicode-specific text compression algorithms. They
304 | don't have as good a compression ratio as more general purpose ones,
305 | but they have low overhead for compressing short strings. Could come
306 | in handy if you have a table with many short to medium length strings
307 | and are trying to save some space.
308 | 
309 | ### SCSU_COMPRESS()
310 | 
311 | * SCSU_COMPRESS(string)
312 | 
313 | Returns a blob representing `string` compressed with [SCSU].
314 | 
315 | ### SCSU_DECOMPRESS()
316 | 
317 | * SCSU_DECOMPRESS(blob)
318 | 
319 | Decompresses `blob`, which should be [SCSU] compressed Unicode text.
320 | 
321 | ### BOCU_COMPRESS()
322 | 
323 | * BOCU_COMPRESS(string)
324 | 
325 | Returns a blob representing `string` compressed with [BOCU-1].
326 | 
327 | ### BOCU_DECOMPRESS()
328 | 
329 | * BOCU_DECOMPRESS(blob)
330 | 
331 | Decompresses `blob`, which should be [BOCU-1] compressed Unicode text.
332 | 
333 | [SCSU]: https://en.wikipedia.org/wiki/Standard_Compression_Scheme_for_Unicode
334 | [BOCU-1]: https://en.wikipedia.org/wiki/Binary_Ordered_Compression_for_Unicode
335 | 
336 | Regular Expressions
337 | -------------------
338 | 
339 | MySQL-compatible regular expression functions. All also work when the
340 | name is prefixed by **ICU\_**, to support coexisting with future
341 | extensions that use different engines - PCRE, RE2, etc. being loaded
342 | at the same time.
343 | 
344 | The `match_type` string argument supports some extra options over MySQL:
345 | 
346 | * *w* means to use Unicode word breaks instead of traditional ones.
347 | * *x* means that the regexp can have comments and whitespace.
348 | * *l* means to treat the regexp as a literal string to search for and
349 |   not a regular expression.
350 | 
351 | ### REGEXP()
352 | 
353 | * REGEXP(re, string)
354 | * REGEXP(re, string, match_type)
355 | * string REGEXP re
356 | 
357 | Returns 1 if `string` matches `re`. The entire string must match. The
358 | three argument version is an extension over the normal ICU extension
359 | implementation.
360 | 
361 | ### REGEXP_INSTR()
362 | 
363 | See [MySQL REGEXP_INSTR()] documentation.
364 | 
365 | If the `match_type` option has a digit in the range 0-9 in it, the
366 | position of that capturing group is returned instead of the complete
367 | match. 0 is the full match.
368 | 
369 | [MySQL REGEXP_INSTR()]: https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-instr
370 | 
371 | ### REGEXP_LIKE()
372 | 
373 | See [MySQL REGEXP_LIKE()] documentation.
374 | 
375 | [MySQL REGEXP_LIKE()]: https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-like
376 | 
377 | ### REGEXP_REPLACE()
378 | 
379 | See [MySQL REGEXP_REPLACE()] documentation.
380 | 
381 | This implementation currently only supports a `pos` argument of 1 and
382 | `occurrence` of 0 or 1. It also replaces tokens like `$N` in the
383 | replacement string with the N-th capture group.
384 | 
385 | [MySQL REGEXP_REPLACE()]: https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-replace
386 | 
387 | ### REGEXP_SUBSTR()
388 | 
389 | See [MySQL REGEXP_SUBSTR()] documentation.
390 | 
391 | If the `match_type` option has a digit in the range 0-9 in it, that
392 | capturing group is returned instead of the complete match. 0 is the
393 | full match.
394 | 
395 | [MySQL REGEXP_SUBSTR()]: https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-substr
396 | 
397 | Other functions
398 | ---------------
399 | 
400 | ### CONCAT()
401 | 
402 | * CONCAT(string, ...)
403 | * MYSQL_CONCAT(string, ...)
404 | * CONCAT_WS(sep, string, ...)
405 | 
406 | Returns a string concatenating its arguments together. If
407 | `MYSQL_CONCAT()` gets a `NULL` argument, it returns `NULL`. The other
408 | versions just skip those arguments. The `_WS` version puts `sep`
409 | between strings.
410 | 
411 | ### REPEAT()
412 | 
413 | * REPEAT(string, count)
414 | * REPEAT(string, count, form)
415 | 
416 | Returns a new string created by repeating `string` `count` times. If a
417 | third argument is given, it's the normalization form to use for the
418 | result.
419 | 
420 | ### CONFUSABLE()
421 | 
422 | * CONFUSABLE(string1, string2)
423 | 
424 | Returns 1 if its two arguments are the same or can easily be visually
425 | mistaken to be the same, 0 if they're distinct.
426 | 
427 | (If only one of the arguments is a string literal or bound to a
428 | placeholder, it should be the first one.)
429 | 
430 | Collations
431 | ==========
432 | 
433 | For when `BINARY` and `NOCASE` aren't good enough.
434 | 
435 | Functions
436 | ---------
437 | 
438 | ### ICU_LOAD_COLLATION(locale, name)
439 | 
440 | See the [ICU extension] documentation.
441 | 
442 | Predefined collation types
443 | --------------------------
444 | 
445 | ### CODEPOINT
446 | 
447 | Compares code points instead of code units like `BINARY` does. Makes a
448 | difference when comparing UTF-16 text with surrogate pairs.
449 | 
450 | ### UNOCASE
451 | 
452 | Unicode-aware case-insensitive ordering. Compares case-folded code
453 | points without any locale-specific rules. If doing lots of
454 | comparisons, it's better to use precomputed casefolded strings.
455 | 
456 | ### EQUIV
457 | 
458 | Unicode equivalence. The same string normalized in two
459 | different forms is equivalent. If comparing a lot of strings, it's
460 | best to canonize them with the same normalization form.
461 | 
462 | ### ENOCASE
463 | 
464 | Case-insensitive Unicode equivalence. If comparing a lot of strings,
465 | it's better to use precomputed case folded and normalized ones.
466 | 
467 | Examples
468 | --------
469 | 
470 |     char(0x0122) = char(0x0123) COLLATE BINARY       => 0
471 |     char(0x0122) = char(0x0123) COLLATE NOCASE       => 0
472 |     char(0x0122) = char(0x0123) COLLATE UNOCASE      => 1
473 |     char(0x0122) = nfd(char(0x0122)) COLLATE BINARY  => 0
474 |     char(0x0122) = nfd(char(0x0122)) COLLATE EQUIV   => 1
475 |     char(0x0122) = nfd(char(0x0123)) COLLATE EQUIV   => 0
476 |     char(0x0122) = nfd(char(0x0123)) COLLATE ENOCASE => 1
477 | 
478 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # -*- cmake -*-
 2 | # Builds the Sqlite3 extensions in this directory as loadable MODULE libraries.
 3 | # Extensions that need an external package are skipped when it is not found.
 4 | cmake_minimum_required(VERSION 3.5)
 5 | project("Useful Sqlite3 Extensions" LANGUAGES C VERSION 0.3)
 6 | 
 7 | set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
 8 | 
 9 | include(CheckSymbolExists)
10 | 
11 | set(CMAKE_THREAD_PREFER_PTHREAD 1)
12 | find_package(Threads)
13 | find_package(ZLIB)
14 | 
15 | # Probe for strptime() with _XOPEN_SOURCE defined, then restore the previous
16 | # definitions so later checks are unaffected.
17 | set(SAVED_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS})
18 | set(CMAKE_REQUIRED_DEFINITIONS -D_XOPEN_SOURCE)
19 | check_symbol_exists(strptime "time.h" HAVE_STRPTIME)
20 | set(CMAKE_REQUIRED_DEFINITIONS ${SAVED_DEFINITIONS})
21 | 
22 | find_package(ICU COMPONENTS data uc i18n)
23 | if(ICU_FOUND)
24 |   add_library(string_funcs MODULE icu_extras.c stripaccents.c egc_str_funcs.c
25 |     more_str_funcs.c)
26 |   set_target_properties(string_funcs PROPERTIES C_STANDARD 99
27 |     C_STANDARD_REQUIRED ON)
28 |   target_compile_options(string_funcs PRIVATE -Wall -Wextra)
29 |   target_include_directories(string_funcs PRIVATE ${ICU_INCLUDE_DIRS}
30 |     ${PROJECT_BINARY_DIR})
31 |   target_link_libraries(string_funcs ${ICU_LIBRARIES})
32 | endif()
33 | 
34 | find_package(OpenSSL)
35 | # blob_funcs links against OpenSSL, zlib and the thread library, so only build
36 | # it when all three imported targets exist.
37 | if(OpenSSL_FOUND AND ZLIB_FOUND AND Threads_FOUND)
38 |   set(SAVED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES})
39 |   set(CMAKE_REQUIRED_LIBRARIES OpenSSL::Crypto)
40 |   check_symbol_exists(EVP_MD_CTX_new "openssl/evp.h" HAVE_EVP_MD_CTX_NEW)
41 |   set(CMAKE_REQUIRED_LIBRARIES ${SAVED_LIBRARIES})
42 |   add_library(blob_funcs MODULE blob_funcs.c)
43 |   set_target_properties(blob_funcs PROPERTIES C_STANDARD 99
44 |     C_STANDARD_REQUIRED ON)
45 |   target_compile_options(blob_funcs PRIVATE -Wall -Wextra)
46 |   # Include paths for OpenSSL and zlib come from the imported targets linked
47 |   # below; only the generated config.h directory has to be added by hand.
48 |   target_include_directories(blob_funcs PRIVATE ${PROJECT_BINARY_DIR})
49 |   target_link_libraries(blob_funcs OpenSSL::Crypto ZLIB::ZLIB Threads::Threads)
50 | endif()
51 | 
52 | find_package(PCRE2)
53 | # Build when either the 8-bit or the 16-bit PCRE2 library is available.
54 | if(Pcre2_8_FOUND OR Pcre2_16_FOUND)
55 |   add_library(pcre2_funcs MODULE pcre2_funcs.c)
56 |   set_target_properties(pcre2_funcs PROPERTIES C_STANDARD 99
57 |     C_STANDARD_REQUIRED ON)
58 |   if(Pcre2_8_FOUND)
59 |     set(HAVE_PCRE2_8 1)
60 |     target_link_libraries(pcre2_funcs ${PCRE2_8})
61 |   endif()
62 |   if(Pcre2_16_FOUND)
63 |     set(HAVE_PCRE2_16 1)
64 |     # Earlier misspelling, kept in case config.h.in still refers to it.
65 |     set(HAVE_PCRE1_16 1)
66 |     target_link_libraries(pcre2_funcs ${PCRE2_16})
67 |   endif()
68 |   target_include_directories(pcre2_funcs PRIVATE ${PCRE2_INCLUDE_DIR}
69 |     ${PROJECT_BINARY_DIR})
70 | endif()
71 | 
72 | check_symbol_exists(regcomp "regex.h" HAVE_REGCOMP)
73 | if(HAVE_REGCOMP)
74 |   add_library(posix_re_funcs MODULE posix_re_funcs.c)
75 |   set_target_properties(posix_re_funcs PROPERTIES C_STANDARD 99
76 |     C_STANDARD_REQUIRED ON)
77 |   target_include_directories(posix_re_funcs PRIVATE "${PROJECT_BINARY_DIR}")
78 | endif()
79 | 
80 | add_library(math_funcs MODULE math_funcs.c)
81 | set_target_properties(math_funcs PROPERTIES C_STANDARD 99
82 |   C_STANDARD_REQUIRED ON)
83 | target_compile_options(math_funcs PRIVATE -Wall -Wextra)
84 | 
85 | add_library(bloom_filter1 MODULE bloom_filter.c)
86 | set_target_properties(bloom_filter1 PROPERTIES C_STANDARD 99
87 |   C_STANDARD_REQUIRED ON)
88 | target_compile_options(bloom_filter1 PRIVATE -Wall -Wextra
89 |   -Wno-implicit-fallthrough)
90 | 
91 | add_library(json_funcs MODULE json_funcs.c cJSON.c)
92 | set_target_properties(json_funcs PROPERTIES C_STANDARD 99
93 |   C_STANDARD_REQUIRED ON)
94 | target_compile_options(json_funcs PRIVATE -Wall -Wextra)
95 | 
96 | # Writes config.h into the build directory, which the targets above include.
97 | configure_file(config.h.in config.h)
98 | 


--------------------------------------------------------------------------------
/src/bloom_filter.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2018 Shawn Wagner
  3 | 
  4 | Permission is hereby granted, free of charge, to any person obtaining
  5 | a copy of this software and associated documentation files (the
  6 | "Software"), to deal in the Software without restriction, including
  7 | without limitation the rights to use, copy, modify, merge, publish,
  8 | distribute, sublicense, and/or sell copies of the Software, and to
  9 | permit persons to whom the Software is furnished to do so, subject to
 10 | the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be
 13 | included in all copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 22 | */
 23 | 
 24 | /* Bloom filter virtual table */
 25 | 
 26 | #include <math.h>
 27 | #include <stdlib.h>
 28 | 
 29 | /*
 30 |    SipHash reference C implementation
 31 | 
 32 |    Copyright (c) 2012-2016 Jean-Philippe Aumasson
 33 |    <jeanphilippe.aumasson@gmail.com>
 34 |    Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
 35 | 
 36 |    To the extent possible under law, the author(s) have dedicated all copyright
 37 |    and related and neighboring rights to this software to the public domain
 38 |    worldwide. This software is distributed without any warranty.
 39 | 
 40 |    You should have received a copy of the CC0 Public Domain Dedication along
 41 |    with
 42 |    this software. If not, see
 43 |    <http://creativecommons.org/publicdomain/zero/1.0/>.
 44 |  */
 45 | #include <assert.h>
 46 | #include <stdint.h>
 47 | #include <stdio.h>
 48 | #include <string.h>
 49 | 
 50 | /* default: SipHash-2-4 (cROUNDS rounds per input word, dROUNDS to finalize) */
 51 | #define cROUNDS 2
 52 | #define dROUNDS 4
 53 | /* Rotates the 64-bit value x left by b bits; b is expected to be in 1..63. */
 54 | #define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
 55 | /* Stores v in p[0..3], low byte first. Bare statements: not if/else safe. */
 56 | #define U32TO8_LE(p, v)                                                        \
 57 |   (p)[0] = (uint8_t)((v));                                                     \
 58 |   (p)[1] = (uint8_t)((v) >> 8);                                                \
 59 |   (p)[2] = (uint8_t)((v) >> 16);                                               \
 60 |   (p)[3] = (uint8_t)((v) >> 24);
 61 | /* Stores the 64-bit v in p[0..7], low byte first, as two U32TO8_LE halves. */
 62 | #define U64TO8_LE(p, v)                                                        \
 63 |   U32TO8_LE((p), (uint32_t)((v)));                                             \
 64 |   U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
 65 | /* Reads p[0..7] as a little-endian 64-bit integer. */
 66 | #define U8TO64_LE(p)                                                           \
 67 |   (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) |                          \
 68 |    ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) |                   \
 69 |    ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) |                   \
 70 |    ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
 71 | /* One mixing round over the variables v0, v1, v2, v3 of the enclosing scope. */
 72 | #define SIPROUND                                                               \
 73 |   do {                                                                         \
 74 |     v0 += v1;                                                                  \
 75 |     v1 = ROTL(v1, 13);                                                         \
 76 |     v1 ^= v0;                                                                  \
 77 |     v0 = ROTL(v0, 32);                                                         \
 78 |     v2 += v3;                                                                  \
 79 |     v3 = ROTL(v3, 16);                                                         \
 80 |     v3 ^= v2;                                                                  \
 81 |     v0 += v3;                                                                  \
 82 |     v3 = ROTL(v3, 21);                                                         \
 83 |     v3 ^= v0;                                                                  \
 84 |     v2 += v1;                                                                  \
 85 |     v1 = ROTL(v1, 17);                                                         \
 86 |     v1 ^= v2;                                                                  \
 87 |     v2 = ROTL(v2, 32);                                                         \
 88 |   } while (0)
 89 | /* With DEBUG, TRACE prints inlen and v0..v3 of the enclosing scope; else no-op. */
 90 | #ifdef DEBUG
 91 | #define TRACE                                                                  \
 92 |   do {                                                                         \
 93 |     printf("(%3d) v0 %08x %08x\n", (int)inlen, (uint32_t)(v0 >> 32),           \
 94 |            (uint32_t)v0);                                                      \
 95 |     printf("(%3d) v1 %08x %08x\n", (int)inlen, (uint32_t)(v1 >> 32),           \
 96 |            (uint32_t)v1);                                                      \
 97 |     printf("(%3d) v2 %08x %08x\n", (int)inlen, (uint32_t)(v2 >> 32),           \
 98 |            (uint32_t)v2);                                                      \
 99 |     printf("(%3d) v3 %08x %08x\n", (int)inlen, (uint32_t)(v3 >> 32),           \
100 |            (uint32_t)v3);                                                      \
101 |   } while (0)
102 | #else
103 | #define TRACE
104 | #endif
105 | 
106 | /*
107 |  * Keyed SipHash of the inlen bytes at in, using the 16 key bytes at k.
108 |  * Runs cROUNDS rounds per 8-byte word and dROUNDS rounds per finalization.
109 |  * Writes outlen bytes to out; outlen must be 8 or 16 (asserted). Returns 0.
110 |  */
111 | static int siphash(const uint8_t *in, const size_t inlen, const uint8_t *k,
112 |                    uint8_t *out, const size_t outlen) {
113 |   assert((outlen == 8) || (outlen == 16));
114 | 
115 |   const uint64_t key_lo = U8TO64_LE(k);
116 |   const uint64_t key_hi = U8TO64_LE(k + 8);
117 |   /* SIPROUND and TRACE refer to v0..v3 by name, so those names must stay. */
118 |   uint64_t v0 = 0x736f6d6570736575ULL ^ key_lo;
119 |   uint64_t v1 = 0x646f72616e646f6dULL ^ key_hi;
120 |   uint64_t v2 = 0x6c7967656e657261ULL ^ key_lo;
121 |   uint64_t v3 = 0x7465646279746573ULL ^ key_hi;
122 |   /* Bytes left over after the last complete 8-byte word. */
123 |   const size_t tail_len = inlen & 7;
124 |   const uint8_t *const tail = in + (inlen - tail_len);
125 |   /* The final block starts out holding the input length in its top byte. */
126 |   uint64_t last = ((uint64_t)inlen) << 56;
127 |   const uint8_t *cursor;
128 |   uint64_t word;
129 |   uint64_t digest;
130 |   size_t n;
131 |   int r;
132 | 
133 |   if (outlen == 16)
134 |     v1 ^= 0xee;
135 | 
136 |   /* Absorb every complete little-endian 64-bit word. */
137 |   for (cursor = in; cursor != tail; cursor += 8) {
138 |     word = U8TO64_LE(cursor);
139 |     v3 ^= word;
140 | 
141 |     TRACE;
142 |     for (r = 0; r < cROUNDS; ++r)
143 |       SIPROUND;
144 | 
145 |     v0 ^= word;
146 |   }
147 | 
148 |   /* Fold the 0-7 trailing bytes into the final block, byte n at bit 8*n. */
149 |   for (n = 0; n < tail_len; ++n)
150 |     last |= ((uint64_t)tail[n]) << (8 * n);
151 | 
152 |   v3 ^= last;
153 | 
154 |   TRACE;
155 |   for (r = 0; r < cROUNDS; ++r)
156 |     SIPROUND;
157 | 
158 |   v0 ^= last;
159 | 
160 |   v2 ^= (outlen == 16) ? 0xee : 0xff;
161 | 
162 |   TRACE;
163 |   for (r = 0; r < dROUNDS; ++r)
164 |     SIPROUND;
165 | 
166 |   digest = v0 ^ v1 ^ v2 ^ v3;
167 |   U64TO8_LE(out, digest);
168 | 
169 |   if (outlen == 8)
170 |     return 0;
171 | 
172 |   /* A 16-byte result needs a second finalization pass for out[8..15]. */
173 |   v1 ^= 0xdd;
174 | 
175 |   TRACE;
176 |   for (r = 0; r < dROUNDS; ++r)
177 |     SIPROUND;
178 | 
179 |   digest = v0 ^ v1 ^ v2 ^ v3;
180 |   U64TO8_LE(out + 8, digest);
181 | 
182 |   return 0;
183 | }
194 | 
195 | #include <sqlite3ext.h>
196 | SQLITE_EXTENSION_INIT1
197 | 
198 | static inline sqlite3_uint64 compute_bits(sqlite3_uint64 n, double p) {
199 |   return ceil(-((n * log(p)) / (log(2.0) * log(2.0))));
200 | }
201 | 
202 | static inline int compute_k(double p) { return round(-log2(p)); }
203 | 
204 | static inline int compute_bytes(sqlite3_uint64 bits) {
205 |   int quo = bits / 8;
206 |   int rem = bits % 8;
207 |   if (rem) {
208 |     quo += 1;
209 |   }
210 |   return quo;
211 | }
212 | 
213 | static inline int bf_size(int nElements, double p) {
214 |   return compute_bytes(compute_bits(nElements, p));
215 | }
216 | 
217 | static int bf_create(sqlite3 *, void *, int, const char *const *,
218 |                      sqlite3_vtab **, char **);
219 | static int bf_connect(sqlite3 *, void *, int, const char *const *,
220 |                       sqlite3_vtab **, char **);
221 | static int bf_bestindex(sqlite3_vtab *, sqlite3_index_info *);
222 | static int bf_disconnect(sqlite3_vtab *);
223 | static int bf_destroy(sqlite3_vtab *);
224 | static int bf_open(sqlite3_vtab *, sqlite3_vtab_cursor **);
225 | static int bf_close(sqlite3_vtab_cursor *);
226 | static int bf_filter(sqlite3_vtab_cursor *, int, const char *, int argc,
227 |                      sqlite3_value **argv);
228 | static int bf_next(sqlite3_vtab_cursor *);
229 | static int bf_eof(sqlite3_vtab_cursor *);
230 | static int bf_column(sqlite3_vtab_cursor *, sqlite3_context *, int);
231 | static int bf_rowid(sqlite3_vtab_cursor *, sqlite3_int64 *);
232 | static int bf_update(sqlite3_vtab *, int, sqlite3_value **, sqlite3_int64 *);
233 | static int bf_rename(sqlite3_vtab *, const char *);
234 | 
235 | struct sqlite3_module bf_module = {
236 |     1,        bf_create, bf_connect, bf_bestindex, bf_disconnect, bf_destroy,
237 |     bf_open,  bf_close,  bf_filter,  bf_next,      bf_eof,        bf_column,
238 |     bf_rowid, bf_update, NULL,       NULL,         NULL,          NULL,
239 |     NULL,     bf_rename, NULL,       NULL,         NULL,
240 | #if SQLITE_VERSION_NUMBER >= 3026000
241 |     NULL, // xShadowName
242 | #endif
243 | };
244 | 
// Per-table state of a bloom filter virtual table. bf_create and bf_connect
// allocate one and return it to SQLite cast to sqlite3_vtab *, so the first
// three members stand in for the members of that base structure.
// NOTE(review): their order and types must match sqlite3_vtab in sqlite3.h.
struct bf_vtab {
  const sqlite3_module *pModule; // base-structure slot
  int nRef;                      // base-structure slot
  char *zErrMsg;  // error text set by bf_update/bf_rename; freed by
                  // free_bf_vtab
  char *zDBName;  // copy of argv[1] as passed to bf_create/bf_connect
  char *zName;    // copy of argv[2], the virtual table's own name
  char *zStorage; // "<zName>_storage", the table that holds the filter blob
  int nFilter;    // filter size in bytes
  double p;       // false-positive probability the filter was created with
  int k;          // number of hash functions applied per element
  sqlite3 *db;    // connection the table belongs to
};
257 | 
258 | // CREATE VIRTUAL TABLE foo USING bloom_filter(nElemens, falseProb, kHashes)
259 | static int bf_create(sqlite3 *db, void *pAux __attribute__((unused)), int argc,
260 |                      char const *const *argv, sqlite3_vtab **ppVTab,
261 |                      char **pzErr) {
262 |   struct bf_vtab *vtab = sqlite3_malloc(sizeof *vtab);
263 |   if (!vtab) {
264 |     return SQLITE_NOMEM;
265 |   }
266 | 
267 |   vtab->db = db;
268 | 
269 |   int nElems = 100;
270 |   if (argc >= 4) {
271 |     nElems = strtoull(argv[3], NULL, 0);
272 |     if (nElems <= 0) {
273 |       *pzErr = sqlite3_mprintf("Number of elements in filter must be positive");
274 |       sqlite3_free(vtab);
275 |       return SQLITE_ERROR;
276 |     }
277 |   }
278 |   if (argc >= 5) {
279 |     vtab->p = strtod(argv[4], NULL);
280 |     if (vtab->p >= 1.0 || vtab->p <= 0.0) {
281 |       *pzErr = sqlite3_mprintf("Probability must be in the range (0,1)");
282 |       sqlite3_free(vtab);
283 |       return SQLITE_ERROR;
284 |     }
285 |   } else {
286 |     vtab->p = 0.01;
287 |   }
288 |   vtab->nFilter = bf_size(nElems, vtab->p);
289 |   if (argc == 6) {
290 |     vtab->k = strtol(argv[5], NULL, 0);
291 |     if (vtab->k <= 0) {
292 |       *pzErr = sqlite3_mprintf("Number of hash functions must be positive.");
293 |       sqlite3_free(vtab);
294 |       return SQLITE_ERROR;
295 |     }
296 |   } else {
297 |     vtab->k = compute_k(vtab->p);
298 |   }
299 | 
300 |   char *storage =
301 |       sqlite3_mprintf("CREATE TABLE \"%s\".\"%s_storage\"(data "
302 |                       "BLOB, p REAL, n INTEGER, m INTEGER, k INTEGER)",
303 |                       argv[1], argv[2]);
304 |   int rc = sqlite3_exec(db, storage, NULL, NULL, pzErr);
305 |   sqlite3_free(storage);
306 |   if (rc != SQLITE_OK) {
307 |     sqlite3_free(vtab);
308 |     return rc;
309 |   }
310 | 
311 |   storage =
312 |       sqlite3_mprintf("INSERT INTO \"%s\".\"%s_storage\"(rowid, data, p, n, m, "
313 |                       "k) VALUES (1, ?, %f, %d, %llu, %d)",
314 |                       argv[1], argv[2], vtab->p, nElems,
315 |                       (sqlite3_uint64)vtab->nFilter * 8, vtab->k);
316 | 
317 |   sqlite3_stmt *inserter;
318 |   rc = sqlite3_prepare_v2(db, storage, -1, &inserter, NULL);
319 |   sqlite3_free(storage);
320 |   if (rc != SQLITE_OK) {
321 |     sqlite3_free(vtab);
322 |     return rc;
323 |   }
324 | 
325 |   sqlite3_bind_zeroblob(inserter, 1, vtab->nFilter);
326 |   rc = sqlite3_step(inserter);
327 |   sqlite3_finalize(inserter);
328 |   if (rc != SQLITE_DONE) {
329 |     sqlite3_free(vtab);
330 |     return rc;
331 |   }
332 | 
333 |   rc = sqlite3_declare_vtab(db, "CREATE TABLE x(present, word HIDDEN "
334 |                                 "NOT NULL PRIMARY KEY) WITHOUT ROWID");
335 |   if (rc != SQLITE_OK) {
336 |     storage =
337 |         sqlite3_mprintf("DROP TABLE \"%s\".\"%s_storage\"", argv[1], argv[2]);
338 |     sqlite3_exec(db, storage, NULL, NULL, NULL);
339 |     sqlite3_free(storage);
340 |     sqlite3_free(vtab);
341 |     return rc;
342 |   }
343 | 
344 |   vtab->zDBName = sqlite3_mprintf("%s", argv[1]);
345 |   if (!vtab->zDBName) {
346 |     sqlite3_free(vtab);
347 |     return SQLITE_NOMEM;
348 |   }
349 |   vtab->zName = sqlite3_mprintf("%s", argv[2]);
350 |   if (!vtab->zName) {
351 |     sqlite3_free(vtab->zDBName);
352 |     sqlite3_free(vtab);
353 |     return SQLITE_NOMEM;
354 |   }
355 |   vtab->zStorage = sqlite3_mprintf("%s_storage", argv[2]);
356 |   if (!vtab->zName) {
357 |     sqlite3_free(vtab->zName);
358 |     sqlite3_free(vtab->zDBName);
359 |     sqlite3_free(vtab);
360 |     return SQLITE_NOMEM;
361 |   }
362 | 
363 |   *ppVTab = (struct sqlite3_vtab *)vtab;
364 |   return SQLITE_OK;
365 | }
366 | 
367 | static void free_bf_vtab(struct bf_vtab *vtab) {
368 |   sqlite3_free(vtab->zErrMsg);
369 |   sqlite3_free(vtab->zDBName);
370 |   sqlite3_free(vtab->zName);
371 |   sqlite3_free(vtab->zStorage);
372 |   sqlite3_free(vtab);
373 | }
374 | 
375 | static int bf_connect(sqlite3 *db, void *pAux __attribute__((unused)),
376 |                       int argc __attribute__((unused)), const char *const *argv,
377 |                       sqlite3_vtab **ppVTab,
378 |                       char **pzErr __attribute__((unused))) {
379 |   struct bf_vtab *vtab = sqlite3_malloc(sizeof *vtab);
380 |   if (!vtab) {
381 |     return SQLITE_NOMEM;
382 |   }
383 |   vtab->db = db;
384 | 
385 |   int rc = sqlite3_declare_vtab(db, "CREATE TABLE x(present, word HIDDEN  "
386 |                                     "NOT NULL PRIMARY KEY) WITHOUT ROWID");
387 |   if (rc != SQLITE_OK) {
388 |     sqlite3_free(vtab);
389 |     return rc;
390 |   }
391 | 
392 |   char *load_query = sqlite3_mprintf(
393 |       "SELECT m/8, p, k FROM \"%s\".\"%s_storage\" WHERE rowid = 1", argv[1],
394 |       argv[2]);
395 |   if (!load_query) {
396 |     sqlite3_free(vtab);
397 |     return SQLITE_NOMEM;
398 |   }
399 | 
400 |   sqlite3_stmt *loader;
401 |   rc = sqlite3_prepare_v2(db, load_query, -1, &loader, NULL);
402 |   sqlite3_free(load_query);
403 |   if (rc != SQLITE_OK) {
404 |     sqlite3_free(vtab);
405 |     return rc;
406 |   }
407 | 
408 |   rc = sqlite3_step(loader);
409 |   if (rc != SQLITE_ROW) {
410 |     sqlite3_finalize(loader);
411 |     sqlite3_free(vtab);
412 |     if (rc == SQLITE_DONE) {
413 |       rc = SQLITE_CORRUPT_VTAB;
414 |     }
415 |     return rc;
416 |   }
417 | 
418 |   vtab->nFilter = sqlite3_column_int(loader, 0);
419 |   vtab->p = sqlite3_column_double(loader, 1);
420 |   vtab->k = sqlite3_column_int(loader, 2);
421 |   sqlite3_finalize(loader);
422 | 
423 |   vtab->zDBName = sqlite3_mprintf("%s", argv[1]);
424 |   if (!vtab->zDBName) {
425 |     sqlite3_free(vtab);
426 |     return SQLITE_NOMEM;
427 |   }
428 |   vtab->zName = sqlite3_mprintf("%s", argv[2]);
429 |   if (!vtab->zName) {
430 |     sqlite3_free(vtab->zDBName);
431 |     sqlite3_free(vtab);
432 |     return SQLITE_NOMEM;
433 |   }
434 |   vtab->zStorage = sqlite3_mprintf("%s_storage", argv[2]);
435 |   if (!vtab->zStorage) {
436 |     sqlite3_free(vtab->zName);
437 |     sqlite3_free(vtab->zDBName);
438 |     sqlite3_free(vtab);
439 |     return SQLITE_NOMEM;
440 |   }
441 | 
442 |   *ppVTab = (struct sqlite3_vtab *)vtab;
443 |   return SQLITE_OK;
444 | }
445 | 
446 | static int bf_bestindex(struct sqlite3_vtab *pVTab, sqlite3_index_info *idx) {
447 |   struct bf_vtab *vtab = (struct bf_vtab *)pVTab;
448 | 
449 |   for (int n = 0; n < idx->nConstraint; n += 1) {
450 |     if (!idx->aConstraint[n].usable) {
451 |       continue;
452 |     }
453 |     if (idx->aConstraint[n].iColumn == 1 &&
454 |         idx->aConstraint[n].op == SQLITE_INDEX_CONSTRAINT_EQ) {
455 |       idx->aConstraintUsage[n].argvIndex = 1;
456 |     }
457 |   }
458 |   idx->orderByConsumed = 1;
459 |   if (sqlite3_libversion_number() >= 3008002) {
460 |     idx->estimatedRows = 1;
461 |   }
462 |   if (sqlite3_libversion_number() >= 3009000) {
463 |     idx->idxFlags = SQLITE_INDEX_SCAN_UNIQUE;
464 |   }
465 |   idx->estimatedCost = vtab->k;
466 |   return SQLITE_OK;
467 | }
468 | 
469 | static int bf_disconnect(struct sqlite3_vtab *pVTab) {
470 |   free_bf_vtab((struct bf_vtab *)pVTab);
471 |   return SQLITE_OK;
472 | }
473 | 
474 | static int bf_destroy(struct sqlite3_vtab *pVTab) {
475 |   struct bf_vtab *vtab = (struct bf_vtab *)pVTab;
476 | 
477 |   char *deleter = sqlite3_mprintf("DROP TABLE \"%s\".\"%s\"", vtab->zDBName,
478 |                                   vtab->zStorage);
479 |   int rc = sqlite3_exec(vtab->db, deleter, NULL, NULL, NULL);
480 |   sqlite3_free(deleter);
481 |   if (rc != SQLITE_OK) {
482 |     return rc;
483 |   }
484 | 
485 |   free_bf_vtab(vtab);
486 |   return SQLITE_OK;
487 | }
488 | 
// Cursor for one membership probe. bf_open returns it to SQLite cast to
// sqlite3_vtab_cursor *.
struct bf_cursor {
  // Owning table. bf_open never assigns it; it sits where the base cursor's
  // table pointer would be, so presumably SQLite fills it in after xOpen.
  // TODO confirm against the xOpen documentation.
  struct bf_vtab *vtab;
  sqlite3_value *orig; // copy of the probed value made by bf_filter; freed in
                       // bf_close
  _Bool found;         // nonzero while the cursor is on its result row
};
494 | 
495 | static int bf_open(sqlite3_vtab *pVTab __attribute__((unused)),
496 |                    sqlite3_vtab_cursor **ppCursor) {
497 |   struct bf_cursor *c = sqlite3_malloc(sizeof *c);
498 |   if (!c) {
499 |     return SQLITE_NOMEM;
500 |   }
501 |   c->orig = NULL;
502 |   c->found = 0;
503 |   *ppCursor = (struct sqlite3_vtab_cursor *)c;
504 |   return SQLITE_OK;
505 | }
506 | 
507 | static int bf_close(sqlite3_vtab_cursor *pCursor) {
508 |   struct bf_cursor *c = (struct bf_cursor *)pCursor;
509 |   sqlite3_value_free(c->orig);
510 |   sqlite3_free(c);
511 |   return SQLITE_OK;
512 | }
513 | 
514 | static int bf_eof(sqlite3_vtab_cursor *pCursor) {
515 |   struct bf_cursor *c = (struct bf_cursor *)pCursor;
516 |   return c->found == 0;
517 | }
518 | 
/*
 * 64-bit SipHash of `data` for hash function number `k`. The 16-byte key is
 * the pair {~k, k} of 64-bit words. Both the key and the digest are handed to
 * siphash() as raw bytes of native uint64_t objects.
 */
static inline uint64_t calc_hash(const uint8_t *data, size_t len, int k) {
  uint64_t digest;
  uint64_t key[2];

  key[1] = k;
  key[0] = ~key[1];
  siphash(data, len, (const uint8_t *)key, (uint8_t *)&digest, sizeof digest);
  return digest;
}
525 | 
526 | static int bf_filter(sqlite3_vtab_cursor *pCursor,
527 |                      int idxNum __attribute__((unused)),
528 |                      const char *idxStr __attribute__((unused)), int argc,
529 |                      sqlite3_value **argv) {
530 |   struct bf_cursor *c = (struct bf_cursor *)pCursor;
531 |   int rc;
532 | 
533 |   if (argc != 1) {
534 |     return SQLITE_OK;
535 |   }
536 | 
537 |   c->orig = sqlite3_value_dup(argv[0]);
538 |   const unsigned char *blob = sqlite3_value_blob(argv[0]);
539 |   int len = sqlite3_value_bytes(argv[0]);
540 | 
541 |   do {
542 |     sqlite3_blob *b;
543 |     rc = sqlite3_blob_open(c->vtab->db, c->vtab->zDBName, c->vtab->zStorage,
544 |                            "data", 1, 0, &b);
545 |     if (rc != SQLITE_OK) {
546 |       return rc;
547 |     }
548 | 
549 |     for (int n = 0; n < c->vtab->k; n += 1) {
550 |       uint64_t hash = calc_hash(blob, len, n);
551 |       hash %= (uint64_t)c->vtab->nFilter * 8;
552 |       int bytepos = hash / 8;
553 |       int bitpos = hash % 8;
554 |       unsigned char byte;
555 |       rc = sqlite3_blob_read(b, &byte, 1, bytepos);
556 |       if (rc == SQLITE_OK) {
557 |         c->found = byte & (1 << bitpos);
558 |         if (!c->found) {
559 |           sqlite3_blob_close(b);
560 |           return SQLITE_OK;
561 |         }
562 |       } else if (rc == SQLITE_ABORT) {
563 |         break;
564 |       } else {
565 |         sqlite3_blob_close(b);
566 |         return rc;
567 |       }
568 |     }
569 |     sqlite3_blob_close(b);
570 |   } while (rc == SQLITE_ABORT);
571 |   return rc;
572 | }
573 | 
574 | static int bf_next(sqlite3_vtab_cursor *pCursor) {
575 |   struct bf_cursor *c = (struct bf_cursor *)pCursor;
576 |   c->found = 0;
577 |   return SQLITE_OK;
578 | }
579 | 
580 | static int bf_column(sqlite3_vtab_cursor *pCursor, sqlite3_context *ctx,
581 |                      int n) {
582 |   struct bf_cursor *c = (struct bf_cursor *)pCursor;
583 |   assert(n == 0 || n == 1);
584 | 
585 |   if (n == 0) {
586 |     sqlite3_result_int(ctx, c->found);
587 |   } else if (n == 1) {
588 |     sqlite3_result_value(ctx, c->orig);
589 |   }
590 |   return SQLITE_OK;
591 | }
592 | 
593 | static int bf_rowid(sqlite3_vtab_cursor *pCursor __attribute__((unused)),
594 |                     sqlite_int64 *pRowid __attribute__((unused))) {
595 |   return SQLITE_OK;
596 | }
597 | 
598 | static int bf_update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
599 |                      sqlite_int64 *pRowid __attribute__((unused))) {
600 |   struct bf_vtab *vtab = (struct bf_vtab *)pVTab;
601 | 
602 |   if (sqlite3_value_type(argv[0]) != SQLITE_NULL) {
603 |     if (vtab->zErrMsg) {
604 |       sqlite3_free(vtab->zErrMsg);
605 |     }
606 |     if (argc == 1) {
607 |       vtab->zErrMsg =
608 |           sqlite3_mprintf("bloom_filter elements cannot be deleted.");
609 |     } else {
610 |       vtab->zErrMsg =
611 |           sqlite3_mprintf("bloom_filter elements cannot be updated.");
612 |     }
613 |     return SQLITE_ERROR;
614 |   }
615 | 
616 |   assert(argc == 4);
617 | 
618 |   const uint8_t *blob = sqlite3_value_blob(argv[2]);
619 |   size_t len = sqlite3_value_bytes(argv[2]);
620 | 
621 |   int rc;
622 |   do {
623 |     sqlite3_blob *b;
624 |     rc = sqlite3_blob_open(vtab->db, vtab->zDBName, vtab->zStorage, "data", 1,
625 |                            1, &b);
626 |     if (rc != SQLITE_OK) {
627 |       return rc;
628 |     }
629 |     for (int n = 0; n < vtab->k; n += 1) {
630 |       uint64_t hash = calc_hash(blob, len, n);
631 |       hash %= (uint64_t)vtab->nFilter * 8;
632 |       int bytepos = hash / 8;
633 |       int bitpos = hash % 8;
634 |       unsigned char byte;
635 |       rc = sqlite3_blob_read(b, &byte, 1, bytepos);
636 |       if (rc == SQLITE_OK) {
637 |         byte |= 1 << bitpos;
638 |         rc = sqlite3_blob_write(b, &byte, 1, bytepos);
639 |         if (rc == SQLITE_ABORT) {
640 |           break;
641 |         } else if (rc != SQLITE_OK) {
642 |           sqlite3_blob_close(b);
643 |           return rc;
644 |         }
645 |       } else if (rc == SQLITE_ABORT) {
646 |         break;
647 |       } else {
648 |         sqlite3_blob_close(b);
649 |         return rc;
650 |       }
651 |     }
652 |     sqlite3_blob_close(b);
653 |   } while (rc == SQLITE_ABORT);
654 | 
655 |   return rc;
656 | }
657 | 
658 | static int bf_rename(sqlite3_vtab *pVTab, const char *zNew) {
659 |   struct bf_vtab *vtab = (struct bf_vtab *)pVTab;
660 | 
661 |   char *renamer =
662 |       sqlite3_mprintf("ALTER TABLE \"%s\".\"%s\" RENAME TO \"%s_storage\"",
663 |                       vtab->zDBName, vtab->zStorage, zNew);
664 |   if (!renamer) {
665 |     return SQLITE_NOMEM;
666 |   }
667 |   if (vtab->zErrMsg) {
668 |     sqlite3_free(vtab->zErrMsg);
669 |     vtab->zErrMsg = NULL;
670 |   }
671 |   int rc = sqlite3_exec(vtab->db, renamer, NULL, NULL, &(vtab->zErrMsg));
672 |   sqlite3_free(renamer);
673 |   if (rc == SQLITE_OK) {
674 |     sqlite3_free(vtab->zName);
675 |     vtab->zName = sqlite3_mprintf("%s", zNew);
676 |     if (!vtab->zName) {
677 |       return SQLITE_NOMEM;
678 |     }
679 |     sqlite3_free(vtab->zStorage);
680 |     vtab->zStorage = sqlite3_mprintf("%s_storage", zNew);
681 |     if (!vtab->zStorage) {
682 |       return SQLITE_NOMEM;
683 |     }
684 |   }
685 |   return rc;
686 | }
687 | 
688 | #ifdef _WIN32
689 | _declspec(dllexport)
690 | #endif
691 |     int sqlite3_bloomfilter_init(sqlite3 *db,
692 |                                  char **pzErrMsg __attribute__((unused)),
693 |                                  const sqlite3_api_routines *pApi) {
694 |   SQLITE_EXTENSION_INIT2(pApi);
695 |   return sqlite3_create_module(db, "bloom_filter1", &bf_module, NULL);
696 | }
697 | 


--------------------------------------------------------------------------------
/src/cJSON.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |   Copyright (c) 2009-2017 Dave Gamble and cJSON contributors
  3 | 
  4 |   Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |   of this software and associated documentation files (the "Software"), to deal
  6 |   in the Software without restriction, including without limitation the rights
  7 |   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 |   copies of the Software, and to permit persons to whom the Software is
  9 |   furnished to do so, subject to the following conditions:
 10 | 
 11 |   The above copyright notice and this permission notice shall be included in
 12 |   all copies or substantial portions of the Software.
 13 | 
 14 |   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 |   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 20 |   THE SOFTWARE.
 21 | */
 22 | 
 23 | #ifndef cJSON__h
 24 | #define cJSON__h
 25 | 
 26 | #ifdef __cplusplus
 27 | extern "C"
 28 | {
 29 | #endif
 30 | 
 31 | /* project version */
 32 | #define CJSON_VERSION_MAJOR 1
 33 | #define CJSON_VERSION_MINOR 7
 34 | #define CJSON_VERSION_PATCH 7
 35 | 
 36 | #include <stddef.h>
 37 | 
 38 | /* cJSON Types: */
 39 | #define cJSON_Invalid (0)
 40 | #define cJSON_False  (1 << 0)
 41 | #define cJSON_True   (1 << 1)
 42 | #define cJSON_NULL   (1 << 2)
 43 | #define cJSON_Number (1 << 3)
 44 | #define cJSON_String (1 << 4)
 45 | #define cJSON_Array  (1 << 5)
 46 | #define cJSON_Object (1 << 6)
 47 | #define cJSON_Raw    (1 << 7) /* raw json */
 48 | 
 49 | #define cJSON_IsReference 256
 50 | #define cJSON_StringIsConst 512
 51 | 
/* The cJSON structure: one node of a parsed or constructed JSON tree. */
typedef struct cJSON
{
    /* next/prev allow you to walk array/object chains. Alternatively, use GetArraySize/GetArrayItem/GetObjectItem */
    struct cJSON *next;
    struct cJSON *prev;
    /* An array or object item will have a child pointer pointing to a chain of the items in the array/object. */
    struct cJSON *child;

    /* The type of the item, as above. */
    int type;

    /* The item's string, if type==cJSON_String or type==cJSON_Raw */
    char *valuestring;
    /* writing to valueint is DEPRECATED, use cJSON_SetNumberValue instead */
    int valueint;
    /* The item's number, if type==cJSON_Number */
    double valuedouble;

    /* The item's name string, if this item is the child of, or is in the list of subitems of an object. */
    char *string;
} cJSON;
 74 | 
/* Custom allocator callbacks, installed with cJSON_InitHooks(). */
typedef struct cJSON_Hooks
{
      /* Allocation callback; receives the requested size in bytes. */
      void *(*malloc_fn)(size_t sz);
      /* Deallocation callback; receives the pointer to release. */
      void (*free_fn)(void *ptr);
} cJSON_Hooks;
 80 | 
 81 | typedef int cJSON_bool;
 82 | 
 83 | #if !defined(__WINDOWS__) && (defined(WIN32) || defined(WIN64) || defined(_MSC_VER) || defined(_WIN32))
 84 | #define __WINDOWS__
 85 | #endif
 86 | #ifdef __WINDOWS__
 87 | 
/* When compiling for windows, we specify a specific calling convention to avoid issues where we are being called from a project with a different default calling convention.  For windows you have 3 define options:
 89 | 
 90 | CJSON_HIDE_SYMBOLS - Define this in the case where you don't want to ever dllexport symbols
 91 | CJSON_EXPORT_SYMBOLS - Define this on library build when you want to dllexport symbols (default)
 92 | CJSON_IMPORT_SYMBOLS - Define this if you want to dllimport symbol
 93 | 
 94 | For *nix builds that support visibility attribute, you can define similar behavior by
 95 | 
 96 | setting default visibility to hidden by adding
 97 | -fvisibility=hidden (for gcc)
 98 | or
 99 | -xldscope=hidden (for sun cc)
100 | to CFLAGS
101 | 
102 | then using the CJSON_API_VISIBILITY flag to "export" the same symbols the way CJSON_EXPORT_SYMBOLS does
103 | 
104 | */
105 | 
106 | /* export symbols by default, this is necessary for copy pasting the C and header file */
107 | #if !defined(CJSON_HIDE_SYMBOLS) && !defined(CJSON_IMPORT_SYMBOLS) && !defined(CJSON_EXPORT_SYMBOLS)
108 | #define CJSON_EXPORT_SYMBOLS
109 | #endif
110 | 
111 | #if defined(CJSON_HIDE_SYMBOLS)
112 | #define CJSON_PUBLIC(type)   type __stdcall
113 | #elif defined(CJSON_EXPORT_SYMBOLS)
114 | #define CJSON_PUBLIC(type)   __declspec(dllexport) type __stdcall
115 | #elif defined(CJSON_IMPORT_SYMBOLS)
116 | #define CJSON_PUBLIC(type)   __declspec(dllimport) type __stdcall
117 | #endif
118 | #else /* !WIN32 */
119 | #if (defined(__GNUC__) || defined(__SUNPRO_CC) || defined (__SUNPRO_C)) && defined(CJSON_API_VISIBILITY)
120 | #define CJSON_PUBLIC(type)   __attribute__((visibility("default"))) type
121 | #else
122 | #define CJSON_PUBLIC(type) type
123 | #endif
124 | #endif
125 | 
126 | /* Limits how deeply nested arrays/objects can be before cJSON rejects to parse them.
127 |  * This is to prevent stack overflows. */
128 | #ifndef CJSON_NESTING_LIMIT
129 | #define CJSON_NESTING_LIMIT 1000
130 | #endif
131 | 
132 | /* returns the version of cJSON as a string */
133 | CJSON_PUBLIC(const char*) cJSON_Version(void);
134 | 
/* Supply malloc and free functions to cJSON */
136 | CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks* hooks);
137 | 
138 | /* Memory Management: the caller is always responsible to free the results from all variants of cJSON_Parse (with cJSON_Delete) and cJSON_Print (with stdlib free, cJSON_Hooks.free_fn, or cJSON_free as appropriate). The exception is cJSON_PrintPreallocated, where the caller has full responsibility of the buffer. */
139 | /* Supply a block of JSON, and this returns a cJSON object you can interrogate. */
140 | CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value);
141 | /* ParseWithOpts allows you to require (and check) that the JSON is null terminated, and to retrieve the pointer to the final byte parsed. */
142 | /* If you supply a ptr in return_parse_end and parsing fails, then return_parse_end will contain a pointer to the error so will match cJSON_GetErrorPtr(). */
143 | CJSON_PUBLIC(cJSON *) cJSON_ParseWithOpts(const char *value, const char **return_parse_end, cJSON_bool require_null_terminated);
144 | 
145 | /* Render a cJSON entity to text for transfer/storage. */
146 | CJSON_PUBLIC(char *) cJSON_Print(const cJSON *item);
147 | /* Render a cJSON entity to text for transfer/storage without any formatting. */
148 | CJSON_PUBLIC(char *) cJSON_PrintUnformatted(const cJSON *item);
149 | /* Render a cJSON entity to text using a buffered strategy. prebuffer is a guess at the final size. guessing well reduces reallocation. fmt=0 gives unformatted, =1 gives formatted */
150 | CJSON_PUBLIC(char *) cJSON_PrintBuffered(const cJSON *item, int prebuffer, cJSON_bool fmt);
151 | /* Render a cJSON entity to text using a buffer already allocated in memory with given length. Returns 1 on success and 0 on failure. */
152 | /* NOTE: cJSON is not always 100% accurate in estimating how much memory it will use, so to be safe allocate 5 bytes more than you actually need */
153 | CJSON_PUBLIC(cJSON_bool) cJSON_PrintPreallocated(cJSON *item, char *buffer, const int length, const cJSON_bool format);
154 | /* Delete a cJSON entity and all subentities. */
155 | CJSON_PUBLIC(void) cJSON_Delete(cJSON *c);
156 | 
157 | /* Returns the number of items in an array (or object). */
158 | CJSON_PUBLIC(int) cJSON_GetArraySize(const cJSON *array);
159 | /* Retrieve item number "index" from array "array". Returns NULL if unsuccessful. */
160 | CJSON_PUBLIC(cJSON *) cJSON_GetArrayItem(const cJSON *array, int index);
161 | /* Get item "string" from object. Case insensitive. */
162 | CJSON_PUBLIC(cJSON *) cJSON_GetObjectItem(const cJSON * const object, const char * const string);
163 | CJSON_PUBLIC(cJSON *) cJSON_GetObjectItemCaseSensitive(const cJSON * const object, const char * const string);
164 | CJSON_PUBLIC(cJSON_bool) cJSON_HasObjectItem(const cJSON *object, const char *string);
165 | /* For analysing failed parses. This returns a pointer to the parse error. You'll probably need to look a few chars back to make sense of it. Defined when cJSON_Parse() returns 0. 0 when cJSON_Parse() succeeds. */
166 | CJSON_PUBLIC(const char *) cJSON_GetErrorPtr(void);
167 | 
168 | /* Check if the item is a string and return its valuestring */
169 | CJSON_PUBLIC(char *) cJSON_GetStringValue(cJSON *item);
170 | 
171 | /* These functions check the type of an item */
172 | CJSON_PUBLIC(cJSON_bool) cJSON_IsInvalid(const cJSON * const item);
173 | CJSON_PUBLIC(cJSON_bool) cJSON_IsFalse(const cJSON * const item);
174 | CJSON_PUBLIC(cJSON_bool) cJSON_IsTrue(const cJSON * const item);
175 | CJSON_PUBLIC(cJSON_bool) cJSON_IsBool(const cJSON * const item);
176 | CJSON_PUBLIC(cJSON_bool) cJSON_IsNull(const cJSON * const item);
177 | CJSON_PUBLIC(cJSON_bool) cJSON_IsNumber(const cJSON * const item);
178 | CJSON_PUBLIC(cJSON_bool) cJSON_IsString(const cJSON * const item);
179 | CJSON_PUBLIC(cJSON_bool) cJSON_IsArray(const cJSON * const item);
180 | CJSON_PUBLIC(cJSON_bool) cJSON_IsObject(const cJSON * const item);
181 | CJSON_PUBLIC(cJSON_bool) cJSON_IsRaw(const cJSON * const item);
182 | 
183 | /* These calls create a cJSON item of the appropriate type. */
184 | CJSON_PUBLIC(cJSON *) cJSON_CreateNull(void);
185 | CJSON_PUBLIC(cJSON *) cJSON_CreateTrue(void);
186 | CJSON_PUBLIC(cJSON *) cJSON_CreateFalse(void);
187 | CJSON_PUBLIC(cJSON *) cJSON_CreateBool(cJSON_bool boolean);
188 | CJSON_PUBLIC(cJSON *) cJSON_CreateNumber(double num);
189 | CJSON_PUBLIC(cJSON *) cJSON_CreateString(const char *string);
190 | /* raw json */
191 | CJSON_PUBLIC(cJSON *) cJSON_CreateRaw(const char *raw);
192 | CJSON_PUBLIC(cJSON *) cJSON_CreateArray(void);
193 | CJSON_PUBLIC(cJSON *) cJSON_CreateObject(void);
194 | 
195 | /* Create a string where valuestring references a string so
196 |  * it will not be freed by cJSON_Delete */
197 | CJSON_PUBLIC(cJSON *) cJSON_CreateStringReference(const char *string);
/* Create an object/array that only references its elements so
 * they will not be freed by cJSON_Delete */
200 | CJSON_PUBLIC(cJSON *) cJSON_CreateObjectReference(const cJSON *child);
201 | CJSON_PUBLIC(cJSON *) cJSON_CreateArrayReference(const cJSON *child);
202 | 
203 | /* These utilities create an Array of count items. */
204 | CJSON_PUBLIC(cJSON *) cJSON_CreateIntArray(const int *numbers, int count);
205 | CJSON_PUBLIC(cJSON *) cJSON_CreateFloatArray(const float *numbers, int count);
206 | CJSON_PUBLIC(cJSON *) cJSON_CreateDoubleArray(const double *numbers, int count);
207 | CJSON_PUBLIC(cJSON *) cJSON_CreateStringArray(const char **strings, int count);
208 | 
209 | /* Append item to the specified array/object. */
210 | CJSON_PUBLIC(void) cJSON_AddItemToArray(cJSON *array, cJSON *item);
211 | CJSON_PUBLIC(void) cJSON_AddItemToObject(cJSON *object, const char *string, cJSON *item);
212 | /* Use this when string is definitely const (i.e. a literal, or as good as), and will definitely survive the cJSON object.
213 |  * WARNING: When this function was used, make sure to always check that (item->type & cJSON_StringIsConst) is zero before
214 |  * writing to `item->string` */
215 | CJSON_PUBLIC(void) cJSON_AddItemToObjectCS(cJSON *object, const char *string, cJSON *item);
216 | /* Append reference to item to the specified array/object. Use this when you want to add an existing cJSON to a new cJSON, but don't want to corrupt your existing cJSON. */
217 | CJSON_PUBLIC(void) cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item);
218 | CJSON_PUBLIC(void) cJSON_AddItemReferenceToObject(cJSON *object, const char *string, cJSON *item);
219 | 
/* Remove/Detach items from Arrays/Objects. */
221 | CJSON_PUBLIC(cJSON *) cJSON_DetachItemViaPointer(cJSON *parent, cJSON * const item);
222 | CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromArray(cJSON *array, int which);
223 | CJSON_PUBLIC(void) cJSON_DeleteItemFromArray(cJSON *array, int which);
224 | CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObject(cJSON *object, const char *string);
225 | CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObjectCaseSensitive(cJSON *object, const char *string);
226 | CJSON_PUBLIC(void) cJSON_DeleteItemFromObject(cJSON *object, const char *string);
227 | CJSON_PUBLIC(void) cJSON_DeleteItemFromObjectCaseSensitive(cJSON *object, const char *string);
228 | 
229 | /* Update array items. */
230 | CJSON_PUBLIC(void) cJSON_InsertItemInArray(cJSON *array, int which, cJSON *newitem); /* Shifts pre-existing items to the right. */
231 | CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemViaPointer(cJSON * const parent, cJSON * const item, cJSON * replacement);
232 | CJSON_PUBLIC(void) cJSON_ReplaceItemInArray(cJSON *array, int which, cJSON *newitem);
233 | CJSON_PUBLIC(void) cJSON_ReplaceItemInObject(cJSON *object,const char *string,cJSON *newitem);
234 | CJSON_PUBLIC(void) cJSON_ReplaceItemInObjectCaseSensitive(cJSON *object,const char *string,cJSON *newitem);
235 | 
236 | /* Duplicate a cJSON item */
237 | CJSON_PUBLIC(cJSON *) cJSON_Duplicate(const cJSON *item, cJSON_bool recurse);
238 | /* Duplicate will create a new, identical cJSON item to the one you pass, in new memory that will
239 | need to be released. With recurse!=0, it will duplicate any children connected to the item.
240 | The item->next and ->prev pointers are always zero on return from Duplicate. */
241 | /* Recursively compare two cJSON items for equality. If either a or b is NULL or invalid, they will be considered unequal.
242 |  * case_sensitive determines if object keys are treated case sensitive (1) or case insensitive (0) */
243 | CJSON_PUBLIC(cJSON_bool) cJSON_Compare(const cJSON * const a, const cJSON * const b, const cJSON_bool case_sensitive);
244 | 
245 | 
246 | CJSON_PUBLIC(void) cJSON_Minify(char *json);
247 | 
248 | /* Helper functions for creating and adding items to an object at the same time.
249 |  * They return the added item or NULL on failure. */
250 | CJSON_PUBLIC(cJSON*) cJSON_AddNullToObject(cJSON * const object, const char * const name);
251 | CJSON_PUBLIC(cJSON*) cJSON_AddTrueToObject(cJSON * const object, const char * const name);
252 | CJSON_PUBLIC(cJSON*) cJSON_AddFalseToObject(cJSON * const object, const char * const name);
253 | CJSON_PUBLIC(cJSON*) cJSON_AddBoolToObject(cJSON * const object, const char * const name, const cJSON_bool boolean);
254 | CJSON_PUBLIC(cJSON*) cJSON_AddNumberToObject(cJSON * const object, const char * const name, const double number);
255 | CJSON_PUBLIC(cJSON*) cJSON_AddStringToObject(cJSON * const object, const char * const name, const char * const string);
256 | CJSON_PUBLIC(cJSON*) cJSON_AddRawToObject(cJSON * const object, const char * const name, const char * const raw);
257 | CJSON_PUBLIC(cJSON*) cJSON_AddObjectToObject(cJSON * const object, const char * const name);
258 | CJSON_PUBLIC(cJSON*) cJSON_AddArrayToObject(cJSON * const object, const char * const name);
259 | 
260 | /* When assigning an integer value, it needs to be propagated to valuedouble too. */
261 | #define cJSON_SetIntValue(object, number) ((object) ? (object)->valueint = (object)->valuedouble = (number) : (number))
262 | /* helper for the cJSON_SetNumberValue macro */
263 | CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number);
/* Sets the numeric value of `object` (when it is non-NULL) and evaluates to
 * that number. Every macro argument is parenthesized so that expression
 * arguments such as `a / b` or `cond ? x : y` are evaluated as a whole before
 * the cast and the NULL test are applied. */
#define cJSON_SetNumberValue(object, number) (((object) != NULL) ? cJSON_SetNumberHelper((object), (double)(number)) : (number))
265 | 
266 | /* Macro for iterating over an array or object */
/* Iterates `element` over the children of `array` (nothing when `array` is
 * NULL). Both arguments are parenthesized so that expression arguments
 * (e.g. a conditional expression for `array`, or `*p` for `element`) expand
 * with the intended precedence. */
#define cJSON_ArrayForEach(element, array) for((element) = ((array) != NULL) ? (array)->child : NULL; (element) != NULL; (element) = (element)->next)
268 | 
269 | /* malloc/free objects using the malloc/free functions that have been set with cJSON_InitHooks */
270 | CJSON_PUBLIC(void *) cJSON_malloc(size_t size);
271 | CJSON_PUBLIC(void) cJSON_free(void *object);
272 | 
273 | #ifdef __cplusplus
274 | }
275 | #endif
276 | 
277 | #endif
278 | 


--------------------------------------------------------------------------------
/src/cmake/FindPCRE2.cmake:
--------------------------------------------------------------------------------
 1 | # Quick and dirty module for PCRE2 stuff.
 2 | 
 3 | 
 4 | # Use may set ``PCRE2_ROOT` to a pcre2 installation root.
 5 | 
 6 | include(FindPackageHandleStandardArgs)
 7 | 
 8 | set(_PCRE2_SEARCHES)
 9 | if(PCRE2_ROOT)
10 |   set(_PCRE2_SEARCH_ROOT PATHS ${PCRE2_ROOT} NO_DEFAULT_PATH)
11 |   list(APPEND _PCRE2_SEARCHES _PCRE2_SEARCH_ROOT)
12 | endif()
13 | 
14 | find_path(PCRE2_INCLUDE_DIR pcre2.h DOC "PCRE2 header" PATH_SUFFIXES include)
15 | find_library(PCRE2_8 pcre2-8 DOC "UTF-8 version of PCRE2")
16 | find_library(PCRE2_16 pcre2-16 DOC "UTF-16 version of PCRE2")
17 | 
18 | mark_as_advanced(PCRE2_INCLUDE_DIR)
19 | find_package_handle_standard_args(Pcre2_8 REQUIRED_VARS
20 |   PCRE2_8 PCRE2_INCLUDE_DIR)
21 | find_package_handle_standard_args(Pcre2_16 REQUIRED_VARS
22 |   PCRE2_8 PCRE2_INCLUDE_DIR)
23 | 


--------------------------------------------------------------------------------
/src/config.h.in:
--------------------------------------------------------------------------------
 1 | /* Config file for compile-time options */
 2 | #pragma once
 3 | 
 4 | #cmakedefine HAVE_PCRE2_8
 5 | #cmakedefine HAVE_PCRE2_16
 6 | #cmakedefine CMAKE_USE_PTHREADS_INIT
 7 | #cmakedefine ZLIB_FOUND
 8 | #cmakedefine HAVE_REGCOMP
 9 | #cmakedefine HAVE_STRPTIME
10 | #cmakedefine HAVE_EVP_MD_CTX_NEW
11 | 


--------------------------------------------------------------------------------
/src/egc_str_funcs.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2018-2019 Shawn Wagner
  3 | 
  4 | Permission is hereby granted, free of charge, to any person obtaining
  5 | a copy of this software and associated documentation files (the
  6 | "Software"), to deal in the Software without restriction, including
  7 | without limitation the rights to use, copy, modify, merge, publish,
  8 | distribute, sublicense, and/or sell copies of the Software, and to
  9 | permit persons to whom the Software is furnished to do so, subject to
 10 | the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be
 13 | included in all copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 22 | */
 23 | 
 24 | /* String functions that deal with extended grapheme clusters instead
 25 |    of strings as a whole. */
 26 | 
 27 | #include <assert.h>
 28 | #include <stdio.h>
 29 | #include <stdlib.h>
 30 | #include <string.h>
 31 | 
 32 | #include <unicode/ubrk.h>
 33 | #include <unicode/ustring.h>
 34 | #include <unicode/utypes.h>
 35 | 
 36 | #include <sqlite3ext.h>
 37 | SQLITE_EXTENSION_INIT3
 38 | 
 39 | void icuFunctionError(
 40 |     sqlite3_context *pCtx, /* SQLite scalar function context */
 41 |     const char *zName,     /* Name of ICU function that failed */
 42 |     UErrorCode e           /* Error code returned by ICU function */
 43 | );
 44 | 
 45 | static void closeBreakIterator(void *v) {
 46 |   UBreakIterator *bi = v;
 47 |   ubrk_close(bi);
 48 | }
 49 | 
 50 | UBreakIterator *default_charbreak = NULL;
 51 | 
 52 | static void sf_gclength16(sqlite3_context *c, int nArg __attribute__((unused)),
 53 |                           sqlite3_value **apArg) {
 54 |   assert(nArg == 1 || nArg == 2);
 55 | 
 56 |   if (sqlite3_value_type(apArg[0]) == SQLITE_NULL) {
 57 |     return;
 58 |   }
 59 |   if (nArg == 2 && sqlite3_value_type(apArg[1]) == SQLITE_NULL) {
 60 |     return;
 61 |   }
 62 | 
 63 |   const UChar *utf16 = sqlite3_value_text16(apArg[0]);
 64 |   if (!utf16) {
 65 |     return;
 66 |   }
 67 |   UBreakIterator *bi = NULL;
 68 |   UErrorCode status = U_ZERO_ERROR;
 69 |   if (nArg == 2) {
 70 |     bi = sqlite3_get_auxdata(c, 1);
 71 |     if (!bi) {
 72 |       const char *locale = (const char *)sqlite3_value_text(apArg[1]);
 73 |       if (!locale) {
 74 |         return;
 75 |       }
 76 |       bi = ubrk_open(UBRK_CHARACTER, locale, NULL, 0, &status);
 77 |       if (U_FAILURE(status)) {
 78 |         icuFunctionError(c, "ubrk_open", status);
 79 |         return;
 80 |       }
 81 |       sqlite3_set_auxdata(c, 1, bi, closeBreakIterator);
 82 |     }
 83 |   } else {
 84 |     bi = ubrk_safeClone(default_charbreak, NULL, NULL, &status);
 85 |     if (U_FAILURE(status)) {
 86 |       icuFunctionError(c, "ubrk_safeClone", status);
 87 |       return;
 88 |     }
 89 |   }
 90 | 
 91 |   ubrk_setText(bi, utf16, -1, &status);
 92 |   if (U_FAILURE(status)) {
 93 |     icuFunctionError(c, "ubrk_setText", status);
 94 |     return;
 95 |   }
 96 | 
 97 |   int len = 0;
 98 |   while (ubrk_next(bi) != UBRK_DONE) {
 99 |     len += 1;
100 |   }
101 | 
102 |   if (nArg == 1) {
103 |     ubrk_close(bi);
104 |   }
105 |   sqlite3_result_int(c, len);
106 | }
107 | 
108 | static void sf_gcleft16(sqlite3_context *c, int nArg __attribute__((unused)),
109 |                         sqlite3_value **apArg) {
110 |   assert(nArg == 2 || nArg == 3);
111 | 
112 |   if (sqlite3_value_type(apArg[0]) == SQLITE_NULL ||
113 |       sqlite3_value_type(apArg[1]) == SQLITE_NULL) {
114 |     return;
115 |   }
116 |   if (nArg == 3 && sqlite3_value_type(apArg[2]) == SQLITE_NULL) {
117 |     return;
118 |   }
119 | 
120 |   int n = sqlite3_value_int(apArg[1]);
121 |   if (n == 0) {
122 |     sqlite3_result_text(c, "", 0, SQLITE_STATIC);
123 |     return;
124 |   }
125 | 
126 |   const UChar *utf16 = sqlite3_value_text16(apArg[0]);
127 |   if (!utf16) {
128 |     return;
129 |   }
130 | 
131 |   UErrorCode status = U_ZERO_ERROR;
132 |   UBreakIterator *bi;
133 |   if (nArg == 3) {
134 |     bi = sqlite3_get_auxdata(c, 2);
135 |     if (!bi) {
136 |       const char *locale = (const char *)sqlite3_value_text(apArg[2]);
137 |       if (!locale) {
138 |         return;
139 |       }
140 |       bi = ubrk_open(UBRK_CHARACTER, locale, NULL, 0, &status);
141 |       if (U_FAILURE(status)) {
142 |         icuFunctionError(c, "ubrk_open", status);
143 |         return;
144 |       }
145 |       sqlite3_set_auxdata(c, 2, bi, closeBreakIterator);
146 |     }
147 |   } else {
148 |     bi = ubrk_safeClone(default_charbreak, NULL, NULL, &status);
149 |     if (U_FAILURE(status)) {
150 |       icuFunctionError(c, "ubrk_safeClone", status);
151 |       return;
152 |     }
153 |   }
154 | 
155 |   ubrk_setText(bi, utf16, -1, &status);
156 |   if (U_FAILURE(status)) {
157 |     icuFunctionError(c, "ubrk_setText", status);
158 |     return;
159 |   }
160 | 
161 |   int32_t endlen = 0;
162 |   if (n > 0) {
163 |     int32_t off;
164 |     while (n > 0 && (off = ubrk_next(bi)) != UBRK_DONE) {
165 |       endlen = off;
166 |       n -= 1;
167 |     }
168 |   } else if (n < 0) {
169 |     int32_t off;
170 |     ubrk_last(bi);
171 |     while (n < 0 && (off = ubrk_previous(bi)) != UBRK_DONE) {
172 |       endlen = off;
173 |       n += 1;
174 |     }
175 |   }
176 |   sqlite3_result_text16(c, utf16, endlen * 2, SQLITE_TRANSIENT);
177 |   if (nArg == 2) {
178 |     ubrk_close(bi);
179 |   }
180 | }
181 | 
182 | static void sf_gcright16(sqlite3_context *c, int nArg __attribute__((unused)),
183 |                          sqlite3_value **apArg) {
184 | 
185 |   assert(nArg == 2 || nArg == 3);
186 | 
187 |   if (sqlite3_value_type(apArg[0]) == SQLITE_NULL ||
188 |       sqlite3_value_type(apArg[1]) == SQLITE_NULL) {
189 |     return;
190 |   }
191 | 
192 |   int n = sqlite3_value_int(apArg[1]);
193 |   if (n == 0) {
194 |     sqlite3_result_text(c, "", 0, SQLITE_STATIC);
195 |     return;
196 |   }
197 | 
198 |   const UChar *utf16 = sqlite3_value_text16(apArg[0]);
199 |   if (!utf16) {
200 |     return;
201 |   }
202 | 
203 |   UErrorCode status = U_ZERO_ERROR;
204 |   UBreakIterator *bi;
205 |   if (nArg == 3) {
206 |     bi = sqlite3_get_auxdata(c, 2);
207 |     if (!bi) {
208 |       const char *locale = (const char *)sqlite3_value_text(apArg[2]);
209 |       if (!locale) {
210 |         return;
211 |       }
212 |       bi = ubrk_open(UBRK_CHARACTER, locale, NULL, 0, &status);
213 |       if (U_FAILURE(status)) {
214 |         icuFunctionError(c, "ubrk_open", status);
215 |         return;
216 |       }
217 |       sqlite3_set_auxdata(c, 2, bi, closeBreakIterator);
218 |     }
219 |   } else {
220 |     bi = ubrk_safeClone(default_charbreak, NULL, NULL, &status);
221 |     if (U_FAILURE(status)) {
222 |       icuFunctionError(c, "ubrk_safeClone", status);
223 |       return;
224 |     }
225 |   }
226 | 
227 |   ubrk_setText(bi, utf16, -1, &status);
228 |   if (U_FAILURE(status)) {
229 |     icuFunctionError(c, "ubrk_setText", status);
230 |     return;
231 |   }
232 | 
233 |   int32_t off = 0, nextoff = 0;
234 |   if (n > 0) {
235 |     ubrk_last(bi);
236 |     while (n > 0 && (nextoff = ubrk_previous(bi)) != UBRK_DONE) {
237 |       n -= 1;
238 |       off = nextoff;
239 |     }
240 |   } else {
241 |     while (n < 0 && (nextoff = ubrk_next(bi)) != UBRK_DONE) {
242 |       off = nextoff;
243 |       n += 1;
244 |     }
245 |   }
246 |   sqlite3_result_text16(c, utf16 + off, -1, SQLITE_TRANSIENT);
247 |   if (nArg == 2) {
248 |     ubrk_close(bi);
249 |   }
250 | }
251 | 
252 | static void sf_gcsubstr16(sqlite3_context *c, int nArg, sqlite3_value **apArg) {
253 |   assert(nArg >= 2 && nArg <= 4);
254 |   if (sqlite3_value_type(apArg[0]) == SQLITE_NULL ||
255 |       sqlite3_value_type(apArg[1]) == SQLITE_NULL) {
256 |     return;
257 |   }
258 |   if (nArg >= 3 && sqlite3_value_type(apArg[2]) == SQLITE_NULL) {
259 |     return;
260 |   }
261 |   if (nArg == 4 && sqlite3_value_type(apArg[3]) == SQLITE_NULL) {
262 |     return;
263 |   }
264 | 
265 |   const UChar *utf16 = sqlite3_value_text16(apArg[0]);
266 |   if (!utf16) {
267 |     return;
268 |   }
269 | 
270 |   int start_pos = sqlite3_value_int(apArg[1]);
271 |   if (start_pos <= 0) {
272 |     sqlite3_result_error_code(c, SQLITE_RANGE);
273 |     return;
274 |   }
275 |   start_pos -= 1;
276 | 
277 |   int sublen = -1;
278 |   if (nArg == 3) {
279 |     sublen = sqlite3_value_int(apArg[2]);
280 |     if (sublen < -1) {
281 |       sqlite3_result_error_code(c, SQLITE_RANGE);
282 |       return;
283 |     }
284 |   }
285 | 
286 |   UErrorCode status = U_ZERO_ERROR;
287 |   UBreakIterator *bi;
288 |   if (nArg == 4) {
289 |     bi = sqlite3_get_auxdata(c, 3);
290 |     if (!bi) {
291 |       const char *locale = (const char *)sqlite3_value_text(apArg[3]);
292 |       if (!locale) {
293 |         return;
294 |       }
295 |       bi = ubrk_open(UBRK_CHARACTER, locale, NULL, 0, &status);
296 |       if (U_FAILURE(status)) {
297 |         icuFunctionError(c, "ubrk_open", status);
298 |         return;
299 |       }
300 |       sqlite3_set_auxdata(c, 3, bi, closeBreakIterator);
301 |     }
302 |   } else {
303 |     bi = ubrk_safeClone(default_charbreak, NULL, NULL, &status);
304 |     if (U_FAILURE(status)) {
305 |       icuFunctionError(c, "ubrk_safeClone", status);
306 |       return;
307 |     }
308 |   }
309 | 
310 |   ubrk_setText(bi, utf16, -1, &status);
311 |   if (U_FAILURE(status)) {
312 |     icuFunctionError(c, "ubrk_setText", status);
313 |     return;
314 |   }
315 | 
316 |   int32_t off = 0, nextoff = 0;
317 |   while (start_pos > 0 && (nextoff = ubrk_next(bi)) != UBRK_DONE) {
318 |     start_pos -= 1;
319 |     off = nextoff;
320 |   }
321 | 
322 |   if (sublen == -1) {
323 |     sqlite3_result_text16(c, utf16 + off, -1, SQLITE_TRANSIENT);
324 |     if (nArg != 4) {
325 |       ubrk_close(bi);
326 |     }
327 |     return;
328 |   }
329 | 
330 |   int32_t endoff = 0;
331 |   while (sublen > 0 && (nextoff = ubrk_next(bi)) != UBRK_DONE) {
332 |     sublen -= 1;
333 |     endoff = nextoff;
334 |   }
335 | 
336 |   sqlite3_result_text16(c, utf16 + off, (endoff - off) * 2, SQLITE_TRANSIENT);
337 |   if (nArg != 4) {
338 |     ubrk_close(bi);
339 |   }
340 | }
341 | 
342 | /* Eponymous-only virtual tables to break strings */
343 | 
/* Virtual table object for the text-breaking tables. One module
 * implementation serves every table; `type` selects which kind of ICU break
 * iterator its cursors open. */
struct break_vtab {
  sqlite3_vtab base;       /* SQLite base object; kept first so the
                              sqlite3_vtab pointer can be cast to this type */
  UBreakIteratorType type; /* break kind, taken from pAux in breakConnect */
};
348 | 
/* Cursor over the tokens of one string. Offsets are in UTF-16 code units
 * into `utf16`; the current token spans [start_off, end_off). */
struct break_cursor {
  sqlite3_vtab_cursor base; /* SQLite base object; kept first so cursor
                               pointers can be cast to this type */
  sqlite3_int64 rowid;      /* incremented by breakNext for every token */
  char *locale;             /* sqlite3_mprintf() copy of the locale argument,
                               or NULL when none was given */
  UChar *utf16;             /* NUL-terminated sqlite3_malloc() copy of the
                               text; NULL means there is nothing to scan */
  UBreakIterator *bi;       /* ICU iterator opened over utf16 */
  int cps_seen;             /* code points before start_off; -1 until the
                               start column is first requested */
  int32_t start_off;        /* offset of the current token's first unit */
  int32_t end_off;          /* offset just past the current token;
                               UBRK_DONE once the scan is finished */
};
359 | 
360 | static int breakConnect(sqlite3 *db, void *pAux,
361 |                         int argc __attribute__((unused)),
362 |                         const char *const *argv __attribute__((unused)),
363 |                         sqlite3_vtab **ppVtab,
364 |                         char **pzErr __attribute__((unused))) {
365 |   struct break_vtab *bv;
366 |   int rc;
367 | 
368 | #define BREAK_COLUMN_VALUE 0
369 | #define BREAK_COLUMN_START 1
370 | #define BREAK_COLUMN_LEN 2
371 | #define BREAK_COLUMN_TXT 3
372 | #define BREAK_COLUMN_LOCALE 4
373 | 
374 |   rc = sqlite3_declare_vtab(db, "CREATE TABLE x(value TEXT, start INTEGER, "
375 |                                 "len INTEGER, txt hidden, locale hidden)");
376 |   if (rc != SQLITE_OK) {
377 |     return rc;
378 |   }
379 |   bv = sqlite3_malloc(sizeof *bv);
380 |   if (!bv) {
381 |     return SQLITE_NOMEM;
382 |   }
383 |   *ppVtab = &bv->base;
384 |   memset(bv, 0, sizeof *bv);
385 |   bv->type = (UBreakIteratorType)pAux;
386 |   return SQLITE_OK;
387 | }
388 | 
389 | static int breakDisconnect(sqlite3_vtab *pVtab) {
390 |   sqlite3_free(pVtab);
391 |   return SQLITE_OK;
392 | }
393 | 
394 | static int breakOpen(sqlite3_vtab *p __attribute__((unused)),
395 |                      sqlite3_vtab_cursor **ppCursor) {
396 |   struct break_cursor *bc = sqlite3_malloc(sizeof *bc);
397 |   if (!bc) {
398 |     return SQLITE_NOMEM;
399 |   }
400 |   memset(bc, 0, sizeof *bc);
401 |   *ppCursor = &bc->base;
402 |   bc->cps_seen = -1;
403 |   return SQLITE_OK;
404 | }
405 | 
406 | static int breakClose(sqlite3_vtab_cursor *cur) {
407 |   struct break_cursor *bc = (struct break_cursor *)cur;
408 |   if (bc->bi) {
409 |     ubrk_close(bc->bi);
410 |   }
411 |   sqlite3_free(bc->utf16);
412 |   sqlite3_free(bc->locale);
413 |   sqlite3_free(bc);
414 |   return SQLITE_OK;
415 | }
416 | 
417 | static int breakNext(sqlite3_vtab_cursor *cur) {
418 |   struct break_cursor *bc = (struct break_cursor *)cur;
419 |   if (bc->cps_seen >= 0) {
420 |     bc->cps_seen +=
421 |         u_countChar32(bc->utf16 + bc->start_off, bc->end_off - bc->start_off);
422 |   }
423 |   bc->start_off = bc->end_off;
424 |   bc->end_off = ubrk_next(bc->bi);
425 |   bc->rowid += 1;
426 |   return SQLITE_OK;
427 | }
428 | 
429 | static int breakColumn(sqlite3_vtab_cursor *cur, sqlite3_context *c, int i) {
430 |   struct break_cursor *bc = (struct break_cursor *)cur;
431 |   switch (i) {
432 |   default:
433 |   case BREAK_COLUMN_VALUE:
434 |     if (bc->utf16) {
435 |       sqlite3_result_text16(c, bc->utf16 + bc->start_off,
436 |                             (bc->end_off - bc->start_off) * 2, SQLITE_STATIC);
437 |     }
438 |     break;
439 |   case BREAK_COLUMN_START:
440 |     if (bc->utf16) {
441 |       if (bc->cps_seen < 0) {
442 |         bc->cps_seen = u_countChar32(bc->utf16, bc->start_off);
443 |       }
444 |       sqlite3_result_int(c, bc->cps_seen + 1);
445 |     }
446 |     break;
447 |   case BREAK_COLUMN_LEN:
448 |     if (bc->utf16) {
449 |       sqlite3_result_int(c, u_countChar32(bc->utf16 + bc->start_off,
450 |                                           bc->end_off - bc->start_off));
451 |     }
452 |     break;
453 |   case BREAK_COLUMN_TXT:
454 |     if (bc->utf16) {
455 |       sqlite3_result_text16(c, bc->utf16, -1, SQLITE_STATIC);
456 |     }
457 |     break;
458 |   case BREAK_COLUMN_LOCALE:
459 |     if (bc->locale) {
460 |       sqlite3_result_text(c, bc->locale, -1, SQLITE_STATIC);
461 |     }
462 |     break;
463 |   }
464 |   return SQLITE_OK;
465 | }
466 | 
467 | static int breakRowid(sqlite3_vtab_cursor *cur, sqlite3_int64 *pRowid) {
468 |   struct break_cursor *bc = (struct break_cursor *)cur;
469 |   *pRowid = bc->rowid;
470 |   return SQLITE_OK;
471 | }
472 | 
473 | static int breakEof(sqlite3_vtab_cursor *cur) {
474 |   struct break_cursor *bc = (struct break_cursor *)cur;
475 |   return !bc->utf16 || bc->end_off == UBRK_DONE;
476 | }
477 | 
478 | static int breakFilter(sqlite3_vtab_cursor *cur,
479 |                        int idxNum __attribute__((unused)),
480 |                        const char *idxStr __attribute__((unused)), int argc,
481 |                        sqlite3_value **argv) {
482 |   struct break_cursor *bc = (struct break_cursor *)cur;
483 |   struct break_vtab *bv = (struct break_vtab *)bc->base.pVtab;
484 | 
485 |   assert(argc == 1 || argc == 2);
486 | 
487 |   if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
488 |     return SQLITE_OK;
489 |   }
490 | 
491 |   if (argc == 2) {
492 |     if (sqlite3_value_type(argv[1]) != SQLITE_NULL) {
493 |       bc->locale = sqlite3_mprintf("%s", sqlite3_value_text(argv[1]));
494 |       if (!bc->locale) {
495 |         return SQLITE_NOMEM;
496 |       }
497 |     }
498 |   }
499 | 
500 |   const void *u16 = sqlite3_value_text16(argv[0]);
501 |   if (!u16) {
502 |     return SQLITE_OK;
503 |   }
504 |   int32_t len = sqlite3_value_bytes16(argv[0]);
505 |   bc->utf16 = sqlite3_malloc(len + 2);
506 |   memcpy(bc->utf16, u16, len);
507 |   bc->utf16[len / 2] = 0;
508 | 
509 |   UErrorCode status = U_ZERO_ERROR;
510 |   bc->bi = ubrk_open(bv->type, bc->locale, bc->utf16, -1, &status);
511 |   if (U_FAILURE(status)) {
512 |     sqlite3_free(bc->utf16);
513 |     sqlite3_free(bc->locale);
514 |     sqlite3_free(cur->pVtab->zErrMsg);
515 |     cur->pVtab->zErrMsg =
516 |         sqlite3_mprintf("ICU error: ubrk_open(): %s", u_errorName(status));
517 |     return SQLITE_ERROR;
518 |   }
519 | 
520 |   return breakNext(cur);
521 | }
522 | 
523 | static int breakBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo) {
524 |   int aIdx[2] = {-1, -1};
525 |   int unusableMask = 0;
526 |   int idxMask = 0;
527 |   _Bool start_requested = 0, len_requested = 0;
528 |   const struct sqlite3_index_constraint *pConstraint = pIdxInfo->aConstraint;
529 |   for (int i = 0; i < pIdxInfo->nConstraint; i++, pConstraint++) {
530 |     if (pConstraint->iColumn < BREAK_COLUMN_TXT) {
531 |       if (pConstraint->usable && pConstraint->iColumn == BREAK_COLUMN_START) {
532 |         start_requested = 1;
533 |       } else
534 | 
535 |           if (pConstraint->usable && pConstraint->iColumn == BREAK_COLUMN_LEN) {
536 |         len_requested = 1;
537 |       }
538 |       continue;
539 |     }
540 |     int iCol = pConstraint->iColumn - BREAK_COLUMN_TXT;
541 |     int iMask = 1 << iCol;
542 |     if (pConstraint->usable == 0) {
543 |       unusableMask |= iMask;
544 |     } else if (pConstraint->op == SQLITE_INDEX_CONSTRAINT_EQ) {
545 |       aIdx[iCol] = i;
546 |       idxMask |= iMask;
547 |     }
548 |   }
549 |   if ((unusableMask & !idxMask) != 0) {
550 |     return SQLITE_CONSTRAINT;
551 |   }
552 |   if (aIdx[0] >= 0) {
553 |     int i = aIdx[0];
554 |     pIdxInfo->aConstraintUsage[i].argvIndex = 1;
555 |     pIdxInfo->aConstraintUsage[i].omit = 1;
556 |   }
557 |   if (aIdx[1] >= 0) {
558 |     int i = aIdx[1];
559 |     pIdxInfo->aConstraintUsage[i].argvIndex = 2;
560 |     pIdxInfo->aConstraintUsage[i].omit = 1;
561 |     pIdxInfo->idxNum = 2;
562 |   } else {
563 |     pIdxInfo->idxNum = 1;
564 |   }
565 | 
566 | #undef MAX
567 | #define MAX(a, b) (a) > (b) ? (a) : (b)
568 |   int cost = 0;
569 |   struct break_vtab *bv = (struct break_vtab *)tab;
570 |   if (bv->type == UBRK_CHARACTER) {
571 |     cost = 500;
572 |   } else {
573 |     /* Word and sentence tokenizing is more expensive as there are
574 |      * more code points to scan. */
575 |     cost = 1200;
576 |   }
577 |   /* Counting the code points to the starting index is only done on
578 |    * demand; expensive once for the first count if not done from the
579 |    * very beginning. Amortize the costs. */
580 |   if (start_requested) {
581 |     if (bv->type == UBRK_CHARACTER) {
582 |       cost += 100;
583 |     } else if (bv->type == UBRK_WORD) {
584 |       cost += 200;
585 |     } else {
586 |       cost += 400;
587 |     }
588 |   }
589 |   /* Counting the code points in the token is more expensive for words
590 |    * and sentences than characters. */
591 |   if (len_requested) {
592 |     if (bv->type == UBRK_CHARACTER) {
593 |       cost = MAX(cost, 550);
594 |     } else if (bv->type == UBRK_WORD) {
595 |       cost = MAX(cost, 1500);
596 |     } else {
597 |       cost = MAX(cost, 4000);
598 |     }
599 |   }
600 |   pIdxInfo->estimatedCost = cost;
601 |   return SQLITE_OK;
602 | }
603 | 
604 | static sqlite3_module breakModule = {
605 |     1,               // iVersion
606 |     0,               // xCreate
607 |     breakConnect,    // xConnect
608 |     breakBestIndex,  // xBestIndex
609 |     breakDisconnect, // xDisconnect
610 |     0,               // xDestroy
611 |     breakOpen,       // xOpen
612 |     breakClose,      // xClose
613 |     breakFilter,     // xFilter
614 |     breakNext,       // xNext
615 |     breakEof,        // xEof
616 |     breakColumn,     // xColumn
617 |     breakRowid,      // xRowid
618 |     0,               // xUpdate
619 |     0,               // xBegin
620 |     0,               // xSync
621 |     0,               // xCommit
622 |     0,               // xRollback
623 |     0,               // xFindFunction
624 |     0,               // xRename
625 |     0,               // xSavepoint
626 |     0,               // xRelease
627 |     0,               // xRollbackTo
628 | #if SQLITE_VERSION_NUMBER >= 3026000
629 |     0, // xShadowName
630 | #endif
631 | };
632 | 
633 | int sf_egc_init(sqlite3 *db, char **pzErrMsg) {
634 |   const struct Scalar {
635 |     const char *zName;  /* Function name */
636 |     int nArg;           /* Number of arguments */
637 |     unsigned short enc; /* Optimal text encoding */
638 |     void *iContext;     /* sqlite3_user_data() context */
639 |     void (*xFunc)(sqlite3_context *, int, sqlite3_value **);
640 |   } scalars[] = {
641 |       {"gclength", 1, SQLITE_UTF16 | SQLITE_DETERMINISTIC, NULL, sf_gclength16},
642 |       {"gclength", 2, SQLITE_UTF16 | SQLITE_DETERMINISTIC, NULL, sf_gclength16},
643 |       {"gcleft", 2, SQLITE_UTF16 | SQLITE_DETERMINISTIC, NULL, sf_gcleft16},
644 |       {"gcleft", 3, SQLITE_UTF16 | SQLITE_DETERMINISTIC, NULL, sf_gcleft16},
645 |       {"gcright", 2, SQLITE_UTF16 | SQLITE_DETERMINISTIC, NULL, sf_gcright16},
646 |       {"gcright", 3, SQLITE_UTF16 | SQLITE_DETERMINISTIC, NULL, sf_gcright16},
647 |       {"gcsubstr", 2, SQLITE_UTF16 | SQLITE_DETERMINISTIC, NULL, sf_gcsubstr16},
648 |       {"gcsubstr", 3, SQLITE_UTF16 | SQLITE_DETERMINISTIC, NULL, sf_gcsubstr16},
649 |       {"gcsubstr", 4, SQLITE_UTF16 | SQLITE_DETERMINISTIC, NULL, sf_gcsubstr16},
650 |       {NULL, -1, 0, NULL, NULL}};
651 |   const struct Break {
652 |     const char *name;
653 |     UBreakIteratorType type;
654 |   } breaks[] = {{"graphemes", UBRK_CHARACTER},
655 |                 {"words", UBRK_WORD},
656 |                 {"sentences", UBRK_SENTENCE},
657 |                 {"lines", UBRK_LINE},
658 |                 {NULL, UBRK_COUNT}};
659 |   int rc = SQLITE_OK;
660 |   UErrorCode status = U_ZERO_ERROR;
661 | 
662 |   if (!default_charbreak) {
663 |     default_charbreak = ubrk_open(UBRK_CHARACTER, NULL, NULL, 0, &status);
664 |     if (U_FAILURE(status)) {
665 |       if (pzErrMsg) {
666 |         *pzErrMsg =
667 |             sqlite3_mprintf("ICU error: ubrk_open(): %s", u_errorName(status));
668 |       }
669 |       return SQLITE_ERROR;
670 |     }
671 |   }
672 | 
673 |   for (int i = 0; rc == SQLITE_OK && scalars[i].zName; i += 1) {
674 |     const struct Scalar *p = &scalars[i];
675 |     rc = sqlite3_create_function(db, p->zName, p->nArg, p->enc, p->iContext,
676 |                                  p->xFunc, 0, 0);
677 |   }
678 | 
679 |   for (int i = 0; rc == SQLITE_OK && breaks[i].name; i += 1) {
680 |     rc = sqlite3_create_module(db, breaks[i].name, &breakModule,
681 |                                (void *)breaks[i].type);
682 |   }
683 | 
684 |   return rc;
685 | }
686 | 


--------------------------------------------------------------------------------
/src/group.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2018 Shawn Wagner
  3 | 
  4 | Permission is hereby granted, free of charge, to any person obtaining
  5 | a copy of this software and associated documentation files (the
  6 | "Software"), to deal in the Software without restriction, including
  7 | without limitation the rights to use, copy, modify, merge, publish,
  8 | distribute, sublicense, and/or sell copies of the Software, and to
  9 | permit persons to whom the Software is furnished to do so, subject to
 10 | the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be
 13 | included in all copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 22 | */
 23 | 
 24 | #define _GNU_SOURCE
 25 | 
 26 | #include <errno.h>
 27 | #include <grp.h>
 28 | #include <stdlib.h>
 29 | #include <string.h>
 30 | #include <sys/types.h>
 31 | #include <unistd.h>
 32 | 
 33 | #include <sqlite3ext.h>
 34 | SQLITE_EXTENSION_INIT3
 35 | 
 36 | #include "nss_tables.h"
 37 | 
 38 | /* /etc/group table */
 39 | 
/* Virtual table object for the group table; it carries no state beyond the
 * SQLite base object. */
struct group_vtab {
  sqlite3_vtab vtab; /* SQLite base object */
};
 43 | 
/* Cursor over group database entries. */
struct group_cursor {
  sqlite3_vtab *vtab;     /* owning table, set in group_open.
                             NOTE(review): presumably stands in for the
                             sqlite3_vtab_cursor base, since cursor pointers
                             are cast to this struct - confirm */
  struct group *gr;       /* current entry; NULL means end of data
                             (see group_eof) */
  struct group grstorage; /* storage that getgrgid_r fills in */
  char *buf;              /* scratch buffer handed to getgrgid_r; doubled
                             when it reports ERANGE */
  int bufsize;            /* current size of buf in bytes */
  int memidx;             /* reset to 0 by group_filter; presumably the index
                             of the current member - TODO confirm */
  sqlite3_int64 rowid;    /* row counter, starts at 0 in group_open */
  _Bool specific;         /* set when a single gid is looked up; group_close
                             skips endgrent() when it is set */
};
 54 | 
 55 | static int group_connect(sqlite3 *db, void *pAux __attribute__((unused)),
 56 |                          int argc __attribute__((unused)),
 57 |                          const char *const *argv __attribute__((unused)),
 58 |                          sqlite3_vtab **ppVTab, char **pzErr) {
 59 |   int status;
 60 |   status = sqlite3_declare_vtab(
 61 |       db, "CREATE TABLE etc_group(name TEXT NOT NULL, password TEXT, "
 62 |           "gid INTEGER NOT NULL, member TEXT)");
 63 |   if (status != SQLITE_OK) {
 64 |     *pzErr = sqlite3_mprintf("%s", sqlite3_errstr(status));
 65 |     return SQLITE_ERROR;
 66 |   }
 67 | 
 68 |   *ppVTab = sqlite3_malloc(sizeof(struct group_vtab));
 69 |   (*ppVTab)->pModule = &group_funcs;
 70 |   (*ppVTab)->nRef = 0;
 71 |   (*ppVTab)->zErrMsg = NULL;
 72 |   return SQLITE_OK;
 73 | }
 74 | 
 75 | static int group_bestindex(sqlite3_vtab *tab __attribute__((unused)),
 76 |                            sqlite3_index_info *info) {
 77 |   info->idxNum = 0;
 78 |   for (int n = 0; n < info->nConstraint; n += 1) {
 79 |     if (info->aConstraint[n].usable == 0) {
 80 |       continue;
 81 |     }
 82 |     if (info->aConstraint[n].iColumn == -1 ||
 83 |         info->aConstraint[n].iColumn == 2) {
 84 |       if (info->aConstraint[n].op == SQLITE_INDEX_CONSTRAINT_EQ) {
 85 |         info->idxNum |= 1;
 86 |         info->aConstraintUsage[n].argvIndex = 1;
 87 |         info->aConstraintUsage[n].omit = 1;
 88 |       }
 89 |     } else if (info->aConstraint[n].iColumn == 0) {
 90 |       if (info->aConstraint[n].op == SQLITE_INDEX_CONSTRAINT_EQ) {
 91 |         info->idxNum |= 2;
 92 |         info->aConstraintUsage[n].argvIndex = 1;
 93 |         info->aConstraintUsage[n].omit = 1;
 94 |       }
 95 |     }
 96 |   }
 97 |   if (info->idxNum) {
 98 |     info->estimatedCost = 10;
 99 |     info->estimatedRows = 1;
100 |     info->idxFlags = SQLITE_INDEX_SCAN_UNIQUE;
101 |   } else {
102 |     info->estimatedCost = 200;
103 |   }
104 |   return SQLITE_OK;
105 | }
106 | 
107 | static int group_disconnect(sqlite3_vtab *tab) {
108 |   sqlite3_free(tab);
109 |   return SQLITE_OK;
110 | }
111 | 
112 | static int group_open(sqlite3_vtab *tab, sqlite3_vtab_cursor **curs) {
113 |   struct group_cursor *c = sqlite3_malloc(sizeof(struct group_cursor));
114 |   if (!c) {
115 |     if (tab->zErrMsg) {
116 |       sqlite3_free(tab->zErrMsg);
117 |     }
118 |     tab->zErrMsg = sqlite3_mprintf("Out of memory");
119 |     return SQLITE_NOMEM;
120 |   }
121 |   c->vtab = tab;
122 |   c->bufsize = 4096;
123 |   c->buf = sqlite3_malloc(c->bufsize);
124 |   if (!c->buf) {
125 |     if (tab->zErrMsg) {
126 |       sqlite3_free(tab->zErrMsg);
127 |     }
128 |     sqlite3_free(c);
129 |     tab->zErrMsg = sqlite3_mprintf("Out of memory");
130 |     return SQLITE_NOMEM;
131 |   }
132 |   c->rowid = 0;
133 |   
134 |   *curs = (sqlite3_vtab_cursor *)c;
135 |   return SQLITE_OK;
136 | }
137 | 
138 | static int group_close(sqlite3_vtab_cursor *vc) {
139 |   struct group_cursor *c = (struct group_cursor *)vc;
140 |   if (!c->specific) {
141 |     endgrent();
142 |   }
143 |   sqlite3_free(c->buf);
144 |   sqlite3_free(c);
145 |   return SQLITE_OK;
146 | }
147 | 
148 | int group_eof(sqlite3_vtab_cursor *vc) {
149 |   struct group_cursor *c = (struct group_cursor *)vc;
150 |   return c->gr == NULL;
151 | }
152 | 
153 | static int group_filter(sqlite3_vtab_cursor *vc, int idxNum,
154 |                         const char *idxStr __attribute__((unused)),
155 |                         int argc __attribute__((unused)),
156 |                         sqlite3_value **argv) {
157 |   struct group_cursor *c = (struct group_cursor *)vc;
158 | 
159 |   c->memidx = 0;
160 | 
161 |   if (idxNum == 3) {
162 |     if (c->vtab->zErrMsg) {
163 |       sqlite3_free(c->vtab->zErrMsg);
164 |     }
165 |     c->vtab->zErrMsg = sqlite3_mprintf("cannot search for a gid AND groupname");
166 |     return SQLITE_CONSTRAINT_VTAB;
167 |   }
168 | 
169 |   if (idxNum == 1) {
170 |     gid_t gid = sqlite3_value_int(argv[0]);
171 |     c->specific = 1;
172 |     while (1) {
173 |       if (getgrgid_r(gid, &(c->grstorage), c->buf, c->bufsize, &(c->gr)) < 0) {
174 |         if (errno == ERANGE) {
175 |           c->buf = sqlite3_realloc(c->buf, c->bufsize * 2);
176 |           c->bufsize *= 2;
177 |         } else if (errno == ENOENT || errno == ESRCH) {
178 |           break;
179 |         } else {
180 |           if (c->vtab->zErrMsg) {
181 |             sqlite3_free(c->vtab->zErrMsg);
182 |           }
183 |           c->vtab->zErrMsg = sqlite3_mprintf("getgrgid_r: %s", strerror(errno));
184 |           return SQLITE_ERROR;
185 |         }
186 |       } else {
187 |         break;
188 |       }
189 |     }
190 |     return SQLITE_OK;
191 |   }
192 | 
193 |   if (idxNum == 2) {
194 |     const char *username = (const char *)sqlite3_value_text(argv[0]);
195 |     c->specific = 1;
196 |     while (1) {
197 |       if (getgrnam_r(username, &(c->grstorage), c->buf, c->bufsize, &(c->gr)) <
198 |           0) {
199 |         if (errno == ERANGE) {
200 |           c->buf = sqlite3_realloc(c->buf, c->bufsize * 2);
201 |           c->bufsize *= 2;
202 |         } else if (errno == ENOENT || errno == ESRCH) {
203 |           break;
204 |         } else {
205 |           if (c->vtab->zErrMsg) {
206 |             sqlite3_free(c->vtab->zErrMsg);
207 |           }
208 |           c->vtab->zErrMsg = sqlite3_mprintf("getgrnam_r: %s", strerror(errno));
209 |           return SQLITE_ERROR;
210 |         }
211 |       } else {
212 |         break;
213 |       }
214 |     }
215 |     return SQLITE_OK;
216 |   }
217 | 
218 |   setgrent();
219 |   c->specific = 0;
220 |   while (1) {
221 |     if (getgrent_r(&(c->grstorage), c->buf, c->bufsize, &(c->gr)) < 0) {
222 |       if (errno == ERANGE) {
223 |         c->buf = sqlite3_realloc(c->buf, c->bufsize * 2);
224 |         c->bufsize *= 2;
225 |       } else if (errno == ENOENT || errno == ESRCH) {
226 |         break;
227 |       } else {
228 |         if (c->vtab->zErrMsg) {
229 |           sqlite3_free(c->vtab->zErrMsg);
230 |         }
231 |         c->vtab->zErrMsg = sqlite3_mprintf("getgrent_r: %s", strerror(errno));
232 |         return SQLITE_ERROR;
233 |       }
234 |     } else {
235 |       break;
236 |     }
237 |   }
238 |   return SQLITE_OK;
239 | }
240 | 
241 | static int group_next(sqlite3_vtab_cursor *vc) {
242 |   struct group_cursor *c = (struct group_cursor *)vc;
243 | 
244 |   // One row per member of the current group
245 |   if (c->gr->gr_mem[c->memidx] != NULL) {
246 |     c->memidx += 1;
247 |     if (c->gr->gr_mem[c->memidx]) {
248 |       c->rowid += 1;
249 |       return SQLITE_OK;
250 |     }
251 |   }
252 | 
253 |   // EOF if done with members of an indexed group.
254 |   if (c->specific) {
255 |     c->gr = NULL;
256 |     return SQLITE_OK;
257 |   }
258 | 
259 |   // Get the next group.
260 |   while (1) {
261 |     if (getgrent_r(&(c->grstorage), c->buf, c->bufsize, &(c->gr)) < 0) {
262 |       if (errno == ERANGE) {
263 |         c->buf = sqlite3_realloc(c->buf, c->bufsize * 2);
264 |         c->bufsize *= 2;
265 |       } else {
266 |         if (c->vtab->zErrMsg) {
267 |           sqlite3_free(c->vtab->zErrMsg);
268 |         }
269 |         c->vtab->zErrMsg = sqlite3_mprintf("getgrent_r: %s", strerror(errno));
270 |         return SQLITE_ERROR;
271 |       }
272 |     } else {
273 |       break;
274 |     }
275 |   }
276 |   c->memidx = 0;
277 |   c->rowid += 1;
278 |   return SQLITE_OK;
279 | }
280 | 
281 | static int group_column(sqlite3_vtab_cursor *vc, sqlite3_context *p, int n) {
282 |   struct group_cursor *c = (struct group_cursor *)vc;
283 |   switch (n) {
284 |   case 0:
285 |     sqlite3_result_text(p, c->gr->gr_name, -1, SQLITE_TRANSIENT);
286 |     break;
287 |   case 1:
288 |     if (c->gr->gr_passwd) {
289 |       sqlite3_result_text(p, c->gr->gr_passwd, -1, SQLITE_TRANSIENT);
290 |     } else {
291 |       sqlite3_result_null(p);
292 |     }
293 |     break;
294 |   case 2:
295 |     sqlite3_result_int(p, c->gr->gr_gid);
296 |     break;
297 |   case 3:
298 |     if (c->gr->gr_mem[c->memidx]) {
299 |       sqlite3_result_text(p, c->gr->gr_mem[c->memidx], -1, SQLITE_TRANSIENT);
300 |     } else {
301 |       sqlite3_result_null(p);
302 |     }
303 |     break;
304 |   default:
305 |     if (c->vtab->zErrMsg) {
306 |       sqlite3_free(c->vtab->zErrMsg);
307 |     }
308 |     c->vtab->zErrMsg = sqlite3_mprintf("Column out of range");
309 |     return SQLITE_RANGE;
310 |   }
311 |   return SQLITE_OK;
312 | }
313 | 
314 | static int group_rowid(sqlite3_vtab_cursor *vc, sqlite3_int64 *pRowId) {
315 |   struct group_cursor *c = (struct group_cursor *)vc;
316 |   *pRowId = c->rowid;
317 |   return SQLITE_OK;
318 | }
319 | 
320 | static int group_rename(sqlite3_vtab *tab __attribute__((unused)),
321 |                         const char *newname __attribute__((unused))) {
322 |   return SQLITE_OK;
323 | }
324 | 
325 | struct sqlite3_module group_funcs = {1,
326 |                                      group_connect,
327 |                                      group_connect,
328 |                                      group_bestindex,
329 |                                      group_disconnect,
330 |                                      group_disconnect,
331 |                                      group_open,
332 |                                      group_close,
333 |                                      group_filter,
334 |                                      group_next,
335 |                                      group_eof,
336 |                                      group_column,
337 |                                      group_rowid,
338 |                                      NULL,
339 |                                      NULL,
340 |                                      NULL,
341 |                                      NULL,
342 |                                      NULL,
343 |                                      NULL,
344 |                                      group_rename,
345 |                                      NULL,
346 |                                      NULL,
347 |                                      NULL};
348 | 


--------------------------------------------------------------------------------
/src/json_funcs.c:
--------------------------------------------------------------------------------
  1 | /* Extra JSON functions */
  2 | 
  3 | /*
  4 | Copyright 2018 Shawn Wagner
  5 | 
  6 | Permission is hereby granted, free of charge, to any person obtaining
  7 | a copy of this software and associated documentation files (the
  8 | "Software"), to deal in the Software without restriction, including
  9 | without limitation the rights to use, copy, modify, merge, publish,
 10 | distribute, sublicense, and/or sell copies of the Software, and to
 11 | permit persons to whom the Software is furnished to do so, subject to
 12 | the following conditions:
 13 | 
 14 | The above copyright notice and this permission notice shall be
 15 | included in all copies or substantial portions of the Software.
 16 | 
 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 21 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 22 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 23 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 24 | */
 25 | 
 26 | #include "cJSON.h"
 27 | #include <sqlite3ext.h>
 28 | #include <stdio.h>
 29 | #include <stdlib.h>
 30 | #include <string.h>
 31 | 
 32 | SQLITE_EXTENSION_INIT1
 33 | 
 34 | static cJSON *json_find_path(cJSON *json, char *path, int *err) {
 35 | 
 36 |   *err = 0;
 37 | 
 38 |   if (!path) {
 39 |     *err = 1;
 40 |     return NULL;
 41 |   }
 42 |   if (path[0] != '$') {
 43 |     *err = 1;
 44 |     return NULL;
 45 |   }
 46 | 
 47 |   for (size_t i = 1; path[i] && json; i += 1) {
 48 |     if (path[i] == '.') {
 49 |       i += 1;
 50 |       size_t eok = strcspn(path + i, ".[");
 51 |       char saved = path[i + eok];
 52 |       path[i + eok] = '\0';
 53 |       json = cJSON_GetObjectItemCaseSensitive(json, path + i);
 54 |       path[i + eok] = saved;
 55 |       i = i + eok - 1;
 56 |     } else if (path[i] == '[') {
 57 |       char *eon;
 58 |       int idx = strtoul(path + i + 1, &eon, 10);
 59 |       if (*eon != ']') {
 60 |         *err = 1;
 61 |         json = NULL;
 62 |         break;
 63 |       }
 64 |       i = eon - path;
 65 |       json = cJSON_GetArrayItem(json, idx);
 66 |     } else {
 67 |       *err = 1;
 68 |       json = NULL;
 69 |     }
 70 |   }
 71 |   return json;
 72 | }
 73 | 
 74 | static void json_length(sqlite3_context *ctx, int nargs, sqlite3_value **args) {
 75 |   if (sqlite3_value_type(args[0]) == SQLITE_NULL) {
 76 |     return;
 77 |   }
 78 |   if (nargs == 2 && sqlite3_value_type(args[0]) == SQLITE_NULL) {
 79 |     return;
 80 |   }
 81 | 
 82 |   cJSON *orig, *json;
 83 |   orig = json = cJSON_Parse((const char *)sqlite3_value_text(args[0]));
 84 |   if (!json) {
 85 |     sqlite3_result_error(ctx, "malformed JSON", -1);
 86 |     return;
 87 |   }
 88 | 
 89 |   if (nargs == 2) {
 90 |     char *path =
 91 |         sqlite3_mprintf("%s", (const char *)sqlite3_value_text(args[1]));
 92 |     if (!path) {
 93 |       sqlite3_result_error_nomem(ctx);
 94 |       cJSON_Delete(orig);
 95 |       return;
 96 |     }
 97 |     int error = 0;
 98 |     json = json_find_path(json, path, &error);
 99 |     sqlite3_free(path);
100 |     if (error) {
101 |       sqlite3_result_error(ctx, "malformed path", -1);
102 |     }
103 |     if (!json) {
104 |       cJSON_Delete(orig);
105 |       return;
106 |     }
107 |   }
108 | 
109 |   if (cJSON_IsArray(json) || cJSON_IsObject(json)) {
110 |     sqlite3_result_int(ctx, cJSON_GetArraySize(json));
111 |   } else {
112 |     sqlite3_result_int(ctx, 1);
113 |   }
114 |   cJSON_Delete(orig);
115 | }
116 | 
117 | #if SQLITE_VERSION_NUMBER >= 3024000
118 | /* MySQL style json_pretty() formatting */
119 | static _Bool json_pp_fancy(sqlite3_str *out, cJSON *json, int level) {
120 |   if (cJSON_IsArray(json)) {
121 |     cJSON *elem;
122 |     sqlite3_str_appendall(out, "[\n");
123 |     cJSON_ArrayForEach(elem, json) {
124 |       sqlite3_str_appendchar(out, level * 2 + 2, ' ');
125 |       if (!json_pp_fancy(out, elem, level + 1)) {
126 |         return 0;
127 |       }
128 |       if (elem->next) {
129 |         sqlite3_str_appendchar(out, 1, ',');
130 |       }
131 |       sqlite3_str_appendchar(out, 1, '\n');
132 |     }
133 |     sqlite3_str_appendchar(out, level * 2, ' ');
134 |     sqlite3_str_appendchar(out, 1, ']');
135 |   } else if (cJSON_IsObject(json)) {
136 |     cJSON *elem;
137 |     sqlite3_str_appendall(out, "{\n");
138 |     cJSON_ArrayForEach(elem, json) {
139 |       cJSON *key = cJSON_CreateString(elem->string);
140 |       if (!key) {
141 |         return 0;
142 |       }
143 |       char *keytext = cJSON_Print(key);
144 |       cJSON_Delete(key);
145 |       if (!keytext) {
146 |         return 0;
147 |       }
148 |       sqlite3_str_appendchar(out, level * 2 + 2, ' ');
149 |       sqlite3_str_appendf(out, "%s: ", keytext);
150 |       free(keytext);
151 |       if (!json_pp_fancy(out, elem, level + 1)) {
152 |         return 0;
153 |       }
154 |       if (elem->next) {
155 |         sqlite3_str_appendchar(out, 1, ',');
156 |       }
157 |       sqlite3_str_appendchar(out, 1, '\n');
158 |     }
159 |     sqlite3_str_appendchar(out, level * 2, ' ');
160 |     sqlite3_str_appendchar(out, 1, '}');
161 |   } else {
162 |     char *text = cJSON_Print(json);
163 |     if (text) {
164 |       sqlite3_str_appendall(out, text);
165 |       free(text);
166 |     } else {
167 |       return 0;
168 |     }
169 |   }
170 |   return 1;
171 | }
172 | #endif
173 | 
174 | static void json_pp(sqlite3_context *ctx, int nargs __attribute__((unused)),
175 |                     sqlite3_value **args) {
176 |   cJSON *json;
177 | 
178 |   if (sqlite3_value_type(args[0]) == SQLITE_NULL) {
179 |     return;
180 |   }
181 | 
182 |   json = cJSON_Parse((const char *)sqlite3_value_text(args[0]));
183 |   if (!json) {
184 |     sqlite3_result_error(ctx, "malformed JSON", -1);
185 |     return;
186 |   }
187 | 
188 | #if SQLITE_VERSION_NUMBER >= 3024000
189 | 
190 |   sqlite3_str *out = sqlite3_str_new(sqlite3_context_db_handle(ctx));
191 |   if (!out) {
192 |     sqlite3_result_error_nomem(ctx);
193 |     cJSON_Delete(json);
194 |     return;
195 |   }
196 |   if (json_pp_fancy(out, json, 0)) {
197 |     char *text = sqlite3_str_finish(out);
198 |     if (text) {
199 |       sqlite3_result_text(ctx, text, -1, sqlite3_free);
200 |       sqlite3_result_subtype(ctx, 'J');
201 |     }
202 |   } else {
203 |     char *text = sqlite3_str_finish(out);
204 |     sqlite3_free(text);
205 |   }
206 | 
207 | #else
208 | 
209 |   char *text = cJSON_Print(json);
210 |   if (text) {
211 |     sqlite3_result_text(ctx, text, -1, free);
212 |     sqlite3_result_subtype(ctx, 'J');
213 |   }
214 | 
215 | #endif
216 | 
217 |   cJSON_Delete(json);
218 | }
219 | 
220 | static void json_equal(sqlite3_context *ctx, int nargs __attribute__((unused)),
221 |                        sqlite3_value **args) {
222 |   if (sqlite3_value_type(args[0]) == SQLITE_NULL ||
223 |       sqlite3_value_type(args[1]) == SQLITE_NULL) {
224 |     return;
225 |   }
226 | 
227 |   cJSON *j1 = cJSON_Parse((const char *)sqlite3_value_text(args[0]));
228 |   if (!j1) {
229 |     sqlite3_result_error(ctx, "malformed JSON", -1);
230 |     return;
231 |   }
232 |   cJSON *j2 = cJSON_Parse((const char *)sqlite3_value_text(args[1]));
233 |   if (!j2) {
234 |     sqlite3_result_error(ctx, "malformed JSON", -1);
235 |     cJSON_Delete(j1);
236 |     return;
237 |   }
238 | 
239 |   sqlite3_result_int(ctx, cJSON_Compare(j1, j2, 1));
240 |   cJSON_Delete(j1);
241 |   cJSON_Delete(j2);
242 | }
243 | 
244 | static void json_keys(sqlite3_context *ctx, int nargs, sqlite3_value **args) {
245 |   if (sqlite3_value_type(args[0]) == SQLITE_NULL) {
246 |     return;
247 |   }
248 |   if (nargs == 2 && sqlite3_value_type(args[1]) == SQLITE_NULL) {
249 |     return;
250 |   }
251 | 
252 |   cJSON *orig, *json;
253 |   orig = json = cJSON_Parse((const char *)sqlite3_value_text(args[0]));
254 |   if (!json) {
255 |     sqlite3_result_error(ctx, "malformed JSON", -1);
256 |     return;
257 |   }
258 | 
259 |   if (nargs == 2) {
260 |     char *path =
261 |         sqlite3_mprintf("%s", (const char *)sqlite3_value_text(args[1]));
262 |     if (!path) {
263 |       sqlite3_result_error_nomem(ctx);
264 |       cJSON_Delete(orig);
265 |       return;
266 |     }
267 |     int error = 0;
268 |     json = json_find_path(json, path, &error);
269 |     sqlite3_free(path);
270 |     if (error) {
271 |       sqlite3_result_error(ctx, "malformed path", -1);
272 |     }
273 |     if (!json) {
274 |       cJSON_Delete(orig);
275 |       return;
276 |     }
277 |   }
278 | 
279 |   if (!cJSON_IsObject(json)) {
280 |     cJSON_Delete(orig);
281 |     return;
282 |   }
283 | 
284 |   cJSON *res = cJSON_CreateArray();
285 |   if (!res) {
286 |     cJSON_Delete(orig);
287 |     sqlite3_result_error_nomem(ctx);
288 |     return;
289 |   }
290 | 
291 |   cJSON *elem;
292 |   cJSON_ArrayForEach(elem, json) {
293 |     cJSON *key = cJSON_CreateString(elem->string);
294 |     if (!key) {
295 |       cJSON_Delete(res);
296 |       cJSON_Delete(orig);
297 |       sqlite3_result_error_nomem(ctx);
298 |       return;
299 |     }
300 | 
301 |     cJSON_AddItemToArray(res, key);
302 |   }
303 | 
304 |   sqlite3_result_text(ctx, cJSON_PrintUnformatted(res), -1, cJSON_free);
305 |   sqlite3_result_subtype(ctx, 'J');
306 |   cJSON_Delete(res);
307 |   cJSON_Delete(orig);
308 | }
309 | 
310 | static void json_contains_path(sqlite3_context *ctx, int nargs,
311 |                                sqlite3_value **args) {
312 |   if (sqlite3_value_type(args[0]) == SQLITE_NULL ||
313 |       sqlite3_value_type(args[1]) == SQLITE_NULL) {
314 |     return;
315 |   }
316 | 
317 |   cJSON *orig, *json;
318 |   orig = json = cJSON_Parse((const char *)sqlite3_value_text(args[0]));
319 |   if (!json) {
320 |     sqlite3_result_error(ctx, "malformed JSON", -1);
321 |     return;
322 |   }
323 | 
324 |   char *path = sqlite3_mprintf("%s", (const char *)sqlite3_value_text(args[1]));
325 |   if (!path) {
326 |     sqlite3_result_error_nomem(ctx);
327 |     cJSON_Delete(orig);
328 |     return;
329 |   }
330 |   int error = 0;
331 |   json = json_find_path(json, path, &error);
332 |   if (error) {
333 |     sqlite3_result_error(ctx, "malformed path", -1);
334 |     cJSON_Delete(orig);
335 |     return;
336 |   }
337 |   sqlite3_free(path);
338 |   sqlite3_result_int(ctx, !!json);
339 |   cJSON_Delete(orig);
340 | }
341 | 
/* Allocation hook for cJSON: forwards a size_t request to sqlite3_malloc64(). */
static void *my_cjson_malloc(size_t sz) {
  void *block = sqlite3_malloc64(sz);
  return block;
}
345 | 
346 | #ifdef _WIN32
347 | __declspec(export)
348 | #endif
349 |     int sqlite3_jsonfuncs_init(sqlite3 *db,
350 |                                char **pzErrMsg __attribute__((unused)),
351 |                                const sqlite3_api_routines *pApi) {
352 |   SQLITE_EXTENSION_INIT2(pApi);
353 |   struct json_func {
354 |     const char *name;
355 |     int args;
356 |     void (*fun)(sqlite3_context *, int, sqlite3_value **);
357 |   } func_table[] = {{"json_pretty", 1, json_pp},
358 |                     {"json_length", 1, json_length},
359 |                     {"json_length", 2, json_length},
360 |                     {"json_equal", 2, json_equal},
361 |                     {"json_keys", 1, json_keys},
362 |                     {"json_keys", 2, json_keys},
363 |                     {"json_contains_path", 2, json_contains_path},
364 |                     {NULL, 0, NULL}};
365 |   cJSON_Hooks memhooks = { my_cjson_malloc, sqlite3_free };
366 |   cJSON_InitHooks(&memhooks);
367 |   for (int i = 0; func_table[i].name; i += 1) {
368 |     int r = sqlite3_create_function(db, func_table[i].name, func_table[i].args,
369 |                                     SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL,
370 |                                     func_table[i].fun, NULL, NULL);
371 |     if (r != SQLITE_OK) {
372 |       return r;
373 |     }
374 |   }
375 |   return SQLITE_OK;
376 | }
377 | 


--------------------------------------------------------------------------------
/src/more_str_funcs.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2018-2019 Shawn Wagner
  3 | 
  4 | Permission is hereby granted, free of charge, to any person obtaining
  5 | a copy of this software and associated documentation files (the
  6 | "Software"), to deal in the Software without restriction, including
  7 | without limitation the rights to use, copy, modify, merge, publish,
  8 | distribute, sublicense, and/or sell copies of the Software, and to
  9 | permit persons to whom the Software is furnished to do so, subject to
 10 | the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be
 13 | included in all copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 22 | */
 23 | 
 24 | /* Additional string functions that don't involve unicode character
 25 |    properties or the like. */
 26 | 
 27 | #define _XOPEN_SOURCE
 28 | 
 29 | #include "config.h"
 30 | 
 31 | #include <assert.h>
 32 | #include <stdlib.h>
 33 | #include <string.h>
 34 | #include <time.h>
 35 | 
 36 | #include <sqlite3ext.h>
 37 | SQLITE_EXTENSION_INIT3
 38 | 
 39 | static char *do_append(sqlite3_context *p, char *zOut, sqlite3_uint64 *nOut,
 40 |                        const char *zApp, int nApp) {
 41 |   sqlite3_int64 newLen = *nOut + nApp;
 42 | 
 43 |   if (nApp == 0) {
 44 |     return zOut;
 45 |   }
 46 | 
 47 |   char *zNew = sqlite3_realloc64(zOut, newLen);
 48 |   if (!zNew) {
 49 |     sqlite3_free(zOut);
 50 |     sqlite3_result_error_nomem(p);
 51 |     return NULL;
 52 |   }
 53 | 
 54 |   memcpy(zNew + *nOut, zApp, nApp);
 55 |   *nOut = newLen;
 56 |   return zNew;
 57 | }
 58 | 
 59 | static void sf_concat(sqlite3_context *p, int nArg, sqlite3_value **apArg) {
 60 |   char *zOut = NULL;
 61 |   sqlite3_uint64 nOut = 0;
 62 |   _Bool empty = 0;
 63 |   // MySQL concat() returns NULL if given any NULL arguments, Postgres
 64 |   // just ignores NULLS.
 65 |   _Bool mysql_style = sqlite3_user_data(p);
 66 | 
 67 |   for (int n = 0; n < nArg; n += 1) {
 68 |     if (sqlite3_value_type(apArg[n]) == SQLITE_NULL) {
 69 |       if (mysql_style) {
 70 |         sqlite3_free(zOut);
 71 |         return;
 72 |       }
 73 |       continue;
 74 |     }
 75 | 
 76 |     const char *zArg = (const char *)sqlite3_value_text(apArg[n]);
 77 |     int arglen = sqlite3_value_bytes(apArg[n]);
 78 | 
 79 |     if (arglen == 0) {
 80 |       empty = 1;
 81 |       continue;
 82 |     }
 83 | 
 84 |     zOut = do_append(p, zOut, &nOut, zArg, arglen);
 85 |     if (!zOut) {
 86 |       return;
 87 |     }
 88 |   }
 89 | 
 90 |   if (zOut) {
 91 |     sqlite3_result_text64(p, zOut, nOut, sqlite3_free, SQLITE_UTF8);
 92 |   } else if (empty) {
 93 |     sqlite3_result_text(p, "", 0, SQLITE_STATIC);
 94 |   }
 95 | }
 96 | 
 97 | static void sf_concat_ws(sqlite3_context *p, int nArg, sqlite3_value **apArg) {
 98 |   if (nArg <= 1) {
 99 |     return;
100 |   }
101 | 
102 |   if (sqlite3_value_type(apArg[0]) == SQLITE_NULL) {
103 |     return;
104 |   }
105 | 
106 |   const char *zSep = (const char *)sqlite3_value_text(apArg[0]);
107 |   int nSep = sqlite3_value_bytes(apArg[0]);
108 | 
109 |   char *zOut = NULL;
110 |   sqlite3_uint64 nOut = 0;
111 | 
112 |   for (int n = 1; n < nArg; n += 1) {
113 |     if (sqlite3_value_type(apArg[n]) == SQLITE_NULL) {
114 |       continue;
115 |     }
116 | 
117 |     const char *zArg = (const char *)sqlite3_value_text(apArg[n]);
118 |     int arglen = sqlite3_value_bytes(apArg[n]);
119 | 
120 |     if (zOut) {
121 |       zOut = do_append(p, zOut, &nOut, zSep, nSep);
122 |       if (!zOut) {
123 |         return;
124 |       }
125 |       zOut = do_append(p, zOut, &nOut, zArg, arglen);
126 |       if (!zOut) {
127 |         return;
128 |       }
129 |     } else {
130 |       if (arglen > 0) {
131 |         zOut = sqlite3_malloc(arglen);
132 |         if (!zOut) {
133 |           sqlite3_result_error_nomem(p);
134 |           return;
135 |         }
136 |         memcpy(zOut, zArg, arglen);
137 |         nOut = arglen;
138 |       } else {
139 |         zOut = sqlite3_malloc(1);
140 |         if (!zOut) {
141 |           sqlite3_result_error_nomem(p);
142 |           return;
143 |         }
144 |         *zOut = 0;
145 |       }
146 |     }
147 |   }
148 | 
149 |   if (zOut) {
150 |     sqlite3_result_text64(p, zOut, nOut, sqlite3_free, SQLITE_UTF8);
151 |   }
152 | }
153 | 
154 | static void sf_repeat8(sqlite3_context *p, int nArg __attribute__((unused)),
155 |                        sqlite3_value **apArg) {
156 |   assert(nArg == 2);
157 |   if (sqlite3_value_type(apArg[0]) == SQLITE_NULL ||
158 |       sqlite3_value_type(apArg[1]) == SQLITE_NULL) {
159 |     return;
160 |   }
161 | 
162 |   const unsigned char *t = sqlite3_value_text(apArg[0]);
163 |   int tlen = sqlite3_value_bytes(apArg[0]);
164 |   int reps = sqlite3_value_int(apArg[1]);
165 | 
166 |   if (reps <= 0) {
167 |     return;
168 |   }
169 | 
170 |   sqlite3_uint64 olen = (sqlite3_uint64)reps * tlen;
171 |   unsigned char *output = sqlite3_malloc64(olen);
172 |   if (!output) {
173 |     sqlite3_result_error_nomem(p);
174 |     return;
175 |   }
176 | 
177 |   size_t off = 0;
178 |   while (reps--) {
179 |     memcpy(output + off, t, tlen);
180 |     off += tlen;
181 |   }
182 |   sqlite3_result_text64(p, (const char *)output, olen, sqlite3_free,
183 |                         SQLITE_UTF8);
184 | }
185 | 
186 | static void sf_strptime(sqlite3_context *ctx,
187 |                         int nArg __attribute__((__unused__)),
188 |                         sqlite3_value **apArg) {
189 |   assert(nArg == 2);
190 | 
191 | #ifdef HAVE_STRPTIME
192 |   if (sqlite3_value_type(apArg[0]) == SQLITE_NULL ||
193 |       sqlite3_value_type(apArg[1]) == SQLITE_NULL) {
194 |     return;
195 |   }
196 | 
197 |   const char *fmt = (const char *)sqlite3_value_text(apArg[0]);
198 |   const char *s = (const char *)sqlite3_value_text(apArg[1]);
199 | 
200 |   if (!fmt || !s) {
201 |     return;
202 |   }
203 | 
204 |   struct tm t;
205 |   memset(&t, 0, sizeof t);
206 |   char *end = strptime(s, fmt, &t);
207 |   if (!end || *end) {
208 |     return;
209 |   }
210 | 
211 |   time_t secs = mktime(&t);
212 |   if (secs != (time_t)-1) {
213 |     sqlite3_result_int64(ctx, (sqlite3_int64)secs);
214 |   }
215 | #endif
216 | }
217 | 
218 | int sf_more_init(sqlite3 *db) {
219 |   const struct Scalar {
220 |     const char *zName;  /* Function name */
221 |     int nArg;           /* Number of arguments */
222 |     unsigned short enc; /* Optimal text encoding */
223 |     void *iContext;     /* sqlite3_user_data() context */
224 |     void (*xFunc)(sqlite3_context *, int, sqlite3_value **);
225 |   } scalars[] = {
226 |       {"concat", -1, SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL, sf_concat},
227 |       {"mysql_concat", -1, SQLITE_UTF8 | SQLITE_DETERMINISTIC, (void *)1,
228 |        sf_concat},
229 |       {"concat_ws", -1, SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL, sf_concat_ws},
230 |       {"repeat", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL, sf_repeat8},
231 |       {"strptime", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL, sf_strptime},
232 |   };
233 |   int rc = SQLITE_OK;
234 | 
235 |   for (int i = 0;
236 |        rc == SQLITE_OK && i < (int)(sizeof(scalars) / sizeof(scalars[0]));
237 |        i++) {
238 |     const struct Scalar *p = &scalars[i];
239 |     rc = sqlite3_create_function(db, p->zName, p->nArg, p->enc, p->iContext,
240 |                                  p->xFunc, 0, 0);
241 |   }
242 | 
243 |   return rc;
244 | }
245 | 


--------------------------------------------------------------------------------
/src/passwd.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2018 Shawn Wagner
  3 | 
  4 | Permission is hereby granted, free of charge, to any person obtaining
  5 | a copy of this software and associated documentation files (the
  6 | "Software"), to deal in the Software without restriction, including
  7 | without limitation the rights to use, copy, modify, merge, publish,
  8 | distribute, sublicense, and/or sell copies of the Software, and to
  9 | permit persons to whom the Software is furnished to do so, subject to
 10 | the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be
 13 | included in all copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 22 | */
 23 | 
 24 | #include <errno.h>
 25 | #include <pwd.h>
 26 | #include <stdlib.h>
 27 | #include <string.h>
 28 | #include <sys/types.h>
 29 | #include <unistd.h>
 30 | 
 31 | #include <sqlite3ext.h>
 32 | SQLITE_EXTENSION_INIT3
 33 | 
 34 | #include "nss_tables.h"
 35 | 
 36 | /* /etc/passwd table */
 37 | 
/* Virtual table object for etc_passwd. It carries no state beyond the
   sqlite3_vtab base, which is the only member so that a pointer to this
   struct can be handed to SQLite as a sqlite3_vtab pointer. */
struct passwd_vtab {
  sqlite3_vtab vtab;
};
 41 | 
/* Cursor over the passwd database; cast to and from sqlite3_vtab_cursor. */
struct passwd_cursor {
  sqlite3_vtab *vtab;       /* owning table, set in passwd_open() */
  struct passwd *pw;        /* current entry; NULL means EOF (passwd_eof) */
  struct passwd pwstorage;  /* presumably the storage pw points into; the
                               code that fills it is outside this view */
  char *buf;                /* scratch buffer allocated in passwd_open() */
  int bufsize;              /* size of buf in bytes */
  _Bool specific;           /* when false, passwd_close() calls endpwent() */
};
 50 | 
 51 | static int passwd_connect(sqlite3 *db, void *pAux __attribute__((unused)),
 52 |                           int argc __attribute__((unused)),
 53 |                           const char *const *argv __attribute__((unused)),
 54 |                           sqlite3_vtab **ppVTab, char **pzErr) {
 55 |   int status;
 56 |   status = sqlite3_declare_vtab(
 57 |       db, "CREATE TABLE etc_passwd(name TEXT NOT NULL, password TEXT, uid INTEGER "
 58 |           "NOT NULL, gid INTEGER NOT NULL, gecos TEXT, homedir "
 59 |           "TEXT, shell TEXT)");
 60 |   if (status != SQLITE_OK) {
 61 |     *pzErr = sqlite3_mprintf("%s", sqlite3_errstr(status));
 62 |     return SQLITE_ERROR;
 63 |   }
 64 | 
 65 |   *ppVTab = sqlite3_malloc(sizeof(struct passwd_vtab));
 66 |   (*ppVTab)->pModule = &passwd_funcs;
 67 |   (*ppVTab)->nRef = 0;
 68 |   (*ppVTab)->zErrMsg = NULL;
 69 |   return SQLITE_OK;
 70 | }
 71 | 
 72 | static int passwd_bestindex(sqlite3_vtab *tab __attribute__((unused)),
 73 |                             sqlite3_index_info *info) {
 74 |   info->idxNum = 0;
 75 |   for (int n = 0; n < info->nConstraint; n += 1) {
 76 |     if (info->aConstraint[n].usable == 0) {
 77 |       continue;
 78 |     }
 79 |     if (info->aConstraint[n].iColumn == -1 ||
 80 |         info->aConstraint[n].iColumn == 2) {
 81 |       if (info->aConstraint[n].op == SQLITE_INDEX_CONSTRAINT_EQ) {
 82 |         info->idxNum |= 1;
 83 |         info->aConstraintUsage[n].argvIndex = 1;
 84 |         info->aConstraintUsage[n].omit = 1;
 85 |       }
 86 |     } else if (info->aConstraint[n].iColumn == 0) {
 87 |       if (info->aConstraint[n].op == SQLITE_INDEX_CONSTRAINT_EQ) {
 88 |         info->idxNum |= 2;
 89 |         info->aConstraintUsage[n].argvIndex = 1;
 90 |         info->aConstraintUsage[n].omit = 1;
 91 |       }
 92 |     }
 93 |   }
 94 |   if (info->idxNum) {
 95 |     info->estimatedCost = 10;
 96 |     info->estimatedRows = 1;
 97 |     info->idxFlags = SQLITE_INDEX_SCAN_UNIQUE;
 98 |   } else {
 99 |     info->estimatedCost = 200;
100 |   }
101 |   return SQLITE_OK;
102 | }
103 | 
104 | static int passwd_disconnect(sqlite3_vtab *tab) {
105 |   sqlite3_free(tab);
106 |   return SQLITE_OK;
107 | }
108 | 
109 | static int passwd_open(sqlite3_vtab *tab, sqlite3_vtab_cursor **curs) {
110 |   struct passwd_cursor *c = sqlite3_malloc(sizeof(struct passwd_cursor));
111 |   if (!c) {
112 |     if (tab->zErrMsg) {
113 |       sqlite3_free(tab->zErrMsg);
114 |     }
115 |     tab->zErrMsg = sqlite3_mprintf("Out of memory");
116 |     return SQLITE_NOMEM;
117 |   }
118 |   c->vtab = tab;
119 | 
120 |   c->bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
121 |   if (c->bufsize <= 0) {
122 |     c->bufsize = 4096;
123 |   }
124 |   c->buf = sqlite3_malloc(c->bufsize);
125 |   if (!c->buf) {
126 |     if (tab->zErrMsg) {
127 |       sqlite3_free(tab->zErrMsg);
128 |     }
129 |     sqlite3_free(c);
130 |     tab->zErrMsg = sqlite3_mprintf("Out of memory");
131 |     return SQLITE_NOMEM;
132 |   }
133 | 
134 |   *curs = (sqlite3_vtab_cursor *)c;
135 |   return SQLITE_OK;
136 | }
137 | 
138 | static int passwd_close(sqlite3_vtab_cursor *vc) {
139 |   struct passwd_cursor *c = (struct passwd_cursor *)vc;
140 |   if (!c->specific) {
141 |     endpwent();
142 |   }
143 |   sqlite3_free(c->buf);
144 |   sqlite3_free(c);
145 |   return SQLITE_OK;
146 | }
147 | 
148 | int passwd_eof(sqlite3_vtab_cursor *vc) {
149 |   struct passwd_cursor *c = (struct passwd_cursor *)vc;
150 |   return c->pw == NULL;
151 | }
152 | 
153 | static int passwd_filter(sqlite3_vtab_cursor *vc, int idxNum,
154 |                          const char *idxStr __attribute__((unused)),
155 |                          int argc __attribute__((unused)),
156 |                          sqlite3_value **argv) {
157 |   struct passwd_cursor *c = (struct passwd_cursor *)vc;
158 | 
159 |   if (idxNum == 3) {
160 |     if (c->vtab->zErrMsg) {
161 |       sqlite3_free(c->vtab->zErrMsg);
162 |     }
163 |     c->vtab->zErrMsg = sqlite3_mprintf("cannot search for a uid AND username");
164 |     return SQLITE_CONSTRAINT_VTAB;
165 |   }
166 | 
167 |   if (idxNum == 1) {
168 |     uid_t uid = sqlite3_value_int(argv[0]);
169 |     c->specific = 1;
170 |     while (1) {
171 |       if (getpwuid_r(uid, &(c->pwstorage), c->buf, c->bufsize, &(c->pw)) < 0) {
172 |         if (errno == ERANGE) {
173 |           c->buf = sqlite3_realloc(c->buf, c->bufsize * 2);
174 |           c->bufsize *= 2;
175 |         } else if (errno == ENOENT || errno == ESRCH) {
176 |           break;
177 |         } else {
178 |           if (c->vtab->zErrMsg) {
179 |             sqlite3_free(c->vtab->zErrMsg);
180 |           }
181 |           c->vtab->zErrMsg = sqlite3_mprintf("getpwuid_r: %s", strerror(errno));
182 |           return SQLITE_ERROR;
183 |         }
184 |       } else {
185 |         break;
186 |       }
187 |     }
188 |     return SQLITE_OK;
189 |   }
190 | 
191 |   if (idxNum == 2) {
192 |     const char *username = (const char *)sqlite3_value_text(argv[0]);
193 |     c->specific = 1;
194 |     while (1) {
195 |       if (getpwnam_r(username, &(c->pwstorage), c->buf, c->bufsize, &(c->pw)) <
196 |           0) {
197 |         if (errno == ERANGE) {
198 |           c->buf = sqlite3_realloc(c->buf, c->bufsize * 2);
199 |           c->bufsize *= 2;
200 |         } else if (errno == ENOENT || errno == ESRCH) {
201 |           break;
202 |         } else {
203 |           if (c->vtab->zErrMsg) {
204 |             sqlite3_free(c->vtab->zErrMsg);
205 |           }
206 |           c->vtab->zErrMsg = sqlite3_mprintf("getpwnam_r: %s", strerror(errno));
207 |           return SQLITE_ERROR;
208 |         }
209 |       } else {
210 |         break;
211 |       }
212 |     }
213 |     return SQLITE_OK;
214 |   }
215 | 
216 |   setpwent();
217 |   c->specific = 0;
218 |   while (1) {
219 |     if (getpwent_r(&(c->pwstorage), c->buf, c->bufsize, &(c->pw)) < 0) {
220 |       if (errno == ERANGE) {
221 |         c->buf = sqlite3_realloc(c->buf, c->bufsize * 2);
222 |         c->bufsize *= 2;
223 |       } else if (errno == ENOENT || errno == ESRCH) {
224 |         break;
225 |       } else {
226 |         if (c->vtab->zErrMsg) {
227 |           sqlite3_free(c->vtab->zErrMsg);
228 |         }
229 |         c->vtab->zErrMsg = sqlite3_mprintf("getpwent_r: %s", strerror(errno));
230 |         return SQLITE_ERROR;
231 |       }
232 |     } else {
233 |       break;
234 |     }
235 |   }
236 |   return SQLITE_OK;
237 | }
238 | 
239 | static int passwd_next(sqlite3_vtab_cursor *vc) {
240 |   struct passwd_cursor *c = (struct passwd_cursor *)vc;
241 | 
242 |   if (c->specific) {
243 |     c->pw = NULL;
244 |     return SQLITE_OK;
245 |   }
246 | 
247 |   while (1) {
248 |     if (getpwent_r(&(c->pwstorage), c->buf, c->bufsize, &(c->pw)) < 0) {
249 |       if (errno == ERANGE) {
250 |         c->buf = sqlite3_realloc(c->buf, c->bufsize * 2);
251 |         c->bufsize *= 2;
252 |       } else {
253 |         if (c->vtab->zErrMsg) {
254 |           sqlite3_free(c->vtab->zErrMsg);
255 |         }
256 |         c->vtab->zErrMsg = sqlite3_mprintf("getpwent_r: %s", strerror(errno));
257 |         return SQLITE_ERROR;
258 |       }
259 |     } else {
260 |       break;
261 |     }
262 |   }
263 |   return SQLITE_OK;
264 | }
265 | 
266 | static int passwd_column(sqlite3_vtab_cursor *vc, sqlite3_context *p, int n) {
267 |   struct passwd_cursor *c = (struct passwd_cursor *)vc;
268 |   switch (n) {
269 |   case 0:
270 |     sqlite3_result_text(p, c->pw->pw_name, -1, SQLITE_TRANSIENT);
271 |     break;
272 |   case 1:
273 |     if (c->pw->pw_passwd) {
274 |       sqlite3_result_text(p, c->pw->pw_passwd, -1, SQLITE_TRANSIENT);
275 |     } else {
276 |       sqlite3_result_null(p);
277 |     }
278 |     break;
279 |   case 2:
280 |     sqlite3_result_int(p, c->pw->pw_uid);
281 |     break;
282 |   case 3:
283 |     sqlite3_result_int(p, c->pw->pw_gid);
284 |     break;
285 |   case 4:
286 |     if (c->pw->pw_gecos) {
287 |       sqlite3_result_text(p, c->pw->pw_gecos, -1, SQLITE_TRANSIENT);
288 |     } else {
289 |       sqlite3_result_null(p);
290 |     }
291 |     break;
292 |   case 5:
293 |     if (c->pw->pw_dir) {
294 |       sqlite3_result_text(p, c->pw->pw_dir, -1, SQLITE_TRANSIENT);
295 |     } else {
296 |       sqlite3_result_null(p);
297 |     }
298 |     break;
299 |   case 6:
300 |     if (c->pw->pw_shell) {
301 |       sqlite3_result_text(p, c->pw->pw_shell, -1, SQLITE_TRANSIENT);
302 |     } else {
303 |       sqlite3_result_null(p);
304 |     }
305 |     break;
306 |   default:
307 |     if (c->vtab->zErrMsg) {
308 |       sqlite3_free(c->vtab->zErrMsg);
309 |     }
310 |     c->vtab->zErrMsg = sqlite3_mprintf("Column out of range");
311 |     return SQLITE_RANGE;
312 |   }
313 |   return SQLITE_OK;
314 | }
315 | 
316 | static int passwd_rowid(sqlite3_vtab_cursor *vc, sqlite3_int64 *pRowId) {
317 |   struct passwd_cursor *c = (struct passwd_cursor *)vc;
318 |   *pRowId = c->pw->pw_uid;
319 |   return SQLITE_OK;
320 | }
321 | 
322 | static int passwd_rename(sqlite3_vtab *tab __attribute__((unused)),
323 |                          const char *newname __attribute__((unused))) {
324 |   return SQLITE_OK;
325 | }
326 | 
327 | struct sqlite3_module passwd_funcs = {1,
328 |                                       passwd_connect,
329 |                                       passwd_connect,
330 |                                       passwd_bestindex,
331 |                                       passwd_disconnect,
332 |                                       passwd_disconnect,
333 |                                       passwd_open,
334 |                                       passwd_close,
335 |                                       passwd_filter,
336 |                                       passwd_next,
337 |                                       passwd_eof,
338 |                                       passwd_column,
339 |                                       passwd_rowid,
340 |                                       NULL,
341 |                                       NULL,
342 |                                       NULL,
343 |                                       NULL,
344 |                                       NULL,
345 |                                       NULL,
346 |                                       passwd_rename,
347 |                                       NULL,
348 |                                       NULL,
349 |                                       NULL};
350 | 


--------------------------------------------------------------------------------
/src/pcre2_funcs.c:
--------------------------------------------------------------------------------
  1 | /* PCRE2 regexp support */
  2 | 
  3 | #include <stdio.h>
  4 | 
  5 | #include "config.h"
  6 | #include <sqlite3ext.h>
  7 | #include <stdbool.h>
  8 | 
  9 | #if defined(HAVE_PCRE2_8) || defined(HAVE_PCRE2_16)
 10 | #define PCRE2_CODE_UNIT_WIDTH 0
 11 | #include <pcre2.h>
 12 | #endif
 13 | 
 14 | SQLITE_EXTENSION_INIT1
 15 | 
 16 | #ifdef HAVE_PCRE2_8
 17 | 
/* Compiled 8-bit pattern cached as function auxiliary data by re_regexp8()
 * and released by re_delete8(). */
struct re_cache8 {
  pcre2_code_8 *re;       /* compiled pattern */
  pcre2_match_data_8 *md; /* match data block created from that pattern */
};
 22 | 
 23 | static void re_delete8(void *v) {
 24 |   struct re_cache8 *c = v;
 25 |   pcre2_code_free_8(c->re);
 26 |   pcre2_match_data_free_8(c->md);
 27 |   sqlite3_free(c);
 28 | }
 29 | 
 30 | void re_regexp8(sqlite3_context *ctx, int nargs __attribute__((unused)),
 31 |                 sqlite3_value **args) {
 32 |   if (sqlite3_value_type(args[0]) == SQLITE_NULL ||
 33 |       sqlite3_value_type(args[1]) == SQLITE_NULL) {
 34 |     return;
 35 |   }
 36 | 
 37 |   struct re_cache8 *c = sqlite3_get_auxdata(ctx, 0);
 38 |   if (!c) {
 39 |     int errcode;
 40 |     PCRE2_SIZE erroffset;
 41 |     c = sqlite3_malloc(sizeof *c);
 42 |     if (!c) {
 43 |       sqlite3_result_error_nomem(ctx);
 44 |       return;
 45 |     }
 46 |     c->re = pcre2_compile_8(
 47 |         sqlite3_value_text(args[0]), sqlite3_value_bytes(args[0]),
 48 |         PCRE2_ANCHORED | PCRE2_ENDANCHORED | PCRE2_UTF | PCRE2_UCP, &errcode,
 49 |         &erroffset, NULL);
 50 |     if (!c->re) {
 51 |       PCRE2_UCHAR8 errstr[120];
 52 |       sqlite3_free(c);
 53 |       pcre2_get_error_message_8(errcode, errstr, sizeof errstr);
 54 |       sqlite3_result_error(ctx, (const char *)&errstr, -1);
 55 |       return;
 56 |     }
 57 |     pcre2_jit_compile_8(c->re, PCRE2_JIT_COMPLETE);
 58 |     c->md = pcre2_match_data_create_from_pattern_8(c->re, NULL);
 59 |     sqlite3_set_auxdata(ctx, 0, c, re_delete8);
 60 |   }
 61 | 
 62 |   int m = pcre2_match_8(c->re, sqlite3_value_text(args[1]),
 63 |                         sqlite3_value_bytes(args[1]), 0, 0, c->md, NULL);
 64 |   sqlite3_result_int(ctx, m >= 0);
 65 | }
 66 | 
 67 | void re_version8(sqlite3_context *ctx, int nargs __attribute__((unused)),
 68 |                  sqlite3_value **args __attribute__((unused))) {
 69 |   PCRE2_UCHAR8 vers[24];
 70 |   pcre2_config_8(PCRE2_CONFIG_VERSION, vers);
 71 |   sqlite3_result_text(ctx, (const char *)&vers, -1, SQLITE_TRANSIENT);
 72 | }
 73 | 
 74 | void re_unicode8(sqlite3_context *ctx, int nargs __attribute__((unused)),
 75 |                  sqlite3_value **args __attribute__((unused))) {
 76 |   PCRE2_UCHAR8 vers[24];
 77 |   pcre2_config_8(PCRE2_CONFIG_UNICODE_VERSION, vers);
 78 |   sqlite3_result_text(ctx, (const char *)&vers, -1, SQLITE_TRANSIENT);
 79 | }
 80 | #endif
 81 | 
 82 | #ifdef HAVE_PCRE2_16
 83 | 
/* Compiled 16-bit pattern cached as function auxiliary data by
 * re_regexp16() and released by re_delete16(). */
struct re_cache16 {
  pcre2_code_16 *re;       /* compiled pattern */
  pcre2_match_data_16 *md; /* match data block created from that pattern */
};
 88 | 
 89 | static void re_delete16(void *v) {
 90 |   struct re_cache16 *c = v;
 91 |   pcre2_code_free_16(c->re);
 92 |   pcre2_match_data_free_16(c->md);
 93 |   sqlite3_free(c);
 94 | }
 95 | 
 96 | void re_regexp16(sqlite3_context *ctx, int nargs __attribute__((unused)),
 97 |                  sqlite3_value **args) {
 98 |   if (sqlite3_value_type(args[0]) == SQLITE_NULL ||
 99 |       sqlite3_value_type(args[1]) == SQLITE_NULL) {
100 |     return;
101 |   }
102 | 
103 |   struct re_cache16 *c = sqlite3_get_auxdata(ctx, 0);
104 |   if (!c) {
105 |     int errcode;
106 |     PCRE2_SIZE erroffset;
107 |     c = sqlite3_malloc(sizeof *c);
108 |     if (!c) {
109 |       sqlite3_result_error_nomem(ctx);
110 |       return;
111 |     }
112 | 
113 |     c->re = pcre2_compile_16(
114 |         sqlite3_value_text16(args[0]), sqlite3_value_bytes16(args[0]) / 2,
115 |         PCRE2_ANCHORED | PCRE2_ENDANCHORED | PCRE2_UTF | PCRE2_UCP, &errcode,
116 |         &erroffset, NULL);
117 |     if (!c->re) {
118 |       PCRE2_UCHAR16 errstr[120];
119 |       sqlite3_free(c);
120 |       pcre2_get_error_message_16(errcode, errstr, sizeof errstr / 2);
121 |       sqlite3_result_error16(ctx, errstr, -1);
122 |       return;
123 |     }
124 |     pcre2_jit_compile_16(c->re, PCRE2_JIT_COMPLETE);
125 |     c->md = pcre2_match_data_create_from_pattern_16(c->re, NULL);
126 |     sqlite3_set_auxdata(ctx, 0, c, re_delete16);
127 |   }
128 | 
129 |   int m = pcre2_match_16(c->re, sqlite3_value_text16(args[1]),
130 |                          sqlite3_value_bytes16(args[1]) / 2, 0, 0, c->md, NULL);
131 |   sqlite3_result_int(ctx, m >= 0);
132 | }
133 | 
134 | void re_version16(sqlite3_context *ctx, int nargs __attribute__((unused)),
135 |                   sqlite3_value **args __attribute__((unused))) {
136 |   PCRE2_UCHAR16 vers[24];
137 |   pcre2_config_16(PCRE2_CONFIG_VERSION, vers);
138 |   sqlite3_result_text16(ctx, vers, -1, SQLITE_TRANSIENT);
139 | }
140 | 
141 | void re_unicode16(sqlite3_context *ctx, int nargs __attribute__((unused)),
142 |                  sqlite3_value **args __attribute__((unused))) {
143 |   PCRE2_UCHAR16 vers[24];
144 |   pcre2_config_16(PCRE2_CONFIG_UNICODE_VERSION, vers);
145 |   sqlite3_result_text16(ctx, vers, -1, SQLITE_TRANSIENT);
146 | }
147 | 
148 | #endif
149 | 
150 | #ifdef _WIN32
151 | __declspec(export)
152 | #endif
153 |     int sqlite3_pcrefuncs_init(sqlite3 *db,
154 |                                char **pzErrMsg __attribute__((unused)),
155 |                                const sqlite3_api_routines *pApi) {
156 |   SQLITE_EXTENSION_INIT2(pApi);
157 | 
158 |   struct re_funcs {
159 |     const char *name;
160 |     int nargs;
161 |     bool utf8;
162 |     void (*fp)(sqlite3_context *, int, sqlite3_value **);
163 |   } func_table[] = {
164 | #ifdef HAVE_PCRE2_8
165 |       {"pcre_version", 0, 1, re_version8},
166 |       {"pcre_unicode_version", 0, 1, re_unicode8},
167 |       {"regexp", 2, 1, re_regexp8},
168 |       {"pcre_regexp", 2, 1, re_regexp8},
169 | #endif
170 | #ifdef HAVEPCRE2_16
171 |       {"pcre_version", 0, 0, re_version16},
172 |       {"pcre_unicode_version", 0, 0, re_unicode16},
173 |       {"regexp", 2, 0, re_regexp16},
174 |       {"pcre_regexp", 2, 0, re_regexp16},
175 | #endif
176 |       {NULL, 0, 0, NULL}};
177 |   int rc = SQLITE_OK;
178 |   for (int n = 0; func_table[n].name; n += 1) {
179 |     rc = sqlite3_create_function(
180 |         db, func_table[n].name, func_table[n].nargs,
181 |         SQLITE_DETERMINISTIC |
182 |             (func_table[n].utf8 ? SQLITE_UTF8 : SQLITE_UTF16),
183 |         NULL, func_table[n].fp, NULL, NULL);
184 |     if (rc != SQLITE_OK) {
185 |       return rc;
186 |     }
187 |   }
188 |   return SQLITE_OK;
189 | }
190 | 


--------------------------------------------------------------------------------
/src/posix_re_funcs.c:
--------------------------------------------------------------------------------
 1 | #include "config.h"
 2 | #include <stdlib.h>
 3 | #include <sys/types.h>
 4 | #include <regex.h>
 5 | #include "sqlite3.h"
 6 | #include "sqlite3ext.h"
 7 | 
 8 | SQLITE_EXTENSION_INIT1
 9 | 
/* Compiled POSIX regex cached as function auxiliary data by posix_regexp(). */
struct posix_re_cache {
  regex_t re; /* filled in by regcomp() */
};
13 | 
14 | static void posix_re_delete(void *v) {
15 |   struct posix_re_cache *re = v;
16 |   regfree(&re->re);
17 | }
18 | 
19 | static void posix_regexp(sqlite3_context *ctx, sqlite3_value **args, int cflags) {
20 |   if (sqlite3_value_type(args[0]) == SQLITE_NULL ||
21 |       sqlite3_value_type(args[1]) == SQLITE_NULL) {
22 |     return;
23 |   }
24 | 
25 |   struct posix_re_cache *c = sqlite3_get_auxdata(ctx, 0);
26 |   if (!c) {
27 |     c = sqlite3_malloc(sizeof *c);
28 |     if (!c) {
29 |       sqlite3_result_error_nomem(ctx);
30 |       return;
31 |     }
32 |     const char *regex = (const char *)sqlite3_value_text(args[0]);
33 |     int err = regcomp(&c->re, regex, cflags | REG_NOSUB);
34 |     if (err != 0) {
35 |       char errbuff[512];
36 |       regerror(err, &c->re, errbuff, sizeof errbuff);
37 |       sqlite3_result_error(ctx, errbuff, -1);
38 |       sqlite3_free(c);
39 |       return;
40 |     }
41 |     sqlite3_set_auxdata(ctx, 0, c, posix_re_delete);
42 |   }
43 | 
44 |   const char *str = (const char *)sqlite3_value_text(args[1]);
45 |   int rc = regexec(&c->re, str, 0, NULL, 0);
46 |   if (rc == 0 || rc == REG_NOMATCH) {
47 |     sqlite3_result_int(ctx, rc == 0);
48 |   } else {
49 |     char errbuff[512];
50 |     regerror(rc, &c->re, errbuff, sizeof errbuff);
51 |     sqlite3_result_error(ctx, errbuff, -1);
52 |   }
53 | }
54 | 
55 | 
56 | static void ere_func(sqlite3_context *ctx, int nargs __attribute__((unused)),
57 |                        sqlite3_value **args) {
58 |   posix_regexp(ctx, args, REG_EXTENDED);
59 | }
60 | 
61 | static void bre_func(sqlite3_context *ctx, int nargs __attribute__((unused)),
62 |                        sqlite3_value **args) {
63 |   posix_regexp(ctx, args, 0);
64 | }
65 | 
66 | #ifdef _WIN32
67 | __declspec(export)
68 | #endif
69 | int sqlite3_posixrefuncs_init(sqlite3 *db, char **pzErrMsg __attribute__((unused)),
70 |                               const sqlite3_api_routines *pApi) {
71 |   SQLITE_EXTENSION_INIT2(pApi);
72 |   struct re_funcs {
73 |     const char *name;
74 |     void (*fp)(sqlite3_context *, int, sqlite3_value **);
75 |   } func_table[] = {
76 |     {"regexp", ere_func},
77 |     {"ext_regexp", ere_func},
78 |     {"basic_regexp", bre_func},
79 |     {NULL, NULL}
80 |   };
81 |   for (int n = 0; func_table[n].name; n += 1) {
82 |     int rc = sqlite3_create_function(db, func_table[n].name, 2,
83 |                                      SQLITE_DETERMINISTIC | SQLITE_UTF8,
84 |                                      NULL, func_table[n].fp, NULL, NULL);
85 |     if (rc != SQLITE_OK) {
86 |       return rc;
87 |     }
88 |   }
89 |   return SQLITE_OK;
90 | }
91 | 


--------------------------------------------------------------------------------
/tools/README.md:
--------------------------------------------------------------------------------
  1 | Introduction
  2 | ============
  3 | 
  4 | A collection of handy scripts that do stuff with Sqlite. Mostly perl,
  5 | and thus requiring the [DBD::SQLite] module. Install through your OS
  6 | package manager or CPAN.
  7 | 
  8 | The Programs
  9 | ============
 10 | 
 11 | csv2sqlite
 12 | ----------
 13 | 
 14 | Import CSV files into Sqlite, in a smarter way than the command line
 15 | shell's [CSV import] feature. Requires the [Text::CSV_XS] module.
 16 | 
 17 | ### Usage ###
 18 | 
 19 |     csv2sqlite [OPTIONS] database tablename [FILE]
 20 | 
 21 | If a file to import from is not given, reads from standard input. If
 22 | the table does not already exist in the database, it's created using
 23 | the first line of the CSV input as column names.
 24 | 
 25 | ### Options ###
 26 | 
 27 | * `-t CHAR` What to use as a field separator. Defaults to comma.
 28 | * `--primary-key=COLUMN` A comma-separated list of column names to use
 29 |    as the primary key when creating a table. Can be abbreviated
 30 |    `--pk`. Ignored if importing into an existing table.
 31 | * `--ipk` Takes a single column name and treats it as an `INTEGER
 32 |   PRIMARY KEY` rowid alias. Can't be used with `--primary-key`.
 33 | * `--without-rowid` Makes the table a **WITHOUT ROWID** one. Requires
 34 |   a primary key. Ignored if the table already exists.
 35 | * `--headers` If importing into an existing table, this option assumes
 36 |   the first line is a header with column/field names that should not
 37 |   be inserted. Default behavior mimics the sqlite3 shell and tries to
 38 |   insert all rows. When reading headers, they're used as column names
 39 |   for inserting rows, so the order of columns in the table can be
 40 |   different from the order in the CSV file.
 41 | * `--columns=COLUMNS` A comma-separated list of column names. When
 42 |   creating a table, normally the first line is used to get the column
 43 |   names. With this option, the given names are used instead, and the
 44 |   first line is treated as data (Unless `--headers` is also given;
 45 |   then the names must be the same but the order can be different).
 46 | * `--guess-types` Normally, all fields are inserted as strings. With
 47 |   this option, it tries to insert values that look like numbers as
 48 |   numbers.
 49 | * `--empty-nulls` Normally an empty field is treated as a 0 length
 50 |   string. This inserts them as `NULL`s instead.
 51 | * `--strip` When set, strips leading and trailing whitespace from
 52 |   fields (Such spaces are always kept if inside quoted fields. Spaces
 53 |   before or after the quotes are removed with this option).
 54 | * `--ignore` Ignore attempts to insert rows with constraint violations
 55 |   instead of aborting.
 56 | * `--replace` Replace an existing row with the current one on a
 57 |   constraint violation. Cannot be combined with `--ignore`.
 58 | * `--unsafe` Turn on options that speed up insertion at the cost of
 59 |   possible database corruption on OS crash or power loss.
 60 | * `--help` I need somebody! Not just anybody!
 61 | 
 62 | table2sql
 63 | ---------
 64 | 
 65 | Convert a typical ASCII art table where columns are separated by pipes
 66 | (`|`) into SQLite DDL statements. The first line of the input is used
 67 | as the header to get column names.
 68 | 
 69 | Created as a way to make it easier to work with sample data in Stack
 70 | Overflow questions; hence none of the fine-tuning options for defining
 71 | keys, etc. like with `csv2sqlite`. It's intended mostly for use with
 72 | throwaway data and in-memory databases. Depends on [Regexp::Common].
 73 | 
 74 | ### Usage ###
 75 | 
 76 |     table2sql [OPTIONS] [TABLE NAME] [TABLE FILE]
 77 |     
 78 | ### Options ###
 79 | 
 80 | * `-t` Create a temporary table.
 81 | * `--help` Display help.
 82 | 
 83 | ### Example ###
 84 | 
 85 |     $ table2sql example <<EOF
 86 |     Header1 | Header2
 87 |     --------+--------
 88 |       a     |    b
 89 |       1     |    2
 90 |     EOF
 91 |     BEGIN TRANSACTION;
 92 |     CREATE TABLE IF NOT EXISTS "example"("Header1","Header2");
 93 |     INSERT INTO "example" VALUES ('a','b');
 94 |     INSERT INTO "example" VALUES (1,2);
 95 |     COMMIT;
 96 | 
 97 | 
 98 | [DBD::SQLite]: https://metacpan.org/pod/DBD::SQLite
 99 | [CSV import]: https://www.sqlite.org/cli.html#csv_import
100 | [Text::CSV_XS]: https://metacpan.org/pod/Text::CSV_XS
101 | [Regexp::Common]: https://metacpan.org/pod/Regexp::Common
102 | 


--------------------------------------------------------------------------------
/tools/csv2sqlite:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | # -*- perl -*-
  3 | # Smart import of CSV files into sqlite tables.
  4 | # Usage: csv2sqlite [OPTIONS] database tablename [FILE]
  5 | # csv2sqlite --help or see the bottom of the source for details.
  6 | 
  7 | #  Copyright 2019 Shawn Wagner
  8 | 
  9 | # Permission is hereby granted, free of charge, to any person
 10 | # obtaining a copy of this software and associated documentation files
 11 | # (the "Software"), to deal in the Software without restriction,
 12 | # including without limitation the rights to use, copy, modify, merge,
 13 | # publish, distribute, sublicense, and/or sell copies of the Software,
 14 | # and to permit persons to whom the Software is furnished to do so,
 15 | # subject to the following conditions:
 16 | 
 17 | # The above copyright notice and this permission notice shall be
 18 | # included in all copies or substantial portions of the Software.
 19 | 
 20 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 21 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 22 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 23 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 24 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 25 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 26 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 27 | # SOFTWARE.
 28 | 
 29 | use warnings;
 30 | use strict;
 31 | use autodie;
 32 | use feature qw/say/;
 33 | use open ':locale';
 34 | use DBI; # Also needs DBD::SQLite, obviously
 35 | use Text::CSV_XS;
 36 | use Getopt::Long;
 37 | use Pod::Usage;
 38 | 
# Options
# Command-line settings with their defaults. These are file-scoped lexicals
# that the rest of the script, including the subs at the bottom, reads
# directly.
my $sep = ',';          # -t: field separator handed to Text::CSV_XS
my $columns;            # --columns: comma-separated column names
my $has_headers = 0;    # --headers: first record holds column names
my $guess_types = 0;    # --guess-types: store numeric-looking values as numbers
my $empty_null = 0;     # --empty-nulls: empty fields become NULL
my $help = 0;           # --help: show usage and exit
my $strip = 0;          # --strip: trim whitespace around fields
my $primary_key;        # --primary-key / --pk: comma-separated key columns
my $ipk;                # --ipk: single INTEGER PRIMARY KEY column
my $without_rowid = 0;  # --without-rowid: create a WITHOUT ROWID table
my $ignore = 0;         # --ignore: INSERT OR IGNORE
my $replace = 0;        # --replace: INSERT OR REPLACE
my $debug = 0;          # --debug: print generated SQL; implies --verbose
my $verbose = 0;        # --verbose: print progress messages
my $unsafe = 0;         # --unsafe: PRAGMA synchronous = OFF
GetOptions('t=s' => \$sep,
           'columns=s' => \$columns,
           'headers' => \$has_headers,
           'guess-types' => \$guess_types,
           'empty-nulls' => \$empty_null,
           'strip' => \$strip,
           'primary-key|pk=s' => \$primary_key,
           'ipk=s' => \$ipk,
           "without-rowid" => \$without_rowid,
           'ignore' => \$ignore,
           'replace' => \$replace,
           'debug' => \$debug,
           'verbose' => \$verbose,
           'unsafe' => \$unsafe,
           'help' => \$help)
    or pod2usage(2);
pod2usage(1) if $help;
# The database and table name are mandatory; the input file is optional.
pod2usage(2) if @ARGV < 2;
# Reject contradictory option combinations. The trailing newline keeps Perl
# from appending the script file and line number to the message.
die "Cannot use both --ignore and --replace\n" if $ignore && $replace;
die "Cannot use both --primary-key and --ipk\n" if $primary_key && $ipk;
die "Cannot use --without-rowid without a primary key\n"
    if $without_rowid && !($primary_key || $ipk);
$verbose = 1 if $debug;
 78 | 
 79 | my $database = shift;
 80 | my $table = shift;
 81 | my $fh;
 82 | if (@ARGV) {
 83 |     my $filename = shift;
 84 |     open $fh, "<", $filename;
 85 | } else {
 86 |     $fh = \*STDIN;
 87 | }
 88 | 
 89 | my $csv = Text::CSV_XS->new({binary => 1, sep_char => $sep,
 90 |                              auto_diag => 2, diag_verbose => 1,
 91 |                              blank_is_undef => $empty_null,
 92 |                              allow_whitespace => $strip});
 93 | 
 94 | my $dbh = DBI->connect("dbi:SQLite:dbname=$database", '', '',
 95 |                        { sqlite_see_if_its_a_number => $guess_types,
 96 |                          AutoCommit => 1,
 97 |                          RaiseError => 1
 98 |                        });
 99 | 
# Does the target table already exist?
my $exists = table_exists($dbh, $table);
my @headers;

# Read a header record when the column names of a new table have to come
# from the file, or when the user said the file starts with one. Header
# names are lowercased and returned as double-quoted SQL identifiers.
if ((!$exists && !defined $columns) || $has_headers) {
    @headers =
        $csv->header($fh, {
            # header() looks for a separator from its own default set
            # and switches the parser to it; restrict that set to the
            # separator the user asked for so -t is always honoured.
            sep_set => [$sep],
            munge_column_names => sub {
                my $col = lc $_;
                $col =~ s/"/""/g;
                qq/"$col"/;
            }});
    say 'Read headers from file' if $debug;
}
112 | 
$dbh->do('PRAGMA synchronous = OFF') if $unsafe;

# Table creation and all inserts happen in one transaction.
$dbh->begin_work;

if (!$exists) {
    local $" = ',';    # join interpolated arrays with commas
    my @pks;
    if (defined $ipk) {
        (my $escaped_ipk = $ipk) =~ s/"/""/g;
        @pks = (qq/"$escaped_ipk"/);
    } elsif (defined $primary_key) {
        @pks = column_names($primary_key);
    }
    my @names = defined $columns ? column_names($columns) : @headers;
    # Give the --ipk column the INTEGER type that makes it a rowid alias.
    # Header names are lowercased when read and SQLite column names are
    # case-insensitive, so compare without regard to case.
    my @columns =
        map { defined $ipk && lc($_) eq lc($pks[0]) ? "$_ INTEGER" : $_ }
          @names;
    # Double any quote characters so the table name stays one identifier.
    (my $quoted_table = $table) =~ s/"/""/g;
    my $create = qq/CREATE TABLE "$quoted_table"(@columns/;
    $create .= ", PRIMARY KEY(@pks)" if @pks;
    $create .= ')';
    $create .= " WITHOUT ROWID" if $without_rowid;
    say "Table $table does not exist; creating." if $verbose;
    say "Creating table using: $create" if $debug;
    $dbh->do($create);
}
137 | 
# With header names the INSERT statement can name its columns up front;
# otherwise it is built from the width of the first data record.
my $insert;
$insert = build_insert_stmt($dbh, \@headers, \@headers) if @headers;

my $count = 0;
my $record = 0;
while (my $row = $csv->getline($fh)) {
    $record += 1;
    $insert = build_insert_stmt($dbh, [], $row) unless defined $insert;
    # A record that cannot be inserted is skipped, as before, but it is now
    # reported instead of vanishing silently, and it no longer contributes
    # a stale or negative row count.
    if (eval { $insert->execute(@$row); 1 }) {
        my $rows = $insert->rows;
        $count += $rows if $rows > 0;
    } else {
        warn "Skipping data record $record: $@";
    }
}
+$csv->error_diag and $csv->error_diag; # auto_diag doesn't always report errors

say "Inserted $count rows into $table" if $verbose;

$dbh->commit;
$dbh->disconnect;
153 | 
# table_exists($db, $name)
# True when sqlite_master lists exactly one table called $name.
sub table_exists {
    my ($handle, $name) = @_;
    my $query = <<EOQ;
SELECT count(*)
FROM sqlite_master
WHERE type = 'table' AND name = ?
EOQ
    my $matches = $handle->selectrow_array($query, {}, $name);

    return $matches == 1;
}
164 | 
# build_insert_stmt($db, \@headers, \@cols)
# Prepare the INSERT statement used for every data row.
#   $db      - database handle the statement is prepared on
#   $headers - quoted column names to list in the statement; may be empty,
#              in which case no column list is written
#   $cols    - array whose length gives the number of placeholders
# The conflict clause follows --ignore / --replace (default OR ABORT).
# Returns the prepared statement handle.
sub build_insert_stmt {
    my ($db, $headers, $cols) = @_;
    local $" = ',';    # join interpolated arrays with commas
    my @values = ('?') x @$cols;
    my $constraint = $ignore ? 'OR IGNORE'
        : ($replace ? 'OR REPLACE' : 'OR ABORT');
    # Double any quote characters so the table name stays one identifier.
    (my $quoted_table = $table) =~ s/"/""/g;
    my $insert = qq/INSERT $constraint INTO "$quoted_table"/;
    $insert .= "(@$headers)" if @$headers;
    $insert .= " VALUES(@values)";
    say "Inserting rows using: $insert" if $debug;
    # Use the handle that was passed in rather than the file-level one.
    return $db->prepare($insert);
}
177 | 
# column_names($list)
# Split a comma-separated list of names and return each one as a
# double-quoted SQL identifier with embedded quotes doubled.
sub column_names {
    my @quoted;
    for my $name (split(/,/, $_[0])) {
        (my $escaped = $name) =~ s/"/""/g;
        push @quoted, qq/"$escaped"/;
    }
    return @quoted;
}
181 | 
182 | __END__
183 | 
184 | =head1 NAME
185 | 
186 | csv2sqlite - Import a CSV file into a Sqlite database
187 | 
188 | =head1 SYNOPSIS
189 | 
190 | csv2sqlite [OPTIONS] database tablename [FILE]
191 | 
192 | =head1 OPTIONS
193 | 
194 | =over 4
195 | 
196 | =item B<-t CHAR>
197 | 
198 | Set field separator.
199 | 
200 | =item B<--headers>
201 | 
202 | Treat the first row as a list of column names to use when creating a
203 | new table or inserting into an existing table. If the table does not
204 | exist, a header line is always assumed.
205 | 
206 | =item B<--columns=COLUMNS>
207 | 
208 | When creating a table, normally the first line is used to get the
209 | column names. With this option, the given comma-separated names are
210 | used instead, and the first line is treated as data (Unless
211 | C<--headers> is also given; then the names must be the same but the
212 | order can be different).
213 | 
214 | =item B<--primary-key=COLUMNS>
215 | 
216 | Set the primary key of the newly created table to the given
217 | comma-separated column names. Ignored if the table exists.
218 | 
219 | =item B<--ipk=COLUMN>
220 | 
221 | The (single) given column should be an B<INTEGER PRIMARY KEY>
222 | one. Ignored if the table exists. Cannot be combined with
223 | C<--primary-key>.
224 | 
225 | =item B<--without-rowid>
226 | 
227 | Make the table a B<WITHOUT ROWID> one. Requires a primary key. Ignored
228 | if the table exists.
229 | 
230 | =item B<--guess-types>
231 | 
232 | Try to figure out if a value is a numeric type and insert accordingly,
233 | instead of treating all values as strings.
234 | 
235 | =item B<--empty-nulls>
236 | 
237 | Empty unquoted fields are inserted as null values, not 0-length
238 | strings.
239 | 
240 | =item B<--strip>
241 | 
242 | Remove leading and trailing whitespace from fields (Such spaces are
243 | always kept if inside quoted fields. Spaces before or after the quotes
244 | are removed with this option).
245 | 
246 | =item B<--ignore>
247 | 
248 | Ignore inserts that fail because of constraint violations.
249 | 
250 | =item B<--replace>
251 | 
252 | Replace the existing row with the current one on constraint
253 | violations.
254 | 
255 | =item B<--unsafe>
256 | 
257 | Speed up insertion at the cost of possible database corruption on OS
258 | crash or power loss at the wrong time.
259 | 
260 | =item B<--help>
261 | 
262 | Display help.
263 | 
264 | =back
265 | 
266 | =head1 DESCRIPTION
267 | 
268 | Import a CSV file into a Sqlite database with some extra features not
269 | provided by the normal sqlite3 shell's import functionality.
270 | 
271 | =cut
272 | 


--------------------------------------------------------------------------------
/tools/demo_csvimport.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Examples of csv2sqlite usage
 4 | 
 5 | db=demo.db
 6 | rm -f "$db"
 7 | 
 8 | # Basic import
 9 | ./csv2sqlite --verbose "$db" test1 <<EOF
10 | id,animal
11 | 1,dog
12 | 2,cat
13 | 3,fish
14 | 4,turtle
15 | EOF
16 | 
17 | # Import into a new table with an integer primary key
18 | ./csv2sqlite --verbose --ipk=id "$db" test2 <<EOF
19 | id,animal
20 | 1,dog
21 | 2,cat
22 | 3,fish
23 | 4,turtle
24 | EOF
25 | 
26 | # Import into a new table with default affinities, storing numeric
27 | # values as numbers instead of strings. Also demonstrate default
28 | # handling of leading spaces in fields, and quoted fields.
29 | ./csv2sqlite --verbose --guess-types "$db" test3 <<EOF
30 | id,example
31 | 1,a string
32 | 2,"a quoted string"
33 | 3,"a quoted string with ""quotes"""
34 | 4, a string with a leading space
35 | 5," a quoted leading space"
36 | EOF
37 | 
38 | # Same but stripping leading spaces in fields of badly formed CSV
39 | ./csv2sqlite --verbose --guess-types --headers --strip "$db" test3 <<EOF
40 | id,example
41 | 6, a string with a leading space
42 | 7," a quoted leading space"
43 | 8, "a quoted string with unquoted leading space"
44 | 9, " a quoted string with unquoted leading space and quoted leading space"
45 | EOF
46 | 
47 | # Creating a new table with user-defined column names instead of
48 | # using the first line.
49 | ./csv2sqlite --verbose --columns=fname,lname "$db" test4 <<EOF
50 | John,Smith
51 | Jane,Doe
52 | EOF
53 | 
54 | # Same but using the first line to determine the order of fields in the input.
55 | ./csv2sqlite --verbose --columns=fname,lname --headers "$db" test5 <<EOF
56 | lname,fname
57 | Lincoln,Abraham
58 | Polk,James
59 | EOF
60 | 
61 | 


--------------------------------------------------------------------------------
/tools/table2sql:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | # -*- perl -*-
  3 | 
  4 | # Bare bones tool to convert a table like
  5 | #
  6 | # HEADER1 | HEADER2
  7 | # --------|--------
  8 | # VALUE1  | VALUE2
  9 | #
 10 | # to sqlite3-dialect SQL DDL statements.
 11 | 
 12 | # Copyright 2019 Shawn Wagner <shawnw.mobile@gmail.com>
 13 | 
 14 | # Permission is hereby granted, free of charge, to any person obtaining
 15 | # a copy of this software and associated documentation files (the
 16 | # "Software"), to deal in the Software without restriction, including
 17 | # without limitation the rights to use, copy, modify, merge, publish,
 18 | # distribute, sublicense, and/or sell copies of the Software, and to
 19 | # permit persons to whom the Software is furnished to do so, subject to
 20 | # the following conditions:
 21 | 
 22 | # The above copyright notice and this permission notice shall be
 23 | # included in all copies or substantial portions of the Software.
 24 | 
 25 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 26 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 27 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 28 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 29 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 30 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 31 | # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 32 | 
 33 | 
 34 | use warnings;
 35 | use strict;
 36 | use feature qw/say/;
 37 | use Regexp::Common qw/number/;
 38 | use Getopt::Long;
 39 | use Pod::Usage;
 40 | 
 41 | my $temp_table = 0;
 42 | my $use_spaces = 0;
 43 | my $help = 0;
 44 | GetOptions('t' => \$temp_table,
 45 |            's' => \$use_spaces,
 46 |            'help' => \$help);
 47 | pod2usage(1) if $help;
 48 | 
 49 | my $strip_pipes;
 50 | if ($use_spaces) {
 51 |     $strip_pipes = qr/^\s+|\s+$/;
 52 | } else {
 53 |     $strip_pipes = qr/(?:^\s*\|\s*) # Leading pipe and whitespace
 54 |         |
 55 |         (?:\s*\|\s*$) # Trailing pipe and whitespace
 56 |          /x;
 57 | }
 58 | 
 59 | my $pipes = $use_spaces ? qr/\s+/ : qr/\s*\|\s*/;
 60 | $" = ',';
 61 | 
 62 | my $table = shift // 'mytable';
 63 | $table =~ s/"/""/g;
 64 | $table = qq{"$table"};
 65 | $table = "temp.$table" if $temp_table;
 66 | 
 67 | my $header = <>;
 68 | chomp $header;
 69 | $header =~ s/$strip_pipes//g;
 70 | my @columns = map { s/"/""/g; qq/"$_"/ } split /$pipes/, $header;
 71 | 
 72 | say "BEGIN TRANSACTION;";
 73 | say "CREATE TABLE IF NOT EXISTS $table(@columns);";
 74 | 
 75 | while (<>) {
 76 |   chomp;
 77 |   next if /^\s*[-+|_]+\s*$/;     # Skip separator lines
 78 |   s/$strip_pipes//g;
 79 |   my @values = map { /^$RE{num}{real}$/ ? $_ : do { s/'/''/g; qq{'$_'} } }
 80 |     split /$pipes/;
 81 |   say "INSERT INTO $table VALUES (@values);";
 82 | }
 83 | say "COMMIT;";
 84 | 
 85 | __END__
 86 | 
 87 | =head1 NAME
 88 | 
 89 | table2sql - Convert an ASCII art table to SQLite statements
 90 | 
 91 | =head1 SYNOPSIS
 92 | 
 93 | table2sql [OPTIONS] [TABLE NAME] [TABLE FILE]
 94 | 
 95 | =head1 OPTIONS
 96 | 
 97 | =over 4
 98 | 
 99 | =item B<-s>
100 | 
101 | Use whitespace instead of pipes as column delimiters.
102 | 
103 | =item B<-t>
104 | 
105 | Create a temporary table.
106 | 
107 | =item B<--help>
108 | 
109 | Display help.
110 | 
111 | =back
112 | 
113 | =head1 DESCRIPTION
114 | 
115 | Given an ASCII art table where columns are separated by pipe
116 | characters (C<|>), generate SQLite flavor C<CREATE TABLE> and C<INSERT
117 | INTO> statements to recreate it in a database. The first line of input
118 | must be a header with column names.
119 | 
120 | Reads from standard input if no filename is given.
121 | 
122 | Outputs to standard output.
123 | 
124 | =cut
125 | 


--------------------------------------------------------------------------------