├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE ├── README.md ├── build.rs ├── demo.py ├── demo_serialize.py ├── src ├── lib.rs ├── serialize.rs └── shim.c ├── tests ├── hmh.rs ├── serialize.rs └── util.rs └── wrapper.h /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | 3 | cache: cargo 4 | 5 | dist: xenial 6 | 7 | os: 8 | - linux 9 | - osx 10 | 11 | addons: 12 | homebrew: 13 | packages: 14 | - sqlite3 15 | 16 | env: 17 | - RUST_BACKTRACE=1 PKG_CONFIG_PATH="/usr/local/opt/sqlite/lib/pkgconfig" 18 | 19 | before_script: 20 | - rustup component add rustfmt 21 | 22 | rust: 23 | - stable 24 | - nightly 25 | 26 | script: 27 | - cargo fmt --all -- --check 28 | - cargo test --no-default-features 29 | - cargo test --all-features 30 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sqlite3_hyperminhash" 3 | version = "0.1.0" 4 | authors = ["Lukas Lueg "] 5 | edition = "2018" 6 | build = "build.rs" 7 | 8 | [dependencies] 9 | hyperminhash = "0.1" 10 | 11 | [dev-dependencies] 12 | rusqlite = "0.27" 13 | rand = "0.8" 14 | 15 | [build-dependencies] 16 | cc = "1" 17 | bindgen = { version = "0.60", default_features = false, features = ["runtime"] } 18 | pkg-config = "0.3" 19 | 20 | [lib] 21 | crate-type = ["cdylib", "lib"] 22 | 23 | [features] 24 | default = [] 25 | serialize = ["hyperminhash/serialize"] 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Lukas Lueg 
(lukas.lueg@gmail.com) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hyperminhash for SQLite3 2 | 3 | [![Build Status](https://travis-ci.org/lukaslueg/sqlite3_hyperminhash.svg?branch=master)](https://travis-ci.org/lukaslueg/sqlite3_hyperminhash) 4 | 5 | A [Hyperminhash](https://github.com/lukaslueg/hyperminhash)-extension for SQLite3, providing very fast, constant-memory cardinality approximation, including intersection- and union-operations. 6 | 7 | #### ... 
Query on an in-memory table of two million (INT, INT)-rows, no index 8 | 9 | Query | Result | Time 10 | --------------------------------------------------------------|-------------------------|-------------- 11 | `SELECT COUNT(*) FROM (SELECT DISTINCT foo, bar FROM foobar)` | 1,734,479 | 5028ms 12 | `SELECT hyperminhash(foo, bar) FROM foobar` | 1,728,632 (error 0.34%) | 337ms (x14.9) 13 | 14 | 15 | ## The extension provides the following functions 16 | 17 | * **`HYPERMINHASH()`**, an aggregate-function accepting up to `SQLITE_LIMIT_FUNCTION_ARG` arguments; returns the approximate cardinality of the items seen as a `DOUBLE`. 18 | 19 | E.g. `SELECT HYPERMINHASH(users.date, users.ip) AS unique_users FROM users;` 20 | 21 | * **`HYPERMINHASH_ZERO()`**, a scalar-function accepting no arguments; returns an opaque `BLOB` representing a count of zero. 22 | 23 | E.g. `INSERT INTO stats (data_point, hmh_data) VALUES ('users', HYPERMINHASH_ZERO());` 24 | 25 | * **`HYPERMINHASH_SERIALIZE()`**, an aggregate-function similar to `HYPERMINHASH()`. Returns an opaque `BLOB` representing the approximate cardinality of the items seen. 26 | 27 | E.g. `UPDATE stats SET hmh_data = (SELECT HYPERMINHASH_SERIALIZE(users.date, users.ip) FROM users) WHERE stats.data_point = 'users';` 28 | 29 | * **`HYPERMINHASH_DESERIALIZE()`**, a scalar-function accepting a single `BLOB` returned by `HYPERMINHASH_ZERO()`, `HYPERMINHASH_SERIALIZE()`, `HYPERMINHASH_ADD()` or `HYPERMINHASH_UNION()`. Returns the approximate cardinality as a `DOUBLE`. 30 | 31 | E.g. `SELECT HYPERMINHASH_DESERIALIZE(stats.hmh_data) FROM stats WHERE stats.data_point = 'users';` 32 | 33 | * **`HYPERMINHASH_UNION()`**, an aggregate-function accepting `BLOB`s returned by `HYPERMINHASH_ZERO()`, `HYPERMINHASH_SERIALIZE()`, `HYPERMINHASH_ADD()` or `HYPERMINHASH_UNION()`. Returns an opaque `BLOB` representing the union-set operation over its inputs. 34 | 35 | E.g. 
`SELECT HYPERMINHASH_UNION(stats.hmh_data) FROM stats WHERE stats.data_point = 'users' AND result = 'error';` 36 | 37 | * **`HYPERMINHASH_ADD()`**, a scalar-function accepting up to `SQLITE_LIMIT_FUNCTION_ARG` `BLOB` arguments; the scalar equivalent of `HYPERMINHASH_UNION()`. 38 | 39 | E.g. `UPDATE stats SET hmh_data = HYPERMINHASH_ADD(stats.hmh_data, (SELECT HYPERMINHASH_SERIALIZE(users.date, users.ip) FROM users WHERE users.date = DATE('now'))) WHERE stats.data_point = 'users';` 40 | 41 | * **`HYPERMINHASH_INTERSECTION()`**, a scalar-function accepting exactly two `BLOB`s returned by `HYPERMINHASH_ZERO()`, `HYPERMINHASH_SERIALIZE()`, `HYPERMINHASH_ADD()` or `HYPERMINHASH_UNION()`. Returns the approximate cardinality of the intersection-set operation over its arguments as a `DOUBLE`. 42 | 43 | E.g. `SELECT HYPERMINHASH_INTERSECTION((SELECT stats.hmh_data FROM stats WHERE stats.data_point = 'users'), (SELECT stats.hmh_data FROM stats WHERE stats.data_point = 'admins'));` 44 | 45 | ## Building 46 | 47 | Use Rust's package manager via `cargo build --release`. A shared object file for the current platform will be placed in `target/release`. 48 | 49 | By default, only the `HYPERMINHASH()`-function is available. Compile the crate with the `serialize`-feature to enable the other functions, which return a static error if the `serialize`-feature was not activated. 
50 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | pkg_config::Config::new() 3 | .atleast_version("3.8.7") 4 | .probe("sqlite3") 5 | .unwrap(); 6 | 7 | let bindings = bindgen::Builder::default() 8 | .header("wrapper.h") 9 | .allowlist_function("sqlite3_aggregate_context") 10 | .allowlist_function("sqlite3_auto_extension") 11 | .allowlist_function("sqlite3_errstr") 12 | .allowlist_function("sqlite3_result_blob") 13 | .allowlist_function("sqlite3_result_double") 14 | .allowlist_function("sqlite3_result_error") 15 | .allowlist_function("sqlite3_result_error_nomem") 16 | .allowlist_function("sqlite3_value_blob") 17 | .allowlist_function("sqlite3_value_bytes") 18 | .allowlist_function("sqlite3_value_double") 19 | .allowlist_function("sqlite3_value_int64") 20 | .allowlist_function("sqlite3_value_text") 21 | .allowlist_function("sqlite3_value_type") 22 | .allowlist_type("sqlite3_context") 23 | .allowlist_var("SQLITE_BLOB") 24 | .allowlist_var("SQLITE_FLOAT") 25 | .allowlist_var("SQLITE_INTEGER") 26 | .allowlist_var("SQLITE_NULL") 27 | .allowlist_var("SQLITE_OK") 28 | .allowlist_var("SQLITE_TEXT") 29 | .generate() 30 | .expect("Unable to generate bindings"); 31 | let out_path = std::path::PathBuf::from(std::env::var("OUT_DIR").unwrap()); 32 | bindings 33 | .write_to_file(out_path.join("bindings.rs")) 34 | .expect("Couldn't write bindings!"); 35 | 36 | cc::Build::new().file("src/shim.c").compile("shim") 37 | } 38 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sqlite3 3 | 4 | if __name__ == "__main__": 5 | # Load the extension 6 | con = sqlite3.connect(":memory:") 7 | con.enable_load_extension(True) 8 | con.load_extension("target/release/libsqlite3_hyperminhash") 9 | 10 | # 
def random_date():
    """Return a random calendar day formatted as ``YYYY-MM-DD``.

    A Unix timestamp is drawn uniformly from the closed range
    [1451602800, 1546297199] and converted to a date.
    """
    timestamp = random.randint(1451602800, 1546297199)
    day = datetime.date.fromtimestamp(timestamp)
    return day.isoformat()


def random_ipv6():
    """Return a 16-byte ``bytearray``: three random bytes, then thirteen zero bytes."""
    address = bytearray(16)
    for position in range(3):
        address[position] = random.getrandbits(8)
    return address
def update_count():
    """Fold the 2018 user log into the stored sketch of the 'users' data point.

    Reads the enclosing ``con`` connection. The sub-select serializes a
    sketch over (date, ip) for rows dated 2018-01-01 through 2018-12-31, and
    ``HYPERMINHASH_ADD`` merges it with the ``hmh_data`` blob already stored
    in ``stats``.
    """
    # Update the count, ran daily,
    # using `HYPERMINHASH_ADD` and `HYPERMINHASH_SERIALIZE`
    con.execute('''UPDATE stats
                   SET hmh_data =
                   HYPERMINHASH_ADD(
                       hmh_data,
                       (SELECT HYPERMINHASH_SERIALIZE(users.date, users.ip)
                        FROM users
                        WHERE users.date BETWEEN '2018-01-01' AND '2018-12-31')
                   )
                   WHERE stats.data_point = 'users'
                ''')
| c.execute('''SELECT COUNT(DISTINCT users.ip) 90 | FROM users 91 | WHERE users.date BETWEEN '2017-01-01' AND '2018-12-31' 92 | AND users.ip IN (SELECT u.ip 93 | FROM users AS u 94 | WHERE u.date < '2017-01-01') 95 | ''') 96 | approx_t = time.perf_counter_ns() - t 97 | print("Recurring users, exact: %i, in %.2fms" % (c.fetchone()[0], approx_t / 1000000)) 98 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::missing_safety_doc)] 2 | mod bindings { 3 | #![allow(non_upper_case_globals)] 4 | #![allow(non_camel_case_types)] 5 | #![allow(non_snake_case)] 6 | #![allow(dead_code)] 7 | include!(concat!(env!("OUT_DIR"), "/bindings.rs")); 8 | } 9 | use bindings::*; 10 | use std::{ffi, fmt, io, mem, os::raw, slice}; 11 | 12 | use hyperminhash::Sketch; 13 | 14 | #[cfg(feature = "serialize")] 15 | pub mod serialize; 16 | 17 | #[derive(Debug)] 18 | enum HMHError<'a> { 19 | #[cfg(not(feature = "serialize"))] 20 | FeatureMissing, 21 | #[cfg_attr(not(feature = "serialize"), allow(dead_code))] 22 | ValueIsNotBlob(RawValue<'a>), 23 | UnknownValueType, 24 | Io(io::Error), 25 | } 26 | impl<'a> HMHError<'a> { 27 | unsafe fn set_ctx Result<(), Self>>(ctx: *mut sqlite3_context, f: F) { 28 | if let Err(e) = f() { 29 | let err_msg = e.to_string(); 30 | sqlite3_result_error( 31 | ctx, 32 | err_msg.as_bytes().as_ptr() as *const raw::c_char, 33 | err_msg.as_bytes().len() as raw::c_int, 34 | ); 35 | } 36 | } 37 | } 38 | 39 | impl<'a> fmt::Display for HMHError<'a> { 40 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { 41 | match self { 42 | #[cfg(not(feature = "serialize"))] 43 | HMHError::FeatureMissing => write!(f, "This function is unavailable because sqlite3_hyperminhash was compiled without the `serialize`-feature."), 44 | HMHError::ValueIsNotBlob(v) => write!(f, "value is not of type BLOB: {:?}", v), 45 | HMHError::Io(e) => 
write!(f, "IO-error in hyperminhash: {}", e), 46 | HMHError::UnknownValueType => write!(f, "Unkown value-type from sqlite") 47 | } 48 | } 49 | } 50 | 51 | impl<'a> From for HMHError<'a> { 52 | fn from(e: io::Error) -> Self { 53 | HMHError::Io(e) 54 | } 55 | } 56 | 57 | #[derive(Debug, Hash)] 58 | enum RawValue<'a> { 59 | Null, 60 | Int(i64), 61 | Float(u64), // Bit-representation of a double to make it hashable 62 | Text(&'a str), 63 | Blob(&'a [u8]), 64 | } 65 | 66 | impl<'a> RawValue<'a> { 67 | unsafe fn new(value: *mut sqlite3_value) -> Result> { 68 | match sqlite3_value_type(value) as u32 { 69 | SQLITE_NULL => Ok(RawValue::Null), 70 | SQLITE_INTEGER => Ok(RawValue::Int(sqlite3_value_int64(value))), 71 | SQLITE_FLOAT => Ok(RawValue::Float({ 72 | // Hold my beer and watch this! 73 | sqlite3_value_double(value).to_bits() 74 | })), 75 | SQLITE_TEXT => { 76 | let s = sqlite3_value_text(value); 77 | assert!(!s.is_null()); 78 | let s = std::ffi::CStr::from_ptr(s as *const raw::c_char); 79 | // We explicitely told sqlite3 that we want UTF8-data in shim.c! 
80 | Ok(RawValue::Text(std::str::from_utf8_unchecked(s.to_bytes()))) 81 | } 82 | SQLITE_BLOB => { 83 | let blob = sqlite3_value_blob(value); 84 | let len = sqlite3_value_bytes(value); 85 | if len > 0 { 86 | Ok(RawValue::Blob(std::slice::from_raw_parts( 87 | blob as *const u8, 88 | len as usize, 89 | ))) 90 | } else { 91 | Ok(RawValue::Blob(&[])) 92 | } 93 | } 94 | _ => Err(HMHError::UnknownValueType), 95 | } 96 | } 97 | 98 | #[cfg(feature = "serialize")] 99 | fn as_blob(self) -> Result<&'a [u8], HMHError<'a>> { 100 | match self { 101 | RawValue::Blob(b) => Ok(b), 102 | other => Err(HMHError::ValueIsNotBlob(other)), 103 | } 104 | } 105 | } 106 | 107 | /// Used by tests to auto-load itself into sqlite 108 | #[doc(hidden)] 109 | pub mod testutil { 110 | pub use super::bindings::sqlite3_auto_extension; 111 | pub use super::bindings::sqlite3_errstr; 112 | pub use super::bindings::SQLITE_OK; 113 | } 114 | 115 | // Initialization shim provided by shim.c 116 | extern "C" { 117 | pub fn init_shim( 118 | db: *mut ffi::c_void, 119 | pzErrMsg: *const ffi::c_void, 120 | pApi: *const ffi::c_void, 121 | ) -> raw::c_int; 122 | } 123 | 124 | /// Public initialization function, called by sqlite 125 | #[no_mangle] 126 | #[allow(non_snake_case)] 127 | pub unsafe extern "C" fn sqlite3_sqlitehyperminhash_init( 128 | db: *mut ffi::c_void, 129 | pzErrMsg: *const ffi::c_void, 130 | pApi: *const ffi::c_void, 131 | ) -> raw::c_int { 132 | init_shim(db, pzErrMsg, pApi) 133 | } 134 | 135 | /// The step-function, called for each row 136 | #[no_mangle] 137 | pub unsafe extern "C" fn hyperminhash_step( 138 | ctx: *mut sqlite3_context, 139 | num_values: raw::c_int, 140 | values: *mut *mut sqlite3_value, 141 | ) { 142 | HMHError::set_ctx(ctx, || { 143 | let p = sqlite3_aggregate_context(ctx, mem::size_of::<*mut Sketch>() as raw::c_int) 144 | as *mut *mut Sketch; 145 | if p.is_null() { 146 | sqlite3_result_error_nomem(ctx); 147 | return Ok(()); 148 | } 149 | if (*p).is_null() { 150 | *p = 
Box::into_raw(Box::new(Sketch::default())); 151 | } 152 | let sketch = &mut **p; 153 | let args: Result, _> = slice::from_raw_parts(values, num_values as usize) 154 | .iter() 155 | .filter_map(|v| match RawValue::new(*v) { 156 | Ok(RawValue::Null) => None, 157 | other => Some(other), 158 | }) 159 | .collect(); 160 | sketch.add(args?); 161 | Ok(()) 162 | }) 163 | } 164 | 165 | /// Finalize the aggregate by computing the cardinality 166 | #[no_mangle] 167 | pub unsafe extern "C" fn hyperminhash_final(ctx: *mut sqlite3_context) { 168 | let p = sqlite3_aggregate_context(ctx, 0) as *mut *mut Sketch; 169 | if p.is_null() { 170 | sqlite3_result_double(ctx, 0.0); 171 | return; 172 | } 173 | sqlite3_result_double(ctx, Box::from_raw(*p).cardinality()); 174 | } 175 | 176 | #[cfg(not(feature = "serialize"))] 177 | pub mod serialize_stub { 178 | use super::*; 179 | 180 | #[no_mangle] 181 | pub unsafe extern "C" fn hyperminhash_serialize_final(ctx: *mut sqlite3_context) { 182 | HMHError::set_ctx(ctx, || Err(HMHError::FeatureMissing)) 183 | } 184 | 185 | macro_rules! 
no_such_func { 186 | ($name:ident) => { 187 | #[no_mangle] 188 | pub unsafe extern "C" fn $name( 189 | ctx: *mut sqlite3_context, 190 | _num_values: raw::c_int, 191 | _values: *mut *mut sqlite3_value, 192 | ) { 193 | HMHError::set_ctx(ctx, || Err(HMHError::FeatureMissing)) 194 | } 195 | }; 196 | } 197 | 198 | no_such_func!(hyperminhash_zero); 199 | no_such_func!(hyperminhash_deserialize); 200 | no_such_func!(hyperminhash_add); 201 | no_such_func!(hyperminhash_union_step); 202 | no_such_func!(hyperminhash_intersection); 203 | } 204 | -------------------------------------------------------------------------------- /src/serialize.rs: -------------------------------------------------------------------------------- 1 | use std::{ffi, mem, os::raw, slice}; 2 | 3 | use super::bindings::*; 4 | use super::{HMHError, RawValue, Sketch}; 5 | 6 | unsafe extern "C" fn drop_blob_buffer(buf: *mut ffi::c_void) { 7 | drop(Box::::from_raw(buf as *mut _)) 8 | } 9 | 10 | unsafe fn set_blob_result(ctx: *mut sqlite3_context, value: Box) { 11 | let buf_len = std::mem::size_of_val(&*value); 12 | let p = Box::into_raw(value) as *const ffi::c_void; 13 | sqlite3_result_blob(ctx, p, buf_len as i32, Some(drop_blob_buffer::)); 14 | } 15 | 16 | unsafe fn sketch_to_result<'a>( 17 | sk: &Sketch, 18 | ctx: &'a *mut sqlite3_context, 19 | ) -> Result<(), HMHError<'a>> { 20 | let mut buf = Box::new([0; 32768]); 21 | sk.save(&mut buf[..])?; 22 | set_blob_result(*ctx, buf); 23 | Ok(()) 24 | } 25 | 26 | #[no_mangle] 27 | pub unsafe extern "C" fn hyperminhash_zero( 28 | ctx: *mut sqlite3_context, 29 | _num_values: raw::c_int, 30 | _values: *mut *mut sqlite3_value, 31 | ) { 32 | HMHError::set_ctx(ctx, || sketch_to_result(&Sketch::default(), &ctx)); 33 | } 34 | 35 | #[no_mangle] 36 | pub unsafe extern "C" fn hyperminhash_serialize_final(ctx: *mut sqlite3_context) { 37 | HMHError::set_ctx(ctx, || { 38 | let p = sqlite3_aggregate_context(ctx, 0) as *mut *mut Sketch; 39 | let sketch = if p.is_null() { 40 | 
Box::new(Sketch::default()) 41 | } else { 42 | Box::from_raw(*p) 43 | }; 44 | sketch_to_result(&sketch, &ctx) 45 | }) 46 | } 47 | 48 | #[no_mangle] 49 | pub unsafe extern "C" fn hyperminhash_deserialize( 50 | ctx: *mut sqlite3_context, 51 | num_values: raw::c_int, 52 | values: *mut *mut sqlite3_value, 53 | ) { 54 | assert!(num_values == 1); // Declared as such in shim.c 55 | HMHError::set_ctx(ctx, || { 56 | let sk = Sketch::load(RawValue::new(*values)?.as_blob()?)?; 57 | sqlite3_result_double(ctx, sk.cardinality()); 58 | Ok(()) 59 | }); 60 | } 61 | 62 | #[no_mangle] 63 | pub unsafe extern "C" fn hyperminhash_add( 64 | ctx: *mut sqlite3_context, 65 | num_values: raw::c_int, 66 | values: *mut *mut sqlite3_value, 67 | ) { 68 | HMHError::set_ctx(ctx, || { 69 | let args = slice::from_raw_parts(values, num_values as usize); 70 | let sum_sketch = args 71 | .iter() 72 | .map(|p| { 73 | RawValue::new(*p) 74 | .and_then(|v| v.as_blob()) 75 | .and_then(|b| Sketch::load(b).map_err(Into::into)) 76 | }) 77 | .fold(None, |sk1: Option>, sk2| { 78 | match (sk1, sk2) { 79 | (None, Ok(sk2)) => Some(Ok(sk2.clone())), 80 | (None, Err(e)) | (Some(Err(e)), _) | (Some(Ok(_)), Err(e)) => Some(Err(e)), 81 | (Some(Ok(mut sk1)), Ok(sk2)) => { 82 | sk1.union(&sk2); 83 | Some(Ok(sk1)) 84 | } 85 | } 86 | }) 87 | .transpose()? 
88 | .unwrap_or_default(); 89 | sketch_to_result(&sum_sketch, &ctx)?; 90 | Ok(()) 91 | }); 92 | } 93 | 94 | #[no_mangle] 95 | pub unsafe extern "C" fn hyperminhash_union_step( 96 | ctx: *mut sqlite3_context, 97 | num_values: raw::c_int, 98 | values: *mut *mut sqlite3_value, 99 | ) { 100 | assert!(num_values == 1); // Declared as such in shim.c 101 | HMHError::set_ctx(ctx, || { 102 | let sketch = Sketch::load(RawValue::new(*values)?.as_blob()?)?; 103 | 104 | let p = sqlite3_aggregate_context(ctx, mem::size_of::<*mut Sketch>() as raw::c_int) 105 | as *mut *mut Sketch; 106 | if p.is_null() { 107 | sqlite3_result_error_nomem(ctx); 108 | return Ok(()); 109 | } 110 | if (*p).is_null() { 111 | *p = Box::into_raw(Box::new(sketch.clone())); 112 | } else { 113 | let running_sketch = &mut **p; 114 | running_sketch.union(&sketch); 115 | } 116 | Ok(()) 117 | }) 118 | } 119 | 120 | #[no_mangle] 121 | pub unsafe extern "C" fn hyperminhash_intersection( 122 | ctx: *mut sqlite3_context, 123 | num_values: raw::c_int, 124 | values: *mut *mut sqlite3_value, 125 | ) { 126 | assert!(num_values == 2); // Declared as such in shim.c 127 | HMHError::set_ctx(ctx, || { 128 | let args = slice::from_raw_parts(values, num_values as usize); 129 | let sketch1 = Sketch::load(RawValue::new(args[0])?.as_blob()?)?; 130 | let sketch2 = Sketch::load(RawValue::new(args[1])?.as_blob()?)?; 131 | 132 | sqlite3_result_double(ctx, sketch1.intersection(&sketch2)); 133 | Ok(()) 134 | }); 135 | } 136 | -------------------------------------------------------------------------------- /src/shim.c: -------------------------------------------------------------------------------- 1 | #include 2 | SQLITE_EXTENSION_INIT1 3 | 4 | #include 5 | 6 | #ifdef SQLITE_DETERMINISTIC 7 | #define WEAK_DETERMINISTIC SQLITE_DETERMINISTIC 8 | #else 9 | #define WEAK_DETERMINISTIC 0 10 | #endif 11 | 12 | void hyperminhash_step(sqlite3_context*, int, sqlite3_value**); 13 | void hyperminhash_final(sqlite3_context*); 14 | 15 | // The 
following have error-throwing impls if `serialize`-feature is inactive 16 | void hyperminhash_zero(sqlite3_context*, int, sqlite3_value**); 17 | void hyperminhash_serialize_final(sqlite3_context*); 18 | void hyperminhash_deserialize(sqlite3_context*, int, sqlite3_value**); 19 | void hyperminhash_add(sqlite3_context*, int, sqlite3_value**); 20 | void hyperminhash_union_step(sqlite3_context*, int, sqlite3_value**); 21 | void hyperminhash_intersection(sqlite3_context*, int, sqlite3_value**); 22 | 23 | int init_shim( 24 | sqlite3 *db, 25 | char **pzErrMsg, 26 | const sqlite3_api_routines *pApi 27 | ){ 28 | SQLITE_EXTENSION_INIT2(pApi); 29 | int rc; 30 | 31 | if (sqlite3_libversion_number() < 3008007) { 32 | *pzErrMsg = sqlite3_mprintf("hyperminhash requires sqlite 3.8.7 or later"); 33 | return SQLITE_ERROR; 34 | } 35 | 36 | rc = sqlite3_create_function_v2( 37 | db, // db 38 | "hyperminhash", // zFunctionName 39 | -1, // nArg 40 | SQLITE_UTF8 | WEAK_DETERMINISTIC, // eTextRep 41 | NULL, // pApp 42 | NULL, // xFunc 43 | hyperminhash_step, // xStep 44 | hyperminhash_final, // xFinal 45 | NULL // xDestroy 46 | ); 47 | if (rc != SQLITE_OK) 48 | return rc; 49 | 50 | rc = sqlite3_create_function_v2( 51 | db, // db 52 | "hyperminhash_zero", // zFunctionName 53 | 0, // nArg 54 | SQLITE_UTF8 | WEAK_DETERMINISTIC, // eTextRep 55 | NULL, // pApp 56 | hyperminhash_zero, // xFunc 57 | NULL, // xStep 58 | NULL, // xFinal 59 | NULL // xDestroy 60 | ); 61 | if (rc != SQLITE_OK) 62 | return rc; 63 | 64 | rc = sqlite3_create_function_v2( 65 | db, // db 66 | "hyperminhash_add", // zFunctionName 67 | -1, // nArg 68 | SQLITE_UTF8 | WEAK_DETERMINISTIC, // eTextRep 69 | NULL, // pApp 70 | hyperminhash_add, // xFunc 71 | NULL, // xStep 72 | NULL, // xFinal 73 | NULL // xDestroy 74 | ); 75 | if (rc != SQLITE_OK) 76 | return rc; 77 | 78 | rc = sqlite3_create_function_v2( 79 | db, // db 80 | "hyperminhash_serialize", // zFunctionName 81 | -1, // nArg 82 | SQLITE_UTF8 | WEAK_DETERMINISTIC, // 
eTextRep 83 | NULL, // pApp 84 | NULL, // xFunc 85 | hyperminhash_step, // xStep 86 | hyperminhash_serialize_final, // xFinal 87 | NULL // xDestroy 88 | ); 89 | if (rc != SQLITE_OK) 90 | return rc; 91 | 92 | rc = sqlite3_create_function_v2( 93 | db, // db 94 | "hyperminhash_deserialize", // zFunctionName 95 | 1, // nArg 96 | SQLITE_UTF8 | WEAK_DETERMINISTIC, // eTextRep 97 | NULL, // pApp 98 | hyperminhash_deserialize, // xFunc 99 | NULL, // xStep 100 | NULL, // xFinal 101 | NULL // xDestroy 102 | ); 103 | if (rc != SQLITE_OK) 104 | return rc; 105 | 106 | rc = sqlite3_create_function_v2( 107 | db, // db 108 | "hyperminhash_union", // zFunctionName 109 | 1, // nArg 110 | SQLITE_UTF8 | WEAK_DETERMINISTIC, // eTextRep 111 | NULL, // pApp 112 | NULL, // xFunc 113 | hyperminhash_union_step, // xStep 114 | hyperminhash_serialize_final, // xFinal 115 | NULL // xDestroy 116 | ); 117 | if (rc != SQLITE_OK) 118 | return rc; 119 | 120 | return sqlite3_create_function_v2( 121 | db, // db 122 | "hyperminhash_intersection", // zFunctionName 123 | 2, // nArg 124 | SQLITE_UTF8 | WEAK_DETERMINISTIC, // eTextRep 125 | NULL, // pApp 126 | hyperminhash_intersection, // xFunc 127 | NULL, // xStep 128 | NULL, // xFinal 129 | NULL // xDestroy 130 | ); 131 | } 132 | -------------------------------------------------------------------------------- /tests/hmh.rs: -------------------------------------------------------------------------------- 1 | use rand::Rng; 2 | 3 | mod util; 4 | use util::init_db; 5 | 6 | fn hmh_id(con: &rusqlite::Connection) -> rusqlite::Result { 7 | con.query_row( 8 | "SELECT hyperminhash(id) FROM foo", 9 | rusqlite::params![], 10 | |row| row.get(0), 11 | ) 12 | } 13 | 14 | #[test] 15 | fn empty_table() -> rusqlite::Result<()> { 16 | let con = init_db()?; 17 | con.execute("CREATE TABLE foo (id INT)", rusqlite::params![])?; 18 | // Count is zero 19 | assert_eq!(hmh_id(&con)?, 0.0); 20 | con.execute("INSERT INTO foo (id) VALUES (0)", rusqlite::params![])?; 21 | // Count 
/// Inserts 1000 rows holding only 97 distinct ids and checks that the
/// estimate is within 5% of 97.
#[test]
fn simple_count_error() -> rusqlite::Result<()> {
    let con = init_db()?;
    con.execute("CREATE TABLE foo (id INT)", rusqlite::params![])?;
    let mut insert = con.prepare("INSERT INTO foo (id) VALUES (?1)")?;
    (0..1000).try_for_each(|i| insert.execute([i % 97]).map(drop))?;

    // Error should be small
    let estimate = hmh_id(&con)?;
    let relative_error = (1.0 - (estimate / 97.0)).abs();
    assert!(relative_error < 0.05);
    Ok(())
}
/// Two all-NULL rows must be estimated as a single distinct row, and adding
/// a non-NULL row must raise the estimate.
#[test]
fn null_rows() -> rusqlite::Result<()> {
    let con = init_db()?;
    con.execute(
        "CREATE TABLE foobar (foo INT, bar INT)",
        rusqlite::params![],
    )?;
    // Shared query for both measurements below.
    let estimate = || -> rusqlite::Result<f64> {
        con.query_row(
            "SELECT hyperminhash(foo, bar) FROM foobar",
            rusqlite::params![],
            |row| row.get(0),
        )
    };

    let mut insert = con.prepare("INSERT INTO foobar (foo, bar) VALUES (?1, ?2)")?;
    for _ in 0..2 {
        insert.execute(rusqlite::params![None::<i64>, None::<i64>])?;
    }

    // Empty rows count as a single row
    let only_nulls = estimate()?;
    assert!(only_nulls > 0.8);
    assert!(only_nulls < 1.2);

    insert.execute(rusqlite::params![&1, &2])?;
    let with_values = estimate()?;
    assert!(with_values > only_nulls);
    Ok(())
}
157 | |row| row.get(0), 158 | )?; 159 | // NULL-Values and rows are counted as DISTINCT does 160 | let r: f64 = con.query_row( 161 | "SELECT hyperminhash(foo, bar) FROM foobar", 162 | rusqlite::params![], 163 | |row| row.get(0), 164 | )?; 165 | assert!((1.0 - (r / real_count)).abs() < 0.05); 166 | Ok(()) 167 | } 168 | -------------------------------------------------------------------------------- /tests/serialize.rs: -------------------------------------------------------------------------------- 1 | mod util; 2 | use util::init_db; 3 | 4 | fn expect_error_msg( 5 | r: rusqlite::Result, 6 | needle: &'static str, 7 | err_msg: &'static str, 8 | ) -> rusqlite::Result<()> { 9 | match r { 10 | Err(rusqlite::Error::SqliteFailure(_, Some(ref s))) if s.contains(needle) => Ok(()), 11 | other => { 12 | panic!("{} {:?}", err_msg, other); 13 | } 14 | } 15 | } 16 | 17 | #[cfg(feature = "serialize")] 18 | pub mod serialize { 19 | use super::*; 20 | use hyperminhash::Sketch; 21 | 22 | macro_rules! test_wrong_type { 23 | ($name:ident, $func:literal) => { 24 | #[test] 25 | fn $name() -> rusqlite::Result<()> { 26 | let con = init_db()?; 27 | let r: rusqlite::Result = 28 | con.query_row(&format!("SELECT {}", $func), rusqlite::params![], |row| { 29 | row.get(0) 30 | }); 31 | expect_error_msg(r, "not of type BLOB", "did not complain about type:") 32 | } 33 | }; 34 | } 35 | 36 | macro_rules! 
test_bad_data { 37 | ($name:ident, $func:literal) => { 38 | #[test] 39 | fn $name() -> rusqlite::Result<()> { 40 | let con = init_db()?; 41 | let r: rusqlite::Result = 42 | con.query_row(&format!("SELECT {}", $func), rusqlite::params![], |row| { 43 | row.get(0) 44 | }); 45 | expect_error_msg( 46 | r, 47 | "IO-error in hyperminhash", 48 | "unpacked bad data without error", 49 | ) 50 | } 51 | }; 52 | } 53 | 54 | #[test] 55 | fn zero() -> rusqlite::Result<()> { 56 | let con = init_db()?; 57 | // Count is zero 58 | let buf: Vec = 59 | con.query_row("SELECT HYPERMINHASH_ZERO()", rusqlite::params![], |row| { 60 | row.get(0) 61 | })?; 62 | let sketch = Sketch::load(&buf[..]).unwrap(); 63 | assert_eq!(sketch.cardinality(), 0.0); 64 | Ok(()) 65 | } 66 | 67 | #[test] 68 | fn serialize() -> rusqlite::Result<()> { 69 | let con = init_db()?; 70 | con.execute("CREATE TABLE foo (id INT)", rusqlite::params![])?; 71 | 72 | // Count is zero 73 | let buf: Vec = con.query_row( 74 | "SELECT HYPERMINHASH_SERIALIZE(id) FROM foo", 75 | rusqlite::params![], 76 | |row| row.get(0), 77 | )?; 78 | let sketch = Sketch::load(&buf[..]).unwrap(); 79 | assert_eq!(sketch.cardinality(), 0.0); 80 | 81 | // Count is not zero 82 | con.execute("INSERT INTO foo (id) VALUES (0)", rusqlite::params![])?; 83 | let buf: Vec = con.query_row( 84 | "SELECT HYPERMINHASH_SERIALIZE(id) FROM foo", 85 | rusqlite::params![], 86 | |row| row.get(0), 87 | )?; 88 | let r = Sketch::load(&buf[..]).unwrap().cardinality(); 89 | assert!((1.0 - r).abs() < 0.05); 90 | 91 | Ok(()) 92 | } 93 | 94 | #[test] 95 | fn deserialize() -> rusqlite::Result<()> { 96 | let sketch: Sketch = (0..100).collect(); 97 | let mut buf: Vec = Vec::new(); 98 | sketch.save(&mut buf).unwrap(); 99 | 100 | let con = init_db()?; 101 | con.execute("CREATE TABLE counts (data BLOB)", rusqlite::params![])?; 102 | con.execute( 103 | "INSERT INTO counts (data) VALUES (?1)", 104 | rusqlite::params![&buf], 105 | )?; 106 | 107 | let r: f64 = con.query_row( 108 | 
"SELECT HYPERMINHASH_DESERIALIZE(data) FROM counts", 109 | rusqlite::params![], 110 | |row| row.get(0), 111 | )?; 112 | assert_eq!(r, sketch.cardinality()); 113 | 114 | Ok(()) 115 | } 116 | 117 | test_wrong_type!(deserialize_wrong_type, "HYPERMINHASH_DESERIALIZE('foo')"); 118 | test_bad_data!(deserialize_bad_data, "HYPERMINHASH_DESERIALIZE(X'00')"); 119 | 120 | #[test] 121 | fn add() -> rusqlite::Result<()> { 122 | let con = init_db()?; 123 | con.execute( 124 | "CREATE TABLE counts (id INT PRIMARY KEY, data BLOB)", 125 | rusqlite::params![], 126 | )?; 127 | con.execute( 128 | "INSERT INTO counts VALUES (0, HYPERMINHASH_ZERO())", 129 | rusqlite::params![], 130 | )?; 131 | 132 | con.execute("CREATE TABLE foo (id INT)", rusqlite::params![])?; 133 | let mut stmt = con.prepare("INSERT INTO foo (id) VALUES (?1)")?; 134 | for i in 0..200 { 135 | stmt.execute([i])?; 136 | } 137 | con.execute( 138 | r#"UPDATE counts 139 | SET data = HYPERMINHASH_ADD(data, 140 | (SELECT HYPERMINHASH_SERIALIZE(id) 141 | FROM foo 142 | WHERE id < 100) 143 | ) 144 | WHERE counts.id = 0"#, 145 | rusqlite::params![], 146 | )?; 147 | let r: f64 = con.query_row( 148 | "SELECT HYPERMINHASH_DESERIALIZE(data) FROM counts WHERE id = 0", 149 | rusqlite::params![], 150 | |row| row.get(0), 151 | )?; 152 | assert!((1.0 - (r / 100.0)).abs() < 0.05); 153 | 154 | con.execute( 155 | r#"UPDATE counts 156 | SET data = HYPERMINHASH_ADD(data, 157 | (SELECT HYPERMINHASH_SERIALIZE(id) 158 | FROM foo 159 | WHERE id >= 100) 160 | ) 161 | WHERE counts.id = 0"#, 162 | rusqlite::params![], 163 | )?; 164 | let r: f64 = con.query_row( 165 | "SELECT HYPERMINHASH_DESERIALIZE(data) FROM counts WHERE id = 0", 166 | rusqlite::params![], 167 | |row| row.get(0), 168 | )?; 169 | assert!((1.0 - (r / 200.0)).abs() < 0.05); 170 | 171 | Ok(()) 172 | } 173 | 174 | test_wrong_type!(add_wrong_type, "HYPERMINHASH_ADD('foo')"); 175 | test_bad_data!(add_bad_data, "HYPERMINHASH_ADD(X'00')"); 176 | 177 | #[test] 178 | fn union() -> 
rusqlite::Result<()> { 179 | let con = init_db()?; 180 | con.execute("CREATE TABLE foo (id INT)", rusqlite::params![])?; 181 | let mut stmt = con.prepare("INSERT INTO foo (id) VALUES (?1)")?; 182 | for i in 0..100 { 183 | stmt.execute([i])?; 184 | } 185 | con.execute( 186 | "CREATE TABLE stats (id INT PRIMARY KEY, data BLOB)", 187 | rusqlite::params![], 188 | )?; 189 | con.execute( 190 | r#"INSERT INTO stats (id, data) 191 | SELECT 0, HYPERMINHASH_SERIALIZE(foo.id) 192 | FROM foo 193 | WHERE foo.id <= 50"#, 194 | rusqlite::params![], 195 | )?; 196 | con.execute( 197 | r#"INSERT INTO stats (id, data) 198 | SELECT 1, HYPERMINHASH_SERIALIZE(foo.id) 199 | FROM foo 200 | WHERE foo.id > 50"#, 201 | rusqlite::params![], 202 | )?; 203 | let r: f64 = con.query_row( 204 | r#"SELECT HYPERMINHASH_DESERIALIZE( 205 | (SELECT HYPERMINHASH_UNION(data) 206 | FROM stats 207 | ) 208 | )"#, 209 | rusqlite::params![], 210 | |row| row.get(0), 211 | )?; 212 | assert!((1.0 - (r / 100.0)).abs() < 0.05); 213 | Ok(()) 214 | } 215 | 216 | test_wrong_type!(union_wrong_type, "HYPERMINHASH_UNION('foo')"); 217 | test_bad_data!(union_bad_data, "HYPERMINHASH_UNION(X'00')"); 218 | 219 | #[test] 220 | fn intersection() -> rusqlite::Result<()> { 221 | let con = init_db()?; 222 | con.execute("CREATE TABLE foo (id INT)", rusqlite::params![])?; 223 | let mut stmt = con.prepare("INSERT INTO foo (id) VALUES (?1)")?; 224 | for i in 0..1000 { 225 | stmt.execute([i])?; 226 | } 227 | let r: f64 = con.query_row( 228 | "SELECT HYPERMINHASH_INTERSECTION( 229 | (SELECT HYPERMINHASH_SERIALIZE(id) FROM foo WHERE id < 750), 230 | (SELECT HYPERMINHASH_SERIALIZE(id) FROM foo WHERE id >= 250) 231 | )", 232 | rusqlite::params![], 233 | |row| row.get(0), 234 | )?; 235 | assert!((1.0 - (r / 500.0)).abs() < 0.05); 236 | Ok(()) 237 | } 238 | 239 | test_wrong_type!( 240 | intersection_wrong_type, 241 | "HYPERMINHASH_INTERSECTION('foo', 'bar')" 242 | ); 243 | test_bad_data!( 244 | intersection_bad_data, 245 | 
"HYPERMINHASH_INTERSECTION(X'00', X'00')" 246 | ); 247 | } 248 | 249 | #[cfg(not(feature = "serialize"))] 250 | pub mod serialize_stub { 251 | use super::*; 252 | 253 | macro_rules! no_such_func { 254 | ($name:ident, $func:literal) => { 255 | #[test] 256 | fn $name() -> rusqlite::Result<()> { 257 | let con = init_db()?; 258 | let r: rusqlite::Result = 259 | con.query_row(&format!("SELECT {}", $func), rusqlite::params![], |row| { 260 | row.get(0) 261 | }); 262 | expect_error_msg(r, "`serialize`-feature", "error not reported: ") 263 | } 264 | }; 265 | } 266 | 267 | no_such_func!(zero_returns_error, "hyperminhash_zero()"); 268 | no_such_func!(serialize_returns_error, "hyperminhash_serialize()"); 269 | no_such_func!(deserialize_returns_error, "hyperminhash_deserialize(X'00')"); 270 | no_such_func!(add_returns_error, "hyperminhash_add()"); 271 | no_such_func!(union_returns_error, "hyperminhash_union(X'00')"); 272 | no_such_func!( 273 | intersection_returns_error, 274 | "hyperminhash_intersection(X'00', X'00')" 275 | ); 276 | } 277 | -------------------------------------------------------------------------------- /tests/util.rs: -------------------------------------------------------------------------------- 1 | static AUTOLOAD: std::sync::Once = std::sync::Once::new(); 2 | 3 | pub fn init_db() -> rusqlite::Result { 4 | AUTOLOAD.call_once(|| { 5 | // https://sqlite.org/c3ref/auto_extension.html 6 | let ptr = sqlite3_hyperminhash::sqlite3_sqlitehyperminhash_init 7 | as unsafe extern "C" fn( 8 | *mut std::ffi::c_void, 9 | *const std::ffi::c_void, 10 | *const std::ffi::c_void, 11 | ) -> i32; 12 | let rc = unsafe { 13 | sqlite3_hyperminhash::testutil::sqlite3_auto_extension(Some(std::mem::transmute(ptr))) 14 | }; 15 | if rc as u32 != sqlite3_hyperminhash::testutil::SQLITE_OK { 16 | let err = unsafe { 17 | std::ffi::CStr::from_ptr(sqlite3_hyperminhash::testutil::sqlite3_errstr(rc)) 18 | .to_str() 19 | } 20 | .unwrap_or("sqlite3_auto_extension failed"); 21 | panic!("{}", err); 
22 | } 23 | }); 24 | rusqlite::Connection::open_in_memory() 25 | } 26 | -------------------------------------------------------------------------------- /wrapper.h: -------------------------------------------------------------------------------- 1 | #include 2 | --------------------------------------------------------------------------------