├── .gitignore ├── GNUmakefile ├── README └── sqlite-hexhammdist.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | -------------------------------------------------------------------------------- /GNUmakefile: -------------------------------------------------------------------------------- 1 | UNAME_S:= $(shell uname -s) 2 | 3 | ifeq ($(UNAME_S),Darwin) 4 | SUFFIX:= dylib 5 | CFLAGS+= -arch i386 -arch x86_64 6 | else 7 | SUFFIX:= so 8 | endif 9 | 10 | CFLAGS+= -fPIC 11 | LIBS+= -lsqlite3 12 | 13 | all: sqlite-hexhammdist.$(SUFFIX) 14 | 15 | sqlite-hexhammdist.$(SUFFIX): sqlite-hexhammdist.c GNUmakefile 16 | $(CC) -shared $(CFLAGS) -o $@ $< $(LIBS) 17 | 18 | clean: 19 | rm -f sqlite-hexhammdist.$(SUFFIX) 20 | 21 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | sqlite-hexhammdist - Hamming distance between hex strings in SQLite 2 | Copyright (C) 2016, Daniel Roethlisberger 3 | 4 | 5 | Synopsis 6 | -------- 7 | This SQLite extension adds a function hexhammdist() to SQLite. hexhammdist() 8 | takes two hex strings as arguments and returns the Hamming distance as integer, 9 | that is, the number of bits the two hex strings differ in. The return value is 10 | undefined if the two strings are not the same length or if the strings contain 11 | anything other than hexadecimal characters (0-9a-fA-F). 12 | 13 | One application for calculating the Hamming distance directly in the database 14 | is perceptual hashing. While the hashing can be done in the calling scripting 15 | language, the actual Hamming calculations need to be done in the database in 16 | order to perform sufficiently well to scale to a large number of pictures. 17 | 18 | 19 | Usage 20 | ----- 21 | To load the extension into an open SQLite database, run the following queries: 22 | 23 | SELECT load_extension('/path/to/sqlite-hexhammdist.so', 'hexhammdist_init'); 24 | 25 | You can then use the function in SQL queries, for example to find photos which 26 | have a perceptual hash that is close in Hamming distance: 27 | 28 | SELECT photo_id, hexhammdist(photo_hash, ?) AS hd FROM photos WHERE hd <= 9; 29 | 30 | When loading extensions from python, the python binary needs to be built with 31 | the --enable-loadable-sqlite-extensions configure argument. You then need to 32 | enable the loading of extensions using: 33 | 34 | dbconn.enable_load_extension(True) 35 | 36 | After that, you can issue the above SELECT statement to load the extension. 37 | 38 | 39 | -------------------------------------------------------------------------------- /sqlite-hexhammdist.c: -------------------------------------------------------------------------------- 1 | /* 2 | * hexhammdist - hamming distance between same-length hex strings in SQLite 3 | * Copyright (c) 2016, Daniel Roethlisberger 4 | * All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions 8 | * are met: 9 | * 1. Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions, and the following disclaimer. 11 | * 2. Redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | #include 27 | #include 28 | SQLITE_EXTENSION_INIT1 29 | #ifdef _WIN32 30 | __declspec(dllexport) 31 | #endif 32 | 33 | #define NAME hexhammdist 34 | 35 | #define STRNX(x) #x 36 | #define STR(x) STRNX(x) 37 | #define CATNX(a, b) a ## b 38 | #define CAT(a, b) CATNX(a, b) 39 | 40 | #ifndef SQLITE_SKIP_UTF8 41 | #define SQLITE_SKIP_UTF8(x) \ 42 | if ((*((x)++)) >= 0xc0) { \ 43 | while ((*(x) & 0xc0) == 0x80){ (x)++; } \ 44 | } 45 | #endif 46 | 47 | #define BST2(n) n, n+1, n+1, n+2 48 | #define BST4(n) BST2(n), BST2(n+1), BST2(n+1), BST2(n+2) 49 | #define BST6(n) BST4(n), BST4(n+1), BST4(n+1), BST4(n+2) 50 | static const unsigned char bitssettab[256] = { 51 | BST6(0), BST6(1), BST6(1), BST6(2) 52 | }; 53 | #undef BST2 54 | #undef BST4 55 | #undef BST6 56 | 57 | static const unsigned char unhextab[256] = { 58 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 59 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 60 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 61 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 62 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 63 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 64 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 65 | 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 66 | 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 67 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 68 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 69 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 70 | 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 71 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 72 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 73 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 74 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 75 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 76 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 77 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 78 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 79 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 80 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 81 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 82 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 83 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 84 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 85 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 86 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 87 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 88 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 89 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 90 | }; 91 | 92 | static void 93 | CAT(NAME, _func)(sqlite3_context *ctx, int args, sqlite3_value **argv) 94 | { 95 | const unsigned char *a, *b; 96 | int res = 0; 97 | 98 | if (args != 2) { 99 | sqlite3_result_error(ctx, STR(NAME)": need 2 args", -1); 100 | return; 101 | } 102 | 103 | a = sqlite3_value_text(argv[0]); 104 | b = sqlite3_value_text(argv[1]); 105 | if (!a) { 106 | sqlite3_result_error(ctx, STR(NAME)": arg 1 has no text value", 107 | -1); 108 | return; 109 | } 110 | if (!b) { 111 | sqlite3_result_error(ctx, STR(NAME)": arg 2 has no text value", 112 | -1); 113 | return; 114 | } 115 | while (*a && *b) { 116 | res += bitssettab[unhextab[*a] ^ unhextab[*b]]; 117 | SQLITE_SKIP_UTF8(a); 118 | SQLITE_SKIP_UTF8(b); 119 | } 120 | if (*a || *b) { 121 | sqlite3_result_error(ctx, STR(NAME)": args not same length", 122 | -1); 123 | return; 124 | } 125 | sqlite3_result_int(ctx, res); 126 | } 127 | 128 | int 129 | CAT(NAME, _init)(sqlite3 *db, char **errmsg, const sqlite3_api_routines *api) 130 | { 131 | SQLITE_EXTENSION_INIT2(api); 132 | 133 | return sqlite3_create_function_v2(db, STR(NAME), 2, SQLITE_UTF8, NULL, 134 | CAT(NAME, _func), NULL, NULL, NULL); 135 | } 136 | 137 | --------------------------------------------------------------------------------