├── .github └── workflows │ ├── release.yaml │ └── test.yaml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── Makefile ├── README.md ├── VERSION ├── build.rs ├── examples └── simple-search │ └── demo.sql ├── scripts └── publish-release.sh ├── sqlite-dist.toml ├── sqlite-rembed.h ├── src ├── clients.rs ├── clients_vtab.rs └── lib.rs └── test.sql /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: "Release" 2 | on: 3 | release: 4 | types: [published] 5 | permissions: 6 | contents: read 7 | jobs: 8 | build-linux-x86_64-extension: 9 | runs-on: ubuntu-20.04 10 | steps: 11 | - uses: actions/checkout@v4 12 | - run: make loadable-release 13 | - uses: actions/upload-artifact@v4 14 | with: 15 | name: sqlite-rembed-linux-x86_64-extension 16 | path: dist/release/* 17 | build-macos-x86_64-extension: 18 | runs-on: macos-12 19 | steps: 20 | - uses: actions/checkout@v4 21 | - run: make loadable-release 22 | - uses: actions/upload-artifact@v4 23 | with: 24 | name: sqlite-rembed-macos-x86_64-extension 25 | path: dist/release/* 26 | build-macos-aarch64-extension: 27 | runs-on: macos-14 28 | steps: 29 | - uses: actions/checkout@v4 30 | - run: make loadable-release 31 | - uses: actions/upload-artifact@v4 32 | with: 33 | name: sqlite-rembed-macos-aarch64-extension 34 | path: dist/release/* 35 | build-windows-x86_64-extension: 36 | runs-on: windows-2019 37 | steps: 38 | - uses: actions/checkout@v4 39 | - uses: actions-rs/toolchain@v1 40 | with: 41 | toolchain: stable 42 | - run: make loadable-release 43 | - uses: actions/upload-artifact@v4 44 | with: 45 | name: sqlite-rembed-windows-x86_64-extension 46 | path: dist/release/* 47 | dist: 48 | runs-on: ubuntu-latest 49 | needs: 50 | [ 51 | build-linux-x86_64-extension, 52 | build-macos-x86_64-extension, 53 | build-macos-aarch64-extension, 54 | build-windows-x86_64-extension, 55 | ] 56 | permissions: 57 | contents: write 58 | steps: 59 
| - uses: actions/checkout@v4 60 | - uses: actions/download-artifact@v4 61 | with: 62 | name: sqlite-rembed-linux-x86_64-extension 63 | path: dist/linux-x86_64 64 | - uses: actions/download-artifact@v4 65 | with: 66 | name: sqlite-rembed-macos-x86_64-extension 67 | path: dist/macos-x86_64 68 | - uses: actions/download-artifact@v4 69 | with: 70 | name: sqlite-rembed-macos-aarch64-extension 71 | path: dist/macos-aarch64 72 | - uses: actions/download-artifact@v4 73 | with: 74 | name: sqlite-rembed-windows-x86_64-extension 75 | path: dist/windows-x86_64 76 | - run: | 77 | curl -L https://github.com/asg017/sqlite-dist/releases/download/v0.0.1-alpha.7/sqlite-dist-x86_64-unknown-linux-gnu.tar.xz \ 78 | | tar xfJ - --strip-components 1 79 | - run: make sqlite-rembed.h 80 | - run: ./sqlite-dist ./sqlite-dist.toml --input dist/ --output distx/ --version $(cat VERSION) 81 | - run: | 82 | gh release upload ${{ github.ref_name }} \ 83 | distx/github_releases/* \ 84 | distx/spm/* \ 85 | distx/sqlpkg/* \ 86 | distx/checksums.txt \ 87 | distx/sqlite-dist-manifest.json \ 88 | distx/install.sh 89 | env: 90 | GH_TOKEN: ${{ github.token }} 91 | - name: Install node 92 | uses: actions/setup-node@v3 93 | with: 94 | node-version: "16" 95 | registry-url: "https://registry.npmjs.org" 96 | - run: | 97 | npm publish --access public distx/npm/sqlite-rembed-darwin-arm64.tar.gz 98 | npm publish --access public distx/npm/sqlite-rembed-darwin-x64.tar.gz 99 | npm publish --access public distx/npm/sqlite-rembed-linux-x64.tar.gz 100 | npm publish --access public distx/npm/sqlite-rembed.tar.gz 101 | env: 102 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 103 | - uses: ruby/setup-ruby@v1 104 | with: 105 | ruby-version: 3.2 106 | - run: | 107 | for file in distx/gem/*; do 108 | gem push "$file" 109 | done 110 | env: 111 | GEM_HOST_API_KEY: ${{ secrets.GEM_HOST_API_KEY }} 112 | - uses: actions/setup-python@v5 113 | with: 114 | python-version: "3.12" 115 | - run: pip install twine 116 | - run: | 117 | twine 
upload distx/pip/* 118 | twine upload distx/datasette/* 119 | twine upload distx/sqlite_utils/* 120 | env: 121 | TWINE_USERNAME: __token__ 122 | TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 123 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: "Test" 2 | on: 3 | push: 4 | branches: 5 | - main 6 | permissions: 7 | contents: read 8 | jobs: 9 | build-linux-x86_64-extension: 10 | runs-on: ubuntu-20.04 11 | steps: 12 | - uses: actions/checkout@v4 13 | - uses: actions-rs/toolchain@v1 14 | with: 15 | toolchain: stable 16 | - run: make loadable static 17 | #- run: pip install pytest numpy; make test-loadable 18 | - uses: actions/upload-artifact@v4 19 | with: 20 | name: sqlite-rembed-linux-x86_64-extension 21 | path: dist/* 22 | build-macos-x86_64-extension: 23 | runs-on: macos-12 24 | steps: 25 | - uses: actions/checkout@v4 26 | - uses: actions-rs/toolchain@v1 27 | with: 28 | toolchain: stable 29 | - run: make loadable static 30 | #- run: /usr/local/opt/python@3/libexec/bin/python -m pip install pytest numpy; make test-loadable python=/usr/local/opt/python@3/libexec/bin/python 31 | - uses: actions/upload-artifact@v4 32 | with: 33 | name: sqlite-rembed-macos-x86_64-extension 34 | path: dist/* 35 | build-macos-aarch64-extension: 36 | runs-on: macos-14 37 | steps: 38 | - uses: actions/checkout@v4 39 | - uses: actions-rs/toolchain@v1 40 | with: 41 | toolchain: stable 42 | - run: make loadable static 43 | #- run: /opt/homebrew/opt/python3/libexec/bin/python -m pip install pytest numpy --break-system-packages; make test-loadable python=/opt/homebrew/opt/python3/libexec/bin/python 44 | - uses: actions/upload-artifact@v4 45 | with: 46 | name: sqlite-rembed-macos-aarch64-extension 47 | path: dist/* 48 | build-windows-x86_64-extension: 49 | runs-on: windows-2019 50 | steps: 51 | - uses: actions/checkout@v4 52 | - uses: 
actions-rs/toolchain@v1 53 | with: 54 | toolchain: stable 55 | - run: make loadable static 56 | #- run: pip install pytest numpy; make test-loadable 57 | - uses: actions/upload-artifact@v4 58 | with: 59 | name: sqlite-rembed-windows-x86_64-extension 60 | path: dist/* 61 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .env 3 | dist/ 4 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "adler" 7 | version = "1.0.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 10 | 11 | [[package]] 12 | name = "aho-corasick" 13 | version = "1.1.3" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 16 | dependencies = [ 17 | "memchr", 18 | ] 19 | 20 | [[package]] 21 | name = "atty" 22 | version = "0.2.14" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 25 | dependencies = [ 26 | "hermit-abi", 27 | "libc", 28 | "winapi", 29 | ] 30 | 31 | [[package]] 32 | name = "autocfg" 33 | version = "1.3.0" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" 36 | 37 | [[package]] 38 | name = "base64" 39 | version = "0.22.1" 40 | source = "registry+https://github.com/rust-lang/crates.io-index" 41 | checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" 42 | 43 | 
[[package]] 44 | name = "bindgen" 45 | version = "0.60.1" 46 | source = "registry+https://github.com/rust-lang/crates.io-index" 47 | checksum = "062dddbc1ba4aca46de6338e2bf87771414c335f7b2f2036e8f3e9befebf88e6" 48 | dependencies = [ 49 | "bitflags 1.3.2", 50 | "cexpr", 51 | "clang-sys", 52 | "clap", 53 | "env_logger", 54 | "lazy_static", 55 | "lazycell", 56 | "log", 57 | "peeking_take_while", 58 | "proc-macro2", 59 | "quote", 60 | "regex", 61 | "rustc-hash", 62 | "shlex", 63 | "which", 64 | ] 65 | 66 | [[package]] 67 | name = "bitflags" 68 | version = "1.3.2" 69 | source = "registry+https://github.com/rust-lang/crates.io-index" 70 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 71 | 72 | [[package]] 73 | name = "bitflags" 74 | version = "2.5.0" 75 | source = "registry+https://github.com/rust-lang/crates.io-index" 76 | checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" 77 | 78 | [[package]] 79 | name = "byteorder" 80 | version = "1.5.0" 81 | source = "registry+https://github.com/rust-lang/crates.io-index" 82 | checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 83 | 84 | [[package]] 85 | name = "cc" 86 | version = "1.0.98" 87 | source = "registry+https://github.com/rust-lang/crates.io-index" 88 | checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" 89 | 90 | [[package]] 91 | name = "cexpr" 92 | version = "0.6.0" 93 | source = "registry+https://github.com/rust-lang/crates.io-index" 94 | checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" 95 | dependencies = [ 96 | "nom", 97 | ] 98 | 99 | [[package]] 100 | name = "cfg-if" 101 | version = "1.0.0" 102 | source = "registry+https://github.com/rust-lang/crates.io-index" 103 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 104 | 105 | [[package]] 106 | name = "clang-sys" 107 | version = "1.8.2" 108 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 109 | checksum = "f803f94ecf597339c7a34eed2036ef83f86aaba937f001f7c5b5e251f043f1f9" 110 | dependencies = [ 111 | "glob", 112 | "libc", 113 | "libloading", 114 | ] 115 | 116 | [[package]] 117 | name = "clap" 118 | version = "3.2.25" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" 121 | dependencies = [ 122 | "atty", 123 | "bitflags 1.3.2", 124 | "clap_lex", 125 | "indexmap", 126 | "strsim", 127 | "termcolor", 128 | "textwrap", 129 | ] 130 | 131 | [[package]] 132 | name = "clap_lex" 133 | version = "0.2.4" 134 | source = "registry+https://github.com/rust-lang/crates.io-index" 135 | checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" 136 | dependencies = [ 137 | "os_str_bytes", 138 | ] 139 | 140 | [[package]] 141 | name = "crc32fast" 142 | version = "1.4.2" 143 | source = "registry+https://github.com/rust-lang/crates.io-index" 144 | checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" 145 | dependencies = [ 146 | "cfg-if", 147 | ] 148 | 149 | [[package]] 150 | name = "either" 151 | version = "1.12.0" 152 | source = "registry+https://github.com/rust-lang/crates.io-index" 153 | checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" 154 | 155 | [[package]] 156 | name = "env_logger" 157 | version = "0.9.3" 158 | source = "registry+https://github.com/rust-lang/crates.io-index" 159 | checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" 160 | dependencies = [ 161 | "atty", 162 | "humantime", 163 | "log", 164 | "regex", 165 | "termcolor", 166 | ] 167 | 168 | [[package]] 169 | name = "errno" 170 | version = "0.3.9" 171 | source = "registry+https://github.com/rust-lang/crates.io-index" 172 | checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" 173 | dependencies = [ 174 | "libc", 175 | 
"windows-sys", 176 | ] 177 | 178 | [[package]] 179 | name = "flate2" 180 | version = "1.0.30" 181 | source = "registry+https://github.com/rust-lang/crates.io-index" 182 | checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" 183 | dependencies = [ 184 | "crc32fast", 185 | "miniz_oxide", 186 | ] 187 | 188 | [[package]] 189 | name = "form_urlencoded" 190 | version = "1.2.1" 191 | source = "registry+https://github.com/rust-lang/crates.io-index" 192 | checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" 193 | dependencies = [ 194 | "percent-encoding", 195 | ] 196 | 197 | [[package]] 198 | name = "getrandom" 199 | version = "0.2.15" 200 | source = "registry+https://github.com/rust-lang/crates.io-index" 201 | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" 202 | dependencies = [ 203 | "cfg-if", 204 | "libc", 205 | "wasi", 206 | ] 207 | 208 | [[package]] 209 | name = "glob" 210 | version = "0.3.1" 211 | source = "registry+https://github.com/rust-lang/crates.io-index" 212 | checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" 213 | 214 | [[package]] 215 | name = "hashbrown" 216 | version = "0.12.3" 217 | source = "registry+https://github.com/rust-lang/crates.io-index" 218 | checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" 219 | 220 | [[package]] 221 | name = "hermit-abi" 222 | version = "0.1.19" 223 | source = "registry+https://github.com/rust-lang/crates.io-index" 224 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 225 | dependencies = [ 226 | "libc", 227 | ] 228 | 229 | [[package]] 230 | name = "home" 231 | version = "0.5.9" 232 | source = "registry+https://github.com/rust-lang/crates.io-index" 233 | checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" 234 | dependencies = [ 235 | "windows-sys", 236 | ] 237 | 238 | [[package]] 239 | name = "humantime" 240 | version = "2.1.0" 241 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 242 | checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 243 | 244 | [[package]] 245 | name = "idna" 246 | version = "0.5.0" 247 | source = "registry+https://github.com/rust-lang/crates.io-index" 248 | checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" 249 | dependencies = [ 250 | "unicode-bidi", 251 | "unicode-normalization", 252 | ] 253 | 254 | [[package]] 255 | name = "indexmap" 256 | version = "1.9.3" 257 | source = "registry+https://github.com/rust-lang/crates.io-index" 258 | checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" 259 | dependencies = [ 260 | "autocfg", 261 | "hashbrown", 262 | ] 263 | 264 | [[package]] 265 | name = "itoa" 266 | version = "1.0.11" 267 | source = "registry+https://github.com/rust-lang/crates.io-index" 268 | checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" 269 | 270 | [[package]] 271 | name = "lazy_static" 272 | version = "1.4.0" 273 | source = "registry+https://github.com/rust-lang/crates.io-index" 274 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 275 | 276 | [[package]] 277 | name = "lazycell" 278 | version = "1.3.0" 279 | source = "registry+https://github.com/rust-lang/crates.io-index" 280 | checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" 281 | 282 | [[package]] 283 | name = "libc" 284 | version = "0.2.155" 285 | source = "registry+https://github.com/rust-lang/crates.io-index" 286 | checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" 287 | 288 | [[package]] 289 | name = "libloading" 290 | version = "0.8.3" 291 | source = "registry+https://github.com/rust-lang/crates.io-index" 292 | checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" 293 | dependencies = [ 294 | "cfg-if", 295 | "windows-targets", 296 | ] 297 | 298 | [[package]] 299 | name = 
"linux-raw-sys" 300 | version = "0.4.14" 301 | source = "registry+https://github.com/rust-lang/crates.io-index" 302 | checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" 303 | 304 | [[package]] 305 | name = "log" 306 | version = "0.4.21" 307 | source = "registry+https://github.com/rust-lang/crates.io-index" 308 | checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" 309 | 310 | [[package]] 311 | name = "memchr" 312 | version = "2.7.2" 313 | source = "registry+https://github.com/rust-lang/crates.io-index" 314 | checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" 315 | 316 | [[package]] 317 | name = "minimal-lexical" 318 | version = "0.2.1" 319 | source = "registry+https://github.com/rust-lang/crates.io-index" 320 | checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" 321 | 322 | [[package]] 323 | name = "miniz_oxide" 324 | version = "0.7.3" 325 | source = "registry+https://github.com/rust-lang/crates.io-index" 326 | checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" 327 | dependencies = [ 328 | "adler", 329 | ] 330 | 331 | [[package]] 332 | name = "nom" 333 | version = "7.1.3" 334 | source = "registry+https://github.com/rust-lang/crates.io-index" 335 | checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" 336 | dependencies = [ 337 | "memchr", 338 | "minimal-lexical", 339 | ] 340 | 341 | [[package]] 342 | name = "once_cell" 343 | version = "1.19.0" 344 | source = "registry+https://github.com/rust-lang/crates.io-index" 345 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" 346 | 347 | [[package]] 348 | name = "os_str_bytes" 349 | version = "6.6.1" 350 | source = "registry+https://github.com/rust-lang/crates.io-index" 351 | checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" 352 | 353 | [[package]] 354 | name = "peeking_take_while" 355 | version = "0.1.2" 356 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 357 | checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" 358 | 359 | [[package]] 360 | name = "percent-encoding" 361 | version = "2.3.1" 362 | source = "registry+https://github.com/rust-lang/crates.io-index" 363 | checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" 364 | 365 | [[package]] 366 | name = "proc-macro2" 367 | version = "1.0.84" 368 | source = "registry+https://github.com/rust-lang/crates.io-index" 369 | checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6" 370 | dependencies = [ 371 | "unicode-ident", 372 | ] 373 | 374 | [[package]] 375 | name = "quote" 376 | version = "1.0.36" 377 | source = "registry+https://github.com/rust-lang/crates.io-index" 378 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" 379 | dependencies = [ 380 | "proc-macro2", 381 | ] 382 | 383 | [[package]] 384 | name = "regex" 385 | version = "1.10.4" 386 | source = "registry+https://github.com/rust-lang/crates.io-index" 387 | checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" 388 | dependencies = [ 389 | "aho-corasick", 390 | "memchr", 391 | "regex-automata", 392 | "regex-syntax", 393 | ] 394 | 395 | [[package]] 396 | name = "regex-automata" 397 | version = "0.4.6" 398 | source = "registry+https://github.com/rust-lang/crates.io-index" 399 | checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" 400 | dependencies = [ 401 | "aho-corasick", 402 | "memchr", 403 | "regex-syntax", 404 | ] 405 | 406 | [[package]] 407 | name = "regex-syntax" 408 | version = "0.8.3" 409 | source = "registry+https://github.com/rust-lang/crates.io-index" 410 | checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" 411 | 412 | [[package]] 413 | name = "ring" 414 | version = "0.17.8" 415 | source = "registry+https://github.com/rust-lang/crates.io-index" 416 | checksum = 
"c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" 417 | dependencies = [ 418 | "cc", 419 | "cfg-if", 420 | "getrandom", 421 | "libc", 422 | "spin", 423 | "untrusted", 424 | "windows-sys", 425 | ] 426 | 427 | [[package]] 428 | name = "rustc-hash" 429 | version = "1.1.0" 430 | source = "registry+https://github.com/rust-lang/crates.io-index" 431 | checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" 432 | 433 | [[package]] 434 | name = "rustix" 435 | version = "0.38.34" 436 | source = "registry+https://github.com/rust-lang/crates.io-index" 437 | checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" 438 | dependencies = [ 439 | "bitflags 2.5.0", 440 | "errno", 441 | "libc", 442 | "linux-raw-sys", 443 | "windows-sys", 444 | ] 445 | 446 | [[package]] 447 | name = "rustls" 448 | version = "0.22.4" 449 | source = "registry+https://github.com/rust-lang/crates.io-index" 450 | checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" 451 | dependencies = [ 452 | "log", 453 | "ring", 454 | "rustls-pki-types", 455 | "rustls-webpki", 456 | "subtle", 457 | "zeroize", 458 | ] 459 | 460 | [[package]] 461 | name = "rustls-pki-types" 462 | version = "1.7.0" 463 | source = "registry+https://github.com/rust-lang/crates.io-index" 464 | checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" 465 | 466 | [[package]] 467 | name = "rustls-webpki" 468 | version = "0.102.4" 469 | source = "registry+https://github.com/rust-lang/crates.io-index" 470 | checksum = "ff448f7e92e913c4b7d4c6d8e4540a1724b319b4152b8aef6d4cf8339712b33e" 471 | dependencies = [ 472 | "ring", 473 | "rustls-pki-types", 474 | "untrusted", 475 | ] 476 | 477 | [[package]] 478 | name = "ryu" 479 | version = "1.0.18" 480 | source = "registry+https://github.com/rust-lang/crates.io-index" 481 | checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" 482 | 483 | [[package]] 484 | name = "serde" 485 | 
version = "1.0.203" 486 | source = "registry+https://github.com/rust-lang/crates.io-index" 487 | checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" 488 | dependencies = [ 489 | "serde_derive", 490 | ] 491 | 492 | [[package]] 493 | name = "serde_derive" 494 | version = "1.0.203" 495 | source = "registry+https://github.com/rust-lang/crates.io-index" 496 | checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" 497 | dependencies = [ 498 | "proc-macro2", 499 | "quote", 500 | "syn 2.0.66", 501 | ] 502 | 503 | [[package]] 504 | name = "serde_json" 505 | version = "1.0.117" 506 | source = "registry+https://github.com/rust-lang/crates.io-index" 507 | checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" 508 | dependencies = [ 509 | "itoa", 510 | "ryu", 511 | "serde", 512 | ] 513 | 514 | [[package]] 515 | name = "shlex" 516 | version = "1.3.0" 517 | source = "registry+https://github.com/rust-lang/crates.io-index" 518 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 519 | 520 | [[package]] 521 | name = "spin" 522 | version = "0.9.8" 523 | source = "registry+https://github.com/rust-lang/crates.io-index" 524 | checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" 525 | 526 | [[package]] 527 | name = "sqlite-loadable" 528 | version = "0.0.6-alpha.6" 529 | source = "registry+https://github.com/rust-lang/crates.io-index" 530 | checksum = "daaaad0ad506b154a72bf01fde23235377c01256abd4bd25e17419dbfd4e28a0" 531 | dependencies = [ 532 | "bitflags 1.3.2", 533 | "serde", 534 | "serde_json", 535 | "sqlite-loadable-macros", 536 | "sqlite3ext-sys", 537 | ] 538 | 539 | [[package]] 540 | name = "sqlite-loadable-macros" 541 | version = "0.0.3" 542 | source = "registry+https://github.com/rust-lang/crates.io-index" 543 | checksum = "96037a396115a2675db783f700faad878b44c8ff56c8a29c3404649a517a5e8f" 544 | dependencies = [ 545 | "proc-macro2", 546 | "quote", 547 | "syn 
1.0.109", 548 | ] 549 | 550 | [[package]] 551 | name = "sqlite-rembed" 552 | version = "0.0.1-alpha.9" 553 | dependencies = [ 554 | "serde_json", 555 | "sqlite-loadable", 556 | "ureq", 557 | "zerocopy", 558 | ] 559 | 560 | [[package]] 561 | name = "sqlite3ext-sys" 562 | version = "0.0.1" 563 | source = "registry+https://github.com/rust-lang/crates.io-index" 564 | checksum = "3afdc2b3dc08f16d6eecf8aa07d19975a268603ab1cca67d3f9b4172c507cf16" 565 | dependencies = [ 566 | "bindgen", 567 | "cc", 568 | ] 569 | 570 | [[package]] 571 | name = "strsim" 572 | version = "0.10.0" 573 | source = "registry+https://github.com/rust-lang/crates.io-index" 574 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" 575 | 576 | [[package]] 577 | name = "subtle" 578 | version = "2.5.0" 579 | source = "registry+https://github.com/rust-lang/crates.io-index" 580 | checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" 581 | 582 | [[package]] 583 | name = "syn" 584 | version = "1.0.109" 585 | source = "registry+https://github.com/rust-lang/crates.io-index" 586 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 587 | dependencies = [ 588 | "proc-macro2", 589 | "quote", 590 | "unicode-ident", 591 | ] 592 | 593 | [[package]] 594 | name = "syn" 595 | version = "2.0.66" 596 | source = "registry+https://github.com/rust-lang/crates.io-index" 597 | checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" 598 | dependencies = [ 599 | "proc-macro2", 600 | "quote", 601 | "unicode-ident", 602 | ] 603 | 604 | [[package]] 605 | name = "termcolor" 606 | version = "1.4.1" 607 | source = "registry+https://github.com/rust-lang/crates.io-index" 608 | checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" 609 | dependencies = [ 610 | "winapi-util", 611 | ] 612 | 613 | [[package]] 614 | name = "textwrap" 615 | version = "0.16.1" 616 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 617 | checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" 618 | 619 | [[package]] 620 | name = "tinyvec" 621 | version = "1.6.0" 622 | source = "registry+https://github.com/rust-lang/crates.io-index" 623 | checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" 624 | dependencies = [ 625 | "tinyvec_macros", 626 | ] 627 | 628 | [[package]] 629 | name = "tinyvec_macros" 630 | version = "0.1.1" 631 | source = "registry+https://github.com/rust-lang/crates.io-index" 632 | checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" 633 | 634 | [[package]] 635 | name = "unicode-bidi" 636 | version = "0.3.15" 637 | source = "registry+https://github.com/rust-lang/crates.io-index" 638 | checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" 639 | 640 | [[package]] 641 | name = "unicode-ident" 642 | version = "1.0.12" 643 | source = "registry+https://github.com/rust-lang/crates.io-index" 644 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 645 | 646 | [[package]] 647 | name = "unicode-normalization" 648 | version = "0.1.23" 649 | source = "registry+https://github.com/rust-lang/crates.io-index" 650 | checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" 651 | dependencies = [ 652 | "tinyvec", 653 | ] 654 | 655 | [[package]] 656 | name = "untrusted" 657 | version = "0.9.0" 658 | source = "registry+https://github.com/rust-lang/crates.io-index" 659 | checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" 660 | 661 | [[package]] 662 | name = "ureq" 663 | version = "2.9.7" 664 | source = "registry+https://github.com/rust-lang/crates.io-index" 665 | checksum = "d11a831e3c0b56e438a28308e7c810799e3c118417f342d30ecec080105395cd" 666 | dependencies = [ 667 | "base64", 668 | "flate2", 669 | "log", 670 | "once_cell", 671 | "rustls", 672 | "rustls-pki-types", 673 | 
"rustls-webpki", 674 | "serde", 675 | "serde_json", 676 | "url", 677 | "webpki-roots", 678 | ] 679 | 680 | [[package]] 681 | name = "url" 682 | version = "2.5.0" 683 | source = "registry+https://github.com/rust-lang/crates.io-index" 684 | checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" 685 | dependencies = [ 686 | "form_urlencoded", 687 | "idna", 688 | "percent-encoding", 689 | ] 690 | 691 | [[package]] 692 | name = "wasi" 693 | version = "0.11.0+wasi-snapshot-preview1" 694 | source = "registry+https://github.com/rust-lang/crates.io-index" 695 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 696 | 697 | [[package]] 698 | name = "webpki-roots" 699 | version = "0.26.1" 700 | source = "registry+https://github.com/rust-lang/crates.io-index" 701 | checksum = "b3de34ae270483955a94f4b21bdaaeb83d508bb84a01435f393818edb0012009" 702 | dependencies = [ 703 | "rustls-pki-types", 704 | ] 705 | 706 | [[package]] 707 | name = "which" 708 | version = "4.4.2" 709 | source = "registry+https://github.com/rust-lang/crates.io-index" 710 | checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" 711 | dependencies = [ 712 | "either", 713 | "home", 714 | "once_cell", 715 | "rustix", 716 | ] 717 | 718 | [[package]] 719 | name = "winapi" 720 | version = "0.3.9" 721 | source = "registry+https://github.com/rust-lang/crates.io-index" 722 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 723 | dependencies = [ 724 | "winapi-i686-pc-windows-gnu", 725 | "winapi-x86_64-pc-windows-gnu", 726 | ] 727 | 728 | [[package]] 729 | name = "winapi-i686-pc-windows-gnu" 730 | version = "0.4.0" 731 | source = "registry+https://github.com/rust-lang/crates.io-index" 732 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 733 | 734 | [[package]] 735 | name = "winapi-util" 736 | version = "0.1.8" 737 | source = "registry+https://github.com/rust-lang/crates.io-index" 738 | 
checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" 739 | dependencies = [ 740 | "windows-sys", 741 | ] 742 | 743 | [[package]] 744 | name = "winapi-x86_64-pc-windows-gnu" 745 | version = "0.4.0" 746 | source = "registry+https://github.com/rust-lang/crates.io-index" 747 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 748 | 749 | [[package]] 750 | name = "windows-sys" 751 | version = "0.52.0" 752 | source = "registry+https://github.com/rust-lang/crates.io-index" 753 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 754 | dependencies = [ 755 | "windows-targets", 756 | ] 757 | 758 | [[package]] 759 | name = "windows-targets" 760 | version = "0.52.5" 761 | source = "registry+https://github.com/rust-lang/crates.io-index" 762 | checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" 763 | dependencies = [ 764 | "windows_aarch64_gnullvm", 765 | "windows_aarch64_msvc", 766 | "windows_i686_gnu", 767 | "windows_i686_gnullvm", 768 | "windows_i686_msvc", 769 | "windows_x86_64_gnu", 770 | "windows_x86_64_gnullvm", 771 | "windows_x86_64_msvc", 772 | ] 773 | 774 | [[package]] 775 | name = "windows_aarch64_gnullvm" 776 | version = "0.52.5" 777 | source = "registry+https://github.com/rust-lang/crates.io-index" 778 | checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" 779 | 780 | [[package]] 781 | name = "windows_aarch64_msvc" 782 | version = "0.52.5" 783 | source = "registry+https://github.com/rust-lang/crates.io-index" 784 | checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" 785 | 786 | [[package]] 787 | name = "windows_i686_gnu" 788 | version = "0.52.5" 789 | source = "registry+https://github.com/rust-lang/crates.io-index" 790 | checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" 791 | 792 | [[package]] 793 | name = "windows_i686_gnullvm" 794 | version = "0.52.5" 795 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 796 | checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" 797 | 798 | [[package]] 799 | name = "windows_i686_msvc" 800 | version = "0.52.5" 801 | source = "registry+https://github.com/rust-lang/crates.io-index" 802 | checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" 803 | 804 | [[package]] 805 | name = "windows_x86_64_gnu" 806 | version = "0.52.5" 807 | source = "registry+https://github.com/rust-lang/crates.io-index" 808 | checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" 809 | 810 | [[package]] 811 | name = "windows_x86_64_gnullvm" 812 | version = "0.52.5" 813 | source = "registry+https://github.com/rust-lang/crates.io-index" 814 | checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" 815 | 816 | [[package]] 817 | name = "windows_x86_64_msvc" 818 | version = "0.52.5" 819 | source = "registry+https://github.com/rust-lang/crates.io-index" 820 | checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" 821 | 822 | [[package]] 823 | name = "zerocopy" 824 | version = "0.7.34" 825 | source = "registry+https://github.com/rust-lang/crates.io-index" 826 | checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" 827 | dependencies = [ 828 | "byteorder", 829 | "zerocopy-derive", 830 | ] 831 | 832 | [[package]] 833 | name = "zerocopy-derive" 834 | version = "0.7.34" 835 | source = "registry+https://github.com/rust-lang/crates.io-index" 836 | checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" 837 | dependencies = [ 838 | "proc-macro2", 839 | "quote", 840 | "syn 2.0.66", 841 | ] 842 | 843 | [[package]] 844 | name = "zeroize" 845 | version = "1.8.1" 846 | source = "registry+https://github.com/rust-lang/crates.io-index" 847 | checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" 848 | 
-------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sqlite-rembed" 3 | version = "0.0.1-alpha.9" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | serde_json = "1.0.117" 8 | sqlite-loadable = "0.0.6-alpha.6" 9 | ureq = {version="2.9.7", features=["json"]} 10 | zerocopy = "0.7.34" 11 | 12 | [lib] 13 | crate-type=["cdylib", "staticlib", "lib"] 14 | 15 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Alex Garcia 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SHELL := /bin/bash 2 | 3 | VERSION=$(shell cat VERSION) 4 | 5 | ifeq ($(shell uname -s),Darwin) 6 | CONFIG_DARWIN=y 7 | else ifeq ($(OS),Windows_NT) 8 | CONFIG_WINDOWS=y 9 | else 10 | CONFIG_LINUX=y 11 | endif 12 | 13 | LIBRARY_PREFIX=lib 14 | ifdef CONFIG_DARWIN 15 | LOADABLE_EXTENSION=dylib 16 | STATIC_EXTENSION=a 17 | endif 18 | 19 | ifdef CONFIG_LINUX 20 | LOADABLE_EXTENSION=so 21 | STATIC_EXTENSION=a 22 | endif 23 | 24 | 25 | ifdef CONFIG_WINDOWS 26 | LOADABLE_EXTENSION=dll 27 | LIBRARY_PREFIX= 28 | STATIC_EXTENSION=lib 29 | endif 30 | 31 | prefix=dist 32 | TARGET_LOADABLE=$(prefix)/debug/rembed0.$(LOADABLE_EXTENSION) 33 | TARGET_LOADABLE_RELEASE=$(prefix)/release/rembed0.$(LOADABLE_EXTENSION) 34 | 35 | TARGET_STATIC=$(prefix)/debug/$(LIBRARY_PREFIX)sqlite_rembed0.$(STATIC_EXTENSION) 36 | TARGET_STATIC_RELEASE=$(prefix)/release/$(LIBRARY_PREFIX)sqlite_rembed0.$(STATIC_EXTENSION) 37 | 38 | TARGET_H=$(prefix)/debug/sqlite-rembed.h 39 | TARGET_H_RELEASE=$(prefix)/release/sqlite-rembed.h 40 | 41 | TARGET_WHEELS=$(prefix)/debug/wheels 42 | TARGET_WHEELS_RELEASE=$(prefix)/release/wheels 43 | 44 | INTERMEDIATE_PYPACKAGE_EXTENSION=python/sqlite_rembed/sqlite_rembed/rembed0.$(LOADABLE_EXTENSION) 45 | 46 | ifdef target 47 | CARGO_TARGET=--target=$(target) 48 | BUILT_LOCATION=target/$(target)/debug/$(LIBRARY_PREFIX)sqlite_rembed.$(LOADABLE_EXTENSION) 49 | BUILT_LOCATION_RELEASE=target/$(target)/release/$(LIBRARY_PREFIX)sqlite_rembed.$(LOADABLE_EXTENSION) 50 | 
BUILT_LOCATION_STATIC=target/$(target)/debug/$(LIBRARY_PREFIX)sqlite_rembed.$(STATIC_EXTENSION) 51 | BUILT_LOCATION_STATIC_RELEASE=target/$(target)/release/$(LIBRARY_PREFIX)sqlite_rembed.$(STATIC_EXTENSION) 52 | else 53 | CARGO_TARGET= 54 | BUILT_LOCATION=target/debug/$(LIBRARY_PREFIX)sqlite_rembed.$(LOADABLE_EXTENSION) 55 | BUILT_LOCATION_RELEASE=target/release/$(LIBRARY_PREFIX)sqlite_rembed.$(LOADABLE_EXTENSION) 56 | BUILT_LOCATION_STATIC=target/debug/$(LIBRARY_PREFIX)sqlite_rembed.$(STATIC_EXTENSION) 57 | BUILT_LOCATION_STATIC_RELEASE=target/release/$(LIBRARY_PREFIX)sqlite_rembed.$(STATIC_EXTENSION) 58 | endif 59 | 60 | ifdef python 61 | PYTHON=$(python) 62 | else 63 | PYTHON=python3 64 | endif 65 | 66 | ifdef IS_MACOS_ARM 67 | RENAME_WHEELS_ARGS=--is-macos-arm 68 | else 69 | RENAME_WHEELS_ARGS= 70 | endif 71 | 72 | $(prefix): 73 | mkdir -p $(prefix)/debug 74 | mkdir -p $(prefix)/release 75 | 76 | $(TARGET_WHEELS): $(prefix) 77 | mkdir -p $(TARGET_WHEELS) 78 | 79 | $(TARGET_WHEELS_RELEASE): $(prefix) 80 | mkdir -p $(TARGET_WHEELS_RELEASE) 81 | 82 | $(TARGET_LOADABLE): $(prefix) $(shell find . -type f -name '*.rs') 83 | cargo build --verbose $(CARGO_TARGET) 84 | cp $(BUILT_LOCATION) $@ 85 | 86 | $(TARGET_LOADABLE_RELEASE): $(prefix) $(shell find . -type f -name '*.rs') 87 | cargo build --verbose --release $(CARGO_TARGET) 88 | cp $(BUILT_LOCATION_RELEASE) $@ 89 | 90 | $(TARGET_STATIC): $(prefix) $(shell find . -type f -name '*.rs') 91 | cargo build --verbose $(CARGO_TARGET) --features=sqlite-loadable/static 92 | ls target 93 | ls target/$(target)/debug 94 | cp $(BUILT_LOCATION_STATIC) $@ 95 | 96 | $(TARGET_STATIC_RELEASE): $(prefix) $(shell find . 
-type f -name '*.rs') 97 | cargo build --verbose --release $(CARGO_TARGET) --features=sqlite-loadable/static 98 | cp $(BUILT_LOCATION_STATIC_RELEASE) $@ 99 | 100 | $(TARGET_H): sqlite-rembed.h 101 | cp $< $@ 102 | 103 | $(TARGET_H_RELEASE): sqlite-rembed.h 104 | cp $< $@ 105 | 106 | Cargo.toml: VERSION 107 | cargo set-version `cat VERSION` 108 | 109 | version: 110 | make Cargo.toml 111 | 112 | format: 113 | cargo fmt 114 | 115 | release: $(TARGET_LOADABLE_RELEASE) $(TARGET_STATIC_RELEASE) 116 | 117 | loadable: $(TARGET_LOADABLE) 118 | loadable-release: $(TARGET_LOADABLE_RELEASE) 119 | 120 | static: $(TARGET_STATIC) $(TARGET_H) 121 | static-release: $(TARGET_STATIC_RELEASE) $(TARGET_H_RELEASE) 122 | 123 | debug: loadable static python datasette 124 | release: loadable-release static-release python-release datasette-release 125 | 126 | clean: 127 | rm dist/* 128 | cargo clean 129 | 130 | test-loadable: 131 | $(PYTHON) tests/test-loadable.py 132 | 133 | publish-release: 134 | ./scripts/publish-release.sh 135 | 136 | .PHONY: clean \ 137 | test test-loadable test-python test-npm test-deno \ 138 | loadable loadable-release \ 139 | static static-release \ 140 | debug release \ 141 | format version publish-release 142 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `sqlite-rembed` 2 | 3 | A SQLite extension for generating text embeddings from remote APIs (OpenAI, Nomic, Cohere, llamafile, Ollama, etc.). A sister project to [`sqlite-vec`](https://github.com/asg017/sqlite-vec) and [`sqlite-lembed`](https://github.com/asg017/sqlite-lembed). A work-in-progress! 4 | 5 | ## Usage 6 | 7 | ```sql 8 | .load ./rembed0 9 | 10 | INSERT INTO temp.rembed_clients(name, options) 11 | VALUES ('text-embedding-3-small', 'openai'); 12 | 13 | select rembed( 14 | 'text-embedding-3-small', 15 | 'The United States Postal Service is an independent agency...'
16 | ); 17 | ``` 18 | 19 | The `temp.rembed_clients` virtual table lets you "register" clients with pure `INSERT INTO` statements. The `name` field is a unique identifier for a given client, and `options` allows you to specify which 3rd party embedding service you want to use. 20 | 21 | In this case, `openai` is a pre-defined client that will default to OpenAI's `https://api.openai.com/v1/embeddings` endpoint and will source your API key from the `OPENAI_API_KEY` environment variable. The name of the client, `text-embedding-3-small`, will be used as the embeddings model. 22 | 23 | Other pre-defined clients include: 24 | 25 | | Client name | Provider | Endpoint | API Key | 26 | | ------------ | ------------------------------------------------------------------------------------ | ---------------------------------------------- | -------------------- | 27 | | `openai` | [OpenAI](https://platform.openai.com/docs/guides/embeddings) | `https://api.openai.com/v1/embeddings` | `OPENAI_API_KEY` | 28 | | `nomic` | [Nomic](https://docs.nomic.ai/reference/endpoints/nomic-embed-text) | `https://api-atlas.nomic.ai/v1/embedding/text` | `NOMIC_API_KEY` | 29 | | `cohere` | [Cohere](https://docs.cohere.com/reference/embed) | `https://api.cohere.com/v1/embed` | `CO_API_KEY` | 30 | | `jina` | [Jina](https://api.jina.ai/redoc#tag/embeddings) | `https://api.jina.ai/v1/embeddings` | `JINA_API_KEY` | 31 | | `mixedbread` | [MixedBread](https://www.mixedbread.ai/api-reference#quick-start-guide) | `https://api.mixedbread.ai/v1/embeddings/` | `MIXEDBREAD_API_KEY` | 32 | | `llamafile` | [llamafile](https://github.com/Mozilla-Ocho/llamafile) | `http://localhost:8080/embedding` | None | 33 | | `ollama` | [Ollama](https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings) | `http://localhost:11434/api/embeddings` | None | 34 | 35 | Different client options can be specified with `rembed_client_options()`.
For example, if you have a different OpenAI-compatible service you want to use, then you can use: 36 | 37 | ```sql 38 | INSERT INTO temp.rembed_clients(name, options) VALUES 39 | ( 40 | 'xyz-small-1', 41 | rembed_client_options( 42 | 'format', 'openai', 43 | 'url', 'https://api.xyz.com/v1/embeddings', 44 | 'key', 'xyz-ca865ece65-hunter2' 45 | ) 46 | ); 47 | ``` 48 | 49 | Or to use a llamafile server that's on a different port: 50 | 51 | ```sql 52 | INSERT INTO temp.rembed_clients(name, options) VALUES 53 | ( 54 | 'xyz-small-1', 55 | rembed_client_options( 56 | 'format', 'llamafile', 57 | 'url', 'http://localhost:9999/embedding' 58 | ) 59 | ); 60 | ``` 61 | 62 | ### Using with `sqlite-vec` 63 | 64 | `sqlite-rembed` works well with [`sqlite-vec`](https://github.com/asg017/sqlite-vec), a SQLite extension for vector search. Embeddings generated with `rembed()` use the same BLOB format for vectors that `sqlite-vec` uses. 65 | 66 | Here's a sample "semantic search" application, made from a sample dataset of news article headlines.
67 | 68 | ```sql 69 | create table articles( 70 | headline text 71 | ); 72 | 73 | -- Random NPR headlines from 2024-06-04 74 | insert into articles VALUES 75 | ('Shohei Ohtani''s ex-interpreter pleads guilty to charges related to gambling and theft'), 76 | ('The jury has been selected in Hunter Biden''s gun trial'), 77 | ('Larry Allen, a Super Bowl champion and famed Dallas Cowboy, has died at age 52'), 78 | ('After saying Charlotte, a lone stingray, was pregnant, aquarium now says she''s sick'), 79 | ('An Epoch Times executive is facing money laundering charge'); 80 | 81 | 82 | -- Build a vector table with embeddings of article headlines, using OpenAI's API 83 | create virtual table vec_articles using vec0( 84 | headline_embeddings float[1536] 85 | ); 86 | 87 | insert into vec_articles(rowid, headline_embeddings) 88 | select rowid, rembed('text-embedding-3-small', headline) 89 | from articles; 90 | 91 | ``` 92 | 93 | Now we have a regular `articles` table that stores text headlines, and a `vec_articles` virtual table that stores embeddings of the article headlines, using OpenAI's `text-embedding-3-small` model. 94 | 95 | To perform a "semantic search" on the embeddings, we can query the `vec_articles` table with an embedding of our query, and join the results back to our `articles` table to retrieve the original headlines. 
96 | 97 | ```sql 98 | .param set :query 'firearm courtroom' 99 | 100 | with matches as ( 101 | select 102 | rowid, 103 | distance 104 | from vec_articles 105 | where headline_embeddings match rembed('text-embedding-3-small', :query) 106 | order by distance 107 | limit 3 108 | ) 109 | select 110 | headline, 111 | distance 112 | from matches 113 | left join articles on articles.rowid = matches.rowid; 114 | 115 | /* 116 | +--------------------------------------------------------------+------------------+ 117 | | headline | distance | 118 | +--------------------------------------------------------------+------------------+ 119 | | The jury has been selected in Hunter Biden's gun trial | 1.05906391143799 | 120 | +--------------------------------------------------------------+------------------+ 121 | | Shohei Ohtani's ex-interpreter pleads guilty to charges rela | 1.2574303150177 | 122 | | ted to gambling and theft | | 123 | +--------------------------------------------------------------+------------------+ 124 | | An Epoch Times executive is facing money laundering charge | 1.27144026756287 | 125 | +--------------------------------------------------------------+------------------+ 126 | */ 127 | ``` 128 | 129 | Notice how "firearm courtroom" doesn't appear in any of these headlines, but it can still figure out that "Hunter Biden's gun trial" is related, and the other two justice-related articles appear on top. 130 | 131 | ## Drawbacks 132 | 133 | 1. **No batch support yet.** If you use `rembed()` in a batch UPDATE or INSERT on 1,000 rows, then 1,000 HTTP requests will be made. Add a :+1: to [Issue #1](https://github.com/asg017/sqlite-rembed/issues/1) if you want to see this fixed. 134 | 2. **No builtin rate limiting.** Requests are sent sequentially so this may not come up in small demos, but `sqlite-rembed` could add features that handle rate limiting/retries implicitly.
Add a :+1: to [Issue #2](https://github.com/asg017/sqlite-rembed/issues/2) if you want to see this implemented. 135 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.0.1-alpha.9 -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | use std::process::Command; 2 | fn main() { 3 | let output = Command::new("git") 4 | .args(["rev-parse", "HEAD"]) 5 | .output() 6 | .unwrap(); 7 | let git_hash = String::from_utf8(output.stdout).unwrap(); 8 | println!("cargo:rustc-env=GIT_HASH={}", git_hash); 9 | } 10 | -------------------------------------------------------------------------------- /examples/simple-search/demo.sql: -------------------------------------------------------------------------------- 1 | .bail on 2 | .mode table 3 | .header on 4 | 5 | .timer on 6 | 7 | .load ../../dist/debug/rembed0 8 | .load ../../../sqlite-vec/dist/vec0 9 | 10 | INSERT INTO temp.rembed_clients(name, options) 11 | VALUES ('text-embedding-3-small', 'openai'); 12 | 13 | create table articles(headline text); 14 | 15 | 16 | -- Random NPR headlines from 2024-06-04 17 | insert into articles VALUES 18 | ('Shohei Ohtani''s ex-interpreter pleads guilty to charges related to gambling and theft'), 19 | ('The jury has been selected in Hunter Biden''s gun trial'), 20 | ('Larry Allen, a Super Bowl champion and famed Dallas Cowboy, has died at age 52'), 21 | ('After saying Charlotte, a lone stingray, was pregnant, aquarium now says she''s sick'), 22 | ('An Epoch Times executive is facing money laundering charge'); 23 | 24 | 25 | -- Seed a vector table with embeddings of article headlines, using OpenAI's API 26 | create virtual table vec_articles using vec0(headline_embeddings float[1536]); 27 | 28 | insert into vec_articles(rowid, headline_embeddings) 29 | 
select rowid, rembed('text-embedding-3-small', headline) 30 | from articles; 31 | 32 | 33 | .param set :query 'firearm courtroom' 34 | 35 | with matches as ( 36 | select 37 | rowid, 38 | distance 39 | from vec_articles 40 | where headline_embeddings match rembed('text-embedding-3-small', :query) 41 | order by distance 42 | limit 3 43 | ) 44 | select 45 | headline, 46 | distance 47 | from matches 48 | left join articles on articles.rowid = matches.rowid; 49 | -------------------------------------------------------------------------------- /scripts/publish-release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euxo pipefail 4 | 5 | if [[ -n $(git status --porcelain | grep -v VERSION | grep -v sqlite-dist.toml) ]]; then 6 | echo "❌ There are other un-staged changes to the repository besides VERSION and sqlite-dist.toml" 7 | exit 1 8 | fi 9 | 10 | VERSION="$(cat VERSION)" 11 | 12 | echo "Publishing version v$VERSION..." 13 | 14 | make version 15 | git add --all 16 | git commit -m "v$VERSION" 17 | git tag v$VERSION 18 | git push origin main v$VERSION 19 | 20 | if grep -qE "alpha|beta" VERSION; then 21 | gh release create v$VERSION --title=v$VERSION --prerelease --notes="" 22 | else 23 | gh release create v$VERSION --title=v$VERSION 24 | fi 25 | 26 | 27 | echo "✅ Published!
version v$VERSION" 28 | -------------------------------------------------------------------------------- /sqlite-dist.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sqlite-rembed" 3 | license = "MIT OR Apache" 4 | homepage = "https://alexgarcia.xyz/sqlite-rembed" 5 | repo = "https://github.com/asg017/sqlite-rembed" 6 | description = "A SQLite extension for generating text embeddings from remote sources (OpenAI, Cohere, localhost, etc.)" 7 | authors = ["Alex Garcia"] 8 | git_tag_format = "v$VERSION" 9 | 10 | [targets] 11 | github_releases = {} 12 | sqlpkg = {} 13 | spm = {} 14 | 15 | pip = {} 16 | datasette = {} 17 | sqlite_utils = {} 18 | 19 | npm = {} 20 | 21 | gem = { module_name = "SqliteRembed" } 22 | -------------------------------------------------------------------------------- /sqlite-rembed.h: -------------------------------------------------------------------------------- 1 | #ifndef _SQLITE_REMBED_H 2 | #define _SQLITE_REMBED_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int sqlite3_rembed_init(sqlite3*, char**, const sqlite3_api_routines*); 9 | 10 | #ifdef __cplusplus 11 | } /* end of the 'extern "C"' block */ 12 | #endif 13 | 14 | #endif /* ifndef _SQLITE_REMBED_H */ 15 | -------------------------------------------------------------------------------- /src/clients.rs: -------------------------------------------------------------------------------- 1 | use sqlite_loadable::{Error, Result}; 2 | 3 | pub(crate) fn try_env_var(key: &str) -> Result<String> { 4 | std::env::var(key) 5 | .map_err(|_| Error::new_message(format!("{} environment variable not defined.
Alternatively, pass in an API key with rembed_client_options", key))) 6 | } 7 | 8 | #[derive(Clone)] 9 | pub struct OpenAiClient { 10 | model: String, 11 | url: String, 12 | key: String, 13 | } 14 | const DEFAULT_OPENAI_URL: &str = "https://api.openai.com/v1/embeddings"; 15 | const DEFAULT_OPENAI_API_KEY_ENV: &str = "OPENAI_API_KEY"; 16 | 17 | impl OpenAiClient { 18 | pub fn new<S: Into<String>>( 19 | model: S, 20 | url: Option<String>, 21 | key: Option<String>, 22 | ) -> Result<Self> { 23 | Ok(Self { 24 | model: model.into(), 25 | url: url.unwrap_or(DEFAULT_OPENAI_URL.to_owned()), 26 | key: match key { 27 | Some(key) => key, 28 | None => try_env_var(DEFAULT_OPENAI_API_KEY_ENV)?, 29 | }, 30 | }) 31 | } 32 | pub fn infer_single(&self, input: &str) -> Result<Vec<f32>> { 33 | let body = serde_json::json!({ 34 | "input": input, 35 | "model": self.model 36 | }); 37 | 38 | let data: serde_json::Value = ureq::post(&self.url) 39 | .set("Content-Type", "application/json") 40 | .set("Authorization", format!("Bearer {}", self.key).as_str()) 41 | .send_bytes( 42 | serde_json::to_vec(&body) 43 | .map_err(|error| { 44 | Error::new_message(format!("Error serializing body to JSON: {error}")) 45 | })? 46 | .as_ref(), 47 | ) 48 | .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))?
49 | .into_json() 50 | .map_err(|error| { 51 | Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) 52 | })?; 53 | OpenAiClient::parse_single_response(data) 54 | } 55 | 56 | pub fn parse_single_response(value: serde_json::Value) -> Result<Vec<f32>> { 57 | value 58 | .get("data") 59 | .ok_or_else(|| Error::new_message("expected 'data' key in response body")) 60 | .and_then(|v| { 61 | v.get(0) 62 | .ok_or_else(|| Error::new_message("expected 'data.0' path in response body")) 63 | }) 64 | .and_then(|v| { 65 | v.get("embedding").ok_or_else(|| { 66 | Error::new_message("expected 'data.0.embedding' path in response body") 67 | }) 68 | }) 69 | .and_then(|v| { 70 | v.as_array().ok_or_else(|| { 71 | Error::new_message("expected 'data.0.embedding' path to be an array") 72 | }) 73 | }) 74 | .and_then(|arr| { 75 | arr.iter() 76 | .map(|v| { 77 | v.as_f64() 78 | .ok_or_else(|| { 79 | Error::new_message( 80 | "expected 'data.0.embedding' array to contain floats", 81 | ) 82 | }) 83 | .map(|f| f as f32) 84 | }) 85 | .collect() 86 | }) 87 | } 88 | } 89 | 90 | #[derive(Clone)] 91 | pub struct NomicClient { 92 | model: String, 93 | url: String, 94 | key: String, 95 | } 96 | const DEFAULT_NOMIC_URL: &str = "https://api-atlas.nomic.ai/v1/embedding/text"; 97 | const DEFAULT_NOMIC_API_KEY_ENV: &str = "NOMIC_API_KEY"; 98 | 99 | impl NomicClient { 100 | pub fn new<S: Into<String>>( 101 | model: S, 102 | url: Option<String>, 103 | key: Option<String>, 104 | ) -> Result<Self> { 105 | Ok(Self { 106 | model: model.into(), 107 | url: url.unwrap_or(DEFAULT_NOMIC_URL.to_owned()), 108 | key: match key { 109 | Some(key) => key, 110 | None => try_env_var(DEFAULT_NOMIC_API_KEY_ENV)?, 111 | }, 112 | }) 113 | } 114 | 115 | pub fn infer_single(&self, input: &str, input_type: Option<&str>) -> Result<Vec<f32>> { 116 | let mut body = serde_json::Map::new(); 117 | body.insert("texts".to_owned(), vec![input.to_owned()].into()); 118 | body.insert("model".to_owned(), self.model.to_owned().into()); 119 | 120 | if let Some(input_type) =
input_type { 121 | body.insert("input_type".to_owned(), input_type.to_owned().into()); 122 | } 123 | 124 | let data: serde_json::Value = ureq::post(&self.url) 125 | .set("Content-Type", "application/json") 126 | .set("Authorization", format!("Bearer {}", self.key).as_str()) 127 | .send_bytes( 128 | serde_json::to_vec(&body) 129 | .map_err(|error| { 130 | Error::new_message(format!("Error serializing body to JSON: {error}")) 131 | })? 132 | .as_ref(), 133 | ) 134 | .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? 135 | .into_json() 136 | .map_err(|error| { 137 | Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) 138 | })?; 139 | NomicClient::parse_single_response(data) 140 | } 141 | pub fn parse_single_response(value: serde_json::Value) -> Result> { 142 | value 143 | .get("embeddings") 144 | .ok_or_else(|| Error::new_message("expected 'embeddings' key in response body")) 145 | .and_then(|v| { 146 | v.get(0).ok_or_else(|| { 147 | Error::new_message("expected 'embeddings.0' path in response body") 148 | }) 149 | }) 150 | .and_then(|v| { 151 | v.as_array().ok_or_else(|| { 152 | Error::new_message("expected 'embeddings.0' path to be an array") 153 | }) 154 | }) 155 | .and_then(|arr| { 156 | arr.iter() 157 | .map(|v| { 158 | v.as_f64() 159 | .ok_or_else(|| { 160 | Error::new_message( 161 | "expected 'embeddings.0' array to contain floats", 162 | ) 163 | }) 164 | .map(|f| f as f32) 165 | }) 166 | .collect() 167 | }) 168 | } 169 | } 170 | 171 | #[derive(Clone)] 172 | pub struct CohereClient { 173 | url: String, 174 | model: String, 175 | key: String, 176 | } 177 | const DEFAULT_COHERE_URL: &str = "https://api.cohere.com/v1/embed"; 178 | const DEFAULT_COHERE_API_KEY_ENV: &str = "CO_API_KEY"; 179 | 180 | impl CohereClient { 181 | pub fn new>( 182 | model: S, 183 | url: Option, 184 | key: Option, 185 | ) -> Result { 186 | Ok(Self { 187 | model: model.into(), 188 | url: url.unwrap_or(DEFAULT_COHERE_URL.to_owned()), 
189 | key: match key { 190 | Some(key) => key, 191 | None => try_env_var(DEFAULT_COHERE_API_KEY_ENV)?, 192 | }, 193 | }) 194 | } 195 | 196 | pub fn infer_single(&self, input: &str, input_type: Option<&str>) -> Result> { 197 | let mut body = serde_json::Map::new(); 198 | body.insert("texts".to_owned(), vec![input.to_owned()].into()); 199 | body.insert("model".to_owned(), self.model.to_owned().into()); 200 | 201 | if let Some(input_type) = input_type { 202 | body.insert("input_type".to_owned(), input_type.to_owned().into()); 203 | } 204 | 205 | let data: serde_json::Value = ureq::post(&self.url) 206 | .set("Content-Type", "application/json") 207 | .set("Accept", "application/json") 208 | .set("Authorization", format!("Bearer {}", self.key).as_str()) 209 | .send_bytes( 210 | serde_json::to_vec(&body) 211 | .map_err(|error| { 212 | Error::new_message(format!("Error serializing body to JSON: {error}")) 213 | })? 214 | .as_ref(), 215 | ) 216 | .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? 
217 | .into_json() 218 | .map_err(|error| { 219 | Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) 220 | })?; 221 | CohereClient::parse_single_response(data) 222 | } 223 | pub fn parse_single_response(value: serde_json::Value) -> Result> { 224 | value 225 | .get("embeddings") 226 | .ok_or_else(|| Error::new_message("expected 'embeddings' key in response body")) 227 | .and_then(|v| { 228 | v.get(0).ok_or_else(|| { 229 | Error::new_message("expected 'embeddings.0' path in response body") 230 | }) 231 | }) 232 | .and_then(|v| { 233 | v.as_array().ok_or_else(|| { 234 | Error::new_message("expected 'embeddings.0' path to be an array") 235 | }) 236 | }) 237 | .and_then(|arr| { 238 | arr.iter() 239 | .map(|v| { 240 | v.as_f64() 241 | .ok_or_else(|| { 242 | Error::new_message( 243 | "expected 'embeddings.0' array to contain floats", 244 | ) 245 | }) 246 | .map(|f| f as f32) 247 | }) 248 | .collect() 249 | }) 250 | } 251 | } 252 | #[derive(Clone)] 253 | pub struct JinaClient { 254 | url: String, 255 | model: String, 256 | key: String, 257 | } 258 | const DEFAULT_JINA_URL: &str = "https://api.jina.ai/v1/embeddings"; 259 | const DEFAULT_JINA_API_KEY_ENV: &str = "JINA_API_KEY"; 260 | 261 | impl JinaClient { 262 | pub fn new>( 263 | model: S, 264 | url: Option, 265 | key: Option, 266 | ) -> Result { 267 | Ok(Self { 268 | model: model.into(), 269 | url: url.unwrap_or(DEFAULT_JINA_URL.to_owned()), 270 | key: match key { 271 | Some(key) => key, 272 | None => try_env_var(DEFAULT_JINA_API_KEY_ENV)?, 273 | }, 274 | }) 275 | } 276 | 277 | pub fn infer_single(&self, input: &str) -> Result> { 278 | let mut body = serde_json::Map::new(); 279 | body.insert("input".to_owned(), vec![input.to_owned()].into()); 280 | body.insert("model".to_owned(), self.model.to_owned().into()); 281 | 282 | let data: serde_json::Value = ureq::post(&self.url) 283 | .set("Content-Type", "application/json") 284 | .set("Accept", "application/json") 285 | .set("Authorization", 
format!("Bearer {}", self.key).as_str()) 286 | .send_bytes( 287 | serde_json::to_vec(&body) 288 | .map_err(|error| { 289 | Error::new_message(format!("Error serializing body to JSON: {error}")) 290 | })? 291 | .as_ref(), 292 | ) 293 | .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? 294 | .into_json() 295 | .map_err(|error| { 296 | Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) 297 | })?; 298 | JinaClient::parse_single_response(data) 299 | } 300 | pub fn parse_single_response(value: serde_json::Value) -> Result> { 301 | value 302 | .get("data") 303 | .ok_or_else(|| Error::new_message("expected 'data' key in response body")) 304 | .and_then(|v| { 305 | v.get(0) 306 | .ok_or_else(|| Error::new_message("expected 'data.0' path in response body")) 307 | }) 308 | .and_then(|v| { 309 | v.get("embedding").ok_or_else(|| { 310 | Error::new_message("expected 'data.0.embedding' path in response body") 311 | }) 312 | }) 313 | .and_then(|v| { 314 | v.as_array().ok_or_else(|| { 315 | Error::new_message("expected 'data.0.embedding' path to be an array") 316 | }) 317 | }) 318 | .and_then(|arr| { 319 | arr.iter() 320 | .map(|v| { 321 | v.as_f64() 322 | .ok_or_else(|| { 323 | Error::new_message( 324 | "expected 'data.0.embedding' array to contain floats", 325 | ) 326 | }) 327 | .map(|f| f as f32) 328 | }) 329 | .collect() 330 | }) 331 | } 332 | } 333 | #[derive(Clone)] 334 | pub struct MixedbreadClient { 335 | url: String, 336 | model: String, 337 | key: String, 338 | } 339 | const DEFAULT_MIXEDBREAD_URL: &str = "https://api.mixedbread.ai/v1/embeddings/"; 340 | const DEFAULT_MIXEDBREAD_API_KEY_ENV: &str = "MIXEDBREAD_API_KEY"; 341 | 342 | impl MixedbreadClient { 343 | pub fn new>( 344 | model: S, 345 | url: Option, 346 | key: Option, 347 | ) -> Result { 348 | Ok(Self { 349 | model: model.into(), 350 | url: url.unwrap_or(DEFAULT_MIXEDBREAD_URL.to_owned()), 351 | key: match key { 352 | Some(key) => key, 353 | None => 
try_env_var(DEFAULT_MIXEDBREAD_API_KEY_ENV)?, 354 | }, 355 | }) 356 | } 357 | 358 | pub fn infer_single(&self, input: &str) -> Result> { 359 | let mut body = serde_json::Map::new(); 360 | body.insert("input".to_owned(), vec![input.to_owned()].into()); 361 | body.insert("model".to_owned(), self.model.to_owned().into()); 362 | 363 | let data: serde_json::Value = ureq::post(&self.url) 364 | .set("Content-Type", "application/json") 365 | .set("Accept", "application/json") 366 | .set("Authorization", format!("Bearer {}", self.key).as_str()) 367 | .send_bytes( 368 | serde_json::to_vec(&body) 369 | .map_err(|error| { 370 | Error::new_message(format!("Error serializing body to JSON: {error}")) 371 | })? 372 | .as_ref(), 373 | ) 374 | .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? 375 | .into_json() 376 | .map_err(|error| { 377 | Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) 378 | })?; 379 | JinaClient::parse_single_response(data) 380 | } 381 | pub fn parse_single_response(value: serde_json::Value) -> Result> { 382 | value 383 | .get("data") 384 | .ok_or_else(|| Error::new_message("expected 'data' key in response body")) 385 | .and_then(|v| { 386 | v.get(0) 387 | .ok_or_else(|| Error::new_message("expected 'data.0' path in response body")) 388 | }) 389 | .and_then(|v| { 390 | v.get("embedding").ok_or_else(|| { 391 | Error::new_message("expected 'data.0.embedding' path in response body") 392 | }) 393 | }) 394 | .and_then(|v| { 395 | v.as_array().ok_or_else(|| { 396 | Error::new_message("expected 'data.0.embedding' path to be an array") 397 | }) 398 | }) 399 | .and_then(|arr| { 400 | arr.iter() 401 | .map(|v| { 402 | v.as_f64() 403 | .ok_or_else(|| { 404 | Error::new_message( 405 | "expected 'data.0.embedding' array to contain floats", 406 | ) 407 | }) 408 | .map(|f| f as f32) 409 | }) 410 | .collect() 411 | }) 412 | } 413 | } 414 | 415 | #[derive(Clone)] 416 | pub struct OllamaClient { 417 | url: String, 
418 | model: String, 419 | } 420 | const DEFAULT_OLLAMA_URL: &str = "http://localhost:11434/api/embeddings"; 421 | impl OllamaClient { 422 | pub fn new>(model: S, url: Option) -> Self { 423 | Self { 424 | model: model.into(), 425 | url: url.unwrap_or(DEFAULT_OLLAMA_URL.to_owned()), 426 | } 427 | } 428 | 429 | pub fn infer_single(&self, input: &str) -> Result> { 430 | let mut body = serde_json::Map::new(); 431 | body.insert("prompt".to_owned(), input.to_owned().into()); 432 | body.insert("model".to_owned(), self.model.to_owned().into()); 433 | 434 | let data: serde_json::Value = ureq::post(&self.url) 435 | .set("Content-Type", "application/json") 436 | .send_bytes( 437 | serde_json::to_vec(&body) 438 | .map_err(|error| { 439 | Error::new_message(format!("Error serializing body to JSON: {error}")) 440 | })? 441 | .as_ref(), 442 | ) 443 | .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? 444 | .into_json() 445 | .map_err(|error| { 446 | Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) 447 | })?; 448 | OllamaClient::parse_single_response(data) 449 | } 450 | pub fn parse_single_response(value: serde_json::Value) -> Result> { 451 | value 452 | .get("embedding") 453 | .ok_or_else(|| Error::new_message("expected 'embedding' key in response body")) 454 | .and_then(|v| { 455 | v.as_array() 456 | .ok_or_else(|| Error::new_message("expected 'embedding' path to be an array")) 457 | }) 458 | .and_then(|arr| { 459 | arr.iter() 460 | .map(|v| { 461 | v.as_f64() 462 | .ok_or_else(|| { 463 | Error::new_message("expected 'embedding' array to contain floats") 464 | }) 465 | .map(|f| f as f32) 466 | }) 467 | .collect() 468 | }) 469 | } 470 | } 471 | 472 | #[derive(Clone)] 473 | pub struct LlamafileClient { 474 | url: String, 475 | } 476 | const DEFAULT_LLAMAFILE_URL: &str = "http://localhost:8080/embedding"; 477 | 478 | impl LlamafileClient { 479 | pub fn new(url: Option) -> Self { 480 | Self { 481 | url: 
url.unwrap_or(DEFAULT_LLAMAFILE_URL.to_owned()), 482 | } 483 | } 484 | 485 | pub fn infer_single(&self, input: &str) -> Result> { 486 | let mut body = serde_json::Map::new(); 487 | body.insert("content".to_owned(), input.to_owned().into()); 488 | 489 | let data: serde_json::Value = ureq::post(&self.url) 490 | .set("Content-Type", "application/json") 491 | .send_bytes( 492 | serde_json::to_vec(&body) 493 | .map_err(|error| { 494 | Error::new_message(format!("Error serializing body to JSON: {error}")) 495 | })? 496 | .as_ref(), 497 | ) 498 | .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? 499 | .into_json() 500 | .map_err(|error| { 501 | Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) 502 | })?; 503 | OllamaClient::parse_single_response(data) 504 | } 505 | } 506 | 507 | #[derive(Clone)] 508 | pub enum Client { 509 | OpenAI(OpenAiClient), 510 | Nomic(NomicClient), 511 | Cohere(CohereClient), 512 | Ollama(OllamaClient), 513 | Llamafile(LlamafileClient), 514 | Jina(JinaClient), 515 | Mixedbread(MixedbreadClient), 516 | } 517 | -------------------------------------------------------------------------------- /src/clients_vtab.rs: -------------------------------------------------------------------------------- 1 | use sqlite_loadable::table::UpdateOperation; 2 | use sqlite_loadable::{api, prelude::*, Error}; 3 | use sqlite_loadable::{ 4 | api::ValueType, 5 | table::{IndexInfo, VTab, VTabArguments, VTabCursor, VTabWriteable}, 6 | BestIndexError, Result, 7 | }; 8 | use std::{cell::RefCell, collections::HashMap, marker::PhantomData, mem, os::raw::c_int, rc::Rc}; 9 | 10 | use crate::clients::MixedbreadClient; 11 | use crate::{ 12 | clients::{ 13 | Client, CohereClient, JinaClient, LlamafileClient, NomicClient, OllamaClient, OpenAiClient, 14 | }, 15 | CLIENT_OPTIONS_POINTER_NAME, 16 | }; 17 | 18 | enum Columns { 19 | Name, 20 | Options, 21 | } 22 | fn column(index: i32) -> Option { 23 | match index { 24 | 0 => 
Some(Columns::Name), 25 | 1 => Some(Columns::Options), 26 | _ => None, 27 | } 28 | } 29 | #[repr(C)] 30 | pub struct ClientsTable { 31 | /// must be first 32 | base: sqlite3_vtab, 33 | clients: Rc>>, 34 | } 35 | 36 | impl<'vtab> VTab<'vtab> for ClientsTable { 37 | type Aux = Rc>>; 38 | type Cursor = ClientsCursor<'vtab>; 39 | 40 | fn create( 41 | db: *mut sqlite3, 42 | aux: Option<&Self::Aux>, 43 | args: VTabArguments, 44 | ) -> Result<(String, Self)> { 45 | Self::connect(db, aux, args) 46 | } 47 | fn connect( 48 | _db: *mut sqlite3, 49 | aux: Option<&Self::Aux>, 50 | _args: VTabArguments, 51 | ) -> Result<(String, ClientsTable)> { 52 | let base: sqlite3_vtab = unsafe { mem::zeroed() }; 53 | let clients = aux.expect("Required aux").to_owned(); 54 | 55 | let vtab = ClientsTable { base, clients }; 56 | let sql = "create table x(name text primary key, options)".to_owned(); 57 | 58 | Ok((sql, vtab)) 59 | } 60 | fn destroy(&self) -> Result<()> { 61 | Ok(()) 62 | } 63 | 64 | fn best_index(&self, mut info: IndexInfo) -> core::result::Result<(), BestIndexError> { 65 | info.set_estimated_cost(10000.0); 66 | info.set_estimated_rows(10000); 67 | info.set_idxnum(1); 68 | Ok(()) 69 | } 70 | 71 | fn open(&'vtab mut self) -> Result> { 72 | ClientsCursor::new(self) 73 | } 74 | } 75 | 76 | impl<'vtab> VTabWriteable<'vtab> for ClientsTable { 77 | fn update(&'vtab mut self, operation: UpdateOperation<'_>, _p_rowid: *mut i64) -> Result<()> { 78 | match operation { 79 | UpdateOperation::Delete(_) => { 80 | return Err(Error::new_message( 81 | "DELETE operations on rembed_clients is not supported yet", 82 | )) 83 | } 84 | UpdateOperation::Update { _values } => { 85 | return Err(Error::new_message( 86 | "DELETE operations on rembed_clients is not supported yet", 87 | )) 88 | } 89 | UpdateOperation::Insert { values, rowid: _ } => { 90 | let name = api::value_text(&values[0])?; 91 | let client = match api::value_type(&values[1]) { 92 | ValueType::Text => match api::value_text(&values[1])? 
{ 93 | "openai" => Client::OpenAI(OpenAiClient::new(name, None, None)?), 94 | "mixedbread" => { 95 | Client::Mixedbread(MixedbreadClient::new(name, None, None)?) 96 | } 97 | "jina" => Client::Jina(JinaClient::new(name, None, None)?), 98 | "nomic" => Client::Nomic(NomicClient::new(name, None, None)?), 99 | "cohere" => Client::Cohere(CohereClient::new(name, None, None)?), 100 | "ollama" => Client::Ollama(OllamaClient::new(name, None)), 101 | "llamafile" => Client::Llamafile(LlamafileClient::new(None)), 102 | text => { 103 | return Err(Error::new_message(format!( 104 | "'{text}' is not a valid rembed client." 105 | ))) 106 | } 107 | }, 108 | ValueType::Null => unsafe { 109 | if let Some(client) = 110 | api::value_pointer::(&values[1], CLIENT_OPTIONS_POINTER_NAME) 111 | { 112 | (*client).clone() 113 | } else { 114 | return Err(Error::new_message("client options required")); 115 | } 116 | }, 117 | _ => return Err(Error::new_message("client options required")), 118 | }; 119 | self.clients.borrow_mut().insert(name.to_owned(), client); 120 | } 121 | } 122 | Ok(()) 123 | } 124 | } 125 | 126 | #[repr(C)] 127 | pub struct ClientsCursor<'vtab> { 128 | /// Base class. 
Must be first 129 | base: sqlite3_vtab_cursor, 130 | keys: Vec, 131 | rowid: i64, 132 | phantom: PhantomData<&'vtab ClientsTable>, 133 | } 134 | impl ClientsCursor<'_> { 135 | fn new(table: &mut ClientsTable) -> Result { 136 | let base: sqlite3_vtab_cursor = unsafe { mem::zeroed() }; 137 | let c = table.clients.borrow(); 138 | let keys = c.keys().map(|k| k.to_string()).collect(); 139 | let cursor = ClientsCursor { 140 | base, 141 | keys, 142 | rowid: 0, 143 | phantom: PhantomData, 144 | }; 145 | Ok(cursor) 146 | } 147 | } 148 | 149 | impl VTabCursor for ClientsCursor<'_> { 150 | fn filter( 151 | &mut self, 152 | _idx_num: c_int, 153 | _idx_str: Option<&str>, 154 | _values: &[*mut sqlite3_value], 155 | ) -> Result<()> { 156 | Ok(()) 157 | } 158 | 159 | fn next(&mut self) -> Result<()> { 160 | self.rowid += 1; 161 | Ok(()) 162 | } 163 | 164 | fn eof(&self) -> bool { 165 | (self.rowid as usize) >= self.keys.len() 166 | } 167 | 168 | fn column(&self, context: *mut sqlite3_context, i: c_int) -> Result<()> { 169 | let key = self 170 | .keys 171 | .get(self.rowid as usize) 172 | .expect("Internal rembed_clients logic error"); 173 | match column(i) { 174 | Some(Columns::Name) => api::result_text(context, key)?, 175 | Some(Columns::Options) => (), 176 | None => (), 177 | }; 178 | Ok(()) 179 | } 180 | 181 | fn rowid(&self) -> Result { 182 | Ok(self.rowid) 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | mod clients; 2 | mod clients_vtab; 3 | 4 | use std::cell::RefCell; 5 | use std::collections::HashMap; 6 | use std::rc::Rc; 7 | 8 | use clients::{Client, CohereClient, LlamafileClient, NomicClient, OllamaClient, OpenAiClient}; 9 | use clients_vtab::ClientsTable; 10 | use sqlite_loadable::{ 11 | api, define_scalar_function, define_scalar_function_with_aux, define_virtual_table_writeablex, 12 | prelude::*, Error, Result, 13 | }; 
14 | use zerocopy::AsBytes; 15 | 16 | const FLOAT32_VECTOR_SUBTYPE: u8 = 223; 17 | const CLIENT_OPTIONS_POINTER_NAME: &[u8] = b"sqlite-rembed-client-options\0"; 18 | 19 | pub fn rembed_version(context: *mut sqlite3_context, _values: &[*mut sqlite3_value]) -> Result<()> { 20 | api::result_text(context, format!("v{}", env!("CARGO_PKG_VERSION")))?; 21 | Ok(()) 22 | } 23 | 24 | pub fn rembed_debug(context: *mut sqlite3_context, _values: &[*mut sqlite3_value]) -> Result<()> { 25 | api::result_text( 26 | context, 27 | format!( 28 | "Version: v{} 29 | Source: {} 30 | ", 31 | env!("CARGO_PKG_VERSION"), 32 | env!("GIT_HASH") 33 | ), 34 | )?; 35 | Ok(()) 36 | } 37 | 38 | pub fn rembed_client_options( 39 | context: *mut sqlite3_context, 40 | values: &[*mut sqlite3_value], 41 | ) -> Result<()> { 42 | if (values.len() % 2) != 0 { 43 | return Err(Error::new_message( 44 | "Must have an even number of arguments to rembed_client_options, as key/value pairs.", 45 | )); 46 | } 47 | let mut options: HashMap = HashMap::new(); 48 | let mut format: Option = None; 49 | for pair in values.chunks(2) { 50 | let key = api::value_text(&pair[0])?; 51 | let value = api::value_text(&pair[1])?; 52 | if key == "format" { 53 | format = Some(value.to_owned()); 54 | } else { 55 | options.insert(key.to_owned(), value.to_owned()); 56 | } 57 | } 58 | 59 | let format = match format { 60 | Some(format) => format, 61 | None => { 62 | return Err(Error::new_message("'format' key is required.")); 63 | } 64 | }; 65 | let client: Client = match format.as_str() { 66 | "openai" => Client::OpenAI(OpenAiClient::new( 67 | options 68 | .get("model") 69 | .ok_or_else(|| Error::new_message("'model' option is required"))?, 70 | options.get("url").cloned(), 71 | options.get("key").cloned(), 72 | )?), 73 | "nomic" => Client::Nomic(NomicClient::new( 74 | options 75 | .get("model") 76 | .ok_or_else(|| Error::new_message("'model' option is required"))?, 77 | options.get("url").cloned(), 78 | options.get("key").cloned(), 79 | 
)?), 80 | "cohere" => Client::Cohere(CohereClient::new( 81 | options 82 | .get("model") 83 | .ok_or_else(|| Error::new_message("'model' option is required"))?, 84 | options.get("url").cloned(), 85 | options.get("key").cloned(), 86 | )?), 87 | "ollama" => Client::Ollama(OllamaClient::new( 88 | options 89 | .get("model") 90 | .ok_or_else(|| Error::new_message("'model' option is required"))?, 91 | options.get("url").cloned(), 92 | )), 93 | "llamafile" => Client::Llamafile(LlamafileClient::new(options.get("url").cloned())), 94 | format => return Err(Error::new_message(format!("Unknown format '{format}'"))), 95 | }; 96 | 97 | api::result_pointer(context, CLIENT_OPTIONS_POINTER_NAME, client); 98 | 99 | Ok(()) 100 | } 101 | pub fn rembed( 102 | context: *mut sqlite3_context, 103 | values: &[*mut sqlite3_value], 104 | clients: &Rc>>, 105 | ) -> Result<()> { 106 | let client_name = api::value_text(&values[0])?; 107 | let input = api::value_text(&values[1])?; 108 | let x = clients.borrow(); 109 | let client = x.get(client_name).ok_or_else(|| { 110 | Error::new_message(format!( 111 | "Client with name {client_name} was not registered with rembed_clients." 112 | )) 113 | })?; 114 | 115 | let embedding = match client { 116 | Client::OpenAI(client) => client.infer_single(input)?, 117 | Client::Jina(client) => client.infer_single(input)?, 118 | Client::Mixedbread(client) => client.infer_single(input)?, 119 | Client::Ollama(client) => client.infer_single(input)?, 120 | Client::Llamafile(client) => client.infer_single(input)?, 121 | Client::Nomic(client) => { 122 | let input_type = values.get(2).and_then(|v| api::value_text(v).ok()); 123 | client.infer_single(input, input_type)? 124 | } 125 | Client::Cohere(client) => { 126 | let input_type = values.get(2).and_then(|v| api::value_text(v).ok()); 127 | client.infer_single(input, input_type)? 
128 | } 129 | }; 130 | 131 | api::result_blob(context, embedding.as_bytes()); 132 | api::result_subtype(context, FLOAT32_VECTOR_SUBTYPE); 133 | Ok(()) 134 | } 135 | 136 | #[sqlite_entrypoint] 137 | pub fn sqlite3_rembed_init(db: *mut sqlite3) -> Result<()> { 138 | let flags = FunctionFlags::UTF8 139 | | FunctionFlags::DETERMINISTIC 140 | | unsafe { FunctionFlags::from_bits_unchecked(0x001000000) }; 141 | 142 | let c = Rc::new(RefCell::new(HashMap::new())); 143 | 144 | define_scalar_function( 145 | db, 146 | "rembed_version", 147 | 0, 148 | rembed_version, 149 | FunctionFlags::UTF8 | FunctionFlags::DETERMINISTIC, 150 | )?; 151 | define_scalar_function( 152 | db, 153 | "rembed_debug", 154 | 0, 155 | rembed_debug, 156 | FunctionFlags::UTF8 | FunctionFlags::DETERMINISTIC, 157 | )?; 158 | define_scalar_function_with_aux(db, "rembed", 2, rembed, flags, Rc::clone(&c))?; 159 | define_scalar_function_with_aux(db, "rembed", 3, rembed, flags, Rc::clone(&c))?; 160 | define_scalar_function( 161 | db, 162 | "rembed_client_options", 163 | -1, 164 | rembed_client_options, 165 | flags, 166 | )?; 167 | define_virtual_table_writeablex::(db, "rembed_clients", Some(Rc::clone(&c)))?; 168 | Ok(()) 169 | } 170 | -------------------------------------------------------------------------------- /test.sql: -------------------------------------------------------------------------------- 1 | .load dist/debug/rembed0 2 | .bail on 3 | .mode box 4 | .header on 5 | .timer on 6 | .echo on 7 | 8 | INSERT INTO temp.rembed_clients(name, options) VALUES 9 | ('text-embedding-3-small','openai'), 10 | ('jina-embeddings-v2-base-en','jina'), 11 | ('mixedbread-ai/mxbai-embed-large-v1','mixedbread'), 12 | ('nomic-embed-text-v1.5', 'nomic'), 13 | ('embed-english-v3.0', 'cohere'), 14 | ('snowflake-arctic-embed:s', 'ollama'), 15 | ('llamafile', 'llamafile'), 16 | ( 17 | 'mxbai-embed-large-v1-f16', 18 | rembed_client_options( 19 | 'format', 'llamafile', 20 | --'url', 'http://mm1:8080/v1/embeddings' 21 | 'url', 
'http://mm1:8080/embedding' 22 | ) 23 | ); 24 | 25 | select length(rembed('mixedbread-ai/mxbai-embed-large-v1', 'obama the person')); 26 | .exit 27 | select length(rembed('jina-embeddings-v2-base-en', 'obama the person')); 28 | 29 | .exit 30 | 31 | select length(rembed('text-embedding-3-small', 'obama the person')); 32 | select length(rembed('llamafile', 'obama the person')); 33 | select length(rembed('snowflake-arctic-embed:s', 'obama the person')); 34 | select length(rembed('embed-english-v3.0', 'obama the person', 'search_document')); 35 | select length(rembed('mxbai-embed-large-v1-f16', 'obama the person')); 36 | 37 | 38 | --------------------------------------------------------------------------------