├── VERSION ├── benchmarks ├── README.md ├── micro │ ├── .gitignore │ ├── build.rs │ ├── src │ │ └── lib.rs │ ├── Cargo.toml │ └── benches │ │ └── my_benchmark.rs ├── exhaustive-memory │ ├── .gitignore │ ├── gist.suite │ ├── Makefile │ ├── sift.suite │ ├── README.md │ └── requirements.txt ├── profiling │ ├── build-from-npy.sql │ └── query-k.sql └── self-params │ ├── test.py │ ├── knn.py │ └── build.py ├── site ├── guides │ ├── classifiers.md │ ├── hybrid-search.md │ ├── semantic-search.md │ ├── rag.md │ ├── performance.md │ ├── arithmetic.md │ ├── scalar-quant.md │ ├── matryoshka.md │ └── binary-quant.md ├── .gitignore ├── public │ ├── fonts │ │ ├── ZillaSlab-SemiBold.otf │ │ ├── ZillaSlab-SemiBold.ttf │ │ ├── ZillaSlab-SemiBold.woff │ │ └── ZillaSlab-SemiBold.woff2 │ ├── logo.dark.svg │ ├── logo.light.svg │ ├── mozilla.svg │ ├── turso.svg │ ├── shinkai.svg │ └── shinkai.dark.svg ├── getting-started │ ├── introduction.md │ └── installation.md ├── using │ ├── c.md │ ├── sqlite-utils.md │ ├── datasette.md │ ├── android-ios.md │ ├── ruby.md │ ├── wasm.md │ ├── rqlite.md │ ├── rust.md │ ├── go.md │ ├── python.md │ └── js.md ├── project.data.ts ├── package.json ├── .vitepress │ └── theme │ │ ├── index.ts │ │ ├── Sponsors.vue │ │ └── HeroImg.vue ├── versioning.md ├── index.md ├── compiling.md └── build-ref.mjs ├── tests ├── .gitignore ├── .python-version ├── afbd │ ├── .gitignore │ ├── .python-version │ ├── Makefile │ └── README.md ├── minimum │ ├── .gitignore │ ├── demo.c │ └── Makefile ├── fuzz │ ├── .gitignore │ ├── corpus │ │ └── vec0-create │ │ │ ├── normal1 │ │ │ └── normal2 │ ├── numpy.dict │ ├── vec0-create.dict │ ├── exec.dict │ ├── README.md │ ├── exec.c │ ├── json.c │ ├── vec0-create.c │ ├── numpy.c │ └── Makefile ├── leak-fixtures │ ├── vec0-create.sql │ ├── each.sql │ └── knn.sql ├── pyproject.toml ├── Cargo.toml ├── conftest.py ├── test-wasm.mjs ├── sqlite-vec-internal.h ├── build.rs ├── Cargo.lock ├── correctness │ ├── build.py │ └── test-correctness.py 
├── utils.py ├── fuzz.py ├── skip.test-correctness.py ├── test-unit.c ├── test-general.py ├── test-partition-keys.py ├── unittest.rs └── __snapshots__ │ └── test-general.ambr ├── examples ├── simple-c │ ├── .gitignore │ ├── Makefile │ └── demo.c ├── simple-go-cgo │ ├── .gitignore │ ├── Makefile │ ├── go.mod │ ├── go.sum │ └── demo.go ├── simple-python │ ├── .gitignore │ ├── requirements.txt │ └── demo.py ├── simple-ruby │ ├── .gitignore │ ├── Gemfile │ └── demo.rb ├── simple-go-ncruces │ ├── .gitignore │ ├── Makefile │ ├── go.mod │ ├── go.sum │ └── demo.go ├── simple-rust │ ├── .gitignore │ ├── Cargo.toml │ └── demo.rs ├── simple-bun │ ├── .gitignore │ ├── package.json │ └── demo.ts ├── simple-node2 │ ├── .gitignore │ ├── package.json │ ├── tmp.mjs │ └── demo.mjs ├── simple-node │ ├── .gitignore │ ├── package.json │ └── demo.mjs ├── nbc-headlines │ ├── .gitignore │ ├── README.md │ └── Makefile ├── wasm │ ├── wasm.c │ └── README.md ├── sqlite3-cli │ ├── core_init.c │ └── README.md ├── simple-sqlite │ └── demo.sql ├── simple-deno │ └── demo.ts ├── simple-wasm │ └── index.html └── python-recipes │ └── openai-sample.py ├── bindings ├── rust │ ├── .gitignore │ ├── build.rs │ ├── Makefile │ ├── Cargo.toml.tmpl │ └── src │ │ └── lib.rs ├── go │ └── ncruces │ │ └── go-sqlite3.patch └── python │ └── extra_init.py ├── .github ├── logos │ ├── flyio.small.ico │ ├── turso.small.ico │ ├── sqlitecloud.small.svg │ ├── mozilla.svg │ ├── mozilla.dark.svg │ ├── turso.svg │ ├── shinkai.svg │ ├── shinkai.dark.svg │ └── shinkai.small.svg └── workflows │ └── site.yaml ├── SECURITY.md ├── scripts ├── vendor.sh ├── publish-release.sh └── progress.ts ├── .gitignore ├── sqlite-dist.toml ├── TODO ├── sqlite-vec.h.tmpl ├── LICENSE-MIT ├── tmp-static.py └── ARCHITECTURE.md /VERSION: -------------------------------------------------------------------------------- 1 | 0.1.7-alpha.2 -------------------------------------------------------------------------------- /benchmarks/README.md: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /site/guides/classifiers.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /site/guides/hybrid-search.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /site/guides/semantic-search.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /tests/afbd/.gitignore: -------------------------------------------------------------------------------- 1 | *.tgz 2 | -------------------------------------------------------------------------------- /examples/simple-c/.gitignore: -------------------------------------------------------------------------------- 1 | demo 2 | -------------------------------------------------------------------------------- /tests/afbd/.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /tests/minimum/.gitignore: -------------------------------------------------------------------------------- 1 | dist/ 2 | -------------------------------------------------------------------------------- /benchmarks/micro/.gitignore: 
-------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /examples/simple-go-cgo/.gitignore: -------------------------------------------------------------------------------- 1 | demo 2 | -------------------------------------------------------------------------------- /examples/simple-python/.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | -------------------------------------------------------------------------------- /examples/simple-ruby/.gitignore: -------------------------------------------------------------------------------- 1 | Gemfile.lock 2 | -------------------------------------------------------------------------------- /tests/fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | *.dSYM 2 | targets/ 3 | -------------------------------------------------------------------------------- /benchmarks/exhaustive-memory/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | -------------------------------------------------------------------------------- /site/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .vitepress/cache 3 | -------------------------------------------------------------------------------- /examples/simple-go-ncruces/.gitignore: -------------------------------------------------------------------------------- 1 | demo 2 | *.wasm 3 | -------------------------------------------------------------------------------- /examples/simple-python/requirements.txt: -------------------------------------------------------------------------------- 1 | sqlite-vec 2 | -------------------------------------------------------------------------------- /examples/simple-rust/.gitignore: 
-------------------------------------------------------------------------------- 1 | target/ 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /tests/fuzz/corpus/vec0-create/normal1: -------------------------------------------------------------------------------- 1 | aaa float[12] 2 | -------------------------------------------------------------------------------- /examples/simple-bun/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | bun.lockb 3 | -------------------------------------------------------------------------------- /examples/simple-node2/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | package-lock.json -------------------------------------------------------------------------------- /tests/fuzz/corpus/vec0-create/normal2: -------------------------------------------------------------------------------- 1 | aaa float[12], bbb int8[6] 2 | -------------------------------------------------------------------------------- /examples/simple-node/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | package-lock.json 3 | -------------------------------------------------------------------------------- /examples/nbc-headlines/.gitignore: -------------------------------------------------------------------------------- 1 | *.dylib 2 | *.so 3 | *.dll 4 | *.gguf 5 | -------------------------------------------------------------------------------- /examples/simple-go-cgo/Makefile: -------------------------------------------------------------------------------- 1 | demo: demo.go go.mod go.sum 2 | go build -o $@ 3 | -------------------------------------------------------------------------------- /bindings/rust/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | sqlite-vec.c 3 | 
sqlite-vec.h 4 | Cargo.toml 5 | -------------------------------------------------------------------------------- /examples/nbc-headlines/README.md: -------------------------------------------------------------------------------- 1 | - `headlines-2024.db` 2 | - 14.5k rows 3 | - 4.4MB 4 | 5 | -------------------------------------------------------------------------------- /site/guides/rag.md: -------------------------------------------------------------------------------- 1 | # Retrieval Augmented Generation (RAG) 2 | 3 | - "memories"? 4 | - chunking 5 | -------------------------------------------------------------------------------- /site/guides/performance.md: -------------------------------------------------------------------------------- 1 | - page_size 2 | - memory mapping 3 | - in-memory index 4 | - chunk_size (?) 5 | -------------------------------------------------------------------------------- /.github/logos/flyio.small.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/sqlite-vec/HEAD/.github/logos/flyio.small.ico -------------------------------------------------------------------------------- /.github/logos/turso.small.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/sqlite-vec/HEAD/.github/logos/turso.small.ico -------------------------------------------------------------------------------- /site/guides/arithmetic.md: -------------------------------------------------------------------------------- 1 | # Vector Arithmetic 2 | 3 | - `vec_add()` 4 | - `vec_sub()` 5 | - `vec_mean()` 6 | -------------------------------------------------------------------------------- /tests/fuzz/numpy.dict: -------------------------------------------------------------------------------- 1 | magic="\x93NUMPY" 2 | lparen="(" 3 | rparen=")" 4 | lbrace="{" 5 | rbrace="}" 6 | sq1="\"" 7 | sq2="'" 8 |
-------------------------------------------------------------------------------- /examples/simple-go-ncruces/Makefile: -------------------------------------------------------------------------------- 1 | 2 | demo: demo.go 3 | go build -o $@ $< 4 | 5 | clean: 6 | rm demo 7 | 8 | .PHONY: clean 9 | -------------------------------------------------------------------------------- /site/public/fonts/ZillaSlab-SemiBold.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/sqlite-vec/HEAD/site/public/fonts/ZillaSlab-SemiBold.otf -------------------------------------------------------------------------------- /site/public/fonts/ZillaSlab-SemiBold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/sqlite-vec/HEAD/site/public/fonts/ZillaSlab-SemiBold.ttf -------------------------------------------------------------------------------- /site/public/fonts/ZillaSlab-SemiBold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/sqlite-vec/HEAD/site/public/fonts/ZillaSlab-SemiBold.woff -------------------------------------------------------------------------------- /site/public/fonts/ZillaSlab-SemiBold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/sqlite-vec/HEAD/site/public/fonts/ZillaSlab-SemiBold.woff2 -------------------------------------------------------------------------------- /bindings/rust/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | cc::Build::new().file("sqlite-vec.c").define("SQLITE_CORE", None).compile("sqlite_vec0"); 3 | } 4 | -------------------------------------------------------------------------------- /benchmarks/micro/build.rs: 
-------------------------------------------------------------------------------- 1 | fn main() { 2 | cc::Build::new() 3 | .file("../../sqlite-vec.c") 4 | .compile("sqlite_vec0"); 5 | } 6 | -------------------------------------------------------------------------------- /examples/simple-ruby/Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | ruby '>= 3.0' 4 | 5 | gem 'sqlite3', '~> 2.0', '>= 2.0.1' 6 | gem 'sqlite-vec' 7 | -------------------------------------------------------------------------------- /tests/leak-fixtures/vec0-create.sql: -------------------------------------------------------------------------------- 1 | .load dist/vec0 2 | .mode box 3 | .header on 4 | .eqp on 5 | .echo on 6 | 7 | create virtual table v using vec0(y); 8 | -------------------------------------------------------------------------------- /examples/simple-c/Makefile: -------------------------------------------------------------------------------- 1 | demo: demo.c 2 | gcc \ 3 | -O3 -DSQLITE_CORE \ 4 | -I../../ -I../../vendor \ 5 | demo.c ../../sqlite-vec.c ../../vendor/sqlite3.c \ 6 | -o $@ 7 | -------------------------------------------------------------------------------- /examples/simple-bun/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "simple-bun", 3 | "module": "index.ts", 4 | "type": "module", 5 | "dependencies": { 6 | "sqlite-vec": "latest" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /site/getting-started/introduction.md: -------------------------------------------------------------------------------- 1 | # Introduction to `sqlite-vec` 2 | 3 | ## Intro to Vector Databases 4 | 5 | ## Vector Search in SQLite with `sqlite-vec` 6 | 7 | ## Getting help 8 | -------------------------------------------------------------------------------- /SECURITY.md: 
-------------------------------------------------------------------------------- 1 | Please report any security vulnerabilities to alexsebastian.garcia@gmail.com . Avoid using public GitHub issues whenever possible. I will get back to you as quickly as possible. 2 | -------------------------------------------------------------------------------- /examples/wasm/wasm.c: -------------------------------------------------------------------------------- 1 | #include "sqlite3.h" 2 | #include "sqlite-vec.h" 3 | 4 | int sqlite3_wasm_extra_init(const char * unused) { 5 | return sqlite3_auto_extension((void (*)(void)) sqlite3_vec_init); 6 | } 7 | -------------------------------------------------------------------------------- /examples/nbc-headlines/Makefile: -------------------------------------------------------------------------------- 1 | all-MiniLM-L6-v2.e4ce9877.q8_0.gguf: 2 | curl -L -o $@ https://huggingface.co/asg017/sqlite-lembed-model-examples/resolve/main/all-MiniLM-L6-v2/all-MiniLM-L6-v2.e4ce9877.q8_0.gguf 3 | -------------------------------------------------------------------------------- /examples/sqlite3-cli/core_init.c: -------------------------------------------------------------------------------- 1 | #include "sqlite3.h" 2 | #include "sqlite-vec.h" 3 | #include 4 | int core_init(const char *dummy) { 5 | return sqlite3_auto_extension((void *)sqlite3_vec_init); 6 | } 7 | -------------------------------------------------------------------------------- /examples/simple-go-cgo/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/asg017/sqlite-vec/examples/go 2 | 3 | go 1.22.5 4 | 5 | require github.com/mattn/go-sqlite3 v1.14.22 6 | 7 | require github.com/asg017/sqlite-vec-go-bindings v0.0.1-alpha.36 // indirect 8 | -------------------------------------------------------------------------------- /tests/pyproject.toml: -------------------------------------------------------------------------------- 1 |
[project] 2 | name = "tests" 3 | version = "0.1.0" 4 | description = "Add your description here" 5 | readme = "README.md" 6 | requires-python = ">=3.12" 7 | dependencies = [ 8 | "pytest", "numpy", "syrupy" 9 | ] 10 | -------------------------------------------------------------------------------- /tests/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tests" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | 8 | [build-dependencies] 9 | cc = "1.0" 10 | 11 | [[bin]] 12 | name = "unittest" 13 | path = "unittest.rs" 14 | 15 | 16 | -------------------------------------------------------------------------------- /examples/simple-node2/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "simple-node2", 3 | "version": "1.0.0", 4 | "main": "demo.mjs", 5 | "engines": { 6 | "node": ">=23.5.0" 7 | }, 8 | "dependencies": { 9 | "sqlite-vec": "latest" 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /examples/simple-rust/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sqlite-vec-demo" 3 | edition = "2021" 4 | 5 | [dependencies] 6 | sqlite-vec={version="0.0.1-alpha.7"} 7 | rusqlite = {version="0.31.0", features=["bundled"]} 8 | zerocopy = "0.7.33" 9 | 10 | [[bin]] 11 | name="demo" 12 | path="demo.rs" 13 | -------------------------------------------------------------------------------- /scripts/vendor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p vendor 3 | curl -o sqlite-amalgamation.zip https://www.sqlite.org/2024/sqlite-amalgamation-3450300.zip 4 | unzip -d 5 | unzip sqlite-amalgamation.zip 6 | mv sqlite-amalgamation-3450300/* vendor/ 7 | rmdir sqlite-amalgamation-3450300 8 | rm sqlite-amalgamation.zip 9 | 
-------------------------------------------------------------------------------- /benchmarks/exhaustive-memory/gist.suite: -------------------------------------------------------------------------------- 1 | @name=gist 2 | @input=data/gist/gist_base.fvecs 3 | @queries=data/gist/gist_query.fvecs 4 | @sample=500000 5 | @qsample=20 6 | @k=20 7 | 8 | faiss 9 | usearch 10 | vec-static 11 | #duckdb 12 | #vec-vec0.8192.1024 13 | #vec-vec0.8192.2048 14 | #vec-scalar.8192 15 | #numpy 16 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sqlite3 3 | 4 | 5 | @pytest.fixture() 6 | def db(): 7 | db = sqlite3.connect(":memory:") 8 | db.row_factory = sqlite3.Row 9 | db.enable_load_extension(True) 10 | db.load_extension("dist/vec0") 11 | db.enable_load_extension(False) 12 | return db 13 | -------------------------------------------------------------------------------- /tests/fuzz/vec0-create.dict: -------------------------------------------------------------------------------- 1 | cname1="aaa" 2 | cname1="bbb" 3 | cname1="ccc" 4 | type1="float" 5 | type2="int8" 6 | type3="bit" 7 | lparen="[" 8 | rparen="]" 9 | pk="primary key" 10 | text="text" 11 | distance_metric="distance_metric" 12 | eq="=" 13 | l1="l1" 14 | l2="l2" 15 | cosine="cosine" 16 | hamming="hamming" 17 | -------------------------------------------------------------------------------- /tests/leak-fixtures/each.sql: -------------------------------------------------------------------------------- 1 | .load dist/vec0 2 | .mode box 3 | .header on 4 | .eqp on 5 | .echo on 6 | 7 | select sqlite_version(), vec_version(); 8 | 9 | select * from vec_each('[1,2,3]'); 10 | 11 | select * 12 | from json_each('[ 13 | [1,2,3,4], 14 | [1,2,3,4] 15 | ]') 16 | join vec_each(json_each.value); 17 | -------------------------------------------------------------------------------- 
/site/using/c.md: -------------------------------------------------------------------------------- 1 | # Using `sqlite-vec` in C 2 | 3 | The `sqlite-vec` project is a single `sqlite-vec.c` and `sqlite-vec.h` file. They can be vendored into your C or C++ projects and compiled like normal. 4 | 5 | "Amalgamation" builds are provided on the [`sqlite-vec` Releases page](https://github.com/asg017/sqlite-vec/releases). 6 | -------------------------------------------------------------------------------- /tests/test-wasm.mjs: -------------------------------------------------------------------------------- 1 | async function main() { 2 | const { default: init } = await import("../dist/.wasm/sqlite3.mjs"); 3 | const sqlite3 = await init(); 4 | const vec_version = new sqlite3.oo1.DB(":memory:").selectValue( 5 | "select vec_version()", 6 | ); 7 | console.log(vec_version); 8 | } 9 | 10 | main(); 11 | -------------------------------------------------------------------------------- /benchmarks/micro/src/lib.rs: -------------------------------------------------------------------------------- 1 | use rusqlite::ffi::sqlite3_auto_extension; 2 | 3 | #[link(name = "sqlite_vec0")] 4 | extern "C" { 5 | pub fn sqlite3_vec_init(); 6 | } 7 | 8 | pub fn init_vec() { 9 | unsafe { 10 | sqlite3_auto_extension(Some(std::mem::transmute(sqlite3_vec_init as *const ()))); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /tests/sqlite-vec-internal.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int min_idx( 4 | // list of distances, size n 5 | const float *distances, 6 | // number of entries in distances 7 | int32_t n, 8 | // output array of size k, the indices of the lowest k values in distances 9 | int32_t *out, 10 | // output number of elements 11 | int32_t k 12 | ); 13 | -------------------------------------------------------------------------------- /benchmarks/micro/Cargo.toml:
-------------------------------------------------------------------------------- 1 | [package] 2 | name = "micro" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | rusqlite = {version="0.31.0", features=["bundled"]} 8 | 9 | [dev-dependencies] 10 | criterion = "0.3" 11 | rand = "0.8.5" 12 | zerocopy = "0.7.34" 13 | 14 | [build-dependencies] 15 | cc = "1.0.99" 16 | 17 | [[bench]] 18 | name = "my_benchmark" 19 | harness = false 20 | -------------------------------------------------------------------------------- /tests/build.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | use std::path::{Path, PathBuf}; 3 | use std::process::Command; 4 | 5 | fn main() { 6 | cc::Build::new() 7 | .file("../sqlite-vec.c") 8 | .include(".") 9 | .static_flag(true) 10 | .compile("sqlite-vec-internal"); 11 | println!("cargo:rerun-if-changed=usleep.c"); 12 | println!("cargo:rerun-if-changed=build.rs"); 13 | } 14 | -------------------------------------------------------------------------------- /examples/simple-node/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "node", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "keywords": [], 10 | "author": "", 11 | "license": "ISC", 12 | "dependencies": { 13 | "better-sqlite3": "^9.6.0", 14 | "sqlite-vec": "latest" 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /examples/simple-go-ncruces/go.mod: -------------------------------------------------------------------------------- 1 | module asg017.com/ex1 2 | 3 | go 1.22.5 4 | 5 | require ( 6 | github.com/asg017/sqlite-vec-go-bindings v0.0.1-alpha.37 7 | github.com/ncruces/go-sqlite3 v0.17.2-0.20240711235451-21de85e849b7 8 | ) 9 | 10 | require ( 11 | github.com/ncruces/julianday v1.0.0 // indirect 12 | 
github.com/tetratelabs/wazero v1.7.3 // indirect 13 | golang.org/x/sys v0.22.0 // indirect 14 | ) 15 | -------------------------------------------------------------------------------- /benchmarks/exhaustive-memory/Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | data/: 5 | mkdir -p $@ 6 | 7 | data/sift: data/ 8 | curl -o data/sift.tar.gz ftp://ftp.irisa.fr/local/texmex/corpus/sift.tar.gz 9 | tar -xvzf data/sift.tar.gz -C data/ 10 | rm data/sift.tar.gz 11 | 12 | data/gist: data/ 13 | curl -o data/gist.tar.gz ftp://ftp.irisa.fr/local/texmex/corpus/gist.tar.gz 14 | tar -xvzf data/gist.tar.gz -C data/ 15 | rm data/gist.tar.gz 16 | -------------------------------------------------------------------------------- /examples/simple-node2/tmp.mjs: -------------------------------------------------------------------------------- 1 | import { DatabaseSync } from "node:sqlite"; 2 | import * as sqliteVec from "sqlite-vec"; 3 | 4 | const db = new DatabaseSync(":memory:", { allowExtension: true }); 5 | sqliteVec.load(db); 6 | 7 | const embedding = new Float32Array([0.1, 0.2, 0.3, 0.4]); 8 | const { result } = db 9 | .prepare("select vec_length(?) 
as result") 10 | .get(new Uint8Array(embedding.buffer)); 11 | 12 | console.log(result); // 4 -------------------------------------------------------------------------------- /tests/fuzz/exec.dict: -------------------------------------------------------------------------------- 1 | select="select" 2 | from="from" 3 | cname1="aaa" 4 | cname1="bbb" 5 | cname1="ccc" 6 | type1="float" 7 | type2="int8" 8 | type3="bit" 9 | lparen="[" 10 | rparen="]" 11 | pk="primary key" 12 | text="text" 13 | distance_metric="distance_metric" 14 | eq="=" 15 | l1="l1" 16 | l2="l2" 17 | cosine="cosine" 18 | hamming="hamming" 19 | vec_distance_l2="vec_distance_l2" 20 | vec_distance_l1="vec_distance_l1" 21 | comma="," 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .vscode 3 | sift/ 4 | *.tar.gz 5 | *.db 6 | *.npy 7 | *.bin 8 | *.out 9 | venv/ 10 | 11 | vendor/ 12 | dist/ 13 | 14 | *.pyc 15 | *.db-journal 16 | 17 | alexandria/ 18 | openai/ 19 | examples/supabase-dbpedia 20 | examples/ann-filtering 21 | examples/dbpedia-openai 22 | examples/imdb 23 | examples/sotu 24 | 25 | sqlite-vec.h 26 | tmp/ 27 | 28 | poetry.lock 29 | 30 | *.jsonl 31 | 32 | memstat.c 33 | memstat.* 34 | -------------------------------------------------------------------------------- /tests/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "cc" 7 | version = "1.0.90" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" 10 | 11 | [[package]] 12 | name = "tests" 13 | version = "0.1.0" 14 | dependencies = [ 15 | "cc", 16 | ] 17 | -------------------------------------------------------------------------------- /tests/fuzz/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | ASAN_OPTIONS=detect_leaks=1 ./targets/vec0_create \ 3 | -dict=./vec0-create.dict -max_total_time=5 \ 4 | ./corpus/vec0-create 5 | ``` 6 | 7 | ``` 8 | export PATH="/opt/homebrew/opt/llvm/bin:$PATH" 9 | export LDFLAGS="-L/opt/homebrew/opt/llvm/lib" 10 | export CPPFLAGS="-I/opt/homebrew/opt/llvm/include" 11 | 12 | 13 | LDFLAGS="-L/opt/homebrew/opt/llvm/lib/c++ -Wl,-rpath,/opt/homebrew/opt/llvm/lib/c++" 14 | ``` 15 | -------------------------------------------------------------------------------- /site/project.data.ts: -------------------------------------------------------------------------------- 1 | import { readFileSync } from "node:fs"; 2 | import { dirname, join } from "node:path"; 3 | import { fileURLToPath } from "node:url"; 4 | 5 | const PROJECT = "sqlite-vec"; 6 | 7 | const VERSION = readFileSync( 8 | join(dirname(fileURLToPath(import.meta.url)), "..", "VERSION"), 9 | "utf8", 10 | ); 11 | 12 | export default { 13 | load() { 14 | return { 15 | PROJECT, 16 | VERSION, 17 | }; 18 | }, 19 | }; 20 | -------------------------------------------------------------------------------- /tests/correctness/build.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import duckdb 3 | db = duckdb.connect(":memory:") 4 | 5 | result = db.execute( 6 | """ 7 | select 8 | -- _id, 9 | -- title, 10 | -- text as contents, 11 | embedding::float[] as embeddings 12 | from 
"hf://datasets/Supabase/dbpedia-openai-3-large-1M/dbpedia_openai_3_large_00.parquet" 13 | """ 14 | ).fetchnumpy()['embeddings'] 15 | 16 | np.save("dbpedia_openai_3_large_00.npy", np.vstack(result)) 17 | -------------------------------------------------------------------------------- /tests/afbd/Makefile: -------------------------------------------------------------------------------- 1 | random_ints_1m.tgz: 2 | curl -o $@ https://storage.googleapis.com/ann-filtered-benchmark/datasets/random_ints_1m.tgz 3 | 4 | random_float_1m.tgz: 5 | curl -o $@ https://storage.googleapis.com/ann-filtered-benchmark/datasets/random_float_1m.tgz 6 | 7 | random_keywords_1m.tgz: 8 | curl -o $@ https://storage.googleapis.com/ann-filtered-benchmark/datasets/random_keywords_1m.tgz 9 | all: random_ints_1m.tgz random_float_1m.tgz random_keywords_1m.tgz 10 | -------------------------------------------------------------------------------- /site/using/sqlite-utils.md: -------------------------------------------------------------------------------- 1 | # Using `sqlite-vec` in `sqlite-utils` 2 | 3 | ![sqlite-utils](https://img.shields.io/pypi/v/sqlite-utils-sqlite-vec.svg?color=B6B6D9&label=sqlite-utils+plugin&logoColor=white&logo=python) 4 | 5 | [`sqlite-utils`](https://sqlite-utils.datasette.io/en/stable/) users can install `sqlite-vec` into their `sqlite-utils` projects with the `sqlite-utils-sqlite-vec` plugin: 6 | 7 | 8 | ```bash 9 | sqlite-utils install sqlite-utils-sqlite-vec 10 | ``` 11 | -------------------------------------------------------------------------------- /benchmarks/profiling/build-from-npy.sql: -------------------------------------------------------------------------------- 1 | .timer on 2 | pragma page_size = 32768; 3 | --pragma page_size = 16384; 4 | --pragma page_size = 16384; 5 | --pragma page_size = 4096; 6 | 7 | create virtual table vec_items using vec0( 8 | embedding float[1536] 9 | ); 10 | 11 | -- 65s (limit 1e5), ~615MB on disk 12 | insert into vec_items 13 | 
You can compile your own version of the `sqlite3` CLI with `sqlite-vec` built in.
The process is not well documented, but the special `SQLITE_EXTRA_INIT` compile
option can be used to "inject" code at initialization time. See the `Makefile`
at the root of this project for some more info.
# Copies the sqlite-vec sources and the vendored SQLite headers into this
# directory and renders Cargo.toml from its template.

# Release version, read from the repository-level VERSION file.
VERSION=$(shell cat ../../VERSION)

# Aggregate target: everything listed here must exist in this directory.
deps: Cargo.toml sqlite-vec.c sqlite-vec.h sqlite3ext.h sqlite3.h

# Render Cargo.toml by substituting ${VERSION} in the template via envsubst.
# Re-rendered whenever VERSION or the template changes.
Cargo.toml: ../../VERSION Cargo.toml.tmpl
	VERSION=$(VERSION) envsubst < Cargo.toml.tmpl > $@

# The rules below copy each file from the repository root (or vendor/)
# into this directory; $< is the source file, $@ the local copy.
sqlite-vec.c: ../../sqlite-vec.c
	cp $< $@

sqlite-vec.h: ../../sqlite-vec.h
	cp $< $@

sqlite3ext.h: ../../vendor/sqlite3ext.h
	cp $< $@

sqlite3.h: ../../vendor/sqlite3.h
	cp $< $@

# `deps` names a task, not a file.
.PHONY: deps
def to_npy(arr):
    """Serialize *arr* to the NumPy ``.npy`` format and return the raw bytes.

    The array is written with ``np.save`` into an in-memory buffer, so
    nothing touches the filesystem.
    """
    with BytesIO() as stream:
        np.save(stream, arr)
        # getvalue() yields the whole buffer regardless of the stream position.
        return stream.getvalue()
You can compile your own version of SQLite's WASM build with `sqlite-vec`
built in. Dynamically loading SQLite extensions is not supported in the official
WASM build yet, but you can statically compile extensions in. It's not well
documented, but the `sqlite3_wasm_extra_init` option in the SQLite `ext/wasm`
Makefile allows you to inject your own code at initialization time. See the
`Makefile` at the root of the project for more info.
-- Minimal sqlite-vec walkthrough for the sqlite3 CLI:
-- load the extension, create a vec0 table, insert five vectors, query by distance.

-- Load the extension (path is relative to the current directory) and
-- switch the CLI to tabular output with column headers.
.load ../../dist/vec0
.mode table
.header on

-- Show which SQLite and sqlite-vec versions are in use.
select sqlite_version(), vec_version();

-- Virtual table with a single 4-element float vector column.
CREATE VIRTUAL TABLE vec_items USING vec0(embedding float[4]);

-- Each JSON array element is a [rowid, vector] pair:
-- `value ->> 0` supplies the rowid, `value ->> 1` supplies the embedding.
INSERT INTO vec_items(rowid, embedding)
  select
    value ->> 0,
    value ->> 1
  from json_each('[
    [1, [0.1, 0.1, 0.1, 0.1]],
    [2, [0.2, 0.2, 0.2, 0.2]],
    [3, [0.3, 0.3, 0.3, 0.3]],
    [4, [0.4, 0.4, 0.4, 0.4]],
    [5, [0.5, 0.5, 0.5, 0.5]]
  ]');

-- Return the 3 rows with the smallest distance to the query vector.
SELECT
  rowid,
  distance
FROM vec_items
WHERE embedding MATCH '[0.3, 0.3, 0.3, 0.3]'
ORDER BY distance
LIMIT 3;
  - [ ] Partition key: `NOT NULL` constraint
#!/bin/bash
#
# Tag and publish a release.
#
# Reads the version from ./VERSION, runs `make version`, commits everything as
# "v$VERSION", tags and pushes to origin/main, then creates a GitHub release
# (flagged as a pre-release when the version contains "alpha" or "beta").
# Aborts if the working tree has changes other than VERSION / sqlite-dist.toml.

# -e: exit on error; -u: unset variables are errors; -x: trace each command;
# -o pipefail: a pipeline fails if any stage fails.
# The options must be combined like this: a bare trailing word after
# `-o pipefail` is not an option name, it becomes positional parameter $1.
set -euxo pipefail

if [[ -n $(git status --porcelain | grep -v VERSION | grep -v sqlite-dist.toml) ]]; then
  echo "❌ There are other un-staged changes to the repository besides VERSION and sqlite-dist.toml"
  exit 1
fi

VERSION="$(cat VERSION)"

echo "Publishing version v$VERSION..."

make version
git add --all
git commit -m "v$VERSION"
# Quote every expansion so an unexpected character in VERSION cannot split
# into several arguments.
git tag "v$VERSION"
git push origin main "v$VERSION"

if grep -qE "alpha|beta" VERSION; then
  gh release create "v$VERSION" --title="v$VERSION" --prerelease
else
  gh release create "v$VERSION" --title="v$VERSION"
fi


echo "✅ Published! version v$VERSION"
-------------------------------------------------------------------------------- 1 | #include "sqlite3.h" 2 | #include "sqlite-vec.h" 3 | #include 4 | #include 5 | #include 6 | 7 | int main(int argc, char *argv[]) { 8 | int rc = SQLITE_OK; 9 | sqlite3 *db; 10 | sqlite3_stmt *stmt; 11 | 12 | rc = sqlite3_open(":memory:", &db); 13 | assert(rc == SQLITE_OK); 14 | 15 | rc = sqlite3_vec_init(db, NULL, NULL); 16 | assert(rc == SQLITE_OK); 17 | 18 | 19 | rc = sqlite3_prepare_v2(db, "SELECT sqlite_version(), vec_version(), (select json_group_array(compile_options) from pragma_compile_options)", -1, &stmt, NULL); 20 | assert(rc == SQLITE_OK); 21 | 22 | rc = sqlite3_step(stmt); 23 | printf("sqlite_version=%s, vec_version=%s %s\n", sqlite3_column_text(stmt, 0), sqlite3_column_text(stmt, 1), sqlite3_column_text(stmt, 2)); 24 | sqlite3_finalize(stmt); 25 | sqlite3_close(db); 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /benchmarks/profiling/query-k.sql: -------------------------------------------------------------------------------- 1 | .timer on 2 | 3 | select rowid, distance 4 | from vec_items 5 | where embedding match (select embedding from vec_items where rowid = 100) 6 | and k = :k 7 | order by distance; 8 | 9 | select rowid, distance 10 | from vec_items 11 | where embedding match (select embedding from vec_items where rowid = 100) 12 | and k = :k 13 | order by distance; 14 | 15 | select rowid, distance 16 | from vec_items 17 | where embedding match (select embedding from vec_items where rowid = 100) 18 | and k = :k 19 | order by distance; 20 | 21 | select rowid, distance 22 | from vec_items 23 | where embedding match (select embedding from vec_items where rowid = 100) 24 | and k = :k 25 | order by distance; 26 | 27 | select rowid, distance 28 | from vec_items 29 | where embedding match (select embedding from vec_items where rowid = 100) 30 | and k = :k 31 | order by distance; 32 | 
/** Text shown in the scrolling banner at the top of every page. */
const BANNER_TEXT = "🚧🚧🚧 This documentation is a work-in-progress! 🚧🚧🚧";

/** Renders the `layout-top` slot: a scrolling work-in-progress banner. */
function renderBanner() {
  return h("marquee", { class: "banner", scrollamount: "10" }, [BANNER_TEXT]);
}

/** Renders the `aside-ads-before` slot: the sponsors panel. */
function renderSponsors() {
  return h(Sponsors);
}

/**
 * Wraps the default VitePress layout and fills two of its slots.
 * https://vitepress.dev/guide/extending-default-theme#layout-slots
 */
function Layout() {
  return h(DefaultTheme.Layout, null, {
    "layout-top": renderBanner,
    //"home-hero-image": () => h(HeroImg),
    "aside-ads-before": renderSponsors,
  });
}

// Custom theme: the default theme plus the layout above.
export default {
  extends: DefaultTheme,
  Layout,
  enhanceApp({ app, router, siteData }) {
    // ...
  },
} satisfies Theme;
# Builds the documentation site and deploys it to GitHub Pages.
# Runs manually, or on pushes to main that touch the site, the workflows,
# VERSION or reference.yaml.
name: Deploy Site
on:
  workflow_dispatch: {}
  push:
    branches:
      - main
    paths:
      - "site/**"
      - ".github/**"
      - "VERSION"
      - "reference.yaml"
jobs:
  deploy:
    runs-on: ubuntu-latest
    # Permissions required by actions/deploy-pages.
    permissions:
      pages: write
      id-token: write
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          cache: npm
          cache-dependency-path: site/package-lock.json
      - run: npm ci
        working-directory: site/
      - run: make site-build
      # The three Pages actions must be upgraded together:
      # upload-pages-artifact v3 produces the artifact format that
      # deploy-pages v4 consumes, and the older majors rely on the retired
      # v3 artifact backend.
      - uses: actions/configure-pages@v5
      - uses: actions/upload-pages-artifact@v3
        with:
          path: site/.vitepress/dist
      - id: deployment
        uses: actions/deploy-pages@v4
`sqlite-vec` can run on mobile devices like Android and iOS. As of `v0.1.2`, we publish pre-compiled loadable libraries for both platforms to [our GitHub Releases](https://github.com/asg017/sqlite-vec/releases).

You can drop those files into your Android Studio or Xcode projects as needed. We will eventually also include [`.aar` file support](https://github.com/asg017/sqlite-vec/issues/102) and [`.xcframework`](https://github.com/asg017/sqlite-vec/issues/103) support in future releases.
10 | -------------------------------------------------------------------------------- /tests/fuzz/vec0-create.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include "sqlite-vec.h" 8 | #include "sqlite3.h" 9 | #include 10 | 11 | int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { 12 | int rc = SQLITE_OK; 13 | sqlite3 *db; 14 | sqlite3_stmt *stmt; 15 | 16 | rc = sqlite3_open(":memory:", &db); 17 | assert(rc == SQLITE_OK); 18 | rc = sqlite3_vec_init(db, NULL, NULL); 19 | assert(rc == SQLITE_OK); 20 | 21 | sqlite3_str * s = sqlite3_str_new(NULL); 22 | assert(s); 23 | sqlite3_str_appendall(s, "CREATE VIRTUAL TABLE v USING vec0("); 24 | sqlite3_str_appendf(s, "%.*s", size, data); 25 | sqlite3_str_appendall(s, ")"); 26 | const char * zSql = sqlite3_str_finish(s); 27 | assert(zSql); 28 | 29 | rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, NULL); 30 | sqlite3_free(zSql); 31 | if(rc == SQLITE_OK) { 32 | sqlite3_step(stmt); 33 | } 34 | sqlite3_finalize(stmt); 35 | sqlite3_close(db); 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /examples/simple-ruby/demo.rb: -------------------------------------------------------------------------------- 1 | require 'sqlite3' 2 | require 'sqlite_vec' 3 | 4 | 5 | db = SQLite3::Database.new(':memory:') 6 | db.enable_load_extension(true) 7 | SqliteVec.load(db) 8 | db.enable_load_extension(false) 9 | 10 | sqlite_version, vec_version = db.execute("select sqlite_version(), vec_version()").first 11 | puts "sqlite_version=#{sqlite_version}, vec_version=#{vec_version}" 12 | 13 | db.execute("CREATE VIRTUAL TABLE vec_items USING vec0(embedding float[4])") 14 | 15 | items = [ 16 | [1, [0.1, 0.1, 0.1, 0.1]], 17 | [2, [0.2, 0.2, 0.2, 0.2]], 18 | [3, [0.3, 0.3, 0.3, 0.3]], 19 | [4, [0.4, 0.4, 0.4, 0.4]], 20 | [5, [0.5, 0.5, 0.5, 0.5]], 21 | ] 22 | 23 | db.transaction do 24 | 
items.each do |item| 25 | db.execute("INSERT INTO vec_items(rowid, embedding) VALUES (?, ?)", [item[0], item[1].pack("f*")]) 26 | end 27 | end 28 | 29 | query = [0.3, 0.3, 0.3, 0.3] 30 | rows = db.execute(<<-SQL, [query.pack("f*")]) 31 | SELECT 32 | rowid, 33 | distance 34 | FROM vec_items 35 | WHERE embedding MATCH ? 36 | ORDER BY distance 37 | LIMIT 3 38 | SQL 39 | 40 | puts rows 41 | -------------------------------------------------------------------------------- /tests/leak-fixtures/knn.sql: -------------------------------------------------------------------------------- 1 | .load dist/vec0 2 | .mode box 3 | .header on 4 | .eqp on 5 | .echo on 6 | 7 | select sqlite_version(), vec_version(); 8 | 9 | create virtual table v using vec0(a float[1], chunk_size=8); 10 | 11 | insert into v 12 | select value, format('[%f]', value / 100.0) 13 | from generate_series(1, 100); 14 | 15 | select 16 | rowid, 17 | vec_to_json(a) 18 | from v 19 | where a match '[.3]' 20 | and k = 2; 21 | 22 | select 23 | rowid, 24 | vec_to_json(a) 25 | from v 26 | where a match '[.3]' 27 | and k = 0; 28 | 29 | 30 | select 31 | rowid, 32 | vec_to_json(a) 33 | from v 34 | where a match '[2.0]' 35 | and k = 2 36 | and rowid in (1,2,3,4,5); 37 | 38 | 39 | 40 | with queries as ( 41 | select 42 | rowid as query_id, 43 | json_array(value / 100.0) as value 44 | from generate_series(24, 39) 45 | ) 46 | select 47 | query_id, 48 | rowid, 49 | distance, 50 | vec_to_json(a) 51 | from queries, v 52 | where a match queries.value 53 | and k =5; 54 | 55 | 56 | select * 57 | from v 58 | where rowid in (1,2,3,4); 59 | 60 | drop table v; 61 | 62 | -------------------------------------------------------------------------------- /tests/fuzz/numpy.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include "sqlite-vec.h" 8 | #include "sqlite3.h" 9 | #include 10 | 11 | int LLVMFuzzerTestOneInput(const uint8_t 
*data, size_t size) { 12 | int rc = SQLITE_OK; 13 | sqlite3 *db; 14 | sqlite3_stmt *stmt; 15 | 16 | rc = sqlite3_open(":memory:", &db); 17 | assert(rc == SQLITE_OK); 18 | rc = sqlite3_vec_init(db, NULL, NULL); 19 | assert(rc == SQLITE_OK); 20 | 21 | 22 | rc = sqlite3_prepare_v2(db, "select * from vec_npy_each(?)", -1, &stmt, NULL); 23 | assert(rc == SQLITE_OK); 24 | sqlite3_bind_blob(stmt, 1, data, size, SQLITE_STATIC); 25 | rc = sqlite3_step(stmt); 26 | if(rc != SQLITE_DONE || rc != SQLITE_ROW) { 27 | sqlite3_finalize(stmt); 28 | sqlite3_close(db); 29 | return -1; 30 | } 31 | 32 | while(1) { 33 | if(rc == SQLITE_DONE) break; 34 | if(rc == SQLITE_ROW) continue; 35 | sqlite3_finalize(stmt); 36 | sqlite3_close(db); 37 | return 1; 38 | } 39 | sqlite3_finalize(stmt); 40 | sqlite3_close(db); 41 | return 0; 42 | } 43 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Alex Garcia 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
// Reports how many TODO markers remain in sqlite-vec.c and draws a progress
// bar relative to a fixed baseline count.
const src = Deno.readTextFileSync("sqlite-vec.c");

/** Counts the matches of the global regular expression `rg` in the source. */
function numOccuranges(rg) {
  return [...src.matchAll(rg)].length;
}
const numAsserts = numOccuranges(/todo_assert/g);
const numComments = numOccuranges(/TODO/g);
const numHandles = numOccuranges(/todo\(/g);

// `TODO(` is already counted by the /TODO/ pattern above; it is subtracted
// from the total below.
const realTodos = numOccuranges(/TODO\(/g);

const numTotal = numAsserts + numComments + numHandles - realTodos;

console.log("Number of todo_assert()'s: ", numAsserts);
console.log('Number of "// TODO" comments: ', numComments);
console.log("Number of todo panics: ", numHandles);
console.log("Total TODOs: ", numTotal);

console.log();

const TOTAL = 246; // as of e5b0f4c0c5 (2024-04-20)
const progress = (TOTAL - numTotal) / TOTAL;
const width = 60;

// Clamp to [0, 1] and derive the empty segment from the filled one, so the
// bar is always exactly `width` cells: flooring both segments independently
// can drop a cell, and an unclamped negative progress overflows the bar.
const clamped = Math.min(1, Math.max(0, progress));
const filled = Math.floor(clamped * width);

console.log(
  "▓".repeat(filled) +
    "░".repeat(width - filled) +
    ` (${TOTAL - numTotal}/${TOTAL})`,
);
console.log();
console.log(
  // toFixed keeps plain decimal notation; toPrecision(2) switches to
  // exponent form ("1.0e+2") once the value reaches 100.
  `${(progress * 100.0).toFixed(1)}% complete to sqlite-vec v0.1.0`,
);
sqlite-amalgamation-3310100.zip https://www.sqlite.org/2020/sqlite-amalgamation-3310100.zip 8 | unzip -d dist/ sqlite-amalgamation-3310100.zip 9 | rm sqlite-amalgamation-3310100.zip 10 | touch $@ 11 | 12 | dist/t3310100: demo.c dist/sqlite-amalgamation-3310100/.stamp ../../sqlite-vec.c 13 | gcc \ 14 | -lm \ 15 | -DSQLITE_CORE -DSQLITE_ENABLE_JSON1 \ 16 | -I dist/sqlite-amalgamation-3310100 \ 17 | -I ../../ \ 18 | $< dist/sqlite-amalgamation-3310100/sqlite3.c \ 19 | ../../sqlite-vec.c \ 20 | -o $@ 21 | 22 | dist/t3310100-threadsafe: demo.c dist/sqlite-amalgamation-3310100/.stamp ../../sqlite-vec.c 23 | gcc \ 24 | -lm \ 25 | -DSQLITE_CORE -DSQLITE_ENABLE_JSON1 -DSQLITE_THREADSAFE=0 \ 26 | -I dist/sqlite-amalgamation-3310100 \ 27 | -I ../../ \ 28 | $< dist/sqlite-amalgamation-3310100/sqlite3.c \ 29 | ../../sqlite-vec.c \ 30 | -o $@ 31 | 32 | test: dist/t3310100 dist/t3310100-threadsafe 33 | ./dist/t3310100 34 | ./dist/t3310100-threadsafe 35 | 36 | clean: 37 | rm -rf dist/ 38 | -------------------------------------------------------------------------------- /bindings/go/ncruces/go-sqlite3.patch: -------------------------------------------------------------------------------- 1 | diff --git a/embed/build.sh b/embed/build.sh 2 | index ed2aaec..4cc0b0e 100755 3 | --- a/embed/build.sh 4 | +++ b/embed/build.sh 5 | @@ -23,6 +23,7 @@ trap 'rm -f sqlite3.tmp' EXIT 6 | -Wl,--initial-memory=327680 \ 7 | -D_HAVE_SQLITE_CONFIG_H \ 8 | -DSQLITE_CUSTOM_INCLUDE=sqlite_opt.h \ 9 | + -DSQLITE_VEC_OMIT_FS=1 \ 10 | $(awk '{print "-Wl,--export="$0}' exports.txt) 11 | 12 | "$BINARYEN/wasm-ctor-eval" -g -c _initialize sqlite3.wasm -o sqlite3.tmp 13 | diff --git a/sqlite3/main.c b/sqlite3/main.c 14 | index c732937..7c9002a 100644 15 | --- a/sqlite3/main.c 16 | +++ b/sqlite3/main.c 17 | @@ -19,6 +19,7 @@ 18 | #include "time.c" 19 | #include "vfs.c" 20 | #include "vtab.c" 21 | +#include "../../sqlite-vec.c" 22 | 23 | __attribute__((constructor)) void init() { 24 | sqlite3_initialize(); 25 | @@ 
-30,4 +31,5 @@ __attribute__((constructor)) void init() { 26 | sqlite3_auto_extension((void (*)(void))sqlite3_spellfix_init); 27 | sqlite3_auto_extension((void (*)(void))sqlite3_uint_init); 28 | sqlite3_auto_extension((void (*)(void))sqlite3_time_init); 29 | + sqlite3_auto_extension((void (*)(void))sqlite3_vec_init); 30 | } 31 | \ No newline at end of file 32 | -------------------------------------------------------------------------------- /tests/fuzz.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | EXT_PATH = "dist/vec0" 3 | db = sqlite3.connect(":memory:") 4 | 5 | db.execute( 6 | "create temp table base_functions as select name from pragma_function_list" 7 | ) 8 | db.execute("create temp table base_modules as select name from pragma_module_list") 9 | 10 | db.enable_load_extension(True) 11 | db.load_extension(EXT_PATH) 12 | db.row_factory = sqlite3.Row 13 | loaded_functions = db.execute( 14 | "select name, narg from pragma_function_list where name not in (select name from base_functions) order by name" 15 | ).fetchall() 16 | 17 | db.execute( 18 | "create temp table loaded_modules as select name from pragma_module_list where name not in (select name from base_modules) order by name" 19 | ) 20 | 21 | db.row_factory = sqlite3.Row 22 | 23 | def trace(sql): 24 | print(sql) 25 | db.set_trace_callback(trace) 26 | 27 | def spread_args(n): 28 | return ",".join(["?"] * n) 29 | 30 | for f in loaded_functions: 31 | v = [None, 1, 1.2, b"", '', "asdf", b"\xff", b"\x00", "\0\0\0\0"] 32 | for x in v: 33 | try: 34 | db.execute("select {}({}); ".format(f['name'],spread_args(f['narg'])), [x] * f['narg']) 35 | except sqlite3.OperationalError: 36 | pass 37 | -------------------------------------------------------------------------------- /site/guides/scalar-quant.md: -------------------------------------------------------------------------------- 1 | # Scalar Quantization (SQ) 2 | 3 | "Quantization" refers to a variety of 
methods and techniques for reducing the 4 | size of vectors in a vector index. **Scalar quantization** (SQ) refers to a 5 | specific technique where each individual floating point element in a vector is 6 | scaled to a small element type, like `float16` or `int8`. 7 | 8 | Most embedding models generate `float32` vectors. Each `float32` takes up 4 9 | bytes of space. This can add up, especially when working with a large number of 10 | vectors or vectors with many dimensions. However, if you scale them to `float16` 11 | or `int8` vectors, they only take up 2 bytes of space and 1 byte of space 12 | respectively, saving you precious space at the expense of some quality. 13 | 14 | ```sql 15 | select vec_quantize_float16(vec_f32('[]'), 'unit'); 16 | select vec_quantize_int8(vec_f32('[]'), 'unit'); 17 | 18 | select vec_quantize('float16', vec_f32('...')); 19 | select vec_quantize('int8', vec_f32('...')); 20 | select vec_quantize('bit', vec_f32('...')); 21 | 22 | select vec_quantize('sqf16', vec_f32('...')); 23 | select vec_quantize('sqi8', vec_f32('...')); 24 | select vec_quantize('bq2', vec_f32('...')); 25 | ``` 26 | 27 | ## Benchmarks 28 | -------------------------------------------------------------------------------- /site/versioning.md: -------------------------------------------------------------------------------- 1 | # Semantic Versioning for `sqlite-vec` 2 | 3 | `sqlite-vec` is pre-v1, so according to the rules of 4 | [Semantic Versioning](https://semver.org/), a "minor" release like "0.2.0" or 5 | "0.3.0" may contain breaking changes. 6 | 7 | Only SQL functions, table functions, and virtual tables that are defined in the default `sqlite3_vec_init` entrypoint are considered as the `sqlite-vec` API for semantic versioning. This means that other entrypoints and other SQL functions should be considered unstable, untested, and possibly dangerous. 
8 | 9 | For the SQL API, a "breaking change" would include: 10 | 11 | - Removing a function or module 12 | - Changing the number or types of arguments for an SQL function 13 | - Changing the required arguments or argument positions of a table function 14 | - Changing the `CREATE VIRTUAL TABLE` constructor of a virtual table in a backwards-incompatible way 15 | - Removing columns from a virtual table or table function 16 | 17 | 18 | The official "bindings" to `sqlite-vec`, including those for Python/Node.js/Ruby/Go/Rust, are subject to change and are not covered by semantic versioning. 19 | However, I have no plans to change or break them, and would include notes in changelogs if that ever needs to happen. 20 | -------------------------------------------------------------------------------- /site/using/ruby.md: -------------------------------------------------------------------------------- 1 | # Using `sqlite-vec` in Ruby 2 | 3 | ![Gem](https://img.shields.io/gem/v/sqlite-vec?color=red&logo=rubygems&logoColor=white) 4 | 5 | Ruby developers can use `sqlite-vec` with the [`sqlite-vec` Gem](https://rubygems.org/gems/sqlite-vec). 6 | 7 | 8 | ```bash 9 | gem install sqlite-vec 10 | ``` 11 | 12 | You can then use `SqliteVec.load()` to load `sqlite-vec` SQL functions in a given SQLite connection. 13 | 14 | ```ruby 15 | require 'sqlite3' 16 | require 'sqlite_vec' 17 | 18 | db = SQLite3::Database.new(':memory:') 19 | db.enable_load_extension(true) 20 | SqliteVec.load(db) 21 | db.enable_load_extension(false) 22 | 23 | result = db.execute('SELECT vec_version()') 24 | puts result.first.first 25 | 26 | ``` 27 | 28 | See 29 | [`simple-ruby/demo.rb`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-ruby/demo.rb) 30 | for a more complete Ruby demo. 31 | 32 | ## Working with vectors in Ruby 33 | 34 | If your embeddings are provided as a list of numbers, use `.pack("f*")` to convert them into the compact BLOB format that `sqlite-vec` uses. 
35 | 36 | ```ruby 37 | embedding = [0.1, 0.2, 0.3, 0.4] 38 | result = db.execute("SELECT vec_length(?)", [embedding.pack("f*")]) 39 | puts result.first.first # 4 40 | ``` 41 | -------------------------------------------------------------------------------- /examples/simple-node/demo.mjs: -------------------------------------------------------------------------------- 1 | import * as sqliteVec from "sqlite-vec"; 2 | import Database from "better-sqlite3"; 3 | 4 | const db = new Database(":memory:"); 5 | sqliteVec.load(db); 6 | 7 | const { sqlite_version, vec_version } = db 8 | .prepare( 9 | "select sqlite_version() as sqlite_version, vec_version() as vec_version;", 10 | ) 11 | .get(); 12 | 13 | console.log(`sqlite_version=${sqlite_version}, vec_version=${vec_version}`); 14 | 15 | const items = [ 16 | [1, [0.1, 0.1, 0.1, 0.1]], 17 | [2, [0.2, 0.2, 0.2, 0.2]], 18 | [3, [0.3, 0.3, 0.3, 0.3]], 19 | [4, [0.4, 0.4, 0.4, 0.4]], 20 | [5, [0.5, 0.5, 0.5, 0.5]], 21 | ]; 22 | const query = [0.3, 0.3, 0.3, 0.3]; 23 | 24 | db.exec("CREATE VIRTUAL TABLE vec_items USING vec0(embedding float[4])"); 25 | 26 | const insertStmt = db.prepare( 27 | "INSERT INTO vec_items(rowid, embedding) VALUES (?, ?)", 28 | ); 29 | 30 | const insertVectors = db.transaction((items) => { 31 | for (const [id, vector] of items) { 32 | insertStmt.run(BigInt(id), new Float32Array(vector)); 33 | } 34 | }); 35 | 36 | insertVectors(items); 37 | 38 | const rows = db 39 | .prepare( 40 | ` 41 | SELECT 42 | rowid, 43 | distance 44 | FROM vec_items 45 | WHERE embedding MATCH ? 
46 | ORDER BY distance 47 | LIMIT 3 48 | `, 49 | ) 50 | .all(new Float32Array(query)); 51 | 52 | console.log(rows); 53 | -------------------------------------------------------------------------------- /site/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | # https://vitepress.dev/reference/default-theme-home-page 3 | layout: home 4 | 5 | hero: 6 | name: "sqlite-vec" 7 | text: "" 8 | tagline: A vector search SQLite extension that runs anywhere! 9 | actions: 10 | - theme: brand 11 | text: Getting Started 12 | link: /introduction 13 | - theme: alt 14 | text: API Reference 15 | link: /api-reference 16 | 17 | features: 18 | - title: Runs everywhere 19 | details: On laptops, servers, mobile devices, browsers with WASM, Raspberry Pis, and more! 20 | - title: Bindings for many languages 21 | details: Python, Ruby, Node.js/Deno/Bun, Go, Rust, and more! 22 | - title: Pure SQL 23 | details: No extra configuration or server required — only CREATE, INSERT, and SELECT statements 24 | --- 25 | 26 | ```sqlite 27 | -- store 768-dimensional vectors in a vec0 virtual table 28 | create virtual table vec_movies using vec0( 29 | synopsis_embedding float[768] 30 | ); 31 | 32 | -- insert vectors into the table, as JSON or compact BLOBs 33 | insert into vec_movies(rowid, synopsis_embedding) 34 | select 35 | rowid, 36 | embed(synopsis) as synopsis_embedding 37 | from movies; 38 | 39 | -- KNN search! 
40 | select 41 | rowid, 42 | distance 43 | from vec_movies 44 | where synopsis_embedding match embed('scary futuristic movies') 45 | order by distance 46 | limit 20; 47 | ``` 48 | -------------------------------------------------------------------------------- /tests/skip.test-correctness.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import json 3 | 4 | db = sqlite3.connect("test2.db") 5 | db.enable_load_extension(True) 6 | db.load_extension("dist/vec0") 7 | db.enable_load_extension(False) 8 | db.row_factory = sqlite3.Row 9 | db.execute('attach database "sift1m-base.db" as sift1m') 10 | 11 | 12 | #def test_sift1m(): 13 | rows = db.execute( 14 | ''' 15 | with q as ( 16 | select rowid, vector, k100 from sift1m.sift1m_query limit 10 17 | ), 18 | results as ( 19 | select 20 | q.rowid as query_rowid, 21 | vec_sift1m.rowid as vec_rowid, 22 | distance, 23 | k100 as k100_groundtruth 24 | from q 25 | join vec_sift1m 26 | where 27 | vec_sift1m.vector match q.vector 28 | and k = 100 29 | order by distance 30 | ) 31 | select 32 | query_rowid, 33 | json_group_array(vec_rowid order by distance) as topk, 34 | k100_groundtruth, 35 | json_group_array(vec_rowid order by distance) == k100_groundtruth 36 | from results 37 | group by 1; 38 | ''').fetchall() 39 | 40 | results = [] 41 | for row in rows: 42 | actual = json.loads(row["topk"]) 43 | expected = json.loads(row["k100_groundtruth"]) 44 | 45 | ncorrect = sum([x in expected for x in actual]) 46 | results.append(ncorrect / 100.0) 47 | 48 | from statistics import mean 49 | print(mean(results)) 50 | -------------------------------------------------------------------------------- /tests/fuzz/Makefile: -------------------------------------------------------------------------------- 1 | 2 | TARGET_DIR=./targets 3 | 4 | $(TARGET_DIR): 5 | mkdir -p $@ 6 | 7 | # ASAN_OPTIONS=detect_leaks=1 ./fuzz_json -detect_leaks=1 '-trace_malloc=[12]' tmp 8 | $(TARGET_DIR)/json: json.c 
$(TARGET_DIR) 9 | /opt/homebrew/opt/llvm/bin/clang \ 10 | -fsanitize=address,fuzzer \ 11 | -I ../../ -I ../../vendor -DSQLITE_CORE -g \ 12 | ../../vendor/sqlite3.c \ 13 | ../../sqlite-vec.c \ 14 | $< \ 15 | -o $@ 16 | 17 | 18 | $(TARGET_DIR)/vec0_create: vec0-create.c ../../sqlite-vec.c $(TARGET_DIR) 19 | /opt/homebrew/opt/llvm/bin/clang \ 20 | -fsanitize=address,fuzzer \ 21 | -I ../../ -I ../../vendor -DSQLITE_CORE -g \ 22 | ../../vendor/sqlite3.c \ 23 | ../../sqlite-vec.c \ 24 | $< \ 25 | -o $@ 26 | 27 | $(TARGET_DIR)/numpy: numpy.c ../../sqlite-vec.c $(TARGET_DIR) 28 | /opt/homebrew/opt/llvm/bin/clang \ 29 | -fsanitize=address,fuzzer \ 30 | -I ../../ -I ../../vendor -DSQLITE_CORE -g \ 31 | ../../vendor/sqlite3.c \ 32 | ../../sqlite-vec.c \ 33 | $< \ 34 | -o $@ 35 | 36 | $(TARGET_DIR)/exec: exec.c ../../sqlite-vec.c $(TARGET_DIR) 37 | /opt/homebrew/opt/llvm/bin/clang \ 38 | -fsanitize=address,fuzzer \ 39 | -I ../../ -I ../../vendor -DSQLITE_CORE -g \ 40 | ../../vendor/sqlite3.c \ 41 | ../../sqlite-vec.c \ 42 | $< \ 43 | -o $@ 44 | 45 | all: $(TARGET_DIR)/json $(TARGET_DIR)/numpy $(TARGET_DIR)/vec0_create $(TARGET_DIR)/exec 46 | 47 | clean: 48 | rm -rf $(TARGET_DIR)/* 49 | -------------------------------------------------------------------------------- /examples/simple-bun/demo.ts: -------------------------------------------------------------------------------- 1 | import { Database } from "bun:sqlite"; 2 | Database.setCustomSQLite("/usr/local/opt/sqlite3/lib/libsqlite3.dylib"); 3 | 4 | const db = new Database(":memory:"); 5 | //sqliteVec.load(db); 6 | db.loadExtension("../../dist/vec0"); 7 | 8 | const { sqlite_version, vec_version } = db 9 | .prepare( 10 | "select sqlite_version() as sqlite_version, vec_version() as vec_version;", 11 | ) 12 | .get(); 13 | 14 | console.log(`sqlite_version=${sqlite_version}, vec_version=${vec_version}`); 15 | 16 | const items = [ 17 | [1, [0.1, 0.1, 0.1, 0.1]], 18 | [2, [0.2, 0.2, 0.2, 0.2]], 19 | [3, [0.3, 0.3, 0.3, 0.3]], 20 | [4, [0.4, 
0.4, 0.4, 0.4]], 21 | [5, [0.5, 0.5, 0.5, 0.5]], 22 | ]; 23 | const query = [0.3, 0.3, 0.3, 0.3]; 24 | 25 | db.exec("CREATE VIRTUAL TABLE vec_items USING vec0(embedding float[4])"); 26 | 27 | const insertStmt = db.prepare( 28 | "INSERT INTO vec_items(rowid, embedding) VALUES (?, vec_f32(?))", 29 | ); 30 | 31 | const insertVectors = db.transaction((items) => { 32 | for (const [id, vector] of items) { 33 | insertStmt.run(BigInt(id), new Float32Array(vector)); 34 | } 35 | }); 36 | 37 | insertVectors(items); 38 | 39 | const rows = db 40 | .prepare( 41 | ` 42 | SELECT 43 | rowid, 44 | distance 45 | FROM vec_items 46 | WHERE embedding MATCH ? 47 | ORDER BY distance 48 | LIMIT 3 49 | `, 50 | ) 51 | .all(new Float32Array(query)); 52 | 53 | console.log(rows); 54 | -------------------------------------------------------------------------------- /site/public/logo.dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /site/public/logo.light.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /examples/simple-python/demo.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import sqlite_vec 3 | 4 | from typing import List 5 | import struct 6 | 7 | 8 | def serialize_f32(vector: List[float]) -> bytes: 9 | """serializes a list of floats into a compact "raw bytes" format""" 10 | return struct.pack("%sf" % len(vector), *vector) 11 | 12 | 13 | db = sqlite3.connect(":memory:") 14 | db.enable_load_extension(True) 15 | sqlite_vec.load(db) 16 | db.enable_load_extension(False) 17 | 18 | sqlite_version, vec_version = db.execute( 19 | "select 
sqlite_version(), vec_version()" 20 | ).fetchone() 21 | print(f"sqlite_version={sqlite_version}, vec_version={vec_version}") 22 | 23 | items = [ 24 | (1, [0.1, 0.1, 0.1, 0.1]), 25 | (2, [0.2, 0.2, 0.2, 0.2]), 26 | (3, [0.3, 0.3, 0.3, 0.3]), 27 | (4, [0.4, 0.4, 0.4, 0.4]), 28 | (5, [0.5, 0.5, 0.5, 0.5]), 29 | ] 30 | query = [0.3, 0.3, 0.3, 0.3] 31 | 32 | db.execute("CREATE VIRTUAL TABLE vec_items USING vec0(embedding float[4])") 33 | 34 | with db: 35 | for item in items: 36 | db.execute( 37 | "INSERT INTO vec_items(rowid, embedding) VALUES (?, ?)", 38 | [item[0], serialize_f32(item[1])], 39 | ) 40 | 41 | rows = db.execute( 42 | """ 43 | SELECT 44 | rowid, 45 | distance 46 | FROM vec_items 47 | WHERE embedding MATCH ? 48 | ORDER BY distance 49 | LIMIT 3 50 | """, 51 | [serialize_f32(query)], 52 | ).fetchall() 53 | 54 | print(rows) 55 | -------------------------------------------------------------------------------- /examples/simple-deno/demo.ts: -------------------------------------------------------------------------------- 1 | import { Database } from "jsr:@db/sqlite@0.11"; 2 | import * as sqliteVec from "npm:sqlite-vec@0.0.1-alpha.9"; 3 | 4 | const db = new Database(":memory:"); 5 | db.enableLoadExtension = true; 6 | sqliteVec.load(db); 7 | db.enableLoadExtension = false; 8 | 9 | const [sqlite_version, vec_version] = db 10 | .prepare("select sqlite_version(), vec_version()") 11 | .value<[string, string]>()!; 12 | console.log(`sqlite_version=${sqlite_version}, vec_version=${vec_version}`); 13 | 14 | const items = [ 15 | [1, [0.1, 0.1, 0.1, 0.1]], 16 | [2, [0.2, 0.2, 0.2, 0.2]], 17 | [3, [0.3, 0.3, 0.3, 0.3]], 18 | [4, [0.4, 0.4, 0.4, 0.4]], 19 | [5, [0.5, 0.5, 0.5, 0.5]], 20 | ]; 21 | const query = [0.3, 0.3, 0.3, 0.3]; 22 | 23 | db.exec("CREATE VIRTUAL TABLE vec_items USING vec0(embedding float[4])"); 24 | 25 | const insertStmt = db.prepare( 26 | "INSERT INTO vec_items(rowid, embedding) VALUES (?, ?)" 27 | ); 28 | 29 | const insertVectors = db.transaction((items) 
=> { 30 | for (const [id, vector] of items) { 31 | insertStmt.run(BigInt(id), new Uint8Array(new Float32Array(vector).buffer)); 32 | } 33 | }); 34 | 35 | insertVectors(items); 36 | 37 | const rows = db 38 | .prepare( 39 | ` 40 | SELECT 41 | rowid, 42 | distance 43 | FROM vec_items 44 | WHERE embedding MATCH ? 45 | ORDER BY distance 46 | LIMIT 5 47 | ` 48 | ) 49 | .all([new Uint8Array(new Float32Array(query).buffer)]); 50 | 51 | console.log(rows); 52 | 53 | db.close(); 54 | -------------------------------------------------------------------------------- /site/using/wasm.md: -------------------------------------------------------------------------------- 1 | # `sqlite-vec` in the Browser with WebAssembly 2 | 3 | `sqlite-vec` can be statically compiled into [official SQLite WASM](https://sqlite.org/wasm/doc/trunk/index.md) builds. The process is a bit complicated, but the result is a vector search in the browser, which is pretty cool! 4 | 5 | ```html 6 | 7 | 8 | 17 | 18 | 19 | ``` 20 | [*Open in CodePen*](https://codepen.io/asg017_ucsd/pen/MWMpJNY) 21 | 22 | 23 | It's not possible to dynamically load a SQLite extension into a WASM build of SQLite. So `sqlite-vec` must be statically compiled into custom WASM builds. 24 | 25 | ## The `sqlite-vec-wasm-demo` NPM package 26 | 27 | A **demonstration** of `sqlite-vec` in WASM is provided with the `sqlite-vec-wasm-demo` NPM package. This package is a demonstration and may change at any time. It doesn't follow the [semantic versioning of `sqlite-vec`](./versioning.md). 28 | 29 | 30 | See 31 | [`simple-wasm/index.html`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-wasm/index.html) 32 | for a more complete WASM demo using this package. 
33 | -------------------------------------------------------------------------------- /bindings/python/extra_init.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from struct import pack 3 | from sqlite3 import Connection 4 | 5 | 6 | def serialize_float32(vector: List[float]) -> bytes: 7 | """Serializes a list of floats into the "raw bytes" format sqlite-vec expects""" 8 | return pack("%sf" % len(vector), *vector) 9 | 10 | 11 | def serialize_int8(vector: List[int]) -> bytes: 12 | """Serializes a list of integers into the "raw bytes" format sqlite-vec expects""" 13 | return pack("%sb" % len(vector), *vector) 14 | 15 | 16 | try: 17 | import numpy.typing as npt 18 | 19 | def register_numpy(db: Connection, name: str, array: npt.NDArray): 20 | """ayoo""" 21 | 22 | ptr = array.__array_interface__["data"][0] 23 | nvectors, dimensions = array.__array_interface__["shape"] 24 | element_type = array.__array_interface__["typestr"] 25 | 26 | assert element_type == " 3 | #include 4 | #include 5 | 6 | #define countof(x) (sizeof(x) / sizeof((x)[0])) 7 | 8 | void test_vec0_parse_partition_key_definition() { 9 | printf("Starting %s...\n", __func__); 10 | typedef struct { 11 | char * test; 12 | int expected_rc; 13 | const char *expected_column_name; 14 | int expected_column_type; 15 | } TestCase; 16 | 17 | TestCase suite[] = { 18 | {"user_id integer partition key", SQLITE_OK, "user_id", SQLITE_INTEGER}, 19 | {"USER_id int partition key", SQLITE_OK, "USER_id", SQLITE_INTEGER}, 20 | {"category text partition key", SQLITE_OK, "category", SQLITE_TEXT}, 21 | 22 | {"", SQLITE_EMPTY, "", 0}, 23 | {"document_id text primary key", SQLITE_EMPTY, "", 0}, 24 | {"document_id text partition keyy", SQLITE_EMPTY, "", 0}, 25 | }; 26 | for(int i = 0; i < countof(suite); i++) { 27 | char * out_column_name; 28 | int out_column_name_length; 29 | int out_column_type; 30 | int rc; 31 | rc = vec0_parse_partition_key_definition( 32 | 
suite[i].test, 33 | strlen(suite[i].test), 34 | &out_column_name, 35 | &out_column_name_length, 36 | &out_column_type 37 | ); 38 | printf("2\n"); 39 | assert(rc == suite[i].expected_rc); 40 | 41 | if(rc == SQLITE_OK) { 42 | assert(out_column_name_length == strlen(suite[i].expected_column_name)); 43 | assert(strncmp(out_column_name, suite[i].expected_column_name, out_column_name_length) == 0); 44 | assert(out_column_type == suite[i].expected_column_type); 45 | } 46 | 47 | printf("✅ %s\n", suite[i].test); 48 | } 49 | } 50 | 51 | int main() { 52 | printf("Starting unit tests...\n"); 53 | test_vec0_parse_partition_key_definition(); 54 | } 55 | -------------------------------------------------------------------------------- /.github/logos/sqlitecloud.small.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /site/using/rqlite.md: -------------------------------------------------------------------------------- 1 | # Using `sqlite-vec` in rqlite 2 | 3 | [rqlite](https://rqlite.io/) users can use `sqlite-vec` with rqlite by loading the extension when they launch their rqlite node: 4 | 5 | ```bash 6 | # Download a sqlite-vec release. 7 | curl -L https://github.com/asg017/sqlite-vec/releases/download/v0.1.1/sqlite-vec-0.1.1-loadable-linux-x86_64.tar.gz -o sqlite-vec.tar.gz 8 | 9 | # Tell rqlite to load sqlite-vec at launch time. 10 | rqlited -extensions-path=sqlite-vec.tar.gz data 11 | ``` 12 | 13 | Once loaded you can use `sqlite-vec` functionality within rqlite. For example, you can perform searches via the [rqlite shell](https://rqlite.io/docs/cli/): 14 | 15 | ``` 16 | $ rqlite 17 | Welcome to the rqlite CLI. 18 | Enter ".help" for usage hints. 
19 | Connected to http://127.0.0.1:4001 running version 8 20 | 127.0.0.1:4001> create virtual table vec_examples using vec0(sample_embedding float[8]); 21 | 1 row affected 22 | 127.0.0.1:4001> insert into vec_examples(rowid, sample_embedding) values (1, '[-0.200, 0.250, 0.341, -0.211, 0.645, 0.935, -0.316, -0.924]'), (2, '[0.443, -0.501, 0.355, -0.771, 0.707, -0.708, -0.185, 0.362]'), (3, '[0.716, -0.927, 0.134, 0.052, -0.669, 0.793, -0.634, -0.162]'), (4, '[-0.710, 0.330, 0.656, 0.041, -0.990, 0.726, 0.385, -0.958]') 23 | 4 rows affected 24 | 127.0.0.1:4001> select rowid, distance from vec_examples where sample_embedding match '[0.890, 0.544, 0.825, 0.961, 0.358, 0.0196, 0.521, 0.175]' order by distance limit 2 25 | +-------+-------------------+ 26 | | rowid | distance | 27 | +-------+-------------------+ 28 | | 2 | 2.386873722076416 | 29 | +-------+-------------------+ 30 | | 1 | 2.389785051345825 | 31 | +-------+-------------------+ 32 | ``` 33 | 34 | You can learn more from the [rqlite website](https://rqlite.io/docs/guides/extensions/). 35 | 36 | -------------------------------------------------------------------------------- /examples/simple-wasm/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |

sqlite-vec demo/simple-wasm

4 | 5 |
6 | 54 | 55 | 56 | ` 57 | -------------------------------------------------------------------------------- /tmp-static.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import numpy as np 3 | 4 | db = sqlite3.connect(":memory:") 5 | 6 | db.enable_load_extension(True) 7 | db.load_extension("./dist/vec0") 8 | db.execute("select load_extension('./dist/vec0', 'sqlite3_vec_raw_init')") 9 | db.enable_load_extension(False) 10 | 11 | x = np.array([[0.1, 0.2, 0.3, 0.4], [0.9, 0.8, 0.7, 0.6]], dtype=np.float32) 12 | y = np.array([[0.2, 0.3], [0.9, 0.8], [0.6, 0.5]], dtype=np.float32) 13 | z = np.array( 14 | [ 15 | [0.1, 0.1, 0.1, 0.1], 16 | [0.2, 0.2, 0.2, 0.2], 17 | [0.3, 0.3, 0.3, 0.3], 18 | [0.4, 0.4, 0.4, 0.4], 19 | [0.5, 0.5, 0.5, 0.5], 20 | ], 21 | dtype=np.float32, 22 | ) 23 | 24 | 25 | def register_np(array, name): 26 | ptr = array.__array_interface__["data"][0] 27 | nvectors, dimensions = array.__array_interface__["shape"] 28 | element_type = array.__array_interface__["typestr"] 29 | 30 | assert element_type == " Result<()> { 6 | unsafe { 7 | sqlite3_auto_extension(Some(std::mem::transmute(sqlite3_vec_init as *const ()))); 8 | } 9 | 10 | let db = Connection::open_in_memory()?; 11 | let v: Vec = vec![0.1, 0.2, 0.3]; 12 | 13 | let (sqlite_version, vec_version, x): (String, String, String) = db.query_row( 14 | "select sqlite_version(), vec_version(), vec_to_json(?)", 15 | &[v.as_bytes()], 16 | |x| Ok((x.get(0)?, x.get(1)?, x.get(2)?)), 17 | )?; 18 | 19 | println!("sqlite_version={sqlite_version}, vec_version={vec_version}"); 20 | 21 | let items: Vec<(usize, Vec)> = vec![ 22 | (1, vec![0.1, 0.1, 0.1, 0.1]), 23 | (2, vec![0.2, 0.2, 0.2, 0.2]), 24 | (3, vec![0.3, 0.3, 0.3, 0.3]), 25 | (4, vec![0.4, 0.4, 0.4, 0.4]), 26 | (5, vec![0.5, 0.5, 0.5, 0.5]), 27 | ]; 28 | println!("{x}"); 29 | 30 | db.execute( 31 | "CREATE VIRTUAL TABLE vec_items USING vec0(embedding float[4])", 32 | [], 33 | )?; 34 | let mut stmt = 
db.prepare("INSERT INTO vec_items(rowid, embedding) VALUES (?, ?)")?; 35 | for item in items { 36 | stmt.execute(rusqlite::params![item.0, item.1.as_bytes()])?; 37 | } 38 | 39 | let query: Vec = vec![0.3, 0.3, 0.3, 0.3]; 40 | let result: Vec<(i64, f64)> = db 41 | .prepare( 42 | r" 43 | SELECT 44 | rowid, 45 | distance 46 | FROM vec_items 47 | WHERE embedding MATCH ?1 48 | ORDER BY distance 49 | LIMIT 3 50 | ", 51 | )? 52 | .query_map([query.as_bytes()], |r| Ok((r.get(0)?, r.get(1)?)))? 53 | .collect::, _>>()?; 54 | println!("{:?}", result); 55 | Ok(()) 56 | } 57 | -------------------------------------------------------------------------------- /.github/logos/mozilla.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /tests/test-general.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | from collections import OrderedDict 3 | import pytest 4 | 5 | 6 | @pytest.mark.skipif( 7 | sqlite3.sqlite_version_info[1] < 37, 8 | reason="pragma_table_list was added in SQLite 3.37", 9 | ) 10 | def test_shadow(db, snapshot): 11 | db.execute( 12 | "create virtual table v using vec0(a float[1], partition text partition key, metadata text, +name text, chunk_size=8)" 13 | ) 14 | assert exec(db, "select * from sqlite_master order by name") == snapshot() 15 | assert ( 16 | exec(db, "select * from pragma_table_list where type = 'shadow'") == snapshot() 17 | ) 18 | 19 | db.execute("drop table v;") 20 | assert ( 21 | exec(db, "select * from pragma_table_list where type = 'shadow'") == snapshot() 22 | ) 23 | 24 | 25 | def test_info(db, snapshot): 26 | db.execute("create virtual table v using vec0(a float[1])") 27 | assert exec(db, "select key, typeof(value) from v_info order by 1") == snapshot() 28 | 29 | 30 | def exec(db, sql, parameters=[]): 31 | try: 32 | rows = db.execute(sql, 
parameters).fetchall() 33 | except (sqlite3.OperationalError, sqlite3.DatabaseError) as e: 34 | return { 35 | "error": e.__class__.__name__, 36 | "message": str(e), 37 | } 38 | a = [] 39 | for row in rows: 40 | o = OrderedDict() 41 | for k in row.keys(): 42 | o[k] = row[k] 43 | a.append(o) 44 | result = OrderedDict() 45 | result["sql"] = sql 46 | result["rows"] = a 47 | return result 48 | 49 | 50 | def vec0_shadow_table_contents(db, v): 51 | shadow_tables = [ 52 | row[0] 53 | for row in db.execute( 54 | "select name from sqlite_master where name like ? order by 1", [f"{v}_%"] 55 | ).fetchall() 56 | ] 57 | o = {} 58 | for shadow_table in shadow_tables: 59 | o[shadow_table] = exec(db, f"select * from {shadow_table}") 60 | return o 61 | -------------------------------------------------------------------------------- /.github/logos/mozilla.dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /examples/simple-go-cgo/demo.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "database/sql" 5 | "fmt" 6 | "log" 7 | 8 | sqlite_vec "github.com/asg017/sqlite-vec-go-bindings/cgo" 9 | _ "github.com/mattn/go-sqlite3" 10 | ) 11 | 12 | func main() { 13 | sqlite_vec.Auto() 14 | db, err := sql.Open("sqlite3", ":memory:") 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | defer db.Close() 19 | 20 | var sqliteVersion string 21 | var vecVersion string 22 | err = db.QueryRow("select sqlite_version(), vec_version()").Scan(&sqliteVersion, &vecVersion) 23 | if err != nil { 24 | log.Fatal(err) 25 | } 26 | fmt.Printf("sqlite_version=%s, vec_version=%s\n", sqliteVersion, vecVersion) 27 | 28 | _, err = db.Exec("CREATE VIRTUAL TABLE vec_items USING vec0(embedding float[4])") 29 | if err != nil { 30 | log.Fatal(err) 31 | } 32 | 33 | items := map[int][]float32{ 34 | 1: 
{0.1, 0.1, 0.1, 0.1}, 35 | 2: {0.2, 0.2, 0.2, 0.2}, 36 | 3: {0.3, 0.3, 0.3, 0.3}, 37 | 4: {0.4, 0.4, 0.4, 0.4}, 38 | 5: {0.5, 0.5, 0.5, 0.5}, 39 | } 40 | q := []float32{0.3, 0.3, 0.3, 0.3} 41 | 42 | for id, values := range items { 43 | v, err := sqlite_vec.SerializeFloat32(values) 44 | if err != nil { 45 | log.Fatal(err) 46 | } 47 | _, err = db.Exec("INSERT INTO vec_items(rowid, embedding) VALUES (?, ?)", id, v) 48 | if err != nil { 49 | log.Fatal(err) 50 | } 51 | } 52 | 53 | query, err := sqlite_vec.SerializeFloat32(q) 54 | if err != nil { 55 | log.Fatal(err) 56 | } 57 | 58 | rows, err := db.Query(` 59 | SELECT 60 | rowid, 61 | distance 62 | FROM vec_items 63 | WHERE embedding MATCH ? 64 | ORDER BY distance 65 | LIMIT 3 66 | `, query) 67 | 68 | if err != nil { 69 | log.Fatal(err) 70 | } 71 | 72 | for rows.Next() { 73 | var rowid int64 74 | var distance float64 75 | err = rows.Scan(&rowid, &distance) 76 | if err != nil { 77 | log.Fatal(err) 78 | } 79 | fmt.Printf("rowid=%d, distance=%f\n", rowid, distance) 80 | } 81 | err = rows.Err() 82 | if err != nil { 83 | log.Fatal((err)) 84 | } 85 | 86 | } 87 | -------------------------------------------------------------------------------- /site/public/mozilla.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 9 | 10 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /site/getting-started/installation.md: -------------------------------------------------------------------------------- 1 | # Installing 2 | 3 | You have several options to include `sqlite-vec` into your projects, including 4 | PyPi packages for Python, NPM packages for Node.js, Gems for Ruby, and more. 
5 | 6 | ## With popular package managers 7 | 8 | ::: code-group 9 | 10 | ```bash [Python] 11 | pip install sqlite-vec 12 | ``` 13 | 14 | ```bash [Node.js] 15 | npm install sqlite-vec 16 | ``` 17 | 18 | ```bash [Bun] 19 | bun install sqlite-vec 20 | ``` 21 | 22 | ```bash [Deno] 23 | deno add npm:sqlite-vec 24 | ``` 25 | 26 | ```bash [Ruby] 27 | gem install sqlite-vec 28 | ``` 29 | 30 | ```bash [Rust] 31 | cargo add sqlite-vec 32 | ``` 33 | 34 | ```bash [Go (CGO)] 35 | go get -u github.com/asg017/sqlite-vec-go-bindings/cgo 36 | ``` 37 | ```bash [Go (ncruces WASM)] 38 | go get -u github.com/asg017/sqlite-vec-go-bindings/ncruces 39 | ``` 40 | 41 | ```bash [Datasette] 42 | datasette install datasette-sqlite-vec 43 | ``` 44 | 45 | ```bash [sqlite-utils] 46 | sqlite-utils install sqlite-utils-sqlite-vec 47 | ``` 48 | 49 | ::: 50 | 51 | ## Pre-compiled extensions 52 | 53 | Alternatively, you can download pre-compiled loadable extensions from the 54 | [`sqlite-vec` Github Releases](https://github.com/asg017/sqlite-vec/releases/latest). 55 | 56 | There's also an `install.sh` script that will automatically download the appropriate pre-compiled extension from Github Releases to your machine. 57 | 58 | 59 | ```sh 60 | # yolo 61 | curl -L 'https://github.com/asg017/sqlite-vec/releases/latest/download/install.sh' | sh 62 | ``` 63 | 64 | ```sh 65 | # ok lets play it safe 66 | curl -o install.sh -L https://github.com/asg017/sqlite-vec/releases/latest/download/install.sh 67 | # inspect your scripts 68 | cat install.sh 69 | # TODO Test if execute permissions? 70 | ./install.sh 71 | ``` 72 | 73 | 74 | ## Compiling 75 | 76 | `sqlite-vec` is a single `sqlite-vec.c` and `sqlite-vec.h`, and can be easily compiled for different platforms, or statically linked into larger applications. 77 | 78 | See [*Compiling `sqlite-vec`*](#compiling) for more information. 
79 | -------------------------------------------------------------------------------- /site/.vitepress/theme/Sponsors.vue: -------------------------------------------------------------------------------- 1 | 61 | 62 | 80 | 81 | 92 | -------------------------------------------------------------------------------- /benchmarks/exhaustive-memory/README.md: -------------------------------------------------------------------------------- 1 | # `sqlite-vec` In-memory benchmark comparisons 2 | 3 | This repo contains a benchmark that compares KNN queries of `sqlite-vec` to other in-process vector search tools using **brute force linear scans only**. These include: 4 | 5 | 6 | - [Faiss IndexFlatL2](https://faiss.ai/) 7 | - [usearch with `exact=True`](https://github.com/unum-cloud/usearch) 8 | - [libsql vector search with `vector_distance_cos`](https://turso.tech/vector) 9 | - [numpy](https://numpy.org/), using [this approach](https://github.com/EthanRosenthal/nn-vs-ann) 10 | - [duckdb with `list_cosine_similarity`](https://duckdb.org/docs/sql/functions/nested.html#list_cosine_similaritylist1-list2) 11 | - [`sentence_transformers.util.semantic_search`](https://sbert.net/docs/package_reference/util.html#sentence_transformers.util.semantic_search) 12 | - [hnswlib BFIndex](https://github.com/nmslib/hnswlib/blob/c1b9b79af3d10c6ee7b5d0afa1ce851ae975254c/TESTING_RECALL.md?plain=1#L8) 13 | 14 | 15 | Again **ONLY BRUTE FORCE LINEAR SCANS ARE TESTED**. This benchmark does **not** test approximate nearest neighbors (ANN) implementations. This benchmark is extremely narrow, testing only KNN searches using brute force. 16 | 17 | A few other caveats: 18 | 19 | - Only brute-force linear scans, no ANN 20 | - Only CPU is used. The only tool that does offer GPU is Faiss anyway. 21 | - Only in-memory datasets are used. Many of these tools do support serializing and reading from disk (including `sqlite-vec`) and possibly `mmap`'ing, but this only tests in-memory datasets. 
Mostly because of numpy 22 | - Queries are made one after the other, **not batched.** Some tools offer APIs to query multiple inputs at the same time, but this benchmark runs queries sequentially. This was done to emulate "server request"-style queries, but multiple users would send queries at different times, making batching more difficult. To note, `sqlite-vec` does **not** support batched queries yet. 23 | 24 | 25 | These tests are run in Python. Vectors are provided as an in-memory numpy array, and each test converts that numpy array into whatever makes sense for the given tool. For example, `sqlite-vec` tests will read those vectors into a SQLite table. DuckDB will read them into an Array array then create a DuckDB table from that. 26 | -------------------------------------------------------------------------------- /examples/simple-go-ncruces/demo.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | _ "embed" 5 | "log" 6 | 7 | sqlite_vec "github.com/asg017/sqlite-vec-go-bindings/ncruces" 8 | "github.com/ncruces/go-sqlite3" 9 | ) 10 | 11 | func main() { 12 | db, err := sqlite3.Open(":memory:") 13 | if err != nil { 14 | log.Fatal(err) 15 | } 16 | 17 | stmt, _, err := db.Prepare(`SELECT sqlite_version(), vec_version()`) 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | 22 | stmt.Step() 23 | 24 | log.Printf("sqlite_version=%s, vec_version=%s\n", stmt.ColumnText(0), stmt.ColumnText(1)) 25 | stmt.Close() 26 | 27 | 28 | err = db.Exec("CREATE VIRTUAL TABLE vec_items USING vec0(embedding float[4])") 29 | if err != nil { 30 | log.Fatal(err) 31 | } 32 | items := map[int][]float32{ 33 | 1: {0.1, 0.1, 0.1, 0.1}, 34 | 2: {0.2, 0.2, 0.2, 0.2}, 35 | 3: {0.3, 0.3, 0.3, 0.3}, 36 | 4: {0.4, 0.4, 0.4, 0.4}, 37 | 5: {0.5, 0.5, 0.5, 0.5}, 38 | } 39 | q := []float32{0.3, 0.3, 0.3, 0.3} 40 | 41 | stmt, _, err = db.Prepare("INSERT INTO vec_items(rowid, embedding) VALUES (?, ?)") 42 | if err != nil { 43 | 
log.Fatal(err) 44 | } 45 | 46 | for id, values := range items { 47 | v, err := sqlite_vec.SerializeFloat32(values) 48 | if err != nil { 49 | log.Fatal(err) 50 | } 51 | stmt.BindInt(1, id) 52 | stmt.BindBlob(2, v) 53 | err = stmt.Exec() 54 | if err != nil { 55 | log.Fatal(err) 56 | } 57 | stmt.Reset() 58 | } 59 | stmt.Close() 60 | 61 | 62 | 63 | stmt, _, err = db.Prepare(` 64 | SELECT 65 | rowid, 66 | distance 67 | FROM vec_items 68 | WHERE embedding MATCH ? 69 | ORDER BY distance 70 | LIMIT 3 71 | `); 72 | 73 | if err != nil { 74 | log.Fatal(err) 75 | } 76 | 77 | query, err := sqlite_vec.SerializeFloat32(q) 78 | if err != nil { 79 | log.Fatal(err) 80 | } 81 | stmt.BindBlob(1, query) 82 | 83 | for stmt.Step() { 84 | rowid := stmt.ColumnInt64(0) 85 | distance := stmt.ColumnFloat(1) 86 | log.Printf("rowid=%d, distance=%f\n", rowid, distance) 87 | } 88 | if err := stmt.Err(); err != nil { 89 | log.Fatal(err) 90 | } 91 | 92 | err = stmt.Close() 93 | if err != nil { 94 | log.Fatal(err) 95 | } 96 | 97 | err = db.Close() 98 | if err != nil { 99 | log.Fatal(err) 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /site/guides/matryoshka.md: -------------------------------------------------------------------------------- 1 | # Matryoshka (Adaptive-Length) Embeddings 2 | 3 | Matryoshka embeddings are a new class of embedding models introduced in the 4 | TODO-YYY paper [_TODO title_](https://arxiv.org/abs/2205.13147). They allow one 5 | to truncate excess dimensions in large vector, without sacrificing much quality. 6 | 7 | Let's say your embedding model generate 1024-dimensional vectors. If you have 1 8 | million of these 1024-dimensional vectors, they would take up `4.096 GB` of 9 | space! You're not able to reduce the dimensions without losing a lot of 10 | quality - if you were to remove half of the dimensions 512-dimensional vectors, 11 | you could expect to also lose 50% or more of the quality of results. 
There are 12 | other dimensionality-reduction techniques, like [PCA](#TODO) or [Product Quantization](#TODO), but they typically require 13 | complicated and expensive training processes. 14 | 15 | Matryoshka embeddings, on the other hand, _can_ be truncated, without losing much 16 | quality. Using [`mixedbread.ai`](#TODO) `mxbai-embed-large-v1` model, they claim 17 | that 18 | 19 | They are called "Matryoshka" embeddings because ... TODO 20 | 21 | ## Matryoshka Embeddings with `sqlite-vec` 22 | 23 | You can use a combination of [`vec_slice()`](../api-reference.md#vec_slice) and 24 | [`vec_normalize()`](../api-reference.md#vec_normalize) on Matryoshka embeddings to 25 | truncate them. 26 | 27 | ```sql 28 | select 29 | vec_normalize( 30 | vec_slice(title_embeddings, 0, 256) 31 | ) as title_embeddings_256d 32 | from vec_articles; 33 | ``` 34 | 35 | [`vec_slice()`](../api-reference.md#vec_slice) will cut down the vector to the first 256 dimensions. Then [`vec_normalize()`](../api-reference.md#vec_normalize) will normalize that truncated vector, which is typically a required step for Matryoshka embeddings. 
36 | 37 | ## Benchmarks 38 | 39 | ## Suppported Models 40 | 41 | https://supabase.com/blog/matryoshka-embeddings#which-granularities-were-openais-text-embedding-3-models-trained-on 42 | 43 | `text-embedding-3-small`: 1536, 512 `text-embedding-3-large`: 3072, 1024, 256 44 | 45 | https://x.com/ZainHasan6/status/1757519325202686255 46 | 47 | `text-embeddings-3-large:` 3072, 1536, 1024, 512 48 | 49 | https://www.mixedbread.ai/blog/binary-mrl 50 | 51 | `mxbai-embed-large-v1`: 1024, 512, 256, 128, 64 52 | 53 | `nomic-embed-text-v1.5`: 768, 512, 256, 128, 64 54 | 55 | ``` 56 | # TODO new snowflake model 57 | ``` 58 | -------------------------------------------------------------------------------- /benchmarks/self-params/knn.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import time 3 | from random import randrange 4 | from statistics import mean 5 | 6 | 7 | def connect(path): 8 | print(path) 9 | db = sqlite3.connect(path) 10 | db.enable_load_extension(True) 11 | db.load_extension("../dist/vec0") 12 | db.execute("select load_extension('../dist/vec0', 'sqlite3_vec_fs_read_init')") 13 | db.enable_load_extension(False) 14 | return db 15 | 16 | 17 | page_sizes = [ # 4096, 8192, 18 | 16384, 19 | 32768, 20 | ] 21 | chunk_sizes = [128, 256, 1024, 2048] 22 | types = ["f32", "int8", "bit"] 23 | 24 | types.reverse() 25 | 26 | for t in types: 27 | for page_size in page_sizes: 28 | for chunk_size in chunk_sizes: 29 | print(f"page_size={page_size}, chunk_size={chunk_size}") 30 | 31 | func = "embedding" 32 | if t == "int8": 33 | func = "vec_quantize_i8(embedding, 'unit')" 34 | if t == "bit": 35 | func = "vec_quantize_binary(embedding)" 36 | 37 | times = [] 38 | trials = 20 39 | db = connect(f"dbs/test.{page_size}.{chunk_size}.{t}.db") 40 | 41 | for trial in range(trials): 42 | t0 = time.time() 43 | results = db.execute( 44 | f""" 45 | select rowid 46 | from vec_items 47 | where embedding match (select {func} from vec_items 
where rowid = ?) 48 | and k = 10 49 | order by distance 50 | """, 51 | [randrange(100000)], 52 | ).fetchall() 53 | 54 | times.append(time.time() - t0) 55 | print(mean(times)) 56 | 57 | """ 58 | 59 | page_size=4096, chunk_size=256 60 | 0.2635102152824402 61 | page_size=4096, chunk_size=1024 62 | 0.2609449863433838 63 | page_size=4096, chunk_size=2048 64 | 0.275589919090271 65 | page_size=8192, chunk_size=256 66 | 0.18621582984924318 67 | page_size=8192, chunk_size=1024 68 | 0.20939643383026124 69 | page_size=8192, chunk_size=2048 70 | 0.22376316785812378 71 | page_size=16384, chunk_size=256 72 | 0.16012665033340454 73 | page_size=16384, chunk_size=1024 74 | 0.18346318006515502 75 | page_size=16384, chunk_size=2048 76 | 0.18224761486053467 77 | page_size=32768, chunk_size=256 78 | 0.14202518463134767 79 | page_size=32768, chunk_size=1024 80 | 0.15340715646743774 81 | page_size=32768, chunk_size=2048 82 | 0.18018823862075806 83 | """ 84 | -------------------------------------------------------------------------------- /site/using/rust.md: -------------------------------------------------------------------------------- 1 | # Using `sqlite-vec` in Rust 2 | [![Crates.io](https://img.shields.io/crates/v/sqlite-vec?logo=rust)](https://crates.io/crates/sqlite-vec) 3 | 4 | You can embed `sqlite-vec` into your Rust projects using the official 5 | [`sqlite-vec` crate](https://crates.io/crates/sqlite-vec). 6 | 7 | ```bash 8 | cargo add sqlite-vec 9 | ``` 10 | 11 | The crate embeds the `sqlite-vec` C source code, and uses the 12 | [`cc` crate](https://crates.io/crates/sqlite-vec) to compile and statically link 13 | `sqlite-vec` at build-time. 14 | 15 | The `sqlite-vec` crate exposes a single function `sqlite3_vec_init`, which is 16 | the C entrypoint for the SQLite extension. You can "register" with your Rust 17 | SQLite library's `sqlite3_auto_extension()` function. 18 | 19 | This example registers sqlite-vec using [rusqlite](https://docs.rs/rusqlite/0.32.1/rusqlite/). 
First, enable the `"bundled"` feature in your Cargo file entry for rusqlite: 20 | 21 | ```diff 22 | # Cargo.toml 23 | [dependencies] 24 | + rusqlite = { version = "VERSION", features = ["bundled"] } 25 | ``` 26 | 27 | Then, you can verify your installation was successful by embedding your first vector. This example uses [zerocopy](https://docs.rs/zerocopy/latest/zerocopy/) to efficiently pass the vector as bytes, and prints the resulting vector and library version as Strings: 28 | 29 | ```rs 30 | use sqlite_vec::sqlite3_vec_init; 31 | use rusqlite::{ffi::sqlite3_auto_extension, Connection, Result}; 32 | use zerocopy::AsBytes; 33 | 34 | fn main()-> Result<()> { 35 | unsafe { 36 | sqlite3_auto_extension(Some(std::mem::transmute(sqlite3_vec_init as *const ()))); 37 | } 38 | 39 | let db = Connection::open_in_memory()?; 40 | let v: Vec<f32> = vec![0.1, 0.2, 0.3]; 41 | 42 | let (vec_version, embedding): (String, String) = db.query_row( 43 | "select vec_version(), vec_to_json(?)", 44 | &[v.as_bytes()], 45 | |x| Ok((x.get(0)?, x.get(1)?)), 46 | )?; 47 | 48 | println!("vec_version={vec_version}, embedding={embedding}"); 49 | Ok(()) 50 | } 51 | ``` 52 | 53 | See 54 | [`simple-rust/demo.rs`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-rust/demo.rs) 55 | for a more complete Rust demo. 56 | 57 | ## Working with vectors in Rust 58 | 59 | If your vectors are provided as a `Vec<f32>` type, the [`zerocopy` crate](https://crates.io/crates/zerocopy) is recommended, specifically `zerocopy::AsBytes`. This will allow you to pass vectors into `sqlite-vec` without any copying. 
60 | 61 | ```rs 62 | let query: Vec = vec![0.1, 0.2, 0.3, 0.4]; 63 | let mut stmt = db.prepare("SELECT vec_length(?)")?; 64 | stmt.execute(&[item.1.as_bytes()])?; 65 | ``` 66 | -------------------------------------------------------------------------------- /benchmarks/self-params/build.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import time 3 | 4 | 5 | def connect(path): 6 | db = sqlite3.connect(path) 7 | db.enable_load_extension(True) 8 | db.load_extension("../dist/vec0") 9 | db.execute("select load_extension('../dist/vec0', 'sqlite3_vec_fs_read_init')") 10 | db.enable_load_extension(False) 11 | return db 12 | 13 | 14 | page_sizes = [ # 4096, 8192, 15 | 16384, 16 | 32768, 17 | ] 18 | chunk_sizes = [128, 256, 1024, 2048] 19 | types = ["f32", "int8", "bit"] 20 | 21 | SRC = "../examples/dbpedia-openai/data/vectors.npy" 22 | 23 | for page_size in page_sizes: 24 | for chunk_size in chunk_sizes: 25 | for t in types: 26 | print(f"{t} page_size={page_size}, chunk_size={chunk_size}") 27 | 28 | t0 = time.time() 29 | db = connect(f"dbs/test.{page_size}.{chunk_size}.{t}.db") 30 | db.execute(f"pragma page_size = {page_size}") 31 | with db: 32 | db.execute( 33 | f""" 34 | create virtual table vec_items using vec0( 35 | embedding {t}[1536], 36 | chunk_size={chunk_size} 37 | ) 38 | """ 39 | ) 40 | func = "vector" 41 | if t == "int8": 42 | func = "vec_quantize_i8(vector, 'unit')" 43 | if t == "bit": 44 | func = "vec_quantize_binary(vector)" 45 | db.execute( 46 | f""" 47 | insert into vec_items 48 | select rowid, {func} 49 | from vec_npy_each(vec_npy_file(?)) 50 | limit 100000 51 | """, 52 | [SRC], 53 | ) 54 | elapsed = time.time() - t0 55 | print(elapsed) 56 | 57 | """ 58 | 59 | # for 100_000 60 | 61 | page_size=4096, chunk_size=256 62 | 3.5894200801849365 63 | page_size=4096, chunk_size=1024 64 | 60.70046401023865 65 | page_size=4096, chunk_size=2048 66 | 201.04426288604736 67 | page_size=8192, chunk_size=256 68 | 
7.034514904022217 69 | page_size=8192, chunk_size=1024 70 | 9.983598947525024 71 | page_size=8192, chunk_size=2048 72 | 12.318921089172363 73 | page_size=16384, chunk_size=256 74 | 4.97080397605896 75 | page_size=16384, chunk_size=1024 76 | 6.051195859909058 77 | page_size=16384, chunk_size=2048 78 | 8.492683172225952 79 | page_size=32768, chunk_size=256 80 | 5.906642198562622 81 | page_size=32768, chunk_size=1024 82 | 5.876632213592529 83 | page_size=32768, chunk_size=2048 84 | 5.420510292053223 85 | """ 86 | -------------------------------------------------------------------------------- /examples/simple-c/demo.c: -------------------------------------------------------------------------------- 1 | #include "sqlite3.h" 2 | #include "sqlite-vec.h" 3 | #include 4 | #include 5 | #include 6 | 7 | int main(int argc, char *argv[]) { 8 | int rc = SQLITE_OK; 9 | sqlite3 *db; 10 | sqlite3_stmt *stmt; 11 | 12 | rc = sqlite3_auto_extension((void (*)())sqlite3_vec_init); 13 | assert(rc == SQLITE_OK); 14 | 15 | rc = sqlite3_open(":memory:", &db); 16 | assert(rc == SQLITE_OK); 17 | 18 | rc = sqlite3_prepare_v2(db, "SELECT sqlite_version(), vec_version()", -1, &stmt, NULL); 19 | assert(rc == SQLITE_OK); 20 | 21 | rc = sqlite3_step(stmt); 22 | printf("sqlite_version=%s, vec_version=%s\n", sqlite3_column_text(stmt, 0), sqlite3_column_text(stmt, 1)); 23 | sqlite3_finalize(stmt); 24 | 25 | static const struct { 26 | sqlite3_int64 id; 27 | float vector[4]; 28 | } items[] = { 29 | {1, {0.1, 0.1, 0.1, 0.1}}, 30 | {2, {0.2, 0.2, 0.2, 0.2}}, 31 | {3, {0.3, 0.3, 0.3, 0.3}}, 32 | {4, {0.4, 0.4, 0.4, 0.4}}, 33 | {5, {0.5, 0.5, 0.5, 0.5}}, 34 | }; 35 | float query[4] = {0.3, 0.3, 0.3, 0.3}; 36 | 37 | 38 | rc = sqlite3_prepare_v2(db, "CREATE VIRTUAL TABLE vec_items USING vec0(embedding float[4])", -1, &stmt, NULL); 39 | assert(rc == SQLITE_OK); 40 | rc = sqlite3_step(stmt); 41 | assert(rc == SQLITE_DONE); 42 | sqlite3_finalize(stmt); 43 | 44 | rc = sqlite3_exec(db, "BEGIN", NULL, NULL, NULL); 
45 | assert(rc == SQLITE_OK); 46 | rc = sqlite3_prepare_v2(db, "INSERT INTO vec_items(rowid, embedding) VALUES (?, ?)", -1, &stmt, NULL); 47 | assert(rc == SQLITE_OK); 48 | for (unsigned long i = 0; i < sizeof(items) / sizeof(items[0]); i++) { 49 | sqlite3_bind_int64(stmt, 1, items[i].id); 50 | sqlite3_bind_blob(stmt, 2, items[i].vector, sizeof(items[i].vector), SQLITE_STATIC); 51 | rc = sqlite3_step(stmt); 52 | assert(rc == SQLITE_DONE); 53 | sqlite3_reset(stmt); 54 | } 55 | sqlite3_finalize(stmt); 56 | rc = sqlite3_exec(db, "COMMIT", NULL, NULL, NULL); 57 | assert(rc == SQLITE_OK); 58 | 59 | rc = sqlite3_prepare_v2(db, 60 | "SELECT " 61 | " rowid, " 62 | " distance " 63 | "FROM vec_items " 64 | "WHERE embedding MATCH ?1 " 65 | "ORDER BY distance " 66 | "LIMIT 3 " 67 | , -1, &stmt, NULL); 68 | assert(rc == SQLITE_OK); 69 | 70 | sqlite3_bind_blob(stmt, 1, query, sizeof(query), SQLITE_STATIC); 71 | 72 | while(1) { 73 | rc = sqlite3_step(stmt); 74 | if(rc == SQLITE_DONE) break; 75 | assert(rc==SQLITE_ROW); 76 | sqlite3_int64 rowid = sqlite3_column_int64(stmt, 0); 77 | double distance = sqlite3_column_double(stmt, 1); 78 | printf("rowid=%lld distance=%f\n", rowid, distance); 79 | } 80 | sqlite3_finalize(stmt); 81 | sqlite3_close(db); 82 | return 0; 83 | } 84 | -------------------------------------------------------------------------------- /examples/python-recipes/openai-sample.py: -------------------------------------------------------------------------------- 1 | # pip install openai sqlite-vec 2 | 3 | from openai import OpenAI 4 | import sqlite3 5 | import sqlite_vec 6 | import struct 7 | from typing import List 8 | 9 | 10 | def serialize(vector: List[float]) -> bytes: 11 | """serializes a list of floats into a compact "raw bytes" format""" 12 | return struct.pack("%sf" % len(vector), *vector) 13 | 14 | 15 | sentences = [ 16 | "Capri-Sun is a brand of juice concentrate–based drinks manufactured by the German company Wild and regional licensees.", 17 | "George V was 
King of the United Kingdom and the British Dominions, and Emperor of India, from 6 May 1910 until his death in 1936.", 18 | "Alaqua Cox is a Native American (Menominee) actress.", 19 | "Shohei Ohtani is a Japanese professional baseball pitcher and designated hitter for the Los Angeles Dodgers of Major League Baseball.", 20 | "Tamarindo, also commonly known as agua de tamarindo, is a non-alcoholic beverage made of tamarind, sugar, and water.", 21 | ] 22 | 23 | 24 | client = OpenAI() 25 | 26 | # change ':memory:' to a filepath to persist data 27 | db = sqlite3.connect(":memory:") 28 | db.enable_load_extension(True) 29 | sqlite_vec.load(db) 30 | db.enable_load_extension(False) 31 | 32 | db.execute( 33 | """ 34 | CREATE TABLE sentences( 35 | id INTEGER PRIMARY KEY, 36 | sentence TEXT 37 | ); 38 | """ 39 | ) 40 | 41 | with db: 42 | for i, sentence in enumerate(sentences): 43 | db.execute("INSERT INTO sentences(id, sentence) VALUES(?, ?)", [i, sentence]) 44 | 45 | db.execute( 46 | """ 47 | CREATE VIRTUAL TABLE vec_sentences USING vec0( 48 | id INTEGER PRIMARY KEY, 49 | sentence_embedding FLOAT[1536] 50 | ); 51 | """ 52 | ) 53 | 54 | 55 | with db: 56 | sentence_rows = db.execute("SELECT id, sentence FROM sentences").fetchall() 57 | response = client.embeddings.create( 58 | input=[row[1] for row in sentence_rows], model="text-embedding-3-small" 59 | ) 60 | for (id, _), embedding in zip(sentence_rows, response.data): 61 | db.execute( 62 | "INSERT INTO vec_sentences(id, sentence_embedding) VALUES(?, ?)", 63 | [id, serialize(embedding.embedding)], 64 | ) 65 | 66 | 67 | query = "fruity liquids" 68 | query_embedding = ( 69 | client.embeddings.create(input=query, model="text-embedding-3-small") 70 | .data[0] 71 | .embedding 72 | ) 73 | 74 | results = db.execute( 75 | """ 76 | SELECT 77 | vec_sentences.id, 78 | distance, 79 | sentence 80 | FROM vec_sentences 81 | LEFT JOIN sentences ON sentences.id = vec_sentences.id 82 | WHERE sentence_embedding MATCH ? 
83 | AND k = 3 84 | ORDER BY distance 85 | """, 86 | [serialize(query_embedding)], 87 | ).fetchall() 88 | 89 | for row in results: 90 | print(row) 91 | -------------------------------------------------------------------------------- /benchmarks/exhaustive-memory/requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.7.0 2 | anyio==4.4.0 3 | asgiref==3.8.1 4 | attrs==23.2.0 5 | backoff==2.2.1 6 | bcrypt==4.2.0 7 | build==1.2.1 8 | cachetools==5.4.0 9 | certifi==2024.7.4 10 | charset-normalizer==3.3.2 11 | chroma-hnswlib==0.7.6 12 | chromadb==0.5.5 13 | click==8.1.7 14 | coloredlogs==15.0.1 15 | decorator==5.1.1 16 | deprecated==1.2.14 17 | deprecation==2.1.0 18 | dnspython==2.6.1 19 | duckdb==1.0.0 20 | email-validator==2.2.0 21 | faiss-cpu==1.8.0.post1 22 | fastapi==0.111.1 23 | fastapi-cli==0.0.4 24 | filelock==3.15.4 25 | flatbuffers==24.3.25 26 | fsspec==2024.6.1 27 | google-auth==2.32.0 28 | googleapis-common-protos==1.63.2 29 | grpcio==1.65.1 30 | h11==0.14.0 31 | hnswlib==0.8.0 32 | httpcore==1.0.5 33 | httptools==0.6.1 34 | httpx==0.27.0 35 | huggingface-hub==0.24.1 36 | humanfriendly==10.0 37 | idna==3.7 38 | importlib-metadata==8.0.0 39 | importlib-resources==6.4.0 40 | jinja2==3.1.4 41 | joblib==1.4.2 42 | kubernetes==30.1.0 43 | lancedb==0.10.2 44 | markdown-it-py==3.0.0 45 | markupsafe==2.1.5 46 | mdurl==0.1.2 47 | mmh3==4.1.0 48 | monotonic==1.6 49 | mpmath==1.3.0 50 | networkx==3.3 51 | numpy==1.26.4 52 | oauthlib==3.2.2 53 | onnxruntime==1.18.1 54 | opentelemetry-api==1.26.0 55 | opentelemetry-exporter-otlp-proto-common==1.26.0 56 | opentelemetry-exporter-otlp-proto-grpc==1.26.0 57 | opentelemetry-instrumentation==0.47b0 58 | opentelemetry-instrumentation-asgi==0.47b0 59 | opentelemetry-instrumentation-fastapi==0.47b0 60 | opentelemetry-proto==1.26.0 61 | opentelemetry-sdk==1.26.0 62 | opentelemetry-semantic-conventions==0.47b0 63 | opentelemetry-util-http==0.47b0 64 | orjson==3.10.6 
65 | overrides==7.7.0 66 | packaging==24.1 67 | pandas==2.2.2 68 | pillow==10.4.0 69 | posthog==3.5.0 70 | protobuf==4.25.4 71 | py==1.11.0 72 | pyarrow==15.0.0 73 | pyasn1==0.6.0 74 | pyasn1-modules==0.4.0 75 | pydantic==2.8.2 76 | pydantic-core==2.20.1 77 | pygments==2.18.0 78 | pylance==0.14.1 79 | pypika==0.48.9 80 | pyproject-hooks==1.1.0 81 | python-dateutil==2.9.0.post0 82 | python-dotenv==1.0.1 83 | python-multipart==0.0.9 84 | pytz==2024.1 85 | pyyaml==6.0.1 86 | ratelimiter==1.2.0.post0 87 | regex==2024.5.15 88 | requests==2.32.3 89 | requests-oauthlib==2.0.0 90 | retry==0.9.2 91 | rich==13.7.1 92 | rsa==4.9 93 | safetensors==0.4.3 94 | scikit-learn==1.5.1 95 | scipy==1.14.0 96 | sentence-transformers==3.0.1 97 | setuptools==71.1.0 98 | shellingham==1.5.4 99 | six==1.16.0 100 | sniffio==1.3.1 101 | starlette==0.37.2 102 | sympy==1.13.1 103 | tenacity==8.5.0 104 | threadpoolctl==3.5.0 105 | tokenizers==0.19.1 106 | torch==2.3.1 107 | tqdm==4.66.4 108 | transformers==4.43.1 109 | typer==0.12.3 110 | typing-extensions==4.12.2 111 | tzdata==2024.1 112 | urllib3==2.2.2 113 | usearch==2.12.0 114 | uvicorn==0.30.3 115 | uvloop==0.19.0 116 | watchfiles==0.22.0 117 | websocket-client==1.8.0 118 | websockets==12.0 119 | wrapt==1.16.0 120 | zipp==3.19.2 121 | -------------------------------------------------------------------------------- /site/compiling.md: -------------------------------------------------------------------------------- 1 | 4 | 5 | # Compiling `sqlite-vec` 6 | 7 | `sqlite-vec` is is easy to compile yourself! It's a single C file with no dependencies, so the process should be straightforward. 
8 | 9 | ## From Source 10 | 11 | To compile `sqlite-vec` as a loadable SQLite extension, you can `git clone` the source repository and run the following commands: 12 | 13 | ```bash 14 | git clone https://github.com/asg017/sqlite-vec 15 | cd sqlite-vec 16 | ./scripts/vendor.sh 17 | make loadable 18 | ``` 19 | 20 | The `./scripts/vendor.sh` command will download a recent version of [SQLite's amalgamation builds](https://www.sqlite.org/amalgamation.html), to ensure you have an up-to-date `sqlite3ext.h` available on your system. 21 | 22 | Then `make loadable` will generate the `sqlite-vec.h` file and a dynamically loadable library at `dist/vec0.$SUFFIX`. The suffix will be `dylib` for MacOS, `so` for Linux, and `dll` for Windows. 23 | 24 | 25 | ## From the amalgamation build 26 | 27 | The "amalgamation" build of `sqlite-vec` is a `.zip` or `.tar.gz` archive with the pre-configured `sqlite-vec.c` and `sqlite-vec.h` source files. 28 | 29 | The amalgamation builds can be found in [`sqlite-vec` Releases](https://github.com/asg017/sqlite-vec/releases). You can also download the latest amalgamation build with these commands: 30 | 31 | ```-vue 32 | wget https://github.com/asg017/sqlite-vec/releases/download/v{{data.VERSION}}/sqlite-vec-{{data.VERSION}}-amalgamation.zip 33 | unzip sqlite-vec-{{data.VERSION}}-amalgamation.zip 34 | ``` 35 | 36 | There will now be `sqlite-vec.c` and `sqlite-vec.h` available in your current directory. 
To compile it manually, follow the [official SQLite extension compilation instructions](https://www.sqlite.org/loadext.html#compiling_a_loadable_extension), which will be something like: 37 | 38 | ```bash 39 | # Linux 40 | gcc -g -fPIC -shared sqlite-vec.c -o vec0.so 41 | 42 | # MacOS 43 | gcc -g -fPIC -dynamiclib sqlite-vec.c -o vec0.dylib 44 | 45 | # Windows, MSVC compiler 46 | cl sqlite-vec.c -link -dll -out:sqlite-vec.dll 47 | 48 | # Windows, MinGW 49 | gcc -g -shared sqlite-vec.c -o vec0.dll 50 | ``` 51 | 52 | Different platforms, compilers, or architectures may require different compilation flags. 53 | 54 | ## Compile-time options 55 | 56 | There are a few compilation options available for `sqlite-vec`, but they're currently unstable and may change in the future. They aren't tracked with [`sqlite-vec`'s semantic versioning policy](./versioning.md), so options may break in patch version updates. 57 | 58 | The current compile-time flags are: 59 | 60 | - `SQLITE_VEC_ENABLE_AVX`, enables AVX CPU instructions for some vector search operations 61 | - `SQLITE_VEC_ENABLE_NEON`, enables NEON CPU instructions for some vector search operations 62 | - `SQLITE_VEC_OMIT_FS`, removes some obscure SQL functions and features that use the filesystem, meant for some WASM builds where there's no available filesystem 63 | - `SQLITE_VEC_STATIC`, meant for statically linking `sqlite-vec` 64 | -------------------------------------------------------------------------------- /.github/logos/turso.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /site/public/turso.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/micro/benches/my_benchmark.rs: -------------------------------------------------------------------------------- 1 | use 
criterion::{black_box, criterion_group, criterion_main, Criterion}; 2 | use micro::init_vec; 3 | use rand::Rng; 4 | use rusqlite::Connection; 5 | use zerocopy::AsBytes; 6 | 7 | fn random_vector(n: usize) -> Vec { 8 | let mut rng = rand::thread_rng(); 9 | (0..n).map(|_| rng.gen()).collect() 10 | } 11 | 12 | fn setup_base(page_size: usize, d: usize, n: i32) -> Connection { 13 | let base: Vec> = (0..n).map(|_| random_vector(d)).collect(); 14 | 15 | let mut db = Connection::open_in_memory().unwrap(); 16 | db.pragma_update( 17 | Some(rusqlite::DatabaseName::Main), 18 | "page_size", 19 | page_size, //, 20 | //|row| Ok(assert!(row.get::(0).unwrap() == page_size)), 21 | ) 22 | .unwrap(); 23 | assert_eq!( 24 | db.pragma_query_value(Some(rusqlite::DatabaseName::Main), "page_size", |v| { 25 | Ok(v.get::(0).unwrap()) 26 | }) 27 | .unwrap(), 28 | page_size, 29 | ); 30 | db.execute( 31 | format!("create virtual table vec_base using vec0(a float[{d}])").as_str(), 32 | [], 33 | ) 34 | .unwrap(); 35 | 36 | let tx = db.transaction().unwrap(); 37 | for item in &base { 38 | tx.execute("insert into vec_base(a) values (?)", [item.as_bytes()]) 39 | .unwrap(); 40 | } 41 | tx.commit().unwrap(); 42 | db 43 | } 44 | pub fn criterion_benchmark(c: &mut Criterion) { 45 | init_vec(); 46 | 47 | let n = 1_000_000; 48 | let d = 1536; 49 | let k = 10; 50 | let page_size = 8192; 51 | 52 | let page_sizes = [4096, 8192, 16384, 32768]; 53 | for page_size in page_sizes { 54 | let db = setup_base(page_size, d, n); 55 | 56 | let mut stmt = db 57 | .prepare("select rowid, a from vec_base where rowid = ?") 58 | .unwrap(); 59 | 60 | c.bench_function( 61 | format!("point page_size={page_size} n={n} dimension={d} k={k}").as_str(), 62 | |b| { 63 | let mut rng = rand::thread_rng(); 64 | let query: i64 = rng.gen_range(0..n.into()); 65 | 66 | b.iter(|| { 67 | let result: (i64, Vec) = stmt 68 | .query_row(rusqlite::params![query], |r| { 69 | Ok((r.get(0).unwrap(), r.get(1).unwrap())) 70 | }) 71 | .unwrap(); 72 | 
assert_eq!(result.0, query); 73 | }); 74 | }, 75 | ); 76 | /* 77 | c.bench_function( 78 | format!("KNN page_size={page_size} n={n} dimension={d} k={k}").as_str(), 79 | |b| { 80 | let query: Vec = random_vector(d); 81 | let db = setup_base(page_size, d, n); 82 | 83 | let mut stmt = db.prepare( 84 | "select rowid, distance from vec_base where a match ? order by distance limit ?", 85 | ) 86 | .unwrap(); 87 | 88 | b.iter(|| { 89 | let result: Vec<(i64, f64)> = stmt 90 | .query_map(rusqlite::params![query.as_bytes(), k], |r| { 91 | Ok((r.get(0).unwrap(), r.get(1).unwrap())) 92 | }) 93 | .unwrap() 94 | .collect::, _>>() 95 | .unwrap(); 96 | assert_eq!(result.len(), 10); 97 | }); 98 | stmt.finalize().unwrap() 99 | }, 100 | ); */ 101 | } 102 | } 103 | 104 | criterion_group!(benches, criterion_benchmark); 105 | criterion_main!(benches); 106 | -------------------------------------------------------------------------------- /site/.vitepress/theme/HeroImg.vue: -------------------------------------------------------------------------------- 1 | 2 | 3 | 39 | -------------------------------------------------------------------------------- /tests/test-partition-keys.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | from collections import OrderedDict 3 | 4 | 5 | def test_constructor_limit(db, snapshot): 6 | assert exec( 7 | db, 8 | """ 9 | create virtual table v using vec0( 10 | p1 int partition key, 11 | p2 int partition key, 12 | p3 int partition key, 13 | p4 int partition key, 14 | p5 int partition key, 15 | v float[1] 16 | ) 17 | """, 18 | ) == snapshot(name="max 4 partition keys") 19 | 20 | 21 | def test_normal(db, snapshot): 22 | db.execute( 23 | "create virtual table v using vec0(p1 int partition key, a float[1], chunk_size=8)" 24 | ) 25 | 26 | db.execute("insert into v(rowid, p1, a) values (1, 100, X'11223344')") 27 | assert vec0_shadow_table_contents(db, "v") == snapshot(name="1 row") 28 | db.execute("insert into 
v(rowid, p1, a) values (2, 100, X'44556677')") 29 | assert vec0_shadow_table_contents(db, "v") == snapshot(name="2 rows, same parition") 30 | db.execute("insert into v(rowid, p1, a) values (3, 200, X'8899aabb')") 31 | assert vec0_shadow_table_contents(db, "v") == snapshot(name="3 rows, 2 partitions") 32 | 33 | 34 | def test_types(db, snapshot): 35 | db.execute( 36 | "create virtual table v using vec0(p1 int partition key, a float[1], chunk_size=8)" 37 | ) 38 | 39 | # EVIDENCE-OF: V11454_28292 40 | assert exec( 41 | db, "insert into v(p1, a) values(?, ?)", ["not int", b"\x11\x22\x33\x44"] 42 | ) == snapshot(name="1. raises type error") 43 | 44 | assert vec0_shadow_table_contents(db, "v") == snapshot(name="2. empty DB") 45 | 46 | # but allow NULLs 47 | assert exec( 48 | db, "insert into v(p1, a) values(?, ?)", [None, b"\x11\x22\x33\x44"] 49 | ) == snapshot(name="3. allow nulls") 50 | 51 | assert vec0_shadow_table_contents(db, "v") == snapshot( 52 | name="4. show NULL partition key" 53 | ) 54 | 55 | 56 | def test_updates(db, snapshot): 57 | db.execute( 58 | "create virtual table v using vec0(p text partition key, a float[1], chunk_size=8)" 59 | ) 60 | 61 | db.execute( 62 | "insert into v(rowid, p, a) values (?, ?, ?)", [1, "a", b"\x11\x11\x11\x11"] 63 | ) 64 | db.execute( 65 | "insert into v(rowid, p, a) values (?, ?, ?)", [2, "a", b"\x22\x22\x22\x22"] 66 | ) 67 | db.execute( 68 | "insert into v(rowid, p, a) values (?, ?, ?)", [3, "a", b"\x33\x33\x33\x33"] 69 | ) 70 | 71 | assert exec(db, "select * from v") == snapshot(name="1. Initial dataset") 72 | assert exec(db, "update v set p = ? where rowid = ?", ["new", 1]) == snapshot( 73 | name="2. 
class Row:
    """Empty placeholder row type; it stores no data of its own."""

    def __init__(self):
        pass

    def __repr__(self) -> str:
        # The previous body called ``repr()`` with no argument, which always
        # raises TypeError. Show the class name and instance attributes instead.
        return f"{type(self).__name__}({vars(self)!r})"


def exec(db, sql, parameters=None):
    """Run *sql* on *db* and return a snapshot-friendly summary of the outcome.

    Args:
        db: An open connection whose rows expose ``.keys()`` and support
            lookup by column name (e.g. ``row_factory = sqlite3.Row``).
        sql: The SQL text to execute.
        parameters: Optional bind parameters; defaults to none.

    Returns:
        On success, an ``OrderedDict`` with ``"sql"`` (the statement text) and
        ``"rows"`` (a list of ``OrderedDict`` mapping column name to value).
        On a SQLite error, a plain dict with ``"error"`` (the exception class
        name) and ``"message"`` (its text) instead of raising.
    """
    # ``None`` sentinel avoids sharing one mutable default list across calls.
    bound = [] if parameters is None else parameters
    try:
        rows = db.execute(sql, bound).fetchall()
    except sqlite3.DatabaseError as e:
        # OperationalError is a subclass of DatabaseError, so this single
        # clause covers everything the original two-element tuple caught.
        return {
            "error": e.__class__.__name__,
            "message": str(e),
        }

    result = OrderedDict()
    result["sql"] = sql
    result["rows"] = [OrderedDict((key, row[key]) for key in row.keys()) for row in rows]
    return result


def vec0_shadow_table_contents(db, v):
    """Dump every ``<v>_*`` object listed in ``sqlite_master``, except ``*_info``.

    Args:
        db: An open connection (see ``exec`` for the row requirements).
        v: Name of the table whose ``<v>_``-prefixed companions are dumped.

    Returns:
        A dict mapping each matching name to the result of
        ``exec(db, "select * from <name>")``.
    """
    # ``_`` and ``%`` are LIKE wildcards; escape them so only names that
    # literally start with "<v>_" match (the unescaped pattern also matched
    # names such as "<v>x...").
    escaped = v.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    shadow_tables = [
        row[0]
        for row in db.execute(
            "select name from sqlite_master where name like ? escape '\\' order by 1",
            [f"{escaped}\\_%"],
        ).fetchall()
    ]
    # The statement text is kept exactly as before because ``exec`` records it
    # in its result, which callers compare against stored snapshots.
    return {
        name: exec(db, f"select * from {name}")
        for name in shadow_tables
        if not name.endswith("_info")
    }
16 | 17 | ```bash 18 | go get -u github.com/asg017/sqlite-vec-go-bindings/cgo 19 | ``` 20 | 21 | This will compile and statically link `sqlite-vec` into your project. The initial build will be slow, but later builds will be cached and much faster. 22 | 23 | Use `sqlite_vec.Auto()` to enable `sqlite-vec` functions in all future database connections. Also `sqlite_vec.Cancel()` is available to undo `Auto()`. 24 | 25 | ```go 26 | package main 27 | 28 | import ( 29 | "database/sql" 30 | "log" 31 | 32 | sqlite_vec "github.com/asg017/sqlite-vec-go-bindings/cgo" 33 | _ "github.com/mattn/go-sqlite3" 34 | ) 35 | 36 | func main() { 37 | sqlite_vec.Auto() 38 | db, err := sql.Open("sqlite3", ":memory:") 39 | if err != nil { 40 | log.Fatal(err) 41 | } 42 | defer db.Close() 43 | 44 | var vecVersion string 45 | err = db.QueryRow("select vec_version()").Scan(&vecVersion) 46 | if err != nil { 47 | log.Fatal(err) 48 | } 49 | log.Printf("vec_version=%s\n",vecVersion) 50 | } 51 | ``` 52 | 53 | See 54 | [`simple-go-cgo/demo.go`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-go-cgo/demo.go) 55 | for a more complete Go CGO demo. 56 | 57 | ## Option 2: WASM based with `ncruces/go-sqlite3` {#ncruces} 58 | 59 | [![Go Reference](https://pkg.go.dev/badge/github.com/asg017/sqlite-vec-go-bindings/ncruces.svg)](https://pkg.go.dev/github.com/asg017/sqlite-vec-go-bindings/ncruces) 60 | 61 | [`github.com/ncruces/go-sqlite3`](https://github.com/ncruces/go-sqlite3) is an alternative SQLite Go driver that avoids CGO by using a custom WASM build of SQLite. To use `sqlite-vec` from this library, use the special WASM binary provided in `github.com/asg017/sqlite-vec-go-bindings/ncruces`. 
62 | 63 | ```bash 64 | go get -u github.com/asg017/sqlite-vec-go-bindings/ncruces 65 | ``` 66 | 67 | ```go 68 | package main 69 | 70 | import ( 71 | _ "embed" 72 | "log" 73 | 74 | _ "github.com/asg017/sqlite-vec-go-bindings/ncruces" 75 | "github.com/ncruces/go-sqlite3" 76 | ) 77 | 78 | func main() { 79 | db, err := sqlite3.Open(":memory:") 80 | if err != nil { 81 | log.Fatal(err) 82 | } 83 | 84 | stmt, _, err := db.Prepare(`SELECT vec_version()`) 85 | if err != nil { 86 | log.Fatal(err) 87 | } 88 | 89 | stmt.Step() 90 | log.Printf("vec_version=%s\n", stmt.ColumnText(0)) 91 | stmt.Close() 92 | } 93 | ``` 94 | 95 | See 96 | [`simple-go-ncruces/demo.go`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-go-ncruces/demo.go) 97 | for a more complete Go ncruces demo. 98 | 99 | The `github.com/asg017/sqlite-vec-go-bindings/ncruces` package embeds a custom WASM build of SQLite, so there's no need to use `github.com/ncruces/go-sqlite3/embed`. 100 | 101 | 102 | ## Working with vectors in Go 103 | 104 | If vectors are provided as a list of floats, use `SerializeFloat32(list)` to serialize them into the compact BLOB format that `sqlite-vec` expects. 
def cosine_similarity(
    vec: npt.NDArray[np.float32], mat: npt.NDArray[np.float32], do_norm: bool = True
) -> npt.NDArray[np.float32]:
    """Return the similarity of *vec* against every row of *mat*.

    Computes the dot products ``vec @ mat.T``. When *do_norm* is true, each
    product is divided by ``||vec|| * ||row||`` (cosine similarity); otherwise
    the raw dot products are returned.
    """
    sim = vec @ mat.T
    if do_norm:
        sim /= np.linalg.norm(vec) * np.linalg.norm(mat, axis=1)
    return sim


def distance_l2(
    vec: npt.NDArray[np.float32], mat: npt.NDArray[np.float32]
) -> npt.NDArray[np.float32]:
    """Return the Euclidean (L2) distance from *vec* to each row of 2-D *mat*."""
    return np.sqrt(np.sum((mat - vec) ** 2, axis=1))


def topk(
    vec: npt.NDArray[np.float32],
    mat: npt.NDArray[np.float32],
    k: int = 5,
) -> tuple[npt.NDArray[np.int32], npt.NDArray[np.float32]]:
    """Return the *k* rows of *mat* nearest to *vec* by L2 distance.

    Args:
        vec: Query vector.
        mat: 2-D matrix with one candidate vector per row.
        k: Number of neighbours wanted. Values larger than the number of rows
            are clamped, so every row is returned in that case.

    Returns:
        ``(indices, distances)``: row indices ordered by ascending distance,
        and the matching distances.

    Raises:
        ValueError: If *k* is negative.
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    distances = distance_l2(vec, mat)
    n = distances.shape[0]
    k = min(k, n)

    if k == n:
        # ``argpartition(kth=k)`` needs ``k < n``; when every row is wanted
        # (including the empty-matrix case) a plain sort is both valid and
        # no more expensive.
        top_indices = np.argsort(distances)
    else:
        # Rather than sorting all distances and taking the top K, it's faster
        # to argpartition and then just sort the top K.
        # The difference is O(N logN) vs O(N + k logk)
        indices = np.argpartition(distances, kth=k)[:k]
        top_indices = indices[np.argsort(distances[indices])]
    return top_indices, distances[top_indices]
| #k = 500 100 | k = 10 101 | 102 | t0 = time.time() 103 | np_ids, np_distances = topk(query, base, k=k) 104 | np_durations.append(time.time() - t0) 105 | 106 | t0 = time.time() 107 | rows = db.execute('select rowid, distance from v where a match ? and k = ?', [query, k]).fetchall() 108 | vec_durations.append(time.time() - t0) 109 | 110 | vec_ids = [row[0] for row in rows] 111 | vec_distances = [row[1] for row in rows] 112 | 113 | assert vec_distances == np_distances.tolist() 114 | #assert vec_ids == np_ids.tolist() 115 | #if (vec_ids != np_ids).any(): 116 | # print('idx', idx) 117 | # print('query', query) 118 | # print('np_ids', np_ids) 119 | # print('np_distances', np_distances) 120 | # print('vec_ids', vec_ids) 121 | # print('vec_distances', vec_distances) 122 | # raise Exception(idx) 123 | 124 | print('final', 'np' ,np.mean(np_durations), 'vec', np.mean(vec_durations)) 125 | -------------------------------------------------------------------------------- /site/guides/binary-quant.md: -------------------------------------------------------------------------------- 1 | # Binary Quantization 2 | 3 | "Quantization" refers to a variety of methods and techniques for reducing the 4 | size of vectors in a vector index. **Binary quantization** (BQ) refers to a 5 | specific technique where each individual floating point element in a vector is 6 | reduced to a single bit, typically by assigning `0` to negative numbers and `1` 7 | to positive numbers. 8 | 9 | For example, in this 8-dimensional `float32` vector: 10 | 11 | ```json 12 | [-0.73, -0.80, 0.12, -0.73, 0.79, -0.11, 0.23, 0.97] 13 | ``` 14 | 15 | Applying binary quantization would result in the following `bit` vector: 16 | 17 | ```json 18 | [0, 0, 1, 0, 1, 0, 1, 1] 19 | ``` 20 | 21 | The original 8-dimensional `float32` vector requires `8 * 4 = 32` bytes of space 22 | to store. For 1 million vectors, that would be `32MB`. 
On the other hand, the 23 | binary quantized 8-dimensional vector can be stored in a single byte — one bit 24 | per element. For 1 million vectors, that would be just `1MB`, a 32x reduction! 25 | 26 | Though keep in mind, you're bound to lose a lot of quality when reducing 32 bits of 27 | information to 1 bit. [Oversampling and re-scoring](#re-scoring) will help a 28 | lot. 29 | 30 | The main goal of BQ is to dramatically reduce the size of your vector index, 31 | resulting in faster searches with fewer resources. This is especially useful in 32 | `sqlite-vec`, which is (currently) brute-force only and meant to run on small 33 | devices. BQ is an easy low-cost method to make larger vector datasets easier to 34 | manage. 35 | 36 | ## Binary Quantization `sqlite-vec` 37 | 38 | The `sqlite-vec` extension offers a `vec_quantize_binary()` SQL scalar function, 39 | which applies binary quantization to a `float32` or `int8` vector. For every 40 | element in a given vector, it will apply `0` to negative values and `1` to 41 | positive values, and pack them into a `BLOB`. 42 | 43 | ```sqlite 44 | select vec_quantize_binary( 45 | '[-0.73, -0.80, 0.12, -0.73, 0.79, -0.11, 0.23, 0.97]' 46 | ); 47 | -- X'd4' 48 | ``` 49 | 50 | The single byte `0xd4` in hexadecimal is `11010100` in binary. 51 | 52 | 53 | 54 | ## Demo 55 | 56 | Here's an end-to-end example of using binary quantization with `vec0` virtual 57 | tables in `sqlite-vec`. 
58 | 59 | ```sqlite 60 | create virtual table vec_movies using vec0( 61 | synopsis_embedding bit[768] 62 | ); 63 | ``` 64 | 65 | ```sqlite 66 | insert into vec_movies(rowid, synopsis_embedding) 67 | VALUES (:id, vec_quantize_binary(:vector)); 68 | ``` 69 | 70 | ```sqlite 71 | select 72 | rowid, 73 | distance 74 | from vec_movies 75 | where synopsis_embedding match vec_quantize_binary(:query) 76 | order by distance 77 | limit 20; 78 | ``` 79 | 80 | ### Re-scoring 81 | 82 | ```sqlite 83 | create virtual table vec_movies using vec0( 84 | synopsis_embedding float[768], 85 | synopsis_embedding_coarse bit[768] 86 | ); 87 | ``` 88 | 89 | ```sqlite 90 | insert into vec_movies(rowid, synopsis_embedding, synopsis_embedding_coarse) 91 | VALUES (:id, :vector, vec_quantize_binary(:vector)); 92 | ``` 93 | 94 | ```sqlite 95 | with coarse_matches as ( 96 | select 97 | rowid, 98 | synopsis_embedding 99 | from vec_movies 100 | where synopsis_embedding_coarse match vec_quantize_binary(:query) 101 | order by distance 102 | limit 20 * 8 103 | ) 104 | select 105 | rowid, 106 | vec_distance_L2(synopsis_embedding, :query) 107 | from coarse_matches 108 | order by 2 109 | limit 20; 110 | ``` 111 | 112 | # Benchmarks 113 | 114 | ## Model support 115 | 116 | Certain embedding models, like [Nomic](https://nomic.ai/)'s 117 | [`nomic-embed-text-v1.5`](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) 118 | text embedding model and 119 | [mixedbread.ai](https://www.mixedbread.ai/blog/mxbai-embed-2d-large-v1)'s 120 | [`mxbai-embed-large-v1`](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1) 121 | are specifically trained to perform well after binary quantization. 122 | 123 | Other embedding models may not, but you can still try BQ and see if it works 124 | for your datasets. If your vectors are normalized (i.e. between 125 | `-1.0` and `1.0`), there's a good chance you will see acceptable results with BQ. 
126 | -------------------------------------------------------------------------------- /ARCHITECTURE.md: -------------------------------------------------------------------------------- 1 | # `sqlite-vec` Architecture 2 | 3 | Internal documentation for how `sqlite-vec` works under-the-hood. Not meant for 4 | users of the `sqlite-vec` project; consult 5 | [the official `sqlite-vec` documentation](https://alexgarcia.xyz/sqlite-vec) for 6 | how-to-guides. Rather, this is for people interested in how `sqlite-vec` works 7 | and some guidelines to any future contributors. 8 | 9 | Very much a WIP. 10 | 11 | ## `vec0` 12 | 13 | ### Shadow Tables 14 | 15 | #### `xyz_chunks` 16 | 17 | - `chunk_id INTEGER` 18 | - `size INTEGER` 19 | - `validity BLOB` 20 | - `rowids BLOB` 21 | 22 | #### `xyz_rowids` 23 | 24 | - `rowid INTEGER` 25 | - `id` 26 | - `chunk_id INTEGER` 27 | - `chunk_offset INTEGER` 28 | 29 | #### `xyz_vector_chunksNN` 30 | 31 | - `rowid INTEGER` 32 | - `vector BLOB` 33 | 34 | #### `xyz_auxiliary` 35 | 36 | - `rowid INTEGER` 37 | - `valueNN [type]` 38 | 39 | #### `xyz_metadatachunksNN` 40 | 41 | - `rowid INTEGER` 42 | - `data BLOB` 43 | 44 | #### `xyz_metadatatextNN` 45 | 46 | - `rowid INTEGER` 47 | - `data TEXT` 48 | 49 | ### idxStr 50 | 51 | The `vec0` idxStr is a string composed of a single "header" character and 0 or 52 | more "blocks" of 4 characters each. 53 | 54 | The "header" character denotes the type of query plan, as determined by the 55 | `enum vec0_query_plan` values. The current possible values are: 56 | 57 | | Name | Value | Description | 58 | | -------------------------- | ----- | ---------------------------------------------------------------------- | 59 | | `VEC0_QUERY_PLAN_FULLSCAN` | `'1'` | Perform a full-scan on all rows | 60 | | `VEC0_QUERY_PLAN_POINT` | `'2'` | Perform a single-lookup point query for the provided rowid | 61 | | `VEC0_QUERY_PLAN_KNN` | `'3'` | Perform a KNN-style query on the provided query vector and parameters. 
| 62 | 63 | Each 4-character "block" is associated with a corresponding value in `argv[]`. 64 | For example, the 1st block at byte offset `1-4` (inclusive) is the 1st block and 65 | is associated with `argv[1]`. The 2nd block at byte offset `5-8` (inclusive) is 66 | associated with `argv[2]` and so on. Each block describes what kind of value or 67 | filter the given `argv[i]` value is. 68 | 69 | #### `VEC0_IDXSTR_KIND_KNN_MATCH` (`'{'`) 70 | 71 | `argv[i]` is the query vector of the KNN query. 72 | 73 | The remaining 3 characters of the block are `_` fillers. 74 | 75 | #### `VEC0_IDXSTR_KIND_KNN_K` (`'}'`) 76 | 77 | `argv[i]` is the limit/k value of the KNN query. 78 | 79 | The remaining 3 characters of the block are `_` fillers. 80 | 81 | #### `VEC0_IDXSTR_KIND_KNN_ROWID_IN` (`'['`) 82 | 83 | `argv[i]` is the optional `rowid in (...)` value, and must be handled with 84 | [`sqlite3_vtab_in_first()` / `sqlite3_vtab_in_next()`](https://www.sqlite.org/c3ref/vtab_in_first.html). 85 | 86 | The remaining 3 characters of the block are `_` fillers. 87 | 88 | #### `VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT` (`']'`) 89 | 90 | `argv[i]` is a "constraint" on a specific partition key. 91 | 92 | The second character of the block denotes which partition key to filter on, 93 | using `A` to denote the first partition key column, `B` for the second, etc. It 94 | is encoded with `'A' + partition_idx` and can be decoded with `c - 'A'`. 95 | 96 | The third character of the block denotes which operator is used in the 97 | constraint. It will be one of the values of `enum vec0_partition_operator`, as 98 | only a subset of operations are supported on partition keys. 99 | 100 | The fourth character of the block is a `_` filler. 101 | 102 | #### `VEC0_IDXSTR_KIND_POINT_ID` (`'!'`) 103 | 104 | `argv[i]` is the value of the rowid or id to match against for the point query. 105 | 106 | The remaining 3 characters of the block are `_` fillers. 
107 | 108 | #### `VEC0_IDXSTR_KIND_METADATA_CONSTRAINT` (`'&'`) 109 | 110 | `argv[i]` is the value of the `WHERE` constraint for a metadata column in a KNN 111 | query. 112 | 113 | The second character of the block denotes which metadata column the constraint 114 | belongs to, using `A` to denote the first metadata column, `B` for the 115 | second, etc. It is encoded with `'A' + metadata_idx` and can be decoded with 116 | `c - 'A'`. 117 | 118 | The third character of the block is the constraint operator. It will be one of 119 | `enum vec0_metadata_operator`, as only a subset of operators are supported on 120 | metadata column KNN filters. 121 | 122 | The fourth character of the block is a `_` filler. 123 | -------------------------------------------------------------------------------- /site/public/shinkai.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /.github/logos/shinkai.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /site/public/shinkai.dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.github/logos/shinkai.dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.github/logos/shinkai.small.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 14 | 15 | 
-------------------------------------------------------------------------------- /site/using/python.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: sqlite-vec in Python 3 | --- 4 | 5 | # Using `sqlite-vec` in Python 6 | 7 | [![PyPI](https://img.shields.io/pypi/v/sqlite-vec.svg?color=blue&logo=python&logoColor=white)](https://pypi.org/project/sqlite-vec/) 8 | 9 | To use `sqlite-vec` from Python, install the 10 | [`sqlite-vec` PyPi package](https://pypi.org/project/sqlite-vec/) using your 11 | favorite Python package manager: 12 | 13 | ```bash 14 | pip install sqlite-vec 15 | ``` 16 | 17 | Once installed, use the `sqlite_vec.load()` function to load `sqlite-vec` SQL 18 | functions into a SQLite connection. 19 | 20 | ```python 21 | import sqlite3 22 | import sqlite_vec 23 | 24 | db = sqlite3.connect(":memory:") 25 | db.enable_load_extension(True) 26 | sqlite_vec.load(db) 27 | db.enable_load_extension(False) 28 | 29 | vec_version, = db.execute("select vec_version()").fetchone() 30 | print(f"vec_version={vec_version}") 31 | ``` 32 | 33 | See 34 | [`simple-python/demo.py`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-python/demo.py) 35 | for a more complete Python demo. 36 | 37 | ## Working with Vectors 38 | 39 | ### Lists 40 | 41 | If your vectors in Python are provided as a list of floats, you can 42 | convert them into the compact BLOB format that `sqlite-vec` uses with 43 | `serialize_float32()`. This internally calls [`struct.pack()`](https://docs.python.org/3/library/struct.html#struct.pack). 
44 | 45 | ```python 46 | from sqlite_vec import serialize_float32 47 | 48 | embedding = [0.1, 0.2, 0.3, 0.4] 49 | result = db.execute('select vec_length(?)', [serialize_float32(embedding)]) 50 | 51 | print(result.fetchone()[0]) # 4 52 | ``` 53 | 54 | ### NumPy Arrays 55 | 56 | If your vectors are NumPy arrays, the Python SQLite package allows you to 57 | pass it along as-is, since NumPy arrays implement [the Buffer protocol](https://docs.python.org/3/c-api/buffer.html). Make sure you cast your array elements to 32-bit floats 58 | with 59 | [`.astype(np.float32)`](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.astype.html), 60 | as some embeddings will use `np.float64`. 61 | 62 | ```python 63 | import numpy as np 64 | embedding = np.array([0.1, 0.2, 0.3, 0.4]) 65 | db.execute( 66 | "SELECT vec_length(?)", [embedding.astype(np.float32)] 67 | ) # 4 68 | ``` 69 | 70 | 71 | ## Using an up-to-date version of SQLite {#updated-sqlite} 72 | 73 | Some features of `sqlite-vec` will require an up-to-date SQLite library. You can 74 | see what version of SQLite your Python environment uses with 75 | [`sqlite3.sqlite_version`](https://docs.python.org/3/library/sqlite3.html#sqlite3.sqlite_version), 76 | or with this one-line command: 77 | 78 | ```bash 79 | python -c 'import sqlite3; print(sqlite3.sqlite_version)' 80 | ``` 81 | 82 | Currently, **SQLite version 3.41 or higher** is recommended but not required. 83 | `sqlite-vec` will work with older versions, but certain features and queries will 84 | only work correctly in >=3.41. 85 | 86 | To "upgrade" the SQLite version your Python installation uses, you have a few 87 | options. 88 | 89 | ### Compile your own SQLite version 90 | 91 | You can compile an up-to-date version of SQLite and use some system environment 92 | variables (like `LD_PRELOAD` and `DYLD_LIBRARY_PATH`) to force Python to use a 93 | different SQLite library. 
94 | [This guide](https://til.simonwillison.net/sqlite/sqlite-version-macos-python) 95 | goes into this approach in more detail. 96 | 97 | Although compiling SQLite can be straightforward, there are a lot of different 98 | compilation options to consider, which makes it confusing. This also doesn't 99 | work with Windows, which statically compiles its own SQLite library. 100 | 101 | ### Use `pysqlite3` 102 | 103 | [`pysqlite3`](https://github.com/coleifer/pysqlite3) is a 3rd party PyPI package 104 | that bundles an up-to-date SQLite library as a separate pip package. 105 | 106 | While it's mostly compatible with the Python `sqlite3` module, there are a few 107 | rare edge cases where the APIs don't match. 108 | 109 | ### Upgrading your Python version 110 | 111 | Sometimes installing the latest version of Python will "magically" upgrade your 112 | SQLite version as well. This is a nuclear option, as upgrading Python 113 | installations can be quite the hassle, but most Python 3.12 builds will have a 114 | very recent SQLite version. 115 | 116 | 117 | ## MacOS blocks SQLite extensions by default 118 | 119 | The default SQLite library that is bundled with Mac operating systems does not include support for SQLite extensions. That means the default Python library that is bundled with MacOS also does not support SQLite extensions. 120 | 121 | This is the case if you come across the following error message: 122 | 123 | ``` 124 | AttributeError: 'sqlite3.Connection' object has no attribute 'enable_load_extension' 125 | ``` 126 | 127 | As a workaround, use the Homebrew version of Python (`brew install python`, new version at `/opt/homebrew/bin/python3`), which will use the Homebrew version of SQLite that allows SQLite extensions. 
128 | 129 | Other workarounds can be found at [Using an up-to-date version of SQLite](#updated-sqlite); 130 | -------------------------------------------------------------------------------- /site/build-ref.mjs: -------------------------------------------------------------------------------- 1 | import Database from "better-sqlite3"; 2 | import { load } from "js-yaml"; 3 | import { fileURLToPath } from "node:url"; 4 | import { dirname, resolve } from "node:path"; 5 | import { readFileSync, writeFileSync } from "node:fs"; 6 | import * as v from "valibot"; 7 | import { table } from "table"; 8 | 9 | const HEADER = `--- 10 | outline: 2 11 | --- 12 | 13 | # API Reference 14 | 15 | A complete reference to all the SQL scalar functions, table functions, and virtual tables inside \`sqlite-vec\`. 16 | 17 | ::: warning 18 | sqlite-vec is pre-v1, so expect breaking changes. 19 | ::: 20 | 21 | [[toc]] 22 | 23 | `; 24 | 25 | const REF_PATH = resolve( 26 | dirname(fileURLToPath(import.meta.url)), 27 | "../reference.yaml", 28 | ); 29 | const EXT_PATH = resolve( 30 | dirname(fileURLToPath(import.meta.url)), 31 | "../dist/vec0", 32 | ); 33 | 34 | const DocSchema = v.objectWithRest( 35 | { 36 | sections: v.record( 37 | v.string(), 38 | v.object({ 39 | title: v.string(), 40 | desc: v.string(), 41 | }), 42 | ), 43 | }, 44 | v.record( 45 | v.string(), 46 | v.object({ 47 | params: v.array(v.string()), 48 | desc: v.string(), 49 | example: v.union([v.string(), v.array(v.string())]), 50 | }), 51 | ), 52 | ); 53 | 54 | const tableConfig = { 55 | border: { 56 | topBody: `─`, 57 | topJoin: `┬`, 58 | topLeft: `┌`, 59 | topRight: `┐`, 60 | 61 | bottomBody: `─`, 62 | bottomJoin: `┴`, 63 | bottomLeft: `└`, 64 | bottomRight: `┘`, 65 | 66 | bodyLeft: `│`, 67 | bodyRight: `│`, 68 | bodyJoin: `│`, 69 | 70 | joinBody: `─`, 71 | joinLeft: `├`, 72 | joinRight: `┤`, 73 | joinJoin: `┼`, 74 | }, 75 | }; 76 | 77 | function formatSingleValue(value) { 78 | if (typeof value === "string") { 79 | const s = 
/**
 * Format one result value as the text shown in a table cell.
 *
 * - strings become single-quoted SQL literals, with embedded `'` doubled
 *   (the same escaping `formatSingleValue` applies);
 * - numbers are returned unchanged;
 * - `null` becomes `NULL`;
 * - `Uint8Array` values become an upper-case hex blob literal `X'..'`;
 * - other objects (including arrays) are pretty-printed as JSON;
 * - anything else (bigint, boolean, undefined) is stringified rather than
 *   silently yielding `undefined`.
 *
 * @param {*} value A single column value from a result row.
 * @returns {string|number} The cell content.
 */
function formatValue(value) {
  if (typeof value === "string") return `'${value.replace(/'/g, "''")}'`;
  if (typeof value === "number") return value;
  if (value === null) return "NULL";
  if (value instanceof Uint8Array) {
    // `byte`, not `v`: this file imports valibot as `v` at module level.
    let hex = "";
    for (const byte of value) {
      hex += byte.toString(16).toUpperCase().padStart(2, "0");
    }
    return `X'${hex}'`;
  }
  // Arrays are objects too, so one check covers both.
  if (typeof value === "object") {
    return JSON.stringify(value, null, 2);
  }
  return String(value);
}
example : [example]; 129 | for (const example of examples) { 130 | const sql = example 131 | /* Strip any '```sql' markdown at the beginning */ 132 | .replace(/^\w*```sql/, "") 133 | /* Strip any '```' markdown at the end */ 134 | .replace(/```\w*$/m, "") 135 | .trim(); 136 | let stmt, results, error; 137 | results = null; 138 | try { 139 | stmt = db.prepare(sql); 140 | try { 141 | stmt.raw(true); 142 | } catch (err) { 143 | 1; 144 | } 145 | } catch (error) { 146 | console.error(`Error preparing statement for ${name}:`); 147 | console.error(error); 148 | throw Error(); 149 | } 150 | 151 | try { 152 | results = stmt.all(); 153 | } catch (e) { 154 | error = e.message; 155 | } 156 | 157 | md += sql + "\n"; 158 | 159 | if (!results) { 160 | md += `-- ❌ ${error}\n\n`; 161 | continue; 162 | } 163 | 164 | const result = results.length > 1 || stmt.columns().length > 1 165 | ? `/*\n${tableize(stmt, results)}\n*/\n` 166 | : formatSingleValue(results[0][0]); 167 | md += result + "\n\n"; 168 | } 169 | 170 | md += "\n```\n\n"; 171 | 172 | return md; 173 | } 174 | 175 | let md = HEADER; 176 | const doc = v.parse(DocSchema, load(readFileSync(REF_PATH, "utf8"))); 177 | 178 | const db = new Database(); 179 | db.loadExtension(EXT_PATH); 180 | 181 | for (const section in doc.sections) { 182 | md += `## ${doc.sections[section].title} {#${section}} \n\n`; 183 | md += doc.sections[section].desc; 184 | md += "\n\n"; 185 | 186 | for ( 187 | const [name, { params, desc, example }] of Object.entries( 188 | doc[section], 189 | ) 190 | ) { 191 | const headerText = `\`${name}(${(params ?? 
[]).join(", ")})\` {#${name}}`; 192 | 193 | md += "### " + headerText + "\n\n"; 194 | 195 | md += desc + "\n\n"; 196 | md += renderExamples(db, name, example); 197 | } 198 | } 199 | 200 | writeFileSync("api-reference.md", md, "utf8"); 201 | console.log("done"); 202 | -------------------------------------------------------------------------------- /tests/unittest.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("Hello, world!"); 3 | println!("{:?}", _min_idx(vec![3.0, 2.0, 1.0], 2)); 4 | } 5 | 6 | fn _min_idx(distances: Vec, k: i32) -> Vec { 7 | let mut out: Vec = vec![0; k as usize]; 8 | 9 | unsafe { 10 | min_idx( 11 | distances.as_ptr().cast(), 12 | distances.len() as i32, 13 | out.as_mut_ptr(), 14 | k, 15 | ); 16 | } 17 | out 18 | } 19 | 20 | fn _merge_sorted_lists( 21 | a: &Vec, 22 | a_rowids: &Vec, 23 | b: &Vec, 24 | b_rowids: &Vec, 25 | b_top_idx: &Vec, 26 | n: usize, 27 | ) -> (Vec, Vec) { 28 | let mut out_used: i64 = 0; 29 | let mut out: Vec = Vec::with_capacity(n); 30 | let mut out_rowids: Vec = Vec::with_capacity(n); 31 | unsafe { 32 | merge_sorted_lists( 33 | a.as_ptr().cast(), 34 | a_rowids.as_ptr().cast(), 35 | a.len() as i64, 36 | b.as_ptr().cast(), 37 | b_rowids.as_ptr().cast(), 38 | b_top_idx.as_ptr().cast(), 39 | b.len() as i64, 40 | out.as_ptr().cast(), 41 | out_rowids.as_ptr().cast(), 42 | n as i64, 43 | &mut out_used, 44 | ); 45 | out.set_len(out_used as usize); 46 | out_rowids.set_len(out_used as usize); 47 | } 48 | 49 | (out_rowids, out) 50 | } 51 | 52 | #[link(name = "sqlite-vec-internal")] 53 | extern "C" { 54 | fn min_idx(distances: *const f32, n: i32, out: *mut i32, k: i32) -> i32; 55 | 56 | fn merge_sorted_lists( 57 | a: *const f32, 58 | a_rowids: *const i64, 59 | a_length: i64, 60 | b: *const f32, 61 | b_rowids: *const i64, 62 | b_top_idx: *const i32, 63 | b_length: i64, 64 | out: *const f32, 65 | out_rowids: *const i64, 66 | out_length: i64, 67 | out_used: *mut i64, 68 | ); 
69 | } 70 | 71 | #[cfg(test)] 72 | mod tests { /* Unit tests for _min_idx and _merge_sorted_lists, which come into scope through the glob import of the parent module. */ 73 | use super::*; 74 | 75 | #[test] 76 | fn test_basic() { /* Expects _min_idx(values, k) to return the indices of the k smallest values, ordered from the smallest value upward. */ 77 | assert_eq!(_min_idx(vec![1.0, 2.0, 3.0], 3), vec![0, 1, 2]); 78 | assert_eq!(_min_idx(vec![3.0, 2.0, 1.0], 3), vec![2, 1, 0]); 79 | 80 | assert_eq!(_min_idx(vec![1.0, 2.0, 3.0], 2), vec![0, 1]); 81 | assert_eq!(_min_idx(vec![3.0, 2.0, 1.0], 2), vec![2, 1]); 82 | } 83 | 84 | #[test] 85 | fn test_merge_sorted_lists() { /* a is already ascending; b is deliberately unsorted, and b_top_idx lists the indices of b in ascending order of value. Each call below asks for the k smallest entries across both inputs and expects a (rowids, values) tuple; with k = 8 only the 7 available entries come back. */ 86 | let a = &vec![0.01, 0.02, 0.03]; 87 | let a_rowids = &vec![1, 2, 3]; 88 | 89 | //let b = &vec![0.1, 0.2, 0.3, 0.4]; 90 | //let b_rowids = &vec![4, 5, 6, 7]; 91 | let b = &vec![0.4, 0.2, 0.3, 0.1]; 92 | let b_rowids = &vec![7, 5, 6, 4]; 93 | let b_top_idx = &vec![3, 1, 2, 0]; 94 | 95 | assert_eq!( 96 | _merge_sorted_lists(a, a_rowids, b, b_rowids, b_top_idx, 0), 97 | (vec![], vec![]) 98 | ); 99 | assert_eq!( 100 | _merge_sorted_lists(a, a_rowids, b, b_rowids, b_top_idx, 1), 101 | (vec![1], vec![0.01]) 102 | ); 103 | assert_eq!( 104 | _merge_sorted_lists(a, a_rowids, b, b_rowids, b_top_idx, 2), 105 | (vec![1, 2], vec![0.01, 0.02]) 106 | ); 107 | assert_eq!( 108 | _merge_sorted_lists(a, a_rowids, b, b_rowids, b_top_idx, 3), 109 | (vec![1, 2, 3], vec![0.01, 0.02, 0.03]) 110 | ); 111 | assert_eq!( 112 | _merge_sorted_lists(a, a_rowids, b, b_rowids, b_top_idx, 4), 113 | (vec![1, 2, 3, 4], vec![0.01, 0.02, 0.03, 0.1]) 114 | ); 115 | assert_eq!( 116 | _merge_sorted_lists(a, a_rowids, b, b_rowids, b_top_idx, 5), 117 | (vec![1, 2, 3, 4, 5], vec![0.01, 0.02, 0.03, 0.1, 0.2]) 118 | ); 119 | assert_eq!( 120 | _merge_sorted_lists(a, a_rowids, b, b_rowids, b_top_idx, 6), 121 | ( 122 | vec![1, 2, 3, 4, 5, 6], 123 | vec![0.01, 0.02, 0.03, 0.1, 0.2, 0.3] 124 | ) 125 | ); 126 | assert_eq!( 127 | _merge_sorted_lists(a, a_rowids, b, b_rowids, b_top_idx, 7), 128 | ( 129 | vec![1, 2, 3, 4, 5, 6, 7], 130 | vec![0.01, 0.02, 0.03, 0.1, 0.2, 0.3, 0.4] 131 | ) 132 | ); 133 | 134 | assert_eq!( 135 | _merge_sorted_lists(a, a_rowids, b, b_rowids, b_top_idx, 8), 136 | 
( 137 | vec![1, 2, 3, 4, 5, 6, 7], 138 | vec![0.01, 0.02, 0.03, 0.1, 0.2, 0.3, 0.4] 139 | ) 140 | ); 141 | } 142 | /* NOTE(review): disabled test. It calls _merge_sorted_lists with 5 arguments while the enabled test above passes 6, so it needs updating before it can be re-enabled. 143 | #[test] 144 | fn test_merge_sorted_lists_empty() { 145 | let x = vec![0.1, 0.2, 0.3]; 146 | let x_rowids = vec![666, 888, 777]; 147 | assert_eq!( 148 | _merge_sorted_lists(&x, &x_rowids, &vec![], &vec![], 3), 149 | (vec![666, 888, 777], vec![0.1, 0.2, 0.3]) 150 | ); 151 | assert_eq!( 152 | _merge_sorted_lists(&vec![], &vec![], &x, &x_rowids, 3), 153 | (vec![666, 888, 777], vec![0.1, 0.2, 0.3]) 154 | ); 155 | assert_eq!( 156 | _merge_sorted_lists(&vec![], &vec![], &x, &x_rowids, 4), 157 | (vec![666, 888, 777], vec![0.1, 0.2, 0.3]) 158 | ); 159 | assert_eq!( 160 | _merge_sorted_lists(&vec![], &vec![], &x, &x_rowids, 2), 161 | (vec![666, 888], vec![0.1, 0.2]) 162 | ); 163 | }*/ 164 | } 165 | -------------------------------------------------------------------------------- /site/using/js.md: -------------------------------------------------------------------------------- 1 | # Using `sqlite-vec` in Node.js, Deno, and Bun 2 | 3 | [![npm](https://img.shields.io/npm/v/sqlite-vec.svg?color=green&logo=nodedotjs&logoColor=white)](https://www.npmjs.com/package/sqlite-vec) 4 | 5 | To use `sqlite-vec` in Node.js, Deno or Bun, install the 6 | [`sqlite-vec` NPM package](https://npmjs.com/package/sqlite-vec) using your 7 | favorite package manager: 8 | 9 | ::: code-group 10 | 11 | ```bash [npm] 12 | npm install sqlite-vec 13 | ``` 14 | 15 | ```bash [Bun] 16 | bun install sqlite-vec 17 | ``` 18 | 19 | ```bash [Deno] 20 | deno add npm:sqlite-vec 21 | ``` 22 | 23 | ::: 24 | 25 | Once installed, use the `sqliteVec.load()` function to load `sqlite-vec` SQL 26 | functions into a SQLite connection. 
27 | 28 | ```js 29 | import * as sqliteVec from "sqlite-vec"; 30 | import Database from "better-sqlite3"; 31 | 32 | const db = new Database(":memory:"); 33 | sqliteVec.load(db); 34 | 35 | const { vec_version } = db 36 | .prepare("select vec_version() as vec_version;") 37 | .get(); 38 | 39 | console.log(`vec_version=${vec_version}`); 40 | ``` 41 | 42 | The `load()` function is compatible with 43 | [`node:sqlite`](https://nodejs.org/api/sqlite.html#class-databasesync), 44 | [`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3), 45 | [`node-sqlite3`](https://github.com/TryGhost/node-sqlite3), 46 | [`jsr:@db/sqlite`](https://jsr.io/@db/sqlite) (Deno), and 47 | [`bun:sqlite`](https://bun.sh/docs/api/sqlite). 48 | 49 | ## Working with vectors in JavaScript 50 | 51 | If your vectors are represented as an array of numbers, wrap them in a 52 | [`Float32Array`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Float32Array), 53 | and use the 54 | [`.buffer`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray/buffer) 55 | accessor to bind as a parameter to `sqlite-vec` SQL functions. 56 | 57 | ```js 58 | const embedding = new Float32Array([0.1, 0.2, 0.3, 0.4]); 59 | const stmt = db.prepare("select vec_length(?)"); 60 | console.log(stmt.run(embedding.buffer)); // 4 61 | ``` 62 | 63 | ## Node.js 64 | 65 | If you're on Node.js `23.5.0` or above, you can use [the built-in `node:sqlite` module](https://nodejs.org/api/sqlite.html) with `sqlite-vec` like so: 66 | 67 | ```js 68 | import { DatabaseSync } from "node:sqlite"; 69 | import * as sqliteVec from "sqlite-vec"; 70 | 71 | const db = new DatabaseSync(":memory:", { allowExtension: true }); 72 | sqliteVec.load(db); 73 | 74 | const embedding = new Float32Array([0.1, 0.2, 0.3, 0.4]); 75 | const { result } = db 76 | .prepare("select vec_length(?) 
as result") 77 | .get(new Uint8Array(embedding.buffer)); 78 | 79 | console.log(result); // 4 80 | ``` 81 | 82 | 83 | See 84 | [`simple-node2/demo.mjs`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-node2/demo.mjs) 85 | for a complete `node:sqlite` + `sqlite-vec` demo. 86 | 87 | 88 | Alternatively, you can use the 89 | [`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3) 90 | NPM package with `sqlite-vec` in Node as well. 91 | 92 | ```js 93 | import * as sqliteVec from "sqlite-vec"; 94 | import Database from "better-sqlite3"; 95 | 96 | const db = new Database(":memory:"); 97 | sqliteVec.load(db); 98 | 99 | const embedding = new Float32Array([0.1, 0.2, 0.3, 0.4]); 100 | const { result } = db 101 | .prepare("select vec_length(?) as result",) 102 | .get(embedding); 103 | 104 | console.log(result); // 4 105 | 106 | ``` 107 | 108 | See 109 | [`simple-node/demo.mjs`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-node/demo.mjs) 110 | for a more complete demo. 111 | 112 | ## Deno 113 | 114 | Here's a quick recipe for using `sqlite-vec` with 115 | [`jsr:@db/sqlite`](https://jsr.io/@db/sqlite) in Deno. It will only work on Deno 116 | version `1.44` or greater, because of a bug in previous Deno versions. 117 | 118 | 119 | 120 | ```ts 121 | import { Database } from "jsr:@db/sqlite"; 122 | import * as sqliteVec from "npm:sqlite-vec"; 123 | 124 | const db = new Database(":memory:"); 125 | db.enableLoadExtension = true; 126 | sqliteVec.load(db); 127 | db.enableLoadExtension = false; 128 | 129 | const embedding = new Float32Array([0.1, 0.2, 0.3, 0.4]); 130 | const [result] = db 131 | .prepare("select vec_length(?)") 132 | .value<[number]>(new Uint8Array(embedding.buffer)!); 133 | console.log(result); // 4 134 | ``` 135 | 136 | See 137 | [`simple-deno/demo.ts`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-deno/demo.ts) 138 | for a more complete Deno demo. 
139 | 140 | The `better-sqlite3` example above also works in Deno, when the `better-sqlite3` import is prefixed with `npm:`: 141 | 142 | ```js 143 | import Database from "better-sqlite3"; // [!code --] 144 | import Database from "npm:better-sqlite3"; // [!code ++] 145 | ``` 146 | 147 | ## Bun 148 | 149 | Here's a quick recipe for using `sqlite-vec` with 150 | [`bun:sqlite`](https://bun.sh/docs/api/sqlite) in Bun. 151 | 152 | ```ts 153 | import { Database } from "bun:sqlite"; 154 | import * as sqliteVec from "sqlite-vec"; 155 | 156 | // macOS *might* have to do this, as the built-in SQLite library on macOS doesn't allow extensions 157 | Database.setCustomSQLite("/usr/local/opt/sqlite3/lib/libsqlite3.dylib"); 158 | 159 | const db = new Database(":memory:"); 160 | sqliteVec.load(db); 161 | 162 | const embedding = new Float32Array([0.1, 0.2, 0.3, 0.4]); 163 | const { result } = db 164 | .prepare("select vec_length(?) as result",) 165 | .get(embedding); 166 | 167 | console.log(result); // 4 168 | 169 | ``` 170 | 171 | See 172 | [`simple-bun/demo.ts`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-bun/demo.ts) 173 | for a more complete Bun demo. 174 | 175 | The `better-sqlite3` 176 | example above also works with Bun. 
-------------------------------------------------------------------------------- /tests/__snapshots__/test-general.ambr: -------------------------------------------------------------------------------- 1 | # serializer version: 1 2 | # name: test_info 3 | OrderedDict({ 4 | 'sql': 'select key, typeof(value) from v_info order by 1', 5 | 'rows': list([ 6 | OrderedDict({ 7 | 'key': 'CREATE_VERSION', 8 | 'typeof(value)': 'text', 9 | }), 10 | OrderedDict({ 11 | 'key': 'CREATE_VERSION_MAJOR', 12 | 'typeof(value)': 'integer', 13 | }), 14 | OrderedDict({ 15 | 'key': 'CREATE_VERSION_MINOR', 16 | 'typeof(value)': 'integer', 17 | }), 18 | OrderedDict({ 19 | 'key': 'CREATE_VERSION_PATCH', 20 | 'typeof(value)': 'integer', 21 | }), 22 | ]), 23 | }) 24 | # --- 25 | # name: test_shadow 26 | OrderedDict({ 27 | 'sql': 'select * from sqlite_master order by name', 28 | 'rows': list([ 29 | OrderedDict({ 30 | 'type': 'index', 31 | 'name': 'sqlite_autoindex_v_info_1', 32 | 'tbl_name': 'v_info', 33 | 'rootpage': 3, 34 | 'sql': None, 35 | }), 36 | OrderedDict({ 37 | 'type': 'index', 38 | 'name': 'sqlite_autoindex_v_metadatachunks00_1', 39 | 'tbl_name': 'v_metadatachunks00', 40 | 'rootpage': 10, 41 | 'sql': None, 42 | }), 43 | OrderedDict({ 44 | 'type': 'index', 45 | 'name': 'sqlite_autoindex_v_metadatatext00_1', 46 | 'tbl_name': 'v_metadatatext00', 47 | 'rootpage': 12, 48 | 'sql': None, 49 | }), 50 | OrderedDict({ 51 | 'type': 'index', 52 | 'name': 'sqlite_autoindex_v_vector_chunks00_1', 53 | 'tbl_name': 'v_vector_chunks00', 54 | 'rootpage': 8, 55 | 'sql': None, 56 | }), 57 | OrderedDict({ 58 | 'type': 'table', 59 | 'name': 'sqlite_sequence', 60 | 'tbl_name': 'sqlite_sequence', 61 | 'rootpage': 5, 62 | 'sql': 'CREATE TABLE sqlite_sequence(name,seq)', 63 | }), 64 | OrderedDict({ 65 | 'type': 'table', 66 | 'name': 'v', 67 | 'tbl_name': 'v', 68 | 'rootpage': 0, 69 | 'sql': 'CREATE VIRTUAL TABLE v using vec0(a float[1], partition text partition key, metadata text, +name text, chunk_size=8)', 70 
| }), 71 | OrderedDict({ 72 | 'type': 'table', 73 | 'name': 'v_auxiliary', 74 | 'tbl_name': 'v_auxiliary', 75 | 'rootpage': 13, 76 | 'sql': 'CREATE TABLE "v_auxiliary"( rowid integer PRIMARY KEY , value00)', 77 | }), 78 | OrderedDict({ 79 | 'type': 'table', 80 | 'name': 'v_chunks', 81 | 'tbl_name': 'v_chunks', 82 | 'rootpage': 4, 83 | 'sql': 'CREATE TABLE "v_chunks"(chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,size INTEGER NOT NULL,sequence_id integer,partition00,validity BLOB NOT NULL, rowids BLOB NOT NULL)', 84 | }), 85 | OrderedDict({ 86 | 'type': 'table', 87 | 'name': 'v_info', 88 | 'tbl_name': 'v_info', 89 | 'rootpage': 2, 90 | 'sql': 'CREATE TABLE "v_info" (key text primary key, value any)', 91 | }), 92 | OrderedDict({ 93 | 'type': 'table', 94 | 'name': 'v_metadatachunks00', 95 | 'tbl_name': 'v_metadatachunks00', 96 | 'rootpage': 9, 97 | 'sql': 'CREATE TABLE "v_metadatachunks00"(rowid PRIMARY KEY, data BLOB NOT NULL)', 98 | }), 99 | OrderedDict({ 100 | 'type': 'table', 101 | 'name': 'v_metadatatext00', 102 | 'tbl_name': 'v_metadatatext00', 103 | 'rootpage': 11, 104 | 'sql': 'CREATE TABLE "v_metadatatext00"(rowid PRIMARY KEY, data TEXT)', 105 | }), 106 | OrderedDict({ 107 | 'type': 'table', 108 | 'name': 'v_rowids', 109 | 'tbl_name': 'v_rowids', 110 | 'rootpage': 6, 111 | 'sql': 'CREATE TABLE "v_rowids"(rowid INTEGER PRIMARY KEY AUTOINCREMENT,id,chunk_id INTEGER,chunk_offset INTEGER)', 112 | }), 113 | OrderedDict({ 114 | 'type': 'table', 115 | 'name': 'v_vector_chunks00', 116 | 'tbl_name': 'v_vector_chunks00', 117 | 'rootpage': 7, 118 | 'sql': 'CREATE TABLE "v_vector_chunks00"(rowid PRIMARY KEY,vectors BLOB NOT NULL)', 119 | }), 120 | ]), 121 | }) 122 | # --- 123 | # name: test_shadow.1 124 | OrderedDict({ 125 | 'sql': "select * from pragma_table_list where type = 'shadow'", 126 | 'rows': list([ 127 | OrderedDict({ 128 | 'schema': 'main', 129 | 'name': 'v_auxiliary', 130 | 'type': 'shadow', 131 | 'ncol': 2, 132 | 'wr': 0, 133 | 'strict': 0, 134 | }), 135 | 
OrderedDict({ 136 | 'schema': 'main', 137 | 'name': 'v_chunks', 138 | 'type': 'shadow', 139 | 'ncol': 6, 140 | 'wr': 0, 141 | 'strict': 0, 142 | }), 143 | OrderedDict({ 144 | 'schema': 'main', 145 | 'name': 'v_info', 146 | 'type': 'shadow', 147 | 'ncol': 2, 148 | 'wr': 0, 149 | 'strict': 0, 150 | }), 151 | OrderedDict({ 152 | 'schema': 'main', 153 | 'name': 'v_rowids', 154 | 'type': 'shadow', 155 | 'ncol': 4, 156 | 'wr': 0, 157 | 'strict': 0, 158 | }), 159 | OrderedDict({ 160 | 'schema': 'main', 161 | 'name': 'v_metadatachunks00', 162 | 'type': 'shadow', 163 | 'ncol': 2, 164 | 'wr': 0, 165 | 'strict': 0, 166 | }), 167 | OrderedDict({ 168 | 'schema': 'main', 169 | 'name': 'v_metadatatext00', 170 | 'type': 'shadow', 171 | 'ncol': 2, 172 | 'wr': 0, 173 | 'strict': 0, 174 | }), 175 | ]), 176 | }) 177 | # --- 178 | # name: test_shadow.2 179 | OrderedDict({ 180 | 'sql': "select * from pragma_table_list where type = 'shadow'", 181 | 'rows': list([ 182 | ]), 183 | }) 184 | # --- 185 | --------------------------------------------------------------------------------