├── packages
├── node
│ ├── LICENSE.md
│ ├── .npmignore
│ ├── tsup.config.ts
│ ├── tsconfig.json
│ ├── package.json
│ ├── src
│ │ ├── index.test.ts
│ │ ├── platform.ts
│ │ └── index.ts
│ ├── README.md
│ └── generate-platform-packages.js
├── python
│ ├── LICENSE.md
│ ├── src
│ │ └── sqlite_vector
│ │ │ ├── __init__.py
│ │ │ └── _version.py
│ ├── requirements-dev.txt
│ ├── MANIFEST.in
│ ├── pyproject.toml
│ ├── setup.py
│ ├── README.md
│ └── download_artifacts.py
├── android
│ ├── gradle.properties
│ ├── gradle
│ │ └── wrapper
│ │ │ ├── gradle-wrapper.jar
│ │ │ └── gradle-wrapper.properties
│ ├── src
│ │ └── main
│ │ │ └── AndroidManifest.xml
│ ├── gradlew.bat
│ ├── build.gradle
│ └── gradlew
└── swift
│ ├── extension
│ └── vector.swift
│ └── plugin
│ └── vector.swift
├── examples
└── semantic_search
│ ├── requirements.txt
│ ├── samples
│ ├── sample-13.md
│ ├── sample-10.md
│ ├── sample-2.md
│ ├── sample-20.md
│ ├── sample-17.md
│ ├── sample-3.md
│ ├── sample-5.md
│ ├── sample-7.md
│ ├── sample-8.md
│ ├── sample-12.md
│ ├── sample-19.md
│ ├── sample-4.md
│ ├── sample-9.md
│ ├── sample-16.md
│ ├── sample-18.md
│ ├── sample-6.md
│ ├── sample-11.md
│ ├── sample-14.md
│ ├── sample-15.md
│ └── sample-1.md
│ ├── README.md
│ ├── semsearch.py
│ └── semantic_search.py
├── jitpack.yml
├── src
├── distance-avx2.h
├── distance-neon.h
├── distance-sse2.h
├── sqlite-vector.h
├── distance-cpu.h
└── distance-cpu.c
├── .gitignore
├── Package.swift
├── libs
└── fp16
│ ├── macros.h
│ ├── bitcasts.h
│ └── fp16.h
├── LICENSE.md
├── QUANTIZATION.md
├── .github
└── workflows
│ ├── python-package.yml
│ └── main.yml
├── Makefile
├── README.md
└── API.md
/packages/node/LICENSE.md:
--------------------------------------------------------------------------------
1 | ../../LICENSE.md
--------------------------------------------------------------------------------
/packages/python/LICENSE.md:
--------------------------------------------------------------------------------
1 | ../../LICENSE.md
--------------------------------------------------------------------------------
/packages/python/src/sqlite_vector/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/examples/semantic_search/requirements.txt:
--------------------------------------------------------------------------------
1 | sentence-transformers
2 |
--------------------------------------------------------------------------------
/packages/python/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | requests
2 | wheel
3 | build
4 |
--------------------------------------------------------------------------------
/packages/android/gradle.properties:
--------------------------------------------------------------------------------
1 | org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
2 | android.useAndroidX=true
--------------------------------------------------------------------------------
/packages/python/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE.md
3 | recursive-include src/sqlite_vector/binaries *
4 |
--------------------------------------------------------------------------------
/packages/python/src/sqlite_vector/_version.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | __version__ = os.environ.get("PACKAGE_VERSION", "0.0.0")
--------------------------------------------------------------------------------
/packages/android/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sqliteai/sqlite-vector/HEAD/packages/android/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/packages/android/src/main/AndroidManifest.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/jitpack.yml:
--------------------------------------------------------------------------------
1 | jdk:
2 | - openjdk17
3 | install:
4 | - make aar ANDROID_NDK=$ANDROID_HOME/ndk-bundle
5 | - export VERSION=$(make version 2>/dev/null | tail -1)
6 | - cd packages/android && ./gradlew publishToMavenLocal -PVERSION="$VERSION"
--------------------------------------------------------------------------------
/src/distance-avx2.h:
--------------------------------------------------------------------------------
//
// distance-avx2.h
// sqlitevector
//
// Created by Marco Bambini on 20/06/25.
//

#ifndef __VECTOR_DISTANCE_AVX2__
#define __VECTOR_DISTANCE_AVX2__

// NOTE(review): the #include target below is empty — the header name was
// probably lost during text extraction (angle-bracket content stripped).
// Restore it from the upstream source.
#include

// Installs the AVX2 implementations of the vector distance functions
// (presumably wired into the runtime CPU dispatch in distance-cpu.c — verify).
void init_distance_functions_avx2 (void);

#endif

--------------------------------------------------------------------------------
/src/distance-neon.h:
--------------------------------------------------------------------------------
//
// distance-neon.h
// sqlitevector
//
// Created by Marco Bambini on 20/06/25.
//

#ifndef __VECTOR_DISTANCE_NEON__
#define __VECTOR_DISTANCE_NEON__

// NOTE(review): the #include target below is empty — the header name was
// probably lost during text extraction (angle-bracket content stripped).
// Restore it from the upstream source.
#include

// Installs the ARM NEON implementations of the vector distance functions
// (presumably wired into the runtime CPU dispatch in distance-cpu.c — verify).
void init_distance_functions_neon (void);

#endif

--------------------------------------------------------------------------------
/src/distance-sse2.h:
--------------------------------------------------------------------------------
//
// distance-sse2.h
// sqlitevector
//
// Created by Marco Bambini on 20/06/25.
//

#ifndef __VECTOR_DISTANCE_SSE2__
#define __VECTOR_DISTANCE_SSE2__

// NOTE(review): the #include target below is empty — the header name was
// probably lost during text extraction (angle-bracket content stripped).
// Restore it from the upstream source.
#include

// Installs the SSE2 implementations of the vector distance functions
// (presumably wired into the runtime CPU dispatch in distance-cpu.c — verify).
void init_distance_functions_sse2 (void);

#endif

--------------------------------------------------------------------------------
/packages/android/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | distributionBase=GRADLE_USER_HOME
2 | distributionPath=wrapper/dists
3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-bin.zip
4 | networkTimeout=10000
5 | validateDistributionUrl=true
6 | zipStoreBase=GRADLE_USER_HOME
7 | zipStorePath=wrapper/dists
8 |
--------------------------------------------------------------------------------
/packages/node/.npmignore:
--------------------------------------------------------------------------------
1 | # Development and build files
2 | src/
3 | *.test.ts
4 | *.test.js
5 | tsconfig.json
6 | tsup.config.ts
7 |
8 | # Scripts (only for repo/CI)
9 | generate-platform-packages.js
10 |
11 | # Development files
12 | node_modules/
13 | package-lock.json
14 | coverage/
15 | *.log
16 |
17 | # Git
18 | .git/
19 | .gitignore
20 |
--------------------------------------------------------------------------------
/packages/node/tsup.config.ts:
--------------------------------------------------------------------------------
import { defineConfig } from 'tsup';

// Build configuration: bundles src/index.ts into dist/ as both CommonJS and
// ESM, with type declarations and source maps, targeting Node 16+.
export default defineConfig({
  entry: ['src/index.ts'],
  format: ['cjs', 'esm'], // ship both module formats for dual-package consumers
  dts: true,              // emit .d.ts type declarations
  splitting: false,       // single-entry package; no chunk splitting needed
  sourcemap: true,
  clean: true,            // wipe dist/ before each build
  treeshake: true,
  minify: false,          // keep output readable; downstream bundlers can minify
  target: 'node16',
  outDir: 'dist',
});

--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-13.md:
--------------------------------------------------------------------------------
1 | # Article 13: Behavioral Analytics for Anomaly Detection
2 |
3 | Behavioral analytics leverages machine learning to establish baseline patterns of normal user and system behavior, flagging deviations that may indicate security threats. User and entity behavior analytics (UEBA) systems monitor login patterns, data access, and application usage to detect insider threats and compromised accounts. Machine learning models adapt to changing behavior patterns while maintaining sensitivity to subtle anomalies that human analysts might overlook.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-10.md:
--------------------------------------------------------------------------------
1 | # Article 10: Zero Trust Security Architecture
2 |
3 | Zero trust security operates on the principle of "never trust, always verify," requiring authentication and authorization for every access request regardless of location. This approach assumes breach scenarios and implements continuous verification throughout the network. Key components include identity verification, device compliance checking, least privilege access, and micro-segmentation. Zero trust frameworks help organizations protect against insider threats and advanced persistent attacks.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-2.md:
--------------------------------------------------------------------------------
1 | # Article 2: Natural Language Processing Fundamentals
2 |
3 | Natural language processing enables computers to understand, interpret, and generate human language. Key techniques include tokenization, part-of-speech tagging, named entity recognition, and sentiment analysis. Modern NLP leverages transformer architectures like BERT and GPT models for tasks such as language translation, text summarization, and question answering. Applications span chatbots, voice assistants, content moderation, and automated document analysis across various industries.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-20.md:
--------------------------------------------------------------------------------
1 | # Article 20: IoT Security Vulnerabilities
2 |
3 | Internet of Things devices often have weak security controls due to cost constraints and rapid deployment cycles. Common vulnerabilities include default passwords, unencrypted communications, lack of update mechanisms, and insufficient access controls. IoT botnets can launch massive distributed denial-of-service attacks. Security strategies include network segmentation, device lifecycle management, security-by-design principles, and regulatory compliance requirements for IoT manufacturers and deployments.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-17.md:
--------------------------------------------------------------------------------
1 | # Article 17: Supply Chain Security Risks
2 |
3 | Supply chain attacks target third-party vendors and software dependencies to compromise multiple organizations simultaneously. Attackers may insert malicious code into legitimate software updates, compromise hardware during manufacturing, or exploit trusted vendor relationships. Notable incidents include SolarWinds and Kaseya attacks affecting thousands of organizations. Mitigation strategies include vendor risk assessment, software composition analysis, and zero-trust principles for third-party integrations.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-3.md:
--------------------------------------------------------------------------------
1 | # Article 3: Computer Vision Applications
2 |
3 | Computer vision empowers machines to interpret and analyze visual information from images and videos. Core techniques include object detection, image classification, facial recognition, and motion tracking. Convolutional neural networks form the backbone of modern computer vision systems. Applications include autonomous vehicles, medical imaging diagnosis, quality control in manufacturing, augmented reality, and surveillance systems. Edge computing enables real-time computer vision processing on mobile devices.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-5.md:
--------------------------------------------------------------------------------
1 | # Article 5: Supervised vs Unsupervised Learning
2 |
3 | Supervised learning uses labeled training data to predict outcomes for new inputs, including classification and regression tasks. Common algorithms include decision trees, support vector machines, and random forests. Unsupervised learning discovers hidden patterns in unlabeled data through clustering, dimensionality reduction, and association rules. Semi-supervised learning combines both approaches when labeled data is scarce. Each paradigm serves different problem types and data availability scenarios.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-7.md:
--------------------------------------------------------------------------------
1 | # Article 7: Explainable AI and Interpretability
2 |
3 | Explainable AI focuses on making machine learning models more transparent and interpretable to human users. Black-box models like deep neural networks often lack interpretability, creating trust and accountability issues. Techniques include feature importance analysis, LIME (Local Interpretable Model-agnostic Explanations), and SHAP (SHapley Additive exPlanations). Interpretability is crucial for high-stakes applications like healthcare, finance, and criminal justice where decisions require justification.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-8.md:
--------------------------------------------------------------------------------
1 | # Article 8: AI Regulation and Compliance
2 |
3 | Governments worldwide are developing regulatory frameworks for artificial intelligence deployment and development. The European Union's AI Act categorizes AI systems by risk levels, imposing strict requirements for high-risk applications. Compliance involves documentation, risk assessment, human oversight, and algorithmic auditing. Organizations must navigate evolving regulations while maintaining innovation capabilities. Privacy laws like GDPR also impact AI data processing and automated decision-making systems.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-12.md:
--------------------------------------------------------------------------------
1 | # Article 12: Machine Learning for Malware Detection
2 |
3 | Machine learning enhances malware detection by analyzing file characteristics, behavioral patterns, and network communications to identify threats. Static analysis examines file properties without execution, while dynamic analysis observes runtime behavior in controlled environments. Ensemble methods combining multiple algorithms improve detection accuracy and reduce false positives. AI-powered systems can identify zero-day threats and polymorphic malware that traditional signature-based solutions miss.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-19.md:
--------------------------------------------------------------------------------
1 | # Article 19: Edge Computing Security Challenges
2 |
3 | Edge computing brings data processing closer to end users and devices, improving performance but creating new security challenges. Distributed edge nodes have limited security controls compared to centralized data centers. Attack surfaces expand across numerous endpoints with varying security capabilities. Key concerns include device authentication, data encryption, secure updates, and centralized security management. Zero-trust architectures and hardware-based security become essential for edge deployments.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-4.md:
--------------------------------------------------------------------------------
1 | # Article 4: Reinforcement Learning Algorithms
2 |
3 | Reinforcement learning trains agents to make optimal decisions through trial and error interactions with environments. Agents receive rewards or penalties based on their actions, gradually learning policies that maximize cumulative rewards. Q-learning and policy gradient methods are fundamental approaches. Applications include game playing (AlphaGo), robotics control, autonomous driving, recommendation systems, and financial trading algorithms. The exploration-exploitation trade-off remains a central challenge.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-9.md:
--------------------------------------------------------------------------------
1 | # Article 9: Threat Detection and Prevention
2 |
3 | Cybersecurity threat detection employs various technologies to identify malicious activities before they cause damage. Intrusion detection systems monitor network traffic for suspicious patterns, while endpoint protection software guards individual devices. Behavioral analysis identifies anomalies in user activities that may indicate compromised accounts. Security information and event management (SIEM) platforms aggregate and analyze security logs from multiple sources to provide comprehensive threat visibility.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-16.md:
--------------------------------------------------------------------------------
1 | # Article 16: Social Engineering Attack Vectors
2 |
3 | Social engineering exploits human psychology rather than technical vulnerabilities to gain unauthorized access to systems and information. Common techniques include phishing emails, pretexting phone calls, baiting with infected media, and physical tailgating. Attackers research targets through social media and public information to craft convincing scenarios. Defense requires security awareness training, verification procedures, and creating organizational cultures that encourage reporting suspicious communications.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-18.md:
--------------------------------------------------------------------------------
1 | # Article 18: Quantum Computing and Cryptography
2 |
3 | Quantum computing poses both opportunities and threats for cybersecurity. Quantum computers could break current cryptographic algorithms like RSA and ECC that secure internet communications and data protection. Organizations must prepare for post-quantum cryptography by implementing quantum-resistant algorithms. However, quantum technologies also enable quantum key distribution for theoretically unbreakable communication channels. The transition period requires careful planning and gradual migration strategies.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-6.md:
--------------------------------------------------------------------------------
1 | # Article 6: AI Ethics and Bias Mitigation
2 |
3 | Artificial intelligence systems can perpetuate or amplify human biases present in training data, leading to unfair outcomes across different demographic groups. Bias mitigation strategies include diverse dataset collection, algorithmic fairness constraints, and regular bias auditing. Ethical AI development requires transparency, accountability, and stakeholder involvement. Organizations must establish governance frameworks addressing privacy, consent, and algorithmic decision-making impacts on individuals and society.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-11.md:
--------------------------------------------------------------------------------
1 | # Article 11: Incident Response and Recovery
2 |
3 | Effective incident response requires predefined procedures for detecting, containing, and recovering from security breaches. Response teams follow structured phases: preparation, identification, containment, eradication, recovery, and lessons learned. Critical activities include forensic analysis, stakeholder communication, system restoration, and process improvement. Regular tabletop exercises and response plan updates ensure organizations can quickly minimize damage and restore normal operations after security incidents.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-14.md:
--------------------------------------------------------------------------------
1 | # Article 14: AI-Driven Security Orchestration
2 |
3 | Security orchestration platforms integrate multiple security tools and automate incident response workflows using artificial intelligence. These systems correlate alerts from various sources, prioritize threats based on risk assessment, and execute automated remediation actions. Natural language processing helps analyze threat intelligence reports, while machine learning improves decision-making accuracy over time. Orchestration reduces response times and analyst workload while maintaining consistent security procedures.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-15.md:
--------------------------------------------------------------------------------
1 | # Article 15: Advanced Persistent Threats (APTs)
2 |
3 | Advanced persistent threats represent sophisticated, long-term cyberattacks typically conducted by nation-states or organized criminal groups. APTs use multiple attack vectors, maintain persistent access, and employ stealth techniques to avoid detection. Common tactics include spear-phishing, zero-day exploits, living-off-the-land techniques, and lateral movement within networks. Defense requires continuous monitoring, threat hunting, and intelligence-driven security strategies to detect and neutralize these patient adversaries.
4 |
--------------------------------------------------------------------------------
/examples/semantic_search/samples/sample-1.md:
--------------------------------------------------------------------------------
1 | # Article 1: Deep Learning Neural Networks
2 |
3 | Deep learning utilizes artificial neural networks with multiple layers to process and learn from vast amounts of data. These networks automatically discover intricate patterns and representations without manual feature engineering. Convolutional neural networks excel at image recognition tasks, while recurrent neural networks handle sequential data like text and speech. Popular frameworks include TensorFlow, PyTorch, and Keras. Deep learning has revolutionized computer vision, natural language processing, and speech recognition applications.
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Build artifacts
2 | build/
3 | dist/
4 | .build
5 | *.a
6 | *.sqlite
7 |
8 | # iOS/macOS
9 | *.xcworkspacedata
10 | *.xcuserstate
11 | *.xcbkptlist
12 | *.plist
13 |
14 | # Android
15 | .gradle/
16 | *.aar
17 | local.properties
18 | jniLibs/
19 | *.apk
20 | *.ap_
21 | *.dex
22 |
23 | # Node.js
24 | node_modules/
25 | package-lock.json
26 | *.tsbuildinfo
27 | coverage/
28 | *.log
29 | npm-debug.log*
30 | yarn-debug.log*
31 | yarn-error.log*
32 | packages/node/platform-packages/
33 | packages/node/test-artifacts/
34 | packages/node/test-output/
35 | packages/node/test-platform-packages/
36 |
37 | # IDE
38 | .vscode
39 | .idea/
40 | *.iml
41 | *.swp
42 | *.swo
43 |
44 | # System
45 | .DS_Store
46 | Thumbs.db
--------------------------------------------------------------------------------
/src/sqlite-vector.h:
--------------------------------------------------------------------------------
//
// sqlite-vector.h
// sqlitevector
//
// Created by Marco Bambini on 06/05/25.
//

#ifndef __SQLITE_VECTOR__
#define __SQLITE_VECTOR__

// When built as a run-time loadable extension (SQLITE_CORE not defined),
// use the extension API shim; when compiled into SQLite itself, include
// the regular sqlite3.h header instead.
#ifndef SQLITE_CORE
#include "sqlite3ext.h"
#else
#include "sqlite3.h"
#endif

// On Windows the entry point must be explicitly exported from the DLL;
// other platforms export it by default.
#ifdef _WIN32
#define SQLITE_VECTOR_API __declspec(dllexport)
#else
#define SQLITE_VECTOR_API
#endif

#ifdef __cplusplus
extern "C" {
#endif

#define SQLITE_VECTOR_VERSION "0.9.52"

// Extension entry point called by SQLite when the extension is loaded.
SQLITE_VECTOR_API int sqlite3_vector_init (sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi);

#ifdef __cplusplus
}
#endif


#endif

/packages/node/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ES2020",
4 | "module": "ESNext",
5 | "lib": ["ES2020"],
6 | "moduleResolution": "bundler",
7 | "declaration": true,
8 | "declarationMap": true,
9 | "sourceMap": true,
10 | "outDir": "./dist",
11 | "rootDir": "./src",
12 | "strict": true,
13 | "esModuleInterop": true,
14 | "skipLibCheck": true,
15 | "forceConsistentCasingInFileNames": true,
16 | "resolveJsonModule": true,
17 | "isolatedModules": true,
18 | "noUnusedLocals": true,
19 | "noUnusedParameters": true,
20 | "noFallthroughCasesInSwitch": true,
21 | "allowSyntheticDefaultImports": true
22 | },
23 | "include": ["src/**/*"],
24 | "exclude": ["node_modules", "dist", "**/*.test.ts"]
25 | }
26 |
--------------------------------------------------------------------------------
/packages/swift/extension/vector.swift:
--------------------------------------------------------------------------------
// vector.swift
// This file serves as a placeholder for the vector target.
// The actual SQLite extension is built using the Makefile through the build plugin.

import Foundation

/// Placeholder structure for vector
public struct vector {
    /// Returns the path to the built vector dylib inside the XCFramework,
    /// selecting the framework slice that matches the current build destination.
    public static var path: String {
        #if os(macOS)
        // Universal macOS slice (arm64 + x86_64).
        return "vector.xcframework/macos-arm64_x86_64/vector.framework/vector"
        #elseif targetEnvironment(simulator)
        // iOS Simulator slice (arm64 + x86_64 hosts).
        return "vector.xcframework/ios-arm64_x86_64-simulator/vector.framework/vector"
        #else
        // Physical iOS device slice.
        return "vector.xcframework/ios-arm64/vector.framework/vector"
        #endif
    }
}
--------------------------------------------------------------------------------
/Package.swift:
--------------------------------------------------------------------------------
// swift-tools-version: 6.1
// The swift-tools-version declares the minimum version of Swift required to build this package.

import PackageDescription

let package = Package(
    name: "vector",
    platforms: [.macOS(.v11), .iOS(.v11)],
    products: [
        // Products can be used to vend plugins, making them visible to other packages.
        .plugin(
            name: "vectorPlugin",
            targets: ["vectorPlugin"]),
        .library(
            name: "vector",
            targets: ["vector"])
    ],
    targets: [
        // Build tool plugin that invokes the Makefile
        .plugin(
            name: "vectorPlugin",
            capability: .buildTool(),
            path: "packages/swift/plugin"
        ),
        // vector library target: the sources at packages/swift/extension are a
        // placeholder; the actual extension binary is produced by vectorPlugin
        // (see the note in packages/swift/extension/vector.swift).
        .target(
            name: "vector",
            dependencies: [],
            path: "packages/swift/extension",
            plugins: ["vectorPlugin"]
        ),
    ]
)
--------------------------------------------------------------------------------
/packages/python/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=61.0", "build", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "sqliteai-vector"
7 | dynamic = ["version", "classifiers"]
8 | description = "Python prebuilt binaries for SQLite Vector extension for all supported platforms and architectures."
9 | authors = [
10 | { name = "SQLite AI Team" }
11 | ]
12 | readme = "README.md"
13 | license = "LicenseRef-Elastic-2.0-Modified-For-Open-Source-Use"
14 | license-files = ["LICENSE.md"]
15 | requires-python = ">=3"
16 |
17 | [project.urls]
18 | Homepage = "https://sqlite.ai"
19 | Documentation = "https://github.com/sqliteai/sqlite-vector/blob/main/API.md"
20 | Repository = "https://github.com/sqliteai/sqlite-vector"
21 | Issues = "https://github.com/sqliteai/sqlite-vector/issues"
22 |
23 | [tool.setuptools]
24 | packages = {find = {where = ["src"]}}
25 | include-package-data = true
26 |
27 | [tool.setuptools.dynamic]
28 | version = {attr = "sqlite_vector._version.__version__"}
29 |
30 | [tool.bdist_wheel]
31 | # Force platform-specific wheels
32 | universal = false
33 |
--------------------------------------------------------------------------------
/libs/fp16/macros.h:
--------------------------------------------------------------------------------
#pragma once
#ifndef FP16_MACROS_H
#define FP16_MACROS_H

// Compile-time feature detection for half-precision (FP16) support:
// decides whether native conversion instructions and half-float types
// are available for the current compiler/architecture combination.

// Native FP16<->FP32 conversion: F16C on x86 (GCC/ICC/MSVC+AVX2) or
// native support on AArch64.
#ifndef FP16_USE_NATIVE_CONVERSION
#if (defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__)
#define FP16_USE_NATIVE_CONVERSION 1
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)
#define FP16_USE_NATIVE_CONVERSION 1
#elif defined(_MSC_VER) && defined(_M_ARM64)
#define FP16_USE_NATIVE_CONVERSION 1
#elif defined(__GNUC__) && defined(__aarch64__)
#define FP16_USE_NATIVE_CONVERSION 1
#endif
#if !defined(FP16_USE_NATIVE_CONVERSION)
#define FP16_USE_NATIVE_CONVERSION 0
#endif // !defined(FP16_USE_NATIVE_CONVERSION)
#endif // !defined(FP16_USE_NATIVE_CONVERSION)

// _Float16 type: GCC 12+ (not Clang/ICC) with F16C available.
#ifndef FP16_USE_FLOAT16_TYPE
#if !defined(__clang__) && !defined(__INTEL_COMPILER) && defined(__GNUC__) && (__GNUC__ >= 12)
#if defined(__F16C__)
#define FP16_USE_FLOAT16_TYPE 1
#endif
#endif
#if !defined(FP16_USE_FLOAT16_TYPE)
#define FP16_USE_FLOAT16_TYPE 0
#endif // !defined(FP16_USE_FLOAT16_TYPE)
#endif // !defined(FP16_USE_FLOAT16_TYPE)

// __fp16 type: Clang with F16C or AArch64, or GCC on AArch64.
#ifndef FP16_USE_FP16_TYPE
#if defined(__clang__)
#if defined(__F16C__) || defined(__aarch64__)
#define FP16_USE_FP16_TYPE 1
#endif
#elif defined(__GNUC__)
#if defined(__aarch64__)
#define FP16_USE_FP16_TYPE 1
#endif
#endif
#if !defined(FP16_USE_FP16_TYPE)
#define FP16_USE_FP16_TYPE 0
#endif // !defined(FP16_USE_FP16_TYPE)
#endif // !defined(FP16_USE_FP16_TYPE)

#endif /* FP16_MACROS_H */

--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Elastic License 2.0 (modified for open-source use)
2 |
3 | Copyright © 2025 SQLite Cloud, Inc.
4 |
5 | This software is licensed under the Elastic License 2.0, with the additional grant described below.
6 |
7 | You may not use this file except in compliance with the Elastic License 2.0 and the conditions outlined here.
8 |
9 | You may obtain a copy of the Elastic License 2.0 at:
10 |
11 | ```
12 | https://www.elastic.co/licensing/elastic-license
13 | ```
14 |
15 | Software distributed under the Elastic License is distributed on an "AS IS" BASIS,
16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 |
18 | See the Elastic License 2.0 for the specific language governing permissions and limitations under the license.
19 |
20 | ---
21 |
22 | ## Additional Grant for Open-Source Projects
23 |
24 | In addition to the permissions granted under the Elastic License 2.0:
25 |
26 | * **Free Use in Open-Source Projects**:
27 | You may use, copy, distribute, and prepare derivative works of the software — in source or object form, with or without modification — freely and without fee, provided the software is incorporated into or used by an **open-source project** licensed under an OSI-approved open-source license.
28 |
29 | ---
30 |
31 | ## Conditions
32 |
33 | 1. For **open-source projects**, the software may be used, copied, modified, and distributed without restriction or fee.
34 |
35 | 2. For **non–open-source or commercial production use**, you may use, copy, distribute, and prepare derivative works of the software only with a commercial license from SQLite Cloud, Inc.
36 |
37 | 3. You may not provide the software to third parties as a managed service, such as a hosted or cloud-based service, unless you have a license for that use.
38 |
39 | 4. The software may not be used to circumvent the license grant limitations.
40 |
41 | 5. Any permitted use is subject to compliance with the Elastic License 2.0, this additional grant, and applicable law.
42 |
--------------------------------------------------------------------------------
/packages/python/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | from setuptools import setup
3 | from setuptools.command.bdist_wheel import bdist_wheel
4 |
5 |
class PlatformSpecificWheel(bdist_wheel):
    """Custom bdist_wheel to force platform-specific wheel."""

    def finalize_options(self):
        """Mark the wheel as non-pure so it is tagged for a single platform.

        The platform tag can be overridden via the PLAT_NAME environment
        variable (set by the CI matrix when cross-building).
        """
        bdist_wheel.finalize_options(self)
        # Force platform-specific wheel
        self.root_is_pure = False

        # Set platform name from environment if provided
        plat_name = os.environ.get("PLAT_NAME")
        if plat_name:
            self.plat_name = plat_name

    def get_tag(self):
        """Return the (python, abi, platform) wheel tag triple.

        Keeps the platform tag from PLAT_NAME when given, but widens the
        Python/ABI tags to py3/none because the wheel only ships a prebuilt
        native SQLite extension, not compiled CPython extension modules.
        """
        # Force platform-specific tags with broader compatibility
        python_tag, abi_tag, platform_tag = bdist_wheel.get_tag(self)

        # Override platform tag if specified
        plat_name = os.environ.get("PLAT_NAME")
        if plat_name:
            platform_tag = plat_name

        # Use py3 for broader Python compatibility since we have pre-built binaries
        python_tag = "py3"
        abi_tag = "none"

        return python_tag, abi_tag, platform_tag
33 |
34 |
def get_platform_classifiers():
    """Return the Trove classifiers for the wheel being built.

    The target platform is read from the PLAT_NAME environment variable.

    Raises:
        ValueError: when PLAT_NAME is unset or not a supported platform tag.
    """
    # Operating-system classifier for every supported wheel platform tag.
    os_classifier_by_plat = {
        "manylinux2014_x86_64": "Operating System :: POSIX :: Linux",
        "manylinux2014_aarch64": "Operating System :: POSIX :: Linux",
        "win_amd64": "Operating System :: Microsoft :: Windows",
        "macosx_10_9_x86_64": "Operating System :: MacOS",
        "macosx_11_0_arm64": "Operating System :: MacOS",
    }

    plat_name = os.environ.get("PLAT_NAME")
    os_classifier = os_classifier_by_plat.get(plat_name) if plat_name else None
    if os_classifier is None:
        raise ValueError(f"Unsupported or missing PLAT_NAME: {plat_name}")

    return ["Programming Language :: Python :: 3", os_classifier]
50 |
51 |
if __name__ == "__main__":
    # Package metadata (name, version, package data) comes from pyproject.toml
    # and the environment; here we only wire in the custom wheel command and
    # the platform-derived classifiers.
    setup(
        cmdclass={"bdist_wheel": PlatformSpecificWheel},
        classifiers=get_platform_classifiers(),
    )
57 |
--------------------------------------------------------------------------------
/packages/node/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "@sqliteai/sqlite-vector",
3 | "version": "0.9.45",
4 | "description": "SQLite vector search extension for Node.js - Cross-platform vector embeddings and similarity search",
5 | "main": "./dist/index.js",
6 | "module": "./dist/index.mjs",
7 | "types": "./dist/index.d.ts",
8 | "exports": {
9 | ".": {
10 | "import": {
11 | "types": "./dist/index.d.mts",
12 | "default": "./dist/index.mjs"
13 | },
14 | "require": {
15 | "types": "./dist/index.d.ts",
16 | "default": "./dist/index.js"
17 | }
18 | }
19 | },
20 | "files": [
21 | "dist",
22 | "README.md",
23 | "LICENSE.md"
24 | ],
25 | "scripts": {
26 | "build": "tsup",
27 | "prepublishOnly": "npm run build",
28 | "test": "vitest",
29 | "typecheck": "tsc --noEmit",
30 | "generate-platforms": "node generate-platform-packages.js"
31 | },
32 | "keywords": [
33 | "sqlite",
34 | "vector",
35 | "embedding",
36 | "ai",
37 | "machine-learning",
38 | "similarity-search",
39 | "semantic-search",
40 | "vector-database",
41 | "sqlite-extension"
42 | ],
43 | "author": "Gioele Cantoni (gioele@sqlitecloud.io)",
44 | "license": "SEE LICENSE IN LICENSE.md",
45 | "repository": {
46 | "type": "git",
47 | "url": "https://github.com/sqliteai/sqlite-vector.git",
48 | "directory": "packages/node"
49 | },
50 | "homepage": "https://github.com/sqliteai/sqlite-vector#readme",
51 | "bugs": {
52 | "url": "https://github.com/sqliteai/sqlite-vector/issues"
53 | },
54 | "engines": {
55 | "node": ">=16.0.0"
56 | },
57 | "optionalDependencies": {
58 | "@sqliteai/sqlite-vector-darwin-arm64": "0.9.45",
59 | "@sqliteai/sqlite-vector-darwin-x86_64": "0.9.45",
60 | "@sqliteai/sqlite-vector-linux-arm64": "0.9.45",
61 | "@sqliteai/sqlite-vector-linux-arm64-musl": "0.9.45",
62 | "@sqliteai/sqlite-vector-linux-x86_64": "0.9.45",
63 | "@sqliteai/sqlite-vector-linux-x86_64-musl": "0.9.45",
64 | "@sqliteai/sqlite-vector-win32-x86_64": "0.9.45"
65 | },
66 | "devDependencies": {
67 | "@types/node": "^20.0.0",
68 | "tsup": "^8.0.0",
69 | "typescript": "^5.3.0",
70 | "vitest": "^3.2.4"
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/packages/python/README.md:
--------------------------------------------------------------------------------
1 | ## SQLite Vector Python package
2 |
3 | This package provides the sqlite-vector extension prebuilt binaries for multiple platforms and architectures.
4 |
5 | ### SQLite Vector
6 |
7 | SQLite Vector is a cross-platform, ultra-efficient SQLite extension that brings vector search capabilities to your embedded database. It works seamlessly on iOS, Android, Windows, Linux, and macOS, using just 30MB of memory by default. With support for Float32, Float16, BFloat16, Int8, and UInt8, and highly optimized distance functions, it's the ideal solution for Edge AI applications.
8 |
9 | More details on the official repository [sqliteai/sqlite-vector](https://github.com/sqliteai/sqlite-vector).
10 |
11 | ### Documentation
12 |
13 | For detailed information on all available functions, their parameters, and examples, refer to the [comprehensive API Reference](https://github.com/sqliteai/sqlite-vector/blob/main/API.md).
14 |
15 | ### Supported Platforms and Architectures
16 |
17 | | Platform | Arch | Subpackage name | Binary name |
18 | | ------------- | ------------ | ------------------------ | ------------ |
19 | | Linux (CPU) | x86_64/arm64 | sqlite_vector.binaries | vector.so |
20 | | Windows (CPU) | x86_64 | sqlite_vector.binaries | vector.dll |
21 | | macOS (CPU) | x86_64/arm64 | sqlite_vector.binaries | vector.dylib |
22 |
23 | ## Usage
24 |
25 | > **Note:** Some SQLite installations on certain operating systems may have extension loading disabled by default.
26 | > If you encounter issues loading the extension, refer to the [sqlite-extensions-guide](https://github.com/sqliteai/sqlite-extensions-guide/) for platform-specific instructions on enabling and using SQLite extensions.
27 |
28 | ```python
29 | import importlib.resources
30 | import sqlite3
31 |
32 | # Connect to your SQLite database
33 | conn = sqlite3.connect("example.db")
34 |
35 | # Load the sqlite-vector extension
36 | # pip will install the correct binary package for your platform and architecture
37 | ext_path = importlib.resources.files("sqlite_vector.binaries") / "vector"
38 |
39 | conn.enable_load_extension(True)
40 | conn.load_extension(str(ext_path))
41 | conn.enable_load_extension(False)
42 |
43 |
44 | # Now you can use sqlite-vector features in your SQL queries
45 | print(conn.execute("SELECT vector_version();").fetchone())
46 | ```
--------------------------------------------------------------------------------
/examples/semantic_search/README.md:
--------------------------------------------------------------------------------
1 | # Semantic Search Example with sqlite-vector
2 |
3 | This example in Python demonstrates how to build a semantic search engine using the [sqlite-vector](https://github.com/sqliteai/sqlite-vector) extension and a Sentence Transformer model. It allows you to index and search documents using vector similarity, powered by a local LLM embedding model.
4 |
5 | ### How it works
6 |
7 | - **Embeddings**: Uses [sentence-transformers](https://huggingface.co/sentence-transformers) to generate dense vector representations (embeddings) for text. The default model is [`all-MiniLM-L6-v2`](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2), a fast, lightweight model (384 dimensions) suitable for semantic search and retrieval tasks.
8 | - **Vector Store and Search**: Embeddings are stored in SQLite using the [`sqlite-vector`](https://github.com/sqliteai/sqlite-vector) extension, enabling fast similarity search (cosine distance) directly in the database.
9 | - **Sample Data**: The `samples/` directory contains example documents you can index and search immediately.
10 |
11 | ### Installation
12 |
13 | 1. Download the `sqlite-vector` extension for your platform [here](https://github.com/sqliteai/sqlite-vector/releases).
14 |
15 | 2. Extract the `vector.so` file in the main directory of the project.
16 |
17 | 3. Install the dependencies:
18 |
19 |
20 | ```bash
21 | $ python -m venv venv
22 |
23 | $ source venv/bin/activate
24 |
25 | $ pip install -r requirements.txt
26 | ```
27 |
28 | 4. On first use, the required model will be downloaded automatically.
29 |
30 | ### Usage
31 |
32 | Use the interactive mode to keep the model in memory and run multiple queries efficiently:
33 |
34 | ```bash
35 | python semsearch.py --repl
36 |
37 | # Index a directory of documents
38 | semsearch> index ./samples
39 |
40 | # Search for similar documents
41 | semsearch> search "neural network architectures for image recognition"
42 | ```
43 |
44 | ### Example Queries
45 |
46 | Try these queries to test semantic similarity:
47 |
48 | - "neural network architectures for image recognition"
49 | - "reinforcement learning in autonomous systems"
50 | - "explainable artificial intelligence methods"
51 | - "AI governance and regulatory compliance"
52 | - "network intrusion detection systems"
53 |
54 | **Note:**
55 | - Supported extensions are `.md`, `.txt`, `.py`, `.js`, `.html`, `.css`, `.sql`, `.json`, `.xml`.
56 | - For more details, see the code in `semsearch.py` and `semantic_search.py`.
--------------------------------------------------------------------------------
/packages/swift/plugin/vector.swift:
--------------------------------------------------------------------------------
import PackagePlugin
import Foundation

/// SwiftPM build-tool plugin that produces the sqlite-vector XCFramework by
/// delegating to the repository Makefile before the consuming target builds.
@main
struct vector: BuildToolPlugin {
    /// Entry point for creating build commands for targets in Swift packages.
    func createBuildCommands(context: PluginContext, target: Target) async throws -> [Command] {
        let packageDirectory = context.package.directoryURL
        let outputDirectory = context.pluginWorkDirectoryURL
        return createvectorBuildCommands(packageDirectory: packageDirectory, outputDirectory: outputDirectory)
    }
}

#if canImport(XcodeProjectPlugin)
import XcodeProjectPlugin

extension vector: XcodeBuildToolPlugin {
    // Entry point for creating build commands for targets in Xcode projects.
    // Xcode supplies no package directory, so nil is passed and the script
    // falls back to the shell's working directory.
    func createBuildCommands(context: XcodePluginContext, target: XcodeTarget) throws -> [Command] {
        let outputDirectory = context.pluginWorkDirectoryURL
        return createvectorBuildCommands(packageDirectory: nil, outputDirectory: outputDirectory)
    }
}

#endif

/// Shared function to create vector build commands
/// - Parameters:
///   - packageDirectory: package root when invoked from SwiftPM; nil for Xcode.
///   - outputDirectory: plugin work directory; receives the built XCFramework.
/// - Returns: a single prebuild command that runs `make xcframework` via bash.
func createvectorBuildCommands(packageDirectory: URL?, outputDirectory: URL) -> [Command] {

    // For Xcode projects, use current directory; for Swift packages, use provided packageDirectory
    let workingDirectory = packageDirectory?.path ?? "$(pwd)"
    let packageDirInfo = packageDirectory != nil ? "Package directory: \(packageDirectory!.path)" : "Working directory: $(pwd)"

    return [
        .prebuildCommand(
            displayName: "Building vector XCFramework",
            executable: URL(fileURLWithPath: "/bin/bash"),
            arguments: [
                "-c",
                """
                set -e
                echo "Starting vector XCFramework prebuild..."
                echo "\(packageDirInfo)"

                # Clean and create output directory
                rm -rf "\(outputDirectory.path)"
                mkdir -p "\(outputDirectory.path)"

                # Build directly from source directory with custom output paths
                cd "\(workingDirectory)" && \
                echo "Building XCFramework..." && \
                make xcframework DIST_DIR="\(outputDirectory.path)" BUILD_DIR="\(outputDirectory.path)/build" && \
                rm -rf "\(outputDirectory.path)/build" && \
                echo "XCFramework build completed successfully!"
                """
            ],
            outputFilesDirectory: outputDirectory
        )
    ]
}
--------------------------------------------------------------------------------
/packages/node/src/index.test.ts:
--------------------------------------------------------------------------------
// Unit tests for the platform-detection and extension-resolution helpers in
// packages/node/src/index.ts. They run on whatever host executes them, so
// assertions check formats/invariants rather than one fixed platform value.
import { describe, it, expect } from 'vitest';
import {
  getCurrentPlatform,
  getPlatformPackageName,
  getBinaryName,
  isMusl,
  getExtensionPath,
  getExtensionInfo,
  ExtensionNotFoundError
} from './index';

describe('Platform Detection', () => {
  it('getCurrentPlatform() returns a valid platform', () => {
    const platform = getCurrentPlatform();
    // Every platform/arch combination the package publishes binaries for
    // (mirrors the optionalDependencies list in package.json).
    const validPlatforms = [
      'darwin-arm64',
      'darwin-x86_64',
      'linux-arm64',
      'linux-arm64-musl',
      'linux-x86_64',
      'linux-x86_64-musl',
      'win32-x86_64',
    ];

    expect(validPlatforms).toContain(platform);
  });

  it('getPlatformPackageName() returns correct package name format', () => {
    const packageName = getPlatformPackageName();

    expect(packageName.startsWith('@sqliteai/sqlite-vector-')).toBe(true);

    // Full shape: scope + base name + os + arch + optional musl suffix.
    expect(packageName).toMatch(
      /^@sqliteai\/sqlite-vector-(darwin|linux|win32)-(arm64|x86_64)(-musl)?$/
    );
  });

  it('getBinaryName() returns correct extension', () => {
    const binaryName = getBinaryName();

    // Loadable-extension suffix per OS: .dylib (macOS), .so (Linux), .dll (Windows).
    expect(binaryName).toMatch(
      /^vector\.(dylib|so|dll)$/
    );
  });

  it('isMusl() returns a boolean', () => {
    expect(typeof isMusl()).toBe('boolean');
  });
});

describe('Extension Path Resolution', () => {
  // The platform binary package may legitimately be absent (e.g. in CI with
  // skipped optional dependencies), so both success and the typed error are
  // accepted outcomes here.
  it('getExtensionPath() returns a string or throws', () => {
    try {
      const path = getExtensionPath();
      expect(typeof path).toBe('string');
      expect(path.length).toBeGreaterThan(0);
    } catch (error) {
      expect(error instanceof ExtensionNotFoundError).toBe(true);
    }
  });

  it('getExtensionInfo() returns complete info object', () => {
    try {
      const info = getExtensionInfo();

      expect(info.platform).toBeTruthy();
      expect(info.packageName).toBeTruthy();
      expect(info.binaryName).toBeTruthy();
      expect(info.path).toBeTruthy();

      expect(typeof info.platform).toBe('string');
      expect(typeof info.packageName).toBe('string');
      expect(typeof info.binaryName).toBe('string');
      expect(typeof info.path).toBe('string');
    } catch (error) {
      expect(error instanceof ExtensionNotFoundError).toBe(true);
    }
  });
});

describe('Error Handling', () => {
  it('ExtensionNotFoundError has correct properties', () => {
    const error = new ExtensionNotFoundError('Test message');

    expect(error instanceof Error).toBe(true);
    expect(error.name).toBe('ExtensionNotFoundError');
    expect(error.message).toBe('Test message');
  });
});
--------------------------------------------------------------------------------
/libs/fp16/bitcasts.h:
--------------------------------------------------------------------------------
#pragma once
#ifndef FP16_BITCASTS_H
#define FP16_BITCASTS_H

/*
 * Bit-level reinterpretation helpers between IEEE-754 floating-point values
 * and their raw integer representations. Each function prefers a dedicated
 * compiler intrinsic (OpenCL, CUDA, ICC, recent MSVC, MSVC/ARM) and falls
 * back to well-defined union type punning everywhere else.
 *
 * Fix: the #include directives below had lost their angle-bracket operands
 * (they read as a bare `#include`), which cannot preprocess; the standard
 * fp16 header set is restored.
 */

#if defined(__cplusplus) && (__cplusplus >= 201103L)
	#include <cstdint>
#elif !defined(__OPENCL_VERSION__)
	#include <stdint.h>
#endif

#if defined(__INTEL_COMPILER)
	#include <immintrin.h>
#endif

#if defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
	#include <intrin.h>
#endif


/* Reinterpret the bits of a 32-bit unsigned integer as an IEEE-754 float. */
static inline float fp32_from_bits(uint32_t w) {
#if defined(__OPENCL_VERSION__)
	return as_float(w);
#elif defined(__CUDA_ARCH__)
	return __uint_as_float((unsigned int) w);
#elif defined(__INTEL_COMPILER) || defined(_MSC_VER) && (_MSC_VER >= 1932) && (defined(_M_IX86) || defined(_M_X64))
	return _castu32_f32(w);
#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
	return _CopyFloatFromInt32((__int32) w);
#else
	/* Portable fallback: union punning is well-defined in C and supported
	 * as an extension by all mainstream C++ compilers. */
	union {
		uint32_t as_bits;
		float as_value;
	} fp32 = { w };
	return fp32.as_value;
#endif
}

/* Reinterpret an IEEE-754 float as its raw 32-bit representation. */
static inline uint32_t fp32_to_bits(float f) {
#if defined(__OPENCL_VERSION__)
	return as_uint(f);
#elif defined(__CUDA_ARCH__)
	return (uint32_t) __float_as_uint(f);
#elif defined(__INTEL_COMPILER) || defined(_MSC_VER) && (_MSC_VER >= 1932) && (defined(_M_IX86) || defined(_M_X64))
	return _castf32_u32(f);
#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
	return (uint32_t) _CopyInt32FromFloat(f);
#else
	union {
		float as_value;
		uint32_t as_bits;
	} fp32 = { f };
	return fp32.as_bits;
#endif
}

/* Reinterpret the bits of a 64-bit unsigned integer as an IEEE-754 double. */
static inline double fp64_from_bits(uint64_t w) {
#if defined(__OPENCL_VERSION__)
	return as_double(w);
#elif defined(__CUDA_ARCH__)
	return __longlong_as_double((long long) w);
#elif defined(__INTEL_COMPILER) || defined(_MSC_VER) && (_MSC_VER >= 1932) && (defined(_M_IX86) || defined(_M_X64))
	return _castu64_f64(w);
#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
	return _CopyDoubleFromInt64((__int64) w);
#else
	union {
		uint64_t as_bits;
		double as_value;
	} fp64 = { w };
	return fp64.as_value;
#endif
}

/* Reinterpret an IEEE-754 double as its raw 64-bit representation. */
static inline uint64_t fp64_to_bits(double f) {
#if defined(__OPENCL_VERSION__)
	return as_ulong(f);
#elif defined(__CUDA_ARCH__)
	return (uint64_t) __double_as_longlong(f);
#elif defined(__INTEL_COMPILER) || defined(_MSC_VER) && (_MSC_VER >= 1932) && (defined(_M_IX86) || defined(_M_X64))
	return _castf64_u64(f);
#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
	return (uint64_t) _CopyInt64FromDouble(f);
#else
	union {
		double as_value;
		uint64_t as_bits;
	} fp64 = { f };
	return fp64.as_bits;
#endif
}

#endif /* FP16_BITCASTS_H */
93 |
--------------------------------------------------------------------------------
/packages/python/download_artifacts.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import zipfile
3 | import requests
4 | from pathlib import Path
5 | import shutil
6 |
7 |
# == USAGE ==
# python3 download_artifacts.py PLATFORM VERSION
# eg: python3 download_artifacts.py linux_x86_64 "0.5.9"

# GitHub repository whose Releases page hosts the prebuilt extension binaries.
REPO = "sqliteai/sqlite-vector"
RELEASE_URL = f"https://github.com/{REPO}/releases/download"

# Map Python plat_name to artifact names
ARTIFACTS = {
    "manylinux2014_x86_64": ["vector-linux-x86_64"],
    "manylinux2014_aarch64": [
        "vector-linux-arm64",
    ],
    "win_amd64": ["vector-windows-x86_64"],
    "macosx_10_9_x86_64": ["vector-macos"],
    "macosx_11_0_arm64": ["vector-macos"],
}

# Loadable-extension filename expected inside each artifact, keyed by plat_name.
BINARY_NAME = {
    "manylinux2014_x86_64": "vector.so",
    "manylinux2014_aarch64": "vector.so",
    "win_amd64": "vector.dll",
    "macosx_10_9_x86_64": "vector.dylib",
    "macosx_11_0_arm64": "vector.dylib",
}

# Destination directory; its contents ship as the sqlite_vector.binaries package.
BINARIES_DIR = Path(__file__).parent / "src/sqlite_vector/binaries"
35 |
36 |
def download_and_extract(artifact_name, bin_name, version):
    """Download one release artifact zip and extract its extension binary.

    artifact_name: release asset base name, e.g. "vector-linux-x86_64".
    bin_name: expected binary filename, e.g. "vector.so".
    version: release tag (without leading "v"), used in both URL and filename.

    Exits the process with status 1 when the download fails.
    """
    artifact = f"{artifact_name}-{version}.zip"
    url = f"{RELEASE_URL}/{version}/{artifact}"
    print(f"Downloading {url}")

    # Whole asset is buffered in memory (r.content); fine for these small zips.
    r = requests.get(url)
    if r.status_code != 200:
        print(f"Failed to download {artifact}: {r.status_code}")
        sys.exit(1)

    zip_path = BINARIES_DIR / artifact
    with open(zip_path, "wb") as f:
        f.write(r.content)

    out_dir = BINARIES_DIR
    out_dir.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        for member in zip_ref.namelist():
            if member.endswith(bin_name):
                zip_ref.extract(member, out_dir)

                # Move to expected name/location
                # NOTE(review): when the member lives in a subdirectory inside
                # the zip, extract() recreates that path and the rename leaves
                # the now-empty directory behind — confirm whether cleanup is
                # needed for the published artifacts.
                src = out_dir / member
                dst = out_dir / bin_name
                src.rename(dst)

                print(f"Extracted {dst}")

    # Remove the downloaded archive; only the extracted binary is packaged.
    zip_path.unlink()
67 |
68 |
def main():
    """CLI entry point: python3 download_artifacts.py PLATFORM VERSION.

    Wipes and recreates the binaries directory, then downloads every
    artifact registered for the requested platform tag.
    """
    args = sys.argv[1:]
    platform = args[0].lower() if len(args) == 2 else None
    version = args[1] if len(args) == 2 else None

    if not version or not platform:
        print(
            'Error: Version is not specified.\nUsage: \n python3 download_artifacts.py linux_x86_64 "0.5.9"'
        )
        sys.exit(1)

    # Start from a clean directory so stale binaries never leak into a wheel.
    print(BINARIES_DIR)
    if BINARIES_DIR.exists():
        shutil.rmtree(BINARIES_DIR)
    BINARIES_DIR.mkdir(parents=True, exist_ok=True)

    wanted = ARTIFACTS.get(platform, [])
    if not wanted:
        print(f"Error: Unknown platform '{platform}'")
        sys.exit(1)

    binary = BINARY_NAME[platform]
    for artifact in wanted:
        download_and_extract(artifact, binary, version)
98 |
--------------------------------------------------------------------------------
/QUANTIZATION.md:
--------------------------------------------------------------------------------
1 | ### Vector Quantization for High Performance
2 |
3 | `sqlite-vector` supports **vector quantization**, a powerful technique to significantly accelerate vector search while reducing memory usage. You can quantize your vectors with:
4 |
5 | ```sql
6 | SELECT vector_quantize('my_table', 'my_column');
7 | ```
8 |
9 | To further boost performance, quantized vectors can be **preloaded in memory** using:
10 |
11 | ```sql
12 | SELECT vector_quantize_preload('my_table', 'my_column');
13 | ```
14 |
15 | This can result in a **4×–5× speedup** on nearest neighbor queries while keeping memory usage low.
16 |
17 | #### What is Quantization?
18 |
19 | Quantization compresses high-dimensional float vectors (e.g., `FLOAT32`) into compact representations using lower-precision formats (e.g., `UINT8`). This drastically reduces the size of the data—often by a factor of 4 to 8—making it practical to load large datasets entirely in memory, even on edge devices.
20 |
21 | #### Why is it Important?
22 |
23 | * **Faster Searches**: With preloaded quantized vectors, distance computations are up to 5× faster.
24 | * **Lower Memory Footprint**: Quantized vectors use significantly less RAM, allowing millions of vectors to fit in memory.
25 | * **Edge-ready**: The reduced size and in-memory access make this ideal for mobile, embedded, and on-device AI applications.
26 |
27 | #### Estimate Memory Usage
28 |
29 | Before preloading quantized vectors, you can **estimate the memory required** using:
30 |
31 | ```sql
32 | SELECT vector_quantize_memory('my_table', 'my_column');
33 | ```
34 |
35 | This gives you an approximate number of bytes needed to load the quantized vectors into memory.
36 |
37 | #### Accuracy You Can Trust
38 |
39 | Despite the compression, our quantization algorithms are finely tuned to maintain high accuracy. You can expect **recall rates greater than 0.95**, ensuring that approximate searches closely match exact results in quality.
40 |
41 | #### Measuring Recall in SQLite-Vector
42 |
43 | You can evaluate the recall of quantized search compared to exact search using a single SQL query. For example, assuming a table `vec_examples` with an `embedding` column, use:
44 |
45 | ```sql
46 | WITH
47 | exact_knn AS (
48 | SELECT e.rowid
49 | FROM vec_examples AS e
50 | JOIN vector_full_scan('vec_examples', 'embedding', ?1, ?2) AS v
51 | ON e.rowid = v.rowid
52 | ),
53 | approx_knn AS (
54 | SELECT e.rowid
55 | FROM vec_examples AS e
56 | JOIN vector_quantize_scan('vec_examples', 'embedding', ?1, ?2) AS v
57 | ON e.rowid = v.rowid
58 | ),
59 | matches AS (
60 | SELECT COUNT(*) AS match_count
61 | FROM exact_knn
62 | WHERE rowid IN (SELECT rowid FROM approx_knn)
63 | ),
64 | total AS (
65 | SELECT COUNT(*) AS total_count
66 | FROM exact_knn
67 | )
68 | SELECT
69 | (SELECT match_count FROM matches) AS match_count,
70 | (SELECT total_count FROM total) AS total_count,
71 | CAST((SELECT match_count FROM matches) AS FLOAT) /
72 | CAST((SELECT total_count FROM total) AS FLOAT) AS recall;
73 | ```
74 |
75 | Where `?1` is the input vector (as a BLOB) and `?2` is the number of nearest neighbors `k`.
76 | This query compares exact and quantized results and computes the recall ratio, helping you validate the quality of quantized search.
77 |
--------------------------------------------------------------------------------
/packages/android/gradlew.bat:
--------------------------------------------------------------------------------
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
@rem SPDX-License-Identifier: Apache-2.0
@rem
@rem NOTE: This is the standard Gradle wrapper launcher for Windows, generated
@rem by Gradle's 'wrapper' task. Regenerate rather than hand-editing.

@if "%DEBUG%"=="" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%"=="" set DIRNAME=.
@rem This is normally unused
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if %ERRORLEVEL% equ 0 goto execute

echo. 1>&2
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
echo. 1>&2
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
echo location of your Java installation. 1>&2

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo. 1>&2
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
echo. 1>&2
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
echo location of your Java installation. 1>&2

goto fail

:execute
@rem Setup the command line



@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -jar "%APP_HOME%\gradle\wrapper\gradle-wrapper.jar" %*

:end
@rem End local scope for the variables with windows NT shell
if %ERRORLEVEL% equ 0 goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
set EXIT_CODE=%ERRORLEVEL%
if %EXIT_CODE% equ 0 set EXIT_CODE=1
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
exit /b %EXIT_CODE%

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega
--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
1 | name: Build and Publish Python Package
2 |
3 | on:
4 | workflow_dispatch:
5 | inputs:
6 | version:
7 | description: "Version to use for the Python package (e.g. 0.9.9)"
8 | required: true
9 | type: string
10 | test-pypi:
11 | description: "Publish to Test PyPI"
12 | required: false
13 | type: boolean
14 | default: false
15 |
16 | workflow_run:
17 | workflows: ["Build, Test and Release"]
18 | types:
19 | - completed
20 |
21 | jobs:
22 | build-and-publish:
23 | if: |
24 | github.event_name == 'workflow_dispatch' ||
25 | (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.head_branch == 'main')
26 | runs-on: ${{ matrix.os }}
27 | permissions:
28 | id-token: write # mandatory for Pypi trusted publishing
29 | strategy:
30 | matrix:
31 | include:
32 | - os: ubuntu-latest
33 | platform: linux
34 | python-version: "3.10"
35 | arch: x86_64
36 | plat_name: manylinux2014_x86_64
37 | - os: ubuntu-latest
38 | platform: linux
39 | python-version: "3.10"
40 | arch: arm64
41 | plat_name: manylinux2014_aarch64
42 | - os: ubuntu-latest
43 | platform: windows
44 | python-version: "3.10"
45 | arch: x86_64
46 | plat_name: win_amd64
47 | - os: ubuntu-latest
48 | platform: macos
49 | python-version: "3.10"
50 | arch: x86_64
51 | plat_name: macosx_10_9_x86_64
52 | - os: ubuntu-latest
53 | platform: macos
54 | python-version: "3.10"
55 | arch: arm64
56 | plat_name: macosx_11_0_arm64
57 | defaults:
58 | run:
59 | shell: bash
60 | steps:
61 | - uses: actions/checkout@v4
62 | with:
63 | submodules: false
64 |
65 | - name: Set up Python
66 | uses: actions/setup-python@v5
67 | with:
68 | python-version: ${{ matrix.python-version }}
69 |
70 | - name: Install build dependencies
71 | run: |
72 | cd packages/python
73 | python3 -m pip install --upgrade pip
74 | python3 -m pip install -r requirements-dev.txt
75 |
76 | - name: Get version
77 | id: get_version
78 | run: |
79 | if [[ "${{ github.event_name }}" == "workflow_run" ]]; then
80 | # Fetch latest published release tag from GitHub API
81 | VERSION=$(curl -s "https://api.github.com/repos/${{ github.repository }}/releases/latest" | jq -r '.tag_name')
82 | if [ "$VERSION" = "null" ] || [ -z "$VERSION" ]; then
83 | echo "Error: Failed to get latest release version"
84 | exit 1
85 | fi
86 | else
87 | VERSION="${{ github.event.inputs.version }}"
88 | fi
89 | VERSION=${VERSION#v}
90 | echo "version=$VERSION" >> $GITHUB_OUTPUT
91 |
92 | - name: Download artifacts for current platform
93 | run: |
94 | cd packages/python
95 | python3 download_artifacts.py "${{ matrix.plat_name }}" "${{ steps.get_version.outputs.version }}"
96 |
97 | - name: Build wheel
98 | env:
99 | PACKAGE_VERSION: ${{ steps.get_version.outputs.version }}
100 | PLAT_NAME: ${{ matrix.plat_name }}
101 | run: |
102 | cd packages/python
103 | python -m build --wheel
104 |
105 | - name: Publish to PyPI
106 | uses: pypa/gh-action-pypi-publish@release/v1
107 | with:
108 | packages-dir: packages/python/dist
109 | verbose: true
          # Avoid failing the workflow if the version has already been published
111 | skip-existing: true
          # Upload to TestPyPI (instead of PyPI) when the test-pypi input is set
113 | repository-url: ${{ github.event.inputs.test-pypi == 'true' && 'https://test.pypi.org/legacy/' || '' }}
114 |
--------------------------------------------------------------------------------
/packages/android/build.gradle:
--------------------------------------------------------------------------------
1 | buildscript {
2 | repositories {
3 | google()
4 | mavenCentral()
5 | }
6 | dependencies {
7 | classpath 'com.android.tools.build:gradle:8.5.2'
8 | }
9 | }
10 |
11 | plugins {
12 | id 'com.gradleup.nmcp.aggregation' version '1.2.0'
13 | }
14 |
15 | apply plugin: 'com.android.library'
16 | apply plugin: 'maven-publish'
17 | apply plugin: 'signing'
18 |
19 | android {
20 | namespace 'ai.sqlite.vector'
21 | compileSdk 34
22 |
23 | defaultConfig {
24 | minSdk 26
25 | targetSdk 34
26 | }
27 |
28 | buildTypes {
29 | release {
30 | minifyEnabled false
31 | }
32 | }
33 |
34 | compileOptions {
35 | sourceCompatibility JavaVersion.VERSION_1_8
36 | targetCompatibility JavaVersion.VERSION_1_8
37 | }
38 |
39 | sourceSets {
40 | main {
41 | jniLibs.srcDirs = ['src/main/jniLibs']
42 | }
43 | }
44 | }
45 |
46 | repositories {
47 | google()
48 | mavenCentral()
49 | maven { url 'https://jitpack.io' }
50 | }
51 |
52 | dependencies {
53 | }
54 |
55 | afterEvaluate {
56 | publishing {
57 | publications {
58 | release(MavenPublication) {
59 | groupId = 'ai.sqlite'
60 | artifactId = 'vector'
61 | version = project.hasProperty('VERSION') ? project.VERSION : ['make', 'version'].execute(null, file('../..')).text.trim()
62 |
63 | artifact(project.hasProperty('AAR_PATH') ? project.AAR_PATH : "$buildDir/outputs/aar/android-release.aar")
64 |
65 | // Maven Central metadata
66 | pom {
67 | name = 'sqlite-vector'
68 | description = 'A cross-platform, ultra-efficient SQLite extension that brings vector search capabilities to your embedded database. Works seamlessly on iOS, Android, Windows, Linux, and macOS, using just 30MB of memory by default.'
69 | url = 'https://github.com/sqliteai/sqlite-vector'
70 |
71 | licenses {
72 | license {
73 | name = 'Elastic License 2.0'
74 | url = 'https://www.elastic.co/licensing/elastic-license'
75 | }
76 | }
77 |
78 | developers {
79 | developer {
80 | id = 'sqliteai'
81 | name = 'SQLite Cloud, Inc.'
82 | email = 'info@sqlitecloud.io'
83 | organization = 'SQLite Cloud, Inc.'
84 | organizationUrl = 'https://sqlite.ai'
85 | }
86 | }
87 |
88 | scm {
89 | connection = 'scm:git:git://github.com/sqliteai/sqlite-vector.git'
90 | developerConnection = 'scm:git:ssh://github.com:sqliteai/sqlite-vector.git'
91 | url = 'https://github.com/sqliteai/sqlite-vector/tree/main'
92 | }
93 | }
94 | }
95 | }
96 | }
97 |
98 | // Signing configuration for Maven Central
99 | signing {
100 | required { project.hasProperty("SIGNING_KEY") }
101 | if (project.hasProperty("SIGNING_KEY")) {
102 | useInMemoryPgpKeys(
103 | project.property("SIGNING_KEY").toString(),
104 | project.property("SIGNING_PASSWORD").toString()
105 | )
106 | sign publishing.publications.release
107 | }
108 | }
109 | }
110 |
111 | // Maven Central publishing via NMCP aggregation
112 | nmcpAggregation {
113 | if (project.hasProperty("SONATYPE_USERNAME") && project.hasProperty("SONATYPE_PASSWORD")) {
114 | centralPortal {
115 | username = project.property("SONATYPE_USERNAME")
116 | password = project.property("SONATYPE_PASSWORD")
117 | publishingType = "AUTOMATIC"
118 | }
119 | publishAllProjectsProbablyBreakingProjectIsolation()
120 | }
121 | }
--------------------------------------------------------------------------------
/packages/node/src/platform.ts:
--------------------------------------------------------------------------------
1 | import { platform, arch } from 'node:os';
2 | import { existsSync, readFileSync } from 'node:fs';
3 | import { execSync } from 'node:child_process';
4 |
5 | /**
6 | * Supported platform identifiers
7 | */
8 | export type Platform =
9 | | 'darwin-arm64'
10 | | 'darwin-x86_64'
11 | | 'linux-arm64'
12 | | 'linux-arm64-musl'
13 | | 'linux-x86_64'
14 | | 'linux-x86_64-musl'
15 | | 'win32-x86_64';
16 |
17 | /**
18 | * Binary extension for each platform
19 | */
20 | export const PLATFORM_EXTENSIONS: Record = {
21 | darwin: '.dylib',
22 | linux: '.so',
23 | win32: '.dll',
24 | } as const;
25 |
26 | /**
27 | * Detects if the system uses musl libc (Alpine Linux, etc.)
28 | * Uses multiple detection strategies for reliability
29 | */
30 | export function isMusl(): boolean {
31 | // Only relevant for Linux
32 | if (platform() !== 'linux') {
33 | return false;
34 | }
35 |
36 | // Strategy 1: Check for musl-specific files
37 | const muslFiles = [
38 | '/lib/ld-musl-x86_64.so.1',
39 | '/lib/ld-musl-aarch64.so.1',
40 | '/lib/ld-musl-armhf.so.1',
41 | ];
42 |
43 | for (const file of muslFiles) {
44 | if (existsSync(file)) {
45 | return true;
46 | }
47 | }
48 |
49 | // Strategy 2: Check ldd version output
50 | try {
51 | const lddVersion = execSync('ldd --version 2>&1', {
52 | encoding: 'utf-8',
53 | stdio: ['pipe', 'pipe', 'pipe'],
54 | });
55 |
56 | if (lddVersion.includes('musl')) {
57 | return true;
58 | }
59 | } catch {
60 | // ldd command failed, continue to next strategy
61 | }
62 |
63 | // Strategy 3: Check /etc/os-release for Alpine
64 | try {
65 | if (existsSync('/etc/os-release')) {
66 | const osRelease = readFileSync('/etc/os-release', 'utf-8');
67 | if (osRelease.includes('Alpine') || osRelease.includes('musl')) {
68 | return true;
69 | }
70 | }
71 | } catch {
72 | // File read failed, continue to next strategy
73 | }
74 |
75 | // Strategy 4: Check process.report.getReport() for musl
76 | try {
77 | const report = (process as any).report?.getReport?.();
78 | if (report?.header?.glibcVersionRuntime === '') {
79 | // Empty glibc version often indicates musl
80 | return true;
81 | }
82 | } catch {
83 | // Report not available
84 | }
85 |
86 | return false;
87 | }
88 |
89 | /**
90 | * Gets the current platform identifier
91 | * @throws {Error} If the platform is unsupported
92 | */
93 | export function getCurrentPlatform(): Platform {
94 | const platformName = platform();
95 | const archName = arch();
96 |
97 | // macOS
98 | if (platformName === 'darwin') {
99 | if (archName === 'arm64') return 'darwin-arm64';
100 | if (archName === 'x64' || archName === 'ia32') return 'darwin-x86_64';
101 | }
102 |
103 | // Linux (with musl detection)
104 | if (platformName === 'linux') {
105 | const muslSuffix = isMusl() ? '-musl' : '';
106 |
107 | if (archName === 'arm64') {
108 | return `linux-arm64${muslSuffix}` as Platform;
109 | }
110 | if (archName === 'x64' || archName === 'ia32') {
111 | return `linux-x86_64${muslSuffix}` as Platform;
112 | }
113 | }
114 |
115 | // Windows
116 | if (platformName === 'win32') {
117 | if (archName === 'x64' || archName === 'ia32') return 'win32-x86_64';
118 | }
119 |
120 | // Unsupported platform
121 | throw new Error(
122 | `Unsupported platform: ${platformName}-${archName}. ` +
123 | `Supported platforms: darwin-arm64, darwin-x86_64, linux-arm64, linux-x86_64, win32-x86_64 ` +
124 | `(with glibc or musl support for Linux)`
125 | );
126 | }
127 |
128 | /**
129 | * Gets the package name for the current platform
130 | */
131 | export function getPlatformPackageName(): string {
132 | const currentPlatform = getCurrentPlatform();
133 | return `@sqliteai/sqlite-vector-${currentPlatform}`;
134 | }
135 |
136 | /**
137 | * Gets the binary filename for the current platform
138 | */
139 | export function getBinaryName(): string {
140 | const platformName = platform();
141 | const extension = PLATFORM_EXTENSIONS[platformName];
142 |
143 | if (!extension) {
144 | throw new Error(`Unknown platform: ${platformName}`);
145 | }
146 |
147 | return `vector${extension}`;
148 | }
149 |
--------------------------------------------------------------------------------
/src/distance-cpu.h:
--------------------------------------------------------------------------------
1 | //
2 | // distance-cpu.h
3 | // sqlitevector
4 | //
5 | // Created by Marco Bambini on 20/06/25.
6 | //
7 |
8 | #ifndef __VECTOR_DISTANCE_CPU__
9 | #define __VECTOR_DISTANCE_CPU__
10 |
11 | #include "fp16/fp16.h"
12 | #include
13 | #include
14 | #include
15 |
16 | // Detect builtin bit_cast
17 | #ifndef HAVE_BUILTIN_BIT_CAST
18 | /* Only use __builtin_bit_cast if the compiler has it AND
19 | we're compiling as C++ (GCC 11+) or as a C standard that supports it (C23+). */
20 | #if defined(__has_builtin)
21 | #if __has_builtin(__builtin_bit_cast)
22 | #if defined(__cplusplus) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L)
23 | #define HAVE_BUILTIN_BIT_CAST 1
24 | #endif
25 | #endif
26 | #endif
27 |
28 | /* GCC note: in GCC 11–13, __builtin_bit_cast exists for C++ but NOT for C. */
29 | #if !defined(HAVE_BUILTIN_BIT_CAST) && defined(__GNUC__) && !defined(__clang__) && defined(__cplusplus)
30 | #if __GNUC__ >= 11
31 | #define HAVE_BUILTIN_BIT_CAST 1
32 | #endif
33 | #endif
34 | #endif
35 |
36 | typedef enum {
37 | VECTOR_TYPE_F32 = 1,
38 | VECTOR_TYPE_F16,
39 | VECTOR_TYPE_BF16,
40 | VECTOR_TYPE_U8,
41 | VECTOR_TYPE_I8
42 | } vector_type;
43 | #define VECTOR_TYPE_MAX 6
44 |
45 | typedef enum {
46 | VECTOR_QUANT_AUTO = 0,
47 | VECTOR_QUANT_U8BIT = 1,
48 | VECTOR_QUANT_S8BIT = 2
49 | } vector_qtype;
50 |
51 | typedef enum {
52 | VECTOR_DISTANCE_L2 = 1,
53 | VECTOR_DISTANCE_SQUARED_L2,
54 | VECTOR_DISTANCE_COSINE,
55 | VECTOR_DISTANCE_DOT,
56 | VECTOR_DISTANCE_L1,
57 | } vector_distance;
58 | #define VECTOR_DISTANCE_MAX 6
59 |
60 | typedef float (*distance_function_t)(const void *v1, const void *v2, int n);
61 |
62 | // ENTRYPOINT
63 | void init_distance_functions (bool force_cpu);
64 |
65 | // MARK: - FLOAT16/BFLOAT16 -
66 | // typedef uint16_t bfloat16_t; // don't typedef to bfloat16_t to avoid mix with ’s native bfloat16_t
67 |
68 | // float <-> uint32_t bit casts
69 | static inline uint32_t f32_to_bits (float f) {
70 | #if defined(HAVE_BUILTIN_BIT_CAST)
71 | return __builtin_bit_cast(uint32_t, f);
72 | #else
73 | union { float f; uint32_t u; } v = { .f = f };
74 | return v.u;
75 | #endif
76 | }
77 |
78 | static inline float bits_to_f32 (uint32_t u) {
79 | #if defined(HAVE_BUILTIN_BIT_CAST)
80 | return __builtin_bit_cast(float, u);
81 | #else
82 | union { uint32_t u; float f; } v = { .u = u };
83 | return v.f;
84 | #endif
85 | }
86 |
87 | // bfloat16 (stored as uint16_t) -> float32, and back (RNE)
88 | static inline bool bfloat16_is_nan(uint16_t h) { /* exp==0xFF && frac!=0 */
89 | return ((h & 0x7F80u) == 0x7F80u) && ((h & 0x007Fu) != 0);
90 | }
91 | static inline bool bfloat16_is_inf(uint16_t h) { /* exp==0xFF && frac==0 */
92 | return ((h & 0x7F80u) == 0x7F80u) && ((h & 0x007Fu) == 0);
93 | }
94 | static inline bool bfloat16_is_zero(uint16_t h) { /* ±0 */
95 | return (h & 0x7FFFu) == 0;
96 | }
97 | static inline int bfloat16_sign(uint16_t h) {
98 | return (h >> 15) & 1;
99 | }
100 | static inline float bfloat16_to_float32(uint16_t bf) {
101 | return bits_to_f32((uint32_t)bf << 16);
102 | }
103 | static inline uint16_t float32_to_bfloat16(float f) {
104 | uint32_t x = f32_to_bits(f);
105 | uint32_t lsb = (x >> 16) & 1u; /* ties-to-even */
106 | uint32_t rnd = 0x7FFFu + lsb;
107 | return (uint16_t)((x + rnd) >> 16);
108 | }
109 |
110 | // ---- float16 (binary16) classifiers (work on raw uint16_t bits)
111 | static inline bool f16_is_nan(uint16_t h) { /* exp==0x1F && frac!=0 */
112 | return ( (h & 0x7C00u) == 0x7C00u ) && ((h & 0x03FFu) != 0);
113 | }
114 | static inline bool f16_is_inf(uint16_t h) { /* exp==0x1F && frac==0 */
115 | return ( (h & 0x7C00u) == 0x7C00u ) && ((h & 0x03FFu) == 0);
116 | }
117 | static inline int f16_sign(uint16_t h) {
118 | return (h >> 15) & 1;
119 | }
120 | static inline bool f16_is_zero(uint16_t h) { /* ±0 */
121 | return (h & 0x7FFFu) == 0;
122 | }
123 | static inline uint16_t float32_to_float16 (float f) {
124 | return fp16_ieee_from_fp32_value(f);
125 | }
126 | static inline float float16_to_float32 (uint16_t h) {
127 | return fp16_ieee_to_fp32_value(h);
128 | }
129 |
130 | #endif
131 |
--------------------------------------------------------------------------------
/packages/node/src/index.ts:
--------------------------------------------------------------------------------
1 | import { resolve } from 'node:path';
2 | import { existsSync } from 'node:fs';
3 | import {
4 | getCurrentPlatform,
5 | getPlatformPackageName,
6 | getBinaryName,
7 | type Platform
8 | } from './platform.js';
9 |
10 | /**
11 | * Error thrown when the SQLite Vector extension cannot be found
12 | */
13 | export class ExtensionNotFoundError extends Error {
14 | constructor(message: string) {
15 | super(message);
16 | this.name = 'ExtensionNotFoundError';
17 | }
18 | }
19 |
20 | /**
21 | * Attempts to load the platform-specific package
22 | * @returns The path to the extension binary, or null if not found
23 | */
24 | function tryLoadPlatformPackage(): string | null {
25 | try {
26 | const packageName = getPlatformPackageName();
27 |
28 | // Try to dynamically import the platform package
29 | // This works in both CommonJS and ESM
30 | const platformPackage = require(packageName);
31 |
32 | if (platformPackage?.path && typeof platformPackage.path === 'string') {
33 | if (existsSync(platformPackage.path)) {
34 | return platformPackage.path;
35 | }
36 | }
37 | } catch (error) {
38 | // Platform package not installed or failed to load
39 | // This is expected when optionalDependencies fail
40 | }
41 |
42 | return null;
43 | }
44 |
45 | /**
46 | * Gets the absolute path to the SQLite Vector extension binary for the current platform
47 | *
48 | * @returns Absolute path to the extension binary (.so, .dylib, or .dll)
49 | * @throws {ExtensionNotFoundError} If the extension binary cannot be found
50 | *
51 | * @example
52 | * ```typescript
53 | * import { getExtensionPath } from '@sqliteai/sqlite-vector';
54 | *
55 | * const extensionPath = getExtensionPath();
56 | * // On macOS ARM64: /path/to/node_modules/@sqliteai/sqlite-vector-darwin-arm64/vector.dylib
57 | * ```
58 | */
59 | export function getExtensionPath(): string {
60 | // Try to load from platform-specific package
61 | const platformPath = tryLoadPlatformPackage();
62 | if (platformPath) {
63 | return resolve(platformPath);
64 | }
65 |
66 | // If we reach here, the platform package wasn't installed
67 | const currentPlatform = getCurrentPlatform();
68 | const packageName = getPlatformPackageName();
69 |
70 | throw new ExtensionNotFoundError(
71 | `SQLite Vector extension not found for platform: ${currentPlatform}\n\n` +
72 | `The platform-specific package "${packageName}" is not installed.\n` +
73 | `This usually happens when:\n` +
74 | ` 1. Your platform is not supported\n` +
75 | ` 2. npm failed to install optional dependencies\n` +
76 | ` 3. You're installing with --no-optional flag\n\n` +
77 | `Try running: npm install --force`
78 | );
79 | }
80 |
81 | /**
82 | * Information about the current platform and extension
83 | */
84 | export interface ExtensionInfo {
85 | /** Current platform identifier (e.g., 'darwin-arm64') */
86 | platform: Platform;
87 | /** Name of the platform-specific npm package */
88 | packageName: string;
89 | /** Filename of the binary (e.g., 'vector.dylib') */
90 | binaryName: string;
91 | /** Full path to the extension binary */
92 | path: string;
93 | }
94 |
95 | /**
96 | * Gets detailed information about the SQLite Vector extension
97 | *
98 | * @returns Extension information object
99 | *
100 | * @example
101 | * ```typescript
102 | * import { getExtensionInfo } from '@sqliteai/sqlite-vector';
103 | *
104 | * const info = getExtensionInfo();
105 | * console.log(info);
106 | * // {
107 | * // platform: 'darwin-arm64',
108 | * // packageName: '@sqliteai/sqlite-vector-darwin-arm64',
109 | * // binaryName: 'vector.dylib',
110 | * // path: '/path/to/vector.dylib'
111 | * // }
112 | * ```
113 | */
114 | export function getExtensionInfo(): ExtensionInfo {
115 | return {
116 | platform: getCurrentPlatform(),
117 | packageName: getPlatformPackageName(),
118 | binaryName: getBinaryName(),
119 | path: getExtensionPath(),
120 | };
121 | }
122 |
123 | // Default export for CommonJS compatibility
124 | export default {
125 | getExtensionPath,
126 | getExtensionInfo,
127 | ExtensionNotFoundError,
128 | };
129 |
130 | // Re-export platform utilities
131 | export { getCurrentPlatform, getPlatformPackageName, getBinaryName, isMusl } from './platform.js';
132 | export type { Platform } from './platform.js';
133 |
--------------------------------------------------------------------------------
/examples/semantic_search/semsearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
Semantic Search CLI Tool using SQLite + sqlite-vector + sentence-transformers
4 | Usage:
5 | semsearch "query text" # Search for similar documents
6 | semsearch -i /path/to/documents # Index documents from directory
7 | semsearch -i /path/to/file.txt # Index single file
8 | """
9 |
10 | import argparse
11 | import os
12 | import sys
13 |
14 | from semantic_search import SemanticSearch
15 |
16 |
17 | def main():
18 | parser = argparse.ArgumentParser(
19 | description="Semantic search using SQLite + sqlite-vector",
20 | formatter_class=argparse.RawDescriptionHelpFormatter,
21 | epilog="""
22 | Examples:
23 | semsearch "machine learning algorithms"
24 | semsearch -i /path/to/documents
25 | semsearch -i document.txt
26 | semsearch --stats
27 | """
28 | )
29 |
30 | parser.add_argument("query", nargs="?", help="Search query")
31 | parser.add_argument("-i", "--index", metavar="PATH",
32 | help="Index file or directory")
33 | parser.add_argument("--limit", type=int, default=5,
34 | help="Number of results to return (default: 5)")
35 | parser.add_argument("--db", default="semsearch.db",
36 | help="Database file path (default: semsearch.db)")
37 | parser.add_argument("--model", default="all-MiniLM-L6-v2",
38 | help="Sentence transformer model (default: all-MiniLM-L6-v2)")
39 | parser.add_argument("--stats", action="store_true",
40 | help="Show database statistics")
41 | parser.add_argument("--repl", action="store_true",
42 | help="Run in interactive (keep model in memory)")
43 |
44 | args = parser.parse_args()
45 |
46 | if not any([args.query, args.index, args.stats, args.repl]):
47 | parser.print_help()
48 | return
49 |
50 | searcher = SemanticSearch(args.db, args.model)
51 |
52 | try:
53 | if args.stats:
54 | searcher.stats()
55 |
56 | elif args.index:
57 | if os.path.isdir(args.index):
58 | total = searcher.index_directory(args.index)
59 | print(f"Total chunks indexed: {total}")
60 | else:
61 | searcher.index_file(args.index)
62 |
63 | elif args.query:
64 | elapsed_ms, results = searcher.search(args.query, args.limit)
65 |
66 | if not results:
67 | print("No results found.")
68 | return
69 |
70 | print(f"Results for: '{args.query}' in {elapsed_ms}ms\n")
71 | for i, (filepath, content, similarity) in enumerate(results, 1):
72 | print(f"{i}. {filepath} (similarity: {similarity:.3f})")
73 | # Show first 200 chars of content
74 | preview = content[:200] + \
75 | "..." if len(content) > 200 else content
76 | print(f" {preview}\n")
77 |
78 | if args.repl:
79 | print("Entering interactive mode (keep the model in memory).\nType 'help' for commands, 'exit' to quit.")
80 | while True:
81 | try:
82 | cmd = input("semsearch> ").strip()
83 | if not cmd:
84 | continue
85 | if cmd in {"exit", "quit"}:
86 | break
87 | if cmd == "help":
88 | print(
89 | "Commands: search , index , stats, exit")
90 | continue
91 | if cmd.startswith("search "):
92 | query = cmd[len("search "):].strip()
93 | elapsed_ms, results = searcher.search(
94 | query, args.limit)
95 | if not results:
96 | print("No results found.")
97 | continue
98 | print(f"Results for: '{query}' in {elapsed_ms}ms\n")
99 | for i, (filepath, content, similarity) in enumerate(results, 1):
100 | print(
101 | f"{i}. {filepath} (similarity: {similarity:.3f})")
102 | preview = content[:200] + \
103 | ("..." if len(content) > 200 else "")
104 | print(f" {preview}\n")
105 | continue
106 | if cmd.startswith("index "):
107 | path = cmd[len("index "):].strip()
108 | if os.path.isdir(path):
109 | total = searcher.index_directory(path)
110 | print(f"Total chunks indexed: {total}")
111 | else:
112 | searcher.index_file(path)
113 | continue
114 | if cmd == "stats":
115 | searcher.stats()
116 | continue
117 | print("Unknown command. Type 'help' for available commands.")
118 | except KeyboardInterrupt:
119 | print("\nExiting REPL.")
120 | break
121 | except Exception as e:
122 | print(f"Error: {e}")
123 |
124 | if searcher:
125 | searcher.close()
126 | return
127 |
128 | except KeyboardInterrupt:
129 | print("\nOperation cancelled.")
130 | except Exception as e:
131 | print(f"Error: {e}")
132 | sys.exit(1)
133 | finally:
134 | if searcher:
135 | searcher.close()
136 |
137 |
138 | if __name__ == "__main__":
139 | main()
140 |
--------------------------------------------------------------------------------
/packages/node/README.md:
--------------------------------------------------------------------------------
1 | # @sqliteai/sqlite-vector
2 |
3 | [](https://badge.fury.io/js/@sqliteai%2Fsqlite-vector)
4 | [](LICENSE.md)
5 |
6 | > SQLite Vector extension packaged for Node.js
7 |
8 | **SQLite Vector** is a cross-platform, ultra-efficient SQLite extension that brings vector search capabilities to your embedded database. It works seamlessly on **iOS, Android, Windows, Linux, and macOS**, using just **30MB of memory** by default. With support for **Float32, Float16, BFloat16, Int8, and UInt8**, and **highly optimized distance functions**, it's the ideal solution for **Edge AI** applications.
9 |
10 | ## Features
11 |
12 | - ✅ **Cross-platform** - Works on macOS, Linux (glibc/musl), and Windows
13 | - ✅ **Zero configuration** - Automatically detects and loads the correct binary for your platform
14 | - ✅ **TypeScript native** - Full type definitions included
15 | - ✅ **Modern ESM + CJS** - Works with both ES modules and CommonJS
16 | - ✅ **Small footprint** - Only downloads binaries for your platform
17 | - ✅ **Offline-ready** - No external services required
18 |
19 | ## Installation
20 |
21 | ```bash
22 | npm install @sqliteai/sqlite-vector
23 | ```
24 |
25 | The package automatically downloads the correct native extension for your platform during installation.
26 |
27 | ### Supported Platforms
28 |
29 | | Platform | Architecture | Package |
30 | |----------|-------------|---------|
31 | | macOS | ARM64 (Apple Silicon) | `@sqliteai/sqlite-vector-darwin-arm64` |
32 | | macOS | x86_64 (Intel) | `@sqliteai/sqlite-vector-darwin-x86_64` |
33 | | Linux | ARM64 (glibc) | `@sqliteai/sqlite-vector-linux-arm64` |
34 | | Linux | ARM64 (musl/Alpine) | `@sqliteai/sqlite-vector-linux-arm64-musl` |
35 | | Linux | x86_64 (glibc) | `@sqliteai/sqlite-vector-linux-x86_64` |
36 | | Linux | x86_64 (musl/Alpine) | `@sqliteai/sqlite-vector-linux-x86_64-musl` |
37 | | Windows | x86_64 | `@sqliteai/sqlite-vector-win32-x86_64` |
38 |
39 | ## sqlite-vector API
40 |
41 | For detailed information on how to use the vector extension features, see the [main documentation](https://github.com/sqliteai/sqlite-vector/blob/main/README.md).
42 |
43 | ## Usage
44 |
45 | ```typescript
46 | import { getExtensionPath } from '@sqliteai/sqlite-vector';
47 | import Database from 'better-sqlite3';
48 |
49 | const db = new Database(':memory:');
50 | db.loadExtension(getExtensionPath());
51 |
52 | // Ready to use
53 | const version = db.prepare('SELECT vector_version()').pluck().get();
54 | console.log('Vector extension version:', version);
55 | ```
56 |
57 | ## Examples
58 |
59 | For complete, runnable examples, see the [sqlite-extensions-guide](https://github.com/sqliteai/sqlite-extensions-guide/tree/main/examples/node).
60 |
61 | These examples are generic and work with all SQLite extensions: `sqlite-vector`, `sqlite-sync`, `sqlite-js`, and `sqlite-ai`.
62 |
63 | ## API Reference
64 |
65 | ### `getExtensionPath(): string`
66 |
67 | Returns the absolute path to the SQLite Vector extension binary for the current platform.
68 |
69 | **Returns:** `string` - Absolute path to the extension file (`.so`, `.dylib`, or `.dll`)
70 |
71 | **Throws:** `ExtensionNotFoundError` - If the extension binary cannot be found for the current platform
72 |
73 | **Example:**
74 | ```typescript
75 | import { getExtensionPath } from '@sqliteai/sqlite-vector';
76 |
77 | const path = getExtensionPath();
78 | // => '/path/to/node_modules/@sqliteai/sqlite-vector-darwin-arm64/vector.dylib'
79 | ```
80 |
81 | ---
82 |
83 | ### `getExtensionInfo(): ExtensionInfo`
84 |
85 | Returns detailed information about the extension for the current platform.
86 |
87 | **Returns:** `ExtensionInfo` object with the following properties:
88 | - `platform: Platform` - Current platform identifier (e.g., `'darwin-arm64'`)
89 | - `packageName: string` - Name of the platform-specific npm package
90 | - `binaryName: string` - Filename of the binary (e.g., `'vector.dylib'`)
91 | - `path: string` - Full path to the extension binary
92 |
93 | **Throws:** `ExtensionNotFoundError` - If the extension binary cannot be found
94 |
95 | **Example:**
96 | ```typescript
97 | import { getExtensionInfo } from '@sqliteai/sqlite-vector';
98 |
99 | const info = getExtensionInfo();
100 | console.log(`Running on ${info.platform}`);
101 | console.log(`Extension path: ${info.path}`);
102 | ```
103 |
104 | ---
105 |
106 | ### `getCurrentPlatform(): Platform`
107 |
108 | Returns the current platform identifier.
109 |
110 | **Returns:** `Platform` - One of:
111 | - `'darwin-arm64'` - macOS ARM64
112 | - `'darwin-x86_64'` - macOS x86_64
113 | - `'linux-arm64'` - Linux ARM64 (glibc)
114 | - `'linux-arm64-musl'` - Linux ARM64 (musl)
115 | - `'linux-x86_64'` - Linux x86_64 (glibc)
116 | - `'linux-x86_64-musl'` - Linux x86_64 (musl)
117 | - `'win32-x86_64'` - Windows x86_64
118 |
119 | **Throws:** `Error` - If the platform is unsupported
120 |
121 | ---
122 |
123 | ### `isMusl(): boolean`
124 |
125 | Detects if the system uses musl libc (Alpine Linux, etc.).
126 |
127 | **Returns:** `boolean` - `true` if musl is detected, `false` otherwise
128 |
129 | ---
130 |
131 | ### `class ExtensionNotFoundError extends Error`
132 |
133 | Error thrown when the SQLite Vector extension cannot be found for the current platform.
134 |
135 | ## Related Projects
136 |
137 | - **[@sqliteai/sqlite-ai](https://www.npmjs.com/package/@sqliteai/sqlite-ai)** - On-device AI inference and embedding generation
138 | - **[@sqliteai/sqlite-sync](https://www.npmjs.com/package/@sqliteai/sqlite-sync)** - Sync on-device databases with the cloud
139 | - **[@sqliteai/sqlite-js](https://www.npmjs.com/package/@sqliteai/sqlite-js)** - Define SQLite functions in JavaScript
140 |
141 | ## License
142 |
143 | This project is licensed under the [Elastic License 2.0](LICENSE.md).
144 |
145 | For production or managed service use, please [contact SQLite Cloud, Inc](mailto:info@sqlitecloud.io) for a commercial license.
146 |
147 | ## Contributing
148 |
149 | Contributions are welcome! Please see the [main repository](https://github.com/sqliteai/sqlite-vector) to open an issue.
150 |
151 | ## Support
152 |
153 | - 📖 [Documentation](https://github.com/sqliteai/sqlite-vector/blob/main/API.md)
154 | - 🐛 [Report Issues](https://github.com/sqliteai/sqlite-vector/issues)
155 |
--------------------------------------------------------------------------------
/examples/semantic_search/semantic_search.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import sqlite3
4 | import sys
5 | import time
6 | from pathlib import Path
7 | from typing import List, Tuple
8 |
9 | from sentence_transformers import SentenceTransformer
10 |
11 |
12 | class SemanticSearch:
13 | def __init__(self, db_path: str = "semsearch.db", model_name: str = "all-MiniLM-L6-v2"):
14 | self.db_path = db_path
15 | self.model_name = model_name
16 | self.model = None
17 | self.conn = None
18 |
19 | def _get_model(self):
20 | """Lazy load the sentence transformer model"""
21 | if self.model is None:
22 | print(f"Loading model {self.model_name}...")
23 | self.model = SentenceTransformer(self.model_name)
24 | return self.model
25 |
26 | def _get_connection(self):
27 | """Get database connection, load SQLite Vector extension
28 | and ensure schema is created"""
29 | if self.conn is None:
30 | self.conn = sqlite3.connect(self.db_path)
31 |
32 | self.conn.enable_load_extension(True)
33 | self.conn.load_extension("./vector.so")
34 | self.conn.enable_load_extension(False)
35 |
36 | # Check if sqlite-vector is available
37 | try:
38 | self.conn.execute("SELECT vector_version()")
39 | except sqlite3.OperationalError:
40 | print("Error: sqlite-vector extension not found.")
41 | print(
42 | "Download it from https://github.com/sqliteai/sqlite-vector/releases")
43 | sys.exit(1)
44 |
45 | self._create_schema()
46 | return self.conn
47 |
48 | def _create_schema(self):
49 | """Create the documents table with vector support"""
50 | conn = self._get_connection()
51 | cursor = conn.cursor()
52 |
53 | # Create documents table
54 | cursor.execute("""
55 | CREATE TABLE IF NOT EXISTS documents (
56 | id INTEGER PRIMARY KEY AUTOINCREMENT,
57 | filepath TEXT NOT NULL,
58 | content TEXT NOT NULL,
59 | embedding BLOB,
60 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
61 | )
62 | """)
63 |
64 | # Create vector table using sqlite-vector extension
65 | # The default model 'all-MiniLM-L6-v2' produces 384-dimensional embeddings
66 |
67 | # Initialize the vector
68 | cursor.execute("""
69 | SELECT vector_init('documents', 'embedding', 'type=FLOAT32,dimension=384');
70 | """)
71 |
72 | conn.commit()
73 |
74 | def _chunk_text(self, text: str, chunk_size: int = 250, overlap: int = 50) -> List[str]:
75 | """Split text into overlapping chunks for better semantic search"""
76 | words = text.split()
77 | chunks = []
78 |
79 | for i in range(0, len(words), chunk_size - overlap):
80 | chunk = ' '.join(words[i:i + chunk_size])
81 | chunk = chunk.strip()
82 | if chunk:
83 | chunks.append(chunk)
84 |
85 | # Return original if no chunks created
86 | return chunks if chunks else [text]
87 |
    def index_file(self, filepath: str) -> int:
        """Index a single file and return number of chunks processed.

        Reads the file, splits it into overlapping chunks, embeds each
        chunk with the sentence-transformer model, inserts the chunks into
        the `documents` table, and re-runs vector quantization. Returns 0
        when the file is missing, unreadable, empty, or already indexed.
        """
        if not os.path.exists(filepath):
            print(f"File not found: {filepath}")
            return 0

        model = self._get_model()
        conn = self._get_connection()

        # Skip files indexed in a previous run (deduplicated by path).
        cursor = conn.execute(
            "SELECT id FROM documents WHERE filepath = ?", (filepath,))
        if cursor.fetchone() is not None:
            print(f"File already indexed: {filepath}")
            return 0

        # errors='ignore' drops undecodable bytes rather than failing the file.
        try:
            with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read().strip()
        except Exception as e:
            print(f"Error reading {filepath}: {e}")
            return 0

        if not content:
            print(f"Empty file: {filepath}")
            return 0

        cursor = conn.cursor()

        # Split content into chunks.
        # The default model truncates text after 256 word pieces
        chunks = self._chunk_text(content)
        chunk_count = 0

        for chunk in chunks:
            # Generate embedding and insert into database
            # (vector_as_f32 converts the JSON array into a Float32 BLOB).
            embedding = model.encode(chunk)
            embedding_json = json.dumps(embedding.tolist())

            cursor.execute(
                "INSERT INTO documents (filepath, content, embedding) VALUES (?, ?, vector_as_f32(?))",
                (filepath, chunk, embedding_json)
            )
            chunk_count += 1

        conn.commit()

        # Perform quantization on the vector column
        # so the new rows become visible to vector_quantize_scan().
        cursor.execute("""
            SELECT vector_quantize('documents', 'embedding');
        """)

        print(f"Indexed {filepath}: {chunk_count} chunks")
        return chunk_count
141 |
142 | def index_directory(self, directory: str) -> int:
143 | """Index all text files in a directory"""
144 | total_chunks = 0
145 | text_extensions = {'.txt', '.md', '.mdx', '.py', '.js',
146 | '.html', '.css', '.sql', '.json', '.xml'}
147 |
148 | for root, _, files in os.walk(directory):
149 | for file in files:
150 | if Path(file).suffix.lower() in text_extensions:
151 | filepath = os.path.join(root, file)
152 | total_chunks += self.index_file(filepath)
153 |
154 | return total_chunks
155 |
156 | def search(self, query: str, limit: int = 3) -> Tuple[float, List[Tuple[str, str, float]]]:
157 | """Search for similar documents"""
158 | model = self._get_model()
159 | conn = self._get_connection()
160 |
161 | # Generate query embedding
162 | query_embedding = model.encode(query)
163 | query_json = json.dumps(query_embedding.tolist())
164 |
165 | # Search using sqlite-vec cosine similarity
166 | cursor = conn.cursor()
167 | start_time = time.time()
168 | cursor.execute("""
169 | SELECT d.id, d.filepath, d.content, v.distance
170 | FROM documents AS d
171 | JOIN vector_quantize_scan('documents', 'embedding', vector_as_f32(?), ?) AS v
172 | ON d.id = v.rowid;
173 | """, (query_json, limit))
174 | elapsed_ms = round((time.time() - start_time) * 1000, 2)
175 |
176 | results = []
177 | for id, filepath, content, distance in cursor.fetchall():
178 | results.append((filepath, content, distance))
179 |
180 | return (elapsed_ms, results)
181 |
182 | def stats(self):
183 | """Print database statistics"""
184 | conn = self._get_connection()
185 | cursor = conn.cursor()
186 |
187 | cursor.execute("SELECT COUNT(*) FROM documents")
188 | doc_count = cursor.fetchone()[0]
189 |
190 | cursor.execute("SELECT COUNT(DISTINCT filepath) FROM documents")
191 | file_count = cursor.fetchone()[0]
192 |
193 | print(f"Database: {self.db_path}")
194 | print(f"Files indexed: {file_count}")
195 | print(f"Document chunks: {doc_count}")
196 |
197 | def close(self):
198 | """Close the database connection"""
199 | if self.conn:
200 | self.conn.close()
201 | self.conn = None
202 | print("Database connection closed.")
203 |
--------------------------------------------------------------------------------
/packages/node/generate-platform-packages.js:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | /**
4 | * Generates platform-specific packages dynamically
5 | *
6 | * This script creates npm packages for each platform from templates,
7 | * eliminating the need to maintain nearly-identical files in the repo.
8 | *
9 | * Usage:
 *   node generate-platform-packages.js <version> <artifacts-dir> <output-dir>
11 | *
12 | * Example:
13 | * node generate-platform-packages.js 0.9.45 ./artifacts ./platform-packages
14 | */
15 |
16 | const fs = require('fs');
17 | const path = require('path');
18 |
// Platform configuration.
// Each entry drives one npm platform package:
//   name           - package name suffix (@sqliteai/sqlite-vector-<name>)
//   os / cpu       - npm "os"/"cpu" installation constraints
//   description    - package.json description
//   binaryName     - extension binary shipped in the package
//   artifactFolder - folder inside the artifacts dir holding that binary
const PLATFORMS = [
  {
    name: 'darwin-arm64',
    os: ['darwin'],
    cpu: ['arm64'],
    description: 'SQLite Vector extension for macOS ARM64 (Apple Silicon)',
    binaryName: 'vector.dylib',
    artifactFolder: 'vector-macos-arm64',
  },
  {
    name: 'darwin-x86_64',
    os: ['darwin'],
    cpu: ['x64', 'ia32'],
    description: 'SQLite Vector extension for macOS x86_64 (Intel)',
    binaryName: 'vector.dylib',
    artifactFolder: 'vector-macos-x86_64',
  },
  {
    name: 'linux-arm64',
    os: ['linux'],
    cpu: ['arm64'],
    description: 'SQLite Vector extension for Linux ARM64 (glibc)',
    binaryName: 'vector.so',
    artifactFolder: 'vector-linux-arm64',
  },
  {
    name: 'linux-arm64-musl',
    os: ['linux'],
    cpu: ['arm64'],
    description: 'SQLite Vector extension for Linux ARM64 (musl)',
    binaryName: 'vector.so',
    artifactFolder: 'vector-linux-musl-arm64',
  },
  {
    name: 'linux-x86_64',
    os: ['linux'],
    cpu: ['x64', 'ia32'],
    description: 'SQLite Vector extension for Linux x86_64 (glibc)',
    binaryName: 'vector.so',
    artifactFolder: 'vector-linux-x86_64',
  },
  {
    name: 'linux-x86_64-musl',
    os: ['linux'],
    cpu: ['x64', 'ia32'],
    description: 'SQLite Vector extension for Linux x86_64 (musl)',
    binaryName: 'vector.so',
    artifactFolder: 'vector-linux-musl-x86_64',
  },
  {
    name: 'win32-x86_64',
    os: ['win32'],
    cpu: ['x64', 'ia32'],
    description: 'SQLite Vector extension for Windows x86_64',
    binaryName: 'vector.dll',
    artifactFolder: 'vector-windows-x86_64',
  },
];
78 |
79 | /**
80 | * Generate package.json for a platform
81 | */
/**
 * Build the package.json object for a platform-specific package.
 *
 * @param {object} platform - Entry from PLATFORMS describing the target.
 * @param {string} version - Semver version string for the release.
 * @returns {object} package.json contents, ready to serialize.
 */
function generatePackageJson(platform, version) {
  // Keywords combine the product terms with the platform name parts.
  const keywords = ['sqlite', 'vector', ...platform.name.split('-')];

  return {
    name: `@sqliteai/sqlite-vector-${platform.name}`,
    version,
    description: platform.description,
    main: 'index.js',
    os: platform.os,
    cpu: platform.cpu,
    files: [platform.binaryName, 'index.js', 'README.md', 'LICENSE.md'],
    keywords,
    author: 'Gioele Cantoni (gioele@sqlitecloud.io)',
    license: 'SEE LICENSE IN LICENSE.md',
    repository: {
      type: 'git',
      url: 'https://github.com/sqliteai/sqlite-vector.git',
      directory: 'packages/node',
    },
    engines: { node: '>=16.0.0' },
  };
}
113 |
114 | /**
115 | * Generate index.js for a platform
116 | */
117 | function generateIndexJs(platform) {
118 | return `const { join } = require('path');
119 |
120 | module.exports = {
121 | path: join(__dirname, '${platform.binaryName}')
122 | };
123 | `;
124 | }
125 |
126 | /**
127 | * Generate README.md for a platform
128 | */
/**
 * Render the README.md for a platform-specific package.
 *
 * @param {object} platform - Entry from PLATFORMS describing the target.
 * @param {string} version - Semver version string for the release.
 * @returns {string} Markdown content for the package README.
 */
function generateReadme(platform, version) {
  const { name, description, os, cpu, binaryName } = platform;

  return `# @sqliteai/sqlite-vector-${name}

${description}

**Version:** ${version}

This is a platform-specific package for [@sqliteai/sqlite-vector](https://www.npmjs.com/package/@sqliteai/sqlite-vector).

It is installed automatically as an optional dependency and should not be installed directly.

## Installation

Install the main package instead:

\`\`\`bash
npm install @sqliteai/sqlite-vector
\`\`\`

## Platform

- **OS:** ${os.join(', ')}
- **CPU:** ${cpu.join(', ')}
- **Binary:** ${binaryName}

## License

See [LICENSE.md](./LICENSE.md) in the root directory.
`;
}
159 |
160 | /**
161 | * Main function
162 | */
/**
 * CLI entry point.
 *
 * Validates argv, the semver version, and the presence of LICENSE.md,
 * then generates one npm package per PLATFORMS entry under the output
 * directory, copying the platform binary when it exists. Exits non-zero
 * on bad arguments or any generation failure.
 */
function main() {
  const args = process.argv.slice(2);

  if (args.length < 3) {
    // Fix: the argument placeholders were missing from the usage string,
    // which made it inconsistent with the Example line below.
    console.error('Usage: node generate-platform-packages.js <version> <artifacts-dir> <output-dir>');
    console.error('Example: node generate-platform-packages.js 0.9.45 ./artifacts ./platform-packages');
    process.exit(1);
  }

  const [version, artifactsDir, outputDir] = args;

  // Find LICENSE.md (should be in repo root)
  const licensePath = path.resolve(__dirname, '../../LICENSE.md');
  if (!fs.existsSync(licensePath)) {
    console.error(`Error: LICENSE.md not found at ${licensePath}`);
    process.exit(1);
  }

  // Validate version format (strict MAJOR.MINOR.PATCH)
  if (!/^\d+\.\d+\.\d+$/.test(version)) {
    console.error(`Error: Invalid version format: ${version}`);
    console.error('Version must be in semver format (e.g., 0.9.45)');
    process.exit(1);
  }

  console.log(`Generating platform packages version ${version}...\n`);

  // Create output directory
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
  }

  let successCount = 0;
  let errorCount = 0;

  // Generate each platform package
  for (const platform of PLATFORMS) {
    const platformDir = path.join(outputDir, platform.name);
    const artifactPath = path.join(artifactsDir, platform.artifactFolder, platform.binaryName);

    try {
      // Create platform directory
      fs.mkdirSync(platformDir, { recursive: true });

      // Generate package.json
      const packageJson = generatePackageJson(platform, version);
      fs.writeFileSync(
        path.join(platformDir, 'package.json'),
        JSON.stringify(packageJson, null, 2) + '\n'
      );

      // Generate index.js
      const indexJs = generateIndexJs(platform);
      fs.writeFileSync(path.join(platformDir, 'index.js'), indexJs);

      // Generate README.md
      const readme = generateReadme(platform, version);
      fs.writeFileSync(path.join(platformDir, 'README.md'), readme);

      // Copy LICENSE.md
      fs.copyFileSync(licensePath, path.join(platformDir, 'LICENSE.md'));

      // Copy binary if it exists (packages may be generated before every
      // platform binary has been built, so a missing binary is not fatal)
      if (fs.existsSync(artifactPath)) {
        fs.copyFileSync(artifactPath, path.join(platformDir, platform.binaryName));
        console.log(`✓ ${platform.name} (with binary)`);
      } else {
        console.log(`✓ ${platform.name} (no binary found at ${artifactPath})`);
      }

      successCount++;
    } catch (error) {
      console.error(`✗ ${platform.name}: ${error.message}`);
      errorCount++;
    }
  }

  console.log(`\nGenerated ${successCount} platform package(s)`);

  if (errorCount > 0) {
    console.error(`Failed to generate ${errorCount} package(s)`);
    process.exit(1);
  }

  console.log('Done!');
}
249 |
// Run the CLI only when executed directly (not when require()d).
if (require.main === module) {
  main();
}

// Exported for reuse and testing by other scripts.
module.exports = { PLATFORMS, generatePackageJson, generateIndexJs, generateReadme };
256 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Makefile for SQLite Vector Extension
# Supports compilation for Linux, macOS, Windows, Android and iOS

# customize sqlite3 executable with
# make test SQLITE3=/opt/homebrew/Cellar/sqlite/3.49.1/bin/sqlite3
SQLITE3 ?= sqlite3

# Set default platform if not specified
# (also detects host CPU count for parallel builds)
ifeq ($(OS),Windows_NT)
	PLATFORM := windows
	HOST := windows
	CPUS := $(shell powershell -Command "[Environment]::ProcessorCount")
else
	HOST = $(shell uname -s | tr '[:upper:]' '[:lower:]')
	ifeq ($(HOST),darwin)
		PLATFORM := macos
		CPUS := $(shell sysctl -n hw.ncpu)
	else
		PLATFORM := $(HOST)
		CPUS := $(shell nproc)
	endif
endif

# Speed up builds by using all available CPU cores
MAKEFLAGS += -j$(CPUS)

# Compiler and flags
CC = gcc
CFLAGS = -Wall -Wextra -Wno-unused-parameter -I$(SRC_DIR) -I$(LIB_DIR)

# Directories
SRC_DIR = src
DIST_DIR = dist
LIB_DIR = libs
# VPATH lets pattern rules locate prerequisite .c files in src/ and libs/
VPATH = $(SRC_DIR):$(LIB_DIR)
BUILD_DIR = build

# Files
SRC_FILES = $(wildcard $(SRC_DIR)/*.c)
OBJ_FILES = $(patsubst %.c, $(BUILD_DIR)/%.o, $(notdir $(SRC_FILES)))
41 |
# Platform-specific settings
# (selects output name, link flags and strip command per target platform)
ifeq ($(PLATFORM),windows)
	TARGET := $(DIST_DIR)/vector.dll
	LDFLAGS += -shared
	# Create .def file for Windows
	DEF_FILE := $(BUILD_DIR)/vector.def
	STRIP = strip --strip-unneeded $@
else ifeq ($(PLATFORM),macos)
	TARGET := $(DIST_DIR)/vector.dylib
	# Build a universal (fat) binary unless a single ARCH is requested
	ifndef ARCH
		LDFLAGS += -arch x86_64 -arch arm64
		CFLAGS += -arch x86_64 -arch arm64
	else
		LDFLAGS += -arch $(ARCH)
		CFLAGS += -arch $(ARCH)
	endif
	LDFLAGS += -dynamiclib -undefined dynamic_lookup -headerpad_max_install_names
	STRIP = strip -x -S $@
else ifeq ($(PLATFORM),android)
	ifndef ARCH # Set ARCH to find Android NDK's Clang compiler, the user should set the ARCH
		$(error "Android ARCH must be set to ARCH=x86_64, ARCH=arm64-v8a, or ARCH=armeabi-v7a")
	endif
	ifndef ANDROID_NDK # Set ANDROID_NDK path to find android build tools; e.g. on MacOS: export ANDROID_NDK=/Users/username/Library/Android/sdk/ndk/25.2.9519653
		$(error "Android NDK must be set")
	endif
	BIN = $(ANDROID_NDK)/toolchains/llvm/prebuilt/$(HOST)-x86_64/bin
	# Map Android ABI names onto NDK toolchain triples (min API level 26)
	ifneq (,$(filter $(ARCH),arm64 arm64-v8a))
		override ARCH := aarch64
		ANDROID_ABI := android26
	else ifeq ($(ARCH),armeabi-v7a)
		override ARCH := armv7a
		ANDROID_ABI := androideabi26
	else
		ANDROID_ABI := android26
	endif
	CC = $(BIN)/$(ARCH)-linux-$(ANDROID_ABI)-clang
	TARGET := $(DIST_DIR)/vector.so
	LDFLAGS += -lm -shared
	STRIP = $(BIN)/llvm-strip --strip-unneeded $@
else ifeq ($(PLATFORM),ios)
	TARGET := $(DIST_DIR)/vector.dylib
	SDK := -isysroot $(shell xcrun --sdk iphoneos --show-sdk-path) -miphoneos-version-min=11.0
	LDFLAGS += -dynamiclib $(SDK) -headerpad_max_install_names
	CFLAGS += -arch arm64 $(SDK)
	STRIP = strip -x -S $@
else ifeq ($(PLATFORM),ios-sim)
	TARGET := $(DIST_DIR)/vector.dylib
	SDK := -isysroot $(shell xcrun --sdk iphonesimulator --show-sdk-path) -miphonesimulator-version-min=11.0
	LDFLAGS += -arch x86_64 -arch arm64 -dynamiclib $(SDK) -headerpad_max_install_names
	CFLAGS += -arch x86_64 -arch arm64 $(SDK)
	STRIP = strip -x -S $@
else # linux
	TARGET := $(DIST_DIR)/vector.so
	LDFLAGS += -shared
	STRIP = strip --strip-unneeded $@
endif
98 |
# Windows .def file generation
# (declares the single exported entry point for the DLL)
$(DEF_FILE):
ifeq ($(PLATFORM),windows)
	@echo "LIBRARY vector.dll" > $@
	@echo "EXPORTS" >> $@
	@echo "    sqlite3_vector_init" >> $@
endif

# Make sure the build and dist directories exist
# (runs at parse time, on every make invocation)
$(shell mkdir -p $(BUILD_DIR) $(DIST_DIR))

# Default target
extension: $(TARGET)
all: $(TARGET)

# Loadable library
$(TARGET): $(OBJ_FILES) $(DEF_FILE)
	$(CC) $(OBJ_FILES) $(DEF_FILE) -o $@ $(LDFLAGS)
ifeq ($(PLATFORM),windows)
	# Generate import library for Windows
	dlltool -D $@ -d $(DEF_FILE) -l $(DIST_DIR)/vector.lib
endif
	# Strip debug symbols
	$(STRIP)

# Object files
$(BUILD_DIR)/%.o: %.c
	$(CC) $(CFLAGS) -O3 -fPIC -c $< -o $@

# Smoke test: load the extension and print its version
test: $(TARGET)
	$(SQLITE3) ":memory:" -cmd ".bail on" ".load ./dist/vector" "SELECT vector_version();"

# Clean up generated files
clean:
	rm -rf $(BUILD_DIR)/* $(DIST_DIR)/* *.gcda *.gcno *.gcov *.sqlite

# Helper pattern: rebuild for platform "%" and rename the dylib
# (serialized because each build reuses the same build/dist dirs)
.NOTPARALLEL: %.dylib
%.dylib:
	rm -rf $(BUILD_DIR) && $(MAKE) PLATFORM=$*
	mv $(DIST_DIR)/vector.dylib $(DIST_DIR)/$@
139 |
# Info.plist content for the Apple frameworks.
# NOTE(review): the XML tags of this define were stripped in the dump
# (bare "\" lines and orphaned key/value text); reconstructed below as a
# standard Info.plist. Quotes are escaped because the value is expanded
# inside a double-quoted printf string, matching MODULEMAP's style;
# trailing backslashes preserve the original line-continuation layout.
define PLIST
<?xml version=\"1.0\" encoding=\"UTF-8\"?>\
<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/PropertyList-1.0.dtd\">\
<plist version=\"1.0\">\
<dict>\
	<key>CFBundleDevelopmentRegion</key>\
	<string>en</string>\
	<key>CFBundleExecutable</key>\
	<string>vector</string>\
	<key>CFBundleIdentifier</key>\
	<string>ai.sqlite.vector</string>\
	<key>CFBundleInfoDictionaryVersion</key>\
	<string>6.0</string>\
	<key>CFBundlePackageType</key>\
	<string>FMWK</string>\
	<key>CFBundleSignature</key>\
	<string>????</string>\
	<key>CFBundleVersion</key>\
	<string>$(shell make version)</string>\
	<key>CFBundleShortVersionString</key>\
	<string>$(shell make version)</string>\
	<key>MinimumOSVersion</key>\
	<string>11.0</string>\
</dict>\
</plist>\

endef

# Clang module map so the framework can be imported as `vector`
define MODULEMAP
framework module vector {\
	umbrella header \"sqlite-vector.h\"\
	export *\
}
endef
173 |
# Apple packaging: build three dylibs, wrap each in a .framework with
# headers/plist/modulemap, then combine them into one .xcframework.
LIB_NAMES = ios.dylib ios-sim.dylib macos.dylib
FMWK_NAMES = ios-arm64 ios-arm64_x86_64-simulator macos-arm64_x86_64
$(DIST_DIR)/%.xcframework: $(LIB_NAMES)
	@$(foreach i,1 2 3,\
		lib=$(word $(i),$(LIB_NAMES)); \
		fmwk=$(word $(i),$(FMWK_NAMES)); \
		mkdir -p $(DIST_DIR)/$$fmwk/vector.framework/Headers; \
		mkdir -p $(DIST_DIR)/$$fmwk/vector.framework/Modules; \
		cp src/sqlite-vector.h $(DIST_DIR)/$$fmwk/vector.framework/Headers; \
		printf "$(PLIST)" > $(DIST_DIR)/$$fmwk/vector.framework/Info.plist; \
		printf "$(MODULEMAP)" > $(DIST_DIR)/$$fmwk/vector.framework/Modules/module.modulemap; \
		mv $(DIST_DIR)/$$lib $(DIST_DIR)/$$fmwk/vector.framework/vector; \
		install_name_tool -id "@rpath/vector.framework/vector" $(DIST_DIR)/$$fmwk/vector.framework/vector; \
	)
	xcodebuild -create-xcframework $(foreach fmwk,$(FMWK_NAMES),-framework $(DIST_DIR)/$(fmwk)/vector.framework) -output $@
	rm -rf $(foreach fmwk,$(FMWK_NAMES),$(DIST_DIR)/$(fmwk))

xcframework: $(DIST_DIR)/vector.xcframework

# Android packaging: build each ABI, stage the .so files into jniLibs,
# then let Gradle assemble the release AAR.
AAR_ARM64 = packages/android/src/main/jniLibs/arm64-v8a/
AAR_ARM = packages/android/src/main/jniLibs/armeabi-v7a/
AAR_X86 = packages/android/src/main/jniLibs/x86_64/
aar:
	mkdir -p $(AAR_ARM64) $(AAR_ARM) $(AAR_X86)
	$(MAKE) clean && $(MAKE) PLATFORM=android ARCH=arm64-v8a
	mv $(DIST_DIR)/vector.so $(AAR_ARM64)
	$(MAKE) clean && $(MAKE) PLATFORM=android ARCH=armeabi-v7a
	mv $(DIST_DIR)/vector.so $(AAR_ARM)
	$(MAKE) clean && $(MAKE) PLATFORM=android ARCH=x86_64
	mv $(DIST_DIR)/vector.so $(AAR_X86)
	cd packages/android && ./gradlew clean assembleRelease
	cp packages/android/build/outputs/aar/android-release.aar $(DIST_DIR)/vector.aar

# Print the extension version parsed from the C header
version:
	@echo $(shell sed -n 's/^#define SQLITE_VECTOR_VERSION[[:space:]]*"\([^"]*\)".*/\1/p' src/sqlite-vector.h)

# Help message
help:
	@echo "SQLite Vector Extension Makefile"
	@echo "Usage:"
	@echo "  make [PLATFORM=platform] [ARCH=arch] [ANDROID_NDK=\$$ANDROID_HOME/ndk/26.1.10909125] [target]"
	@echo ""
	@echo "Platforms:"
	@echo "  linux (default on Linux)"
	@echo "  macos (default on macOS)"
	@echo "  windows (default on Windows)"
	@echo "  android (needs ARCH to be set to x86_64, arm64-v8a, or armeabi-v7a and ANDROID_NDK to be set)"
	@echo "  ios (only on macOS)"
	@echo "  ios-sim (only on macOS)"
	@echo ""
	@echo "Targets:"
	@echo "  all         - Build the extension (default)"
	@echo "  clean       - Remove built files"
	@echo "  test        - Test the extension"
	@echo "  help        - Display this help message"
	@echo "  xcframework - Build the Apple XCFramework"
	@echo "  aar         - Build the Android AAR package"

.PHONY: all clean test extension help version xcframework aar
233 |
--------------------------------------------------------------------------------
/packages/android/gradlew:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | #
4 | # Copyright © 2015 the original authors.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # https://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | # SPDX-License-Identifier: Apache-2.0
19 | #
20 |
21 | ##############################################################################
22 | #
23 | # Gradle start up script for POSIX generated by Gradle.
24 | #
25 | # Important for running:
26 | #
27 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
28 | # noncompliant, but you have some other compliant shell such as ksh or
29 | # bash, then to run this script, type that shell name before the whole
30 | # command line, like:
31 | #
32 | # ksh Gradle
33 | #
34 | # Busybox and similar reduced shells will NOT work, because this script
35 | # requires all of these POSIX shell features:
36 | # * functions;
37 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
38 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»;
39 | # * compound commands having a testable exit status, especially «case»;
40 | # * various built-in commands including «command», «set», and «ulimit».
41 | #
42 | # Important for patching:
43 | #
44 | # (2) This script targets any POSIX shell, so it avoids extensions provided
45 | # by Bash, Ksh, etc; in particular arrays are avoided.
46 | #
47 | # The "traditional" practice of packing multiple parameters into a
48 | # space-separated string is a well documented source of bugs and security
49 | # problems, so this is (mostly) avoided, by progressively accumulating
50 | # options in "$@", and eventually passing that to Java.
51 | #
52 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
53 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
54 | # see the in-line comments for details.
55 | #
56 | # There are tweaks for specific operating systems such as AIX, CygWin,
57 | # Darwin, MinGW, and NonStop.
58 | #
59 | # (3) This script is generated from the Groovy template
60 | # https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
61 | # within the Gradle project.
62 | #
63 | # You can find Gradle at https://github.com/gradle/gradle/.
64 | #
65 | ##############################################################################
66 |
67 | # Attempt to set APP_HOME
68 |
69 | # Resolve links: $0 may be a link
70 | app_path=$0
71 |
72 | # Need this for daisy-chained symlinks.
73 | while
74 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
75 | [ -h "$app_path" ]
76 | do
77 | ls=$( ls -ld "$app_path" )
78 | link=${ls#*' -> '}
79 | case $link in #(
80 | /*) app_path=$link ;; #(
81 | *) app_path=$APP_HOME$link ;;
82 | esac
83 | done
84 |
85 | # This is normally unused
86 | # shellcheck disable=SC2034
87 | APP_BASE_NAME=${0##*/}
88 | # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
89 | APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit
90 |
91 | # Use the maximum available, or set MAX_FD != -1 to use that value.
92 | MAX_FD=maximum
93 |
# Print a warning message to stderr.
warn () {
    echo "$*"
} >&2

# Print an error message to stderr and abort with exit status 1.
die () {
    echo
    echo "$*"
    echo
    exit 1
} >&2
104 |
105 | # OS specific support (must be 'true' or 'false').
106 | cygwin=false
107 | msys=false
108 | darwin=false
109 | nonstop=false
110 | case "$( uname )" in #(
111 | CYGWIN* ) cygwin=true ;; #(
112 | Darwin* ) darwin=true ;; #(
113 | MSYS* | MINGW* ) msys=true ;; #(
114 | NONSTOP* ) nonstop=true ;;
115 | esac
116 |
117 |
118 |
119 | # Determine the Java command to use to start the JVM.
120 | if [ -n "$JAVA_HOME" ] ; then
121 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
122 | # IBM's JDK on AIX uses strange locations for the executables
123 | JAVACMD=$JAVA_HOME/jre/sh/java
124 | else
125 | JAVACMD=$JAVA_HOME/bin/java
126 | fi
127 | if [ ! -x "$JAVACMD" ] ; then
128 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
129 |
130 | Please set the JAVA_HOME variable in your environment to match the
131 | location of your Java installation."
132 | fi
133 | else
134 | JAVACMD=java
135 | if ! command -v java >/dev/null 2>&1
136 | then
137 | die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
138 |
139 | Please set the JAVA_HOME variable in your environment to match the
140 | location of your Java installation."
141 | fi
142 | fi
143 |
144 | # Increase the maximum file descriptors if we can.
145 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
146 | case $MAX_FD in #(
147 | max*)
148 | # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
149 | # shellcheck disable=SC2039,SC3045
150 | MAX_FD=$( ulimit -H -n ) ||
151 | warn "Could not query maximum file descriptor limit"
152 | esac
153 | case $MAX_FD in #(
154 | '' | soft) :;; #(
155 | *)
156 | # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
157 | # shellcheck disable=SC2039,SC3045
158 | ulimit -n "$MAX_FD" ||
159 | warn "Could not set maximum file descriptor limit to $MAX_FD"
160 | esac
161 | fi
162 |
163 | # Collect all arguments for the java command, stacking in reverse order:
164 | # * args from the command line
165 | # * the main class name
166 | # * -classpath
167 | # * -D...appname settings
168 | # * --module-path (only if needed)
169 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
170 |
171 | # For Cygwin or MSYS, switch paths to Windows format before running java
172 | if "$cygwin" || "$msys" ; then
173 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
174 |
175 | JAVACMD=$( cygpath --unix "$JAVACMD" )
176 |
177 | # Now convert the arguments - kludge to limit ourselves to /bin/sh
178 | for arg do
179 | if
180 | case $arg in #(
181 | -*) false ;; # don't mess with options #(
182 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
183 | [ -e "$t" ] ;; #(
184 | *) false ;;
185 | esac
186 | then
187 | arg=$( cygpath --path --ignore --mixed "$arg" )
188 | fi
189 | # Roll the args list around exactly as many times as the number of
190 | # args, so each arg winds up back in the position where it started, but
191 | # possibly modified.
192 | #
193 | # NB: a `for` loop captures its iteration list before it begins, so
194 | # changing the positional parameters here affects neither the number of
195 | # iterations, nor the values presented in `arg`.
196 | shift # remove old arg
197 | set -- "$@" "$arg" # push replacement arg
198 | done
199 | fi
200 |
201 |
202 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
203 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
204 |
205 | # Collect all arguments for the java command:
206 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
207 | # and any embedded shellness will be escaped.
208 | # * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
209 | # treated as '${Hostname}' itself on the command line.
210 |
211 | set -- \
212 | "-Dorg.gradle.appname=$APP_BASE_NAME" \
213 | -jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \
214 | "$@"
215 |
216 | # Stop when "xargs" is not available.
217 | if ! command -v xargs >/dev/null 2>&1
218 | then
219 | die "xargs is not available"
220 | fi
221 |
222 | # Use "xargs" to parse quoted args.
223 | #
224 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed.
225 | #
226 | # In Bash we could simply go:
227 | #
228 | # readarray ARGS < <( xargs -n1 <<<"$var" ) &&
229 | # set -- "${ARGS[@]}" "$@"
230 | #
231 | # but POSIX shell has neither arrays nor command substitution, so instead we
232 | # post-process each arg (as a line of input to sed) to backslash-escape any
233 | # character that might be a shell metacharacter, then use eval to reverse
234 | # that process (while maintaining the separation between arguments), and wrap
235 | # the whole thing up as a single "set" statement.
236 | #
237 | # This will of course break if any of these variables contains a newline or
238 | # an unmatched quote.
239 | #
240 |
241 | eval "set -- $(
242 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
243 | xargs -n1 |
244 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
245 | tr '\n' ' '
246 | )" '"$@"'
247 |
248 | exec "$JAVACMD" "$@"
249 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SQLite Vector
2 |
3 | **SQLite Vector** is a cross-platform, ultra-efficient SQLite extension that brings vector search capabilities to your embedded database. It works seamlessly on **iOS, Android, Windows, Linux, and macOS**, using just **30MB of memory** by default. With support for **Float32, Float16, BFloat16, Int8, and UInt8**, and **highly optimized distance functions**, it's the ideal solution for **Edge AI** applications.
4 |
5 | ## Highlights
6 |
7 | * **No virtual tables required** – store vectors directly as `BLOB`s in ordinary tables
8 | * **Blazing fast** – optimized C implementation with SIMD acceleration
9 | * **Low memory footprint** – defaults to just 30MB of RAM usage
10 | * **Zero preindexing needed** – no long preprocessing or index-building phases
11 | * **Works offline** – perfect for on-device, privacy-preserving AI workloads
12 | * **Plug-and-play** – drop into existing SQLite workflows with minimal effort
13 | * **Cross-platform** – works out of the box on all major OSes
14 |
15 |
16 | ## Why Use SQLite-Vector?
17 |
18 | | Feature | SQLite-Vector | Traditional Solutions |
19 | | ---------------------------- | ------------- | ------------------------------------------ |
20 | | Works with ordinary tables | ✅ | ❌ (usually require special virtual tables) |
21 | | Doesn't need preindexing | ✅ | ❌ (can take hours for large datasets) |
22 | | Doesn't need external server | ✅ | ❌ (often needs Redis/FAISS/Weaviate/etc.) |
23 | | Memory-efficient | ✅ | ❌ |
24 | | Easy to use SQL | ✅ | ❌ (often complex JOINs, subqueries) |
25 | | Offline/Edge ready | ✅ | ❌ |
26 | | Cross-platform | ✅ | ❌ |
27 |
28 | Unlike other vector databases or extensions that require complex setup, SQLite-Vector **just works** with your existing database schema and tools.
29 |
30 |
31 | ## Installation
32 |
33 | ### Pre-built Binaries
34 |
35 | Download the appropriate pre-built binary for your platform from the official [Releases](https://github.com/sqliteai/sqlite-vector/releases) page:
36 |
37 | - Linux: x86 and ARM
38 | - macOS: x86 and ARM
39 | - Windows: x86
40 | - Android
41 | - iOS
42 |
43 | ### Loading the Extension
44 |
45 | ```sql
46 | -- In SQLite CLI
47 | .load ./vector
48 |
49 | -- In SQL
50 | SELECT load_extension('./vector');
51 | ```
52 |
53 | Or embed it directly into your application.
54 |
55 | ### WASM Version
56 |
57 | You can download the WebAssembly (WASM) version of SQLite with the SQLite Vector extension enabled from: https://www.npmjs.com/package/@sqliteai/sqlite-wasm
58 |
59 | ## Example Usage
60 |
61 | ```sql
62 | -- Create a regular SQLite table
63 | CREATE TABLE images (
64 | id INTEGER PRIMARY KEY,
65 | embedding BLOB, -- store Float32/UInt8/etc.
66 | label TEXT
67 | );
68 |
69 | -- Insert a BLOB vector (Float32, 384 dimensions) using bindings
70 | INSERT INTO images (embedding, label) VALUES (?, 'cat');
71 |
72 | -- Insert a JSON vector (Float32, 384 dimensions)
73 | INSERT INTO images (embedding, label) VALUES (vector_as_f32('[0.3, 1.0, 0.9, 3.2, 1.4,...]'), 'dog');
74 |
75 | -- Initialize the vector. By default, the distance function is L2.
76 | -- To use a different metric, specify one of the following options:
77 | -- distance=L1, distance=COSINE, distance=DOT, or distance=SQUARED_L2.
78 | SELECT vector_init('images', 'embedding', 'type=FLOAT32,dimension=384');
79 |
80 | -- Quantize vector
81 | SELECT vector_quantize('images', 'embedding');
82 |
83 | -- Optional preload quantized version in memory (for a 4x/5x speedup)
84 | SELECT vector_quantize_preload('images', 'embedding');
85 |
86 | -- Run a nearest neighbor query on the quantized version (returns top 20 closest vectors)
87 | SELECT e.id, v.distance FROM images AS e
88 | JOIN vector_quantize_scan('images', 'embedding', ?, 20) AS v
89 | ON e.id = v.rowid;
90 | ```
91 |
92 | ### Swift Package
93 |
94 | You can [add this repository as a package dependency to your Swift project](https://developer.apple.com/documentation/xcode/adding-package-dependencies-to-your-app#Add-a-package-dependency). After adding the package, you'll need to set up SQLite with extension loading by following steps 4 and 5 of [this guide](https://github.com/sqliteai/sqlite-extensions-guide/blob/main/platforms/ios.md#4-set-up-sqlite-with-extension-loading).
95 |
96 | Here's an example of how to use the package:
97 | ```swift
98 | import vector
99 |
100 | ...
101 |
102 | var db: OpaquePointer?
103 | sqlite3_open(":memory:", &db)
104 | sqlite3_enable_load_extension(db, 1)
105 | var errMsg: UnsafeMutablePointer<CChar>? = nil
106 | sqlite3_load_extension(db, vector.path, nil, &errMsg)
107 | var stmt: OpaquePointer?
108 | sqlite3_prepare_v2(db, "SELECT vector_version()", -1, &stmt, nil)
109 | defer { sqlite3_finalize(stmt) }
110 | sqlite3_step(stmt)
111 | log("vector_version(): \(String(cString: sqlite3_column_text(stmt, 0)))")
112 | sqlite3_close(db)
113 | ```
114 |
115 | ### Android Package
116 |
117 | Add the [following](https://central.sonatype.com/artifact/ai.sqlite/vector) to your Gradle dependencies:
118 |
119 | ```gradle
120 | implementation 'ai.sqlite:vector:0.9.34'
121 | ```
122 |
123 | Here's an example of how to use the package:
124 | ```java
125 | SQLiteCustomExtension vectorExtension = new SQLiteCustomExtension(getApplicationInfo().nativeLibraryDir + "/vector", null);
126 | SQLiteDatabaseConfiguration config = new SQLiteDatabaseConfiguration(
127 | getCacheDir().getPath() + "/vector_test.db",
128 | SQLiteDatabase.CREATE_IF_NECESSARY | SQLiteDatabase.OPEN_READWRITE,
129 | Collections.emptyList(),
130 | Collections.emptyList(),
131 | Collections.singletonList(vectorExtension)
132 | );
133 | SQLiteDatabase db = SQLiteDatabase.openDatabase(config, null, null);
134 | ```
135 |
136 | **Note:** Additional settings and configuration are required for a complete setup. For full implementation details, see the [complete Android example](https://github.com/sqliteai/sqlite-extensions-guide/blob/main/examples/android/README.md).
137 |
138 | ### Python Package
139 |
140 | Python developers can quickly get started using the ready-to-use `sqlite-vector` package available on PyPI:
141 |
142 | ```bash
143 | pip install sqliteai-vector
144 | ```
145 |
146 | For usage details and examples, see the [Python package documentation](./packages/python/README.md).
147 |
148 | ## Documentation
149 |
150 | Extensive API documentation can be found in the [API page](https://github.com/sqliteai/sqlite-vector/blob/main/API.md).
151 |
152 | More information about the quantization process can be found in the [QUANTIZATION document](https://github.com/sqliteai/sqlite-vector/blob/main/QUANTIZATION.md).
153 |
154 | ## Features
155 |
156 | ### Instant Vector Search – No Preindexing Required
157 |
158 | Unlike other SQLite vector extensions that rely on complex indexing algorithms such as DiskANN, HNSW, or IVF, which often require **preprocessing steps that can take hours or even days**, `sqlite-vector` works out of the box with your existing data. There’s **no need to preindex your vectors**—you can start performing fast, approximate or exact vector searches **immediately**.
159 |
160 | This means:
161 |
162 | * **No waiting time** before your app or service is usable
163 | * **Zero-cost updates** – you can add, remove, or modify vectors on the fly without rebuilding any index
164 | * **Works directly with BLOB columns** in ordinary SQLite tables – no special schema or virtual table required
165 | * **Ideal for edge and mobile use cases**, where preprocessing large datasets is not practical or possible
166 |
167 | By eliminating the need for heavyweight indexing, `sqlite-vector` offers a **simpler, faster, and more developer-friendly** approach to embedding vector search in your applications.
168 |
169 | ### Supported Vector Types
170 |
171 | You can store your vectors as `BLOB` columns in ordinary tables. Supported formats include:
172 |
173 | * `float32` (4 bytes per element)
174 | * `float16` (2 bytes per element)
175 | * `bfloat16` (2 bytes per element)
176 | * `int8` (1 byte per element)
177 | * `uint8` (1 byte per element)
178 |
179 | Simply insert a vector as a binary blob into your table. No special table types or schemas are required.
180 |
181 |
182 | ### Supported Distance Metrics
183 |
184 | Optimized implementations available:
185 |
186 | * **L2 Distance (Euclidean)**
187 | * **Squared L2**
188 | * **L1 Distance (Manhattan)**
189 | * **Cosine Distance**
190 | * **Dot Product**
191 |
192 | These are implemented in pure C and optimized for SIMD when available, ensuring maximum performance on modern CPUs and mobile devices.
193 |
194 | ---
195 |
196 | # What Is Vector Search?
197 |
198 | Vector search is the process of finding the closest match(es) to a given vector (a point in high-dimensional space) based on a similarity or distance metric. It is essential for AI and machine learning applications where data is often encoded into vector embeddings.
199 |
200 | ### Common Use Cases
201 |
202 | * **Semantic Search**: find documents, emails, or messages similar to a query
203 | * **Image Retrieval**: search for visually similar images
204 | * **Recommendation Systems**: match users with products, videos, or music
205 | * **Voice and Audio Search**: match voice queries or environmental sounds
206 | * **Anomaly Detection**: find outliers in real-time sensor data
207 | * **Robotics**: localize spatial features or behaviors using embedded observations
208 |
209 | In the AI era, embeddings are everywhere – from language models like GPT to vision transformers. Storing and searching them efficiently is the foundation of intelligent applications.
210 |
211 | ## Perfect for Edge AI
212 |
213 | SQLite-Vector is designed with the **Edge AI** use case in mind:
214 |
215 | * Runs offline – no internet required
216 | * Works on mobile devices – iOS/Android friendly
217 | * Keeps data local – ideal for privacy-focused apps
218 | * Extremely fast – real-time performance on device
219 |
220 | You can deploy powerful similarity search capabilities right inside your app or embedded system – **no cloud needed**.
221 |
222 | ## Integrations
223 |
224 | Use SQLite-Vector alongside:
225 |
226 | * **[SQLite-AI](https://github.com/sqliteai/sqlite-ai)** – on-device inference, embedding generation, and model interaction directly into your database
227 | * **[SQLite-Sync](https://github.com/sqliteai/sqlite-sync)** – sync on-device databases with the cloud
228 | * **[SQLite-JS](https://github.com/sqliteai/sqlite-js)** – define SQLite functions in JavaScript
229 |
230 | ## License
231 |
232 | This project is licensed under the [Elastic License 2.0](./LICENSE.md). You can use, copy, modify, and distribute it under the terms of the license for non-production use. For production or managed service use, please [contact SQLite Cloud, Inc](mailto:info@sqlitecloud.io) for a commercial license.
233 |
--------------------------------------------------------------------------------
/API.md:
--------------------------------------------------------------------------------
1 | # SQLite Vector Extension – API Reference
2 |
3 | This extension enables efficient vector operations directly inside SQLite databases, making it ideal for on-device and edge AI applications. It supports various vector types and SIMD-accelerated distance functions.
4 |
5 | ### Getting started
6 |
7 | * All vectors must have a fixed dimension per column, set during `vector_init`.
8 | * Only tables explicitly initialized using `vector_init` are eligible for vector search.
9 | * You **must run `vector_quantize()`** before using `vector_quantize_scan()`.
10 | * You can preload quantization at database open using `vector_quantize_preload()`.
11 |
12 | ---
13 |
14 | ## `vector_version()`
15 |
16 | **Returns:** `TEXT`
17 |
18 | **Description:**
19 | Returns the current version of the SQLite Vector Extension.
20 |
21 | **Example:**
22 |
23 | ```sql
24 | SELECT vector_version();
25 | -- e.g., '1.0.0'
26 | ```
27 |
28 | ---
29 |
30 | ## `vector_backend()`
31 |
32 | **Returns:** `TEXT`
33 |
34 | **Description:**
35 | Returns the active backend used for vector computation. This indicates the SIMD or hardware acceleration available on the current system.
36 |
37 | **Possible Values:**
38 |
39 | * `CPU` – Generic fallback
40 | * `SSE2` – SIMD on Intel/AMD
41 | * `AVX2` – Advanced SIMD on modern x86 CPUs
42 | * `NEON` – SIMD on ARM (e.g., mobile)
43 |
44 | **Example:**
45 |
46 | ```sql
47 | SELECT vector_backend();
48 | -- e.g., 'AVX2'
49 | ```
50 |
51 | ---
52 |
53 | ## `vector_init(table, column, options)`
54 |
55 | **Returns:** `NULL`
56 |
57 | **Description:**
58 | Initializes the vector extension for a given table and column. This is **mandatory** before performing any vector search or quantization.
59 | `vector_init` must be called in every database connection that needs to perform vector operations.
60 |
61 | The target table must have a **`rowid`** (an integer primary key, either explicit or implicit).
62 | If the table was created using `WITHOUT ROWID`, it must have **exactly one primary key column of type `INTEGER`**.
63 | This ensures that each vector can be uniquely identified and efficiently referenced during search and quantization.
64 |
65 | **Parameters:**
66 |
67 | * `table` (TEXT): Name of the table containing vector data.
68 | * `column` (TEXT): Name of the column containing the vector embeddings (stored as BLOBs).
69 | * `options` (TEXT): Comma-separated key=value string.
70 |
71 | **Options:**
72 |
73 | * `dimension` (required): Integer specifying the length of each vector.
74 | * `type`: Vector data type. Options:
75 |
76 | * `FLOAT32` (default)
77 | * `FLOAT16`
78 | * `FLOATB16`
79 | * `INT8`
80 | * `UINT8`
81 | * `distance`: Distance function to use. Options:
82 |
83 | * `L2` (default)
84 | * `SQUARED_L2`
85 | * `COSINE`
86 | * `DOT`
87 | * `L1`
88 |
89 | **Example:**
90 |
91 | ```sql
92 | SELECT vector_init('documents', 'embedding', 'dimension=384,type=FLOAT32,distance=COSINE');
93 | ```
94 |
95 | ---
96 |
97 | ## `vector_quantize(table, column, options)`
98 |
99 | **Returns:** `INTEGER`
100 |
101 | **Description:**
102 | Returns the total number of successfully quantized rows.
103 |
104 | Performs quantization on the specified table and column. This precomputes internal data structures to support fast approximate nearest neighbor (ANN) search.
105 | Read more about quantization [here](https://github.com/sqliteai/sqlite-vector/blob/main/QUANTIZATION.md).
106 |
107 | If a quantization already exists for the specified table and column, it is replaced. If it was previously loaded into memory using `vector_quantize_preload`, the data is automatically reloaded. `vector_quantize` should be called once after data insertion. If called multiple times, the previous quantized data is replaced. The resulting quantization is shared across all database connections, so they do not need to call it again.
108 |
109 | **Parameters:**
110 |
111 | * `table` (TEXT): Name of the table.
112 | * `column` (TEXT): Name of the column containing vector data.
113 | * `options` (TEXT, optional): Comma-separated key=value string.
114 |
115 | **Available options:**
116 |
117 | * `max_memory`: Max memory to use for quantization (default: 30MB)
118 |
119 | **Example:**
120 |
121 | ```sql
122 | SELECT vector_quantize('documents', 'embedding', 'max_memory=50MB');
123 | ```
124 |
125 | ---
126 |
127 | ## `vector_quantize_memory(table, column)`
128 |
129 | **Returns:** `INTEGER`
130 |
131 | **Description:**
132 | Returns the amount of memory (in bytes) required to preload quantized data for the specified table and column.
133 |
134 | **Example:**
135 |
136 | ```sql
137 | SELECT vector_quantize_memory('documents', 'embedding');
138 | -- e.g., 28490112
139 | ```
140 |
141 | ---
142 |
143 | ## `vector_quantize_preload(table, column)`
144 |
145 | **Returns:** `NULL`
146 |
147 | **Description:**
148 | Loads the quantized representation for the specified table and column into memory. Should be used at startup to ensure optimal query performance.
149 | `vector_quantize_preload` should be called once after `vector_quantize`. The preloaded data is also shared across all database connections, so they do not need to call it again.
150 |
151 | **Example:**
152 |
153 | ```sql
154 | SELECT vector_quantize_preload('documents', 'embedding');
155 | ```
156 |
157 | ---
158 |
159 | ## `vector_quantize_cleanup(table, column)`
160 |
161 | **Returns:** `NULL`
162 |
163 | **Description:**
164 | Releases memory previously allocated by a `vector_quantize_preload` call and removes all quantization entries associated with the specified table and column.
165 | Use this function when quantization is no longer required. In some cases, running VACUUM may be necessary to reclaim the freed space from the database.
166 |
167 | If the data changes and you invoke `vector_quantize`, the existing quantization data is automatically replaced. In that case, calling this function is unnecessary.
168 |
169 | **Example:**
170 |
171 | ```sql
172 | SELECT vector_quantize_cleanup('documents', 'embedding');
173 | ```
174 |
175 | ---
176 |
177 | ## `vector_as_f32(value)`
178 |
179 | ## `vector_as_f16(value)`
180 |
181 | ## `vector_as_bf16(value)`
182 |
183 | ## `vector_as_i8(value)`
184 |
185 | ## `vector_as_u8(value)`
186 |
187 | **Returns:** `BLOB`
188 |
189 | **Description:**
190 | Encodes a vector into the required internal BLOB format to ensure correct storage and compatibility with the system’s vector representation.
191 | A real conversion is performed ONLY in case of JSON input. When input is a BLOB, it is assumed to be already properly formatted.
192 |
193 | Functions in the `vector_as_` family should be used in all `INSERT`, `UPDATE`, and `DELETE` statements to properly format vector values. However, they are *not* required when specifying input vectors for the `vector_full_scan` or `vector_quantize_scan` virtual tables.
194 |
195 | **Parameters:**
196 |
197 | * `value` (TEXT or BLOB):
198 |
199 | * If `TEXT`, it must be a JSON array (e.g., `"[0.1, 0.2, 0.3]"`).
200 | * If `BLOB`, no check is performed; the user must ensure the format matches the specified type and dimension.
201 |
202 | * `dimension` (INT, optional): Enforce a stricter sanity check, ensuring the input vector has the expected dimensionality.
203 |
204 | **Usage by format:**
205 |
206 | ```sql
207 | -- Insert a Float32 vector using JSON
208 | INSERT INTO documents(embedding) VALUES(vector_as_f32('[0.1, 0.2, 0.3]'));
209 |
210 | -- Insert a UInt8 vector using raw BLOB (ensure correct formatting!)
211 | INSERT INTO compressed_vectors(embedding) VALUES(vector_as_u8(X'010203'));
212 | ```
213 |
214 | ---
215 |
216 | ## 🔍 `vector_full_scan(table, column, vector, k)`
217 |
218 | **Returns:** `Virtual Table (rowid, distance)`
219 |
220 | **Description:**
221 | Performs a brute-force nearest neighbor search using the given vector. Despite its brute-force nature, this function is highly optimized and useful for small datasets (rows < 1000000) or validation.
222 | Since this interface only returns rowid and distance, if you need to access additional columns from the original table, you must use a SELF JOIN.
223 |
224 | **Parameters:**
225 |
226 | * `table` (TEXT): Name of the target table.
227 | * `column` (TEXT): Column containing vectors.
228 | * `vector` (BLOB or JSON): The query vector.
229 | * `k` (INTEGER): Number of nearest neighbors to return.
230 |
231 | **Example:**
232 |
233 | ```sql
234 | SELECT rowid, distance
235 | FROM vector_full_scan('documents', 'embedding', vector_as_f32('[0.1, 0.2, 0.3]'), 5);
236 | ```
237 |
238 | ---
239 |
240 | ## ⚡ `vector_quantize_scan(table, column, vector, k)`
241 |
242 | **Returns:** `Virtual Table (rowid, distance)`
243 |
244 | **Description:**
245 | Performs a fast approximate nearest neighbor search using the pre-quantized data. This is the **recommended query method** for large datasets due to its excellent speed/recall/memory trade-off. Since this interface only returns rowid and distance, if you need to access additional columns from the original table, you must use a SELF JOIN.
246 |
247 | You **must run `vector_quantize()`** before using `vector_quantize_scan()`, and run it again whenever the initialized vector data changes.
248 |
249 | **Parameters:**
250 |
251 | * `table` (TEXT): Name of the target table.
252 | * `column` (TEXT): Column containing vectors.
253 | * `vector` (BLOB or JSON): The query vector.
254 | * `k` (INTEGER): Number of nearest neighbors to return.
255 |
256 | **Performance Highlights:**
257 |
258 | * Handles **1M vectors** of dimension 768 in a few milliseconds.
259 | * Uses **<50MB** of RAM.
260 | * Achieves **>0.95 recall**.
261 |
262 | **Example:**
263 |
264 | ```sql
265 | SELECT rowid, distance
266 | FROM vector_quantize_scan('documents', 'embedding', vector_as_f32('[0.1, 0.2, 0.3]'), 10);
267 | ```
268 |
269 | ---
270 |
271 | ## 🔁 Streaming Interfaces
272 |
273 | ### `vector_full_scan_stream` and `vector_quantize_scan_stream`
274 |
275 | **Returns:** `Virtual Table (rowid, distance)`
276 |
277 | **Description:**
278 | These streaming interfaces provide the same functionality as `vector_full_scan` and `vector_quantize_scan`, respectively, but are designed for incremental or filtered processing of results.
279 |
280 | Unlike their non-streaming counterparts, these functions **omit the fourth parameter (`k`)** and allow you to use standard SQL clauses such as `WHERE` and `LIMIT` to control filtering and result count. Since this interface only returns rowid and distance, if you need to access additional columns from the original table, you must use a SELF JOIN.
281 |
282 | This makes them ideal for combining vector search with additional query conditions or progressive result consumption in streaming applications.
283 |
284 | **Parameters:**
285 |
286 | * `table` (TEXT): Name of the target table.
287 | * `column` (TEXT): Column containing vectors.
288 | * `vector` (BLOB or JSON): The query vector.
289 |
290 | **Key Differences from Non-Streaming Variants:**
291 |
292 | | Function | Equivalent To | Requires `k` | Supports `WHERE` | Supports `LIMIT` |
293 | | ----------------------------- | ---------------------- | ------------ | ---------------- | ---------------- |
294 | | `vector_full_scan_stream` | `vector_full_scan` | ❌ | ✅ | ✅ |
295 | | `vector_quantize_scan_stream` | `vector_quantize_scan` | ❌ | ✅ | ✅ |
296 |
297 | **Examples:**
298 |
299 | ```sql
300 | -- Perform a filtered full scan
301 | SELECT rowid, distance
302 | FROM vector_full_scan_stream('documents', 'embedding', vector_as_f32('[0.1, 0.2, 0.3]'))
303 | LIMIT 5;
304 | ```
305 |
306 | ```sql
307 | -- Perform a filtered approximate scan using quantized data
308 | SELECT rowid, distance
309 | FROM vector_quantize_scan_stream('documents', 'embedding', vector_as_f32('[0.1, 0.2, 0.3]'))
310 | LIMIT 10;
311 | ```
312 |
313 | **Accessing Additional Columns:**
314 |
315 | ```sql
316 | -- Perform a filtered full scan with additional columns
317 | SELECT
318 | v.rowid,
319 | row_number() OVER (ORDER BY v.distance) AS rank_number,
320 | v.distance
321 | FROM vector_full_scan_stream('documents', 'embedding', vector_as_f32('[0.1, 0.2, 0.3]')) AS v
322 | JOIN documents ON documents.rowid = v.rowid
323 | WHERE documents.category = 'science'
324 | LIMIT 10;
325 | ```
326 |
327 | **Usage Notes:**
328 |
329 | * These interfaces return rows progressively and can efficiently combine vector similarity with SQL-level filters.
330 | * The `LIMIT` clause can be used to control how many rows are read or returned.
331 | * The query planner integrates the streaming virtual table into the overall SQL execution plan, enabling hybrid filtering and ranking operations.
332 |
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | name: Build, Test and Release
2 | on:
3 | push:
4 | workflow_dispatch:
5 |
6 | permissions:
7 | contents: write
8 | id-token: write
9 |
10 | jobs:
11 | build:
12 | runs-on: ${{ matrix.os }}
13 | container: ${{ matrix.container && matrix.container || '' }}
14 | name: ${{ matrix.name }}${{ matrix.arch && format('-{0}', matrix.arch) || '' }} build${{ matrix.arch != 'arm64-v8a' && matrix.arch != 'armeabi-v7a' && matrix.name != 'ios-sim' && matrix.name != 'ios' && matrix.name != 'apple-xcframework' && matrix.name != 'android-aar' && ( matrix.name != 'macos' || matrix.arch != 'x86_64' ) && ' + test' || ''}}
15 | timeout-minutes: 20
16 | strategy:
17 | fail-fast: false
18 | matrix:
19 | include:
20 | - os: ubuntu-22.04
21 | arch: x86_64
22 | name: linux
23 | - os: ubuntu-22.04-arm
24 | arch: arm64
25 | name: linux
26 | - os: ubuntu-22.04
27 | arch: x86_64
28 | name: linux-musl
29 | container: alpine:latest
30 | - os: ubuntu-22.04-arm
31 | arch: arm64
32 | name: linux-musl
33 | - os: macos-15
34 | name: macos
35 | - os: macos-15
36 | arch: x86_64
37 | name: macos
38 | make: ARCH=x86_64
39 | - os: macos-15
40 | arch: arm64
41 | name: macos
42 | make: ARCH=arm64
43 | - os: windows-2022
44 | arch: x86_64
45 | name: windows
46 | - os: ubuntu-22.04
47 | arch: arm64-v8a
48 | name: android
49 | make: PLATFORM=android ARCH=arm64-v8a
50 | - os: ubuntu-22.04
51 | arch: armeabi-v7a
52 | name: android
53 | make: PLATFORM=android ARCH=armeabi-v7a
54 | - os: ubuntu-22.04
55 | arch: x86_64
56 | name: android
57 | make: PLATFORM=android ARCH=x86_64
58 | sqlite-amalgamation-zip: https://sqlite.org/2025/sqlite-amalgamation-3490100.zip
59 | - os: macos-15
60 | name: ios
61 | make: PLATFORM=ios
62 | - os: macos-15
63 | name: ios-sim
64 | make: PLATFORM=ios-sim
65 | - os: macos-15
66 | name: apple-xcframework
67 | make: xcframework
68 | - os: ubuntu-22.04
69 | name: android-aar
70 | make: aar
71 |
72 | defaults:
73 | run:
74 | shell: ${{ matrix.container && 'sh' || 'bash' }}
75 |
76 | steps:
77 |
78 | - uses: actions/checkout@v4.2.2
79 |
80 | - name: android setup java
81 | if: matrix.name == 'android-aar'
82 | uses: actions/setup-java@v4
83 | with:
84 | distribution: 'temurin'
85 | java-version: '17'
86 |
87 | - name: windows install dependencies
88 | if: matrix.name == 'windows'
89 | run: choco install sqlite -y
90 |
91 | - name: macos install dependencies
92 | if: matrix.name == 'macos'
93 | run: brew link sqlite --force
94 |
95 | - name: linux-musl x86_64 install dependencies
96 | if: matrix.name == 'linux-musl' && matrix.arch == 'x86_64'
97 | run: apk update && apk add --no-cache gcc make sqlite musl-dev linux-headers
98 |
99 | - name: linux-musl arm64 setup container
100 | if: matrix.name == 'linux-musl' && matrix.arch == 'arm64'
101 | run: |
102 | docker run -d --name alpine \
103 | --platform linux/arm64 \
104 | -v ${{ github.workspace }}:/workspace \
105 | -w /workspace \
106 | alpine:latest \
107 | tail -f /dev/null
108 | docker exec alpine sh -c "apk update && apk add --no-cache gcc make sqlite musl-dev linux-headers"
109 |
110 | - name: build sqlite-vector
111 | run: ${{ matrix.name == 'linux-musl' && matrix.arch == 'arm64' && 'docker exec alpine' || '' }} make extension ${{ matrix.make && matrix.make || ''}}
112 |
113 | - name: create keychain for codesign
114 | if: matrix.os == 'macos-15'
115 | run: |
116 | echo "${{ secrets.APPLE_CERTIFICATE }}" | base64 --decode > certificate.p12
117 | security create-keychain -p "${{ secrets.KEYCHAIN_PASSWORD }}" build.keychain
118 | security default-keychain -s build.keychain
119 | security unlock-keychain -p "${{ secrets.KEYCHAIN_PASSWORD }}" build.keychain
120 | security import certificate.p12 -k build.keychain -P "${{ secrets.CERTIFICATE_PASSWORD }}" -T /usr/bin/codesign
121 | security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k "${{ secrets.KEYCHAIN_PASSWORD }}" build.keychain
122 |
123 | - name: codesign and notarize dylib
124 | if: matrix.os == 'macos-15' && matrix.name != 'apple-xcframework'
125 | run: |
126 | codesign --sign "${{ secrets.APPLE_TEAM_ID }}" --timestamp --options runtime dist/vector.dylib
127 | ditto -c -k dist/vector.dylib dist/vector.zip
128 | xcrun notarytool submit dist/vector.zip --apple-id "${{ secrets.APPLE_ID }}" --password "${{ secrets.APPLE_PASSWORD }}" --team-id "${{ secrets.APPLE_TEAM_ID }}" --wait
129 | rm dist/vector.zip
130 |
131 | - name: codesign and notarize xcframework
132 | if: matrix.name == 'apple-xcframework'
133 | run: |
134 | find dist/vector.xcframework -name "*.framework" -exec echo "Signing: {}" \; -exec codesign --sign "${{ secrets.APPLE_TEAM_ID }}" --timestamp --options runtime {} \; # Sign each individual framework FIRST
135 | codesign --sign "${{ secrets.APPLE_TEAM_ID }}" --timestamp --options runtime dist/vector.xcframework # Then sign the xcframework wrapper
136 | ditto -c -k --keepParent dist/vector.xcframework dist/vector.xcframework.zip
137 | xcrun notarytool submit dist/vector.xcframework.zip --apple-id "${{ secrets.APPLE_ID }}" --password "${{ secrets.APPLE_PASSWORD }}" --team-id "${{ secrets.APPLE_TEAM_ID }}" --wait
138 | rm dist/vector.xcframework.zip
139 |
140 | - name: cleanup keychain for codesign
141 | if: matrix.os == 'macos-15'
142 | run: |
143 | rm certificate.p12
144 | security delete-keychain build.keychain
145 |
146 | - name: android setup test environment
147 | if: matrix.name == 'android' && matrix.arch != 'arm64-v8a' && matrix.arch != 'armeabi-v7a'
148 | run: |
149 |
150 | echo "::group::enable kvm group perms"
151 | echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules
152 | sudo udevadm control --reload-rules
153 | sudo udevadm trigger --name-match=kvm
154 | echo "::endgroup::"
155 |
156 | echo "::group::download and build sqlite3 without SQLITE_OMIT_LOAD_EXTENSION"
157 | curl -O ${{ matrix.sqlite-amalgamation-zip }}
158 | unzip sqlite-amalgamation-*.zip
159 | export ${{ matrix.make }}
160 | $ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/bin/${{ matrix.arch }}-linux-android26-clang sqlite-amalgamation-*/shell.c sqlite-amalgamation-*/sqlite3.c -o sqlite3 -ldl
161 | # remove unused folders to save up space
162 | rm -rf sqlite-amalgamation-*.zip sqlite-amalgamation-*
163 | echo "::endgroup::"
164 |
165 | echo "::group::prepare the test script"
166 | make test PLATFORM=$PLATFORM ARCH=$ARCH || echo "It should fail. Running remaining commands in the emulator"
167 | cat > commands.sh << EOF
168 | mv -f /data/local/tmp/sqlite3 /system/xbin
169 | cd /data/local/tmp
170 | $(make test PLATFORM=$PLATFORM ARCH=$ARCH -n)
171 | EOF
172 | echo "::endgroup::"
173 |
174 | - name: android test sqlite-vector
175 | if: matrix.name == 'android' && matrix.arch != 'arm64-v8a' && matrix.arch != 'armeabi-v7a'
176 | uses: reactivecircus/android-emulator-runner@v2.34.0
177 | with:
178 | api-level: 26
179 | arch: ${{ matrix.arch }}
180 | script: |
181 | adb root
182 | adb remount
183 | adb push ${{ github.workspace }}/. /data/local/tmp/
184 | adb shell "sh /data/local/tmp/commands.sh"
185 |
186 | - name: test sqlite-vector
187 | if: contains(matrix.name, 'linux') || matrix.name == 'windows' || ( matrix.name == 'macos' && matrix.arch != 'x86_64' )
188 | run: ${{ matrix.name == 'linux-musl' && matrix.arch == 'arm64' && 'docker exec alpine' || '' }} make test ${{ matrix.make && matrix.make || ''}}
189 |
190 | - uses: actions/upload-artifact@v4.6.2
191 | if: always()
192 | with:
193 | name: vector-${{ matrix.name }}${{ matrix.arch && format('-{0}', matrix.arch) || '' }}
194 | path: dist/vector.*
195 | if-no-files-found: error
196 |
197 | release:
198 | runs-on: ubuntu-22.04
199 | name: release
200 | needs: build
201 | if: github.ref == 'refs/heads/main'
202 |
203 | env:
204 | GH_TOKEN: ${{ github.token }}
205 |
206 | steps:
207 |
208 | - uses: actions/checkout@v4.2.2
209 |
210 | - uses: actions/download-artifact@v4.2.1
211 | with:
212 | path: artifacts
213 |
214 | - name: zip artifacts
215 | run: |
216 | VERSION=$(make version)
217 | for folder in "artifacts"/*; do
218 | if [ -d "$folder" ]; then
219 | name=$(basename "$folder")
220 | if [[ "$name" != "vector-apple-xcframework" && "$name" != "vector-android-aar" ]]; then
221 | tar -czf "${name}-${VERSION}.tar.gz" -C "$folder" .
222 | fi
223 | if [[ "$name" != "vector-android-aar" ]]; then
224 | (cd "$folder" && zip -rq "../../${name}-${VERSION}.zip" .)
225 | else
226 | cp "$folder"/*.aar "${name}-${VERSION}.aar"
227 | fi
228 | fi
229 | done
230 |
231 | - name: release tag version from sqlite-vector.h
232 | id: tag
233 | run: |
234 | VERSION=$(make version)
235 | if [[ "$VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
236 | LATEST_RELEASE=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" https://api.github.com/repos/${{ github.repository }}/releases/latest)
237 | LATEST=$(echo "$LATEST_RELEASE" | jq -r '.name')
238 |
239 | # Check artifact sizes against previous release
240 | if [ -n "$LATEST" ] && [ "$LATEST" != "null" ]; then
241 | echo "Checking artifact sizes against previous release: $LATEST"
242 | FAILED=0
243 |
244 | for artifact in vector-*-${VERSION}.*; do
245 | if [ ! -f "$artifact" ]; then
246 | continue
247 | fi
248 |
249 | # Get current artifact size
250 | NEW_SIZE=$(stat -c%s "$artifact" 2>/dev/null || stat -f%z "$artifact")
251 |
252 | # Get artifact name for previous release
253 | ARTIFACT_NAME=$(echo "$artifact" | sed "s/${VERSION}/${LATEST}/")
254 |
255 | # Get previous artifact size from GitHub API
256 | OLD_SIZE=$(echo "$LATEST_RELEASE" | jq -r ".assets[] | select(.name == \"$(basename "$ARTIFACT_NAME")\") | .size")
257 |
258 | if [ -z "$OLD_SIZE" ] || [ "$OLD_SIZE" = "null" ]; then
259 | echo "⚠️ Previous artifact not found: $(basename "$ARTIFACT_NAME"), skipping comparison"
260 | continue
261 | fi
262 |
263 | # Calculate percentage increase
264 | INCREASE=$(awk "BEGIN {printf \"%.2f\", (($NEW_SIZE - $OLD_SIZE) / $OLD_SIZE) * 100}")
265 |
266 | echo "📦 $artifact: $OLD_SIZE → $NEW_SIZE bytes (${INCREASE}% change)"
267 |
268 | # Check if increase is more than 5%
269 | if (( $(echo "$INCREASE > 5" | bc -l) )); then
270 | if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then
271 | echo "⚠️ WARNING: $artifact size increased by ${INCREASE}% (limit: 5%)"
272 | else
273 | echo "❌ ERROR: $artifact size increased by ${INCREASE}% (limit: 5%)"
274 | FAILED=1
275 | fi
276 | fi
277 | done
278 |
279 | if [ $FAILED -eq 1 ]; then
280 | echo ""
281 | echo "❌ One or more artifacts exceeded the 5% size increase limit"
282 | exit 1
283 | fi
284 |
285 | echo "✅ All artifacts within 5% size increase limit"
286 | fi
287 |
288 | if [[ "$VERSION" != "$LATEST" || "$GITHUB_EVENT_NAME" == "workflow_dispatch" ]]; then
289 | echo "version=$VERSION" >> $GITHUB_OUTPUT
290 | else
291 | echo "::warning file=src/sqlite-vector.h::To release a new version, please update the SQLITE_VECTOR_VERSION in src/sqlite-vector.h to be different than the latest $LATEST"
292 | fi
293 | exit 0
294 | fi
295 | echo "❌ SQLITE_VECTOR_VERSION not found in sqlite-vector.h"
296 | exit 1
297 |
298 | - uses: actions/checkout@v4.2.2
299 | if: steps.tag.outputs.version != ''
300 | with:
301 | repository: sqliteai/sqlite-wasm
302 | path: sqlite-wasm
303 | submodules: recursive
304 | token: ${{ secrets.PAT }}
305 |
306 | - name: release sqlite-wasm
307 | if: steps.tag.outputs.version != ''
308 | run: |
309 | cd sqlite-wasm
310 | git config --global user.email "$GITHUB_ACTOR@users.noreply.github.com"
311 | git config --global user.name "$GITHUB_ACTOR"
312 | cd modules/sqlite-vector
313 | git checkout ${{ github.sha }}
314 | cd ../..
315 | git add modules/sqlite-vector
316 | PKG=sqlite-wasm/package.json
317 | TMP=sqlite-wasm/package.tmp.json
318 | jq --arg version "$(cat modules/sqlite/VERSION)-sync.$(cd modules/sqlite-sync && make version)-vector.$(cd modules/sqlite-vector && make version)" '.version = $version' "$PKG" > "$TMP" && mv "$TMP" "$PKG"
319 | git add "$PKG"
320 | git commit -m "Bump sqlite-vector version to ${{ steps.tag.outputs.version }}"
321 | git push origin main
322 |
323 | - uses: actions/setup-java@v4
324 | if: steps.tag.outputs.version != ''
325 | with:
326 | distribution: 'temurin'
327 | java-version: '17'
328 |
329 | - name: release android aar to maven central
330 | if: steps.tag.outputs.version != ''
331 | run: cd packages/android && ./gradlew publishAggregationToCentralPortal -PSIGNING_KEY="${{ secrets.SIGNING_KEY }}" -PSIGNING_PASSWORD="${{ secrets.SIGNING_PASSWORD }}" -PSONATYPE_USERNAME="${{ secrets.MAVEN_CENTRAL_USERNAME }}" -PSONATYPE_PASSWORD="${{ secrets.MAVEN_CENTRAL_TOKEN }}" -PVERSION="${{ steps.tag.outputs.version }}" -PAAR_PATH="../../artifacts/vector-android-aar/vector.aar"
332 |
333 | - uses: actions/setup-node@v4
334 | if: steps.tag.outputs.version != ''
335 | with:
336 | node-version: '20'
337 | registry-url: 'https://registry.npmjs.org'
338 |
339 | - name: update npm # npm 11.5.1 is required for OIDC auth https://docs.npmjs.com/trusted-publishers
340 | run: npm install -g npm@11.5.1
341 |
342 | - name: build and publish npm packages
343 | if: steps.tag.outputs.version != ''
344 | run: |
345 | cd packages/node
346 |
347 | # Update version in package.json
348 | echo "Updating versions to ${{ steps.tag.outputs.version }}..."
349 |
350 | # Update package.json
351 | jq --arg version "${{ steps.tag.outputs.version }}" \
352 | '.version = $version | .optionalDependencies = (.optionalDependencies | with_entries(.value = $version))' \
353 | package.json > package.tmp.json && mv package.tmp.json package.json
354 |
355 | echo "✓ Updated package.json to version ${{ steps.tag.outputs.version }}"
356 |
357 | # Generate platform packages
358 | echo "Generating platform packages..."
359 | node generate-platform-packages.js "${{ steps.tag.outputs.version }}" "../../artifacts" "./platform-packages"
360 | echo "✓ Generated 7 platform packages"
361 | ls -la platform-packages/
362 |
363 | # Build main package
364 | echo "Building main package..."
365 | npm install
366 | npm run build
367 | npm test
368 | echo "✓ Main package built and tested"
369 |
370 | # Publish platform packages
371 | echo "Publishing platform packages to npm..."
372 | cd platform-packages
373 | for platform_dir in */; do
374 | platform_name=$(basename "$platform_dir")
375 | echo " Publishing @sqliteai/sqlite-vector-${platform_name}..."
376 | cd "$platform_dir"
377 | npm publish --provenance --access public
378 | cd ..
379 | echo " ✓ Published @sqliteai/sqlite-vector-${platform_name}"
380 | done
381 | cd ..
382 |
383 | # Publish main package
384 | echo "Publishing main package to npm..."
385 | npm publish --provenance --access public
386 | echo "✓ Published @sqliteai/sqlite-vector@${{ steps.tag.outputs.version }}"
387 |
388 | echo ""
389 | echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
390 | echo "✅ Successfully published 8 packages to npm"
391 | echo " Main: @sqliteai/sqlite-vector@${{ steps.tag.outputs.version }}"
392 | echo " Platform packages: 7"
393 | echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
394 |
395 | - uses: softprops/action-gh-release@v2.2.1
396 | if: steps.tag.outputs.version != ''
397 | with:
398 | body: |
399 | # Packages
400 |
401 | [**Node**](https://www.npmjs.com/package/@sqliteai/sqlite-vector): `npm install @sqliteai/sqlite-vector`
402 | [**WASM**](https://www.npmjs.com/package/@sqliteai/sqlite-wasm): `npm install @sqliteai/sqlite-wasm`
403 | [**Android**](https://central.sonatype.com/artifact/ai.sqlite/vector): `ai.sqlite:vector:${{ steps.tag.outputs.version }}`
404 | [**Python**](https://pypi.org/project/sqliteai-vector): `pip install sqliteai-vector`
405 | [**Swift**](https://github.com/sqliteai/sqlite-vector#swift-package): [Installation Guide](https://github.com/sqliteai/sqlite-vector#swift-package)
406 |
407 | ---
408 |
409 | generate_release_notes: true
410 | tag_name: ${{ steps.tag.outputs.version }}
411 | files: vector-*-${{ steps.tag.outputs.version }}.*
412 | make_latest: true
413 |
--------------------------------------------------------------------------------
/libs/fp16/fp16.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #ifndef FP16_FP16_H
3 | #define FP16_FP16_H
4 |
5 | #if defined(__cplusplus) && (__cplusplus >= 201103L)
6 | #include
7 | #include
8 | #elif !defined(__OPENCL_VERSION__)
9 | #include
10 | #include
11 | #endif
12 |
13 | #include
14 | #include
15 |
16 | #if defined(_MSC_VER)
17 | #include
18 | #endif
19 | #if defined(__F16C__) && FP16_USE_NATIVE_CONVERSION && !FP16_USE_FLOAT16_TYPE && !FP16_USE_FP16_TYPE
20 | #include
21 | #endif
22 | #if (defined(__aarch64__) || defined(_M_ARM64)) && FP16_USE_NATIVE_CONVERSION && !FP16_USE_FLOAT16_TYPE && !FP16_USE_FP16_TYPE
23 | #include
24 | #endif
25 |
26 |
/*
 * Widen an IEEE half-precision number, given as raw bits, to the raw bits of
 * the equivalent IEEE single-precision number.
 *
 * @note Integer-only implementation: no floating-point operations are used,
 * so it is safe in contexts where the FP environment must not be touched.
 */
static inline uint32_t fp16_ieee_to_fp32_bits(uint16_t h) {
    /* Place the 16 input bits in the top half of a 32-bit word:
     * sign in bit 31, 5-bit exponent in bits 26-30, 10-bit mantissa in bits 16-25. */
    const uint32_t half_bits = (uint32_t) h << 16;
    /* Isolate the sign (bit 31) and the magnitude (exponent + mantissa) fields. */
    const uint32_t sign_bit = half_bits & UINT32_C(0x80000000);
    const uint32_t abs_bits = half_bits & UINT32_C(0x7FFFFFFF);
    /* Count how far the magnitude must be shifted left to normalize a denormal
     * input. For normalized inputs one of the top 6 bits is already set, so the
     * clamp below yields a shift of zero. */
#ifdef _MSC_VER
    unsigned long msb_index;
    _BitScanReverse(&msb_index, (unsigned long) abs_bits);
    uint32_t shift = (uint32_t) msb_index ^ 31;
#else
    uint32_t shift = __builtin_clz(abs_bits);
#endif
    shift = shift > 5 ? shift - 5 : 0;
    /* 0x7F800000 when the input exponent was 0x1F (Inf/NaN): adding 0x04000000
     * carries into bit 31 exactly for exponent 0x1F, and the arithmetic shift
     * then smears that bit across the single-precision exponent field. */
    const int32_t inf_nan_mask = ((int32_t) (abs_bits + 0x04000000) >> 8) & INT32_C(0x7F800000);
    /* All-ones when the input magnitude was zero (abs_bits - 1 wraps negative),
     * all-zeros otherwise; used to force +/-0 results below. */
    const int32_t zero_mask = (int32_t) (abs_bits - 1) >> 31;
    /* Normalize (shift), realign exponent/mantissa into single-precision
     * positions (>> 3), rebias the exponent by 0x70 - shift, then patch in the
     * Inf/NaN exponent and squash zeros before restoring the sign. */
    return sign_bit | ((((abs_bits << shift >> 3) + ((0x70 - shift) << 23)) | inf_nan_mask) & ~zero_mask);
}
107 |
/*
 * Convert a 16-bit floating-point number in IEEE half-precision format, in bit representation, to
 * a 32-bit floating-point number in IEEE single-precision format.
 *
 * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals)
 * floating-point operations and bitcasts between integer and floating-point variables.
 */
static inline float fp16_ieee_to_fp32_value(uint16_t h) {
#if FP16_USE_NATIVE_CONVERSION
    /* Native paths: reinterpret the bits as a hardware half-precision type, or use a
     * dedicated conversion intrinsic, and let the hardware perform the widening. */
#if FP16_USE_FLOAT16_TYPE
    union {
        uint16_t as_bits;
        _Float16 as_value;
    } fp16 = { h };
    return (float) fp16.as_value;
#elif FP16_USE_FP16_TYPE
    union {
        uint16_t as_bits;
        __fp16 as_value;
    } fp16 = { h };
    return (float) fp16.as_value;
#else
#if (defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__)
    return _cvtsh_ss((unsigned short) h);
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)
    return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128((int) (unsigned int) h)));
#elif defined(_M_ARM64) || defined(__aarch64__)
    return vgetq_lane_f32(vcvt_f32_f16(vreinterpret_f16_u16(vdup_n_u16(h))), 0);
#else
#error "Archtecture- or compiler-specific implementation required"
#endif
#endif
#else
    /*
     * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word:
     *      +---+-----+------------+-------------------+
     *      | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
     *      +---+-----+------------+-------------------+
     * Bits  31  26-30    16-25            0-15
     *
     * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits.
     */
    const uint32_t w = (uint32_t) h << 16;
    /*
     * Extract the sign of the input number into the high bit of the 32-bit word:
     *
     *      +---+----------------------------------+
     *      | S |0000000 00000000 00000000 00000000|
     *      +---+----------------------------------+
     * Bits  31                 0-31
     */
    const uint32_t sign = w & UINT32_C(0x80000000);
    /*
     * Extract mantissa and biased exponent of the input number into the high bits of the 32-bit word:
     *
     *      +-----+------------+---------------------+
     *      |EEEEE|MM MMMM MMMM|0 0000 0000 0000 0000|
     *      +-----+------------+---------------------+
     * Bits  27-31    17-26            0-16
     */
    const uint32_t two_w = w + w;

    /*
     * Shift mantissa and exponent into bits 23-28 and bits 13-22 so they become mantissa and exponent
     * of a single-precision floating-point number:
     *
     *       S|Exponent |          Mantissa
     *      +-+---+-----+------------+----------------+
     *      |0|000|EEEEE|MM MMMM MMMM|0 0000 0000 0000|
     *      +-+---+-----+------------+----------------+
     * Bits   | 23-31   |           0-22
     *
     * Next, there are some adjustments to the exponent:
     * - The exponent needs to be corrected by the difference in exponent bias between single-precision and half-precision
     *   formats (0x7F - 0xF = 0x70)
     * - Inf and NaN values in the inputs should become Inf and NaN values after conversion to the single-precision number.
     *   Therefore, if the biased exponent of the half-precision input was 0x1F (max possible value), the biased exponent
     *   of the single-precision output must be 0xFF (max possible value). We do this correction in two steps:
     *   - First, we adjust the exponent by (0xFF - 0x1F) = 0xE0 (see exp_offset below) rather than by 0x70 suggested
     *     by the difference in the exponent bias (see above).
     *   - Then we multiply the single-precision result of exponent adjustment by 2**(-112) to reverse the effect of
     *     exponent adjustment by 0xE0 less the necessary exponent adjustment by 0x70 due to difference in exponent bias.
     *     The floating-point multiplication hardware would ensure than Inf and NaN would retain their value on at least
     *     partially IEEE754-compliant implementations.
     *
     * Note that the above operations do not handle denormal inputs (where biased exponent == 0). However, they also do not
     * operate on denormal inputs, and do not produce denormal results.
     */
    const uint32_t exp_offset = UINT32_C(0xE0) << 23;
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
    /* 2**(-112) as a hexadecimal float literal (C99/GNU extension)... */
    const float exp_scale = 0x1.0p-112f;
#else
    /* ...or, without hex-float support, as the bit pattern with biased exponent 15. */
    const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
#endif
    const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;

    /*
     * Convert denormalized half-precision inputs into single-precision results (always normalized).
     * Zero inputs are also handled here.
     *
     * In a denormalized number the biased exponent is zero, and mantissa has non-zero bits.
     * First, we shift mantissa into bits 0-9 of the 32-bit word.
     *
     *                  zeros           |  mantissa
     *      +---------------------------+------------+
     *      |0000 0000 0000 0000 0000 00|MM MMMM MMMM|
     *      +---------------------------+------------+
     * Bits             10-31                0-9
     *
     * Now, remember that denormalized half-precision numbers are represented as:
     *    FP16 = mantissa * 2**(-24).
     * The trick is to construct a normalized single-precision number with the same mantissa as the half-precision input
     * and with an exponent which would scale the corresponding mantissa bits to 2**(-24).
     * A normalized single-precision floating-point number is represented as:
     *    FP32 = (1 + mantissa * 2**(-23)) * 2**(exponent - 127)
     * Therefore, when the biased exponent is 126, a unit change in the mantissa of the input denormalized half-precision
     * number causes a change of the constructed single-precision number by 2**(-24), i.e. the same amount.
     *
     * The last step is to adjust the bias of the constructed single-precision number. When the input half-precision number
     * is zero, the constructed single-precision number has the value of
     *    FP32 = 1 * 2**(126 - 127) = 2**(-1) = 0.5
     * Therefore, we need to subtract 0.5 from the constructed single-precision number to get the numerical equivalent of
     * the input half-precision number.
     */
    const uint32_t magic_mask = UINT32_C(126) << 23;
    const float magic_bias = 0.5f;
    const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;

    /*
     * - Choose either results of conversion of input as a normalized number, or as a denormalized number, depending on the
     *   input exponent. The variable two_w contains input exponent in bits 27-31, therefore if its smaller than 2**27, the
     *   input is either a denormal number, or zero.
     * - Combine the result of conversion of exponent and mantissa with the sign of the input number.
     */
    const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
    const uint32_t result = sign |
        (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
    return fp32_from_bits(result);
#endif
}
248 |
/*
 * Convert a 32-bit floating-point number in IEEE single-precision format to a 16-bit floating-point number in
 * IEEE half-precision format, in bit representation.
 *
 * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals)
 * floating-point operations and bitcasts between integer and floating-point variables.
 */
static inline uint16_t fp16_ieee_from_fp32_value(float f) {
#if FP16_USE_NATIVE_CONVERSION
    /* Native paths: narrow via a hardware half-precision type or a dedicated intrinsic. */
#if FP16_USE_FLOAT16_TYPE
    union {
        _Float16 as_value;
        uint16_t as_bits;
    } fp16 = { (_Float16) f };
    return fp16.as_bits;
#elif FP16_USE_FP16_TYPE
    union {
        __fp16 as_value;
        uint16_t as_bits;
    } fp16 = { (__fp16) f };
    return fp16.as_bits;
#else
#if (defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__)
    return _cvtss_sh(f, _MM_FROUND_CUR_DIRECTION);
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)
    return (uint16_t) _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(f), _MM_FROUND_CUR_DIRECTION));
#elif defined(_M_ARM64) || defined(__aarch64__)
    return vget_lane_u16(vcvt_f16_f32(vdupq_n_f32(f)), 0);
#else
#error "Archtecture- or compiler-specific implementation required"
#endif
#endif
#else
    /*
     * scale_to_inf = 2**112 and scale_to_zero = 2**(-110): multiplying |f| by
     * 2**112 pushes values too large for half precision over the single-precision
     * overflow threshold, and the subsequent multiply by 2**(-110) brings values
     * that will be half-precision denormals into a range where the hardware's own
     * rounding produces the correctly-rounded result.
     */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
    const float scale_to_inf = 0x1.0p+112f;
    const float scale_to_zero = 0x1.0p-110f;
#else
    const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
    const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
#endif
    /* volatile defeats excess-precision evaluation (e.g. x87) so the product is
     * rounded to float here rather than kept in a wider register. */
#if defined(_MSC_VER) && defined(_M_IX86_FP) && (_M_IX86_FP == 0) || defined(__GNUC__) && defined(__FLT_EVAL_METHOD__) && (__FLT_EVAL_METHOD__ != 0)
    const volatile float saturated_f = fabsf(f) * scale_to_inf;
#else
    const float saturated_f = fabsf(f) * scale_to_inf;
#endif
    float base = saturated_f * scale_to_zero;

    const uint32_t w = fp32_to_bits(f);
    /* shl1_w drops the sign bit, leaving exponent in the top 8 bits. */
    const uint32_t shl1_w = w + w;
    const uint32_t sign = w & UINT32_C(0x80000000);
    /* bias = input exponent field, clamped below so that the rounding-helper
     * addend never falls under the half-precision denormal threshold. */
    uint32_t bias = shl1_w & UINT32_C(0xFF000000);
    if (bias < UINT32_C(0x71000000)) {
        bias = UINT32_C(0x71000000);
    }

    /* Adding this magic value forces the hardware to round |f| at exactly the
     * bit position of the half-precision mantissa LSB; the half-precision
     * exponent and mantissa can then be read out of the sum's bit pattern. */
    base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
    const uint32_t bits = fp32_to_bits(base);
    const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
    const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
    const uint32_t nonsign = exp_bits + mantissa_bits;
    /* Inputs with exponent 0xFF (Inf/NaN) and overflows map to 0x7E00 (NaN);
     * everything else combines the computed magnitude with the original sign. */
    return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
#endif
}
312 |
/*
 * Widen an ARM alternative-format half-precision number, given as raw bits, to
 * the raw bits of the equivalent IEEE single-precision number. The alternative
 * format has no Inf/NaN encodings: exponent 0x1F is an ordinary finite value.
 *
 * @note Integer-only implementation: no floating-point operations are used.
 */
static inline uint32_t fp16_alt_to_fp32_bits(uint16_t h) {
    /* Place the 16 input bits in the top half of a 32-bit word:
     * sign in bit 31, 5-bit exponent in bits 26-30, 10-bit mantissa in bits 16-25. */
    const uint32_t half_bits = (uint32_t) h << 16;
    /* Isolate the sign (bit 31) and the magnitude (exponent + mantissa) fields. */
    const uint32_t sign_bit = half_bits & UINT32_C(0x80000000);
    const uint32_t abs_bits = half_bits & UINT32_C(0x7FFFFFFF);
    /* Count how far the magnitude must be shifted left to normalize a denormal
     * input; normalized inputs already have one of the top 6 bits set, so the
     * clamp below yields a shift of zero for them. */
#ifdef _MSC_VER
    unsigned long msb_index;
    _BitScanReverse(&msb_index, (unsigned long) abs_bits);
    uint32_t shift = (uint32_t) msb_index ^ 31;
#else
    uint32_t shift = __builtin_clz(abs_bits);
#endif
    shift = shift > 5 ? shift - 5 : 0;
    /* All-ones when the input magnitude was zero (abs_bits - 1 wraps negative),
     * all-zeros otherwise; used to force +/-0 results below. */
    const int32_t zero_mask = (int32_t) (abs_bits - 1) >> 31;
    /* Normalize (shift), realign exponent/mantissa into single-precision
     * positions (>> 3), rebias the exponent by 0x70 - shift, squash zeros, and
     * restore the sign. No Inf/NaN patching is needed in the alternative format. */
    return sign_bit | (((abs_bits << shift >> 3) + ((0x70 - shift) << 23)) & ~zero_mask);
}
384 |
/*
 * Convert a 16-bit floating-point number in ARM alternative half-precision format, in bit representation, to
 * a 32-bit floating-point number in IEEE single-precision format.
 *
 * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals)
 * floating-point operations and bitcasts between integer and floating-point variables.
 */
static inline float fp16_alt_to_fp32_value(uint16_t h) {
    /*
     * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word:
     *      +---+-----+------------+-------------------+
     *      | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
     *      +---+-----+------------+-------------------+
     * Bits  31  26-30    16-25            0-15
     *
     * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits.
     */
    const uint32_t w = (uint32_t) h << 16;
    /*
     * Extract the sign of the input number into the high bit of the 32-bit word:
     *
     *      +---+----------------------------------+
     *      | S |0000000 00000000 00000000 00000000|
     *      +---+----------------------------------+
     * Bits  31                 0-31
     */
    const uint32_t sign = w & UINT32_C(0x80000000);
    /*
     * Extract mantissa and biased exponent of the input number into the high bits of the 32-bit word:
     *
     *      +-----+------------+---------------------+
     *      |EEEEE|MM MMMM MMMM|0 0000 0000 0000 0000|
     *      +-----+------------+---------------------+
     * Bits  27-31    17-26            0-16
     */
    const uint32_t two_w = w + w;

    /*
     * Shift mantissa and exponent into bits 23-28 and bits 13-22 so they become mantissa and exponent
     * of a single-precision floating-point number:
     *
     *       S|Exponent |          Mantissa
     *      +-+---+-----+------------+----------------+
     *      |0|000|EEEEE|MM MMMM MMMM|0 0000 0000 0000|
     *      +-+---+-----+------------+----------------+
     * Bits   | 23-31   |           0-22
     *
     * Next, the exponent is adjusted for the difference in exponent bias between single-precision and half-precision
     * formats (0x7F - 0xF = 0x70). This operation never overflows or generates non-finite values, as the largest
     * half-precision exponent is 0x1F and after the adjustment it can not exceed 0x8F < 0xFE (largest single-precision
     * exponent for non-finite values).
     *
     * Note that this operation does not handle denormal inputs (where biased exponent == 0). However, they also do not
     * operate on denormal inputs, and do not produce denormal results.
     */
    const uint32_t exp_offset = UINT32_C(0x70) << 23;
    const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset);

    /*
     * Convert denormalized half-precision inputs into single-precision results (always normalized).
     * Zero inputs are also handled here.
     *
     * In a denormalized number the biased exponent is zero, and mantissa has non-zero bits.
     * First, we shift mantissa into bits 0-9 of the 32-bit word.
     *
     *                  zeros           |  mantissa
     *      +---------------------------+------------+
     *      |0000 0000 0000 0000 0000 00|MM MMMM MMMM|
     *      +---------------------------+------------+
     * Bits             10-31                0-9
     *
     * Now, remember that denormalized half-precision numbers are represented as:
     *    FP16 = mantissa * 2**(-24).
     * The trick is to construct a normalized single-precision number with the same mantissa as the half-precision input
     * and with an exponent which would scale the corresponding mantissa bits to 2**(-24).
     * A normalized single-precision floating-point number is represented as:
     *    FP32 = (1 + mantissa * 2**(-23)) * 2**(exponent - 127)
     * Therefore, when the biased exponent is 126, a unit change in the mantissa of the input denormalized half-precision
     * number causes a change of the constructed single-precision number by 2**(-24), i.e. the same amount.
     *
     * The last step is to adjust the bias of the constructed single-precision number. When the input half-precision number
     * is zero, the constructed single-precision number has the value of
     *    FP32 = 1 * 2**(126 - 127) = 2**(-1) = 0.5
     * Therefore, we need to subtract 0.5 from the constructed single-precision number to get the numerical equivalent of
     * the input half-precision number.
     */
    const uint32_t magic_mask = UINT32_C(126) << 23;
    const float magic_bias = 0.5f;
    const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;

    /*
     * - Choose either results of conversion of input as a normalized number, or as a denormalized number, depending on the
     *   input exponent. The variable two_w contains input exponent in bits 27-31, therefore if its smaller than 2**27, the
     *   input is either a denormal number, or zero.
     * - Combine the result of conversion of exponent and mantissa with the sign of the input number.
     */
    const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
    const uint32_t result = sign |
        (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
    return fp32_from_bits(result);
}
486 |
/*
 * Convert a 32-bit floating-point number in IEEE single-precision format to a 16-bit floating-point number in
 * ARM alternative half-precision format, in bit representation.
 *
 * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals)
 * floating-point operations and bitcasts between integer and floating-point variables.
 */
static inline uint16_t fp16_alt_from_fp32_value(float f) {
    const uint32_t w = fp32_to_bits(f);
    const uint32_t sign = w & UINT32_C(0x80000000);
    /* Doubling drops the sign bit, leaving the exponent in the top 8 bits. */
    const uint32_t shl1_w = w + w;

    /* Clamp the magnitude to the largest value representable in the alternative
     * half-precision format (the format has no Inf/NaN, so conversion saturates).
     * 0x8FFFC000 is that maximum, in the same sign-stripped doubled encoding. */
    const uint32_t shl1_max_fp16_fp32 = UINT32_C(0x8FFFC000);
    const uint32_t shl1_base = shl1_w > shl1_max_fp16_fp32 ? shl1_max_fp16_fp32 : shl1_w;
    /* Exponent field of the clamped input, floored below so the rounding-helper
     * addend stays above the half-precision denormal threshold. */
    uint32_t shl1_bias = shl1_base & UINT32_C(0xFF000000);
    /* 13 = difference between single- and half-precision mantissa widths (23 - 10). */
    const uint32_t exp_difference = 23 - 10;
    const uint32_t shl1_bias_min = (127 - 1 - exp_difference) << 24;
    if (shl1_bias < shl1_bias_min) {
        shl1_bias = shl1_bias_min;
    }

    /* Adding the magic bias forces the hardware to round the magnitude at the
     * half-precision mantissa LSB position; exponent and mantissa of the result
     * are then read straight out of the sum's bit pattern. */
    const float bias = fp32_from_bits((shl1_bias >> 1) + ((exp_difference + 2) << 23));
    const float base = fp32_from_bits((shl1_base >> 1) + (2 << 23)) + bias;

    /* Reassemble: 5-bit exponent from bits 23-27 of the sum, 10-bit mantissa
     * (plus rounding carry) from the low bits, original sign on top. */
    const uint32_t exp_f = fp32_to_bits(base) >> 13;
    return (sign >> 16) | ((exp_f & UINT32_C(0x00007C00)) + (fp32_to_bits(base) & UINT32_C(0x00000FFF)));
}
514 |
515 | #endif /* FP16_FP16_H */
516 |
--------------------------------------------------------------------------------
/src/distance-cpu.c:
--------------------------------------------------------------------------------
1 | //
2 | // distance-cpu.c
3 | // sqlitevector
4 | //
5 | // Created by Marco Bambini on 20/06/25.
6 | //
7 |
8 | #include "distance-cpu.h"
9 |
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 |
16 | #include "distance-neon.h"
17 | #include "distance-sse2.h"
18 | #include "distance-avx2.h"
19 |
20 | char *distance_backend_name = "CPU";
21 | distance_function_t dispatch_distance_table[VECTOR_DISTANCE_MAX][VECTOR_TYPE_MAX] = {0};
22 |
// One step of a LAPACK LASSQ-style scaled sum of squares: maintains the
// invariant  sum-of-squares-so-far == scale^2 * ssq  while avoiding overflow
// and underflow of the running accumulator.
// Requires local variables `scale` and `ssq` in scope, initialized to 0.0 and
// 1.0 respectively (see bfloat16_distance_l2_impl_cpu below). `ad_` should be
// a non-negative magnitude — a negative value with scale == 0 would divide by
// zero in the else branch.
#define LASSQ_UPDATE(ad_) do { \
    double _ad = (ad_); \
    if (_ad != 0.0) { \
        if (scale < _ad) { \
            double r = scale / _ad; \
            ssq = 1.0 + ssq * (r * r); \
            scale = _ad; \
        } else { \
            double r = _ad / scale; \
            ssq += r * r; \
        } \
    } \
} while (0)
36 |
37 | // MARK: FLOAT32 -
38 |
39 | float float32_distance_l2_impl_cpu (const void *v1, const void *v2, int n, bool use_sqrt) {
40 | const float *a = (const float *)v1;
41 | const float *b = (const float *)v2;
42 |
43 | float sum_sq = 0.0f;
44 | int i = 0;
45 |
46 | if (n >= 4) {
47 | // unroll the loop 4 times
48 | for (; i <= n - 4; i += 4) {
49 | float d0 = a[i] - b[i];
50 | float d1 = a[i+1] - b[i+1];
51 | float d2 = a[i+2] - b[i+2];
52 | float d3 = a[i+3] - b[i+3];
53 | sum_sq += d0*d0 + d1*d1 + d2*d2 + d3*d3;
54 | }
55 | }
56 |
57 | // tail loop
58 | for (; i < n; i++) {
59 | float d = a[i] - b[i];
60 | sum_sq += d * d;
61 | }
62 |
63 | return use_sqrt ? sqrtf(sum_sq) : sum_sq;
64 | }
65 |
// Euclidean (L2) distance between two float32 vectors of length n:
// sqrt(sum((a[i]-b[i])^2)).
float float32_distance_l2_cpu (const void *v1, const void *v2, int n) {
    return float32_distance_l2_impl_cpu(v1, v2, n, true);
}
69 |
// Squared Euclidean (L2) distance between two float32 vectors of length n:
// sum((a[i]-b[i])^2). Cheaper than L2 and order-preserving for ranking.
float float32_distance_l2_squared_cpu (const void *v1, const void *v2, int n) {
    return float32_distance_l2_impl_cpu(v1, v2, n, false);
}
73 |
74 | float float32_distance_cosine_cpu (const void *v1, const void *v2, int n) {
75 | const float *a = (const float *)v1;
76 | const float *b = (const float *)v2;
77 |
78 | float dot = 0.0f;
79 | float norm_x = 0.0f;
80 | float norm_y = 0.0f;
81 | int i = 0;
82 |
83 | // unroll the loop 4 times
84 | for (; i <= n - 4; i += 4) {
85 | float x0 = a[i], y0 = b[i];
86 | float x1 = a[i + 1], y1 = b[i + 1];
87 | float x2 = a[i + 2], y2 = b[i + 2];
88 | float x3 = a[i + 3], y3 = b[i + 3];
89 |
90 | dot += x0*y0 + x1*y1 + x2*y2 + x3*y3;
91 | norm_x += x0*x0 + x1*x1 + x2*x2 + x3*x3;
92 | norm_y += y0*y0 + y1*y1 + y2*y2 + y3*y3;
93 | }
94 |
95 | // tail loop
96 | for (; i < n; i++) {
97 | float x = a[i];
98 | float y = b[i];
99 | dot += x * y;
100 | norm_x += x * x;
101 | norm_y += y * y;
102 | }
103 |
104 | // max distance if one vector is zero
105 | if (norm_x == 0.0f || norm_y == 0.0f) {
106 | return 1.0f;
107 | }
108 |
109 | return 1.0f - (dot / (sqrtf(norm_x) * sqrtf(norm_y)));
110 | }
111 |
// Negative inner product of two float32 vectors of length n. The negation
// turns "larger dot product = more similar" into a distance-like value where
// smaller is better.
float float32_distance_dot_cpu (const void *v1, const void *v2, int n) {
    const float *p = (const float *)v1;
    const float *q = (const float *)v2;

    float acc = 0.0f;
    int k = 0;

    // main loop, four products per iteration
    for (; k <= n - 4; k += 4) {
        float p0 = p[k],     q0 = q[k];
        float p1 = p[k + 1], q1 = q[k + 1];
        float p2 = p[k + 2], q2 = q[k + 2];
        float p3 = p[k + 3], q3 = q[k + 3];
        acc += p0*q0 + p1*q1 + p2*q2 + p3*q3;
    }

    // remaining 0-3 elements
    for (; k < n; k++) {
        acc += p[k] * q[k];
    }

    return -acc;
}
137 |
// L1 (Manhattan) distance between two float32 vectors of length n.
float float32_distance_l1_cpu (const void *v1, const void *v2, int n) {
    const float *x = (const float *)v1;
    const float *y = (const float *)v2;

    float total = 0.0f;
    int k = 0;

    // manual 4x unroll; the inner lane loop keeps the summation order
    // identical to element-at-a-time processing
    for (; k <= n - 4; k += 4) {
        for (int lane = 0; lane < 4; ++lane) {
            total += fabsf(x[k + lane] - y[k + lane]);
        }
    }

    // leftover 0-3 elements
    while (k < n) {
        total += fabsf(x[k] - y[k]);
        ++k;
    }

    return total;
}
160 |
161 | // MARK: - BFLOAT16 -
162 |
// Overflow/underflow-safe L2 using LASSQ, unrolled by 4.
//
// Shared kernel for the bfloat16 L2 / squared-L2 distances. Squared
// differences are accumulated with the LAPACK-style LASSQ recurrence
// (LASSQ_UPDATE mutates the locals `scale` and `ssq` by name), avoiding
// intermediate overflow/underflow in the double-precision sum of squares.
// Special-value handling, as implemented below:
//   - any infinite per-element difference returns +INFINITY immediately;
//   - NaN differences are skipped (contribute 0) — note that +Inf vs +Inf
//     produces a NaN difference and is therefore ignored here, unlike the
//     float16 kernel which checks the raw inputs; NOTE(review): confirm
//     this asymmetry is intended.
static inline float bfloat16_distance_l2_impl_cpu (const void *v1, const void *v2, int n, bool use_sqrt) {
    const uint16_t *a = (const uint16_t *)v1;  // raw bfloat16 bit patterns
    const uint16_t *b = (const uint16_t *)v2;

    double scale = 0.0;  // LASSQ running scale factor
    double ssq = 1.0;    // LASSQ scaled sum of squares

    int i = 0;

    // unrolled main loop (x4)
    for (; i <= n - 4; i += 4) {
        float x0 = bfloat16_to_float32(a[i    ]), y0 = bfloat16_to_float32(b[i    ]);
        float x1 = bfloat16_to_float32(a[i + 1]), y1 = bfloat16_to_float32(b[i + 1]);
        float x2 = bfloat16_to_float32(a[i + 2]), y2 = bfloat16_to_float32(b[i + 2]);
        float x3 = bfloat16_to_float32(a[i + 3]), y3 = bfloat16_to_float32(b[i + 3]);

        float d0f = x0 - y0, d1f = x1 - y1, d2f = x2 - y2, d3f = x3 - y3;

        // If any difference is NaN, ignore that lane (treat contribution as 0)
        if (isinf(d0f)) return INFINITY; if (!isnan(d0f)) LASSQ_UPDATE(fabs((double)d0f));
        if (isinf(d1f)) return INFINITY; if (!isnan(d1f)) LASSQ_UPDATE(fabs((double)d1f));
        if (isinf(d2f)) return INFINITY; if (!isnan(d2f)) LASSQ_UPDATE(fabs((double)d2f));
        if (isinf(d3f)) return INFINITY; if (!isnan(d3f)) LASSQ_UPDATE(fabs((double)d3f));
    }

    // tail: remaining 0-3 elements, same special-value rules
    for (; i < n; ++i) {
        float d = bfloat16_to_float32(a[i]) - bfloat16_to_float32(b[i]);
        if (isinf(d)) return INFINITY;
        if (!isnan(d)) LASSQ_UPDATE(fabs(d));
    }

    // LASSQ reconstruction: sum of squares = scale^2 * ssq
    double sum_sq = (scale == 0.0) ? 0.0 : (scale * scale * ssq);
    double out = use_sqrt ? sqrt(sum_sq) : sum_sq;
    return (float)out;
}
198 |
// Euclidean (L2) distance between two bfloat16 vectors of length n.
float bfloat16_distance_l2_cpu (const void *v1, const void *v2, int n) {
    return bfloat16_distance_l2_impl_cpu(v1, v2, n, true);
}
202 |
// Squared L2 distance between two bfloat16 vectors of length n (no sqrt).
float bfloat16_distance_l2_squared_cpu (const void *v1, const void *v2, int n) {
    return bfloat16_distance_l2_impl_cpu(v1, v2, n, false);
}
206 |
// Cosine distance (1 - cosine similarity) between two bfloat16 vectors of
// length n, accumulated in float32 via fmaf. Returns 1.0f (maximum
// distance) when either vector has a zero norm.
float bfloat16_distance_cosine_cpu(const void *v1, const void *v2, int n) {
    const uint16_t *pa = (const uint16_t *)v1;  // raw bfloat16 bits
    const uint16_t *pb = (const uint16_t *)v2;

    float dp = 0.0f;  // dot product
    float na = 0.0f;  // squared norm of first vector
    float nb = 0.0f;  // squared norm of second vector
    int k = 0;

    // four lanes per iteration (manual 4x unroll)
    for (; k <= n - 4; k += 4) {
        float x0 = bfloat16_to_float32(pa[k]),     y0 = bfloat16_to_float32(pb[k]);
        float x1 = bfloat16_to_float32(pa[k + 1]), y1 = bfloat16_to_float32(pb[k + 1]);
        float x2 = bfloat16_to_float32(pa[k + 2]), y2 = bfloat16_to_float32(pb[k + 2]);
        float x3 = bfloat16_to_float32(pa[k + 3]), y3 = bfloat16_to_float32(pb[k + 3]);

        // fmaf may fuse into a hardware multiply-add on capable CPUs
        dp = fmaf(x0, y0, dp);
        dp = fmaf(x1, y1, dp);
        dp = fmaf(x2, y2, dp);
        dp = fmaf(x3, y3, dp);

        na = fmaf(x0, x0, na);
        na = fmaf(x1, x1, na);
        na = fmaf(x2, x2, na);
        na = fmaf(x3, x3, na);

        nb = fmaf(y0, y0, nb);
        nb = fmaf(y1, y1, nb);
        nb = fmaf(y2, y2, nb);
        nb = fmaf(y3, y3, nb);
    }

    // leftover 0-3 elements
    while (k < n) {
        float xv = bfloat16_to_float32(pa[k]);
        float yv = bfloat16_to_float32(pb[k]);
        dp = fmaf(xv, yv, dp);
        na = fmaf(xv, xv, na);
        nb = fmaf(yv, yv, nb);
        ++k;
    }

    // a zero vector has no direction: report maximum distance
    if (na == 0.0f || nb == 0.0f) {
        return 1.0f;
    }

    return 1.0f - (dp / (sqrtf(na) * sqrtf(nb)));
}
254 |
// Dot-product "distance": the negated inner product of two bfloat16
// vectors of length n, accumulated in float32 via fmaf.
float bfloat16_distance_dot_cpu (const void *v1, const void *v2, int n) {
    const uint16_t *pa = (const uint16_t *)v1;  // raw bfloat16 bits
    const uint16_t *pb = (const uint16_t *)v2;

    float acc = 0.0f;
    int k = 0;

    // four lanes per iteration (manual 4x unroll)
    for (; k <= n - 4; k += 4) {
        float x0 = bfloat16_to_float32(pa[k]),     y0 = bfloat16_to_float32(pb[k]);
        float x1 = bfloat16_to_float32(pa[k + 1]), y1 = bfloat16_to_float32(pb[k + 1]);
        float x2 = bfloat16_to_float32(pa[k + 2]), y2 = bfloat16_to_float32(pb[k + 2]);
        float x3 = bfloat16_to_float32(pa[k + 3]), y3 = bfloat16_to_float32(pb[k + 3]);

        // fmaf may fuse into a hardware multiply-add (precision/speed)
        acc = fmaf(x0, y0, acc);
        acc = fmaf(x1, y1, acc);
        acc = fmaf(x2, y2, acc);
        acc = fmaf(x3, y3, acc);
    }

    // leftover 0-3 elements
    while (k < n) {
        acc = fmaf(bfloat16_to_float32(pa[k]), bfloat16_to_float32(pb[k]), acc);
        ++k;
    }

    return -acc;
}
285 |
// L1 (Manhattan) distance between two bfloat16 vectors of length n,
// accumulated in float32.
float bfloat16_distance_l1_cpu (const void *v1, const void *v2, int n) {
    const uint16_t *pa = (const uint16_t *)v1;  // raw bfloat16 bits
    const uint16_t *pb = (const uint16_t *)v2;

    float total = 0.0f;
    int k = 0;

    // manual 4x unroll; inner lane loop keeps the addition order
    // identical to element-at-a-time processing
    for (; k <= n - 4; k += 4) {
        for (int lane = 0; lane < 4; ++lane) {
            float xv = bfloat16_to_float32(pa[k + lane]);
            float yv = bfloat16_to_float32(pb[k + lane]);
            total += fabsf(xv - yv);
        }
    }

    // leftover 0-3 elements
    while (k < n) {
        float xv = bfloat16_to_float32(pa[k]);
        float yv = bfloat16_to_float32(pb[k]);
        total += fabsf(xv - yv);
        ++k;
    }

    return total;
}
315 |
316 | // MARK: - FLOAT16 -
317 |
// Overflow/underflow-safe L2 kernel for float16 vectors, shared by the
// plain and squared variants. Squared differences are accumulated in
// double precision via the LAPACK-style LASSQ recurrence (LASSQ_UPDATE
// mutates the locals `scale` and `ssq` by name). Special values, as
// implemented below:
//   - an infinity in either input not matched by a same-signed infinity
//     in the other returns +INFINITY immediately;
//   - lanes where either input is NaN contribute 0;
//   - matched same-signed infinities pass the checks and produce a NaN
//     difference that reaches LASSQ_UPDATE — NOTE(review): confirm that
//     is intended.
static inline float float16_distance_l2_impl_cpu (const void *v1, const void *v2, int n, bool use_sqrt) {
    const uint16_t *a = (const uint16_t *)v1; /* float16 bits */
    const uint16_t *b = (const uint16_t *)v2;

    double scale = 0.0;  /* LASSQ running scale factor */
    double ssq = 1.0;    /* LASSQ scaled sum of squares */
    int i = 0;

    /* main loop, unrolled by 4 */
    for (; i <= n - 4; i += 4) {
        uint16_t a0=a[i], a1=a[i+1], a2=a[i+2], a3=a[i+3];
        uint16_t b0=b[i], b1=b[i+1], b2=b[i+2], b3=b[i+3];

        /* If any pair involves an infinity not matched with same-signed infinity → +Inf */
        if ((f16_is_inf(a0)||f16_is_inf(b0)) && !(f16_is_inf(a0)&&f16_is_inf(b0)&&f16_sign(a0)==f16_sign(b0))) return INFINITY;
        if ((f16_is_inf(a1)||f16_is_inf(b1)) && !(f16_is_inf(a1)&&f16_is_inf(b1)&&f16_sign(a1)==f16_sign(b1))) return INFINITY;
        if ((f16_is_inf(a2)||f16_is_inf(b2)) && !(f16_is_inf(a2)&&f16_is_inf(b2)&&f16_sign(a2)==f16_sign(b2))) return INFINITY;
        if ((f16_is_inf(a3)||f16_is_inf(b3)) && !(f16_is_inf(a3)&&f16_is_inf(b3)&&f16_sign(a3)==f16_sign(b3))) return INFINITY;

        /* NaN lanes contribute 0 */
        if (!f16_is_nan(a0) && !f16_is_nan(b0)) { double d = (double)float16_to_float32(a0) - (double)float16_to_float32(b0); LASSQ_UPDATE(fabs(d)); }
        if (!f16_is_nan(a1) && !f16_is_nan(b1)) { double d = (double)float16_to_float32(a1) - (double)float16_to_float32(b1); LASSQ_UPDATE(fabs(d)); }
        if (!f16_is_nan(a2) && !f16_is_nan(b2)) { double d = (double)float16_to_float32(a2) - (double)float16_to_float32(b2); LASSQ_UPDATE(fabs(d)); }
        if (!f16_is_nan(a3) && !f16_is_nan(b3)) { double d = (double)float16_to_float32(a3) - (double)float16_to_float32(b3); LASSQ_UPDATE(fabs(d)); }
    }

    /* tail: remaining 0-3 elements, same special-value rules */
    for (; i < n; ++i) {
        uint16_t ai=a[i], bi=b[i];
        if ((f16_is_inf(ai)||f16_is_inf(bi)) && !(f16_is_inf(ai)&&f16_is_inf(bi)&&f16_sign(ai)==f16_sign(bi))) return INFINITY;
        if (f16_is_nan(ai) || f16_is_nan(bi)) continue;
        double d = (double)float16_to_float32(ai) - (double)float16_to_float32(bi);
        LASSQ_UPDATE(fabs(d));
    }

    /* LASSQ reconstruction: sum of squares = scale^2 * ssq */
    double sum_sq = (scale == 0.0) ? 0.0 : (scale * scale * ssq);
    double out = use_sqrt ? sqrt(sum_sq) : sum_sq;
    return (float)out;
}
357 |
// Euclidean (L2) distance between two float16 vectors of length n.
float float16_distance_l2_cpu (const void *v1, const void *v2, int n) {
    return float16_distance_l2_impl_cpu(v1, v2, n, true);
}
361 |
// Squared L2 distance between two float16 vectors of length n (no sqrt).
float float16_distance_l2_squared_cpu (const void *v1, const void *v2, int n) {
    return float16_distance_l2_impl_cpu(v1, v2, n, false);
}
365 |
// L1 (Manhattan) distance between two float16 vectors of length n,
// accumulated in double precision.
//   - an infinity not matched by a same-signed infinity in the other
//     vector yields +INFINITY;
//   - lanes where either input is NaN contribute nothing.
float float16_distance_l1_cpu (const void *v1, const void *v2, int n) {
    const uint16_t *pa = (const uint16_t *)v1;  /* float16 bits */
    const uint16_t *pb = (const uint16_t *)v2;

    double total = 0.0;

    for (int k = 0; k < n; ++k) {
        uint16_t ha = pa[k];
        uint16_t hb = pb[k];

        /* unmatched infinity → the distance itself is infinite */
        if ((f16_is_inf(ha) || f16_is_inf(hb)) &&
            !(f16_is_inf(ha) && f16_is_inf(hb) && f16_sign(ha) == f16_sign(hb))) {
            return INFINITY;
        }

        /* NaN lanes are skipped */
        if (f16_is_nan(ha) || f16_is_nan(hb)) continue;

        total += fabs((double)float16_to_float32(ha) - (double)float16_to_float32(hb));
    }

    return (float)total;
}
398 |
// Dot-product "distance": the negated inner product of two float16
// vectors of length n, accumulated in double precision.
//   - NaN lanes are skipped;
//   - a per-lane product of +/-Inf short-circuits to the corresponding
//     infinite distance (negated, since the result is -dot).
float float16_distance_dot_cpu (const void *v1, const void *v2, int n) {
    const uint16_t *pa = (const uint16_t *)v1;  /* float16 bits */
    const uint16_t *pb = (const uint16_t *)v2;

    double acc = 0.0;

    for (int k = 0; k < n; ++k) {
        float fx = float16_to_float32(pa[k]);
        float fy = float16_to_float32(pb[k]);

        /* skip NaN lanes */
        if (isnan(fx) || isnan(fy)) continue;

        double prod = (double)fx * (double)fy;
        if (isinf(prod)) return (prod > 0) ? -INFINITY : INFINITY;
        acc += prod;
    }

    return (float)(-acc);
}
430 |
// Cosine distance (1 - cosine similarity) between two float16 vectors of
// length n, accumulated in double precision. NaN lanes are skipped; any
// infinite input, a zero norm, or a non-finite accumulator yields the
// maximum distance 1.0f. The similarity is clamped to [-1, 1] before the
// final subtraction.
float float16_distance_cosine_cpu (const void *v1, const void *v2, int n) {
    const uint16_t *pa = (const uint16_t *)v1;  /* float16 bits */
    const uint16_t *pb = (const uint16_t *)v2;

    double dp = 0.0;  /* dot product */
    double na = 0.0;  /* squared norm of first vector */
    double nb = 0.0;  /* squared norm of second vector */

    for (int k = 0; k < n; ++k) {
        float fx = float16_to_float32(pa[k]);
        float fy = float16_to_float32(pb[k]);

        if (isnan(fx) || isnan(fy)) continue;            /* skip NaN lanes */
        if (isinf((double)fx) || isinf((double)fy)) return 1.0f;

        double dx = fx, dy = fy;
        dp += dx * dy;
        na += dx * dx;
        nb += dy * dy;
    }

    double denom = sqrt(na) * sqrt(nb);
    if (!(denom > 0.0) || !isfinite(denom) || !isfinite(dp)) return 1.0f;

    double cosv = dp / denom;
    if (cosv > 1.0) cosv = 1.0;
    if (cosv < -1.0) cosv = -1.0;
    return (float)(1.0 - cosv);
}
467 |
468 | // MARK: - UINT8 -
469 |
470 | static inline float uint8_distance_l2_impl_cpu (const void *v1, const void *v2, int n, bool use_sqrt) {
471 | const uint8_t *a = (const uint8_t *)v1;
472 | const uint8_t *b = (const uint8_t *)v2;
473 |
474 | float sum = 0.0f;
475 | int i = 0;
476 |
477 | // unrolled loop
478 | for (; i <= n - 4; i += 4) {
479 | int d0 = (int)a[i + 0] - (int)b[i + 0];
480 | int d1 = (int)a[i + 1] - (int)b[i + 1];
481 | int d2 = (int)a[i + 2] - (int)b[i + 2];
482 | int d3 = (int)a[i + 3] - (int)b[i + 3];
483 |
484 | sum += (float)(d0 * d0 + d1 * d1 + d2 * d2 + d3 * d3);
485 | }
486 |
487 | // tail loop
488 | for (; i < n; ++i) {
489 | int d = (int)a[i] - (int)b[i];
490 | sum += (float)(d * d);
491 | }
492 |
493 | return use_sqrt ? sqrtf(sum) : sum;
494 | }
495 |
// Euclidean (L2) distance between two uint8 vectors of length n.
float uint8_distance_l2_cpu (const void *v1, const void *v2, int n) {
    return uint8_distance_l2_impl_cpu(v1, v2, n, true);
}
499 |
// Squared L2 distance between two uint8 vectors of length n (no sqrt).
float uint8_distance_l2_squared_cpu (const void *v1, const void *v2, int n) {
    return uint8_distance_l2_impl_cpu(v1, v2, n, false);
}
503 |
504 | float uint8_distance_cosine_cpu (const void *v1, const void *v2, int n) {
505 | const uint8_t *a = (const uint8_t *)v1;
506 | const uint8_t *b = (const uint8_t *)v2;
507 |
508 | uint32_t dot = 0;
509 | uint32_t norm_a2 = 0;
510 | uint32_t norm_b2 = 0;
511 |
512 | int i = 0;
513 | for (; i <= n - 4; i += 4) {
514 | uint32_t a0 = a[i + 0], b0 = b[i + 0];
515 | uint32_t a1 = a[i + 1], b1 = b[i + 1];
516 | uint32_t a2 = a[i + 2], b2 = b[i + 2];
517 | uint32_t a3 = a[i + 3], b3 = b[i + 3];
518 |
519 | dot += a0 * b0 + a1 * b1 + a2 * b2 + a3 * b3;
520 | norm_a2 += a0 * a0 + a1 * a1 + a2 * a2 + a3 * a3;
521 | norm_b2 += b0 * b0 + b1 * b1 + b2 * b2 + b3 * b3;
522 | }
523 |
524 | // tail loop
525 | for (; i < n; ++i) {
526 | uint32_t ai = a[i];
527 | uint32_t bi = b[i];
528 | dot += ai * bi;
529 | norm_a2 += ai * ai;
530 | norm_b2 += bi * bi;
531 | }
532 |
533 | if (norm_a2 == 0 || norm_b2 == 0) {
534 | return 1.0f;
535 | }
536 |
537 | float cosine_similarity = dot / (sqrtf((float)norm_a2) * sqrtf((float)norm_b2));
538 | return 1.0f - cosine_similarity;
539 | }
540 |
// Dot-product "distance": the negated inner product of two uint8 vectors
// of length n, accumulated in float.
float uint8_distance_dot_cpu (const void *v1, const void *v2, int n) {
    const uint8_t *x = (const uint8_t *)v1;
    const uint8_t *y = (const uint8_t *)v2;
    float acc = 0.0f;

    int k = 0;
    // manual 4x unroll; inner lane loop preserves the addition order
    for (; k <= n - 4; k += 4) {
        for (int lane = 0; lane < 4; ++lane) {
            acc += (float)(x[k + lane]) * y[k + lane];
        }
    }
    while (k < n) {
        acc += (float)(x[k]) * y[k];
        ++k;
    }

    return -acc; // dot distance = negative dot product
}
559 |
// L1 (Manhattan) distance between two uint8 vectors of length n.
float uint8_distance_l1_cpu (const void *v1, const void *v2, int n) {
    const uint8_t *x = (const uint8_t *)v1;
    const uint8_t *y = (const uint8_t *)v2;
    float total = 0.0f;

    int k = 0;
    // manual 4x unroll; inner lane loop preserves the addition order
    for (; k <= n - 4; k += 4) {
        for (int lane = 0; lane < 4; ++lane) {
            total += fabsf((float)x[k + lane] - (float)y[k + lane]);
        }
    }

    while (k < n) {
        total += fabsf((float)x[k] - (float)y[k]);
        ++k;
    }

    return total;
}
579 |
580 | // MARK: - INT8 -
581 |
582 | float int8_distance_l2_impl_cpu (const void *v1, const void *v2, int n, bool use_sqrt) {
583 | const int8_t *a = (const int8_t *)v1;
584 | const int8_t *b = (const int8_t *)v2;
585 |
586 | float sum = 0.0f;
587 | int i = 0;
588 |
589 | // unrolled loop
590 | for (; i <= n - 4; i += 4) {
591 | int d0 = (int)a[i + 0] - (int)b[i + 0];
592 | int d1 = (int)a[i + 1] - (int)b[i + 1];
593 | int d2 = (int)a[i + 2] - (int)b[i + 2];
594 | int d3 = (int)a[i + 3] - (int)b[i + 3];
595 |
596 | sum += (float)(d0 * d0 + d1 * d1 + d2 * d2 + d3 * d3);
597 | }
598 |
599 | // tail loop
600 | for (; i < n; ++i) {
601 | int d = (int)a[i] - (int)b[i];
602 | sum += (float)(d * d);
603 | }
604 |
605 | return use_sqrt ? sqrtf(sum) : sum;
606 | }
607 |
// Euclidean (L2) distance between two int8 vectors of length n.
float int8_distance_l2_cpu (const void *v1, const void *v2, int n) {
    return int8_distance_l2_impl_cpu(v1, v2, n, true);
}
611 |
// Squared L2 distance between two int8 vectors of length n (no sqrt).
float int8_distance_l2_squared_cpu (const void *v1, const void *v2, int n) {
    return int8_distance_l2_impl_cpu(v1, v2, n, false);
}
615 |
616 | float int8_distance_cosine_cpu (const void *v1, const void *v2, int n) {
617 | const int8_t *a = (const int8_t *)v1;
618 | const int8_t *b = (const int8_t *)v2;
619 |
620 | int32_t dot = 0;
621 | int32_t norm_a2 = 0;
622 | int32_t norm_b2 = 0;
623 |
624 | int i = 0;
625 | for (; i <= n - 4; i += 4) {
626 | int32_t a0 = a[i + 0], b0 = b[i + 0];
627 | int32_t a1 = a[i + 1], b1 = b[i + 1];
628 | int32_t a2 = a[i + 2], b2 = b[i + 2];
629 | int32_t a3 = a[i + 3], b3 = b[i + 3];
630 |
631 | dot += a0 * b0 + a1 * b1 + a2 * b2 + a3 * b3;
632 | norm_a2 += a0 * a0 + a1 * a1 + a2 * a2 + a3 * a3;
633 | norm_b2 += b0 * b0 + b1 * b1 + b2 * b2 + b3 * b3;
634 | }
635 |
636 | // tail loop
637 | for (; i < n; ++i) {
638 | int32_t ai = a[i];
639 | int32_t bi = b[i];
640 | dot += ai * bi;
641 | norm_a2 += ai * ai;
642 | norm_b2 += bi * bi;
643 | }
644 |
645 | if (norm_a2 == 0 || norm_b2 == 0) {
646 | return 1.0f;
647 | }
648 |
649 | float cosine_similarity = dot / (sqrtf((float)norm_a2) * sqrtf((float)norm_b2));
650 | return 1.0f - cosine_similarity;
651 | }
652 |
// Dot-product "distance": the negated inner product of two int8 vectors
// of length n, accumulated in float.
float int8_distance_dot_cpu (const void *v1, const void *v2, int n) {
    const int8_t *x = (const int8_t *)v1;
    const int8_t *y = (const int8_t *)v2;

    float acc = 0.0f;
    int k = 0;

    // manual 4x unroll; inner lane loop preserves the addition order
    for (; k <= n - 4; k += 4) {
        for (int lane = 0; lane < 4; ++lane) {
            acc += (float)x[k + lane] * y[k + lane];
        }
    }

    while (k < n) {
        acc += (float)x[k] * y[k];
        ++k;
    }

    return -acc;
}
673 |
// L1 (Manhattan) distance between two int8 vectors of length n.
float int8_distance_l1_cpu (const void *v1, const void *v2, int n) {
    const int8_t *x = (const int8_t *)v1;
    const int8_t *y = (const int8_t *)v2;

    float total = 0.0f;
    int k = 0;

    // manual 4x unroll; inner lane loop preserves the addition order
    for (; k <= n - 4; k += 4) {
        for (int lane = 0; lane < 4; ++lane) {
            total += fabsf((float)x[k + lane] - (float)y[k + lane]);
        }
    }

    while (k < n) {
        total += fabsf((float)x[k] - (float)y[k]);
        ++k;
    }

    return total;
}
694 |
695 | // MARK: - ENTRYPOINT -
696 |
697 | #if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
698 | #include
699 |
700 | static void x86_cpuid(int leaf, int subleaf, int *eax, int *ebx, int *ecx, int *edx) {
701 | #if defined(_MSC_VER)
702 | int regs[4];
703 | __cpuidex(regs, leaf, subleaf);
704 | *eax = regs[0]; *ebx = regs[1]; *ecx = regs[2]; *edx = regs[3];
705 | #else
706 | __cpuid_count(leaf, subleaf, *eax, *ebx, *ecx, *edx);
707 | #endif
708 | }
709 |
710 | bool cpu_supports_avx2 (void) {
711 | #if FORCE_AVX2
712 | return true;
713 | #else
714 | int eax, ebx, ecx, edx;
715 | x86_cpuid(0, 0, &eax, &ebx, &ecx, &edx);
716 | if (eax < 7) return false;
717 | x86_cpuid(7, 0, &eax, &ebx, &ecx, &edx);
718 | return (ebx & (1 << 5)) != 0; // AVX2
719 | #endif
720 | }
721 |
722 | bool cpu_supports_sse2 (void) {
723 | int eax, ebx, ecx, edx;
724 | x86_cpuid(1, 0, &eax, &ebx, &ecx, &edx);
725 | return (edx & (1 << 26)) != 0; // SSE2
726 | }
727 |
728 | #else
729 | // For ARM (NEON is always present on aarch64, runtime detection rarely needed)
730 | #if defined(__aarch64__) || defined(__ARM_NEON) || defined(__ARM_NEON__)
731 | bool cpu_supports_neon (void) {
732 | return true;
733 | }
734 | #else
735 | #ifdef SQLITE_WASM_EXTRA_INIT
736 | bool cpu_supports_neon (void) {
737 | return false;
738 | }
739 | #else
740 | #include
741 | #include
742 | bool cpu_supports_neon (void) {
743 | #ifdef AT_HWCAP
744 | return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0;
745 | #else
746 | return false;
747 | #endif
748 | }
749 | #endif
750 | #endif
751 | #endif
752 |
753 | // MARK: -
754 |
// Install the portable scalar kernels into the shared dispatch table.
// The table is indexed as [distance metric][vector element type] using
// C99 designated initializers; any (metric, type) pair not listed here
// is zero-initialized (NULL).
void init_cpu_functions (void) {
    distance_function_t cpu_table[VECTOR_DISTANCE_MAX][VECTOR_TYPE_MAX] = {
        [VECTOR_DISTANCE_L2] = {
            [VECTOR_TYPE_F32] = float32_distance_l2_cpu,
            [VECTOR_TYPE_F16] = float16_distance_l2_cpu,
            [VECTOR_TYPE_BF16] = bfloat16_distance_l2_cpu,
            [VECTOR_TYPE_U8] = uint8_distance_l2_cpu,
            [VECTOR_TYPE_I8] = int8_distance_l2_cpu,
        },
        [VECTOR_DISTANCE_SQUARED_L2] = {
            [VECTOR_TYPE_F32] = float32_distance_l2_squared_cpu,
            [VECTOR_TYPE_F16] = float16_distance_l2_squared_cpu,
            [VECTOR_TYPE_BF16] = bfloat16_distance_l2_squared_cpu,
            [VECTOR_TYPE_U8] = uint8_distance_l2_squared_cpu,
            [VECTOR_TYPE_I8] = int8_distance_l2_squared_cpu,
        },
        [VECTOR_DISTANCE_COSINE] = {
            [VECTOR_TYPE_F32] = float32_distance_cosine_cpu,
            [VECTOR_TYPE_F16] = float16_distance_cosine_cpu,
            [VECTOR_TYPE_BF16] = bfloat16_distance_cosine_cpu,
            [VECTOR_TYPE_U8] = uint8_distance_cosine_cpu,
            [VECTOR_TYPE_I8] = int8_distance_cosine_cpu,
        },
        [VECTOR_DISTANCE_DOT] = {
            [VECTOR_TYPE_F32] = float32_distance_dot_cpu,
            [VECTOR_TYPE_F16] = float16_distance_dot_cpu,
            [VECTOR_TYPE_BF16] = bfloat16_distance_dot_cpu,
            [VECTOR_TYPE_U8] = uint8_distance_dot_cpu,
            [VECTOR_TYPE_I8] = int8_distance_dot_cpu,
        },
        [VECTOR_DISTANCE_L1] = {
            [VECTOR_TYPE_F32] = float32_distance_l1_cpu,
            [VECTOR_TYPE_F16] = float16_distance_l1_cpu,
            [VECTOR_TYPE_BF16] = bfloat16_distance_l1_cpu,
            [VECTOR_TYPE_U8] = uint8_distance_l1_cpu,
            [VECTOR_TYPE_I8] = int8_distance_l1_cpu,
        }
    };

    // copy the whole table in one shot into the global dispatch table
    memcpy(dispatch_distance_table, cpu_table, sizeof(cpu_table));
}
796 |
// Populate the distance-function dispatch table: the portable scalar
// kernels are installed first, then — unless force_cpu is set — a SIMD
// initializer is run for the best instruction set the CPU supports.
void init_distance_functions (bool force_cpu) {
    init_cpu_functions();
    if (force_cpu) return;

#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
    // prefer AVX2, fall back to SSE2
    if (cpu_supports_avx2()) {
        init_distance_functions_avx2();
        return;
    }
    if (cpu_supports_sse2()) {
        init_distance_functions_sse2();
    }
#elif defined(__ARM_NEON) || defined(__aarch64__)
    if (cpu_supports_neon()) {
        init_distance_functions_neon();
    }
#endif
}
813 |
814 |
--------------------------------------------------------------------------------