├── packages ├── node │ ├── LICENSE.md │ ├── .npmignore │ ├── tsup.config.ts │ ├── tsconfig.json │ ├── package.json │ ├── src │ │ ├── index.test.ts │ │ ├── platform.ts │ │ └── index.ts │ ├── README.md │ └── generate-platform-packages.js ├── python │ ├── LICENSE.md │ ├── src │ │ └── sqlite_vector │ │ │ ├── __init__.py │ │ │ └── _version.py │ ├── requirements-dev.txt │ ├── MANIFEST.in │ ├── pyproject.toml │ ├── setup.py │ ├── README.md │ └── download_artifacts.py ├── android │ ├── gradle.properties │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── src │ │ └── main │ │ │ └── AndroidManifest.xml │ ├── gradlew.bat │ ├── build.gradle │ └── gradlew └── swift │ ├── extension │ └── vector.swift │ └── plugin │ └── vector.swift ├── examples └── semantic_search │ ├── requirements.txt │ ├── samples │ ├── sample-13.md │ ├── sample-10.md │ ├── sample-2.md │ ├── sample-20.md │ ├── sample-17.md │ ├── sample-3.md │ ├── sample-5.md │ ├── sample-7.md │ ├── sample-8.md │ ├── sample-12.md │ ├── sample-19.md │ ├── sample-4.md │ ├── sample-9.md │ ├── sample-16.md │ ├── sample-18.md │ ├── sample-6.md │ ├── sample-11.md │ ├── sample-14.md │ ├── sample-15.md │ └── sample-1.md │ ├── README.md │ ├── semsearch.py │ └── semantic_search.py ├── jitpack.yml ├── src ├── distance-avx2.h ├── distance-neon.h ├── distance-sse2.h ├── sqlite-vector.h ├── distance-cpu.h └── distance-cpu.c ├── .gitignore ├── Package.swift ├── libs └── fp16 │ ├── macros.h │ ├── bitcasts.h │ └── fp16.h ├── LICENSE.md ├── QUANTIZATION.md ├── .github └── workflows │ ├── python-package.yml │ └── main.yml ├── Makefile ├── README.md └── API.md /packages/node/LICENSE.md: -------------------------------------------------------------------------------- 1 | ../../LICENSE.md -------------------------------------------------------------------------------- /packages/python/LICENSE.md: -------------------------------------------------------------------------------- 1 | ../../LICENSE.md 
-------------------------------------------------------------------------------- /packages/python/src/sqlite_vector/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/semantic_search/requirements.txt: -------------------------------------------------------------------------------- 1 | sentence-transformers 2 | -------------------------------------------------------------------------------- /packages/python/requirements-dev.txt: -------------------------------------------------------------------------------- 1 | requests 2 | wheel 3 | build 4 | -------------------------------------------------------------------------------- /packages/android/gradle.properties: -------------------------------------------------------------------------------- 1 | org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8 2 | android.useAndroidX=true -------------------------------------------------------------------------------- /packages/python/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE.md 3 | recursive-include src/sqlite_vector/binaries * 4 | -------------------------------------------------------------------------------- /packages/python/src/sqlite_vector/_version.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | __version__ = os.environ.get("PACKAGE_VERSION", "0.0.0") -------------------------------------------------------------------------------- /packages/android/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sqliteai/sqlite-vector/HEAD/packages/android/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- 
/packages/android/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /jitpack.yml: -------------------------------------------------------------------------------- 1 | jdk: 2 | - openjdk17 3 | install: 4 | - make aar ANDROID_NDK=$ANDROID_HOME/ndk-bundle 5 | - export VERSION=$(make version 2>/dev/null | tail -1) 6 | - cd packages/android && ./gradlew publishToMavenLocal -PVERSION="$VERSION" -------------------------------------------------------------------------------- /src/distance-avx2.h: -------------------------------------------------------------------------------- 1 | // 2 | // distance-avx2.h 3 | // sqlitevector 4 | // 5 | // Created by Marco Bambini on 20/06/25. 6 | // 7 | 8 | #ifndef __VECTOR_DISTANCE_AVX2__ 9 | #define __VECTOR_DISTANCE_AVX2__ 10 | 11 | #include 12 | 13 | void init_distance_functions_avx2 (void); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /src/distance-neon.h: -------------------------------------------------------------------------------- 1 | // 2 | // distance-neon.h 3 | // sqlitevector 4 | // 5 | // Created by Marco Bambini on 20/06/25. 6 | // 7 | 8 | #ifndef __VECTOR_DISTANCE_NEON__ 9 | #define __VECTOR_DISTANCE_NEON__ 10 | 11 | #include 12 | 13 | void init_distance_functions_neon (void); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /src/distance-sse2.h: -------------------------------------------------------------------------------- 1 | // 2 | // distance-sse2.h 3 | // sqlitevector 4 | // 5 | // Created by Marco Bambini on 20/06/25. 
6 | // 7 | 8 | #ifndef __VECTOR_DISTANCE_SSE2__ 9 | #define __VECTOR_DISTANCE_SSE2__ 10 | 11 | #include 12 | 13 | void init_distance_functions_sse2 (void); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /packages/android/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-bin.zip 4 | networkTimeout=10000 5 | validateDistributionUrl=true 6 | zipStoreBase=GRADLE_USER_HOME 7 | zipStorePath=wrapper/dists 8 | -------------------------------------------------------------------------------- /packages/node/.npmignore: -------------------------------------------------------------------------------- 1 | # Development and build files 2 | src/ 3 | *.test.ts 4 | *.test.js 5 | tsconfig.json 6 | tsup.config.ts 7 | 8 | # Scripts (only for repo/CI) 9 | generate-platform-packages.js 10 | 11 | # Development files 12 | node_modules/ 13 | package-lock.json 14 | coverage/ 15 | *.log 16 | 17 | # Git 18 | .git/ 19 | .gitignore 20 | -------------------------------------------------------------------------------- /packages/node/tsup.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'tsup'; 2 | 3 | export default defineConfig({ 4 | entry: ['src/index.ts'], 5 | format: ['cjs', 'esm'], 6 | dts: true, 7 | splitting: false, 8 | sourcemap: true, 9 | clean: true, 10 | treeshake: true, 11 | minify: false, 12 | target: 'node16', 13 | outDir: 'dist', 14 | }); 15 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-13.md: -------------------------------------------------------------------------------- 1 | # Article 13: Behavioral Analytics for Anomaly Detection 2 | 3 | Behavioral 
analytics leverages machine learning to establish baseline patterns of normal user and system behavior, flagging deviations that may indicate security threats. User and entity behavior analytics (UEBA) systems monitor login patterns, data access, and application usage to detect insider threats and compromised accounts. Machine learning models adapt to changing behavior patterns while maintaining sensitivity to subtle anomalies that human analysts might overlook. 4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-10.md: -------------------------------------------------------------------------------- 1 | # Article 10: Zero Trust Security Architecture 2 | 3 | Zero trust security operates on the principle of "never trust, always verify," requiring authentication and authorization for every access request regardless of location. This approach assumes breach scenarios and implements continuous verification throughout the network. Key components include identity verification, device compliance checking, least privilege access, and micro-segmentation. Zero trust frameworks help organizations protect against insider threats and advanced persistent attacks. 4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-2.md: -------------------------------------------------------------------------------- 1 | # Article 2: Natural Language Processing Fundamentals 2 | 3 | Natural language processing enables computers to understand, interpret, and generate human language. Key techniques include tokenization, part-of-speech tagging, named entity recognition, and sentiment analysis. Modern NLP leverages transformer architectures like BERT and GPT models for tasks such as language translation, text summarization, and question answering. 
Applications span chatbots, voice assistants, content moderation, and automated document analysis across various industries. 4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-20.md: -------------------------------------------------------------------------------- 1 | # Article 20: IoT Security Vulnerabilities 2 | 3 | Internet of Things devices often have weak security controls due to cost constraints and rapid deployment cycles. Common vulnerabilities include default passwords, unencrypted communications, lack of update mechanisms, and insufficient access controls. IoT botnets can launch massive distributed denial-of-service attacks. Security strategies include network segmentation, device lifecycle management, security-by-design principles, and regulatory compliance requirements for IoT manufacturers and deployments. 4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-17.md: -------------------------------------------------------------------------------- 1 | # Article 17: Supply Chain Security Risks 2 | 3 | Supply chain attacks target third-party vendors and software dependencies to compromise multiple organizations simultaneously. Attackers may insert malicious code into legitimate software updates, compromise hardware during manufacturing, or exploit trusted vendor relationships. Notable incidents include SolarWinds and Kaseya attacks affecting thousands of organizations. Mitigation strategies include vendor risk assessment, software composition analysis, and zero-trust principles for third-party integrations. 
4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-3.md: -------------------------------------------------------------------------------- 1 | # Article 3: Computer Vision Applications 2 | 3 | Computer vision empowers machines to interpret and analyze visual information from images and videos. Core techniques include object detection, image classification, facial recognition, and motion tracking. Convolutional neural networks form the backbone of modern computer vision systems. Applications include autonomous vehicles, medical imaging diagnosis, quality control in manufacturing, augmented reality, and surveillance systems. Edge computing enables real-time computer vision processing on mobile devices. 4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-5.md: -------------------------------------------------------------------------------- 1 | # Article 5: Supervised vs Unsupervised Learning 2 | 3 | Supervised learning uses labeled training data to predict outcomes for new inputs, including classification and regression tasks. Common algorithms include decision trees, support vector machines, and random forests. Unsupervised learning discovers hidden patterns in unlabeled data through clustering, dimensionality reduction, and association rules. Semi-supervised learning combines both approaches when labeled data is scarce. Each paradigm serves different problem types and data availability scenarios. 4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-7.md: -------------------------------------------------------------------------------- 1 | # Article 7: Explainable AI and Interpretability 2 | 3 | Explainable AI focuses on making machine learning models more transparent and interpretable to human users. 
Black-box models like deep neural networks often lack interpretability, creating trust and accountability issues. Techniques include feature importance analysis, LIME (Local Interpretable Model-agnostic Explanations), and SHAP (SHapley Additive exPlanations). Interpretability is crucial for high-stakes applications like healthcare, finance, and criminal justice where decisions require justification. 4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-8.md: -------------------------------------------------------------------------------- 1 | # Article 8: AI Regulation and Compliance 2 | 3 | Governments worldwide are developing regulatory frameworks for artificial intelligence deployment and development. The European Union's AI Act categorizes AI systems by risk levels, imposing strict requirements for high-risk applications. Compliance involves documentation, risk assessment, human oversight, and algorithmic auditing. Organizations must navigate evolving regulations while maintaining innovation capabilities. Privacy laws like GDPR also impact AI data processing and automated decision-making systems. 4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-12.md: -------------------------------------------------------------------------------- 1 | # Article 12: Machine Learning for Malware Detection 2 | 3 | Machine learning enhances malware detection by analyzing file characteristics, behavioral patterns, and network communications to identify threats. Static analysis examines file properties without execution, while dynamic analysis observes runtime behavior in controlled environments. Ensemble methods combining multiple algorithms improve detection accuracy and reduce false positives. AI-powered systems can identify zero-day threats and polymorphic malware that traditional signature-based solutions miss. 
4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-19.md: -------------------------------------------------------------------------------- 1 | # Article 19: Edge Computing Security Challenges 2 | 3 | Edge computing brings data processing closer to end users and devices, improving performance but creating new security challenges. Distributed edge nodes have limited security controls compared to centralized data centers. Attack surfaces expand across numerous endpoints with varying security capabilities. Key concerns include device authentication, data encryption, secure updates, and centralized security management. Zero-trust architectures and hardware-based security become essential for edge deployments. 4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-4.md: -------------------------------------------------------------------------------- 1 | # Article 4: Reinforcement Learning Algorithms 2 | 3 | Reinforcement learning trains agents to make optimal decisions through trial and error interactions with environments. Agents receive rewards or penalties based on their actions, gradually learning policies that maximize cumulative rewards. Q-learning and policy gradient methods are fundamental approaches. Applications include game playing (AlphaGo), robotics control, autonomous driving, recommendation systems, and financial trading algorithms. The exploration-exploitation trade-off remains a central challenge. 4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-9.md: -------------------------------------------------------------------------------- 1 | # Article 9: Threat Detection and Prevention 2 | 3 | Cybersecurity threat detection employs various technologies to identify malicious activities before they cause damage. 
Intrusion detection systems monitor network traffic for suspicious patterns, while endpoint protection software guards individual devices. Behavioral analysis identifies anomalies in user activities that may indicate compromised accounts. Security information and event management (SIEM) platforms aggregate and analyze security logs from multiple sources to provide comprehensive threat visibility. 4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-16.md: -------------------------------------------------------------------------------- 1 | # Article 16: Social Engineering Attack Vectors 2 | 3 | Social engineering exploits human psychology rather than technical vulnerabilities to gain unauthorized access to systems and information. Common techniques include phishing emails, pretexting phone calls, baiting with infected media, and physical tailgating. Attackers research targets through social media and public information to craft convincing scenarios. Defense requires security awareness training, verification procedures, and creating organizational cultures that encourage reporting suspicious communications. 4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-18.md: -------------------------------------------------------------------------------- 1 | # Article 18: Quantum Computing and Cryptography 2 | 3 | Quantum computing poses both opportunities and threats for cybersecurity. Quantum computers could break current cryptographic algorithms like RSA and ECC that secure internet communications and data protection. Organizations must prepare for post-quantum cryptography by implementing quantum-resistant algorithms. However, quantum technologies also enable quantum key distribution for theoretically unbreakable communication channels. The transition period requires careful planning and gradual migration strategies. 
4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-6.md: -------------------------------------------------------------------------------- 1 | # Article 6: AI Ethics and Bias Mitigation 2 | 3 | Artificial intelligence systems can perpetuate or amplify human biases present in training data, leading to unfair outcomes across different demographic groups. Bias mitigation strategies include diverse dataset collection, algorithmic fairness constraints, and regular bias auditing. Ethical AI development requires transparency, accountability, and stakeholder involvement. Organizations must establish governance frameworks addressing privacy, consent, and algorithmic decision-making impacts on individuals and society. 4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-11.md: -------------------------------------------------------------------------------- 1 | # Article 11: Incident Response and Recovery 2 | 3 | Effective incident response requires predefined procedures for detecting, containing, and recovering from security breaches. Response teams follow structured phases: preparation, identification, containment, eradication, recovery, and lessons learned. Critical activities include forensic analysis, stakeholder communication, system restoration, and process improvement. Regular tabletop exercises and response plan updates ensure organizations can quickly minimize damage and restore normal operations after security incidents. 
4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-14.md: -------------------------------------------------------------------------------- 1 | # Article 14: AI-Driven Security Orchestration 2 | 3 | Security orchestration platforms integrate multiple security tools and automate incident response workflows using artificial intelligence. These systems correlate alerts from various sources, prioritize threats based on risk assessment, and execute automated remediation actions. Natural language processing helps analyze threat intelligence reports, while machine learning improves decision-making accuracy over time. Orchestration reduces response times and analyst workload while maintaining consistent security procedures. 4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-15.md: -------------------------------------------------------------------------------- 1 | # Article 15: Advanced Persistent Threats (APTs) 2 | 3 | Advanced persistent threats represent sophisticated, long-term cyberattacks typically conducted by nation-states or organized criminal groups. APTs use multiple attack vectors, maintain persistent access, and employ stealth techniques to avoid detection. Common tactics include spear-phishing, zero-day exploits, living-off-the-land techniques, and lateral movement within networks. Defense requires continuous monitoring, threat hunting, and intelligence-driven security strategies to detect and neutralize these patient adversaries. 4 | -------------------------------------------------------------------------------- /examples/semantic_search/samples/sample-1.md: -------------------------------------------------------------------------------- 1 | # Article 1: Deep Learning Neural Networks 2 | 3 | Deep learning utilizes artificial neural networks with multiple layers to process and learn from vast amounts of data. 
These networks automatically discover intricate patterns and representations without manual feature engineering. Convolutional neural networks excel at image recognition tasks, while recurrent neural networks handle sequential data like text and speech. Popular frameworks include TensorFlow, PyTorch, and Keras. Deep learning has revolutionized computer vision, natural language processing, and speech recognition applications. 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Build artifacts 2 | build/ 3 | dist/ 4 | .build 5 | *.a 6 | *.sqlite 7 | 8 | # iOS/macOS 9 | *.xcworkspacedata 10 | *.xcuserstate 11 | *.xcbkptlist 12 | *.plist 13 | 14 | # Android 15 | .gradle/ 16 | *.aar 17 | local.properties 18 | jniLibs/ 19 | *.apk 20 | *.ap_ 21 | *.dex 22 | 23 | # Node.js 24 | node_modules/ 25 | package-lock.json 26 | *.tsbuildinfo 27 | coverage/ 28 | *.log 29 | npm-debug.log* 30 | yarn-debug.log* 31 | yarn-error.log* 32 | packages/node/platform-packages/ 33 | packages/node/test-artifacts/ 34 | packages/node/test-output/ 35 | packages/node/test-platform-packages/ 36 | 37 | # IDE 38 | .vscode 39 | .idea/ 40 | *.iml 41 | *.swp 42 | *.swo 43 | 44 | # System 45 | .DS_Store 46 | Thumbs.db -------------------------------------------------------------------------------- /src/sqlite-vector.h: -------------------------------------------------------------------------------- 1 | // 2 | // sqlite-vector.h 3 | // sqlitevector 4 | // 5 | // Created by Marco Bambini on 06/05/25. 
6 | // 7 | 8 | #ifndef __SQLITE_VECTOR__ 9 | #define __SQLITE_VECTOR__ 10 | 11 | #ifndef SQLITE_CORE 12 | #include "sqlite3ext.h" 13 | #else 14 | #include "sqlite3.h" 15 | #endif 16 | 17 | #ifdef _WIN32 18 | #define SQLITE_VECTOR_API __declspec(dllexport) 19 | #else 20 | #define SQLITE_VECTOR_API 21 | #endif 22 | 23 | #ifdef __cplusplus 24 | extern "C" { 25 | #endif 26 | 27 | #define SQLITE_VECTOR_VERSION "0.9.52" 28 | 29 | SQLITE_VECTOR_API int sqlite3_vector_init (sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi); 30 | 31 | #ifdef __cplusplus 32 | } 33 | #endif 34 | 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /packages/node/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "ESNext", 5 | "lib": ["ES2020"], 6 | "moduleResolution": "bundler", 7 | "declaration": true, 8 | "declarationMap": true, 9 | "sourceMap": true, 10 | "outDir": "./dist", 11 | "rootDir": "./src", 12 | "strict": true, 13 | "esModuleInterop": true, 14 | "skipLibCheck": true, 15 | "forceConsistentCasingInFileNames": true, 16 | "resolveJsonModule": true, 17 | "isolatedModules": true, 18 | "noUnusedLocals": true, 19 | "noUnusedParameters": true, 20 | "noFallthroughCasesInSwitch": true, 21 | "allowSyntheticDefaultImports": true 22 | }, 23 | "include": ["src/**/*"], 24 | "exclude": ["node_modules", "dist", "**/*.test.ts"] 25 | } 26 | -------------------------------------------------------------------------------- /packages/swift/extension/vector.swift: -------------------------------------------------------------------------------- 1 | // vector.swift 2 | // This file serves as a placeholder for the vector target. 3 | // The actual SQLite extension is built using the Makefile through the build plugin. 
4 | 5 | import Foundation 6 | 7 | /// Placeholder structure for vector 8 | public struct vector { 9 | /// Returns the path to the built vector dylib inside the XCFramework 10 | public static var path: String { 11 | #if os(macOS) 12 | return "vector.xcframework/macos-arm64_x86_64/vector.framework/vector" 13 | #elseif targetEnvironment(simulator) 14 | return "vector.xcframework/ios-arm64_x86_64-simulator/vector.framework/vector" 15 | #else 16 | return "vector.xcframework/ios-arm64/vector.framework/vector" 17 | #endif 18 | } 19 | } -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version: 6.1 2 | // The swift-tools-version declares the minimum version of Swift required to build this package. 3 | 4 | import PackageDescription 5 | 6 | let package = Package( 7 | name: "vector", 8 | platforms: [.macOS(.v11), .iOS(.v11)], 9 | products: [ 10 | // Products can be used to vend plugins, making them visible to other packages. 
11 | .plugin( 12 | name: "vectorPlugin", 13 | targets: ["vectorPlugin"]), 14 | .library( 15 | name: "vector", 16 | targets: ["vector"]) 17 | ], 18 | targets: [ 19 | // Build tool plugin that invokes the Makefile 20 | .plugin( 21 | name: "vectorPlugin", 22 | capability: .buildTool(), 23 | path: "packages/swift/plugin" 24 | ), 25 | // vector library target 26 | .target( 27 | name: "vector", 28 | dependencies: [], 29 | path: "packages/swift/extension", 30 | plugins: ["vectorPlugin"] 31 | ), 32 | ] 33 | ) -------------------------------------------------------------------------------- /packages/python/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0", "build", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "sqliteai-vector" 7 | dynamic = ["version", "classifiers"] 8 | description = "Python prebuilt binaries for SQLite Vector extension for all supported platforms and architectures." 
9 | authors = [ 10 | { name = "SQLite AI Team" } 11 | ] 12 | readme = "README.md" 13 | license = "LicenseRef-Elastic-2.0-Modified-For-Open-Source-Use" 14 | license-files = ["LICENSE.md"] 15 | requires-python = ">=3" 16 | 17 | [project.urls] 18 | Homepage = "https://sqlite.ai" 19 | Documentation = "https://github.com/sqliteai/sqlite-vector/blob/main/API.md" 20 | Repository = "https://github.com/sqliteai/sqlite-vector" 21 | Issues = "https://github.com/sqliteai/sqlite-vector/issues" 22 | 23 | [tool.setuptools] 24 | packages = {find = {where = ["src"]}} 25 | include-package-data = true 26 | 27 | [tool.setuptools.dynamic] 28 | version = {attr = "sqlite_vector._version.__version__"} 29 | 30 | [tool.bdist_wheel] 31 | # Force platform-specific wheels 32 | universal = false 33 | -------------------------------------------------------------------------------- /libs/fp16/macros.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef FP16_MACROS_H 3 | #define FP16_MACROS_H 4 | 5 | #ifndef FP16_USE_NATIVE_CONVERSION 6 | #if (defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__) 7 | #define FP16_USE_NATIVE_CONVERSION 1 8 | #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__) 9 | #define FP16_USE_NATIVE_CONVERSION 1 10 | #elif defined(_MSC_VER) && defined(_M_ARM64) 11 | #define FP16_USE_NATIVE_CONVERSION 1 12 | #elif defined(__GNUC__) && defined(__aarch64__) 13 | #define FP16_USE_NATIVE_CONVERSION 1 14 | #endif 15 | #if !defined(FP16_USE_NATIVE_CONVERSION) 16 | #define FP16_USE_NATIVE_CONVERSION 0 17 | #endif // !defined(FP16_USE_NATIVE_CONVERSION) 18 | #endif // !define(FP16_USE_NATIVE_CONVERSION) 19 | 20 | #ifndef FP16_USE_FLOAT16_TYPE 21 | #if !defined(__clang__) && !defined(__INTEL_COMPILER) && defined(__GNUC__) && (__GNUC__ >= 12) 22 | #if defined(__F16C__) 23 | #define FP16_USE_FLOAT16_TYPE 1 24 | #endif 25 | #endif 26 | #if !defined(FP16_USE_FLOAT16_TYPE) 27 | #define 
FP16_USE_FLOAT16_TYPE 0 28 | #endif // !defined(FP16_USE_FLOAT16_TYPE) 29 | #endif // !defined(FP16_USE_FLOAT16_TYPE) 30 | 31 | #ifndef FP16_USE_FP16_TYPE 32 | #if defined(__clang__) 33 | #if defined(__F16C__) || defined(__aarch64__) 34 | #define FP16_USE_FP16_TYPE 1 35 | #endif 36 | #elif defined(__GNUC__) 37 | #if defined(__aarch64__) 38 | #define FP16_USE_FP16_TYPE 1 39 | #endif 40 | #endif 41 | #if !defined(FP16_USE_FP16_TYPE) 42 | #define FP16_USE_FP16_TYPE 0 43 | #endif // !defined(FP16_USE_FP16_TYPE) 44 | #endif // !defined(FP16_USE_FP16_TYPE) 45 | 46 | #endif /* FP16_MACROS_H */ 47 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Elastic License 2.0 (modified for open-source use) 2 | 3 | Copyright © 2025 SQLite Cloud, Inc. 4 | 5 | This software is licensed under the Elastic License 2.0, with the additional grant described below. 6 | 7 | You may not use this file except in compliance with the Elastic License 2.0 and the conditions outlined here. 8 | 9 | You may obtain a copy of the Elastic License 2.0 at: 10 | 11 | ``` 12 | https://www.elastic.co/licensing/elastic-license 13 | ``` 14 | 15 | Software distributed under the Elastic License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | 18 | See the Elastic License 2.0 for the specific language governing permissions and limitations under the license. 
19 | 20 | --- 21 | 22 | ## Additional Grant for Open-Source Projects 23 | 24 | In addition to the permissions granted under the Elastic License 2.0: 25 | 26 | * **Free Use in Open-Source Projects**: 27 | You may use, copy, distribute, and prepare derivative works of the software — in source or object form, with or without modification — freely and without fee, provided the software is incorporated into or used by an **open-source project** licensed under an OSI-approved open-source license. 28 | 29 | --- 30 | 31 | ## Conditions 32 | 33 | 1. For **open-source projects**, the software may be used, copied, modified, and distributed without restriction or fee. 34 | 35 | 2. For **non–open-source or commercial production use**, you may use, copy, distribute, and prepare derivative works of the software only with a commercial license from SQLite Cloud, Inc. 36 | 37 | 3. You may not provide the software to third parties as a managed service, such as a hosted or cloud-based service, unless you have a license for that use. 38 | 39 | 4. The software may not be used to circumvent the license grant limitations. 40 | 41 | 5. Any permitted use is subject to compliance with the Elastic License 2.0, this additional grant, and applicable law. 
42 | -------------------------------------------------------------------------------- /packages/python/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | from setuptools.command.bdist_wheel import bdist_wheel 4 | 5 | 6 | class PlatformSpecificWheel(bdist_wheel): 7 | """Custom bdist_wheel to force platform-specific wheel.""" 8 | 9 | def finalize_options(self): 10 | bdist_wheel.finalize_options(self) 11 | # Force platform-specific wheel 12 | self.root_is_pure = False 13 | 14 | # Set platform name from environment if provided 15 | plat_name = os.environ.get("PLAT_NAME") 16 | if plat_name: 17 | self.plat_name = plat_name 18 | 19 | def get_tag(self): 20 | # Force platform-specific tags with broader compatibility 21 | python_tag, abi_tag, platform_tag = bdist_wheel.get_tag(self) 22 | 23 | # Override platform tag if specified 24 | plat_name = os.environ.get("PLAT_NAME") 25 | if plat_name: 26 | platform_tag = plat_name 27 | 28 | # Use py3 for broader Python compatibility since we have pre-built binaries 29 | python_tag = "py3" 30 | abi_tag = "none" 31 | 32 | return python_tag, abi_tag, platform_tag 33 | 34 | 35 | def get_platform_classifiers(): 36 | """Get platform-specific classifiers based on PLAT_NAME environment variable.""" 37 | classifier_map = { 38 | "manylinux2014_x86_64": ["Operating System :: POSIX :: Linux"], 39 | "manylinux2014_aarch64": ["Operating System :: POSIX :: Linux"], 40 | "win_amd64": ["Operating System :: Microsoft :: Windows"], 41 | "macosx_10_9_x86_64": ["Operating System :: MacOS"], 42 | "macosx_11_0_arm64": ["Operating System :: MacOS"], 43 | } 44 | 45 | plat_name = os.environ.get("PLAT_NAME") 46 | if plat_name and plat_name in classifier_map: 47 | return ["Programming Language :: Python :: 3", classifier_map[plat_name][0]] 48 | 49 | raise ValueError(f"Unsupported or missing PLAT_NAME: {plat_name}") 50 | 51 | 52 | if __name__ == "__main__": 53 | setup( 54 | 
cmdclass={"bdist_wheel": PlatformSpecificWheel}, 55 | classifiers=get_platform_classifiers(), 56 | ) 57 | -------------------------------------------------------------------------------- /packages/node/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@sqliteai/sqlite-vector", 3 | "version": "0.9.45", 4 | "description": "SQLite vector search extension for Node.js - Cross-platform vector embeddings and similarity search", 5 | "main": "./dist/index.js", 6 | "module": "./dist/index.mjs", 7 | "types": "./dist/index.d.ts", 8 | "exports": { 9 | ".": { 10 | "import": { 11 | "types": "./dist/index.d.mts", 12 | "default": "./dist/index.mjs" 13 | }, 14 | "require": { 15 | "types": "./dist/index.d.ts", 16 | "default": "./dist/index.js" 17 | } 18 | } 19 | }, 20 | "files": [ 21 | "dist", 22 | "README.md", 23 | "LICENSE.md" 24 | ], 25 | "scripts": { 26 | "build": "tsup", 27 | "prepublishOnly": "npm run build", 28 | "test": "vitest", 29 | "typecheck": "tsc --noEmit", 30 | "generate-platforms": "node generate-platform-packages.js" 31 | }, 32 | "keywords": [ 33 | "sqlite", 34 | "vector", 35 | "embedding", 36 | "ai", 37 | "machine-learning", 38 | "similarity-search", 39 | "semantic-search", 40 | "vector-database", 41 | "sqlite-extension" 42 | ], 43 | "author": "Gioele Cantoni (gioele@sqlitecloud.io)", 44 | "license": "SEE LICENSE IN LICENSE.md", 45 | "repository": { 46 | "type": "git", 47 | "url": "https://github.com/sqliteai/sqlite-vector.git", 48 | "directory": "packages/node" 49 | }, 50 | "homepage": "https://github.com/sqliteai/sqlite-vector#readme", 51 | "bugs": { 52 | "url": "https://github.com/sqliteai/sqlite-vector/issues" 53 | }, 54 | "engines": { 55 | "node": ">=16.0.0" 56 | }, 57 | "optionalDependencies": { 58 | "@sqliteai/sqlite-vector-darwin-arm64": "0.9.45", 59 | "@sqliteai/sqlite-vector-darwin-x86_64": "0.9.45", 60 | "@sqliteai/sqlite-vector-linux-arm64": "0.9.45", 61 | 
"@sqliteai/sqlite-vector-linux-arm64-musl": "0.9.45", 62 | "@sqliteai/sqlite-vector-linux-x86_64": "0.9.45", 63 | "@sqliteai/sqlite-vector-linux-x86_64-musl": "0.9.45", 64 | "@sqliteai/sqlite-vector-win32-x86_64": "0.9.45" 65 | }, 66 | "devDependencies": { 67 | "@types/node": "^20.0.0", 68 | "tsup": "^8.0.0", 69 | "typescript": "^5.3.0", 70 | "vitest": "^3.2.4" 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /packages/python/README.md: -------------------------------------------------------------------------------- 1 | ## SQLite Vector Python package 2 | 3 | This package provides the sqlite-vector extension prebuilt binaries for multiple platforms and architectures. 4 | 5 | ### SQLite Vector 6 | 7 | SQLite Vector is a cross-platform, ultra-efficient SQLite extension that brings vector search capabilities to your embedded database. It works seamlessly on iOS, Android, Windows, Linux, and macOS, using just 30MB of memory by default. With support for Float32, Float16, BFloat16, Int8, and UInt8, and highly optimized distance functions, it's the ideal solution for Edge AI applications. 8 | 9 | More details on the official repository [sqliteai/sqlite-vector](https://github.com/sqliteai/sqlite-vector). 10 | 11 | ### Documentation 12 | 13 | For detailed information on all available functions, their parameters, and examples, refer to the [comprehensive API Reference](https://github.com/sqliteai/sqlite-vector/blob/main/API.md). 
14 | 15 | ### Supported Platforms and Architectures 16 | 17 | | Platform | Arch | Subpackage name | Binary name | 18 | | ------------- | ------------ | ------------------------ | ------------ | 19 | | Linux (CPU) | x86_64/arm64 | sqlite_vector.binaries | vector.so | 20 | | Windows (CPU) | x86_64 | sqlite_vector.binaries | vector.dll | 21 | | macOS (CPU) | x86_64/arm64 | sqlite_vector.binaries | vector.dylib | 22 | 23 | ## Usage 24 | 25 | > **Note:** Some SQLite installations on certain operating systems may have extension loading disabled by default. 26 | If you encounter issues loading the extension, refer to the [sqlite-extensions-guide](https://github.com/sqliteai/sqlite-extensions-guide/) for platform-specific instructions on enabling and using SQLite extensions. 27 | 28 | ```python 29 | import importlib.resources 30 | import sqlite3 31 | 32 | # Connect to your SQLite database 33 | conn = sqlite3.connect("example.db") 34 | 35 | # Load the sqlite-vector extension 36 | # pip will install the correct binary package for your platform and architecture 37 | ext_path = importlib.resources.files("sqlite_vector.binaries") / "vector" 38 | 39 | conn.enable_load_extension(True) 40 | conn.load_extension(str(ext_path)) 41 | conn.enable_load_extension(False) 42 | 43 | 44 | # Now you can use sqlite-vector features in your SQL queries 45 | print(conn.execute("SELECT vector_version();").fetchone()) 46 | ``` -------------------------------------------------------------------------------- /examples/semantic_search/README.md: -------------------------------------------------------------------------------- 1 | # Semantic Search Example with sqlite-vector 2 | 3 | This example in Python demonstrates how to build a semantic search engine using the [sqlite-vector](https://github.com/sqliteai/sqlite-vector) extension and a Sentence Transformer model. It allows you to index and search documents using vector similarity, powered by a local LLM embedding model. 
4 | 5 | ### How it works 6 | 7 | - **Embeddings**: Uses [sentence-transformers](https://huggingface.co/sentence-transformers) to generate dense vector representations (embeddings) for text. The default model is [`all-MiniLM-L6-v2`](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2), a fast, lightweight model (384 dimensions) suitable for semantic search and retrieval tasks. 8 | - **Vector Store and Search**: Embeddings are stored in SQLite using the [`sqlite-vector`](https://github.com/sqliteai/sqlite-vector) extension, enabling fast similarity search (cosine distance) directly in the database. 9 | - **Sample Data**: The `samples/` directory contains example documents you can index and search immediately. 10 | 11 | ### Installation 12 | 13 | 1. Download the `sqlite-vector` extension for your platform [here](https://github.com/sqliteai/sqlite-vector/releases). 14 | 15 | 2. Extract the `vector.so` file in the main directory of the project. 16 | 17 | 3. Install the dependencies: 18 | 19 | 20 | ```bash 21 | $ python -m venv venv 22 | 23 | $ source venv/bin/activate 24 | 25 | $ pip install -r requirements.txt 26 | ``` 27 | 28 | 4. On first use, the required model will be downloaded automatically. 
29 | 30 | ### Usage 31 | 32 | Use the interactive mode to keep the model in memory and run multiple queries efficiently: 33 | 34 | ```bash 35 | python semsearch.py --repl 36 | 37 | # Index a directory of documents 38 | semsearch> index ./samples 39 | 40 | # Search for similar documents 41 | semsearch> search "neural network architectures for image recognition" 42 | ``` 43 | 44 | ### Example Queries 45 | 46 | Try these queries to test semantic similarity: 47 | 48 | - "neural network architectures for image recognition" 49 | - "reinforcement learning in autonomous systems" 50 | - "explainable artificial intelligence methods" 51 | - "AI governance and regulatory compliance" 52 | - "network intrusion detection systems" 53 | 54 | **Note:** 55 | - Supported extensions are `.md`, `.txt`, `.py`, `.js`, `.html`, `.css`, `.sql`, `.json`, `.xml`. 56 | - For more details, see the code in `semsearch.py` and `semantic_search.py`. -------------------------------------------------------------------------------- /packages/swift/plugin/vector.swift: -------------------------------------------------------------------------------- 1 | import PackagePlugin 2 | import Foundation 3 | 4 | @main 5 | struct vector: BuildToolPlugin { 6 | /// Entry point for creating build commands for targets in Swift packages. 7 | func createBuildCommands(context: PluginContext, target: Target) async throws -> [Command] { 8 | let packageDirectory = context.package.directoryURL 9 | let outputDirectory = context.pluginWorkDirectoryURL 10 | return createvectorBuildCommands(packageDirectory: packageDirectory, outputDirectory: outputDirectory) 11 | } 12 | } 13 | 14 | #if canImport(XcodeProjectPlugin) 15 | import XcodeProjectPlugin 16 | 17 | extension vector: XcodeBuildToolPlugin { 18 | // Entry point for creating build commands for targets in Xcode projects.
19 | func createBuildCommands(context: XcodePluginContext, target: XcodeTarget) throws -> [Command] { 20 | let outputDirectory = context.pluginWorkDirectoryURL 21 | return createvectorBuildCommands(packageDirectory: nil, outputDirectory: outputDirectory) 22 | } 23 | } 24 | 25 | #endif 26 | 27 | /// Shared function to create vector build commands 28 | func createvectorBuildCommands(packageDirectory: URL?, outputDirectory: URL) -> [Command] { 29 | 30 | // For Xcode projects, use current directory; for Swift packages, use provided packageDirectory 31 | let workingDirectory = packageDirectory?.path ?? "$(pwd)" 32 | let packageDirInfo = packageDirectory != nil ? "Package directory: \(packageDirectory!.path)" : "Working directory: $(pwd)" 33 | 34 | return [ 35 | .prebuildCommand( 36 | displayName: "Building vector XCFramework", 37 | executable: URL(fileURLWithPath: "/bin/bash"), 38 | arguments: [ 39 | "-c", 40 | """ 41 | set -e 42 | echo "Starting vector XCFramework prebuild..." 43 | echo "\(packageDirInfo)" 44 | 45 | # Clean and create output directory 46 | rm -rf "\(outputDirectory.path)" 47 | mkdir -p "\(outputDirectory.path)" 48 | 49 | # Build directly from source directory with custom output paths 50 | cd "\(workingDirectory)" && \ 51 | echo "Building XCFramework..." && \ 52 | make xcframework DIST_DIR="\(outputDirectory.path)" BUILD_DIR="\(outputDirectory.path)/build" && \ 53 | rm -rf "\(outputDirectory.path)/build" && \ 54 | echo "XCFramework build completed successfully!" 
55 | """ 56 | ], 57 | outputFilesDirectory: outputDirectory 58 | ) 59 | ] 60 | } -------------------------------------------------------------------------------- /packages/node/src/index.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from 'vitest'; 2 | import { 3 | getCurrentPlatform, 4 | getPlatformPackageName, 5 | getBinaryName, 6 | isMusl, 7 | getExtensionPath, 8 | getExtensionInfo, 9 | ExtensionNotFoundError 10 | } from './index'; 11 | 12 | describe('Platform Detection', () => { 13 | it('getCurrentPlatform() returns a valid platform', () => { 14 | const platform = getCurrentPlatform(); 15 | const validPlatforms = [ 16 | 'darwin-arm64', 17 | 'darwin-x86_64', 18 | 'linux-arm64', 19 | 'linux-arm64-musl', 20 | 'linux-x86_64', 21 | 'linux-x86_64-musl', 22 | 'win32-x86_64', 23 | ]; 24 | 25 | expect(validPlatforms).toContain(platform); 26 | }); 27 | 28 | it('getPlatformPackageName() returns correct package name format', () => { 29 | const packageName = getPlatformPackageName(); 30 | 31 | expect(packageName.startsWith('@sqliteai/sqlite-vector-')).toBe(true); 32 | 33 | expect(packageName).toMatch( 34 | /^@sqliteai\/sqlite-vector-(darwin|linux|win32)-(arm64|x86_64)(-musl)?$/ 35 | ); 36 | }); 37 | 38 | it('getBinaryName() returns correct extension', () => { 39 | const binaryName = getBinaryName(); 40 | 41 | expect(binaryName).toMatch( 42 | /^vector\.(dylib|so|dll)$/ 43 | ); 44 | }); 45 | 46 | it('isMusl() returns a boolean', () => { 47 | expect(typeof isMusl()).toBe('boolean'); 48 | }); 49 | }); 50 | 51 | describe('Extension Path Resolution', () => { 52 | it('getExtensionPath() returns a string or throws', () => { 53 | try { 54 | const path = getExtensionPath(); 55 | expect(typeof path).toBe('string'); 56 | expect(path.length).toBeGreaterThan(0); 57 | } catch (error) { 58 | expect(error instanceof ExtensionNotFoundError).toBe(true); 59 | } 60 | }); 61 | 62 | it('getExtensionInfo() returns 
complete info object', () => { 63 | try { 64 | const info = getExtensionInfo(); 65 | 66 | expect(info.platform).toBeTruthy(); 67 | expect(info.packageName).toBeTruthy(); 68 | expect(info.binaryName).toBeTruthy(); 69 | expect(info.path).toBeTruthy(); 70 | 71 | expect(typeof info.platform).toBe('string'); 72 | expect(typeof info.packageName).toBe('string'); 73 | expect(typeof info.binaryName).toBe('string'); 74 | expect(typeof info.path).toBe('string'); 75 | } catch (error) { 76 | expect(error instanceof ExtensionNotFoundError).toBe(true); 77 | } 78 | }); 79 | }); 80 | 81 | describe('Error Handling', () => { 82 | it('ExtensionNotFoundError has correct properties', () => { 83 | const error = new ExtensionNotFoundError('Test message'); 84 | 85 | expect(error instanceof Error).toBe(true); 86 | expect(error.name).toBe('ExtensionNotFoundError'); 87 | expect(error.message).toBe('Test message'); 88 | }); 89 | }); -------------------------------------------------------------------------------- /libs/fp16/bitcasts.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef FP16_BITCASTS_H 3 | #define FP16_BITCASTS_H 4 | 5 | #if defined(__cplusplus) && (__cplusplus >= 201103L) 6 | #include 7 | #elif !defined(__OPENCL_VERSION__) 8 | #include 9 | #endif 10 | 11 | #if defined(__INTEL_COMPILER) 12 | #include 13 | #endif 14 | 15 | #if defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64)) 16 | #include 17 | #endif 18 | 19 | 20 | static inline float fp32_from_bits(uint32_t w) { 21 | #if defined(__OPENCL_VERSION__) 22 | return as_float(w); 23 | #elif defined(__CUDA_ARCH__) 24 | return __uint_as_float((unsigned int) w); 25 | #elif defined(__INTEL_COMPILER) || defined(_MSC_VER) && (_MSC_VER >= 1932) && (defined(_M_IX86) || defined(_M_X64)) 26 | return _castu32_f32(w); 27 | #elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64)) 28 | return _CopyFloatFromInt32((__int32) w); 
29 | #else 30 | union { 31 | uint32_t as_bits; 32 | float as_value; 33 | } fp32 = { w }; 34 | return fp32.as_value; 35 | #endif 36 | } 37 | 38 | static inline uint32_t fp32_to_bits(float f) { 39 | #if defined(__OPENCL_VERSION__) 40 | return as_uint(f); 41 | #elif defined(__CUDA_ARCH__) 42 | return (uint32_t) __float_as_uint(f); 43 | #elif defined(__INTEL_COMPILER) || defined(_MSC_VER) && (_MSC_VER >= 1932) && (defined(_M_IX86) || defined(_M_X64)) 44 | return _castf32_u32(f); 45 | #elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64)) 46 | return (uint32_t) _CopyInt32FromFloat(f); 47 | #else 48 | union { 49 | float as_value; 50 | uint32_t as_bits; 51 | } fp32 = { f }; 52 | return fp32.as_bits; 53 | #endif 54 | } 55 | 56 | static inline double fp64_from_bits(uint64_t w) { 57 | #if defined(__OPENCL_VERSION__) 58 | return as_double(w); 59 | #elif defined(__CUDA_ARCH__) 60 | return __longlong_as_double((long long) w); 61 | #elif defined(__INTEL_COMPILER) || defined(_MSC_VER) && (_MSC_VER >= 1932) && (defined(_M_IX86) || defined(_M_X64)) 62 | return _castu64_f64(w); 63 | #elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64)) 64 | return _CopyDoubleFromInt64((__int64) w); 65 | #else 66 | union { 67 | uint64_t as_bits; 68 | double as_value; 69 | } fp64 = { w }; 70 | return fp64.as_value; 71 | #endif 72 | } 73 | 74 | static inline uint64_t fp64_to_bits(double f) { 75 | #if defined(__OPENCL_VERSION__) 76 | return as_ulong(f); 77 | #elif defined(__CUDA_ARCH__) 78 | return (uint64_t) __double_as_longlong(f); 79 | #elif defined(__INTEL_COMPILER) || defined(_MSC_VER) && (_MSC_VER >= 1932) && (defined(_M_IX86) || defined(_M_X64)) 80 | return _castf64_u64(f); 81 | #elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64)) 82 | return (uint64_t) _CopyInt64FromDouble(f); 83 | #else 84 | union { 85 | double as_value; 86 | uint64_t as_bits; 87 | } fp64 = { f }; 88 | return fp64.as_bits; 
89 | #endif 90 | } 91 | 92 | #endif /* FP16_BITCASTS_H */ 93 | -------------------------------------------------------------------------------- /packages/python/download_artifacts.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import zipfile 3 | import requests 4 | from pathlib import Path 5 | import shutil 6 | 7 | 8 | # == USAGE == 9 | # python3 download_artifacts.py PLATFORM VERSION 10 | # eg: python3 download_artifacts.py linux_x86_64 "0.5.9" 11 | 12 | REPO = "sqliteai/sqlite-vector" 13 | RELEASE_URL = f"https://github.com/{REPO}/releases/download" 14 | 15 | # Map Python plat_name to artifact names 16 | ARTIFACTS = { 17 | "manylinux2014_x86_64": ["vector-linux-x86_64"], 18 | "manylinux2014_aarch64": [ 19 | "vector-linux-arm64", 20 | ], 21 | "win_amd64": ["vector-windows-x86_64"], 22 | "macosx_10_9_x86_64": ["vector-macos"], 23 | "macosx_11_0_arm64": ["vector-macos"], 24 | } 25 | 26 | BINARY_NAME = { 27 | "manylinux2014_x86_64": "vector.so", 28 | "manylinux2014_aarch64": "vector.so", 29 | "win_amd64": "vector.dll", 30 | "macosx_10_9_x86_64": "vector.dylib", 31 | "macosx_11_0_arm64": "vector.dylib", 32 | } 33 | 34 | BINARIES_DIR = Path(__file__).parent / "src/sqlite_vector/binaries" 35 | 36 | 37 | def download_and_extract(artifact_name, bin_name, version): 38 | artifact = f"{artifact_name}-{version}.zip" 39 | url = f"{RELEASE_URL}/{version}/{artifact}" 40 | print(f"Downloading {url}") 41 | 42 | r = requests.get(url) 43 | if r.status_code != 200: 44 | print(f"Failed to download {artifact}: {r.status_code}") 45 | sys.exit(1) 46 | 47 | zip_path = BINARIES_DIR / artifact 48 | with open(zip_path, "wb") as f: 49 | f.write(r.content) 50 | 51 | out_dir = BINARIES_DIR 52 | out_dir.mkdir(parents=True, exist_ok=True) 53 | 54 | with zipfile.ZipFile(zip_path, "r") as zip_ref: 55 | for member in zip_ref.namelist(): 56 | if member.endswith(bin_name): 57 | zip_ref.extract(member, out_dir) 58 | 59 | # Move to expected 
name/location 60 | src = out_dir / member 61 | dst = out_dir / bin_name 62 | src.rename(dst) 63 | 64 | print(f"Extracted {dst}") 65 | 66 | zip_path.unlink() 67 | 68 | 69 | def main(): 70 | version = None 71 | platform = None 72 | if len(sys.argv) == 3: 73 | platform = sys.argv[1].lower() 74 | version = sys.argv[2] 75 | 76 | if not version or not platform: 77 | print( 78 | 'Error: Version is not specified.\nUsage: \n python3 download_artifacts.py linux_x86_64 "0.5.9"' 79 | ) 80 | sys.exit(1) 81 | 82 | print(BINARIES_DIR) 83 | if BINARIES_DIR.exists(): 84 | shutil.rmtree(BINARIES_DIR) 85 | BINARIES_DIR.mkdir(parents=True, exist_ok=True) 86 | 87 | platform_artifacts = ARTIFACTS.get(platform, []) 88 | if not platform_artifacts: 89 | print(f"Error: Unknown platform '{platform}'") 90 | sys.exit(1) 91 | 92 | for artifact_name in platform_artifacts: 93 | download_and_extract(artifact_name, BINARY_NAME[platform], version) 94 | 95 | 96 | if __name__ == "__main__": 97 | main() 98 | -------------------------------------------------------------------------------- /QUANTIZATION.md: -------------------------------------------------------------------------------- 1 | ### Vector Quantization for High Performance 2 | 3 | `sqlite-vector` supports **vector quantization**, a powerful technique to significantly accelerate vector search while reducing memory usage. You can quantize your vectors with: 4 | 5 | ```sql 6 | SELECT vector_quantize('my_table', 'my_column'); 7 | ``` 8 | 9 | To further boost performance, quantized vectors can be **preloaded in memory** using: 10 | 11 | ```sql 12 | SELECT vector_quantize_preload('my_table', 'my_column'); 13 | ``` 14 | 15 | This can result in a **4×–5× speedup** on nearest neighbor queries while keeping memory usage low. 16 | 17 | #### What is Quantization? 18 | 19 | Quantization compresses high-dimensional float vectors (e.g., `FLOAT32`) into compact representations using lower-precision formats (e.g., `UINT8`). 
This drastically reduces the size of the data—often by a factor of 4 to 8—making it practical to load large datasets entirely in memory, even on edge devices. 20 | 21 | #### Why is it Important? 22 | 23 | * **Faster Searches**: With preloaded quantized vectors, distance computations are up to 5× faster. 24 | * **Lower Memory Footprint**: Quantized vectors use significantly less RAM, allowing millions of vectors to fit in memory. 25 | * **Edge-ready**: The reduced size and in-memory access make this ideal for mobile, embedded, and on-device AI applications. 26 | 27 | #### Estimate Memory Usage 28 | 29 | Before preloading quantized vectors, you can **estimate the memory required** using: 30 | 31 | ```sql 32 | SELECT vector_quantize_memory('my_table', 'my_column'); 33 | ``` 34 | 35 | This gives you an approximate number of bytes needed to load the quantized vectors into memory. 36 | 37 | #### Accuracy You Can Trust 38 | 39 | Despite the compression, our quantization algorithms are finely tuned to maintain high accuracy. You can expect **recall rates greater than 0.95**, ensuring that approximate searches closely match exact results in quality. 40 | 41 | #### Measuring Recall in SQLite-Vector 42 | 43 | You can evaluate the recall of quantized search compared to exact search using a single SQL query. 
For example, assuming a table `vec_examples` with an `embedding` column, use: 44 | 45 | ```sql 46 | WITH 47 | exact_knn AS ( 48 | SELECT e.rowid 49 | FROM vec_examples AS e 50 | JOIN vector_full_scan('vec_examples', 'embedding', ?1, ?2) AS v 51 | ON e.rowid = v.rowid 52 | ), 53 | approx_knn AS ( 54 | SELECT e.rowid 55 | FROM vec_examples AS e 56 | JOIN vector_quantize_scan('vec_examples', 'embedding', ?1, ?2) AS v 57 | ON e.rowid = v.rowid 58 | ), 59 | matches AS ( 60 | SELECT COUNT(*) AS match_count 61 | FROM exact_knn 62 | WHERE rowid IN (SELECT rowid FROM approx_knn) 63 | ), 64 | total AS ( 65 | SELECT COUNT(*) AS total_count 66 | FROM exact_knn 67 | ) 68 | SELECT 69 | (SELECT match_count FROM matches) AS match_count, 70 | (SELECT total_count FROM total) AS total_count, 71 | CAST((SELECT match_count FROM matches) AS FLOAT) / 72 | CAST((SELECT total_count FROM total) AS FLOAT) AS recall; 73 | ``` 74 | 75 | Where `?1` is the input vector (as a BLOB) and `?2` is the number of nearest neighbors `k`. 76 | This query compares exact and quantized results and computes the recall ratio, helping you validate the quality of quantized search. 77 | -------------------------------------------------------------------------------- /packages/android/gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | @rem SPDX-License-Identifier: Apache-2.0 17 | @rem 18 | 19 | @if "%DEBUG%"=="" @echo off 20 | @rem ########################################################################## 21 | @rem 22 | @rem Gradle startup script for Windows 23 | @rem 24 | @rem ########################################################################## 25 | 26 | @rem Set local scope for the variables with windows NT shell 27 | if "%OS%"=="Windows_NT" setlocal 28 | 29 | set DIRNAME=%~dp0 30 | if "%DIRNAME%"=="" set DIRNAME=. 31 | @rem This is normally unused 32 | set APP_BASE_NAME=%~n0 33 | set APP_HOME=%DIRNAME% 34 | 35 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 36 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 37 | 38 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 39 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 40 | 41 | @rem Find java.exe 42 | if defined JAVA_HOME goto findJavaFromJavaHome 43 | 44 | set JAVA_EXE=java.exe 45 | %JAVA_EXE% -version >NUL 2>&1 46 | if %ERRORLEVEL% equ 0 goto execute 47 | 48 | echo. 1>&2 49 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 50 | echo. 1>&2 51 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2 52 | echo location of your Java installation. 1>&2 53 | 54 | goto fail 55 | 56 | :findJavaFromJavaHome 57 | set JAVA_HOME=%JAVA_HOME:"=% 58 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 59 | 60 | if exist "%JAVA_EXE%" goto execute 61 | 62 | echo. 1>&2 63 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 64 | echo. 1>&2 65 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2 66 | echo location of your Java installation. 
1>&2 67 | 68 | goto fail 69 | 70 | :execute 71 | @rem Setup the command line 72 | 73 | 74 | 75 | @rem Execute Gradle 76 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -jar "%APP_HOME%\gradle\wrapper\gradle-wrapper.jar" %* 77 | 78 | :end 79 | @rem End local scope for the variables with windows NT shell 80 | if %ERRORLEVEL% equ 0 goto mainEnd 81 | 82 | :fail 83 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 84 | rem the _cmd.exe /c_ return code! 85 | set EXIT_CODE=%ERRORLEVEL% 86 | if %EXIT_CODE% equ 0 set EXIT_CODE=1 87 | if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% 88 | exit /b %EXIT_CODE% 89 | 90 | :mainEnd 91 | if "%OS%"=="Windows_NT" endlocal 92 | 93 | :omega 94 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: Build and Publish Python Package 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | version: 7 | description: "Version to use for the Python package (e.g. 
0.9.9)" 8 | required: true 9 | type: string 10 | test-pypi: 11 | description: "Publish to Test PyPI" 12 | required: false 13 | type: boolean 14 | default: false 15 | 16 | workflow_run: 17 | workflows: ["Build, Test and Release"] 18 | types: 19 | - completed 20 | 21 | jobs: 22 | build-and-publish: 23 | if: | 24 | github.event_name == 'workflow_dispatch' || 25 | (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.head_branch == 'main') 26 | runs-on: ${{ matrix.os }} 27 | permissions: 28 | id-token: write # mandatory for Pypi trusted publishing 29 | strategy: 30 | matrix: 31 | include: 32 | - os: ubuntu-latest 33 | platform: linux 34 | python-version: "3.10" 35 | arch: x86_64 36 | plat_name: manylinux2014_x86_64 37 | - os: ubuntu-latest 38 | platform: linux 39 | python-version: "3.10" 40 | arch: arm64 41 | plat_name: manylinux2014_aarch64 42 | - os: ubuntu-latest 43 | platform: windows 44 | python-version: "3.10" 45 | arch: x86_64 46 | plat_name: win_amd64 47 | - os: ubuntu-latest 48 | platform: macos 49 | python-version: "3.10" 50 | arch: x86_64 51 | plat_name: macosx_10_9_x86_64 52 | - os: ubuntu-latest 53 | platform: macos 54 | python-version: "3.10" 55 | arch: arm64 56 | plat_name: macosx_11_0_arm64 57 | defaults: 58 | run: 59 | shell: bash 60 | steps: 61 | - uses: actions/checkout@v4 62 | with: 63 | submodules: false 64 | 65 | - name: Set up Python 66 | uses: actions/setup-python@v5 67 | with: 68 | python-version: ${{ matrix.python-version }} 69 | 70 | - name: Install build dependencies 71 | run: | 72 | cd packages/python 73 | python3 -m pip install --upgrade pip 74 | python3 -m pip install -r requirements-dev.txt 75 | 76 | - name: Get version 77 | id: get_version 78 | run: | 79 | if [[ "${{ github.event_name }}" == "workflow_run" ]]; then 80 | # Fetch latest published release tag from GitHub API 81 | VERSION=$(curl -s "https://api.github.com/repos/${{ github.repository }}/releases/latest" | jq -r 
'.tag_name') 82 | if [ "$VERSION" = "null" ] || [ -z "$VERSION" ]; then 83 | echo "Error: Failed to get latest release version" 84 | exit 1 85 | fi 86 | else 87 | VERSION="${{ github.event.inputs.version }}" 88 | fi 89 | VERSION=${VERSION#v} 90 | echo "version=$VERSION" >> $GITHUB_OUTPUT 91 | 92 | - name: Download artifacts for current platform 93 | run: | 94 | cd packages/python 95 | python3 download_artifacts.py "${{ matrix.plat_name }}" "${{ steps.get_version.outputs.version }}" 96 | 97 | - name: Build wheel 98 | env: 99 | PACKAGE_VERSION: ${{ steps.get_version.outputs.version }} 100 | PLAT_NAME: ${{ matrix.plat_name }} 101 | run: | 102 | cd packages/python 103 | python -m build --wheel 104 | 105 | - name: Publish to PyPI 106 | uses: pypa/gh-action-pypi-publish@release/v1 107 | with: 108 | packages-dir: packages/python/dist 109 | verbose: true 110 | # Avoid workflow to fail if the version has already been published 111 | skip-existing: true 112 | # Upload to Test Pypi for testing 113 | repository-url: ${{ github.event.inputs.test-pypi == 'true' && 'https://test.pypi.org/legacy/' || '' }} 114 | -------------------------------------------------------------------------------- /packages/android/build.gradle: -------------------------------------------------------------------------------- 1 | buildscript { 2 | repositories { 3 | google() 4 | mavenCentral() 5 | } 6 | dependencies { 7 | classpath 'com.android.tools.build:gradle:8.5.2' 8 | } 9 | } 10 | 11 | plugins { 12 | id 'com.gradleup.nmcp.aggregation' version '1.2.0' 13 | } 14 | 15 | apply plugin: 'com.android.library' 16 | apply plugin: 'maven-publish' 17 | apply plugin: 'signing' 18 | 19 | android { 20 | namespace 'ai.sqlite.vector' 21 | compileSdk 34 22 | 23 | defaultConfig { 24 | minSdk 26 25 | targetSdk 34 26 | } 27 | 28 | buildTypes { 29 | release { 30 | minifyEnabled false 31 | } 32 | } 33 | 34 | compileOptions { 35 | sourceCompatibility JavaVersion.VERSION_1_8 36 | targetCompatibility 
JavaVersion.VERSION_1_8 37 | } 38 | 39 | sourceSets { 40 | main { 41 | jniLibs.srcDirs = ['src/main/jniLibs'] 42 | } 43 | } 44 | } 45 | 46 | repositories { 47 | google() 48 | mavenCentral() 49 | maven { url 'https://jitpack.io' } 50 | } 51 | 52 | dependencies { 53 | } 54 | 55 | afterEvaluate { 56 | publishing { 57 | publications { 58 | release(MavenPublication) { 59 | groupId = 'ai.sqlite' 60 | artifactId = 'vector' 61 | version = project.hasProperty('VERSION') ? project.VERSION : ['make', 'version'].execute(null, file('../..')).text.trim() 62 | 63 | artifact(project.hasProperty('AAR_PATH') ? project.AAR_PATH : "$buildDir/outputs/aar/android-release.aar") 64 | 65 | // Maven Central metadata 66 | pom { 67 | name = 'sqlite-vector' 68 | description = 'A cross-platform, ultra-efficient SQLite extension that brings vector search capabilities to your embedded database. Works seamlessly on iOS, Android, Windows, Linux, and macOS, using just 30MB of memory by default.' 69 | url = 'https://github.com/sqliteai/sqlite-vector' 70 | 71 | licenses { 72 | license { 73 | name = 'Elastic License 2.0' 74 | url = 'https://www.elastic.co/licensing/elastic-license' 75 | } 76 | } 77 | 78 | developers { 79 | developer { 80 | id = 'sqliteai' 81 | name = 'SQLite Cloud, Inc.' 82 | email = 'info@sqlitecloud.io' 83 | organization = 'SQLite Cloud, Inc.' 
84 | organizationUrl = 'https://sqlite.ai' 85 | } 86 | } 87 | 88 | scm { 89 | connection = 'scm:git:git://github.com/sqliteai/sqlite-vector.git' 90 | developerConnection = 'scm:git:ssh://github.com:sqliteai/sqlite-vector.git' 91 | url = 'https://github.com/sqliteai/sqlite-vector/tree/main' 92 | } 93 | } 94 | } 95 | } 96 | } 97 | 98 | // Signing configuration for Maven Central 99 | signing { 100 | required { project.hasProperty("SIGNING_KEY") } 101 | if (project.hasProperty("SIGNING_KEY")) { 102 | useInMemoryPgpKeys( 103 | project.property("SIGNING_KEY").toString(), 104 | project.property("SIGNING_PASSWORD").toString() 105 | ) 106 | sign publishing.publications.release 107 | } 108 | } 109 | } 110 | 111 | // Maven Central publishing via NMCP aggregation 112 | nmcpAggregation { 113 | if (project.hasProperty("SONATYPE_USERNAME") && project.hasProperty("SONATYPE_PASSWORD")) { 114 | centralPortal { 115 | username = project.property("SONATYPE_USERNAME") 116 | password = project.property("SONATYPE_PASSWORD") 117 | publishingType = "AUTOMATIC" 118 | } 119 | publishAllProjectsProbablyBreakingProjectIsolation() 120 | } 121 | } -------------------------------------------------------------------------------- /packages/node/src/platform.ts: -------------------------------------------------------------------------------- 1 | import { platform, arch } from 'node:os'; 2 | import { existsSync, readFileSync } from 'node:fs'; 3 | import { execSync } from 'node:child_process'; 4 | 5 | /** 6 | * Supported platform identifiers 7 | */ 8 | export type Platform = 9 | | 'darwin-arm64' 10 | | 'darwin-x86_64' 11 | | 'linux-arm64' 12 | | 'linux-arm64-musl' 13 | | 'linux-x86_64' 14 | | 'linux-x86_64-musl' 15 | | 'win32-x86_64'; 16 | 17 | /** 18 | * Binary extension for each platform 19 | */ 20 | export const PLATFORM_EXTENSIONS: Record = { 21 | darwin: '.dylib', 22 | linux: '.so', 23 | win32: '.dll', 24 | } as const; 25 | 26 | /** 27 | * Detects if the system uses musl libc (Alpine Linux, etc.) 
export const PLATFORM_EXTENSIONS: Record<string, string> = {
detection) 104 | if (platformName === 'linux') { 105 | const muslSuffix = isMusl() ? '-musl' : ''; 106 | 107 | if (archName === 'arm64') { 108 | return `linux-arm64${muslSuffix}` as Platform; 109 | } 110 | if (archName === 'x64' || archName === 'ia32') { 111 | return `linux-x86_64${muslSuffix}` as Platform; 112 | } 113 | } 114 | 115 | // Windows 116 | if (platformName === 'win32') { 117 | if (archName === 'x64' || archName === 'ia32') return 'win32-x86_64'; 118 | } 119 | 120 | // Unsupported platform 121 | throw new Error( 122 | `Unsupported platform: ${platformName}-${archName}. ` + 123 | `Supported platforms: darwin-arm64, darwin-x86_64, linux-arm64, linux-x86_64, win32-x86_64 ` + 124 | `(with glibc or musl support for Linux)` 125 | ); 126 | } 127 | 128 | /** 129 | * Gets the package name for the current platform 130 | */ 131 | export function getPlatformPackageName(): string { 132 | const currentPlatform = getCurrentPlatform(); 133 | return `@sqliteai/sqlite-vector-${currentPlatform}`; 134 | } 135 | 136 | /** 137 | * Gets the binary filename for the current platform 138 | */ 139 | export function getBinaryName(): string { 140 | const platformName = platform(); 141 | const extension = PLATFORM_EXTENSIONS[platformName]; 142 | 143 | if (!extension) { 144 | throw new Error(`Unknown platform: ${platformName}`); 145 | } 146 | 147 | return `vector${extension}`; 148 | } 149 | -------------------------------------------------------------------------------- /src/distance-cpu.h: -------------------------------------------------------------------------------- 1 | // 2 | // distance-cpu.h 3 | // sqlitevector 4 | // 5 | // Created by Marco Bambini on 20/06/25. 
11 | #include "fp16/fp16.h"
12 | #include <stdint.h>
13 | #include <stdbool.h>
14 | #include <math.h>
15 |
.f = f }; 74 | return v.u; 75 | #endif 76 | } 77 | 78 | static inline float bits_to_f32 (uint32_t u) { 79 | #if defined(HAVE_BUILTIN_BIT_CAST) 80 | return __builtin_bit_cast(float, u); 81 | #else 82 | union { uint32_t u; float f; } v = { .u = u }; 83 | return v.f; 84 | #endif 85 | } 86 | 87 | // bfloat16 (stored as uint16_t) -> float32, and back (RNE) 88 | static inline bool bfloat16_is_nan(uint16_t h) { /* exp==0xFF && frac!=0 */ 89 | return ((h & 0x7F80u) == 0x7F80u) && ((h & 0x007Fu) != 0); 90 | } 91 | static inline bool bfloat16_is_inf(uint16_t h) { /* exp==0xFF && frac==0 */ 92 | return ((h & 0x7F80u) == 0x7F80u) && ((h & 0x007Fu) == 0); 93 | } 94 | static inline bool bfloat16_is_zero(uint16_t h) { /* ±0 */ 95 | return (h & 0x7FFFu) == 0; 96 | } 97 | static inline int bfloat16_sign(uint16_t h) { 98 | return (h >> 15) & 1; 99 | } 100 | static inline float bfloat16_to_float32(uint16_t bf) { 101 | return bits_to_f32((uint32_t)bf << 16); 102 | } 103 | static inline uint16_t float32_to_bfloat16(float f) { 104 | uint32_t x = f32_to_bits(f); 105 | uint32_t lsb = (x >> 16) & 1u; /* ties-to-even */ 106 | uint32_t rnd = 0x7FFFu + lsb; 107 | return (uint16_t)((x + rnd) >> 16); 108 | } 109 | 110 | // ---- float16 (binary16) classifiers (work on raw uint16_t bits) 111 | static inline bool f16_is_nan(uint16_t h) { /* exp==0x1F && frac!=0 */ 112 | return ( (h & 0x7C00u) == 0x7C00u ) && ((h & 0x03FFu) != 0); 113 | } 114 | static inline bool f16_is_inf(uint16_t h) { /* exp==0x1F && frac==0 */ 115 | return ( (h & 0x7C00u) == 0x7C00u ) && ((h & 0x03FFu) == 0); 116 | } 117 | static inline int f16_sign(uint16_t h) { 118 | return (h >> 15) & 1; 119 | } 120 | static inline bool f16_is_zero(uint16_t h) { /* ±0 */ 121 | return (h & 0x7FFFu) == 0; 122 | } 123 | static inline uint16_t float32_to_float16 (float f) { 124 | return fp16_ieee_from_fp32_value(f); 125 | } 126 | static inline float float16_to_float32 (uint16_t h) { 127 | return fp16_ieee_to_fp32_value(h); 128 | } 129 | 130 | 
#endif 131 | -------------------------------------------------------------------------------- /packages/node/src/index.ts: -------------------------------------------------------------------------------- 1 | import { resolve } from 'node:path'; 2 | import { existsSync } from 'node:fs'; 3 | import { 4 | getCurrentPlatform, 5 | getPlatformPackageName, 6 | getBinaryName, 7 | type Platform 8 | } from './platform.js'; 9 | 10 | /** 11 | * Error thrown when the SQLite Vector extension cannot be found 12 | */ 13 | export class ExtensionNotFoundError extends Error { 14 | constructor(message: string) { 15 | super(message); 16 | this.name = 'ExtensionNotFoundError'; 17 | } 18 | } 19 | 20 | /** 21 | * Attempts to load the platform-specific package 22 | * @returns The path to the extension binary, or null if not found 23 | */ 24 | function tryLoadPlatformPackage(): string | null { 25 | try { 26 | const packageName = getPlatformPackageName(); 27 | 28 | // Try to dynamically import the platform package 29 | // This works in both CommonJS and ESM 30 | const platformPackage = require(packageName); 31 | 32 | if (platformPackage?.path && typeof platformPackage.path === 'string') { 33 | if (existsSync(platformPackage.path)) { 34 | return platformPackage.path; 35 | } 36 | } 37 | } catch (error) { 38 | // Platform package not installed or failed to load 39 | // This is expected when optionalDependencies fail 40 | } 41 | 42 | return null; 43 | } 44 | 45 | /** 46 | * Gets the absolute path to the SQLite Vector extension binary for the current platform 47 | * 48 | * @returns Absolute path to the extension binary (.so, .dylib, or .dll) 49 | * @throws {ExtensionNotFoundError} If the extension binary cannot be found 50 | * 51 | * @example 52 | * ```typescript 53 | * import { getExtensionPath } from '@sqliteai/sqlite-vector'; 54 | * 55 | * const extensionPath = getExtensionPath(); 56 | * // On macOS ARM64: /path/to/node_modules/@sqliteai/sqlite-vector-darwin-arm64/vector.dylib 57 | * ``` 58 | 
*/ 59 | export function getExtensionPath(): string { 60 | // Try to load from platform-specific package 61 | const platformPath = tryLoadPlatformPackage(); 62 | if (platformPath) { 63 | return resolve(platformPath); 64 | } 65 | 66 | // If we reach here, the platform package wasn't installed 67 | const currentPlatform = getCurrentPlatform(); 68 | const packageName = getPlatformPackageName(); 69 | 70 | throw new ExtensionNotFoundError( 71 | `SQLite Vector extension not found for platform: ${currentPlatform}\n\n` + 72 | `The platform-specific package "${packageName}" is not installed.\n` + 73 | `This usually happens when:\n` + 74 | ` 1. Your platform is not supported\n` + 75 | ` 2. npm failed to install optional dependencies\n` + 76 | ` 3. You're installing with --no-optional flag\n\n` + 77 | `Try running: npm install --force` 78 | ); 79 | } 80 | 81 | /** 82 | * Information about the current platform and extension 83 | */ 84 | export interface ExtensionInfo { 85 | /** Current platform identifier (e.g., 'darwin-arm64') */ 86 | platform: Platform; 87 | /** Name of the platform-specific npm package */ 88 | packageName: string; 89 | /** Filename of the binary (e.g., 'vector.dylib') */ 90 | binaryName: string; 91 | /** Full path to the extension binary */ 92 | path: string; 93 | } 94 | 95 | /** 96 | * Gets detailed information about the SQLite Vector extension 97 | * 98 | * @returns Extension information object 99 | * 100 | * @example 101 | * ```typescript 102 | * import { getExtensionInfo } from '@sqliteai/sqlite-vector'; 103 | * 104 | * const info = getExtensionInfo(); 105 | * console.log(info); 106 | * // { 107 | * // platform: 'darwin-arm64', 108 | * // packageName: '@sqliteai/sqlite-vector-darwin-arm64', 109 | * // binaryName: 'vector.dylib', 110 | * // path: '/path/to/vector.dylib' 111 | * // } 112 | * ``` 113 | */ 114 | export function getExtensionInfo(): ExtensionInfo { 115 | return { 116 | platform: getCurrentPlatform(), 117 | packageName: 
Semantic Search CLI Tool using SQLite + sqlite-vector + sentence-transformers
all-MiniLM-L6-v2)") 39 | parser.add_argument("--stats", action="store_true", 40 | help="Show database statistics") 41 | parser.add_argument("--repl", action="store_true", 42 | help="Run in interactive (keep model in memory)") 43 | 44 | args = parser.parse_args() 45 | 46 | if not any([args.query, args.index, args.stats, args.repl]): 47 | parser.print_help() 48 | return 49 | 50 | searcher = SemanticSearch(args.db, args.model) 51 | 52 | try: 53 | if args.stats: 54 | searcher.stats() 55 | 56 | elif args.index: 57 | if os.path.isdir(args.index): 58 | total = searcher.index_directory(args.index) 59 | print(f"Total chunks indexed: {total}") 60 | else: 61 | searcher.index_file(args.index) 62 | 63 | elif args.query: 64 | elapsed_ms, results = searcher.search(args.query, args.limit) 65 | 66 | if not results: 67 | print("No results found.") 68 | return 69 | 70 | print(f"Results for: '{args.query}' in {elapsed_ms}ms\n") 71 | for i, (filepath, content, similarity) in enumerate(results, 1): 72 | print(f"{i}. {filepath} (similarity: {similarity:.3f})") 73 | # Show first 200 chars of content 74 | preview = content[:200] + \ 75 | "..." if len(content) > 200 else content 76 | print(f" {preview}\n") 77 | 78 | if args.repl: 79 | print("Entering interactive mode (keep the model in memory).\nType 'help' for commands, 'exit' to quit.") 80 | while True: 81 | try: 82 | cmd = input("semsearch> ").strip() 83 | if not cmd: 84 | continue 85 | if cmd in {"exit", "quit"}: 86 | break 87 | if cmd == "help": 88 | print( 89 | "Commands: search , index , stats, exit") 90 | continue 91 | if cmd.startswith("search "): 92 | query = cmd[len("search "):].strip() 93 | elapsed_ms, results = searcher.search( 94 | query, args.limit) 95 | if not results: 96 | print("No results found.") 97 | continue 98 | print(f"Results for: '{query}' in {elapsed_ms}ms\n") 99 | for i, (filepath, content, similarity) in enumerate(results, 1): 100 | print( 101 | f"{i}. 
{filepath} (similarity: {similarity:.3f})") 102 | preview = content[:200] + \ 103 | ("..." if len(content) > 200 else "") 104 | print(f" {preview}\n") 105 | continue 106 | if cmd.startswith("index "): 107 | path = cmd[len("index "):].strip() 108 | if os.path.isdir(path): 109 | total = searcher.index_directory(path) 110 | print(f"Total chunks indexed: {total}") 111 | else: 112 | searcher.index_file(path) 113 | continue 114 | if cmd == "stats": 115 | searcher.stats() 116 | continue 117 | print("Unknown command. Type 'help' for available commands.") 118 | except KeyboardInterrupt: 119 | print("\nExiting REPL.") 120 | break 121 | except Exception as e: 122 | print(f"Error: {e}") 123 | 124 | if searcher: 125 | searcher.close() 126 | return 127 | 128 | except KeyboardInterrupt: 129 | print("\nOperation cancelled.") 130 | except Exception as e: 131 | print(f"Error: {e}") 132 | sys.exit(1) 133 | finally: 134 | if searcher: 135 | searcher.close() 136 | 137 | 138 | if __name__ == "__main__": 139 | main() 140 | -------------------------------------------------------------------------------- /packages/node/README.md: -------------------------------------------------------------------------------- 1 | # @sqliteai/sqlite-vector 2 | 3 | [![npm version](https://badge.fury.io/js/@sqliteai%2Fsqlite-vector.svg)](https://badge.fury.io/js/@sqliteai%2Fsqlite-vector) 4 | [![License](https://img.shields.io/badge/license-Elastic%202.0-blue.svg)](LICENSE.md) 5 | 6 | > SQLite Vector extension packaged for Node.js 7 | 8 | **SQLite Vector** is a cross-platform, ultra-efficient SQLite extension that brings vector search capabilities to your embedded database. It works seamlessly on **iOS, Android, Windows, Linux, and macOS**, using just **30MB of memory** by default. With support for **Float32, Float16, BFloat16, Int8, and UInt8**, and **highly optimized distance functions**, it's the ideal solution for **Edge AI** applications. 
9 | 10 | ## Features 11 | 12 | - ✅ **Cross-platform** - Works on macOS, Linux (glibc/musl), and Windows 13 | - ✅ **Zero configuration** - Automatically detects and loads the correct binary for your platform 14 | - ✅ **TypeScript native** - Full type definitions included 15 | - ✅ **Modern ESM + CJS** - Works with both ES modules and CommonJS 16 | - ✅ **Small footprint** - Only downloads binaries for your platform 17 | - ✅ **Offline-ready** - No external services required 18 | 19 | ## Installation 20 | 21 | ```bash 22 | npm install @sqliteai/sqlite-vector 23 | ``` 24 | 25 | The package automatically downloads the correct native extension for your platform during installation. 26 | 27 | ### Supported Platforms 28 | 29 | | Platform | Architecture | Package | 30 | |----------|-------------|---------| 31 | | macOS | ARM64 (Apple Silicon) | `@sqliteai/sqlite-vector-darwin-arm64` | 32 | | macOS | x86_64 (Intel) | `@sqliteai/sqlite-vector-darwin-x86_64` | 33 | | Linux | ARM64 (glibc) | `@sqliteai/sqlite-vector-linux-arm64` | 34 | | Linux | ARM64 (musl/Alpine) | `@sqliteai/sqlite-vector-linux-arm64-musl` | 35 | | Linux | x86_64 (glibc) | `@sqliteai/sqlite-vector-linux-x86_64` | 36 | | Linux | x86_64 (musl/Alpine) | `@sqliteai/sqlite-vector-linux-x86_64-musl` | 37 | | Windows | x86_64 | `@sqliteai/sqlite-vector-win32-x86_64` | 38 | 39 | ## sqlite-vector API 40 | 41 | For detailed information on how to use the vector extension features, see the [main documentation](https://github.com/sqliteai/sqlite-vector/blob/main/README.md). 
42 | 43 | ## Usage 44 | 45 | ```typescript 46 | import { getExtensionPath } from '@sqliteai/sqlite-vector'; 47 | import Database from 'better-sqlite3'; 48 | 49 | const db = new Database(':memory:'); 50 | db.loadExtension(getExtensionPath()); 51 | 52 | // Ready to use 53 | const version = db.prepare('SELECT vector_version()').pluck().get(); 54 | console.log('Vector extension version:', version); 55 | ``` 56 | 57 | ## Examples 58 | 59 | For complete, runnable examples, see the [sqlite-extensions-guide](https://github.com/sqliteai/sqlite-extensions-guide/tree/main/examples/node). 60 | 61 | These examples are generic and work with all SQLite extensions: `sqlite-vector`, `sqlite-sync`, `sqlite-js`, and `sqlite-ai`. 62 | 63 | ## API Reference 64 | 65 | ### `getExtensionPath(): string` 66 | 67 | Returns the absolute path to the SQLite Vector extension binary for the current platform. 68 | 69 | **Returns:** `string` - Absolute path to the extension file (`.so`, `.dylib`, or `.dll`) 70 | 71 | **Throws:** `ExtensionNotFoundError` - If the extension binary cannot be found for the current platform 72 | 73 | **Example:** 74 | ```typescript 75 | import { getExtensionPath } from '@sqliteai/sqlite-vector'; 76 | 77 | const path = getExtensionPath(); 78 | // => '/path/to/node_modules/@sqliteai/sqlite-vector-darwin-arm64/vector.dylib' 79 | ``` 80 | 81 | --- 82 | 83 | ### `getExtensionInfo(): ExtensionInfo` 84 | 85 | Returns detailed information about the extension for the current platform. 
86 | 87 | **Returns:** `ExtensionInfo` object with the following properties: 88 | - `platform: Platform` - Current platform identifier (e.g., `'darwin-arm64'`) 89 | - `packageName: string` - Name of the platform-specific npm package 90 | - `binaryName: string` - Filename of the binary (e.g., `'vector.dylib'`) 91 | - `path: string` - Full path to the extension binary 92 | 93 | **Throws:** `ExtensionNotFoundError` - If the extension binary cannot be found 94 | 95 | **Example:** 96 | ```typescript 97 | import { getExtensionInfo } from '@sqliteai/sqlite-vector'; 98 | 99 | const info = getExtensionInfo(); 100 | console.log(`Running on ${info.platform}`); 101 | console.log(`Extension path: ${info.path}`); 102 | ``` 103 | 104 | --- 105 | 106 | ### `getCurrentPlatform(): Platform` 107 | 108 | Returns the current platform identifier. 109 | 110 | **Returns:** `Platform` - One of: 111 | - `'darwin-arm64'` - macOS ARM64 112 | - `'darwin-x86_64'` - macOS x86_64 113 | - `'linux-arm64'` - Linux ARM64 (glibc) 114 | - `'linux-arm64-musl'` - Linux ARM64 (musl) 115 | - `'linux-x86_64'` - Linux x86_64 (glibc) 116 | - `'linux-x86_64-musl'` - Linux x86_64 (musl) 117 | - `'win32-x86_64'` - Windows x86_64 118 | 119 | **Throws:** `Error` - If the platform is unsupported 120 | 121 | --- 122 | 123 | ### `isMusl(): boolean` 124 | 125 | Detects if the system uses musl libc (Alpine Linux, etc.). 126 | 127 | **Returns:** `boolean` - `true` if musl is detected, `false` otherwise 128 | 129 | --- 130 | 131 | ### `class ExtensionNotFoundError extends Error` 132 | 133 | Error thrown when the SQLite Vector extension cannot be found for the current platform. 
134 | 135 | ## Related Projects 136 | 137 | - **[@sqliteai/sqlite-ai](https://www.npmjs.com/package/@sqliteai/sqlite-ai)** - On-device AI inference and embedding generation 138 | - **[@sqliteai/sqlite-sync](https://www.npmjs.com/package/@sqliteai/sqlite-sync)** - Sync on-device databases with the cloud 139 | - **[@sqliteai/sqlite-js](https://www.npmjs.com/package/@sqliteai/sqlite-js)** - Define SQLite functions in JavaScript 140 | 141 | ## License 142 | 143 | This project is licensed under the [Elastic License 2.0](LICENSE.md). 144 | 145 | For production or managed service use, please [contact SQLite Cloud, Inc](mailto:info@sqlitecloud.io) for a commercial license. 146 | 147 | ## Contributing 148 | 149 | Contributions are welcome! Please see the [main repository](https://github.com/sqliteai/sqlite-vector) to open an issue. 150 | 151 | ## Support 152 | 153 | - 📖 [Documentation](https://github.com/sqliteai/sqlite-vector/blob/main/API.md) 154 | - 🐛 [Report Issues](https://github.com/sqliteai/sqlite-vector/issues) 155 | -------------------------------------------------------------------------------- /examples/semantic_search/semantic_search.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sqlite3 4 | import sys 5 | import time 6 | from pathlib import Path 7 | from typing import List, Tuple 8 | 9 | from sentence_transformers import SentenceTransformer 10 | 11 | 12 | class SemanticSearch: 13 | def __init__(self, db_path: str = "semsearch.db", model_name: str = "all-MiniLM-L6-v2"): 14 | self.db_path = db_path 15 | self.model_name = model_name 16 | self.model = None 17 | self.conn = None 18 | 19 | def _get_model(self): 20 | """Lazy load the sentence transformer model""" 21 | if self.model is None: 22 | print(f"Loading model {self.model_name}...") 23 | self.model = SentenceTransformer(self.model_name) 24 | return self.model 25 | 26 | def _get_connection(self): 27 | """Get database connection, load 
SQLite Vector extension 28 | and ensure schema is created""" 29 | if self.conn is None: 30 | self.conn = sqlite3.connect(self.db_path) 31 | 32 | self.conn.enable_load_extension(True) 33 | self.conn.load_extension("./vector.so") 34 | self.conn.enable_load_extension(False) 35 | 36 | # Check if sqlite-vector is available 37 | try: 38 | self.conn.execute("SELECT vector_version()") 39 | except sqlite3.OperationalError: 40 | print("Error: sqlite-vector extension not found.") 41 | print( 42 | "Download it from https://github.com/sqliteai/sqlite-vector/releases") 43 | sys.exit(1) 44 | 45 | self._create_schema() 46 | return self.conn 47 | 48 | def _create_schema(self): 49 | """Create the documents table with vector support""" 50 | conn = self._get_connection() 51 | cursor = conn.cursor() 52 | 53 | # Create documents table 54 | cursor.execute(""" 55 | CREATE TABLE IF NOT EXISTS documents ( 56 | id INTEGER PRIMARY KEY AUTOINCREMENT, 57 | filepath TEXT NOT NULL, 58 | content TEXT NOT NULL, 59 | embedding BLOB, 60 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 61 | ) 62 | """) 63 | 64 | # Create vector table using sqlite-vector extension 65 | # The default model 'all-MiniLM-L6-v2' produces 384-dimensional embeddings 66 | 67 | # Initialize the vector 68 | cursor.execute(""" 69 | SELECT vector_init('documents', 'embedding', 'type=FLOAT32,dimension=384'); 70 | """) 71 | 72 | conn.commit() 73 | 74 | def _chunk_text(self, text: str, chunk_size: int = 250, overlap: int = 50) -> List[str]: 75 | """Split text into overlapping chunks for better semantic search""" 76 | words = text.split() 77 | chunks = [] 78 | 79 | for i in range(0, len(words), chunk_size - overlap): 80 | chunk = ' '.join(words[i:i + chunk_size]) 81 | chunk = chunk.strip() 82 | if chunk: 83 | chunks.append(chunk) 84 | 85 | # Return original if no chunks created 86 | return chunks if chunks else [text] 87 | 88 | def index_file(self, filepath: str) -> int: 89 | """Index a single file and return number of chunks 
processed""" 90 | if not os.path.exists(filepath): 91 | print(f"File not found: {filepath}") 92 | return 0 93 | 94 | model = self._get_model() 95 | conn = self._get_connection() 96 | 97 | cursor = conn.execute( 98 | "SELECT id FROM documents WHERE filepath = ?", (filepath,)) 99 | if cursor.fetchone() is not None: 100 | print(f"File already indexed: {filepath}") 101 | return 0 102 | 103 | try: 104 | with open(filepath, 'r', encoding='utf-8', errors='ignore') as f: 105 | content = f.read().strip() 106 | except Exception as e: 107 | print(f"Error reading {filepath}: {e}") 108 | return 0 109 | 110 | if not content: 111 | print(f"Empty file: {filepath}") 112 | return 0 113 | 114 | cursor = conn.cursor() 115 | 116 | # Split content into chunks. 117 | # The default model truncates text after 256 word pieces 118 | chunks = self._chunk_text(content) 119 | chunk_count = 0 120 | 121 | for chunk in chunks: 122 | # Generate embedding and insert into database 123 | embedding = model.encode(chunk) 124 | embedding_json = json.dumps(embedding.tolist()) 125 | 126 | cursor.execute( 127 | "INSERT INTO documents (filepath, content, embedding) VALUES (?, ?, vector_as_f32(?))", 128 | (filepath, chunk, embedding_json) 129 | ) 130 | chunk_count += 1 131 | 132 | conn.commit() 133 | 134 | # Perform quantization on the vector column 135 | cursor.execute(""" 136 | SELECT vector_quantize('documents', 'embedding'); 137 | """) 138 | 139 | print(f"Indexed {filepath}: {chunk_count} chunks") 140 | return chunk_count 141 | 142 | def index_directory(self, directory: str) -> int: 143 | """Index all text files in a directory""" 144 | total_chunks = 0 145 | text_extensions = {'.txt', '.md', '.mdx', '.py', '.js', 146 | '.html', '.css', '.sql', '.json', '.xml'} 147 | 148 | for root, _, files in os.walk(directory): 149 | for file in files: 150 | if Path(file).suffix.lower() in text_extensions: 151 | filepath = os.path.join(root, file) 152 | total_chunks += self.index_file(filepath) 153 | 154 | return 
total_chunks 155 | 156 | def search(self, query: str, limit: int = 3) -> Tuple[float, List[Tuple[str, str, float]]]: 157 | """Search for similar documents""" 158 | model = self._get_model() 159 | conn = self._get_connection() 160 | 161 | # Generate query embedding 162 | query_embedding = model.encode(query) 163 | query_json = json.dumps(query_embedding.tolist()) 164 | 165 | # Search using sqlite-vec cosine similarity 166 | cursor = conn.cursor() 167 | start_time = time.time() 168 | cursor.execute(""" 169 | SELECT d.id, d.filepath, d.content, v.distance 170 | FROM documents AS d 171 | JOIN vector_quantize_scan('documents', 'embedding', vector_as_f32(?), ?) AS v 172 | ON d.id = v.rowid; 173 | """, (query_json, limit)) 174 | elapsed_ms = round((time.time() - start_time) * 1000, 2) 175 | 176 | results = [] 177 | for id, filepath, content, distance in cursor.fetchall(): 178 | results.append((filepath, content, distance)) 179 | 180 | return (elapsed_ms, results) 181 | 182 | def stats(self): 183 | """Print database statistics""" 184 | conn = self._get_connection() 185 | cursor = conn.cursor() 186 | 187 | cursor.execute("SELECT COUNT(*) FROM documents") 188 | doc_count = cursor.fetchone()[0] 189 | 190 | cursor.execute("SELECT COUNT(DISTINCT filepath) FROM documents") 191 | file_count = cursor.fetchone()[0] 192 | 193 | print(f"Database: {self.db_path}") 194 | print(f"Files indexed: {file_count}") 195 | print(f"Document chunks: {doc_count}") 196 | 197 | def close(self): 198 | """Close the database connection""" 199 | if self.conn: 200 | self.conn.close() 201 | self.conn = None 202 | print("Database connection closed.") 203 | -------------------------------------------------------------------------------- /packages/node/generate-platform-packages.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /** 4 | * Generates platform-specific packages dynamically 5 | * 6 | * This script creates npm packages for each 
platform from templates, 7 | * eliminating the need to maintain nearly-identical files in the repo. 8 | * 9 | * Usage: 10 | * node generate-platform-packages.js 11 | * 12 | * Example: 13 | * node generate-platform-packages.js 0.9.45 ./artifacts ./platform-packages 14 | */ 15 | 16 | const fs = require('fs'); 17 | const path = require('path'); 18 | 19 | // Platform configuration 20 | const PLATFORMS = [ 21 | { 22 | name: 'darwin-arm64', 23 | os: ['darwin'], 24 | cpu: ['arm64'], 25 | description: 'SQLite Vector extension for macOS ARM64 (Apple Silicon)', 26 | binaryName: 'vector.dylib', 27 | artifactFolder: 'vector-macos-arm64', 28 | }, 29 | { 30 | name: 'darwin-x86_64', 31 | os: ['darwin'], 32 | cpu: ['x64', 'ia32'], 33 | description: 'SQLite Vector extension for macOS x86_64 (Intel)', 34 | binaryName: 'vector.dylib', 35 | artifactFolder: 'vector-macos-x86_64', 36 | }, 37 | { 38 | name: 'linux-arm64', 39 | os: ['linux'], 40 | cpu: ['arm64'], 41 | description: 'SQLite Vector extension for Linux ARM64 (glibc)', 42 | binaryName: 'vector.so', 43 | artifactFolder: 'vector-linux-arm64', 44 | }, 45 | { 46 | name: 'linux-arm64-musl', 47 | os: ['linux'], 48 | cpu: ['arm64'], 49 | description: 'SQLite Vector extension for Linux ARM64 (musl)', 50 | binaryName: 'vector.so', 51 | artifactFolder: 'vector-linux-musl-arm64', 52 | }, 53 | { 54 | name: 'linux-x86_64', 55 | os: ['linux'], 56 | cpu: ['x64', 'ia32'], 57 | description: 'SQLite Vector extension for Linux x86_64 (glibc)', 58 | binaryName: 'vector.so', 59 | artifactFolder: 'vector-linux-x86_64', 60 | }, 61 | { 62 | name: 'linux-x86_64-musl', 63 | os: ['linux'], 64 | cpu: ['x64', 'ia32'], 65 | description: 'SQLite Vector extension for Linux x86_64 (musl)', 66 | binaryName: 'vector.so', 67 | artifactFolder: 'vector-linux-musl-x86_64', 68 | }, 69 | { 70 | name: 'win32-x86_64', 71 | os: ['win32'], 72 | cpu: ['x64', 'ia32'], 73 | description: 'SQLite Vector extension for Windows x86_64', 74 | binaryName: 'vector.dll', 75 | 
artifactFolder: 'vector-windows-x86_64', 76 | }, 77 | ]; 78 | 79 | /** 80 | * Generate package.json for a platform 81 | */ 82 | function generatePackageJson(platform, version) { 83 | return { 84 | name: `@sqliteai/sqlite-vector-${platform.name}`, 85 | version: version, 86 | description: platform.description, 87 | main: 'index.js', 88 | os: platform.os, 89 | cpu: platform.cpu, 90 | files: [ 91 | platform.binaryName, 92 | 'index.js', 93 | 'README.md', 94 | 'LICENSE.md', 95 | ], 96 | keywords: [ 97 | 'sqlite', 98 | 'vector', 99 | ...platform.name.split('-'), 100 | ], 101 | author: 'Gioele Cantoni (gioele@sqlitecloud.io)', 102 | license: 'SEE LICENSE IN LICENSE.md', 103 | repository: { 104 | type: 'git', 105 | url: 'https://github.com/sqliteai/sqlite-vector.git', 106 | directory: 'packages/node', 107 | }, 108 | engines: { 109 | node: '>=16.0.0', 110 | }, 111 | }; 112 | } 113 | 114 | /** 115 | * Generate index.js for a platform 116 | */ 117 | function generateIndexJs(platform) { 118 | return `const { join } = require('path'); 119 | 120 | module.exports = { 121 | path: join(__dirname, '${platform.binaryName}') 122 | }; 123 | `; 124 | } 125 | 126 | /** 127 | * Generate README.md for a platform 128 | */ 129 | function generateReadme(platform, version) { 130 | return `# @sqliteai/sqlite-vector-${platform.name} 131 | 132 | ${platform.description} 133 | 134 | **Version:** ${version} 135 | 136 | This is a platform-specific package for [@sqliteai/sqlite-vector](https://www.npmjs.com/package/@sqliteai/sqlite-vector). 137 | 138 | It is installed automatically as an optional dependency and should not be installed directly. 
139 | 140 | ## Installation 141 | 142 | Install the main package instead: 143 | 144 | \`\`\`bash 145 | npm install @sqliteai/sqlite-vector 146 | \`\`\` 147 | 148 | ## Platform 149 | 150 | - **OS:** ${platform.os.join(', ')} 151 | - **CPU:** ${platform.cpu.join(', ')} 152 | - **Binary:** ${platform.binaryName} 153 | 154 | ## License 155 | 156 | See [LICENSE.md](./LICENSE.md) in the root directory. 157 | `; 158 | } 159 | 160 | /** 161 | * Main function 162 | */ 163 | function main() { 164 | const args = process.argv.slice(2); 165 | 166 | if (args.length < 3) { 167 | console.error('Usage: node generate-platform-packages.js '); 168 | console.error('Example: node generate-platform-packages.js 0.9.45 ./artifacts ./platform-packages'); 169 | process.exit(1); 170 | } 171 | 172 | const [version, artifactsDir, outputDir] = args; 173 | 174 | // Find LICENSE.md (should be in repo root) 175 | const licensePath = path.resolve(__dirname, '../../LICENSE.md'); 176 | if (!fs.existsSync(licensePath)) { 177 | console.error(`Error: LICENSE.md not found at ${licensePath}`); 178 | process.exit(1); 179 | } 180 | 181 | // Validate version format 182 | if (!/^\d+\.\d+\.\d+$/.test(version)) { 183 | console.error(`Error: Invalid version format: ${version}`); 184 | console.error('Version must be in semver format (e.g., 0.9.45)'); 185 | process.exit(1); 186 | } 187 | 188 | console.log(`Generating platform packages version ${version}...\n`); 189 | 190 | // Create output directory 191 | if (!fs.existsSync(outputDir)) { 192 | fs.mkdirSync(outputDir, { recursive: true }); 193 | } 194 | 195 | let successCount = 0; 196 | let errorCount = 0; 197 | 198 | // Generate each platform package 199 | for (const platform of PLATFORMS) { 200 | const platformDir = path.join(outputDir, platform.name); 201 | const artifactPath = path.join(artifactsDir, platform.artifactFolder, platform.binaryName); 202 | 203 | try { 204 | // Create platform directory 205 | fs.mkdirSync(platformDir, { recursive: true }); 206 | 207 
| // Generate package.json 208 | const packageJson = generatePackageJson(platform, version); 209 | fs.writeFileSync( 210 | path.join(platformDir, 'package.json'), 211 | JSON.stringify(packageJson, null, 2) + '\n' 212 | ); 213 | 214 | // Generate index.js 215 | const indexJs = generateIndexJs(platform); 216 | fs.writeFileSync(path.join(platformDir, 'index.js'), indexJs); 217 | 218 | // Generate README.md 219 | const readme = generateReadme(platform, version); 220 | fs.writeFileSync(path.join(platformDir, 'README.md'), readme); 221 | 222 | // Copy LICENSE.md 223 | fs.copyFileSync(licensePath, path.join(platformDir, 'LICENSE.md')); 224 | 225 | // Copy binary if it exists 226 | if (fs.existsSync(artifactPath)) { 227 | fs.copyFileSync(artifactPath, path.join(platformDir, platform.binaryName)); 228 | console.log(`✓ ${platform.name} (with binary)`); 229 | } else { 230 | console.log(`✓ ${platform.name} (no binary found at ${artifactPath})`); 231 | } 232 | 233 | successCount++; 234 | } catch (error) { 235 | console.error(`✗ ${platform.name}: ${error.message}`); 236 | errorCount++; 237 | } 238 | } 239 | 240 | console.log(`\nGenerated ${successCount} platform package(s)`); 241 | 242 | if (errorCount > 0) { 243 | console.error(`Failed to generate ${errorCount} package(s)`); 244 | process.exit(1); 245 | } 246 | 247 | console.log('Done!'); 248 | } 249 | 250 | // Run 251 | if (require.main === module) { 252 | main(); 253 | } 254 | 255 | module.exports = { PLATFORMS, generatePackageJson, generateIndexJs, generateReadme }; 256 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for SQLite Vector Extension 2 | # Supports compilation for Linux, macOS, Windows, Android and iOS 3 | 4 | # customize sqlite3 executable with 5 | # make test SQLITE3=/opt/homebrew/Cellar/sqlite/3.49.1/bin/sqlite3 6 | SQLITE3 ?= sqlite3 7 | 8 | # Set default platform if not 
specified 9 | ifeq ($(OS),Windows_NT) 10 | PLATFORM := windows 11 | HOST := windows 12 | CPUS := $(shell powershell -Command "[Environment]::ProcessorCount") 13 | else 14 | HOST = $(shell uname -s | tr '[:upper:]' '[:lower:]') 15 | ifeq ($(HOST),darwin) 16 | PLATFORM := macos 17 | CPUS := $(shell sysctl -n hw.ncpu) 18 | else 19 | PLATFORM := $(HOST) 20 | CPUS := $(shell nproc) 21 | endif 22 | endif 23 | 24 | # Speed up builds by using all available CPU cores 25 | MAKEFLAGS += -j$(CPUS) 26 | 27 | # Compiler and flags 28 | CC = gcc 29 | CFLAGS = -Wall -Wextra -Wno-unused-parameter -I$(SRC_DIR) -I$(LIB_DIR) 30 | 31 | # Directories 32 | SRC_DIR = src 33 | DIST_DIR = dist 34 | LIB_DIR = libs 35 | VPATH = $(SRC_DIR):$(LIB_DIR) 36 | BUILD_DIR = build 37 | 38 | # Files 39 | SRC_FILES = $(wildcard $(SRC_DIR)/*.c) 40 | OBJ_FILES = $(patsubst %.c, $(BUILD_DIR)/%.o, $(notdir $(SRC_FILES))) 41 | 42 | # Platform-specific settings 43 | ifeq ($(PLATFORM),windows) 44 | TARGET := $(DIST_DIR)/vector.dll 45 | LDFLAGS += -shared 46 | # Create .def file for Windows 47 | DEF_FILE := $(BUILD_DIR)/vector.def 48 | STRIP = strip --strip-unneeded $@ 49 | else ifeq ($(PLATFORM),macos) 50 | TARGET := $(DIST_DIR)/vector.dylib 51 | ifndef ARCH 52 | LDFLAGS += -arch x86_64 -arch arm64 53 | CFLAGS += -arch x86_64 -arch arm64 54 | else 55 | LDFLAGS += -arch $(ARCH) 56 | CFLAGS += -arch $(ARCH) 57 | endif 58 | LDFLAGS += -dynamiclib -undefined dynamic_lookup -headerpad_max_install_names 59 | STRIP = strip -x -S $@ 60 | else ifeq ($(PLATFORM),android) 61 | ifndef ARCH # Set ARCH to find Android NDK's Clang compiler, the user should set the ARCH 62 | $(error "Android ARCH must be set to ARCH=x86_64, ARCH=arm64-v8a, or ARCH=armeabi-v7a") 63 | endif 64 | ifndef ANDROID_NDK # Set ANDROID_NDK path to find android build tools; e.g. 
on MacOS: export ANDROID_NDK=/Users/username/Library/Android/sdk/ndk/25.2.9519653 65 | $(error "Android NDK must be set") 66 | endif 67 | BIN = $(ANDROID_NDK)/toolchains/llvm/prebuilt/$(HOST)-x86_64/bin 68 | ifneq (,$(filter $(ARCH),arm64 arm64-v8a)) 69 | override ARCH := aarch64 70 | ANDROID_ABI := android26 71 | else ifeq ($(ARCH),armeabi-v7a) 72 | override ARCH := armv7a 73 | ANDROID_ABI := androideabi26 74 | else 75 | ANDROID_ABI := android26 76 | endif 77 | CC = $(BIN)/$(ARCH)-linux-$(ANDROID_ABI)-clang 78 | TARGET := $(DIST_DIR)/vector.so 79 | LDFLAGS += -lm -shared 80 | STRIP = $(BIN)/llvm-strip --strip-unneeded $@ 81 | else ifeq ($(PLATFORM),ios) 82 | TARGET := $(DIST_DIR)/vector.dylib 83 | SDK := -isysroot $(shell xcrun --sdk iphoneos --show-sdk-path) -miphoneos-version-min=11.0 84 | LDFLAGS += -dynamiclib $(SDK) -headerpad_max_install_names 85 | CFLAGS += -arch arm64 $(SDK) 86 | STRIP = strip -x -S $@ 87 | else ifeq ($(PLATFORM),ios-sim) 88 | TARGET := $(DIST_DIR)/vector.dylib 89 | SDK := -isysroot $(shell xcrun --sdk iphonesimulator --show-sdk-path) -miphonesimulator-version-min=11.0 90 | LDFLAGS += -arch x86_64 -arch arm64 -dynamiclib $(SDK) -headerpad_max_install_names 91 | CFLAGS += -arch x86_64 -arch arm64 $(SDK) 92 | STRIP = strip -x -S $@ 93 | else # linux 94 | TARGET := $(DIST_DIR)/vector.so 95 | LDFLAGS += -shared 96 | STRIP = strip --strip-unneeded $@ 97 | endif 98 | 99 | # Windows .def file generation 100 | $(DEF_FILE): 101 | ifeq ($(PLATFORM),windows) 102 | @echo "LIBRARY vector.dll" > $@ 103 | @echo "EXPORTS" >> $@ 104 | @echo " sqlite3_vector_init" >> $@ 105 | endif 106 | 107 | # Make sure the build and dist directories exist 108 | $(shell mkdir -p $(BUILD_DIR) $(DIST_DIR)) 109 | 110 | # Default target 111 | extension: $(TARGET) 112 | all: $(TARGET) 113 | 114 | # Loadable library 115 | $(TARGET): $(OBJ_FILES) $(DEF_FILE) 116 | $(CC) $(OBJ_FILES) $(DEF_FILE) -o $@ $(LDFLAGS) 117 | ifeq ($(PLATFORM),windows) 118 | # Generate import library for 
Windows 119 | dlltool -D $@ -d $(DEF_FILE) -l $(DIST_DIR)/vector.lib 120 | endif 121 | # Strip debug symbols 122 | $(STRIP) 123 | 124 | # Object files 125 | $(BUILD_DIR)/%.o: %.c 126 | $(CC) $(CFLAGS) -O3 -fPIC -c $< -o $@ 127 | 128 | test: $(TARGET) 129 | $(SQLITE3) ":memory:" -cmd ".bail on" ".load ./dist/vector" "SELECT vector_version();" 130 | 131 | # Clean up generated files 132 | clean: 133 | rm -rf $(BUILD_DIR)/* $(DIST_DIR)/* *.gcda *.gcno *.gcov *.sqlite 134 | 135 | .NOTPARALLEL: %.dylib 136 | %.dylib: 137 | rm -rf $(BUILD_DIR) && $(MAKE) PLATFORM=$* 138 | mv $(DIST_DIR)/vector.dylib $(DIST_DIR)/$@ 139 | 140 | define PLIST 141 | \ 142 | \ 143 | \ 144 | \ 145 | CFBundleDevelopmentRegion\ 146 | en\ 147 | CFBundleExecutable\ 148 | vector\ 149 | CFBundleIdentifier\ 150 | ai.sqlite.vector\ 151 | CFBundleInfoDictionaryVersion\ 152 | 6.0\ 153 | CFBundlePackageType\ 154 | FMWK\ 155 | CFBundleSignature\ 156 | ????\ 157 | CFBundleVersion\ 158 | $(shell make version)\ 159 | CFBundleShortVersionString\ 160 | $(shell make version)\ 161 | MinimumOSVersion\ 162 | 11.0\ 163 | \ 164 | 165 | endef 166 | 167 | define MODULEMAP 168 | framework module vector {\ 169 | umbrella header \"sqlite-vector.h\"\ 170 | export *\ 171 | } 172 | endef 173 | 174 | LIB_NAMES = ios.dylib ios-sim.dylib macos.dylib 175 | FMWK_NAMES = ios-arm64 ios-arm64_x86_64-simulator macos-arm64_x86_64 176 | $(DIST_DIR)/%.xcframework: $(LIB_NAMES) 177 | @$(foreach i,1 2 3,\ 178 | lib=$(word $(i),$(LIB_NAMES)); \ 179 | fmwk=$(word $(i),$(FMWK_NAMES)); \ 180 | mkdir -p $(DIST_DIR)/$$fmwk/vector.framework/Headers; \ 181 | mkdir -p $(DIST_DIR)/$$fmwk/vector.framework/Modules; \ 182 | cp src/sqlite-vector.h $(DIST_DIR)/$$fmwk/vector.framework/Headers; \ 183 | printf "$(PLIST)" > $(DIST_DIR)/$$fmwk/vector.framework/Info.plist; \ 184 | printf "$(MODULEMAP)" > $(DIST_DIR)/$$fmwk/vector.framework/Modules/module.modulemap; \ 185 | mv $(DIST_DIR)/$$lib $(DIST_DIR)/$$fmwk/vector.framework/vector; \ 186 | 
install_name_tool -id "@rpath/vector.framework/vector" $(DIST_DIR)/$$fmwk/vector.framework/vector; \ 187 | ) 188 | xcodebuild -create-xcframework $(foreach fmwk,$(FMWK_NAMES),-framework $(DIST_DIR)/$(fmwk)/vector.framework) -output $@ 189 | rm -rf $(foreach fmwk,$(FMWK_NAMES),$(DIST_DIR)/$(fmwk)) 190 | 191 | xcframework: $(DIST_DIR)/vector.xcframework 192 | 193 | AAR_ARM64 = packages/android/src/main/jniLibs/arm64-v8a/ 194 | AAR_ARM = packages/android/src/main/jniLibs/armeabi-v7a/ 195 | AAR_X86 = packages/android/src/main/jniLibs/x86_64/ 196 | aar: 197 | mkdir -p $(AAR_ARM64) $(AAR_ARM) $(AAR_X86) 198 | $(MAKE) clean && $(MAKE) PLATFORM=android ARCH=arm64-v8a 199 | mv $(DIST_DIR)/vector.so $(AAR_ARM64) 200 | $(MAKE) clean && $(MAKE) PLATFORM=android ARCH=armeabi-v7a 201 | mv $(DIST_DIR)/vector.so $(AAR_ARM) 202 | $(MAKE) clean && $(MAKE) PLATFORM=android ARCH=x86_64 203 | mv $(DIST_DIR)/vector.so $(AAR_X86) 204 | cd packages/android && ./gradlew clean assembleRelease 205 | cp packages/android/build/outputs/aar/android-release.aar $(DIST_DIR)/vector.aar 206 | 207 | version: 208 | @echo $(shell sed -n 's/^#define SQLITE_VECTOR_VERSION[[:space:]]*"\([^"]*\)".*/\1/p' src/sqlite-vector.h) 209 | 210 | # Help message 211 | help: 212 | @echo "SQLite Vector Extension Makefile" 213 | @echo "Usage:" 214 | @echo " make [PLATFORM=platform] [ARCH=arch] [ANDROID_NDK=\$$ANDROID_HOME/ndk/26.1.10909125] [target]" 215 | @echo "" 216 | @echo "Platforms:" 217 | @echo " linux (default on Linux)" 218 | @echo " macos (default on macOS)" 219 | @echo " windows (default on Windows)" 220 | @echo " android (needs ARCH to be set to x86_64, arm64-v8a, or armeabi-v7a and ANDROID_NDK to be set)" 221 | @echo " ios (only on macOS)" 222 | @echo " ios-sim (only on macOS)" 223 | @echo "" 224 | @echo "Targets:" 225 | @echo " all - Build the extension (default)" 226 | @echo " clean - Remove built files" 227 | @echo " test - Test the extension" 228 | @echo " help - Display this help message" 229 | @echo " 
xcframework - Build the Apple XCFramework" 230 | @echo " aar - Build the Android AAR package" 231 | 232 | .PHONY: all clean test extension help version xcframework aar 233 | -------------------------------------------------------------------------------- /packages/android/gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | # SPDX-License-Identifier: Apache-2.0 19 | # 20 | 21 | ############################################################################## 22 | # 23 | # Gradle start up script for POSIX generated by Gradle. 24 | # 25 | # Important for running: 26 | # 27 | # (1) You need a POSIX-compliant shell to run this script. 
If your /bin/sh is 28 | # noncompliant, but you have some other compliant shell such as ksh or 29 | # bash, then to run this script, type that shell name before the whole 30 | # command line, like: 31 | # 32 | # ksh Gradle 33 | # 34 | # Busybox and similar reduced shells will NOT work, because this script 35 | # requires all of these POSIX shell features: 36 | # * functions; 37 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 38 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 39 | # * compound commands having a testable exit status, especially «case»; 40 | # * various built-in commands including «command», «set», and «ulimit». 41 | # 42 | # Important for patching: 43 | # 44 | # (2) This script targets any POSIX shell, so it avoids extensions provided 45 | # by Bash, Ksh, etc; in particular arrays are avoided. 46 | # 47 | # The "traditional" practice of packing multiple parameters into a 48 | # space-separated string is a well documented source of bugs and security 49 | # problems, so this is (mostly) avoided, by progressively accumulating 50 | # options in "$@", and eventually passing that to Java. 51 | # 52 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 53 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 54 | # see the in-line comments for details. 55 | # 56 | # There are tweaks for specific operating systems such as AIX, CygWin, 57 | # Darwin, MinGW, and NonStop. 58 | # 59 | # (3) This script is generated from the Groovy template 60 | # https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 61 | # within the Gradle project. 62 | # 63 | # You can find Gradle at https://github.com/gradle/gradle/. 
64 | # 65 | ############################################################################## 66 | 67 | # Attempt to set APP_HOME 68 | 69 | # Resolve links: $0 may be a link 70 | app_path=$0 71 | 72 | # Need this for daisy-chained symlinks. 73 | while 74 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 75 | [ -h "$app_path" ] 76 | do 77 | ls=$( ls -ld "$app_path" ) 78 | link=${ls#*' -> '} 79 | case $link in #( 80 | /*) app_path=$link ;; #( 81 | *) app_path=$APP_HOME$link ;; 82 | esac 83 | done 84 | 85 | # This is normally unused 86 | # shellcheck disable=SC2034 87 | APP_BASE_NAME=${0##*/} 88 | # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) 89 | APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit 90 | 91 | # Use the maximum available, or set MAX_FD != -1 to use that value. 92 | MAX_FD=maximum 93 | 94 | warn () { 95 | echo "$*" 96 | } >&2 97 | 98 | die () { 99 | echo 100 | echo "$*" 101 | echo 102 | exit 1 103 | } >&2 104 | 105 | # OS specific support (must be 'true' or 'false'). 106 | cygwin=false 107 | msys=false 108 | darwin=false 109 | nonstop=false 110 | case "$( uname )" in #( 111 | CYGWIN* ) cygwin=true ;; #( 112 | Darwin* ) darwin=true ;; #( 113 | MSYS* | MINGW* ) msys=true ;; #( 114 | NONSTOP* ) nonstop=true ;; 115 | esac 116 | 117 | 118 | 119 | # Determine the Java command to use to start the JVM. 120 | if [ -n "$JAVA_HOME" ] ; then 121 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 122 | # IBM's JDK on AIX uses strange locations for the executables 123 | JAVACMD=$JAVA_HOME/jre/sh/java 124 | else 125 | JAVACMD=$JAVA_HOME/bin/java 126 | fi 127 | if [ ! -x "$JAVACMD" ] ; then 128 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 129 | 130 | Please set the JAVA_HOME variable in your environment to match the 131 | location of your Java installation." 132 | fi 133 | else 134 | JAVACMD=java 135 | if ! 
command -v java >/dev/null 2>&1 136 | then 137 | die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 138 | 139 | Please set the JAVA_HOME variable in your environment to match the 140 | location of your Java installation." 141 | fi 142 | fi 143 | 144 | # Increase the maximum file descriptors if we can. 145 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 146 | case $MAX_FD in #( 147 | max*) 148 | # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. 149 | # shellcheck disable=SC2039,SC3045 150 | MAX_FD=$( ulimit -H -n ) || 151 | warn "Could not query maximum file descriptor limit" 152 | esac 153 | case $MAX_FD in #( 154 | '' | soft) :;; #( 155 | *) 156 | # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. 157 | # shellcheck disable=SC2039,SC3045 158 | ulimit -n "$MAX_FD" || 159 | warn "Could not set maximum file descriptor limit to $MAX_FD" 160 | esac 161 | fi 162 | 163 | # Collect all arguments for the java command, stacking in reverse order: 164 | # * args from the command line 165 | # * the main class name 166 | # * -classpath 167 | # * -D...appname settings 168 | # * --module-path (only if needed) 169 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
170 | 171 | # For Cygwin or MSYS, switch paths to Windows format before running java 172 | if "$cygwin" || "$msys" ; then 173 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 174 | 175 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 176 | 177 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 178 | for arg do 179 | if 180 | case $arg in #( 181 | -*) false ;; # don't mess with options #( 182 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 183 | [ -e "$t" ] ;; #( 184 | *) false ;; 185 | esac 186 | then 187 | arg=$( cygpath --path --ignore --mixed "$arg" ) 188 | fi 189 | # Roll the args list around exactly as many times as the number of 190 | # args, so each arg winds up back in the position where it started, but 191 | # possibly modified. 192 | # 193 | # NB: a `for` loop captures its iteration list before it begins, so 194 | # changing the positional parameters here affects neither the number of 195 | # iterations, nor the values presented in `arg`. 196 | shift # remove old arg 197 | set -- "$@" "$arg" # push replacement arg 198 | done 199 | fi 200 | 201 | 202 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 203 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 204 | 205 | # Collect all arguments for the java command: 206 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, 207 | # and any embedded shellness will be escaped. 208 | # * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be 209 | # treated as '${Hostname}' itself on the command line. 210 | 211 | set -- \ 212 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 213 | -jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \ 214 | "$@" 215 | 216 | # Stop when "xargs" is not available. 217 | if ! command -v xargs >/dev/null 2>&1 218 | then 219 | die "xargs is not available" 220 | fi 221 | 222 | # Use "xargs" to parse quoted args. 
223 | # 224 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 225 | # 226 | # In Bash we could simply go: 227 | # 228 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 229 | # set -- "${ARGS[@]}" "$@" 230 | # 231 | # but POSIX shell has neither arrays nor command substitution, so instead we 232 | # post-process each arg (as a line of input to sed) to backslash-escape any 233 | # character that might be a shell metacharacter, then use eval to reverse 234 | # that process (while maintaining the separation between arguments), and wrap 235 | # the whole thing up as a single "set" statement. 236 | # 237 | # This will of course break if any of these variables contains a newline or 238 | # an unmatched quote. 239 | # 240 | 241 | eval "set -- $( 242 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 243 | xargs -n1 | 244 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 245 | tr '\n' ' ' 246 | )" '"$@"' 247 | 248 | exec "$JAVACMD" "$@" 249 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SQLite Vector 2 | 3 | **SQLite Vector** is a cross-platform, ultra-efficient SQLite extension that brings vector search capabilities to your embedded database. It works seamlessly on **iOS, Android, Windows, Linux, and macOS**, using just **30MB of memory** by default. With support for **Float32, Float16, BFloat16, Int8, and UInt8**, and **highly optimized distance functions**, it's the ideal solution for **Edge AI** applications. 
4 | 5 | ## Highlights 6 | 7 | * **No virtual tables required** – store vectors directly as `BLOB`s in ordinary tables 8 | * **Blazing fast** – optimized C implementation with SIMD acceleration 9 | * **Low memory footprint** – defaults to just 30MB of RAM usage 10 | * **Zero preindexing needed** – no long preprocessing or index-building phases 11 | * **Works offline** – perfect for on-device, privacy-preserving AI workloads 12 | * **Plug-and-play** – drop into existing SQLite workflows with minimal effort 13 | * **Cross-platform** – works out of the box on all major OSes 14 | 15 | 16 | ## Why Use SQLite-Vector? 17 | 18 | | Feature | SQLite-Vector | Traditional Solutions | 19 | | ---------------------------- | ------------- | ------------------------------------------ | 20 | | Works with ordinary tables | ✅ | ❌ (usually require special virtual tables) | 21 | | Doesn't need preindexing | ✅ | ❌ (can take hours for large datasets) | 22 | | Doesn't need external server | ✅ | ❌ (often needs Redis/FAISS/Weaviate/etc.) | 23 | | Memory-efficient | ✅ | ❌ | 24 | | Easy to use SQL | ✅ | ❌ (often complex JOINs, subqueries) | 25 | | Offline/Edge ready | ✅ | ❌ | 26 | | Cross-platform | ✅ | ❌ | 27 | 28 | Unlike other vector databases or extensions that require complex setup, SQLite-Vector **just works** with your existing database schema and tools. 29 | 30 | 31 | ## Installation 32 | 33 | ### Pre-built Binaries 34 | 35 | Download the appropriate pre-built binary for your platform from the official [Releases](https://github.com/sqliteai/sqlite-vector/releases) page: 36 | 37 | - Linux: x86 and ARM 38 | - macOS: x86 and ARM 39 | - Windows: x86 40 | - Android 41 | - iOS 42 | 43 | ### Loading the Extension 44 | 45 | ```sql 46 | -- In SQLite CLI 47 | .load ./vector 48 | 49 | -- In SQL 50 | SELECT load_extension('./vector'); 51 | ``` 52 | 53 | Or embed it directly into your application. 
54 | 55 | ### WASM Version 56 | 57 | You can download the WebAssembly (WASM) version of SQLite with the SQLite Vector extension enabled from: https://www.npmjs.com/package/@sqliteai/sqlite-wasm 58 | 59 | ## Example Usage 60 | 61 | ```sql 62 | -- Create a regular SQLite table 63 | CREATE TABLE images ( 64 | id INTEGER PRIMARY KEY, 65 | embedding BLOB, -- store Float32/UInt8/etc. 66 | label TEXT 67 | ); 68 | 69 | -- Insert a BLOB vector (Float32, 384 dimensions) using bindings 70 | INSERT INTO images (embedding, label) VALUES (?, 'cat'); 71 | 72 | -- Insert a JSON vector (Float32, 384 dimensions) 73 | INSERT INTO images (embedding, label) VALUES (vector_as_f32('[0.3, 1.0, 0.9, 3.2, 1.4,...]'), 'dog'); 74 | 75 | -- Initialize the vector. By default, the distance function is L2. 76 | -- To use a different metric, specify one of the following options: 77 | -- distance=L1, distance=COSINE, distance=DOT, or distance=SQUARED_L2. 78 | SELECT vector_init('images', 'embedding', 'type=FLOAT32,dimension=384'); 79 | 80 | -- Quantize vector 81 | SELECT vector_quantize('images', 'embedding'); 82 | 83 | -- Optional preload quantized version in memory (for a 4x/5x speedup) 84 | SELECT vector_quantize_preload('images', 'embedding'); 85 | 86 | -- Run a nearest neighbor query on the quantized version (returns top 20 closest vectors) 87 | SELECT e.id, v.distance FROM images AS e 88 | JOIN vector_quantize_scan('images', 'embedding', ?, 20) AS v 89 | ON e.id = v.rowid; 90 | ``` 91 | 92 | ### Swift Package 93 | 94 | You can [add this repository as a package dependency to your Swift project](https://developer.apple.com/documentation/xcode/adding-package-dependencies-to-your-app#Add-a-package-dependency). After adding the package, you'll need to set up SQLite with extension loading by following steps 4 and 5 of [this guide](https://github.com/sqliteai/sqlite-extensions-guide/blob/main/platforms/ios.md#4-set-up-sqlite-with-extension-loading). 
95 | 96 | Here's an example of how to use the package: 97 | ```swift 98 | import vector 99 | 100 | ... 101 | 102 | var db: OpaquePointer? 103 | sqlite3_open(":memory:", &db) 104 | sqlite3_enable_load_extension(db, 1) 105 | var errMsg: UnsafeMutablePointer<CChar>? = nil 106 | sqlite3_load_extension(db, vector.path, nil, &errMsg) 107 | var stmt: OpaquePointer? 108 | sqlite3_prepare_v2(db, "SELECT vector_version()", -1, &stmt, nil) 109 | defer { sqlite3_finalize(stmt) } 110 | sqlite3_step(stmt) 111 | log("vector_version(): \(String(cString: sqlite3_column_text(stmt, 0)))") 112 | sqlite3_close(db) 113 | ``` 114 | 115 | ### Android Package 116 | 117 | Add the [following](https://central.sonatype.com/artifact/ai.sqlite/vector) to your Gradle dependencies: 118 | 119 | ```gradle 120 | implementation 'ai.sqlite:vector:0.9.34' 121 | ``` 122 | 123 | Here's an example of how to use the package: 124 | ```java 125 | SQLiteCustomExtension vectorExtension = new SQLiteCustomExtension(getApplicationInfo().nativeLibraryDir + "/vector", null); 126 | SQLiteDatabaseConfiguration config = new SQLiteDatabaseConfiguration( 127 | getCacheDir().getPath() + "/vector_test.db", 128 | SQLiteDatabase.CREATE_IF_NECESSARY | SQLiteDatabase.OPEN_READWRITE, 129 | Collections.emptyList(), 130 | Collections.emptyList(), 131 | Collections.singletonList(vectorExtension) 132 | ); 133 | SQLiteDatabase db = SQLiteDatabase.openDatabase(config, null, null); 134 | ``` 135 | 136 | **Note:** Additional settings and configuration are required for a complete setup. For full implementation details, see the [complete Android example](https://github.com/sqliteai/sqlite-extensions-guide/blob/main/examples/android/README.md).
137 | 138 | ### Python Package 139 | 140 | Python developers can quickly get started using the ready-to-use `sqliteai-vector` package available on PyPI: 141 | 142 | ```bash 143 | pip install sqliteai-vector 144 | ``` 145 | 146 | For usage details and examples, see the [Python package documentation](./packages/python/README.md). 147 | 148 | ## Documentation 149 | 150 | Extensive API documentation can be found in the [API page](https://github.com/sqliteai/sqlite-vector/blob/main/API.md). 151 | 152 | More information about the quantization process can be found in the [QUANTIZATION document](https://github.com/sqliteai/sqlite-vector/blob/main/QUANTIZATION.md). 153 | 154 | ## Features 155 | 156 | ### Instant Vector Search – No Preindexing Required 157 | 158 | Unlike other SQLite vector extensions that rely on complex indexing algorithms such as DiskANN, HNSW, or IVF, which often require **preprocessing steps that can take hours or even days**, `sqlite-vector` works out of the box with your existing data. There’s **no need to preindex your vectors**—you can start performing fast, approximate or exact vector searches **immediately**. 159 | 160 | This means: 161 | 162 | * **No waiting time** before your app or service is usable 163 | * **Zero-cost updates** – you can add, remove, or modify vectors on the fly without rebuilding any index 164 | * **Works directly with BLOB columns** in ordinary SQLite tables – no special schema or virtual table required 165 | * **Ideal for edge and mobile use cases**, where preprocessing large datasets is not practical or possible 166 | 167 | By eliminating the need for heavyweight indexing, `sqlite-vector` offers a **simpler, faster, and more developer-friendly** approach to embedding vector search in your applications. 168 | 169 | ### Supported Vector Types 170 | 171 | You can store your vectors as `BLOB` columns in ordinary tables.
Supported formats include: 172 | 173 | * `float32` (4 bytes per element) 174 | * `float16` (2 bytes per element) 175 | * `bfloat16` (2 bytes per element) 176 | * `int8` (1 byte per element) 177 | * `uint8` (1 byte per element) 178 | 179 | Simply insert a vector as a binary blob into your table. No special table types or schemas are required. 180 | 181 | 182 | ### Supported Distance Metrics 183 | 184 | Optimized implementations available: 185 | 186 | * **L2 Distance (Euclidean)** 187 | * **Squared L2** 188 | * **L1 Distance (Manhattan)** 189 | * **Cosine Distance** 190 | * **Dot Product** 191 | 192 | These are implemented in pure C and optimized for SIMD when available, ensuring maximum performance on modern CPUs and mobile devices. 193 | 194 | --- 195 | 196 | # What Is Vector Search? 197 | 198 | Vector search is the process of finding the closest match(es) to a given vector (a point in high-dimensional space) based on a similarity or distance metric. It is essential for AI and machine learning applications where data is often encoded into vector embeddings. 199 | 200 | ### Common Use Cases 201 | 202 | * **Semantic Search**: find documents, emails, or messages similar to a query 203 | * **Image Retrieval**: search for visually similar images 204 | * **Recommendation Systems**: match users with products, videos, or music 205 | * **Voice and Audio Search**: match voice queries or environmental sounds 206 | * **Anomaly Detection**: find outliers in real-time sensor data 207 | * **Robotics**: localize spatial features or behaviors using embedded observations 208 | 209 | In the AI era, embeddings are everywhere – from language models like GPT to vision transformers. Storing and searching them efficiently is the foundation of intelligent applications. 
210 | 211 | ## Perfect for Edge AI 212 | 213 | SQLite-Vector is designed with the **Edge AI** use case in mind: 214 | 215 | * Runs offline – no internet required 216 | * Works on mobile devices – iOS/Android friendly 217 | * Keeps data local – ideal for privacy-focused apps 218 | * Extremely fast – real-time performance on device 219 | 220 | You can deploy powerful similarity search capabilities right inside your app or embedded system – **no cloud needed**. 221 | 222 | ## Integrations 223 | 224 | Use SQLite-Vector alongside: 225 | 226 | * **[SQLite-AI](https://github.com/sqliteai/sqlite-ai)** – on-device inference, embedding generation, and model interaction directly into your database 227 | * **[SQLite-Sync](https://github.com/sqliteai/sqlite-sync)** – sync on-device databases with the cloud 228 | * **[SQLite-JS](https://github.com/sqliteai/sqlite-js)** – define SQLite functions in JavaScript 229 | 230 | ## License 231 | 232 | This project is licensed under the [Elastic License 2.0](./LICENSE.md). You can use, copy, modify, and distribute it under the terms of the license for non-production use. For production or managed service use, please [contact SQLite Cloud, Inc](mailto:info@sqlitecloud.io) for a commercial license. 233 | -------------------------------------------------------------------------------- /API.md: -------------------------------------------------------------------------------- 1 | # SQLite Vector Extension – API Reference 2 | 3 | This extension enables efficient vector operations directly inside SQLite databases, making it ideal for on-device and edge AI applications. It supports various vector types and SIMD-accelerated distance functions. 4 | 5 | ### Getting started 6 | 7 | * All vectors must have a fixed dimension per column, set during `vector_init`. 8 | * Only tables explicitly initialized using `vector_init` are eligible for vector search. 9 | * You **must run `vector_quantize()`** before using `vector_quantize_scan()`. 
10 | * You can preload quantization at database open using `vector_quantize_preload()`. 11 | 12 | --- 13 | 14 | ## `vector_version()` 15 | 16 | **Returns:** `TEXT` 17 | 18 | **Description:** 19 | Returns the current version of the SQLite Vector Extension. 20 | 21 | **Example:** 22 | 23 | ```sql 24 | SELECT vector_version(); 25 | -- e.g., '1.0.0' 26 | ``` 27 | 28 | --- 29 | 30 | ## `vector_backend()` 31 | 32 | **Returns:** `TEXT` 33 | 34 | **Description:** 35 | Returns the active backend used for vector computation. This indicates the SIMD or hardware acceleration available on the current system. 36 | 37 | **Possible Values:** 38 | 39 | * `CPU` – Generic fallback 40 | * `SSE2` – SIMD on Intel/AMD 41 | * `AVX2` – Advanced SIMD on modern x86 CPUs 42 | * `NEON` – SIMD on ARM (e.g., mobile) 43 | 44 | **Example:** 45 | 46 | ```sql 47 | SELECT vector_backend(); 48 | -- e.g., 'AVX2' 49 | ``` 50 | 51 | --- 52 | 53 | ## `vector_init(table, column, options)` 54 | 55 | **Returns:** `NULL` 56 | 57 | **Description:** 58 | Initializes the vector extension for a given table and column. This is **mandatory** before performing any vector search or quantization. 59 | `vector_init` must be called in every database connection that needs to perform vector operations. 60 | 61 | The target table must have a **`rowid`** (an integer primary key, either explicit or implicit). 62 | If the table was created using `WITHOUT ROWID`, it must have **exactly one primary key column of type `INTEGER`**. 63 | This ensures that each vector can be uniquely identified and efficiently referenced during search and quantization. 64 | 65 | **Parameters:** 66 | 67 | * `table` (TEXT): Name of the table containing vector data. 68 | * `column` (TEXT): Name of the column containing the vector embeddings (stored as BLOBs). 69 | * `options` (TEXT): Comma-separated key=value string. 70 | 71 | **Options:** 72 | 73 | * `dimension` (required): Integer specifying the length of each vector. 
74 | * `type`: Vector data type. Options: 75 | 76 | * `FLOAT32` (default) 77 | * `FLOAT16` 78 | * `FLOATB16` 79 | * `INT8` 80 | * `UINT8` 81 | * `distance`: Distance function to use. Options: 82 | 83 | * `L2` (default) 84 | * `SQUARED_L2` 85 | * `COSINE` 86 | * `DOT` 87 | * `L1` 88 | 89 | **Example:** 90 | 91 | ```sql 92 | SELECT vector_init('documents', 'embedding', 'dimension=384,type=FLOAT32,distance=cosine'); 93 | ``` 94 | 95 | --- 96 | 97 | ## `vector_quantize(table, column, options)` 98 | 99 | **Returns:** `INTEGER` 100 | 101 | **Description:** 102 | Returns the total number of successfully quantized rows. 103 | 104 | Performs quantization on the specified table and column. This precomputes internal data structures to support fast approximate nearest neighbor (ANN) search. 105 | Read more about quantization [here](https://github.com/sqliteai/sqlite-vector/blob/main/QUANTIZATION.md). 106 | 107 | If a quantization already exists for the specified table and column, it is replaced. If it was previously loaded into memory using `vector_quantize_preload`, the data is automatically reloaded. `vector_quantize` should be called once after data insertion. If called multiple times, the previous quantized data is replaced. The resulting quantization is shared across all database connections, so they do not need to call it again. 108 | 109 | **Parameters:** 110 | 111 | * `table` (TEXT): Name of the table. 112 | * `column` (TEXT): Name of the column containing vector data. 113 | * `options` (TEXT, optional): Comma-separated key=value string. 
114 | 115 | **Available options:** 116 | 117 | * `max_memory`: Max memory to use for quantization (default: 30MB) 118 | 119 | **Example:** 120 | 121 | ```sql 122 | SELECT vector_quantize('documents', 'embedding', 'max_memory=50MB'); 123 | ``` 124 | 125 | --- 126 | 127 | ## `vector_quantize_memory(table, column)` 128 | 129 | **Returns:** `INTEGER` 130 | 131 | **Description:** 132 | Returns the amount of memory (in bytes) required to preload quantized data for the specified table and column. 133 | 134 | **Example:** 135 | 136 | ```sql 137 | SELECT vector_quantize_memory('documents', 'embedding'); 138 | -- e.g., 28490112 139 | ``` 140 | 141 | --- 142 | 143 | ## `vector_quantize_preload(table, column)` 144 | 145 | **Returns:** `NULL` 146 | 147 | **Description:** 148 | Loads the quantized representation for the specified table and column into memory. Should be used at startup to ensure optimal query performance. 149 | `vector_quantize_preload` should be called once after `vector_quantize`. The preloaded data is also shared across all database connections, so they do not need to call it again. 150 | 151 | **Example:** 152 | 153 | ```sql 154 | SELECT vector_quantize_preload('documents', 'embedding'); 155 | ``` 156 | 157 | --- 158 | 159 | ## `vector_quantize_cleanup(table, column)` 160 | 161 | **Returns:** `NULL` 162 | 163 | **Description:** 164 | Releases memory previously allocated by a `vector_quantize_preload` call and removes all quantization entries associated with the specified table and column. 165 | Use this function when quantization is no longer required. In some cases, running VACUUM may be necessary to reclaim the freed space from the database. 166 | 167 | If the data changes and you invoke `vector_quantize`, the existing quantization data is automatically replaced. In that case, calling this function is unnecessary. 
168 | 169 | **Example:** 170 | 171 | ```sql 172 | SELECT vector_quantize_cleanup('documents', 'embedding'); 173 | ``` 174 | 175 | --- 176 | 177 | ## `vector_as_f32(value)` 178 | 179 | ## `vector_as_f16(value)` 180 | 181 | ## `vector_as_bf16(value)` 182 | 183 | ## `vector_as_i8(value)` 184 | 185 | ## `vector_as_u8(value)` 186 | 187 | **Returns:** `BLOB` 188 | 189 | **Description:** 190 | Encodes a vector into the required internal BLOB format to ensure correct storage and compatibility with the system’s vector representation. 191 | A real conversion is performed ONLY in case of JSON input. When input is a BLOB, it is assumed to be already properly formatted. 192 | 193 | Functions in the `vector_as_` family should be used in all `INSERT`, `UPDATE`, and `DELETE` statements to properly format vector values. However, they are *not* required when specifying input vectors for the `vector_full_scan` or `vector_quantize_scan` virtual tables. 194 | 195 | **Parameters:** 196 | 197 | * `value` (TEXT or BLOB): 198 | 199 | * If `TEXT`, it must be a JSON array (e.g., `"[0.1, 0.2, 0.3]"`). 200 | * If `BLOB`, no check is performed; the user must ensure the format matches the specified type and dimension. 201 | 202 | * `dimension` (INT, optional): Enforce a stricter sanity check, ensuring the input vector has the expected dimensionality. 203 | 204 | **Usage by format:** 205 | 206 | ```sql 207 | -- Insert a Float32 vector using JSON 208 | INSERT INTO documents(embedding) VALUES(vector_as_f32('[0.1, 0.2, 0.3]')); 209 | 210 | -- Insert a UInt8 vector using raw BLOB (ensure correct formatting!) 211 | INSERT INTO compressed_vectors(embedding) VALUES(vector_as_u8(X'010203')); 212 | ``` 213 | 214 | --- 215 | 216 | ## 🔍 `vector_full_scan(table, column, vector, k)` 217 | 218 | **Returns:** `Virtual Table (rowid, distance)` 219 | 220 | **Description:** 221 | Performs a brute-force nearest neighbor search using the given vector. 
Despite its brute-force nature, this function is highly optimized and useful for small datasets (rows < 1000000) or validation. 222 | Since this interface only returns rowid and distance, if you need to access additional columns from the original table, you must use a SELF JOIN. 223 | 224 | **Parameters:** 225 | 226 | * `table` (TEXT): Name of the target table. 227 | * `column` (TEXT): Column containing vectors. 228 | * `vector` (BLOB or JSON): The query vector. 229 | * `k` (INTEGER): Number of nearest neighbors to return. 230 | 231 | **Example:** 232 | 233 | ```sql 234 | SELECT rowid, distance 235 | FROM vector_full_scan('documents', 'embedding', vector_as_f32('[0.1, 0.2, 0.3]'), 5); 236 | ``` 237 | 238 | --- 239 | 240 | ## ⚡ `vector_quantize_scan(table, column, vector, k)` 241 | 242 | **Returns:** `Virtual Table (rowid, distance)` 243 | 244 | **Description:** 245 | Performs a fast approximate nearest neighbor search using the pre-quantized data. This is the **recommended query method** for large datasets due to its excellent speed/recall/memory trade-off. Since this interface only returns rowid and distance, if you need to access additional columns from the original table, you must use a SELF JOIN. 246 | 247 | You **must run `vector_quantize()`** before using `vector_quantize_scan()` and when data initialized for vectors changes. 248 | 249 | **Parameters:** 250 | 251 | * `table` (TEXT): Name of the target table. 252 | * `column` (TEXT): Column containing vectors. 253 | * `vector` (BLOB or JSON): The query vector. 254 | * `k` (INTEGER): Number of nearest neighbors to return. 255 | 256 | **Performance Highlights:** 257 | 258 | * Handles **1M vectors** of dimension 768 in a few milliseconds. 259 | * Uses **<50MB** of RAM. 260 | * Achieves **>0.95 recall**. 
261 | 262 | **Example:** 263 | 264 | ```sql 265 | SELECT rowid, distance 266 | FROM vector_quantize_scan('documents', 'embedding', vector_as_f32('[0.1, 0.2, 0.3]'), 10); 267 | ``` 268 | 269 | --- 270 | 271 | ## 🔁 Streaming Interfaces 272 | 273 | ### `vector_full_scan_stream` and `vector_quantize_scan_stream` 274 | 275 | **Returns:** `Virtual Table (rowid, distance)` 276 | 277 | **Description:** 278 | These streaming interfaces provide the same functionality as `vector_full_scan` and `vector_quantize_scan`, respectively, but are designed for incremental or filtered processing of results. 279 | 280 | Unlike their non-streaming counterparts, these functions **omit the fourth parameter (`k`)** and allow you to use standard SQL clauses such as `WHERE` and `LIMIT` to control filtering and result count. Since this interface only returns rowid and distance, if you need to access additional columns from the original table, you must use a SELF JOIN. 281 | 282 | This makes them ideal for combining vector search with additional query conditions or progressive result consumption in streaming applications. 283 | 284 | **Parameters:** 285 | 286 | * `table` (TEXT): Name of the target table. 287 | * `column` (TEXT): Column containing vectors. 288 | * `vector` (BLOB or JSON): The query vector. 
289 | 290 | **Key Differences from Non-Streaming Variants:** 291 | 292 | | Function | Equivalent To | Requires `k` | Supports `WHERE` | Supports `LIMIT` | 293 | | ----------------------------- | ---------------------- | ------------ | ---------------- | ---------------- | 294 | | `vector_full_scan_stream` | `vector_full_scan` | ❌ | ✅ | ✅ | 295 | | `vector_quantize_scan_stream` | `vector_quantize_scan` | ❌ | ✅ | ✅ | 296 | 297 | **Examples:** 298 | 299 | ```sql 300 | -- Perform a filtered full scan 301 | SELECT rowid, distance 302 | FROM vector_full_scan_stream('documents', 'embedding', vector_as_f32('[0.1, 0.2, 0.3]')) 303 | LIMIT 5; 304 | ``` 305 | 306 | ```sql 307 | -- Perform a filtered approximate scan using quantized data 308 | SELECT rowid, distance 309 | FROM vector_quantize_scan_stream('documents', 'embedding', vector_as_f32('[0.1, 0.2, 0.3]')) 310 | LIMIT 10; 311 | ``` 312 | 313 | **Accessing Additional Columns:** 314 | 315 | ```sql 316 | -- Perform a filtered full scan with additional columns 317 | SELECT 318 | v.rowid, 319 | row_number() OVER (ORDER BY v.distance) AS rank_number, 320 | v.distance 321 | FROM vector_full_scan_stream('documents', 'embedding', vector_as_f32('[0.1, 0.2, 0.3]')) AS v 322 | JOIN documents ON documents.rowid = v.rowid 323 | WHERE documents.category = 'science' 324 | LIMIT 10; 325 | ``` 326 | 327 | **Usage Notes:** 328 | 329 | * These interfaces return rows progressively and can efficiently combine vector similarity with SQL-level filters. 330 | * The `LIMIT` clause can be used to control how many rows are read or returned. 331 | * The query planner integrates the streaming virtual table into the overall SQL execution plan, enabling hybrid filtering and ranking operations. 
332 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Build, Test and Release 2 | on: 3 | push: 4 | workflow_dispatch: 5 | 6 | permissions: 7 | contents: write 8 | id-token: write 9 | 10 | jobs: 11 | build: 12 | runs-on: ${{ matrix.os }} 13 | container: ${{ matrix.container && matrix.container || '' }} 14 | name: ${{ matrix.name }}${{ matrix.arch && format('-{0}', matrix.arch) || '' }} build${{ matrix.arch != 'arm64-v8a' && matrix.arch != 'armeabi-v7a' && matrix.name != 'ios-sim' && matrix.name != 'ios' && matrix.name != 'apple-xcframework' && matrix.name != 'android-aar' && ( matrix.name != 'macos' || matrix.arch != 'x86_64' ) && ' + test' || ''}} 15 | timeout-minutes: 20 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | include: 20 | - os: ubuntu-22.04 21 | arch: x86_64 22 | name: linux 23 | - os: ubuntu-22.04-arm 24 | arch: arm64 25 | name: linux 26 | - os: ubuntu-22.04 27 | arch: x86_64 28 | name: linux-musl 29 | container: alpine:latest 30 | - os: ubuntu-22.04-arm 31 | arch: arm64 32 | name: linux-musl 33 | - os: macos-15 34 | name: macos 35 | - os: macos-15 36 | arch: x86_64 37 | name: macos 38 | make: ARCH=x86_64 39 | - os: macos-15 40 | arch: arm64 41 | name: macos 42 | make: ARCH=arm64 43 | - os: windows-2022 44 | arch: x86_64 45 | name: windows 46 | - os: ubuntu-22.04 47 | arch: arm64-v8a 48 | name: android 49 | make: PLATFORM=android ARCH=arm64-v8a 50 | - os: ubuntu-22.04 51 | arch: armeabi-v7a 52 | name: android 53 | make: PLATFORM=android ARCH=armeabi-v7a 54 | - os: ubuntu-22.04 55 | arch: x86_64 56 | name: android 57 | make: PLATFORM=android ARCH=x86_64 58 | sqlite-amalgamation-zip: https://sqlite.org/2025/sqlite-amalgamation-3490100.zip 59 | - os: macos-15 60 | name: ios 61 | make: PLATFORM=ios 62 | - os: macos-15 63 | name: ios-sim 64 | make: PLATFORM=ios-sim 65 | - os: macos-15 66 | name: 
apple-xcframework 67 | make: xcframework 68 | - os: ubuntu-22.04 69 | name: android-aar 70 | make: aar 71 | 72 | defaults: 73 | run: 74 | shell: ${{ matrix.container && 'sh' || 'bash' }} 75 | 76 | steps: 77 | 78 | - uses: actions/checkout@v4.2.2 79 | 80 | - name: android setup java 81 | if: matrix.name == 'android-aar' 82 | uses: actions/setup-java@v4 83 | with: 84 | distribution: 'temurin' 85 | java-version: '17' 86 | 87 | - name: windows install dependencies 88 | if: matrix.name == 'windows' 89 | run: choco install sqlite -y 90 | 91 | - name: macos install dependencies 92 | if: matrix.name == 'macos' 93 | run: brew link sqlite --force 94 | 95 | - name: linux-musl x86_64 install dependencies 96 | if: matrix.name == 'linux-musl' && matrix.arch == 'x86_64' 97 | run: apk update && apk add --no-cache gcc make sqlite musl-dev linux-headers 98 | 99 | - name: linux-musl arm64 setup container 100 | if: matrix.name == 'linux-musl' && matrix.arch == 'arm64' 101 | run: | 102 | docker run -d --name alpine \ 103 | --platform linux/arm64 \ 104 | -v ${{ github.workspace }}:/workspace \ 105 | -w /workspace \ 106 | alpine:latest \ 107 | tail -f /dev/null 108 | docker exec alpine sh -c "apk update && apk add --no-cache gcc make sqlite musl-dev linux-headers" 109 | 110 | - name: build sqlite-vector 111 | run: ${{ matrix.name == 'linux-musl' && matrix.arch == 'arm64' && 'docker exec alpine' || '' }} make extension ${{ matrix.make && matrix.make || ''}} 112 | 113 | - name: create keychain for codesign 114 | if: matrix.os == 'macos-15' 115 | run: | 116 | echo "${{ secrets.APPLE_CERTIFICATE }}" | base64 --decode > certificate.p12 117 | security create-keychain -p "${{ secrets.KEYCHAIN_PASSWORD }}" build.keychain 118 | security default-keychain -s build.keychain 119 | security unlock-keychain -p "${{ secrets.KEYCHAIN_PASSWORD }}" build.keychain 120 | security import certificate.p12 -k build.keychain -P "${{ secrets.CERTIFICATE_PASSWORD }}" -T /usr/bin/codesign 121 | security 
set-key-partition-list -S apple-tool:,apple:,codesign: -s -k "${{ secrets.KEYCHAIN_PASSWORD }}" build.keychain 122 | 123 | - name: codesign and notarize dylib 124 | if: matrix.os == 'macos-15' && matrix.name != 'apple-xcframework' 125 | run: | 126 | codesign --sign "${{ secrets.APPLE_TEAM_ID }}" --timestamp --options runtime dist/vector.dylib 127 | ditto -c -k dist/vector.dylib dist/vector.zip 128 | xcrun notarytool submit dist/vector.zip --apple-id "${{ secrets.APPLE_ID }}" --password "${{ secrets.APPLE_PASSWORD }}" --team-id "${{ secrets.APPLE_TEAM_ID }}" --wait 129 | rm dist/vector.zip 130 | 131 | - name: codesign and notarize xcframework 132 | if: matrix.name == 'apple-xcframework' 133 | run: | 134 | find dist/vector.xcframework -name "*.framework" -exec echo "Signing: {}" \; -exec codesign --sign "${{ secrets.APPLE_TEAM_ID }}" --timestamp --options runtime {} \; # Sign each individual framework FIRST 135 | codesign --sign "${{ secrets.APPLE_TEAM_ID }}" --timestamp --options runtime dist/vector.xcframework # Then sign the xcframework wrapper 136 | ditto -c -k --keepParent dist/vector.xcframework dist/vector.xcframework.zip 137 | xcrun notarytool submit dist/vector.xcframework.zip --apple-id "${{ secrets.APPLE_ID }}" --password "${{ secrets.APPLE_PASSWORD }}" --team-id "${{ secrets.APPLE_TEAM_ID }}" --wait 138 | rm dist/vector.xcframework.zip 139 | 140 | - name: cleanup keychain for codesign 141 | if: matrix.os == 'macos-15' 142 | run: | 143 | rm certificate.p12 144 | security delete-keychain build.keychain 145 | 146 | - name: android setup test environment 147 | if: matrix.name == 'android' && matrix.arch != 'arm64-v8a' && matrix.arch != 'armeabi-v7a' 148 | run: | 149 | 150 | echo "::group::enable kvm group perms" 151 | echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules 152 | sudo udevadm control --reload-rules 153 | sudo udevadm trigger --name-match=kvm 154 | echo "::endgroup::" 155 | 156 | 
echo "::group::download and build sqlite3 without SQLITE_OMIT_LOAD_EXTENSION" 157 | curl -O ${{ matrix.sqlite-amalgamation-zip }} 158 | unzip sqlite-amalgamation-*.zip 159 | export ${{ matrix.make }} 160 | $ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/bin/${{ matrix.arch }}-linux-android26-clang sqlite-amalgamation-*/shell.c sqlite-amalgamation-*/sqlite3.c -o sqlite3 -ldl 161 | # remove unused folders to save up space 162 | rm -rf sqlite-amalgamation-*.zip sqlite-amalgamation-* 163 | echo "::endgroup::" 164 | 165 | echo "::group::prepare the test script" 166 | make test PLATFORM=$PLATFORM ARCH=$ARCH || echo "It should fail. Running remaining commands in the emulator" 167 | cat > commands.sh << EOF 168 | mv -f /data/local/tmp/sqlite3 /system/xbin 169 | cd /data/local/tmp 170 | $(make test PLATFORM=$PLATFORM ARCH=$ARCH -n) 171 | EOF 172 | echo "::endgroup::" 173 | 174 | - name: android test sqlite-vector 175 | if: matrix.name == 'android' && matrix.arch != 'arm64-v8a' && matrix.arch != 'armeabi-v7a' 176 | uses: reactivecircus/android-emulator-runner@v2.34.0 177 | with: 178 | api-level: 26 179 | arch: ${{ matrix.arch }} 180 | script: | 181 | adb root 182 | adb remount 183 | adb push ${{ github.workspace }}/. 
/data/local/tmp/ 184 | adb shell "sh /data/local/tmp/commands.sh" 185 | 186 | - name: test sqlite-vector 187 | if: contains(matrix.name, 'linux') || matrix.name == 'windows' || ( matrix.name == 'macos' && matrix.arch != 'x86_64' ) 188 | run: ${{ matrix.name == 'linux-musl' && matrix.arch == 'arm64' && 'docker exec alpine' || '' }} make test ${{ matrix.make && matrix.make || ''}} 189 | 190 | - uses: actions/upload-artifact@v4.6.2 191 | if: always() 192 | with: 193 | name: vector-${{ matrix.name }}${{ matrix.arch && format('-{0}', matrix.arch) || '' }} 194 | path: dist/vector.* 195 | if-no-files-found: error 196 | 197 | release: 198 | runs-on: ubuntu-22.04 199 | name: release 200 | needs: build 201 | if: github.ref == 'refs/heads/main' 202 | 203 | env: 204 | GH_TOKEN: ${{ github.token }} 205 | 206 | steps: 207 | 208 | - uses: actions/checkout@v4.2.2 209 | 210 | - uses: actions/download-artifact@v4.2.1 211 | with: 212 | path: artifacts 213 | 214 | - name: zip artifacts 215 | run: | 216 | VERSION=$(make version) 217 | for folder in "artifacts"/*; do 218 | if [ -d "$folder" ]; then 219 | name=$(basename "$folder") 220 | if [[ "$name" != "vector-apple-xcframework" && "$name" != "vector-android-aar" ]]; then 221 | tar -czf "${name}-${VERSION}.tar.gz" -C "$folder" . 222 | fi 223 | if [[ "$name" != "vector-android-aar" ]]; then 224 | (cd "$folder" && zip -rq "../../${name}-${VERSION}.zip" .) 
225 | else 226 | cp "$folder"/*.aar "${name}-${VERSION}.aar" 227 | fi 228 | fi 229 | done 230 | 231 | - name: release tag version from sqlite-vector.h 232 | id: tag 233 | run: | 234 | VERSION=$(make version) 235 | if [[ "$VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then 236 | LATEST_RELEASE=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" https://api.github.com/repos/${{ github.repository }}/releases/latest) 237 | LATEST=$(echo "$LATEST_RELEASE" | jq -r '.name') 238 | 239 | # Check artifact sizes against previous release 240 | if [ -n "$LATEST" ] && [ "$LATEST" != "null" ]; then 241 | echo "Checking artifact sizes against previous release: $LATEST" 242 | FAILED=0 243 | 244 | for artifact in vector-*-${VERSION}.*; do 245 | if [ ! -f "$artifact" ]; then 246 | continue 247 | fi 248 | 249 | # Get current artifact size 250 | NEW_SIZE=$(stat -c%s "$artifact" 2>/dev/null || stat -f%z "$artifact") 251 | 252 | # Get artifact name for previous release 253 | ARTIFACT_NAME=$(echo "$artifact" | sed "s/${VERSION}/${LATEST}/") 254 | 255 | # Get previous artifact size from GitHub API 256 | OLD_SIZE=$(echo "$LATEST_RELEASE" | jq -r ".assets[] | select(.name == \"$(basename "$ARTIFACT_NAME")\") | .size") 257 | 258 | if [ -z "$OLD_SIZE" ] || [ "$OLD_SIZE" = "null" ]; then 259 | echo "⚠️ Previous artifact not found: $(basename "$ARTIFACT_NAME"), skipping comparison" 260 | continue 261 | fi 262 | 263 | # Calculate percentage increase 264 | INCREASE=$(awk "BEGIN {printf \"%.2f\", (($NEW_SIZE - $OLD_SIZE) / $OLD_SIZE) * 100}") 265 | 266 | echo "📦 $artifact: $OLD_SIZE → $NEW_SIZE bytes (${INCREASE}% change)" 267 | 268 | # Check if increase is more than 5% 269 | if (( $(echo "$INCREASE > 5" | bc -l) )); then 270 | if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then 271 | echo "⚠️ WARNING: $artifact size increased by ${INCREASE}% (limit: 5%)" 272 | else 273 | echo "❌ ERROR: $artifact size increased by ${INCREASE}% (limit: 5%)" 274 | FAILED=1 275 | fi 276 | fi 277 | done 278 
| 279 | if [ $FAILED -eq 1 ]; then 280 | echo "" 281 | echo "❌ One or more artifacts exceeded the 5% size increase limit" 282 | exit 1 283 | fi 284 | 285 | echo "✅ All artifacts within 5% size increase limit" 286 | fi 287 | 288 | if [[ "$VERSION" != "$LATEST" || "$GITHUB_EVENT_NAME" == "workflow_dispatch" ]]; then 289 | echo "version=$VERSION" >> $GITHUB_OUTPUT 290 | else 291 | echo "::warning file=src/sqlite-vector.h::To release a new version, please update the SQLITE_VECTOR_VERSION in src/sqlite-vector.h to be different than the latest $LATEST" 292 | fi 293 | exit 0 294 | fi 295 | echo "❌ SQLITE_VECTOR_VERSION not found in sqlite-vector.h" 296 | exit 1 297 | 298 | - uses: actions/checkout@v4.2.2 299 | if: steps.tag.outputs.version != '' 300 | with: 301 | repository: sqliteai/sqlite-wasm 302 | path: sqlite-wasm 303 | submodules: recursive 304 | token: ${{ secrets.PAT }} 305 | 306 | - name: release sqlite-wasm 307 | if: steps.tag.outputs.version != '' 308 | run: | 309 | cd sqlite-wasm 310 | git config --global user.email "$GITHUB_ACTOR@users.noreply.github.com" 311 | git config --global user.name "$GITHUB_ACTOR" 312 | cd modules/sqlite-vector 313 | git checkout ${{ github.sha }} 314 | cd ../.. 
315 | git add modules/sqlite-vector 316 | PKG=sqlite-wasm/package.json 317 | TMP=sqlite-wasm/package.tmp.json 318 | jq --arg version "$(cat modules/sqlite/VERSION)-sync.$(cd modules/sqlite-sync && make version)-vector.$(cd modules/sqlite-vector && make version)" '.version = $version' "$PKG" > "$TMP" && mv "$TMP" "$PKG" 319 | git add "$PKG" 320 | git commit -m "Bump sqlite-vector version to ${{ steps.tag.outputs.version }}" 321 | git push origin main 322 | 323 | - uses: actions/setup-java@v4 324 | if: steps.tag.outputs.version != '' 325 | with: 326 | distribution: 'temurin' 327 | java-version: '17' 328 | 329 | - name: release android aar to maven central 330 | if: steps.tag.outputs.version != '' 331 | run: cd packages/android && ./gradlew publishAggregationToCentralPortal -PSIGNING_KEY="${{ secrets.SIGNING_KEY }}" -PSIGNING_PASSWORD="${{ secrets.SIGNING_PASSWORD }}" -PSONATYPE_USERNAME="${{ secrets.MAVEN_CENTRAL_USERNAME }}" -PSONATYPE_PASSWORD="${{ secrets.MAVEN_CENTRAL_TOKEN }}" -PVERSION="${{ steps.tag.outputs.version }}" -PAAR_PATH="../../artifacts/vector-android-aar/vector.aar" 332 | 333 | - uses: actions/setup-node@v4 334 | if: steps.tag.outputs.version != '' 335 | with: 336 | node-version: '20' 337 | registry-url: 'https://registry.npmjs.org' 338 | 339 | - name: update npm # npm 11.5.1 is required for OIDC auth https://docs.npmjs.com/trusted-publishers 340 | run: npm install -g npm@11.5.1 341 | 342 | - name: build and publish npm packages 343 | if: steps.tag.outputs.version != '' 344 | run: | 345 | cd packages/node 346 | 347 | # Update version in package.json 348 | echo "Updating versions to ${{ steps.tag.outputs.version }}..." 
349 | 350 | # Update package.json 351 | jq --arg version "${{ steps.tag.outputs.version }}" \ 352 | '.version = $version | .optionalDependencies = (.optionalDependencies | with_entries(.value = $version))' \ 353 | package.json > package.tmp.json && mv package.tmp.json package.json 354 | 355 | echo "✓ Updated package.json to version ${{ steps.tag.outputs.version }}" 356 | 357 | # Generate platform packages 358 | echo "Generating platform packages..." 359 | node generate-platform-packages.js "${{ steps.tag.outputs.version }}" "../../artifacts" "./platform-packages" 360 | echo "✓ Generated 7 platform packages" 361 | ls -la platform-packages/ 362 | 363 | # Build main package 364 | echo "Building main package..." 365 | npm install 366 | npm run build 367 | npm test 368 | echo "✓ Main package built and tested" 369 | 370 | # Publish platform packages 371 | echo "Publishing platform packages to npm..." 372 | cd platform-packages 373 | for platform_dir in */; do 374 | platform_name=$(basename "$platform_dir") 375 | echo " Publishing @sqliteai/sqlite-vector-${platform_name}..." 376 | cd "$platform_dir" 377 | npm publish --provenance --access public 378 | cd .. 379 | echo " ✓ Published @sqliteai/sqlite-vector-${platform_name}" 380 | done 381 | cd .. 382 | 383 | # Publish main package 384 | echo "Publishing main package to npm..." 
385 | npm publish --provenance --access public 386 | echo "✓ Published @sqliteai/sqlite-vector@${{ steps.tag.outputs.version }}" 387 | 388 | echo "" 389 | echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" 390 | echo "✅ Successfully published 8 packages to npm" 391 | echo " Main: @sqliteai/sqlite-vector@${{ steps.tag.outputs.version }}" 392 | echo " Platform packages: 7" 393 | echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" 394 | 395 | - uses: softprops/action-gh-release@v2.2.1 396 | if: steps.tag.outputs.version != '' 397 | with: 398 | body: | 399 | # Packages 400 | 401 | [**Node**](https://www.npmjs.com/package/@sqliteai/sqlite-vector): `npm install @sqliteai/sqlite-vector` 402 | [**WASM**](https://www.npmjs.com/package/@sqliteai/sqlite-wasm): `npm install @sqliteai/sqlite-wasm` 403 | [**Android**](https://central.sonatype.com/artifact/ai.sqlite/vector): `ai.sqlite:vector:${{ steps.tag.outputs.version }}` 404 | [**Python**](https://pypi.org/project/sqliteai-vector): `pip install sqliteai-vector` 405 | [**Swift**](https://github.com/sqliteai/sqlite-vector#swift-package): [Installation Guide](https://github.com/sqliteai/sqlite-vector#swift-package) 406 | 407 | --- 408 | 409 | generate_release_notes: true 410 | tag_name: ${{ steps.tag.outputs.version }} 411 | files: vector-*-${{ steps.tag.outputs.version }}.* 412 | make_latest: true 413 | -------------------------------------------------------------------------------- /libs/fp16/fp16.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef FP16_FP16_H 3 | #define FP16_FP16_H 4 | 5 | #if defined(__cplusplus) && (__cplusplus >= 201103L) 6 | #include 7 | #include 8 | #elif !defined(__OPENCL_VERSION__) 9 | #include 10 | #include 11 | #endif 12 | 13 | #include 14 | #include 15 | 16 | #if defined(_MSC_VER) 17 | #include 18 | #endif 19 | #if defined(__F16C__) && FP16_USE_NATIVE_CONVERSION && !FP16_USE_FLOAT16_TYPE && !FP16_USE_FP16_TYPE 20 | 
#include 21 | #endif 22 | #if (defined(__aarch64__) || defined(_M_ARM64)) && FP16_USE_NATIVE_CONVERSION && !FP16_USE_FLOAT16_TYPE && !FP16_USE_FP16_TYPE 23 | #include 24 | #endif 25 | 26 | 27 | /* 28 | * Convert a 16-bit floating-point number in IEEE half-precision format, in bit representation, to 29 | * a 32-bit floating-point number in IEEE single-precision format, in bit representation. 30 | * 31 | * @note The implementation doesn't use any floating-point operations. 32 | */ 33 | static inline uint32_t fp16_ieee_to_fp32_bits(uint16_t h) { 34 | /* 35 | * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word: 36 | * +---+-----+------------+-------------------+ 37 | * | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| 38 | * +---+-----+------------+-------------------+ 39 | * Bits 31 26-30 16-25 0-15 40 | * 41 | * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits. 42 | */ 43 | const uint32_t w = (uint32_t) h << 16; 44 | /* 45 | * Extract the sign of the input number into the high bit of the 32-bit word: 46 | * 47 | * +---+----------------------------------+ 48 | * | S |0000000 00000000 00000000 00000000| 49 | * +---+----------------------------------+ 50 | * Bits 31 0-31 51 | */ 52 | const uint32_t sign = w & UINT32_C(0x80000000); 53 | /* 54 | * Extract mantissa and biased exponent of the input number into the bits 0-30 of the 32-bit word: 55 | * 56 | * +---+-----+------------+-------------------+ 57 | * | 0 |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| 58 | * +---+-----+------------+-------------------+ 59 | * Bits 30 27-31 17-26 0-16 60 | */ 61 | const uint32_t nonsign = w & UINT32_C(0x7FFFFFFF); 62 | /* 63 | * Renorm shift is the number of bits to shift mantissa left to make the half-precision number normalized. 64 | * If the initial number is normalized, some of its high 6 bits (sign == 0 and 5-bit exponent) equals one. 65 | * In this case renorm_shift == 0. 
If the number is denormalize, renorm_shift > 0. Note that if we shift 66 | * denormalized nonsign by renorm_shift, the unit bit of mantissa will shift into exponent, turning the 67 | * biased exponent into 1, and making mantissa normalized (i.e. without leading 1). 68 | */ 69 | #ifdef _MSC_VER 70 | unsigned long nonsign_bsr; 71 | _BitScanReverse(&nonsign_bsr, (unsigned long) nonsign); 72 | uint32_t renorm_shift = (uint32_t) nonsign_bsr ^ 31; 73 | #else 74 | uint32_t renorm_shift = __builtin_clz(nonsign); 75 | #endif 76 | renorm_shift = renorm_shift > 5 ? renorm_shift - 5 : 0; 77 | /* 78 | * Iff half-precision number has exponent of 15, the addition overflows it into bit 31, 79 | * and the subsequent shift turns the high 9 bits into 1. Thus 80 | * inf_nan_mask == 81 | * 0x7F800000 if the half-precision number had exponent of 15 (i.e. was NaN or infinity) 82 | * 0x00000000 otherwise 83 | */ 84 | const int32_t inf_nan_mask = ((int32_t) (nonsign + 0x04000000) >> 8) & INT32_C(0x7F800000); 85 | /* 86 | * Iff nonsign is 0, it overflows into 0xFFFFFFFF, turning bit 31 into 1. Otherwise, bit 31 remains 0. 87 | * The signed shift right by 31 broadcasts bit 31 into all bits of the zero_mask. Thus 88 | * zero_mask == 89 | * 0xFFFFFFFF if the half-precision number was zero (+0.0h or -0.0h) 90 | * 0x00000000 otherwise 91 | */ 92 | const int32_t zero_mask = (int32_t) (nonsign - 1) >> 31; 93 | /* 94 | * 1. Shift nonsign left by renorm_shift to normalize it (if the input was denormal) 95 | * 2. Shift nonsign right by 3 so the exponent (5 bits originally) becomes an 8-bit field and 10-bit mantissa 96 | * shifts into the 10 high bits of the 23-bit mantissa of IEEE single-precision number. 97 | * 3. Add 0x70 to the exponent (starting at bit 23) to compensate the different in exponent bias 98 | * (0x7F for single-precision number less 0xF for half-precision number). 99 | * 4. Subtract renorm_shift from the exponent (starting at bit 23) to account for renormalization. 
As renorm_shift 100 | * is less than 0x70, this can be combined with step 3. 101 | * 5. Binary OR with inf_nan_mask to turn the exponent into 0xFF if the input was NaN or infinity. 102 | * 6. Binary ANDNOT with zero_mask to turn the mantissa and exponent into zero if the input was zero. 103 | * 7. Combine with the sign of the input number. 104 | */ 105 | return sign | ((((nonsign << renorm_shift >> 3) + ((0x70 - renorm_shift) << 23)) | inf_nan_mask) & ~zero_mask); 106 | } 107 | 108 | /* 109 | * Convert a 16-bit floating-point number in IEEE half-precision format, in bit representation, to 110 | * a 32-bit floating-point number in IEEE single-precision format. 111 | * 112 | * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals) 113 | * floating-point operations and bitcasts between integer and floating-point variables. 114 | */ 115 | static inline float fp16_ieee_to_fp32_value(uint16_t h) { 116 | #if FP16_USE_NATIVE_CONVERSION 117 | #if FP16_USE_FLOAT16_TYPE 118 | union { 119 | uint16_t as_bits; 120 | _Float16 as_value; 121 | } fp16 = { h }; 122 | return (float) fp16.as_value; 123 | #elif FP16_USE_FP16_TYPE 124 | union { 125 | uint16_t as_bits; 126 | __fp16 as_value; 127 | } fp16 = { h }; 128 | return (float) fp16.as_value; 129 | #else 130 | #if (defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__) 131 | return _cvtsh_ss((unsigned short) h); 132 | #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__) 133 | return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128((int) (unsigned int) h))); 134 | #elif defined(_M_ARM64) || defined(__aarch64__) 135 | return vgetq_lane_f32(vcvt_f32_f16(vreinterpret_f16_u16(vdup_n_u16(h))), 0); 136 | #else 137 | #error "Archtecture- or compiler-specific implementation required" 138 | #endif 139 | #endif 140 | #else 141 | /* 142 | * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word: 
143 | * +---+-----+------------+-------------------+ 144 | * | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| 145 | * +---+-----+------------+-------------------+ 146 | * Bits 31 26-30 16-25 0-15 147 | * 148 | * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits. 149 | */ 150 | const uint32_t w = (uint32_t) h << 16; 151 | /* 152 | * Extract the sign of the input number into the high bit of the 32-bit word: 153 | * 154 | * +---+----------------------------------+ 155 | * | S |0000000 00000000 00000000 00000000| 156 | * +---+----------------------------------+ 157 | * Bits 31 0-31 158 | */ 159 | const uint32_t sign = w & UINT32_C(0x80000000); 160 | /* 161 | * Extract mantissa and biased exponent of the input number into the high bits of the 32-bit word: 162 | * 163 | * +-----+------------+---------------------+ 164 | * |EEEEE|MM MMMM MMMM|0 0000 0000 0000 0000| 165 | * +-----+------------+---------------------+ 166 | * Bits 27-31 17-26 0-16 167 | */ 168 | const uint32_t two_w = w + w; 169 | 170 | /* 171 | * Shift mantissa and exponent into bits 23-28 and bits 13-22 so they become mantissa and exponent 172 | * of a single-precision floating-point number: 173 | * 174 | * S|Exponent | Mantissa 175 | * +-+---+-----+------------+----------------+ 176 | * |0|000|EEEEE|MM MMMM MMMM|0 0000 0000 0000| 177 | * +-+---+-----+------------+----------------+ 178 | * Bits | 23-31 | 0-22 179 | * 180 | * Next, there are some adjustments to the exponent: 181 | * - The exponent needs to be corrected by the difference in exponent bias between single-precision and half-precision 182 | * formats (0x7F - 0xF = 0x70) 183 | * - Inf and NaN values in the inputs should become Inf and NaN values after conversion to the single-precision number. 184 | * Therefore, if the biased exponent of the half-precision input was 0x1F (max possible value), the biased exponent 185 | * of the single-precision output must be 0xFF (max possible value). 
We do this correction in two steps: 186 | * - First, we adjust the exponent by (0xFF - 0x1F) = 0xE0 (see exp_offset below) rather than by 0x70 suggested 187 | * by the difference in the exponent bias (see above). 188 | * - Then we multiply the single-precision result of exponent adjustment by 2**(-112) to reverse the effect of 189 | * exponent adjustment by 0xE0 less the necessary exponent adjustment by 0x70 due to difference in exponent bias. 190 | * The floating-point multiplication hardware would ensure than Inf and NaN would retain their value on at least 191 | * partially IEEE754-compliant implementations. 192 | * 193 | * Note that the above operations do not handle denormal inputs (where biased exponent == 0). However, they also do not 194 | * operate on denormal inputs, and do not produce denormal results. 195 | */ 196 | const uint32_t exp_offset = UINT32_C(0xE0) << 23; 197 | #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__) 198 | const float exp_scale = 0x1.0p-112f; 199 | #else 200 | const float exp_scale = fp32_from_bits(UINT32_C(0x7800000)); 201 | #endif 202 | const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale; 203 | 204 | /* 205 | * Convert denormalized half-precision inputs into single-precision results (always normalized). 206 | * Zero inputs are also handled here. 207 | * 208 | * In a denormalized number the biased exponent is zero, and mantissa has on-zero bits. 209 | * First, we shift mantissa into bits 0-9 of the 32-bit word. 210 | * 211 | * zeros | mantissa 212 | * +---------------------------+------------+ 213 | * |0000 0000 0000 0000 0000 00|MM MMMM MMMM| 214 | * +---------------------------+------------+ 215 | * Bits 10-31 0-9 216 | * 217 | * Now, remember that denormalized half-precision numbers are represented as: 218 | * FP16 = mantissa * 2**(-24). 
219 | * The trick is to construct a normalized single-precision number with the same mantissa and thehalf-precision input 220 | * and with an exponent which would scale the corresponding mantissa bits to 2**(-24). 221 | * A normalized single-precision floating-point number is represented as: 222 | * FP32 = (1 + mantissa * 2**(-23)) * 2**(exponent - 127) 223 | * Therefore, when the biased exponent is 126, a unit change in the mantissa of the input denormalized half-precision 224 | * number causes a change of the constructud single-precision number by 2**(-24), i.e. the same ammount. 225 | * 226 | * The last step is to adjust the bias of the constructed single-precision number. When the input half-precision number 227 | * is zero, the constructed single-precision number has the value of 228 | * FP32 = 1 * 2**(126 - 127) = 2**(-1) = 0.5 229 | * Therefore, we need to subtract 0.5 from the constructed single-precision number to get the numerical equivalent of 230 | * the input half-precision number. 231 | */ 232 | const uint32_t magic_mask = UINT32_C(126) << 23; 233 | const float magic_bias = 0.5f; 234 | const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias; 235 | 236 | /* 237 | * - Choose either results of conversion of input as a normalized number, or as a denormalized number, depending on the 238 | * input exponent. The variable two_w contains input exponent in bits 27-31, therefore if its smaller than 2**27, the 239 | * input is either a denormal number, or zero. 240 | * - Combine the result of conversion of exponent and mantissa with the sign of the input number. 241 | */ 242 | const uint32_t denormalized_cutoff = UINT32_C(1) << 27; 243 | const uint32_t result = sign | 244 | (two_w < denormalized_cutoff ? 
fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value)); 245 | return fp32_from_bits(result); 246 | #endif 247 | } 248 | 249 | /* 250 | * Convert a 32-bit floating-point number in IEEE single-precision format to a 16-bit floating-point number in 251 | * IEEE half-precision format, in bit representation. 252 | * 253 | * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals) 254 | * floating-point operations and bitcasts between integer and floating-point variables. 255 | */ 256 | static inline uint16_t fp16_ieee_from_fp32_value(float f) { 257 | #if FP16_USE_NATIVE_CONVERSION 258 | #if FP16_USE_FLOAT16_TYPE 259 | union { 260 | _Float16 as_value; 261 | uint16_t as_bits; 262 | } fp16 = { (_Float16) f }; 263 | return fp16.as_bits; 264 | #elif FP16_USE_FP16_TYPE 265 | union { 266 | __fp16 as_value; 267 | uint16_t as_bits; 268 | } fp16 = { (__fp16) f }; 269 | return fp16.as_bits; 270 | #else 271 | #if (defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__) 272 | return _cvtss_sh(f, _MM_FROUND_CUR_DIRECTION); 273 | #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__) 274 | return (uint16_t) _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(f), _MM_FROUND_CUR_DIRECTION)); 275 | #elif defined(_M_ARM64) || defined(__aarch64__) 276 | return vget_lane_u16(vcvt_f16_f32(vdupq_n_f32(f)), 0); 277 | #else 278 | #error "Archtecture- or compiler-specific implementation required" 279 | #endif 280 | #endif 281 | #else 282 | #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__) 283 | const float scale_to_inf = 0x1.0p+112f; 284 | const float scale_to_zero = 0x1.0p-110f; 285 | #else 286 | const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000)); 287 | const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000)); 288 | #endif 289 | #if defined(_MSC_VER) && defined(_M_IX86_FP) && (_M_IX86_FP == 0) || defined(__GNUC__) 
&& defined(__FLT_EVAL_METHOD__) && (__FLT_EVAL_METHOD__ != 0) 290 | const volatile float saturated_f = fabsf(f) * scale_to_inf; 291 | #else 292 | const float saturated_f = fabsf(f) * scale_to_inf; 293 | #endif 294 | float base = saturated_f * scale_to_zero; 295 | 296 | const uint32_t w = fp32_to_bits(f); 297 | const uint32_t shl1_w = w + w; 298 | const uint32_t sign = w & UINT32_C(0x80000000); 299 | uint32_t bias = shl1_w & UINT32_C(0xFF000000); 300 | if (bias < UINT32_C(0x71000000)) { 301 | bias = UINT32_C(0x71000000); 302 | } 303 | 304 | base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base; 305 | const uint32_t bits = fp32_to_bits(base); 306 | const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00); 307 | const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF); 308 | const uint32_t nonsign = exp_bits + mantissa_bits; 309 | return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign); 310 | #endif 311 | } 312 | 313 | /* 314 | * Convert a 16-bit floating-point number in ARM alternative half-precision format, in bit representation, to 315 | * a 32-bit floating-point number in IEEE single-precision format, in bit representation. 316 | * 317 | * @note The implementation doesn't use any floating-point operations. 318 | */ 319 | static inline uint32_t fp16_alt_to_fp32_bits(uint16_t h) { 320 | /* 321 | * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word: 322 | * +---+-----+------------+-------------------+ 323 | * | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| 324 | * +---+-----+------------+-------------------+ 325 | * Bits 31 26-30 16-25 0-15 326 | * 327 | * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits. 
328 | */ 329 | const uint32_t w = (uint32_t) h << 16; 330 | /* 331 | * Extract the sign of the input number into the high bit of the 32-bit word: 332 | * 333 | * +---+----------------------------------+ 334 | * | S |0000000 00000000 00000000 00000000| 335 | * +---+----------------------------------+ 336 | * Bits 31 0-31 337 | */ 338 | const uint32_t sign = w & UINT32_C(0x80000000); 339 | /* 340 | * Extract mantissa and biased exponent of the input number into the bits 0-30 of the 32-bit word: 341 | * 342 | * +---+-----+------------+-------------------+ 343 | * | 0 |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| 344 | * +---+-----+------------+-------------------+ 345 | * Bits 30 27-31 17-26 0-16 346 | */ 347 | const uint32_t nonsign = w & UINT32_C(0x7FFFFFFF); 348 | /* 349 | * Renorm shift is the number of bits to shift mantissa left to make the half-precision number normalized. 350 | * If the initial number is normalized, some of its high 6 bits (sign == 0 and 5-bit exponent) equals one. 351 | * In this case renorm_shift == 0. If the number is denormalize, renorm_shift > 0. Note that if we shift 352 | * denormalized nonsign by renorm_shift, the unit bit of mantissa will shift into exponent, turning the 353 | * biased exponent into 1, and making mantissa normalized (i.e. without leading 1). 354 | */ 355 | #ifdef _MSC_VER 356 | unsigned long nonsign_bsr; 357 | _BitScanReverse(&nonsign_bsr, (unsigned long) nonsign); 358 | uint32_t renorm_shift = (uint32_t) nonsign_bsr ^ 31; 359 | #else 360 | uint32_t renorm_shift = __builtin_clz(nonsign); 361 | #endif 362 | renorm_shift = renorm_shift > 5 ? renorm_shift - 5 : 0; 363 | /* 364 | * Iff nonsign is 0, it overflows into 0xFFFFFFFF, turning bit 31 into 1. Otherwise, bit 31 remains 0. 365 | * The signed shift right by 31 broadcasts bit 31 into all bits of the zero_mask. 
Thus 366 | * zero_mask == 367 | * 0xFFFFFFFF if the half-precision number was zero (+0.0h or -0.0h) 368 | * 0x00000000 otherwise 369 | */ 370 | const int32_t zero_mask = (int32_t) (nonsign - 1) >> 31; 371 | /* 372 | * 1. Shift nonsign left by renorm_shift to normalize it (if the input was denormal) 373 | * 2. Shift nonsign right by 3 so the exponent (5 bits originally) becomes an 8-bit field and 10-bit mantissa 374 | * shifts into the 10 high bits of the 23-bit mantissa of IEEE single-precision number. 375 | * 3. Add 0x70 to the exponent (starting at bit 23) to compensate the different in exponent bias 376 | * (0x7F for single-precision number less 0xF for half-precision number). 377 | * 4. Subtract renorm_shift from the exponent (starting at bit 23) to account for renormalization. As renorm_shift 378 | * is less than 0x70, this can be combined with step 3. 379 | * 5. Binary ANDNOT with zero_mask to turn the mantissa and exponent into zero if the input was zero. 380 | * 6. Combine with the sign of the input number. 381 | */ 382 | return sign | (((nonsign << renorm_shift >> 3) + ((0x70 - renorm_shift) << 23)) & ~zero_mask); 383 | } 384 | 385 | /* 386 | * Convert a 16-bit floating-point number in ARM alternative half-precision format, in bit representation, to 387 | * a 32-bit floating-point number in IEEE single-precision format. 388 | * 389 | * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals) 390 | * floating-point operations and bitcasts between integer and floating-point variables. 
391 | */ 392 | static inline float fp16_alt_to_fp32_value(uint16_t h) { 393 | /* 394 | * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word: 395 | * +---+-----+------------+-------------------+ 396 | * | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| 397 | * +---+-----+------------+-------------------+ 398 | * Bits 31 26-30 16-25 0-15 399 | * 400 | * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits. 401 | */ 402 | const uint32_t w = (uint32_t) h << 16; 403 | /* 404 | * Extract the sign of the input number into the high bit of the 32-bit word: 405 | * 406 | * +---+----------------------------------+ 407 | * | S |0000000 00000000 00000000 00000000| 408 | * +---+----------------------------------+ 409 | * Bits 31 0-31 410 | */ 411 | const uint32_t sign = w & UINT32_C(0x80000000); 412 | /* 413 | * Extract mantissa and biased exponent of the input number into the high bits of the 32-bit word: 414 | * 415 | * +-----+------------+---------------------+ 416 | * |EEEEE|MM MMMM MMMM|0 0000 0000 0000 0000| 417 | * +-----+------------+---------------------+ 418 | * Bits 27-31 17-26 0-16 419 | */ 420 | const uint32_t two_w = w + w; 421 | 422 | /* 423 | * Shift mantissa and exponent into bits 23-28 and bits 13-22 so they become mantissa and exponent 424 | * of a single-precision floating-point number: 425 | * 426 | * S|Exponent | Mantissa 427 | * +-+---+-----+------------+----------------+ 428 | * |0|000|EEEEE|MM MMMM MMMM|0 0000 0000 0000| 429 | * +-+---+-----+------------+----------------+ 430 | * Bits | 23-31 | 0-22 431 | * 432 | * Next, the exponent is adjusted for the difference in exponent bias between single-precision and half-precision 433 | * formats (0x7F - 0xF = 0x70). 
This operation never overflows or generates non-finite values, as the largest 434 | * half-precision exponent is 0x1F and after the adjustment is can not exceed 0x8F < 0xFE (largest single-precision 435 | * exponent for non-finite values). 436 | * 437 | * Note that this operation does not handle denormal inputs (where biased exponent == 0). However, they also do not 438 | * operate on denormal inputs, and do not produce denormal results. 439 | */ 440 | const uint32_t exp_offset = UINT32_C(0x70) << 23; 441 | const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset); 442 | 443 | /* 444 | * Convert denormalized half-precision inputs into single-precision results (always normalized). 445 | * Zero inputs are also handled here. 446 | * 447 | * In a denormalized number the biased exponent is zero, and mantissa has on-zero bits. 448 | * First, we shift mantissa into bits 0-9 of the 32-bit word. 449 | * 450 | * zeros | mantissa 451 | * +---------------------------+------------+ 452 | * |0000 0000 0000 0000 0000 00|MM MMMM MMMM| 453 | * +---------------------------+------------+ 454 | * Bits 10-31 0-9 455 | * 456 | * Now, remember that denormalized half-precision numbers are represented as: 457 | * FP16 = mantissa * 2**(-24). 458 | * The trick is to construct a normalized single-precision number with the same mantissa and thehalf-precision input 459 | * and with an exponent which would scale the corresponding mantissa bits to 2**(-24). 460 | * A normalized single-precision floating-point number is represented as: 461 | * FP32 = (1 + mantissa * 2**(-23)) * 2**(exponent - 127) 462 | * Therefore, when the biased exponent is 126, a unit change in the mantissa of the input denormalized half-precision 463 | * number causes a change of the constructud single-precision number by 2**(-24), i.e. the same ammount. 464 | * 465 | * The last step is to adjust the bias of the constructed single-precision number. 
When the input half-precision number 466 | * is zero, the constructed single-precision number has the value of 467 | * FP32 = 1 * 2**(126 - 127) = 2**(-1) = 0.5 468 | * Therefore, we need to subtract 0.5 from the constructed single-precision number to get the numerical equivalent of 469 | * the input half-precision number. 470 | */ 471 | const uint32_t magic_mask = UINT32_C(126) << 23; 472 | const float magic_bias = 0.5f; 473 | const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias; 474 | 475 | /* 476 | * - Choose either results of conversion of input as a normalized number, or as a denormalized number, depending on the 477 | * input exponent. The variable two_w contains input exponent in bits 27-31, therefore if its smaller than 2**27, the 478 | * input is either a denormal number, or zero. 479 | * - Combine the result of conversion of exponent and mantissa with the sign of the input number. 480 | */ 481 | const uint32_t denormalized_cutoff = UINT32_C(1) << 27; 482 | const uint32_t result = sign | 483 | (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value)); 484 | return fp32_from_bits(result); 485 | } 486 | 487 | /* 488 | * Convert a 32-bit floating-point number in IEEE single-precision format to a 16-bit floating-point number in 489 | * ARM alternative half-precision format, in bit representation. 490 | * 491 | * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals) 492 | * floating-point operations and bitcasts between integer and floating-point variables. 493 | */ 494 | static inline uint16_t fp16_alt_from_fp32_value(float f) { 495 | const uint32_t w = fp32_to_bits(f); 496 | const uint32_t sign = w & UINT32_C(0x80000000); 497 | const uint32_t shl1_w = w + w; 498 | 499 | const uint32_t shl1_max_fp16_fp32 = UINT32_C(0x8FFFC000); 500 | const uint32_t shl1_base = shl1_w > shl1_max_fp16_fp32 ? 
shl1_max_fp16_fp32 : shl1_w; 501 | uint32_t shl1_bias = shl1_base & UINT32_C(0xFF000000); 502 | const uint32_t exp_difference = 23 - 10; 503 | const uint32_t shl1_bias_min = (127 - 1 - exp_difference) << 24; 504 | if (shl1_bias < shl1_bias_min) { 505 | shl1_bias = shl1_bias_min; 506 | } 507 | 508 | const float bias = fp32_from_bits((shl1_bias >> 1) + ((exp_difference + 2) << 23)); 509 | const float base = fp32_from_bits((shl1_base >> 1) + (2 << 23)) + bias; 510 | 511 | const uint32_t exp_f = fp32_to_bits(base) >> 13; 512 | return (sign >> 16) | ((exp_f & UINT32_C(0x00007C00)) + (fp32_to_bits(base) & UINT32_C(0x00000FFF))); 513 | } 514 | 515 | #endif /* FP16_FP16_H */ 516 | -------------------------------------------------------------------------------- /src/distance-cpu.c: -------------------------------------------------------------------------------- 1 | // 2 | // distance-cpu.c 3 | // sqlitevector 4 | // 5 | // Created by Marco Bambini on 20/06/25. 6 | // 7 | 8 | #include "distance-cpu.h" 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "distance-neon.h" 17 | #include "distance-sse2.h" 18 | #include "distance-avx2.h" 19 | 20 | char *distance_backend_name = "CPU"; 21 | distance_function_t dispatch_distance_table[VECTOR_DISTANCE_MAX][VECTOR_TYPE_MAX] = {0}; 22 | 23 | #define LASSQ_UPDATE(ad_) do { \ 24 | double _ad = (ad_); \ 25 | if (_ad != 0.0) { \ 26 | if (scale < _ad) { \ 27 | double r = scale / _ad; \ 28 | ssq = 1.0 + ssq * (r * r); \ 29 | scale = _ad; \ 30 | } else { \ 31 | double r = _ad / scale; \ 32 | ssq += r * r; \ 33 | } \ 34 | } \ 35 | } while (0) 36 | 37 | // MARK: FLOAT32 - 38 | 39 | float float32_distance_l2_impl_cpu (const void *v1, const void *v2, int n, bool use_sqrt) { 40 | const float *a = (const float *)v1; 41 | const float *b = (const float *)v2; 42 | 43 | float sum_sq = 0.0f; 44 | int i = 0; 45 | 46 | if (n >= 4) { 47 | // unroll the loop 4 times 48 | for (; i <= n - 4; i += 4) { 49 | float d0 = 
a[i] - b[i];
            float d1 = a[i+1] - b[i+1];
            float d2 = a[i+2] - b[i+2];
            float d3 = a[i+3] - b[i+3];
            sum_sq += d0*d0 + d1*d1 + d2*d2 + d3*d3;
        }
    }

    // tail loop
    for (; i < n; i++) {
        float d = a[i] - b[i];
        sum_sq += d * d;
    }

    return use_sqrt ? sqrtf(sum_sq) : sum_sq;
}

// Euclidean (L2) distance between two float32 vectors of length n.
float float32_distance_l2_cpu (const void *v1, const void *v2, int n) {
    return float32_distance_l2_impl_cpu(v1, v2, n, true);
}

// Squared Euclidean distance (no final sqrt) between two float32 vectors of length n.
float float32_distance_l2_squared_cpu (const void *v1, const void *v2, int n) {
    return float32_distance_l2_impl_cpu(v1, v2, n, false);
}

// Cosine distance (1 - cosine similarity) between two float32 vectors of length n.
// Returns 1.0 (maximum distance) when either vector has zero norm.
float float32_distance_cosine_cpu (const void *v1, const void *v2, int n) {
    const float *a = (const float *)v1;
    const float *b = (const float *)v2;

    float dot = 0.0f;
    float norm_x = 0.0f;
    float norm_y = 0.0f;
    int i = 0;

    // unroll the loop 4 times
    for (; i <= n - 4; i += 4) {
        float x0 = a[i],     y0 = b[i];
        float x1 = a[i + 1], y1 = b[i + 1];
        float x2 = a[i + 2], y2 = b[i + 2];
        float x3 = a[i + 3], y3 = b[i + 3];

        dot    += x0*y0 + x1*y1 + x2*y2 + x3*y3;
        norm_x += x0*x0 + x1*x1 + x2*x2 + x3*x3;
        norm_y += y0*y0 + y1*y1 + y2*y2 + y3*y3;
    }

    // tail loop
    for (; i < n; i++) {
        float x = a[i];
        float y = b[i];
        dot += x * y;
        norm_x += x * x;
        norm_y += y * y;
    }

    // max distance if one vector is zero
    if (norm_x == 0.0f || norm_y == 0.0f) {
        return 1.0f;
    }

    return 1.0f - (dot / (sqrtf(norm_x) * sqrtf(norm_y)));
}

// Negated dot product of two float32 vectors of length n; the negation turns a
// similarity (larger dot == more similar) into a distance (smaller == more similar).
float float32_distance_dot_cpu (const void *v1, const void *v2, int n) {
    const float *a = (const float *)v1;
    const float *b = (const float *)v2;

    float dot = 0.0f;
    int i = 0;

    // unroll the loop 4 times
    for (; i <= n - 4; i += 4) {
        float x0 = a[i],     y0 = b[i];
        float x1 = a[i + 1], y1 = b[i + 1];
        float x2 = a[i + 2], y2 = b[i + 2];
        float x3 = a[i + 3], y3 = b[i + 3];
        dot += x0*y0 + x1*y1 + x2*y2 + x3*y3;
    }

    // tail loop
    for (; i < n; i++) {
        float x = a[i];
        float y = b[i];
        dot += x * y;
    }

    return -dot;
}

// L1 (Manhattan) distance between two float32 vectors of length n.
float float32_distance_l1_cpu (const void *v1, const void *v2, int n) {
    const float *a = (const float *)v1;
    const float *b = (const float *)v2;

    float sum = 0.0f;
    int i = 0;

    // unroll the loop 4 times
    for (; i <= n - 4; i += 4) {
        sum += fabsf(a[i]     - b[i]);
        sum += fabsf(a[i + 1] - b[i + 1]);
        sum += fabsf(a[i + 2] - b[i + 2]);
        sum += fabsf(a[i + 3] - b[i + 3]);
    }

    // tail loop
    for (; i < n; ++i) {
        sum += fabsf(a[i] - b[i]);
    }

    return sum;
}

// MARK: - BFLOAT16 -

// Overflow/underflow-safe L2 using LASSQ, unrolled by 4.
// Inputs are raw bfloat16 bit patterns; each lane is widened to float32 before use.
// An infinite difference returns +Inf immediately; NaN differences are skipped (contribute 0).
static inline float bfloat16_distance_l2_impl_cpu (const void *v1, const void *v2, int n, bool use_sqrt) {
    const uint16_t *a = (const uint16_t *)v1;
    const uint16_t *b = (const uint16_t *)v2;

    double scale = 0.0;
    double ssq = 1.0;
    int i = 0;

    // unrolled main loop (x4)
    for (; i <= n - 4; i += 4) {
        float x0 = bfloat16_to_float32(a[i    ]), y0 = bfloat16_to_float32(b[i    ]);
        float x1 = bfloat16_to_float32(a[i + 1]), y1 = bfloat16_to_float32(b[i + 1]);
        float x2 = bfloat16_to_float32(a[i + 2]), y2 = bfloat16_to_float32(b[i + 2]);
        float x3 = bfloat16_to_float32(a[i + 3]), y3 = bfloat16_to_float32(b[i + 3]);

        float d0f = x0 - y0, d1f = x1 - y1, d2f = x2 - y2, d3f = x3 - y3;

        // If any difference is NaN, ignore that lane (treat contribution as 0)
        if (isinf(d0f)) return INFINITY; if (!isnan(d0f)) LASSQ_UPDATE(fabs((double)d0f));
        if (isinf(d1f)) return INFINITY; if (!isnan(d1f)) LASSQ_UPDATE(fabs((double)d1f));
        if (isinf(d2f)) return INFINITY; if (!isnan(d2f)) LASSQ_UPDATE(fabs((double)d2f));
        if (isinf(d3f)) return INFINITY; if (!isnan(d3f)) LASSQ_UPDATE(fabs((double)d3f));
    }

    for (; i < n; ++i) {
        float d = bfloat16_to_float32(a[i]) - bfloat16_to_float32(b[i]);
        if (isinf(d)) return INFINITY;
        if (!isnan(d)) LASSQ_UPDATE(fabs((double)d));
    }

    // Recover the plain sum of squares from the scale/ssq pair (sum_sq == scale^2 * ssq).
    double sum_sq = (scale == 0.0) ? 0.0 : (scale * scale * ssq);
    double out = use_sqrt ? sqrt(sum_sq) : sum_sq;
    return (float)out;
}

// Euclidean (L2) distance between two bfloat16 vectors of length n.
float bfloat16_distance_l2_cpu (const void *v1, const void *v2, int n) {
    return bfloat16_distance_l2_impl_cpu(v1, v2, n, true);
}

// Squared Euclidean distance (no final sqrt) between two bfloat16 vectors of length n.
float bfloat16_distance_l2_squared_cpu (const void *v1, const void *v2, int n) {
    return bfloat16_distance_l2_impl_cpu(v1, v2, n, false);
}

// Cosine distance (1 - cosine similarity) between two bfloat16 vectors of length n.
// Returns 1.0 (maximum distance) when either vector has zero norm.
float bfloat16_distance_cosine_cpu(const void *v1, const void *v2, int n) {
    const uint16_t *a = (const uint16_t *)v1;
    const uint16_t *b = (const uint16_t *)v2;

    float dot = 0.0f, norm_x = 0.0f, norm_y = 0.0f;
    int i = 0;

    // unroll the loop 4 times
    for (; i <= n - 4; i += 4) {
        float x0 = bfloat16_to_float32(a[i    ]), y0 = bfloat16_to_float32(b[i    ]);
        float x1 = bfloat16_to_float32(a[i + 1]), y1 = bfloat16_to_float32(b[i + 1]);
        float x2 = bfloat16_to_float32(a[i + 2]), y2 = bfloat16_to_float32(b[i + 2]);
        float x3 = bfloat16_to_float32(a[i + 3]), y3 = bfloat16_to_float32(b[i + 3]);

        // accumulate (fmaf may fuse on capable CPUs)
        dot = fmaf(x0, y0, dot);
        dot = fmaf(x1, y1, dot);
        dot = fmaf(x2, y2, dot);
        dot = fmaf(x3, y3, dot);

        norm_x = fmaf(x0, x0, norm_x);
        norm_x = fmaf(x1, x1, norm_x);
        norm_x = fmaf(x2, x2, norm_x);
        norm_x = fmaf(x3, x3, norm_x);

        norm_y = fmaf(y0, y0, norm_y);
        norm_y = fmaf(y1, y1, norm_y);
        norm_y = fmaf(y2, y2, norm_y);
        norm_y = fmaf(y3, y3, norm_y);
    }

    // tail loop
    for (; i < n; ++i) {
        float x = bfloat16_to_float32(a[i]);
        float y = bfloat16_to_float32(b[i]);
        dot = fmaf(x, y, dot);
        norm_x = fmaf(x, x, norm_x);
        norm_y = fmaf(y, y, norm_y);
    }

    // max distance if one vector is zero
    if (norm_x == 0.0f || norm_y == 0.0f) {
        return 1.0f;
    }

    return 1.0f - (dot / (sqrtf(norm_x) * sqrtf(norm_y)));
}

// Negated dot product of two bfloat16 vectors of length n (smaller == more similar).
float bfloat16_distance_dot_cpu (const void *v1, const void *v2, int n) {
    const uint16_t *a = (const uint16_t *)v1;
    const uint16_t *b = (const uint16_t *)v2;

    float dot = 0.0f;
    int i = 0;

    // unroll the loop 4 times
    for (; i <= n - 4; i += 4) {
        float x0 = bfloat16_to_float32(a[i    ]), y0 = bfloat16_to_float32(b[i    ]);
        float x1 = bfloat16_to_float32(a[i + 1]), y1 = bfloat16_to_float32(b[i + 1]);
        float x2 = bfloat16_to_float32(a[i + 2]), y2 = bfloat16_to_float32(b[i + 2]);
        float x3 = bfloat16_to_float32(a[i + 3]), y3 = bfloat16_to_float32(b[i + 3]);

        // fmaf often maps to a fused multiply-add, improving precision/speed
        dot = fmaf(x0, y0, dot);
        dot = fmaf(x1, y1, dot);
        dot = fmaf(x2, y2, dot);
        dot = fmaf(x3, y3, dot);
    }

    // tail loop
    for (; i < n; ++i) {
        float x = bfloat16_to_float32(a[i]);
        float y = bfloat16_to_float32(b[i]);
        dot = fmaf(x, y, dot);
    }

    return -dot;
}

// L1 (Manhattan) distance between two bfloat16 vectors of length n.
float bfloat16_distance_l1_cpu (const void *v1, const void *v2, int n) {
    const uint16_t *a = (const uint16_t *)v1;
    const uint16_t *b = (const uint16_t *)v2;

    float sum = 0.0f;
    int i = 0;

    // unroll the loop 4 times
    for (; i <= n - 4; i += 4) {
        float a0 = bfloat16_to_float32(a[i    ]), b0 = bfloat16_to_float32(b[i    ]);
        float a1 = bfloat16_to_float32(a[i + 1]), b1 = bfloat16_to_float32(b[i + 1]);
        float a2 = bfloat16_to_float32(a[i + 2]), b2 = bfloat16_to_float32(b[i + 2]);
        float a3 = bfloat16_to_float32(a[i + 3]), b3 = bfloat16_to_float32(b[i + 3]);

        sum += fabsf(a0 - b0);
        sum += fabsf(a1 - b1);
        sum += fabsf(a2 - b2);
        sum += fabsf(a3 - b3);
    }

    // tail loop
    for (; i < n; ++i) {
        float da = bfloat16_to_float32(a[i]);
        float db = bfloat16_to_float32(b[i]);
        sum += fabsf(da - db);
    }

    return sum;
}

// MARK: - FLOAT16 -

// Overflow/underflow-safe L2 for float16 vectors (raw IEEE half bit patterns).
// NOTE(review): definition continues beyond this excerpt.
static inline float float16_distance_l2_impl_cpu (const void *v1, const void *v2, int n, bool use_sqrt) {
    const uint16_t *a = (const uint16_t *)v1; /* float16 bits */
    const uint16_t *b = (const uint16_t *)v2;

    double scale = 0.0;
    double ssq = 1.0;
    int i = 0;

    /* main loop, unrolled by 4 */
    for (; i <= n - 4; i += 4) {
        uint16_t a0=a[i], a1=a[i+1], a2=a[i+2], a3=a[i+3];
        uint16_t b0=b[i], b1=b[i+1], b2=b[i+2], b3=b[i+3];

        /* If any pair involves an infinity not matched with same-signed infinity → +Inf */
        if ((f16_is_inf(a0)||f16_is_inf(b0)) && !(f16_is_inf(a0)&&f16_is_inf(b0)&&f16_sign(a0)==f16_sign(b0))) return INFINITY;
        if ((f16_is_inf(a1)||f16_is_inf(b1)) && !(f16_is_inf(a1)&&f16_is_inf(b1)&&f16_sign(a1)==f16_sign(b1))) return INFINITY;
        if ((f16_is_inf(a2)||f16_is_inf(b2)) && !(f16_is_inf(a2)&&f16_is_inf(b2)&&f16_sign(a2)==f16_sign(b2))) return INFINITY;
        if ((f16_is_inf(a3)||f16_is_inf(b3)) && !(f16_is_inf(a3)&&f16_is_inf(b3)&&f16_sign(a3)==f16_sign(b3))) return INFINITY;

        /* NaN lanes contribute 0 */
        if (!f16_is_nan(a0) && !f16_is_nan(b0)) { double d = (double)float16_to_float32(a0) - (double)float16_to_float32(b0); LASSQ_UPDATE(fabs(d)); }
        if (!f16_is_nan(a1) && !f16_is_nan(b1)) { double d = (double)float16_to_float32(a1) - (double)float16_to_float32(b1);
LASSQ_UPDATE(fabs(d)); } 340 | if (!f16_is_nan(a2) && !f16_is_nan(b2)) { double d = (double)float16_to_float32(a2) - (double)float16_to_float32(b2); LASSQ_UPDATE(fabs(d)); } 341 | if (!f16_is_nan(a3) && !f16_is_nan(b3)) { double d = (double)float16_to_float32(a3) - (double)float16_to_float32(b3); LASSQ_UPDATE(fabs(d)); } 342 | } 343 | 344 | /* tail */ 345 | for (; i < n; ++i) { 346 | uint16_t ai=a[i], bi=b[i]; 347 | if ((f16_is_inf(ai)||f16_is_inf(bi)) && !(f16_is_inf(ai)&&f16_is_inf(bi)&&f16_sign(ai)==f16_sign(bi))) return INFINITY; 348 | if (f16_is_nan(ai) || f16_is_nan(bi)) continue; 349 | double d = (double)float16_to_float32(ai) - (double)float16_to_float32(bi); 350 | LASSQ_UPDATE(fabs(d)); 351 | } 352 | 353 | double sum_sq = (scale == 0.0) ? 0.0 : (scale * scale * ssq); 354 | double out = use_sqrt ? sqrt(sum_sq) : sum_sq; 355 | return (float)out; 356 | } 357 | 358 | float float16_distance_l2_cpu (const void *v1, const void *v2, int n) { 359 | return float16_distance_l2_impl_cpu(v1, v2, n, true); 360 | } 361 | 362 | float float16_distance_l2_squared_cpu (const void *v1, const void *v2, int n) { 363 | return float16_distance_l2_impl_cpu(v1, v2, n, false); 364 | } 365 | 366 | float float16_distance_l1_cpu (const void *v1, const void *v2, int n) { 367 | const uint16_t *a = (const uint16_t *)v1; 368 | const uint16_t *b = (const uint16_t *)v2; 369 | 370 | double sum = 0.0; 371 | int i = 0; 372 | 373 | for (; i <= n - 4; i += 4) { 374 | uint16_t a0=a[i], a1=a[i+1], a2=a[i+2], a3=a[i+3]; 375 | uint16_t b0=b[i], b1=b[i+1], b2=b[i+2], b3=b[i+3]; 376 | 377 | /* Inf differences yield +Inf */ 378 | if ((f16_is_inf(a0)||f16_is_inf(b0)) && !(f16_is_inf(a0)&&f16_is_inf(b0)&&f16_sign(a0)==f16_sign(b0))) return INFINITY; 379 | if ((f16_is_inf(a1)||f16_is_inf(b1)) && !(f16_is_inf(a1)&&f16_is_inf(b1)&&f16_sign(a1)==f16_sign(b1))) return INFINITY; 380 | if ((f16_is_inf(a2)||f16_is_inf(b2)) && !(f16_is_inf(a2)&&f16_is_inf(b2)&&f16_sign(a2)==f16_sign(b2))) return INFINITY; 381 | if 
((f16_is_inf(a3)||f16_is_inf(b3)) && !(f16_is_inf(a3)&&f16_is_inf(b3)&&f16_sign(a3)==f16_sign(b3))) return INFINITY; 382 | 383 | if (!f16_is_nan(a0) && !f16_is_nan(b0)) sum += fabs((double)float16_to_float32(a0) - (double)float16_to_float32(b0)); 384 | if (!f16_is_nan(a1) && !f16_is_nan(b1)) sum += fabs((double)float16_to_float32(a1) - (double)float16_to_float32(b1)); 385 | if (!f16_is_nan(a2) && !f16_is_nan(b2)) sum += fabs((double)float16_to_float32(a2) - (double)float16_to_float32(b2)); 386 | if (!f16_is_nan(a3) && !f16_is_nan(b3)) sum += fabs((double)float16_to_float32(a3) - (double)float16_to_float32(b3)); 387 | } 388 | 389 | for (; i < n; ++i) { 390 | uint16_t ai=a[i], bi=b[i]; 391 | if ((f16_is_inf(ai)||f16_is_inf(bi)) && !(f16_is_inf(ai)&&f16_is_inf(bi)&&f16_sign(ai)==f16_sign(bi))) return INFINITY; 392 | if (f16_is_nan(ai) || f16_is_nan(bi)) continue; 393 | sum += fabs((double)float16_to_float32(ai) - (double)float16_to_float32(bi)); 394 | } 395 | 396 | return (float)sum; 397 | } 398 | 399 | float float16_distance_dot_cpu (const void *v1, const void *v2, int n) { 400 | const uint16_t *a = (const uint16_t *)v1; 401 | const uint16_t *b = (const uint16_t *)v2; 402 | 403 | double dot = 0.0; 404 | int i = 0; 405 | 406 | for (; i <= n - 4; i += 4) { 407 | float x0 = float16_to_float32(a[i ]), y0 = float16_to_float32(b[i ]); 408 | float x1 = float16_to_float32(a[i + 1]), y1 = float16_to_float32(b[i + 1]); 409 | float x2 = float16_to_float32(a[i + 2]), y2 = float16_to_float32(b[i + 2]); 410 | float x3 = float16_to_float32(a[i + 3]), y3 = float16_to_float32(b[i + 3]); 411 | 412 | /* Skip NaN lanes */ 413 | if (!isnan(x0) && !isnan(y0)) { double p = (double)x0 * (double)y0; if (isinf(p)) return (p>0)? -INFINITY : INFINITY; dot += p; } 414 | if (!isnan(x1) && !isnan(y1)) { double p = (double)x1 * (double)y1; if (isinf(p)) return (p>0)? 
-INFINITY : INFINITY; dot += p; } 415 | if (!isnan(x2) && !isnan(y2)) { double p = (double)x2 * (double)y2; if (isinf(p)) return (p>0)? -INFINITY : INFINITY; dot += p; } 416 | if (!isnan(x3) && !isnan(y3)) { double p = (double)x3 * (double)y3; if (isinf(p)) return (p>0)? -INFINITY : INFINITY; dot += p; } 417 | } 418 | 419 | for (; i < n; ++i) { 420 | float x = float16_to_float32(a[i]); 421 | float y = float16_to_float32(b[i]); 422 | if (isnan(x) || isnan(y)) continue; 423 | double p = (double)x * (double)y; 424 | if (isinf(p)) return (p>0)? -INFINITY : INFINITY; 425 | dot += p; 426 | } 427 | 428 | return (float)(-dot); 429 | } 430 | 431 | float float16_distance_cosine_cpu (const void *v1, const void *v2, int n) { 432 | const uint16_t *a = (const uint16_t *)v1; 433 | const uint16_t *b = (const uint16_t *)v2; 434 | 435 | double dot = 0.0, nx = 0.0, ny = 0.0; 436 | int i = 0; 437 | 438 | for (; i <= n - 4; i += 4) { 439 | float x0 = float16_to_float32(a[i ]), y0 = float16_to_float32(b[i ]); 440 | float x1 = float16_to_float32(a[i + 1]), y1 = float16_to_float32(b[i + 1]); 441 | float x2 = float16_to_float32(a[i + 2]), y2 = float16_to_float32(b[i + 2]); 442 | float x3 = float16_to_float32(a[i + 3]), y3 = float16_to_float32(b[i + 3]); 443 | 444 | if (!isnan(x0) && !isnan(y0)) { double xd=x0, yd=y0; if (isinf(xd)||isinf(yd)) return 1.0f; dot += xd*yd; nx += xd*xd; ny += yd*yd; } 445 | if (!isnan(x1) && !isnan(y1)) { double xd=x1, yd=y1; if (isinf(xd)||isinf(yd)) return 1.0f; dot += xd*yd; nx += xd*xd; ny += yd*yd; } 446 | if (!isnan(x2) && !isnan(y2)) { double xd=x2, yd=y2; if (isinf(xd)||isinf(yd)) return 1.0f; dot += xd*yd; nx += xd*xd; ny += yd*yd; } 447 | if (!isnan(x3) && !isnan(y3)) { double xd=x3, yd=y3; if (isinf(xd)||isinf(yd)) return 1.0f; dot += xd*yd; nx += xd*xd; ny += yd*yd; } 448 | } 449 | 450 | for (; i < n; ++i) { 451 | float x = float16_to_float32(a[i]); 452 | float y = float16_to_float32(b[i]); 453 | if (isnan(x) || isnan(y)) continue; 454 | if 
(isinf((double)x) || isinf((double)y)) return 1.0f; 455 | double xd=x, yd=y; 456 | dot += xd*yd; nx += xd*xd; ny += yd*yd; 457 | } 458 | 459 | double denom = sqrt(nx) * sqrt(ny); 460 | if (!(denom > 0.0) || !isfinite(denom) || !isfinite(dot)) return 1.0f; 461 | 462 | double cosv = dot / denom; 463 | if (cosv > 1.0) cosv = 1.0; 464 | if (cosv < -1.0) cosv = -1.0; 465 | return (float)(1.0 - cosv); 466 | } 467 | 468 | // MARK: - UINT8 - 469 | 470 | static inline float uint8_distance_l2_impl_cpu (const void *v1, const void *v2, int n, bool use_sqrt) { 471 | const uint8_t *a = (const uint8_t *)v1; 472 | const uint8_t *b = (const uint8_t *)v2; 473 | 474 | float sum = 0.0f; 475 | int i = 0; 476 | 477 | // unrolled loop 478 | for (; i <= n - 4; i += 4) { 479 | int d0 = (int)a[i + 0] - (int)b[i + 0]; 480 | int d1 = (int)a[i + 1] - (int)b[i + 1]; 481 | int d2 = (int)a[i + 2] - (int)b[i + 2]; 482 | int d3 = (int)a[i + 3] - (int)b[i + 3]; 483 | 484 | sum += (float)(d0 * d0 + d1 * d1 + d2 * d2 + d3 * d3); 485 | } 486 | 487 | // tail loop 488 | for (; i < n; ++i) { 489 | int d = (int)a[i] - (int)b[i]; 490 | sum += (float)(d * d); 491 | } 492 | 493 | return use_sqrt ? 
sqrtf(sum) : sum; 494 | } 495 | 496 | float uint8_distance_l2_cpu (const void *v1, const void *v2, int n) { 497 | return uint8_distance_l2_impl_cpu(v1, v2, n, true); 498 | } 499 | 500 | float uint8_distance_l2_squared_cpu (const void *v1, const void *v2, int n) { 501 | return uint8_distance_l2_impl_cpu(v1, v2, n, false); 502 | } 503 | 504 | float uint8_distance_cosine_cpu (const void *v1, const void *v2, int n) { 505 | const uint8_t *a = (const uint8_t *)v1; 506 | const uint8_t *b = (const uint8_t *)v2; 507 | 508 | uint32_t dot = 0; 509 | uint32_t norm_a2 = 0; 510 | uint32_t norm_b2 = 0; 511 | 512 | int i = 0; 513 | for (; i <= n - 4; i += 4) { 514 | uint32_t a0 = a[i + 0], b0 = b[i + 0]; 515 | uint32_t a1 = a[i + 1], b1 = b[i + 1]; 516 | uint32_t a2 = a[i + 2], b2 = b[i + 2]; 517 | uint32_t a3 = a[i + 3], b3 = b[i + 3]; 518 | 519 | dot += a0 * b0 + a1 * b1 + a2 * b2 + a3 * b3; 520 | norm_a2 += a0 * a0 + a1 * a1 + a2 * a2 + a3 * a3; 521 | norm_b2 += b0 * b0 + b1 * b1 + b2 * b2 + b3 * b3; 522 | } 523 | 524 | // tail loop 525 | for (; i < n; ++i) { 526 | uint32_t ai = a[i]; 527 | uint32_t bi = b[i]; 528 | dot += ai * bi; 529 | norm_a2 += ai * ai; 530 | norm_b2 += bi * bi; 531 | } 532 | 533 | if (norm_a2 == 0 || norm_b2 == 0) { 534 | return 1.0f; 535 | } 536 | 537 | float cosine_similarity = dot / (sqrtf((float)norm_a2) * sqrtf((float)norm_b2)); 538 | return 1.0f - cosine_similarity; 539 | } 540 | 541 | float uint8_distance_dot_cpu (const void *v1, const void *v2, int n) { 542 | const uint8_t *a = (const uint8_t *)v1; 543 | const uint8_t *b = (const uint8_t *)v2; 544 | float dot = 0.0f; 545 | 546 | int i = 0; 547 | for (; i <= n - 4; i += 4) { 548 | dot += (float)(a[i + 0]) * b[i + 0]; 549 | dot += (float)(a[i + 1]) * b[i + 1]; 550 | dot += (float)(a[i + 2]) * b[i + 2]; 551 | dot += (float)(a[i + 3]) * b[i + 3]; 552 | } 553 | for (; i < n; ++i) { 554 | dot += (float)(a[i]) * b[i]; 555 | } 556 | 557 | return -dot; // dot distance = negative dot product 558 | } 559 | 
560 | float uint8_distance_l1_cpu (const void *v1, const void *v2, int n) { 561 | const uint8_t *a = (const uint8_t *)v1; 562 | const uint8_t *b = (const uint8_t *)v2; 563 | float sum = 0.0f; 564 | 565 | int i = 0; 566 | for (; i <= n - 4; i += 4) { 567 | sum += fabsf((float)a[i + 0] - (float)b[i + 0]); 568 | sum += fabsf((float)a[i + 1] - (float)b[i + 1]); 569 | sum += fabsf((float)a[i + 2] - (float)b[i + 2]); 570 | sum += fabsf((float)a[i + 3] - (float)b[i + 3]); 571 | } 572 | 573 | for (; i < n; ++i) { 574 | sum += fabsf((float)a[i] - (float)b[i]); 575 | } 576 | 577 | return sum; 578 | } 579 | 580 | // MARK: - INT8 - 581 | 582 | float int8_distance_l2_impl_cpu (const void *v1, const void *v2, int n, bool use_sqrt) { 583 | const int8_t *a = (const int8_t *)v1; 584 | const int8_t *b = (const int8_t *)v2; 585 | 586 | float sum = 0.0f; 587 | int i = 0; 588 | 589 | // unrolled loop 590 | for (; i <= n - 4; i += 4) { 591 | int d0 = (int)a[i + 0] - (int)b[i + 0]; 592 | int d1 = (int)a[i + 1] - (int)b[i + 1]; 593 | int d2 = (int)a[i + 2] - (int)b[i + 2]; 594 | int d3 = (int)a[i + 3] - (int)b[i + 3]; 595 | 596 | sum += (float)(d0 * d0 + d1 * d1 + d2 * d2 + d3 * d3); 597 | } 598 | 599 | // tail loop 600 | for (; i < n; ++i) { 601 | int d = (int)a[i] - (int)b[i]; 602 | sum += (float)(d * d); 603 | } 604 | 605 | return use_sqrt ? 
sqrtf(sum) : sum; 606 | } 607 | 608 | float int8_distance_l2_cpu (const void *v1, const void *v2, int n) { 609 | return int8_distance_l2_impl_cpu(v1, v2, n, true); 610 | } 611 | 612 | float int8_distance_l2_squared_cpu (const void *v1, const void *v2, int n) { 613 | return int8_distance_l2_impl_cpu(v1, v2, n, false); 614 | } 615 | 616 | float int8_distance_cosine_cpu (const void *v1, const void *v2, int n) { 617 | const int8_t *a = (const int8_t *)v1; 618 | const int8_t *b = (const int8_t *)v2; 619 | 620 | int32_t dot = 0; 621 | int32_t norm_a2 = 0; 622 | int32_t norm_b2 = 0; 623 | 624 | int i = 0; 625 | for (; i <= n - 4; i += 4) { 626 | int32_t a0 = a[i + 0], b0 = b[i + 0]; 627 | int32_t a1 = a[i + 1], b1 = b[i + 1]; 628 | int32_t a2 = a[i + 2], b2 = b[i + 2]; 629 | int32_t a3 = a[i + 3], b3 = b[i + 3]; 630 | 631 | dot += a0 * b0 + a1 * b1 + a2 * b2 + a3 * b3; 632 | norm_a2 += a0 * a0 + a1 * a1 + a2 * a2 + a3 * a3; 633 | norm_b2 += b0 * b0 + b1 * b1 + b2 * b2 + b3 * b3; 634 | } 635 | 636 | // tail loop 637 | for (; i < n; ++i) { 638 | int32_t ai = a[i]; 639 | int32_t bi = b[i]; 640 | dot += ai * bi; 641 | norm_a2 += ai * ai; 642 | norm_b2 += bi * bi; 643 | } 644 | 645 | if (norm_a2 == 0 || norm_b2 == 0) { 646 | return 1.0f; 647 | } 648 | 649 | float cosine_similarity = dot / (sqrtf((float)norm_a2) * sqrtf((float)norm_b2)); 650 | return 1.0f - cosine_similarity; 651 | } 652 | 653 | float int8_distance_dot_cpu (const void *v1, const void *v2, int n) { 654 | const int8_t *a = (const int8_t *)v1; 655 | const int8_t *b = (const int8_t *)v2; 656 | 657 | float dot = 0.0f; 658 | int i = 0; 659 | 660 | for (; i <= n - 4; i += 4) { 661 | dot += (float)a[i + 0] * b[i + 0]; 662 | dot += (float)a[i + 1] * b[i + 1]; 663 | dot += (float)a[i + 2] * b[i + 2]; 664 | dot += (float)a[i + 3] * b[i + 3]; 665 | } 666 | 667 | for (; i < n; ++i) { 668 | dot += (float)a[i] * b[i]; 669 | } 670 | 671 | return -dot; 672 | } 673 | 674 | float int8_distance_l1_cpu (const void *v1, const void 
*v2, int n) { 675 | const int8_t *a = (const int8_t *)v1; 676 | const int8_t *b = (const int8_t *)v2; 677 | 678 | float sum = 0.0f; 679 | int i = 0; 680 | 681 | for (; i <= n - 4; i += 4) { 682 | sum += fabsf((float)a[i + 0] - (float)b[i + 0]); 683 | sum += fabsf((float)a[i + 1] - (float)b[i + 1]); 684 | sum += fabsf((float)a[i + 2] - (float)b[i + 2]); 685 | sum += fabsf((float)a[i + 3] - (float)b[i + 3]); 686 | } 687 | 688 | for (; i < n; ++i) { 689 | sum += fabsf((float)a[i] - (float)b[i]); 690 | } 691 | 692 | return sum; 693 | } 694 | 695 | // MARK: - ENTRYPOINT - 696 | 697 | #if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) 698 | #include 699 | 700 | static void x86_cpuid(int leaf, int subleaf, int *eax, int *ebx, int *ecx, int *edx) { 701 | #if defined(_MSC_VER) 702 | int regs[4]; 703 | __cpuidex(regs, leaf, subleaf); 704 | *eax = regs[0]; *ebx = regs[1]; *ecx = regs[2]; *edx = regs[3]; 705 | #else 706 | __cpuid_count(leaf, subleaf, *eax, *ebx, *ecx, *edx); 707 | #endif 708 | } 709 | 710 | bool cpu_supports_avx2 (void) { 711 | #if FORCE_AVX2 712 | return true; 713 | #else 714 | int eax, ebx, ecx, edx; 715 | x86_cpuid(0, 0, &eax, &ebx, &ecx, &edx); 716 | if (eax < 7) return false; 717 | x86_cpuid(7, 0, &eax, &ebx, &ecx, &edx); 718 | return (ebx & (1 << 5)) != 0; // AVX2 719 | #endif 720 | } 721 | 722 | bool cpu_supports_sse2 (void) { 723 | int eax, ebx, ecx, edx; 724 | x86_cpuid(1, 0, &eax, &ebx, &ecx, &edx); 725 | return (edx & (1 << 26)) != 0; // SSE2 726 | } 727 | 728 | #else 729 | // For ARM (NEON is always present on aarch64, runtime detection rarely needed) 730 | #if defined(__aarch64__) || defined(__ARM_NEON) || defined(__ARM_NEON__) 731 | bool cpu_supports_neon (void) { 732 | return true; 733 | } 734 | #else 735 | #ifdef SQLITE_WASM_EXTRA_INIT 736 | bool cpu_supports_neon (void) { 737 | return false; 738 | } 739 | #else 740 | #include 741 | #include 742 | bool cpu_supports_neon (void) { 743 | #ifdef AT_HWCAP 744 | 
return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0; 745 | #else 746 | return false; 747 | #endif 748 | } 749 | #endif 750 | #endif 751 | #endif 752 | 753 | // MARK: - 754 | 755 | void init_cpu_functions (void) { 756 | distance_function_t cpu_table[VECTOR_DISTANCE_MAX][VECTOR_TYPE_MAX] = { 757 | [VECTOR_DISTANCE_L2] = { 758 | [VECTOR_TYPE_F32] = float32_distance_l2_cpu, 759 | [VECTOR_TYPE_F16] = float16_distance_l2_cpu, 760 | [VECTOR_TYPE_BF16] = bfloat16_distance_l2_cpu, 761 | [VECTOR_TYPE_U8] = uint8_distance_l2_cpu, 762 | [VECTOR_TYPE_I8] = int8_distance_l2_cpu, 763 | }, 764 | [VECTOR_DISTANCE_SQUARED_L2] = { 765 | [VECTOR_TYPE_F32] = float32_distance_l2_squared_cpu, 766 | [VECTOR_TYPE_F16] = float16_distance_l2_squared_cpu, 767 | [VECTOR_TYPE_BF16] = bfloat16_distance_l2_squared_cpu, 768 | [VECTOR_TYPE_U8] = uint8_distance_l2_squared_cpu, 769 | [VECTOR_TYPE_I8] = int8_distance_l2_squared_cpu, 770 | }, 771 | [VECTOR_DISTANCE_COSINE] = { 772 | [VECTOR_TYPE_F32] = float32_distance_cosine_cpu, 773 | [VECTOR_TYPE_F16] = float16_distance_cosine_cpu, 774 | [VECTOR_TYPE_BF16] = bfloat16_distance_cosine_cpu, 775 | [VECTOR_TYPE_U8] = uint8_distance_cosine_cpu, 776 | [VECTOR_TYPE_I8] = int8_distance_cosine_cpu, 777 | }, 778 | [VECTOR_DISTANCE_DOT] = { 779 | [VECTOR_TYPE_F32] = float32_distance_dot_cpu, 780 | [VECTOR_TYPE_F16] = float16_distance_dot_cpu, 781 | [VECTOR_TYPE_BF16] = bfloat16_distance_dot_cpu, 782 | [VECTOR_TYPE_U8] = uint8_distance_dot_cpu, 783 | [VECTOR_TYPE_I8] = int8_distance_dot_cpu, 784 | }, 785 | [VECTOR_DISTANCE_L1] = { 786 | [VECTOR_TYPE_F32] = float32_distance_l1_cpu, 787 | [VECTOR_TYPE_F16] = float16_distance_l1_cpu, 788 | [VECTOR_TYPE_BF16] = bfloat16_distance_l1_cpu, 789 | [VECTOR_TYPE_U8] = uint8_distance_l1_cpu, 790 | [VECTOR_TYPE_I8] = int8_distance_l1_cpu, 791 | } 792 | }; 793 | 794 | memcpy(dispatch_distance_table, cpu_table, sizeof(cpu_table)); 795 | } 796 | 797 | void init_distance_functions (bool force_cpu) { 798 | init_cpu_functions(); 799 | 
if (force_cpu) return; 800 | 801 | #if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) 802 | if (cpu_supports_avx2()) { 803 | init_distance_functions_avx2(); 804 | } else if (cpu_supports_sse2()) { 805 | init_distance_functions_sse2(); 806 | } 807 | #elif defined(__ARM_NEON) || defined(__aarch64__) 808 | if (cpu_supports_neon()) { 809 | init_distance_functions_neon(); 810 | } 811 | #endif 812 | } 813 | 814 | --------------------------------------------------------------------------------