├── .gitignore
├── README.MD
├── binary_js.png
├── binary_wasm.png
├── cover.gif
├── haming_distance_simd.wat
├── package.json
├── sc-binary-wasm.png
├── sc-binary.png
├── sc-db.png
└── src
    ├── index.d.ts
    └── index.js


/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | dist/
3 | *.log
4 | *.env
5 | 
6 | 


--------------------------------------------------------------------------------
/README.MD:
--------------------------------------------------------------------------------
  1 | # EntityDB - Decentralized Ai Memory
  2 | 
  3 | ### Storing Vector Embeddings In The Browser wrapping indexedDB and Transformers.js
  4 | 
  5 | ![EntityDB](https://raw.githubusercontent.com/babycommando/entity-db/refs/heads/main/cover.gif)
  6 | 
  7 | ## Demo: [See EntityDB in action!](https://entity-db-landing.vercel.app/)
  8 | 
  9 | ## Overview
 10 | 
 11 | **EntityDB** is a powerful, lightweight in-browser database designed for storing and querying vectors. It integrates seamlessly with [IndexedDB](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API) for persistent storage and [Transformers.js](https://github.com/xenova/transformers) to generate embeddings from text, allowing you to build fast and efficient search systems with state-of-the-art models. Whether you're building search engines, recommendation systems, Ai memory or any app requiring vector similarity, EntityDB has got you covered.
 12 | 
 13 | ## Installation
 14 | 
 15 | To install **EntityDB** in your project, run:
 16 | 
 17 | ```bash
 18 | npm install @babycommando/entity-db
 19 | ```
 20 | 
 21 | ```bash
 22 | yarn add @babycommando/entity-db
 23 | ```
 24 | 
 25 | ```bash
 26 | pnpm add @babycommando/entity-db
 27 | ```
 28 | 
 29 | ```bash
 30 | bun add @babycommando/entity-db
 31 | ```
 32 | 
 33 | ## Features
 34 | 
 35 | - **In-browser**: Runs entirely in the browser using IndexedDB for local storage.
 36 | - **Seamless Integration with Transformers**: Easily generate text embeddings with Hugging Face models via Transformers.js.
 37 | - **Cosine Similarity Search**: Efficient querying based on cosine similarity between vectors.
 38 | - **Flexible**: Supports both automatic embedding generation and manual insertion of pre-computed embeddings.
 39 | - **Lightweight**: No need for a server-side component or complex setup.
 40 | 
 41 | ![EntityDB_in_action](sc-db.png)
 42 | 
 43 | ## Usage
 44 | 
 45 | ### Importing the Library
 46 | 
 47 | ```js
 48 | import { EntityDB } from "@babycommando/entity-db";
 49 | 
 50 | // Initialize the VectorDB instance
 51 | const db = new EntityDB({
 52 |   vectorPath: "db_name",
 53 |   model: "Xenova/all-MiniLM-L6-v2", // a HuggingFace embeddings model
 54 | });
 55 | ```
 56 | 
 57 | ### Inserting Data with Automatic Embedding Generation
 58 | 
 59 | You can insert data by simply passing a text field. The library will automatically generate embeddings using the specified transformer model.
 60 | 
 61 | ```js
 62 | await db.insert({
 63 |   text: "This is a sample text to embed",
 64 | });
 65 | ```
 66 | 
 67 | ### Inserting Manual Vectors
 68 | 
 69 | If you already have precomputed vectors, you can insert them directly into the database.
 70 | 
 71 | ```js
 72 | await db.insertManualVectors({
 73 |   text: "Another sample",
 74 |   embedding: [0.1, 0.2, 0.3, ...] // your precomputed embedding
 75 | });
 76 | ```
 77 | 
 78 | ### Querying (Cosine Similarity)
 79 | 
 80 | You can query the database by providing a text, and EntityDB will return the most similar results based on cosine similarity.
 81 | 
 82 | ```js
 83 | const results = await db.query("Find similar texts based on this query");
 84 | console.log(results);
 85 | ```
 86 | 
 87 | ### Querying Manual Vectors
 88 | 
 89 | If you have precomputed vectors and want to query them directly, use the queryManualVectors method.
 90 | 
 91 | ```js
 92 |   const queryVector = [0.1, 0.2, 0.3, ...]; // your precomputed query vector
 93 |   const results = await db.queryManualVectors(queryVector);
 94 |   console.log(results);
 95 | ```
 96 | 
 97 | ### Updating a Vector in the Database
 98 | 
 99 | If you need to update an existing vector in the database:
100 | 
101 | ```js
102 | await db.update("1234", {
103 |   vector: [0.4, 0.5, 0.6], // Updated vector data
104 |   metadata: { name: "Updated Item" }, // Additional updated data
105 | });
106 | ```
107 | 
108 | ### Deleting Data
109 | 
110 | You can delete a vector by its key.
111 | 
112 | ```js
113 | await db.delete(1);
114 | ```
115 | 
116 | ---
117 | 
118 | ## Experimental: Binary Vectors
119 | 
120 | While querying vectors by cosine similarity is already extremely fast, sometimes you want to go faster than light. Binary vectors are simplified versions of dense vectors where each value is turned into either a 0 or a 1 by comparing it to the middle value (median). This makes them smaller to store and faster to compare, which helps when working with a lot of data.
121 | 
122 | Note that this simplification can reduce the quality of the results because some detailed information in the original dense vector is lost. Use it for very long searches. For example, the set of vectors produced by _all-MiniLM-L6-v2_:
123 | 
124 | `[ -0.020319879055023193,  0.07605013996362686, 0.020568927749991417, ...]`
125 | after being binarized becomes:
126 | `[ 0, 1, 1, ...]`
127 | 
128 | For better JS processing, the binary vectors are packed into 64-bit integers (e.g., using BigUint64Array).
129 | Each 64-bit integer represents 64 binary values, and we use the XOR operation on 64-bit integers to find mismatched bits and then count the 1s.
130 | 
131 | ### Inserting Data and Generate Binarized Embeddings
132 | 
133 | To insert data to be vectorized and then binarized, use _insertBinary_.
134 | Note: to query over binarized vectors, use _queryBinary_ or _queryBinarySIMD_.
135 | 
136 | ```js
137 | await db.insertBinary({
138 |   text: "This is a sample text to embed and binarize",
139 | });
140 | ```
141 | 
142 | ### (Very Fast) Query Binary Embeddings Using Hamming Distance Over Native JS (64 bits at a time max)
143 | 
144 | ![EntityDB_in_action_binary_wasm](binary_js.png)
145 | 
146 | To query over binarized vectors use _queryBinary_.
147 | While cosine similarity measures the angle between two vectors in a multi-dimensional space, Hamming distance counts the number of positions where two binary vectors differ. It measures dissimilarity as a simple count of mismatches. For binarized vectors Hamming really is the tool for the job.
148 | 
149 | ```js
150 | const results = await db.queryBinary("Find similar texts based on this query");
151 | console.log(results);
152 | ```
153 | 
154 | Example of a binary hamming distance query over BigUint64Array (64 bits processed at a time using pure JS):
155 | ![EntityDB_in_action_binary](sc-binary.png)
156 | 
157 | ### (Insanely Fast) Query Binary Embeddings Using Hamming Distance Over WebAssembly SIMD (+128 bits at a time, shorter CPU cicles)
158 | 
159 | ![EntityDB_in_action_binary_wasm](binary_wasm.png)
160 | 
161 | The WebAssembly SIMD implementation processes 128 bits per iteration (via v128.xor) compared to 64 bits per iteration in the JavaScript implementation using BigUint64Array. This alone gives a theoretical 2x speedup.
162 | 
163 | However SIMD instructions execute XOR, popcount, and similar operations on multiple data lanes in parallel. This reduces the number of CPU cycles required for the same amount of work compared to sequential bitwise operations in JavaScript. SIMD in WebAssembly is likely 2x to 4x faster or more over big vectors.
164 | 
165 | Check haming_distance_simd.wat for the WASM source code. Compiled using wat2wasm.
166 | 
167 | ```js
168 | const results = await db.queryBinarySIMD(
169 |   "Find similar texts based on this query"
170 | );
171 | console.log(results);
172 | ```
173 | 
174 | Example of a binary hamming distance query over WebAssembly SIMD (+128 bits at a time, shorter CPU cicles):
175 | ![EntityDB_in_action_binary_wasm](sc-binary-wasm.png)
176 | The logs show offsets (0, 16, 32), which means the code processes 128 bits (16 bytes) at a time. Since the total vector is 384 bits, it takes 3 steps (384 ÷ 128 = 3), confirming 128-bit SIMD processing.
177 | 
178 | ---
179 | 
180 | #### For Nextjs
181 | 
182 | If you're using Next.js, you may need to configure Webpack to work with Transformers.js. Add the following to your next.config.js file:
183 | 
184 | ```js
185 |   webpack: (config) => {
186 |     // Override the default webpack configuration
187 |     config.resolve.alias = {
188 |       ...config.resolve.alias,
189 |       "onnxruntime-node$": false, // Disable onnxruntime-node for browser environments
190 |       "sharp$": false, // optional - Disable sharp package (used by some image processing packages)
191 |     };
192 | 
193 |     return config;
194 |   },
195 | ```
196 | 
197 | ## Contributing
198 | 
199 | Feel free to fork the repository, create issues, and submit pull requests. We welcome contributions and suggestions!
200 | 
201 | ## License
202 | 
203 | This project is licensed under the Apache License 2.0.
204 | 


--------------------------------------------------------------------------------
/binary_js.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/babycommando/entity-db/b9e024c557a8065b5cc65a11974f86ec827b1a3c/binary_js.png


--------------------------------------------------------------------------------
/binary_wasm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/babycommando/entity-db/b9e024c557a8065b5cc65a11974f86ec827b1a3c/binary_wasm.png


--------------------------------------------------------------------------------
/cover.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/babycommando/entity-db/b9e024c557a8065b5cc65a11974f86ec827b1a3c/cover.gif


--------------------------------------------------------------------------------
/haming_distance_simd.wat:
--------------------------------------------------------------------------------
 1 | (module
 2 |   (import "env" "log" (func $log (param i32))) ;; Import a logging function
 3 |   (memory (export "memory") 1) ;; Define and export memory
 4 | 
 5 |   (func $hamming_distance (param $ptrA i32) (param $ptrB i32) (param $len i32) (result i32)
 6 |     (local $dist i32)
 7 |     (local.set $dist (i32.const 0)) ;; Initialize distance
 8 | 
 9 |     (block $exit ;; Define a breakable block for the loop
10 |       (loop $loop ;; Start loop with a label
11 |         (if (i32.eqz (local.get $len)) ;; Break if length is 0
12 |           (br $exit)
13 |         )
14 | 
15 |         ;; Log the current pointer
16 |         (call $log (local.get $ptrA))
17 | 
18 |         ;; XOR the vectors and calculate popcount
19 |         (local.set $dist
20 |           (i32.add
21 |             (local.get $dist)
22 |             (i32.add
23 |               (i32.popcnt (i32x4.extract_lane 0
24 |                 (v128.xor
25 |                   (v128.load (local.get $ptrA))
26 |                   (v128.load (local.get $ptrB))
27 |                 )
28 |               ))
29 |               (i32.add
30 |                 (i32.popcnt (i32x4.extract_lane 1
31 |                   (v128.xor
32 |                     (v128.load (local.get $ptrA))
33 |                     (v128.load (local.get $ptrB))
34 |                   )
35 |                 ))
36 |                 (i32.add
37 |                   (i32.popcnt (i32x4.extract_lane 2
38 |                     (v128.xor
39 |                       (v128.load (local.get $ptrA))
40 |                       (v128.load (local.get $ptrB))
41 |                     )
42 |                   ))
43 |                   (i32.popcnt (i32x4.extract_lane 3
44 |                     (v128.xor
45 |                       (v128.load (local.get $ptrA))
46 |                       (v128.load (local.get $ptrB))
47 |                     )
48 |                   ))
49 |                 )
50 |               )
51 |             )
52 |           )
53 |         )
54 | 
55 |         ;; Advance pointers by 16 bytes (128 bits)
56 |         (local.set $ptrA (i32.add (local.get $ptrA) (i32.const 16)))
57 |         (local.set $ptrB (i32.add (local.get $ptrB) (i32.const 16)))
58 |         (local.set $len (i32.sub (local.get $len) (i32.const 16)))
59 | 
60 |         ;; Continue loop
61 |         (br $loop)
62 |       )
63 |     )
64 |     (local.get $dist) ;; Return the distance
65 |   )
66 |   (export "hamming_distance" (func $hamming_distance))
67 | )
68 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@babycommando/entity-db",
 3 |   "version": "1.0.10",
 4 |   "description": "EntityDB is an in-browser vector database wrapping indexedDB and Transformers.js",
 5 |   "main": "src/index.js",
 6 |   "types": "src/index.d.ts",
 7 |   "keywords": [
 8 |     "entity",
 9 |     "indexeddb",
10 |     "vectors",
11 |     "transformers",
12 |     "huggingface",
13 |     "embedding",
14 |     "entitydb",
15 |     "entity-db"
16 |   ],
17 |   "author": "babycommando",
18 |   "license": "Apache-2.0",
19 |   "dependencies": {
20 |     "idb": "^8.0.1",
21 |     "@xenova/transformers": "^2.17.2"
22 |   },
23 |   "devDependencies": {}
24 | }
25 | 


--------------------------------------------------------------------------------
/sc-binary-wasm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/babycommando/entity-db/b9e024c557a8065b5cc65a11974f86ec827b1a3c/sc-binary-wasm.png


--------------------------------------------------------------------------------
/sc-binary.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/babycommando/entity-db/b9e024c557a8065b5cc65a11974f86ec827b1a3c/sc-binary.png


--------------------------------------------------------------------------------
/sc-db.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/babycommando/entity-db/b9e024c557a8065b5cc65a11974f86ec827b1a3c/sc-db.png


--------------------------------------------------------------------------------
/src/index.d.ts:
--------------------------------------------------------------------------------
 1 | // src/index.d.ts
 2 | 
 3 | declare module "@babycommando/entity-db" {
 4 |   export class EntityDB {
 5 |     constructor(config: { vectorPath: string; model?: string });
 6 | 
 7 |     insert(data: { [key: string]: any }): Promise<number>;
 8 |     insertBinary(data: { [key: string]: any }): Promise<number>;
 9 |     insertManualVectors(data: { [key: string]: any }): Promise<number>;
10 | 
11 |     update(key: number, data: { [key: string]: any }): Promise<void>;
12 |     delete(key: number): Promise<void>;
13 | 
14 |     query(
15 |       queryText: string,
16 |       options?: { limit?: number }
17 |     ): Promise<{ [key: string]: any }[]>;
18 | 
19 |     queryBinary(
20 |       queryText: string,
21 |       options?: { limit?: number }
22 |     ): Promise<{ [key: string]: any }[]>;
23 | 
24 |     queryBinarySIMD(
25 |       queryText: string,
26 |       options?: { limit?: number }
27 |     ): Promise<{ [key: string]: any }[]>;
28 | 
29 |     queryManualVectors(
30 |       queryVector: number[],
31 |       options?: { limit?: number }
32 |     ): Promise<{ [key: string]: any }[]>;
33 |   }
34 | }
35 | 


--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
  1 | import { openDB } from "idb";
  2 | import { pipeline } from "@xenova/transformers";
  3 | import { env } from "@xenova/transformers";
  4 | 
  5 | // Specify a custom location for models (defaults to '/models/').
  6 | env.localModelPath = "/huggingface";
  7 | 
  8 | // Disable the loading of remote models from the Hugging Face Hub:
  9 | // env.allowRemoteModels = false;
 10 | 
 11 | // Set location of .wasm files. Defaults to use a CDN.
 12 | // env.backends.onnx.wasm.wasmPaths = '/path/to/files/';
 13 | 
 14 | // Default pipeline (Xenova/all-MiniLM-L6-v2)
 15 | const defaultModel = "Xenova/all-MiniLM-L6-v2";
 16 | const pipePromise = pipeline("feature-extraction", defaultModel);
 17 | 
 18 | // Cosine similarity function
 19 | const cosineSimilarity = (vecA, vecB) => {
 20 |   const dotProduct = vecA.reduce(
 21 |     (sum, val, index) => sum + val * vecB[index],
 22 |     0
 23 |   );
 24 |   const magnitudeA = Math.sqrt(vecA.reduce((sum, val) => sum + val * val, 0));
 25 |   const magnitudeB = Math.sqrt(vecB.reduce((sum, val) => sum + val * val, 0));
 26 |   return dotProduct / (magnitudeA * magnitudeB);
 27 | };
 28 | 
 29 | // Function to get embeddings from text using HuggingFace pipeline
 30 | const getEmbeddingFromText = async (text, model = defaultModel) => {
 31 |   const pipe = await pipePromise;
 32 |   const output = await pipe(text, {
 33 |     pooling: "mean",
 34 |     normalize: true,
 35 |   });
 36 |   return Array.from(output.data);
 37 | };
 38 | 
 39 | // Binarizer function for dense vectors
 40 | const binarizeVector = (vector, threshold = null) => {
 41 |   if (threshold === null) {
 42 |     const sorted = [...vector].sort((a, b) => a - b);
 43 |     const mid = Math.floor(sorted.length / 2);
 44 |     threshold =
 45 |       sorted.length % 2 === 0
 46 |         ? (sorted[mid - 1] + sorted[mid]) / 2
 47 |         : sorted[mid];
 48 |   }
 49 |   return vector.map((val) => (val >= threshold ? 1 : 0));
 50 | };
 51 | 
 52 | // Function to calculate Hamming distance
 53 | const hammingDistance = (vectorA, vectorB) => {
 54 |   if (vectorA.length !== vectorB.length) {
 55 |     throw new Error("Vectors must be of the same length");
 56 |   }
 57 |   const length = vectorA.length;
 58 |   const bitsA = new BigUint64Array(vectorA.buffer);
 59 |   const bitsB = new BigUint64Array(vectorB.buffer);
 60 | 
 61 |   let distance = 0n;
 62 |   for (let i = 0; i < bitsA.length; i++) {
 63 |     const xorResult = bitsA[i] ^ bitsB[i];
 64 |     distance += BigInt(xorResult.toString(2).replace(/0/g, "").length); // Popcount equivalent
 65 |   }
 66 |   return Number(distance);
 67 | };
 68 | 
 69 | //Load Hamming Distance Over WASM SIMD (128 bits at a time - 2x faster than normal JS using BigUint64Array):
 70 | //haming_distance_simd.wat script compiled to WASM and base64 encoded:
 71 | const wasmBase64 =
 72 |   "AGFzbQEAAAABDAJgAX8AYAN/f38BfwILAQNlbnYDbG9nAAADAgEBBQMBAAEHHQIGbWVtb3J5AgAQaGFtbWluZ19kaXN0YW5jZQABCoYBAYMBAQF/QQAhAwJAA0AgAkUEQAwCCyAAEAAgAyAA/QAEACAB/QAEAP1R/RsAaSAA/QAEACAB/QAEAP1R/RsBaSAA/QAEACAB/QAEAP1R/RsCaSAA/QAEACAB/QAEAP1R/RsDaWpqamohAyAAQRBqIQAgAUEQaiEBIAJBEGshAgwACwsgAwsAPQRuYW1lARgCAANsb2cBEGhhbW1pbmdfZGlzdGFuY2UCHAIAAAEEAARwdHJBAQRwdHJCAgNsZW4DBGRpc3Q=";
 73 | 
 74 | function base64ToUint8Array(base64) {
 75 |   if (typeof window !== "undefined") {
 76 |     // Browser: Use atob to decode Base64
 77 |     return Uint8Array.from(atob(base64), (c) => c.charCodeAt(0));
 78 |   } else {
 79 |     // Node.js: Use Buffer to decode Base64
 80 |     return Uint8Array.from(Buffer.from(base64, "base64"));
 81 |   }
 82 | }
 83 | 
 84 | async function loadWasm() {
 85 |   const wasmBinary = base64ToUint8Array(wasmBase64); // Decode Base64
 86 |   const wasmModule = await WebAssembly.instantiate(wasmBinary, {
 87 |     env: {
 88 |       memory: new WebAssembly.Memory({ initial: 1 }),
 89 |       log: (ptr) => console.log(`Processing pointer at offset: ${ptr}`),
 90 |     },
 91 |   });
 92 | 
 93 |   // Check if memory is exported; if not, use the imported memory
 94 |   const memory =
 95 |     wasmModule.instance.exports.memory ||
 96 |     wasmModule.instance.exports.env.memory;
 97 | 
 98 |   if (!memory) {
 99 |     throw new Error("WebAssembly module does not export or provide memory.");
100 |   }
101 | 
102 |   return {
103 |     ...wasmModule.instance.exports,
104 |     memory, // Ensure memory is included
105 |   };
106 | }
107 | 
108 | class EntityDB {
109 |   constructor({ vectorPath, model = defaultModel }) {
110 |     this.vectorPath = vectorPath;
111 |     this.model = model;
112 |     this.dbPromise = this._initDB();
113 |   }
114 | 
115 |   // Initialize the IndexedDB
116 |   async _initDB() {
117 |     const db = await openDB("EntityDB", 1, {
118 |       upgrade(db) {
119 |         if (!db.objectStoreNames.contains("vectors")) {
120 |           db.createObjectStore("vectors", {
121 |             keyPath: "id",
122 |             autoIncrement: true,
123 |           });
124 |         }
125 |       },
126 |     });
127 |     return db;
128 |   }
129 | 
130 |   // Insert data by generating embeddings from text
131 |   async insert(data) {
132 |     try {
133 |       // Generate embedding if text is provided
134 |       let embedding = data[this.vectorPath];
135 |       if (data.text) {
136 |         embedding = await getEmbeddingFromText(data.text, this.model);
137 |       }
138 | 
139 |       const db = await this.dbPromise;
140 |       const transaction = db.transaction("vectors", "readwrite");
141 |       const store = transaction.objectStore("vectors");
142 |       const record = { vector: embedding, ...data };
143 |       const key = await store.add(record);
144 |       return key;
145 |     } catch (error) {
146 |       throw new Error(`Error inserting data: ${error}`);
147 |     }
148 |   }
149 | 
150 |   async insertBinary(data) {
151 |     try {
152 |       let embedding = data[this.vectorPath];
153 |       if (data.text) {
154 |         embedding = await getEmbeddingFromText(data.text, this.model);
155 |       }
156 | 
157 |       // Binarize the embedding and pack into BigUint64Array
158 |       const binaryEmbedding = binarizeVector(embedding);
159 |       const packedEmbedding = new BigUint64Array(
160 |         new ArrayBuffer(Math.ceil(binaryEmbedding.length / 64) * 8)
161 |       );
162 |       for (let i = 0; i < binaryEmbedding.length; i++) {
163 |         const bitIndex = i % 64;
164 |         const arrayIndex = Math.floor(i / 64);
165 |         if (binaryEmbedding[i] === 1) {
166 |           packedEmbedding[arrayIndex] |= 1n << BigInt(bitIndex);
167 |         }
168 |       }
169 | 
170 |       const db = await this.dbPromise;
171 |       const transaction = db.transaction("vectors", "readwrite");
172 |       const store = transaction.objectStore("vectors");
173 |       const record = { vector: packedEmbedding, ...data };
174 |       const key = await store.add(record);
175 |       return key;
176 |     } catch (error) {
177 |       throw new Error(`Error inserting binary data: ${error}`);
178 |     }
179 |   }
180 | 
181 |   // Insert manual vectors (no embedding generation, just insert provided vectors)
182 |   async insertManualVectors(data) {
183 |     try {
184 |       const db = await this.dbPromise;
185 |       const transaction = db.transaction("vectors", "readwrite");
186 |       const store = transaction.objectStore("vectors");
187 |       const record = { vector: data[this.vectorPath], ...data };
188 |       const key = await store.add(record);
189 |       return key;
190 |     } catch (error) {
191 |       throw new Error(`Error inserting manual vectors: ${error}`);
192 |     }
193 |   }
194 | 
195 |   // Update an existing vector in the database
196 |   async update(key, data) {
197 |     const db = await this.dbPromise;
198 |     const transaction = db.transaction("vectors", "readwrite");
199 |     const store = transaction.objectStore("vectors");
200 |     const vector = data[this.vectorPath];
201 |     const updatedData = { ...data, [store.keyPath]: key, vector };
202 |     await store.put(updatedData);
203 |   }
204 | 
205 |   // Delete a vector by key
206 |   async delete(key) {
207 |     const db = await this.dbPromise;
208 |     const transaction = db.transaction("vectors", "readwrite");
209 |     const store = transaction.objectStore("vectors");
210 |     await store.delete(key);
211 |   }
212 | 
213 |   // Query vectors by cosine similarity (using a text input that will be converted into embeddings)
214 |   async query(queryText, { limit = 10 } = {}) {
215 |     try {
216 |       // Get embeddings for the query text
217 |       const queryVector = await getEmbeddingFromText(queryText, this.model);
218 | 
219 |       const db = await this.dbPromise;
220 |       const transaction = db.transaction("vectors", "readonly");
221 |       const store = transaction.objectStore("vectors");
222 |       const vectors = await store.getAll(); // Retrieve all vectors
223 | 
224 |       // Calculate cosine similarity for each vector and sort by similarity
225 |       const similarities = vectors.map((entry) => {
226 |         const similarity = cosineSimilarity(queryVector, entry.vector);
227 |         return { ...entry, similarity };
228 |       });
229 | 
230 |       similarities.sort((a, b) => b.similarity - a.similarity); // Sort by similarity (descending)
231 |       return similarities.slice(0, limit); // Return the top N results based on limit
232 |     } catch (error) {
233 |       throw new Error(`Error querying vectors: ${error}`);
234 |     }
235 |   }
236 | 
237 |   //Query binarized vectors using Hamming distance nstead of cosine similarity
238 |   async queryBinary(queryText, { limit = 10 } = {}) {
239 |     try {
240 |       // Get embeddings and binarize them
241 |       const queryVector = await getEmbeddingFromText(queryText, this.model);
242 |       const binaryQueryVector = binarizeVector(queryVector);
243 | 
244 |       // Pack the query vector into BigUint64Array
245 |       const packedQueryVector = new BigUint64Array(
246 |         new ArrayBuffer(Math.ceil(binaryQueryVector.length / 64) * 8)
247 |       );
248 |       for (let i = 0; i < binaryQueryVector.length; i++) {
249 |         const bitIndex = i % 64;
250 |         const arrayIndex = Math.floor(i / 64);
251 |         if (binaryQueryVector[i] === 1) {
252 |           packedQueryVector[arrayIndex] |= 1n << BigInt(bitIndex);
253 |         }
254 |       }
255 | 
256 |       const db = await this.dbPromise;
257 |       const transaction = db.transaction("vectors", "readonly");
258 |       const store = transaction.objectStore("vectors");
259 |       const vectors = await store.getAll();
260 | 
261 |       // Calculate Hamming distance
262 |       const distances = vectors.map((entry) => {
263 |         const distance = hammingDistance(packedQueryVector, entry.vector);
264 |         return { ...entry, distance };
265 |       });
266 | 
267 |       // Sort by Hamming distance (ascending)
268 |       distances.sort((a, b) => a.distance - b.distance);
269 | 
270 |       // Return the top N results based on limit
271 |       return distances.slice(0, limit);
272 |     } catch (error) {
273 |       throw new Error(`Error querying binary vectors: ${error}`);
274 |     }
275 |   }
276 | 
277 |   /*Hamming Distance over WebAssembly SIMD:
278 |   The WebAssembly SIMD implementation processes 128 bits per iteration (via v128.xor) 
279 |   compared to 64 bits per iteration in the JavaScript implementation using BigUint64Array.
280 |   This alone gives a theoretical 2x speedup. 
281 |   
282 |   SIMD instructions execute XOR, popcount, and similar operations on multiple data lanes in parallel. 
283 |   This reduces the number of CPU cycles required for the same amount of work compared to sequential 
284 |   bitwise operations in JavaScript. SIMD in WebAssembly is likely 2x to 4x faster or more over big vectors.
285 |   */
286 |   async queryBinarySIMD(queryText, { limit = 10 } = {}) {
287 |     try {
288 |       const queryVector = await getEmbeddingFromText(queryText, this.model);
289 |       const binaryQueryVector = binarizeVector(queryVector);
290 | 
291 |       const packedQueryVector = new BigUint64Array(
292 |         new ArrayBuffer(Math.ceil(binaryQueryVector.length / 64) * 8)
293 |       );
294 |       for (let i = 0; i < binaryQueryVector.length; i++) {
295 |         const bitIndex = i % 64;
296 |         const arrayIndex = Math.floor(i / 64);
297 |         if (binaryQueryVector[i] === 1) {
298 |           packedQueryVector[arrayIndex] |= 1n << BigInt(bitIndex);
299 |         }
300 |       }
301 | 
302 |       console.log(
303 |         "Query Vector (binary):",
304 |         [...packedQueryVector].map((v) => v.toString(2))
305 |       );
306 | 
307 |       const db = await this.dbPromise;
308 |       const transaction = db.transaction("vectors", "readonly");
309 |       const store = transaction.objectStore("vectors");
310 |       const vectors = await store.getAll();
311 | 
312 |       vectors.forEach((entry, index) => {
313 |         console.log(
314 |           `DB Vector ${index} (binary):`,
315 |           [...new BigUint64Array(entry.vector.buffer)].map((v) => v.toString(2))
316 |         );
317 |       });
318 | 
319 |       const wasmModule = await loadWasm();
320 |       const { hamming_distance, memory } = wasmModule;
321 | 
322 |       if (!memory) {
323 |         throw new Error("WebAssembly memory is undefined.");
324 |       }
325 | 
326 |       const wasmMemory = new Uint8Array(memory.buffer);
327 |       wasmMemory.set(new Uint8Array(packedQueryVector.buffer), 0);
328 | 
329 |       const distances = vectors.map((entry) => {
330 |         const dbVector = new Uint8Array(entry.vector.buffer);
331 |         wasmMemory.set(dbVector, 16);
332 |         const distance = hamming_distance(0, 16, packedQueryVector.length * 8);
333 |         return { ...entry, distance };
334 |       });
335 | 
336 |       distances.sort((a, b) => a.distance - b.distance);
337 |       return distances.slice(0, limit);
338 |     } catch (error) {
339 |       console.error("Error querying binary vectors:", error);
340 |       throw error;
341 |     }
342 |   }
343 | 
344 |   // Query manual vectors directly (query pre-computed embeddings)
345 |   async queryManualVectors(queryVector, { limit = 10 } = {}) {
346 |     try {
347 |       const db = await this.dbPromise;
348 |       const transaction = db.transaction("vectors", "readonly");
349 |       const store = transaction.objectStore("vectors");
350 |       const vectors = await store.getAll(); // Retrieve all vectors
351 | 
352 |       // Calculate cosine similarity for each vector and sort by similarity
353 |       const similarities = vectors.map((entry) => {
354 |         const similarity = cosineSimilarity(queryVector, entry.vector);
355 |         return { ...entry, similarity };
356 |       });
357 | 
358 |       similarities.sort((a, b) => b.similarity - a.similarity); // Sort by similarity (descending)
359 |       return similarities.slice(0, limit); // Return the top N results based on limit
360 |     } catch (error) {
361 |       throw new Error(`Error querying manual vectors: ${error}`);
362 |     }
363 |   }
364 | }
365 | 
366 | // Export EntityDB class
367 | export { EntityDB };
368 | 


--------------------------------------------------------------------------------