├── .gitignore
├── LICENSE
├── README.md
├── package.json
└── src
    └── index.js


/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | .knowledge/
3 | .env
4 | .DS_Store
5 | coverage/
6 | *.log
7 | test.mjs
8 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) [year] [fullname]
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Overview
2 | 
3 | - `@openinterface/knowledge` npm package repo
4 | - say hi 👋 [@n_raidenai](https://x.com/n_raidenai)
5 | 
6 | ## knowledge
7 | 
8 | - agent tool that autonomously learns how to use APIs, SDKs, infra tools, ...
9 | - collects documentation for RAG; it supports
10 |   - crawling docs websites
11 |   - crawling github repos for readmes, and npm packages by name
12 |   - searching the web for use cases (via serper) from a single query
13 |   - parsing openapi/swagger definitions from urls
14 | - automatically manages vectorizing, embedding, indexing and concurrency
15 | - has a local index powered by `@electric-sql/pglite` and `pgvector`
16 | - (wip) post-processes collected documents to clean up and improve formatting
17 | - (wip) stores in remote index dbs (like supabase, weaviate, ...)
18 | 
19 | ## Installation
20 | 
21 | ```bash
22 | npm install @openinterface/knowledge
23 | ```
24 | 
25 | ## Usage
26 | 
27 | make a `.env` file and ensure it has these values
28 | 
29 | ```env
30 | OPENAI_API_KEY = "REPLACE_KEY" # required
31 | 
32 | SERPER_API_KEY = "REPLACE_KEY" # to enable knowledge.collect.learn feature
33 | SERPER_SEARCH_QUERIES = 2 # search queries per learn operation (if enabled)
34 | 
35 | GITHUB_API_KEY = "REPLACE_KEY" # to enable knowledge.collect.github feature
36 | 
37 | #PROXY_URL = http://your_proxy_url:port # optional, for scraping / crawling pages
38 | ```
39 | 
40 | import the package as follows
41 | 
42 | ```javascript
43 | import knowledge from '@openinterface/knowledge';
44 | ```
45 | 
46 | ## Use Case Examples
47 | 
48 | ```javascript
49 | import knowledge from '@openinterface/knowledge';
50 | 
51 | // ====================================================================
52 | // FETCHING DOCS / DATA + EMBEDDING RESULTS + INDEXING IN VECTOR DB
53 | // ====================================================================
54 | 
55 | // collect + index documentation by crawling a website's docs
56 | await knowledge.collect.crawl({
57 |   url: 'https://docs.railway.app/',
58 |   vectorize: true,
59 |   index: {
60 |     local: true,
61 |     // postgres : false, // remote index not implemented yet
62 |     // weaviate : false, // remote index not implemented yet
63 |   },
64 | });
65 | 
66 | // collect + index tutorials/articles/docs by googling a use case (needs serper key in .env)
67 | await knowledge.collect.learn({
68 |   query: 'setup and deploy graphql with node',
69 |   vectorize: true,
70 |   index: { local: true },
71 | });
72 | 
73 | // collect + index readmes from a github repo (needs github key in .env)
74 | await knowledge.collect.github({
75 |   url: 'https://github.com/resend/react-email',
76 |   vectorize: true,
77 |   index: { local: true },
78 | });
79 | // collect + index readmes from an npm package, by crawling its linked github repo (needs github key in .env)
80 | await knowledge.collect.npm({
81 |   name: 'react-confetti',
82 |   vectorize: true,
83 |   index: { local: true },
84 | });
85 | 
86 | // collect + index every {method,route} combination from an openapi specification file url (can be yaml or json)
87 | await knowledge.collect.openapi({
88 |   url: 'https://raw.githubusercontent.com/resend/resend-openapi/refs/heads/main/resend.yaml',
89 |   vectorize: true,
90 |   index: { local: true },
91 | });
92 | 
93 | 
94 | // ====================================================================
95 | // QUERYING THE COLLECTED DATA
96 | // ====================================================================
97 | 
98 | // search example
99 | const retrieved = await knowledge.index.query.local({
100 |   query: "create graphql schemas for invoices",
101 |   amount: 4
102 | })
103 | /*
104 |   -> retrieved :
105 |   [
106 |     {
107 |       uid,
108 |       data: {
109 |         meta: {...}
110 |         content: "... documentation content ..."
111 |       },
112 |     },
113 |     ...
114 |   ]
115 | */
116 | 
117 | // RAG example
118 | const answer = await knowledge.index.ask.local({
119 |   query: `make a new nodejs project that :
120 | 
121 |   > makes a local vectra index
122 |   > indexes from a csv list of my clients , which is 'name,email,phone,task_description'
123 |   > write test cases ;
124 | 
125 |   no typescript, and use type : module
126 | 
127 |   answer with the new , entire project codebase , with every file needed (including any example), in format :
128 |   \`\`\`yaml
129 |   repo:
130 |    - path: "" # full file path
131 |      content: "" # full file content
132 |    - ...
133 |   \`\`\``,
134 |   model: `o1-mini`
135 | })
136 | console.dir({answer})
137 | ```
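
the local query helper (`lib.index.query.local` in `src/index.js`) also takes a few optional parameters not shown in the search example above : `amount` (number of results, default 6), `threshold` (a minimum similarity score over the inner-product distance, default 0.0) and `embedding` (a precomputed query vector, to skip the embedding call). a minimal sketch based on those defaults :

```javascript
// query the local index with the optional knobs
const results = await knowledge.index.query.local({
  query: "send a transactional email with react-email", // example query
  amount: 3,      // return at most 3 matches (default 6)
  threshold: 0.2, // keep only matches scoring above 0.2 (default 0.0)
  // embedding: [...], // optionally pass a precomputed vector instead of embedding `query`
});

for (const { uid, data } of results) {
  if (!data) continue; // uid is indexed but its vector file was not loaded from .knowledge/vectors
  console.dir({ uid, meta: data.meta });
}
```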
138 | 
139 | ## Potential Issues
140 | 
141 | - if you use the local index features (which depend on `@electric-sql/pglite` and `@electric-sql/pglite/pgvector`) in a cloud / dockerized environment, you might run into issues :
142 |   the npm installer for pgvector does not handle the full installation by default
143 | - it should, however, work without problems in local / browser environments
144 | 
145 | ## WIP
146 | 
147 | - post-processing retrieved documents (clean up and reformat with LLM)
148 | - indexing in remote vector databases (supabase, weaviate)
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "@openinterface/knowledge",
3 |   "version": "0.0.11",
4 |   "description": "ai agent knowledge base management tool",
5 |   "main": "src/index.js",
6 |   "type": "module",
7 |   "scripts": {
8 |     "start": "node src/index.js",
9 |     "test": "echo \"No tests yet\" && exit 0"
10 |   },
11 |   "repository": {
12 |     "type": "git",
13 |     "url": "git+https://github.com/raidendotai/knowledge.git"
14 |   },
15 |   "keywords": [
16 |     "knowledge",
17 |     "crawler",
18 |     "vectorize",
19 |     "index",
20 |     "LLM",
21 |     "embeddings"
22 |   ],
23 |   "author": "openinterface",
24 |   "license": "MIT",
25 |   "bugs": {
26 |     "url": "https://github.com/raidendotai/knowledge/issues"
27 |   },
28 |   "homepage": "https://github.com/raidendotai/knowledge#readme",
29 |   "dependencies": {
30 |     "@electric-sql/pglite": "^0.2.11",
31 |     "@readme/openapi-parser": "^2.6.0",
32 |     "@sindresorhus/slugify": "^2.2.1",
33 |     "async-retry": "^1.3.3",
34 |     "axios": "^1.7.7",
35 |     "cheerio": "^1.0.0",
36 |     "cli-progress": "^3.12.0",
37 |     "dotenv": "^16.4.5",
38 |     "fs-extra": "^11.2.0",
39 |     "https-proxy-agent": "^7.0.5",
40 |     "node-html-markdown": "^1.3.0",
41 |     "openai": "*",
42 |     "p-queue": "^7.4.1",
43 |     "tiktoken": "^1.0.17",
44 |     "yaml": "^2.5.1"
45 |   }
46 | }
47 | 
--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
1 | import axios from "axios";
2 | import * as cheerio from "cheerio";
3 | import PQueue from "p-queue";
4 | import retry from "async-retry";
5 | import crypto from "crypto";
6 | import { URL } from "url";
7 | import cliProgress from "cli-progress";
8 | import path from "path";
9 | import fs from "fs/promises";
10 | import OpenAI from "openai";
11 | import yaml from "yaml";
12 | import dotenv from "dotenv";
13 | import OpenAPIParser from "@readme/openapi-parser";
14 | import slugify from "@sindresorhus/slugify";
15 | import { PGlite } from "@electric-sql/pglite";
16 | import { vector } from "@electric-sql/pglite/vector";
17 | 
18 | import pkg from "node-html-markdown";
19 | const { NodeHtmlMarkdown, NodeHtmlMarkdownOptions } = pkg;
20 | const nhm = new NodeHtmlMarkdown({});
21 | 
22 | dotenv.config();
23 | import { get_encoding } from "tiktoken";
24 | const enc = get_encoding("cl100k_base");
25 | 
26 | let openai;
27 | try {
28 |   openai = new OpenAI({
29 |     apiKey: process.env.OPENAI_API_KEY,
30 |   });
31 | } catch (e) {
32 |   console.error(e);
33 | }
34 | 
35 | let LOCAL_PG_INSTANCE;
36 | let LOCAL_KNOWLEDGE_DB = {};
37 | 
38 | const KNOWLEDGE_DIR = path.join(process.cwd(), ".knowledge");
39 | const DB_DIR = path.join(KNOWLEDGE_DIR, "db");
40 | const VECTORS_DIR = path.join(KNOWLEDGE_DIR, "vectors");
41 | const INDEX_DIR = path.join(KNOWLEDGE_DIR, "index");
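// layout of the .knowledge/ working directory used by this module (derived from the code below) :
//   db/      -> collected documents saved as yaml, grouped per collector (crawl/, learn/, openapi/) under a raw/ subfolder
//   vectors/ -> one json file per document : { meta, content, vector, uid, ... }, loaded into LOCAL_KNOWLEDGE_DB at startup
//   index/   -> the local PGlite database holding the pgvector `embeddings` table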
42 | 
43 | try {
44 |   const files = await fs.readdir(VECTORS_DIR, { withFileTypes: true });
45 |   const loadJsonFiles = async (dir) => {
46 |     const entries = await fs.readdir(dir, { withFileTypes: true });
47 |     for (const entry of entries) {
48 |       const fullPath = path.join(dir, entry.name);
49 |       if (entry.isDirectory()) {
50 |         await loadJsonFiles(fullPath);
51 |       } else if (entry.isFile() && entry.name.endsWith('.json')) {
52 |         try {
53 |           const fileContent = await fs.readFile(fullPath, 'utf-8');
54 |           const jsonData = JSON.parse(fileContent);
55 |           const { meta, uid, content } = jsonData;
56 |           LOCAL_KNOWLEDGE_DB[uid] = { meta, content };
57 |         } catch (e) {
58 |           console.error(e)
59 |         }
60 |       }
61 |     }
62 |   };
63 | 
64 |   await loadJsonFiles(VECTORS_DIR);
65 | } catch (e) {
66 |   console.error(`> no previous vectors db data to load`);
67 | }
68 | 
69 | const initDirs = async () => {
70 |   await fs.mkdir(DB_DIR, { recursive: true });
71 |   await fs.mkdir(VECTORS_DIR, { recursive: true });
72 |   await fs.mkdir(INDEX_DIR, { recursive: true });
73 | };
74 | 
75 | await initDirs();
76 | 
77 | const queues = {
78 |   search: new PQueue({ concurrency: 5 }),
79 |   llm: new PQueue({ concurrency: 2 }),
80 |   embed: new PQueue({ concurrency: 10 }),
81 | };
82 | 
83 | const SERPER_API_KEY = process.env.SERPER_API_KEY;
84 | if (!SERPER_API_KEY) {
85 |   // serper is only required for knowledge.collect.learn , so warn instead of exiting
86 |   console.warn("Warning: SERPER_API_KEY is not set in .env file; knowledge.collect.learn will not work.");
87 | }
88 | 
89 | const serperInstance = axios.create({
90 |   baseURL: "https://google.serper.dev",
91 |   headers: {
92 |     "X-API-KEY": SERPER_API_KEY,
93 |     "Content-Type": "application/json",
94 |   },
95 |   timeout: 10000,
96 | });
97 | 
98 | const PROXY_URL = process.env.PROXY_URL || null;
99 | const getAxiosInstance = async () => {
100 |   const axiosConfig = {
101 |     timeout: 10000,
102 |     headers: {
103 |       "User-Agent":
104 |         "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
105 |         "AppleWebKit/537.36 (KHTML, like Gecko) " +
106 |         "Chrome/113.0.0.0 Safari/537.36",
107 |       "Accept-Language": "en-US,en;q=0.9",
108 |     },
109 |     validateStatus: (status) => status >= 200 && status < 400,
110 |   };
111 | 
112 |   if (PROXY_URL) {
113 |     axiosConfig.proxy = false; // Disable default proxy handling
114 |     // https-proxy-agent v7 exposes the agent as a named export (no default export)
115 |     const { HttpsProxyAgent } = await import("https-proxy-agent");
116 |     // the same agent is reused for plain-http requests routed through the proxy
117 |     axiosConfig.httpsAgent = new HttpsProxyAgent(PROXY_URL);
118 |     axiosConfig.httpAgent = new HttpsProxyAgent(PROXY_URL);
119 | 
120 |   }
121 | 
122 |   return axios.create(axiosConfig);
123 | };
124 | const axiosInstance = await getAxiosInstance();
125 | const crawlQueue = new PQueue({ concurrency: 10 });
126 | 
127 | const _chunkify = (array, size) => {
128 |   const chunks = [];
129 |   for (let i = 0; i < array.length; i += size) {
130 |     chunks.push(array.slice(i, i + size));
131 |   }
132 |   return chunks;
133 | };
134 | 
135 | const lib = {
136 |   utils: {
137 |     search: async ({ query }) => {
138 |       return queues.search.add(() =>
139 |         retry(
140 |           async () => {
141 |             const response = await serperInstance.post("/search", { q: query });
142 |             return response.data;
143 |           },
144 |           {
145 |             retries: 3,
146 |             factor: 2,
147 |             minTimeout: 1000,
148 |             onRetry: (err, attempt) => {
149 |               console.warn(
150 |                 `Search retry ${attempt} for query "${query}" due to ${err.message}`,
151 |               );
152 |             },
153 |           },
154 |         ),
155 |       );
156 |     },
157 |     llm: async ({
158 |       model = "gpt-4o-mini",
159 |       messages,
160 |       stream = process.stdout,
161 |     }) => {
162 |       return queues.llm.add(() =>
163 |         retry(
164 |           async () => {
165 |             let opts = {
166 | 
model, 167 | messages, 168 | } 169 | if (!model.startsWith('o1')) { 170 | opts.stream = true 171 | opts.stream_options = { include_usage: true } 172 | const streaming = await openai.chat.completions.create(opts); 173 | 174 | let text = ""; 175 | for await (const chunk of streaming) { 176 | const content = chunk.choices[0]?.delta?.content || ""; 177 | if (content) { 178 | text += content; 179 | stream.write(content); 180 | } 181 | } 182 | stream.write(`\n`); 183 | return text.trim(); 184 | } else { 185 | stream.write(`\no1 model thinking (stream disabled) ...`); 186 | const intervalId = setInterval(() => { stream.write(' ...') }, 1000); 187 | const response = await openai.chat.completions.create(opts); 188 | clearInterval(intervalId); 189 | stream.write(`\n`); 190 | return response.choices[0]?.message?.content.trim() || ""; 191 | } 192 | }, 193 | { 194 | retries: 3, 195 | factor: 2, 196 | minTimeout: 1000, 197 | onRetry: (err, attempt) => { 198 | console.warn( 199 | `LLM retry ${attempt} due to ${err.message}`, 200 | ); 201 | }, 202 | }, 203 | ), 204 | ); 205 | }, 206 | embed: async ({ texts, model = "text-embedding-3-small" }) => { 207 | const maxTokens = 8192; // Set maximum tokens limit 208 | 209 | const sliceTexts = (texts) => { 210 | return texts.map((text) => { 211 | const tokens = enc.encode(text); // Tokenize the text 212 | const txt = new TextDecoder().decode( 213 | enc.decode(tokens.slice(0, maxTokens)), 214 | ); 215 | return txt; // Slice to max tokens and decode back to text 216 | }); 217 | }; 218 | 219 | texts = sliceTexts(texts); 220 | 221 | return queues.embed.add(() => 222 | retry( 223 | async () => { 224 | const response = await openai.embeddings.create({ 225 | model, 226 | input: texts, 227 | encoding_format: "float", 228 | }); 229 | return { 230 | vectors: response.data 231 | .sort((a, b) => a.index - b.index) 232 | .map((e) => e.embedding), 233 | usage: { model, ...response.usage }, 234 | }; 235 | }, 236 | { 237 | retries: 3, 238 | factor: 2, 239 | minTimeout: 1000, 240 | onRetry: (err, attempt) => { 241 | console.warn(`embed retry ${attempt} due to ${err.message}`); 242 | }, 243 | }, 244 | ), 245 | ); 246 | }, 247 | process: { 248 | html: async ({ url, proxy = false, use_puppeteer = false }) => { }, 249 | typescript: async ({ name }) => { }, 250 | }, 251 | }, 252 | collect: { 253 | crawl: async ({ 254 | url, 255 | proxy = false, 256 | post_process = false, 257 | use_puppeteer = false, 258 | vectorize = false, 259 | index = false, 260 | }) => { 261 | const visited = new Set(); 262 | const docs = []; 263 | const queue = new PQueue({ concurrency: 5 }); 264 | 265 | const root_slug = url.split("://").slice(1).join("://").split("/")[0]; 266 | 267 | // Progress bar setup 268 | const progressBar = new cliProgress.SingleBar({ 269 | format: "Crawling |{bar}| {value}/{total} Pages", 270 | barCompleteChar: "\u2588", 271 | barIncompleteChar: "\u2591", 272 | hideCursor: true, 273 | }); 274 | progressBar.start(1, 0); 275 | 276 | // URL normalization function 277 | const normalizeUrl = (inputUrl) => { 278 | try { 279 | const parsedUrl = new URL(inputUrl); 280 | parsedUrl.hash = ""; // Remove fragment 281 | parsedUrl.pathname = parsedUrl.pathname.replace(/\/$/, ""); // Remove trailing slash 282 | return parsedUrl.toString(); 283 | } catch (e) { 284 | return null; // Invalid URL 285 | } 286 | }; 287 | 288 | // Process a single URL 289 | const processUrl = async (currentUrl) => { 290 | const normalizedUrl = normalizeUrl(currentUrl); 291 | if (!normalizedUrl) { 292 | console.warn(`Invalid 
URL skipped: ${currentUrl}`); 293 | return; 294 | } 295 | 296 | try { 297 | const response = await retry( 298 | async () => { 299 | return await axiosInstance.get(normalizedUrl); 300 | }, 301 | { 302 | retries: 4, 303 | factor: 2, 304 | minTimeout: 2000, 305 | onRetry: (err, attempt) => { 306 | console.warn( 307 | `> retry ${attempt} for ${normalizedUrl} due to ${err.message}`, 308 | ); 309 | }, 310 | }, 311 | ); 312 | 313 | const contentType = response.headers['content-type'] || ''; 314 | if (!contentType.includes('text/html')) { 315 | // console.warn(`Non-HTML content skipped: ${normalizedUrl}`); 316 | return; 317 | } 318 | 319 | const html = response.data; 320 | const $ = cheerio.load(html); 321 | const meta = { 322 | title: $("title").text() || "No title available", 323 | description: 324 | $('meta[name="description"]').attr("content") || 325 | "No description available", 326 | url: normalizedUrl, 327 | }; 328 | const bodyHtml = $("body").html(); // Get the body content 329 | const markdown = nhm.translate(bodyHtml); // Convert body content to markdown 330 | console.log(` > ${normalizedUrl}`); 331 | docs.push({ url: normalizedUrl, content: markdown }); 332 | if (markdown.length) { 333 | const slug = slugify( 334 | normalizedUrl.split("://").slice(1).join("://"), 335 | ); 336 | const dir = path.join(DB_DIR, "crawl", root_slug, "raw"); 337 | await fs.mkdir(dir, { recursive: true }); 338 | await fs.writeFile( 339 | path.join(dir, `${slug}.yaml`), 340 | yaml.stringify({ 341 | meta, 342 | content: markdown, 343 | }), 344 | "utf-8", 345 | ); 346 | } 347 | 348 | // Find all internal links 349 | const baseUrl = new URL(url); 350 | $("a[href]").each((_, elem) => { 351 | const href = $(elem).attr("href") || ""; 352 | if ( 353 | href.startsWith("mailto:") || 354 | href.startsWith("tel:") || 355 | href.startsWith("javascript:") 356 | ) { 357 | return; // Skip non-HTTP links 358 | } 359 | try { 360 | const link = new URL(href, normalizedUrl); 361 | if (link.origin === baseUrl.origin) { 362 | // Only process same-origin links 363 | const normalizedLink = normalizeUrl(link.toString()); 364 | if (normalizedLink && !visited.has(normalizedLink)) { 365 | visited.add(normalizedLink); 366 | queue.add(() => processUrl(normalizedLink)); 367 | progressBar.increment(); 368 | progressBar.setTotal(progressBar.getTotal() + 1); 369 | } 370 | } 371 | } catch (e) { 372 | // Ignore invalid URLs 373 | } 374 | }); 375 | } catch (error) { 376 | console.error(`Failed to process ${normalizedUrl}: ${error.message}`); 377 | } 378 | }; 379 | 380 | // Start crawling from the root URL 381 | const startingUrl = normalizeUrl(url); 382 | if (!startingUrl) { 383 | throw new Error("Invalid root URL provided."); 384 | } 385 | visited.add(startingUrl); 386 | queue.add(() => processUrl(startingUrl)); 387 | 388 | await queue.onIdle(); 389 | progressBar.stop(); 390 | 391 | // Clear the console after the progress bar finishes 392 | console.clear(); 393 | 394 | console.dir({ crawl: { url, done: true } }); 395 | 396 | // Post-process if enabled 397 | if (post_process) { 398 | await lib.post_process({}); 399 | } 400 | 401 | if (vectorize) { 402 | await lib.vectorize({ root: `crawl/${root_slug}` }); 403 | 404 | if (index) { 405 | const index_methods = Object.keys(index).filter(key => index[key] !== false); 406 | await Promise.all( 407 | index_methods.map(async (index_method) => { 408 | await lib.index.create[index_method]({ root: `crawl/${root_slug}` }); 409 | }) 410 | ) 411 | } 412 | } 413 | 414 | return; 415 | }, 416 | learn: async ({ 417 
| query, 418 | proxy = false, 419 | post_process = false, 420 | use_puppeteer = false, 421 | vectorize = false, 422 | index = false, 423 | }) => { 424 | // Generate search queries using LLM 425 | const searchPrompt = `Generate a list of exactly ${process.env.SERPER_SEARCH_QUERIES} search queries to find information about:\n"${query}"\n\nin text format. For example:\n\`\`\`txt\n- example query- another query\n\`\`\`\n\n> do not wrap the search queries between quotes , should be raw text , one query per line\nyou are to write a total of : ${process.env.SERPER_SEARCH_QUERIES} search queries`; 426 | const searchQuerieResponse = await lib.utils.llm({ 427 | messages: [{ role: "user", content: searchPrompt }], 428 | }); 429 | console.dir({ searchQuerieResponse }); 430 | 431 | const queries = searchQuerieResponse 432 | .split("\n") 433 | .map((l) => l.trim()) 434 | .filter((line) => line.startsWith("-")) 435 | .map((line) => line.split("-").slice(1).join("-").trim()) 436 | .filter((q) => q.length > 0) 437 | .slice(0, parseInt(process.env.SERPER_SEARCH_QUERIES)); 438 | // console.dir({queries}) 439 | 440 | // Perform searches 441 | const searchResults = []; 442 | for (const q of queries) { 443 | const result = await lib.utils.search({ query: q }); 444 | searchResults.push(result); 445 | } 446 | 447 | // Collect HTML content from search results 448 | const docs = []; 449 | for (const res of searchResults) { 450 | if (res.organic) { 451 | // Assuming Serper returns organic results 452 | for (const item of res.organic) { 453 | docs.push({ url: item.link }); 454 | } 455 | } 456 | } 457 | 458 | const root_slug = slugify(query); 459 | // Save and process the documents as needed 460 | for (const doc of docs) { 461 | try { 462 | const response = await retry( 463 | async () => { 464 | return await axiosInstance.get(doc.url); 465 | }, 466 | { 467 | retries: 2, 468 | factor: 2, 469 | minTimeout: 1000, 470 | onRetry: (err, attempt) => { 471 | console.warn( 472 | `Retry ${attempt} for ${doc.url} due to ${err.message}`, 473 | ); 474 | }, 475 | }, 476 | ); 477 | 478 | const html = response.data; 479 | const $ = cheerio.load(html); 480 | const meta = { 481 | title: $("title").text() || "No title available", 482 | description: 483 | $('meta[name="description"]').attr("content") || 484 | "No description available", 485 | url: doc.url, 486 | }; 487 | const bodyHtml = $("body").html(); // Get the body content 488 | const markdown = nhm.translate(bodyHtml); // Convert body content to markdown 489 | console.log(` > ${doc.url}`); 490 | if (markdown.length) { 491 | const slug = slugify(doc.url.split("://").slice(1).join("://")); 492 | const dir = path.join(DB_DIR, "learn", root_slug, "raw"); 493 | await fs.mkdir(dir, { recursive: true }); 494 | await fs.writeFile( 495 | path.join(dir, `${slug}.yaml`), 496 | yaml.stringify({ 497 | meta, 498 | content: markdown, 499 | }), 500 | "utf-8", 501 | ); 502 | } 503 | } catch (error) { 504 | console.error(`Failed to process ${doc.url}: ${error.message}`); 505 | } 506 | } 507 | 508 | console.dir({ learn: { query, done: true } }); 509 | 510 | if (post_process) { 511 | await lib.post_process({}); 512 | } 513 | 514 | if (vectorize) { 515 | await lib.vectorize({ root: `learn/${root_slug}` }); 516 | if (index) { 517 | const index_methods = Object.keys(index).filter(key => index[key] !== false); 518 | await Promise.all( 519 | index_methods.map(async (index_method) => { 520 | await lib.index.create[index_method]({ root: `learn/${root_slug}` }); 521 | }) 522 | ) 523 | } 524 | } 525 | 526 | 
return; 527 | }, 528 | openapi: async ({ 529 | url, 530 | proxy = false, 531 | post_process = false, 532 | vectorize = false, 533 | index = false, 534 | }) => { 535 | const _circularReplacer = () => { 536 | const visited = new WeakSet(); 537 | return (key, value) => { 538 | if (typeof value === "object" && value !== null) { 539 | if (visited.has(value)) { 540 | return; 541 | } 542 | visited.add(value); 543 | } 544 | return value; 545 | }; 546 | }; 547 | 548 | function openapi3(query) { 549 | const api = query.data; 550 | if (api.components) delete api.components; 551 | const _api = { 552 | openapi: api.openapi, 553 | servers: api.servers, 554 | info: api.info, 555 | tags: api.tags ? api.tags : false, 556 | }; 557 | const paths = { ...api.paths }; 558 | const api_functions = Object.keys(paths).map((path_key) => { 559 | return Object.keys(paths[path_key]).map((method_key) => { 560 | let descriptions = []; 561 | if (paths[path_key].summary) 562 | descriptions.push(paths[path_key].summary); 563 | if (paths[path_key].description) 564 | descriptions.push(paths[path_key].description); 565 | if (paths[path_key][method_key].summary) 566 | descriptions.push(paths[path_key][method_key].summary); 567 | if (paths[path_key][method_key].description) 568 | descriptions.push(paths[path_key][method_key].description); 569 | 570 | let category = false; 571 | try { 572 | category = api.info["x-apisguru-categories"][0]; 573 | } catch (e) { 574 | false; 575 | } 576 | 577 | const openapi_specs = { 578 | ..._api, 579 | paths: { 580 | [path_key]: { 581 | [method_key]: paths[path_key][method_key], 582 | }, 583 | }, 584 | }; 585 | 586 | const _specs_string = JSON.stringify(openapi_specs).toLowerCase(); 587 | 588 | const auth = [ 589 | `Auth`, 590 | `Bearer`, 591 | `X-API-Key`, 592 | `X-Api-Key`, 593 | `BasicAuth`, 594 | `ApiKeyAuth`, 595 | `OpenID`, 596 | `OAuth2`, 597 | ].some((_auth_substring) => 598 | _specs_string 599 | .toLowerCase() 600 | .includes(_auth_substring.toLowerCase()), 601 | ); 602 | 603 | const _apiFunction = paths[path_key][method_key].operationId 604 | ? paths[path_key][method_key].operationId 605 | : `${path_key} : ${method_key}`; 606 | const _apiFunctionDescription = descriptions.join(`\n\n`); 607 | const _apiFunctionVectorize = 608 | `api info:\n` + 609 | `- name : ${api.info.title}\n` + 610 | `- description : ${api.info.description ? api.info.description.trim() : "" 611 | }\n` + 612 | `---\n` + 613 | `function name:\n` + 614 | `- ${_apiFunction.trim()}\n` + 615 | `---\n` + 616 | `function description:\n` + 617 | `- ${_apiFunctionDescription.trim()}\n` + 618 | `--\n` + 619 | `function route:\n` + 620 | `${Object.keys( 621 | openapi_specs.paths[Object.keys(openapi_specs.paths)[0]], 622 | )} : ${Object.keys(openapi_specs.paths)[0]}`; 623 | return { 624 | meta: { 625 | format: `openapi`, 626 | api: api.info.title, 627 | info: api.info.description ? api.info.description : ``, 628 | description: _apiFunctionVectorize, 629 | function: _apiFunction, 630 | urls: query.url ? [query.url] : [], 631 | }, 632 | content: yaml.stringify({ 633 | format: `openapi`, 634 | api: { 635 | name: api.info.title, 636 | description: api.info.description ? 
api.info.description : ``, 637 | }, 638 | function: _apiFunction, 639 | description: _apiFunctionDescription, 640 | category, 641 | auth, 642 | implement: { 643 | usage: [], 644 | openapi: openapi_specs, 645 | }, 646 | }), 647 | }; 648 | }); 649 | }); 650 | return api_functions.flat(); 651 | } 652 | 653 | async function run(query, attempt = 0) { 654 | if (attempt === 2) return false; 655 | 656 | if (!query.data) { 657 | if (!query.url) return false; 658 | query.data = (await axios.get(query.url)).data; 659 | query.data = 660 | typeof query.data === "string" 661 | ? yaml.parse(query.data) 662 | : query.data; 663 | } 664 | 665 | try { 666 | query.data = await OpenAPIParser.validate(query.data); 667 | } catch (e) { 668 | console.error(e, "fallback"); 669 | query.data = await OpenAPIParser.parse(query.data); 670 | } 671 | query.data = JSON.parse( 672 | JSON.stringify(query.data, _circularReplacer()), 673 | ); 674 | if (query.data.swagger) { 675 | if (!query.url) return false; 676 | attempt++; 677 | return await run( 678 | { 679 | url: `https://converter.swagger.io/api/convert?url=${query.url}`, 680 | }, 681 | attempt, 682 | ); 683 | } 684 | return openapi3(query); 685 | } 686 | 687 | let parsed_openapi; 688 | try { 689 | parsed_openapi = await run({ url }); 690 | } catch (error) { 691 | console.error(`failed openapi parse : ${url}: ${error.message}`); 692 | return []; 693 | } 694 | 695 | const root_slug = slugify(url.split("://").slice(1).join("://")); 696 | for (let api_fn of parsed_openapi) { 697 | const { meta, content } = api_fn; 698 | const slug = slugify(api_fn.meta.function); 699 | const dir = path.join(DB_DIR, "openapi", root_slug, "raw"); 700 | await fs.mkdir(dir, { recursive: true }); 701 | await fs.writeFile( 702 | path.join(dir, `${slug}.yaml`), 703 | yaml.stringify({ 704 | meta, 705 | content, 706 | }), 707 | "utf-8", 708 | ); 709 | } 710 | 711 | if (post_process) { 712 | await lib.post_process({}); 713 | } 714 | if (vectorize) { 715 | await lib.vectorize({ root: `openapi/${root_slug}` }); 716 | if (index) { 717 | const index_methods = Object.keys(index).filter(key => index[key] !== false); 718 | await Promise.all( 719 | index_methods.map(async (index_method) => { 720 | await lib.index.create[index_method]({ root: `openapi/${root_slug}` }); 721 | }) 722 | ) 723 | } 724 | } 725 | }, 726 | github: async ({ 727 | url, 728 | depth = 3, 729 | proxy = false, 730 | post_process = false, 731 | vectorize = false, 732 | index = false, 733 | }) => { 734 | const GITHUB_API_KEY = process.env.GITHUB_API_KEY; 735 | if (!GITHUB_API_KEY) { 736 | console.error("Error: GITHUB_API_KEY is not set in .env file."); 737 | return []; 738 | } 739 | 740 | const axiosInstance = axios.create({ 741 | headers: { 742 | Authorization: `${GITHUB_API_KEY}`, 743 | }, 744 | }); 745 | 746 | // Helper function to fetch content with retry 747 | async function fetchContentWithRetry(fetchUrl) { 748 | return await retry( 749 | async (bail) => { 750 | try { 751 | const response = await axiosInstance.get(fetchUrl); 752 | return response.data; 753 | } catch (error) { 754 | if (error.response && error.response.status < 500) { 755 | bail(new Error(`Non-retryable error: ${error.message}`)); 756 | return; 757 | } 758 | throw error; 759 | } 760 | }, 761 | { retries: 5 }, 762 | ); 763 | } 764 | 765 | // Recursive function to traverse directories up to specified depth 766 | async function traverseRepo(path, currentDepth) { 767 | if (currentDepth > depth) return []; 768 | 769 | const apiUrl = 
`https://api.github.com/repos/${owner}/${repo}/contents/${path}`; 770 | let contents; 771 | try { 772 | contents = await fetchContentWithRetry(apiUrl); 773 | } catch (error) { 774 | console.error( 775 | `Failed to fetch contents of ${path}: ${error.message}`, 776 | ); 777 | return []; 778 | } 779 | 780 | let readmes = []; 781 | 782 | for (const item of contents) { 783 | if (item.type === "file" && item.name.toLowerCase() === "readme.md") { 784 | const rawUrl = `https://raw.githubusercontent.com/${owner}/${repo}/${defaultBranch}/${item.path}`; 785 | try { 786 | const readmeContent = await fetchContentWithRetry(rawUrl); 787 | readmes.push({ git_path: item.path, content: readmeContent }); 788 | } catch (error) { 789 | console.error( 790 | `Failed to fetch README at ${item.path}: ${error.message}`, 791 | ); 792 | } 793 | } else if (item.type === "dir") { 794 | const subReadmes = await traverseRepo(item.path, currentDepth + 1); 795 | readmes = readmes.concat(subReadmes); 796 | } 797 | } 798 | 799 | return readmes; 800 | } 801 | 802 | // Parse the repo URL to extract owner and repo 803 | let owner, repo; 804 | try { 805 | const parsedUrl = new URL(url); 806 | const pathSegments = parsedUrl.pathname 807 | .split("/") 808 | .filter((seg) => seg.length); 809 | if (pathSegments.length < 2) { 810 | throw new Error("Invalid GitHub repository URL."); 811 | } 812 | owner = pathSegments[0]; 813 | repo = pathSegments[1].replace(/\.git$/, ""); 814 | } catch (error) { 815 | console.error(`Invalid URL provided: ${error.message}`); 816 | return []; 817 | } 818 | 819 | // Fetch repository metadata to get default branch 820 | let defaultBranch = "main"; 821 | try { 822 | const repoUrl = `https://api.github.com/repos/${owner}/${repo}`; 823 | const repoData = await fetchContentWithRetry(repoUrl); 824 | defaultBranch = repoData.default_branch; 825 | } catch (error) { 826 | console.error(`Failed to fetch repository metadata: ${error.message}`); 827 | return []; 828 | } 829 | 830 | // Start traversing the repository 831 | const readmeList = await traverseRepo("", 1); 832 | 833 | const root_slug = `github.${owner}.${repo}`; 834 | for (let readme_file of readmeList) { 835 | const { git_path, content } = readme_file; 836 | if (content.length) { 837 | const slug = slugify(git_path); 838 | const dir = path.join(DB_DIR, "crawl", root_slug, "raw"); 839 | await fs.mkdir(dir, { recursive: true }); 840 | await fs.writeFile( 841 | path.join(dir, `${slug}.yaml`), 842 | yaml.stringify({ 843 | meta: { 844 | owner, 845 | repo, 846 | path: git_path, 847 | }, 848 | content, 849 | }), 850 | "utf-8", 851 | ); 852 | } 853 | } 854 | 855 | if (post_process) { 856 | await lib.post_process({}); 857 | } 858 | 859 | if (vectorize) { 860 | await lib.vectorize({ root: `crawl/${root_slug}` }); 861 | if (index) { 862 | const index_methods = Object.keys(index).filter(key => index[key] !== false); 863 | await Promise.all( 864 | index_methods.map(async (index_method) => { 865 | await lib.index.create[index_method]({ root: `crawl/${root_slug}` }); 866 | }) 867 | ) 868 | } 869 | } 870 | }, 871 | npm: async ({ 872 | name, 873 | proxy = false, 874 | post_process = false, 875 | vectorize = false, 876 | index = false, 877 | }) => { 878 | // Fetch package README using npm registry API 879 | // Fetch TypeScript definitions and process them 880 | try { 881 | const response = await axios.get(`https://registry.npmjs.org/${name}`); 882 | const latest = response.data["dist-tags"].latest; 883 | const git_url = 884 | 
response.data.versions[latest].homepage.split("#readme")[0]; 885 | console.dir({ latest, git_url }); 886 | if (git_url) 887 | return await lib.collect.github({ 888 | url: git_url, 889 | proxy, 890 | post_process, 891 | vectorize, 892 | index, 893 | }); 894 | return; 895 | } catch (error) { 896 | console.error( 897 | `Failed to fetch npm README for ${name}: ${error.message}`, 898 | ); 899 | return []; 900 | } 901 | }, 902 | }, 903 | post_process: async ({ }) => { 904 | // Iterate over each entry and generate cleaner Markdown using LLM 905 | const entries = await fs.readdir(DB_DIR, { withFileTypes: true }); 906 | for (const entry of entries) { 907 | if (entry.isDirectory()) { 908 | const contentPath = path.join(DB_DIR, entry.name, "content.md"); 909 | try { 910 | const content = await fs.readFile(contentPath, "utf-8"); 911 | const cleanerPrompt = `Improve the following markdown:\n\n${content}`; 912 | const cleaner = await lib.utils.llm({ prompt: cleanerPrompt }); 913 | await fs.writeFile(contentPath, cleaner, "utf-8"); 914 | } catch (error) { 915 | console.error( 916 | `Failed to post-process ${entry.name}: ${error.message}`, 917 | ); 918 | } 919 | } 920 | } 921 | return { status: "> postprocessing done" }; 922 | }, 923 | vectorize: async ({ root }) => { 924 | const processedDir = path.join(DB_DIR, root, "processed"); 925 | const rawDir = path.join(DB_DIR, root, "raw"); 926 | const dirToRead = (await fs.stat(processedDir).catch(() => false)) 927 | ? processedDir 928 | : rawDir; 929 | const entries = await fs.readdir(dirToRead, { withFileTypes: true }); 930 | const dataset = await Promise.all( 931 | entries.map(async (entry) => { 932 | const filepath = path.join(dirToRead, entry.name); 933 | const filecontent = await fs.readFile(filepath, "utf8"); 934 | const data = yaml.parse(filecontent); 935 | 936 | const uid = crypto 937 | .createHash("sha512") 938 | .update(data.content) 939 | .digest("hex"); 940 | return { 941 | scope: root, 942 | filepath, 943 | filename: entry.name, 944 | uid, 945 | ...data, 946 | vector_text: `${yaml.stringify(data.meta)}\n---\n\n${data.content.trim()}`, 947 | }; 948 | }), 949 | ); 950 | const batches = _chunkify(dataset, 15); 951 | await Promise.all( 952 | batches.map(async (chunk, chunk_index) => { 953 | // console.dir({chunk , chunk_index}) 954 | const vectors = ( 955 | await lib.utils.embed({ 956 | texts: chunk.map((entry) => entry.vector_text), 957 | }) 958 | ).vectors; 959 | await Promise.all( 960 | vectors.map(async (vector, idx) => { 961 | const item = chunk[idx]; 962 | const vectorDir = path.join(VECTORS_DIR, root); 963 | await fs.mkdir(vectorDir, { recursive: true }); 964 | await fs.writeFile( 965 | path.join(VECTORS_DIR, root, `${item.uid}.json`), 966 | JSON.stringify({ 967 | ...item, 968 | vector, 969 | }), 970 | "utf-8", 971 | ); 972 | if (LOCAL_KNOWLEDGE_DB) { 973 | LOCAL_KNOWLEDGE_DB[item.uid] = { meta: item.meta, content: item.content } 974 | } 975 | }), 976 | ); 977 | }), 978 | ); 979 | console.log(`> vectorized : ${root}`); 980 | }, 981 | index: { 982 | create: { 983 | local: async ({ root }) => { 984 | if (!LOCAL_PG_INSTANCE) { 985 | const metaDb = new PGlite(INDEX_DIR, { 986 | extensions: { 987 | vector, 988 | }, 989 | }); 990 | await metaDb.waitReady; 991 | LOCAL_PG_INSTANCE = metaDb; 992 | } 993 | try { 994 | await LOCAL_PG_INSTANCE.exec(` 995 | create extension if not exists vector; 996 | -- drop table if exists embeddings; -- Uncomment this line to reset the database 997 | create table if not exists embeddings ( 998 | id bigint primary key 
generated always as identity, 999 | uid text not null unique, 1000 | embedding vector (1536) 1001 | ); 1002 | 1003 | create index on embeddings using hnsw (embedding vector_ip_ops); 1004 | `); 1005 | } catch (e) { 1006 | console.error(e); 1007 | } 1008 | const entries = await fs.readdir(path.join(VECTORS_DIR, root)); 1009 | const jsonFiles = entries.filter((file) => file.endsWith(".json")); 1010 | const dataset = (await Promise.all( 1011 | jsonFiles.map(async (file) => { 1012 | try { 1013 | 1014 | const filePath = path.join(VECTORS_DIR, root, file); 1015 | const content = await fs.readFile(filePath, "utf-8"); 1016 | return JSON.parse(content); 1017 | } catch (e) { 1018 | console.error(e) 1019 | } 1020 | return false 1021 | }), 1022 | )).filter(e => e); 1023 | const chunks = _chunkify(dataset, 50); 1024 | for (let chunk of chunks) { 1025 | // Filter out entries that already exist in the database 1026 | const existingUids = await LOCAL_PG_INSTANCE.query(` 1027 | SELECT uid FROM embeddings WHERE uid IN (${chunk.map((entry) => `'${entry.uid}'`).join(", ")}); 1028 | `); 1029 | const existingUidSet = new Set( 1030 | existingUids.rows.map((row) => row.uid), 1031 | ); 1032 | 1033 | const newEntries = chunk.filter( 1034 | (entry) => !existingUidSet.has(entry.uid), 1035 | ); 1036 | if (newEntries.length > 0) { 1037 | const pg_entries = newEntries 1038 | .map((entry) => { 1039 | return `\t('${entry.uid}','${JSON.stringify(entry.vector)}')`; 1040 | }) 1041 | .join(",\n"); 1042 | 1043 | await LOCAL_PG_INSTANCE.exec(` 1044 | insert into embeddings (uid, embedding) values 1045 | ${pg_entries}; 1046 | `); 1047 | } 1048 | 1049 | console.dir( 1050 | await LOCAL_PG_INSTANCE.query(`SELECT COUNT(*) FROM embeddings;`), 1051 | { depth: null }, 1052 | ); 1053 | } 1054 | }, 1055 | supabase: async ({ }) => { }, 1056 | weaviate: async ({ }) => { }, 1057 | }, 1058 | query: { 1059 | local: async ({ query, embedding = false, threshold = 0.0, amount = 6 }) => { 1060 | if (!LOCAL_PG_INSTANCE) { 1061 | const metaDb = new PGlite(INDEX_DIR, { 1062 | extensions: { 1063 | vector, 1064 | }, 1065 | }); 1066 | await metaDb.waitReady; 1067 | LOCAL_PG_INSTANCE = metaDb; 1068 | } 1069 | const query_vector = embedding ? embedding : (await lib.utils.embed({ texts: [query] })).vectors[0] 1070 | const res = await LOCAL_PG_INSTANCE.query( 1071 | ` 1072 | select * from embeddings 1073 | where embeddings.embedding <#> $1 < $2 1074 | order by embeddings.embedding <#> $1 1075 | limit $3; 1076 | `, 1077 | [JSON.stringify(query_vector), -Number(threshold), Number(amount)] 1078 | ) 1079 | return res.rows.map(item => { 1080 | return { 1081 | uid: item.uid, 1082 | data: LOCAL_KNOWLEDGE_DB[item.uid] ? LOCAL_KNOWLEDGE_DB[item.uid] : false, 1083 | } 1084 | }) 1085 | }, 1086 | supabase: async ({ }) => { }, 1087 | weaviate: async ({ }) => { }, 1088 | }, 1089 | ask: { 1090 | local: async ({ query, model = "gpt-4o" }) => { 1091 | 1092 | const retrieved = await lib.index.query.local({ 1093 | query, 1094 | amount: 7 1095 | }) 1096 | const messages = [ 1097 | { 1098 | role: 'user', 1099 | content: `# FOUND REFERENCES : 1100 | 1101 | ${retrieved.map(entry => yaml.stringify(entry.data)).join('\n---\n')} 1102 | ------ 1103 | 1104 | # USER QUERY : 1105 | 1106 | ${query}` 1107 | } 1108 | ] 1109 | 1110 | return await lib.utils.llm({ messages, model }); 1111 | } 1112 | }, 1113 | }, 1114 | }; 1115 | 1116 | export default lib; 1117 | --------------------------------------------------------------------------------