├── .npmignore ├── LICENSE ├── README.md ├── package-lock.json ├── package.json ├── src ├── api-client.ts ├── embeddings.ts ├── handlers │ ├── add-documentation.ts │ ├── base-handler.ts │ ├── list-documentation.ts │ ├── search-documentation.ts │ └── test-embeddings.ts ├── index.ts ├── tools │ ├── add-documentation.ts │ ├── content-fetcher.ts │ ├── list-utils.ts │ ├── qdrant-client.ts │ ├── search-utils.ts │ ├── text-chunker.ts │ └── url-processor.ts ├── types.ts └── types │ └── ollama.d.ts └── tsconfig.json /.npmignore: -------------------------------------------------------------------------------- 1 | src/ 2 | ref/ 3 | .clinecontext 4 | .clinelearn 5 | .clinerules 6 | ragdocs_plan.md 7 | tsconfig.json 8 | .git 9 | .gitignore 10 | node_modules/ 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2024 Cline Bot Inc. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RagDocs MCP Server 2 | 3 | A Model Context Protocol (MCP) server that provides RAG (Retrieval-Augmented Generation) capabilities using Qdrant vector database and Ollama/OpenAI embeddings. This server enables semantic search and management of documentation through vector similarity. 
4 | 5 | ## Features 6 | 7 | - Add documentation with metadata 8 | - Semantic search through documents 9 | - List and organize documentation 10 | - Delete documents 11 | - Support for both Ollama (free) and OpenAI (paid) embeddings 12 | - Automatic text chunking and embedding generation 13 | - Vector storage with Qdrant 14 | 15 | ## Prerequisites 16 | 17 | - Node.js 16 or higher 18 | - One of the following Qdrant setups: 19 | - Local instance using Docker (free) 20 | - Qdrant Cloud account with API key (managed service) 21 | - One of the following for embeddings: 22 | - Ollama running locally (default, free) 23 | - OpenAI API key (optional, paid) 24 | 25 | ## Available Tools 26 | 27 | ### 1. add_document 28 | Add a document to the RAG system. 29 | 30 | Parameters: 31 | - `url` (required): Document URL/identifier 32 | - `content` (required): Document content 33 | - `metadata` (optional): Document metadata 34 | - `title`: Document title 35 | - `contentType`: Content type (e.g., "text/markdown") 36 | 37 | ### 2. search_documents 38 | Search through stored documents using semantic similarity. 39 | 40 | Parameters: 41 | - `query` (required): Natural language search query 42 | - `options` (optional): 43 | - `limit`: Maximum number of results (1-20, default: 5) 44 | - `scoreThreshold`: Minimum similarity score (0-1, default: 0.7) 45 | - `filters`: 46 | - `domain`: Filter by domain 47 | - `hasCode`: Filter for documents containing code 48 | - `after`: Filter for documents after date (ISO format) 49 | - `before`: Filter for documents before date (ISO format) 50 | 51 | ### 3. list_documents 52 | List all stored documents with pagination and grouping options. 
53 | 54 | Parameters (all optional): 55 | - `page`: Page number (default: 1) 56 | - `pageSize`: Number of documents per page (1-100, default: 20) 57 | - `groupByDomain`: Group documents by domain (default: false) 58 | - `sortBy`: Sort field ("timestamp", "title", or "domain") 59 | - `sortOrder`: Sort order ("asc" or "desc") 60 | 61 | ### 4. delete_document 62 | Delete a document from the RAG system. 63 | 64 | Parameters: 65 | - `url` (required): URL of the document to delete 66 | 67 | ## Installation 68 | 69 | ```bash 70 | npm install -g @mcpservers/ragdocs 71 | ``` 72 | 73 | ## MCP Server Configuration 74 | 75 | ```json 76 | { 77 | "mcpServers": { 78 | "ragdocs": { 79 | "command": "npx", 80 | "args": ["-y", "@mcpservers/ragdocs"], 81 | "env": { 82 | "QDRANT_URL": "http://127.0.0.1:6333", 83 | "EMBEDDING_PROVIDER": "ollama" 84 | } 85 | } 86 | } 87 | } 88 | ``` 89 | 90 | Using Qdrant Cloud: 91 | ```json 92 | { 93 | "mcpServers": { 94 | "ragdocs": { 95 | "command": "npx", 96 | "args": ["-y", "@mcpservers/ragdocs"], 97 | "env": { 98 | "QDRANT_URL": "https://your-cluster-url.qdrant.tech", 99 | "QDRANT_API_KEY": "your-qdrant-api-key", 100 | "EMBEDDING_PROVIDER": "ollama" 101 | } 102 | } 103 | } 104 | } 105 | ``` 106 | 107 | Using OpenAI: 108 | ```json 109 | { 110 | "mcpServers": { 111 | "ragdocs": { 112 | "command": "npx", 113 | "args": ["-y", "@mcpservers/ragdocs"], 114 | "env": { 115 | "QDRANT_URL": "http://127.0.0.1:6333", 116 | "EMBEDDING_PROVIDER": "openai", 117 | "OPENAI_API_KEY": "your-api-key" 118 | } 119 | } 120 | } 121 | } 122 | ``` 123 | 124 | ## Local Qdrant with Docker 125 | 126 | ```bash 127 | docker run -d --name qdrant -p 6333:6333 -p 6334:6334 qdrant/qdrant 128 | ``` 129 | 130 | ## Environment Variables 131 | 132 | - `QDRANT_URL`: URL of your Qdrant instance 133 | - For local: "http://127.0.0.1:6333" (default) 134 | - For cloud: "https://your-cluster-url.qdrant.tech" 135 | - `QDRANT_API_KEY`: API key for Qdrant Cloud (required when using cloud instance) 136 | - 
`EMBEDDING_PROVIDER`: Choice of embedding provider ("ollama" or "openai", default: "ollama") 137 | - `OPENAI_API_KEY`: OpenAI API key (required if using OpenAI) 138 | - `EMBEDDING_MODEL`: Model to use for embeddings 139 | - For Ollama: defaults to "nomic-embed-text" 140 | - For OpenAI: defaults to "text-embedding-3-small" 141 | 142 | ## License 143 | 144 | Apache License 2.0 145 | -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@mcpservers/ragdocs", 3 | "version": "1.0.0", 4 | "lockfileVersion": 3, 5 | "requires": true, 6 | "packages": { 7 | "": { 8 | "name": "@mcpservers/ragdocs", 9 | "version": "1.0.0", 10 | "license": "Apache-2.0", 11 | "dependencies": { 12 | "@modelcontextprotocol/sdk": "^1.0.4", 13 | "@qdrant/js-client-rest": "^1.12.0", 14 | "axios": "^1.7.9", 15 | "cheerio": "^1.0.0", 16 | "ollama": "^0.5.11", 17 | "openai": "^4.77.0", 18 | "playwright": "^1.49.1" 19 | }, 20 | "bin": { 21 | "mcp-ragdocs": "build/index.js" 22 | }, 23 | "devDependencies": { 24 | "typescript": "^5.7.2" 25 | } 26 | }, 27 | "node_modules/@fastify/busboy": { 28 | "version": "2.1.1", 29 | "resolved": "https://registry.npmjs.org/@fastify/busboy/-/busboy-2.1.1.tgz", 30 | "integrity": "sha512-vBZP4NlzfOlerQTnba4aqZoMhE/a9HY7HRqoOPaETQcSQuWEIyZMHGfVu6w9wGtGK5fED5qRs2DteVCjOH60sA==", 31 | "license": "MIT", 32 | "engines": { 33 | "node": ">=14" 34 | } 35 | }, 36 | "node_modules/@modelcontextprotocol/sdk": { 37 | "version": "1.0.4", 38 | "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.0.4.tgz", 39 | "integrity": "sha512-C+jw1lF6HSGzs7EZpzHbXfzz9rj9him4BaoumlTciW/IDDgIpweF/qiCWKlP02QKg5PPcgY6xY2WCt5y2tpYow==", 40 | "license": "MIT", 41 | "dependencies": { 42 | "content-type": "^1.0.5", 43 | "raw-body": "^3.0.0", 44 | "zod": "^3.23.8" 45 | } 46 | }, 47 | "node_modules/@qdrant/js-client-rest": { 48 | "version": 
"1.12.0", 49 | "resolved": "https://registry.npmjs.org/@qdrant/js-client-rest/-/js-client-rest-1.12.0.tgz", 50 | "integrity": "sha512-H8VokZq2DYe9yfKG3c7xPNR+Oc5ZvwMUtPEr1wUO4xVi9w5P89MScJaCc9UW8mS5AR+/Y1h2t1YjSxBFPIYT2Q==", 51 | "license": "Apache-2.0", 52 | "dependencies": { 53 | "@qdrant/openapi-typescript-fetch": "1.2.6", 54 | "@sevinf/maybe": "0.5.0", 55 | "undici": "~5.28.4" 56 | }, 57 | "engines": { 58 | "node": ">=18.0.0", 59 | "pnpm": ">=8" 60 | }, 61 | "peerDependencies": { 62 | "typescript": ">=4.7" 63 | } 64 | }, 65 | "node_modules/@qdrant/openapi-typescript-fetch": { 66 | "version": "1.2.6", 67 | "resolved": "https://registry.npmjs.org/@qdrant/openapi-typescript-fetch/-/openapi-typescript-fetch-1.2.6.tgz", 68 | "integrity": "sha512-oQG/FejNpItrxRHoyctYvT3rwGZOnK4jr3JdppO/c78ktDvkWiPXPHNsrDf33K9sZdRb6PR7gi4noIapu5q4HA==", 69 | "license": "MIT", 70 | "engines": { 71 | "node": ">=18.0.0", 72 | "pnpm": ">=8" 73 | } 74 | }, 75 | "node_modules/@sevinf/maybe": { 76 | "version": "0.5.0", 77 | "resolved": "https://registry.npmjs.org/@sevinf/maybe/-/maybe-0.5.0.tgz", 78 | "integrity": "sha512-ARhyoYDnY1LES3vYI0fiG6e9esWfTNcXcO6+MPJJXcnyMV3bim4lnFt45VXouV7y82F4x3YH8nOQ6VztuvUiWg==", 79 | "license": "MIT" 80 | }, 81 | "node_modules/@types/node": { 82 | "version": "18.19.68", 83 | "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.68.tgz", 84 | "integrity": "sha512-QGtpFH1vB99ZmTa63K4/FU8twThj4fuVSBkGddTp7uIL/cuoLWIUSL2RcOaigBhfR+hg5pgGkBnkoOxrTVBMKw==", 85 | "license": "MIT", 86 | "dependencies": { 87 | "undici-types": "~5.26.4" 88 | } 89 | }, 90 | "node_modules/@types/node-fetch": { 91 | "version": "2.6.12", 92 | "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.12.tgz", 93 | "integrity": "sha512-8nneRWKCg3rMtF69nLQJnOYUcbafYeFSjqkw3jCRLsqkWFlHaoQrr5mXmofFGOx3DKn7UfmBMyov8ySvLRVldA==", 94 | "license": "MIT", 95 | "dependencies": { 96 | "@types/node": "*", 97 | "form-data": "^4.0.0" 98 | } 99 | }, 100 | 
"node_modules/abort-controller": { 101 | "version": "3.0.0", 102 | "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", 103 | "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", 104 | "license": "MIT", 105 | "dependencies": { 106 | "event-target-shim": "^5.0.0" 107 | }, 108 | "engines": { 109 | "node": ">=6.5" 110 | } 111 | }, 112 | "node_modules/agentkeepalive": { 113 | "version": "4.5.0", 114 | "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.5.0.tgz", 115 | "integrity": "sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==", 116 | "license": "MIT", 117 | "dependencies": { 118 | "humanize-ms": "^1.2.1" 119 | }, 120 | "engines": { 121 | "node": ">= 8.0.0" 122 | } 123 | }, 124 | "node_modules/asynckit": { 125 | "version": "0.4.0", 126 | "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", 127 | "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", 128 | "license": "MIT" 129 | }, 130 | "node_modules/axios": { 131 | "version": "1.7.9", 132 | "resolved": "https://registry.npmjs.org/axios/-/axios-1.7.9.tgz", 133 | "integrity": "sha512-LhLcE7Hbiryz8oMDdDptSrWowmB4Bl6RCt6sIJKpRB4XtVf0iEgewX3au/pJqm+Py1kCASkb/FFKjxQaLtxJvw==", 134 | "license": "MIT", 135 | "dependencies": { 136 | "follow-redirects": "^1.15.6", 137 | "form-data": "^4.0.0", 138 | "proxy-from-env": "^1.1.0" 139 | } 140 | }, 141 | "node_modules/boolbase": { 142 | "version": "1.0.0", 143 | "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", 144 | "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", 145 | "license": "ISC" 146 | }, 147 | "node_modules/bytes": { 148 | "version": "3.1.2", 149 | "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", 150 | "integrity": 
"sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==", 151 | "license": "MIT", 152 | "engines": { 153 | "node": ">= 0.8" 154 | } 155 | }, 156 | "node_modules/cheerio": { 157 | "version": "1.0.0", 158 | "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0.tgz", 159 | "integrity": "sha512-quS9HgjQpdaXOvsZz82Oz7uxtXiy6UIsIQcpBj7HRw2M63Skasm9qlDocAM7jNuaxdhpPU7c4kJN+gA5MCu4ww==", 160 | "license": "MIT", 161 | "dependencies": { 162 | "cheerio-select": "^2.1.0", 163 | "dom-serializer": "^2.0.0", 164 | "domhandler": "^5.0.3", 165 | "domutils": "^3.1.0", 166 | "encoding-sniffer": "^0.2.0", 167 | "htmlparser2": "^9.1.0", 168 | "parse5": "^7.1.2", 169 | "parse5-htmlparser2-tree-adapter": "^7.0.0", 170 | "parse5-parser-stream": "^7.1.2", 171 | "undici": "^6.19.5", 172 | "whatwg-mimetype": "^4.0.0" 173 | }, 174 | "engines": { 175 | "node": ">=18.17" 176 | }, 177 | "funding": { 178 | "url": "https://github.com/cheeriojs/cheerio?sponsor=1" 179 | } 180 | }, 181 | "node_modules/cheerio-select": { 182 | "version": "2.1.0", 183 | "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", 184 | "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", 185 | "license": "BSD-2-Clause", 186 | "dependencies": { 187 | "boolbase": "^1.0.0", 188 | "css-select": "^5.1.0", 189 | "css-what": "^6.1.0", 190 | "domelementtype": "^2.3.0", 191 | "domhandler": "^5.0.3", 192 | "domutils": "^3.0.1" 193 | }, 194 | "funding": { 195 | "url": "https://github.com/sponsors/fb55" 196 | } 197 | }, 198 | "node_modules/cheerio/node_modules/undici": { 199 | "version": "6.21.0", 200 | "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.0.tgz", 201 | "integrity": "sha512-BUgJXc752Kou3oOIuU1i+yZZypyZRqNPW0vqoMPl8VaoalSfeR0D8/t4iAS3yirs79SSMTxTag+ZC86uswv+Cw==", 202 | "license": "MIT", 203 | "engines": { 204 | "node": ">=18.17" 205 | } 206 | }, 207 | 
"node_modules/combined-stream": { 208 | "version": "1.0.8", 209 | "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", 210 | "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", 211 | "license": "MIT", 212 | "dependencies": { 213 | "delayed-stream": "~1.0.0" 214 | }, 215 | "engines": { 216 | "node": ">= 0.8" 217 | } 218 | }, 219 | "node_modules/content-type": { 220 | "version": "1.0.5", 221 | "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", 222 | "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==", 223 | "license": "MIT", 224 | "engines": { 225 | "node": ">= 0.6" 226 | } 227 | }, 228 | "node_modules/css-select": { 229 | "version": "5.1.0", 230 | "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz", 231 | "integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==", 232 | "license": "BSD-2-Clause", 233 | "dependencies": { 234 | "boolbase": "^1.0.0", 235 | "css-what": "^6.1.0", 236 | "domhandler": "^5.0.2", 237 | "domutils": "^3.0.1", 238 | "nth-check": "^2.0.1" 239 | }, 240 | "funding": { 241 | "url": "https://github.com/sponsors/fb55" 242 | } 243 | }, 244 | "node_modules/css-what": { 245 | "version": "6.1.0", 246 | "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz", 247 | "integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==", 248 | "license": "BSD-2-Clause", 249 | "engines": { 250 | "node": ">= 6" 251 | }, 252 | "funding": { 253 | "url": "https://github.com/sponsors/fb55" 254 | } 255 | }, 256 | "node_modules/delayed-stream": { 257 | "version": "1.0.0", 258 | "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", 259 | "integrity": 
"sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", 260 | "license": "MIT", 261 | "engines": { 262 | "node": ">=0.4.0" 263 | } 264 | }, 265 | "node_modules/depd": { 266 | "version": "2.0.0", 267 | "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", 268 | "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", 269 | "license": "MIT", 270 | "engines": { 271 | "node": ">= 0.8" 272 | } 273 | }, 274 | "node_modules/dom-serializer": { 275 | "version": "2.0.0", 276 | "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", 277 | "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", 278 | "license": "MIT", 279 | "dependencies": { 280 | "domelementtype": "^2.3.0", 281 | "domhandler": "^5.0.2", 282 | "entities": "^4.2.0" 283 | }, 284 | "funding": { 285 | "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" 286 | } 287 | }, 288 | "node_modules/domelementtype": { 289 | "version": "2.3.0", 290 | "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", 291 | "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", 292 | "funding": [ 293 | { 294 | "type": "github", 295 | "url": "https://github.com/sponsors/fb55" 296 | } 297 | ], 298 | "license": "BSD-2-Clause" 299 | }, 300 | "node_modules/domhandler": { 301 | "version": "5.0.3", 302 | "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", 303 | "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", 304 | "license": "BSD-2-Clause", 305 | "dependencies": { 306 | "domelementtype": "^2.3.0" 307 | }, 308 | "engines": { 309 | "node": ">= 4" 310 | }, 311 | "funding": { 312 | "url": "https://github.com/fb55/domhandler?sponsor=1" 313 | } 314 | }, 315 | 
"node_modules/domutils": { 316 | "version": "3.1.0", 317 | "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz", 318 | "integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==", 319 | "license": "BSD-2-Clause", 320 | "dependencies": { 321 | "dom-serializer": "^2.0.0", 322 | "domelementtype": "^2.3.0", 323 | "domhandler": "^5.0.3" 324 | }, 325 | "funding": { 326 | "url": "https://github.com/fb55/domutils?sponsor=1" 327 | } 328 | }, 329 | "node_modules/encoding-sniffer": { 330 | "version": "0.2.0", 331 | "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.0.tgz", 332 | "integrity": "sha512-ju7Wq1kg04I3HtiYIOrUrdfdDvkyO9s5XM8QAj/bN61Yo/Vb4vgJxy5vi4Yxk01gWHbrofpPtpxM8bKger9jhg==", 333 | "license": "MIT", 334 | "dependencies": { 335 | "iconv-lite": "^0.6.3", 336 | "whatwg-encoding": "^3.1.1" 337 | }, 338 | "funding": { 339 | "url": "https://github.com/fb55/encoding-sniffer?sponsor=1" 340 | } 341 | }, 342 | "node_modules/entities": { 343 | "version": "4.5.0", 344 | "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", 345 | "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", 346 | "license": "BSD-2-Clause", 347 | "engines": { 348 | "node": ">=0.12" 349 | }, 350 | "funding": { 351 | "url": "https://github.com/fb55/entities?sponsor=1" 352 | } 353 | }, 354 | "node_modules/event-target-shim": { 355 | "version": "5.0.1", 356 | "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", 357 | "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", 358 | "license": "MIT", 359 | "engines": { 360 | "node": ">=6" 361 | } 362 | }, 363 | "node_modules/follow-redirects": { 364 | "version": "1.15.9", 365 | "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.9.tgz", 366 | "integrity": 
"sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ==", 367 | "funding": [ 368 | { 369 | "type": "individual", 370 | "url": "https://github.com/sponsors/RubenVerborgh" 371 | } 372 | ], 373 | "license": "MIT", 374 | "engines": { 375 | "node": ">=4.0" 376 | }, 377 | "peerDependenciesMeta": { 378 | "debug": { 379 | "optional": true 380 | } 381 | } 382 | }, 383 | "node_modules/form-data": { 384 | "version": "4.0.1", 385 | "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.1.tgz", 386 | "integrity": "sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==", 387 | "license": "MIT", 388 | "dependencies": { 389 | "asynckit": "^0.4.0", 390 | "combined-stream": "^1.0.8", 391 | "mime-types": "^2.1.12" 392 | }, 393 | "engines": { 394 | "node": ">= 6" 395 | } 396 | }, 397 | "node_modules/form-data-encoder": { 398 | "version": "1.7.2", 399 | "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", 400 | "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==", 401 | "license": "MIT" 402 | }, 403 | "node_modules/formdata-node": { 404 | "version": "4.4.1", 405 | "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", 406 | "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", 407 | "license": "MIT", 408 | "dependencies": { 409 | "node-domexception": "1.0.0", 410 | "web-streams-polyfill": "4.0.0-beta.3" 411 | }, 412 | "engines": { 413 | "node": ">= 12.20" 414 | } 415 | }, 416 | "node_modules/fsevents": { 417 | "version": "2.3.2", 418 | "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", 419 | "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", 420 | "hasInstallScript": true, 421 | "license": "MIT", 422 | "optional": true, 423 | "os": [ 424 
| "darwin" 425 | ], 426 | "engines": { 427 | "node": "^8.16.0 || ^10.6.0 || >=11.0.0" 428 | } 429 | }, 430 | "node_modules/htmlparser2": { 431 | "version": "9.1.0", 432 | "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-9.1.0.tgz", 433 | "integrity": "sha512-5zfg6mHUoaer/97TxnGpxmbR7zJtPwIYFMZ/H5ucTlPZhKvtum05yiPK3Mgai3a0DyVxv7qYqoweaEd2nrYQzQ==", 434 | "funding": [ 435 | "https://github.com/fb55/htmlparser2?sponsor=1", 436 | { 437 | "type": "github", 438 | "url": "https://github.com/sponsors/fb55" 439 | } 440 | ], 441 | "license": "MIT", 442 | "dependencies": { 443 | "domelementtype": "^2.3.0", 444 | "domhandler": "^5.0.3", 445 | "domutils": "^3.1.0", 446 | "entities": "^4.5.0" 447 | } 448 | }, 449 | "node_modules/http-errors": { 450 | "version": "2.0.0", 451 | "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz", 452 | "integrity": "sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==", 453 | "license": "MIT", 454 | "dependencies": { 455 | "depd": "2.0.0", 456 | "inherits": "2.0.4", 457 | "setprototypeof": "1.2.0", 458 | "statuses": "2.0.1", 459 | "toidentifier": "1.0.1" 460 | }, 461 | "engines": { 462 | "node": ">= 0.8" 463 | } 464 | }, 465 | "node_modules/humanize-ms": { 466 | "version": "1.2.1", 467 | "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", 468 | "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", 469 | "license": "MIT", 470 | "dependencies": { 471 | "ms": "^2.0.0" 472 | } 473 | }, 474 | "node_modules/iconv-lite": { 475 | "version": "0.6.3", 476 | "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", 477 | "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", 478 | "license": "MIT", 479 | "dependencies": { 480 | "safer-buffer": ">= 2.1.2 < 3.0.0" 481 | }, 482 | "engines": { 483 | "node": ">=0.10.0" 484 
| } 485 | }, 486 | "node_modules/inherits": { 487 | "version": "2.0.4", 488 | "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", 489 | "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", 490 | "license": "ISC" 491 | }, 492 | "node_modules/mime-db": { 493 | "version": "1.52.0", 494 | "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", 495 | "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", 496 | "license": "MIT", 497 | "engines": { 498 | "node": ">= 0.6" 499 | } 500 | }, 501 | "node_modules/mime-types": { 502 | "version": "2.1.35", 503 | "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", 504 | "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", 505 | "license": "MIT", 506 | "dependencies": { 507 | "mime-db": "1.52.0" 508 | }, 509 | "engines": { 510 | "node": ">= 0.6" 511 | } 512 | }, 513 | "node_modules/ms": { 514 | "version": "2.1.3", 515 | "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", 516 | "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", 517 | "license": "MIT" 518 | }, 519 | "node_modules/node-domexception": { 520 | "version": "1.0.0", 521 | "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", 522 | "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", 523 | "funding": [ 524 | { 525 | "type": "github", 526 | "url": "https://github.com/sponsors/jimmywarting" 527 | }, 528 | { 529 | "type": "github", 530 | "url": "https://paypal.me/jimmywarting" 531 | } 532 | ], 533 | "license": "MIT", 534 | "engines": { 535 | "node": ">=10.5.0" 536 | } 537 | }, 538 | "node_modules/node-fetch": { 539 | "version": "2.7.0", 540 | "resolved": 
"https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", 541 | "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", 542 | "license": "MIT", 543 | "dependencies": { 544 | "whatwg-url": "^5.0.0" 545 | }, 546 | "engines": { 547 | "node": "4.x || >=6.0.0" 548 | }, 549 | "peerDependencies": { 550 | "encoding": "^0.1.0" 551 | }, 552 | "peerDependenciesMeta": { 553 | "encoding": { 554 | "optional": true 555 | } 556 | } 557 | }, 558 | "node_modules/nth-check": { 559 | "version": "2.1.1", 560 | "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", 561 | "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", 562 | "license": "BSD-2-Clause", 563 | "dependencies": { 564 | "boolbase": "^1.0.0" 565 | }, 566 | "funding": { 567 | "url": "https://github.com/fb55/nth-check?sponsor=1" 568 | } 569 | }, 570 | "node_modules/ollama": { 571 | "version": "0.5.11", 572 | "resolved": "https://registry.npmjs.org/ollama/-/ollama-0.5.11.tgz", 573 | "integrity": "sha512-lDAKcpmBU3VAOGF05NcQipHNKTdpKfAHpZ7bjCsElkUkmX7SNZImi6lwIxz/l1zQtLq0S3wuLneRuiXxX2KIew==", 574 | "license": "MIT", 575 | "dependencies": { 576 | "whatwg-fetch": "^3.6.20" 577 | } 578 | }, 579 | "node_modules/openai": { 580 | "version": "4.77.0", 581 | "resolved": "https://registry.npmjs.org/openai/-/openai-4.77.0.tgz", 582 | "integrity": "sha512-WWacavtns/7pCUkOWvQIjyOfcdr9X+9n9Vvb0zFeKVDAqwCMDHB+iSr24SVaBAhplvSG6JrRXFpcNM9gWhOGIw==", 583 | "license": "Apache-2.0", 584 | "dependencies": { 585 | "@types/node": "^18.11.18", 586 | "@types/node-fetch": "^2.6.4", 587 | "abort-controller": "^3.0.0", 588 | "agentkeepalive": "^4.2.1", 589 | "form-data-encoder": "1.7.2", 590 | "formdata-node": "^4.3.2", 591 | "node-fetch": "^2.6.7" 592 | }, 593 | "bin": { 594 | "openai": "bin/cli" 595 | }, 596 | "peerDependencies": { 597 | "zod": "^3.23.8" 598 | }, 599 | "peerDependenciesMeta": { 600 | "zod": { 601 
| "optional": true 602 | } 603 | } 604 | }, 605 | "node_modules/parse5": { 606 | "version": "7.2.1", 607 | "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.2.1.tgz", 608 | "integrity": "sha512-BuBYQYlv1ckiPdQi/ohiivi9Sagc9JG+Ozs0r7b/0iK3sKmrb0b9FdWdBbOdx6hBCM/F9Ir82ofnBhtZOjCRPQ==", 609 | "license": "MIT", 610 | "dependencies": { 611 | "entities": "^4.5.0" 612 | }, 613 | "funding": { 614 | "url": "https://github.com/inikulin/parse5?sponsor=1" 615 | } 616 | }, 617 | "node_modules/parse5-htmlparser2-tree-adapter": { 618 | "version": "7.1.0", 619 | "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz", 620 | "integrity": "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==", 621 | "license": "MIT", 622 | "dependencies": { 623 | "domhandler": "^5.0.3", 624 | "parse5": "^7.0.0" 625 | }, 626 | "funding": { 627 | "url": "https://github.com/inikulin/parse5?sponsor=1" 628 | } 629 | }, 630 | "node_modules/parse5-parser-stream": { 631 | "version": "7.1.2", 632 | "resolved": "https://registry.npmjs.org/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz", 633 | "integrity": "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==", 634 | "license": "MIT", 635 | "dependencies": { 636 | "parse5": "^7.0.0" 637 | }, 638 | "funding": { 639 | "url": "https://github.com/inikulin/parse5?sponsor=1" 640 | } 641 | }, 642 | "node_modules/playwright": { 643 | "version": "1.49.1", 644 | "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.49.1.tgz", 645 | "integrity": "sha512-VYL8zLoNTBxVOrJBbDuRgDWa3i+mfQgDTrL8Ah9QXZ7ax4Dsj0MSq5bYgytRnDVVe+njoKnfsYkH3HzqVj5UZA==", 646 | "license": "Apache-2.0", 647 | "dependencies": { 648 | "playwright-core": "1.49.1" 649 | }, 650 | "bin": { 651 | "playwright": "cli.js" 652 | }, 653 | "engines": { 654 | "node": ">=18" 655 | }, 656 | "optionalDependencies": { 657 | "fsevents": "2.3.2" 658 
| } 659 | }, 660 | "node_modules/playwright-core": { 661 | "version": "1.49.1", 662 | "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.49.1.tgz", 663 | "integrity": "sha512-BzmpVcs4kE2CH15rWfzpjzVGhWERJfmnXmniSyKeRZUs9Ws65m+RGIi7mjJK/euCegfn3i7jvqWeWyHe9y3Vgg==", 664 | "license": "Apache-2.0", 665 | "bin": { 666 | "playwright-core": "cli.js" 667 | }, 668 | "engines": { 669 | "node": ">=18" 670 | } 671 | }, 672 | "node_modules/proxy-from-env": { 673 | "version": "1.1.0", 674 | "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", 675 | "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", 676 | "license": "MIT" 677 | }, 678 | "node_modules/raw-body": { 679 | "version": "3.0.0", 680 | "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-3.0.0.tgz", 681 | "integrity": "sha512-RmkhL8CAyCRPXCE28MMH0z2PNWQBNk2Q09ZdxM9IOOXwxwZbN+qbWaatPkdkWIKL2ZVDImrN/pK5HTRz2PcS4g==", 682 | "license": "MIT", 683 | "dependencies": { 684 | "bytes": "3.1.2", 685 | "http-errors": "2.0.0", 686 | "iconv-lite": "0.6.3", 687 | "unpipe": "1.0.0" 688 | }, 689 | "engines": { 690 | "node": ">= 0.8" 691 | } 692 | }, 693 | "node_modules/safer-buffer": { 694 | "version": "2.1.2", 695 | "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", 696 | "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", 697 | "license": "MIT" 698 | }, 699 | "node_modules/setprototypeof": { 700 | "version": "1.2.0", 701 | "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", 702 | "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", 703 | "license": "ISC" 704 | }, 705 | "node_modules/statuses": { 706 | "version": "2.0.1", 707 | "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz", 708 | "integrity": 
"sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==", 709 | "license": "MIT", 710 | "engines": { 711 | "node": ">= 0.8" 712 | } 713 | }, 714 | "node_modules/toidentifier": { 715 | "version": "1.0.1", 716 | "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", 717 | "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", 718 | "license": "MIT", 719 | "engines": { 720 | "node": ">=0.6" 721 | } 722 | }, 723 | "node_modules/tr46": { 724 | "version": "0.0.3", 725 | "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", 726 | "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", 727 | "license": "MIT" 728 | }, 729 | "node_modules/typescript": { 730 | "version": "5.7.2", 731 | "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.2.tgz", 732 | "integrity": "sha512-i5t66RHxDvVN40HfDd1PsEThGNnlMCMT3jMUuoh9/0TaqWevNontacunWyN02LA9/fIbEWlcHZcgTKb9QoaLfg==", 733 | "license": "Apache-2.0", 734 | "bin": { 735 | "tsc": "bin/tsc", 736 | "tsserver": "bin/tsserver" 737 | }, 738 | "engines": { 739 | "node": ">=14.17" 740 | } 741 | }, 742 | "node_modules/undici": { 743 | "version": "5.28.4", 744 | "resolved": "https://registry.npmjs.org/undici/-/undici-5.28.4.tgz", 745 | "integrity": "sha512-72RFADWFqKmUb2hmmvNODKL3p9hcB6Gt2DOQMis1SEBaV6a4MH8soBvzg+95CYhCKPFedut2JY9bMfrDl9D23g==", 746 | "license": "MIT", 747 | "dependencies": { 748 | "@fastify/busboy": "^2.0.0" 749 | }, 750 | "engines": { 751 | "node": ">=14.0" 752 | } 753 | }, 754 | "node_modules/undici-types": { 755 | "version": "5.26.5", 756 | "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", 757 | "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", 758 | "license": "MIT" 759 | }, 760 | "node_modules/unpipe": { 761 | "version": "1.0.0", 
762 | "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", 763 | "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==", 764 | "license": "MIT", 765 | "engines": { 766 | "node": ">= 0.8" 767 | } 768 | }, 769 | "node_modules/web-streams-polyfill": { 770 | "version": "4.0.0-beta.3", 771 | "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", 772 | "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", 773 | "license": "MIT", 774 | "engines": { 775 | "node": ">= 14" 776 | } 777 | }, 778 | "node_modules/webidl-conversions": { 779 | "version": "3.0.1", 780 | "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", 781 | "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", 782 | "license": "BSD-2-Clause" 783 | }, 784 | "node_modules/whatwg-encoding": { 785 | "version": "3.1.1", 786 | "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", 787 | "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", 788 | "license": "MIT", 789 | "dependencies": { 790 | "iconv-lite": "0.6.3" 791 | }, 792 | "engines": { 793 | "node": ">=18" 794 | } 795 | }, 796 | "node_modules/whatwg-fetch": { 797 | "version": "3.6.20", 798 | "resolved": "https://registry.npmjs.org/whatwg-fetch/-/whatwg-fetch-3.6.20.tgz", 799 | "integrity": "sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg==", 800 | "license": "MIT" 801 | }, 802 | "node_modules/whatwg-mimetype": { 803 | "version": "4.0.0", 804 | "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", 805 | "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", 806 | "license": 
"MIT", 807 | "engines": { 808 | "node": ">=18" 809 | } 810 | }, 811 | "node_modules/whatwg-url": { 812 | "version": "5.0.0", 813 | "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", 814 | "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", 815 | "license": "MIT", 816 | "dependencies": { 817 | "tr46": "~0.0.3", 818 | "webidl-conversions": "^3.0.0" 819 | } 820 | }, 821 | "node_modules/zod": { 822 | "version": "3.24.1", 823 | "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.1.tgz", 824 | "integrity": "sha512-muH7gBL9sI1nciMZV67X5fTKKBLtwpZ5VBp1vsOQzj1MhrBZ4wlVCm3gedKZWLp0Oyel8sIGfeiz54Su+OVT+A==", 825 | "license": "MIT", 826 | "funding": { 827 | "url": "https://github.com/sponsors/colinhacks" 828 | } 829 | } 830 | } 831 | } 832 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@mcpservers/ragdocs", 3 | "version": "1.0.0", 4 | "type": "module", 5 | "main": "build/index.js", 6 | "bin": { 7 | "mcp-ragdocs": "build/index.js" 8 | }, 9 | "files": [ 10 | "build/**/*" 11 | ], 12 | "scripts": { 13 | "build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"", 14 | "prepublishOnly": "npm run build", 15 | "test": "echo \"Error: no test specified\" && exit 1" 16 | }, 17 | "keywords": [ 18 | "mcp", 19 | "rag", 20 | "documentation", 21 | "search", 22 | "embeddings" 23 | ], 24 | "author": "bossying", 25 | "license": "Apache License 2.0", 26 | "description": "MCP server for RAG-based document search and management", 27 | "homepage": "https://github.com/heltonteixeira/ragdocs", 28 | "repository": { 29 | "type": "git", 30 | "url": "git+https://github.com/heltonteixeira/ragdocs.git" 31 | }, 32 | "dependencies": { 33 | "@modelcontextprotocol/sdk": "^1.0.4", 34 | "@qdrant/js-client-rest": "^1.12.0", 35 | "axios": "^1.7.9", 36 | 
"cheerio": "^1.0.0", 37 | "ollama": "^0.5.11", 38 | "openai": "^4.77.0", 39 | "playwright": "^1.49.1" 40 | }, 41 | "devDependencies": { 42 | "typescript": "^5.7.2" 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/api-client.ts: -------------------------------------------------------------------------------- 1 | import { QdrantClient } from '@qdrant/js-client-rest'; 2 | import { chromium } from 'playwright'; 3 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; 4 | import { EmbeddingService } from './embeddings.js'; 5 | import { QdrantWrapper } from './tools/qdrant-client.js'; 6 | import { Document } from './types.js'; 7 | 8 | export interface QdrantCollectionConfig { 9 | params: { 10 | vectors: { 11 | size: number; 12 | distance: string; 13 | }; 14 | }; 15 | } 16 | 17 | export interface QdrantCollectionInfo { 18 | config: QdrantCollectionConfig; 19 | } 20 | 21 | export class ApiClient { 22 | qdrantClient: QdrantClient; 23 | private embeddingService: EmbeddingService; 24 | readonly qdrant: QdrantWrapper; 25 | browser: any; 26 | 27 | constructor(config: { 28 | embeddingConfig: { 29 | provider: 'ollama' | 'openai'; 30 | apiKey?: string; 31 | model?: string; 32 | }; 33 | qdrantUrl?: string; 34 | qdrantApiKey?: string; 35 | }) { 36 | this.embeddingService = EmbeddingService.createFromConfig(config.embeddingConfig); 37 | 38 | this.qdrant = new QdrantWrapper(config.qdrantUrl, config.qdrantApiKey); 39 | this.qdrantClient = this.qdrant.client; 40 | } 41 | 42 | async initBrowser() { 43 | if (!this.browser) { 44 | this.browser = await chromium.launch(); 45 | } 46 | } 47 | 48 | async cleanup() { 49 | if (this.browser) { 50 | await this.browser.close(); 51 | } 52 | } 53 | 54 | async getEmbeddings(text: string): Promise { 55 | return this.embeddingService.generateEmbeddings(text); 56 | } 57 | 58 | get embeddings(): EmbeddingService { 59 | return this.embeddingService; 60 | } 61 | 62 | async 
initCollection(collectionName: string) { 63 | try { 64 | const collections = await this.qdrantClient.getCollections(); 65 | const exists = collections.collections.some(c => c.name === collectionName); 66 | 67 | const requiredVectorSize = this.embeddingService.getVectorSize(); 68 | 69 | if (!exists) { 70 | console.error(`Creating new collection with vector size ${requiredVectorSize}`); 71 | await this.createCollection(collectionName, requiredVectorSize); 72 | return; 73 | } 74 | 75 | // Verify vector size of existing collection 76 | const collectionInfo = await this.qdrantClient.getCollection(collectionName) as QdrantCollectionInfo; 77 | const currentVectorSize = collectionInfo.config?.params?.vectors?.size; 78 | 79 | if (!currentVectorSize) { 80 | console.error('Could not determine current vector size, recreating collection...'); 81 | await this.recreateCollection(collectionName, requiredVectorSize); 82 | return; 83 | } 84 | 85 | if (currentVectorSize !== requiredVectorSize) { 86 | console.error(`Vector size mismatch: collection=${currentVectorSize}, required=${requiredVectorSize}`); 87 | await this.recreateCollection(collectionName, requiredVectorSize); 88 | } 89 | } catch (error) { 90 | if (error instanceof Error) { 91 | if (error.message.includes('unauthorized')) { 92 | throw new McpError( 93 | ErrorCode.InvalidRequest, 94 | 'Failed to authenticate with Qdrant. Please check your API key.' 95 | ); 96 | } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) { 97 | throw new McpError( 98 | ErrorCode.InternalError, 99 | 'Failed to connect to Qdrant. Please check your QDRANT_URL.' 
100 | ); 101 | } 102 | } 103 | throw new McpError( 104 | ErrorCode.InternalError, 105 | `Failed to initialize Qdrant collection: ${error}` 106 | ); 107 | } 108 | } 109 | 110 | private async createCollection(collectionName: string, vectorSize: number) { 111 | await this.qdrantClient.createCollection(collectionName, { 112 | vectors: { 113 | size: vectorSize, 114 | distance: 'Cosine', 115 | }, 116 | optimizers_config: { 117 | default_segment_number: 2, 118 | memmap_threshold: 20000, 119 | }, 120 | replication_factor: 2, 121 | }); 122 | 123 | // Create indexes for efficient filtering 124 | await this.qdrantClient.createPayloadIndex(collectionName, { 125 | field_name: 'url', 126 | field_schema: 'keyword', 127 | }); 128 | 129 | await this.qdrantClient.createPayloadIndex(collectionName, { 130 | field_name: 'timestamp', 131 | field_schema: 'datetime', 132 | }); 133 | } 134 | 135 | private async recreateCollection(collectionName: string, vectorSize: number) { 136 | try { 137 | console.error('Recreating collection with new vector size...'); 138 | await this.qdrantClient.deleteCollection(collectionName); 139 | await this.createCollection(collectionName, vectorSize); 140 | console.error(`Collection recreated with new vector size ${vectorSize}`); 141 | } catch (error) { 142 | throw new McpError( 143 | ErrorCode.InternalError, 144 | `Failed to recreate collection: ${error}` 145 | ); 146 | } 147 | } 148 | 149 | async isHealthy(): Promise { 150 | try { 151 | await this.qdrantClient.getCollections(); 152 | return true; 153 | } catch { 154 | return false; 155 | } 156 | } 157 | 158 | async addDocument(doc: Document): Promise { 159 | try { 160 | // Check if document already exists 161 | if (await this.qdrant.documentExists(doc.url)) { 162 | throw new McpError( 163 | ErrorCode.InvalidRequest, 164 | `Document with URL ${doc.url} already exists` 165 | ); 166 | } 167 | 168 | // Generate embeddings for the content 169 | const embedding = await 
this.embeddingService.generateEmbeddings(doc.content); 170 | 171 | // Store document in Qdrant 172 | await this.qdrant.storeDocumentChunks( 173 | [{ 174 | content: doc.content, 175 | index: 0, 176 | metadata: { 177 | startPosition: 0, 178 | endPosition: doc.content.length, 179 | isCodeBlock: /```/.test(doc.content) 180 | } 181 | }], 182 | [embedding], 183 | { 184 | url: doc.url, 185 | title: doc.metadata.title || '', 186 | domain: new URL(doc.url).hostname, 187 | timestamp: new Date().toISOString(), 188 | contentType: doc.metadata.contentType || 'text/plain', 189 | wordCount: doc.content.split(/\s+/).length, 190 | hasCode: /```|\bfunction\b|\bclass\b|\bconst\b|\blet\b|\bvar\b/.test(doc.content), 191 | } 192 | ); 193 | } catch (error) { 194 | throw new McpError( 195 | ErrorCode.InternalError, 196 | `Failed to add document: ${error}` 197 | ); 198 | } 199 | } 200 | 201 | async deleteDocument(url: string): Promise { 202 | try { 203 | await this.qdrant.removeDocument(url); 204 | } catch (error) { 205 | throw new McpError( 206 | ErrorCode.InternalError, 207 | `Failed to delete document: ${error}` 208 | ); 209 | } 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /src/embeddings.ts: -------------------------------------------------------------------------------- 1 | import ollama from 'ollama'; 2 | import OpenAI from 'openai'; 3 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; 4 | 5 | export interface EmbeddingProvider { 6 | generateEmbeddings(text: string): Promise; 7 | getVectorSize(): number; 8 | } 9 | 10 | export class OllamaProvider implements EmbeddingProvider { 11 | private model: string; 12 | 13 | constructor(model: string = 'nomic-embed-text') { 14 | this.model = model; 15 | } 16 | 17 | async generateEmbeddings(text: string): Promise { 18 | try { 19 | console.error('Generating Ollama embeddings for text:', text.substring(0, 50) + '...'); 20 | const response = await ollama.embeddings({ 
21 | model: this.model, 22 | prompt: text 23 | }); 24 | console.error('Successfully generated Ollama embeddings with size:', response.embedding.length); 25 | return response.embedding; 26 | } catch (error) { 27 | console.error('Ollama embedding error:', error); 28 | throw new McpError( 29 | ErrorCode.InternalError, 30 | `Failed to generate embeddings with Ollama: ${error}` 31 | ); 32 | } 33 | } 34 | 35 | getVectorSize(): number { 36 | // nomic-embed-text produces 768-dimensional vectors 37 | return 768; 38 | } 39 | } 40 | 41 | export class OpenAIProvider implements EmbeddingProvider { 42 | private client: OpenAI; 43 | private model: string; 44 | 45 | constructor(apiKey: string, model: string = 'text-embedding-3-small') { 46 | this.client = new OpenAI({ apiKey }); 47 | this.model = model; 48 | } 49 | 50 | async generateEmbeddings(text: string): Promise { 51 | try { 52 | console.error('Generating OpenAI embeddings for text:', text.substring(0, 50) + '...'); 53 | const response = await this.client.embeddings.create({ 54 | model: this.model, 55 | input: text, 56 | }); 57 | const embedding = response.data[0].embedding; 58 | console.error('Successfully generated OpenAI embeddings with size:', embedding.length); 59 | return embedding; 60 | } catch (error) { 61 | console.error('OpenAI embedding error:', error); 62 | throw new McpError( 63 | ErrorCode.InternalError, 64 | `Failed to generate embeddings with OpenAI: ${error}` 65 | ); 66 | } 67 | } 68 | 69 | getVectorSize(): number { 70 | // text-embedding-3-small produces 1536-dimensional vectors 71 | return 1536; 72 | } 73 | } 74 | 75 | export class EmbeddingService { 76 | private provider: EmbeddingProvider; 77 | 78 | constructor(provider: EmbeddingProvider) { 79 | this.provider = provider; 80 | } 81 | 82 | async generateEmbeddings(text: string): Promise { 83 | return this.provider.generateEmbeddings(text); 84 | } 85 | 86 | getVectorSize(): number { 87 | return this.provider.getVectorSize(); 88 | } 89 | 90 | static 
createFromConfig(config: { 91 | provider: 'ollama' | 'openai'; 92 | apiKey?: string; 93 | model?: string; 94 | }): EmbeddingService { 95 | switch (config.provider) { 96 | case 'ollama': 97 | return new EmbeddingService(new OllamaProvider(config.model)); 98 | case 'openai': 99 | if (!config.apiKey) { 100 | throw new McpError( 101 | ErrorCode.InvalidRequest, 102 | 'OpenAI API key is required' 103 | ); 104 | } 105 | return new EmbeddingService(new OpenAIProvider(config.apiKey, config.model)); 106 | default: 107 | throw new McpError( 108 | ErrorCode.InvalidRequest, 109 | `Unknown embedding provider: ${config.provider}` 110 | ); 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/handlers/add-documentation.ts: -------------------------------------------------------------------------------- 1 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; 2 | import { Server } from '@modelcontextprotocol/sdk/server/index.js'; 3 | import { BaseHandler } from './base-handler.js'; 4 | import { ApiClient } from '../api-client.js'; 5 | import { DocumentChunk, ToolResult } from '../types.js'; 6 | import * as cheerio from 'cheerio'; 7 | import crypto from 'crypto'; 8 | 9 | const COLLECTION_NAME = 'documentation'; 10 | const BATCH_SIZE = 100; 11 | 12 | export class AddDocumentationHandler extends BaseHandler { 13 | constructor(server: Server, apiClient: ApiClient) { 14 | super(server, apiClient); 15 | } 16 | 17 | async handle(args: any): Promise { 18 | if (!args.url || typeof args.url !== 'string') { 19 | throw new McpError(ErrorCode.InvalidParams, 'URL is required'); 20 | } 21 | 22 | try { 23 | const chunks = await this.fetchAndProcessUrl(args.url); 24 | 25 | // Batch process chunks for better performance 26 | for (let i = 0; i < chunks.length; i += BATCH_SIZE) { 27 | const batch = chunks.slice(i, i + BATCH_SIZE); 28 | const points = await Promise.all( 29 | batch.map(async (chunk) => { 30 | const 
embedding = await this.apiClient.getEmbeddings(chunk.text); 31 | return { 32 | id: this.generatePointId(), 33 | vector: embedding, 34 | payload: { 35 | ...chunk, 36 | _type: 'DocumentChunk' as const, 37 | } as Record, 38 | }; 39 | }) 40 | ); 41 | 42 | try { 43 | await this.apiClient.qdrantClient.upsert(COLLECTION_NAME, { 44 | wait: true, 45 | points, 46 | }); 47 | } catch (error) { 48 | if (error instanceof Error) { 49 | if (error.message.includes('unauthorized')) { 50 | throw new McpError( 51 | ErrorCode.InvalidRequest, 52 | 'Failed to authenticate with Qdrant cloud while adding documents' 53 | ); 54 | } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) { 55 | throw new McpError( 56 | ErrorCode.InternalError, 57 | 'Connection to Qdrant cloud failed while adding documents' 58 | ); 59 | } 60 | } 61 | throw error; 62 | } 63 | } 64 | 65 | return { 66 | content: [ 67 | { 68 | type: 'text', 69 | text: `Successfully added documentation from ${args.url} (${chunks.length} chunks processed in ${Math.ceil(chunks.length / BATCH_SIZE)} batches)`, 70 | }, 71 | ], 72 | }; 73 | } catch (error) { 74 | if (error instanceof McpError) { 75 | throw error; 76 | } 77 | return { 78 | content: [ 79 | { 80 | type: 'text', 81 | text: `Failed to add documentation: ${error}`, 82 | }, 83 | ], 84 | isError: true, 85 | }; 86 | } 87 | } 88 | 89 | private async fetchAndProcessUrl(url: string): Promise { 90 | await this.apiClient.initBrowser(); 91 | const page = await this.apiClient.browser.newPage(); 92 | 93 | try { 94 | await page.goto(url, { waitUntil: 'networkidle' }); 95 | const content = await page.content(); 96 | const $ = cheerio.load(content); 97 | 98 | // Remove script tags, style tags, and comments 99 | $('script').remove(); 100 | $('style').remove(); 101 | $('noscript').remove(); 102 | 103 | // Extract main content 104 | const title = $('title').text() || url; 105 | const mainContent = $('main, article, .content, .documentation, body').text(); 106 | 
107 | // Split content into chunks 108 | const chunks = this.chunkText(mainContent, 1000); 109 | 110 | return chunks.map(chunk => ({ 111 | text: chunk, 112 | url, 113 | title, 114 | timestamp: new Date().toISOString(), 115 | })); 116 | } catch (error) { 117 | throw new McpError( 118 | ErrorCode.InternalError, 119 | `Failed to fetch URL ${url}: ${error}` 120 | ); 121 | } finally { 122 | await page.close(); 123 | } 124 | } 125 | 126 | private chunkText(text: string, maxChunkSize: number): string[] { 127 | const words = text.split(/\s+/); 128 | const chunks: string[] = []; 129 | let currentChunk: string[] = []; 130 | 131 | for (const word of words) { 132 | currentChunk.push(word); 133 | const currentLength = currentChunk.join(' ').length; 134 | 135 | if (currentLength >= maxChunkSize) { 136 | chunks.push(currentChunk.join(' ')); 137 | currentChunk = []; 138 | } 139 | } 140 | 141 | if (currentChunk.length > 0) { 142 | chunks.push(currentChunk.join(' ')); 143 | } 144 | 145 | return chunks; 146 | } 147 | 148 | private generatePointId(): string { 149 | return crypto.randomBytes(16).toString('hex'); 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /src/handlers/base-handler.ts: -------------------------------------------------------------------------------- 1 | import { Server } from '@modelcontextprotocol/sdk/server/index.js'; 2 | import { ApiClient } from '../api-client.js'; 3 | import { ToolResult } from '../types.js'; 4 | 5 | export abstract class BaseHandler { 6 | constructor( 7 | protected readonly server: Server, 8 | protected readonly apiClient: ApiClient 9 | ) {} 10 | 11 | abstract handle(args: any): Promise; 12 | } 13 | -------------------------------------------------------------------------------- /src/handlers/list-documentation.ts: -------------------------------------------------------------------------------- 1 | import { Server } from '@modelcontextprotocol/sdk/server/index.js'; 2 | import { BaseHandler 
} from './base-handler.js'; 3 | import { QdrantWrapper } from '../tools/qdrant-client.js'; 4 | import { ListOptions, ListResult, ListUtils } from '../tools/list-utils.js'; 5 | import { ToolResult } from '../types.js'; 6 | import { ApiClient } from '../api-client.js'; 7 | 8 | export class ListDocumentationHandler extends BaseHandler { 9 | protected server: Server; 10 | protected apiClient: ApiClient; 11 | 12 | constructor(server: Server, apiClient: ApiClient) { 13 | super(server, apiClient); 14 | this.server = server; 15 | this.apiClient = apiClient; 16 | } 17 | 18 | async handle(args: ListOptions): Promise { 19 | try { 20 | // Ensure Qdrant is initialized 21 | await this.apiClient.qdrant.initializeCollection(); 22 | 23 | // Set default values 24 | const page = args.page || 1; 25 | const pageSize = args.pageSize || 20; 26 | const sortBy = args.sortBy || 'timestamp'; 27 | const sortOrder = args.sortOrder || 'desc'; 28 | 29 | // Get documents with pagination 30 | const { total, documents } = await this.apiClient.qdrant.listDocuments({ 31 | offset: (page - 1) * pageSize, 32 | limit: pageSize, 33 | sortBy, 34 | sortOrder, 35 | }); 36 | 37 | // Calculate pagination details 38 | const { totalPages } = ListUtils.getPaginationDetails(total, page, pageSize); 39 | 40 | // Sort documents if needed 41 | const sortedDocs = ListUtils.sortDocuments(documents, sortBy, sortOrder); 42 | 43 | // Group by domain if requested 44 | const groupedDocs = args.groupByDomain 45 | ? 
ListUtils.groupByDomain(sortedDocs) 46 | : [{ documents: sortedDocs }]; 47 | 48 | // Prepare result 49 | const result: ListResult = { 50 | total, 51 | page, 52 | pageSize, 53 | totalPages, 54 | documents: groupedDocs, 55 | }; 56 | 57 | // Format as markdown 58 | const markdown = ListUtils.formatAsMarkdown(result); 59 | 60 | return { 61 | content: [ 62 | { 63 | type: 'text', 64 | text: markdown, 65 | }, 66 | ], 67 | }; 68 | } catch (error) { 69 | return { 70 | content: [ 71 | { 72 | type: 'text', 73 | text: `Failed to list documentation: ${(error as Error).message}`, 74 | }, 75 | ], 76 | isError: true, 77 | }; 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/handlers/search-documentation.ts: -------------------------------------------------------------------------------- 1 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; 2 | import { BaseHandler } from './base-handler.js'; 3 | import { QdrantWrapper } from '../tools/qdrant-client.js'; 4 | import { EmbeddingService } from '../embeddings.js'; 5 | import { 6 | SearchOptions, 7 | SearchResult, 8 | validateSearchOptions, 9 | extractSnippet, 10 | normalizeScore, 11 | formatResultsAsMarkdown, 12 | } from '../tools/search-utils.js'; 13 | 14 | interface SearchDocumentationArgs { 15 | query: string; 16 | options?: SearchOptions; 17 | } 18 | 19 | export class SearchDocumentationHandler extends BaseHandler { 20 | private qdrant: QdrantWrapper; 21 | private embeddings: EmbeddingService; 22 | 23 | constructor( 24 | qdrant: QdrantWrapper, 25 | embeddings: EmbeddingService, 26 | ...args: ConstructorParameters 27 | ) { 28 | super(...args); 29 | this.qdrant = qdrant; 30 | this.embeddings = embeddings; 31 | } 32 | 33 | async handle(args: SearchDocumentationArgs) { 34 | // Validate input 35 | if (!args.query?.trim()) { 36 | throw new McpError( 37 | ErrorCode.InvalidRequest, 38 | 'Query string is required' 39 | ); 40 | } 41 | 42 | // Validate 
search options if provided 43 | if (args.options) { 44 | validateSearchOptions(args.options); 45 | } 46 | 47 | try { 48 | // Generate embeddings for the query 49 | console.error('Generating embeddings for query:', args.query); 50 | const queryVector = await this.embeddings.generateEmbeddings(args.query); 51 | 52 | // Search for similar documents 53 | console.error('Searching for similar documents...'); 54 | const searchResults = await this.qdrant.searchSimilar(queryVector, args.options); 55 | 56 | // Process and format results 57 | const formattedResults: SearchResult[] = searchResults.map(result => ({ 58 | url: result.url, 59 | title: result.title, 60 | domain: result.domain, 61 | timestamp: result.timestamp, 62 | score: normalizeScore(result.score), 63 | snippet: extractSnippet(result.content), 64 | metadata: { 65 | contentType: result.contentType, 66 | wordCount: result.wordCount, 67 | hasCode: result.hasCode, 68 | chunkIndex: result.chunkIndex, 69 | totalChunks: result.totalChunks, 70 | }, 71 | })); 72 | 73 | // Format results as markdown 74 | const markdown = formatResultsAsMarkdown(formattedResults); 75 | 76 | return { 77 | content: [ 78 | { 79 | type: 'text', 80 | text: markdown, 81 | }, 82 | ], 83 | }; 84 | } catch (error) { 85 | console.error('Search error:', error); 86 | throw new McpError( 87 | ErrorCode.InternalError, 88 | `Failed to search documentation: ${error}` 89 | ); 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/handlers/test-embeddings.ts: -------------------------------------------------------------------------------- 1 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; 2 | import { Server } from '@modelcontextprotocol/sdk/server/index.js'; 3 | import { BaseHandler } from './base-handler.js'; 4 | import { ApiClient } from '../api-client.js'; 5 | import { ToolResult } from '../types.js'; 6 | import { EmbeddingService } from '../embeddings.js'; 7 | 8 | const 
COLLECTION_NAME = 'documentation'; 9 | 10 | export class TestEmbeddingsHandler extends BaseHandler { 11 | constructor(server: Server, apiClient: ApiClient) { 12 | super(server, apiClient); 13 | } 14 | 15 | async handle(args: any): Promise { 16 | if (!args.text || typeof args.text !== 'string') { 17 | throw new McpError(ErrorCode.InvalidParams, 'Text is required'); 18 | } 19 | 20 | try { 21 | // Create a new embedding service instance with the requested configuration 22 | const tempEmbeddingService = EmbeddingService.createFromConfig({ 23 | provider: args.provider || 'ollama', 24 | apiKey: args.apiKey, 25 | model: args.model 26 | }); 27 | 28 | const embedding = await tempEmbeddingService.generateEmbeddings(args.text); 29 | const provider = args.provider || 'ollama'; 30 | const model = args.model || (provider === 'ollama' ? 'nomic-embed-text' : 'text-embedding-3-small'); 31 | 32 | // If test is successful, update the server's embedding service 33 | const newApiClient = new ApiClient({ 34 | embeddingConfig: { 35 | provider: args.provider || 'ollama', 36 | apiKey: args.apiKey, 37 | model: args.model 38 | }, 39 | qdrantUrl: process.env.QDRANT_URL, 40 | qdrantApiKey: process.env.QDRANT_API_KEY 41 | }); 42 | 43 | // Initialize collection with new vector size 44 | await newApiClient.initCollection(COLLECTION_NAME); 45 | 46 | return { 47 | content: [ 48 | { 49 | type: 'text', 50 | text: `Successfully configured ${provider} embeddings (${model}).\nVector size: ${embedding.length}\nQdrant collection updated to match new vector size.`, 51 | }, 52 | ], 53 | }; 54 | } catch (error) { 55 | return { 56 | content: [ 57 | { 58 | type: 'text', 59 | text: `Failed to test embeddings: ${error}`, 60 | }, 61 | ], 62 | isError: true, 63 | }; 64 | } 65 | } 66 | } 67 | 68 | export const testEmbeddingsSchema = { 69 | type: 'object', 70 | properties: { 71 | text: { 72 | type: 'string', 73 | description: 'Text to generate embeddings for', 74 | }, 75 | provider: { 76 | type: 'string', 77 | 
description: 'Embedding provider to use (ollama or openai)', 78 | enum: ['ollama', 'openai'], 79 | default: 'ollama', 80 | }, 81 | apiKey: { 82 | type: 'string', 83 | description: 'OpenAI API key (required if provider is openai)', 84 | }, 85 | model: { 86 | type: 'string', 87 | description: 'Model to use for embeddings', 88 | }, 89 | }, 90 | required: ['text'], 91 | } as const; 92 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import { Server } from '@modelcontextprotocol/sdk/server/index.js'; 3 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; 4 | import { CallToolRequestSchema, ListToolsRequestSchema, McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; 5 | import axios from 'axios'; 6 | import { ApiClient } from './api-client.js'; 7 | import { SearchDocumentationHandler } from './handlers/search-documentation.js'; 8 | import { ListDocumentationHandler } from './handlers/list-documentation.js'; 9 | import { ListOptions } from './tools/list-utils.js'; 10 | import { Document } from './types.js'; 11 | 12 | // Force using IP address to avoid hostname resolution issues 13 | const QDRANT_URL = process.env.QDRANT_URL || 'http://127.0.0.1:6333'; 14 | const QDRANT_API_KEY = process.env.QDRANT_API_KEY; 15 | const EMBEDDING_PROVIDER = process.env.EMBEDDING_PROVIDER || 'ollama'; 16 | const OPENAI_API_KEY = process.env.OPENAI_API_KEY; 17 | 18 | // Test connection with direct axios call first 19 | try { 20 | const response = await axios.get(`${QDRANT_URL}/collections`); 21 | console.error('Successfully connected to Qdrant:', response.data); 22 | } catch (error) { 23 | console.error('Failed to connect to Qdrant:', error); 24 | throw new McpError( 25 | ErrorCode.InternalError, 26 | 'Failed to establish initial connection to Qdrant server' 27 | ); 28 | } 29 | 30 | const client 
= new ApiClient({ 31 | qdrantUrl: QDRANT_URL, 32 | qdrantApiKey: QDRANT_API_KEY, 33 | embeddingConfig: { 34 | provider: EMBEDDING_PROVIDER as 'ollama' | 'openai', 35 | apiKey: OPENAI_API_KEY, 36 | model: EMBEDDING_PROVIDER === 'ollama' ? 'nomic-embed-text' : 'text-embedding-3-small' 37 | } 38 | }); 39 | 40 | try { 41 | // Initialize Qdrant collection 42 | await client.qdrant.initializeCollection(); 43 | console.error('Successfully initialized Qdrant collection'); 44 | } catch (error) { 45 | console.error('Failed to initialize Qdrant collection:', error); 46 | throw error; 47 | } 48 | 49 | class RagDocsServer { 50 | private server: Server; 51 | 52 | constructor() { 53 | this.server = new Server( 54 | { 55 | name: 'ragdocs', 56 | version: '0.1.0', 57 | }, 58 | { 59 | capabilities: { 60 | tools: {}, 61 | }, 62 | } 63 | ); 64 | 65 | this.setupToolHandlers(); 66 | this.server.onerror = (error) => console.error('[MCP Error]', error); 67 | } 68 | 69 | private setupToolHandlers() { 70 | this.server.setRequestHandler(ListToolsRequestSchema, async () => ({ 71 | tools: [ 72 | { 73 | name: 'add_document', 74 | description: 'Add a document to the RAG system', 75 | inputSchema: { 76 | type: 'object', 77 | properties: { 78 | url: { type: 'string', description: 'Document URL' }, 79 | content: { type: 'string', description: 'Document content' }, 80 | metadata: { 81 | type: 'object', 82 | properties: { 83 | title: { type: 'string', description: 'Document title' }, 84 | contentType: { type: 'string', description: 'Content type (e.g., text/plain, text/markdown)' }, 85 | }, 86 | additionalProperties: true, 87 | }, 88 | }, 89 | required: ['url', 'content'], 90 | }, 91 | }, 92 | { 93 | name: 'search_documents', 94 | description: 'Search for documents using semantic similarity', 95 | inputSchema: { 96 | type: 'object', 97 | properties: { 98 | query: { 99 | type: 'string', 100 | description: 'Natural language search query' 101 | }, 102 | options: { 103 | type: 'object', 104 | description: 
'Search options', 105 | properties: { 106 | limit: { 107 | type: 'number', 108 | description: 'Maximum number of results (1-20)', 109 | minimum: 1, 110 | maximum: 20 111 | }, 112 | scoreThreshold: { 113 | type: 'number', 114 | description: 'Minimum similarity score (0-1)', 115 | minimum: 0, 116 | maximum: 1 117 | }, 118 | filters: { 119 | type: 'object', 120 | description: 'Optional filters', 121 | properties: { 122 | domain: { 123 | type: 'string', 124 | description: 'Filter by domain' 125 | }, 126 | hasCode: { 127 | type: 'boolean', 128 | description: 'Filter for documents containing code' 129 | }, 130 | after: { 131 | type: 'string', 132 | description: 'Filter for documents after date (ISO format)' 133 | }, 134 | before: { 135 | type: 'string', 136 | description: 'Filter for documents before date (ISO format)' 137 | } 138 | } 139 | } 140 | } 141 | } 142 | }, 143 | required: ['query'], 144 | }, 145 | }, 146 | { 147 | name: 'delete_document', 148 | description: 'Delete a document from the RAG system', 149 | inputSchema: { 150 | type: 'object', 151 | properties: { 152 | url: { type: 'string', description: 'Document URL to delete' }, 153 | }, 154 | required: ['url'], 155 | }, 156 | }, 157 | { 158 | name: 'list_documents', 159 | description: 'List all stored documents with pagination and grouping options', 160 | inputSchema: { 161 | type: 'object', 162 | properties: { 163 | page: { 164 | type: 'number', 165 | description: 'Page number (default: 1)', 166 | minimum: 1 167 | }, 168 | pageSize: { 169 | type: 'number', 170 | description: 'Number of documents per page (default: 20)', 171 | minimum: 1, 172 | maximum: 100 173 | }, 174 | groupByDomain: { 175 | type: 'boolean', 176 | description: 'Group documents by domain (default: false)' 177 | }, 178 | sortBy: { 179 | type: 'string', 180 | description: 'Sort field (default: timestamp)', 181 | enum: ['timestamp', 'title', 'domain'] 182 | }, 183 | sortOrder: { 184 | type: 'string', 185 | description: 'Sort order (default: 
desc)', 186 | enum: ['asc', 'desc'] 187 | } 188 | } 189 | } 190 | }, 191 | ], 192 | })); 193 | 194 | this.server.setRequestHandler(CallToolRequestSchema, async (request) => { 195 | try { 196 | switch (request.params.name) { 197 | case 'add_document': { 198 | const args = request.params.arguments as Record; 199 | if (!args || typeof args.url !== 'string' || typeof args.content !== 'string') { 200 | throw new Error('Invalid document format: url and content must be strings'); 201 | } 202 | const doc: Document = { 203 | url: args.url, 204 | content: args.content, 205 | metadata: (args.metadata as Record) || {} 206 | }; 207 | await client.addDocument(doc); 208 | return { 209 | content: [{ type: 'text', text: `Document ${doc.url} added successfully` }], 210 | }; 211 | } 212 | 213 | case 'search_documents': { 214 | const { query, options } = request.params.arguments as { 215 | query: string; 216 | options?: { 217 | limit?: number; 218 | scoreThreshold?: number; 219 | filters?: { 220 | domain?: string; 221 | hasCode?: boolean; 222 | after?: string; 223 | before?: string; 224 | }; 225 | }; 226 | }; 227 | 228 | const searchHandler = new SearchDocumentationHandler( 229 | client.qdrant, 230 | client.embeddings, 231 | this.server, 232 | client 233 | ); 234 | 235 | return await searchHandler.handle({ query, options }); 236 | } 237 | 238 | case 'delete_document': { 239 | const { url } = request.params.arguments as { url: string }; 240 | await client.deleteDocument(url); 241 | return { 242 | content: [{ type: 'text', text: `Document ${url} deleted successfully` }], 243 | }; 244 | } 245 | 246 | case 'list_documents': { 247 | const args = request.params.arguments as ListOptions; 248 | const listHandler = new ListDocumentationHandler(this.server, client); 249 | return await listHandler.handle(args || {}); 250 | } 251 | 252 | default: 253 | throw new Error(`Unknown tool: ${request.params.name}`); 254 | } 255 | } catch (error) { 256 | const errorMessage = error instanceof Error ? 
error.message : 'Unknown error occurred'; 257 | console.error('[Tool Error]', errorMessage); 258 | return { 259 | content: [{ type: 'text', text: `Error: ${errorMessage}` }], 260 | isError: true, 261 | }; 262 | } 263 | }); 264 | } 265 | 266 | async run() { 267 | const transport = new StdioServerTransport(); 268 | await this.server.connect(transport); 269 | console.error('RagDocs MCP server running on stdio'); 270 | } 271 | } 272 | 273 | const server = new RagDocsServer(); 274 | server.run().catch(console.error); 275 | -------------------------------------------------------------------------------- /src/tools/add-documentation.ts: -------------------------------------------------------------------------------- 1 | import OpenAI from 'openai'; 2 | import { URLProcessor, URLProcessingError } from './url-processor.js'; 3 | import { ContentFetcher, ContentFetchError } from './content-fetcher.js'; 4 | import { TextChunker } from './text-chunker.js'; 5 | import { QdrantWrapper, QdrantError } from './qdrant-client.js'; 6 | 7 | export class AddDocumentationError extends Error { 8 | constructor(message: string, public readonly step: string) { 9 | super(message); 10 | this.name = 'AddDocumentationError'; 11 | } 12 | } 13 | 14 | export interface AddDocumentationResult { 15 | url: string; 16 | title: string; 17 | chunks: number; 18 | wordCount: number; 19 | } 20 | 21 | export class AddDocumentationTool { 22 | private openai: OpenAI; 23 | private qdrant: QdrantWrapper; 24 | 25 | constructor(openaiApiKey: string, qdrantUrl?: string) { 26 | if (!openaiApiKey) { 27 | throw new Error('OpenAI API key is required'); 28 | } 29 | 30 | this.openai = new OpenAI({ 31 | apiKey: openaiApiKey, 32 | }); 33 | 34 | this.qdrant = new QdrantWrapper(qdrantUrl); 35 | } 36 | 37 | /** 38 | * Adds a document to the RAG system 39 | * @param url URL of the document to add 40 | * @returns Result of the operation 41 | */ 42 | async addDocument(url: string): Promise { 43 | try { 44 | // Check Qdrant health 
45 | const isHealthy = await this.qdrant.isHealthy(); 46 | if (!isHealthy) { 47 | throw new AddDocumentationError( 48 | 'Qdrant server is not available', 49 | 'health_check' 50 | ); 51 | } 52 | 53 | // Initialize collection if needed 54 | await this.qdrant.initializeCollection(); 55 | 56 | // Process URL 57 | const processedUrl = URLProcessor.processURL(url); 58 | if (!processedUrl.isValid) { 59 | throw new AddDocumentationError('Invalid URL format', 'url_validation'); 60 | } 61 | 62 | // Check if document already exists 63 | const exists = await this.qdrant.documentExists(processedUrl.normalizedUrl); 64 | if (exists) { 65 | // Remove existing document before adding new version 66 | await this.qdrant.removeDocument(processedUrl.normalizedUrl); 67 | } 68 | 69 | // Fetch content 70 | const content = await ContentFetcher.fetchContent(processedUrl.normalizedUrl); 71 | 72 | // Chunk content 73 | const chunks = TextChunker.chunkText(content.content, { 74 | maxChunkSize: 1500, // Leave room for metadata in context window 75 | minChunkSize: 100, 76 | overlap: 200, 77 | respectCodeBlocks: true, 78 | }); 79 | 80 | // Generate embeddings for each chunk 81 | const embeddings = await this.generateEmbeddings( 82 | chunks.map(chunk => chunk.content) 83 | ); 84 | 85 | // Store in Qdrant 86 | await this.qdrant.storeDocumentChunks(chunks, embeddings, { 87 | url: processedUrl.normalizedUrl, 88 | title: content.title, 89 | domain: processedUrl.domain, 90 | timestamp: content.timestamp, 91 | contentType: content.metadata.contentType, 92 | wordCount: content.metadata.wordCount, 93 | hasCode: content.metadata.hasCode, 94 | }); 95 | 96 | return { 97 | url: processedUrl.normalizedUrl, 98 | title: content.title, 99 | chunks: chunks.length, 100 | wordCount: content.metadata.wordCount, 101 | }; 102 | } catch (error) { 103 | if ( 104 | error instanceof URLProcessingError || 105 | error instanceof ContentFetchError || 106 | error instanceof QdrantError || 107 | error instanceof 
AddDocumentationError 108 | ) { 109 | throw error; 110 | } 111 | 112 | throw new AddDocumentationError( 113 | `Unexpected error: ${(error as Error).message}`, 114 | 'unknown' 115 | ); 116 | } 117 | } 118 | 119 | /** 120 | * Generates embeddings for text chunks using OpenAI's API 121 | * @param chunks Array of text chunks 122 | * @returns Array of embeddings 123 | */ 124 | private async generateEmbeddings(chunks: string[]): Promise { 125 | try { 126 | const response = await this.openai.embeddings.create({ 127 | model: 'text-embedding-ada-002', 128 | input: chunks, 129 | }); 130 | 131 | return response.data.map(item => item.embedding); 132 | } catch (error) { 133 | throw new AddDocumentationError( 134 | `Failed to generate embeddings: ${(error as Error).message}`, 135 | 'embedding_generation' 136 | ); 137 | } 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /src/tools/content-fetcher.ts: -------------------------------------------------------------------------------- 1 | import axios, { AxiosError } from 'axios'; 2 | import * as cheerio from 'cheerio'; 3 | 4 | export class ContentFetchError extends Error { 5 | constructor(message: string, public readonly url: string) { 6 | super(message); 7 | this.name = 'ContentFetchError'; 8 | } 9 | } 10 | 11 | export interface FetchedContent { 12 | url: string; 13 | title: string; 14 | content: string; 15 | timestamp: string; 16 | metadata: { 17 | domain: string; 18 | contentType: string; 19 | wordCount: number; 20 | hasCode: boolean; 21 | }; 22 | } 23 | 24 | export class ContentFetcher { 25 | private static readonly TIMEOUT = 30000; // 30 seconds 26 | private static readonly MAX_RETRIES = 3; 27 | private static readonly RETRY_DELAY = 1000; // 1 second 28 | 29 | /** 30 | * Fetches and processes content from a URL 31 | * @param url URL to fetch content from 32 | * @returns Processed content with metadata 33 | */ 34 | static async fetchContent(url: string): Promise { 35 | let 
retries = 0; 36 | let lastError: Error | null = null; 37 | 38 | while (retries < this.MAX_RETRIES) { 39 | try { 40 | const response = await axios.get(url, { 41 | timeout: this.TIMEOUT, 42 | maxRedirects: 5, 43 | headers: { 44 | 'User-Agent': 'Mozilla/5.0 (compatible; RagDocsBot/1.0)', 45 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9', 46 | 'Accept-Language': 'en-US,en;q=0.5', 47 | }, 48 | }); 49 | 50 | const contentType = response.headers['content-type'] || ''; 51 | if (!contentType.includes('html')) { 52 | throw new ContentFetchError('Unsupported content type: ' + contentType, url); 53 | } 54 | 55 | return this.processHtmlContent(url, response.data); 56 | } catch (error) { 57 | lastError = error as Error; 58 | if (error instanceof AxiosError && error.response?.status === 404) { 59 | throw new ContentFetchError('Page not found', url); 60 | } 61 | retries++; 62 | if (retries < this.MAX_RETRIES) { 63 | await new Promise(resolve => setTimeout(resolve, this.RETRY_DELAY)); 64 | } 65 | } 66 | } 67 | 68 | throw new ContentFetchError( 69 | `Failed to fetch content after ${this.MAX_RETRIES} attempts: ${lastError?.message}`, 70 | url 71 | ); 72 | } 73 | 74 | /** 75 | * Processes HTML content to extract relevant text and metadata 76 | * @param url Original URL 77 | * @param html Raw HTML content 78 | * @returns Processed content with metadata 79 | */ 80 | private static processHtmlContent(url: string, html: string): FetchedContent { 81 | const $ = cheerio.load(html); 82 | 83 | // Remove unwanted elements 84 | this.removeUnwantedElements($); 85 | 86 | // Extract title 87 | const title = $('title').text().trim() || 88 | $('h1').first().text().trim() || 89 | 'Untitled Document'; 90 | 91 | // Extract main content 92 | const mainContent = this.extractMainContent($); 93 | 94 | // Check for code blocks 95 | const hasCode = $('pre, code').length > 0 || 96 | mainContent.includes('```') || 97 | /\`[^\`]+\`/.test(mainContent); 98 | 99 | // Count words 100 | const 
wordCount = mainContent.split(/\s+/).filter(Boolean).length; 101 | 102 | return { 103 | url, 104 | title, 105 | content: mainContent, 106 | timestamp: new Date().toISOString(), 107 | metadata: { 108 | domain: new URL(url).hostname, 109 | contentType: 'text/html', 110 | wordCount, 111 | hasCode, 112 | }, 113 | }; 114 | } 115 | 116 | /** 117 | * Removes unwanted elements from the HTML 118 | * @param $ Cheerio instance 119 | */ 120 | private static removeUnwantedElements($: cheerio.CheerioAPI): void { 121 | // Remove common non-content elements 122 | const selectorsToRemove = [ 123 | 'script', 124 | 'style', 125 | 'nav', 126 | 'header', 127 | 'footer', 128 | 'iframe', 129 | '.advertisement', 130 | '.ads', 131 | '#comments', 132 | '.comments', 133 | '.social-share', 134 | '.related-posts', 135 | 'aside', 136 | ]; 137 | 138 | $(selectorsToRemove.join(', ')).remove(); 139 | } 140 | 141 | /** 142 | * Extracts main content from the HTML 143 | * @param $ Cheerio instance 144 | * @returns Extracted and cleaned content 145 | */ 146 | private static extractMainContent($: cheerio.CheerioAPI): string { 147 | // Try to find main content container 148 | const mainSelectors = [ 149 | 'article', 150 | 'main', 151 | '.main-content', 152 | '#main-content', 153 | '.post-content', 154 | '.article-content', 155 | '.entry-content', 156 | ]; 157 | 158 | let $content = $(); 159 | for (const selector of mainSelectors) { 160 | $content = $(selector); 161 | if ($content.length > 0) break; 162 | } 163 | 164 | // Fallback to body if no main content container found 165 | if ($content.length === 0) { 166 | $content = $('body'); 167 | } 168 | 169 | // Extract text content 170 | const text = $content 171 | .find('h1, h2, h3, h4, h5, h6, p, li, pre, code') 172 | .map((_, el) => { 173 | const $el = $(el); 174 | // Preserve code blocks 175 | if ($el.is('pre, code')) { 176 | return '\n```\n' + $el.text() + '\n```\n'; 177 | } 178 | return $el.text(); 179 | }) 180 | .get() 181 | .join('\n') 182 | .trim(); 
183 | 184 | // Clean up the text 185 | return this.cleanText(text); 186 | } 187 | 188 | /** 189 | * Cleans extracted text content 190 | * @param text Raw text content 191 | * @returns Cleaned text 192 | */ 193 | private static cleanText(text: string): string { 194 | return text 195 | .replace(/[\r\n]+/g, '\n') // Normalize line endings 196 | .replace(/\n\s+\n/g, '\n\n') // Remove excess whitespace between paragraphs 197 | .replace(/\s+/g, ' ') // Normalize whitespace within paragraphs 198 | .split('\n') // Split into lines 199 | .map(line => line.trim()) // Trim each line 200 | .filter(Boolean) // Remove empty lines 201 | .join('\n') // Rejoin with newlines 202 | .trim(); // Final trim 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /src/tools/list-utils.ts: -------------------------------------------------------------------------------- 1 | import { DocumentMetadata } from './qdrant-client.js'; 2 | 3 | export interface ListOptions { 4 | page?: number; 5 | pageSize?: number; 6 | groupByDomain?: boolean; 7 | sortBy?: 'timestamp' | 'title' | 'domain'; 8 | sortOrder?: 'asc' | 'desc'; 9 | } 10 | 11 | export interface ListResult { 12 | total: number; 13 | page: number; 14 | pageSize: number; 15 | totalPages: number; 16 | documents: DocumentGroup[]; 17 | } 18 | 19 | export interface DocumentGroup { 20 | domain?: string; 21 | documents: DocumentMetadata[]; 22 | } 23 | 24 | export class ListUtils { 25 | /** 26 | * Groups documents by domain 27 | */ 28 | static groupByDomain(documents: DocumentMetadata[]): DocumentGroup[] { 29 | const groupedMap = new Map(); 30 | 31 | for (const doc of documents) { 32 | const domain = doc.domain; 33 | if (!groupedMap.has(domain)) { 34 | groupedMap.set(domain, []); 35 | } 36 | groupedMap.get(domain)!.push(doc); 37 | } 38 | 39 | return Array.from(groupedMap.entries()).map(([domain, docs]) => ({ 40 | domain, 41 | documents: docs 42 | })); 43 | } 44 | 45 | /** 46 | * Sorts documents based 
on specified criteria 47 | */ 48 | static sortDocuments( 49 | documents: DocumentMetadata[], 50 | sortBy: 'timestamp' | 'title' | 'domain' = 'timestamp', 51 | sortOrder: 'asc' | 'desc' = 'desc' 52 | ): DocumentMetadata[] { 53 | return [...documents].sort((a, b) => { 54 | let comparison: number; 55 | switch (sortBy) { 56 | case 'timestamp': 57 | comparison = new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime(); 58 | break; 59 | case 'title': 60 | comparison = a.title.localeCompare(b.title); 61 | break; 62 | case 'domain': 63 | comparison = a.domain.localeCompare(b.domain); 64 | break; 65 | default: 66 | comparison = 0; 67 | } 68 | return sortOrder === 'desc' ? -comparison : comparison; 69 | }); 70 | } 71 | 72 | /** 73 | * Formats the list result as markdown 74 | */ 75 | static formatAsMarkdown(result: ListResult): string { 76 | const lines: string[] = []; 77 | 78 | // Add header with pagination info 79 | lines.push(`# Documentation List`); 80 | lines.push(`Page ${result.page} of ${result.totalPages} (${result.total} total documents)\n`); 81 | 82 | // Add documents grouped by domain 83 | for (const group of result.documents) { 84 | if (group.domain) { 85 | lines.push(`## ${group.domain}`); 86 | } 87 | 88 | for (const doc of group.documents) { 89 | const date = new Date(doc.timestamp).toLocaleDateString(); 90 | lines.push(`- [${doc.title}](${doc.url})`); 91 | lines.push(` - Added: ${date}`); 92 | lines.push(` - Type: ${doc.contentType}`); 93 | lines.push(` - Words: ${doc.wordCount}`); 94 | if (doc.hasCode) { 95 | lines.push(` - Contains code snippets`); 96 | } 97 | lines.push(``); 98 | } 99 | } 100 | 101 | return lines.join('\n'); 102 | } 103 | 104 | /** 105 | * Calculates pagination details 106 | */ 107 | static getPaginationDetails( 108 | total: number, 109 | page: number = 1, 110 | pageSize: number = 20 111 | ): { offset: number; limit: number; totalPages: number } { 112 | const totalPages = Math.ceil(total / pageSize); 113 | const currentPage = 
Math.min(Math.max(1, page), totalPages); 114 | const offset = (currentPage - 1) * pageSize; 115 | 116 | return { 117 | offset, 118 | limit: pageSize, 119 | totalPages 120 | }; 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/tools/qdrant-client.ts: -------------------------------------------------------------------------------- 1 | import { QdrantClient } from '@qdrant/js-client-rest'; 2 | import { TextChunk } from './text-chunker.js'; 3 | 4 | export interface DocumentMetadata { 5 | url: string; 6 | title: string; 7 | domain: string; 8 | timestamp: string; 9 | contentType: string; 10 | wordCount: number; 11 | hasCode: boolean; 12 | chunkIndex: number; 13 | totalChunks: number; 14 | } 15 | 16 | export class QdrantError extends Error { 17 | constructor(message: string) { 18 | super(message); 19 | this.name = 'QdrantError'; 20 | } 21 | } 22 | 23 | export class QdrantWrapper { 24 | public client: QdrantClient; 25 | private readonly collectionName = 'documentation'; 26 | private readonly vectorSize = 768; // Ollama nomic-embed-text size 27 | 28 | constructor(url?: string, apiKey?: string) { 29 | this.client = new QdrantClient({ 30 | url: url || 'http://10.1.1.199:6333', 31 | apiKey: apiKey, 32 | timeout: 10000 // Add timeout to help debug connection issues 33 | }); 34 | } 35 | 36 | /** 37 | * Initializes the Qdrant collection if it doesn't exist 38 | */ 39 | async initializeCollection(): Promise { 40 | try { 41 | const collections = await this.client.getCollections(); 42 | const exists = collections.collections.some(c => c.name === this.collectionName); 43 | 44 | if (!exists) { 45 | await this.client.createCollection(this.collectionName, { 46 | vectors: { 47 | size: this.vectorSize, 48 | distance: 'Cosine', 49 | }, 50 | optimizers_config: { 51 | default_segment_number: 2, 52 | }, 53 | replication_factor: 1, 54 | }); 55 | 56 | // Create indexes for efficient filtering 57 | await 
this.client.createPayloadIndex(this.collectionName, { 58 | field_name: 'url', 59 | field_schema: 'keyword', 60 | }); 61 | 62 | await this.client.createPayloadIndex(this.collectionName, { 63 | field_name: 'domain', 64 | field_schema: 'keyword', 65 | }); 66 | 67 | await this.client.createPayloadIndex(this.collectionName, { 68 | field_name: 'timestamp', 69 | field_schema: 'datetime', 70 | }); 71 | } 72 | } catch (error) { 73 | console.error('Qdrant initialization error:', error); 74 | if (error instanceof Error) { 75 | console.error('Error details:', { 76 | name: error.name, 77 | message: error.message, 78 | stack: error.stack 79 | }); 80 | } 81 | throw new QdrantError( 82 | `Failed to initialize Qdrant collection: ${error instanceof Error ? error.message : String(error)}` 83 | ); 84 | } 85 | } 86 | 87 | /** 88 | * Stores document chunks in the Qdrant collection 89 | * @param chunks Text chunks to store 90 | * @param embeddings Corresponding embeddings for each chunk 91 | * @param metadata Document metadata 92 | */ 93 | async storeDocumentChunks( 94 | chunks: TextChunk[], 95 | embeddings: number[][], 96 | metadata: Omit 97 | ): Promise { 98 | if (chunks.length !== embeddings.length) { 99 | throw new QdrantError('Number of chunks does not match number of embeddings'); 100 | } 101 | 102 | try { 103 | const points = chunks.map((chunk, index) => ({ 104 | id: this.generatePointId(metadata.url, chunk.index), 105 | vector: embeddings[index], 106 | payload: { 107 | ...metadata, 108 | content: chunk.content, 109 | chunkIndex: chunk.index, 110 | totalChunks: chunks.length, 111 | chunkMetadata: chunk.metadata, 112 | }, 113 | })); 114 | 115 | await this.client.upsert(this.collectionName, { 116 | wait: true, 117 | points, 118 | }); 119 | } catch (error) { 120 | throw new QdrantError( 121 | `Failed to store document chunks: ${(error as Error).message}` 122 | ); 123 | } 124 | } 125 | 126 | /** 127 | * Checks if a document already exists in the collection 128 | * @param url Document 
URL 129 | * @returns true if document exists 130 | */ 131 | async documentExists(url: string): Promise { 132 | try { 133 | const response = await this.client.scroll(this.collectionName, { 134 | filter: { 135 | must: [ 136 | { 137 | key: 'url', 138 | match: { 139 | value: url, 140 | }, 141 | }, 142 | ], 143 | }, 144 | limit: 1, 145 | }); 146 | 147 | return response.points.length > 0; 148 | } catch (error) { 149 | throw new QdrantError( 150 | `Failed to check document existence: ${(error as Error).message}` 151 | ); 152 | } 153 | } 154 | 155 | /** 156 | * Removes a document and all its chunks from the collection 157 | * @param url Document URL 158 | */ 159 | async removeDocument(url: string): Promise { 160 | try { 161 | await this.client.delete(this.collectionName, { 162 | filter: { 163 | must: [ 164 | { 165 | key: 'url', 166 | match: { 167 | value: url, 168 | }, 169 | }, 170 | ], 171 | }, 172 | wait: true, 173 | }); 174 | } catch (error) { 175 | throw new QdrantError( 176 | `Failed to remove document: ${(error as Error).message}` 177 | ); 178 | } 179 | } 180 | 181 | /** 182 | * Generates a unique point ID for a chunk 183 | * @param url Document URL 184 | * @param chunkIndex Chunk index 185 | * @returns Unique point ID 186 | */ 187 | private generatePointId(url: string, chunkIndex: number): number { 188 | // Create a hash of the URL + chunk index 189 | const str = `${url}:${chunkIndex}`; 190 | let hash = 0; 191 | for (let i = 0; i < str.length; i++) { 192 | const char = str.charCodeAt(i); 193 | hash = ((hash << 5) - hash) + char; 194 | hash = hash & hash; // Convert to 32-bit integer 195 | } 196 | return Math.abs(hash); 197 | } 198 | 199 | /** 200 | * Gets the health status of the Qdrant server 201 | * @returns true if server is healthy 202 | */ 203 | async isHealthy(): Promise { 204 | try { 205 | await this.client.getCollections(); 206 | return true; 207 | } catch { 208 | return false; 209 | } 210 | } 211 | 212 | /** 213 | * Lists all documents with pagination 
support 214 | * @param options Listing options including pagination and filtering 215 | * @returns Array of document metadata with pagination info 216 | */ 217 | async listDocuments(options: { 218 | offset?: number; 219 | limit?: number; 220 | domain?: string; 221 | sortBy?: 'timestamp' | 'title' | 'domain'; 222 | sortOrder?: 'asc' | 'desc'; 223 | } = {}): Promise<{ total: number; documents: DocumentMetadata[] }> { 224 | const filter: any = { 225 | must: [ 226 | { 227 | key: 'chunkIndex', 228 | match: { value: 0 }, // Only get first chunk to avoid duplicates 229 | }, 230 | ], 231 | }; 232 | 233 | if (options.domain) { 234 | filter.must.push({ 235 | key: 'domain', 236 | match: { value: options.domain }, 237 | }); 238 | } 239 | 240 | try { 241 | // Get total count first 242 | const countResponse = await this.client.count(this.collectionName, { 243 | filter, 244 | }); 245 | 246 | // Then get paginated results 247 | const response = await this.client.scroll(this.collectionName, { 248 | filter, 249 | limit: options.limit || 20, 250 | offset: options.offset || 0, 251 | with_payload: true, 252 | with_vector: false, 253 | }); 254 | 255 | const documents = response.points.map(point => { 256 | const payload = point.payload as any; 257 | return { 258 | url: String(payload.url), 259 | title: String(payload.title), 260 | domain: String(payload.domain), 261 | timestamp: String(payload.timestamp), 262 | contentType: String(payload.contentType), 263 | wordCount: Number(payload.wordCount), 264 | hasCode: Boolean(payload.hasCode), 265 | chunkIndex: Number(payload.chunkIndex), 266 | totalChunks: Number(payload.totalChunks), 267 | }; 268 | }); 269 | 270 | return { 271 | total: countResponse.count, 272 | documents, 273 | }; 274 | } catch (error) { 275 | throw new QdrantError( 276 | `Failed to list documents: ${(error as Error).message}` 277 | ); 278 | } 279 | } 280 | 281 | /** 282 | * Performs a semantic search using vector similarity 283 | * @param queryVector Query embedding vector 
284 | * @param options Search options 285 | * @returns Array of search results with scores 286 | */ 287 | async searchSimilar( 288 | queryVector: number[], 289 | options: { 290 | limit?: number; 291 | scoreThreshold?: number; 292 | filters?: { 293 | domain?: string; 294 | hasCode?: boolean; 295 | after?: string; 296 | before?: string; 297 | }; 298 | } = {} 299 | ): Promise> { 300 | const limit = options.limit || 5; 301 | const scoreThreshold = options.scoreThreshold || 0.7; 302 | const filter: any = { must: [] }; 303 | 304 | // Add filters if specified 305 | if (options.filters?.domain) { 306 | filter.must.push({ 307 | key: 'domain', 308 | match: { value: options.filters.domain }, 309 | }); 310 | } 311 | 312 | if (options.filters?.hasCode !== undefined) { 313 | filter.must.push({ 314 | key: 'hasCode', 315 | match: { value: options.filters.hasCode }, 316 | }); 317 | } 318 | 319 | if (options.filters?.after) { 320 | filter.must.push({ 321 | key: 'timestamp', 322 | range: { gte: options.filters.after }, 323 | }); 324 | } 325 | 326 | if (options.filters?.before) { 327 | filter.must.push({ 328 | key: 'timestamp', 329 | range: { lte: options.filters.before }, 330 | }); 331 | } 332 | 333 | try { 334 | const response = await this.client.search(this.collectionName, { 335 | vector: queryVector, 336 | limit: Math.ceil(limit * 1.5), // Request extra results for post-filtering 337 | score_threshold: scoreThreshold, 338 | filter: filter.must.length > 0 ? 
filter : undefined, 339 | with_payload: true, 340 | }); 341 | 342 | return response 343 | .map(hit => { 344 | const payload = hit.payload as any; 345 | if (!payload || typeof payload !== 'object') { 346 | throw new QdrantError('Invalid payload structure in search result'); 347 | } 348 | 349 | // Extract and validate required fields 350 | const result = { 351 | score: hit.score || 0, 352 | url: String(payload.url), 353 | title: String(payload.title), 354 | domain: String(payload.domain), 355 | timestamp: String(payload.timestamp), 356 | contentType: String(payload.contentType), 357 | wordCount: Number(payload.wordCount), 358 | hasCode: Boolean(payload.hasCode), 359 | chunkIndex: Number(payload.chunkIndex), 360 | totalChunks: Number(payload.totalChunks), 361 | content: String(payload.content), 362 | }; 363 | 364 | // Validate all fields are present and of correct type 365 | if (Object.values(result).some(v => v === undefined)) { 366 | throw new QdrantError('Missing required fields in search result'); 367 | } 368 | 369 | return result; 370 | }) 371 | .slice(0, limit); // Return only requested number of results 372 | } catch (error) { 373 | throw new QdrantError( 374 | `Failed to perform search: ${(error as Error).message}` 375 | ); 376 | } 377 | } 378 | } 379 | -------------------------------------------------------------------------------- /src/tools/search-utils.ts: -------------------------------------------------------------------------------- 1 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; 2 | import { DocumentMetadata } from './qdrant-client.js'; 3 | 4 | export interface SearchResult { 5 | url: string; 6 | title: string; 7 | domain: string; 8 | timestamp: string; 9 | score: number; 10 | snippet: string; 11 | metadata: Partial; 12 | } 13 | 14 | export interface SearchOptions { 15 | limit?: number; 16 | scoreThreshold?: number; 17 | filters?: { 18 | domain?: string; 19 | hasCode?: boolean; 20 | after?: string; 21 | before?: string; 22 
| }; 23 | } 24 | 25 | /** 26 | * Extracts a relevant snippet around the most relevant content 27 | */ 28 | export function extractSnippet(content: string, maxLength: number = 300): string { 29 | // If content is shorter than maxLength, return it as is 30 | if (content.length <= maxLength) { 31 | return content; 32 | } 33 | 34 | // Find a good breaking point near the middle 35 | const middle = Math.floor(content.length / 2); 36 | const radius = Math.floor(maxLength / 2); 37 | 38 | let start = Math.max(0, middle - radius); 39 | let end = Math.min(content.length, middle + radius); 40 | 41 | // Adjust to avoid breaking words 42 | while (start > 0 && /\S/.test(content[start - 1])) start--; 43 | while (end < content.length && /\S/.test(content[end])) end++; 44 | 45 | let snippet = content.slice(start, end).trim(); 46 | 47 | // Add ellipsis if we're not at the boundaries 48 | if (start > 0) snippet = '...' + snippet; 49 | if (end < content.length) snippet = snippet + '...'; 50 | 51 | return snippet; 52 | } 53 | 54 | /** 55 | * Normalizes scores to be between 0 and 1 56 | */ 57 | export function normalizeScore(score: number): number { 58 | // Qdrant uses cosine similarity which is already between -1 and 1 59 | // Convert to 0-1 range 60 | return (score + 1) / 2; 61 | } 62 | 63 | /** 64 | * Formats search results as markdown 65 | */ 66 | export function formatResultsAsMarkdown(results: SearchResult[]): string { 67 | if (results.length === 0) { 68 | return 'No matching documents found.'; 69 | } 70 | 71 | return results 72 | .map((result, index) => { 73 | const score = (result.score * 100).toFixed(1); 74 | return ` 75 | ### ${index + 1}. 
${result.title} (${score}% match) 76 | **URL:** ${result.url} 77 | **Domain:** ${result.domain} 78 | **Date:** ${new Date(result.timestamp).toLocaleDateString()} 79 | 80 | ${result.snippet} 81 | `; 82 | }) 83 | .join('\n---\n'); 84 | } 85 | 86 | /** 87 | * Validates search options 88 | */ 89 | export function validateSearchOptions(options: SearchOptions): void { 90 | if (options.limit !== undefined && (options.limit < 1 || options.limit > 20)) { 91 | throw new McpError( 92 | ErrorCode.InvalidRequest, 93 | 'Limit must be between 1 and 20' 94 | ); 95 | } 96 | 97 | if ( 98 | options.scoreThreshold !== undefined && 99 | (options.scoreThreshold < 0 || options.scoreThreshold > 1) 100 | ) { 101 | throw new McpError( 102 | ErrorCode.InvalidRequest, 103 | 'Score threshold must be between 0 and 1' 104 | ); 105 | } 106 | 107 | if (options.filters?.after && isNaN(Date.parse(options.filters.after))) { 108 | throw new McpError(ErrorCode.InvalidRequest, 'Invalid after date format'); 109 | } 110 | 111 | if (options.filters?.before && isNaN(Date.parse(options.filters.before))) { 112 | throw new McpError(ErrorCode.InvalidRequest, 'Invalid before date format'); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/tools/text-chunker.ts: -------------------------------------------------------------------------------- 1 | export interface ChunkOptions { 2 | maxChunkSize: number; 3 | minChunkSize: number; 4 | overlap: number; 5 | respectCodeBlocks?: boolean; 6 | } 7 | 8 | export interface TextChunk { 9 | content: string; 10 | index: number; 11 | metadata: { 12 | startPosition: number; 13 | endPosition: number; 14 | isCodeBlock?: boolean; 15 | }; 16 | } 17 | 18 | export class TextChunker { 19 | private static readonly DEFAULT_OPTIONS: ChunkOptions = { 20 | maxChunkSize: 1000, 21 | minChunkSize: 100, 22 | overlap: 200, 23 | respectCodeBlocks: true, 24 | }; 25 | 26 | /** 27 | * Splits text into chunks while preserving context and 
natural boundaries 28 | * @param text Text to split into chunks 29 | * @param options Chunking options 30 | * @returns Array of text chunks with metadata 31 | */ 32 | static chunkText(text: string, options?: Partial): TextChunk[] { 33 | const opts = { ...this.DEFAULT_OPTIONS, ...options }; 34 | const chunks: TextChunk[] = []; 35 | 36 | // First, separate code blocks from regular text 37 | const segments = this.separateCodeBlocks(text); 38 | let currentPosition = 0; 39 | let chunkIndex = 0; 40 | 41 | for (const segment of segments) { 42 | if (segment.isCodeBlock && opts.respectCodeBlocks) { 43 | // Keep code blocks as single chunks if they're not too large 44 | if (segment.content.length <= opts.maxChunkSize * 1.5) { 45 | chunks.push({ 46 | content: segment.content, 47 | index: chunkIndex++, 48 | metadata: { 49 | startPosition: currentPosition, 50 | endPosition: currentPosition + segment.content.length, 51 | isCodeBlock: true, 52 | }, 53 | }); 54 | currentPosition += segment.content.length; 55 | continue; 56 | } 57 | } 58 | 59 | // Process regular text or large code blocks 60 | const segmentChunks = this.chunkSegment( 61 | segment.content, 62 | opts, 63 | currentPosition, 64 | chunkIndex, 65 | segment.isCodeBlock 66 | ); 67 | 68 | chunks.push(...segmentChunks); 69 | chunkIndex += segmentChunks.length; 70 | currentPosition += segment.content.length; 71 | } 72 | 73 | return chunks; 74 | } 75 | 76 | /** 77 | * Separates code blocks from regular text 78 | * @param text Input text 79 | * @returns Array of text segments with code block flags 80 | */ 81 | private static separateCodeBlocks(text: string): Array<{ content: string; isCodeBlock: boolean }> { 82 | const segments: Array<{ content: string; isCodeBlock: boolean }> = []; 83 | const codeBlockRegex = /```[\s\S]*?```/g; 84 | 85 | let lastIndex = 0; 86 | let match: RegExpExecArray | null; 87 | 88 | while ((match = codeBlockRegex.exec(text)) !== null) { 89 | // Add text before code block 90 | if (match.index > lastIndex) 
{ 91 | segments.push({ 92 | content: text.slice(lastIndex, match.index), 93 | isCodeBlock: false, 94 | }); 95 | } 96 | 97 | // Add code block 98 | segments.push({ 99 | content: match[0], 100 | isCodeBlock: true, 101 | }); 102 | 103 | lastIndex = match.index + match[0].length; 104 | } 105 | 106 | // Add remaining text 107 | if (lastIndex < text.length) { 108 | segments.push({ 109 | content: text.slice(lastIndex), 110 | isCodeBlock: false, 111 | }); 112 | } 113 | 114 | return segments; 115 | } 116 | 117 | /** 118 | * Chunks a single segment of text 119 | * @param text Text segment to chunk 120 | * @param options Chunking options 121 | * @param startPosition Starting position in original text 122 | * @param startIndex Starting chunk index 123 | * @param isCodeBlock Whether this is a code block 124 | * @returns Array of chunks 125 | */ 126 | private static chunkSegment( 127 | text: string, 128 | options: ChunkOptions, 129 | startPosition: number, 130 | startIndex: number, 131 | isCodeBlock: boolean 132 | ): TextChunk[] { 133 | const chunks: TextChunk[] = []; 134 | let currentChunk = ''; 135 | let currentPosition = 0; 136 | 137 | // Split into sentences/paragraphs first 138 | const blocks = isCodeBlock 139 | ? 
[text] // Keep code blocks together 140 | : text 141 | .split(/(?<=\.|\?|\!|\n)\s+/) 142 | .filter(Boolean) 143 | .map(block => block.trim()); 144 | 145 | for (const block of blocks) { 146 | // If adding this block would exceed max size, start new chunk 147 | if ( 148 | currentChunk && 149 | currentChunk.length + block.length > options.maxChunkSize && 150 | currentChunk.length >= options.minChunkSize 151 | ) { 152 | chunks.push({ 153 | content: currentChunk, 154 | index: startIndex + chunks.length, 155 | metadata: { 156 | startPosition: startPosition + currentPosition - currentChunk.length, 157 | endPosition: startPosition + currentPosition, 158 | isCodeBlock, 159 | }, 160 | }); 161 | 162 | // Start new chunk with overlap 163 | const words = currentChunk.split(/\s+/); 164 | const overlapWords = words.slice(-Math.ceil(options.overlap / 10)); // Approximate words for overlap 165 | currentChunk = overlapWords.join(' ') + ' ' + block; 166 | } else { 167 | currentChunk = currentChunk 168 | ? 
currentChunk + ' ' + block 169 | : block; 170 | } 171 | 172 | currentPosition += block.length + 1; // +1 for the space 173 | } 174 | 175 | // Add final chunk if not empty 176 | if (currentChunk) { 177 | chunks.push({ 178 | content: currentChunk, 179 | index: startIndex + chunks.length, 180 | metadata: { 181 | startPosition: startPosition + currentPosition - currentChunk.length, 182 | endPosition: startPosition + currentPosition, 183 | isCodeBlock, 184 | }, 185 | }); 186 | } 187 | 188 | return chunks; 189 | } 190 | 191 | /** 192 | * Validates chunk options and sets defaults 193 | * @param options User-provided options 194 | * @returns Validated options 195 | */ 196 | private static validateOptions(options: Partial): ChunkOptions { 197 | const opts = { ...this.DEFAULT_OPTIONS, ...options }; 198 | 199 | if (opts.maxChunkSize < opts.minChunkSize) { 200 | throw new Error('maxChunkSize must be greater than minChunkSize'); 201 | } 202 | 203 | if (opts.overlap >= opts.maxChunkSize) { 204 | throw new Error('overlap must be less than maxChunkSize'); 205 | } 206 | 207 | if (opts.minChunkSize <= 0 || opts.maxChunkSize <= 0 || opts.overlap < 0) { 208 | throw new Error('chunk sizes and overlap must be positive numbers'); 209 | } 210 | 211 | return opts; 212 | } 213 | } 214 | -------------------------------------------------------------------------------- /src/tools/url-processor.ts: -------------------------------------------------------------------------------- 1 | import { URL } from 'url'; 2 | 3 | export class URLProcessingError extends Error { 4 | constructor(message: string) { 5 | super(message); 6 | this.name = 'URLProcessingError'; 7 | } 8 | } 9 | 10 | export interface ProcessedURL { 11 | originalUrl: string; 12 | normalizedUrl: string; 13 | domain: string; 14 | path: string; 15 | isValid: boolean; 16 | } 17 | 18 | export class URLProcessor { 19 | /** 20 | * Validates and normalizes a URL, extracting key components 21 | * @param urlString The URL string to process 22 | * 
@returns ProcessedURL object containing normalized URL and metadata 23 | * @throws URLProcessingError if URL is invalid 24 | */ 25 | static processURL(urlString: string): ProcessedURL { 26 | try { 27 | // Trim whitespace and normalize 28 | const trimmedUrl = urlString.trim(); 29 | 30 | // Add protocol if missing 31 | const urlWithProtocol = trimmedUrl.startsWith('http') 32 | ? trimmedUrl 33 | : `https://${trimmedUrl}`; 34 | 35 | // Parse URL 36 | const url = new URL(urlWithProtocol); 37 | 38 | // Normalize URL 39 | // - Convert to lowercase 40 | // - Remove trailing slashes 41 | // - Remove default ports 42 | // - Sort query parameters 43 | const normalizedUrl = this.normalizeURL(url); 44 | 45 | return { 46 | originalUrl: urlString, 47 | normalizedUrl, 48 | domain: url.hostname.toLowerCase(), 49 | path: url.pathname, 50 | isValid: true, 51 | }; 52 | } catch (error) { 53 | throw new URLProcessingError( 54 | `Invalid URL "${urlString}": ${(error as Error).message}` 55 | ); 56 | } 57 | } 58 | 59 | /** 60 | * Normalizes a URL to ensure consistent format 61 | * @param url URL object to normalize 62 | * @returns Normalized URL string 63 | */ 64 | private static normalizeURL(url: URL): string { 65 | // Convert hostname to lowercase 66 | const hostname = url.hostname.toLowerCase(); 67 | 68 | // Remove default ports 69 | const port = url.port === '80' || url.port === '443' ? 
'' : url.port; 70 | 71 | // Sort query parameters 72 | const searchParams = new URLSearchParams([...url.searchParams].sort()); 73 | const search = searchParams.toString(); 74 | 75 | // Construct normalized path (remove trailing slash except for root) 76 | let path = url.pathname; 77 | if (path.length > 1 && path.endsWith('/')) { 78 | path = path.slice(0, -1); 79 | } 80 | 81 | // Construct normalized URL 82 | let normalizedUrl = `${url.protocol}//${hostname}`; 83 | if (port) normalizedUrl += `:${port}`; 84 | normalizedUrl += path; 85 | if (search) normalizedUrl += `?${search}`; 86 | if (url.hash) normalizedUrl += url.hash; 87 | 88 | return normalizedUrl; 89 | } 90 | 91 | /** 92 | * Checks if a URL points to a valid web page 93 | * @param urlString URL to validate 94 | * @returns true if URL is valid and accessible 95 | */ 96 | static isValidWebPage(urlString: string): boolean { 97 | try { 98 | const { protocol } = new URL(urlString); 99 | return protocol === 'http:' || protocol === 'https:'; 100 | } catch { 101 | return false; 102 | } 103 | } 104 | 105 | /** 106 | * Extracts the root domain from a URL 107 | * @param urlString URL to process 108 | * @returns Root domain string 109 | */ 110 | static extractRootDomain(urlString: string): string { 111 | try { 112 | const { hostname } = new URL(urlString); 113 | const parts = hostname.split('.'); 114 | if (parts.length <= 2) return hostname; 115 | 116 | // Handle special cases like co.uk, com.au 117 | const sld = parts[parts.length - 2]; 118 | const tld = parts[parts.length - 1]; 119 | if (sld.length <= 3 && tld.length <= 3 && parts.length > 2) { 120 | return parts.slice(-3).join('.'); 121 | } 122 | 123 | return parts.slice(-2).join('.'); 124 | } catch { 125 | throw new URLProcessingError(`Cannot extract domain from invalid URL: ${urlString}`); 126 | } 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/types.ts: 
-------------------------------------------------------------------------------- 1 | import { DocumentMetadata } from './tools/qdrant-client.js'; 2 | 3 | export interface Document { 4 | url: string; 5 | content: string; 6 | metadata: Partial; 7 | } 8 | 9 | export interface DocumentChunk { 10 | text: string; 11 | url: string; 12 | title: string; 13 | timestamp: string; 14 | } 15 | 16 | export interface DocumentPayload extends DocumentChunk { 17 | _type: 'DocumentChunk'; 18 | [key: string]: unknown; 19 | } 20 | 21 | export function isDocumentPayload(payload: unknown): payload is DocumentPayload { 22 | if (!payload || typeof payload !== 'object') return false; 23 | const p = payload as Partial; 24 | return ( 25 | p._type === 'DocumentChunk' && 26 | typeof p.text === 'string' && 27 | typeof p.url === 'string' && 28 | typeof p.title === 'string' && 29 | typeof p.timestamp === 'string' 30 | ); 31 | } 32 | 33 | export interface SearchOptions { 34 | limit?: number; 35 | scoreThreshold?: number; 36 | filters?: { 37 | domain?: string; 38 | hasCode?: boolean; 39 | after?: string; 40 | before?: string; 41 | }; 42 | } 43 | 44 | export interface ToolDefinition { 45 | name: string; 46 | description: string; 47 | inputSchema: { 48 | type: string; 49 | properties: Record; 50 | required: string[]; 51 | }; 52 | } 53 | 54 | export interface ToolResult { 55 | content: Array<{ 56 | type: string; 57 | text: string; 58 | }>; 59 | isError?: boolean; 60 | } 61 | 62 | export interface RagDocsConfig { 63 | qdrantUrl: string; 64 | qdrantApiKey?: string; 65 | openaiApiKey: string; 66 | collectionName: string; 67 | } 68 | -------------------------------------------------------------------------------- /src/types/ollama.d.ts: -------------------------------------------------------------------------------- 1 | declare module 'ollama' { 2 | export interface EmbeddingsRequest { 3 | model: string; 4 | prompt: string; 5 | options?: Record; 6 | } 7 | 8 | export interface EmbeddingsResponse { 9 | 
embedding: number[]; 10 | } 11 | 12 | const ollama: { 13 | embeddings(request: EmbeddingsRequest): Promise; 14 | }; 15 | 16 | export default ollama; 17 | } 18 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es2022", 4 | "module": "es2022", 5 | "moduleResolution": "node", 6 | "outDir": "build", 7 | "rootDir": "src", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "resolveJsonModule": true 13 | }, 14 | "include": ["src/**/*"], 15 | "exclude": ["node_modules"] 16 | } 17 | --------------------------------------------------------------------------------