├── .gitignore ├── src ├── types │ ├── lunr-languages.d.ts │ └── jieba-wasm.d.ts ├── search.ts └── index.ts ├── tsconfig.json ├── Dockerfile ├── CHANGELOG.md ├── smithery.yaml ├── package.json ├── README.zh-CN.md └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | build/ 3 | *.log 4 | .env* -------------------------------------------------------------------------------- /src/types/lunr-languages.d.ts: -------------------------------------------------------------------------------- 1 | declare module 'lunr-languages' { 2 | export const zh: { 3 | tokenizer: (token: string) => string[], 4 | stemmer: (token: string) => string 5 | }; 6 | } -------------------------------------------------------------------------------- /src/types/jieba-wasm.d.ts: -------------------------------------------------------------------------------- 1 | declare module 'jieba-wasm' { 2 | export function load(): Promise<void>; 3 | export function cut(text: string, hmm?: boolean): string[]; 4 | export function cutAll(text: string): string[]; 5 | export function cutForSearch(text: string, hmm?: boolean): string[]; 6 | export function tag(text: string, hmm?: boolean): Array<[string, string]>; 7 | export function extract(text: string, topk: number): Array<[string, number]>; 8 | } -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "Node16", 5 | "moduleResolution": "Node16", 6 | "outDir": "./build", 7 | "rootDir": "./src", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "allowSyntheticDefaultImports": true, 13 | "typeRoots": [ 14 | "./node_modules/@types", 15 | "./src/types" 16 | ] 17 | }, 18 | "include": ["src/**/*"], 19 | "exclude": ["node_modules"] 20 | } 21 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile 2 | FROM node:lts-alpine 3 | 4 | WORKDIR /app 5 | 6 | # Copy necessary files for dependency installation and build 7 | COPY package*.json tsconfig.json ./ 8 | COPY src ./src 9 | COPY README.md ./ 10 | COPY README.zh-CN.md ./ 11 | 12 | # Install dependencies (including dev dependencies needed for building) 13 | RUN npm install --ignore-scripts 14 | 15 | # Build the project 16 | RUN npm run build 17 | 18 | # Expose docs directory if needed (optional) 19 | 20 | # Command to run the MCP server 21 | CMD ["node", "build/index.js", "--docsDir", "./docs"] 22 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file.
4 | 5 | ## [1.0.3] - 2025-04-09 6 | 7 | ### Added 8 | - Smithery deployment support (thanks @calclavia) 9 | 10 | ### Fixed (thanks @KunihiroS) 11 | - Auto-create config file when it does not exist to prevent errors on first run 12 | - Fixed error when running `list_all_docs` or `list_enabled_docs` before any configuration 13 | - Fixed incorrect path handling in document crawling that ignored the `--docsDir` parameter 14 | - Added WSL compatibility options to Puppeteer for better performance in WSL environments 15 | 16 | ## [1.0.0] - 2025-03-25 17 | ### Added 18 | - Initial release of docs-mcp MCP Server 19 | - Core functionality for document management 20 | - MCP protocol implementation 21 | - Basic document summarization 22 | 23 | ### Changed 24 | - Updated project documentation 25 | - Improved README and project brief 26 | - Version bump to 1.0.0 27 | 28 | ### Fixed 29 | - Documentation formatting issues 30 | - Project metadata consistency -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml 2 | 3 | startCommand: 4 | type: stdio 5 | configSchema: 6 | # JSON Schema defining the configuration options for the MCP. 7 | type: object 8 | required: 9 | - docsDir 10 | properties: 11 | docsDir: 12 | type: string 13 | description: Path to the documentation directory to crawl. 14 | disabled: 15 | type: boolean 16 | default: false 17 | description: Optionally disable the server 18 | alwaysAllow: 19 | type: array 20 | items: 21 | type: string 22 | default: [] 23 | description: List of tool names allowed without confirmation 24 | commandFunction: 25 | # A JS function that produces the CLI command based on the given config to start the MCP on stdio.
26 | |- 27 | (config) => ({ 28 | command: 'node', 29 | args: ['build/index.js', '--docsDir', config.docsDir], 30 | env: {} 31 | }) 32 | exampleConfig: 33 | docsDir: ./docs 34 | disabled: false 35 | alwaysAllow: 36 | - search_docs 37 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "open-docs-mcp", 3 | "version": "1.0.3", 4 | "description": "open-source MCP implementation of cursor docs functionality", 5 | "author": "askme765cs@yahoo.com", 6 | "license": "MIT", 7 | "repository": { 8 | "type": "git", 9 | "url": "https://github.com/askme765cs/open-docs-mcp.git" 10 | }, 11 | "bugs": { 12 | "url": "https://github.com/askme765cs/open-docs-mcp/issues" 13 | }, 14 | "homepage": "https://github.com/askme765cs/open-docs-mcp#readme", 15 | "keywords": [ 16 | "mcp", 17 | "documentation", 18 | "search", 19 | "open-source" 20 | ], 21 | "private": false, 22 | "type": "module", 23 | "engines": { 24 | "node": ">=18.0.0" 25 | }, 26 | "bin": { 27 | "open-docs-mcp": "./build/index.js" 28 | }, 29 | "readme": "README.md", 30 | "files": [ 31 | "build", 32 | "README.md", 33 | "README.zh-CN.md", 34 | "CHANGELOG.md" 35 | ], 36 | "scripts": { 37 | "build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"", 38 | "prepare": "npm run build", 39 | "watch": "tsc --watch", 40 | "inspector": "npx @modelcontextprotocol/inspector build/index.js" 41 | }, 42 | "devDependencies": { 43 | "@types/fs-extra": "^11.0.4", 44 | "@types/lunr": "^2.3.3", 45 | "@types/node": "^20.17.27", 46 | "@types/yargs": "^17.0.33", 47 | "typescript": "^5.3.3" 48 | }, 49 | "dependencies": { 50 | "@modelcontextprotocol/sdk": "^1.7.0", 51 | "cheerio": "^1.0.0", 52 | "fs-extra": "^11.3.0", 53 | "jieba-wasm": "^2.2.0", 54 | "lunr": "^2.3.9", 55 | "lunr-languages": "^1.4.0", 56 | "node-fetch": "^3.3.2", 57 | "puppeteer": "^24.4.0", 58 | "yargs": "^17.7.2" 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /README.zh-CN.md: -------------------------------------------------------------------------------- 1 | # open-docs-mcp MCP 服务器 2 | 3 | [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) 4 | [![Node Version](https://img.shields.io/badge/node-%3E%3D16.0.0-brightgreen.svg)](package.json) 5 | [![TypeScript](https://img.shields.io/badge/TypeScript-4.9.5-blue.svg)](package.json) 6 | 7 | 开源的MCP实现,提供文档管理功能。[English Version][url-docen] 8 | 9 | ## 功能特性 10 | 11 | ### 文档管理 12 | - 从多种来源爬取和索引文档 13 | - 支持多种文档格式 14 | - 全文搜索功能 15 | 16 | ### MCP服务器接口 17 | - 基于资源的文档访问 18 | - 基于工具的文档管理 19 | 20 | ### 可用工具 21 | 1. **enable_doc** - 启用特定文档的爬取 22 | 2. **disable_doc** - 禁用特定文档的爬取 23 | 3. **crawl_docs** - 开始爬取已启用的文档 24 | 4. **build_index** - 构建文档搜索索引 25 | 5. **search_docs** - 搜索文档 26 | 6. **list_enabled_docs** - 列出已启用的文档 27 | 7. **list_all_docs** - 列出所有可用文档 28 | 29 | ### Cursor @Docs 兼容性 30 | 31 | 本项目旨在复现Cursor的@Docs功能,提供: 32 | 33 | 1. **文档索引**: 34 | - 从多种来源爬取和索引文档 35 | - 支持多种文档格式(HTML, Markdown等) 36 | - 自动重新索引保持文档更新 37 | 38 | 2. **文档访问**: 39 | - 在所有索引文档中搜索 40 | - 与MCP协议集成提供AI上下文 41 | 42 | 3. 
**自定义文档管理**: 43 | - 通过`enable_doc`工具添加新文档源 44 | - 通过`list_enabled_docs`工具管理已启用文档 45 | - 使用`crawl_docs`工具强制重新爬取 46 | 47 | ### 系统架构 48 | ``` 49 | ┌───────────────────────────────────────────────────────┐ 50 | │ open-docs-mcp Server │ 51 | ├───────────────────┬───────────────────┬───────────────┤ 52 | │ 爬取模块 │ 搜索引擎 │ MCP服务器 │ 53 | ├───────────────────┼───────────────────┼───────────────┤ 54 | │ - 网页爬取 │ - 全文索引 │ - 资源管理 │ 55 | │ - 文档转换 │ - 相关性评分 │ - 工具管理 │ 56 | │ - 存储管理 │ - 查询解析 │ - 提示管理 │ 57 | └───────────────────┴───────────────────┴───────────────┘ 58 | ``` 59 | 60 | ## 使用 61 | 62 | ```bash 63 | npx -y open-docs-mcp --docsDir ./docs 64 | ``` 65 | 66 | ### 通过Smithery安装 67 | 68 | 要通过 [Smithery](https://smithery.ai/server/@askme765cs/open-docs-mcp) 自动为 Claude Desktop 安装文档管理服务器: 69 | 70 | ```bash 71 | npx -y @smithery/cli install @askme765cs/open-docs-mcp --client claude 72 | ``` 73 | 74 | ### 配置 75 | 76 | 在Claude Desktop中使用，添加服务器配置: 77 | 78 | MacOS: `~/Library/Application Support/Claude/claude_desktop_config.json` 79 | Windows: `%APPDATA%/Claude/claude_desktop_config.json` 80 | 81 | ```json 82 | { 83 | "mcpServers": { 84 | "open-docs-mcp": { 85 | "command": "npx", 86 | "args": [ 87 | "-y", 88 | "open-docs-mcp", 89 | "--docsDir", 90 | "/path/to/docs" 91 | ] 92 | } 93 | } 94 | } 95 | ``` 96 | 97 | **配置选项:** 98 | - `command`: Node.js可执行文件 99 | - `args`: 传递给脚本的参数数组 100 | - `--docsDir`: 必需，指定文档目录路径 101 | - `disabled`: 设为true可临时禁用服务器 102 | - `alwaysAllow`: 无需确认即可使用的工具名称数组 103 | 104 | ## 开发 105 | 106 | ```bash 107 | npm run watch # 自动重建 108 | npm run inspector # 使用MCP检查器调试 109 | ``` 110 | 111 | ## 贡献 112 | 欢迎提交Pull Request。重大改动请先创建issue讨论。 113 | 114 | ## 许可证 115 | [MIT](LICENSE) 116 | 117 | [url-docen]: README.md -------------------------------------------------------------------------------- /src/search.ts: -------------------------------------------------------------------------------- 1 | import lunr from 'lunr'; 2 | import path from 'path'; 3 | import fs from 'fs-extra'; 4 | 5 | interface DocEntry { 6 | path: string; 7 | title: string; 8 | content: string; 9 | } 10 | 11 | class SearchEngine { 12 | private index!: lunr.Index; 13 | private docStore: Record<string, DocEntry> = {}; 14 | private indexPath: string; 15 | 16 | constructor(docsDir: string) { 17 | this.indexPath = path.join(docsDir, 'search-index.json'); 18 | } 19 | 20 | async initialize() { 21 | if (await fs.pathExists(this.indexPath)) { 22 | await this.loadIndex(); 23 | } 24 | } 25 | 26 | private async loadIndex() { 27 | const indexData = await fs.readJson(this.indexPath); 28 | this.index = lunr.Index.load(indexData.index); 29 | this.docStore = indexData.docStore; 30 | } 31 | 32 | async buildIndex(docsDir: string) { 33 | const docs = await this.collectDocs(docsDir); 34 | this.index = lunr(function() { 35 | this.ref('path'); 36 | this.field('title'); 37 | this.field('content'); 38 | 39 | docs.forEach(doc => { 40 | this.add(doc); 41 | }); 42 | }); 43 | 44 | // Store documents separately 45 | docs.forEach(doc => { 46 | this.docStore[doc.path] = doc; 47 | }); 48 | 49 | await this.saveIndex(); 50 | } 51 | 52 | private async collectDocs(docsDir: string): Promise<DocEntry[]> { 53 | const docs: DocEntry[] = []; 54 | const docCategories = await fs.readdir(docsDir); 55 | 56 | for (const category of docCategories) { 57 | const categoryPath = path.join(docsDir, category); 58 | if ((await fs.stat(categoryPath)).isDirectory()) { 59 | const files = await fs.readdir(categoryPath); 60 | 61 | for (const file of files) { 62 | if (file.endsWith('.md')) { 63 | const filePath = 
path.join(categoryPath, file); 64 | const content = await fs.readFile(filePath, 'utf-8'); 65 | docs.push({ 66 | path: filePath, 67 | title: `${category}/${path.basename(file, '.md')}`, 68 | content 69 | }); 70 | } 71 | } 72 | } 73 | } 74 | 75 | return docs; 76 | } 77 | 78 | private async saveIndex() { 79 | await fs.writeJson(this.indexPath, { 80 | version: new Date().toISOString(), 81 | index: this.index.toJSON(), 82 | docStore: this.docStore 83 | }); 84 | } 85 | 86 | async search(query: string, maxResults = 3, docName?: string, minScore = 0.2, offset = 0) { 87 | if (!this.index) { 88 | throw new Error('Index not initialized'); 89 | } 90 | 91 | let results = this.index.search(query); 92 | 93 | // Filter by doc category 94 | if (docName) { 95 | results = results.filter(result => { 96 | const doc = this.docStore[result.ref]; 97 | return doc.title.startsWith(`${docName}/`); 98 | }); 99 | } 100 | 101 | // Filter by minimum score 102 | results = results.filter(result => result.score >= minScore); 103 | 104 | return results.slice(offset, offset + maxResults).map(result => { 105 | const doc = this.docStore[result.ref]; 106 | return { 107 | path: doc.path, 108 | score: result.score, 109 | title: doc.title, 110 | excerpt: this.createExcerpt(doc.content, query) 111 | }; 112 | }); 113 | } 114 | 115 | private createExcerpt(content: string, query: string): string { 116 | const pos = content.toLowerCase().indexOf(query.toLowerCase()); 117 | const start = Math.max(0, pos - 400); 118 | const end = Math.min(content.length, pos + query.length + 400); 119 | let excerpt = content.slice(start, end); 120 | 121 | if (pos >= 0) { 122 | excerpt = excerpt.replace( 123 | new RegExp(query, 'gi'), 124 | match => `**${match}**` 125 | ); 126 | } 127 | 128 | return excerpt; 129 | } 130 | } 131 | 132 | export { SearchEngine }; -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # open-docs-mcp MCP Server 2 | 3 | [![smithery badge](https://smithery.ai/badge/@askme765cs/open-docs-mcp)](https://smithery.ai/server/@askme765cs/open-docs-mcp) 4 | [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) 5 | [![Node Version](https://img.shields.io/badge/node-%3E%3D16.0.0-brightgreen.svg)](package.json) 6 | [![TypeScript](https://img.shields.io/badge/TypeScript-4.9.5-blue.svg)](package.json) 7 | 8 | An open-source MCP implementation providing document management functionality. 9 | [中文文档][url-doczh] 10 | 11 | ## Features 12 | 13 | ### Document Management 14 | - Crawl and index documentation from various sources 15 | - Support for multiple document formats 16 | - Full-text search capabilities 17 | 18 | ### MCP Server API 19 | - Resource-based access to documents 20 | - Tool-based document management 21 | 22 | ### Available Tools 23 | 1. **enable_doc** - Enable crawling for a specific doc 24 | 2. **disable_doc** - Disable crawling for a specific doc 25 | 3. **crawl_docs** - Start crawling enabled docs 26 | 4. **build_index** - Build search index for docs 27 | 5. **search_docs** - Search documentation 28 | 6. **list_enabled_docs** - List enabled docs 29 | 7. **list_all_docs** - List all available docs 30 | 31 | ### Cursor @Docs Compatibility 32 | 33 | This project aims to replicate Cursor's @Docs functionality by providing: 34 | 35 | 1. **Document Indexing**: 36 | - Crawl and index documentation from various sources 37 | - Support for multiple document formats (HTML, Markdown, etc.) 
38 | - Automatic re-indexing to keep docs up-to-date 39 | 40 | 2. **Document Access**: 41 | - Search across all indexed documentation 42 | - Integration with MCP protocol for AI context 43 | 44 | 3. **Custom Docs Management**: 45 | - Add new documentation sources via `enable_doc` tool 46 | - Manage enabled docs via `list_enabled_docs` tool 47 | - Force re-crawl with `crawl_docs` tool 48 | 49 | ### Architecture 50 | ``` 51 | ┌───────────────────────────────────────────────────────┐ 52 | │ open-docs-mcp Server │ 53 | ├───────────────────┬───────────────────┬───────────────┤ 54 | │ Crawler Module │ Search Engine │ MCP Server │ 55 | ├───────────────────┼───────────────────┼───────────────┤ 56 | │ - Web crawling │ - Full-text index │ - Resources │ 57 | │ - Doc conversion │ - Relevance score │ - Tools │ 58 | │ - Storage │ - Query parsing │ - Prompts │ 59 | └───────────────────┴───────────────────┴───────────────┘ 60 | ``` 61 | 62 | ## Usage 63 | 64 | ```bash 65 | npx -y open-docs-mcp --docsDir ./docs 66 | ``` 67 | 68 | ### Installing via Smithery 69 | 70 | To install Document Management Server for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@askme765cs/open-docs-mcp): 71 | 72 | ```bash 73 | npx -y @smithery/cli install @askme765cs/open-docs-mcp --client claude 74 | ``` 75 | 76 | ### Configuration 77 | 78 | To use with Claude Desktop, add the server config: 79 | 80 | On MacOS: `~/Library/Application Support/Claude/claude_desktop_config.json` 81 | On Windows: `%APPDATA%/Claude/claude_desktop_config.json` 82 | 83 | ```json 84 | { 85 | "mcpServers": { 86 | "open-docs-mcp": { 87 | "command": "npx", 88 | "args": [ 89 | "-y", 90 | "open-docs-mcp", 91 | "--docsDir", 92 | "/path/to/docs" 93 | ] 94 | } 95 | } 96 | } 97 | ``` 98 | 99 | **Configuration Options:** 100 | - `command`: Node.js executable 101 | - `args`: Array of arguments to pass to the script 102 | - `--docsDir`: Required, specifies docs directory path 103 | - `disabled`: Set to true to temporarily disable the server 104 | - `alwaysAllow`: Array of tool names that can be used without confirmation 105 | 106 | ## Development 107 | 108 | ```bash 109 | npm run watch # Auto-rebuild on changes 110 | npm run inspector # Debug with MCP Inspector 111 | ``` 112 | 113 | ## Contributing 114 | Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change. 115 | 116 | ## License 117 | [MIT](LICENSE) 118 | 119 | [url-doczh]: README.zh-CN.md 120 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /** 4 | * This is a template MCP server that implements a simple notes system. 
5 | * It demonstrates core MCP concepts like resources and tools by allowing: 6 | * - Listing notes as resources 7 | * - Reading individual notes 8 | * - Creating new notes via a tool 9 | * - Summarizing all notes via a prompt 10 | */ 11 | 12 | import { Server } from "@modelcontextprotocol/sdk/server/index.js"; 13 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; 14 | import yargs from 'yargs'; 15 | import { hideBin } from 'yargs/helpers'; 16 | import { 17 | CallToolRequestSchema, 18 | ListResourcesRequestSchema, 19 | ListToolsRequestSchema, 20 | ReadResourceRequestSchema, 21 | ListPromptsRequestSchema, 22 | GetPromptRequestSchema, 23 | } from "@modelcontextprotocol/sdk/types.js"; 24 | import puppeteer from 'puppeteer'; 25 | import fs from 'fs-extra'; 26 | import path from 'path'; 27 | import { SearchEngine } from './search.js'; 28 | 29 | /** 30 | * Type alias for a note object. 31 | */ 32 | type Note = { title: string, content: string }; 33 | type Doc = { name: string, crawlerStart: string, crawlerPrefix: string }; 34 | type DocConfig = { [name: string]: boolean }; 35 | 36 | /** 37 | * Simple in-memory storage for notes and docs. 38 | * In a real implementation, this would likely be backed by a database. 39 | */ 40 | const notes: { [id: string]: Note } = { 41 | "1": { title: "First Note", content: "This is note 1" }, 42 | "2": { title: "Second Note", content: "This is note 2" } 43 | }; 44 | 45 | let docs: Doc[] = []; 46 | let docConfig: DocConfig = {}; 47 | 48 | // Parse command line arguments 49 | const argv = await yargs(hideBin(process.argv)) 50 | .option('docsDir', { 51 | alias: 'd', 52 | type: 'string', 53 | description: 'Directory to store docs and config', 54 | default: './docs' 55 | }) 56 | .parse(); 57 | 58 | const docDir = argv.docsDir || process.env.DOCS_DIR || './docs'; 59 | const configPath = path.join(docDir, 'docs-config.json'); 60 | 61 | /** 62 | * Create empty config file if it doesn't exist 63 | */ 64 | async function ensureConfigFile(): Promise<void> { 65 | try { 66 | if (!(await fs.pathExists(configPath))) { 67 | await fs.ensureDir(docDir); 68 | await fs.writeJson(configPath, { 69 | enabledDocs: {}, 70 | crawledDocs: {} 71 | }, { spaces: 2 }); 72 | console.log(`Created empty config file at ${configPath}`); 73 | } 74 | } catch (error) { 75 | console.error('Failed to create config file:', error); 76 | } 77 | } 78 | 79 | /** 80 | * Load doc config from file 81 | */ 82 | async function loadDocConfig(): Promise<void> { 83 | try { 84 | // Ensure config file exists before trying to load it 85 | await ensureConfigFile(); 86 | 87 | const config = await fs.readJson(configPath); 88 | docConfig = config.enabledDocs || {}; 89 | } catch (error) { 90 | console.error('Failed to load doc config:', error); 91 | docConfig = {}; 92 | } 93 | } 94 | 95 | /** 96 | * Save doc config to file 97 | */ 98 | async function saveDocConfig(): Promise<void> { 99 | try { 100 | const config = { 101 | enabledDocs: docConfig, 102 | crawledDocs: {} 103 | }; 104 | if (await fs.pathExists(configPath)) { 105 | const existingConfig = await fs.readJson(configPath); 106 | config.crawledDocs = existingConfig.crawledDocs || {}; 107 | } 108 | await fs.ensureDir(docDir); 109 | await fs.writeJson(configPath, config, { spaces: 2 }); 110 | } catch (error) { 111 | console.error('Failed to save doc config:', error); 112 | } 113 | } 114 | 115 | async function updateCrawledDoc(name: string): Promise<void> { 116 | try { 117 | // Ensure config file exists 118 | await ensureConfigFile(); 119 | 120 | const config: { 
enabledDocs: DocConfig, crawledDocs: { [name: string]: string } } = { 121 | enabledDocs: docConfig, 122 | crawledDocs: {} 123 | }; 124 | 125 | if (await fs.pathExists(configPath)) { 126 | const existingConfig = await fs.readJson(configPath); 127 | config.crawledDocs = existingConfig.crawledDocs || {}; 128 | } 129 | 130 | config.crawledDocs[name] = new Date().toISOString(); 131 | await fs.ensureDir(docDir); 132 | await fs.writeJson(configPath, config, { spaces: 2 }); 133 | } catch (error) { 134 | console.error('Failed to update crawled doc:', error); 135 | } 136 | } 137 | 138 | /** 139 | * Load docs from remote JSONL file 140 | */ 141 | async function loadDocs(): Promise<void> { 142 | try { 143 | const response = await fetch('https://raw.githubusercontent.com/getcursor/crawler/main/docs.jsonl'); 144 | if (!response.ok) { 145 | throw new Error(`HTTP error! status: ${response.status}`); 146 | } 147 | const text = await response.text(); 148 | docs = text 149 | .split('\n') 150 | .filter(line => line.trim()) 151 | .map(line => { 152 | try { 153 | return JSON.parse(line); 154 | } catch (parseError) { 155 | console.error('Failed to parse line:', line, parseError); 156 | return null; 157 | } 158 | }) 159 | .filter(doc => doc !== null) as Doc[]; 160 | } catch (error) { 161 | console.error('Failed to load docs:', error); 162 | docs = []; // Fallback to empty array 163 | } 164 | } 165 | 166 | /** 167 | * Crawl and save docs locally 168 | */ 169 | async function crawlAndSaveDocs(force: boolean = false): Promise<void> { 170 | await fs.ensureDir(docDir); 171 | console.error('========== START CRAWLING =========='); 172 | for (const doc of docs) { 173 | if (!docConfig[doc.name]) { 174 | console.error(`Skipping doc ${doc.name} - not enabled`); 175 | continue; 176 | } 177 | 178 | // Skip if already crawled and not forcing re-crawl 179 | if (!force && await fs.pathExists(configPath)) { 180 | const config = await fs.readJson(configPath); 181 | if (config.crawledDocs && config.crawledDocs[doc.name]) { 182 | console.error(`Skipping doc ${doc.name} - already crawled at ${config.crawledDocs[doc.name]}`); 183 | continue; 184 | } 185 | } 186 | 187 | try { 188 | // Create doc directory - FIX: use the correct path from docDir parameter 189 | const docDirPath = path.join(docDir, doc.name); 190 | await fs.ensureDir(docDirPath); 191 | 192 | // Launch browser and open new page 193 | const browser = await puppeteer.launch({ 194 | // WSL-friendly options to avoid GPU issues 195 | args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu'], 196 | headless: true 197 | }); 198 | 199 | try { 200 | const page = await browser.newPage(); 201 | 202 | // Navigate to start page 203 | console.error(`Processing doc: ${doc.name}`); 204 | console.error(`Crawler start: ${doc.crawlerStart}, Crawler prefix: ${doc.crawlerPrefix}`); 205 | await page.goto(doc.crawlerStart, { waitUntil: 'networkidle2' }); 206 | 207 | // Extract all links 208 | const links = Array.from(new Set( 209 | await page.evaluate((prefix) => { 210 | const anchors = Array.from(document.querySelectorAll('a[href]')); 211 | return anchors 212 | .map(a => { 213 | const href = a.getAttribute('href'); 214 | if (!href) return null; 215 | try { 216 | const url = new URL(href, window.location.origin); 217 | return url.toString(); 218 | } catch (error) { 219 | console.error(`Failed to parse href ${href}:`, error); 220 | return null; 221 | } 222 | }) 223 | .filter(link => link && link.startsWith(prefix)); 224 | }, doc.crawlerPrefix) 225 | )); 226 | 227 | if (links.length > 0) { 228 | 
console.error(`Found ${links.length} valid links to process`); 229 | 230 | for (const link of links) { 231 | if (!link) continue; 232 | 233 | try { 234 | console.log(`Processing link: ${link}`); 235 | const newPage = await browser.newPage(); 236 | await newPage.goto(link, { waitUntil: 'networkidle2' }); 237 | // Extract content as Markdown 238 | const content = await newPage.evaluate(() => { 239 | // Get page title 240 | const title = document.title; 241 | 242 | // Find main content element 243 | const main = document.querySelector('main') || 244 | document.querySelector('article') || 245 | document.querySelector('.main-content') || 246 | document.body; 247 | 248 | // Convert content to Markdown 249 | let markdown = `# ${title}\n\n`; 250 | 251 | // Convert headings 252 | main.querySelectorAll('h1, h2, h3, h4, h5, h6').forEach(heading => { 253 | const level = parseInt(heading.tagName[1]); 254 | const text = heading.textContent?.trim(); 255 | if (text) { 256 | markdown += '#'.repeat(level) + ' ' + text + '\n\n'; 257 | } 258 | }); 259 | 260 | // Convert paragraphs 261 | main.querySelectorAll('p').forEach(p => { 262 | const text = p.textContent?.trim(); 263 | if (text) { 264 | markdown += text + '\n\n'; 265 | } 266 | }); 267 | 268 | // Convert code blocks 269 | main.querySelectorAll('pre').forEach(pre => { 270 | const text = pre.textContent?.trim(); 271 | if (text) { 272 | markdown += '```\n' + text + '\n```\n\n'; 273 | } 274 | }); 275 | 276 | // Convert lists 277 | main.querySelectorAll('ul, ol').forEach(list => { 278 | const isOrdered = list.tagName === 'OL'; 279 | list.querySelectorAll('li').forEach((li, index) => { 280 | const text = li.textContent?.trim(); 281 | if (text) { 282 | markdown += isOrdered ? `${index + 1}. ` : '- '; 283 | markdown += text + '\n'; 284 | } 285 | }); 286 | markdown += '\n'; 287 | }); 288 | 289 | return markdown.trim(); 290 | }); 291 | await newPage.close(); 292 | 293 | // Save Markdown file 294 | // Create safe file name from URL path 295 | const url = new URL(link); 296 | const pathParts = url.pathname.split('/').filter(part => part.length > 0); 297 | let fileName = pathParts.join('_'); 298 | 299 | // Add extension if not present 300 | if (!fileName.endsWith('.md')) { 301 | fileName += '.md'; 302 | } 303 | // FIX: Use docDirPath instead of docDir 304 | const filePath = path.join(docDirPath, fileName); 305 | await fs.writeFile(filePath, content); 306 | console.log(`Successfully saved ${filePath}`); 307 | await updateCrawledDoc(doc.name); 308 | } catch (error) { 309 | console.error(`Failed to process page ${link}:`, error); 310 | } 311 | } 312 | } else { 313 | console.error('No valid links found'); 314 | } 315 | } finally { 316 | await browser.close(); 317 | } 318 | } catch (error) { 319 | console.error(`Failed to process doc ${doc.name}:`, error); 320 | } 321 | } 322 | } 323 | 324 | // Load docs and config when server starts 325 | loadDocs(); 326 | loadDocConfig(); 327 | 328 | /** 329 | * Create an MCP server with capabilities for resources (to list/read notes), 330 | * tools (to create new notes), and prompts (to summarize notes). 331 | */ 332 | 333 | // 初始化搜索引擎 334 | const searchEngine = new SearchEngine(docDir); 335 | await searchEngine.initialize(); 336 | 337 | const server = new Server( 338 | { 339 | name: "docs-mcp", 340 | version: "0.1.0", 341 | }, 342 | { 343 | capabilities: { 344 | resources: {}, 345 | tools: {}, 346 | prompts: {}, 347 | } 348 | } 349 | ); 350 | 351 | /** 352 | * Handler for listing available resources (both notes and docs). 
353 | * Each resource is exposed with: 354 | * - A unique URI scheme 355 | * - Plain text MIME type 356 | * - Human readable name and description 357 | */ 358 | server.setRequestHandler(ListResourcesRequestSchema, async () => { 359 | const noteResources = Object.entries(notes).map(([id, note]) => ({ 360 | uri: `note:///${id}`, 361 | mimeType: "text/plain", 362 | name: note.title, 363 | description: `A text note: ${note.title}` 364 | })); 365 | 366 | const docResources = docs.map((doc, index) => ({ 367 | uri: `doc:///${index}`, 368 | mimeType: "text/plain", 369 | name: doc.name, 370 | description: `Documentation for ${doc.name}` 371 | })); 372 | 373 | return { 374 | resources: [...noteResources, ...docResources] 375 | }; 376 | }); 377 | 378 | /** 379 | * Handler for reading the contents of a specific note. 380 | * Takes a note:// URI and returns the note content as plain text. 381 | */ 382 | server.setRequestHandler(ReadResourceRequestSchema, async (request) => { 383 | const url = new URL(request.params.uri); 384 | const id = url.pathname.replace(/^\//, ''); 385 | const note = notes[id]; 386 | 387 | if (!note) { 388 | throw new Error(`Note ${id} not found`); 389 | } 390 | 391 | return { 392 | contents: [{ 393 | uri: request.params.uri, 394 | mimeType: "text/plain", 395 | text: note.content 396 | }] 397 | }; 398 | }); 399 | 400 | /** 401 | * Handler that lists available tools. 402 | * Exposes tools for creating notes and managing docs. 403 | */ 404 | server.setRequestHandler(ListToolsRequestSchema, async () => { 405 | return { 406 | tools: [ 407 | { 408 | name: "enable_doc", 409 | description: "Enable crawling for a specific doc", 410 | inputSchema: { 411 | type: "object", 412 | properties: { 413 | name: { 414 | type: "string", 415 | description: "Name of the doc to enable" 416 | } 417 | }, 418 | required: ["name"] 419 | } 420 | }, 421 | { 422 | name: "disable_doc", 423 | description: "Disable crawling for a specific doc", 424 | inputSchema: { 425 | type: "object", 426 | properties: { 427 | name: { 428 | type: "string", 429 | description: "Name of the doc to disable" 430 | } 431 | }, 432 | required: ["name"] 433 | } 434 | }, 435 | { 436 | name: "crawl_docs", 437 | description: "Start crawling enabled docs", 438 | inputSchema: { 439 | type: "object", 440 | properties: { 441 | force: { 442 | type: "boolean", 443 | description: "Whether to force re-crawl all docs, ignoring previous crawl records" 444 | } 445 | } 446 | } 447 | }, 448 | { 449 | name: "build_index", 450 | description: "Build search index for docs", 451 | inputSchema: { 452 | type: "object", 453 | properties: { 454 | force: { 455 | type: "boolean", 456 | description: "Whether to force rebuild index" 457 | } 458 | } 459 | } 460 | }, 461 | { 462 | name: "search_docs", 463 | description: "Search documentation", 464 | inputSchema: { 465 | type: "object", 466 | properties: { 467 | query: { 468 | type: "string", 469 | description: "Search query" 470 | }, 471 | max_results: { 472 | type: "number", 473 | description: "Maximum number of results", 474 | default: 3 475 | }, 476 | doc_name: { 477 | type: "string", 478 | description: "Filter by document category" 479 | }, 480 | offset: { 481 | type: "number", 482 | description: "Number of results to skip", 483 | default: 0 484 | } 485 | }, 486 | required: ["query"] 487 | } 488 | }, 489 | { 490 | name: "build_index", 491 | description: "Build search index for docs", 492 | inputSchema: { 493 | type: "object", 494 | properties: { 495 | force: { 496 | type: "boolean", 497 | description: "Whether to 
force rebuild index" 498 | } 499 | } 500 | } 501 | }, 502 | { 503 | name: "list_enabled_docs", 504 | description: "List all enabled docs with their cache status", 505 | inputSchema: { 506 | type: "object", 507 | properties: { 508 | verbose: { 509 | type: "boolean", 510 | description: "Whether to show detailed information", 511 | default: false 512 | } 513 | } 514 | } 515 | }, 516 | { 517 | name: "list_all_docs", 518 | description: "List all available docs including disabled ones", 519 | inputSchema: { 520 | type: "object", 521 | properties: { 522 | verbose: { 523 | type: "boolean", 524 | description: "Whether to show detailed information", 525 | default: false 526 | } 527 | } 528 | } 529 | } 530 | ] 531 | }; 532 | }); 533 | 534 | /** 535 | * Handler for tool requests. 536 | */ 537 | server.setRequestHandler(CallToolRequestSchema, async (request) => { 538 | switch (request.params.name) { 539 | case "enable_doc": { 540 | const name = String(request.params.arguments?.name); 541 | docConfig[name] = true; 542 | await saveDocConfig(); 543 | return { 544 | content: [{ 545 | type: "text", 546 | text: `Enabled doc ${name}` 547 | }] 548 | }; 549 | } 550 | 551 | case "disable_doc": { 552 | const name = String(request.params.arguments?.name); 553 | docConfig[name] = false; 554 | await saveDocConfig(); 555 | return { 556 | content: [{ 557 | type: "text", 558 | text: `Disabled doc ${name}` 559 | }] 560 | }; 561 | } 562 | 563 | case "crawl_docs": { 564 | const force = Boolean(request.params.arguments?.force); 565 | await crawlAndSaveDocs(force); 566 | return { 567 | content: [{ 568 | type: "text", 569 | text: "Crawling completed" 570 | }] 571 | }; 572 | } 573 | 574 | case "build_index": { 575 | const force = Boolean(request.params.arguments?.force); 576 | await searchEngine.buildIndex(docDir); 577 | return { 578 | content: [{ 579 | type: "text", 580 | text: `Index built with ${Object.keys(searchEngine['docStore']).length} documents` 581 | }] 582 | }; 583 | } 584 | 585 | case "list_enabled_docs": { 586 | // Ensure config file exists before reading it 587 | await ensureConfigFile(); 588 | 589 | const verbose = Boolean(request.params.arguments?.verbose); 590 | const config = await fs.readJson(configPath); 591 | const enabledDocs = docs.filter(doc => docConfig[doc.name]); 592 | 593 | const result = enabledDocs.map(doc => { 594 | const crawledAt = config.crawledDocs?.[doc.name] || "Not crawled"; 595 | return verbose 596 | ? `${doc.name} (Enabled)\n Start URL: ${doc.crawlerStart}\n Last crawled: ${crawledAt}` 597 | : `${doc.name} [${crawledAt === "Not crawled" ? "Not cached" : "Cached"}]`; 598 | }); 599 | 600 | return { 601 | content: [{ 602 | type: "text", 603 | text: result.join("\n") || "No enabled docs found" 604 | }] 605 | }; 606 | } 607 | 608 | case "list_all_docs": { 609 | // Ensure config file exists before reading it 610 | await ensureConfigFile(); 611 | 612 | const verbose = Boolean(request.params.arguments?.verbose); 613 | const config = await fs.readJson(configPath); 614 | 615 | const result = docs.map(doc => { 616 | const isEnabled = docConfig[doc.name]; 617 | const crawledAt = isEnabled ? (config.crawledDocs?.[doc.name] || "Not crawled") : ""; 618 | return verbose 619 | ? `${doc.name} (${isEnabled ? "Enabled" : "Disabled"})\n Start URL: ${doc.crawlerStart}\n Last crawled: ${crawledAt || "N/A"}` 620 | : `${doc.name} [${isEnabled ? (crawledAt === "Not crawled" ? 
"Enabled, not cached" : "Enabled, cached") : "Disabled"}]`; 621 | }); 622 | 623 | return { 624 | content: [{ 625 | type: "text", 626 | text: result.join("\n") || "No docs found" 627 | }] 628 | }; 629 | } 630 | 631 | case "search_docs": { 632 | const query = String(request.params.arguments?.query); 633 | const maxResults = Number(request.params.arguments?.max_results) || 3; 634 | const docName = request.params.arguments?.doc_name ? 635 | String(request.params.arguments.doc_name) : undefined; 636 | const offset = Number(request.params.arguments?.offset) || 0; 637 | const results = await searchEngine.search(query, maxResults, docName, 0.2, offset); 638 | return { 639 | content: results.map(result => ({ 640 | type: "text", 641 | text: `[${result.score.toFixed(2)}] ${result.title}\n${result.excerpt}\n---` 642 | })) 643 | }; 644 | } 645 | 646 | default: 647 | throw new Error("Unknown tool"); 648 | } 649 | }); 650 | 651 | /** 652 | * Handler that lists available prompts. 653 | * Exposes a single "summarize_notes" prompt that summarizes all notes. 654 | */ 655 | server.setRequestHandler(ListPromptsRequestSchema, async () => { 656 | return { 657 | prompts: [ 658 | { 659 | name: "summarize_notes", 660 | description: "Summarize all notes", 661 | } 662 | ] 663 | }; 664 | }); 665 | 666 | /** 667 | * Handler for the summarize_notes prompt. 668 | * Returns a prompt that requests summarization of all notes, with the notes' contents embedded as resources. 669 | */ 670 | server.setRequestHandler(GetPromptRequestSchema, async (request) => { 671 | if (request.params.name !== "summarize_notes") { 672 | throw new Error("Unknown prompt"); 673 | } 674 | 675 | const embeddedNotes = Object.entries(notes).map(([id, note]) => ({ 676 | type: "resource" as const, 677 | resource: { 678 | uri: `note:///${id}`, 679 | mimeType: "text/plain", 680 | text: note.content 681 | } 682 | })); 683 | 684 | return { 685 | messages: [ 686 | { 687 | role: "user", 688 | content: { 689 | type: "text", 690 | text: "Please summarize the following notes:" 691 | } 692 | }, 693 | ...embeddedNotes.map(note => ({ 694 | role: "user" as const, 695 | content: note 696 | })), 697 | { 698 | role: "user", 699 | content: { 700 | type: "text", 701 | text: "Provide a concise summary of all the notes above." 702 | } 703 | } 704 | ] 705 | }; 706 | }); 707 | 708 | /** 709 | * Start the server using stdio transport. 710 | * This allows the server to communicate via standard input/output streams. 711 | */ 712 | async function main() { 713 | const transport = new StdioServerTransport(); 714 | await server.connect(transport); 715 | } 716 | 717 | main().catch((error) => { 718 | console.error("Server error:", error); 719 | process.exit(1); 720 | }); 721 | --------------------------------------------------------------------------------