├── .gitignore ├── src ├── types │ ├── lunr-languages.d.ts │ └── jieba-wasm.d.ts ├── search.ts └── index.ts ├── tsconfig.json ├── Dockerfile ├── CHANGELOG.md ├── smithery.yaml ├── package.json ├── README.zh-CN.md └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | build/ 3 | *.log 4 | .env* -------------------------------------------------------------------------------- /src/types/lunr-languages.d.ts: -------------------------------------------------------------------------------- 1 | declare module 'lunr-languages' { 2 | export const zh: { 3 | tokenizer: (token: string) => string[], 4 | stemmer: (token: string) => string 5 | }; 6 | } -------------------------------------------------------------------------------- /src/types/jieba-wasm.d.ts: -------------------------------------------------------------------------------- 1 | declare module 'jieba-wasm' { 2 | export function load(): Promise<void>; 3 | export function cut(text: string, hmm?: boolean): string[]; 4 | export function cutAll(text: string): string[]; 5 | export function cutForSearch(text: string, hmm?: boolean): string[]; 6 | export function tag(text: string, hmm?: boolean): Array<[string, string]>; 7 | export function extract(text: string, topk: number): Array<[string, number]>; 8 | } -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "Node16", 5 | "moduleResolution": "Node16", 6 | "outDir": "./build", 7 | "rootDir": "./src", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "allowSyntheticDefaultImports": true, 13 | "typeRoots": [ 14 | "./node_modules/@types", 15 | "./src/types" 16 | ] 17 | }, 18 | "include": ["src/**/*"], 19 | "exclude": ["node_modules"] 20 | } 21 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile 2 | FROM node:lts-alpine 3 | 4 | WORKDIR /app 5 | 6 | # Copy necessary files for dependency installation and build 7 | COPY package*.json tsconfig.json ./ 8 | COPY src ./src 9 | COPY README.md ./ 10 | COPY README.zh-CN.md ./ 11 | 12 | # Install dependencies (including dev dependencies needed for building) 13 | RUN npm install --ignore-scripts 14 | 15 | # Build the project 16 | RUN npm run build 17 | 18 | # Expose docs directory if needed (optional) 19 | 20 | # Command to run the MCP server 21 | CMD ["node", "build/index.js", "--docsDir", "./docs"] 22 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file.
4 | 5 | ## [1.0.3] - 2025-04-09 6 | 7 | ### Added 8 | - Smithery deployment support (thanks @calclavia) 9 | 10 | ### Fixed (thanks @KunihiroS) 11 | - Auto-create config file when it does not exist to prevent errors on first run 12 | - Fixed error when running `list_all_docs` or `list_enabled_docs` before any configuration 13 | - Fixed incorrect path handling in document crawling that ignored the `--docsDir` parameter 14 | - Added WSL compatibility options to Puppeteer for better performance in WSL environments 15 | 16 | ## [1.0.0] - 2025-03-25 17 | ### Added 18 | - Initial release of docs-mcp MCP Server 19 | - Core functionality for document management 20 | - MCP protocol implementation 21 | - Basic document summarization 22 | 23 | ### Changed 24 | - Updated project documentation 25 | - Improved README and project brief 26 | - Version bump to 1.0.0 27 | 28 | ### Fixed 29 | - Documentation formatting issues 30 | - Project metadata consistency -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml 2 | 3 | startCommand: 4 | type: stdio 5 | configSchema: 6 | # JSON Schema defining the configuration options for the MCP. 7 | type: object 8 | required: 9 | - docsDir 10 | properties: 11 | docsDir: 12 | type: string 13 | description: Path to the documentation directory to crawl. 14 | disabled: 15 | type: boolean 16 | default: false 17 | description: Optionally disable the server 18 | alwaysAllow: 19 | type: array 20 | items: 21 | type: string 22 | default: [] 23 | description: List of tool names allowed without confirmation 24 | commandFunction: 25 | # A JS function that produces the CLI command based on the given config to start the MCP on stdio.
26 | |- 27 | (config) => ({ 28 | command: 'node', 29 | args: ['build/index.js', '--docsDir', config.docsDir], 30 | env: {} 31 | }) 32 | exampleConfig: 33 | docsDir: ./docs 34 | disabled: false 35 | alwaysAllow: 36 | - search_docs 37 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "open-docs-mcp", 3 | "version": "1.0.3", 4 | "description": "open-source MCP implementation of cursor docs functionality", 5 | "author": "askme765cs@yahoo.com", 6 | "license": "MIT", 7 | "repository": { 8 | "type": "git", 9 | "url": "https://github.com/askme765cs/open-docs-mcp.git" 10 | }, 11 | "bugs": { 12 | "url": "https://github.com/askme765cs/open-docs-mcp/issues" 13 | }, 14 | "homepage": "https://github.com/askme765cs/open-docs-mcp#readme", 15 | "keywords": [ 16 | "mcp", 17 | "documentation", 18 | "search", 19 | "open-source" 20 | ], 21 | "private": false, 22 | "type": "module", 23 | "engines": { 24 | "node": ">=18.0.0" 25 | }, 26 | "bin": { 27 | "open-docs-mcp": "./build/index.js" 28 | }, 29 | "readme": "README.md", 30 | "files": [ 31 | "build", 32 | "README.md", 33 | "README.zh-CN.md", 34 | "CHANGELOG.md" 35 | ], 36 | "scripts": { 37 | "build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"", 38 | "prepare": "npm run build", 39 | "watch": "tsc --watch", 40 | "inspector": "npx @modelcontextprotocol/inspector build/index.js" 41 | }, 42 | "devDependencies": { 43 | "@types/fs-extra": "^11.0.4", 44 | "@types/lunr": "^2.3.3", 45 | "@types/node": "^20.17.27", 46 | "@types/yargs": "^17.0.33", 47 | "typescript": "^5.3.3" 48 | }, 49 | "dependencies": { 50 | "@modelcontextprotocol/sdk": "^1.7.0", 51 | "cheerio": "^1.0.0", 52 | "fs-extra": "^11.3.0", 53 | "jieba-wasm": "^2.2.0", 54 | "lunr": "^2.3.9", 55 | "lunr-languages": "^1.4.0", 56 | "node-fetch": "^3.3.2", 57 | "puppeteer": "^24.4.0", 58 | "yargs": "^17.7.2" 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /README.zh-CN.md: -------------------------------------------------------------------------------- 1 | # open-docs-mcp MCP 服务器 2 | 3 | [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) 4 | [![Node Version](https://img.shields.io/badge/node-%3E%3D16.0.0-brightgreen.svg)](package.json) 5 | [![TypeScript](https://img.shields.io/badge/TypeScript-4.9.5-blue.svg)](package.json) 6 | 7 | 开源的MCP实现,提供文档管理功能。[English Version][url-docen] 8 | 9 | ## 功能特性 10 | 11 | ### 文档管理 12 | - 从多种来源爬取和索引文档 13 | - 支持多种文档格式 14 | - 全文搜索功能 15 | 16 | ### MCP服务器接口 17 | - 基于资源的文档访问 18 | - 基于工具的文档管理 19 | 20 | ### 可用工具 21 | 1. **enable_doc** - 启用特定文档的爬取 22 | 2. **disable_doc** - 禁用特定文档的爬取 23 | 3. **crawl_docs** - 开始爬取已启用的文档 24 | 4. **build_index** - 构建文档搜索索引 25 | 5. **search_docs** - 搜索文档 26 | 6. **list_enabled_docs** - 列出已启用的文档 27 | 7. **list_all_docs** - 列出所有可用文档 28 | 29 | ### Cursor @Docs 兼容性 30 | 31 | 本项目旨在复现Cursor的@Docs功能,提供: 32 | 33 | 1. **文档索引**: 34 | - 从多种来源爬取和索引文档 35 | - 支持多种文档格式(HTML, Markdown等) 36 | - 自动重新索引保持文档更新 37 | 38 | 2. **文档访问**: 39 | - 在所有索引文档中搜索 40 | - 与MCP协议集成提供AI上下文 41 | 42 | 3. 
**自定义文档管理**: 43 | - 通过`enable_doc`工具添加新文档源 44 | - 通过`list_enabled_docs`工具管理已启用文档 45 | - 使用`crawl_docs`工具强制重新爬取 46 | 47 | ### 系统架构 48 | ``` 49 | ┌───────────────────────────────────────────────────────┐ 50 | │ open-docs-mcp Server │ 51 | ├───────────────────┬───────────────────┬───────────────┤ 52 | │ 爬取模块 │ 搜索引擎 │ MCP服务器 │ 53 | ├───────────────────┼───────────────────┼───────────────┤ 54 | │ - 网页爬取 │ - 全文索引 │ - 资源管理 │ 55 | │ - 文档转换 │ - 相关性评分 │ - 工具管理 │ 56 | │ - 存储管理 │ - 查询解析 │ - 提示管理 │ 57 | └───────────────────┴───────────────────┴───────────────┘ 58 | ``` 59 | 60 | ## 使用 61 | 62 | ```bash 63 | npx -y open-docs-mcp --docsDir ./docs 64 | ``` 65 | 66 | ### 通过Smithery安装 67 | 68 | 要通过 [Smithery](https://smithery.ai/server/@askme765cs/open-docs-mcp) 自动为 Claude Desktop 安装文档管理服务器: 69 | 70 | ```bash 71 | npx -y @smithery/cli install @askme765cs/open-docs-mcp --client claude 72 | ``` 73 | 74 | ### 配置 75 | 76 | 在Claude Desktop中使用，添加服务器配置: 77 | 78 | MacOS: `~/Library/Application Support/Claude/claude_desktop_config.json` 79 | Windows: `%APPDATA%/Claude/claude_desktop_config.json` 80 | 81 | ```json 82 | { 83 | "mcpServers": { 84 | "open-docs-mcp": { 85 | "command": "npx", 86 | "args": [ 87 | "-y", 88 | "open-docs-mcp", 89 | "--docsDir", 90 | "/path/to/docs" 91 | ] 92 | } 93 | } 94 | } 95 | ``` 96 | 97 | **配置选项:** 98 | - `command`: Node.js可执行文件 99 | - `args`: 传递给脚本的参数数组 100 | - `--docsDir`: 必需，指定文档目录路径 101 | - `disabled`: 设为true可临时禁用服务器 102 | - `alwaysAllow`: 无需确认即可使用的工具名称数组 103 | 104 | ## 开发 105 | 106 | ```bash 107 | npm run watch # 自动重建 108 | npm run inspector # 使用MCP检查器调试 109 | ``` 110 | 111 | ## 贡献 112 | 欢迎提交Pull Request。重大改动请先创建issue讨论。 113 | 114 | ## 许可证 115 | [MIT](LICENSE) 116 | 117 | [url-docen]: README.md -------------------------------------------------------------------------------- /src/search.ts: -------------------------------------------------------------------------------- 1 | import lunr from 'lunr'; 2 | import path from 'path'; 3 | import fs from 'fs-extra'; 4 | 5 | interface DocEntry { 6 | path: string; 7 | title: string; 8 | content: string; 9 | } 10 | 11 | class SearchEngine { 12 | private index!: lunr.Index; 13 | private docStore: Record<string, DocEntry> = {}; 14 | private indexPath: string; 15 | 16 | constructor(docsDir: string) { 17 | this.indexPath = path.join(docsDir, 'search-index.json'); 18 | } 19 | 20 | async initialize() { 21 | if (await fs.pathExists(this.indexPath)) { 22 | await this.loadIndex(); 23 | } 24 | } 25 | 26 | private async loadIndex() { 27 | const indexData = await fs.readJson(this.indexPath); 28 | this.index = lunr.Index.load(indexData.index); 29 | this.docStore = indexData.docStore; 30 | } 31 | 32 | async buildIndex(docsDir: string) { 33 | const docs = await this.collectDocs(docsDir); 34 | this.index = lunr(function() { 35 | this.ref('path'); 36 | this.field('title'); 37 | this.field('content'); 38 | 39 | docs.forEach(doc => { 40 | this.add(doc); 41 | }); 42 | }); 43 | 44 | // Store documents separately 45 | docs.forEach(doc => { 46 | this.docStore[doc.path] = doc; 47 | }); 48 | 49 | await this.saveIndex(); 50 | } 51 | 52 | private async collectDocs(docsDir: string): Promise<DocEntry[]> { 53 | const docs: DocEntry[] = []; 54 | const docCategories = await fs.readdir(docsDir); 55 | 56 | for (const category of docCategories) { 57 | const categoryPath = path.join(docsDir, category); 58 | if ((await fs.stat(categoryPath)).isDirectory()) { 59 | const files = await fs.readdir(categoryPath); 60 | 61 | for (const file of files) { 62 | if (file.endsWith('.md')) { 63 | const filePath = 
path.join(categoryPath, file); 64 | const content = await fs.readFile(filePath, 'utf-8'); 65 | docs.push({ 66 | path: filePath, 67 | title: `${category}/${path.basename(file, '.md')}`, 68 | content 69 | }); 70 | } 71 | } 72 | } 73 | } 74 | 75 | return docs; 76 | } 77 | 78 | private async saveIndex() { 79 | await fs.writeJson(this.indexPath, { 80 | version: new Date().toISOString(), 81 | index: this.index.toJSON(), 82 | docStore: this.docStore 83 | }); 84 | } 85 | 86 | async search(query: string, maxResults = 3, docName?: string, minScore = 0.2, offset = 0) { 87 | if (!this.index) { 88 | throw new Error('Index not initialized'); 89 | } 90 | 91 | let results = this.index.search(query); 92 | 93 | // Filter by doc category 94 | if (docName) { 95 | results = results.filter(result => { 96 | const doc = this.docStore[result.ref]; 97 | return doc.title.startsWith(`${docName}/`); 98 | }); 99 | } 100 | 101 | // Filter by minimum score 102 | results = results.filter(result => result.score >= minScore); 103 | 104 | return results.slice(offset, offset + maxResults).map(result => { 105 | const doc = this.docStore[result.ref]; 106 | return { 107 | path: doc.path, 108 | score: result.score, 109 | title: doc.title, 110 | excerpt: this.createExcerpt(doc.content, query) 111 | }; 112 | }); 113 | } 114 | 115 | private createExcerpt(content: string, query: string): string { 116 | const pos = content.toLowerCase().indexOf(query.toLowerCase()); 117 | const start = Math.max(0, pos - 400); 118 | const end = Math.min(content.length, pos + query.length + 400); 119 | let excerpt = content.slice(start, end); 120 | 121 | if (pos >= 0) { 122 | excerpt = excerpt.replace( 123 | new RegExp(query, 'gi'), 124 | match => `**${match}**` 125 | ); 126 | } 127 | 128 | return excerpt; 129 | } 130 | } 131 | 132 | export { SearchEngine }; -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # open-docs-mcp MCP Server 2 | 3 | [![smithery badge](https://smithery.ai/badge/@askme765cs/open-docs-mcp)](https://smithery.ai/server/@askme765cs/open-docs-mcp) 4 | [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) 5 | [![Node Version](https://img.shields.io/badge/node-%3E%3D16.0.0-brightgreen.svg)](package.json) 6 | [![TypeScript](https://img.shields.io/badge/TypeScript-4.9.5-blue.svg)](package.json) 7 | 8 | An open-source MCP implementation providing document management functionality. 9 | [中文文档][url-doczh] 10 | 11 | ## Features 12 | 13 | ### Document Management 14 | - Crawl and index documentation from various sources 15 | - Support for multiple document formats 16 | - Full-text search capabilities 17 | 18 | ### MCP Server API 19 | - Resource-based access to documents 20 | - Tool-based document management 21 | 22 | ### Available Tools 23 | 1. **enable_doc** - Enable crawling for a specific doc 24 | 2. **disable_doc** - Disable crawling for a specific doc 25 | 3. **crawl_docs** - Start crawling enabled docs 26 | 4. **build_index** - Build search index for docs 27 | 5. **search_docs** - Search documentation 28 | 6. **list_enabled_docs** - List enabled docs 29 | 7. **list_all_docs** - List all available docs 30 | 31 | ### Cursor @Docs Compatibility 32 | 33 | This project aims to replicate Cursor's @Docs functionality by providing: 34 | 35 | 1. **Document Indexing**: 36 | - Crawl and index documentation from various sources 37 | - Support for multiple document formats (HTML, Markdown, etc.) 
38 | - Automatic re-indexing to keep docs up-to-date 39 | 40 | 2. **Document Access**: 41 | - Search across all indexed documentation 42 | - Integration with MCP protocol for AI context 43 | 44 | 3. **Custom Docs Management**: 45 | - Add new documentation sources via `enable_doc` tool 46 | - Manage enabled docs via `list_enabled_docs` tool 47 | - Force re-crawl with `crawl_docs` tool 48 | 49 | ### Architecture 50 | ``` 51 | ┌───────────────────────────────────────────────────────┐ 52 | │ open-docs-mcp Server │ 53 | ├───────────────────┬───────────────────┬───────────────┤ 54 | │ Crawler Module │ Search Engine │ MCP Server │ 55 | ├───────────────────┼───────────────────┼───────────────┤ 56 | │ - Web crawling │ - Full-text index │ - Resources │ 57 | │ - Doc conversion │ - Relevance score │ - Tools │ 58 | │ - Storage │ - Query parsing │ - Prompts │ 59 | └───────────────────┴───────────────────┴───────────────┘ 60 | ``` 61 | 62 | ## Usage 63 | 64 | ```bash 65 | npx -y open-docs-mcp --docsDir ./docs 66 | ``` 67 | 68 | ### Installing via Smithery 69 | 70 | To install Document Management Server for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@askme765cs/open-docs-mcp): 71 | 72 | ```bash 73 | npx -y @smithery/cli install @askme765cs/open-docs-mcp --client claude 74 | ``` 75 | 76 | ### Configuration 77 | 78 | To use with Claude Desktop, add the server config: 79 | 80 | On MacOS: `~/Library/Application Support/Claude/claude_desktop_config.json` 81 | On Windows: `%APPDATA%/Claude/claude_desktop_config.json` 82 | 83 | ```json 84 | { 85 | "mcpServers": { 86 | "open-docs-mcp": { 87 | "command": "npx", 88 | "args": [ 89 | "-y", 90 | "open-docs-mcp", 91 | "--docsDir", 92 | "/path/to/docs" 93 | ] 94 | } 95 | } 96 | } 97 | ``` 98 | 99 | **Configuration Options:** 100 | - `command`: Node.js executable 101 | - `args`: Array of arguments to pass to the script 102 | - `--docsDir`: Required, specifies docs directory path 103 | - `disabled`: Set to true to temporarily disable the server 104 | - `alwaysAllow`: Array of tool names that can be used without confirmation 105 | 106 | ## Development 107 | 108 | ```bash 109 | npm run watch # Auto-rebuild on changes 110 | npm run inspector # Debug with MCP Inspector 111 | ``` 112 | 113 | ## Contributing 114 | Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change. 115 | 116 | ## License 117 | [MIT](LICENSE) 118 | 119 | [url-doczh]: README.zh-CN.md 120 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /** 4 | * This is a template MCP server that implements a simple notes system. 
5 | * It demonstrates core MCP concepts like resources and tools by allowing: 6 | * - Listing notes as resources 7 | * - Reading individual notes 8 | * - Creating new notes via a tool 9 | * - Summarizing all notes via a prompt 10 | */ 11 | 12 | import { Server } from "@modelcontextprotocol/sdk/server/index.js"; 13 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; 14 | import yargs from 'yargs'; 15 | import { hideBin } from 'yargs/helpers'; 16 | import { 17 | CallToolRequestSchema, 18 | ListResourcesRequestSchema, 19 | ListToolsRequestSchema, 20 | ReadResourceRequestSchema, 21 | ListPromptsRequestSchema, 22 | GetPromptRequestSchema, 23 | } from "@modelcontextprotocol/sdk/types.js"; 24 | import puppeteer from 'puppeteer'; 25 | import fs from 'fs-extra'; 26 | import path from 'path'; 27 | import { SearchEngine } from './search.js'; 28 | 29 | /** 30 | * Type alias for a note object. 31 | */ 32 | type Note = { title: string, content: string }; 33 | type Doc = { name: string, crawlerStart: string, crawlerPrefix: string }; 34 | type DocConfig = { [name: string]: boolean }; 35 | 36 | /** 37 | * Simple in-memory storage for notes and docs. 38 | * In a real implementation, this would likely be backed by a database. 39 | */ 40 | const notes: { [id: string]: Note } = { 41 | "1": { title: "First Note", content: "This is note 1" }, 42 | "2": { title: "Second Note", content: "This is note 2" } 43 | }; 44 | 45 | let docs: Doc[] = []; 46 | let docConfig: DocConfig = {}; 47 | 48 | // Parse command line arguments 49 | const argv = await yargs(hideBin(process.argv)) 50 | .option('docsDir', { 51 | alias: 'd', 52 | type: 'string', 53 | description: 'Directory to store docs and config', 54 | default: './docs' 55 | }) 56 | .parse(); 57 | 58 | const docDir = argv.docsDir || process.env.DOCS_DIR || './docs'; 59 | const configPath = path.join(docDir, 'docs-config.json'); 60 | 61 | /** 62 | * Create empty config file if it doesn't exist 63 | */ 64 | async function ensureConfigFile(): Promise<void> { 65 | try { 66 | if (!(await fs.pathExists(configPath))) { 67 | await fs.ensureDir(docDir); 68 | await fs.writeJson(configPath, { 69 | enabledDocs: {}, 70 | crawledDocs: {} 71 | }, { spaces: 2 }); 72 | console.log(`Created empty config file at ${configPath}`); 73 | } 74 | } catch (error) { 75 | console.error('Failed to create config file:', error); 76 | } 77 | } 78 | 79 | /** 80 | * Load doc config from file 81 | */ 82 | async function loadDocConfig(): Promise<void> { 83 | try { 84 | // Ensure config file exists before trying to load it 85 | await ensureConfigFile(); 86 | 87 | const config = await fs.readJson(configPath); 88 | docConfig = config.enabledDocs || {}; 89 | } catch (error) { 90 | console.error('Failed to load doc config:', error); 91 | docConfig = {}; 92 | } 93 | } 94 | 95 | /** 96 | * Save doc config to file 97 | */ 98 | async function saveDocConfig(): Promise<void> { 99 | try { 100 | const config = { 101 | enabledDocs: docConfig, 102 | crawledDocs: {} 103 | }; 104 | if (await fs.pathExists(configPath)) { 105 | const existingConfig = await fs.readJson(configPath); 106 | config.crawledDocs = existingConfig.crawledDocs || {}; 107 | } 108 | await fs.ensureDir(docDir); 109 | await fs.writeJson(configPath, config, { spaces: 2 }); 110 | } catch (error) { 111 | console.error('Failed to save doc config:', error); 112 | } 113 | } 114 | 115 | async function updateCrawledDoc(name: string): Promise<void> { 116 | try { 117 | // Ensure config file exists 118 | await ensureConfigFile(); 119 | 120 | const config: { 
enabledDocs: DocConfig, crawledDocs: { [name: string]: string } } = { 121 | enabledDocs: docConfig, 122 | crawledDocs: {} 123 | }; 124 | 125 | if (await fs.pathExists(configPath)) { 126 | const existingConfig = await fs.readJson(configPath); 127 | config.crawledDocs = existingConfig.crawledDocs || {}; 128 | } 129 | 130 | config.crawledDocs[name] = new Date().toISOString(); 131 | await fs.ensureDir(docDir); 132 | await fs.writeJson(configPath, config, { spaces: 2 }); 133 | } catch (error) { 134 | console.error('Failed to update crawled doc:', error); 135 | } 136 | } 137 | 138 | /** 139 | * Load docs from remote JSONL file 140 | */ 141 | async function loadDocs(): Promise<void> { 142 | try { 143 | const response = await fetch('https://raw.githubusercontent.com/getcursor/crawler/main/docs.jsonl'); 144 | if (!response.ok) { 145 | throw new Error(`HTTP error! status: ${response.status}`); 146 | } 147 | const text = await response.text(); 148 | docs = text 149 | .split('\n') 150 | .filter(line => line.trim()) 151 | .map(line => { 152 | try { 153 | return JSON.parse(line); 154 | } catch (parseError) { 155 | console.error('Failed to parse line:', line, parseError); 156 | return null; 157 | } 158 | }) 159 | .filter(doc => doc !== null) as Doc[]; 160 | } catch (error) { 161 | console.error('Failed to load docs:', error); 162 | docs = []; // Fallback to empty array 163 | } 164 | } 165 | 166 | /** 167 | * Crawl and save docs locally 168 | */ 169 | async function crawlAndSaveDocs(force: boolean = false): Promise<void> { 170 | await fs.ensureDir(docDir); 171 | console.error('========== START CRAWLING =========='); 172 | for (const doc of docs) { 173 | if (!docConfig[doc.name]) { 174 | console.error(`Skipping doc ${doc.name} - not enabled`); 175 | continue; 176 | } 177 | 178 | // Skip if already crawled and not forcing re-crawl 179 | if (!force && await fs.pathExists(configPath)) { 180 | const config = await fs.readJson(configPath); 181 | if (config.crawledDocs && config.crawledDocs[doc.name]) { 182 | console.error(`Skipping doc ${doc.name} - already crawled at ${config.crawledDocs[doc.name]}`); 183 | continue; 184 | } 185 | } 186 | 187 | try { 188 | // Create doc directory - FIX: use the correct path from docDir parameter 189 | const docDirPath = path.join(docDir, doc.name); 190 | await fs.ensureDir(docDirPath); 191 | 192 | // Launch browser and open new page 193 | const browser = await puppeteer.launch({ 194 | // WSL-friendly options to avoid GPU issues 195 | args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu'], 196 | headless: true 197 | }); 198 | 199 | try { 200 | const page = await browser.newPage(); 201 | 202 | // Navigate to start page 203 | console.error(`Processing doc: ${doc.name}`); 204 | console.error(`Crawler start: ${doc.crawlerStart}, Crawler prefix: ${doc.crawlerPrefix}`); 205 | await page.goto(doc.crawlerStart, { waitUntil: 'networkidle2' }); 206 | 207 | // Extract all links 208 | const links = Array.from(new Set( 209 | await page.evaluate((prefix) => { 210 | const anchors = Array.from(document.querySelectorAll('a[href]')); 211 | return anchors 212 | .map(a => { 213 | const href = a.getAttribute('href'); 214 | if (!href) return null; 215 | try { 216 | const url = new URL(href, window.location.origin); 217 | return url.toString(); 218 | } catch (error) { 219 | console.error(`Failed to parse href ${href}:`, error); 220 | return null; 221 | } 222 | }) 223 | .filter(link => link && link.startsWith(prefix)); 224 | }, doc.crawlerPrefix) 225 | )); 226 | 227 | if (links.length > 0) { 228 | 
console.error(`Found ${links.length} valid links to process`); 229 | 230 | for (const link of links) { 231 | if (!link) continue; 232 | 233 | try { 234 | console.log(`Processing link: ${link}`); 235 | const newPage = await browser.newPage(); 236 | await newPage.goto(link, { waitUntil: 'networkidle2' }); 237 | // Extract content as Markdown 238 | const content = await newPage.evaluate(() => { 239 | // Get page title 240 | const title = document.title; 241 | 242 | // Find main content element 243 | const main = document.querySelector('main') || 244 | document.querySelector('article') || 245 | document.querySelector('.main-content') || 246 | document.body; 247 | 248 | // Convert content to Markdown 249 | let markdown = `# ${title}\n\n`; 250 | 251 | // Convert headings 252 | main.querySelectorAll('h1, h2, h3, h4, h5, h6').forEach(heading => { 253 | const level = parseInt(heading.tagName[1]); 254 | const text = heading.textContent?.trim(); 255 | if (text) { 256 | markdown += '#'.repeat(level) + ' ' + text + '\n\n'; 257 | } 258 | }); 259 | 260 | // Convert paragraphs 261 | main.querySelectorAll('p').forEach(p => { 262 | const text = p.textContent?.trim(); 263 | if (text) { 264 | markdown += text + '\n\n'; 265 | } 266 | }); 267 | 268 | // Convert code blocks 269 | main.querySelectorAll('pre').forEach(pre => { 270 | const text = pre.textContent?.trim(); 271 | if (text) { 272 | markdown += '```\n' + text + '\n```\n\n'; 273 | } 274 | }); 275 | 276 | // Convert lists 277 | main.querySelectorAll('ul, ol').forEach(list => { 278 | const isOrdered = list.tagName === 'OL'; 279 | list.querySelectorAll('li').forEach((li, index) => { 280 | const text = li.textContent?.trim(); 281 | if (text) { 282 | markdown += isOrdered ? `${index + 1}. ` : '- '; 283 | markdown += text + '\n'; 284 | } 285 | }); 286 | markdown += '\n'; 287 | }); 288 | 289 | return markdown.trim(); 290 | }); 291 | await newPage.close(); 292 | 293 | // Save Markdown file 294 | // Create safe file name from URL path 295 | const url = new URL(link); 296 | const pathParts = url.pathname.split('/').filter(part => part.length > 0); 297 | let fileName = pathParts.join('_'); 298 | 299 | // Add extension if not present 300 | if (!fileName.endsWith('.md')) { 301 | fileName += '.md'; 302 | } 303 | // FIX: Use docDirPath instead of docDir 304 | const filePath = path.join(docDirPath, fileName); 305 | await fs.writeFile(filePath, content); 306 | console.log(`Successfully saved ${filePath}`); 307 | await updateCrawledDoc(doc.name); 308 | } catch (error) { 309 | console.error(`Failed to process page ${link}:`, error); 310 | } 311 | } 312 | } else { 313 | console.error('No valid links found'); 314 | } 315 | } finally { 316 | await browser.close(); 317 | } 318 | } catch (error) { 319 | console.error(`Failed to process doc ${doc.name}:`, error); 320 | } 321 | } 322 | } 323 | 324 | // Load docs and config when server starts 325 | loadDocs(); 326 | loadDocConfig(); 327 | 328 | /** 329 | * Create an MCP server with capabilities for resources (to list/read notes), 330 | * tools (to create new notes), and prompts (to summarize notes). 331 | */ 332 | 333 | // 初始化搜索引擎 334 | const searchEngine = new SearchEngine(docDir); 335 | await searchEngine.initialize(); 336 | 337 | const server = new Server( 338 | { 339 | name: "docs-mcp", 340 | version: "0.1.0", 341 | }, 342 | { 343 | capabilities: { 344 | resources: {}, 345 | tools: {}, 346 | prompts: {}, 347 | } 348 | } 349 | ); 350 | 351 | /** 352 | * Handler for listing available resources (both notes and docs). 
353 | * Each resource is exposed with: 354 | * - A unique URI scheme 355 | * - Plain text MIME type 356 | * - Human readable name and description 357 | */ 358 | server.setRequestHandler(ListResourcesRequestSchema, async () => { 359 | const noteResources = Object.entries(notes).map(([id, note]) => ({ 360 | uri: `note:///${id}`, 361 | mimeType: "text/plain", 362 | name: note.title, 363 | description: `A text note: ${note.title}` 364 | })); 365 | 366 | const docResources = docs.map((doc, index) => ({ 367 | uri: `doc:///${index}`, 368 | mimeType: "text/plain", 369 | name: doc.name, 370 | description: `Documentation for ${doc.name}` 371 | })); 372 | 373 | return { 374 | resources: [...noteResources, ...docResources] 375 | }; 376 | }); 377 | 378 | /** 379 | * Handler for reading the contents of a specific note. 380 | * Takes a note:// URI and returns the note content as plain text. 381 | */ 382 | server.setRequestHandler(ReadResourceRequestSchema, async (request) => { 383 | const url = new URL(request.params.uri); 384 | const id = url.pathname.replace(/^\//, ''); 385 | const note = notes[id]; 386 | 387 | if (!note) { 388 | throw new Error(`Note ${id} not found`); 389 | } 390 | 391 | return { 392 | contents: [{ 393 | uri: request.params.uri, 394 | mimeType: "text/plain", 395 | text: note.content 396 | }] 397 | }; 398 | }); 399 | 400 | /** 401 | * Handler that lists available tools. 402 | * Exposes tools for creating notes and managing docs. 403 | */ 404 | server.setRequestHandler(ListToolsRequestSchema, async () => { 405 | return { 406 | tools: [ 407 | { 408 | name: "enable_doc", 409 | description: "Enable crawling for a specific doc", 410 | inputSchema: { 411 | type: "object", 412 | properties: { 413 | name: { 414 | type: "string", 415 | description: "Name of the doc to enable" 416 | } 417 | }, 418 | required: ["name"] 419 | } 420 | }, 421 | { 422 | name: "disable_doc", 423 | description: "Disable crawling for a specific doc", 424 | inputSchema: { 425 | type: "object", 426 | properties: { 427 | name: { 428 | type: "string", 429 | description: "Name of the doc to disable" 430 | } 431 | }, 432 | required: ["name"] 433 | } 434 | }, 435 | { 436 | name: "crawl_docs", 437 | description: "Start crawling enabled docs", 438 | inputSchema: { 439 | type: "object", 440 | properties: { 441 | force: { 442 | type: "boolean", 443 | description: "Whether to force re-crawl all docs, ignoring previous crawl records" 444 | } 445 | } 446 | } 447 | }, 448 | { 449 | name: "build_index", 450 | description: "Build search index for docs", 451 | inputSchema: { 452 | type: "object", 453 | properties: { 454 | force: { 455 | type: "boolean", 456 | description: "Whether to force rebuild index" 457 | } 458 | } 459 | } 460 | }, 461 | { 462 | name: "search_docs", 463 | description: "Search documentation", 464 | inputSchema: { 465 | type: "object", 466 | properties: { 467 | query: { 468 | type: "string", 469 | description: "Search query" 470 | }, 471 | max_results: { 472 | type: "number", 473 | description: "Maximum number of results", 474 | default: 3 475 | }, 476 | doc_name: { 477 | type: "string", 478 | description: "Filter by document category" 479 | }, 480 | offset: { 481 | type: "number", 482 | description: "Number of results to skip", 483 | default: 0 484 | } 485 | }, 486 | required: ["query"] 487 | } 488 | }, 489 | { 490 | name: "build_index", 491 | description: "Build search index for docs", 492 | inputSchema: { 493 | type: "object", 494 | properties: { 495 | force: { 496 | type: "boolean", 497 | description: "Whether to 
force rebuild index" 498 | } 499 | } 500 | } 501 | }, 502 | { 503 | name: "list_enabled_docs", 504 | description: "List all enabled docs with their cache status", 505 | inputSchema: { 506 | type: "object", 507 | properties: { 508 | verbose: { 509 | type: "boolean", 510 | description: "Whether to show detailed information", 511 | default: false 512 | } 513 | } 514 | } 515 | }, 516 | { 517 | name: "list_all_docs", 518 | description: "List all available docs including disabled ones", 519 | inputSchema: { 520 | type: "object", 521 | properties: { 522 | verbose: { 523 | type: "boolean", 524 | description: "Whether to show detailed information", 525 | default: false 526 | } 527 | } 528 | } 529 | } 530 | ] 531 | }; 532 | }); 533 | 534 | /** 535 | * Handler for tool requests. 536 | */ 537 | server.setRequestHandler(CallToolRequestSchema, async (request) => { 538 | switch (request.params.name) { 539 | case "enable_doc": { 540 | const name = String(request.params.arguments?.name); 541 | docConfig[name] = true; 542 | await saveDocConfig(); 543 | return { 544 | content: [{ 545 | type: "text", 546 | text: `Enabled doc ${name}` 547 | }] 548 | }; 549 | } 550 | 551 | case "disable_doc": { 552 | const name = String(request.params.arguments?.name); 553 | docConfig[name] = false; 554 | await saveDocConfig(); 555 | return { 556 | content: [{ 557 | type: "text", 558 | text: `Disabled doc ${name}` 559 | }] 560 | }; 561 | } 562 | 563 | case "crawl_docs": { 564 | const force = Boolean(request.params.arguments?.force); 565 | await crawlAndSaveDocs(force); 566 | return { 567 | content: [{ 568 | type: "text", 569 | text: "Crawling completed" 570 | }] 571 | }; 572 | } 573 | 574 | case "build_index": { 575 | const force = Boolean(request.params.arguments?.force); 576 | await searchEngine.buildIndex(docDir); 577 | return { 578 | content: [{ 579 | type: "text", 580 | text: `Index built with ${Object.keys(searchEngine['docStore']).length} documents` 581 | }] 582 | }; 583 | } 584 | 585 | case "list_enabled_docs": { 586 | // Ensure config file exists before reading it 587 | await ensureConfigFile(); 588 | 589 | const verbose = Boolean(request.params.arguments?.verbose); 590 | const config = await fs.readJson(configPath); 591 | const enabledDocs = docs.filter(doc => docConfig[doc.name]); 592 | 593 | const result = enabledDocs.map(doc => { 594 | const crawledAt = config.crawledDocs?.[doc.name] || "Not crawled"; 595 | return verbose 596 | ? `${doc.name} (Enabled)\n Start URL: ${doc.crawlerStart}\n Last crawled: ${crawledAt}` 597 | : `${doc.name} [${crawledAt === "Not crawled" ? "Not cached" : "Cached"}]`; 598 | }); 599 | 600 | return { 601 | content: [{ 602 | type: "text", 603 | text: result.join("\n") || "No enabled docs found" 604 | }] 605 | }; 606 | } 607 | 608 | case "list_all_docs": { 609 | // Ensure config file exists before reading it 610 | await ensureConfigFile(); 611 | 612 | const verbose = Boolean(request.params.arguments?.verbose); 613 | const config = await fs.readJson(configPath); 614 | 615 | const result = docs.map(doc => { 616 | const isEnabled = docConfig[doc.name]; 617 | const crawledAt = isEnabled ? (config.crawledDocs?.[doc.name] || "Not crawled") : ""; 618 | return verbose 619 | ? `${doc.name} (${isEnabled ? "Enabled" : "Disabled"})\n Start URL: ${doc.crawlerStart}\n Last crawled: ${crawledAt || "N/A"}` 620 | : `${doc.name} [${isEnabled ? (crawledAt === "Not crawled" ? 
"Enabled, not cached" : "Enabled, cached") : "Disabled"}]`; 621 | }); 622 | 623 | return { 624 | content: [{ 625 | type: "text", 626 | text: result.join("\n") || "No docs found" 627 | }] 628 | }; 629 | } 630 | 631 | case "search_docs": { 632 | const query = String(request.params.arguments?.query); 633 | const maxResults = Number(request.params.arguments?.max_results) || 3; 634 | const docName = request.params.arguments?.doc_name ? 635 | String(request.params.arguments.doc_name) : undefined; 636 | const offset = Number(request.params.arguments?.offset) || 0; 637 | const results = await searchEngine.search(query, maxResults, docName, 0.2, offset); 638 | return { 639 | content: results.map(result => ({ 640 | type: "text", 641 | text: `[${result.score.toFixed(2)}] ${result.title}\n${result.excerpt}\n---` 642 | })) 643 | }; 644 | } 645 | 646 | default: 647 | throw new Error("Unknown tool"); 648 | } 649 | }); 650 | 651 | /** 652 | * Handler that lists available prompts. 653 | * Exposes a single "summarize_notes" prompt that summarizes all notes. 654 | */ 655 | server.setRequestHandler(ListPromptsRequestSchema, async () => { 656 | return { 657 | prompts: [ 658 | { 659 | name: "summarize_notes", 660 | description: "Summarize all notes", 661 | } 662 | ] 663 | }; 664 | }); 665 | 666 | /** 667 | * Handler for the summarize_notes prompt. 668 | * Returns a prompt that requests summarization of all notes, with the notes' contents embedded as resources. 669 | */ 670 | server.setRequestHandler(GetPromptRequestSchema, async (request) => { 671 | if (request.params.name !== "summarize_notes") { 672 | throw new Error("Unknown prompt"); 673 | } 674 | 675 | const embeddedNotes = Object.entries(notes).map(([id, note]) => ({ 676 | type: "resource" as const, 677 | resource: { 678 | uri: `note:///${id}`, 679 | mimeType: "text/plain", 680 | text: note.content 681 | } 682 | })); 683 | 684 | return { 685 | messages: [ 686 | { 687 | role: "user", 688 | content: { 689 | type: "text", 690 | text: "Please summarize the following notes:" 691 | } 692 | }, 693 | ...embeddedNotes.map(note => ({ 694 | role: "user" as const, 695 | content: note 696 | })), 697 | { 698 | role: "user", 699 | content: { 700 | type: "text", 701 | text: "Provide a concise summary of all the notes above." 702 | } 703 | } 704 | ] 705 | }; 706 | }); 707 | 708 | /** 709 | * Start the server using stdio transport. 710 | * This allows the server to communicate via standard input/output streams. 711 | */ 712 | async function main() { 713 | const transport = new StdioServerTransport(); 714 | await server.connect(transport); 715 | } 716 | 717 | main().catch((error) => { 718 | console.error("Server error:", error); 719 | process.exit(1); 720 | }); 721 | --------------------------------------------------------------------------------