├── .clinerules
│   └── mcp-ragdocs_clinerules.md
├── src
│   ├── tools
│   │   ├── index.ts
│   │   ├── base-tool.ts
│   │   ├── clear-queue.ts
│   │   ├── list-queue.ts
│   │   ├── list-sources.ts
│   │   ├── search-documentation.ts
│   │   ├── remove-documentation.ts
│   │   ├── run-queue.ts
│   │   └── extract-urls.ts
│   ├── types
│   │   └── ollama.d.ts
│   ├── handlers
│   │   ├── clear-queue.ts
│   │   ├── base-handler.ts
│   │   ├── index.ts
│   │   ├── prompts-list.ts
│   │   ├── resources-list.ts
│   │   ├── list-queue.ts
│   │   ├── search-documentation.ts
│   │   ├── remove-documentation.ts
│   │   ├── remove-repository.ts
│   │   ├── list-repositories.ts
│   │   ├── run-queue.ts
│   │   ├── extract-urls.ts
│   │   ├── get-indexing-status.ts
│   │   ├── add-documentation.ts
│   │   ├── list-sources.ts
│   │   ├── watch-repository.ts
│   │   ├── update-repository.ts
│   │   └── local-repository.ts
│   ├── types.ts
│   ├── utils
│   │   ├── language-detection.ts
│   │   ├── repository-watcher.ts
│   │   ├── indexing-status-manager.ts
│   │   └── repository-config-loader.ts
│   ├── index.ts
│   ├── api-client.ts
│   ├── services
│   │   └── embeddings.ts
│   ├── public
│   │   └── index.html
│   ├── server.ts
│   └── handler-registry.ts
├── tsconfig.json
├── docker-compose.yml
├── repositories.json
├── .gitignore
├── smithery.yaml
├── TASK.md
├── Dockerfile
├── LICENSE
├── repositories.json.example
├── CHANGELOG.md
├── package.json
├── KNOWLEDGE.md
└── README.md

/.clinerules/mcp-ragdocs_clinerules.md:
--------------------------------------------------------------------------------
- Refer to KNOWLEDGE.md and README.md for useful context
- Keep both files up to date after completing a task
--------------------------------------------------------------------------------

/src/tools/index.ts:
--------------------------------------------------------------------------------
export * from './search-documentation.js';
export * from './list-sources.js';
export * from './extract-urls.js';
export * from './remove-documentation.js';
export * from './list-queue.js';
export * from './run-queue.js';
export * from './clear-queue.js';
--------------------------------------------------------------------------------

/tsconfig.json:
--------------------------------------------------------------------------------
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "Node16",
    "moduleResolution": "Node16",
    "outDir": "./build",
    "rootDir": "./src",
    "strict": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules"]
}
--------------------------------------------------------------------------------

/src/types/ollama.d.ts:
--------------------------------------------------------------------------------
declare module 'ollama' {
  export interface EmbeddingsRequest {
    model: string;
    prompt: string;
    options?: Record<string, unknown>;
  }

  export interface EmbeddingsResponse {
    embedding: number[];
  }

  const ollama: {
    embeddings(request: EmbeddingsRequest): Promise<EmbeddingsResponse>;
  };

  export default ollama;
}
--------------------------------------------------------------------------------

/docker-compose.yml:
--------------------------------------------------------------------------------
services:
  qdrant:
    image: qdrant/qdrant:latest
    restart: always
    ports:
      - "6333:6333" # REST API
      - "6334:6334" # GRPC API
    volumes:
      - qdrant_storage:/qdrant/storage
    environment:
      - QDRANT_ALLOW_RECOVERY=true
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:6333/healthz"]
      interval: 30s
      timeout: 10s
      retries: 3

volumes:
  qdrant_storage:
--------------------------------------------------------------------------------
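
The compose file above only provisions Qdrant; connecting to it from the Node side is a one-liner with the @qdrant/js-client-rest dependency declared in package.json. A minimal sketch (not a file from this repo; the fallback URL mirrors the compose port mapping):

import { QdrantClient } from '@qdrant/js-client-rest';

// Connects to the Qdrant instance exposed on port 6333 by docker-compose.yml.
// QDRANT_URL is the same environment variable the rest of this repo reads.
const qdrant = new QdrantClient({
  url: process.env.QDRANT_URL ?? 'http://localhost:6333',
});

// Sanity check: list collections (the server code uses one named 'documentation').
const collections = await qdrant.getCollections();
console.log(collections.collections.map((c) => c.name));
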

/repositories.json:
--------------------------------------------------------------------------------
{
  "repositories": [
    {
      "path": "/Users/guillaumeb/dev",
      "name": "goali",
      "include": ["**/*.js", "**/*.ts", "**/*.md"],
      "exclude": ["**/node_modules/**", "**/.git/**"],
      "watchMode": true,
      "watchInterval": 60000,
      "chunkSize": 1000,
      "fileTypeConfig": {
        ".py": { "include": true, "chunkStrategy": "semantic" },
        ".md": { "include": true, "chunkStrategy": "semantic" }
      }
    }
  ],
  "autoWatch": true
}
--------------------------------------------------------------------------------

/.gitignore:
--------------------------------------------------------------------------------
# Dependencies
node_modules/
.pnp/
.pnp.js

# Build output
build/
dist/
*.tsbuildinfo

# Environment variables
.env
.env.local
.env.development.local
.env.test.local
.env.production.local

# Logs
logs/
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Editor directories and files
.idea/
.vscode/
*.swp
*.swo
.DS_Store

# Test coverage
coverage/

# Local documentation files
INTERNAL.TXT
queue.txt
MCPguide.txt
--------------------------------------------------------------------------------

/src/handlers/clear-queue.ts:
--------------------------------------------------------------------------------
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { ApiClient } from '../api-client.js';
import { ClearQueueTool } from '../tools/clear-queue.js';

export class ClearQueueHandler extends ClearQueueTool {
  constructor(server: Server, apiClient: ApiClient) {
    super();
  }

  async handle(args: any, callContext?: { progressToken?: string | number, requestId: string | number }) {
    // ClearQueueTool.execute doesn't use callContext, so we don't pass it.
    return this.execute(args);
  }
}
--------------------------------------------------------------------------------
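
repositories.json above pairs include/exclude glob lists with each repository. The config loader itself (src/utils/repository-config-loader.ts) is not part of this dump, so the following is only an illustrative sketch of how those globs could be resolved with the glob package this project depends on:

import { glob } from 'glob';
import path from 'path';

// Hypothetical helper (not repo code): expand a repository's include patterns
// relative to its path, dropping anything matched by the exclude patterns.
async function resolveRepositoryFiles(repo: {
  path: string;
  include: string[];
  exclude: string[];
}): Promise<string[]> {
  const matches = await glob(repo.include, {
    cwd: repo.path,
    ignore: repo.exclude,
    nodir: true, // files only
  });
  return matches.map((file) => path.join(repo.path, file));
}
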

/src/handlers/base-handler.ts:
--------------------------------------------------------------------------------
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { ApiClient } from '../api-client.js';
import { McpToolResponse } from '../types.js';

export abstract class BaseHandler {
  protected server: Server;
  protected apiClient: ApiClient;

  constructor(server: Server, apiClient: ApiClient) {
    this.server = server;
    this.apiClient = apiClient;
  }

  protected abstract handle(args: any, callContext?: { progressToken?: string | number, requestId: string | number }): Promise<McpToolResponse>;
}
--------------------------------------------------------------------------------

/src/handlers/index.ts:
--------------------------------------------------------------------------------
export * from './base-handler.js';
export * from './add-documentation.js';
export * from './search-documentation.js';
export * from './list-sources.js';
export * from './extract-urls.js';
export * from './remove-documentation.js';
export * from './list-queue.js';
export * from './run-queue.js';
export * from './clear-queue.js';
export * from './prompts-list.js';
export * from './resources-list.js';
export * from './local-repository.js';
export * from './list-repositories.js';
export * from './remove-repository.js';
export * from './update-repository.js';
export * from './watch-repository.js';
export * from './get-indexing-status.js';
--------------------------------------------------------------------------------

/src/tools/base-tool.ts:
--------------------------------------------------------------------------------
import { ToolDefinition, McpToolResponse } from '../types.js';

export abstract class BaseTool {
  abstract get definition(): ToolDefinition;
  abstract execute(args: unknown): Promise<McpToolResponse>;

  protected formatResponse(data: unknown): McpToolResponse {
    return {
      content: [
        {
          type: 'text',
          text: JSON.stringify(data, null, 2),
        },
      ],
    };
  }

  protected handleError(error: any): McpToolResponse {
    return {
      content: [
        {
          type: 'text',
          text: `Error: ${error}`,
        },
      ],
      isError: true,
    };
  }
}
--------------------------------------------------------------------------------

/smithery.yaml:
--------------------------------------------------------------------------------
# Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml

startCommand:
  type: stdio
  configSchema:
    # JSON Schema defining the configuration options for the MCP.
    type: object
    required:
      - openAiApiKey
      - qdrantUrl
    properties:
      openAiApiKey:
        type: string
        description: API key for accessing OpenAI's services.
      qdrantUrl:
        type: string
        description: URL for the Qdrant vector database.
  commandFunction:
    # A function that produces the CLI command to start the MCP on stdio.
    |-
    (config) => ({command:'node', args:['build/index.js'], env:{OPENAI_API_KEY:config.openAiApiKey, QDRANT_URL:config.qdrantUrl}})
--------------------------------------------------------------------------------
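
src/tools/base-tool.ts above fixes the contract every tool follows: a declarative definition plus an execute method, with shared response and error formatting. A hypothetical minimal subclass (not part of this repo) to make the pattern concrete:

import { BaseTool } from './base-tool.js';
import { ToolDefinition, McpToolResponse } from '../types.js';

// Hypothetical example tool showing the BaseTool contract: expose a
// definition, implement execute, and reuse formatResponse/handleError.
export class PingTool extends BaseTool {
  get definition(): ToolDefinition {
    return {
      name: 'ping',
      description: 'Return a static response to verify the server is alive',
      inputSchema: { type: 'object', properties: {}, required: [] },
    };
  }

  async execute(_args: unknown): Promise<McpToolResponse> {
    try {
      return this.formatResponse({ ok: true, timestamp: new Date().toISOString() });
    } catch (error) {
      return this.handleError(error);
    }
  }
}
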

/TASK.md:
--------------------------------------------------------------------------------
# MCP RAG Docs Tasks

## Pending Tasks

### Repository Indexing Enhancements
- [ ] Implement incremental indexing for faster updates
- [ ] Add support for custom chunking strategies
- [ ] Improve language detection for better code chunking
- [ ] Add support for binary file indexing (e.g., PDFs)

### Web Interface Improvements
- [ ] Add repository management to web interface
- [ ] Implement real-time indexing status display
- [ ] Add search interface for testing queries
- [ ] Create dashboard for system monitoring

### Documentation
- [ ] Create comprehensive API documentation
- [ ] Add examples for all tools
- [ ] Create user guide with common workflows
- [ ] Add developer documentation for extending the system
--------------------------------------------------------------------------------

/src/handlers/prompts-list.ts:
--------------------------------------------------------------------------------
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { ApiClient } from '../api-client.js';
import { BaseHandler } from './base-handler.js';
import { McpToolResponse } from '../types.js';

export class PromptsListHandler extends BaseHandler {
  constructor(server: Server, apiClient: ApiClient) {
    super(server, apiClient);
  }

  async handle(_args: any, callContext?: { progressToken?: string | number, requestId: string | number }): Promise<McpToolResponse> {
    // Return an empty list of prompts.
    // Minimal implementation so prompts/list requests from clients
    // succeed instead of erroring out.
    return {
      content: [
        {
          type: 'text',
          text: JSON.stringify({ prompts: [] })
        }
      ]
    };
  }
}
--------------------------------------------------------------------------------

/src/handlers/resources-list.ts:
--------------------------------------------------------------------------------
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { ApiClient } from '../api-client.js';
import { BaseHandler } from './base-handler.js';
import { McpToolResponse } from '../types.js';

export class ResourcesListHandler extends BaseHandler {
  constructor(server: Server, apiClient: ApiClient) {
    super(server, apiClient);
  }

  async handle(_args: any, callContext?: { progressToken?: string | number, requestId: string | number }): Promise<McpToolResponse> {
    // Return an empty list of resources.
    // Minimal implementation so resources/list requests from clients
    // succeed instead of erroring out.
    return {
      content: [
        {
          type: 'text',
          text: JSON.stringify({ resources: [] })
        }
      ]
    };
  }
}
--------------------------------------------------------------------------------
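
Both handlers above answer prompts/list and resources/list with empty lists. How they get wired to the MCP SDK lives in src/handler-registry.ts, which this dump does not include; the sketch below is one plausible registration using the SDK's request schemas, returning the protocol-level shape directly. Treat the wiring as an assumption, not the repo's actual code:

import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import {
  ListPromptsRequestSchema,
  ListResourcesRequestSchema,
} from '@modelcontextprotocol/sdk/types.js';

// Assumed wiring: answer prompts/list and resources/list with empty lists so
// clients that probe these capabilities do not receive errors.
function registerListHandlers(server: Server) {
  server.setRequestHandler(ListPromptsRequestSchema, async () => ({ prompts: [] }));
  server.setRequestHandler(ListResourcesRequestSchema, async () => ({ resources: [] }));
}
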

/Dockerfile:
--------------------------------------------------------------------------------
# Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile
# Stage 1: Build the TypeScript project
FROM node:18 AS builder

# Set working directory
WORKDIR /app

# Copy package.json and package-lock.json for npm install
COPY package.json package-lock.json ./

# Install dependencies
RUN npm install

# Copy the entire source code to the working directory
COPY . .

# Build the project
RUN npm run build

# Stage 2: Run the project
FROM node:18

# Set working directory
WORKDIR /app

# Copy built files from the builder stage
COPY --from=builder /app/build /app/build
COPY --from=builder /app/node_modules /app/node_modules
COPY --from=builder /app/package.json /app/package.json

# Define environment variables
ENV OPENAI_API_KEY=your-api-key-here
ENV QDRANT_URL=http://localhost:6333

# Start the application
CMD ["node", "build/index.js"]
--------------------------------------------------------------------------------

/LICENSE:
--------------------------------------------------------------------------------
This project is a fork of qpd-v/mcp-ragdocs, originally developed by qpd-v.
The fork has been enhanced with additional features and improvements by Rahul Retnan.

MIT License

Copyright (c) 2025 Rahul Retnan

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------

/repositories.json.example:
--------------------------------------------------------------------------------
{
  "repositories": [
    {
      "path": "/path/to/your/first/repo",
      "name": "my-project",
      "include": ["**/*.js", "**/*.ts", "**/*.md"],
      "exclude": [
        "**/node_modules/**",
        "**/.git/**",
        "**/build/**",
        "**/dist/**",
        "**/*.min.js",
        "**/*.map",
        "**/package-lock.json",
        "**/yarn.lock"
      ],
      "watchMode": true,
      "watchInterval": 60000,
      "chunkSize": 1000,
      "fileTypeConfig": {
        ".js": { "include": true, "chunkStrategy": "semantic" },
        ".ts": { "include": true, "chunkStrategy": "semantic" },
        ".md": { "include": true, "chunkStrategy": "semantic" },
        ".json": { "include": true, "chunkStrategy": "line" }
      }
    },
    {
      "path": "/path/to/your/second/repo",
      "name": "documentation",
      "include": ["**/*.md", "**/*.txt", "**/*.rst"],
      "exclude": [
        "**/.git/**",
        "**/node_modules/**"
      ],
      "watchMode": false,
      "watchInterval": 300000,
      "chunkSize": 1500,
      "fileTypeConfig": {
        ".md": { "include": true, "chunkStrategy": "semantic" },
        ".txt": { "include": true, "chunkStrategy": "line" },
        ".rst": { "include": true, "chunkStrategy": "semantic" }
      }
    }
  ],
  "autoWatch": true
}
--------------------------------------------------------------------------------
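
The example config maps file extensions to per-type chunking options. Using the RepositoryConfig shape defined in src/types.ts, a small illustrative helper (not from this repo) shows how those settings would be resolved for a given file:

import path from 'path';
import type { RepositoryConfig } from '../types.js';

// Pick the chunking settings for one file: the per-extension entry wins,
// falling back to the repository-wide chunkSize and a line-based strategy
// (the 'line' default here is an assumption for illustration).
function chunkSettingsFor(repo: RepositoryConfig, filePath: string) {
  const ext = path.extname(filePath).toLowerCase();
  const fileConfig = repo.fileTypeConfig[ext];
  if (fileConfig && !fileConfig.include) {
    return null; // this file type is explicitly excluded from indexing
  }
  return {
    strategy: fileConfig?.chunkStrategy ?? 'line',
    chunkSize: fileConfig?.chunkSize ?? repo.chunkSize,
  };
}
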

/CHANGELOG.md:
--------------------------------------------------------------------------------
# Changelog

## [1.1.0] - 2024-03-14

### Initial Feature Addition

- Implemented new clear_queue tool for queue management
- Created src/tools/clear-queue.ts with core functionality
- Added handler in src/handlers/clear-queue.ts
- Integrated with existing queue management system
- Added tool exports and registration

### Code Organization

- Improved tool ordering in handler-registry.ts
- Moved remove_documentation before extract_urls
- Enhanced logical grouping of related tools
- Updated imports to match new ordering

### Documentation Enhancement Phase 1

- Enhanced tool descriptions in handler-registry.ts:
  1. search_documentation
     - Added natural language query support details
     - Clarified result ranking and context
     - Improved limit parameter documentation
  2. list_sources
     - Added details about indexed documentation
     - Clarified source information returned
  3. extract_urls
     - Enhanced URL crawling explanation
     - Added queue integration details
     - Clarified URL validation requirements
  4. remove_documentation
     - Added permanence warning
     - Clarified URL matching requirements
  5. list_queue
     - Added queue monitoring details
     - Clarified status checking capabilities
  6. run_queue
     - Added processing behavior details
     - Documented error handling
  7. clear_queue
     - Detailed queue clearing behavior
     - Added permanence warnings
     - Documented URL re-adding requirements

### Documentation Enhancement Phase 2

- Updated README.md
  - Removed add_documentation and queue_documentation tools
  - Updated tool descriptions to match handler-registry.ts
  - Added parameter format requirements
  - Enhanced usage guidance
--------------------------------------------------------------------------------

/package.json:
--------------------------------------------------------------------------------
{
  "name": "@rahulretnan/mcp-ragdocs",
  "version": "1.0.0",
  "description": "An MCP server for semantic documentation search and retrieval using vector databases to augment LLM capabilities.",
  "private": false,
  "type": "module",
  "bin": {
    "@rahulretnan/mcp-ragdocs": "./build/index.js"
  },
  "files": [
    "build",
    "README.md",
    "LICENSE"
  ],
  "scripts": {
    "build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"",
    "prepare": "npm run build",
    "watch": "tsc --watch",
    "inspector": "npx @modelcontextprotocol/inspector build/index.js",
    "start": "node build/index.js"
  },
  "keywords": [
    "mcp",
    "model-context-protocol",
    "rag",
    "documentation",
    "vector-database",
    "qdrant",
    "claude",
    "llm"
  ],
  "author": "rahul",
  "license": "MIT",
  "repository": {
    "type": "git",
    "url": "git+https://github.com/rahulretnan/mcp-ragdocs.git"
  },
  "bugs": {
    "url": "https://github.com/rahulretnan/mcp-ragdocs/issues"
  },
  "homepage": "https://github.com/rahulretnan/mcp-ragdocs#readme",
  "dependencies": {
    "@azure/openai": "2.0.0",
    "@modelcontextprotocol/sdk": "1.0.3",
    "@qdrant/js-client-rest": "1.12.0",
    "axios": "^1.9.0",
    "cheerio": "1.0.0",
    "cors": "^2.8.5",
    "express": "^4.21.2",
    "file-type": "^18.7.0",
    "glob": "^10.4.5",
    "net": "^1.0.2",
    "ollama": "^0.5.11",
    "openai": "4.76.2",
    "playwright": "1.49.1"
  },
  "devDependencies": {
    "@types/cors": "^2.8.17",
    "@types/express": "^5.0.0",
    "@types/node": "^20.17.10",
    "ts-node": "^10.9.2",
    "typescript": "^5.7.2"
  },
  "publishConfig": {
    "access": "public"
  }
}
--------------------------------------------------------------------------------

/src/handlers/list-queue.ts:
--------------------------------------------------------------------------------
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { ApiClient } from '../api-client.js';
import { BaseHandler } from './base-handler.js';
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';

// Get current directory in ES modules
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const QUEUE_FILE = path.join(__dirname, '..', '..', 'queue.txt');

export class ListQueueHandler extends BaseHandler {
  constructor(server: Server, apiClient: ApiClient) {
    super(server, apiClient);
  }

  async handle(_args: any, callContext?: { progressToken?: string | number, requestId: string | number }) {
    try {
      // Check if queue file exists
      try {
        await fs.access(QUEUE_FILE);
      } catch {
        return {
          content: [
            {
              type: 'text',
              text: 'Queue is empty (queue file does not exist)',
            },
          ],
        };
      }

      // Read queue file
      const content = await fs.readFile(QUEUE_FILE, 'utf-8');
      const urls = content.split('\n').filter(url => url.trim() !== '');

      if (urls.length === 0) {
        return {
          content: [
            {
              type: 'text',
              text: 'Queue is empty',
            },
          ],
        };
      }

      return {
        content: [
          {
            type: 'text',
            text: `Queue contains ${urls.length} URLs:\n${urls.join('\n')}`,
          },
        ],
      };
    } catch (error) {
      return {
        content: [
          {
            type: 'text',
            text: `Failed to read queue: ${error}`,
          },
        ],
        isError: true,
      };
    }
  }
}
--------------------------------------------------------------------------------

/src/tools/clear-queue.ts:
--------------------------------------------------------------------------------
import { BaseTool } from './base-tool.js';
import { ToolDefinition, McpToolResponse } from '../types.js';
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';

// Get current directory in ES modules
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const QUEUE_FILE = path.join(__dirname, '..', '..', 'queue.txt');

export class ClearQueueTool extends BaseTool {
  get definition(): ToolDefinition {
    return {
      name: 'clear_queue',
      description: 'Clear all URLs from the queue',
      inputSchema: {
        type: 'object',
        properties: {},
        required: [],
      },
    };
  }

  async execute(_args: any): Promise<McpToolResponse> {
    try {
      // Check if queue file exists
      try {
        await fs.access(QUEUE_FILE);
      } catch {
        return {
          content: [
            {
              type: 'text',
              text: 'Queue is already empty (queue file does not exist)',
            },
          ],
        };
      }

      // Read current queue to get count of URLs being cleared
      const content = await fs.readFile(QUEUE_FILE, 'utf-8');
      const urlCount = content.split('\n').filter(url => url.trim() !== '').length;

      // Clear the queue by emptying the file
      await fs.writeFile(QUEUE_FILE, '');

      return {
        content: [
          {
            type: 'text',
            text: `Queue cleared successfully. Removed ${urlCount} URL${urlCount === 1 ? '' : 's'} from the queue.`,
          },
        ],
      };
    } catch (error) {
      return {
        content: [
          {
            type: 'text',
            text: `Failed to clear queue: ${error}`,
          },
        ],
        isError: true,
      };
    }
  }
}
--------------------------------------------------------------------------------
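
Both queue implementations above treat queue.txt as a newline-delimited list of URLs, with a missing file meaning an empty queue. Two illustrative helpers (not part of the repo) that state this contract explicitly:

import fs from 'fs/promises';

// Read the queue: one URL per line, blank lines ignored, missing file = empty.
async function readQueue(queueFile: string): Promise<string[]> {
  try {
    const content = await fs.readFile(queueFile, 'utf-8');
    return content.split('\n').filter((url) => url.trim() !== '');
  } catch {
    return []; // a missing file is treated as an empty queue
  }
}

// Append URLs, keeping the trailing-newline convention the tools rely on.
async function enqueue(queueFile: string, urls: string[]): Promise<void> {
  if (urls.length === 0) return;
  await fs.appendFile(queueFile, urls.join('\n') + '\n');
}
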

/src/tools/list-queue.ts:
--------------------------------------------------------------------------------
import fs from "fs/promises";
import path, { dirname } from "path";
import { fileURLToPath } from "url";
import { McpToolResponse, ToolDefinition } from "../types.js";
import { BaseTool } from "./base-tool.js";

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const rootDir = path.join(__dirname, "../..");
const QUEUE_FILE = path.join(rootDir, "queue.txt");

export class ListQueueTool extends BaseTool {
  constructor() {
    super();
  }

  get definition(): ToolDefinition {
    return {
      name: "list_queue",
      description:
        "List all URLs currently in the documentation processing queue",
      inputSchema: {
        type: "object",
        properties: {},
        required: [],
      },
    };
  }

  async execute(_args: any): Promise<McpToolResponse> {
    try {
      // Check if queue file exists
      try {
        await fs.access(QUEUE_FILE);
      } catch {
        return {
          content: [
            {
              type: "text",
              text: "",
            },
          ],
        };
      }

      // Read queue file
      const content = await fs.readFile(QUEUE_FILE, "utf-8");
      const urls = content.split("\n").filter((url) => url.trim() !== "");

      if (urls.length === 0) {
        return {
          content: [
            {
              type: "text",
              text: "",
            },
          ],
        };
      }

      // Return just the URLs, one per line
      return {
        content: [
          {
            type: "text",
            text: urls.join("\n"),
          },
        ],
      };
    } catch (error) {
      console.error("Error reading queue:", error);
      return {
        content: [
          {
            type: "text",
            text: "",
          },
        ],
      };
    }
  }
}
--------------------------------------------------------------------------------

/src/types.ts:
--------------------------------------------------------------------------------
export interface DocumentChunk {
  text: string;
  url: string;
  title: string;
  timestamp: string;
  filePath?: string;
  language?: string;
  chunkIndex?: number;
  totalChunks?: number;
  repository?: string;
  isRepositoryFile?: boolean;
}

export interface DocumentPayload extends DocumentChunk {
  _type: 'DocumentChunk';
  [key: string]: unknown;
}

export function isDocumentPayload(payload: unknown): payload is DocumentPayload {
  if (!payload || typeof payload !== 'object') return false;
  const p = payload as Partial<DocumentPayload>;
  return (
    p._type === 'DocumentChunk' &&
    typeof p.text === 'string' &&
    typeof p.url === 'string' &&
    typeof p.title === 'string' &&
    typeof p.timestamp === 'string'
  );
}

export interface ToolDefinition {
  name: string;
  description: string;
  inputSchema: {
    type: string;
    properties: Record<string, unknown>;
    required: string[];
  };
}

export interface McpToolResponse {
  content: Array<{
    type: string;
    text: string;
  }>;
  isError?: boolean;
}

export interface RepositoryConfig {
  path: string;          // Absolute path to repository
  name: string;          // User-friendly name
  include: string[];     // Glob patterns to include
  exclude: string[];     // Glob patterns to exclude
  watchMode: boolean;    // Whether to watch for changes
  watchInterval: number; // Polling interval in ms
  chunkSize: number;     // Default chunk size for files
  fileTypeConfig: {      // Per file type configuration
    [extension: string]: {
      include: boolean;
      chunkSize?: number;
      chunkStrategy?: 'line' | 'character' | 'semantic';
    }
  }
}

export interface IndexingStatus {
  repositoryName: string;
  status: 'pending' | 'processing' | 'completed' | 'failed';
  startTime: string;
  endTime?: string;
  totalFiles?: number;
  processedFiles?: number;
  skippedFiles?: number;
  totalChunks?: number;
  indexedChunks?: number;
  currentBatch?: number;
  totalBatches?: number;
  percentageComplete?: number;
  error?: string;
  lastUpdated: string;
}
--------------------------------------------------------------------------------

/src/handlers/search-documentation.ts:
--------------------------------------------------------------------------------
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { BaseHandler } from './base-handler.js';
import { McpToolResponse, isDocumentPayload } from '../types.js';

const COLLECTION_NAME = 'documentation';

export class SearchDocumentationHandler extends BaseHandler {
  async handle(args: any, callContext?: { progressToken?: string | number, requestId: string | number }): Promise<McpToolResponse> {
    if (!args.query || typeof args.query !== 'string') {
      throw new McpError(ErrorCode.InvalidParams, 'Query is required');
    }

    const limit = args.limit || 5;

    try {
      const queryEmbedding = await this.apiClient.getEmbeddings(args.query);

      const searchResults = await this.apiClient.qdrantClient.search(COLLECTION_NAME, {
        vector: queryEmbedding,
        limit,
        with_payload: true,
        with_vector: false, // Optimize network transfer by not retrieving vectors
        score_threshold: 0.7, // Only return relevant results
      });

      const formattedResults = searchResults.map(result => {
        if (!isDocumentPayload(result.payload)) {
          throw new Error('Invalid payload type');
        }
        return `[${result.payload.title}](${result.payload.url})\nScore: ${result.score.toFixed(3)}\nContent: ${result.payload.text}\n`;
      }).join('\n---\n');

      return {
        content: [
          {
            type: 'text',
            text: formattedResults || 'No results found matching the query.',
          },
        ],
      };
    } catch (error) {
      if (error instanceof Error) {
        if (error.message.includes('unauthorized')) {
          throw new McpError(
            ErrorCode.InvalidRequest,
            'Failed to authenticate with Qdrant cloud while searching'
          );
        } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
          throw new McpError(
            ErrorCode.InternalError,
            'Connection to Qdrant cloud failed while searching'
          );
        }
      }
      return {
        content: [
          {
            type: 'text',
            text: `Search failed: ${error}`,
          },
        ],
        isError: true,
      };
    }
  }
}
--------------------------------------------------------------------------------
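
The handler above searches the whole documentation collection. Because repository chunks carry the repository and isRepositoryFile payload fields from types.ts, the same search can be scoped to one repository with a Qdrant filter; the filter shape below matches the one remove-repository.ts uses for deletes. A sketch, not repo code:

import { QdrantClient } from '@qdrant/js-client-rest';

// Illustrative only: restrict a semantic search to chunks indexed from a
// single repository by filtering on the payload fields defined in types.ts.
async function searchRepository(
  qdrant: QdrantClient,
  vector: number[],
  repositoryName: string,
  limit = 5
) {
  return qdrant.search('documentation', {
    vector,
    limit,
    with_payload: true,
    with_vector: false,
    score_threshold: 0.7,
    filter: {
      must: [
        { key: 'repository', match: { value: repositoryName } },
        { key: 'isRepositoryFile', match: { value: true } },
      ],
    },
  });
}
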

/src/handlers/remove-documentation.ts:
--------------------------------------------------------------------------------
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { BaseHandler } from './base-handler.js';
import { McpToolResponse } from '../types.js';

const COLLECTION_NAME = 'documentation';

export class RemoveDocumentationHandler extends BaseHandler {
  async handle(args: any, callContext?: { progressToken?: string | number, requestId: string | number }): Promise<McpToolResponse> {
    if (!args.urls || !Array.isArray(args.urls) || args.urls.length === 0) {
      throw new McpError(ErrorCode.InvalidParams, 'urls must be a non-empty array');
    }

    if (!args.urls.every((url: string) => typeof url === 'string')) {
      throw new McpError(ErrorCode.InvalidParams, 'All URLs must be strings');
    }

    try {
      // Delete using filter to match any of the provided URLs
      const result = await this.apiClient.qdrantClient.delete(COLLECTION_NAME, {
        filter: {
          should: args.urls.map((url: string) => ({
            key: 'url',
            match: { value: url }
          }))
        },
        wait: true // Ensure deletion is complete before responding
      });

      if (!['acknowledged', 'completed'].includes(result.status)) {
        throw new Error('Delete operation failed');
      }

      return {
        content: [
          {
            type: 'text',
            text: `Successfully removed documentation from ${args.urls.length} source${args.urls.length > 1 ? 's' : ''}: ${args.urls.join(', ')}`,
          },
        ],
      };
    } catch (error) {
      if (error instanceof Error) {
        if (error.message.includes('unauthorized')) {
          throw new McpError(
            ErrorCode.InvalidRequest,
            'Failed to authenticate with Qdrant cloud while removing documentation'
          );
        } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
          throw new McpError(
            ErrorCode.InternalError,
            'Connection to Qdrant cloud failed while removing documentation'
          );
        }
      }
      return {
        content: [
          {
            type: 'text',
            text: `Failed to remove documentation: ${error}`,
          },
        ],
        isError: true,
      };
    }
  }
}
--------------------------------------------------------------------------------

/src/handlers/remove-repository.ts:
--------------------------------------------------------------------------------
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { BaseHandler } from './base-handler.js';
import { McpToolResponse } from '../types.js';
import fs from 'fs/promises';
import path from 'path';
import { RepositoryConfigLoader } from '../utils/repository-config-loader.js';

const REPO_CONFIG_DIR = path.join(process.cwd(), 'repo-configs');
const COLLECTION_NAME = 'documentation';

export class RemoveRepositoryHandler extends BaseHandler {
  async handle(args: any, callContext?: { progressToken?: string | number, requestId: string | number }): Promise<McpToolResponse> {
    if (!args.name || typeof args.name !== 'string') {
      throw new McpError(ErrorCode.InvalidParams, 'Repository name is required');
    }

    const repoName = args.name;
    const configPath = path.join(REPO_CONFIG_DIR, `${repoName}.json`);

    try {
      // Check if the repository config exists
      try {
        await fs.access(configPath);
      } catch {
        throw new McpError(ErrorCode.InvalidParams, `Repository not found: ${repoName}`);
      }

      // Read the config to get repository details
      const configContent = await fs.readFile(configPath, 'utf-8');
      const config = JSON.parse(configContent);

      // Remove the repository config file
      await fs.unlink(configPath);

      // Update the repositories.json configuration file
      const configLoader = new RepositoryConfigLoader(this.server, this.apiClient);
      await configLoader.removeRepositoryFromConfig(repoName);

      // Remove repository documents from the vector database
      const result = await this.apiClient.qdrantClient.delete(COLLECTION_NAME, {
        filter: {
          must: [
            {
              key: 'repository',
              match: { value: repoName }
            },
            {
              key: 'isRepositoryFile',
              match: { value: true }
            }
          ]
        },
        wait: true
      });

      return {
        content: [
          {
            type: 'text',
            text: `Successfully removed repository: ${repoName} (${config.path})`,
          },
        ],
      };
    } catch (error) {
      if (error instanceof McpError) {
        throw error;
      }
      return {
        content: [
          {
            type: 'text',
            text: `Failed to remove repository: ${error}`,
          },
        ],
        isError: true,
      };
    }
  }
}
--------------------------------------------------------------------------------

/src/tools/list-sources.ts:
--------------------------------------------------------------------------------
import { BaseTool } from './base-tool.js';
import { ToolDefinition, McpToolResponse, isDocumentPayload } from '../types.js';
import { ApiClient } from '../api-client.js';
import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';

const COLLECTION_NAME = 'documentation';

export class ListSourcesTool extends BaseTool {
  private apiClient: ApiClient;

  constructor(apiClient: ApiClient) {
    super();
    this.apiClient = apiClient;
  }

  get definition(): ToolDefinition {
    return {
      name: 'list_sources',
      description: 'List all documentation sources currently stored',
      inputSchema: {
        type: 'object',
        properties: {},
        required: [],
      },
    };
  }

  async execute(args: any): Promise<McpToolResponse> {
    try {
      // Use pagination for better performance with large datasets
      const pageSize = 100;
      let offset: string | null = null;
      const sources = new Set<string>();

      while (true) {
        const scroll = await this.apiClient.qdrantClient.scroll(COLLECTION_NAME, {
          with_payload: true,
          with_vector: false, // Optimize network transfer
          limit: pageSize,
          offset,
        });

        if (scroll.points.length === 0) break;

        for (const point of scroll.points) {
          if (isDocumentPayload(point.payload)) {
            sources.add(`${point.payload.title} (${point.payload.url})`);
          }
        }

        if (scroll.points.length < pageSize) break;
        offset = scroll.points[scroll.points.length - 1].id as string;
      }

      return {
        content: [
          {
            type: 'text',
            text: Array.from(sources).join('\n') || 'No documentation sources found in the cloud collection.',
          },
        ],
      };
    } catch (error) {
      if (error instanceof Error) {
        if (error.message.includes('unauthorized')) {
          throw new McpError(
            ErrorCode.InvalidRequest,
            'Failed to authenticate with Qdrant cloud while listing sources'
          );
        } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
          throw new McpError(
            ErrorCode.InternalError,
            'Connection to Qdrant cloud failed while listing sources'
          );
        }
      }
      return {
        content: [
          {
            type: 'text',
            text: `Failed to list sources: ${error}`,
          },
        ],
        isError: true,
      };
    }
  }
}
--------------------------------------------------------------------------------
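
The scroll loop above derives the next offset from the last point's id. The Qdrant client also hands back a next_page_offset cursor on each page; a sketch of the same loop driven by that cursor (assuming the @qdrant/js-client-rest response shape):

import { QdrantClient } from '@qdrant/js-client-rest';
import { isDocumentPayload } from '../types.js';

// Cursor-based variant of the pagination in ListSourcesTool.execute.
async function collectSources(qdrant: QdrantClient): Promise<string[]> {
  const sources = new Set<string>();
  let offset: string | number | Record<string, unknown> | null | undefined;

  do {
    const page = await qdrant.scroll('documentation', {
      with_payload: true,
      with_vector: false,
      limit: 100,
      offset: offset ?? undefined,
    });
    for (const point of page.points) {
      if (isDocumentPayload(point.payload)) {
        sources.add(`${point.payload.title} (${point.payload.url})`);
      }
    }
    offset = page.next_page_offset; // null/undefined when no more pages
  } while (offset !== null && offset !== undefined);

  return Array.from(sources);
}
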

/src/handlers/list-repositories.ts:
--------------------------------------------------------------------------------
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { BaseHandler } from './base-handler.js';
import { McpToolResponse, RepositoryConfig } from '../types.js';
import fs from 'fs/promises';
import path from 'path';

const REPO_CONFIG_DIR = path.join(process.cwd(), 'repo-configs');

export class ListRepositoriesHandler extends BaseHandler {
  async handle(_args: any, callContext?: { progressToken?: string | number, requestId: string | number }): Promise<McpToolResponse> {
    try {
      // Ensure the config directory exists
      try {
        await fs.mkdir(REPO_CONFIG_DIR, { recursive: true });
      } catch (error) {
        console.error('Error creating repository config directory:', error);
      }

      // Get all repository config files
      let configFiles: string[];
      try {
        configFiles = await fs.readdir(REPO_CONFIG_DIR);
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: 'No repositories found (config directory is empty)',
            },
          ],
        };
      }

      // Filter for JSON files
      configFiles = configFiles.filter(file => file.endsWith('.json'));

      if (configFiles.length === 0) {
        return {
          content: [
            {
              type: 'text',
              text: 'No repositories found',
            },
          ],
        };
      }

      // Load each repository config
      const repositories: RepositoryConfig[] = [];
      for (const file of configFiles) {
        try {
          const configPath = path.join(REPO_CONFIG_DIR, file);
          const configContent = await fs.readFile(configPath, 'utf-8');
          const config = JSON.parse(configContent) as RepositoryConfig;
          repositories.push(config);
        } catch (error) {
          console.error(`Error loading repository config ${file}:`, error);
        }
      }

      // Format the response
      const repoList = repositories.map(repo => {
        return `- ${repo.name} (${repo.path})
  Include: ${repo.include.join(', ')}
  Exclude: ${repo.exclude.join(', ')}
  Watch Mode: ${repo.watchMode ? 'Enabled' : 'Disabled'}
  File Types: ${Object.keys(repo.fileTypeConfig).length} configured`;
      });

      return {
        content: [
          {
            type: 'text',
            text: repositories.length > 0
              ? `Found ${repositories.length} repositories:\n\n${repoList.join('\n\n')}`
              : 'No valid repositories found',
          },
        ],
      };
    } catch (error) {
      return {
        content: [
          {
            type: 'text',
            text: `Failed to list repositories: ${error}`,
          },
        ],
        isError: true,
      };
    }
  }
}
--------------------------------------------------------------------------------

/src/utils/language-detection.ts:
--------------------------------------------------------------------------------
import path from 'path';

// Map of file extensions to language names
const EXTENSION_TO_LANGUAGE: Record<string, string> = {
  // JavaScript and TypeScript
  '.js': 'javascript',
  '.jsx': 'javascript',
  '.ts': 'typescript',
  '.tsx': 'typescript',

  // Web
  '.html': 'html',
  '.css': 'css',
  '.scss': 'scss',
  '.less': 'less',

  // Python
  '.py': 'python',
  '.ipynb': 'jupyter',

  // Java and JVM languages
  '.java': 'java',
  '.kt': 'kotlin',
  '.scala': 'scala',
  '.groovy': 'groovy',

  // C-family
  '.c': 'c',
  '.cpp': 'cpp',
  '.cc': 'cpp',
  '.h': 'c',
  '.hpp': 'cpp',
  '.cs': 'csharp',

  // Ruby
  '.rb': 'ruby',
  '.erb': 'ruby',

  // PHP
  '.php': 'php',

  // Go
  '.go': 'go',

  // Rust
  '.rs': 'rust',

  // Swift
  '.swift': 'swift',

  // Shell
  '.sh': 'bash',
  '.bash': 'bash',
  '.zsh': 'bash',

  // Data formats
  '.json': 'json',
  '.xml': 'xml',
  '.yaml': 'yaml',
  '.yml': 'yaml',
  '.toml': 'toml',

  // Documentation
  '.md': 'markdown',
  '.markdown': 'markdown',
  '.rst': 'restructuredtext',
  '.txt': 'text',

  // Configuration
  '.ini': 'ini',
  '.cfg': 'ini',
  '.conf': 'ini',
  '.properties': 'properties',

  // Other
  '.sql': 'sql',
  '.graphql': 'graphql',
  '.proto': 'protobuf',
  '.dockerfile': 'dockerfile',
  '.Dockerfile': 'dockerfile',
};

// Shebang patterns for script files
const SHEBANG_PATTERNS: Array<[RegExp, string]> = [
  [/^#!.*\bpython\b/, 'python'],
  [/^#!.*\bnode\b/, 'javascript'],
  [/^#!.*\bbash\b/, 'bash'],
  [/^#!.*\bsh\b/, 'bash'],
  [/^#!.*\bruby\b/, 'ruby'],
  [/^#!.*\bperl\b/, 'perl'],
  [/^#!.*\bphp\b/, 'php'],
];

/**
 * Detect the programming language of a file based on its extension and content
 *
 * @param filePath The path to the file
 * @param content The content of the file
 * @returns The detected language or 'unknown'
 */
export function detectLanguage(filePath: string, content: string): string {
  const extension = path.extname(filePath).toLowerCase();

  // Check if we have a direct mapping for this extension
  if (extension in EXTENSION_TO_LANGUAGE) {
    return EXTENSION_TO_LANGUAGE[extension];
  }

  // Special case for Dockerfiles
  if (path.basename(filePath) === 'Dockerfile') {
    return 'dockerfile';
  }

  // Check for shebang in the first line for script files
  const firstLine = content.split('\n')[0];
  for (const [pattern, language] of SHEBANG_PATTERNS) {
    if (pattern.test(firstLine)) {
      return language;
    }
  }

  // Default to 'unknown' if we couldn't determine the language
  return 'unknown';
}
--------------------------------------------------------------------------------
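
detectLanguage's precedence is: extension map first, then the Dockerfile basename special case, then shebang sniffing, else 'unknown'. A few illustrative calls:

import { detectLanguage } from './language-detection.js';

console.log(detectLanguage('src/app.tsx', ''));                        // "typescript" via extension map
console.log(detectLanguage('Dockerfile', 'FROM node:18'));             // "dockerfile" via basename special case
console.log(detectLanguage('deploy', '#!/usr/bin/env bash\necho hi')); // "bash" via shebang pattern
console.log(detectLanguage('blob.bin', ''));                           // "unknown" fallback
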

/src/tools/search-documentation.ts:
--------------------------------------------------------------------------------
import { BaseTool } from './base-tool.js';
import { ToolDefinition, McpToolResponse, isDocumentPayload } from '../types.js';
import { ApiClient } from '../api-client.js';
import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';

const COLLECTION_NAME = 'documentation';

export class SearchDocumentationTool extends BaseTool {
  private apiClient: ApiClient;

  constructor(apiClient: ApiClient) {
    super();
    this.apiClient = apiClient;
  }

  get definition(): ToolDefinition {
    return {
      name: 'search_documentation',
      description: 'Search through stored documentation',
      inputSchema: {
        type: 'object',
        properties: {
          query: {
            type: 'string',
            description: 'Search query',
          },
          limit: {
            type: 'number',
            description: 'Maximum number of results to return',
            default: 5,
          },
        },
        required: ['query'],
      },
    };
  }

  async execute(args: any): Promise<McpToolResponse> {
    if (!args.query || typeof args.query !== 'string') {
      throw new McpError(ErrorCode.InvalidParams, 'Query is required');
    }

    const limit = args.limit || 5;

    try {
      const queryEmbedding = await this.apiClient.getEmbeddings(args.query);

      const searchResults = await this.apiClient.qdrantClient.search(COLLECTION_NAME, {
        vector: queryEmbedding,
        limit,
        with_payload: true,
        with_vector: false, // Optimize network transfer by not retrieving vectors
        score_threshold: 0.7, // Only return relevant results
      });

      const formattedResults = searchResults.map(result => {
        if (!isDocumentPayload(result.payload)) {
          throw new Error('Invalid payload type');
        }
        return `[${result.payload.title}](${result.payload.url})\nScore: ${result.score.toFixed(3)}\nContent: ${result.payload.text}\n`;
      }).join('\n---\n');

      return {
        content: [
          {
            type: 'text',
            text: formattedResults || 'No results found matching the query.',
          },
        ],
      };
    } catch (error) {
      if (error instanceof Error) {
        if (error.message.includes('unauthorized')) {
          throw new McpError(
            ErrorCode.InvalidRequest,
            'Failed to authenticate with Qdrant cloud while searching'
          );
        } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
          throw new McpError(
            ErrorCode.InternalError,
            'Connection to Qdrant cloud failed while searching'
          );
        }
      }
      return {
        content: [
          {
            type: 'text',
            text: `Search failed: ${error}`,
          },
        ],
        isError: true,
      };
    }
  }
}
--------------------------------------------------------------------------------

/src/tools/remove-documentation.ts:
--------------------------------------------------------------------------------
import { BaseTool } from './base-tool.js';
import { ToolDefinition, McpToolResponse } from '../types.js';
import { ApiClient } from '../api-client.js';
import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';

const COLLECTION_NAME = 'documentation';

export class RemoveDocumentationTool extends BaseTool {
  private apiClient: ApiClient;

  constructor(apiClient: ApiClient) {
    super();
    this.apiClient = apiClient;
  }

  get definition(): ToolDefinition {
    return {
      name: 'remove_documentation',
      description: 'Remove one or more documentation sources by their URLs',
      inputSchema: {
        type: 'object',
        properties: {
          urls: {
            type: 'array',
            items: {
              type: 'string',
              description: 'URL of a documentation source to remove'
            },
            description: 'Array of URLs to remove. Can be a single URL or multiple URLs.',
            minItems: 1
          }
        },
        required: ['urls'],
      },
    };
  }

  async execute(args: { urls: string[] }): Promise<McpToolResponse> {
    if (!Array.isArray(args.urls) || args.urls.length === 0) {
      throw new McpError(ErrorCode.InvalidParams, 'At least one URL is required');
    }

    if (!args.urls.every(url => typeof url === 'string')) {
      throw new McpError(ErrorCode.InvalidParams, 'All URLs must be strings');
    }

    try {
      // Delete using filter to match any of the provided URLs
      const result = await this.apiClient.qdrantClient.delete(COLLECTION_NAME, {
        filter: {
          should: args.urls.map(url => ({
            key: 'url',
            match: { value: url }
          }))
        },
        wait: true
      });

      if (!['acknowledged', 'completed'].includes(result.status)) {
        throw new Error('Delete operation failed');
      }

      return {
        content: [
          {
            type: 'text',
            text: `Successfully removed documentation from ${args.urls.length} source${args.urls.length > 1 ? 's' : ''}: ${args.urls.join(', ')}`,
          },
        ],
      };
    } catch (error) {
      if (error instanceof Error) {
        if (error.message.includes('unauthorized')) {
          throw new McpError(
            ErrorCode.InvalidRequest,
            'Failed to authenticate with Qdrant cloud while removing documentation'
          );
        } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
          throw new McpError(
            ErrorCode.InternalError,
            'Connection to Qdrant cloud failed while removing documentation'
          );
        }
      }
      return {
        content: [
          {
            type: 'text',
            text: `Failed to remove documentation: ${error}`,
          },
        ],
        isError: true,
      };
    }
  }
}
--------------------------------------------------------------------------------

/src/handlers/run-queue.ts:
--------------------------------------------------------------------------------
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { ApiClient } from '../api-client.js';
import { BaseHandler } from './base-handler.js';
import { McpToolResponse } from '../types.js';
import { AddDocumentationHandler } from './add-documentation.js';
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';

// Get current directory in ES modules
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const QUEUE_FILE = path.join(__dirname, '..', '..', 'queue.txt');

export class RunQueueHandler extends BaseHandler {
  private addDocHandler: AddDocumentationHandler;

  constructor(server: Server, apiClient: ApiClient) {
    super(server, apiClient);
    this.addDocHandler = new AddDocumentationHandler(server, apiClient);
  }

  async handle(_args: any, callContext?: { progressToken?: string | number, requestId: string | number }): Promise<McpToolResponse> {
    try {
      // Check if queue file exists
      try {
        await fs.access(QUEUE_FILE);
      } catch {
        return {
          content: [
            {
              type: 'text',
              text: 'Queue is empty (queue file does not exist)',
            },
          ],
        };
      }

      let processedCount = 0;
      let failedCount = 0;
      const failedUrls: string[] = [];

      while (true) {
        // Read current queue
        const content = await fs.readFile(QUEUE_FILE, 'utf-8');
        const urls = content.split('\n').filter(url => url.trim() !== '');

        if (urls.length === 0) {
          break; // Queue is empty
        }

        const currentUrl = urls[0]; // Get first URL

        try {
          // Process the URL using add_documentation handler
          // Pass the callContext along if it exists
          await this.addDocHandler.handle({ url: currentUrl }, callContext);
          processedCount++;
        } catch (error) {
          failedCount++;
          failedUrls.push(currentUrl);
          console.error(`Failed to process URL ${currentUrl}:`, error);
        }

        // Remove the processed URL from queue
        const remainingUrls = urls.slice(1);
        await fs.writeFile(QUEUE_FILE, remainingUrls.join('\n') + (remainingUrls.length > 0 ? '\n' : ''));
      }

      let resultText = `Queue processing complete.\nProcessed: ${processedCount} URLs\nFailed: ${failedCount} URLs`;
      if (failedUrls.length > 0) {
        resultText += `\n\nFailed URLs:\n${failedUrls.join('\n')}`;
      }

      return {
        content: [
          {
            type: 'text',
            text: resultText,
          },
        ],
      };
    } catch (error) {
      return {
        content: [
          {
            type: 'text',
            text: `Failed to process queue: ${error}`,
          },
        ],
        isError: true,
      };
    }
  }
}
--------------------------------------------------------------------------------

/src/handlers/extract-urls.ts:
--------------------------------------------------------------------------------
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { BaseHandler } from './base-handler.js';
import { McpToolResponse } from '../types.js';
import * as cheerio from 'cheerio';
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';

// Get current directory in ES modules
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const QUEUE_FILE = path.join(__dirname, '..', '..', 'queue.txt');

export class ExtractUrlsHandler extends BaseHandler {
  async handle(args: any, callContext?: { progressToken?: string | number, requestId: string | number }): Promise<McpToolResponse> {
    if (!args.url || typeof args.url !== 'string') {
      throw new McpError(ErrorCode.InvalidParams, 'URL is required');
    }

    await this.apiClient.initBrowser();
    const page = await this.apiClient.browser.newPage();

    try {
      const baseUrl = new URL(args.url);
      const basePath = baseUrl.pathname.split('/').slice(0, 3).join('/'); // Get the base path (e.g., /3/ for Python docs)

      await page.goto(args.url, { waitUntil: 'networkidle' });
      const content = await page.content();
      const $ = cheerio.load(content);
      const urls = new Set<string>();

      $('a[href]').each((_, element) => {
        const href = $(element).attr('href');
        if (href) {
          try {
            const url = new URL(href, args.url);
            // Only include URLs from the same documentation section
            if (url.hostname === baseUrl.hostname &&
                url.pathname.startsWith(basePath) &&
                !url.hash &&
                !url.href.endsWith('#')) {
              urls.add(url.href);
            }
          } catch (e) {
            // Ignore invalid URLs
          }
        }
      });

      const urlArray = Array.from(urls);

      if (args.add_to_queue) {
        try {
          // Ensure queue file exists
          try {
            await fs.access(QUEUE_FILE);
          } catch {
            await fs.writeFile(QUEUE_FILE, '');
          }

          // Append URLs to queue
          const urlsToAdd = urlArray.join('\n') + (urlArray.length > 0 ? '\n' : '');
          await fs.appendFile(QUEUE_FILE, urlsToAdd);

          return {
            content: [
              {
                type: 'text',
                text: `Successfully added ${urlArray.length} URLs to the queue`,
              },
            ],
          };
        } catch (error) {
          return {
            content: [
              {
                type: 'text',
                text: `Failed to add URLs to queue: ${error}`,
              },
            ],
            isError: true,
          };
        }
      }

      return {
        content: [
          {
            type: 'text',
            text: urlArray.join('\n') || 'No URLs found on this page.',
          },
        ],
      };
    } catch (error) {
      return {
        content: [
          {
            type: 'text',
            text: `Failed to extract URLs: ${error}`,
          },
        ],
        isError: true,
      };
    } finally {
      await page.close();
    }
  }
}
--------------------------------------------------------------------------------
process.stderr.write(args.map(arg => String(arg)).join(' ') + '\n'); 69 | }; 70 | console.warn = (...args) => { 71 | process.stderr.write(args.map(arg => String(arg)).join(' ') + '\n'); 72 | }; 73 | console.error = (...args) => { 74 | process.stderr.write(args.map(arg => String(arg)).join(' ') + '\n'); 75 | }; 76 | 77 | // Initialize Qdrant collection 78 | console.log("Initializing Qdrant collection..."); 79 | await this.apiClient.initCollection(COLLECTION_NAME); 80 | console.log("Qdrant collection initialized successfully"); 81 | 82 | // Start web interface 83 | await this.webInterface.start(); 84 | console.log("Web interface is running"); 85 | 86 | // Load repositories from configuration 87 | console.log("Loading repositories from configuration..."); 88 | await this.repoConfigLoader.loadRepositories(); 89 | 90 | // Start MCP server 91 | const transport = new StdioServerTransport(); 92 | await this.server.connect(transport); 93 | console.log("RAG Docs MCP server running on stdio"); 94 | } catch (error) { 95 | process.stderr.write(`Failed to initialize server: ${error}\n`); 96 | process.exit(1); 97 | } 98 | } 99 | } 100 | 101 | const server = new RagDocsServer(); 102 | server.run().catch((error) => { 103 | process.stderr.write(`Fatal error: ${error}\n`); 104 | process.exit(1); 105 | }); 106 | -------------------------------------------------------------------------------- /src/tools/run-queue.ts: -------------------------------------------------------------------------------- 1 | import { BaseTool } from './base-tool.js'; 2 | import { ToolDefinition, McpToolResponse } from '../types.js'; 3 | import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js'; 4 | import fs from 'fs/promises'; 5 | import path from 'path'; 6 | import { fileURLToPath } from 'url'; 7 | import { ApiClient } from '../api-client.js'; 8 | import { AddDocumentationHandler } from '../handlers/add-documentation.js'; 9 | import { Server } from '@modelcontextprotocol/sdk/server/index.js'; 10 | 11 | // Get current directory in ES modules 12 | const __filename = fileURLToPath(import.meta.url); 13 | const __dirname = path.dirname(__filename); 14 | const QUEUE_FILE = path.join(__dirname, '..', '..', 'queue.txt'); 15 | 16 | export class RunQueueTool extends BaseTool { 17 | private apiClient: ApiClient; 18 | private addDocHandler: AddDocumentationHandler; 19 | 20 | constructor(apiClient: ApiClient) { 21 | super(); 22 | this.apiClient = apiClient; 23 | // Create a temporary server instance just for the handler 24 | const tempServer = new Server( 25 | { name: 'temp', version: '0.0.0' }, 26 | { capabilities: { tools: {} } } 27 | ); 28 | this.addDocHandler = new AddDocumentationHandler(tempServer, apiClient); 29 | } 30 | 31 | get definition(): ToolDefinition { 32 | return { 33 | name: 'run_queue', 34 | description: 'Process URLs from the queue one at a time until complete', 35 | inputSchema: { 36 | type: 'object', 37 | properties: {}, 38 | required: [], 39 | }, 40 | }; 41 | } 42 | 43 | async execute(_args: any): Promise { 44 | try { 45 | // Check if queue file exists 46 | try { 47 | await fs.access(QUEUE_FILE); 48 | } catch { 49 | return { 50 | content: [ 51 | { 52 | type: 'text', 53 | text: 'Queue is empty (queue file does not exist)', 54 | }, 55 | ], 56 | }; 57 | } 58 | 59 | let processedCount = 0; 60 | let failedCount = 0; 61 | const failedUrls: string[] = []; 62 | 63 | while (true) { 64 | // Read current queue 65 | const content = await fs.readFile(QUEUE_FILE, 'utf-8'); 66 | const urls = content.split('\n').filter(url 
=> url.trim() !== ''); 67 | 68 | if (urls.length === 0) { 69 | break; // Queue is empty 70 | } 71 | 72 | const currentUrl = urls[0]; // Get first URL 73 | 74 | try { 75 | // Process the URL using the handler 76 | await this.addDocHandler.handle({ url: currentUrl }); 77 | processedCount++; 78 | } catch (error) { 79 | failedCount++; 80 | failedUrls.push(currentUrl); 81 | console.error(`Failed to process URL ${currentUrl}:`, error); 82 | } 83 | 84 | // Remove the processed URL from queue 85 | const remainingUrls = urls.slice(1); 86 | await fs.writeFile(QUEUE_FILE, remainingUrls.join('\n') + (remainingUrls.length > 0 ? '\n' : '')); 87 | } 88 | 89 | let resultText = `Queue processing complete.\nProcessed: ${processedCount} URLs\nFailed: ${failedCount} URLs`; 90 | if (failedUrls.length > 0) { 91 | resultText += `\n\nFailed URLs:\n${failedUrls.join('\n')}`; 92 | } 93 | 94 | return { 95 | content: [ 96 | { 97 | type: 'text', 98 | text: resultText, 99 | }, 100 | ], 101 | }; 102 | } catch (error) { 103 | return { 104 | content: [ 105 | { 106 | type: 'text', 107 | text: `Failed to process queue: ${error}`, 108 | }, 109 | ], 110 | isError: true, 111 | }; 112 | } 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/tools/extract-urls.ts: -------------------------------------------------------------------------------- 1 | import { ErrorCode, McpError } from "@modelcontextprotocol/sdk/types.js"; 2 | import * as cheerio from "cheerio"; 3 | import fs from "fs/promises"; 4 | import path from "path"; 5 | import { fileURLToPath } from "url"; 6 | import { ApiClient } from "../api-client.js"; 7 | import { McpToolResponse, ToolDefinition } from "../types.js"; 8 | import { BaseTool } from "./base-tool.js"; 9 | 10 | // Get current directory in ES modules 11 | const __filename = fileURLToPath(import.meta.url); 12 | const __dirname = path.dirname(__filename); 13 | const QUEUE_FILE = path.join(__dirname, "..", "..", "queue.txt"); 14 | 15 | export class ExtractUrlsTool extends BaseTool { 16 | private apiClient: ApiClient; 17 | 18 | constructor(apiClient: ApiClient) { 19 | super(); 20 | this.apiClient = apiClient; 21 | } 22 | 23 | get definition(): ToolDefinition { 24 | return { 25 | name: "extract_urls", 26 | description: "Extract all URLs from a given web page", 27 | inputSchema: { 28 | type: "object", 29 | properties: { 30 | url: { 31 | type: "string", 32 | description: "URL of the page to extract URLs from", 33 | }, 34 | add_to_queue: { 35 | type: "boolean", 36 | description: 37 | "If true, automatically add extracted URLs to the queue", 38 | default: false, 39 | }, 40 | }, 41 | required: ["url"], 42 | }, 43 | }; 44 | } 45 | 46 | async execute(args: any): Promise { 47 | if (!args.url || typeof args.url !== "string") { 48 | throw new McpError(ErrorCode.InvalidParams, "URL is required"); 49 | } 50 | 51 | await this.apiClient.initBrowser(); 52 | const page = await this.apiClient.browser.newPage(); 53 | 54 | try { 55 | await page.goto(args.url, { waitUntil: "networkidle" }); 56 | const content = await page.content(); 57 | const $ = cheerio.load(content); 58 | const urls = new Set(); 59 | 60 | $("a[href]").each((_, element) => { 61 | const href = $(element).attr("href"); 62 | if (href) { 63 | try { 64 | const url = new URL(href, args.url); 65 | // Only include URLs from the same domain to avoid external links 66 | if ( 67 | url.origin === new URL(args.url).origin && 68 | !url.hash && 69 | !url.href.endsWith("#") 70 | ) { 71 | urls.add(url.href); 72 | } 73 | } catch 
(e) { 74 | // Ignore invalid URLs 75 | } 76 | } 77 | }); 78 | 79 | const urlArray = Array.from(urls); 80 | 81 | if (args.add_to_queue) { 82 | try { 83 | // Ensure queue file exists 84 | try { 85 | await fs.access(QUEUE_FILE); 86 | } catch { 87 | await fs.writeFile(QUEUE_FILE, ""); 88 | } 89 | 90 | // Append URLs to queue 91 | const urlsToAdd = 92 | urlArray.join("\n") + (urlArray.length > 0 ? "\n" : ""); 93 | await fs.appendFile(QUEUE_FILE, urlsToAdd); 94 | 95 | return { 96 | content: [ 97 | { 98 | type: "text", 99 | text: `Successfully added ${urlArray.length} URLs to the queue`, 100 | }, 101 | ], 102 | }; 103 | } catch (error) { 104 | return { 105 | content: [ 106 | { 107 | type: "text", 108 | text: `Failed to add URLs to queue: ${error}`, 109 | }, 110 | ], 111 | isError: true, 112 | }; 113 | } 114 | } 115 | 116 | return { 117 | content: [ 118 | { 119 | type: "text", 120 | text: urlArray.join("\n") || "No URLs found on this page.", 121 | }, 122 | ], 123 | }; 124 | } catch (error) { 125 | return { 126 | content: [ 127 | { 128 | type: "text", 129 | text: `Failed to extract URLs: ${error}`, 130 | }, 131 | ], 132 | isError: true, 133 | }; 134 | } finally { 135 | await page.close(); 136 | } 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/api-client.ts: -------------------------------------------------------------------------------- 1 | import { ErrorCode, McpError } from "@modelcontextprotocol/sdk/types.js"; 2 | import { QdrantClient } from "@qdrant/js-client-rest"; 3 | import { chromium } from "playwright"; 4 | import { EmbeddingService } from "./services/embeddings.js"; 5 | 6 | // Environment variables for configuration 7 | const EMBEDDING_PROVIDER = process.env.EMBEDDING_PROVIDER || 'ollama'; 8 | const EMBEDDING_MODEL = process.env.EMBEDDING_MODEL; 9 | const OPENAI_API_KEY = process.env.OPENAI_API_KEY; 10 | const FALLBACK_PROVIDER = process.env.FALLBACK_PROVIDER; 11 | const FALLBACK_MODEL = process.env.FALLBACK_MODEL; 12 | const QDRANT_URL = process.env.QDRANT_URL || 'http://localhost:6333'; 13 | const QDRANT_API_KEY = process.env.QDRANT_API_KEY; 14 | 15 | if (!QDRANT_URL) { 16 | throw new Error( 17 | "QDRANT_URL environment variable is required for cloud storage" 18 | ); 19 | } 20 | 21 | if ((EMBEDDING_PROVIDER === 'openai' || FALLBACK_PROVIDER === 'openai') && !OPENAI_API_KEY) { 22 | throw new Error( 23 | "OPENAI_API_KEY environment variable is required when using OpenAI as either primary or fallback provider" 24 | ); 25 | } 26 | 27 | if (EMBEDDING_PROVIDER === 'ollama') { 28 | console.warn('Using Ollama as primary provider. Make sure Ollama is running locally.'); 29 | } 30 | 31 | export class ApiClient { 32 | qdrantClient: QdrantClient; 33 | embeddingService: EmbeddingService; 34 | browser: any; 35 | vectorSize: number; 36 | 37 | constructor() { 38 | // Initialize Qdrant client with cloud configuration 39 | this.qdrantClient = new QdrantClient({ 40 | url: QDRANT_URL, 41 | apiKey: QDRANT_API_KEY, 42 | }); 43 | 44 | // Initialize embedding service with configured provider 45 | this.embeddingService = EmbeddingService.createFromConfig({ 46 | provider: EMBEDDING_PROVIDER as 'ollama' | 'openai', 47 | apiKey: EMBEDDING_PROVIDER === 'openai' ? OPENAI_API_KEY : undefined, 48 | model: EMBEDDING_MODEL, 49 | fallbackProvider: FALLBACK_PROVIDER as 'ollama' | 'openai' | undefined, 50 | fallbackApiKey: FALLBACK_PROVIDER === 'openai' ? 
OPENAI_API_KEY : undefined, 51 | fallbackModel: FALLBACK_MODEL 52 | }); 53 | 54 | this.vectorSize = this.embeddingService.getVectorSize(); 55 | } 56 | 57 | async initBrowser() { 58 | if (!this.browser) { 59 | this.browser = await chromium.launch(); 60 | } 61 | } 62 | 63 | async cleanup() { 64 | if (this.browser) { 65 | await this.browser.close(); 66 | } 67 | } 68 | 69 | async getEmbeddings(text: string): Promise { 70 | try { 71 | return await this.embeddingService.generateEmbeddings(text); 72 | } catch (error) { 73 | if (error instanceof McpError) { 74 | throw error; 75 | } 76 | throw new McpError( 77 | ErrorCode.InternalError, 78 | `Failed to generate embeddings: ${error}` 79 | ); 80 | } 81 | } 82 | 83 | async initCollection(COLLECTION_NAME: string) { 84 | try { 85 | const collections = await this.qdrantClient.getCollections(); 86 | const exists = collections.collections.some( 87 | (c) => c.name === COLLECTION_NAME 88 | ); 89 | 90 | if (!exists) { 91 | await this.qdrantClient.createCollection(COLLECTION_NAME, { 92 | vectors: { 93 | size: this.vectorSize, // Dynamic size based on provider 94 | distance: "Cosine", 95 | }, 96 | optimizers_config: { 97 | default_segment_number: 2, 98 | memmap_threshold: 20000, 99 | }, 100 | replication_factor: 2, 101 | }); 102 | } 103 | } catch (error) { 104 | if (error instanceof Error) { 105 | if (error.message.includes("unauthorized")) { 106 | throw new McpError( 107 | ErrorCode.InvalidRequest, 108 | "Failed to authenticate with Qdrant cloud. Please check your API key." 109 | ); 110 | } else if ( 111 | error.message.includes("ECONNREFUSED") || 112 | error.message.includes("ETIMEDOUT") 113 | ) { 114 | throw new McpError( 115 | ErrorCode.InternalError, 116 | "Failed to connect to Qdrant cloud. Please check your QDRANT_URL." 
117 | ); 118 | } 119 | } 120 | throw new McpError( 121 | ErrorCode.InternalError, 122 | `Failed to initialize Qdrant cloud collection: ${error}` 123 | ); 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/handlers/get-indexing-status.ts: -------------------------------------------------------------------------------- 1 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; 2 | import { BaseHandler } from './base-handler.js'; 3 | import { McpToolResponse } from '../types.js'; 4 | import { IndexingStatusManager } from '../utils/indexing-status-manager.js'; 5 | 6 | export class GetIndexingStatusHandler extends BaseHandler { 7 | private statusManager: IndexingStatusManager; 8 | 9 | constructor(server: any, apiClient: any) { 10 | super(server, apiClient); 11 | this.statusManager = new IndexingStatusManager(); 12 | } 13 | 14 | async handle(args: any): Promise { 15 | // If name is provided, get status for specific repository 16 | if (args.name && typeof args.name === 'string') { 17 | const status = await this.statusManager.getStatus(args.name); 18 | 19 | if (!status) { 20 | return { 21 | content: [ 22 | { 23 | type: 'text', 24 | text: `No indexing status found for repository: ${args.name}`, 25 | }, 26 | ], 27 | }; 28 | } 29 | 30 | // Format the status information 31 | const formattedStatus = this.formatStatus(status); 32 | 33 | return { 34 | content: [ 35 | { 36 | type: 'text', 37 | text: formattedStatus, 38 | }, 39 | ], 40 | }; 41 | } 42 | // Otherwise, get all statuses 43 | else { 44 | const allStatuses = await this.statusManager.getAllStatuses(); 45 | 46 | if (allStatuses.length === 0) { 47 | return { 48 | content: [ 49 | { 50 | type: 'text', 51 | text: 'No repository indexing operations found.', 52 | }, 53 | ], 54 | }; 55 | } 56 | 57 | // Format all statuses 58 | const formattedStatuses = allStatuses.map(status => this.formatStatus(status)).join('\n\n---\n\n'); 59 | 60 | return { 61 | content: [ 62 | { 63 | type: 'text', 64 | text: formattedStatuses, 65 | }, 66 | ], 67 | }; 68 | } 69 | } 70 | 71 | private formatStatus(status: any): string { 72 | const startTime = new Date(status.startTime).toLocaleString(); 73 | const endTime = status.endTime ? new Date(status.endTime).toLocaleString() : 'In progress'; 74 | const duration = status.endTime 75 | ? 
this.formatDuration(new Date(status.endTime).getTime() - new Date(status.startTime).getTime()) 76 | : this.formatDuration(Date.now() - new Date(status.startTime).getTime()); 77 | 78 | let statusText = ''; 79 | 80 | switch (status.status) { 81 | case 'pending': 82 | statusText = '⏳ Pending'; 83 | break; 84 | case 'processing': 85 | statusText = '🔄 Processing'; 86 | break; 87 | case 'completed': 88 | statusText = '✅ Completed'; 89 | break; 90 | case 'failed': 91 | statusText = '❌ Failed'; 92 | break; 93 | default: 94 | statusText = status.status; 95 | } 96 | 97 | let result = `Repository: ${status.repositoryName}\n`; 98 | result += `Status: ${statusText}\n`; 99 | result += `Progress: ${status.percentageComplete || 0}%\n`; 100 | result += `Started: ${startTime}\n`; 101 | 102 | if (status.status === 'completed' || status.status === 'failed') { 103 | result += `Ended: ${endTime}\n`; 104 | } 105 | 106 | result += `Duration: ${duration}\n`; 107 | 108 | if (status.totalFiles !== undefined) { 109 | result += `Files: ${status.processedFiles || 0} processed, ${status.skippedFiles || 0} skipped (of ${status.totalFiles})\n`; 110 | } 111 | 112 | if (status.totalChunks !== undefined) { 113 | result += `Chunks: ${status.indexedChunks || 0} indexed (of ${status.totalChunks})\n`; 114 | } 115 | 116 | if (status.currentBatch !== undefined && status.totalBatches !== undefined) { 117 | result += `Batch: ${status.currentBatch} of ${status.totalBatches}\n`; 118 | } 119 | 120 | if (status.error) { 121 | result += `Error: ${status.error}\n`; 122 | } 123 | 124 | return result; 125 | } 126 | 127 | private formatDuration(ms: number): string { 128 | const seconds = Math.floor(ms / 1000); 129 | const minutes = Math.floor(seconds / 60); 130 | const hours = Math.floor(minutes / 60); 131 | 132 | if (hours > 0) { 133 | return `${hours}h ${minutes % 60}m ${seconds % 60}s`; 134 | } else if (minutes > 0) { 135 | return `${minutes}m ${seconds % 60}s`; 136 | } else { 137 | return `${seconds}s`; 138 | } 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /KNOWLEDGE.md: -------------------------------------------------------------------------------- 1 | # Knowledge Base for MCP RAG Docs 2 | 3 | ## Architecture 4 | 5 | ### Handler Registry 6 | The system uses a handler registry pattern to manage tools. The key components are: 7 | 8 | 1. **HandlerRegistry Class** (`src/handler-registry.ts`): 9 | - Manages all tool handlers 10 | - Registers handlers with the MCP server 11 | - Defines tool schemas and descriptions 12 | 13 | 2. **Handler Registration Process**: 14 | - Handlers are set up in the `setupHandlers` method 15 | - Tools are exposed to clients via the `ListToolsRequestSchema` handler 16 | - **Important**: Tools must be included in both places to be available to clients 17 | 18 | 3. **Tool Definition Structure**: 19 | ```typescript 20 | { 21 | name: 'tool_name', 22 | description: 'Tool description...', 23 | inputSchema: { 24 | type: 'object', 25 | properties: { 26 | // Tool parameters 27 | }, 28 | required: ['param1', 'param2'] 29 | } 30 | } as ToolDefinition 31 | ``` 32 | 33 | ## Tools 34 | 35 | ### Documentation Management Tools 36 | 37 | 1. **add_documentation**: 38 | - Directly adds documentation from a URL 39 | - Processes content immediately 40 | - Chunks text and creates embeddings 41 | - Stores in Qdrant vector database 42 | - Required parameter: `url` 43 | 44 | 2. 
**Queue-Based Processing**: 45 | - `extract_urls`: Extracts URLs from a page 46 | - `list_queue`: Shows pending URLs 47 | - `run_queue`: Processes all queued URLs 48 | - `clear_queue`: Empties the queue 49 | 50 | ## Client Integration 51 | 52 | ### Claude Desktop Configuration 53 | Claude Desktop requires explicit configuration to recognize tools: 54 | 55 | 1. **Tool Registration**: Tools must be properly registered in the server code 56 | 2. **Auto-Approval**: Tools must be listed in the `autoApprove` array in the configuration 57 | 3. **Configuration File**: Located at `claude_desktop_config.json` 58 | 59 | ### Common Issues 60 | - Tools registered as handlers but not included in the `ListToolsRequestSchema` response won't appear in clients 61 | - Changes to tool definitions require server restart 62 | - Client applications may cache tool listings, requiring restart 63 | 64 | ## Troubleshooting 65 | 66 | ### Missing Tools 67 | If tools are missing from client applications: 68 | 1. Check the tool is registered in `setupHandlers` 69 | 2. Verify the tool is included in the `tools` array in the `ListToolsRequestSchema` handler 70 | 3. Ensure the client configuration includes the tool in any approval lists 71 | 4. Restart both server and client applications 72 | 73 | ### Server Logs 74 | Server logs provide valuable debugging information. MCP servers typically redirect `console.log`, `console.info`, `console.error`, etc., to `stderr` to avoid interfering with the JSON-RPC communication over `stdout`. When troubleshooting or monitoring: 75 | - Check `stderr` output for logs from handlers (e.g., progress during long operations like repository indexing). 76 | - Logs can reveal tool registration issues. 77 | - Client connection details are often logged. 78 | - Request/response patterns can be observed. 79 | - For long-running tools like `add_repository` and `update_repository`: 80 | - Detailed progress logs are sent to `stderr` to act as a server-side heartbeat. 81 | - MCP `$/progress` notifications are sent to the client to prevent request timeouts and provide client-side progress updates. 
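A minimal sketch of that heartbeat-plus-notification pattern, mirroring the `(this.server as any).sendProgress(...)` calls in `update-repository.ts` (the exact `sendProgress` signature is an assumption, not a documented SDK API):

```typescript
// Sketch only: progress reporting for a long-running handler.
// `server.sendProgress` mirrors the (this.server as any).sendProgress(...)
// calls in update-repository.ts; treat the signature as an assumption.
function reportProgress(
  server: any,
  progressToken: string | number | undefined,
  repoName: string,
  done: number,
  total: number
): void {
  // Server-side heartbeat on stderr
  console.error(`[${repoName}] Indexed ${done}/${total} chunks`);
  // Client-side progress notification, only when the client supplied a token
  if (progressToken !== undefined) {
    server.sendProgress(progressToken, {
      message: `Indexed ${done}/${total} chunks`,
      percentageComplete: total > 0 ? Math.round((done / total) * 100) : 100,
    });
  }
}
```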
82 | - **Timeout Issue Solution**: The timeout issue with large repositories has been addressed by implementing asynchronous processing: 83 | - Repository indexing now runs in the background after initial setup 84 | - The MCP request returns quickly with a success message, preventing timeout 85 | - A new `get_indexing_status` tool allows checking the progress of ongoing indexing operations 86 | - Batch size reduced from 100 to 50 chunks per batch for more frequent progress updates 87 | - Status tracking implemented via the `IndexingStatusManager` class 88 | - Detailed status information includes progress percentage, file counts, and timing data 89 | 90 | - **Implementation Details**: 91 | - Added `IndexingStatus` type to track indexing progress 92 | - Created `IndexingStatusManager` class to manage status persistence 93 | - Modified `LocalRepositoryHandler` to use asynchronous processing 94 | - Added `processRepositoryAsync` method that runs in the background 95 | - Created `GetIndexingStatusHandler` for checking indexing status 96 | - Updated documentation to reflect the new asynchronous approach 97 | 98 | - **Additional Improvements**: 99 | - More robust error handling in batch processing 100 | - Better progress reporting with detailed status information 101 | - Status persistence across server restarts 102 | - Ability to monitor multiple concurrent indexing operations 103 | -------------------------------------------------------------------------------- /src/services/embeddings.ts: -------------------------------------------------------------------------------- 1 | import ollama from 'ollama'; 2 | import OpenAI from 'openai'; 3 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; 4 | 5 | export interface EmbeddingProvider { 6 | generateEmbeddings(text: string): Promise; 7 | getVectorSize(): number; 8 | } 9 | 10 | export class OllamaProvider implements EmbeddingProvider { 11 | private model: string; 12 | 13 | constructor(model: string = 'nomic-embed-text') { 14 | this.model = model; 15 | } 16 | 17 | async generateEmbeddings(text: string): Promise { 18 | try { 19 | console.error('Generating Ollama embeddings for text:', text.substring(0, 50) + '...'); 20 | const response = await ollama.embeddings({ 21 | model: this.model, 22 | prompt: text 23 | }); 24 | console.error('Successfully generated Ollama embeddings with size:', response.embedding.length); 25 | return response.embedding; 26 | } catch (error) { 27 | console.error('Ollama embedding error:', error); 28 | throw new McpError( 29 | ErrorCode.InternalError, 30 | `Failed to generate embeddings with Ollama: ${error}` 31 | ); 32 | } 33 | } 34 | 35 | getVectorSize(): number { 36 | // nomic-embed-text produces 768-dimensional vectors 37 | return 768; 38 | } 39 | } 40 | 41 | export class OpenAIProvider implements EmbeddingProvider { 42 | private client: OpenAI; 43 | private model: string; 44 | 45 | constructor(apiKey: string, model: string = 'text-embedding-3-small') { 46 | this.client = new OpenAI({ apiKey }); 47 | this.model = model; 48 | } 49 | 50 | async generateEmbeddings(text: string): Promise { 51 | try { 52 | console.error('Generating OpenAI embeddings for text:', text.substring(0, 50) + '...'); 53 | const response = await this.client.embeddings.create({ 54 | model: this.model, 55 | input: text, 56 | }); 57 | const embedding = response.data[0].embedding; 58 | console.error('Successfully generated OpenAI embeddings with size:', embedding.length); 59 | return embedding; 60 | } catch (error) { 61 | 
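// Log the raw OpenAI failure before wrapping it in an McpError; if a
// fallback provider is configured, EmbeddingService.generateEmbeddings
// (below) catches this and retries with the fallback.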
console.error('OpenAI embedding error:', error); 62 | throw new McpError( 63 | ErrorCode.InternalError, 64 | `Failed to generate embeddings with OpenAI: ${error}` 65 | ); 66 | } 67 | } 68 | 69 | getVectorSize(): number { 70 | // text-embedding-3-small produces 1536-dimensional vectors 71 | return 1536; 72 | } 73 | } 74 | 75 | export class EmbeddingService { 76 | private provider: EmbeddingProvider; 77 | private fallbackProvider?: EmbeddingProvider; 78 | 79 | constructor(provider: EmbeddingProvider, fallbackProvider?: EmbeddingProvider) { 80 | this.provider = provider; 81 | this.fallbackProvider = fallbackProvider; 82 | } 83 | 84 | async generateEmbeddings(text: string): Promise { 85 | try { 86 | return await this.provider.generateEmbeddings(text); 87 | } catch (error) { 88 | if (this.fallbackProvider) { 89 | console.error('Primary provider failed, trying fallback provider...'); 90 | return this.fallbackProvider.generateEmbeddings(text); 91 | } 92 | throw error; 93 | } 94 | } 95 | 96 | getVectorSize(): number { 97 | return this.provider.getVectorSize(); 98 | } 99 | 100 | static createFromConfig(config: { 101 | provider: 'ollama' | 'openai'; 102 | apiKey?: string; 103 | model?: string; 104 | fallbackProvider?: 'ollama' | 'openai'; 105 | fallbackApiKey?: string; 106 | fallbackModel?: string; 107 | }): EmbeddingService { 108 | const primaryProvider = EmbeddingService.createProvider( 109 | config.provider, 110 | config.apiKey, 111 | config.model 112 | ); 113 | 114 | let fallbackProvider: EmbeddingProvider | undefined; 115 | if (config.fallbackProvider) { 116 | fallbackProvider = EmbeddingService.createProvider( 117 | config.fallbackProvider, 118 | config.fallbackApiKey, 119 | config.fallbackModel 120 | ); 121 | } 122 | 123 | return new EmbeddingService(primaryProvider, fallbackProvider); 124 | } 125 | 126 | private static createProvider( 127 | provider: 'ollama' | 'openai', 128 | apiKey?: string, 129 | model?: string 130 | ): EmbeddingProvider { 131 | switch (provider) { 132 | case 'ollama': 133 | return new OllamaProvider(model); 134 | case 'openai': 135 | if (!apiKey) { 136 | throw new McpError( 137 | ErrorCode.InvalidParams, 138 | 'OpenAI API key is required' 139 | ); 140 | } 141 | return new OpenAIProvider(apiKey, model); 142 | default: 143 | throw new McpError( 144 | ErrorCode.InvalidParams, 145 | `Unknown embedding provider: ${provider}` 146 | ); 147 | } 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /src/handlers/add-documentation.ts: -------------------------------------------------------------------------------- 1 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; 2 | import { BaseHandler } from './base-handler.js'; 3 | import { DocumentChunk, McpToolResponse } from '../types.js'; 4 | import * as cheerio from 'cheerio'; 5 | import crypto from 'crypto'; 6 | 7 | const COLLECTION_NAME = 'documentation'; 8 | 9 | export class AddDocumentationHandler extends BaseHandler { 10 | async handle(args: any, callContext?: { progressToken?: string | number, requestId: string | number }): Promise { 11 | if (!args.url || typeof args.url !== 'string') { 12 | throw new McpError(ErrorCode.InvalidParams, 'URL is required'); 13 | } 14 | 15 | try { 16 | const chunks = await this.fetchAndProcessUrl(args.url); 17 | 18 | // Batch process chunks for better performance 19 | const batchSize = 100; 20 | for (let i = 0; i < chunks.length; i += batchSize) { 21 | const batch = chunks.slice(i, i + batchSize); 22 | const points = await 
Promise.all( 23 | batch.map(async (chunk) => { 24 | const embedding = await this.apiClient.getEmbeddings(chunk.text); 25 | return { 26 | id: this.generatePointId(), 27 | vector: embedding, 28 | payload: { 29 | ...chunk, 30 | _type: 'DocumentChunk' as const, 31 | } as Record, 32 | }; 33 | }) 34 | ); 35 | 36 | try { 37 | await this.apiClient.qdrantClient.upsert(COLLECTION_NAME, { 38 | wait: true, 39 | points, 40 | }); 41 | } catch (error) { 42 | if (error instanceof Error) { 43 | if (error.message.includes('unauthorized')) { 44 | throw new McpError( 45 | ErrorCode.InvalidRequest, 46 | 'Failed to authenticate with Qdrant cloud while adding documents' 47 | ); 48 | } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) { 49 | throw new McpError( 50 | ErrorCode.InternalError, 51 | 'Connection to Qdrant cloud failed while adding documents' 52 | ); 53 | } 54 | } 55 | throw error; 56 | } 57 | } 58 | 59 | return { 60 | content: [ 61 | { 62 | type: 'text', 63 | text: `Successfully added documentation from ${args.url} (${chunks.length} chunks processed in ${Math.ceil(chunks.length / batchSize)} batches)`, 64 | }, 65 | ], 66 | }; 67 | } catch (error) { 68 | if (error instanceof McpError) { 69 | throw error; 70 | } 71 | return { 72 | content: [ 73 | { 74 | type: 'text', 75 | text: `Failed to add documentation: ${error}`, 76 | }, 77 | ], 78 | isError: true, 79 | }; 80 | } 81 | } 82 | 83 | private async fetchAndProcessUrl(url: string): Promise { 84 | await this.apiClient.initBrowser(); 85 | const page = await this.apiClient.browser.newPage(); 86 | 87 | try { 88 | await page.goto(url, { waitUntil: 'networkidle' }); 89 | const content = await page.content(); 90 | const $ = cheerio.load(content); 91 | 92 | // Remove script tags, style tags, and comments 93 | $('script').remove(); 94 | $('style').remove(); 95 | $('noscript').remove(); 96 | 97 | // Extract main content 98 | const title = $('title').text() || url; 99 | const mainContent = $('main, article, .content, .documentation, body').text(); 100 | 101 | // Split content into chunks 102 | const chunks = this.chunkText(mainContent, 1000); 103 | 104 | return chunks.map(chunk => ({ 105 | text: chunk, 106 | url, 107 | title, 108 | timestamp: new Date().toISOString(), 109 | })); 110 | } catch (error) { 111 | throw new McpError( 112 | ErrorCode.InternalError, 113 | `Failed to fetch URL ${url}: ${error}` 114 | ); 115 | } finally { 116 | await page.close(); 117 | } 118 | } 119 | 120 | private chunkText(text: string, maxChunkSize: number): string[] { 121 | const words = text.split(/\s+/); 122 | const chunks: string[] = []; 123 | let currentChunk: string[] = []; 124 | 125 | for (const word of words) { 126 | currentChunk.push(word); 127 | const currentLength = currentChunk.join(' ').length; 128 | 129 | if (currentLength >= maxChunkSize) { 130 | chunks.push(currentChunk.join(' ')); 131 | currentChunk = []; 132 | } 133 | } 134 | 135 | if (currentChunk.length > 0) { 136 | chunks.push(currentChunk.join(' ')); 137 | } 138 | 139 | return chunks; 140 | } 141 | 142 | private generatePointId(): string { 143 | return crypto.randomBytes(16).toString('hex'); 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/handlers/list-sources.ts: -------------------------------------------------------------------------------- 1 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; 2 | import { BaseHandler } from './base-handler.js'; 3 | import { McpToolResponse, isDocumentPayload 
} from '../types.js';
4 | 
5 | const COLLECTION_NAME = 'documentation';
6 | 
7 | interface Source {
8 |   title: string;
9 |   url: string;
10 | }
11 | 
12 | interface GroupedSources {
13 |   [domain: string]: {
14 |     [subdomain: string]: Source[];
15 |   };
16 | }
17 | 
18 | export class ListSourcesHandler extends BaseHandler {
19 |   private groupSourcesByDomainAndSubdomain(sources: Source[]): GroupedSources {
20 |     const grouped: GroupedSources = {};
21 | 
22 |     for (const source of sources) {
23 |       try {
24 |         const url = new URL(source.url);
25 |         const domain = url.hostname;
26 |         const pathParts = url.pathname.split('/').filter(p => p);
27 |         const subdomain = pathParts[0] || '/';
28 | 
29 |         if (!grouped[domain]) {
30 |           grouped[domain] = {};
31 |         }
32 |         if (!grouped[domain][subdomain]) {
33 |           grouped[domain][subdomain] = [];
34 |         }
35 |         grouped[domain][subdomain].push(source);
36 |       } catch (error) {
37 |         console.error(`Invalid URL: ${source.url}`);
38 |       }
39 |     }
40 | 
41 |     return grouped;
42 |   }
43 | 
44 |   private formatGroupedSources(grouped: GroupedSources): string {
45 |     const output: string[] = [];
46 |     let domainCounter = 1;
47 | 
48 |     for (const [domain, subdomains] of Object.entries(grouped)) {
49 |       output.push(`${domainCounter}. ${domain}`);
50 | 
51 |       // Deduplicate sources across subdomains, keyed by URL
52 |       const uniqueSources = new Map<string, Source>();
53 |       for (const sources of Object.values(subdomains)) {
54 |         for (const source of sources) {
55 |           uniqueSources.set(source.url, source);
56 |         }
57 |       }
58 | 
59 |       // Convert to array and sort
60 |       const sortedSources = Array.from(uniqueSources.values())
61 |         .sort((a, b) => a.title.localeCompare(b.title));
62 | 
63 |       // Number each entry within its domain (1.1, 1.2, ...)
64 |       sortedSources.forEach((source, index) => {
65 |         output.push(`${domainCounter}.${index + 1}. 
${source.title} (${source.url})`); 66 | }); 67 | 68 | output.push(''); // Add blank line between domains 69 | domainCounter++; 70 | } 71 | 72 | return output.join('\n'); 73 | } 74 | 75 | async handle(args?: any, callContext?: { progressToken?: string | number, requestId: string | number }): Promise { 76 | try { 77 | await this.apiClient.initCollection(COLLECTION_NAME); 78 | 79 | const pageSize = 100; 80 | let offset = null; 81 | const sources: Source[] = []; 82 | 83 | while (true) { 84 | const scroll = await this.apiClient.qdrantClient.scroll(COLLECTION_NAME, { 85 | with_payload: true, 86 | with_vector: false, 87 | limit: pageSize, 88 | offset, 89 | }); 90 | 91 | if (scroll.points.length === 0) break; 92 | 93 | for (const point of scroll.points) { 94 | if (point.payload && typeof point.payload === 'object' && 'url' in point.payload && 'title' in point.payload) { 95 | const payload = point.payload as any; 96 | sources.push({ 97 | title: payload.title, 98 | url: payload.url 99 | }); 100 | } 101 | } 102 | 103 | if (scroll.points.length < pageSize) break; 104 | offset = scroll.points[scroll.points.length - 1].id; 105 | } 106 | 107 | if (sources.length === 0) { 108 | return { 109 | content: [ 110 | { 111 | type: 'text', 112 | text: 'No documentation sources found.', 113 | }, 114 | ], 115 | }; 116 | } 117 | 118 | const grouped = this.groupSourcesByDomainAndSubdomain(sources); 119 | const formattedOutput = this.formatGroupedSources(grouped); 120 | 121 | return { 122 | content: [ 123 | { 124 | type: 'text', 125 | text: formattedOutput, 126 | }, 127 | ], 128 | }; 129 | } catch (error) { 130 | if (error instanceof Error) { 131 | if (error.message.includes('unauthorized')) { 132 | throw new McpError( 133 | ErrorCode.InvalidRequest, 134 | 'Failed to authenticate with Qdrant cloud while listing sources' 135 | ); 136 | } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) { 137 | throw new McpError( 138 | ErrorCode.InternalError, 139 | 'Connection to Qdrant cloud failed while listing sources' 140 | ); 141 | } 142 | } 143 | return { 144 | content: [ 145 | { 146 | type: 'text', 147 | text: `Failed to list sources: ${error}`, 148 | }, 149 | ], 150 | isError: true, 151 | }; 152 | } 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /src/handlers/watch-repository.ts: -------------------------------------------------------------------------------- 1 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; 2 | import { BaseHandler } from './base-handler.js'; 3 | import { McpToolResponse, RepositoryConfig } from '../types.js'; 4 | import fs from 'fs/promises'; 5 | import path from 'path'; 6 | import { RepositoryWatcher } from '../utils/repository-watcher.js'; 7 | import { UpdateRepositoryHandler } from './update-repository.js'; 8 | import { RepositoryConfigLoader } from '../utils/repository-config-loader.js'; 9 | 10 | const REPO_CONFIG_DIR = path.join(process.cwd(), 'repo-configs'); 11 | 12 | // Map to store active watchers 13 | const activeWatchers = new Map(); 14 | 15 | export class WatchRepositoryHandler extends BaseHandler { 16 | private updateHandler: UpdateRepositoryHandler; 17 | 18 | constructor(server: any, apiClient: any) { 19 | super(server, apiClient); 20 | this.updateHandler = new UpdateRepositoryHandler(server, apiClient); 21 | } 22 | 23 | async handle(args: any, callContext?: { progressToken?: string | number, requestId: string | number }): Promise { 24 | if (!args.name || typeof args.name 
!== 'string') { 25 | throw new McpError(ErrorCode.InvalidParams, 'Repository name is required'); 26 | } 27 | 28 | if (args.action !== 'start' && args.action !== 'stop') { 29 | throw new McpError(ErrorCode.InvalidParams, 'Action must be either "start" or "stop"'); 30 | } 31 | 32 | const repoName = args.name; 33 | const configPath = path.join(REPO_CONFIG_DIR, `${repoName}.json`); 34 | 35 | try { 36 | // Check if the repository config exists 37 | try { 38 | await fs.access(configPath); 39 | } catch { 40 | throw new McpError(ErrorCode.InvalidParams, `Repository not found: ${repoName}`); 41 | } 42 | 43 | // Read the config 44 | const configContent = await fs.readFile(configPath, 'utf-8'); 45 | const config = JSON.parse(configContent) as RepositoryConfig; 46 | 47 | if (args.action === 'start') { 48 | // Check if already watching 49 | if (activeWatchers.has(repoName)) { 50 | return { 51 | content: [ 52 | { 53 | type: 'text', 54 | text: `Repository ${repoName} is already being watched`, 55 | }, 56 | ], 57 | }; 58 | } 59 | 60 | // Create a new watcher 61 | const watcher = new RepositoryWatcher( 62 | config, 63 | async (changedFiles, removedFiles) => { 64 | console.log(`Repository ${repoName} changed: ${changedFiles.length} files changed, ${removedFiles.length} files removed`); 65 | 66 | // Update the repository index 67 | if (changedFiles.length > 0 || removedFiles.length > 0) { 68 | try { 69 | // Pass the callContext along if it exists 70 | await this.updateHandler.handle({ name: repoName }, callContext); 71 | console.log(`Repository ${repoName} index updated successfully`); 72 | } catch (error) { 73 | console.error(`Failed to update repository ${repoName} index:`, error); 74 | } 75 | } 76 | } 77 | ); 78 | 79 | // Start watching 80 | await watcher.start(); 81 | activeWatchers.set(repoName, watcher); 82 | 83 | // Update the config to reflect watch mode 84 | config.watchMode = true; 85 | await fs.writeFile(configPath, JSON.stringify(config, null, 2), 'utf-8'); 86 | 87 | // Update the repositories.json configuration file 88 | const configLoader = new RepositoryConfigLoader(this.server, this.apiClient); 89 | await configLoader.addRepositoryToConfig(config); 90 | 91 | return { 92 | content: [ 93 | { 94 | type: 'text', 95 | text: `Started watching repository: ${repoName} (${config.path})`, 96 | }, 97 | ], 98 | }; 99 | } else { 100 | // Stop watching 101 | const watcher = activeWatchers.get(repoName); 102 | if (!watcher) { 103 | return { 104 | content: [ 105 | { 106 | type: 'text', 107 | text: `Repository ${repoName} is not currently being watched`, 108 | }, 109 | ], 110 | }; 111 | } 112 | 113 | watcher.stop(); 114 | activeWatchers.delete(repoName); 115 | 116 | // Update the config to reflect watch mode 117 | config.watchMode = false; 118 | await fs.writeFile(configPath, JSON.stringify(config, null, 2), 'utf-8'); 119 | 120 | // Update the repositories.json configuration file 121 | const configLoader = new RepositoryConfigLoader(this.server, this.apiClient); 122 | await configLoader.addRepositoryToConfig(config); 123 | 124 | return { 125 | content: [ 126 | { 127 | type: 'text', 128 | text: `Stopped watching repository: ${repoName} (${config.path})`, 129 | }, 130 | ], 131 | }; 132 | } 133 | } catch (error) { 134 | if (error instanceof McpError) { 135 | throw error; 136 | } 137 | return { 138 | content: [ 139 | { 140 | type: 'text', 141 | text: `Failed to ${args.action} watching repository: ${error}`, 142 | }, 143 | ], 144 | isError: true, 145 | }; 146 | } 147 | } 148 | } 149 | 
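A minimal usage sketch for the handler above, mirroring how `RepositoryConfigLoader` (later in this repo) starts watch mode; typing `server` and `apiClient` as `any` is a simplification for illustration:

```typescript
import { WatchRepositoryHandler } from './watch-repository.js';

// Start watching a previously indexed repository, then stop it again.
async function toggleWatch(server: any, apiClient: any, repoName: string): Promise<void> {
  const handler = new WatchRepositoryHandler(server, apiClient);

  const started = await handler.handle({ name: repoName, action: 'start' });
  console.error(started.content[0].text); // "Started watching repository: ..."

  // ...later, e.g. on shutdown:
  const stopped = await handler.handle({ name: repoName, action: 'stop' });
  console.error(stopped.content[0].text); // "Stopped watching repository: ..."
}
```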
-------------------------------------------------------------------------------- /src/utils/repository-watcher.ts: -------------------------------------------------------------------------------- 1 | import fs from 'fs'; 2 | import path from 'path'; 3 | import { glob } from 'glob'; 4 | import crypto from 'crypto'; 5 | import { RepositoryConfig } from '../types.js'; 6 | 7 | interface FileState { 8 | path: string; 9 | hash: string; 10 | lastModified: number; 11 | } 12 | 13 | export class RepositoryWatcher { 14 | private config: RepositoryConfig; 15 | private fileStates: Map = new Map(); 16 | private watchInterval: NodeJS.Timeout | null = null; 17 | private onFileChanged: (changedFiles: string[], removedFiles: string[]) => Promise; 18 | 19 | constructor( 20 | config: RepositoryConfig, 21 | onFileChanged: (changedFiles: string[], removedFiles: string[]) => Promise 22 | ) { 23 | this.config = config; 24 | this.onFileChanged = onFileChanged; 25 | } 26 | 27 | /** 28 | * Start watching the repository for changes 29 | */ 30 | async start(): Promise { 31 | // Initialize the file states 32 | await this.initializeFileStates(); 33 | 34 | // Start the watch interval 35 | this.watchInterval = setInterval( 36 | () => this.checkForChanges(), 37 | this.config.watchInterval 38 | ); 39 | 40 | console.log(`Started watching repository: ${this.config.name} (${this.config.path})`); 41 | } 42 | 43 | /** 44 | * Stop watching the repository 45 | */ 46 | stop(): void { 47 | if (this.watchInterval) { 48 | clearInterval(this.watchInterval); 49 | this.watchInterval = null; 50 | console.log(`Stopped watching repository: ${this.config.name}`); 51 | } 52 | } 53 | 54 | /** 55 | * Initialize the file states by scanning the repository 56 | */ 57 | private async initializeFileStates(): Promise { 58 | const files = await glob(this.config.include, { 59 | cwd: this.config.path, 60 | ignore: this.config.exclude, 61 | absolute: true, 62 | nodir: true, 63 | }); 64 | 65 | for (const file of files) { 66 | try { 67 | const stats = fs.statSync(file); 68 | const content = fs.readFileSync(file, 'utf-8'); 69 | const hash = this.hashContent(content); 70 | 71 | this.fileStates.set(file, { 72 | path: file, 73 | hash, 74 | lastModified: stats.mtimeMs, 75 | }); 76 | } catch (error) { 77 | console.error(`Error initializing file state for ${file}:`, error); 78 | } 79 | } 80 | 81 | console.log(`Initialized file states for ${this.fileStates.size} files in repository: ${this.config.name}`); 82 | } 83 | 84 | /** 85 | * Check for changes in the repository 86 | */ 87 | private async checkForChanges(): Promise { 88 | try { 89 | const currentFiles = await glob(this.config.include, { 90 | cwd: this.config.path, 91 | ignore: this.config.exclude, 92 | absolute: true, 93 | nodir: true, 94 | }); 95 | 96 | const currentFilePaths = new Set(currentFiles); 97 | const previousFilePaths = new Set(this.fileStates.keys()); 98 | 99 | // Find added or modified files 100 | const changedFiles: string[] = []; 101 | for (const file of currentFiles) { 102 | try { 103 | const stats = fs.statSync(file); 104 | const previousState = this.fileStates.get(file); 105 | 106 | // If the file is new or the modification time has changed 107 | if (!previousState || previousState.lastModified !== stats.mtimeMs) { 108 | const content = fs.readFileSync(file, 'utf-8'); 109 | const hash = this.hashContent(content); 110 | 111 | // If the file is new or the content has changed 112 | if (!previousState || previousState.hash !== hash) { 113 | changedFiles.push(file); 114 | 115 | // Update the 
file state 116 | this.fileStates.set(file, { 117 | path: file, 118 | hash, 119 | lastModified: stats.mtimeMs, 120 | }); 121 | } else if (previousState) { 122 | // Update just the modification time if only that changed 123 | this.fileStates.set(file, { 124 | ...previousState, 125 | lastModified: stats.mtimeMs, 126 | }); 127 | } 128 | } 129 | } catch (error) { 130 | console.error(`Error checking file ${file}:`, error); 131 | } 132 | } 133 | 134 | // Find removed files 135 | const removedFiles: string[] = []; 136 | for (const file of previousFilePaths) { 137 | if (!currentFilePaths.has(file)) { 138 | removedFiles.push(file); 139 | this.fileStates.delete(file); 140 | } 141 | } 142 | 143 | // If there are changes, notify the callback 144 | if (changedFiles.length > 0 || removedFiles.length > 0) { 145 | console.log(`Detected changes in repository ${this.config.name}:`); 146 | if (changedFiles.length > 0) { 147 | console.log(`- Changed files: ${changedFiles.length}`); 148 | } 149 | if (removedFiles.length > 0) { 150 | console.log(`- Removed files: ${removedFiles.length}`); 151 | } 152 | 153 | await this.onFileChanged(changedFiles, removedFiles); 154 | } 155 | } catch (error) { 156 | console.error(`Error checking for changes in repository ${this.config.name}:`, error); 157 | } 158 | } 159 | 160 | /** 161 | * Generate a hash of the file content 162 | */ 163 | private hashContent(content: string): string { 164 | return crypto.createHash('md5').update(content).digest('hex'); 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /src/utils/indexing-status-manager.ts: -------------------------------------------------------------------------------- 1 | import fs from 'fs/promises'; 2 | import path from 'path'; 3 | import { fileURLToPath } from 'url'; 4 | import { IndexingStatus } from '../types.js'; 5 | 6 | const __dirname = path.dirname(fileURLToPath(import.meta.url)); 7 | const STATUS_DIR = path.join(__dirname, '..', 'indexing-status'); 8 | const STATUS_FILE_PREFIX = 'status-'; 9 | 10 | /** 11 | * Manages the status of repository indexing operations 12 | */ 13 | export class IndexingStatusManager { 14 | /** 15 | * Initialize the status manager 16 | */ 17 | constructor() { 18 | this.ensureStatusDirectory(); 19 | } 20 | 21 | /** 22 | * Create a new indexing status entry 23 | */ 24 | async createStatus(repositoryName: string): Promise { 25 | await this.ensureStatusDirectory(); 26 | 27 | const status: IndexingStatus = { 28 | repositoryName, 29 | status: 'pending', 30 | startTime: new Date().toISOString(), 31 | lastUpdated: new Date().toISOString() 32 | }; 33 | 34 | await this.saveStatus(status); 35 | return status; 36 | } 37 | 38 | /** 39 | * Update an existing indexing status 40 | */ 41 | async updateStatus(status: Partial & { repositoryName: string }): Promise { 42 | const currentStatus = await this.getStatus(status.repositoryName); 43 | 44 | if (!currentStatus) { 45 | throw new Error(`No status found for repository: ${status.repositoryName}`); 46 | } 47 | 48 | const updatedStatus: IndexingStatus = { 49 | ...currentStatus, 50 | ...status, 51 | lastUpdated: new Date().toISOString() 52 | }; 53 | 54 | await this.saveStatus(updatedStatus); 55 | return updatedStatus; 56 | } 57 | 58 | /** 59 | * Get the current status for a repository 60 | */ 61 | async getStatus(repositoryName: string): Promise { 62 | try { 63 | const filePath = this.getStatusFilePath(repositoryName); 64 | const content = await fs.readFile(filePath, 'utf-8'); 65 | return JSON.parse(content) as 
IndexingStatus; 66 | } catch (error) { 67 | // If file doesn't exist, return null 68 | if ((error as NodeJS.ErrnoException).code === 'ENOENT') { 69 | return null; 70 | } 71 | throw error; 72 | } 73 | } 74 | 75 | /** 76 | * Get all indexing statuses 77 | */ 78 | async getAllStatuses(): Promise { 79 | await this.ensureStatusDirectory(); 80 | 81 | try { 82 | const files = await fs.readdir(STATUS_DIR); 83 | const statusFiles = files.filter(file => file.startsWith(STATUS_FILE_PREFIX)); 84 | 85 | const statuses: IndexingStatus[] = []; 86 | for (const file of statusFiles) { 87 | try { 88 | const content = await fs.readFile(path.join(STATUS_DIR, file), 'utf-8'); 89 | statuses.push(JSON.parse(content) as IndexingStatus); 90 | } catch (error) { 91 | console.error(`Error reading status file ${file}:`, error); 92 | } 93 | } 94 | 95 | return statuses; 96 | } catch (error) { 97 | console.error('Error reading status directory:', error); 98 | return []; 99 | } 100 | } 101 | 102 | /** 103 | * Delete a status entry 104 | */ 105 | async deleteStatus(repositoryName: string): Promise { 106 | try { 107 | const filePath = this.getStatusFilePath(repositoryName); 108 | await fs.unlink(filePath); 109 | } catch (error) { 110 | // Ignore if file doesn't exist 111 | if ((error as NodeJS.ErrnoException).code !== 'ENOENT') { 112 | throw error; 113 | } 114 | } 115 | } 116 | 117 | /** 118 | * Complete an indexing operation 119 | */ 120 | async completeStatus( 121 | repositoryName: string, 122 | success: boolean, 123 | stats?: { 124 | processedFiles: number, 125 | skippedFiles: number, 126 | totalChunks: number, 127 | indexedChunks: number 128 | }, 129 | error?: string 130 | ): Promise { 131 | const status = await this.getStatus(repositoryName); 132 | 133 | if (!status) { 134 | throw new Error(`No status found for repository: ${repositoryName}`); 135 | } 136 | 137 | const updatedStatus: IndexingStatus = { 138 | ...status, 139 | status: success ? 'completed' : 'failed', 140 | endTime: new Date().toISOString(), 141 | lastUpdated: new Date().toISOString(), 142 | percentageComplete: success ? 
100 : status.percentageComplete,
143 |       error: error || status.error
144 |     };
145 | 
146 |     if (stats) {
147 |       updatedStatus.processedFiles = stats.processedFiles;
148 |       updatedStatus.skippedFiles = stats.skippedFiles;
149 |       updatedStatus.totalChunks = stats.totalChunks;
150 |       updatedStatus.indexedChunks = stats.indexedChunks;
151 |     }
152 | 
153 |     await this.saveStatus(updatedStatus);
154 |     return updatedStatus;
155 |   }
156 | 
157 |   /**
158 |    * Save status to file
159 |    */
160 |   private async saveStatus(status: IndexingStatus): Promise<void> {
161 |     await this.ensureStatusDirectory();
162 |     const filePath = this.getStatusFilePath(status.repositoryName);
163 |     await fs.writeFile(filePath, JSON.stringify(status, null, 2), 'utf-8');
164 |   }
165 | 
166 |   /**
167 |    * Get the file path for a status file
168 |    */
169 |   private getStatusFilePath(repositoryName: string): string {
170 |     return path.join(STATUS_DIR, `${STATUS_FILE_PREFIX}${repositoryName}.json`);
171 |   }
172 | 
173 |   /**
174 |    * Ensure the status directory exists
175 |    */
176 |   private async ensureStatusDirectory(): Promise<void> {
177 |     try {
178 |       await fs.mkdir(STATUS_DIR, { recursive: true });
179 |     } catch (error) {
180 |       console.error('Error creating status directory:', error);
181 |       throw error;
182 |     }
183 |   }
184 | }
185 | 
-------------------------------------------------------------------------------- /src/public/index.html: --------------------------------------------------------------------------------
[Markup lost during extraction; only stray line numbers and text nodes survived. Recoverable structure: an HTML page titled "MCP RAG Docs" with a header and four panels: "Add Documentation" (URL input plus submit button), "Processing Queue" (queue controls with a "Loading queue..." placeholder), "Search Documentation" (query input plus search button), and "Available Documents" (a "Loading documents..." list with "Page 1 of 1" pagination controls). An inline style block (original lines 10-23) and an inline script (original lines 120-167) that wires the panels to the server were also stripped.]
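The page's inline script did not survive extraction. A hypothetical sketch of the kind of polling logic such a script would contain; the `/api/queue` endpoint and the `queue` element id are illustrative assumptions, not names taken from `server.ts`:

```typescript
// Hypothetical reconstruction of the page's polling logic (endpoint and
// element id are assumed; the real ones live in src/server.ts and the
// stripped markup).
async function refreshQueue(): Promise<void> {
  const res = await fetch('/api/queue');
  const urls: string[] = await res.json();
  const panel = document.getElementById('queue');
  if (panel) {
    panel.textContent = urls.length > 0 ? urls.join('\n') : 'Queue is empty';
  }
}

// Poll while the page is open.
setInterval(refreshQueue, 5000);
void refreshQueue();
```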
-------------------------------------------------------------------------------- /src/utils/repository-config-loader.ts: --------------------------------------------------------------------------------
1 | import fs from 'fs/promises';
2 | import path from 'path';
3 | import { fileURLToPath } from 'url';
4 | import { RepositoryConfig } from '../types.js';
5 | import { Server } from '@modelcontextprotocol/sdk/server/index.js';
6 | import { ApiClient } from '../api-client.js';
7 | import { UpdateRepositoryHandler } from '../handlers/update-repository.js';
8 | import { LocalRepositoryHandler } from '../handlers/local-repository.js';
9 | import { WatchRepositoryHandler } from '../handlers/watch-repository.js';
10 | 
11 | const __dirname = path.dirname(fileURLToPath(import.meta.url));
12 | const CONFIG_FILE_PATH = path.join(__dirname, '..', '..', 'repositories.json');
13 | const REPO_CONFIG_DIR = path.join(__dirname, '..', '..', 'repo-configs');
14 | 
15 | /**
16 |  * Interface for the repositories configuration file
17 |  */
18 | interface RepositoriesConfig {
19 |   repositories: RepositoryConfig[];
20 |   autoWatch: boolean;
21 | }
22 | 
23 | /**
24 |  * Class for loading and managing repository configurations from a JSON file
25 |  */
26 | export class RepositoryConfigLoader {
27 |   private server: Server;
28 |   private apiClient: ApiClient;
29 |   private updateHandler: UpdateRepositoryHandler;
30 |   private addHandler: LocalRepositoryHandler;
31 |   private watchHandler: WatchRepositoryHandler;
32 | 
33 |   constructor(server: Server, apiClient: ApiClient) {
34 |     this.server = server;
35 |     this.apiClient = apiClient;
36 |     this.updateHandler = new UpdateRepositoryHandler(server, apiClient);
37 |     this.addHandler = new LocalRepositoryHandler(server, apiClient);
38 |     this.watchHandler = new WatchRepositoryHandler(server, apiClient);
39 |   }
40 | 
41 |   /**
42 |    * Load repositories from the configuration file and initialize them
43 |    */
44 |   async loadRepositories(): Promise<void> {
45 |     try {
46 |       // Check if the config file exists
47 |       try {
48 |         await fs.access(CONFIG_FILE_PATH);
49 |       } catch {
50 |         console.log('No repositories.json configuration file found. 
Creating default configuration...'); 51 | await this.createDefaultConfig(); 52 | return; 53 | } 54 | 55 | // Read the config file 56 | const configContent = await fs.readFile(CONFIG_FILE_PATH, 'utf-8'); 57 | const config = JSON.parse(configContent) as RepositoriesConfig; 58 | 59 | // Ensure the repo-configs directory exists 60 | await fs.mkdir(REPO_CONFIG_DIR, { recursive: true }); 61 | 62 | // Process each repository in the config 63 | console.log(`Loading ${config.repositories.length} repositories from configuration...`); 64 | 65 | for (const repoConfig of config.repositories) { 66 | try { 67 | // Check if the repository path exists 68 | try { 69 | const stats = await fs.stat(repoConfig.path); 70 | if (!stats.isDirectory()) { 71 | console.error(`Repository path is not a directory: ${repoConfig.path}`); 72 | continue; 73 | } 74 | } catch { 75 | console.error(`Repository path does not exist: ${repoConfig.path}`); 76 | continue; 77 | } 78 | 79 | // Check if the repository is already indexed 80 | const configPath = path.join(REPO_CONFIG_DIR, `${repoConfig.name}.json`); 81 | let isUpdate = false; 82 | 83 | try { 84 | await fs.access(configPath); 85 | isUpdate = true; 86 | } catch { 87 | // Repository doesn't exist yet, will be added 88 | } 89 | 90 | if (isUpdate) { 91 | // Update existing repository 92 | console.log(`Updating repository: ${repoConfig.name}`); 93 | await this.updateHandler.handle(repoConfig); 94 | } else { 95 | // Add new repository 96 | console.log(`Adding repository: ${repoConfig.name}`); 97 | await this.addHandler.handle(repoConfig); 98 | } 99 | 100 | // Start watching if configured 101 | if (config.autoWatch && repoConfig.watchMode) { 102 | console.log(`Starting watch for repository: ${repoConfig.name}`); 103 | await this.watchHandler.handle({ 104 | name: repoConfig.name, 105 | action: 'start' 106 | }); 107 | } 108 | } catch (error) { 109 | console.error(`Error processing repository ${repoConfig.name}:`, error); 110 | } 111 | } 112 | 113 | console.log('Repositories loaded successfully from configuration'); 114 | } catch (error) { 115 | console.error('Error loading repositories from configuration:', error); 116 | } 117 | } 118 | 119 | /** 120 | * Create a default configuration file if none exists 121 | */ 122 | private async createDefaultConfig(): Promise { 123 | const defaultConfig: RepositoriesConfig = { 124 | repositories: [], 125 | autoWatch: true 126 | }; 127 | 128 | try { 129 | await fs.writeFile(CONFIG_FILE_PATH, JSON.stringify(defaultConfig, null, 2), 'utf-8'); 130 | console.log(`Created default repositories configuration at ${CONFIG_FILE_PATH}`); 131 | } catch (error) { 132 | console.error('Error creating default configuration:', error); 133 | } 134 | } 135 | 136 | /** 137 | * Update the configuration file with the current state of repositories 138 | */ 139 | async updateConfigFile(): Promise { 140 | try { 141 | // Get all repository config files 142 | const configFiles = await fs.readdir(REPO_CONFIG_DIR); 143 | const jsonFiles = configFiles.filter(file => file.endsWith('.json')); 144 | 145 | // Load each repository config 146 | const repositories: RepositoryConfig[] = []; 147 | for (const file of jsonFiles) { 148 | try { 149 | const configPath = path.join(REPO_CONFIG_DIR, file); 150 | const configContent = await fs.readFile(configPath, 'utf-8'); 151 | const config = JSON.parse(configContent) as RepositoryConfig; 152 | repositories.push(config); 153 | } catch (error) { 154 | console.error(`Error loading repository config ${file}:`, error); 155 | } 156 | } 157 | 158 
| // Check if the config file exists 159 | let existingConfig: RepositoriesConfig = { repositories: [], autoWatch: true }; 160 | try { 161 | const configContent = await fs.readFile(CONFIG_FILE_PATH, 'utf-8'); 162 | existingConfig = JSON.parse(configContent) as RepositoriesConfig; 163 | } catch { 164 | // Config file doesn't exist yet, will use default 165 | } 166 | 167 | // Update the config file 168 | const updatedConfig: RepositoriesConfig = { 169 | repositories, 170 | autoWatch: existingConfig.autoWatch 171 | }; 172 | 173 | await fs.writeFile(CONFIG_FILE_PATH, JSON.stringify(updatedConfig, null, 2), 'utf-8'); 174 | console.log(`Updated repositories configuration at ${CONFIG_FILE_PATH}`); 175 | } catch (error) { 176 | console.error('Error updating configuration file:', error); 177 | } 178 | } 179 | 180 | /** 181 | * Add a repository to the configuration file 182 | */ 183 | async addRepositoryToConfig(config: RepositoryConfig): Promise { 184 | try { 185 | // Check if the config file exists 186 | let existingConfig: RepositoriesConfig = { repositories: [], autoWatch: true }; 187 | try { 188 | const configContent = await fs.readFile(CONFIG_FILE_PATH, 'utf-8'); 189 | existingConfig = JSON.parse(configContent) as RepositoriesConfig; 190 | } catch { 191 | // Config file doesn't exist yet, will use default 192 | } 193 | 194 | // Check if the repository already exists 195 | const existingIndex = existingConfig.repositories.findIndex(repo => repo.name === config.name); 196 | if (existingIndex >= 0) { 197 | // Update existing repository 198 | existingConfig.repositories[existingIndex] = config; 199 | } else { 200 | // Add new repository 201 | existingConfig.repositories.push(config); 202 | } 203 | 204 | // Update the config file 205 | await fs.writeFile(CONFIG_FILE_PATH, JSON.stringify(existingConfig, null, 2), 'utf-8'); 206 | console.log(`Added repository ${config.name} to configuration`); 207 | } catch (error) { 208 | console.error(`Error adding repository ${config.name} to configuration:`, error); 209 | } 210 | } 211 | 212 | /** 213 | * Remove a repository from the configuration file 214 | */ 215 | async removeRepositoryFromConfig(name: string): Promise { 216 | try { 217 | // Check if the config file exists 218 | try { 219 | await fs.access(CONFIG_FILE_PATH); 220 | } catch { 221 | console.log('No repositories.json configuration file found.'); 222 | return; 223 | } 224 | 225 | // Read the config file 226 | const configContent = await fs.readFile(CONFIG_FILE_PATH, 'utf-8'); 227 | const config = JSON.parse(configContent) as RepositoriesConfig; 228 | 229 | // Remove the repository 230 | const initialLength = config.repositories.length; 231 | config.repositories = config.repositories.filter(repo => repo.name !== name); 232 | 233 | if (config.repositories.length === initialLength) { 234 | console.log(`Repository ${name} not found in configuration`); 235 | return; 236 | } 237 | 238 | // Update the config file 239 | await fs.writeFile(CONFIG_FILE_PATH, JSON.stringify(config, null, 2), 'utf-8'); 240 | console.log(`Removed repository ${name} from configuration`); 241 | } catch (error) { 242 | console.error(`Error removing repository ${name} from configuration:`, error); 243 | } 244 | } 245 | } 246 | -------------------------------------------------------------------------------- /src/handlers/update-repository.ts: -------------------------------------------------------------------------------- 1 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; 2 | import { BaseHandler } from 
'./base-handler.js'; 3 | import { DocumentChunk, McpToolResponse, RepositoryConfig } from '../types.js'; 4 | import fs from 'fs/promises'; 5 | import path from 'path'; 6 | import { glob } from 'glob'; 7 | import crypto from 'crypto'; 8 | import { detectLanguage } from '../utils/language-detection.js'; 9 | import { RepositoryConfigLoader } from '../utils/repository-config-loader.js'; 10 | 11 | const COLLECTION_NAME = 'documentation'; 12 | const REPO_CONFIG_DIR = path.join(process.cwd(), 'repo-configs'); 13 | 14 | export class UpdateRepositoryHandler extends BaseHandler { 15 | private activeProgressToken: string | number | undefined; 16 | 17 | async handle(args: any, callContext?: { progressToken?: string | number, requestId: string | number }): Promise<McpToolResponse> { 18 | this.activeProgressToken = callContext?.progressToken || callContext?.requestId; 19 | 20 | if (!args.name || typeof args.name !== 'string') { 21 | throw new McpError(ErrorCode.InvalidParams, 'Repository name is required'); 22 | } 23 | 24 | const repoName = args.name; 25 | const configPath = path.join(REPO_CONFIG_DIR, `${repoName}.json`); 26 | 27 | try { 28 | // Check if the repository config exists 29 | try { 30 | await fs.access(configPath); 31 | } catch { 32 | throw new McpError(ErrorCode.InvalidParams, `Repository not found: ${repoName}`); 33 | } 34 | 35 | // Read the config 36 | const configContent = await fs.readFile(configPath, 'utf-8'); 37 | let config = JSON.parse(configContent) as RepositoryConfig; 38 | 39 | // Update config with any provided parameters 40 | if (args.include) config.include = args.include; 41 | if (args.exclude) config.exclude = args.exclude; 42 | if (args.watchMode !== undefined) config.watchMode = args.watchMode; 43 | if (args.watchInterval) config.watchInterval = args.watchInterval; 44 | if (args.chunkSize) config.chunkSize = args.chunkSize; 45 | if (args.fileTypeConfig) config.fileTypeConfig = { ...config.fileTypeConfig, ...args.fileTypeConfig }; 46 | 47 | // Check if the repository path exists 48 | try { 49 | const stats = await fs.stat(config.path); 50 | if (!stats.isDirectory()) { 51 | throw new McpError(ErrorCode.InvalidParams, `Path is not a directory: ${config.path}`); 52 | } 53 | } catch (error) { 54 | throw new McpError(ErrorCode.InvalidParams, `Invalid repository path: ${config.path}`); 55 | } 56 | 57 | // Save the updated config 58 | await fs.writeFile(configPath, JSON.stringify(config, null, 2), 'utf-8'); 59 | 60 | // Update the repositories.json configuration file 61 | const configLoader = new RepositoryConfigLoader(this.server, this.apiClient); 62 | await configLoader.addRepositoryToConfig(config); 63 | console.info(`[${config.name}] Repository configuration updated and saved.`); 64 | if (this.activeProgressToken) { 65 | (this.server as any).sendProgress(this.activeProgressToken, { message: "Repository configuration updated." }); 66 | } 67 | 68 | // Process the repository 69 | console.info(`[${config.name}] Starting to re-process repository files...`); 70 | if (this.activeProgressToken) { 71 | (this.server as any).sendProgress(this.activeProgressToken, { message: "Starting to re-process repository files..." }); 72 | } 73 | const { chunks, processedFiles, skippedFiles } = await this.processRepository(config); 74 | console.info(`[${config.name}] Finished re-processing repository files.
Found ${chunks.length} chunks from ${processedFiles} processed files (${skippedFiles} skipped).`); 75 | if (this.activeProgressToken) { 76 | (this.server as any).sendProgress(this.activeProgressToken, { message: `Finished re-processing files. Found ${chunks.length} chunks.`, percentageComplete: 25 }); // 25% for file processing 77 | } 78 | 79 | // Remove existing repository documents from the vector database 80 | console.info(`[${config.name}] Removing existing documents from vector database...`); 81 | if (this.activeProgressToken) { 82 | (this.server as any).sendProgress(this.activeProgressToken, { message: "Removing existing documents...", percentageComplete: 50 }); // 50% after deletion 83 | } 84 | await this.apiClient.qdrantClient.delete(COLLECTION_NAME, { 85 | filter: { 86 | must: [ 87 | { 88 | key: 'repository', 89 | match: { value: repoName } 90 | }, 91 | { 92 | key: 'isRepositoryFile', 93 | match: { value: true } 94 | } 95 | ] 96 | }, 97 | wait: true 98 | }); 99 | 100 | // Batch process chunks for better performance 101 | const batchSize = 100; 102 | let indexedChunks = 0; 103 | const totalChunks = chunks.length; 104 | 105 | console.info(`[${config.name}] Starting to generate embeddings and re-index ${totalChunks} chunks...`); 106 | if (this.activeProgressToken) { 107 | (this.server as any).sendProgress(this.activeProgressToken, { message: `Starting to generate embeddings for ${totalChunks} chunks...`, percentageComplete: 50 }); 108 | } 109 | 110 | for (let i = 0; i < totalChunks; i += batchSize) { 111 | const batchChunks = chunks.slice(i, i + batchSize); 112 | 113 | const embeddingResults = await Promise.allSettled( 114 | batchChunks.map(async (chunk) => { 115 | try { 116 | const embedding = await this.apiClient.getEmbeddings(chunk.text); 117 | return { 118 | id: this.generatePointId(), 119 | vector: embedding, 120 | payload: { 121 | ...chunk, 122 | _type: 'DocumentChunk' as const, 123 | repository: config.name, 124 | isRepositoryFile: true, 125 | } as Record<string, unknown>, 126 | }; 127 | } catch (embeddingError) { 128 | console.error(`[${config.name}] Failed to generate embedding for chunk from ${chunk.filePath || chunk.url} during update: ${embeddingError instanceof Error ? embeddingError.message : String(embeddingError)}`); 129 | throw embeddingError; // Re-throw to be caught by Promise.allSettled 130 | } 131 | }) 132 | ); 133 | 134 | const successfulPoints = embeddingResults 135 | .filter(result => result.status === 'fulfilled') 136 | .map(result => (result as PromiseFulfilledResult<{ id: string; vector: number[]; payload: Record<string, unknown> }>).value); 137 | 138 | const failedEmbeddingsCount = embeddingResults.filter(result => result.status === 'rejected').length; 139 | if (failedEmbeddingsCount > 0) { 140 | console.warn(`[${config.name}] Failed to generate embeddings for ${failedEmbeddingsCount} of ${batchChunks.length} chunks in this batch during update.`); 141 | } 142 | 143 | if (successfulPoints.length > 0) { 144 | try { 145 | await this.apiClient.qdrantClient.upsert(COLLECTION_NAME, { 146 | wait: true, 147 | points: successfulPoints, 148 | }); 149 | indexedChunks += successfulPoints.length; 150 | } catch (upsertError) { 151 | console.error(`[${config.name}] Failed to upsert batch of ${successfulPoints.length} points to Qdrant during update: ${upsertError instanceof Error ?
upsertError.message : String(upsertError)}`); 152 | } 153 | } 154 | 155 | const percentageComplete = 50 + Math.round(((i + batchChunks.length) / totalChunks) * 50); // Remaining 50% for indexing 156 | console.info(`[${config.name}] Re-processed batch ${Math.floor(i / batchSize) + 1} of ${Math.ceil(totalChunks / batchSize)}. Successfully re-indexed in this batch: ${successfulPoints.length}. Total re-indexed so far: ${indexedChunks} chunks.`); 157 | if (this.activeProgressToken) { 158 | (this.server as any).sendProgress(this.activeProgressToken, { message: `Re-processed ${i + batchChunks.length} of ${totalChunks} chunks for embedding/indexing. Successfully re-indexed: ${indexedChunks}.`, percentageComplete }); 159 | } 160 | } 161 | console.info(`[${config.name}] Finished generating embeddings and re-indexing. Total indexed: ${indexedChunks} chunks.`); 162 | if (this.activeProgressToken) { 163 | (this.server as any).sendProgress(this.activeProgressToken, { message: `Finished re-indexing ${indexedChunks} chunks.`, percentageComplete: 100 }); 164 | } 165 | 166 | return { 167 | content: [ 168 | { 169 | type: 'text', 170 | text: `Successfully updated repository: ${config.name} (${config.path})\n` + 171 | `Processed ${processedFiles} files, skipped ${skippedFiles} files\n` + 172 | `Created ${chunks.length} chunks, indexed ${indexedChunks} chunks\n` + 173 | `Watch mode: ${config.watchMode ? 'enabled' : 'disabled'}`, 174 | }, 175 | ], 176 | }; 177 | } catch (error) { 178 | if (error instanceof McpError) { 179 | throw error; 180 | } 181 | return { 182 | content: [ 183 | { 184 | type: 'text', 185 | text: `Failed to update repository: ${error}`, 186 | }, 187 | ], 188 | isError: true, 189 | }; 190 | } 191 | } 192 | 193 | private async processRepository(config: RepositoryConfig): Promise<{ 194 | chunks: DocumentChunk[], 195 | processedFiles: number, 196 | skippedFiles: number 197 | }> { 198 | const chunks: DocumentChunk[] = []; 199 | let processedFiles = 0; 200 | let skippedFiles = 0; 201 | let fileCounter = 0; 202 | 203 | // Get all files matching the include/exclude patterns 204 | const files = await glob(config.include, { 205 | cwd: config.path, 206 | ignore: config.exclude, 207 | absolute: true, 208 | nodir: true, 209 | }); 210 | const totalFiles = files.length; 211 | 212 | console.info(`[${config.name}] Found ${totalFiles} files to re-process based on include/exclude patterns.`); 213 | if (this.activeProgressToken) { 214 | (this.server as any).sendProgress(this.activeProgressToken, { message: `Found ${totalFiles} files to re-process.` }); 215 | } 216 | 217 | 218 | for (const file of files) { 219 | fileCounter++; 220 | try { 221 | const relativePath = path.relative(config.path, file); 222 | const extension = path.extname(file); 223 | const fileTypeConfig = config.fileTypeConfig[extension]; 224 | 225 | // Skip files that should be excluded based on file type config 226 | if (fileTypeConfig && fileTypeConfig.include === false) { 227 | skippedFiles++; 228 | continue; 229 | } 230 | 231 | // Read file content 232 | const content = await fs.readFile(file, 'utf-8'); 233 | 234 | // Skip empty files 235 | if (!content.trim()) { 236 | skippedFiles++; 237 | continue; 238 | } 239 | 240 | // Detect language for better processing 241 | const language = detectLanguage(file, content); 242 | 243 | // Process the file content into chunks 244 | const fileChunks = this.chunkFileContent( 245 | content, 246 | file, 247 | relativePath, 248 | config, 249 | language, 250 | fileTypeConfig?.chunkStrategy || 'line' 251 | ); 252 
| 253 | chunks.push(...fileChunks); 254 | processedFiles++; 255 | if (fileCounter % 50 === 0 && fileCounter > 0 && this.activeProgressToken) { 256 | const percentageComplete = Math.round((fileCounter / totalFiles) * 25); // File processing is ~1/4 of the job here 257 | (this.server as any).sendProgress(this.activeProgressToken, { message: `Re-processed ${fileCounter} of ${totalFiles} files...`, percentageComplete }); 258 | console.info(`[${config.name}] Re-processed ${fileCounter} of ${totalFiles} files... (${processedFiles} successful, ${skippedFiles} skipped/errored)`); 259 | } 260 | } catch (error) { 261 | console.error(`[${config.name}] Error processing file ${file}:`, error); 262 | skippedFiles++; 263 | } 264 | } 265 | console.info(`[${config.name}] Completed file re-iteration. Processed: ${processedFiles}, Skipped/Errored: ${skippedFiles}.`); 266 | 267 | return { chunks, processedFiles, skippedFiles }; 268 | } 269 | 270 | private chunkFileContent( 271 | content: string, 272 | filePath: string, 273 | relativePath: string, 274 | config: RepositoryConfig, 275 | language: string, 276 | chunkStrategy: string 277 | ): DocumentChunk[] { 278 | const chunks: DocumentChunk[] = []; 279 | const timestamp = new Date().toISOString(); 280 | const fileUrl = `file://${filePath}`; 281 | const title = `${config.name}/${relativePath}`; 282 | 283 | // Different chunking strategies based on file type 284 | let textChunks: string[] = []; 285 | 286 | switch (chunkStrategy) { 287 | case 'semantic': 288 | // For semantic chunking, we'd ideally use a more sophisticated approach 289 | // For now, we'll use a simple paragraph-based approach 290 | textChunks = this.chunkByParagraphs(content, config.chunkSize); 291 | break; 292 | case 'line': 293 | // Chunk by lines, respecting max chunk size 294 | textChunks = this.chunkByLines(content, config.chunkSize); 295 | break; 296 | default: 297 | // Default to simple text chunking 298 | textChunks = this.chunkText(content, config.chunkSize); 299 | } 300 | 301 | // Create document chunks with metadata 302 | chunks.push(...textChunks.map((text, index) => ({ 303 | text, 304 | url: fileUrl, 305 | title, 306 | timestamp, 307 | filePath: relativePath, 308 | language, 309 | chunkIndex: index, 310 | totalChunks: textChunks.length, 311 | }))); 312 | 313 | return chunks; 314 | } 315 | 316 | private chunkText(text: string, maxChunkSize: number): string[] { 317 | const words = text.split(/\s+/); 318 | const chunks: string[] = []; 319 | let currentChunk: string[] = []; 320 | 321 | for (const word of words) { 322 | currentChunk.push(word); 323 | const currentLength = currentChunk.join(' ').length; 324 | 325 | if (currentLength >= maxChunkSize) { 326 | chunks.push(currentChunk.join(' ')); 327 | currentChunk = []; 328 | } 329 | } 330 | 331 | if (currentChunk.length > 0) { 332 | chunks.push(currentChunk.join(' ')); 333 | } 334 | 335 | return chunks; 336 | } 337 | 338 | private chunkByLines(text: string, maxChunkSize: number): string[] { 339 | const lines = text.split(/\r?\n/); 340 | const chunks: string[] = []; 341 | let currentChunk: string[] = []; 342 | let currentLength = 0; 343 | 344 | for (const line of lines) { 345 | const lineLength = line.length + 1; // +1 for the newline 346 | 347 | if (currentLength + lineLength > maxChunkSize && currentChunk.length > 0) { 348 | chunks.push(currentChunk.join('\n')); 349 | currentChunk = []; 350 | currentLength = 0; 351 | } 352 | 353 | currentChunk.push(line); 354 | currentLength += lineLength; 355 | } 356 | 357 | if (currentChunk.length > 0) { 
358 | chunks.push(currentChunk.join('\n')); 359 | } 360 | 361 | return chunks; 362 | } 363 | 364 | private chunkByParagraphs(text: string, maxChunkSize: number): string[] { 365 | // Split by double newlines (paragraphs) 366 | const paragraphs = text.split(/\r?\n\r?\n/); 367 | const chunks: string[] = []; 368 | let currentChunk: string[] = []; 369 | let currentLength = 0; 370 | 371 | for (const paragraph of paragraphs) { 372 | const paragraphLength = paragraph.length + 2; // +2 for the double newline 373 | 374 | if (currentLength + paragraphLength > maxChunkSize && currentChunk.length > 0) { 375 | chunks.push(currentChunk.join('\n\n')); 376 | currentChunk = []; 377 | currentLength = 0; 378 | } 379 | 380 | currentChunk.push(paragraph); 381 | currentLength += paragraphLength; 382 | } 383 | 384 | if (currentChunk.length > 0) { 385 | chunks.push(currentChunk.join('\n\n')); 386 | } 387 | 388 | return chunks; 389 | } 390 | 391 | private generatePointId(): string { 392 | return crypto.randomBytes(16).toString('hex'); 393 | } 394 | } 395 | -------------------------------------------------------------------------------- /src/server.ts: -------------------------------------------------------------------------------- 1 | import cors from "cors"; 2 | import express, { Application, NextFunction, Request, Response } from "express"; 3 | import fs from "fs"; 4 | import { dirname, join } from "path"; 5 | import { fileURLToPath } from "url"; 6 | import { ApiClient } from "./api-client.js"; 7 | import { ClearQueueTool } from "./tools/clear-queue.js"; 8 | import { ExtractUrlsTool } from "./tools/extract-urls.js"; 9 | import { ListQueueTool } from "./tools/list-queue.js"; 10 | import { ListSourcesTool } from "./tools/list-sources.js"; 11 | import { RemoveDocumentationTool } from "./tools/remove-documentation.js"; 12 | import { RunQueueTool } from "./tools/run-queue.js"; 13 | import { SearchDocumentationTool } from "./tools/search-documentation.js"; 14 | 15 | const __filename = fileURLToPath(import.meta.url); 16 | const __dirname = dirname(__filename); 17 | const rootDir = join(__dirname, ".."); 18 | 19 | interface ApiError extends Error { 20 | status?: number; 21 | } 22 | 23 | interface SearchResponse { 24 | results: Array<{ 25 | url: string; 26 | title: string; 27 | content: string; 28 | snippet?: string; 29 | }>; 30 | } 31 | 32 | interface ErrorResponse { 33 | error: string; 34 | details?: string; 35 | } 36 | 37 | interface Document { 38 | url: string; 39 | title: string; 40 | timestamp: string; 41 | status: string; 42 | } 43 | 44 | interface QueueItem { 45 | id: number; 46 | url: string; 47 | status: string; 48 | timestamp: string; 49 | } 50 | 51 | import net from 'net'; 52 | 53 | function getAvailablePort(startPort: number): Promise<number> { 54 | return new Promise((resolve, reject) => { 55 | const server = net.createServer(); 56 | server.listen(startPort, () => { 57 | const { port } = server.address() as net.AddressInfo; 58 | server.close(() => resolve(port)); 59 | }); 60 | server.on('error', (err: any) => { 61 | if (err.code === 'EADDRINUSE') { 62 | resolve(getAvailablePort(startPort + 1)); 63 | } else { 64 | reject(err); 65 | } 66 | }); 67 | }); 68 | } 69 | 70 | export class WebInterface { 71 | private app: Application; 72 | private server: any; 73 | private apiClient: ApiClient; 74 | private searchTool: SearchDocumentationTool; 75 | private runQueueTool: RunQueueTool; 76 | private listQueueTool: ListQueueTool; 77 | private listSourcesTool: ListSourcesTool; 78 | private clearQueueTool: ClearQueueTool; 79 |
private removeDocTool: RemoveDocumentationTool; 80 | private extractUrlsTool: ExtractUrlsTool; 81 | private queuePath: string; 82 | 83 | constructor(apiClient: ApiClient) { 84 | this.apiClient = apiClient; 85 | this.app = express(); 86 | this.queuePath = join(rootDir, "queue.txt"); 87 | 88 | // Initialize tools 89 | this.searchTool = new SearchDocumentationTool(apiClient); 90 | this.runQueueTool = new RunQueueTool(apiClient); 91 | this.listQueueTool = new ListQueueTool(); 92 | this.listSourcesTool = new ListSourcesTool(apiClient); 93 | this.clearQueueTool = new ClearQueueTool(); 94 | this.removeDocTool = new RemoveDocumentationTool(apiClient); 95 | this.extractUrlsTool = new ExtractUrlsTool(apiClient); 96 | 97 | // Ensure queue file exists 98 | this.initializeQueueFile(); 99 | 100 | this.setupMiddleware(); 101 | this.setupRoutes(); 102 | } 103 | 104 | private async initializeQueueFile() { 105 | try { 106 | // Check if queue file exists 107 | if (!fs.existsSync(this.queuePath)) { 108 | // Create the file if it doesn't exist 109 | await fs.promises.writeFile(this.queuePath, "", "utf8"); 110 | console.log("Queue file created at:", this.queuePath); 111 | } 112 | } catch (error) { 113 | console.error("Error initializing queue file:", error); 114 | } 115 | } 116 | 117 | private setupMiddleware() { 118 | this.app.use(cors()); 119 | this.app.use(express.json()); 120 | this.app.use(express.static(join(rootDir, "src/public"))); 121 | this.app.get("/", (req: Request, res: Response) => { 122 | res.sendFile(join(rootDir, "src/public/index.html")); 123 | }); 124 | } 125 | 126 | private setupRoutes() { 127 | const errorHandler = ( 128 | err: ApiError, 129 | req: Request, 130 | res: Response, 131 | next: NextFunction 132 | ) => { 133 | console.error("API Error:", err); 134 | const status = err.status || 500; 135 | const response: ErrorResponse = { 136 | error: err.message || "Internal server error", 137 | }; 138 | if (process.env.NODE_ENV === "development" && err.stack) { 139 | response.details = err.stack; 140 | } 141 | res.status(status).json(response); 142 | }; 143 | 144 | // Get all available documents 145 | this.app.get( 146 | "/documents", 147 | async ( 148 | req: Request, 149 | res: Response, 150 | next: NextFunction 151 | ): Promise<void> => { 152 | try { 153 | const response = await this.listSourcesTool.execute({}); 154 | const sourcesText = response.content[0].text; 155 | 156 | if ( 157 | sourcesText === 158 | "No documentation sources found in the cloud collection." 159 | ) { 160 | res.json([]); 161 | return; 162 | } 163 | 164 | const documents = sourcesText 165 | .split("\n") 166 | .map((line) => { 167 | const match = line.match(/(.*?)
\((.*?)\)/); 168 | if (match) { 169 | const [_, title, url] = match; 170 | return { 171 | url, 172 | title, 173 | timestamp: new Date().toISOString(), // Timestamp not available from list-sources 174 | status: "COMPLETED", 175 | }; 176 | } 177 | return null; 178 | }) 179 | .filter(Boolean); 180 | 181 | res.json(documents); 182 | } catch (error) { 183 | next(error); 184 | } 185 | } 186 | ); 187 | 188 | // Get queue status 189 | this.app.get("/queue", async (req: Request, res: Response) => { 190 | try { 191 | // Ensure queue file exists 192 | if (!fs.existsSync(this.queuePath)) { 193 | await this.initializeQueueFile(); 194 | res.json([]); 195 | return; 196 | } 197 | 198 | // Read the queue file directly to get pending items 199 | const queueContent = await fs.promises.readFile(this.queuePath, "utf8"); 200 | console.log("Queue file content:", queueContent); 201 | 202 | const pendingUrls = queueContent 203 | .split("\n") 204 | .filter((line) => line.trim()); 205 | console.log("Pending URLs:", pendingUrls); 206 | 207 | // Get processing status from list-queue tool 208 | const response = await this.listQueueTool.execute({}); 209 | console.log("List queue tool response:", response); 210 | 211 | const queueText = response.content[0].text; 212 | console.log("Queue text from tool:", queueText); 213 | 214 | const processingItems = queueText 215 | .split("\n") 216 | .filter((line) => line.trim()) 217 | .map((line) => { 218 | const [url, status, timestamp] = line.split(" | "); 219 | return { 220 | id: Buffer.from(url).toString("base64"), 221 | url, 222 | status: status || "PROCESSING", 223 | timestamp: timestamp || new Date().toISOString(), 224 | }; 225 | }); 226 | console.log("Processing items:", processingItems); 227 | 228 | // Combine pending and processing items 229 | const queue = [ 230 | // Add pending items that aren't in processing 231 | ...pendingUrls 232 | .filter((url) => !processingItems.some((item) => item.url === url)) 233 | .map((url) => ({ 234 | id: Buffer.from(url).toString("base64"), 235 | url, 236 | status: "PENDING", 237 | timestamp: new Date().toISOString(), 238 | })), 239 | // Add processing items 240 | ...processingItems, 241 | ]; 242 | console.log("Final queue:", queue); 243 | 244 | res.json(queue); 245 | } catch (error) { 246 | console.error("Error getting queue:", error); 247 | res.json([]); 248 | } 249 | }); 250 | 251 | // Add document to queue 252 | this.app.post( 253 | "/add-doc", 254 | async (req: Request, res: Response, next: NextFunction) => { 255 | try { 256 | const { url, urls } = req.body; 257 | 258 | if (!url && (!urls || !Array.isArray(urls))) { 259 | const error: ApiError = new Error( 260 | "URL or array of URLs is required" 261 | ); 262 | error.status = 400; 263 | throw error; 264 | } 265 | 266 | // Ensure queue file exists 267 | if (!fs.existsSync(this.queuePath)) { 268 | await this.initializeQueueFile(); 269 | } 270 | 271 | const urlsToAdd = urls || [url]; 272 | const addedItems: QueueItem[] = []; 273 | 274 | for (const u of urlsToAdd) { 275 | // Add newline only if file is not empty 276 | const fileContent = await fs.promises.readFile( 277 | this.queuePath, 278 | "utf8" 279 | ); 280 | const separator = fileContent.length > 0 ? 
"\n" : ""; 281 | await fs.promises.appendFile(this.queuePath, separator + u); 282 | 283 | addedItems.push({ 284 | id: Date.now(), 285 | url: u, 286 | status: "PENDING", 287 | timestamp: new Date().toISOString(), 288 | }); 289 | } 290 | 291 | // Start processing queue in background 292 | this.runQueueTool.execute({}).catch((error) => { 293 | console.error("Error processing queue:", error); 294 | }); 295 | 296 | res.json(addedItems); 297 | } catch (error) { 298 | next(error); 299 | } 300 | } 301 | ); 302 | 303 | // Search documentation 304 | this.app.post( 305 | "/search", 306 | async ( 307 | req: Request, 308 | res: Response, 309 | next: NextFunction 310 | ): Promise => { 311 | try { 312 | const { query } = req.body; 313 | if (!query) { 314 | const error: ApiError = new Error("Query is required"); 315 | error.status = 400; 316 | throw error; 317 | } 318 | 319 | const searchResponse = await this.searchTool.execute({ query }); 320 | const searchText = searchResponse.content[0].text; 321 | 322 | if (searchText === "No results found matching the query.") { 323 | res.json({ results: [] }); 324 | } 325 | 326 | // Parse the markdown formatted results 327 | const results = searchText 328 | .split("---") 329 | .filter((block) => block.trim()) 330 | .map((block) => { 331 | const titleMatch = block.match(/\[(.*?)\]\((.*?)\)/); 332 | const contentMatch = block.match(/Content: (.*?)(?=\n|$)/s); 333 | 334 | return { 335 | title: titleMatch ? titleMatch[1] : "Unknown", 336 | url: titleMatch ? titleMatch[2] : "", 337 | content: contentMatch ? contentMatch[1] : "", 338 | snippet: contentMatch 339 | ? contentMatch[1].substring(0, 200) + "..." 340 | : undefined, 341 | }; 342 | }); 343 | 344 | const response: SearchResponse = { results }; 345 | res.json(response); 346 | } catch (error) { 347 | next(error); 348 | } 349 | } 350 | ); 351 | 352 | // Clear queue 353 | this.app.post( 354 | "/clear-queue", 355 | async (req: Request, res: Response, next: NextFunction) => { 356 | try { 357 | // Call the clear queue tool 358 | const response = await this.clearQueueTool.execute({}); 359 | 360 | if (response.isError) { 361 | throw new Error(response.content[0].text); 362 | } 363 | 364 | // Also clear any running processes 365 | await this.runQueueTool.execute({ action: "stop" }); 366 | 367 | // Ensure the queue file is empty 368 | await fs.promises.writeFile(this.queuePath, "", "utf8"); 369 | 370 | res.json({ message: "Queue cleared successfully" }); 371 | } catch (error) { 372 | next(error); 373 | } 374 | } 375 | ); 376 | 377 | // Process queue 378 | this.app.post( 379 | "/process-queue", 380 | async (req: Request, res: Response, next: NextFunction) => { 381 | try { 382 | // Start processing queue in background 383 | this.runQueueTool.execute({}).catch((error) => { 384 | console.error("Error processing queue:", error); 385 | }); 386 | 387 | res.json({ message: "Queue processing started" }); 388 | } catch (error) { 389 | next(error); 390 | } 391 | } 392 | ); 393 | 394 | // Remove documentation (single or multiple) 395 | this.app.delete( 396 | "/documents", 397 | async (req: Request, res: Response, next: NextFunction) => { 398 | try { 399 | const { url, urls } = req.body; 400 | if (!url && (!urls || !Array.isArray(urls))) { 401 | const error: ApiError = new Error( 402 | "URL or array of URLs is required" 403 | ); 404 | error.status = 400; 405 | throw error; 406 | } 407 | 408 | const urlsToRemove = urls || [url]; 409 | await this.removeDocTool.execute({ urls: urlsToRemove }); 410 | res.json({ 411 | message: 
`${urlsToRemove.length} document${ 412 | urlsToRemove.length === 1 ? "" : "s" 413 | } removed successfully`, 414 | count: urlsToRemove.length, 415 | }); 416 | } catch (error) { 417 | next(error); 418 | } 419 | } 420 | ); 421 | 422 | // Remove all documents 423 | this.app.delete( 424 | "/documents/all", 425 | async (req: Request, res: Response, next: NextFunction) => { 426 | try { 427 | // First get all documents 428 | const response = await this.listSourcesTool.execute({}); 429 | const sourcesText = response.content[0].text; 430 | 431 | if ( 432 | sourcesText === 433 | "No documentation sources found in the cloud collection." 434 | ) { 435 | res.json({ message: "No documents to remove", count: 0 }); 436 | return; 437 | } 438 | 439 | // Extract URLs from the sources 440 | const urls = sourcesText 441 | .split("\n") 442 | .map((line) => { 443 | const match = line.match(/(.*?) \((.*?)\)/); 444 | return match ? match[2] : null; 445 | }) 446 | .filter((url): url is string => url !== null); 447 | 448 | if (urls.length === 0) { 449 | res.json({ message: "No documents to remove", count: 0 }); 450 | return; 451 | } 452 | 453 | // Remove all documents 454 | await this.removeDocTool.execute({ urls }); 455 | res.json({ 456 | message: `${urls.length} document${ 457 | urls.length === 1 ? "" : "s" 458 | } removed successfully`, 459 | count: urls.length, 460 | }); 461 | } catch (error) { 462 | next(error); 463 | } 464 | } 465 | ); 466 | 467 | // Extract URLs 468 | this.app.post( 469 | "/extract-urls", 470 | async (req: Request, res: Response, next: NextFunction) => { 471 | try { 472 | const { url } = req.body; 473 | if (!url) { 474 | const error: ApiError = new Error("URL is required"); 475 | error.status = 400; 476 | throw error; 477 | } 478 | 479 | const response = await this.extractUrlsTool.execute({ url }); 480 | const urls = response.content[0].text 481 | .split("\n") 482 | .filter((url) => url.trim()); 483 | 484 | res.json({ urls }); 485 | } catch (error) { 486 | next(error); 487 | } 488 | } 489 | ); 490 | 491 | this.app.use(errorHandler); 492 | } 493 | 494 | async start() { 495 | const port = await getAvailablePort(3030); 496 | this.server = this.app.listen(port, () => { 497 | console.log(`Web interface running at http://localhost:${port}`); 498 | }); 499 | } 500 | 501 | async stop() { 502 | if (this.server) { 503 | return new Promise((resolve) => { 504 | this.server.close(() => { 505 | console.log("Web interface stopped"); 506 | resolve(true); 507 | }); 508 | }); 509 | } 510 | } 511 | } 512 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RAG Documentation MCP Server 2 | [![smithery badge](https://smithery.ai/badge/@rahulretnan/mcp-ragdocs)](https://smithery.ai/server/@rahulretnan/mcp-ragdocs) 3 | 4 | An MCP server implementation that provides tools for retrieving and processing documentation through vector search, enabling AI assistants to augment their responses with relevant documentation context. 
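
As a quick illustration, a typical tool invocation looks like this (the invocation style mirrors the examples later in this README; the query text is only an example):

```
search_documentation with {
  "query": "How do I configure the embedding provider?",
  "limit": 5
}
```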
5 | 6 | ## Table of Contents 7 | 8 | - [Features](#features) 9 | - [Quick Start](#quick-start) 10 | - [Docker Compose Setup](#docker-compose-setup) 11 | - [Web Interface](#web-interface) 12 | - [Configuration](#configuration) 13 | - [Cline Configuration](#cline-configuration) 14 | - [Claude Desktop Configuration](#claude-desktop-configuration) 15 | - [Acknowledgments](#acknowledgments) 16 | - [Troubleshooting](#troubleshooting) 17 | 18 | ## Features 19 | 20 | ### Tools 21 | 22 | 1. **search_documentation** 23 | 24 | - Search through the documentation using vector search 25 | - Returns relevant chunks of documentation with source information 26 | 27 | 2. **list_sources** 28 | 29 | - List all available documentation sources 30 | - Provides metadata about each source 31 | 32 | 3. **extract_urls** 33 | 34 | - Extract URLs from text and check if they're already in the documentation 35 | - Useful for preventing duplicate documentation 36 | 37 | 4. **remove_documentation** 38 | 39 | - Remove documentation from a specific source 40 | - Cleans up outdated or irrelevant documentation 41 | 42 | 5. **list_queue** 43 | 44 | - List all items in the processing queue 45 | - Shows status of pending documentation processing 46 | 47 | 6. **run_queue** 48 | 49 | - Process all items in the queue 50 | - Automatically adds new documentation to the vector store 51 | 52 | 7. **clear_queue** 53 | 54 | - Clear all items from the processing queue 55 | - Useful for resetting the system 56 | 57 | 8. **add_documentation** 58 | - Add new documentation directly to the system by providing a URL 59 | - Automatically fetches, processes, and indexes the content 60 | - Supports various web page formats and extracts relevant content 61 | - Chunks content intelligently for optimal retrieval 62 | - Required parameter: `url` (must include protocol, e.g., https://) 63 | 64 | 9. **add_repository** 65 | - Index a local code repository for documentation 66 | - Configure include/exclude patterns for files and directories 67 | - Supports different chunking strategies based on file types 68 | - Uses asynchronous processing to avoid MCP timeouts with large repositories 69 | - Provides detailed progress logging (heartbeat) to `stderr` during indexing 70 | - Required parameter: `path` (absolute path to repository) 71 | 72 | 10. **list_repositories** 73 | - List all indexed repositories with their configurations 74 | - Shows include/exclude patterns and watch status 75 | 76 | 11. **update_repository** 77 | - Re-index a repository with updated configuration 78 | - Can modify include/exclude patterns and other settings 79 | - Provides detailed progress logging (heartbeat) to `stderr` during re-indexing 80 | - Required parameter: `name` (repository name) 81 | 82 | 12. **remove_repository** 83 | - Remove a repository from the index 84 | - Deletes all associated documents from the vector database 85 | - Required parameter: `name` (repository name) 86 | 87 | 13. **watch_repository** 88 | - Start or stop watching a repository for changes 89 | - Automatically updates the index when files change 90 | - Required parameters: `name` (repository name) and `action` ("start" or "stop") 91 | 92 | 14. 
**get_indexing_status** 93 | - Get the current status of repository indexing operations 94 | - Provides detailed information about ongoing or completed indexing processes 95 | - Shows progress percentage, file counts, and timing information 96 | - Optional parameter: `name` (repository name) - if not provided, returns status for all repositories 97 | 98 | ## Quick Start 99 | 100 | The RAG Documentation tool is designed for: 101 | 102 | - Enhancing AI responses with relevant documentation 103 | - Building documentation-aware AI assistants 104 | - Creating context-aware tooling for developers 105 | - Implementing semantic documentation search 106 | - Augmenting existing knowledge bases 107 | 108 | ## Docker Compose Setup 109 | 110 | The project includes a `docker-compose.yml` file for easy containerized deployment. To start the services: 111 | 112 | ```bash 113 | docker-compose up -d 114 | ``` 115 | 116 | To stop the services: 117 | 118 | ```bash 119 | docker-compose down 120 | ``` 121 | 122 | ## Web Interface 123 | 124 | The system includes a web interface that can be accessed after starting the Docker Compose services: 125 | 126 | 1. Open your browser and navigate to: `http://localhost:3030` 127 | 2. The interface provides: 128 | - Real-time queue monitoring 129 | - Documentation source management 130 | - Search interface for testing queries 131 | - System status and health checks 132 | 133 | ## Configuration 134 | 135 | ### Embeddings Configuration 136 | 137 | The system uses Ollama as the default embedding provider for local embeddings generation, with OpenAI available as a fallback option. This setup prioritizes local processing while maintaining reliability through cloud-based fallback. 138 | 139 | #### Environment Variables 140 | 141 | - `EMBEDDING_PROVIDER`: Choose the primary embedding provider ('ollama' or 'openai', default: 'ollama') 142 | - `EMBEDDING_MODEL`: Specify the model to use (optional) 143 | - For OpenAI: defaults to 'text-embedding-3-small' 144 | - For Ollama: defaults to 'nomic-embed-text' 145 | - `OPENAI_API_KEY`: Required when using OpenAI as provider 146 | - `FALLBACK_PROVIDER`: Optional backup provider ('ollama' or 'openai') 147 | - `FALLBACK_MODEL`: Optional model for fallback provider 148 | 149 | ### Cline Configuration 150 | 151 | Add this to your `cline_mcp_settings.json`: 152 | 153 | ```json 154 | { 155 | "mcpServers": { 156 | "rag-docs": { 157 | "command": "node", 158 | "args": ["/path/to/your/mcp-ragdocs/build/index.js"], 159 | "env": { 160 | "EMBEDDING_PROVIDER": "ollama", // default 161 | "EMBEDDING_MODEL": "nomic-embed-text", // optional 162 | "OPENAI_API_KEY": "your-api-key-here", // required for fallback 163 | "FALLBACK_PROVIDER": "openai", // recommended for reliability 164 | "FALLBACK_MODEL": "text-embedding-3-small", // optional 165 | "QDRANT_URL": "http://localhost:6333" 166 | }, 167 | "disabled": false, 168 | "autoApprove": [ 169 | "search_documentation", 170 | "list_sources", 171 | "extract_urls", 172 | "remove_documentation", 173 | "list_queue", 174 | "run_queue", 175 | "clear_queue", 176 | "add_documentation", 177 | "add_repository", 178 | "list_repositories", 179 | "update_repository", 180 | "remove_repository", 181 | "watch_repository", 182 | "get_indexing_status" 183 | ] 184 | } 185 | } 186 | } 187 | ``` 188 | 189 | ### Claude Desktop Configuration 190 | 191 | Add this to your `claude_desktop_config.json`: 192 | 193 | ```json 194 | { 195 | "mcpServers": { 196 | "rag-docs": { 197 | "command": "node", 198 | "args":
["/path/to/your/mcp-ragdocs/build/index.js"], 199 | "env": { 200 | "EMBEDDING_PROVIDER": "ollama", // default 201 | "EMBEDDING_MODEL": "nomic-embed-text", // optional 202 | "OPENAI_API_KEY": "your-api-key-here", // required for fallback 203 | "FALLBACK_PROVIDER": "openai", // recommended for reliability 204 | "FALLBACK_MODEL": "nomic-embed-text", // optional 205 | "QDRANT_URL": "http://localhost:6333" 206 | }, 207 | "autoApprove": [ 208 | "search_documentation", 209 | "list_sources", 210 | "extract_urls", 211 | "remove_documentation", 212 | "list_queue", 213 | "run_queue", 214 | "clear_queue", 215 | "add_documentation", 216 | "add_repository", 217 | "list_repositories", 218 | "update_repository", 219 | "remove_repository", 220 | "watch_repository", 221 | "get_indexing_status" 222 | ] 223 | } 224 | } 225 | } 226 | ``` 227 | 228 | ### Default Configuration 229 | 230 | The system uses Ollama by default for efficient local embedding generation. For optimal reliability: 231 | 232 | 1. Install and run Ollama locally 233 | 2. Configure OpenAI as fallback (recommended): 234 | ```json 235 | { 236 | // Ollama is used by default, no need to specify EMBEDDING_PROVIDER 237 | "EMBEDDING_MODEL": "nomic-embed-text", // optional 238 | "FALLBACK_PROVIDER": "openai", 239 | "FALLBACK_MODEL": "text-embedding-3-small", 240 | "OPENAI_API_KEY": "your-api-key-here" 241 | } 242 | ``` 243 | 244 | This configuration ensures: 245 | - Fast, local embedding generation with Ollama 246 | - Automatic fallback to OpenAI if Ollama fails 247 | - No external API calls unless necessary 248 | 249 | Note: The system will automatically use the appropriate vector dimensions based on the provider: 250 | - Ollama (nomic-embed-text): 768 dimensions 251 | - OpenAI (text-embedding-3-small): 1536 dimensions 252 | 253 | ## Documentation Management 254 | 255 | ### Direct vs. Queue-Based Documentation Addition 256 | 257 | The system provides two complementary approaches for adding documentation: 258 | 259 | 1. **Direct Addition (`add_documentation` tool)** 260 | - Immediately processes and indexes the documentation from a URL 261 | - Best for adding individual documentation sources 262 | - Provides immediate feedback on processing success/failure 263 | - Example usage: `add_documentation` with `url: "https://example.com/docs"` 264 | 265 | 2. **Queue-Based Processing** 266 | - Add URLs to a processing queue (`extract_urls` with `add_to_queue: true`) 267 | - Process multiple URLs in batch later (`run_queue`) 268 | - Better for large-scale documentation ingestion 269 | - Allows for scheduled processing of many documentation sources 270 | - Provides resilience through the queue system 271 | 272 | Choose the approach that best fits your documentation management needs. For small numbers of important documents, direct addition provides immediate results. For large documentation sets or recursive crawling, the queue-based approach offers better scalability. 273 | 274 | ### Local Repository Indexing 275 | 276 | The system supports indexing local code repositories, making their content searchable alongside web documentation: 277 | 278 | 1. **Repository Configuration** 279 | - Define which files to include/exclude using glob patterns 280 | - Configure chunking strategies per file type 281 | - Set up automatic change detection with watch mode 282 | 283 | 2. 
**File Processing** 284 | - Files are processed based on their type and language 285 | - Code is chunked intelligently to preserve context 286 | - Metadata like file path and language are preserved 287 | 288 | 3. **Asynchronous Processing** 289 | - Large repositories are processed asynchronously to avoid MCP timeouts 290 | - Indexing continues in the background after the initial response 291 | - Progress can be monitored using the `get_indexing_status` tool 292 | - Smaller batch sizes (50 chunks per batch) improve responsiveness 293 | 294 | 4. **Change Detection** 295 | - Repositories can be watched for changes 296 | - Modified files are automatically re-indexed 297 | - Deleted files are removed from the index 298 | 299 | Example usage: 300 | ``` 301 | add_repository with { 302 | "path": "/path/to/your/repo", 303 | "name": "my-project", 304 | "include": ["**/*.js", "**/*.ts", "**/*.md"], 305 | "exclude": ["**/node_modules/**", "**/dist/**"], 306 | "watchMode": true 307 | } 308 | ``` 309 | 310 | After starting the indexing process, you can check its status: 311 | ``` 312 | get_indexing_status with { 313 | "name": "my-project" 314 | } 315 | ``` 316 | 317 | This will return detailed information about the indexing progress: 318 | ``` 319 | Repository: my-project 320 | Status: 🔄 Processing 321 | Progress: 45% 322 | Started: 5/11/2025, 2:45:30 PM 323 | Duration: 3m 15s 324 | Files: 120 processed, 15 skipped (of 250) 325 | Chunks: 1500 indexed (of 3300) 326 | Batch: 15 of 33 327 | ``` 328 | 329 | ### Repository Configuration File 330 | 331 | The system supports a `repositories.json` configuration file that allows you to define repositories to be automatically indexed at startup: 332 | 333 | ```json 334 | { 335 | "repositories": [ 336 | { 337 | "path": "/path/to/your/repo", 338 | "name": "my-project", 339 | "include": ["**/*.js", "**/*.ts", "**/*.md"], 340 | "exclude": ["**/node_modules/**", "**/.git/**"], 341 | "watchMode": true, 342 | "watchInterval": 60000, 343 | "chunkSize": 1000, 344 | "fileTypeConfig": { 345 | ".js": { "include": true, "chunkStrategy": "semantic" }, 346 | ".ts": { "include": true, "chunkStrategy": "semantic" }, 347 | ".md": { "include": true, "chunkStrategy": "semantic" } 348 | } 349 | } 350 | ], 351 | "autoWatch": true 352 | } 353 | ``` 354 | 355 | The configuration file is automatically updated when repositories are added, updated, or removed using the repository management tools. You can also manually edit the file to configure repositories before starting the server. The paths within the configuration file, such as the `path` for each repository and the location of `repositories.json` itself, are resolved relative to the project root directory where the server is executed. A sketch of the structure the server expects is shown below.
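
For reference, the structure the server parses this file into can be sketched with the following TypeScript interfaces (an illustrative reconstruction based on how the configuration is used by the loader and handlers; the authoritative definitions live in `src/types.ts`, and the `FileTypeSettings` name here is hypothetical):

```typescript
// Sketch only -- field names mirror the options documented below.
interface FileTypeSettings {
  include?: boolean;                 // whether files of this type are indexed
  chunkStrategy?: 'semantic' | 'line' | 'character';
  chunkSize?: number;                // optional per-type override
}

interface RepositoryConfig {
  path: string;                      // absolute path to the repository
  name: string;                      // unique repository name
  include: string[];                 // glob patterns to include
  exclude: string[];                 // glob patterns to exclude
  watchMode: boolean;                // watch the repository for changes
  watchInterval: number;             // polling interval in milliseconds
  chunkSize: number;                 // default chunk size in characters
  fileTypeConfig: Record<string, FileTypeSettings>; // keyed by extension, e.g. ".ts"
}

interface RepositoriesConfig {
  repositories: RepositoryConfig[];
  autoWatch: boolean;                // auto-start watching watchMode repositories
}
```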
364 | 365 | **Configuration Options:** 366 | 367 | - `repositories`: Array of repository configurations 368 | - `path`: Absolute path to the repository directory 369 | - `name`: Unique name for the repository 370 | - `include`: Array of glob patterns to include 371 | - `exclude`: Array of glob patterns to exclude 372 | - `watchMode`: Whether to watch for changes 373 | - `watchInterval`: Polling interval in milliseconds 374 | - `chunkSize`: Default chunk size for files 375 | - `fileTypeConfig`: Configuration for specific file types 376 | - `include`: Whether to include this file type 377 | - `chunkStrategy`: Chunking strategy ("semantic", "line", or "character") 378 | - `chunkSize`: Optional override for chunk size 379 | 380 | - `autoWatch`: Whether to automatically start watching repositories with `watchMode: true` at startup 381 | 382 | ## Acknowledgments 383 | 384 | This project is a fork of [qpd-v/mcp-ragdocs](https://github.com/qpd-v/mcp-ragdocs), originally developed by qpd-v. The original project provided the foundation for this implementation. 385 | 386 | Special thanks to the original creator, qpd-v, for their innovative work on the initial version of this MCP server. This fork has been enhanced with additional features and improvements by Rahul Retnan. 387 | 388 | ## Troubleshooting 389 | 390 | ### Server Not Starting (Port Conflict) 391 | 392 | If the MCP server fails to start due to a port conflict, follow these steps: 393 | 394 | 1. Identify and kill the process using port 3030: 395 | 396 | ```bash 397 | npx kill-port 3030 398 | ``` 399 | 400 | 2. Restart the MCP server 401 | 402 | 3. If the issue persists, check for other processes using the port: 403 | 404 | ```bash 405 | lsof -i :3030 406 | ``` 407 | 408 | 4. You can also change the default port in the configuration if needed 409 | 410 | ### Missing Tools in Claude Desktop 411 | 412 | If certain tools (like `add_documentation`) are not appearing in Claude Desktop: 413 | 414 | 1. Verify that the tool is properly registered in the server's `handler-registry.ts` file 415 | 2. Make sure the tool is included in the `ListToolsRequestSchema` handler response 416 | 3. Check that your Claude Desktop configuration includes the tool in the `autoApprove` array 417 | 4. Restart the Claude Desktop application and the MCP server 418 | 5. Check the server logs for any errors related to tool registration 419 | 420 | The most common cause of missing tools is that they are registered as handlers but not included in the `tools` array returned by the `ListToolsRequestSchema` handler. 421 | 422 | ### Timeout Issues with Large Repositories 423 | 424 | If you encounter timeout errors when indexing large repositories: 425 | 426 | 1. The system now uses asynchronous processing to avoid MCP timeouts 427 | 2. When adding a repository with `add_repository`, the indexing will continue in the background 428 | 3. Use the `get_indexing_status` tool to monitor progress 429 | 4. 
If you still experience issues, try these solutions: 430 | - Reduce the scope of indexing with more specific include/exclude patterns 431 | - Break up very large repositories into smaller logical units 432 | - Increase the batch size in the code if your system has more resources available 433 | - Check system resources (memory, CPU) during indexing to identify bottlenecks 434 | -------------------------------------------------------------------------------- /src/handler-registry.ts: -------------------------------------------------------------------------------- 1 | import { 2 | CallToolRequestSchema, 3 | ErrorCode, 4 | ListToolsRequestSchema, 5 | McpError, 6 | ListPromptsRequestSchema, 7 | ListResourcesRequestSchema, 8 | } from '@modelcontextprotocol/sdk/types.js'; 9 | import { Server } from '@modelcontextprotocol/sdk/server/index.js'; 10 | import { ApiClient } from './api-client.js'; 11 | import { ToolDefinition } from './types.js'; 12 | import { 13 | AddDocumentationHandler, 14 | SearchDocumentationHandler, 15 | ListSourcesHandler, 16 | RemoveDocumentationHandler, 17 | ExtractUrlsHandler, 18 | ListQueueHandler, 19 | RunQueueHandler, 20 | ClearQueueHandler, 21 | PromptsListHandler, 22 | ResourcesListHandler, 23 | LocalRepositoryHandler, 24 | ListRepositoriesHandler, 25 | RemoveRepositoryHandler, 26 | UpdateRepositoryHandler, 27 | WatchRepositoryHandler, 28 | GetIndexingStatusHandler, 29 | } from './handlers/index.js'; 30 | 31 | const COLLECTION_NAME = 'documentation'; 32 | 33 | export class HandlerRegistry { 34 | private server: Server; 35 | private apiClient: ApiClient; 36 | private handlers: Map<string, any>; 37 | 38 | constructor(server: Server, apiClient: ApiClient) { 39 | this.server = server; 40 | this.apiClient = apiClient; 41 | this.handlers = new Map(); 42 | this.setupHandlers(); 43 | this.registerHandlers(); 44 | } 45 | 46 | private setupHandlers() { 47 | // Web documentation handlers 48 | this.handlers.set('add_documentation', new AddDocumentationHandler(this.server, this.apiClient)); 49 | this.handlers.set('search_documentation', new SearchDocumentationHandler(this.server, this.apiClient)); 50 | this.handlers.set('list_sources', new ListSourcesHandler(this.server, this.apiClient)); 51 | this.handlers.set('remove_documentation', new RemoveDocumentationHandler(this.server, this.apiClient)); 52 | this.handlers.set('extract_urls', new ExtractUrlsHandler(this.server, this.apiClient)); 53 | this.handlers.set('list_queue', new ListQueueHandler(this.server, this.apiClient)); 54 | this.handlers.set('run_queue', new RunQueueHandler(this.server, this.apiClient)); 55 | this.handlers.set('clear_queue', new ClearQueueHandler(this.server, this.apiClient)); 56 | 57 | // Repository handlers 58 | this.handlers.set('add_repository', new LocalRepositoryHandler(this.server, this.apiClient)); 59 | this.handlers.set('list_repositories', new ListRepositoriesHandler(this.server, this.apiClient)); 60 | this.handlers.set('remove_repository', new RemoveRepositoryHandler(this.server, this.apiClient)); 61 | this.handlers.set('update_repository', new UpdateRepositoryHandler(this.server, this.apiClient)); 62 | this.handlers.set('watch_repository', new WatchRepositoryHandler(this.server, this.apiClient)); 63 | this.handlers.set('get_indexing_status', new GetIndexingStatusHandler(this.server, this.apiClient)); 64 | 65 | // Setup prompts and resources handlers 66 | this.handlers.set('prompts/list', new PromptsListHandler(this.server, this.apiClient)); 67 | this.handlers.set('resources/list', new
ResourcesListHandler(this.server, this.apiClient)); 68 | } 69 | 70 | private registerHandlers() { 71 | this.server.setRequestHandler(ListToolsRequestSchema, async () => ({ 72 | tools: [ 73 | { 74 | name: 'search_documentation', 75 | description: 'Search through stored documentation using natural language queries. Use this tool to find relevant information across all stored documentation sources. Returns matching excerpts with context, ranked by relevance. Useful for finding specific information, code examples, or related documentation.', 76 | inputSchema: { 77 | type: 'object', 78 | properties: { 79 | query: { 80 | type: 'string', 81 | description: 'The text to search for in the documentation. Can be a natural language query, specific terms, or code snippets.', 82 | }, 83 | limit: { 84 | type: 'number', 85 | description: 'Maximum number of results to return (1-20). Higher limits provide more comprehensive results but may take longer to process. Default is 5.', 86 | default: 5, 87 | }, 88 | }, 89 | required: ['query'], 90 | }, 91 | } as ToolDefinition, 92 | { 93 | name: 'add_documentation', 94 | description: 'Add new documentation to the system by providing a URL. The tool will fetch the content, process it into chunks, and store it in the vector database for future searches. Supports various web page formats and automatically extracts relevant content.', 95 | inputSchema: { 96 | type: 'object', 97 | properties: { 98 | url: { 99 | type: 'string', 100 | description: 'The complete URL of the documentation to add (must include protocol, e.g., https://). The page must be publicly accessible.', 101 | }, 102 | }, 103 | required: ['url'], 104 | }, 105 | } as ToolDefinition, 106 | { 107 | name: 'list_sources', 108 | description: 'List all documentation sources currently stored in the system. Returns a comprehensive list of all indexed documentation including source URLs, titles, and last update times. Use this to understand what documentation is available for searching or to verify if specific sources have been indexed.', 109 | inputSchema: { 110 | type: 'object', 111 | properties: {}, 112 | }, 113 | } as ToolDefinition, 114 | { 115 | name: 'extract_urls', 116 | description: 'Extract and analyze all URLs from a given web page. This tool crawls the specified webpage, identifies all hyperlinks, and optionally adds them to the processing queue. Useful for discovering related documentation pages, API references, or building a documentation graph. Handles various URL formats and validates links before extraction.', 117 | inputSchema: { 118 | type: 'object', 119 | properties: { 120 | url: { 121 | type: 'string', 122 | description: 'The complete URL of the webpage to analyze (must include protocol, e.g., https://). The page must be publicly accessible.', 123 | }, 124 | add_to_queue: { 125 | type: 'boolean', 126 | description: 'If true, automatically add extracted URLs to the processing queue for later indexing. This enables recursive documentation discovery. Use with caution on large sites to avoid excessive queuing.', 127 | default: false, 128 | }, 129 | }, 130 | required: ['url'], 131 | }, 132 | } as ToolDefinition, 133 | { 134 | name: 'remove_documentation', 135 | description: 'Remove specific documentation sources from the system by their URLs. Use this tool to clean up outdated documentation, remove incorrect sources, or manage the documentation collection. The removal is permanent and will affect future search results. 
Supports removing multiple URLs in a single operation.', 136 | inputSchema: { 137 | type: 'object', 138 | properties: { 139 | urls: { 140 | type: 'array', 141 | items: { 142 | type: 'string', 143 | description: 'The complete URL of the documentation source to remove. Must exactly match the URL used when the documentation was added.', 144 | }, 145 | description: 'Array of URLs to remove from the database', 146 | }, 147 | }, 148 | required: ['urls'], 149 | }, 150 | } as ToolDefinition, 151 | { 152 | name: 'list_queue', 153 | description: 'List all URLs currently waiting in the documentation processing queue. Shows pending documentation sources that will be processed when run_queue is called. Use this to monitor queue status, verify URLs were added correctly, or check processing backlog. Returns URLs in the order they will be processed.', 154 | inputSchema: { 155 | type: 'object', 156 | properties: {}, 157 | }, 158 | } as ToolDefinition, 159 | { 160 | name: 'run_queue', 161 | description: 'Process and index all URLs currently in the documentation queue. Each URL is processed sequentially, with proper error handling and retry logic. Progress updates are provided as processing occurs. Use this after adding new URLs to ensure all documentation is indexed and searchable. Long-running operations will process until the queue is empty or an unrecoverable error occurs.', 162 | inputSchema: { 163 | type: 'object', 164 | properties: {}, 165 | }, 166 | } as ToolDefinition, 167 | { 168 | name: 'clear_queue', 169 | description: 'Remove all pending URLs from the documentation processing queue. Use this to reset the queue when you want to start fresh, remove unwanted URLs, or cancel pending processing. This operation is immediate and permanent - URLs will need to be re-added if you want to process them later. Returns the number of URLs that were cleared from the queue.', 170 | inputSchema: { 171 | type: 'object', 172 | properties: {}, 173 | }, 174 | } as ToolDefinition, 175 | { 176 | name: 'add_repository', 177 | description: 'Add a local code repository to the documentation system. This tool indexes all files in the repository according to the specified configuration, processes them into searchable chunks, and stores them in the vector database for future searches.', 178 | inputSchema: { 179 | type: 'object', 180 | properties: { 181 | path: { 182 | type: 'string', 183 | description: 'The absolute path to the repository directory on the local file system.', 184 | }, 185 | name: { 186 | type: 'string', 187 | description: 'A user-friendly name for the repository. If not provided, the directory name will be used.', 188 | }, 189 | include: { 190 | type: 'array', 191 | items: { 192 | type: 'string', 193 | }, 194 | description: 'Array of glob patterns to include. Default is ["**/*"] (all files).', 195 | }, 196 | exclude: { 197 | type: 'array', 198 | items: { 199 | type: 'string', 200 | }, 201 | description: 'Array of glob patterns to exclude. Default excludes common non-source directories and files.', 202 | }, 203 | watchMode: { 204 | type: 'boolean', 205 | description: 'Whether to watch the repository for changes and automatically update the index. Default is false.', 206 | }, 207 | watchInterval: { 208 | type: 'number', 209 | description: 'Interval in milliseconds to check for changes when watch mode is enabled. Default is 60000 (1 minute).', 210 | }, 211 | chunkSize: { 212 | type: 'number', 213 | description: 'Default maximum size of text chunks in characters. 
Default is 1000.', 214 | }, 215 | fileTypeConfig: { 216 | type: 'object', 217 | description: 'Configuration for specific file types. Keys are file extensions, values are objects with include, chunkSize, and chunkStrategy properties.', 218 | }, 219 | }, 220 | required: ['path'], 221 | }, 222 | } as ToolDefinition, 223 | { 224 | name: 'list_repositories', 225 | description: 'List all local repositories currently indexed in the system. Returns details about each repository including path, include/exclude patterns, and watch mode status.', 226 | inputSchema: { 227 | type: 'object', 228 | properties: {}, 229 | }, 230 | } as ToolDefinition, 231 | { 232 | name: 'remove_repository', 233 | description: 'Remove a repository from the system by its name. This removes both the repository configuration and all indexed documents from the vector database.', 234 | inputSchema: { 235 | type: 'object', 236 | properties: { 237 | name: { 238 | type: 'string', 239 | description: 'The name of the repository to remove.', 240 | }, 241 | }, 242 | required: ['name'], 243 | }, 244 | } as ToolDefinition, 245 | { 246 | name: 'update_repository', 247 | description: 'Update an existing repository index. This re-processes all files in the repository according to the current configuration and updates the vector database.', 248 | inputSchema: { 249 | type: 'object', 250 | properties: { 251 | name: { 252 | type: 'string', 253 | description: 'The name of the repository to update.', 254 | }, 255 | include: { 256 | type: 'array', 257 | items: { 258 | type: 'string', 259 | }, 260 | description: 'Array of glob patterns to include. If provided, replaces the existing include patterns.', 261 | }, 262 | exclude: { 263 | type: 'array', 264 | items: { 265 | type: 'string', 266 | }, 267 | description: 'Array of glob patterns to exclude. If provided, replaces the existing exclude patterns.', 268 | }, 269 | watchMode: { 270 | type: 'boolean', 271 | description: 'Whether to watch the repository for changes. If provided, updates the existing watch mode setting.', 272 | }, 273 | watchInterval: { 274 | type: 'number', 275 | description: 'Interval in milliseconds to check for changes when watch mode is enabled.', 276 | }, 277 | chunkSize: { 278 | type: 'number', 279 | description: 'Default maximum size of text chunks in characters.', 280 | }, 281 | fileTypeConfig: { 282 | type: 'object', 283 | description: 'Configuration for specific file types. If provided, merges with the existing file type configuration.', 284 | }, 285 | }, 286 | required: ['name'], 287 | }, 288 | } as ToolDefinition, 289 | { 290 | name: 'watch_repository', 291 | description: 'Start or stop watching a repository for changes. When watching is enabled, the system automatically detects file changes and updates the index accordingly.', 292 | inputSchema: { 293 | type: 'object', 294 | properties: { 295 | name: { 296 | type: 'string', 297 | description: 'The name of the repository to watch or unwatch.', 298 | }, 299 | action: { 300 | type: 'string', 301 | enum: ['start', 'stop'], 302 | description: 'The action to perform: "start" to begin watching, "stop" to end watching.', 303 | }, 304 | }, 305 | required: ['name', 'action'], 306 | }, 307 | } as ToolDefinition, 308 | { 309 | name: 'get_indexing_status', 310 | description: 'Get the current status of repository indexing operations. 
This tool provides detailed information about ongoing or completed indexing processes, including progress percentage, file counts, and timing information.', 311 | inputSchema: { 312 | type: 'object', 313 | properties: { 314 | name: { 315 | type: 'string', 316 | description: 'Optional. The name of the repository to get status for. If not provided, returns status for all repositories.', 317 | }, 318 | }, 319 | required: [], 320 | }, 321 | } as ToolDefinition, 322 | ], 323 | })); 324 | 325 | // Register the prompts/list handler 326 | this.server.setRequestHandler(ListPromptsRequestSchema, async (request) => { 327 | const handler = this.handlers.get('prompts/list'); 328 | if (!handler) { 329 | throw new McpError( 330 | ErrorCode.MethodNotFound, 331 | 'Method prompts/list not found' 332 | ); 333 | } 334 | 335 | // Call the handler but ignore the response 336 | await handler.handle(request.params); 337 | // Return an empty list of prompts 338 | return { prompts: [] }; 339 | }); 340 | 341 | // Register the resources/list handler 342 | this.server.setRequestHandler(ListResourcesRequestSchema, async (request) => { 343 | const handler = this.handlers.get('resources/list'); 344 | if (!handler) { 345 | throw new McpError( 346 | ErrorCode.MethodNotFound, 347 | 'Method resources/list not found' 348 | ); 349 | } 350 | 351 | // Call the handler but ignore the response 352 | await handler.handle(request.params); 353 | // Return an empty list of resources 354 | return { resources: [] }; 355 | }); 356 | 357 | this.server.setRequestHandler(CallToolRequestSchema, async (request) => { 358 | await this.apiClient.initCollection(COLLECTION_NAME); 359 | 360 | const handler = this.handlers.get(request.params.name); 361 | if (!handler) { 362 | throw new McpError( 363 | ErrorCode.MethodNotFound, 364 | `Unknown tool: ${request.params.name}` 365 | ); 366 | } 367 | 368 | // Extract progressToken or use requestId as fallback 369 | const typedRequest = request as any; // Cast to any to access id 370 | const callContext = { 371 | progressToken: typedRequest.params._meta?.progressToken, 372 | requestId: typedRequest.id 373 | }; 374 | 375 | const response = await handler.handle(typedRequest.params.arguments, callContext); 376 | return { 377 | _meta: {}, // Ensure _meta is always present in the response 378 | ...response 379 | }; 380 | }); 381 | } 382 | } 383 | -------------------------------------------------------------------------------- /src/handlers/local-repository.ts: -------------------------------------------------------------------------------- 1 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; 2 | import { BaseHandler } from './base-handler.js'; 3 | import { DocumentChunk, McpToolResponse, RepositoryConfig, IndexingStatus } from '../types.js'; 4 | import fs from 'fs/promises'; 5 | import path from 'path'; 6 | import { fileURLToPath } from 'url'; 7 | import crypto from 'crypto'; 8 | import { glob } from 'glob'; 9 | import { fileTypeFromFile } from 'file-type'; 10 | import { detectLanguage } from '../utils/language-detection.js'; 11 | import { RepositoryConfigLoader } from '../utils/repository-config-loader.js'; 12 | import { IndexingStatusManager } from '../utils/indexing-status-manager.js'; 13 | 14 | const COLLECTION_NAME = 'documentation'; 15 | const __dirname = path.dirname(fileURLToPath(import.meta.url)); 16 | const REPO_CONFIG_DIR = path.join(__dirname, '..', 'repo-configs'); 17 | const DEFAULT_CHUNK_SIZE = 1000; 18 | 19 | export class LocalRepositoryHandler extends BaseHandler { 20 | 
private activeProgressToken: string | number | undefined; 21 | private statusManager: IndexingStatusManager; 22 | // Track active indexing processes 23 | private static activeIndexingProcesses: Map<string, boolean> = new Map(); 24 | // Smaller batch size to reduce processing time per batch 25 | private static BATCH_SIZE = 50; 26 | 27 | constructor(server: any, apiClient: any) { 28 | super(server, apiClient); 29 | this.statusManager = new IndexingStatusManager(); 30 | } 31 | 32 | async handle(args: any, callContext?: { progressToken?: string | number, requestId: string | number }): Promise<McpToolResponse> { 33 | this.activeProgressToken = callContext?.progressToken || callContext?.requestId; 34 | 35 | // Validate required parameters 36 | if (!args.path || typeof args.path !== 'string') { 37 | throw new McpError(ErrorCode.InvalidParams, 'Repository path is required'); 38 | } 39 | 40 | // Normalize the repository path 41 | const repoPath = path.resolve(args.path); 42 | 43 | // Check if the repository path exists 44 | try { 45 | const stats = await fs.stat(repoPath); 46 | if (!stats.isDirectory()) { 47 | throw new McpError(ErrorCode.InvalidParams, `Path is not a directory: ${repoPath}`); 48 | } 49 | } catch (error) { 50 | throw error instanceof McpError ? error : new McpError(ErrorCode.InvalidParams, `Invalid repository path: ${repoPath}`); // don't mask the more specific "not a directory" error 51 | } 52 | 53 | // Create repository configuration 54 | const config: RepositoryConfig = { 55 | path: repoPath, 56 | name: args.name || path.basename(repoPath), 57 | include: args.include || ['**/*'], 58 | exclude: args.exclude || [ 59 | '**/node_modules/**', 60 | '**/.git/**', 61 | '**/build/**', 62 | '**/dist/**', 63 | '**/*.min.js', 64 | '**/*.map', 65 | '**/package-lock.json', 66 | '**/yarn.lock' 67 | ], 68 | watchMode: args.watchMode || false, 69 | watchInterval: args.watchInterval || 60000, // Default: 1 minute 70 | chunkSize: args.chunkSize || DEFAULT_CHUNK_SIZE, 71 | fileTypeConfig: args.fileTypeConfig || { 72 | // Default file type configurations 73 | '.js': { include: true, chunkStrategy: 'semantic' }, 74 | '.ts': { include: true, chunkStrategy: 'semantic' }, 75 | '.jsx': { include: true, chunkStrategy: 'semantic' }, 76 | '.tsx': { include: true, chunkStrategy: 'semantic' }, 77 | '.py': { include: true, chunkStrategy: 'semantic' }, 78 | '.java': { include: true, chunkStrategy: 'semantic' }, 79 | '.md': { include: true, chunkStrategy: 'semantic' }, 80 | '.txt': { include: true, chunkStrategy: 'line' }, 81 | '.json': { include: true, chunkStrategy: 'semantic' }, 82 | '.html': { include: true, chunkStrategy: 'semantic' }, 83 | '.css': { include: true, chunkStrategy: 'semantic' }, 84 | '.scss': { include: true, chunkStrategy: 'semantic' }, 85 | '.xml': { include: true, chunkStrategy: 'semantic' }, 86 | '.yaml': { include: true, chunkStrategy: 'semantic' }, 87 | '.yml': { include: true, chunkStrategy: 'semantic' }, 88 | } 89 | }; 90 | 91 | try { 92 | // Check if indexing is already in progress for this repository 93 | if (LocalRepositoryHandler.activeIndexingProcesses.has(config.name)) { 94 | // Get current status 95 | const status = await this.statusManager.getStatus(config.name); 96 | if (status && status.status === 'processing') { 97 | return { 98 | content: [ 99 | { 100 | type: 'text', 101 | text: `Repository indexing already in progress for ${config.name}.\n` + 102 | `Current progress: ${status.percentageComplete || 0}%\n` + 103 | `Files processed: ${status.processedFiles || 0} of ${status.totalFiles || 'unknown'}\n` + 104 | `Chunks indexed: ${status.indexedChunks || 0} of ${status.totalChunks || 'unknown'}\n` +
105 | `Started at: ${new Date(status.startTime).toLocaleString()}` 106 | }, 107 | ], 108 | }; 109 | } 110 | } 111 | 112 | // Save the repository configuration 113 | await this.saveRepositoryConfig(config); 114 | 115 | // Update the repositories.json configuration file 116 | const configLoader = new RepositoryConfigLoader(this.server, this.apiClient); 117 | await configLoader.addRepositoryToConfig(config); 118 | console.info(`[${config.name}] Repository configuration saved and loaded.`); 119 | if (this.activeProgressToken) { 120 | (this.server as any).sendProgress(this.activeProgressToken, { message: "Repository configuration saved." }); 121 | } 122 | 123 | // Create initial status 124 | await this.statusManager.createStatus(config.name); 125 | 126 | // Start the indexing process asynchronously 127 | this.processRepositoryAsync(config, this.activeProgressToken); 128 | 129 | return { 130 | content: [ 131 | { 132 | type: 'text', 133 | text: `Repository configuration saved for ${config.name} (${repoPath}).\n` + 134 | `Indexing has started in the background and will continue after this response.\n` + 135 | `You can check the status using the 'get_indexing_status' tool with parameter name="${config.name}".\n` + 136 | `Watch mode: ${config.watchMode ? 'enabled' : 'disabled'}` 137 | }, 138 | ], 139 | }; 140 | } catch (error) { 141 | if (error instanceof McpError) { 142 | throw error; 143 | } 144 | return { 145 | content: [ 146 | { 147 | type: 'text', 148 | text: `Failed to index repository: ${error}`, 149 | }, 150 | ], 151 | isError: true, 152 | }; 153 | } 154 | } 155 | 156 | private async processRepository(config: RepositoryConfig): Promise<{ 157 | chunks: DocumentChunk[], 158 | processedFiles: number, 159 | skippedFiles: number 160 | }> { 161 | const chunks: DocumentChunk[] = []; 162 | let processedFiles = 0; 163 | let skippedFiles = 0; 164 | let fileCounter = 0; 165 | 166 | // Get all files matching the include/exclude patterns 167 | const files = await glob(config.include, { 168 | cwd: config.path, 169 | ignore: config.exclude, 170 | absolute: true, 171 | nodir: true, 172 | }); 173 | const totalFiles = files.length; 174 | 175 | console.info(`[${config.name}] Found ${totalFiles} files to process based on include/exclude patterns.`); 176 | if (this.activeProgressToken) { 177 | (this.server as any).sendProgress(this.activeProgressToken, { message: `Found ${totalFiles} files to process.` }); 178 | } 179 | 180 | for (const file of files) { 181 | fileCounter++; 182 | try { 183 | const relativePath = path.relative(config.path, file); 184 | const extension = path.extname(file); 185 | const fileTypeConfig = config.fileTypeConfig[extension]; 186 | 187 | // Skip files that should be excluded based on file type config 188 | if (fileTypeConfig && fileTypeConfig.include === false) { 189 | skippedFiles++; 190 | continue; 191 | } 192 | 193 | // Read file content 194 | const content = await fs.readFile(file, 'utf-8'); 195 | 196 | // Skip empty files 197 | if (!content.trim()) { 198 | skippedFiles++; 199 | continue; 200 | } 201 | 202 | // Detect language for better processing 203 | const language = detectLanguage(file, content); 204 | 205 | // Process the file content into chunks 206 | const fileChunks = this.chunkFileContent( 207 | content, 208 | file, 209 | relativePath, 210 | config, 211 | language, 212 | fileTypeConfig?.chunkStrategy || 'line' 213 | ); 214 | 215 | chunks.push(...fileChunks); 216 | processedFiles++; 217 | if (fileCounter % 50 === 0 && fileCounter > 0 && this.activeProgressToken) { 218 | 
const percentageComplete = Math.round((fileCounter / totalFiles) * 33); // File processing is ~1/3 of the job 219 | (this.server as any).sendProgress(this.activeProgressToken, { message: `Processed ${fileCounter} of ${totalFiles} files...`, percentageComplete }); 220 | console.info(`[${config.name}] Processed ${fileCounter} of ${totalFiles} files... (${processedFiles} successful, ${skippedFiles} skipped/errored)`); 221 | } 222 | } catch (error) { 223 | console.error(`[${config.name}] Error processing file ${file}: ${error instanceof Error ? error.message : String(error)}`); 224 | skippedFiles++; 225 | } 226 | } 227 | console.info(`[${config.name}] Completed file iteration. Processed: ${processedFiles}, Skipped/Errored: ${skippedFiles}.`); 228 | 229 | return { chunks, processedFiles, skippedFiles }; 230 | } 231 | 232 | private chunkFileContent( 233 | content: string, 234 | filePath: string, 235 | relativePath: string, 236 | config: RepositoryConfig, 237 | language: string, 238 | chunkStrategy: string 239 | ): DocumentChunk[] { 240 | const chunks: DocumentChunk[] = []; 241 | const timestamp = new Date().toISOString(); 242 | const fileUrl = `file://${filePath}`; 243 | const title = `${config.name}/${relativePath}`; 244 | 245 | // Different chunking strategies based on file type 246 | let textChunks: string[] = []; 247 | 248 | switch (chunkStrategy) { 249 | case 'semantic': 250 | // For semantic chunking, we'd ideally use a more sophisticated approach 251 | // For now, we'll use a simple paragraph-based approach 252 | textChunks = this.chunkByParagraphs(content, config.chunkSize); 253 | break; 254 | case 'line': 255 | // Chunk by lines, respecting max chunk size 256 | textChunks = this.chunkByLines(content, config.chunkSize); 257 | break; 258 | default: 259 | // Default to simple text chunking 260 | textChunks = this.chunkText(content, config.chunkSize); 261 | } 262 | 263 | // Create document chunks with metadata 264 | chunks.push(...textChunks.map((text, index) => ({ 265 | text, 266 | url: fileUrl, 267 | title, 268 | timestamp, 269 | filePath: relativePath, 270 | language, 271 | chunkIndex: index, 272 | totalChunks: textChunks.length, 273 | }))); 274 | 275 | return chunks; 276 | } 277 | 278 | private chunkText(text: string, maxChunkSize: number): string[] { 279 | const words = text.split(/\s+/); 280 | const chunks: string[] = []; 281 | let currentChunk: string[] = []; 282 | 283 | for (const word of words) { 284 | currentChunk.push(word); 285 | const currentLength = currentChunk.join(' ').length; 286 | 287 | if (currentLength >= maxChunkSize) { 288 | chunks.push(currentChunk.join(' ')); 289 | currentChunk = []; 290 | } 291 | } 292 | 293 | if (currentChunk.length > 0) { 294 | chunks.push(currentChunk.join(' ')); 295 | } 296 | 297 | return chunks; 298 | } 299 | 300 | private chunkByLines(text: string, maxChunkSize: number): string[] { 301 | const lines = text.split(/\r?\n/); 302 | const chunks: string[] = []; 303 | let currentChunk: string[] = []; 304 | let currentLength = 0; 305 | 306 | for (const line of lines) { 307 | const lineLength = line.length + 1; // +1 for the newline 308 | 309 | if (currentLength + lineLength > maxChunkSize && currentChunk.length > 0) { 310 | chunks.push(currentChunk.join('\n')); 311 | currentChunk = []; 312 | currentLength = 0; 313 | } 314 | 315 | currentChunk.push(line); 316 | currentLength += lineLength; 317 | } 318 | 319 | if (currentChunk.length > 0) { 320 | chunks.push(currentChunk.join('\n')); 321 | } 322 | 323 | return chunks; 324 | } 325 | 326 | private 
chunkByParagraphs(text: string, maxChunkSize: number): string[] { 327 | // Split by double newlines (paragraphs) 328 | const paragraphs = text.split(/\r?\n\r?\n/); 329 | const chunks: string[] = []; 330 | let currentChunk: string[] = []; 331 | let currentLength = 0; 332 | 333 | for (const paragraph of paragraphs) { 334 | const paragraphLength = paragraph.length + 2; // +2 for the double newline 335 | 336 | if (currentLength + paragraphLength > maxChunkSize && currentChunk.length > 0) { 337 | chunks.push(currentChunk.join('\n\n')); 338 | currentChunk = []; 339 | currentLength = 0; 340 | } 341 | 342 | currentChunk.push(paragraph); 343 | currentLength += paragraphLength; 344 | } 345 | 346 | if (currentChunk.length > 0) { 347 | chunks.push(currentChunk.join('\n\n')); 348 | } 349 | 350 | return chunks; 351 | } 352 | 353 | private async saveRepositoryConfig(config: RepositoryConfig): Promise<void> { 354 | // Ensure the config directory exists 355 | try { 356 | await fs.mkdir(REPO_CONFIG_DIR, { recursive: true }); 357 | } catch (error) { 358 | console.error('Error creating repository config directory:', error); 359 | throw new McpError(ErrorCode.InternalError, 'Failed to create repository config directory'); 360 | } 361 | 362 | // Save the config file 363 | const configPath = path.join(REPO_CONFIG_DIR, `${config.name}.json`); 364 | await fs.writeFile(configPath, JSON.stringify(config, null, 2), 'utf-8'); 365 | } 366 | 367 | private generatePointId(): string { 368 | return crypto.randomBytes(16).toString('hex'); 369 | } 370 | 371 | /** 372 | * Process repository asynchronously to avoid MCP timeout 373 | */ 374 | private async processRepositoryAsync(config: RepositoryConfig, progressToken?: string | number): Promise<void> { 375 | try { 376 | // Mark this repository as being processed 377 | LocalRepositoryHandler.activeIndexingProcesses.set(config.name, true); 378 | 379 | // Update status to processing 380 | await this.statusManager.updateStatus({ 381 | repositoryName: config.name, 382 | status: 'processing' 383 | }); 384 | 385 | console.info(`[${config.name}] Starting to process repository files asynchronously...`); 386 | 387 | // Process the repository files 388 | const { chunks, processedFiles, skippedFiles } = await this.processRepository(config); 389 | 390 | // Update status with file processing results 391 | await this.statusManager.updateStatus({ 392 | repositoryName: config.name, 393 | totalFiles: processedFiles + skippedFiles, 394 | processedFiles, 395 | skippedFiles, 396 | totalChunks: chunks.length, 397 | percentageComplete: 33 398 | }); 399 | 400 | console.info(`[${config.name}] Finished processing repository files.
Found ${chunks.length} chunks from ${processedFiles} files (${skippedFiles} skipped).`); 401 | 402 | // Batch process chunks with smaller batch size for better responsiveness 403 | const batchSize = LocalRepositoryHandler.BATCH_SIZE; 404 | let indexedChunks = 0; 405 | const totalChunks = chunks.length; 406 | const totalBatches = Math.ceil(totalChunks / batchSize); 407 | 408 | console.info(`[${config.name}] Starting to generate embeddings and index ${totalChunks} chunks in ${totalBatches} batches...`); 409 | 410 | const COLLECTION_NAME = 'documentation'; 411 | 412 | for (let i = 0; i < totalChunks; i += batchSize) { 413 | const batchChunks = chunks.slice(i, i + batchSize); 414 | const currentBatch = Math.floor(i / batchSize) + 1; 415 | 416 | // Update status before processing batch 417 | await this.statusManager.updateStatus({ 418 | repositoryName: config.name, 419 | currentBatch, 420 | totalBatches, 421 | indexedChunks, 422 | percentageComplete: 33 + Math.round((i / totalChunks) * 66) 423 | }); 424 | 425 | console.info(`[${config.name}] Processing batch ${currentBatch} of ${totalBatches}...`); 426 | 427 | try { 428 | const embeddingResults = await Promise.allSettled( 429 | batchChunks.map(async (chunk) => { 430 | try { 431 | const embedding = await this.apiClient.getEmbeddings(chunk.text); 432 | return { 433 | id: this.generatePointId(), 434 | vector: embedding, 435 | payload: { 436 | ...chunk, 437 | _type: 'DocumentChunk' as const, 438 | repository: config.name, 439 | isRepositoryFile: true, 440 | } as Record<string, unknown>, 441 | }; 442 | } catch (embeddingError) { 443 | console.error(`[${config.name}] Failed to generate embedding for chunk from ${chunk.filePath || chunk.url}: ${embeddingError instanceof Error ? embeddingError.message : String(embeddingError)}`); 444 | throw embeddingError; // Re-throw to be caught by Promise.allSettled 445 | } 446 | }) 447 | ); 448 | 449 | const successfulPoints = embeddingResults 450 | .filter(result => result.status === 'fulfilled') 451 | .map(result => (result as PromiseFulfilledResult<any>).value); 452 | 453 | const failedEmbeddingsCount = embeddingResults.filter(result => result.status === 'rejected').length; 454 | if (failedEmbeddingsCount > 0) { 455 | console.warn(`[${config.name}] Failed to generate embeddings for ${failedEmbeddingsCount} of ${batchChunks.length} chunks in batch ${currentBatch}.`); 456 | } 457 | 458 | if (successfulPoints.length > 0) { 459 | try { 460 | await this.apiClient.qdrantClient.upsert(COLLECTION_NAME, { 461 | wait: true, 462 | points: successfulPoints, 463 | }); 464 | indexedChunks += successfulPoints.length; 465 | } catch (upsertError) { 466 | console.error(`[${config.name}] Failed to upsert batch ${currentBatch} of ${successfulPoints.length} points to Qdrant: ${upsertError instanceof Error ? upsertError.message : String(upsertError)}`); 467 | } 468 | } 469 | 470 | const percentageComplete = 33 + Math.round(((i + batchChunks.length) / totalChunks) * 66); 471 | console.info(`[${config.name}] Processed batch ${currentBatch} of ${totalBatches}. Successfully indexed in this batch: ${successfulPoints.length}.
Total indexed so far: ${indexedChunks} chunks.`); 472 | 473 | // Update status after processing batch 474 | await this.statusManager.updateStatus({ 475 | repositoryName: config.name, 476 | currentBatch, 477 | totalBatches, 478 | indexedChunks, 479 | percentageComplete 480 | }); 481 | } catch (batchError) { 482 | console.error(`[${config.name}] Error processing batch ${currentBatch}:`, batchError); 483 | // Continue with next batch despite errors 484 | } 485 | } 486 | 487 | // Mark indexing as completed 488 | console.info(`[${config.name}] Finished generating embeddings and indexing. Total indexed: ${indexedChunks} of ${totalChunks} chunks.`); 489 | 490 | await this.statusManager.completeStatus(config.name, true, { 491 | processedFiles, 492 | skippedFiles, 493 | totalChunks, 494 | indexedChunks 495 | }); 496 | 497 | // If watch mode is enabled, start the watcher 498 | if (config.watchMode) { 499 | // This would be implemented in a separate class 500 | // this.startRepositoryWatcher(config); 501 | } 502 | } catch (error) { 503 | console.error(`[${config.name}] Error during async repository processing:`, error); 504 | 505 | // Update status to failed 506 | await this.statusManager.completeStatus( 507 | config.name, 508 | false, 509 | undefined, 510 | error instanceof Error ? error.message : String(error) 511 | ); 512 | } finally { 513 | // Remove from active processes 514 | LocalRepositoryHandler.activeIndexingProcesses.delete(config.name); 515 | } 516 | } 517 | } 518 | --------------------------------------------------------------------------------
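A few usage sketches follow for the tool surface defined in handler-registry.ts and the indexing pipeline in local-repository.ts. First, the add_repository schema (path required; name, include, exclude, watchMode, watchInterval, chunkSize, fileTypeConfig optional) seen end to end. This is a minimal sketch assuming the @modelcontextprotocol/sdk client over stdio; the server entry point, repository path, and glob patterns are all illustrative:

```typescript
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';

async function main() {
  // Spawn the MCP server over stdio; 'build/index.js' is an assumed entry point.
  const transport = new StdioClientTransport({ command: 'node', args: ['build/index.js'] });
  const client = new Client({ name: 'example-client', version: '0.0.1' });
  await client.connect(transport);

  // Mirrors the add_repository inputSchema: only `path` is required.
  const result = await client.callTool({
    name: 'add_repository',
    arguments: {
      path: '/home/me/projects/my-repo', // hypothetical local repository
      name: 'my-repo',
      include: ['src/**/*.ts', '**/*.md'],
      exclude: ['**/node_modules/**', '**/dist/**'],
      watchMode: true,
      watchInterval: 120_000, // re-check for changes every 2 minutes
    },
  });
  console.log(result.content);

  await client.close();
}

main().catch(console.error);
```

The response comes back immediately because handle() kicks off processRepositoryAsync without awaiting it; indexing continues in the background.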
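Since the tool call returns before indexing finishes, a caller is expected to poll get_indexing_status, as the returned text itself suggests. A sketch of that loop, reusing the connected client from the previous example; the string check on 'processing' is an assumption about the status handler's output format:

```typescript
// Poll get_indexing_status until the repository leaves the 'processing' state.
// `client` is the connected Client instance from the previous sketch.
async function waitForIndexing(client: Client, repositoryName: string): Promise<void> {
  for (;;) {
    const result: any = await client.callTool({
      name: 'get_indexing_status',
      arguments: { name: repositoryName },
    });
    const text: string = result.content?.[0]?.text ?? '';
    console.log(text);
    // IndexingStatusManager reports 'processing' while file and batch work is
    // ongoing; treat anything else (completed/failed) as terminal.
    if (!text.includes('processing')) break;
    await new Promise((resolve) => setTimeout(resolve, 5_000)); // poll every 5s
  }
}
```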
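The fileTypeConfig argument is described only loosely in the schema ('keys are file extensions, values are objects with include, chunkSize, and chunkStrategy properties'). Written out against the defaults in local-repository.ts, a caller-supplied value might look like the following. The FileTypeConfig alias here is illustrative (the authoritative shape lives in src/types.ts), and note that the chunking code above only reads the repository-level config.chunkSize, so the per-type chunkSize is schema-advertised but not visibly honored:

```typescript
// Illustrative alias; see RepositoryConfig in src/types.ts for the real shape.
type FileTypeConfig = Record<string, {
  include: boolean;
  chunkSize?: number;                   // advertised per-type override
  chunkStrategy?: 'semantic' | 'line';  // anything else falls back to chunkText
}>;

const fileTypeConfig: FileTypeConfig = {
  '.ts':  { include: true,  chunkStrategy: 'semantic' },
  '.md':  { include: true,  chunkStrategy: 'semantic', chunkSize: 2000 },
  '.csv': { include: true,  chunkStrategy: 'line', chunkSize: 500 },
  '.log': { include: false },           // processRepository skips these files
};
```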
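All three chunkers share the same accumulate-and-flush shape; chunkByLines is the easiest to sanity-check in isolation. Below is a standalone copy of its logic plus a small driver. The behavior matches the method above: a chunk is flushed just before a line would push it past maxChunkSize, and a single over-long line still becomes its own chunk:

```typescript
function chunkByLines(text: string, maxChunkSize: number): string[] {
  const lines = text.split(/\r?\n/);
  const chunks: string[] = [];
  let currentChunk: string[] = [];
  let currentLength = 0;

  for (const line of lines) {
    const lineLength = line.length + 1; // +1 for the newline
    // Flush before adding a line that would exceed the limit.
    if (currentLength + lineLength > maxChunkSize && currentChunk.length > 0) {
      chunks.push(currentChunk.join('\n'));
      currentChunk = [];
      currentLength = 0;
    }
    currentChunk.push(line);
    currentLength += lineLength;
  }
  if (currentChunk.length > 0) {
    chunks.push(currentChunk.join('\n'));
  }
  return chunks;
}

// With maxChunkSize = 12, the running length (line length + 1 each) reaches 11
// after "alpha" and "beta", so "gamma" triggers a flush:
// => [ 'alpha\nbeta', 'gamma\ndelta' ]
console.log(chunkByLines('alpha\nbeta\ngamma\ndelta', 12));
```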
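The progress numbers in processRepositoryAsync follow a fixed two-phase scale: walking and chunking files maps onto 0-33%, while embedding and upserting maps onto the remaining 66 points, which is why both batch-status updates compute 33 + round(fraction * 66). Restated as two small functions (a restatement of the arithmetic above, not code exported by the handler):

```typescript
// Phase 1: file processing occupies 0..33% of the reported progress.
function fileProcessingProgress(filesDone: number, totalFiles: number): number {
  return Math.round((filesDone / totalFiles) * 33);
}

// Phase 2: embedding + indexing occupies 33..99%; the completeStatus call
// at the end is what marks the run finished.
function indexingProgress(chunksDone: number, totalChunks: number): number {
  return 33 + Math.round((chunksDone / totalChunks) * 66);
}

console.log(fileProcessingProgress(50, 200)); // 8
console.log(indexingProgress(150, 300));      // 66
```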
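The batch loop's central design choice is Promise.allSettled: a failed embedding drops one chunk rather than aborting the batch, and a failed upsert drops one batch rather than the whole run. A condensed sketch of the same pattern against the @qdrant/js-client-rest API; the embed parameter and the 'documentation' collection name stand in for apiClient.getEmbeddings and COLLECTION_NAME:

```typescript
import { QdrantClient } from '@qdrant/js-client-rest';
import crypto from 'crypto';

type Chunk = { text: string; url: string; title: string };
type Point = { id: string; vector: number[]; payload: Record<string, unknown> };

// `embed` stands in for apiClient.getEmbeddings.
async function indexBatch(
  qdrant: QdrantClient,
  embed: (text: string) => Promise<number[]>,
  chunks: Chunk[],
): Promise<number> {
  const results = await Promise.allSettled(
    chunks.map(async (chunk): Promise<Point> => ({
      id: crypto.randomBytes(16).toString('hex'), // same scheme as generatePointId
      vector: await embed(chunk.text),
      payload: { ...chunk },
    })),
  );

  // Keep only the chunks whose embeddings succeeded; count the rest.
  const points = results
    .filter((r): r is PromiseFulfilledResult<Point> => r.status === 'fulfilled')
    .map((r) => r.value);
  const failed = results.length - points.length;
  if (failed > 0) console.warn(`Dropped ${failed} chunks with failed embeddings.`);

  if (points.length > 0) {
    // wait: true blocks until Qdrant has durably applied the batch.
    await qdrant.upsert('documentation', { wait: true, points });
  }
  return points.length;
}
```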
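Finally, watch_repository is the one tool above whose arguments carry an enum constraint. Toggling watching on and off (inside an async context, with client as in the first sketch) looks like this; anything other than 'start' or 'stop' should be rejected at the schema level:

```typescript
// Begin watching: the RepositoryWatcher utility is then expected to pick up
// file changes at the repository's configured watchInterval.
await client.callTool({
  name: 'watch_repository',
  arguments: { name: 'my-repo', action: 'start' },
});

// ...and later, stop watching the same repository.
await client.callTool({
  name: 'watch_repository',
  arguments: { name: 'my-repo', action: 'stop' },
});
```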