├── .gitignore ├── README.md ├── package-lock.json ├── package.json ├── src ├── config │ └── index.ts ├── core │ ├── indexing │ │ ├── DocumentFetcher.ts │ │ ├── VectorStoreManager.ts │ │ └── types.ts │ └── query │ │ ├── AgentService.ts │ │ ├── DirectQueryService.ts │ │ ├── SessionStore.ts │ │ └── types.ts ├── main.ts ├── mcp │ ├── server.ts │ └── tools │ │ ├── agentQueryTool.ts │ │ ├── clearMemoryTool.ts │ │ ├── directQueryTool.ts │ │ └── index.ts ├── scripts │ └── buildIndex.ts └── utils │ ├── fileUtils.ts │ └── helpers.ts └── tsconfig.json /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | dist 3 | files/ 4 | .env 5 | *.log -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Vercel AI SDK Documentation MCP Agent 2 | 3 | A Model Context Protocol (MCP) server that provides AI-powered search and querying capabilities for the Vercel AI SDK documentation. This project enables developers to ask questions about the Vercel AI SDK and receive accurate, contextualized responses based on the official documentation. 
4 | 5 | [![MCP Compatible](https://img.shields.io/badge/MCP-Compatible-green)](https://modelcontextprotocol.io) 6 | [![TypeScript](https://img.shields.io/badge/TypeScript-4.9+-blue.svg)](https://www.typescriptlang.org/) 7 | [![Node.js](https://img.shields.io/badge/Node.js-18+-green.svg)](https://nodejs.org/) 8 | 9 | ## Features 10 | 11 | - **Direct Documentation Search**: Query the Vercel AI SDK documentation index directly using similarity search 12 | - **AI-Powered Agent**: Ask natural language questions about the Vercel AI SDK and receive comprehensive answers 13 | - **Session Management**: Maintain conversation context across multiple queries 14 | - **Automated Indexing**: Includes tools to fetch, process, and index the latest Vercel AI SDK documentation 15 | 16 | ## Architecture 17 | 18 | This system consists of several key components: 19 | 20 | 1. **MCP Server**: Exposes tools via the Model Context Protocol for integration with AI assistants 21 | 2. **DocumentFetcher**: Crawls and processes the Vercel AI SDK documentation 22 | 3. **VectorStoreManager**: Creates and manages the FAISS vector index for semantic search 23 | 4. **AgentService**: Provides AI-powered answers to questions using the Google Gemini model 24 | 5. **DirectQueryService**: Offers direct semantic search of the documentation 25 | 26 | ## Setup Instructions 27 | 28 | ### Prerequisites 29 | 30 | - Node.js 18+ 31 | - npm 32 | - A Google API key for Gemini model access 33 | 34 | ### Environment Variables 35 | 36 | Create a `.env` file in the project root with the following variables: 37 | 38 | ``` 39 | GOOGLE_GENERATIVE_AI_API_KEY=your-google-api-key-here 40 | ``` 41 | 42 | You'll need to obtain a Google Gemini API key from the [Google AI Studio](https://makersuite.google.com/app/apikey). 43 | 44 | ### Installation 45 | 46 | 1. Clone the repository 47 | ``` 48 | git clone https://github.com/IvanAmador/vercel-ai-docs-mcp.git 49 | cd vercel-ai-docs-mcp 50 | ``` 51 | 52 | 2.
Install dependencies 53 | ``` 54 | npm install 55 | ``` 56 | 57 | 3. Build the project 58 | ``` 59 | npm run build 60 | ``` 61 | 62 | 4. Build the documentation index 63 | ``` 64 | npm run build:index 65 | ``` 66 | 67 | 5. Start the MCP server 68 | ``` 69 | npm run start 70 | ``` 71 | 72 | ## Integration with Claude Desktop 73 | 74 | [Claude Desktop](https://www.anthropic.com/claude/download) is a powerful AI assistant that supports MCP servers. To connect the Vercel AI SDK Documentation MCP agent with Claude Desktop: 75 | 76 | 1. First, install [Claude Desktop](https://www.anthropic.com/claude/download) if you don't have it already. 77 | 78 | 2. Open Claude Desktop settings (via the application menu, not within the chat interface). 79 | 80 | 3. Navigate to the "Developer" tab and click "Edit Config". 81 | 82 | 4. Add the Vercel AI Docs MCP server to your configuration: 83 | 84 | ```json 85 | { 86 | "mcpServers": { 87 | "vercel-ai-docs": { 88 | "command": "node", 89 | "args": ["ABSOLUTE_PATH_TO_PROJECT/dist/main.js"], 90 | "env": { 91 | "GOOGLE_GENERATIVE_AI_API_KEY": "your-google-api-key-here" 92 | } 93 | } 94 | } 95 | } 96 | ``` 97 | 98 | Make sure to replace: 99 | - `ABSOLUTE_PATH_TO_PROJECT` with the actual path to your project folder 100 | - `your-google-api-key-here` with your Google Gemini API key 101 | 102 | 5. Save the config file and restart Claude Desktop. 103 | 104 | 6. To verify the server is connected, look for the hammer 🔨 icon in the Claude chat interface. 105 | 106 | For more detailed information about setting up MCP servers with Claude Desktop, visit the [MCP Quickstart Guide](https://modelcontextprotocol.io/quickstart/user). 107 | 108 | ## Integration with Other MCP Clients 109 | 110 | This MCP server is compatible with any client that implements the Model Context Protocol. Here are a few examples: 111 | 112 | ### Cursor 113 | 114 | [Cursor](https://cursor.sh/) is an AI-powered code editor that supports MCP servers. 
To integrate with Cursor: 115 | 116 | 1. Add a `.cursor/mcp.json` file to your project directory (for project-specific configuration) or a `~/.cursor/mcp.json` file in your home directory (for global configuration). 117 | 118 | 2. Add the following to your configuration file: 119 | 120 | ```json 121 | { 122 | "mcpServers": { 123 | "vercel-ai-docs": { 124 | "command": "node", 125 | "args": ["ABSOLUTE_PATH_TO_PROJECT/dist/main.js"], 126 | "env": { 127 | "GOOGLE_GENERATIVE_AI_API_KEY": "your-google-api-key-here" 128 | } 129 | } 130 | } 131 | } 132 | ``` 133 | 134 | For more information about using MCP with Cursor, refer to the [Cursor MCP documentation](https://modelcontextprotocol.io/example-clients/). 135 | 136 | ## Usage 137 | 138 | The MCP server exposes three primary tools: 139 | 140 | ### 1. agent-query 141 | 142 | Query the Vercel AI SDK documentation using an AI agent that can search and synthesize information. 143 | 144 | ```json 145 | { 146 | "name": "agent-query", 147 | "arguments": { 148 | "query": "How do I use the streamText function?", 149 | "sessionId": "unique-session-id" 150 | } 151 | } 152 | ``` 153 | 154 | ### 2. direct-query 155 | 156 | Perform a direct similarity search against the Vercel AI SDK documentation index. 157 | 158 | ```json 159 | { 160 | "name": "direct-query", 161 | "arguments": { 162 | "query": "streamText usage", 163 | "limit": 5 164 | } 165 | } 166 | ``` 167 | 168 | ### 3. clear-memory 169 | 170 | Clears the conversation memory for a specific session or all sessions. 171 | 172 | ```json 173 | { 174 | "name": "clear-memory", 175 | "arguments": { 176 | "sessionId": "unique-session-id" 177 | } 178 | } 179 | ``` 180 | 181 | To clear all sessions, omit the sessionId parameter. 
182 | 183 | ## Development 184 | 185 | ### Project Structure 186 | 187 | ``` 188 | ├── config/ # Configuration settings 189 | ├── core/ # Core functionality 190 | │ ├── indexing/ # Document indexing and vector store 191 | │ └── query/ # Query services (agent and direct) 192 | ├── files/ # Storage directories 193 | │ ├── docs/ # Processed documentation 194 | │ ├── faiss_index/ # Vector index files 195 | │ └── sessions/ # Session data 196 | ├── mcp/ # MCP server and tools 197 | │ ├── server.ts # MCP server implementation 198 | │ └── tools/ # MCP tool definitions 199 | ├── scripts/ # Build and utility scripts 200 | └── utils/ # Helper utilities 201 | ``` 202 | 203 | ### Build Scripts 204 | 205 | - `npm run build`: Compile TypeScript files 206 | - `npm run build:index`: Build the documentation index 207 | - `npm run dev:index`: Build and index in development mode 208 | - `npm run dev`: Build and start in development mode 209 | 210 | ## Troubleshooting 211 | 212 | ### Common Issues 213 | 214 | 1. **Index not found or failed to load** 215 | 216 | Run `npm run build:index` to create the index before starting the server. 217 | 218 | 2. **API rate limits** 219 | 220 | When exceeding Google API rate limits, the agent service may return errors. Implement appropriate backoff strategies. 221 | 222 | 3. **Model connection issues** 223 | 224 | Ensure your Google API key is valid and has access to the specified Gemini model. 225 | 226 | 4. **Claude Desktop not showing MCP server** 227 | 228 | - Check your configuration file for syntax errors. 229 | - Make sure the path to the server is correct and absolute. 230 | - Check Claude Desktop logs for errors. 231 | - Restart Claude Desktop after making configuration changes. 232 | 233 | ## Contributing 234 | 235 | Contributions are welcome! Please feel free to submit a Pull Request. 
236 | 237 | ## License 238 | 239 | MIT 240 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "vercel-ai-docs-mcp", 3 | "version": "0.0.1", 4 | "description": "MCP Server to query Vercel AI SDK documentation", 5 | "main": "dist/main.js", 6 | "type": "module", 7 | "scripts": { 8 | "build": "tsc", 9 | "build:index": "node dist/scripts/buildIndex.js", 10 | "start": "node dist/main.js", 11 | "dev:index": "npm run build && npm run build:index", 12 | "dev": "npm run build && npm start" 13 | }, 14 | "keywords": [ 15 | "mcp", 16 | "ai", 17 | "vercel", 18 | "langchain", 19 | "faiss", 20 | "agent" 21 | ], 22 | "author": "Ivan Amador", 23 | "license": "MIT", 24 | "dependencies": { 25 | "@ai-sdk/google": "^1.2.5", 26 | "@langchain/community": "^0.3.38", 27 | "@langchain/core": "^0.3.43", 28 | "@xenova/transformers": "^2.17.2", 29 | "@modelcontextprotocol/sdk": "^1.8.0", 30 | "ai": "^4.2.10", 31 | "axios": "^1.8.4", 32 | "cheerio": "^1.0.0", 33 | "dotenv": "^16.4.7", 34 | "faiss-node": "^0.5.1", 35 | "zod": "^3.24.2" 36 | }, 37 | "devDependencies": { 38 | "@types/node": "^20.14.10", 39 | "typescript": "^5.5.3" 40 | } 41 | } -------------------------------------------------------------------------------- /src/config/index.ts: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import { fileURLToPath } from 'url'; 3 | import dotenv from 'dotenv'; 4 | 5 | dotenv.config(); 6 | 7 | // Helper to get __dirname in ES modules 8 | const __filename = fileURLToPath(import.meta.url); 9 | const __dirname = path.dirname(__filename); 10 | 11 | // Resolve project root assuming config is in src/config 12 | const projectRoot = path.resolve(__dirname, '..', '..'); 13 | 14 | interface AppConfig { 15 | sitemapUrl: string; 16 | embeddingModelName: string; 17 | agentModelName: string; 18 | 
/**
 * HTTP client shared by the sitemap download and every page download.
 * Bodies are returned as text, up to five redirects are followed, and any
 * 2xx/3xx status is treated as success (everything else rejects).
 */
const axiosInstance = axios.create({
  timeout: 30000,
  headers: {
    'User-Agent': 'VercelAIDocsIndexer/1.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml'
  },
  maxRedirects: 5,
  responseType: 'text',
  validateStatus: (status) => status >= 200 && status < 400,
});

/**
 * Downloads the pages listed in the configured sitemap, extracts their text
 * and stores one JSON file per page in `config.docsDir`.
 *
 * Two bookkeeping files are kept in the same directory:
 * - `lastmod_cache.json`: the sitemap `lastmod` of every page fetched
 *   successfully, used to skip unchanged pages on later runs;
 * - `summary.json`: a `{ url, title, filename }` entry per stored page.
 *
 * All diagnostics are written with `console.error` / `console.warn` /
 * `process.stderr`; nothing in this class writes to stdout.
 */
export class DocumentFetcher {
  /** Number of pages downloaded concurrently. */
  private static readonly BATCH_SIZE = 10;

  private readonly outputDir: string = config.docsDir;
  private readonly sitemapUrl: string = config.sitemapUrl;
  private readonly cacheFile: string = path.join(this.outputDir, 'lastmod_cache.json');
  private stats: FetcherStats = DocumentFetcher.emptyStats();
  private startTime: number = 0;

  /** Returns a zeroed statistics record. */
  private static emptyStats(): FetcherStats {
    return {
      totalUrls: 0,
      processedUrls: 0,
      modifiedUrls: 0,
      errors: 0,
      totalBytes: 0,
    };
  }

  /**
   * Runs one complete fetch cycle: read the sitemap, pick the URLs to refresh,
   * download them in batches, then persist the lastmod cache and the summary.
   *
   * @param forceUpdate When true every sitemap URL is fetched, ignoring the cache.
   * @returns One result per page that was fetched and parsed successfully
   *          (empty when nothing needed updating).
   * @throws Error when the sitemap cannot be downloaded or parsed.
   */
  public async fetchAndProcessDocuments(forceUpdate: boolean = false): Promise<FileProcessResult[]> {
    this.startTime = Date.now();
    // Start from clean counters so reusing the instance does not report stale totals.
    this.stats = DocumentFetcher.emptyStats();
    console.error(`Starting document fetch process from ${this.sitemapUrl}`);
    await FileUtils.ensureDirectoryExists(this.outputDir);

    const cache = await this.loadLastModCache();
    console.error(`Last successful run recorded: ${cache.lastRun || 'Never'}`);

    const sitemapUrls = await this.extractSitemapUrls();
    this.stats.totalUrls = sitemapUrls.length;
    console.error(`Found ${this.stats.totalUrls} URLs in sitemap.`);

    const urlsToProcess = forceUpdate ? sitemapUrls : this.filterUrlsToUpdate(sitemapUrls, cache);
    console.error(`Processing ${urlsToProcess.length} URLs (${forceUpdate ? 'forced update' : 'based on cache'}).`);

    if (urlsToProcess.length === 0 && !forceUpdate) {
      console.error("No documents require updating.");
      this.printStats();
      return [];
    }

    const results: FileProcessResult[] = [];
    for (let offset = 0; offset < urlsToProcess.length; offset += DocumentFetcher.BATCH_SIZE) {
      const batch = urlsToProcess.slice(offset, offset + DocumentFetcher.BATCH_SIZE);
      const settled = await Promise.allSettled(batch.map(urlObj => this.processUrl(urlObj)));

      settled.forEach((outcome, index) => {
        if (outcome.status === 'fulfilled') {
          // A null value means the page was skipped or failed; processUrl has
          // already logged (and, for failures, counted) it.
          if (!outcome.value) return;
          results.push(outcome.value);
          this.stats.processedUrls++;
          if (outcome.value.isNew || outcome.value.modified) {
            this.stats.modifiedUrls++;
          }
          this.stats.totalBytes += outcome.value.contentLength;
        } else {
          this.stats.errors++;
          const reason = outcome.reason ?? 'Unknown error';
          console.error(`Error processing URL ${batch[index]?.loc || 'unknown'}:`, reason);
        }
      });
      // Progress is measured against the URLs queued for this run, not the whole sitemap.
      this.printProgress(urlsToProcess.length);
    }

    // Only remember lastmod for pages that were actually fetched, so failed
    // pages are retried on the next run.
    const succeeded = new Set(results.map(result => result.url));
    for (const { loc, lastmod } of urlsToProcess) {
      if (succeeded.has(loc)) {
        cache.urls[loc] = lastmod;
      }
    }
    await this.saveLastModCache(cache);
    await this.updateSummary(results);

    console.error(`\nDocument fetch process completed.`);
    this.printStats();
    return results;
  }

  /**
   * Fetches one page and writes it to disk when it is new or its serialized
   * content hash differs from the stored file.
   *
   * @returns The processing result, or null when the page was skipped or an
   *          error occurred (errors are logged and counted, never thrown).
   */
  private async processUrl(urlObj: SitemapUrl): Promise<FileProcessResult | null> {
    const { loc: url, lastmod } = urlObj;
    const filename = urlToFilename(url);
    const filePath = path.join(this.outputDir, filename);

    try {
      const pageContent = await this.extractPageContent(url, lastmod);
      if (!pageContent) return null;

      const contentStr = JSON.stringify(pageContent, null, 2);
      const currentHash = generateHash(contentStr);

      const isNew = !(await FileUtils.fileExists(filePath));
      let modified = false;
      if (!isNew) {
        const existingContent = await FileUtils.readFile(filePath);
        modified = generateHash(existingContent) !== currentHash;
      }

      if (isNew || modified) {
        await FileUtils.writeFile(filePath, contentStr);
        console.error(`Saved ${isNew ? 'new' : 'modified'} document: ${filename} (URL: ${url.substring(0, 60)}...)`);
      }

      return {
        url,
        filePath,
        hash: currentHash,
        modified,
        isNew,
        contentLength: contentStr.length
      };
    } catch (error: unknown) {
      console.error(`Failed to process URL ${url}:`, error instanceof Error ? error.message : error);
      this.stats.errors++;
      return null;
    }
  }

  /**
   * Downloads a page and reduces it to title, description and plain text.
   *
   * @returns The extracted page, or null when the download failed or fewer
   *          than 50 characters of text were found.
   */
  private async extractPageContent(url: string, lastmod: string): Promise<PageContent | null> {
    try {
      const response = await axiosInstance.get<string>(url);
      const $ = cheerio.load(response.data);

      // Read metadata before stripping elements from the document.
      const title = $('head title').text().trim() || $('h1').first().text().trim() || url;
      const description = $('meta[name="description"]').attr('content') ||
        $('meta[property="og:description"]').attr('content') || '';

      // Drop page chrome so only the document body text remains.
      $('script, style, nav, header, footer, aside, .sidebar, .toc, .menu, .navigation, .ads, .advertisement, noscript').remove();

      // Use the first container that exists, falling back to <body>.
      let contentElement = $('body');
      for (const selector of ['main', 'article', '.content', '#content']) {
        const candidate = $(selector).first();
        if (candidate.length) {
          contentElement = candidate;
          break;
        }
      }

      // Collapse every run of two or more whitespace characters (newlines
      // included) into a single space.
      const content = contentElement.text().replace(/\s\s+/g, ' ').trim();

      if (content.length < 50) {
        console.warn(`Extracted minimal content (${content.length} chars) from ${url}. Skipping.`);
        return null;
      }

      return { url, lastmod, title, description, content };
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      console.error(`Error extracting content from ${url}: ${message}`);
      this.stats.errors++;
      return null;
    }
  }

  /**
   * Downloads the sitemap and returns its `<url>` entries.
   * Entries whose location contains `/playground` or is not a valid URL are
   * dropped; a missing `<lastmod>` defaults to the Unix epoch.
   *
   * @throws Error when the sitemap cannot be fetched or parsed.
   */
  private async extractSitemapUrls(): Promise<SitemapUrl[]> {
    try {
      console.error(`Downloading sitemap from ${this.sitemapUrl}`);
      const response = await axiosInstance.get<string>(this.sitemapUrl);
      const $ = cheerio.load(response.data, { xmlMode: true });
      const urls: SitemapUrl[] = [];

      $('url').each((_, element) => {
        const loc = $(element).find('loc').text().trim();
        const lastmod = $(element).find('lastmod').text().trim() || new Date(0).toISOString();

        if (!loc || loc.includes('/playground')) return;
        try {
          new URL(loc); // throws on malformed URLs
          urls.push({ loc, lastmod });
        } catch {
          console.warn(`Skipping invalid URL in sitemap: ${loc}`);
        }
      });
      return urls;
    } catch (error) {
      console.error(`Failed to extract sitemap URLs:`, error);
      throw new Error(`Could not fetch or parse sitemap from ${this.sitemapUrl}`);
    }
  }

  /** Keeps the URLs that are not cached yet or whose sitemap lastmod is newer than the cached one. */
  private filterUrlsToUpdate(sitemapUrls: SitemapUrl[], cache: CacheData): SitemapUrl[] {
    return sitemapUrls.filter(({ loc, lastmod }) => {
      const cachedLastmod = cache.urls[loc];
      return !cachedLastmod || new Date(lastmod) > new Date(cachedLastmod);
    });
  }

  /**
   * Reads the lastmod cache from disk.
   * A missing, unreadable or malformed file yields an empty cache instead of an error.
   */
  private async loadLastModCache(): Promise<CacheData> {
    try {
      if (await FileUtils.fileExists(this.cacheFile)) {
        const cacheContent = await FileUtils.readFile(this.cacheFile);
        const parsed: unknown = JSON.parse(cacheContent);
        // `typeof null === 'object'`, and arrays are objects too, so both are
        // rejected explicitly before the value is trusted as a URL map.
        const urls = parsed && typeof parsed === 'object'
          ? (parsed as { urls?: unknown }).urls
          : undefined;
        if (urls !== null && typeof urls === 'object' && !Array.isArray(urls)) {
          return parsed as CacheData;
        }
        console.warn(`Cache file ${this.cacheFile} has invalid format. Starting fresh.`);
      }
    } catch (error) {
      console.error(`Error reading cache file ${this.cacheFile}, creating a new one:`, error);
    }
    return { lastRun: '', urls: {} };
  }

  /** Stamps the cache with the current time and writes it to disk; write failures are logged only. */
  private async saveLastModCache(cache: CacheData): Promise<void> {
    try {
      cache.lastRun = new Date().toISOString();
      await FileUtils.writeFile(this.cacheFile, JSON.stringify(cache, null, 2));
      console.error(`Cache saved to: ${this.cacheFile}`);
    } catch (error) {
      console.error(`Error saving cache to ${this.cacheFile}:`, error);
    }
  }

  /**
   * Merges the new/modified pages of this run into `summary.json`.
   * Existing entries are kept; entries for the same URL are overwritten.
   * The file is only rewritten when at least one entry changed.
   */
  private async updateSummary(results: FileProcessResult[]): Promise<void> {
    type SummaryEntry = { url: string; title: string; filename: string };

    const summaryPath = path.join(this.outputDir, 'summary.json');
    let summary: SummaryEntry[] = [];
    try {
      if (await FileUtils.fileExists(summaryPath)) {
        const parsed: unknown = JSON.parse(await FileUtils.readFile(summaryPath));
        if (Array.isArray(parsed)) {
          summary = parsed as SummaryEntry[];
        } else {
          // A non-array file would otherwise crash the merge below.
          console.warn(`Existing summary file is not an array, rebuilding: ${summaryPath}`);
        }
      }
    } catch (error) {
      console.warn(`Could not load existing summary file: ${summaryPath}`, error);
    }

    const summaryMap = new Map<string, SummaryEntry>(
      summary.map((item): [string, SummaryEntry] => [item.url, item])
    );
    let updatedCount = 0;

    for (const result of results) {
      if (!result || !(result.isNew || result.modified)) continue;
      try {
        // The title is read back from the stored file rather than carried in the result.
        const pageData = JSON.parse(await FileUtils.readFile(result.filePath)) as PageContent;
        summaryMap.set(result.url, {
          url: result.url,
          title: pageData.title || 'No Title',
          filename: path.basename(result.filePath)
        });
        updatedCount++;
      } catch (error) {
        console.error(`Error reading/parsing updated file for summary: ${result.filePath}`, error);
      }
    }

    if (updatedCount === 0) {
      console.error(`Summary file remains unchanged.`);
      return;
    }

    try {
      await FileUtils.writeFile(summaryPath, JSON.stringify(Array.from(summaryMap.values()), null, 2));
      console.error(`Summary file updated with ${updatedCount} changes at: ${summaryPath}`);
    } catch (error) {
      console.error(`Error writing summary file: ${summaryPath}`, error);
    }
  }

  /**
   * Rewrites the single-line progress indicator on stderr.
   *
   * @param total Number of URLs queued for this run; defaults to the sitemap size.
   */
  private printProgress(total: number = this.stats.totalUrls): void {
    const processed = this.stats.processedUrls;
    const percentage = total > 0 ? ((processed / total) * 100).toFixed(1) : '0.0';
    const elapsedSecs = (Date.now() - this.startTime) / 1000;
    const rate = elapsedSecs > 0 ? (processed / elapsedSecs).toFixed(2) : '0.00';
    process.stderr.write(
      `\rProgress: ${processed}/${total} URLs (${percentage}%) | Modified: ${this.stats.modifiedUrls} | Errors: ${this.stats.errors} | Rate: ${rate} URLs/sec`
    );
  }

  /** Prints the end-of-run statistics block to stderr. */
  private printStats(): void {
    const elapsedSecs = (Date.now() - this.startTime) / 1000;
    const mbProcessed = (this.stats.totalBytes / (1024 * 1024)).toFixed(2);
    console.error(`\n--- Fetcher Stats ---`);
    console.error(`Total URLs in Sitemap: ${this.stats.totalUrls}`);
    console.error(`URLs Processed: ${this.stats.processedUrls}`);
    console.error(`URLs Modified/New: ${this.stats.modifiedUrls}`);
    console.error(`Errors Encountered: ${this.stats.errors}`);
    console.error(`Total Data Processed: ${mbProcessed} MB`);
    console.error(`Total Time: ${formatDuration(elapsedSecs)}`);
    console.error(`---------------------`);
  }
}
/**
 * Builds, persists, loads and queries the FAISS index over the fetched
 * documentation pages.
 *
 * Embeddings come from the HuggingFace transformers model named in
 * `config.embeddingModelName`; the index lives in `config.indexDir`.
 * All logging goes through `console.error` / `console.warn`
 * (NOTE(review): presumably to keep stdout free for the MCP transport — confirm).
 */
export class VectorStoreManager {
  /** Bookkeeping files in the docs directory that are not documentation pages. */
  private static readonly NON_DOCUMENT_FILES: ReadonlySet<string> = new Set([
    'summary.json',
    'lastmod_cache.json',
    'sitemap-index.json',
  ]);

  private embeddings: HuggingFaceTransformersEmbeddings;
  private vectorStore: FaissStore | null = null;
  private readonly indexDirectory: string = config.indexDir;

  constructor() {
    this.embeddings = new HuggingFaceTransformersEmbeddings({
      modelName: config.embeddingModelName,
    });
  }

  /**
   * Reads every page JSON file in `directory` and converts it to a LangChain
   * document whose text is title, description and content joined by blank lines.
   *
   * Files that cannot be read or parsed are logged and skipped, as are pages
   * that end up with no text.
   *
   * @param directory Directory containing the page JSON files.
   * @returns The documents ready for indexing.
   */
  public async loadJsonDocuments(directory: string): Promise<Document[]> {
    console.error(`Loading JSON documents from: ${directory}`);
    const jsonFiles = await FileUtils.listFiles(directory, '.json');
    const documents: Document[] = [];

    for (const file of jsonFiles) {
      if (VectorStoreManager.NON_DOCUMENT_FILES.has(file)) {
        continue;
      }
      const filePath = path.join(directory, file);
      try {
        const pageData = JSON.parse(await FileUtils.readFile(filePath)) as PageContent;
        // The JSON is not schema-validated, so guard against non-string fields
        // before calling trim().
        const pageContent = [pageData.title, pageData.description, pageData.content]
          .filter((part): part is string => typeof part === 'string' && part.trim().length > 0)
          .join('\n\n')
          .trim();

        if (!pageContent) {
          console.warn(`Skipping document ${file} due to empty content after processing.`);
          continue;
        }

        documents.push(
          new Document({
            pageContent,
            metadata: {
              source: path.basename(filePath),
              url: pageData.url || 'URL not available',
              title: pageData.title || 'No Title',
              lastmod: pageData.lastmod || 'Unknown',
            },
          })
        );
      } catch (error) {
        console.error(`Error processing JSON file ${filePath}:`, error);
      }
    }

    console.error(`Successfully loaded ${documents.length} documents for indexing.`);
    return documents;
  }

  /**
   * Embeds the documents, builds a FAISS index from them and saves it to the
   * index directory. Does nothing (besides logging) when `documents` is empty.
   *
   * @throws Rethrows any error raised while building or saving the index.
   */
  public async createIndex(documents: Document[]): Promise<void> {
    if (documents.length === 0) {
      console.error("No documents provided to create index. Skipping index creation.");
      return;
    }

    console.error(`Creating FAISS index for ${documents.length} documents...`);
    try {
      await FileUtils.ensureDirectoryExists(this.indexDirectory);
      await FaissStore.importFaiss();

      this.vectorStore = await FaissStore.fromDocuments(documents, this.embeddings);
      await this.vectorStore.save(this.indexDirectory);
      console.error(`FAISS index successfully created and saved to: ${this.indexDirectory}`);
    } catch (error) {
      console.error('Error creating or saving FAISS index:', error);
      throw error;
    }
  }

  /**
   * Loads a previously saved index from the index directory.
   *
   * @returns true when the index was loaded; false when the directory or one of
   *          `faiss.index` / `docstore.json` is missing, or loading failed
   *          (in which case any previously held store is cleared).
   */
  public async loadIndex(): Promise<boolean> {
    console.error(`Attempting to load FAISS index from: ${this.indexDirectory}`);
    try {
      if (!(await FileUtils.directoryExists(this.indexDirectory))) {
        console.error(`Index directory not found: ${this.indexDirectory}`);
        return false;
      }

      const indexFile = path.join(this.indexDirectory, 'faiss.index');
      const docstoreFile = path.join(this.indexDirectory, 'docstore.json');
      const hasIndexFile = await FileUtils.fileExists(indexFile);
      // Mirror the original short-circuit: only probe the docstore when the index file exists.
      const hasDocstore = hasIndexFile && (await FileUtils.fileExists(docstoreFile));
      if (!hasIndexFile || !hasDocstore) {
        console.error(`Essential index files (faiss.index, docstore.json) missing in ${this.indexDirectory}. Cannot load index.`);
        return false;
      }

      await FaissStore.importFaiss();
      this.vectorStore = await FaissStore.load(this.indexDirectory, this.embeddings);
      console.error('FAISS index loaded successfully.');
      return true;
    } catch (error) {
      console.error(`Error loading FAISS index from ${this.indexDirectory}:`, error);
      this.vectorStore = null;
      return false;
    }
  }

  /**
   * Runs a similarity search against the loaded index.
   *
   * @param query Text to search for.
   * @param k Maximum number of documents to return (defaults to `config.directQueryLimitDefault`).
   * @returns The matching documents as returned by the vector store.
   * @throws Error when no index is loaded; rethrows search failures.
   */
  public async search(query: string, k: number = config.directQueryLimitDefault): Promise<Document[]> {
    if (!this.vectorStore) {
      throw new Error('FAISS index is not loaded. Cannot perform search.');
    }

    console.error(`Performing similarity search for: "${query}" (k=${k})`);
    try {
      const results = await this.vectorStore.similaritySearch(query, k);
      console.error(`Found ${results.length} results for query.`);
      return results;
    } catch (error) {
      console.error(`Error during similarity search for "${query}":`, error);
      throw error;
    }
  }

  /** Whether an index is currently held in memory (created or loaded). */
  public isIndexLoaded(): boolean {
    return this.vectorStore !== null;
  }
}
/**
 * System prompt for the documentation agent.
 *
 * The prompt refers to the search tool as `documentSearch`, which is the key the
 * tool is registered under in `AgentService.generateAgentResponse` and the name
 * that appears in tool calls/results. Keep the two in sync.
 */
const AGENT_SYSTEM_PROMPT = `You are a specialized Vercel AI SDK consultant, possessing deep expertise in its architecture, functions, and practical application across various frameworks and server environments. Your primary function is to provide accurate, comprehensive, and code-supported answers to user queries regarding the Vercel AI SDK.

**Core Mandate & Workflow:**

1. **Analyze Query:** Deconstruct the user's request to identify the core concepts, functions, patterns, or issues being asked about.
2. **Memory Check:** Briefly consult your short-term session memory. If you have *already* searched and synthesized a complete, accurate, and relevant answer for this *exact* query within the current session, you may use that information.
3. **Mandatory Documentation Search (Default):** If the memory check fails (no prior relevant search in this session, or the query differs), you **MUST** use the \`documentSearch\` tool. This is your primary method for gathering information.
    * **Strategic Querying:** Plan your search. Do not rely on a single generic query. Instead:
        * Identify specific SDK function/hook names (\`streamText\`, \`useChat\`, \`generateObject\`, etc.).
        * Incorporate provider names if relevant (\`openai\`, \`anthropic\`, \`google\`).
        * Use technical keywords related to implementation patterns (\`tool streaming\`, \`multi-step calls\`, \`Generative UI\`, \`message persistence\`, \`error handling\`).
        * Include framework context (\`React\`, \`Svelte\`, \`Next.js\`, \`Express\`).
        * Target specific error types or limitations if mentioned.
    * **Iterative Search:** Consider executing *multiple*, targeted queries from different angles (e.g., one for the core function, another for a related option or error) to ensure comprehensive coverage, especially for complex questions. Aim for precision in each query.
4. **Synthesize Information:** Critically evaluate the search results returned by the \`documentSearch\` tool. Synthesize the information from the *most relevant* document snippets. **Do not introduce information not present in the search results or your explicit SDK Architecture Knowledge.** If the search results are insufficient or conflicting, state that clearly rather than guessing.
5. **Construct Response:** Generate your answer *strictly* based on the synthesized information (from search or verified memory). Adhere precisely to the **Detailed Response Structure** outlined below.

**SDK Architecture Knowledge (Internal Context):**

* Core modules: AI SDK Core (\`ai\`), AI SDK UI (\`@ai-sdk/react\`, etc.), AI SDK RSC (\`ai/rsc\`), provider-specific packages (\`@ai-sdk/openai\`, etc.).
* Function categories: Text generation/streaming (\`generateText\`, \`streamText\`), Structured data (\`generateObject\`, \`streamObject\`), Tools/Agents (\`tool\`, \`maxSteps\`), Embeddings (\`embed\`, \`embedMany\`), Image Generation (\`generateImage\`).
* UI Hooks: \`useChat\`, \`useCompletion\`, \`useObject\`, \`useAssistant\` (primarily React, check docs for others).
* Server implementations: Next.js (App/Pages), Express, Fastify, Hono, Nest.js, Node.js HTTP.

**Detailed Response Structure:**

1. **Concise Explanation:** Start by briefly explaining the relevant SDK concepts, functions, or hooks involved in the user's query, based *directly* on the search results.
2. **Code Examples:** Provide complete, runnable code examples (both server-side and client-side, if applicable) extracted or adapted *directly* from the search results. Clearly label filenames (e.g., \`app/api/chat/route.ts\`, \`app/page.tsx\`).
3. **Configuration & Options:** Highlight key configuration options or parameters relevant to the query, explaining their purpose as described in the documentation snippets.
4. **Error Handling / Limitations:** If relevant *and found in the search results*, include specific error handling patterns or mention known limitations/workarounds.
5. **Documentation Reference:** Cite the source documents used. Include the \`title\` and \`url\` (or filename if URL is unavailable) from the search result metadata. Example: "*(Source: Generating Text - /docs/ai-sdk-core/generating-text)*" or "*(Source: stream-text.ts)*".
6. **Provider Specifics:** If applicable and documented, explain provider-specific behaviors, optimizations (e.g., caching, reasoning), or requirements related to the query.

**Special Topic Expertise (Utilize during Query Analysis & Search Planning):**

* Tool implementation patterns (server-side, client-side, hybrid, user interaction).
* UI streaming techniques, \`StreamData\`, message annotations, UI throttling.
* Multi-modal capabilities (image/document/audio inputs/outputs).
* Model capabilities and provider differences (tool support, object generation modes).
* Provider features: Anthropic Cache Control, Google Search Grounding, OpenAI Reasoning.
* Edge runtime considerations.
* TypeScript usage and type safety within the SDK.

**Constraint:** Your primary information source **MUST** be the \`documentSearch\` tool unless recalling information from an identical query *within the same session*. Do not invent features or behaviors not supported by the retrieved documentation. If the documentation is insufficient for a complete answer, explicitly state this limitation.`;

/**
 * Answers questions about the Vercel AI SDK by letting an LLM call a
 * documentation-search tool backed by the vector index, and persists the
 * conversation per session through `SessionStore`.
 */
export class AgentService {
  private readonly vectorStoreManager: VectorStoreManager;

  /**
   * @param vectorStoreManager Manager whose index must already be loaded.
   * @throws Error when `isIndexLoaded()` reports that no index is available.
   */
  constructor(vectorStoreManager: VectorStoreManager) {
    if (!vectorStoreManager.isIndexLoaded()) {
      throw new Error("AgentService requires a loaded VectorStoreManager index.");
    }
    this.vectorStoreManager = vectorStoreManager;
  }

  /**
   * Builds the search tool handed to the model.
   *
   * The tool never throws: an empty result set is reported as `{ info }` and a
   * failed search as `{ error }`, so the model can tell the user what happened.
   */
  private createDocumentSearchTool() {
    return tool({
      description: 'Searches information in the Vercel AI SDK documentation. Use this tool exclusively to answer questions about Vercel AI SDK features, functions, usage, concepts, or code examples. Use specific technical keywords for precise results.',
      parameters: z.object({
        query: z.string().describe('The specific question or topic to search for in the Vercel AI SDK documentation (e.g., "streamText usage", "useChat hook options", "handling tool errors").')
      }),
      execute: async ({ query }) => {
        console.error(`[Agent Internal Tool] Executing document search: "${query}"`);
        try {
          const results = await this.vectorStoreManager.search(query, config.agentQueryLimitDefault);
          if (results.length === 0) {
            return { info: `No relevant documents found for query: ${query}`, query };
          }
          const formattedResults = results.map((doc: Document, i: number) => ({
            index: i + 1,
            source: doc.metadata.source ? path.basename(doc.metadata.source) : 'unknown',
            // Cap each snippet so several results fit into the model context.
            content: doc.pageContent.substring(0, 1500),
            title: doc.metadata.title || 'No title',
            url: doc.metadata.url || 'URL not available',
          }));
          return {
            results: formattedResults,
            query,
            timestamp: new Date().toISOString()
          };
        } catch (error) {
          console.error(`[Agent Internal Tool] Error during document search for "${query}":`, error);
          return { error: `Failed to search documents for query: ${query}. Inform the user you couldn't perform the search.`, query };
        }
      }
    });
  }

  /**
   * Runs one agent turn: loads the session history, appends the user query,
   * lets the model answer (calling `documentSearch` as needed), stores the
   * updated history and returns a summary of the turn.
   *
   * @param query User question.
   * @param sessionId Identifier of the conversation whose history is loaded and saved.
   * @returns The answer, the tool calls/results made during the turn and the full message history.
   * @throws Error prefixed with "Agent processing failed:" when generation fails.
   */
  public async generateAgentResponse(
    query: string,
    sessionId: string
  ): Promise<AgentResponse> {
    console.error(`Generating agent response for session ${sessionId}, query: "${query}"`);
    const loadedMessages = await sessionStore.loadSessionMessages(sessionId);
    const currentMessages: CoreMessage[] = [
      ...loadedMessages,
      { role: 'user', content: query }
    ];

    // The key used here is the tool name the model sees and the name reported
    // back in `toolCalls` / `toolResults`.
    const tools = {
      documentSearch: this.createDocumentSearchTool()
    };

    try {
      // No explicit type annotation here; the result type is inferred from `tools`.
      const geminiResponse = await generateText({
        model: google(config.agentModelName) as LanguageModel,
        system: AGENT_SYSTEM_PROMPT,
        messages: currentMessages,
        tools,
        maxSteps: config.agentMaxSteps,
      });

      const updatedMessages: CoreMessage[] = [
        ...currentMessages,
        ...geminiResponse.response.messages,
      ];
      await sessionStore.saveSessionMessages(sessionId, updatedMessages);

      const steps = geminiResponse.steps ?? [];
      // `||` is intentional: an empty string also falls back to the default text.
      const finalAnswer = geminiResponse.text || "I couldn't generate a response.";

      const formattedResponse: AgentResponse = {
        answer: finalAnswer,
        rawTextResponse: geminiResponse.text,
        toolCalls: steps.flatMap(step => {
          // Defensive: only map when toolCalls exists and is an array.
          if (!step.toolCalls || !Array.isArray(step.toolCalls)) {
            return [];
          }

          return step.toolCalls.map(call => ({
            tool: call.toolName,
            query: (call.args && typeof call.args === 'object' && 'query' in call.args)
              ? String(call.args.query || '')
              : 'N/A',
            timestamp: new Date().toISOString()
          }));
        }),
        toolResults: steps.flatMap(step => {
          // Defensive: only map when toolResults exists and is an array.
          if (!step.toolResults || !Array.isArray(step.toolResults)) {
            return [];
          }

          return step.toolResults.map(result => {
            const baseResult = {
              tool: result.toolName,
              timestamp: new Date().toISOString()
            };

            // Only successful searches carry a `results` array; the `info` and
            // `error` shapes fall through to the bare result below.
            if (result.toolName === 'documentSearch' &&
                result.result &&
                typeof result.result === 'object' &&
                'results' in result.result &&
                Array.isArray(result.result.results)) {
              return {
                ...baseResult,
                documents: result.result.results.map((doc: { title?: string; url?: string }) => ({
                  title: doc.title || 'No title',
                  url: doc.url || 'URL not available'
                }))
              };
            }
            return baseResult;
          });
        }),
        sessionId: sessionId,
        messages: updatedMessages,
      };

      console.error(`Agent response generated successfully for session ${sessionId}.`);
      return formattedResponse;

    } catch (error) {
      console.error(`Error generating agent response for session ${sessionId}:`, error);
      if (error instanceof Error && 'url' in error && 'statusCode' in error) {
        const details = error as Error & { url: unknown; statusCode: unknown };
        console.error(`API Call Details: URL=${String(details.url)}, Status=${String(details.statusCode)}`);
      }
      throw new Error(`Agent processing failed: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  /**
   * Deletes stored conversation history.
   *
   * @param sessionId Session to clear; when omitted, every stored session is cleared.
   */
  public async clearSession(sessionId?: string): Promise<void> {
    if (sessionId) {
      await sessionStore.deleteSessionMessages(sessionId);
      console.error(`Cleared persistent memory for session: ${sessionId}`);
    } else {
      await sessionStore.deleteAllSessionMessages();
      console.error('Cleared persistent memory for all sessions.');
    }
  }
}
/**
 * Maps a session id to the JSON file that stores its messages.
 *
 * The id is reduced to its base name and every character outside
 * `[a-z0-9_-]` (case-insensitive) is replaced with `_`, so the resulting path
 * always stays inside `config.sessionsDir`.
 *
 * @throws Error when the sanitized id is empty.
 */
function getSessionFilePath(sessionId: string): string {
  const safeSessionId = path.basename(sessionId).replace(/[^a-z0-9_-]/gi, '_');
  if (!safeSessionId) {
    throw new Error("Invalid sessionId provided.");
  }
  return path.join(config.sessionsDir, `${safeSessionId}.json`);
}

/**
 * Loads the stored message history of a session.
 *
 * Never rejects for I/O or parse problems: a missing file, unreadable file or
 * invalid JSON yields an empty history (the error is logged). A file whose
 * content parses but is not an array is deleted before returning.
 *
 * @returns The stored messages, or `[]` when nothing usable is stored.
 */
export async function loadSessionMessages(sessionId: string): Promise<CoreMessage[]> {
  const filePath = getSessionFilePath(sessionId);
  try {
    if (!(await FileUtils.fileExists(filePath))) {
      return [];
    }
    const fileContent = await FileUtils.readFile(filePath);
    const messages: unknown = JSON.parse(fileContent);
    if (Array.isArray(messages)) {
      // Only the outer shape is checked; individual entries are trusted.
      return messages as CoreMessage[];
    }
    console.warn(`Session file ${filePath} content is not an array. Returning empty history.`);
    await deleteSessionMessages(sessionId);
    return [];
  } catch (error) {
    console.error(`Error loading or parsing session messages for ${sessionId} from ${filePath}:`, error);
    return [];
  }
}

/**
 * Writes the full message history of a session as pretty-printed JSON.
 *
 * Best effort: write failures are logged and not rethrown.
 */
export async function saveSessionMessages(sessionId: string, messages: CoreMessage[]): Promise<void> {
  const filePath = getSessionFilePath(sessionId);
  try {
    await FileUtils.ensureDirectoryExists(config.sessionsDir);
    const content = JSON.stringify(messages, null, 2);
    await FileUtils.writeFile(filePath, content);
  } catch (error) {
    console.error(`Error saving session messages for ${sessionId} to ${filePath}:`, error);
  }
}

/**
 * Removes the stored history of one session, if a file exists for it.
 *
 * Best effort: deletion failures are logged and not rethrown.
 */
export async function deleteSessionMessages(sessionId: string): Promise<void> {
  const filePath = getSessionFilePath(sessionId);
  try {
    if (await FileUtils.fileExists(filePath)) {
      await fsp.unlink(filePath);
      // Diagnostics go to stderr.
      console.error(`Deleted session file for ${sessionId}: ${filePath}`);
    }
  } catch (error) {
    console.error(`Error deleting session file for ${sessionId} at ${filePath}:`, error);
  }
}

/**
 * Removes every `.json` file found in `config.sessionsDir`.
 *
 * Files are deleted one by one so that a single failure does not stop the
 * rest; all failures are logged and none is rethrown.
 */
export async function deleteAllSessionMessages(): Promise<void> {
  const sessionsDir = config.sessionsDir;
  try {
    if (!(await FileUtils.directoryExists(sessionsDir))) {
      return;
    }
    const files = await FileUtils.listFiles(sessionsDir, '.json');
    let deletedCount = 0;
    for (const file of files) {
      const filePath = path.join(sessionsDir, file);
      try {
        await fsp.unlink(filePath);
        deletedCount++;
      } catch (unlinkError) {
        console.error(`Error deleting session file ${filePath}:`, unlinkError);
      }
    }
    if (deletedCount > 0) {
      // Diagnostics go to stderr.
      console.error(`Deleted ${deletedCount} session files from ${sessionsDir}.`);
    }
  } catch (error) {
    console.error(`Error deleting all session messages from ${sessionsDir}:`, error);
  }
}
Process waiting indefinitely for transport closure or signals."); 9 | await new Promise(() => {}); 10 | } catch (error) { 11 | console.error("Fatal error starting the application:", error); 12 | process.exit(1); 13 | } 14 | } 15 | 16 | const scriptPath = fileURLToPath(import.meta.url); 17 | const isDirectRun = process.argv[1] && (process.argv[1] === scriptPath || process.argv[1].endsWith('/dist/main.js')); 18 | 19 | if (isDirectRun) { 20 | main(); 21 | } -------------------------------------------------------------------------------- /src/mcp/server.ts: -------------------------------------------------------------------------------- 1 | import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; 2 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; 3 | import { ZodRawShape } from "zod"; 4 | import { VectorStoreManager } from '../core/indexing/VectorStoreManager.js'; 5 | import { AgentService } from '../core/query/AgentService.js'; 6 | import { DirectQueryService } from '../core/query/DirectQueryService.js'; 7 | import { createAllMcpTools } from './tools/index.js'; 8 | import { config } from "../config/index.js"; 9 | 10 | type McpToolExecuteResult = Promise<{ 11 | content: { type: "text"; text: string; }[]; 12 | isError?: boolean; 13 | }>; 14 | 15 | 16 | export async function startMcpServer(): Promise { 17 | // Log server start to stderr 18 | console.error('Starting Vercel AI Docs MCP Server...'); 19 | 20 | const vectorStoreManager = new VectorStoreManager(); 21 | const indexLoaded = await vectorStoreManager.loadIndex(); // Logs internally via console.error 22 | 23 | if (!indexLoaded) { 24 | console.error(`FAISS index not found or failed to load from ${config.indexDir}.`); 25 | console.error('Please run "npm run build:index" to create the index before starting the server.'); 26 | process.exit(1); 27 | } 28 | 29 | const agentService = new AgentService(vectorStoreManager); 30 | const directQueryService = new 
DirectQueryService(vectorStoreManager); 31 | // Log success to stderr 32 | console.error('Query services initialized.'); 33 | 34 | const server = new McpServer({ 35 | name: "vercel-ai-docs-search", 36 | version: "1.0.0", 37 | capabilities: { 38 | tools: { listChanged: false }, 39 | } 40 | }); 41 | 42 | const mcpTools = createAllMcpTools(agentService, directQueryService); 43 | 44 | server.tool( 45 | mcpTools.agentQueryTool.name, 46 | mcpTools.agentQueryTool.description, 47 | mcpTools.agentQueryTool.inputSchema.shape as ZodRawShape, 48 | (args: { [x: string]: any }): McpToolExecuteResult => { 49 | return mcpTools.agentQueryTool.execute(args as { query: string; sessionId: string; }); 50 | } 51 | ); 52 | // Log registration to stderr 53 | console.error(`Registered MCP tool: ${mcpTools.agentQueryTool.name}`); 54 | 55 | server.tool( 56 | mcpTools.directQueryTool.name, 57 | mcpTools.directQueryTool.description, 58 | mcpTools.directQueryTool.inputSchema.shape as ZodRawShape, 59 | (args: { [x: string]: any }): McpToolExecuteResult => { 60 | return mcpTools.directQueryTool.execute(args as { query: string; limit?: number; }); 61 | } 62 | ); 63 | // Log registration to stderr 64 | console.error(`Registered MCP tool: ${mcpTools.directQueryTool.name}`); 65 | 66 | server.tool( 67 | mcpTools.clearMemoryTool.name, 68 | mcpTools.clearMemoryTool.description, 69 | mcpTools.clearMemoryTool.inputSchema.shape as ZodRawShape, 70 | (args: { [x: string]: any }): McpToolExecuteResult => { 71 | return mcpTools.clearMemoryTool.execute(args as { sessionId?: string; }); 72 | } 73 | ); 74 | // Log registration to stderr 75 | console.error(`Registered MCP tool: ${mcpTools.clearMemoryTool.name}`); 76 | 77 | const transport = new StdioServerTransport(); 78 | try { 79 | // Log connection attempt to stderr 80 | console.error("Attempting to connect transport..."); 81 | await server.connect(transport); 82 | // Log connection success to stderr 83 | console.error("MCP server transport connected 
type McpTextContent = { type: "text"; text: string; };

/**
 * Creates the `agent-query` MCP tool definition.
 *
 * The tool forwards the query to `AgentService.generateAgentResponse` and
 * returns a JSON text payload with the answer, one entry per tool call made by
 * the agent, and the session id. Failures are reported as an `isError` result
 * instead of being thrown.
 *
 * @param agentService Service that produces the agent answer.
 */
export function createAgentQueryTool(agentService: AgentService) {
  return {
    name: "agent-query",
    description: "Query the Vercel AI SDK documentation using an AI agent that can search and synthesize information. Requires a session ID for conversation history.",
    inputSchema: z.object({
      query: z.string().describe("The question or topic to ask the agent about the Vercel AI SDK."),
      sessionId: z.string().uuid().describe("Required UUID session identifier for maintaining conversation history."),
    }),
    execute: async ({ query, sessionId }: { query: string; sessionId: string; }) => {
      try {
        // Diagnostics go to stderr.
        console.error(`[MCP Tool] Received agent-query for session ${sessionId}`);
        const agentResponse = await agentService.generateAgentResponse(query, sessionId);

        // Pair each call with its own result. Timestamps cannot be used for
        // this: they are stamped when the response is formatted, so every
        // result compares >= every call and the first result would match all
        // calls. Instead, results are consumed in order, per tool name.
        // NOTE(review): assumes results are reported in the same order as the
        // calls of the same tool; confirm against the AgentService output.
        const pendingResults = new Map<string, typeof agentResponse.toolResults>();
        for (const toolResult of agentResponse.toolResults) {
          const queue = pendingResults.get(toolResult.tool);
          if (queue) {
            queue.push(toolResult);
          } else {
            pendingResults.set(toolResult.tool, [toolResult]);
          }
        }

        const toolInteractions = agentResponse.toolCalls.map((toolCall, idx) => {
          const matchedResult = pendingResults.get(toolCall.tool)?.shift();
          return {
            call: idx + 1,
            tool: toolCall.tool,
            query: toolCall.query,
            resultsSummary: matchedResult?.documents?.map(d => ({ title: d.title, url: d.url }))
              ?? 'No documents found or error'
          };
        });

        const responseContent: McpTextContent = {
          type: "text" as const,
          text: JSON.stringify({
            answer: agentResponse.answer,
            toolInteractions,
            sessionId: agentResponse.sessionId,
          }, null, 2)
        };

        return { content: [responseContent] };
      } catch (error) {
        console.error(`[MCP Tool] Error processing agent-query for session ${sessionId}:`, error);
        const errorMessage = error instanceof Error ? error.message : String(error);
        const errorContent: McpTextContent = {
          type: "text" as const,
          text: JSON.stringify({ error: `Agent query failed: ${errorMessage}` })
        };
        return { content: [errorContent], isError: true };
      }
    }
  };
}
error.message : String(error); 29 | const errorContent: McpTextContent = { 30 | type: "text" as const, 31 | text: JSON.stringify({ success: false, error: `Failed to clear memory: ${errorMessage}` }) 32 | }; 33 | return { content: [errorContent], isError: true }; 34 | } 35 | } 36 | }; 37 | } -------------------------------------------------------------------------------- /src/mcp/tools/directQueryTool.ts: -------------------------------------------------------------------------------- 1 | import { z } from 'zod'; 2 | import { DirectQueryService } from '../../core/query/DirectQueryService.js'; 3 | import { config } from '../../config/index.js'; 4 | 5 | type McpTextContent = { type: "text"; text: string; }; 6 | 7 | export function createDirectQueryTool(directQueryService: DirectQueryService) { 8 | return { 9 | name: "direct-query", 10 | description: "Perform a direct similarity search against the Vercel AI SDK documentation index.", 11 | inputSchema: z.object({ 12 | query: z.string().describe("The search query."), 13 | limit: z.number().optional().default(config.directQueryLimitDefault).describe("Maximum number of results to return."), 14 | }), 15 | execute: async ({ query, limit }: { query: string; limit?: number; }) => { 16 | try { 17 | // Log tool execution start to stderr 18 | console.error(`[MCP Tool] Received direct-query: "${query}" (limit: ${limit})`); 19 | const results = await directQueryService.performSearch(query, limit); 20 | const responseContent: McpTextContent = { 21 | type: "text" as const, 22 | text: JSON.stringify(results, null, 2) 23 | }; 24 | return { content: [responseContent] }; 25 | } catch (error) { 26 | console.error(`[MCP Tool] Error processing direct-query "${query}":`, error); 27 | const errorMessage = error instanceof Error ? 
/**
 * Fetches the documentation, then rebuilds the FAISS index when something
 * changed or `--force` was passed on the command line.
 *
 * Exit behaviour:
 * - fetch or index-creation errors terminate the process with code 1;
 * - if the run ends without a usable index (no documents after cleaning the
 *   index directory, or the existing index cannot be loaded when the rebuild
 *   is skipped), `process.exitCode` is set to 1 and no success message is
 *   printed.
 */
async function runIndexing(): Promise<void> {
  console.log('Starting Vercel AI SDK Documentation Indexing Process...');
  const startTime = Date.now();
  const elapsedSeconds = (): number => (Date.now() - startTime) / 1000;

  const forceUpdate = process.argv.includes('--force');
  if (forceUpdate) {
    console.log("Force update requested. Cache will be ignored and index rebuilt.");
  }

  const fetcher = new DocumentFetcher();
  let processedFiles: FileProcessResult[];
  try {
    processedFiles = await fetcher.fetchAndProcessDocuments(forceUpdate);
    if (processedFiles.length === 0 && !forceUpdate) {
      console.log("No documents were updated based on the cache. Index remains unchanged.");
      console.log(`Indexing process finished in ${elapsedSeconds()} seconds.`);
      return;
    }
  } catch (error) {
    console.error("Fatal error during document fetching/processing:", error);
    process.exit(1);
  }

  const vectorStoreManager = new VectorStoreManager();
  const shouldRebuildIndex = forceUpdate || processedFiles.some((f: FileProcessResult) => f.isNew || f.modified);

  // Becomes false when the run ends without a usable index.
  let indexUsable = true;

  if (shouldRebuildIndex) {
    console.log("Changes detected or force update requested. Rebuilding FAISS index...");
    try {
      console.log(`Cleaning index directory: ${config.indexDir}`);
      await FileUtils.removeDirectory(config.indexDir);
      await FileUtils.ensureDirectoryExists(config.indexDir);

      const documents = await vectorStoreManager.loadJsonDocuments(config.docsDir);
      if (documents.length > 0) {
        await vectorStoreManager.createIndex(documents);
        console.log("FAISS index rebuild complete.");
      } else {
        // The old index was just removed, so nothing usable is left on disk.
        console.warn("No valid documents found to build the index after cleaning.");
        indexUsable = false;
      }
    } catch (error) {
      console.error("Fatal error during index creation:", error);
      process.exit(1);
    }
  } else {
    console.log("No significant changes detected. Index rebuild skipped.");
    if (!await vectorStoreManager.loadIndex()) {
      console.error("Index rebuild was skipped, but the existing index could not be loaded. Please check index files or run with --force.");
      indexUsable = false;
    }
  }

  if (!indexUsable) {
    console.error(`Indexing process finished with errors in ${elapsedSeconds()} seconds.`);
    // Signal failure to the caller without cutting off pending output.
    process.exitCode = 1;
    return;
  }

  console.log(`Indexing process finished successfully in ${elapsedSeconds()} seconds.`);
}

// Run only when this file is the script being executed, not when it is imported.
const scriptPath = fileURLToPath(import.meta.url);
const isDirectRun = process.argv[1] === scriptPath;

if (isDirectRun) {
  runIndexing().catch(error => {
    console.error('Unhandled error during indexing:', error);
    process.exit(1);
  });
}
/**
 * Returns the hex digest of `content`.
 *
 * @param content Text to hash.
 * @param algorithm Any algorithm name accepted by `crypto.createHash` (default `sha256`).
 */
export function generateHash(content: string, algorithm: string = 'sha256'): string {
  return createHash(algorithm).update(content).digest('hex');
}

/**
 * Formats a duration in seconds as `HH:MM:SS`.
 *
 * Fractions of a second are dropped. Hours are not capped, so durations of
 * 100 hours or more produce more than two hour digits. Negative, `NaN` and
 * infinite inputs are treated as zero instead of yielding strings such as
 * `-1:-1:-5` or `NaN:NaN:NaN`.
 *
 * @param seconds Duration in seconds.
 */
export function formatDuration(seconds: number): string {
  const totalSeconds = Number.isFinite(seconds) && seconds > 0 ? Math.floor(seconds) : 0;
  const hours = Math.floor(totalSeconds / 3600);
  const minutes = Math.floor((totalSeconds % 3600) / 60);
  const secs = totalSeconds % 60;

  return [hours, minutes, secs]
    .map(part => part.toString().padStart(2, '0'))
    .join(':');
}

/**
 * Derives a JSON file name from a documentation URL.
 *
 * The `http(s)://sdk.vercel.ai/` prefix is removed, the characters
 * `/ ? < > \ : * | "` become `-`, runs of `-` collapse to one and a leading or
 * trailing `-` is trimmed. Names longer than 100 characters are cut to 92
 * characters and suffixed with `-` plus the first 8 hex digits of the URL's
 * MD5, which keeps distinct long URLs distinct. An empty name becomes `index`.
 *
 * @param url Documentation page URL.
 * @returns The file name including the `.json` extension.
 */
export function urlToFilename(url: string): string {
  let filename = url
    .replace(/^https?:\/\/sdk\.vercel\.ai\/?/, '')
    .replace(/[\/?<>\\:*|"]/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-|-$/g, '');

  if (filename.length > 100) {
    // MD5 is used only to disambiguate truncated names, not for security.
    const hash = createHash('md5').update(url).digest('hex').substring(0, 8);
    filename = `${filename.substring(0, 92)}-${hash}`;
  }

  // A lone '-' cannot survive the trimming above, so only the empty case remains.
  if (filename === '') {
    filename = 'index';
  }

  return `${filename}.json`;
}
-------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "NodeNext", 5 | "moduleResolution": "NodeNext", 6 | "outDir": "./dist", 7 | "rootDir": "./src", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "resolveJsonModule": true 13 | }, 14 | "include": ["src/**/*"], 15 | "exclude": ["node_modules", "dist", "files"] 16 | } --------------------------------------------------------------------------------