├── .prettierrc ├── .gitignore ├── tsconfig.json ├── jest.config.js ├── .github └── workflows │ ├── ci.yml │ └── publish.yml ├── smithery.yaml ├── .eslintrc.json ├── LICENSE ├── Dockerfile ├── jest.setup.ts ├── package.json ├── CHANGELOG.md ├── src ├── index.test.ts └── index.ts └── README.md /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "trailingComma": "es5", 4 | "singleQuote": true, 5 | "printWidth": 80, 6 | "tabWidth": 2, 7 | "useTabs": false 8 | } 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | node_modules/ 3 | 4 | # Build 5 | dist/ 6 | 7 | # Logs 8 | logs 9 | *.log 10 | npm-debug.log* 11 | 12 | # Environment 13 | .env 14 | .env.local 15 | .env.*.local 16 | claude_desktop_config.json 17 | 18 | # IDE 19 | .idea/ 20 | .vscode/ 21 | *.swp 22 | *.swo 23 | .cursorrules.md 24 | IMPLEMENTATION.md 25 | v1.2.md 26 | 27 | # OS 28 | .DS_Store 29 | Thumbs.db -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "NodeNext", 5 | "moduleResolution": "NodeNext", 6 | "outDir": "./dist", 7 | "rootDir": "./src", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true 12 | }, 13 | "include": ["src/**/*"], 14 | "exclude": ["node_modules", "dist", "tests"] 15 | } 16 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | preset: 'ts-jest/presets/default-esm', 3 | testEnvironment: 'node', 4 | extensionsToTreatAsEsm: ['.ts'], 5 | transform: { 6 | '^.+\\.tsx?$': [ 7 | 'ts-jest', 8 | { 9 | useESM: true, 10 | }, 11 | ], 12 | }, 13 | moduleNameMapper: { 14 | '^(\\.{1,2}/.*)\\.js$': '$1', 15 | }, 16 | testMatch: ['**/*.test.ts'], 17 | setupFilesAfterEnv: ['/jest.setup.ts'], 18 | }; 19 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - name: Use Node.js 17 | uses: actions/setup-node@v3 18 | with: 19 | node-version: '20.x' 20 | cache: 'npm' 21 | 22 | - name: Install dependencies 23 | run: npm ci 24 | 25 | - name: Build 26 | run: npm run build 27 | 28 | - name: Lint 29 | run: npm run lint 30 | 31 | - name: Test 32 | run: npm test 33 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | workflow_dispatch: 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v3 14 | 15 | - name: Use Node.js 16 | uses: actions/setup-node@v3 17 | with: 18 | node-version: '20.x' 19 | registry-url: 'https://registry.npmjs.org' 20 | 21 | - name: Install dependencies 22 | run: npm ci 23 | 24 | - name: 
Build 25 | run: npm run build 26 | 27 | - name: Publish to NPM 28 | run: npm publish 29 | env: 30 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 31 | -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml 2 | 3 | startCommand: 4 | type: stdio 5 | configSchema: 6 | # JSON Schema defining the configuration options for the MCP. 7 | type: object 8 | required: 9 | - fireCrawlApiKey 10 | properties: 11 | fireCrawlApiKey: 12 | type: string 13 | description: Your FireCrawl API key. Required for cloud API usage. 14 | fireCrawlApiUrl: 15 | type: string 16 | description: Custom API endpoint for self-hosted instances. If provided, API key 17 | becomes optional. 18 | commandFunction: 19 | # A function that produces the CLI command to start the MCP on stdio. 20 | |- 21 | (config) => ({ command: 'node', args: ['dist/src/index.js'], env: { FIRECRAWL_API_KEY: config.fireCrawlApiKey, FIRECRAWL_API_URL: config.fireCrawlApiUrl || '' } }) 22 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "parser": "@typescript-eslint/parser", 3 | "plugins": ["@typescript-eslint"], 4 | "extends": [ 5 | "eslint:recommended", 6 | "plugin:@typescript-eslint/recommended", 7 | "prettier" 8 | ], 9 | "env": { 10 | "node": true, 11 | "es2022": true 12 | }, 13 | "parserOptions": { 14 | "ecmaVersion": 2022, 15 | "sourceType": "module", 16 | "project": "./tsconfig.json" 17 | }, 18 | "rules": { 19 | "@typescript-eslint/explicit-function-return-type": "off", 20 | "@typescript-eslint/no-explicit-any": "off", 21 | "@typescript-eslint/no-unused-vars": [ 22 | "error", 23 | { "argsIgnorePattern": "^_" } 24 | ] 25 | }, 26 | "overrides": [ 27 | { 28 | "files": ["**/*.test.ts"], 29 | "rules": { 30 | "@typescript-eslint/no-unused-vars": "off", 31 | "@typescript-eslint/no-explicit-any": "off" 32 | } 33 | } 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 vrknetha 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile 2 | # Use a Node.js image as the base for building the application 3 | FROM node:18-alpine AS builder 4 | 5 | # Set the working directory inside the container 6 | WORKDIR /app 7 | 8 | # Copy package.json and package-lock.json to install dependencies 9 | COPY package.json package-lock.json ./ 10 | 11 | # Install dependencies (ignoring scripts to prevent running the prepare script) 12 | RUN npm install --ignore-scripts 13 | 14 | # Copy the rest of the application source code 15 | COPY . . 16 | 17 | # Build the application using TypeScript 18 | RUN npm run build 19 | 20 | # Use a smaller Node.js image for the final image 21 | FROM node:18-slim AS release 22 | 23 | # Set the working directory inside the container 24 | WORKDIR /app 25 | 26 | # Copy the built application from the builder stage 27 | COPY --from=builder /app/dist /app/dist 28 | COPY --from=builder /app/package.json /app/package.json 29 | COPY --from=builder /app/package-lock.json /app/package-lock.json 30 | 31 | # Install only production dependencies 32 | RUN npm ci --omit=dev 33 | 34 | # Set environment variables for API key and custom API URL if needed 35 | ENV FIRECRAWL_API_KEY=your-api-key 36 | ENV FIRECRAWL_API_URL=https://firecrawl.your-domain.com 37 | 38 | # Specify the command to run the application 39 | ENTRYPOINT ["node", "dist/src/index.js"] 40 | -------------------------------------------------------------------------------- /jest.setup.ts: -------------------------------------------------------------------------------- 1 | import { jest } from '@jest/globals'; 2 | import FirecrawlApp from '@mendable/firecrawl-js'; 3 | import type { 4 | SearchResponse, 5 | BatchScrapeResponse, 6 | BatchScrapeStatusResponse, 7 | FirecrawlDocument, 8 | } from '@mendable/firecrawl-js'; 9 | 10 | // Set test timeout 11 | jest.setTimeout(30000); 12 | 13 | // Create mock responses 14 | const mockSearchResponse: SearchResponse = { 15 | success: true, 16 | data: [ 17 | { 18 | url: 'https://example.com', 19 | title: 'Test Page', 20 | description: 'Test Description', 21 | markdown: '# Test Content', 22 | actions: null as never, 23 | }, 24 | ] as FirecrawlDocument[], 25 | }; 26 | 27 | const mockBatchScrapeResponse: BatchScrapeResponse = { 28 | success: true, 29 | id: 'test-batch-id', 30 | }; 31 | 32 | const mockBatchStatusResponse: BatchScrapeStatusResponse = { 33 | success: true, 34 | status: 'completed', 35 | completed: 1, 36 | total: 1, 37 | creditsUsed: 1, 38 | expiresAt: new Date(), 39 | data: [ 40 | { 41 | url: 'https://example.com', 42 | title: 'Test Page', 43 | description: 'Test Description', 44 | markdown: '# Test Content', 45 | actions: null as never, 46 | }, 47 | ] as FirecrawlDocument[], 48 | }; 49 | 50 | // Create mock instance methods 51 | const mockSearch = jest.fn().mockImplementation(async () => mockSearchResponse); 52 | const mockAsyncBatchScrapeUrls = jest 53 | .fn() 54 | .mockImplementation(async () => mockBatchScrapeResponse); 55 | const mockCheckBatchScrapeStatus = jest 56 | .fn() 57 | .mockImplementation(async () => mockBatchStatusResponse); 58 | 59 | // Create mock instance 60 | const mockInstance = { 61 | apiKey: 'test-api-key', 62 | apiUrl: 'test-api-url', 63 | search: mockSearch, 64 | asyncBatchScrapeUrls: mockAsyncBatchScrapeUrls, 65 | checkBatchScrapeStatus: 
mockCheckBatchScrapeStatus, 66 | }; 67 | 68 | // Mock the module 69 | jest.mock('@mendable/firecrawl-js', () => ({ 70 | __esModule: true, 71 | default: jest.fn().mockImplementation(() => mockInstance), 72 | })); 73 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "firecrawl-mcp", 3 | "version": "1.5.0", 4 | "description": "MCP server for FireCrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, batch processing, structured data extraction, and LLM-powered content analysis.", 5 | "type": "module", 6 | "bin": { 7 | "firecrawl-mcp": "dist/index.js" 8 | }, 9 | "files": [ 10 | "dist" 11 | ], 12 | "publishConfig": { 13 | "access": "public" 14 | }, 15 | "scripts": { 16 | "build": "tsc && node -e \"require('fs').chmodSync('dist/index.js', '755')\"", 17 | "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js", 18 | "start": "node dist/index.js", 19 | "lint": "eslint src/**/*.ts", 20 | "lint:fix": "eslint src/**/*.ts --fix", 21 | "format": "prettier --write .", 22 | "prepare": "npm run build", 23 | "publish": "npm run build && npm publish" 24 | }, 25 | "license": "ISC", 26 | "dependencies": { 27 | "@mendable/firecrawl-js": "^1.19.0", 28 | "@modelcontextprotocol/sdk": "^1.4.1", 29 | "dotenv": "^16.4.7", 30 | "p-queue": "^8.0.1", 31 | "shx": "^0.3.4" 32 | }, 33 | "devDependencies": { 34 | "@jest/globals": "^29.7.0", 35 | "@types/jest": "^29.5.14", 36 | "@types/node": "^20.10.5", 37 | "@typescript-eslint/eslint-plugin": "^7.0.0", 38 | "@typescript-eslint/parser": "^7.0.0", 39 | "eslint": "^8.56.0", 40 | "eslint-config-prettier": "^9.1.0", 41 | "jest": "^29.7.0", 42 | "jest-mock-extended": "^4.0.0-beta1", 43 | "prettier": "^3.1.1", 44 | "ts-jest": "^29.1.1", 45 | "typescript": "^5.3.3" 46 | }, 47 | "engines": { 48 | "node": ">=18.0.0" 49 | }, 50 | "keywords": [ 51 | "mcp", 52 | "firecrawl", 53 | "web-scraping", 54 | "crawler", 55 | "content-extraction" 56 | ], 57 | "repository": { 58 | "type": "git", 59 | "url": "git+https://github.com/mendableai/firecrawl-mcp-server.git" 60 | }, 61 | "author": "vrknetha", 62 | "bugs": { 63 | "url": "https://github.com/mendableai/firecrawl-mcp-server/issues" 64 | }, 65 | "homepage": "https://github.com/mendableai/firecrawl-mcp-server#readme" 66 | } 67 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [1.2.4] - 2024-02-05 4 | 5 | ### Added 6 | 7 | - Environment variable support for all configuration options 8 | - Detailed configuration documentation in README 9 | 10 | ### Changed 11 | 12 | - Made retry and credit monitoring settings configurable via environment variables: 13 | - `FIRECRAWL_RETRY_MAX_ATTEMPTS` 14 | - `FIRECRAWL_RETRY_INITIAL_DELAY` 15 | - `FIRECRAWL_RETRY_MAX_DELAY` 16 | - `FIRECRAWL_RETRY_BACKOFF_FACTOR` 17 | - `FIRECRAWL_CREDIT_WARNING_THRESHOLD` 18 | - `FIRECRAWL_CREDIT_CRITICAL_THRESHOLD` 19 | - Enhanced configuration examples with detailed comments and use cases 20 | - Improved documentation for retry behavior and credit monitoring 21 | 22 | ### Documentation 23 | 24 | - Added comprehensive configuration examples for both cloud and self-hosted setups 25 | - Added detailed explanations of retry behavior with timing examples 26 | - Added credit monitoring threshold explanations 27 | 
- Updated Claude Desktop configuration documentation 28 | 29 | ## [1.2.3] - 2024-02-05 30 | 31 | ### Changed 32 | 33 | - Removed redundant batch configuration to rely on FireCrawl library's built-in functionality 34 | - Simplified batch processing logic by leveraging library's native implementation 35 | - Optimized parallel processing and rate limiting handling 36 | - Reduced code complexity and potential configuration conflicts 37 | 38 | ### Technical 39 | 40 | - Removed custom `CONFIG.batch` settings (`maxParallelOperations` and `delayBetweenRequests`) 41 | - Simplified batch operation processing to use library's built-in batch handling 42 | - Updated server startup logging to remove batch configuration references 43 | - Maintained credit usage tracking and error handling functionality 44 | 45 | ## [1.2.2] - 2025-02-05 46 | 47 | ### Fixed 48 | 49 | - Resolved unused interface warnings for ExtractParams and ExtractResponse 50 | - Improved type safety in extract operations 51 | - Fixed type casting issues in API responses 52 | 53 | ### Changed 54 | 55 | - Improved type guards for better type inference 56 | - Enhanced error messages for configuration validation 57 | 58 | ## [1.2.0] - 2024-01-03 59 | 60 | ### Added 61 | 62 | - Implemented automatic retries with exponential backoff for rate limits 63 | - Added queue system for batch operations with parallel processing 64 | - Integrated credit usage monitoring with warning thresholds 65 | - Enhanced content validation with configurable criteria 66 | - Added comprehensive logging system for operations and errors 67 | - New search tool (`firecrawl_search`) for web search with content extraction 68 | - Support for self-hosted FireCrawl instances via optional API URL configuration 69 | - New `FIRECRAWL_API_URL` environment variable 70 | - Automatic fallback to cloud API 71 | - Improved error messages for self-hosted instances 72 | 73 | ### Changed 74 | 75 | - Improved error handling for HTTP errors including 404s 76 | - Enhanced URL validation before scraping 77 | - Updated configuration with new retry and batch processing options 78 | - Optimized rate limiting with automatic backoff strategy 79 | - Improved documentation with new features and examples 80 | - Added detailed self-hosted configuration guide 81 | 82 | ### Fixed 83 | 84 | - Rate limit handling in batch operations 85 | - Error response formatting 86 | - Type definitions for response handlers 87 | - Test suite mock responses 88 | - Error handling for invalid search queries 89 | - API configuration validation 90 | 91 | ## [1.0.1] - 2023-12-03 92 | 93 | ### Added 94 | 95 | - Initial release with basic scraping functionality 96 | - Support for batch scraping 97 | - URL discovery and crawling capabilities 98 | - Rate limiting implementation 99 | -------------------------------------------------------------------------------- /src/index.test.ts: -------------------------------------------------------------------------------- 1 | import { Server } from '@modelcontextprotocol/sdk/server/index.js'; 2 | import { CallToolRequestSchema } from '@modelcontextprotocol/sdk/types.js'; 3 | import FirecrawlApp from '@mendable/firecrawl-js'; 4 | import type { 5 | SearchResponse, 6 | BatchScrapeResponse, 7 | BatchScrapeStatusResponse, 8 | CrawlResponse, 9 | CrawlStatusResponse, 10 | ScrapeResponse, 11 | FirecrawlDocument, 12 | SearchParams, 13 | } from '@mendable/firecrawl-js'; 14 | import { 15 | describe, 16 | expect, 17 | jest, 18 | test, 19 | beforeEach, 20 | afterEach, 21 | } from '@jest/globals'; 22 | 
import { mock, MockProxy } from 'jest-mock-extended'; 23 | 24 | // Mock FirecrawlApp 25 | jest.mock('@mendable/firecrawl-js'); 26 | 27 | // Test interfaces 28 | interface RequestParams { 29 | method: string; 30 | params: { 31 | name: string; 32 | arguments?: Record; 33 | }; 34 | } 35 | 36 | interface BatchScrapeArgs { 37 | urls: string[]; 38 | options?: { 39 | formats?: string[]; 40 | [key: string]: any; 41 | }; 42 | } 43 | 44 | interface StatusCheckArgs { 45 | id: string; 46 | } 47 | 48 | interface SearchArgs { 49 | query: string; 50 | scrapeOptions?: { 51 | formats?: string[]; 52 | onlyMainContent?: boolean; 53 | }; 54 | } 55 | 56 | interface ScrapeArgs { 57 | url: string; 58 | formats?: string[]; 59 | onlyMainContent?: boolean; 60 | } 61 | 62 | interface CrawlArgs { 63 | url: string; 64 | maxDepth?: number; 65 | limit?: number; 66 | } 67 | 68 | // Mock client interface 69 | interface MockFirecrawlClient { 70 | scrapeUrl(url: string, options?: any): Promise; 71 | search(query: string, params?: SearchParams): Promise; 72 | asyncBatchScrapeUrls( 73 | urls: string[], 74 | options?: any 75 | ): Promise; 76 | checkBatchScrapeStatus(id: string): Promise; 77 | asyncCrawlUrl(url: string, options?: any): Promise; 78 | checkCrawlStatus(id: string): Promise; 79 | mapUrl(url: string, options?: any): Promise<{ links: string[] }>; 80 | } 81 | 82 | describe('FireCrawl Tool Tests', () => { 83 | let mockClient: MockProxy; 84 | let requestHandler: (request: RequestParams) => Promise; 85 | 86 | beforeEach(() => { 87 | jest.clearAllMocks(); 88 | mockClient = mock(); 89 | 90 | // Set up mock implementations 91 | const mockInstance = new FirecrawlApp({ apiKey: 'test' }); 92 | Object.assign(mockInstance, mockClient); 93 | 94 | // Create request handler 95 | requestHandler = async (request: RequestParams) => { 96 | const { name, arguments: args } = request.params; 97 | if (!args) { 98 | throw new Error('No arguments provided'); 99 | } 100 | return handleRequest(name, args, mockClient); 101 | }; 102 | }); 103 | 104 | afterEach(() => { 105 | jest.clearAllMocks(); 106 | }); 107 | 108 | // Test scrape functionality 109 | test('should handle scrape request', async () => { 110 | const url = 'https://example.com'; 111 | const options = { formats: ['markdown'] }; 112 | 113 | const mockResponse: ScrapeResponse = { 114 | success: true, 115 | markdown: '# Test Content', 116 | html: undefined, 117 | rawHtml: undefined, 118 | url: 'https://example.com', 119 | actions: undefined as never, 120 | }; 121 | 122 | mockClient.scrapeUrl.mockResolvedValueOnce(mockResponse); 123 | 124 | const response = await requestHandler({ 125 | method: 'call_tool', 126 | params: { 127 | name: 'firecrawl_scrape', 128 | arguments: { url, ...options }, 129 | }, 130 | }); 131 | 132 | expect(response).toEqual({ 133 | content: [{ type: 'text', text: '# Test Content' }], 134 | isError: false, 135 | }); 136 | expect(mockClient.scrapeUrl).toHaveBeenCalledWith(url, { 137 | formats: ['markdown'], 138 | url, 139 | }); 140 | }); 141 | 142 | // Test batch scrape functionality 143 | test('should handle batch scrape request', async () => { 144 | const urls = ['https://example.com']; 145 | const options = { formats: ['markdown'] }; 146 | 147 | mockClient.asyncBatchScrapeUrls.mockResolvedValueOnce({ 148 | success: true, 149 | id: 'test-batch-id', 150 | }); 151 | 152 | const response = await requestHandler({ 153 | method: 'call_tool', 154 | params: { 155 | name: 'firecrawl_batch_scrape', 156 | arguments: { urls, options }, 157 | }, 158 | }); 159 | 160 | 
expect(response.content[0].text).toContain( 161 | 'Batch operation queued with ID: batch_' 162 | ); 163 | expect(mockClient.asyncBatchScrapeUrls).toHaveBeenCalledWith(urls, options); 164 | }); 165 | 166 | // Test search functionality 167 | test('should handle search request', async () => { 168 | const query = 'test query'; 169 | const scrapeOptions = { formats: ['markdown'] }; 170 | 171 | const mockSearchResponse: SearchResponse = { 172 | success: true, 173 | data: [ 174 | { 175 | url: 'https://example.com', 176 | title: 'Test Page', 177 | description: 'Test Description', 178 | markdown: '# Test Content', 179 | actions: undefined as never, 180 | }, 181 | ], 182 | }; 183 | 184 | mockClient.search.mockResolvedValueOnce(mockSearchResponse); 185 | 186 | const response = await requestHandler({ 187 | method: 'call_tool', 188 | params: { 189 | name: 'firecrawl_search', 190 | arguments: { query, scrapeOptions }, 191 | }, 192 | }); 193 | 194 | expect(response.isError).toBe(false); 195 | expect(response.content[0].text).toContain('Test Page'); 196 | expect(mockClient.search).toHaveBeenCalledWith(query, scrapeOptions); 197 | }); 198 | 199 | // Test crawl functionality 200 | test('should handle crawl request', async () => { 201 | const url = 'https://example.com'; 202 | const options = { maxDepth: 2 }; 203 | 204 | mockClient.asyncCrawlUrl.mockResolvedValueOnce({ 205 | success: true, 206 | id: 'test-crawl-id', 207 | }); 208 | 209 | const response = await requestHandler({ 210 | method: 'call_tool', 211 | params: { 212 | name: 'firecrawl_crawl', 213 | arguments: { url, ...options }, 214 | }, 215 | }); 216 | 217 | expect(response.isError).toBe(false); 218 | expect(response.content[0].text).toContain('test-crawl-id'); 219 | expect(mockClient.asyncCrawlUrl).toHaveBeenCalledWith(url, { 220 | maxDepth: 2, 221 | url, 222 | }); 223 | }); 224 | 225 | // Test error handling 226 | test('should handle API errors', async () => { 227 | const url = 'https://example.com'; 228 | 229 | mockClient.scrapeUrl.mockRejectedValueOnce(new Error('API Error')); 230 | 231 | const response = await requestHandler({ 232 | method: 'call_tool', 233 | params: { 234 | name: 'firecrawl_scrape', 235 | arguments: { url }, 236 | }, 237 | }); 238 | 239 | expect(response.isError).toBe(true); 240 | expect(response.content[0].text).toContain('API Error'); 241 | }); 242 | 243 | // Test rate limiting 244 | test('should handle rate limits', async () => { 245 | const url = 'https://example.com'; 246 | 247 | // Mock rate limit error 248 | mockClient.scrapeUrl.mockRejectedValueOnce( 249 | new Error('rate limit exceeded') 250 | ); 251 | 252 | const response = await requestHandler({ 253 | method: 'call_tool', 254 | params: { 255 | name: 'firecrawl_scrape', 256 | arguments: { url }, 257 | }, 258 | }); 259 | 260 | expect(response.isError).toBe(true); 261 | expect(response.content[0].text).toContain('rate limit exceeded'); 262 | }); 263 | }); 264 | 265 | // Helper function to simulate request handling 266 | async function handleRequest( 267 | name: string, 268 | args: any, 269 | client: MockFirecrawlClient 270 | ) { 271 | try { 272 | switch (name) { 273 | case 'firecrawl_scrape': { 274 | const response = await client.scrapeUrl(args.url, args); 275 | if (!response.success) { 276 | throw new Error(response.error || 'Scraping failed'); 277 | } 278 | return { 279 | content: [ 280 | { type: 'text', text: response.markdown || 'No content available' }, 281 | ], 282 | isError: false, 283 | }; 284 | } 285 | 286 | case 'firecrawl_batch_scrape': { 287 | const 
response = await client.asyncBatchScrapeUrls( 288 | args.urls, 289 | args.options 290 | ); 291 | return { 292 | content: [ 293 | { 294 | type: 'text', 295 | text: `Batch operation queued with ID: batch_1. Use firecrawl_check_batch_status to check progress.`, 296 | }, 297 | ], 298 | isError: false, 299 | }; 300 | } 301 | 302 | case 'firecrawl_search': { 303 | const response = await client.search(args.query, args.scrapeOptions); 304 | if (!response.success) { 305 | throw new Error(response.error || 'Search failed'); 306 | } 307 | const results = response.data 308 | .map( 309 | (result) => 310 | `URL: ${result.url}\nTitle: ${ 311 | result.title || 'No title' 312 | }\nDescription: ${result.description || 'No description'}\n${ 313 | result.markdown ? `\nContent:\n${result.markdown}` : '' 314 | }` 315 | ) 316 | .join('\n\n'); 317 | return { 318 | content: [{ type: 'text', text: results }], 319 | isError: false, 320 | }; 321 | } 322 | 323 | case 'firecrawl_crawl': { 324 | const response = await client.asyncCrawlUrl(args.url, args); 325 | if (!response.success) { 326 | throw new Error(response.error); 327 | } 328 | return { 329 | content: [ 330 | { 331 | type: 'text', 332 | text: `Started crawl for ${args.url} with job ID: ${response.id}`, 333 | }, 334 | ], 335 | isError: false, 336 | }; 337 | } 338 | 339 | default: 340 | throw new Error(`Unknown tool: ${name}`); 341 | } 342 | } catch (error) { 343 | return { 344 | content: [ 345 | { 346 | type: 'text', 347 | text: error instanceof Error ? error.message : String(error), 348 | }, 349 | ], 350 | isError: true, 351 | }; 352 | } 353 | } 354 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Firecrawl MCP Server 2 | 3 | A Model Context Protocol (MCP) server implementation that integrates with [Firecrawl](https://github.com/mendableai/firecrawl) for web scraping capabilities. 4 | 5 | Big thanks to [@vrknetha](https://github.com/vrknetha), [@cawstudios](https://caw.tech) for the initial implementation! 6 | 7 | ## Features 8 | 9 | - Scrape, crawl, search, extract, deep research and batch scrape support 10 | - Web scraping with JS rendering 11 | - URL discovery and crawling 12 | - Web search with content extraction 13 | - Automatic retries with exponential backoff 14 | - - Efficient batch processing with built-in rate limiting 15 | - Credit usage monitoring for cloud API 16 | - Comprehensive logging system 17 | - Support for cloud and self-hosted FireCrawl instances 18 | - Mobile/Desktop viewport support 19 | - Smart content filtering with tag inclusion/exclusion 20 | 21 | ## Installation 22 | 23 | ### Running with npx 24 | 25 | ```bash 26 | env FIRECRAWL_API_KEY=fc-YOUR_API_KEY npx -y firecrawl-mcp 27 | ``` 28 | 29 | ### Manual Installation 30 | 31 | ```bash 32 | npm install -g firecrawl-mcp 33 | ``` 34 | 35 | ### Running on Cursor 36 | 37 | Configuring Cursor 🖥️ 38 | Note: Requires Cursor version 0.45.6+ 39 | 40 | To configure FireCrawl MCP in Cursor: 41 | 42 | 1. Open Cursor Settings 43 | 2. Go to Features > MCP Servers 44 | 3. Click "+ Add New MCP Server" 45 | 4. 
Enter the following: 46 | - Name: "firecrawl-mcp" (or your preferred name) 47 | - Type: "command" 48 | - Command: `env FIRECRAWL_API_KEY=your-api-key npx -y firecrawl-mcp` 49 | 50 | > If you are using Windows and are running into issues, try `cmd /c "set FIRECRAWL_API_KEY=your-api-key && npx -y firecrawl-mcp"` 51 | 52 | Replace `your-api-key` with your FireCrawl API key. 53 | 54 | After adding, refresh the MCP server list to see the new tools. The Composer Agent will automatically use FireCrawl MCP when appropriate, but you can explicitly request it by describing your web scraping needs. Access the Composer via Command+L (Mac), select "Agent" next to the submit button, and enter your query. 55 | 56 | ### Running on Windsurf 57 | 58 | Add this to your `./codeium/windsurf/model_config.json`: 59 | 60 | ```json 61 | { 62 | "mcpServers": { 63 | "mcp-server-firecrawl": { 64 | "command": "npx", 65 | "args": ["-y", "firecrawl-mcp"], 66 | "env": { 67 | "FIRECRAWL_API_KEY": "YOUR_API_KEY_HERE" 68 | } 69 | } 70 | } 71 | } 72 | ``` 73 | 74 | 75 | ### Installing via Smithery (Legacy) 76 | 77 | To install FireCrawl for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@mendableai/mcp-server-firecrawl): 78 | 79 | ```bash 80 | npx -y @smithery/cli install @mendableai/mcp-server-firecrawl --client claude 81 | ``` 82 | 83 | ## Configuration 84 | 85 | ### Environment Variables 86 | 87 | #### Required for Cloud API 88 | 89 | - `FIRECRAWL_API_KEY`: Your FireCrawl API key 90 | - Required when using cloud API (default) 91 | - Optional when using self-hosted instance with `FIRECRAWL_API_URL` 92 | - `FIRECRAWL_API_URL` (Optional): Custom API endpoint for self-hosted instances 93 | - Example: `https://firecrawl.your-domain.com` 94 | - If not provided, the cloud API will be used (requires API key) 95 | 96 | #### Optional Configuration 97 | 98 | ##### Retry Configuration 99 | 100 | - `FIRECRAWL_RETRY_MAX_ATTEMPTS`: Maximum number of retry attempts (default: 3) 101 | - `FIRECRAWL_RETRY_INITIAL_DELAY`: Initial delay in milliseconds before first retry (default: 1000) 102 | - `FIRECRAWL_RETRY_MAX_DELAY`: Maximum delay in milliseconds between retries (default: 10000) 103 | - `FIRECRAWL_RETRY_BACKOFF_FACTOR`: Exponential backoff multiplier (default: 2) 104 | 105 | ##### Credit Usage Monitoring 106 | 107 | - `FIRECRAWL_CREDIT_WARNING_THRESHOLD`: Credit usage warning threshold (default: 1000) 108 | - `FIRECRAWL_CREDIT_CRITICAL_THRESHOLD`: Credit usage critical threshold (default: 100) 109 | 110 | ### Configuration Examples 111 | 112 | For cloud API usage with custom retry and credit monitoring: 113 | 114 | ```bash 115 | # Required for cloud API 116 | export FIRECRAWL_API_KEY=your-api-key 117 | 118 | # Optional retry configuration 119 | export FIRECRAWL_RETRY_MAX_ATTEMPTS=5 # Increase max retry attempts 120 | export FIRECRAWL_RETRY_INITIAL_DELAY=2000 # Start with 2s delay 121 | export FIRECRAWL_RETRY_MAX_DELAY=30000 # Maximum 30s delay 122 | export FIRECRAWL_RETRY_BACKOFF_FACTOR=3 # More aggressive backoff 123 | 124 | # Optional credit monitoring 125 | export FIRECRAWL_CREDIT_WARNING_THRESHOLD=2000 # Warning at 2000 credits 126 | export FIRECRAWL_CREDIT_CRITICAL_THRESHOLD=500 # Critical at 500 credits 127 | ``` 128 | 129 | For self-hosted instance: 130 | 131 | ```bash 132 | # Required for self-hosted 133 | export FIRECRAWL_API_URL=https://firecrawl.your-domain.com 134 | 135 | # Optional authentication for self-hosted 136 | export FIRECRAWL_API_KEY=your-api-key # If your instance requires auth 137 | 138 | 
# Custom retry configuration 139 | export FIRECRAWL_RETRY_MAX_ATTEMPTS=10 140 | export FIRECRAWL_RETRY_INITIAL_DELAY=500 # Start with faster retries 141 | ``` 142 | 143 | ### Usage with Claude Desktop 144 | 145 | Add this to your `claude_desktop_config.json`: 146 | 147 | ```json 148 | { 149 | "mcpServers": { 150 | "mcp-server-firecrawl": { 151 | "command": "npx", 152 | "args": ["-y", "firecrawl-mcp"], 153 | "env": { 154 | "FIRECRAWL_API_KEY": "YOUR_API_KEY_HERE", 155 | 156 | "FIRECRAWL_RETRY_MAX_ATTEMPTS": "5", 157 | "FIRECRAWL_RETRY_INITIAL_DELAY": "2000", 158 | "FIRECRAWL_RETRY_MAX_DELAY": "30000", 159 | "FIRECRAWL_RETRY_BACKOFF_FACTOR": "3", 160 | 161 | "FIRECRAWL_CREDIT_WARNING_THRESHOLD": "2000", 162 | "FIRECRAWL_CREDIT_CRITICAL_THRESHOLD": "500" 163 | } 164 | } 165 | } 166 | } 167 | ``` 168 | 169 | ### System Configuration 170 | 171 | The server includes several configurable parameters that can be set via environment variables. Here are the default values if not configured: 172 | 173 | ```typescript 174 | const CONFIG = { 175 | retry: { 176 | maxAttempts: 3, // Number of retry attempts for rate-limited requests 177 | initialDelay: 1000, // Initial delay before first retry (in milliseconds) 178 | maxDelay: 10000, // Maximum delay between retries (in milliseconds) 179 | backoffFactor: 2, // Multiplier for exponential backoff 180 | }, 181 | credit: { 182 | warningThreshold: 1000, // Warn when credit usage reaches this level 183 | criticalThreshold: 100, // Critical alert when credit usage reaches this level 184 | }, 185 | }; 186 | ``` 187 | 188 | These configurations control: 189 | 190 | 1. **Retry Behavior** 191 | 192 | - Automatically retries failed requests due to rate limits 193 | - Uses exponential backoff to avoid overwhelming the API 194 | - Example: With default settings, retries will be attempted at: 195 | - 1st retry: 1 second delay 196 | - 2nd retry: 2 seconds delay 197 | - 3rd retry: 4 seconds delay (capped at maxDelay) 198 | 199 | 2. **Credit Usage Monitoring** 200 | - Tracks API credit consumption for cloud API usage 201 | - Provides warnings at specified thresholds 202 | - Helps prevent unexpected service interruption 203 | - Example: With default settings: 204 | - Warning at 1000 credits remaining 205 | - Critical alert at 100 credits remaining 206 | 207 | ### Rate Limiting and Batch Processing 208 | 209 | The server utilizes FireCrawl's built-in rate limiting and batch processing capabilities: 210 | 211 | - Automatic rate limit handling with exponential backoff 212 | - Efficient parallel processing for batch operations 213 | - Smart request queuing and throttling 214 | - Automatic retries for transient errors 215 | 216 | ## Available Tools 217 | 218 | ### 1. Scrape Tool (`firecrawl_scrape`) 219 | 220 | Scrape content from a single URL with advanced options. 221 | 222 | ```json 223 | { 224 | "name": "firecrawl_scrape", 225 | "arguments": { 226 | "url": "https://example.com", 227 | "formats": ["markdown"], 228 | "onlyMainContent": true, 229 | "waitFor": 1000, 230 | "timeout": 30000, 231 | "mobile": false, 232 | "includeTags": ["article", "main"], 233 | "excludeTags": ["nav", "footer"], 234 | "skipTlsVerification": false 235 | } 236 | } 237 | ``` 238 | 239 | ### 2. Batch Scrape Tool (`firecrawl_batch_scrape`) 240 | 241 | Scrape multiple URLs efficiently with built-in rate limiting and parallel processing. 
242 | 243 | ```json 244 | { 245 | "name": "firecrawl_batch_scrape", 246 | "arguments": { 247 | "urls": ["https://example1.com", "https://example2.com"], 248 | "options": { 249 | "formats": ["markdown"], 250 | "onlyMainContent": true 251 | } 252 | } 253 | } 254 | ``` 255 | 256 | Response includes operation ID for status checking: 257 | 258 | ```json 259 | { 260 | "content": [ 261 | { 262 | "type": "text", 263 | "text": "Batch operation queued with ID: batch_1. Use firecrawl_check_batch_status to check progress." 264 | } 265 | ], 266 | "isError": false 267 | } 268 | ``` 269 | 270 | ### 3. Check Batch Status (`firecrawl_check_batch_status`) 271 | 272 | Check the status of a batch operation. 273 | 274 | ```json 275 | { 276 | "name": "firecrawl_check_batch_status", 277 | "arguments": { 278 | "id": "batch_1" 279 | } 280 | } 281 | ``` 282 | 283 | ### 4. Search Tool (`firecrawl_search`) 284 | 285 | Search the web and optionally extract content from search results. 286 | 287 | ```json 288 | { 289 | "name": "firecrawl_search", 290 | "arguments": { 291 | "query": "your search query", 292 | "limit": 5, 293 | "lang": "en", 294 | "country": "us", 295 | "scrapeOptions": { 296 | "formats": ["markdown"], 297 | "onlyMainContent": true 298 | } 299 | } 300 | } 301 | ``` 302 | 303 | ### 5. Crawl Tool (`firecrawl_crawl`) 304 | 305 | Start an asynchronous crawl with advanced options. 306 | 307 | ```json 308 | { 309 | "name": "firecrawl_crawl", 310 | "arguments": { 311 | "url": "https://example.com", 312 | "maxDepth": 2, 313 | "limit": 100, 314 | "allowExternalLinks": false, 315 | "deduplicateSimilarURLs": true 316 | } 317 | } 318 | ``` 319 | 320 | ### 6. Extract Tool (`firecrawl_extract`) 321 | 322 | Extract structured information from web pages using LLM capabilities. Supports both cloud AI and self-hosted LLM extraction. 323 | 324 | ```json 325 | { 326 | "name": "firecrawl_extract", 327 | "arguments": { 328 | "urls": ["https://example.com/page1", "https://example.com/page2"], 329 | "prompt": "Extract product information including name, price, and description", 330 | "systemPrompt": "You are a helpful assistant that extracts product information", 331 | "schema": { 332 | "type": "object", 333 | "properties": { 334 | "name": { "type": "string" }, 335 | "price": { "type": "number" }, 336 | "description": { "type": "string" } 337 | }, 338 | "required": ["name", "price"] 339 | }, 340 | "allowExternalLinks": false, 341 | "enableWebSearch": false, 342 | "includeSubdomains": false 343 | } 344 | } 345 | ``` 346 | 347 | Example response: 348 | 349 | ```json 350 | { 351 | "content": [ 352 | { 353 | "type": "text", 354 | "text": { 355 | "name": "Example Product", 356 | "price": 99.99, 357 | "description": "This is an example product description" 358 | } 359 | } 360 | ], 361 | "isError": false 362 | } 363 | ``` 364 | 365 | #### Extract Tool Options: 366 | 367 | - `urls`: Array of URLs to extract information from 368 | - `prompt`: Custom prompt for the LLM extraction 369 | - `systemPrompt`: System prompt to guide the LLM 370 | - `schema`: JSON schema for structured data extraction 371 | - `allowExternalLinks`: Allow extraction from external links 372 | - `enableWebSearch`: Enable web search for additional context 373 | - `includeSubdomains`: Include subdomains in extraction 374 | 375 | When using a self-hosted instance, the extraction will use your configured LLM. For cloud API, it uses FireCrawl's managed LLM service. 
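The JSON schema above maps directly onto the shape of the object returned in the response. As a minimal TypeScript sketch (the `ExtractArguments` interface and the `mcpClient.callTool` call are illustrative placeholders, not exports of this package), the same request could be assembled like this:

```typescript
// Illustrative only: mirrors the firecrawl_extract arguments shown above.
interface ExtractArguments {
  urls: string[];
  prompt?: string;
  systemPrompt?: string;
  schema?: object;
  allowExternalLinks?: boolean;
  enableWebSearch?: boolean;
  includeSubdomains?: boolean;
}

const extractArgs: ExtractArguments = {
  urls: ['https://example.com/page1'],
  prompt: 'Extract product information including name, price, and description',
  schema: {
    type: 'object',
    properties: {
      name: { type: 'string' },
      price: { type: 'number' },
      description: { type: 'string' },
    },
    required: ['name', 'price'],
  },
};

// Hypothetical MCP client invocation; adapt to whichever MCP client you use.
// await mcpClient.callTool({ name: 'firecrawl_extract', arguments: extractArgs });
```

Fields listed in `required` are guaranteed in the extracted object; all other schema properties are best-effort.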
376 | 377 | ## Logging System 378 | 379 | The server includes comprehensive logging: 380 | 381 | - Operation status and progress 382 | - Performance metrics 383 | - Credit usage monitoring 384 | - Rate limit tracking 385 | - Error conditions 386 | 387 | Example log messages: 388 | 389 | ``` 390 | [INFO] FireCrawl MCP Server initialized successfully 391 | [INFO] Starting scrape for URL: https://example.com 392 | [INFO] Batch operation queued with ID: batch_1 393 | [WARNING] Credit usage has reached warning threshold 394 | [ERROR] Rate limit exceeded, retrying in 2s... 395 | ``` 396 | 397 | ## Error Handling 398 | 399 | The server provides robust error handling: 400 | 401 | - Automatic retries for transient errors 402 | - Rate limit handling with backoff 403 | - Detailed error messages 404 | - Credit usage warnings 405 | - Network resilience 406 | 407 | Example error response: 408 | 409 | ```json 410 | { 411 | "content": [ 412 | { 413 | "type": "text", 414 | "text": "Error: Rate limit exceeded. Retrying in 2 seconds..." 415 | } 416 | ], 417 | "isError": true 418 | } 419 | ``` 420 | 421 | ## Development 422 | 423 | ```bash 424 | # Install dependencies 425 | npm install 426 | 427 | # Build 428 | npm run build 429 | 430 | # Run tests 431 | npm test 432 | ``` 433 | 434 | ### Contributing 435 | 436 | 1. Fork the repository 437 | 2. Create your feature branch 438 | 3. Run tests: `npm test` 439 | 4. Submit a pull request 440 | 441 | ## License 442 | 443 | MIT License - see LICENSE file for details 444 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import { Server } from '@modelcontextprotocol/sdk/server/index.js'; 4 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; 5 | import { 6 | Tool, 7 | CallToolRequestSchema, 8 | ListToolsRequestSchema, 9 | } from '@modelcontextprotocol/sdk/types.js'; 10 | import FirecrawlApp, { 11 | type ScrapeParams, 12 | type MapParams, 13 | type CrawlParams, 14 | type FirecrawlDocument, 15 | } from '@mendable/firecrawl-js'; 16 | import PQueue from 'p-queue'; 17 | 18 | import dotenv from 'dotenv'; 19 | 20 | dotenv.config(); 21 | 22 | // Tool definitions 23 | const SCRAPE_TOOL: Tool = { 24 | name: 'firecrawl_scrape', 25 | description: 26 | 'Scrape a single webpage with advanced options for content extraction. ' + 27 | 'Supports various formats including markdown, HTML, and screenshots. 
' + 28 | 'Can execute custom actions like clicking or scrolling before scraping.', 29 | inputSchema: { 30 | type: 'object', 31 | properties: { 32 | url: { 33 | type: 'string', 34 | description: 'The URL to scrape', 35 | }, 36 | formats: { 37 | type: 'array', 38 | items: { 39 | type: 'string', 40 | enum: [ 41 | 'markdown', 42 | 'html', 43 | 'rawHtml', 44 | 'screenshot', 45 | 'links', 46 | 'screenshot@fullPage', 47 | 'extract', 48 | ], 49 | }, 50 | description: "Content formats to extract (default: ['markdown'])", 51 | }, 52 | onlyMainContent: { 53 | type: 'boolean', 54 | description: 55 | 'Extract only the main content, filtering out navigation, footers, etc.', 56 | }, 57 | includeTags: { 58 | type: 'array', 59 | items: { type: 'string' }, 60 | description: 'HTML tags to specifically include in extraction', 61 | }, 62 | excludeTags: { 63 | type: 'array', 64 | items: { type: 'string' }, 65 | description: 'HTML tags to exclude from extraction', 66 | }, 67 | waitFor: { 68 | type: 'number', 69 | description: 'Time in milliseconds to wait for dynamic content to load', 70 | }, 71 | timeout: { 72 | type: 'number', 73 | description: 74 | 'Maximum time in milliseconds to wait for the page to load', 75 | }, 76 | actions: { 77 | type: 'array', 78 | items: { 79 | type: 'object', 80 | properties: { 81 | type: { 82 | type: 'string', 83 | enum: [ 84 | 'wait', 85 | 'click', 86 | 'screenshot', 87 | 'write', 88 | 'press', 89 | 'scroll', 90 | 'scrape', 91 | 'executeJavascript', 92 | ], 93 | description: 'Type of action to perform', 94 | }, 95 | selector: { 96 | type: 'string', 97 | description: 'CSS selector for the target element', 98 | }, 99 | milliseconds: { 100 | type: 'number', 101 | description: 'Time to wait in milliseconds (for wait action)', 102 | }, 103 | text: { 104 | type: 'string', 105 | description: 'Text to write (for write action)', 106 | }, 107 | key: { 108 | type: 'string', 109 | description: 'Key to press (for press action)', 110 | }, 111 | direction: { 112 | type: 'string', 113 | enum: ['up', 'down'], 114 | description: 'Scroll direction', 115 | }, 116 | script: { 117 | type: 'string', 118 | description: 'JavaScript code to execute', 119 | }, 120 | fullPage: { 121 | type: 'boolean', 122 | description: 'Take full page screenshot', 123 | }, 124 | }, 125 | required: ['type'], 126 | }, 127 | description: 'List of actions to perform before scraping', 128 | }, 129 | extract: { 130 | type: 'object', 131 | properties: { 132 | schema: { 133 | type: 'object', 134 | description: 'Schema for structured data extraction', 135 | }, 136 | systemPrompt: { 137 | type: 'string', 138 | description: 'System prompt for LLM extraction', 139 | }, 140 | prompt: { 141 | type: 'string', 142 | description: 'User prompt for LLM extraction', 143 | }, 144 | }, 145 | description: 'Configuration for structured data extraction', 146 | }, 147 | mobile: { 148 | type: 'boolean', 149 | description: 'Use mobile viewport', 150 | }, 151 | skipTlsVerification: { 152 | type: 'boolean', 153 | description: 'Skip TLS certificate verification', 154 | }, 155 | removeBase64Images: { 156 | type: 'boolean', 157 | description: 'Remove base64 encoded images from output', 158 | }, 159 | location: { 160 | type: 'object', 161 | properties: { 162 | country: { 163 | type: 'string', 164 | description: 'Country code for geolocation', 165 | }, 166 | languages: { 167 | type: 'array', 168 | items: { type: 'string' }, 169 | description: 'Language codes for content', 170 | }, 171 | }, 172 | description: 'Location settings for scraping', 173 | }, 174 | 
}, 175 | required: ['url'], 176 | }, 177 | }; 178 | 179 | const MAP_TOOL: Tool = { 180 | name: 'firecrawl_map', 181 | description: 182 | 'Discover URLs from a starting point. Can use both sitemap.xml and HTML link discovery.', 183 | inputSchema: { 184 | type: 'object', 185 | properties: { 186 | url: { 187 | type: 'string', 188 | description: 'Starting URL for URL discovery', 189 | }, 190 | search: { 191 | type: 'string', 192 | description: 'Optional search term to filter URLs', 193 | }, 194 | ignoreSitemap: { 195 | type: 'boolean', 196 | description: 'Skip sitemap.xml discovery and only use HTML links', 197 | }, 198 | sitemapOnly: { 199 | type: 'boolean', 200 | description: 'Only use sitemap.xml for discovery, ignore HTML links', 201 | }, 202 | includeSubdomains: { 203 | type: 'boolean', 204 | description: 'Include URLs from subdomains in results', 205 | }, 206 | limit: { 207 | type: 'number', 208 | description: 'Maximum number of URLs to return', 209 | }, 210 | }, 211 | required: ['url'], 212 | }, 213 | }; 214 | 215 | const CRAWL_TOOL: Tool = { 216 | name: 'firecrawl_crawl', 217 | description: 218 | 'Start an asynchronous crawl of multiple pages from a starting URL. ' + 219 | 'Supports depth control, path filtering, and webhook notifications.', 220 | inputSchema: { 221 | type: 'object', 222 | properties: { 223 | url: { 224 | type: 'string', 225 | description: 'Starting URL for the crawl', 226 | }, 227 | excludePaths: { 228 | type: 'array', 229 | items: { type: 'string' }, 230 | description: 'URL paths to exclude from crawling', 231 | }, 232 | includePaths: { 233 | type: 'array', 234 | items: { type: 'string' }, 235 | description: 'Only crawl these URL paths', 236 | }, 237 | maxDepth: { 238 | type: 'number', 239 | description: 'Maximum link depth to crawl', 240 | }, 241 | ignoreSitemap: { 242 | type: 'boolean', 243 | description: 'Skip sitemap.xml discovery', 244 | }, 245 | limit: { 246 | type: 'number', 247 | description: 'Maximum number of pages to crawl', 248 | }, 249 | allowBackwardLinks: { 250 | type: 'boolean', 251 | description: 'Allow crawling links that point to parent directories', 252 | }, 253 | allowExternalLinks: { 254 | type: 'boolean', 255 | description: 'Allow crawling links to external domains', 256 | }, 257 | webhook: { 258 | oneOf: [ 259 | { 260 | type: 'string', 261 | description: 'Webhook URL to notify when crawl is complete', 262 | }, 263 | { 264 | type: 'object', 265 | properties: { 266 | url: { 267 | type: 'string', 268 | description: 'Webhook URL', 269 | }, 270 | headers: { 271 | type: 'object', 272 | description: 'Custom headers for webhook requests', 273 | }, 274 | }, 275 | required: ['url'], 276 | }, 277 | ], 278 | }, 279 | deduplicateSimilarURLs: { 280 | type: 'boolean', 281 | description: 'Remove similar URLs during crawl', 282 | }, 283 | ignoreQueryParameters: { 284 | type: 'boolean', 285 | description: 'Ignore query parameters when comparing URLs', 286 | }, 287 | scrapeOptions: { 288 | type: 'object', 289 | properties: { 290 | formats: { 291 | type: 'array', 292 | items: { 293 | type: 'string', 294 | enum: [ 295 | 'markdown', 296 | 'html', 297 | 'rawHtml', 298 | 'screenshot', 299 | 'links', 300 | 'screenshot@fullPage', 301 | 'extract', 302 | ], 303 | }, 304 | }, 305 | onlyMainContent: { 306 | type: 'boolean', 307 | }, 308 | includeTags: { 309 | type: 'array', 310 | items: { type: 'string' }, 311 | }, 312 | excludeTags: { 313 | type: 'array', 314 | items: { type: 'string' }, 315 | }, 316 | waitFor: { 317 | type: 'number', 318 | }, 319 | }, 320 | description: 
'Options for scraping each page', 321 | }, 322 | }, 323 | required: ['url'], 324 | }, 325 | }; 326 | 327 | const BATCH_SCRAPE_TOOL: Tool = { 328 | name: 'firecrawl_batch_scrape', 329 | description: 330 | 'Scrape multiple URLs in batch mode. Returns a job ID that can be used to check status.', 331 | inputSchema: { 332 | type: 'object', 333 | properties: { 334 | urls: { 335 | type: 'array', 336 | items: { type: 'string' }, 337 | description: 'List of URLs to scrape', 338 | }, 339 | options: { 340 | type: 'object', 341 | properties: { 342 | formats: { 343 | type: 'array', 344 | items: { 345 | type: 'string', 346 | enum: [ 347 | 'markdown', 348 | 'html', 349 | 'rawHtml', 350 | 'screenshot', 351 | 'links', 352 | 'screenshot@fullPage', 353 | 'extract', 354 | ], 355 | }, 356 | }, 357 | onlyMainContent: { 358 | type: 'boolean', 359 | }, 360 | includeTags: { 361 | type: 'array', 362 | items: { type: 'string' }, 363 | }, 364 | excludeTags: { 365 | type: 'array', 366 | items: { type: 'string' }, 367 | }, 368 | waitFor: { 369 | type: 'number', 370 | }, 371 | }, 372 | }, 373 | }, 374 | required: ['urls'], 375 | }, 376 | }; 377 | 378 | const CHECK_BATCH_STATUS_TOOL: Tool = { 379 | name: 'firecrawl_check_batch_status', 380 | description: 'Check the status of a batch scraping job.', 381 | inputSchema: { 382 | type: 'object', 383 | properties: { 384 | id: { 385 | type: 'string', 386 | description: 'Batch job ID to check', 387 | }, 388 | }, 389 | required: ['id'], 390 | }, 391 | }; 392 | 393 | const CHECK_CRAWL_STATUS_TOOL: Tool = { 394 | name: 'firecrawl_check_crawl_status', 395 | description: 'Check the status of a crawl job.', 396 | inputSchema: { 397 | type: 'object', 398 | properties: { 399 | id: { 400 | type: 'string', 401 | description: 'Crawl job ID to check', 402 | }, 403 | }, 404 | required: ['id'], 405 | }, 406 | }; 407 | 408 | const SEARCH_TOOL: Tool = { 409 | name: 'firecrawl_search', 410 | description: 411 | 'Search and retrieve content from web pages with optional scraping. 
' + 412 | 'Returns SERP results by default (url, title, description) or full page content when scrapeOptions are provided.', 413 | inputSchema: { 414 | type: 'object', 415 | properties: { 416 | query: { 417 | type: 'string', 418 | description: 'Search query string', 419 | }, 420 | limit: { 421 | type: 'number', 422 | description: 'Maximum number of results to return (default: 5)', 423 | }, 424 | lang: { 425 | type: 'string', 426 | description: 'Language code for search results (default: en)', 427 | }, 428 | country: { 429 | type: 'string', 430 | description: 'Country code for search results (default: us)', 431 | }, 432 | tbs: { 433 | type: 'string', 434 | description: 'Time-based search filter', 435 | }, 436 | filter: { 437 | type: 'string', 438 | description: 'Search filter', 439 | }, 440 | location: { 441 | type: 'object', 442 | properties: { 443 | country: { 444 | type: 'string', 445 | description: 'Country code for geolocation', 446 | }, 447 | languages: { 448 | type: 'array', 449 | items: { type: 'string' }, 450 | description: 'Language codes for content', 451 | }, 452 | }, 453 | description: 'Location settings for search', 454 | }, 455 | scrapeOptions: { 456 | type: 'object', 457 | properties: { 458 | formats: { 459 | type: 'array', 460 | items: { 461 | type: 'string', 462 | enum: ['markdown', 'html', 'rawHtml'], 463 | }, 464 | description: 'Content formats to extract from search results', 465 | }, 466 | onlyMainContent: { 467 | type: 'boolean', 468 | description: 'Extract only the main content from results', 469 | }, 470 | waitFor: { 471 | type: 'number', 472 | description: 'Time in milliseconds to wait for dynamic content', 473 | }, 474 | }, 475 | description: 'Options for scraping search results', 476 | }, 477 | }, 478 | required: ['query'], 479 | }, 480 | }; 481 | 482 | const EXTRACT_TOOL: Tool = { 483 | name: 'firecrawl_extract', 484 | description: 485 | 'Extract structured information from web pages using LLM. 
' + 486 | 'Supports both cloud AI and self-hosted LLM extraction.', 487 | inputSchema: { 488 | type: 'object', 489 | properties: { 490 | urls: { 491 | type: 'array', 492 | items: { type: 'string' }, 493 | description: 'List of URLs to extract information from', 494 | }, 495 | prompt: { 496 | type: 'string', 497 | description: 'Prompt for the LLM extraction', 498 | }, 499 | systemPrompt: { 500 | type: 'string', 501 | description: 'System prompt for LLM extraction', 502 | }, 503 | schema: { 504 | type: 'object', 505 | description: 'JSON schema for structured data extraction', 506 | }, 507 | allowExternalLinks: { 508 | type: 'boolean', 509 | description: 'Allow extraction from external links', 510 | }, 511 | enableWebSearch: { 512 | type: 'boolean', 513 | description: 'Enable web search for additional context', 514 | }, 515 | includeSubdomains: { 516 | type: 'boolean', 517 | description: 'Include subdomains in extraction', 518 | }, 519 | }, 520 | required: ['urls'], 521 | }, 522 | }; 523 | 524 | const DEEP_RESEARCH_TOOL: Tool = { 525 | name: 'firecrawl_deep_research', 526 | description: 'Conduct deep research on a query using web crawling, search, and AI analysis.', 527 | inputSchema: { 528 | type: 'object', 529 | properties: { 530 | query: { 531 | type: 'string', 532 | description: 'The query to research', 533 | }, 534 | maxDepth: { 535 | type: 'number', 536 | description: 'Maximum depth of research iterations (1-10)', 537 | }, 538 | timeLimit: { 539 | type: 'number', 540 | description: 'Time limit in seconds (30-300)', 541 | }, 542 | maxUrls: { 543 | type: 'number', 544 | description: 'Maximum number of URLs to analyze (1-1000)', 545 | } 546 | }, 547 | required: ['query'], 548 | }, 549 | }; 550 | 551 | // Type definitions 552 | interface BatchScrapeOptions { 553 | urls: string[]; 554 | options?: Omit; 555 | } 556 | 557 | interface StatusCheckOptions { 558 | id: string; 559 | } 560 | 561 | interface SearchOptions { 562 | query: string; 563 | limit?: number; 564 | lang?: string; 565 | country?: string; 566 | tbs?: string; 567 | filter?: string; 568 | location?: { 569 | country?: string; 570 | languages?: string[]; 571 | }; 572 | scrapeOptions?: { 573 | formats?: string[]; 574 | onlyMainContent?: boolean; 575 | waitFor?: number; 576 | }; 577 | } 578 | 579 | // Add after other interfaces 580 | interface ExtractParams { 581 | prompt?: string; 582 | systemPrompt?: string; 583 | schema?: T | object; 584 | allowExternalLinks?: boolean; 585 | enableWebSearch?: boolean; 586 | includeSubdomains?: boolean; 587 | origin?: string; 588 | } 589 | 590 | interface ExtractArgs { 591 | urls: string[]; 592 | prompt?: string; 593 | systemPrompt?: string; 594 | schema?: object; 595 | allowExternalLinks?: boolean; 596 | enableWebSearch?: boolean; 597 | includeSubdomains?: boolean; 598 | origin?: string; 599 | } 600 | 601 | interface ExtractResponse { 602 | success: boolean; 603 | data: T; 604 | error?: string; 605 | warning?: string; 606 | creditsUsed?: number; 607 | } 608 | 609 | // Type guards 610 | function isScrapeOptions( 611 | args: unknown 612 | ): args is ScrapeParams & { url: string } { 613 | return ( 614 | typeof args === 'object' && 615 | args !== null && 616 | 'url' in args && 617 | typeof (args as { url: unknown }).url === 'string' 618 | ); 619 | } 620 | 621 | function isMapOptions(args: unknown): args is MapParams & { url: string } { 622 | return ( 623 | typeof args === 'object' && 624 | args !== null && 625 | 'url' in args && 626 | typeof (args as { url: unknown }).url === 'string' 627 | ); 628 | } 
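// Note: these type guards all follow the same minimal pattern: confirm that the single
// required field for the tool ('url', 'urls', 'id', or 'query') is present with the
// expected type and narrow the argument type for the handler; optional fields are not
// validated here.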
629 | 630 | function isCrawlOptions(args: unknown): args is CrawlParams & { url: string } { 631 | return ( 632 | typeof args === 'object' && 633 | args !== null && 634 | 'url' in args && 635 | typeof (args as { url: unknown }).url === 'string' 636 | ); 637 | } 638 | 639 | function isBatchScrapeOptions(args: unknown): args is BatchScrapeOptions { 640 | return ( 641 | typeof args === 'object' && 642 | args !== null && 643 | 'urls' in args && 644 | Array.isArray((args as { urls: unknown }).urls) && 645 | (args as { urls: unknown[] }).urls.every((url) => typeof url === 'string') 646 | ); 647 | } 648 | 649 | function isStatusCheckOptions(args: unknown): args is StatusCheckOptions { 650 | return ( 651 | typeof args === 'object' && 652 | args !== null && 653 | 'id' in args && 654 | typeof (args as { id: unknown }).id === 'string' 655 | ); 656 | } 657 | 658 | function isSearchOptions(args: unknown): args is SearchOptions { 659 | return ( 660 | typeof args === 'object' && 661 | args !== null && 662 | 'query' in args && 663 | typeof (args as { query: unknown }).query === 'string' 664 | ); 665 | } 666 | 667 | function isExtractOptions(args: unknown): args is ExtractArgs { 668 | if (typeof args !== 'object' || args === null) return false; 669 | const { urls } = args as { urls?: unknown }; 670 | return ( 671 | Array.isArray(urls) && 672 | urls.every((url): url is string => typeof url === 'string') 673 | ); 674 | } 675 | 676 | // Server implementation 677 | const server = new Server( 678 | { 679 | name: 'firecrawl-mcp', 680 | version: '1.3.2', 681 | }, 682 | { 683 | capabilities: { 684 | tools: {}, 685 | logging: {}, 686 | }, 687 | } 688 | ); 689 | 690 | // Get optional API URL 691 | const FIRECRAWL_API_URL = process.env.FIRECRAWL_API_URL; 692 | const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY; 693 | 694 | // Check if API key is required (only for cloud service) 695 | if (!FIRECRAWL_API_URL && !FIRECRAWL_API_KEY) { 696 | console.error( 697 | 'Error: FIRECRAWL_API_KEY environment variable is required when using the cloud service' 698 | ); 699 | process.exit(1); 700 | } 701 | 702 | // Initialize FireCrawl client with optional API URL 703 | const client = new FirecrawlApp({ 704 | apiKey: FIRECRAWL_API_KEY || '', 705 | ...(FIRECRAWL_API_URL ? 
{ apiUrl: FIRECRAWL_API_URL } : {}),
706 | });
707 |
708 | // Configuration for retries and monitoring
709 | const CONFIG = {
710 |   retry: {
711 |     maxAttempts: Number(process.env.FIRECRAWL_RETRY_MAX_ATTEMPTS) || 3,
712 |     initialDelay: Number(process.env.FIRECRAWL_RETRY_INITIAL_DELAY) || 1000,
713 |     maxDelay: Number(process.env.FIRECRAWL_RETRY_MAX_DELAY) || 10000,
714 |     backoffFactor: Number(process.env.FIRECRAWL_RETRY_BACKOFF_FACTOR) || 2,
715 |   },
716 |   credit: {
717 |     warningThreshold:
718 |       Number(process.env.FIRECRAWL_CREDIT_WARNING_THRESHOLD) || 1000,
719 |     criticalThreshold:
720 |       Number(process.env.FIRECRAWL_CREDIT_CRITICAL_THRESHOLD) || 100,
721 |   },
722 | };
723 |
724 | // Add credit tracking
725 | interface CreditUsage {
726 |   total: number;
727 |   lastCheck: number;
728 | }
729 |
730 | const creditUsage: CreditUsage = {
731 |   total: 0,
732 |   lastCheck: Date.now(),
733 | };
734 |
735 | // Add utility function for delay
736 | function delay(ms: number): Promise<void> {
737 |   return new Promise((resolve) => setTimeout(resolve, ms));
738 | }
739 |
740 | // Add retry logic with exponential backoff
741 | async function withRetry<T>(
742 |   operation: () => Promise<T>,
743 |   context: string,
744 |   attempt = 1
745 | ): Promise<T> {
746 |   try {
747 |     return await operation();
748 |   } catch (error) {
749 |     const isRateLimit =
750 |       error instanceof Error &&
751 |       (error.message.includes('rate limit') || error.message.includes('429'));
752 |
753 |     if (isRateLimit && attempt < CONFIG.retry.maxAttempts) {
754 |       const delayMs = Math.min(
755 |         CONFIG.retry.initialDelay *
756 |           Math.pow(CONFIG.retry.backoffFactor, attempt - 1),
757 |         CONFIG.retry.maxDelay
758 |       );
759 |
760 |       server.sendLoggingMessage({
761 |         level: 'warning',
762 |         data: `Rate limit hit for ${context}. Attempt ${attempt}/${CONFIG.retry.maxAttempts}. Retrying in ${delayMs}ms`,
763 |       });
764 |
765 |       await delay(delayMs);
766 |       return withRetry(operation, context, attempt + 1);
767 |     }
768 |
769 |     throw error;
770 |   }
771 | }
772 |
773 | // Add credit monitoring
774 | async function updateCreditUsage(creditsUsed: number): Promise<void> {
775 |   creditUsage.total += creditsUsed;
776 |
777 |   // Log credit usage
778 |   server.sendLoggingMessage({
779 |     level: 'info',
780 |     data: `Credit usage: ${creditUsage.total} credits used total`,
781 |   });
782 |
783 |   // Check thresholds (cumulative credits used; with the defaults the critical check at 100 fires before the warning at 1000)
784 |   if (creditUsage.total >= CONFIG.credit.criticalThreshold) {
785 |     server.sendLoggingMessage({
786 |       level: 'error',
787 |       data: `CRITICAL: Credit usage has reached ${creditUsage.total}`,
788 |     });
789 |   } else if (creditUsage.total >= CONFIG.credit.warningThreshold) {
790 |     server.sendLoggingMessage({
791 |       level: 'warning',
792 |       data: `WARNING: Credit usage has reached ${creditUsage.total}`,
793 |     });
794 |   }
795 | }
796 |
797 | // Add before server implementation
798 | interface QueuedBatchOperation {
799 |   id: string;
800 |   urls: string[];
801 |   options?: any;
802 |   status: 'pending' | 'processing' | 'completed' | 'failed';
803 |   progress: {
804 |     completed: number;
805 |     total: number;
806 |   };
807 |   result?: any;
808 |   error?: string;
809 | }
810 |
811 | // Initialize queue system
812 | const batchQueue = new PQueue({ concurrency: 1 });
813 | const batchOperations = new Map<string, QueuedBatchOperation>();
814 | let operationCounter = 0;
815 |
816 | async function processBatchOperation(
817 |   operation: QueuedBatchOperation
818 | ): Promise<void> {
819 |   try {
820 |     operation.status = 'processing';
821 |     let totalCreditsUsed = 0;
822 |
823 |     // Use library's built-in batch processing
824 |     const response = await withRetry(
825 |       async () =>
826 |         client.asyncBatchScrapeUrls(operation.urls, operation.options),
827 |       `batch ${operation.id} processing`
828 |     );
829 |
830 |     if (!response.success) {
831 |       throw new Error(response.error || 'Batch operation failed');
832 |     }
833 |
834 |     // Track credits if using cloud API
835 |     if (!FIRECRAWL_API_URL && hasCredits(response)) {
836 |       totalCreditsUsed += response.creditsUsed;
837 |       await updateCreditUsage(response.creditsUsed);
838 |     }
839 |
840 |     operation.status = 'completed';
841 |     operation.result = response;
842 |
843 |     // Log final credit usage for the batch
844 |     if (!FIRECRAWL_API_URL) {
845 |       server.sendLoggingMessage({
846 |         level: 'info',
847 |         data: `Batch ${operation.id} completed. Total credits used: ${totalCreditsUsed}`,
848 |       });
849 |     }
850 |   } catch (error) {
851 |     operation.status = 'failed';
852 |     operation.error = error instanceof Error ?
error.message : String(error); 853 | 854 | server.sendLoggingMessage({ 855 | level: 'error', 856 | data: `Batch ${operation.id} failed: ${operation.error}`, 857 | }); 858 | } 859 | } 860 | 861 | // Tool handlers 862 | server.setRequestHandler(ListToolsRequestSchema, async () => ({ 863 | tools: [ 864 | SCRAPE_TOOL, 865 | MAP_TOOL, 866 | CRAWL_TOOL, 867 | BATCH_SCRAPE_TOOL, 868 | CHECK_BATCH_STATUS_TOOL, 869 | CHECK_CRAWL_STATUS_TOOL, 870 | SEARCH_TOOL, 871 | EXTRACT_TOOL, 872 | DEEP_RESEARCH_TOOL, 873 | ], 874 | })); 875 | 876 | server.setRequestHandler(CallToolRequestSchema, async (request) => { 877 | const startTime = Date.now(); 878 | try { 879 | const { name, arguments: args } = request.params; 880 | 881 | // Log incoming request with timestamp 882 | server.sendLoggingMessage({ 883 | level: 'info', 884 | data: `[${new Date().toISOString()}] Received request for tool: ${name}`, 885 | }); 886 | 887 | if (!args) { 888 | throw new Error('No arguments provided'); 889 | } 890 | 891 | switch (name) { 892 | case 'firecrawl_scrape': { 893 | if (!isScrapeOptions(args)) { 894 | throw new Error('Invalid arguments for firecrawl_scrape'); 895 | } 896 | const { url, ...options } = args; 897 | try { 898 | const scrapeStartTime = Date.now(); 899 | server.sendLoggingMessage({ 900 | level: 'info', 901 | data: `Starting scrape for URL: ${url} with options: ${JSON.stringify( 902 | options 903 | )}`, 904 | }); 905 | 906 | const response = await client.scrapeUrl(url, options); 907 | 908 | // Log performance metrics 909 | server.sendLoggingMessage({ 910 | level: 'info', 911 | data: `Scrape completed in ${Date.now() - scrapeStartTime}ms`, 912 | }); 913 | 914 | if ('success' in response && !response.success) { 915 | throw new Error(response.error || 'Scraping failed'); 916 | } 917 | 918 | 919 | // Format content based on requested formats 920 | const contentParts = []; 921 | 922 | if (options.formats?.includes('markdown') && response.markdown) { 923 | contentParts.push(response.markdown); 924 | } 925 | if (options.formats?.includes('html') && response.html) { 926 | contentParts.push(response.html); 927 | } 928 | if (options.formats?.includes('rawHtml') && response.rawHtml) { 929 | contentParts.push(response.rawHtml); 930 | } 931 | if (options.formats?.includes('links') && response.links) { 932 | contentParts.push(response.links.join('\n')); 933 | } 934 | if (options.formats?.includes('screenshot') && response.screenshot) { 935 | contentParts.push(response.screenshot); 936 | } 937 | if (options.formats?.includes('extract') && response.extract) { 938 | contentParts.push(JSON.stringify(response.extract, null, 2)); 939 | } 940 | 941 | // Add warning to response if present 942 | if (response.warning) { 943 | server.sendLoggingMessage({ 944 | level: 'warning', 945 | data: response.warning, 946 | }); 947 | } 948 | 949 | return { 950 | content: [ 951 | { type: 'text', text: contentParts.join('\n\n') || 'No content available' }, 952 | ], 953 | isError: false, 954 | }; 955 | } catch (error) { 956 | const errorMessage = 957 | error instanceof Error ? 
error.message : String(error); 958 | return { 959 | content: [{ type: 'text', text: errorMessage }], 960 | isError: true, 961 | }; 962 | } 963 | } 964 | 965 | case 'firecrawl_map': { 966 | if (!isMapOptions(args)) { 967 | throw new Error('Invalid arguments for firecrawl_map'); 968 | } 969 | const { url, ...options } = args; 970 | const response = await client.mapUrl(url, options); 971 | if ('error' in response) { 972 | throw new Error(response.error); 973 | } 974 | if (!response.links) { 975 | throw new Error('No links received from FireCrawl API'); 976 | } 977 | return { 978 | content: [{ type: 'text', text: response.links.join('\n') }], 979 | isError: false, 980 | }; 981 | } 982 | 983 | case 'firecrawl_batch_scrape': { 984 | if (!isBatchScrapeOptions(args)) { 985 | throw new Error('Invalid arguments for firecrawl_batch_scrape'); 986 | } 987 | 988 | try { 989 | const operationId = `batch_${++operationCounter}`; 990 | const operation: QueuedBatchOperation = { 991 | id: operationId, 992 | urls: args.urls, 993 | options: args.options, 994 | status: 'pending', 995 | progress: { 996 | completed: 0, 997 | total: args.urls.length, 998 | }, 999 | }; 1000 | 1001 | batchOperations.set(operationId, operation); 1002 | 1003 | // Queue the operation 1004 | batchQueue.add(() => processBatchOperation(operation)); 1005 | 1006 | server.sendLoggingMessage({ 1007 | level: 'info', 1008 | data: `Queued batch operation ${operationId} with ${args.urls.length} URLs`, 1009 | }); 1010 | 1011 | return { 1012 | content: [ 1013 | { 1014 | type: 'text', 1015 | text: `Batch operation queued with ID: ${operationId}. Use firecrawl_check_batch_status to check progress.`, 1016 | }, 1017 | ], 1018 | isError: false, 1019 | }; 1020 | } catch (error) { 1021 | const errorMessage = 1022 | error instanceof Error 1023 | ? error.message 1024 | : `Batch operation failed: ${JSON.stringify(error)}`; 1025 | return { 1026 | content: [{ type: 'text', text: errorMessage }], 1027 | isError: true, 1028 | }; 1029 | } 1030 | } 1031 | 1032 | case 'firecrawl_check_batch_status': { 1033 | if (!isStatusCheckOptions(args)) { 1034 | throw new Error( 1035 | 'Invalid arguments for firecrawl_check_batch_status' 1036 | ); 1037 | } 1038 | 1039 | const operation = batchOperations.get(args.id); 1040 | if (!operation) { 1041 | return { 1042 | content: [ 1043 | { 1044 | type: 'text', 1045 | text: `No batch operation found with ID: ${args.id}`, 1046 | }, 1047 | ], 1048 | isError: true, 1049 | }; 1050 | } 1051 | 1052 | const status = `Batch Status: 1053 | Status: ${operation.status} 1054 | Progress: ${operation.progress.completed}/${operation.progress.total} 1055 | ${operation.error ? `Error: ${operation.error}` : ''} 1056 | ${ 1057 | operation.result 1058 | ? 
`Results: ${JSON.stringify(operation.result, null, 2)}` 1059 | : '' 1060 | }`; 1061 | 1062 | return { 1063 | content: [{ type: 'text', text: status }], 1064 | isError: false, 1065 | }; 1066 | } 1067 | 1068 | case 'firecrawl_crawl': { 1069 | if (!isCrawlOptions(args)) { 1070 | throw new Error('Invalid arguments for firecrawl_crawl'); 1071 | } 1072 | const { url, ...options } = args; 1073 | 1074 | const response = await withRetry( 1075 | async () => client.asyncCrawlUrl(url, options), 1076 | 'crawl operation' 1077 | ); 1078 | 1079 | if (!response.success) { 1080 | throw new Error(response.error); 1081 | } 1082 | 1083 | // Monitor credits for cloud API 1084 | if (!FIRECRAWL_API_URL && hasCredits(response)) { 1085 | await updateCreditUsage(response.creditsUsed); 1086 | } 1087 | 1088 | return { 1089 | content: [ 1090 | { 1091 | type: 'text', 1092 | text: `Started crawl for ${url} with job ID: ${response.id}`, 1093 | }, 1094 | ], 1095 | isError: false, 1096 | }; 1097 | } 1098 | 1099 | case 'firecrawl_check_crawl_status': { 1100 | if (!isStatusCheckOptions(args)) { 1101 | throw new Error( 1102 | 'Invalid arguments for firecrawl_check_crawl_status' 1103 | ); 1104 | } 1105 | const response = await client.checkCrawlStatus(args.id); 1106 | if (!response.success) { 1107 | throw new Error(response.error); 1108 | } 1109 | const status = `Crawl Status: 1110 | Status: ${response.status} 1111 | Progress: ${response.completed}/${response.total} 1112 | Credits Used: ${response.creditsUsed} 1113 | Expires At: ${response.expiresAt} 1114 | ${ 1115 | response.data.length > 0 ? '\nResults:\n' + formatResults(response.data) : '' 1116 | }`; 1117 | return { 1118 | content: [{ type: 'text', text: status }], 1119 | isError: false, 1120 | }; 1121 | } 1122 | 1123 | case 'firecrawl_search': { 1124 | if (!isSearchOptions(args)) { 1125 | throw new Error('Invalid arguments for firecrawl_search'); 1126 | } 1127 | try { 1128 | const response = await withRetry( 1129 | async () => client.search(args.query, args), 1130 | 'search operation' 1131 | ); 1132 | 1133 | if (!response.success) { 1134 | throw new Error( 1135 | `Search failed: ${response.error || 'Unknown error'}` 1136 | ); 1137 | } 1138 | 1139 | // Monitor credits for cloud API 1140 | if (!FIRECRAWL_API_URL && hasCredits(response)) { 1141 | await updateCreditUsage(response.creditsUsed); 1142 | } 1143 | 1144 | // Format the results 1145 | const results = response.data 1146 | .map( 1147 | (result) => 1148 | `URL: ${result.url} 1149 | Title: ${result.title || 'No title'} 1150 | Description: ${result.description || 'No description'} 1151 | ${result.markdown ? `\nContent:\n${result.markdown}` : ''}` 1152 | ) 1153 | .join('\n\n'); 1154 | 1155 | return { 1156 | content: [{ type: 'text', text: results }], 1157 | isError: false, 1158 | }; 1159 | } catch (error) { 1160 | const errorMessage = 1161 | error instanceof Error 1162 | ? 
error.message 1163 | : `Search failed: ${JSON.stringify(error)}`; 1164 | return { 1165 | content: [{ type: 'text', text: errorMessage }], 1166 | isError: true, 1167 | }; 1168 | } 1169 | } 1170 | 1171 | case 'firecrawl_extract': { 1172 | if (!isExtractOptions(args)) { 1173 | throw new Error('Invalid arguments for firecrawl_extract'); 1174 | } 1175 | 1176 | try { 1177 | const extractStartTime = Date.now(); 1178 | 1179 | server.sendLoggingMessage({ 1180 | level: 'info', 1181 | data: `Starting extraction for URLs: ${args.urls.join(', ')}`, 1182 | }); 1183 | 1184 | // Log if using self-hosted instance 1185 | if (FIRECRAWL_API_URL) { 1186 | server.sendLoggingMessage({ 1187 | level: 'info', 1188 | data: 'Using self-hosted instance for extraction', 1189 | }); 1190 | } 1191 | 1192 | const extractResponse = await withRetry( 1193 | async () => 1194 | client.extract(args.urls, { 1195 | prompt: args.prompt, 1196 | systemPrompt: args.systemPrompt, 1197 | schema: args.schema, 1198 | allowExternalLinks: args.allowExternalLinks, 1199 | enableWebSearch: args.enableWebSearch, 1200 | includeSubdomains: args.includeSubdomains, 1201 | origin: 'mcp-server', 1202 | } as ExtractParams), 1203 | 'extract operation' 1204 | ); 1205 | 1206 | // Type guard for successful response 1207 | if (!('success' in extractResponse) || !extractResponse.success) { 1208 | throw new Error(extractResponse.error || 'Extraction failed'); 1209 | } 1210 | 1211 | const response = extractResponse as ExtractResponse; 1212 | 1213 | // Monitor credits for cloud API 1214 | if (!FIRECRAWL_API_URL && hasCredits(response)) { 1215 | await updateCreditUsage(response.creditsUsed || 0); 1216 | } 1217 | 1218 | // Log performance metrics 1219 | server.sendLoggingMessage({ 1220 | level: 'info', 1221 | data: `Extraction completed in ${Date.now() - extractStartTime}ms`, 1222 | }); 1223 | 1224 | // Add warning to response if present 1225 | const result = { 1226 | content: [ 1227 | { 1228 | type: 'text', 1229 | text: JSON.stringify(response.data, null, 2), 1230 | }, 1231 | ], 1232 | isError: false, 1233 | }; 1234 | 1235 | if (response.warning) { 1236 | server.sendLoggingMessage({ 1237 | level: 'warning', 1238 | data: response.warning, 1239 | }); 1240 | } 1241 | 1242 | return result; 1243 | } catch (error) { 1244 | const errorMessage = 1245 | error instanceof Error ? error.message : String(error); 1246 | 1247 | // Special handling for self-hosted instance errors 1248 | if ( 1249 | FIRECRAWL_API_URL && 1250 | errorMessage.toLowerCase().includes('not supported') 1251 | ) { 1252 | server.sendLoggingMessage({ 1253 | level: 'error', 1254 | data: 'Extraction is not supported by this self-hosted instance', 1255 | }); 1256 | return { 1257 | content: [ 1258 | { 1259 | type: 'text', 1260 | text: 'Extraction is not supported by this self-hosted instance. 
Please ensure LLM support is configured.', 1261 | }, 1262 | ], 1263 | isError: true, 1264 | }; 1265 | } 1266 | 1267 | return { 1268 | content: [{ type: 'text', text: errorMessage }], 1269 | isError: true, 1270 | }; 1271 | } 1272 | } 1273 | 1274 | case 'firecrawl_deep_research': { 1275 | if (!args || typeof args !== 'object' || !('query' in args)) { 1276 | throw new Error('Invalid arguments for firecrawl_deep_research'); 1277 | } 1278 | 1279 | try { 1280 | const researchStartTime = Date.now(); 1281 | server.sendLoggingMessage({ 1282 | level: 'info', 1283 | data: `Starting deep research for query: ${args.query}`, 1284 | }); 1285 | 1286 | const response = await client.deepResearch( 1287 | args.query as string, 1288 | { 1289 | maxDepth: args.maxDepth as number, 1290 | timeLimit: args.timeLimit as number, 1291 | maxUrls: args.maxUrls as number, 1292 | }, 1293 | // Activity callback 1294 | (activity) => { 1295 | server.sendLoggingMessage({ 1296 | level: 'info', 1297 | data: `Research activity: ${activity.message} (Depth: ${activity.depth})`, 1298 | }); 1299 | }, 1300 | // Source callback 1301 | (source) => { 1302 | server.sendLoggingMessage({ 1303 | level: 'info', 1304 | data: `Research source found: ${source.url}${source.title ? ` - ${source.title}` : ''}`, 1305 | }); 1306 | } 1307 | ); 1308 | 1309 | // Log performance metrics 1310 | server.sendLoggingMessage({ 1311 | level: 'info', 1312 | data: `Deep research completed in ${Date.now() - researchStartTime}ms`, 1313 | }); 1314 | 1315 | if (!response.success) { 1316 | throw new Error(response.error || 'Deep research failed'); 1317 | } 1318 | 1319 | // Format the results 1320 | const formattedResponse = { 1321 | finalAnalysis: response.data.finalAnalysis, 1322 | activities: response.data.activities, 1323 | sources: response.data.sources, 1324 | }; 1325 | 1326 | return { 1327 | content: [{ type: 'text', text: formattedResponse.finalAnalysis }], 1328 | isError: false, 1329 | }; 1330 | } catch (error) { 1331 | const errorMessage = error instanceof Error ? error.message : String(error); 1332 | return { 1333 | content: [{ type: 'text', text: errorMessage }], 1334 | isError: true, 1335 | }; 1336 | } 1337 | } 1338 | 1339 | default: 1340 | return { 1341 | content: [{ type: 'text', text: `Unknown tool: ${name}` }], 1342 | isError: true, 1343 | }; 1344 | } 1345 | } catch (error) { 1346 | // Log detailed error information 1347 | server.sendLoggingMessage({ 1348 | level: 'error', 1349 | data: { 1350 | message: `Request failed: ${ 1351 | error instanceof Error ? error.message : String(error) 1352 | }`, 1353 | tool: request.params.name, 1354 | arguments: request.params.arguments, 1355 | timestamp: new Date().toISOString(), 1356 | duration: Date.now() - startTime, 1357 | }, 1358 | }); 1359 | return { 1360 | content: [ 1361 | { 1362 | type: 'text', 1363 | text: `Error: ${ 1364 | error instanceof Error ? 
error.message : String(error) 1365 | }`, 1366 | }, 1367 | ], 1368 | isError: true, 1369 | }; 1370 | } finally { 1371 | // Log request completion with performance metrics 1372 | server.sendLoggingMessage({ 1373 | level: 'info', 1374 | data: `Request completed in ${Date.now() - startTime}ms`, 1375 | }); 1376 | } 1377 | }); 1378 | 1379 | // Helper function to format results 1380 | function formatResults(data: FirecrawlDocument[]): string { 1381 | return data 1382 | .map((doc) => { 1383 | const content = doc.markdown || doc.html || doc.rawHtml || 'No content'; 1384 | return `URL: ${doc.url || 'Unknown URL'} 1385 | Content: ${content.substring(0, 100)}${content.length > 100 ? '...' : ''} 1386 | ${doc.metadata?.title ? `Title: ${doc.metadata.title}` : ''}`; 1387 | }) 1388 | .join('\n\n'); 1389 | } 1390 | 1391 | // Server startup 1392 | async function runServer() { 1393 | try { 1394 | console.error('Initializing FireCrawl MCP Server...'); 1395 | 1396 | const transport = new StdioServerTransport(); 1397 | await server.connect(transport); 1398 | 1399 | // Now that we're connected, we can send logging messages 1400 | server.sendLoggingMessage({ 1401 | level: 'info', 1402 | data: 'FireCrawl MCP Server initialized successfully', 1403 | }); 1404 | 1405 | server.sendLoggingMessage({ 1406 | level: 'info', 1407 | data: `Configuration: API URL: ${FIRECRAWL_API_URL || 'default'}`, 1408 | }); 1409 | 1410 | console.error('FireCrawl MCP Server running on stdio'); 1411 | } catch (error) { 1412 | console.error('Fatal error running server:', error); 1413 | process.exit(1); 1414 | } 1415 | } 1416 | 1417 | runServer().catch((error) => { 1418 | console.error('Fatal error running server:', error); 1419 | process.exit(1); 1420 | }); 1421 | 1422 | // Add type guard for credit usage 1423 | function hasCredits(response: any): response is { creditsUsed: number } { 1424 | return 'creditsUsed' in response && typeof response.creditsUsed === 'number'; 1425 | } 1426 | --------------------------------------------------------------------------------