├── .prettierrc ├── .gitignore ├── tsconfig.json ├── jest.config.js ├── .github └── workflows │ ├── ci.yml │ └── publish.yml ├── smithery.yaml ├── .eslintrc.json ├── LICENSE ├── Dockerfile ├── jest.setup.ts ├── package.json ├── CHANGELOG.md ├── src ├── index.test.ts └── index.ts └── README.md /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "trailingComma": "es5", 4 | "singleQuote": true, 5 | "printWidth": 80, 6 | "tabWidth": 2, 7 | "useTabs": false 8 | } 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | node_modules/ 3 | 4 | # Build 5 | dist/ 6 | 7 | # Logs 8 | logs 9 | *.log 10 | npm-debug.log* 11 | 12 | # Environment 13 | .env 14 | .env.local 15 | .env.*.local 16 | claude_desktop_config.json 17 | 18 | # IDE 19 | .idea/ 20 | .vscode/ 21 | *.swp 22 | *.swo 23 | .cursorrules.md 24 | IMPLEMENTATION.md 25 | v1.2.md 26 | 27 | # OS 28 | .DS_Store 29 | Thumbs.db -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "NodeNext", 5 | "moduleResolution": "NodeNext", 6 | "outDir": "./dist", 7 | "rootDir": "./src", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true 12 | }, 13 | "include": ["src/**/*"], 14 | "exclude": ["node_modules", "dist", "tests"] 15 | } 16 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | preset: 'ts-jest/presets/default-esm', 3 | testEnvironment: 'node', 4 | extensionsToTreatAsEsm: ['.ts'], 5 | transform: { 6 | '^.+\\.tsx?$': [ 7 | 'ts-jest', 8 | { 9 | useESM: true, 10 | }, 11 | ], 12 | }, 13 | moduleNameMapper: { 14 | '^(\\.{1,2}/.*)\\.js$': '$1', 15 | }, 16 | testMatch: ['**/*.test.ts'], 17 | setupFilesAfterEnv: ['/jest.setup.ts'], 18 | }; 19 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - name: Use Node.js 17 | uses: actions/setup-node@v3 18 | with: 19 | node-version: '20.x' 20 | cache: 'npm' 21 | 22 | - name: Install dependencies 23 | run: npm ci 24 | 25 | - name: Build 26 | run: npm run build 27 | 28 | - name: Lint 29 | run: npm run lint 30 | 31 | - name: Test 32 | run: npm test 33 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | workflow_dispatch: 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v3 14 | 15 | - name: Use Node.js 16 | uses: actions/setup-node@v3 17 | with: 18 | node-version: '20.x' 19 | registry-url: 'https://registry.npmjs.org' 20 | 21 | - name: Install dependencies 22 | run: npm ci 23 | 24 | - name: 
Build 25 | run: npm run build 26 | 27 | - name: Publish to NPM 28 | run: npm publish 29 | env: 30 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 31 | -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml 2 | 3 | startCommand: 4 | type: stdio 5 | configSchema: 6 | # JSON Schema defining the configuration options for the MCP. 7 | type: object 8 | required: 9 | - fireCrawlApiKey 10 | properties: 11 | fireCrawlApiKey: 12 | type: string 13 | description: Your FireCrawl API key. Required for cloud API usage. 14 | fireCrawlApiUrl: 15 | type: string 16 | description: Custom API endpoint for self-hosted instances. If provided, API key 17 | becomes optional. 18 | commandFunction: 19 | # A function that produces the CLI command to start the MCP on stdio. 20 | |- 21 | (config) => ({ command: 'node', args: ['dist/src/index.js'], env: { FIRECRAWL_API_KEY: config.fireCrawlApiKey, FIRECRAWL_API_URL: config.fireCrawlApiUrl || '' } }) 22 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "parser": "@typescript-eslint/parser", 3 | "plugins": ["@typescript-eslint"], 4 | "extends": [ 5 | "eslint:recommended", 6 | "plugin:@typescript-eslint/recommended", 7 | "prettier" 8 | ], 9 | "env": { 10 | "node": true, 11 | "es2022": true 12 | }, 13 | "parserOptions": { 14 | "ecmaVersion": 2022, 15 | "sourceType": "module", 16 | "project": "./tsconfig.json" 17 | }, 18 | "rules": { 19 | "@typescript-eslint/explicit-function-return-type": "off", 20 | "@typescript-eslint/no-explicit-any": "off", 21 | "@typescript-eslint/no-unused-vars": [ 22 | "error", 23 | { "argsIgnorePattern": "^_" } 24 | ] 25 | }, 26 | "overrides": [ 27 | { 28 | "files": ["**/*.test.ts"], 29 | "rules": { 30 | "@typescript-eslint/no-unused-vars": "off", 31 | "@typescript-eslint/no-explicit-any": "off" 32 | } 33 | } 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 vrknetha 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile 2 | # Use a Node.js image as the base for building the application 3 | FROM node:18-alpine AS builder 4 | 5 | # Set the working directory inside the container 6 | WORKDIR /app 7 | 8 | # Copy package.json and package-lock.json to install dependencies 9 | COPY package.json package-lock.json ./ 10 | 11 | # Install dependencies (ignoring scripts to prevent running the prepare script) 12 | RUN npm install --ignore-scripts 13 | 14 | # Copy the rest of the application source code 15 | COPY . . 16 | 17 | # Build the application using TypeScript 18 | RUN npm run build 19 | 20 | # Use a smaller Node.js image for the final image 21 | FROM node:18-slim AS release 22 | 23 | # Set the working directory inside the container 24 | WORKDIR /app 25 | 26 | # Copy the built application from the builder stage 27 | COPY --from=builder /app/dist /app/dist 28 | COPY --from=builder /app/package.json /app/package.json 29 | COPY --from=builder /app/package-lock.json /app/package-lock.json 30 | 31 | # Install only production dependencies 32 | RUN npm ci --omit=dev 33 | 34 | # Set environment variables for API key and custom API URL if needed 35 | ENV FIRECRAWL_API_KEY=your-api-key 36 | ENV FIRECRAWL_API_URL=https://firecrawl.your-domain.com 37 | 38 | # Specify the command to run the application 39 | ENTRYPOINT ["node", "dist/src/index.js"] 40 | -------------------------------------------------------------------------------- /jest.setup.ts: -------------------------------------------------------------------------------- 1 | import { jest } from '@jest/globals'; 2 | import FirecrawlApp from '@mendable/firecrawl-js'; 3 | import type { 4 | SearchResponse, 5 | BatchScrapeResponse, 6 | BatchScrapeStatusResponse, 7 | FirecrawlDocument, 8 | } from '@mendable/firecrawl-js'; 9 | 10 | // Set test timeout 11 | jest.setTimeout(30000); 12 | 13 | // Create mock responses 14 | const mockSearchResponse: SearchResponse = { 15 | success: true, 16 | data: [ 17 | { 18 | url: 'https://example.com', 19 | title: 'Test Page', 20 | description: 'Test Description', 21 | markdown: '# Test Content', 22 | actions: null as never, 23 | }, 24 | ] as FirecrawlDocument[], 25 | }; 26 | 27 | const mockBatchScrapeResponse: BatchScrapeResponse = { 28 | success: true, 29 | id: 'test-batch-id', 30 | }; 31 | 32 | const mockBatchStatusResponse: BatchScrapeStatusResponse = { 33 | success: true, 34 | status: 'completed', 35 | completed: 1, 36 | total: 1, 37 | creditsUsed: 1, 38 | expiresAt: new Date(), 39 | data: [ 40 | { 41 | url: 'https://example.com', 42 | title: 'Test Page', 43 | description: 'Test Description', 44 | markdown: '# Test Content', 45 | actions: null as never, 46 | }, 47 | ] as FirecrawlDocument[], 48 | }; 49 | 50 | // Create mock instance methods 51 | const mockSearch = jest.fn().mockImplementation(async () => mockSearchResponse); 52 | const mockAsyncBatchScrapeUrls = jest 53 | .fn() 54 | .mockImplementation(async () => mockBatchScrapeResponse); 55 | const mockCheckBatchScrapeStatus = jest 56 | .fn() 57 | .mockImplementation(async () => mockBatchStatusResponse); 58 | 59 | // Create mock instance 60 | const mockInstance = { 61 | apiKey: 'test-api-key', 62 | apiUrl: 'test-api-url', 63 | search: mockSearch, 64 | asyncBatchScrapeUrls: mockAsyncBatchScrapeUrls, 65 | checkBatchScrapeStatus: 
mockCheckBatchScrapeStatus, 66 | }; 67 | 68 | // Mock the module 69 | jest.mock('@mendable/firecrawl-js', () => ({ 70 | __esModule: true, 71 | default: jest.fn().mockImplementation(() => mockInstance), 72 | })); 73 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "firecrawl-mcp", 3 | "version": "1.5.0", 4 | "description": "MCP server for FireCrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, batch processing, structured data extraction, and LLM-powered content analysis.", 5 | "type": "module", 6 | "bin": { 7 | "firecrawl-mcp": "dist/index.js" 8 | }, 9 | "files": [ 10 | "dist" 11 | ], 12 | "publishConfig": { 13 | "access": "public" 14 | }, 15 | "scripts": { 16 | "build": "tsc && node -e \"require('fs').chmodSync('dist/index.js', '755')\"", 17 | "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js", 18 | "start": "node dist/index.js", 19 | "lint": "eslint src/**/*.ts", 20 | "lint:fix": "eslint src/**/*.ts --fix", 21 | "format": "prettier --write .", 22 | "prepare": "npm run build", 23 | "publish": "npm run build && npm publish" 24 | }, 25 | "license": "ISC", 26 | "dependencies": { 27 | "@mendable/firecrawl-js": "^1.19.0", 28 | "@modelcontextprotocol/sdk": "^1.4.1", 29 | "dotenv": "^16.4.7", 30 | "p-queue": "^8.0.1", 31 | "shx": "^0.3.4" 32 | }, 33 | "devDependencies": { 34 | "@jest/globals": "^29.7.0", 35 | "@types/jest": "^29.5.14", 36 | "@types/node": "^20.10.5", 37 | "@typescript-eslint/eslint-plugin": "^7.0.0", 38 | "@typescript-eslint/parser": "^7.0.0", 39 | "eslint": "^8.56.0", 40 | "eslint-config-prettier": "^9.1.0", 41 | "jest": "^29.7.0", 42 | "jest-mock-extended": "^4.0.0-beta1", 43 | "prettier": "^3.1.1", 44 | "ts-jest": "^29.1.1", 45 | "typescript": "^5.3.3" 46 | }, 47 | "engines": { 48 | "node": ">=18.0.0" 49 | }, 50 | "keywords": [ 51 | "mcp", 52 | "firecrawl", 53 | "web-scraping", 54 | "crawler", 55 | "content-extraction" 56 | ], 57 | "repository": { 58 | "type": "git", 59 | "url": "git+https://github.com/mendableai/firecrawl-mcp-server.git" 60 | }, 61 | "author": "vrknetha", 62 | "bugs": { 63 | "url": "https://github.com/mendableai/firecrawl-mcp-server/issues" 64 | }, 65 | "homepage": "https://github.com/mendableai/firecrawl-mcp-server#readme" 66 | } 67 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [1.2.4] - 2024-02-05 4 | 5 | ### Added 6 | 7 | - Environment variable support for all configuration options 8 | - Detailed configuration documentation in README 9 | 10 | ### Changed 11 | 12 | - Made retry and credit monitoring settings configurable via environment variables: 13 | - `FIRECRAWL_RETRY_MAX_ATTEMPTS` 14 | - `FIRECRAWL_RETRY_INITIAL_DELAY` 15 | - `FIRECRAWL_RETRY_MAX_DELAY` 16 | - `FIRECRAWL_RETRY_BACKOFF_FACTOR` 17 | - `FIRECRAWL_CREDIT_WARNING_THRESHOLD` 18 | - `FIRECRAWL_CREDIT_CRITICAL_THRESHOLD` 19 | - Enhanced configuration examples with detailed comments and use cases 20 | - Improved documentation for retry behavior and credit monitoring 21 | 22 | ### Documentation 23 | 24 | - Added comprehensive configuration examples for both cloud and self-hosted setups 25 | - Added detailed explanations of retry behavior with timing examples 26 | - Added credit monitoring threshold explanations 27 | 
- Updated Claude Desktop configuration documentation 28 | 29 | ## [1.2.3] - 2024-02-05 30 | 31 | ### Changed 32 | 33 | - Removed redundant batch configuration to rely on FireCrawl library's built-in functionality 34 | - Simplified batch processing logic by leveraging library's native implementation 35 | - Optimized parallel processing and rate limiting handling 36 | - Reduced code complexity and potential configuration conflicts 37 | 38 | ### Technical 39 | 40 | - Removed custom `CONFIG.batch` settings (`maxParallelOperations` and `delayBetweenRequests`) 41 | - Simplified batch operation processing to use library's built-in batch handling 42 | - Updated server startup logging to remove batch configuration references 43 | - Maintained credit usage tracking and error handling functionality 44 | 45 | ## [1.2.2] - 2025-02-05 46 | 47 | ### Fixed 48 | 49 | - Resolved unused interface warnings for ExtractParams and ExtractResponse 50 | - Improved type safety in extract operations 51 | - Fixed type casting issues in API responses 52 | 53 | ### Changed 54 | 55 | - Improved type guards for better type inference 56 | - Enhanced error messages for configuration validation 57 | 58 | ## [1.2.0] - 2024-01-03 59 | 60 | ### Added 61 | 62 | - Implemented automatic retries with exponential backoff for rate limits 63 | - Added queue system for batch operations with parallel processing 64 | - Integrated credit usage monitoring with warning thresholds 65 | - Enhanced content validation with configurable criteria 66 | - Added comprehensive logging system for operations and errors 67 | - New search tool (`firecrawl_search`) for web search with content extraction 68 | - Support for self-hosted FireCrawl instances via optional API URL configuration 69 | - New `FIRECRAWL_API_URL` environment variable 70 | - Automatic fallback to cloud API 71 | - Improved error messages for self-hosted instances 72 | 73 | ### Changed 74 | 75 | - Improved error handling for HTTP errors including 404s 76 | - Enhanced URL validation before scraping 77 | - Updated configuration with new retry and batch processing options 78 | - Optimized rate limiting with automatic backoff strategy 79 | - Improved documentation with new features and examples 80 | - Added detailed self-hosted configuration guide 81 | 82 | ### Fixed 83 | 84 | - Rate limit handling in batch operations 85 | - Error response formatting 86 | - Type definitions for response handlers 87 | - Test suite mock responses 88 | - Error handling for invalid search queries 89 | - API configuration validation 90 | 91 | ## [1.0.1] - 2023-12-03 92 | 93 | ### Added 94 | 95 | - Initial release with basic scraping functionality 96 | - Support for batch scraping 97 | - URL discovery and crawling capabilities 98 | - Rate limiting implementation 99 | -------------------------------------------------------------------------------- /src/index.test.ts: -------------------------------------------------------------------------------- 1 | import { Server } from '@modelcontextprotocol/sdk/server/index.js'; 2 | import { CallToolRequestSchema } from '@modelcontextprotocol/sdk/types.js'; 3 | import FirecrawlApp from '@mendable/firecrawl-js'; 4 | import type { 5 | SearchResponse, 6 | BatchScrapeResponse, 7 | BatchScrapeStatusResponse, 8 | CrawlResponse, 9 | CrawlStatusResponse, 10 | ScrapeResponse, 11 | FirecrawlDocument, 12 | SearchParams, 13 | } from '@mendable/firecrawl-js'; 14 | import { 15 | describe, 16 | expect, 17 | jest, 18 | test, 19 | beforeEach, 20 | afterEach, 21 | } from '@jest/globals'; 22 | 
import { mock, MockProxy } from 'jest-mock-extended'; 23 | 24 | // Mock FirecrawlApp 25 | jest.mock('@mendable/firecrawl-js'); 26 | 27 | // Test interfaces 28 | interface RequestParams { 29 | method: string; 30 | params: { 31 | name: string; 32 | arguments?: Record; 33 | }; 34 | } 35 | 36 | interface BatchScrapeArgs { 37 | urls: string[]; 38 | options?: { 39 | formats?: string[]; 40 | [key: string]: any; 41 | }; 42 | } 43 | 44 | interface StatusCheckArgs { 45 | id: string; 46 | } 47 | 48 | interface SearchArgs { 49 | query: string; 50 | scrapeOptions?: { 51 | formats?: string[]; 52 | onlyMainContent?: boolean; 53 | }; 54 | } 55 | 56 | interface ScrapeArgs { 57 | url: string; 58 | formats?: string[]; 59 | onlyMainContent?: boolean; 60 | } 61 | 62 | interface CrawlArgs { 63 | url: string; 64 | maxDepth?: number; 65 | limit?: number; 66 | } 67 | 68 | // Mock client interface 69 | interface MockFirecrawlClient { 70 | scrapeUrl(url: string, options?: any): Promise; 71 | search(query: string, params?: SearchParams): Promise; 72 | asyncBatchScrapeUrls( 73 | urls: string[], 74 | options?: any 75 | ): Promise; 76 | checkBatchScrapeStatus(id: string): Promise; 77 | asyncCrawlUrl(url: string, options?: any): Promise; 78 | checkCrawlStatus(id: string): Promise; 79 | mapUrl(url: string, options?: any): Promise<{ links: string[] }>; 80 | } 81 | 82 | describe('FireCrawl Tool Tests', () => { 83 | let mockClient: MockProxy; 84 | let requestHandler: (request: RequestParams) => Promise; 85 | 86 | beforeEach(() => { 87 | jest.clearAllMocks(); 88 | mockClient = mock(); 89 | 90 | // Set up mock implementations 91 | const mockInstance = new FirecrawlApp({ apiKey: 'test' }); 92 | Object.assign(mockInstance, mockClient); 93 | 94 | // Create request handler 95 | requestHandler = async (request: RequestParams) => { 96 | const { name, arguments: args } = request.params; 97 | if (!args) { 98 | throw new Error('No arguments provided'); 99 | } 100 | return handleRequest(name, args, mockClient); 101 | }; 102 | }); 103 | 104 | afterEach(() => { 105 | jest.clearAllMocks(); 106 | }); 107 | 108 | // Test scrape functionality 109 | test('should handle scrape request', async () => { 110 | const url = 'https://example.com'; 111 | const options = { formats: ['markdown'] }; 112 | 113 | const mockResponse: ScrapeResponse = { 114 | success: true, 115 | markdown: '# Test Content', 116 | html: undefined, 117 | rawHtml: undefined, 118 | url: 'https://example.com', 119 | actions: undefined as never, 120 | }; 121 | 122 | mockClient.scrapeUrl.mockResolvedValueOnce(mockResponse); 123 | 124 | const response = await requestHandler({ 125 | method: 'call_tool', 126 | params: { 127 | name: 'firecrawl_scrape', 128 | arguments: { url, ...options }, 129 | }, 130 | }); 131 | 132 | expect(response).toEqual({ 133 | content: [{ type: 'text', text: '# Test Content' }], 134 | isError: false, 135 | }); 136 | expect(mockClient.scrapeUrl).toHaveBeenCalledWith(url, { 137 | formats: ['markdown'], 138 | url, 139 | }); 140 | }); 141 | 142 | // Test batch scrape functionality 143 | test('should handle batch scrape request', async () => { 144 | const urls = ['https://example.com']; 145 | const options = { formats: ['markdown'] }; 146 | 147 | mockClient.asyncBatchScrapeUrls.mockResolvedValueOnce({ 148 | success: true, 149 | id: 'test-batch-id', 150 | }); 151 | 152 | const response = await requestHandler({ 153 | method: 'call_tool', 154 | params: { 155 | name: 'firecrawl_batch_scrape', 156 | arguments: { urls, options }, 157 | }, 158 | }); 159 | 160 | 
expect(response.content[0].text).toContain( 161 | 'Batch operation queued with ID: batch_' 162 | ); 163 | expect(mockClient.asyncBatchScrapeUrls).toHaveBeenCalledWith(urls, options); 164 | }); 165 | 166 | // Test search functionality 167 | test('should handle search request', async () => { 168 | const query = 'test query'; 169 | const scrapeOptions = { formats: ['markdown'] }; 170 | 171 | const mockSearchResponse: SearchResponse = { 172 | success: true, 173 | data: [ 174 | { 175 | url: 'https://example.com', 176 | title: 'Test Page', 177 | description: 'Test Description', 178 | markdown: '# Test Content', 179 | actions: undefined as never, 180 | }, 181 | ], 182 | }; 183 | 184 | mockClient.search.mockResolvedValueOnce(mockSearchResponse); 185 | 186 | const response = await requestHandler({ 187 | method: 'call_tool', 188 | params: { 189 | name: 'firecrawl_search', 190 | arguments: { query, scrapeOptions }, 191 | }, 192 | }); 193 | 194 | expect(response.isError).toBe(false); 195 | expect(response.content[0].text).toContain('Test Page'); 196 | expect(mockClient.search).toHaveBeenCalledWith(query, scrapeOptions); 197 | }); 198 | 199 | // Test crawl functionality 200 | test('should handle crawl request', async () => { 201 | const url = 'https://example.com'; 202 | const options = { maxDepth: 2 }; 203 | 204 | mockClient.asyncCrawlUrl.mockResolvedValueOnce({ 205 | success: true, 206 | id: 'test-crawl-id', 207 | }); 208 | 209 | const response = await requestHandler({ 210 | method: 'call_tool', 211 | params: { 212 | name: 'firecrawl_crawl', 213 | arguments: { url, ...options }, 214 | }, 215 | }); 216 | 217 | expect(response.isError).toBe(false); 218 | expect(response.content[0].text).toContain('test-crawl-id'); 219 | expect(mockClient.asyncCrawlUrl).toHaveBeenCalledWith(url, { 220 | maxDepth: 2, 221 | url, 222 | }); 223 | }); 224 | 225 | // Test error handling 226 | test('should handle API errors', async () => { 227 | const url = 'https://example.com'; 228 | 229 | mockClient.scrapeUrl.mockRejectedValueOnce(new Error('API Error')); 230 | 231 | const response = await requestHandler({ 232 | method: 'call_tool', 233 | params: { 234 | name: 'firecrawl_scrape', 235 | arguments: { url }, 236 | }, 237 | }); 238 | 239 | expect(response.isError).toBe(true); 240 | expect(response.content[0].text).toContain('API Error'); 241 | }); 242 | 243 | // Test rate limiting 244 | test('should handle rate limits', async () => { 245 | const url = 'https://example.com'; 246 | 247 | // Mock rate limit error 248 | mockClient.scrapeUrl.mockRejectedValueOnce( 249 | new Error('rate limit exceeded') 250 | ); 251 | 252 | const response = await requestHandler({ 253 | method: 'call_tool', 254 | params: { 255 | name: 'firecrawl_scrape', 256 | arguments: { url }, 257 | }, 258 | }); 259 | 260 | expect(response.isError).toBe(true); 261 | expect(response.content[0].text).toContain('rate limit exceeded'); 262 | }); 263 | }); 264 | 265 | // Helper function to simulate request handling 266 | async function handleRequest( 267 | name: string, 268 | args: any, 269 | client: MockFirecrawlClient 270 | ) { 271 | try { 272 | switch (name) { 273 | case 'firecrawl_scrape': { 274 | const response = await client.scrapeUrl(args.url, args); 275 | if (!response.success) { 276 | throw new Error(response.error || 'Scraping failed'); 277 | } 278 | return { 279 | content: [ 280 | { type: 'text', text: response.markdown || 'No content available' }, 281 | ], 282 | isError: false, 283 | }; 284 | } 285 | 286 | case 'firecrawl_batch_scrape': { 287 | const 
response = await client.asyncBatchScrapeUrls( 288 | args.urls, 289 | args.options 290 | ); 291 | return { 292 | content: [ 293 | { 294 | type: 'text', 295 | text: `Batch operation queued with ID: batch_1. Use firecrawl_check_batch_status to check progress.`, 296 | }, 297 | ], 298 | isError: false, 299 | }; 300 | } 301 | 302 | case 'firecrawl_search': { 303 | const response = await client.search(args.query, args.scrapeOptions); 304 | if (!response.success) { 305 | throw new Error(response.error || 'Search failed'); 306 | } 307 | const results = response.data 308 | .map( 309 | (result) => 310 | `URL: ${result.url}\nTitle: ${ 311 | result.title || 'No title' 312 | }\nDescription: ${result.description || 'No description'}\n${ 313 | result.markdown ? `\nContent:\n${result.markdown}` : '' 314 | }` 315 | ) 316 | .join('\n\n'); 317 | return { 318 | content: [{ type: 'text', text: results }], 319 | isError: false, 320 | }; 321 | } 322 | 323 | case 'firecrawl_crawl': { 324 | const response = await client.asyncCrawlUrl(args.url, args); 325 | if (!response.success) { 326 | throw new Error(response.error); 327 | } 328 | return { 329 | content: [ 330 | { 331 | type: 'text', 332 | text: `Started crawl for ${args.url} with job ID: ${response.id}`, 333 | }, 334 | ], 335 | isError: false, 336 | }; 337 | } 338 | 339 | default: 340 | throw new Error(`Unknown tool: ${name}`); 341 | } 342 | } catch (error) { 343 | return { 344 | content: [ 345 | { 346 | type: 'text', 347 | text: error instanceof Error ? error.message : String(error), 348 | }, 349 | ], 350 | isError: true, 351 | }; 352 | } 353 | } 354 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Firecrawl MCP Server 2 | 3 | A Model Context Protocol (MCP) server implementation that integrates with [Firecrawl](https://github.com/mendableai/firecrawl) for web scraping capabilities. 4 | 5 | Big thanks to [@vrknetha](https://github.com/vrknetha), [@cawstudios](https://caw.tech) for the initial implementation! 6 | 7 | ## Features 8 | 9 | - Scrape, crawl, search, extract, deep research and batch scrape support 10 | - Web scraping with JS rendering 11 | - URL discovery and crawling 12 | - Web search with content extraction 13 | - Automatic retries with exponential backoff 14 | - - Efficient batch processing with built-in rate limiting 15 | - Credit usage monitoring for cloud API 16 | - Comprehensive logging system 17 | - Support for cloud and self-hosted FireCrawl instances 18 | - Mobile/Desktop viewport support 19 | - Smart content filtering with tag inclusion/exclusion 20 | 21 | ## Installation 22 | 23 | ### Running with npx 24 | 25 | ```bash 26 | env FIRECRAWL_API_KEY=fc-YOUR_API_KEY npx -y firecrawl-mcp 27 | ``` 28 | 29 | ### Manual Installation 30 | 31 | ```bash 32 | npm install -g firecrawl-mcp 33 | ``` 34 | 35 | ### Running on Cursor 36 | 37 | Configuring Cursor 🖥️ 38 | Note: Requires Cursor version 0.45.6+ 39 | 40 | To configure FireCrawl MCP in Cursor: 41 | 42 | 1. Open Cursor Settings 43 | 2. Go to Features > MCP Servers 44 | 3. Click "+ Add New MCP Server" 45 | 4. 
Enter the following: 46 | - Name: "firecrawl-mcp" (or your preferred name) 47 | - Type: "command" 48 | - Command: `env FIRECRAWL_API_KEY=your-api-key npx -y firecrawl-mcp` 49 | 50 | > If you are using Windows and are running into issues, try `cmd /c "set FIRECRAWL_API_KEY=your-api-key && npx -y firecrawl-mcp"` 51 | 52 | Replace `your-api-key` with your FireCrawl API key. 53 | 54 | After adding, refresh the MCP server list to see the new tools. The Composer Agent will automatically use FireCrawl MCP when appropriate, but you can explicitly request it by describing your web scraping needs. Access the Composer via Command+L (Mac), select "Agent" next to the submit button, and enter your query. 55 | 56 | ### Running on Windsurf 57 | 58 | Add this to your `./codeium/windsurf/model_config.json`: 59 | 60 | ```json 61 | { 62 | "mcpServers": { 63 | "mcp-server-firecrawl": { 64 | "command": "npx", 65 | "args": ["-y", "firecrawl-mcp"], 66 | "env": { 67 | "FIRECRAWL_API_KEY": "YOUR_API_KEY_HERE" 68 | } 69 | } 70 | } 71 | } 72 | ``` 73 | 74 | 75 | ### Installing via Smithery (Legacy) 76 | 77 | To install FireCrawl for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@mendableai/mcp-server-firecrawl): 78 | 79 | ```bash 80 | npx -y @smithery/cli install @mendableai/mcp-server-firecrawl --client claude 81 | ``` 82 | 83 | ## Configuration 84 | 85 | ### Environment Variables 86 | 87 | #### Required for Cloud API 88 | 89 | - `FIRECRAWL_API_KEY`: Your FireCrawl API key 90 | - Required when using cloud API (default) 91 | - Optional when using self-hosted instance with `FIRECRAWL_API_URL` 92 | - `FIRECRAWL_API_URL` (Optional): Custom API endpoint for self-hosted instances 93 | - Example: `https://firecrawl.your-domain.com` 94 | - If not provided, the cloud API will be used (requires API key) 95 | 96 | #### Optional Configuration 97 | 98 | ##### Retry Configuration 99 | 100 | - `FIRECRAWL_RETRY_MAX_ATTEMPTS`: Maximum number of retry attempts (default: 3) 101 | - `FIRECRAWL_RETRY_INITIAL_DELAY`: Initial delay in milliseconds before first retry (default: 1000) 102 | - `FIRECRAWL_RETRY_MAX_DELAY`: Maximum delay in milliseconds between retries (default: 10000) 103 | - `FIRECRAWL_RETRY_BACKOFF_FACTOR`: Exponential backoff multiplier (default: 2) 104 | 105 | ##### Credit Usage Monitoring 106 | 107 | - `FIRECRAWL_CREDIT_WARNING_THRESHOLD`: Credit usage warning threshold (default: 1000) 108 | - `FIRECRAWL_CREDIT_CRITICAL_THRESHOLD`: Credit usage critical threshold (default: 100) 109 | 110 | ### Configuration Examples 111 | 112 | For cloud API usage with custom retry and credit monitoring: 113 | 114 | ```bash 115 | # Required for cloud API 116 | export FIRECRAWL_API_KEY=your-api-key 117 | 118 | # Optional retry configuration 119 | export FIRECRAWL_RETRY_MAX_ATTEMPTS=5 # Increase max retry attempts 120 | export FIRECRAWL_RETRY_INITIAL_DELAY=2000 # Start with 2s delay 121 | export FIRECRAWL_RETRY_MAX_DELAY=30000 # Maximum 30s delay 122 | export FIRECRAWL_RETRY_BACKOFF_FACTOR=3 # More aggressive backoff 123 | 124 | # Optional credit monitoring 125 | export FIRECRAWL_CREDIT_WARNING_THRESHOLD=2000 # Warning at 2000 credits 126 | export FIRECRAWL_CREDIT_CRITICAL_THRESHOLD=500 # Critical at 500 credits 127 | ``` 128 | 129 | For self-hosted instance: 130 | 131 | ```bash 132 | # Required for self-hosted 133 | export FIRECRAWL_API_URL=https://firecrawl.your-domain.com 134 | 135 | # Optional authentication for self-hosted 136 | export FIRECRAWL_API_KEY=your-api-key # If your instance requires auth 137 | 138 | 
# Custom retry configuration 139 | export FIRECRAWL_RETRY_MAX_ATTEMPTS=10 140 | export FIRECRAWL_RETRY_INITIAL_DELAY=500 # Start with faster retries 141 | ``` 142 | 143 | ### Usage with Claude Desktop 144 | 145 | Add this to your `claude_desktop_config.json`: 146 | 147 | ```json 148 | { 149 | "mcpServers": { 150 | "mcp-server-firecrawl": { 151 | "command": "npx", 152 | "args": ["-y", "firecrawl-mcp"], 153 | "env": { 154 | "FIRECRAWL_API_KEY": "YOUR_API_KEY_HERE", 155 | 156 | "FIRECRAWL_RETRY_MAX_ATTEMPTS": "5", 157 | "FIRECRAWL_RETRY_INITIAL_DELAY": "2000", 158 | "FIRECRAWL_RETRY_MAX_DELAY": "30000", 159 | "FIRECRAWL_RETRY_BACKOFF_FACTOR": "3", 160 | 161 | "FIRECRAWL_CREDIT_WARNING_THRESHOLD": "2000", 162 | "FIRECRAWL_CREDIT_CRITICAL_THRESHOLD": "500" 163 | } 164 | } 165 | } 166 | } 167 | ``` 168 | 169 | ### System Configuration 170 | 171 | The server includes several configurable parameters that can be set via environment variables. Here are the default values if not configured: 172 | 173 | ```typescript 174 | const CONFIG = { 175 | retry: { 176 | maxAttempts: 3, // Number of retry attempts for rate-limited requests 177 | initialDelay: 1000, // Initial delay before first retry (in milliseconds) 178 | maxDelay: 10000, // Maximum delay between retries (in milliseconds) 179 | backoffFactor: 2, // Multiplier for exponential backoff 180 | }, 181 | credit: { 182 | warningThreshold: 1000, // Warn when credit usage reaches this level 183 | criticalThreshold: 100, // Critical alert when credit usage reaches this level 184 | }, 185 | }; 186 | ``` 187 | 188 | These configurations control: 189 | 190 | 1. **Retry Behavior** 191 | 192 | - Automatically retries failed requests due to rate limits 193 | - Uses exponential backoff to avoid overwhelming the API 194 | - Example: With default settings, retries will be attempted at: 195 | - 1st retry: 1 second delay 196 | - 2nd retry: 2 seconds delay 197 | - 3rd retry: 4 seconds delay (capped at maxDelay) 198 | 199 | 2. **Credit Usage Monitoring** 200 | - Tracks API credit consumption for cloud API usage 201 | - Provides warnings at specified thresholds 202 | - Helps prevent unexpected service interruption 203 | - Example: With default settings: 204 | - Warning at 1000 credits remaining 205 | - Critical alert at 100 credits remaining 206 | 207 | ### Rate Limiting and Batch Processing 208 | 209 | The server utilizes FireCrawl's built-in rate limiting and batch processing capabilities: 210 | 211 | - Automatic rate limit handling with exponential backoff 212 | - Efficient parallel processing for batch operations 213 | - Smart request queuing and throttling 214 | - Automatic retries for transient errors 215 | 216 | ## Available Tools 217 | 218 | ### 1. Scrape Tool (`firecrawl_scrape`) 219 | 220 | Scrape content from a single URL with advanced options. 221 | 222 | ```json 223 | { 224 | "name": "firecrawl_scrape", 225 | "arguments": { 226 | "url": "https://example.com", 227 | "formats": ["markdown"], 228 | "onlyMainContent": true, 229 | "waitFor": 1000, 230 | "timeout": 30000, 231 | "mobile": false, 232 | "includeTags": ["article", "main"], 233 | "excludeTags": ["nav", "footer"], 234 | "skipTlsVerification": false 235 | } 236 | } 237 | ``` 238 | 239 | ### 2. Batch Scrape Tool (`firecrawl_batch_scrape`) 240 | 241 | Scrape multiple URLs efficiently with built-in rate limiting and parallel processing. 
242 | 243 | ```json 244 | { 245 | "name": "firecrawl_batch_scrape", 246 | "arguments": { 247 | "urls": ["https://example1.com", "https://example2.com"], 248 | "options": { 249 | "formats": ["markdown"], 250 | "onlyMainContent": true 251 | } 252 | } 253 | } 254 | ``` 255 | 256 | Response includes operation ID for status checking: 257 | 258 | ```json 259 | { 260 | "content": [ 261 | { 262 | "type": "text", 263 | "text": "Batch operation queued with ID: batch_1. Use firecrawl_check_batch_status to check progress." 264 | } 265 | ], 266 | "isError": false 267 | } 268 | ``` 269 | 270 | ### 3. Check Batch Status (`firecrawl_check_batch_status`) 271 | 272 | Check the status of a batch operation. 273 | 274 | ```json 275 | { 276 | "name": "firecrawl_check_batch_status", 277 | "arguments": { 278 | "id": "batch_1" 279 | } 280 | } 281 | ``` 282 | 283 | ### 4. Search Tool (`firecrawl_search`) 284 | 285 | Search the web and optionally extract content from search results. 286 | 287 | ```json 288 | { 289 | "name": "firecrawl_search", 290 | "arguments": { 291 | "query": "your search query", 292 | "limit": 5, 293 | "lang": "en", 294 | "country": "us", 295 | "scrapeOptions": { 296 | "formats": ["markdown"], 297 | "onlyMainContent": true 298 | } 299 | } 300 | } 301 | ``` 302 | 303 | ### 5. Crawl Tool (`firecrawl_crawl`) 304 | 305 | Start an asynchronous crawl with advanced options. 306 | 307 | ```json 308 | { 309 | "name": "firecrawl_crawl", 310 | "arguments": { 311 | "url": "https://example.com", 312 | "maxDepth": 2, 313 | "limit": 100, 314 | "allowExternalLinks": false, 315 | "deduplicateSimilarURLs": true 316 | } 317 | } 318 | ``` 319 | 320 | ### 6. Extract Tool (`firecrawl_extract`) 321 | 322 | Extract structured information from web pages using LLM capabilities. Supports both cloud AI and self-hosted LLM extraction. 323 | 324 | ```json 325 | { 326 | "name": "firecrawl_extract", 327 | "arguments": { 328 | "urls": ["https://example.com/page1", "https://example.com/page2"], 329 | "prompt": "Extract product information including name, price, and description", 330 | "systemPrompt": "You are a helpful assistant that extracts product information", 331 | "schema": { 332 | "type": "object", 333 | "properties": { 334 | "name": { "type": "string" }, 335 | "price": { "type": "number" }, 336 | "description": { "type": "string" } 337 | }, 338 | "required": ["name", "price"] 339 | }, 340 | "allowExternalLinks": false, 341 | "enableWebSearch": false, 342 | "includeSubdomains": false 343 | } 344 | } 345 | ``` 346 | 347 | Example response: 348 | 349 | ```json 350 | { 351 | "content": [ 352 | { 353 | "type": "text", 354 | "text": { 355 | "name": "Example Product", 356 | "price": 99.99, 357 | "description": "This is an example product description" 358 | } 359 | } 360 | ], 361 | "isError": false 362 | } 363 | ``` 364 | 365 | #### Extract Tool Options: 366 | 367 | - `urls`: Array of URLs to extract information from 368 | - `prompt`: Custom prompt for the LLM extraction 369 | - `systemPrompt`: System prompt to guide the LLM 370 | - `schema`: JSON schema for structured data extraction 371 | - `allowExternalLinks`: Allow extraction from external links 372 | - `enableWebSearch`: Enable web search for additional context 373 | - `includeSubdomains`: Include subdomains in extraction 374 | 375 | When using a self-hosted instance, the extraction will use your configured LLM. For cloud API, it uses FireCrawl's managed LLM service. 
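The JSON schema above maps directly onto the shape of the object returned in the response. As a minimal TypeScript sketch (the `ExtractArguments` interface and the `mcpClient.callTool` call are illustrative placeholders, not exports of this package), the same request could be assembled like this:

```typescript
// Illustrative only: mirrors the firecrawl_extract arguments shown above.
interface ExtractArguments {
  urls: string[];
  prompt?: string;
  systemPrompt?: string;
  schema?: object;
  allowExternalLinks?: boolean;
  enableWebSearch?: boolean;
  includeSubdomains?: boolean;
}

const extractArgs: ExtractArguments = {
  urls: ['https://example.com/page1'],
  prompt: 'Extract product information including name, price, and description',
  schema: {
    type: 'object',
    properties: {
      name: { type: 'string' },
      price: { type: 'number' },
      description: { type: 'string' },
    },
    required: ['name', 'price'],
  },
};

// Hypothetical MCP client invocation; adapt to whichever MCP client you use.
// await mcpClient.callTool({ name: 'firecrawl_extract', arguments: extractArgs });
```

Fields listed in `required` are guaranteed in the extracted object; all other schema properties are best-effort.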
376 | 377 | ## Logging System 378 | 379 | The server includes comprehensive logging: 380 | 381 | - Operation status and progress 382 | - Performance metrics 383 | - Credit usage monitoring 384 | - Rate limit tracking 385 | - Error conditions 386 | 387 | Example log messages: 388 | 389 | ``` 390 | [INFO] FireCrawl MCP Server initialized successfully 391 | [INFO] Starting scrape for URL: https://example.com 392 | [INFO] Batch operation queued with ID: batch_1 393 | [WARNING] Credit usage has reached warning threshold 394 | [ERROR] Rate limit exceeded, retrying in 2s... 395 | ``` 396 | 397 | ## Error Handling 398 | 399 | The server provides robust error handling: 400 | 401 | - Automatic retries for transient errors 402 | - Rate limit handling with backoff 403 | - Detailed error messages 404 | - Credit usage warnings 405 | - Network resilience 406 | 407 | Example error response: 408 | 409 | ```json 410 | { 411 | "content": [ 412 | { 413 | "type": "text", 414 | "text": "Error: Rate limit exceeded. Retrying in 2 seconds..." 415 | } 416 | ], 417 | "isError": true 418 | } 419 | ``` 420 | 421 | ## Development 422 | 423 | ```bash 424 | # Install dependencies 425 | npm install 426 | 427 | # Build 428 | npm run build 429 | 430 | # Run tests 431 | npm test 432 | ``` 433 | 434 | ### Contributing 435 | 436 | 1. Fork the repository 437 | 2. Create your feature branch 438 | 3. Run tests: `npm test` 439 | 4. Submit a pull request 440 | 441 | ## License 442 | 443 | MIT License - see LICENSE file for details 444 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import { Server } from '@modelcontextprotocol/sdk/server/index.js'; 4 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; 5 | import { 6 | Tool, 7 | CallToolRequestSchema, 8 | ListToolsRequestSchema, 9 | } from '@modelcontextprotocol/sdk/types.js'; 10 | import FirecrawlApp, { 11 | type ScrapeParams, 12 | type MapParams, 13 | type CrawlParams, 14 | type FirecrawlDocument, 15 | } from '@mendable/firecrawl-js'; 16 | import PQueue from 'p-queue'; 17 | 18 | import dotenv from 'dotenv'; 19 | 20 | dotenv.config(); 21 | 22 | // Tool definitions 23 | const SCRAPE_TOOL: Tool = { 24 | name: 'firecrawl_scrape', 25 | description: 26 | 'Scrape a single webpage with advanced options for content extraction. ' + 27 | 'Supports various formats including markdown, HTML, and screenshots. 
' + 28 | 'Can execute custom actions like clicking or scrolling before scraping.', 29 | inputSchema: { 30 | type: 'object', 31 | properties: { 32 | url: { 33 | type: 'string', 34 | description: 'The URL to scrape', 35 | }, 36 | formats: { 37 | type: 'array', 38 | items: { 39 | type: 'string', 40 | enum: [ 41 | 'markdown', 42 | 'html', 43 | 'rawHtml', 44 | 'screenshot', 45 | 'links', 46 | 'screenshot@fullPage', 47 | 'extract', 48 | ], 49 | }, 50 | description: "Content formats to extract (default: ['markdown'])", 51 | }, 52 | onlyMainContent: { 53 | type: 'boolean', 54 | description: 55 | 'Extract only the main content, filtering out navigation, footers, etc.', 56 | }, 57 | includeTags: { 58 | type: 'array', 59 | items: { type: 'string' }, 60 | description: 'HTML tags to specifically include in extraction', 61 | }, 62 | excludeTags: { 63 | type: 'array', 64 | items: { type: 'string' }, 65 | description: 'HTML tags to exclude from extraction', 66 | }, 67 | waitFor: { 68 | type: 'number', 69 | description: 'Time in milliseconds to wait for dynamic content to load', 70 | }, 71 | timeout: { 72 | type: 'number', 73 | description: 74 | 'Maximum time in milliseconds to wait for the page to load', 75 | }, 76 | actions: { 77 | type: 'array', 78 | items: { 79 | type: 'object', 80 | properties: { 81 | type: { 82 | type: 'string', 83 | enum: [ 84 | 'wait', 85 | 'click', 86 | 'screenshot', 87 | 'write', 88 | 'press', 89 | 'scroll', 90 | 'scrape', 91 | 'executeJavascript', 92 | ], 93 | description: 'Type of action to perform', 94 | }, 95 | selector: { 96 | type: 'string', 97 | description: 'CSS selector for the target element', 98 | }, 99 | milliseconds: { 100 | type: 'number', 101 | description: 'Time to wait in milliseconds (for wait action)', 102 | }, 103 | text: { 104 | type: 'string', 105 | description: 'Text to write (for write action)', 106 | }, 107 | key: { 108 | type: 'string', 109 | description: 'Key to press (for press action)', 110 | }, 111 | direction: { 112 | type: 'string', 113 | enum: ['up', 'down'], 114 | description: 'Scroll direction', 115 | }, 116 | script: { 117 | type: 'string', 118 | description: 'JavaScript code to execute', 119 | }, 120 | fullPage: { 121 | type: 'boolean', 122 | description: 'Take full page screenshot', 123 | }, 124 | }, 125 | required: ['type'], 126 | }, 127 | description: 'List of actions to perform before scraping', 128 | }, 129 | extract: { 130 | type: 'object', 131 | properties: { 132 | schema: { 133 | type: 'object', 134 | description: 'Schema for structured data extraction', 135 | }, 136 | systemPrompt: { 137 | type: 'string', 138 | description: 'System prompt for LLM extraction', 139 | }, 140 | prompt: { 141 | type: 'string', 142 | description: 'User prompt for LLM extraction', 143 | }, 144 | }, 145 | description: 'Configuration for structured data extraction', 146 | }, 147 | mobile: { 148 | type: 'boolean', 149 | description: 'Use mobile viewport', 150 | }, 151 | skipTlsVerification: { 152 | type: 'boolean', 153 | description: 'Skip TLS certificate verification', 154 | }, 155 | removeBase64Images: { 156 | type: 'boolean', 157 | description: 'Remove base64 encoded images from output', 158 | }, 159 | location: { 160 | type: 'object', 161 | properties: { 162 | country: { 163 | type: 'string', 164 | description: 'Country code for geolocation', 165 | }, 166 | languages: { 167 | type: 'array', 168 | items: { type: 'string' }, 169 | description: 'Language codes for content', 170 | }, 171 | }, 172 | description: 'Location settings for scraping', 173 | }, 174 | 
}, 175 | required: ['url'], 176 | }, 177 | }; 178 | 179 | const MAP_TOOL: Tool = { 180 | name: 'firecrawl_map', 181 | description: 182 | 'Discover URLs from a starting point. Can use both sitemap.xml and HTML link discovery.', 183 | inputSchema: { 184 | type: 'object', 185 | properties: { 186 | url: { 187 | type: 'string', 188 | description: 'Starting URL for URL discovery', 189 | }, 190 | search: { 191 | type: 'string', 192 | description: 'Optional search term to filter URLs', 193 | }, 194 | ignoreSitemap: { 195 | type: 'boolean', 196 | description: 'Skip sitemap.xml discovery and only use HTML links', 197 | }, 198 | sitemapOnly: { 199 | type: 'boolean', 200 | description: 'Only use sitemap.xml for discovery, ignore HTML links', 201 | }, 202 | includeSubdomains: { 203 | type: 'boolean', 204 | description: 'Include URLs from subdomains in results', 205 | }, 206 | limit: { 207 | type: 'number', 208 | description: 'Maximum number of URLs to return', 209 | }, 210 | }, 211 | required: ['url'], 212 | }, 213 | }; 214 | 215 | const CRAWL_TOOL: Tool = { 216 | name: 'firecrawl_crawl', 217 | description: 218 | 'Start an asynchronous crawl of multiple pages from a starting URL. ' + 219 | 'Supports depth control, path filtering, and webhook notifications.', 220 | inputSchema: { 221 | type: 'object', 222 | properties: { 223 | url: { 224 | type: 'string', 225 | description: 'Starting URL for the crawl', 226 | }, 227 | excludePaths: { 228 | type: 'array', 229 | items: { type: 'string' }, 230 | description: 'URL paths to exclude from crawling', 231 | }, 232 | includePaths: { 233 | type: 'array', 234 | items: { type: 'string' }, 235 | description: 'Only crawl these URL paths', 236 | }, 237 | maxDepth: { 238 | type: 'number', 239 | description: 'Maximum link depth to crawl', 240 | }, 241 | ignoreSitemap: { 242 | type: 'boolean', 243 | description: 'Skip sitemap.xml discovery', 244 | }, 245 | limit: { 246 | type: 'number', 247 | description: 'Maximum number of pages to crawl', 248 | }, 249 | allowBackwardLinks: { 250 | type: 'boolean', 251 | description: 'Allow crawling links that point to parent directories', 252 | }, 253 | allowExternalLinks: { 254 | type: 'boolean', 255 | description: 'Allow crawling links to external domains', 256 | }, 257 | webhook: { 258 | oneOf: [ 259 | { 260 | type: 'string', 261 | description: 'Webhook URL to notify when crawl is complete', 262 | }, 263 | { 264 | type: 'object', 265 | properties: { 266 | url: { 267 | type: 'string', 268 | description: 'Webhook URL', 269 | }, 270 | headers: { 271 | type: 'object', 272 | description: 'Custom headers for webhook requests', 273 | }, 274 | }, 275 | required: ['url'], 276 | }, 277 | ], 278 | }, 279 | deduplicateSimilarURLs: { 280 | type: 'boolean', 281 | description: 'Remove similar URLs during crawl', 282 | }, 283 | ignoreQueryParameters: { 284 | type: 'boolean', 285 | description: 'Ignore query parameters when comparing URLs', 286 | }, 287 | scrapeOptions: { 288 | type: 'object', 289 | properties: { 290 | formats: { 291 | type: 'array', 292 | items: { 293 | type: 'string', 294 | enum: [ 295 | 'markdown', 296 | 'html', 297 | 'rawHtml', 298 | 'screenshot', 299 | 'links', 300 | 'screenshot@fullPage', 301 | 'extract', 302 | ], 303 | }, 304 | }, 305 | onlyMainContent: { 306 | type: 'boolean', 307 | }, 308 | includeTags: { 309 | type: 'array', 310 | items: { type: 'string' }, 311 | }, 312 | excludeTags: { 313 | type: 'array', 314 | items: { type: 'string' }, 315 | }, 316 | waitFor: { 317 | type: 'number', 318 | }, 319 | }, 320 | description: 
'Options for scraping each page', 321 | }, 322 | }, 323 | required: ['url'], 324 | }, 325 | }; 326 | 327 | const BATCH_SCRAPE_TOOL: Tool = { 328 | name: 'firecrawl_batch_scrape', 329 | description: 330 | 'Scrape multiple URLs in batch mode. Returns a job ID that can be used to check status.', 331 | inputSchema: { 332 | type: 'object', 333 | properties: { 334 | urls: { 335 | type: 'array', 336 | items: { type: 'string' }, 337 | description: 'List of URLs to scrape', 338 | }, 339 | options: { 340 | type: 'object', 341 | properties: { 342 | formats: { 343 | type: 'array', 344 | items: { 345 | type: 'string', 346 | enum: [ 347 | 'markdown', 348 | 'html', 349 | 'rawHtml', 350 | 'screenshot', 351 | 'links', 352 | 'screenshot@fullPage', 353 | 'extract', 354 | ], 355 | }, 356 | }, 357 | onlyMainContent: { 358 | type: 'boolean', 359 | }, 360 | includeTags: { 361 | type: 'array', 362 | items: { type: 'string' }, 363 | }, 364 | excludeTags: { 365 | type: 'array', 366 | items: { type: 'string' }, 367 | }, 368 | waitFor: { 369 | type: 'number', 370 | }, 371 | }, 372 | }, 373 | }, 374 | required: ['urls'], 375 | }, 376 | }; 377 | 378 | const CHECK_BATCH_STATUS_TOOL: Tool = { 379 | name: 'firecrawl_check_batch_status', 380 | description: 'Check the status of a batch scraping job.', 381 | inputSchema: { 382 | type: 'object', 383 | properties: { 384 | id: { 385 | type: 'string', 386 | description: 'Batch job ID to check', 387 | }, 388 | }, 389 | required: ['id'], 390 | }, 391 | }; 392 | 393 | const CHECK_CRAWL_STATUS_TOOL: Tool = { 394 | name: 'firecrawl_check_crawl_status', 395 | description: 'Check the status of a crawl job.', 396 | inputSchema: { 397 | type: 'object', 398 | properties: { 399 | id: { 400 | type: 'string', 401 | description: 'Crawl job ID to check', 402 | }, 403 | }, 404 | required: ['id'], 405 | }, 406 | }; 407 | 408 | const SEARCH_TOOL: Tool = { 409 | name: 'firecrawl_search', 410 | description: 411 | 'Search and retrieve content from web pages with optional scraping. 
' + 412 | 'Returns SERP results by default (url, title, description) or full page content when scrapeOptions are provided.', 413 | inputSchema: { 414 | type: 'object', 415 | properties: { 416 | query: { 417 | type: 'string', 418 | description: 'Search query string', 419 | }, 420 | limit: { 421 | type: 'number', 422 | description: 'Maximum number of results to return (default: 5)', 423 | }, 424 | lang: { 425 | type: 'string', 426 | description: 'Language code for search results (default: en)', 427 | }, 428 | country: { 429 | type: 'string', 430 | description: 'Country code for search results (default: us)', 431 | }, 432 | tbs: { 433 | type: 'string', 434 | description: 'Time-based search filter', 435 | }, 436 | filter: { 437 | type: 'string', 438 | description: 'Search filter', 439 | }, 440 | location: { 441 | type: 'object', 442 | properties: { 443 | country: { 444 | type: 'string', 445 | description: 'Country code for geolocation', 446 | }, 447 | languages: { 448 | type: 'array', 449 | items: { type: 'string' }, 450 | description: 'Language codes for content', 451 | }, 452 | }, 453 | description: 'Location settings for search', 454 | }, 455 | scrapeOptions: { 456 | type: 'object', 457 | properties: { 458 | formats: { 459 | type: 'array', 460 | items: { 461 | type: 'string', 462 | enum: ['markdown', 'html', 'rawHtml'], 463 | }, 464 | description: 'Content formats to extract from search results', 465 | }, 466 | onlyMainContent: { 467 | type: 'boolean', 468 | description: 'Extract only the main content from results', 469 | }, 470 | waitFor: { 471 | type: 'number', 472 | description: 'Time in milliseconds to wait for dynamic content', 473 | }, 474 | }, 475 | description: 'Options for scraping search results', 476 | }, 477 | }, 478 | required: ['query'], 479 | }, 480 | }; 481 | 482 | const EXTRACT_TOOL: Tool = { 483 | name: 'firecrawl_extract', 484 | description: 485 | 'Extract structured information from web pages using LLM. 
' + 486 | 'Supports both cloud AI and self-hosted LLM extraction.', 487 | inputSchema: { 488 | type: 'object', 489 | properties: { 490 | urls: { 491 | type: 'array', 492 | items: { type: 'string' }, 493 | description: 'List of URLs to extract information from', 494 | }, 495 | prompt: { 496 | type: 'string', 497 | description: 'Prompt for the LLM extraction', 498 | }, 499 | systemPrompt: { 500 | type: 'string', 501 | description: 'System prompt for LLM extraction', 502 | }, 503 | schema: { 504 | type: 'object', 505 | description: 'JSON schema for structured data extraction', 506 | }, 507 | allowExternalLinks: { 508 | type: 'boolean', 509 | description: 'Allow extraction from external links', 510 | }, 511 | enableWebSearch: { 512 | type: 'boolean', 513 | description: 'Enable web search for additional context', 514 | }, 515 | includeSubdomains: { 516 | type: 'boolean', 517 | description: 'Include subdomains in extraction', 518 | }, 519 | }, 520 | required: ['urls'], 521 | }, 522 | }; 523 | 524 | const DEEP_RESEARCH_TOOL: Tool = { 525 | name: 'firecrawl_deep_research', 526 | description: 'Conduct deep research on a query using web crawling, search, and AI analysis.', 527 | inputSchema: { 528 | type: 'object', 529 | properties: { 530 | query: { 531 | type: 'string', 532 | description: 'The query to research', 533 | }, 534 | maxDepth: { 535 | type: 'number', 536 | description: 'Maximum depth of research iterations (1-10)', 537 | }, 538 | timeLimit: { 539 | type: 'number', 540 | description: 'Time limit in seconds (30-300)', 541 | }, 542 | maxUrls: { 543 | type: 'number', 544 | description: 'Maximum number of URLs to analyze (1-1000)', 545 | } 546 | }, 547 | required: ['query'], 548 | }, 549 | }; 550 | 551 | // Type definitions 552 | interface BatchScrapeOptions { 553 | urls: string[]; 554 | options?: Omit; 555 | } 556 | 557 | interface StatusCheckOptions { 558 | id: string; 559 | } 560 | 561 | interface SearchOptions { 562 | query: string; 563 | limit?: number; 564 | lang?: string; 565 | country?: string; 566 | tbs?: string; 567 | filter?: string; 568 | location?: { 569 | country?: string; 570 | languages?: string[]; 571 | }; 572 | scrapeOptions?: { 573 | formats?: string[]; 574 | onlyMainContent?: boolean; 575 | waitFor?: number; 576 | }; 577 | } 578 | 579 | // Add after other interfaces 580 | interface ExtractParams { 581 | prompt?: string; 582 | systemPrompt?: string; 583 | schema?: T | object; 584 | allowExternalLinks?: boolean; 585 | enableWebSearch?: boolean; 586 | includeSubdomains?: boolean; 587 | origin?: string; 588 | } 589 | 590 | interface ExtractArgs { 591 | urls: string[]; 592 | prompt?: string; 593 | systemPrompt?: string; 594 | schema?: object; 595 | allowExternalLinks?: boolean; 596 | enableWebSearch?: boolean; 597 | includeSubdomains?: boolean; 598 | origin?: string; 599 | } 600 | 601 | interface ExtractResponse { 602 | success: boolean; 603 | data: T; 604 | error?: string; 605 | warning?: string; 606 | creditsUsed?: number; 607 | } 608 | 609 | // Type guards 610 | function isScrapeOptions( 611 | args: unknown 612 | ): args is ScrapeParams & { url: string } { 613 | return ( 614 | typeof args === 'object' && 615 | args !== null && 616 | 'url' in args && 617 | typeof (args as { url: unknown }).url === 'string' 618 | ); 619 | } 620 | 621 | function isMapOptions(args: unknown): args is MapParams & { url: string } { 622 | return ( 623 | typeof args === 'object' && 624 | args !== null && 625 | 'url' in args && 626 | typeof (args as { url: unknown }).url === 'string' 627 | ); 628 | } 
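// Note: these type guards all follow the same minimal pattern: confirm that the single
// required field for the tool ('url', 'urls', 'id', or 'query') is present with the
// expected type and narrow the argument type for the handler; optional fields are not
// validated here.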
629 | 630 | function isCrawlOptions(args: unknown): args is CrawlParams & { url: string } { 631 | return ( 632 | typeof args === 'object' && 633 | args !== null && 634 | 'url' in args && 635 | typeof (args as { url: unknown }).url === 'string' 636 | ); 637 | } 638 | 639 | function isBatchScrapeOptions(args: unknown): args is BatchScrapeOptions { 640 | return ( 641 | typeof args === 'object' && 642 | args !== null && 643 | 'urls' in args && 644 | Array.isArray((args as { urls: unknown }).urls) && 645 | (args as { urls: unknown[] }).urls.every((url) => typeof url === 'string') 646 | ); 647 | } 648 | 649 | function isStatusCheckOptions(args: unknown): args is StatusCheckOptions { 650 | return ( 651 | typeof args === 'object' && 652 | args !== null && 653 | 'id' in args && 654 | typeof (args as { id: unknown }).id === 'string' 655 | ); 656 | } 657 | 658 | function isSearchOptions(args: unknown): args is SearchOptions { 659 | return ( 660 | typeof args === 'object' && 661 | args !== null && 662 | 'query' in args && 663 | typeof (args as { query: unknown }).query === 'string' 664 | ); 665 | } 666 | 667 | function isExtractOptions(args: unknown): args is ExtractArgs { 668 | if (typeof args !== 'object' || args === null) return false; 669 | const { urls } = args as { urls?: unknown }; 670 | return ( 671 | Array.isArray(urls) && 672 | urls.every((url): url is string => typeof url === 'string') 673 | ); 674 | } 675 | 676 | // Server implementation 677 | const server = new Server( 678 | { 679 | name: 'firecrawl-mcp', 680 | version: '1.3.2', 681 | }, 682 | { 683 | capabilities: { 684 | tools: {}, 685 | logging: {}, 686 | }, 687 | } 688 | ); 689 | 690 | // Get optional API URL 691 | const FIRECRAWL_API_URL = process.env.FIRECRAWL_API_URL; 692 | const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY; 693 | 694 | // Check if API key is required (only for cloud service) 695 | if (!FIRECRAWL_API_URL && !FIRECRAWL_API_KEY) { 696 | console.error( 697 | 'Error: FIRECRAWL_API_KEY environment variable is required when using the cloud service' 698 | ); 699 | process.exit(1); 700 | } 701 | 702 | // Initialize FireCrawl client with optional API URL 703 | const client = new FirecrawlApp({ 704 | apiKey: FIRECRAWL_API_KEY || '', 705 | ...(FIRECRAWL_API_URL ? 
{ apiUrl: FIRECRAWL_API_URL } : {}),
706 | });
707 |
708 | // Configuration for retries and monitoring
709 | const CONFIG = {
710 |   retry: {
711 |     maxAttempts: Number(process.env.FIRECRAWL_RETRY_MAX_ATTEMPTS) || 3,
712 |     initialDelay: Number(process.env.FIRECRAWL_RETRY_INITIAL_DELAY) || 1000,
713 |     maxDelay: Number(process.env.FIRECRAWL_RETRY_MAX_DELAY) || 10000,
714 |     backoffFactor: Number(process.env.FIRECRAWL_RETRY_BACKOFF_FACTOR) || 2,
715 |   },
716 |   credit: {
717 |     warningThreshold:
718 |       Number(process.env.FIRECRAWL_CREDIT_WARNING_THRESHOLD) || 1000,
719 |     criticalThreshold:
720 |       Number(process.env.FIRECRAWL_CREDIT_CRITICAL_THRESHOLD) || 100,
721 |   },
722 | };
723 |
724 | // Add credit tracking
725 | interface CreditUsage {
726 |   total: number;
727 |   lastCheck: number;
728 | }
729 |
730 | const creditUsage: CreditUsage = {
731 |   total: 0,
732 |   lastCheck: Date.now(),
733 | };
734 |
735 | // Add utility function for delay
736 | function delay(ms: number): Promise<void> {
737 |   return new Promise((resolve) => setTimeout(resolve, ms));
738 | }
739 |
740 | // Add retry logic with exponential backoff
741 | async function withRetry<T>(
742 |   operation: () => Promise<T>,
743 |   context: string,
744 |   attempt = 1
745 | ): Promise<T> {
746 |   try {
747 |     return await operation();
748 |   } catch (error) {
749 |     const isRateLimit =
750 |       error instanceof Error &&
751 |       (error.message.includes('rate limit') || error.message.includes('429'));
752 |
753 |     if (isRateLimit && attempt < CONFIG.retry.maxAttempts) {
754 |       const delayMs = Math.min(
755 |         CONFIG.retry.initialDelay *
756 |           Math.pow(CONFIG.retry.backoffFactor, attempt - 1),
757 |         CONFIG.retry.maxDelay
758 |       );
759 |
760 |       server.sendLoggingMessage({
761 |         level: 'warning',
762 |         data: `Rate limit hit for ${context}. Attempt ${attempt}/${CONFIG.retry.maxAttempts}. Retrying in ${delayMs}ms`,
763 |       });
764 |
765 |       await delay(delayMs);
766 |       return withRetry(operation, context, attempt + 1);
767 |     }
768 |
769 |     throw error;
770 |   }
771 | }
772 |
773 | // Add credit monitoring
774 | async function updateCreditUsage(creditsUsed: number): Promise<void> {
775 |   creditUsage.total += creditsUsed;
776 |
777 |   // Log credit usage
778 |   server.sendLoggingMessage({
779 |     level: 'info',
780 |     data: `Credit usage: ${creditUsage.total} credits used total`,
781 |   });
782 |
783 |   // Check thresholds (cumulative credits used; with the defaults the critical check at 100 fires before the warning at 1000)
784 |   if (creditUsage.total >= CONFIG.credit.criticalThreshold) {
785 |     server.sendLoggingMessage({
786 |       level: 'error',
787 |       data: `CRITICAL: Credit usage has reached ${creditUsage.total}`,
788 |     });
789 |   } else if (creditUsage.total >= CONFIG.credit.warningThreshold) {
790 |     server.sendLoggingMessage({
791 |       level: 'warning',
792 |       data: `WARNING: Credit usage has reached ${creditUsage.total}`,
793 |     });
794 |   }
795 | }
796 |
797 | // Add before server implementation
798 | interface QueuedBatchOperation {
799 |   id: string;
800 |   urls: string[];
801 |   options?: any;
802 |   status: 'pending' | 'processing' | 'completed' | 'failed';
803 |   progress: {
804 |     completed: number;
805 |     total: number;
806 |   };
807 |   result?: any;
808 |   error?: string;
809 | }
810 |
811 | // Initialize queue system
812 | const batchQueue = new PQueue({ concurrency: 1 });
813 | const batchOperations = new Map<string, QueuedBatchOperation>();
814 | let operationCounter = 0;
815 |
816 | async function processBatchOperation(
817 |   operation: QueuedBatchOperation
818 | ): Promise<void> {
819 |   try {
820 |     operation.status = 'processing';
821 |     let totalCreditsUsed = 0;
822 |
823 |     // Use library's built-in batch processing
824 |     const response = await withRetry(
825 |       async () =>
826 |         client.asyncBatchScrapeUrls(operation.urls, operation.options),
827 |       `batch ${operation.id} processing`
828 |     );
829 |
830 |     if (!response.success) {
831 |       throw new Error(response.error || 'Batch operation failed');
832 |     }
833 |
834 |     // Track credits if using cloud API
835 |     if (!FIRECRAWL_API_URL && hasCredits(response)) {
836 |       totalCreditsUsed += response.creditsUsed;
837 |       await updateCreditUsage(response.creditsUsed);
838 |     }
839 |
840 |     operation.status = 'completed';
841 |     operation.result = response;
842 |
843 |     // Log final credit usage for the batch
844 |     if (!FIRECRAWL_API_URL) {
845 |       server.sendLoggingMessage({
846 |         level: 'info',
847 |         data: `Batch ${operation.id} completed. Total credits used: ${totalCreditsUsed}`,
848 |       });
849 |     }
850 |   } catch (error) {
851 |     operation.status = 'failed';
852 |     operation.error = error instanceof Error ?
error.message : String(error); 853 | 854 | server.sendLoggingMessage({ 855 | level: 'error', 856 | data: `Batch ${operation.id} failed: ${operation.error}`, 857 | }); 858 | } 859 | } 860 | 861 | // Tool handlers 862 | server.setRequestHandler(ListToolsRequestSchema, async () => ({ 863 | tools: [ 864 | SCRAPE_TOOL, 865 | MAP_TOOL, 866 | CRAWL_TOOL, 867 | BATCH_SCRAPE_TOOL, 868 | CHECK_BATCH_STATUS_TOOL, 869 | CHECK_CRAWL_STATUS_TOOL, 870 | SEARCH_TOOL, 871 | EXTRACT_TOOL, 872 | DEEP_RESEARCH_TOOL, 873 | ], 874 | })); 875 | 876 | server.setRequestHandler(CallToolRequestSchema, async (request) => { 877 | const startTime = Date.now(); 878 | try { 879 | const { name, arguments: args } = request.params; 880 | 881 | // Log incoming request with timestamp 882 | server.sendLoggingMessage({ 883 | level: 'info', 884 | data: `[${new Date().toISOString()}] Received request for tool: ${name}`, 885 | }); 886 | 887 | if (!args) { 888 | throw new Error('No arguments provided'); 889 | } 890 | 891 | switch (name) { 892 | case 'firecrawl_scrape': { 893 | if (!isScrapeOptions(args)) { 894 | throw new Error('Invalid arguments for firecrawl_scrape'); 895 | } 896 | const { url, ...options } = args; 897 | try { 898 | const scrapeStartTime = Date.now(); 899 | server.sendLoggingMessage({ 900 | level: 'info', 901 | data: `Starting scrape for URL: ${url} with options: ${JSON.stringify( 902 | options 903 | )}`, 904 | }); 905 | 906 | const response = await client.scrapeUrl(url, options); 907 | 908 | // Log performance metrics 909 | server.sendLoggingMessage({ 910 | level: 'info', 911 | data: `Scrape completed in ${Date.now() - scrapeStartTime}ms`, 912 | }); 913 | 914 | if ('success' in response && !response.success) { 915 | throw new Error(response.error || 'Scraping failed'); 916 | } 917 | 918 | 919 | // Format content based on requested formats 920 | const contentParts = []; 921 | 922 | if (options.formats?.includes('markdown') && response.markdown) { 923 | contentParts.push(response.markdown); 924 | } 925 | if (options.formats?.includes('html') && response.html) { 926 | contentParts.push(response.html); 927 | } 928 | if (options.formats?.includes('rawHtml') && response.rawHtml) { 929 | contentParts.push(response.rawHtml); 930 | } 931 | if (options.formats?.includes('links') && response.links) { 932 | contentParts.push(response.links.join('\n')); 933 | } 934 | if (options.formats?.includes('screenshot') && response.screenshot) { 935 | contentParts.push(response.screenshot); 936 | } 937 | if (options.formats?.includes('extract') && response.extract) { 938 | contentParts.push(JSON.stringify(response.extract, null, 2)); 939 | } 940 | 941 | // Add warning to response if present 942 | if (response.warning) { 943 | server.sendLoggingMessage({ 944 | level: 'warning', 945 | data: response.warning, 946 | }); 947 | } 948 | 949 | return { 950 | content: [ 951 | { type: 'text', text: contentParts.join('\n\n') || 'No content available' }, 952 | ], 953 | isError: false, 954 | }; 955 | } catch (error) { 956 | const errorMessage = 957 | error instanceof Error ? 
error.message : String(error); 958 | return { 959 | content: [{ type: 'text', text: errorMessage }], 960 | isError: true, 961 | }; 962 | } 963 | } 964 | 965 | case 'firecrawl_map': { 966 | if (!isMapOptions(args)) { 967 | throw new Error('Invalid arguments for firecrawl_map'); 968 | } 969 | const { url, ...options } = args; 970 | const response = await client.mapUrl(url, options); 971 | if ('error' in response) { 972 | throw new Error(response.error); 973 | } 974 | if (!response.links) { 975 | throw new Error('No links received from FireCrawl API'); 976 | } 977 | return { 978 | content: [{ type: 'text', text: response.links.join('\n') }], 979 | isError: false, 980 | }; 981 | } 982 | 983 | case 'firecrawl_batch_scrape': { 984 | if (!isBatchScrapeOptions(args)) { 985 | throw new Error('Invalid arguments for firecrawl_batch_scrape'); 986 | } 987 | 988 | try { 989 | const operationId = `batch_${++operationCounter}`; 990 | const operation: QueuedBatchOperation = { 991 | id: operationId, 992 | urls: args.urls, 993 | options: args.options, 994 | status: 'pending', 995 | progress: { 996 | completed: 0, 997 | total: args.urls.length, 998 | }, 999 | }; 1000 | 1001 | batchOperations.set(operationId, operation); 1002 | 1003 | // Queue the operation 1004 | batchQueue.add(() => processBatchOperation(operation)); 1005 | 1006 | server.sendLoggingMessage({ 1007 | level: 'info', 1008 | data: `Queued batch operation ${operationId} with ${args.urls.length} URLs`, 1009 | }); 1010 | 1011 | return { 1012 | content: [ 1013 | { 1014 | type: 'text', 1015 | text: `Batch operation queued with ID: ${operationId}. Use firecrawl_check_batch_status to check progress.`, 1016 | }, 1017 | ], 1018 | isError: false, 1019 | }; 1020 | } catch (error) { 1021 | const errorMessage = 1022 | error instanceof Error 1023 | ? error.message 1024 | : `Batch operation failed: ${JSON.stringify(error)}`; 1025 | return { 1026 | content: [{ type: 'text', text: errorMessage }], 1027 | isError: true, 1028 | }; 1029 | } 1030 | } 1031 | 1032 | case 'firecrawl_check_batch_status': { 1033 | if (!isStatusCheckOptions(args)) { 1034 | throw new Error( 1035 | 'Invalid arguments for firecrawl_check_batch_status' 1036 | ); 1037 | } 1038 | 1039 | const operation = batchOperations.get(args.id); 1040 | if (!operation) { 1041 | return { 1042 | content: [ 1043 | { 1044 | type: 'text', 1045 | text: `No batch operation found with ID: ${args.id}`, 1046 | }, 1047 | ], 1048 | isError: true, 1049 | }; 1050 | } 1051 | 1052 | const status = `Batch Status: 1053 | Status: ${operation.status} 1054 | Progress: ${operation.progress.completed}/${operation.progress.total} 1055 | ${operation.error ? `Error: ${operation.error}` : ''} 1056 | ${ 1057 | operation.result 1058 | ? 
`Results: ${JSON.stringify(operation.result, null, 2)}` 1059 | : '' 1060 | }`; 1061 | 1062 | return { 1063 | content: [{ type: 'text', text: status }], 1064 | isError: false, 1065 | }; 1066 | } 1067 | 1068 | case 'firecrawl_crawl': { 1069 | if (!isCrawlOptions(args)) { 1070 | throw new Error('Invalid arguments for firecrawl_crawl'); 1071 | } 1072 | const { url, ...options } = args; 1073 | 1074 | const response = await withRetry( 1075 | async () => client.asyncCrawlUrl(url, options), 1076 | 'crawl operation' 1077 | ); 1078 | 1079 | if (!response.success) { 1080 | throw new Error(response.error); 1081 | } 1082 | 1083 | // Monitor credits for cloud API 1084 | if (!FIRECRAWL_API_URL && hasCredits(response)) { 1085 | await updateCreditUsage(response.creditsUsed); 1086 | } 1087 | 1088 | return { 1089 | content: [ 1090 | { 1091 | type: 'text', 1092 | text: `Started crawl for ${url} with job ID: ${response.id}`, 1093 | }, 1094 | ], 1095 | isError: false, 1096 | }; 1097 | } 1098 | 1099 | case 'firecrawl_check_crawl_status': { 1100 | if (!isStatusCheckOptions(args)) { 1101 | throw new Error( 1102 | 'Invalid arguments for firecrawl_check_crawl_status' 1103 | ); 1104 | } 1105 | const response = await client.checkCrawlStatus(args.id); 1106 | if (!response.success) { 1107 | throw new Error(response.error); 1108 | } 1109 | const status = `Crawl Status: 1110 | Status: ${response.status} 1111 | Progress: ${response.completed}/${response.total} 1112 | Credits Used: ${response.creditsUsed} 1113 | Expires At: ${response.expiresAt} 1114 | ${ 1115 | response.data.length > 0 ? '\nResults:\n' + formatResults(response.data) : '' 1116 | }`; 1117 | return { 1118 | content: [{ type: 'text', text: status }], 1119 | isError: false, 1120 | }; 1121 | } 1122 | 1123 | case 'firecrawl_search': { 1124 | if (!isSearchOptions(args)) { 1125 | throw new Error('Invalid arguments for firecrawl_search'); 1126 | } 1127 | try { 1128 | const response = await withRetry( 1129 | async () => client.search(args.query, args), 1130 | 'search operation' 1131 | ); 1132 | 1133 | if (!response.success) { 1134 | throw new Error( 1135 | `Search failed: ${response.error || 'Unknown error'}` 1136 | ); 1137 | } 1138 | 1139 | // Monitor credits for cloud API 1140 | if (!FIRECRAWL_API_URL && hasCredits(response)) { 1141 | await updateCreditUsage(response.creditsUsed); 1142 | } 1143 | 1144 | // Format the results 1145 | const results = response.data 1146 | .map( 1147 | (result) => 1148 | `URL: ${result.url} 1149 | Title: ${result.title || 'No title'} 1150 | Description: ${result.description || 'No description'} 1151 | ${result.markdown ? `\nContent:\n${result.markdown}` : ''}` 1152 | ) 1153 | .join('\n\n'); 1154 | 1155 | return { 1156 | content: [{ type: 'text', text: results }], 1157 | isError: false, 1158 | }; 1159 | } catch (error) { 1160 | const errorMessage = 1161 | error instanceof Error 1162 | ? 
error.message 1163 | : `Search failed: ${JSON.stringify(error)}`; 1164 | return { 1165 | content: [{ type: 'text', text: errorMessage }], 1166 | isError: true, 1167 | }; 1168 | } 1169 | } 1170 | 1171 | case 'firecrawl_extract': { 1172 | if (!isExtractOptions(args)) { 1173 | throw new Error('Invalid arguments for firecrawl_extract'); 1174 | } 1175 | 1176 | try { 1177 | const extractStartTime = Date.now(); 1178 | 1179 | server.sendLoggingMessage({ 1180 | level: 'info', 1181 | data: `Starting extraction for URLs: ${args.urls.join(', ')}`, 1182 | }); 1183 | 1184 | // Log if using self-hosted instance 1185 | if (FIRECRAWL_API_URL) { 1186 | server.sendLoggingMessage({ 1187 | level: 'info', 1188 | data: 'Using self-hosted instance for extraction', 1189 | }); 1190 | } 1191 | 1192 | const extractResponse = await withRetry( 1193 | async () => 1194 | client.extract(args.urls, { 1195 | prompt: args.prompt, 1196 | systemPrompt: args.systemPrompt, 1197 | schema: args.schema, 1198 | allowExternalLinks: args.allowExternalLinks, 1199 | enableWebSearch: args.enableWebSearch, 1200 | includeSubdomains: args.includeSubdomains, 1201 | origin: 'mcp-server', 1202 | } as ExtractParams), 1203 | 'extract operation' 1204 | ); 1205 | 1206 | // Type guard for successful response 1207 | if (!('success' in extractResponse) || !extractResponse.success) { 1208 | throw new Error(extractResponse.error || 'Extraction failed'); 1209 | } 1210 | 1211 | const response = extractResponse as ExtractResponse; 1212 | 1213 | // Monitor credits for cloud API 1214 | if (!FIRECRAWL_API_URL && hasCredits(response)) { 1215 | await updateCreditUsage(response.creditsUsed || 0); 1216 | } 1217 | 1218 | // Log performance metrics 1219 | server.sendLoggingMessage({ 1220 | level: 'info', 1221 | data: `Extraction completed in ${Date.now() - extractStartTime}ms`, 1222 | }); 1223 | 1224 | // Add warning to response if present 1225 | const result = { 1226 | content: [ 1227 | { 1228 | type: 'text', 1229 | text: JSON.stringify(response.data, null, 2), 1230 | }, 1231 | ], 1232 | isError: false, 1233 | }; 1234 | 1235 | if (response.warning) { 1236 | server.sendLoggingMessage({ 1237 | level: 'warning', 1238 | data: response.warning, 1239 | }); 1240 | } 1241 | 1242 | return result; 1243 | } catch (error) { 1244 | const errorMessage = 1245 | error instanceof Error ? error.message : String(error); 1246 | 1247 | // Special handling for self-hosted instance errors 1248 | if ( 1249 | FIRECRAWL_API_URL && 1250 | errorMessage.toLowerCase().includes('not supported') 1251 | ) { 1252 | server.sendLoggingMessage({ 1253 | level: 'error', 1254 | data: 'Extraction is not supported by this self-hosted instance', 1255 | }); 1256 | return { 1257 | content: [ 1258 | { 1259 | type: 'text', 1260 | text: 'Extraction is not supported by this self-hosted instance. 
Please ensure LLM support is configured.', 1261 | }, 1262 | ], 1263 | isError: true, 1264 | }; 1265 | } 1266 | 1267 | return { 1268 | content: [{ type: 'text', text: errorMessage }], 1269 | isError: true, 1270 | }; 1271 | } 1272 | } 1273 | 1274 | case 'firecrawl_deep_research': { 1275 | if (!args || typeof args !== 'object' || !('query' in args)) { 1276 | throw new Error('Invalid arguments for firecrawl_deep_research'); 1277 | } 1278 | 1279 | try { 1280 | const researchStartTime = Date.now(); 1281 | server.sendLoggingMessage({ 1282 | level: 'info', 1283 | data: `Starting deep research for query: ${args.query}`, 1284 | }); 1285 | 1286 | const response = await client.deepResearch( 1287 | args.query as string, 1288 | { 1289 | maxDepth: args.maxDepth as number, 1290 | timeLimit: args.timeLimit as number, 1291 | maxUrls: args.maxUrls as number, 1292 | }, 1293 | // Activity callback 1294 | (activity) => { 1295 | server.sendLoggingMessage({ 1296 | level: 'info', 1297 | data: `Research activity: ${activity.message} (Depth: ${activity.depth})`, 1298 | }); 1299 | }, 1300 | // Source callback 1301 | (source) => { 1302 | server.sendLoggingMessage({ 1303 | level: 'info', 1304 | data: `Research source found: ${source.url}${source.title ? ` - ${source.title}` : ''}`, 1305 | }); 1306 | } 1307 | ); 1308 | 1309 | // Log performance metrics 1310 | server.sendLoggingMessage({ 1311 | level: 'info', 1312 | data: `Deep research completed in ${Date.now() - researchStartTime}ms`, 1313 | }); 1314 | 1315 | if (!response.success) { 1316 | throw new Error(response.error || 'Deep research failed'); 1317 | } 1318 | 1319 | // Format the results 1320 | const formattedResponse = { 1321 | finalAnalysis: response.data.finalAnalysis, 1322 | activities: response.data.activities, 1323 | sources: response.data.sources, 1324 | }; 1325 | 1326 | return { 1327 | content: [{ type: 'text', text: formattedResponse.finalAnalysis }], 1328 | isError: false, 1329 | }; 1330 | } catch (error) { 1331 | const errorMessage = error instanceof Error ? error.message : String(error); 1332 | return { 1333 | content: [{ type: 'text', text: errorMessage }], 1334 | isError: true, 1335 | }; 1336 | } 1337 | } 1338 | 1339 | default: 1340 | return { 1341 | content: [{ type: 'text', text: `Unknown tool: ${name}` }], 1342 | isError: true, 1343 | }; 1344 | } 1345 | } catch (error) { 1346 | // Log detailed error information 1347 | server.sendLoggingMessage({ 1348 | level: 'error', 1349 | data: { 1350 | message: `Request failed: ${ 1351 | error instanceof Error ? error.message : String(error) 1352 | }`, 1353 | tool: request.params.name, 1354 | arguments: request.params.arguments, 1355 | timestamp: new Date().toISOString(), 1356 | duration: Date.now() - startTime, 1357 | }, 1358 | }); 1359 | return { 1360 | content: [ 1361 | { 1362 | type: 'text', 1363 | text: `Error: ${ 1364 | error instanceof Error ? 
error.message : String(error) 1365 | }`, 1366 | }, 1367 | ], 1368 | isError: true, 1369 | }; 1370 | } finally { 1371 | // Log request completion with performance metrics 1372 | server.sendLoggingMessage({ 1373 | level: 'info', 1374 | data: `Request completed in ${Date.now() - startTime}ms`, 1375 | }); 1376 | } 1377 | }); 1378 | 1379 | // Helper function to format results 1380 | function formatResults(data: FirecrawlDocument[]): string { 1381 | return data 1382 | .map((doc) => { 1383 | const content = doc.markdown || doc.html || doc.rawHtml || 'No content'; 1384 | return `URL: ${doc.url || 'Unknown URL'} 1385 | Content: ${content.substring(0, 100)}${content.length > 100 ? '...' : ''} 1386 | ${doc.metadata?.title ? `Title: ${doc.metadata.title}` : ''}`; 1387 | }) 1388 | .join('\n\n'); 1389 | } 1390 | 1391 | // Server startup 1392 | async function runServer() { 1393 | try { 1394 | console.error('Initializing FireCrawl MCP Server...'); 1395 | 1396 | const transport = new StdioServerTransport(); 1397 | await server.connect(transport); 1398 | 1399 | // Now that we're connected, we can send logging messages 1400 | server.sendLoggingMessage({ 1401 | level: 'info', 1402 | data: 'FireCrawl MCP Server initialized successfully', 1403 | }); 1404 | 1405 | server.sendLoggingMessage({ 1406 | level: 'info', 1407 | data: `Configuration: API URL: ${FIRECRAWL_API_URL || 'default'}`, 1408 | }); 1409 | 1410 | console.error('FireCrawl MCP Server running on stdio'); 1411 | } catch (error) { 1412 | console.error('Fatal error running server:', error); 1413 | process.exit(1); 1414 | } 1415 | } 1416 | 1417 | runServer().catch((error) => { 1418 | console.error('Fatal error running server:', error); 1419 | process.exit(1); 1420 | }); 1421 | 1422 | // Add type guard for credit usage 1423 | function hasCredits(response: any): response is { creditsUsed: number } { 1424 | return 'creditsUsed' in response && typeof response.creditsUsed === 'number'; 1425 | } 1426 | --------------------------------------------------------------------------------