├── .eslintrc.json ├── .github └── workflows │ ├── ci.yml │ ├── image.yml │ └── publish.yml ├── .gitignore ├── .prettierrc ├── CHANGELOG.md ├── Dockerfile ├── Dockerfile.service ├── LICENSE ├── README.md ├── jest.config.js ├── jest.setup.ts ├── package-lock.json ├── package.json ├── pnpm-lock.yaml ├── smithery.yaml ├── src ├── index.test.ts └── index.ts └── tsconfig.json /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "parser": "@typescript-eslint/parser", 3 | "plugins": ["@typescript-eslint"], 4 | "extends": [ 5 | "eslint:recommended", 6 | "plugin:@typescript-eslint/recommended", 7 | "prettier" 8 | ], 9 | "env": { 10 | "node": true, 11 | "es2022": true 12 | }, 13 | "parserOptions": { 14 | "ecmaVersion": 2022, 15 | "sourceType": "module", 16 | "project": "./tsconfig.json" 17 | }, 18 | "rules": { 19 | "@typescript-eslint/explicit-function-return-type": "off", 20 | "@typescript-eslint/no-explicit-any": "off", 21 | "@typescript-eslint/no-unused-vars": [ 22 | "error", 23 | { "argsIgnorePattern": "^_" } 24 | ] 25 | }, 26 | "overrides": [ 27 | { 28 | "files": ["**/*.test.ts"], 29 | "rules": { 30 | "@typescript-eslint/no-unused-vars": "off", 31 | "@typescript-eslint/no-explicit-any": "off" 32 | } 33 | } 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - name: Use Node.js 17 | uses: actions/setup-node@v3 18 | with: 19 | node-version: '20.x' 20 | cache: 'npm' 21 | 22 | - name: Install dependencies 23 | run: npm ci 24 | 25 | - name: Build 26 | run: npm run build 27 | 28 | - name: Lint 29 | run: npm run lint 30 | 31 | - name: Test 32 | run: npm test 33 | -------------------------------------------------------------------------------- /.github/workflows/image.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Images to GHCR 2 | 3 | env: 4 | DOTNET_VERSION: '6.0.x' 5 | 6 | on: 7 | push: 8 | branches: 9 | - main 10 | workflow_dispatch: 11 | 12 | jobs: 13 | push-image: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: 'Checkout GitHub Action' 17 | uses: actions/checkout@main 18 | 19 | - name: 'Login to GitHub Container Registry' 20 | uses: docker/login-action@v1 21 | with: 22 | registry: ghcr.io 23 | username: ${{github.actor}} 24 | password: ${{secrets.GITHUB_TOKEN}} 25 | 26 | - name: 'Set up Docker Buildx' 27 | uses: docker/setup-buildx-action@v1 28 | 29 | - name: 'Build Service Image' 30 | uses: docker/build-push-action@v2 31 | with: 32 | context: . 
33 | file: ./Dockerfile 34 | push: true 35 | tags: ghcr.io/mendableai/firecrawl-mcp-server:latest 36 | cache-from: type=registry,ref=ghcr.io/mendableai/firecrawl-mcp-server:latest 37 | cache-to: type=inline 38 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | workflow_dispatch: 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v3 14 | 15 | - name: Use Node.js 16 | uses: actions/setup-node@v3 17 | with: 18 | node-version: '20.x' 19 | registry-url: 'https://registry.npmjs.org' 20 | 21 | - name: Install dependencies 22 | run: npm ci 23 | 24 | - name: Build 25 | run: npm run build 26 | 27 | - name: Publish to NPM 28 | run: npm publish 29 | env: 30 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | node_modules/ 3 | 4 | # Build 5 | dist/ 6 | 7 | # Logs 8 | logs 9 | *.log 10 | npm-debug.log* 11 | 12 | # Environment 13 | .env 14 | .env.local 15 | .env.*.local 16 | claude_desktop_config.json 17 | 18 | # IDE 19 | .idea/ 20 | .vscode/ 21 | *.swp 22 | *.swo 23 | .cursorrules.md 24 | IMPLEMENTATION.md 25 | v1.2.md 26 | 27 | # OS 28 | .DS_Store 29 | Thumbs.db -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "trailingComma": "es5", 4 | "singleQuote": true, 5 | "printWidth": 80, 6 | "tabWidth": 2, 7 | "useTabs": false 8 | } 9 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [1.7.0] - 2025-03-18 4 | 5 | ### Fixed 6 | 7 | - Critical bugfix for stdio transport hanging issues with Python clients 8 | - Implemented transport-aware logging that directs logs to stderr when using stdio transport 9 | - Resolves issue #22 where Python clients would hang during initialization or tool execution 10 | - Improves compatibility with non-JavaScript MCP clients 11 | 12 | ## [1.2.4] - 2024-02-05 13 | 14 | ### Added 15 | 16 | - Environment variable support for all configuration options 17 | - Detailed configuration documentation in README 18 | 19 | ### Changed 20 | 21 | - Made retry and credit monitoring settings configurable via environment variables: 22 | - `FIRECRAWL_RETRY_MAX_ATTEMPTS` 23 | - `FIRECRAWL_RETRY_INITIAL_DELAY` 24 | - `FIRECRAWL_RETRY_MAX_DELAY` 25 | - `FIRECRAWL_RETRY_BACKOFF_FACTOR` 26 | - `FIRECRAWL_CREDIT_WARNING_THRESHOLD` 27 | - `FIRECRAWL_CREDIT_CRITICAL_THRESHOLD` 28 | - Enhanced configuration examples with detailed comments and use cases 29 | - Improved documentation for retry behavior and credit monitoring 30 | 31 | ### Documentation 32 | 33 | - Added comprehensive configuration examples for both cloud and self-hosted setups 34 | - Added detailed explanations of retry behavior with timing examples 35 | - Added credit monitoring threshold explanations 36 | - Updated Claude Desktop configuration documentation 37 | 38 | ## [1.2.3] - 2024-02-05 39 | 40 | ### Changed 41 | 42 | - Removed redundant batch configuration to rely on Firecrawl 
library's built-in functionality 43 | - Simplified batch processing logic by leveraging library's native implementation 44 | - Optimized parallel processing and rate limiting handling 45 | - Reduced code complexity and potential configuration conflicts 46 | 47 | ### Technical 48 | 49 | - Removed custom `CONFIG.batch` settings (`maxParallelOperations` and `delayBetweenRequests`) 50 | - Simplified batch operation processing to use library's built-in batch handling 51 | - Updated server startup logging to remove batch configuration references 52 | - Maintained credit usage tracking and error handling functionality 53 | 54 | ## [1.2.2] - 2025-02-05 55 | 56 | ### Fixed 57 | 58 | - Resolved unused interface warnings for ExtractParams and ExtractResponse 59 | - Improved type safety in extract operations 60 | - Fixed type casting issues in API responses 61 | 62 | ### Changed 63 | 64 | - Improved type guards for better type inference 65 | - Enhanced error messages for configuration validation 66 | 67 | ## [1.2.0] - 2024-01-03 68 | 69 | ### Added 70 | 71 | - Implemented automatic retries with exponential backoff for rate limits 72 | - Added queue system for batch operations with parallel processing 73 | - Integrated credit usage monitoring with warning thresholds 74 | - Enhanced content validation with configurable criteria 75 | - Added comprehensive logging system for operations and errors 76 | - New search tool (`firecrawl_search`) for web search with content extraction 77 | - Support for self-hosted Firecrawl instances via optional API URL configuration 78 | - New `FIRECRAWL_API_URL` environment variable 79 | - Automatic fallback to cloud API 80 | - Improved error messages for self-hosted instances 81 | 82 | ### Changed 83 | 84 | - Improved error handling for HTTP errors including 404s 85 | - Enhanced URL validation before scraping 86 | - Updated configuration with new retry and batch processing options 87 | - Optimized rate limiting with automatic backoff strategy 88 | - Improved documentation with new features and examples 89 | - Added detailed self-hosted configuration guide 90 | 91 | ### Fixed 92 | 93 | - Rate limit handling in batch operations 94 | - Error response formatting 95 | - Type definitions for response handlers 96 | - Test suite mock responses 97 | - Error handling for invalid search queries 98 | - API configuration validation 99 | 100 | ## [1.0.1] - 2023-12-03 101 | 102 | ### Added 103 | 104 | - Initial release with basic scraping functionality 105 | - Support for batch scraping 106 | - URL discovery and crawling capabilities 107 | - Rate limiting implementation 108 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile 2 | # Use a Node.js image as the base for building the application 3 | FROM node:22-alpine AS builder 4 | 5 | # Set the working directory inside the container 6 | WORKDIR /app 7 | 8 | # Copy package.json and package-lock.json to install dependencies 9 | COPY package.json package-lock.json ./ 10 | 11 | # Install dependencies (ignoring scripts to prevent running the prepare script) 12 | RUN npm install --ignore-scripts 13 | 14 | # Copy the rest of the application source code 15 | COPY . . 
16 | 17 | # Build the application using TypeScript 18 | RUN npm run build 19 | 20 | # Use a smaller Node.js image for the final image 21 | FROM node:22-slim AS release 22 | 23 | # Set the working directory inside the container 24 | WORKDIR /app 25 | 26 | # Copy the built application from the builder stage 27 | COPY --from=builder /app/dist /app/dist 28 | COPY --from=builder /app/package.json /app/package.json 29 | COPY --from=builder /app/package-lock.json /app/package-lock.json 30 | 31 | # Install only production dependencies 32 | RUN npm ci --omit=dev --ignore-scripts 33 | 34 | # Set environment variables for API key and custom API URL if needed 35 | 36 | 37 | # Specify the command to run the application 38 | ENTRYPOINT ["node", "dist/index.js"] 39 | -------------------------------------------------------------------------------- /Dockerfile.service: -------------------------------------------------------------------------------- 1 | FROM node:22-slim 2 | 3 | WORKDIR /app 4 | 5 | COPY package.json package-lock.json* ./ 6 | 7 | 8 | COPY tsconfig.json ./ 9 | COPY src ./src 10 | RUN npm run build 11 | 12 | ENV CLOUD_SERVICE=true 13 | ENV PORT=3000 14 | 15 | # Expose the port 16 | EXPOSE 3000 17 | 18 | # Run the server 19 | CMD ["node", "dist/index.js"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 vrknetha 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Firecrawl MCP Server 2 | 3 | A Model Context Protocol (MCP) server implementation that integrates with [Firecrawl](https://github.com/mendableai/firecrawl) for web scraping capabilities. 4 | 5 | > Big thanks to [@vrknetha](https://github.com/vrknetha), [@knacklabs](https://www.knacklabs.ai) for the initial implementation! 6 | 7 | ## Features 8 | 9 | - Web scraping, crawling, and discovery 10 | - Search and content extraction 11 | - Deep research and batch scraping 12 | - Automatic retries and rate limiting 13 | - Cloud and self-hosted support 14 | - SSE support 15 | 16 | > Play around with [our MCP Server on MCP.so's playground](https://mcp.so/playground?server=firecrawl-mcp-server) or on [Klavis AI](https://www.klavis.ai/mcp-servers). 
17 | 18 | ## Installation 19 | 20 | ### Running with npx 21 | 22 | ```bash 23 | env FIRECRAWL_API_KEY=fc-YOUR_API_KEY npx -y firecrawl-mcp 24 | ``` 25 | 26 | ### Manual Installation 27 | 28 | ```bash 29 | npm install -g firecrawl-mcp 30 | ``` 31 | 32 | ### Running on Cursor 33 | 34 | Configuring Cursor 🖥️ 35 | Note: Requires Cursor version 0.45.6+ 36 | For the most up-to-date configuration instructions, please refer to the official Cursor documentation on configuring MCP servers: 37 | [Cursor MCP Server Configuration Guide](https://docs.cursor.com/context/model-context-protocol#configuring-mcp-servers) 38 | 39 | To configure Firecrawl MCP in Cursor **v0.48.6** 40 | 41 | 1. Open Cursor Settings 42 | 2. Go to Features > MCP Servers 43 | 3. Click "+ Add new global MCP server" 44 | 4. Enter the following code: 45 | ```json 46 | { 47 | "mcpServers": { 48 | "firecrawl-mcp": { 49 | "command": "npx", 50 | "args": ["-y", "firecrawl-mcp"], 51 | "env": { 52 | "FIRECRAWL_API_KEY": "YOUR-API-KEY" 53 | } 54 | } 55 | } 56 | } 57 | ``` 58 | 59 | To configure Firecrawl MCP in Cursor **v0.45.6** 60 | 61 | 1. Open Cursor Settings 62 | 2. Go to Features > MCP Servers 63 | 3. Click "+ Add New MCP Server" 64 | 4. Enter the following: 65 | - Name: "firecrawl-mcp" (or your preferred name) 66 | - Type: "command" 67 | - Command: `env FIRECRAWL_API_KEY=your-api-key npx -y firecrawl-mcp` 68 | 69 | 70 | 71 | > If you are using Windows and are running into issues, try `cmd /c "set FIRECRAWL_API_KEY=your-api-key && npx -y firecrawl-mcp"` 72 | 73 | Replace `your-api-key` with your Firecrawl API key. If you don't have one yet, you can create an account and get it from https://www.firecrawl.dev/app/api-keys 74 | 75 | After adding, refresh the MCP server list to see the new tools. The Composer Agent will automatically use Firecrawl MCP when appropriate, but you can explicitly request it by describing your web scraping needs. Access the Composer via Command+L (Mac), select "Agent" next to the submit button, and enter your query. 76 | 77 | ### Running on Windsurf 78 | 79 | Add this to your `./codeium/windsurf/model_config.json`: 80 | 81 | ```json 82 | { 83 | "mcpServers": { 84 | "mcp-server-firecrawl": { 85 | "command": "npx", 86 | "args": ["-y", "firecrawl-mcp"], 87 | "env": { 88 | "FIRECRAWL_API_KEY": "YOUR_API_KEY" 89 | } 90 | } 91 | } 92 | } 93 | ``` 94 | 95 | ### Running with SSE Local Mode 96 | 97 | To run the server using Server-Sent Events (SSE) locally instead of the default stdio transport: 98 | 99 | ```bash 100 | env SSE_LOCAL=true FIRECRAWL_API_KEY=fc-YOUR_API_KEY npx -y firecrawl-mcp 101 | ``` 102 | 103 | Use the url: http://localhost:3000/sse 104 | 105 | ### Installing via Smithery (Legacy) 106 | 107 | To install Firecrawl for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@mendableai/mcp-server-firecrawl): 108 | 109 | ```bash 110 | npx -y @smithery/cli install @mendableai/mcp-server-firecrawl --client claude 111 | ``` 112 | 113 | ### Running on VS Code 114 | 115 | For one-click installation, click one of the install buttons below... 
116 | 117 | [![Install with NPX in VS Code](https://img.shields.io/badge/VS_Code-NPM-0098FF?style=flat-square&logo=visualstudiocode&logoColor=white)](https://insiders.vscode.dev/redirect/mcp/install?name=firecrawl&inputs=%5B%7B%22type%22%3A%22promptString%22%2C%22id%22%3A%22apiKey%22%2C%22description%22%3A%22Firecrawl%20API%20Key%22%2C%22password%22%3Atrue%7D%5D&config=%7B%22command%22%3A%22npx%22%2C%22args%22%3A%5B%22-y%22%2C%22firecrawl-mcp%22%5D%2C%22env%22%3A%7B%22FIRECRAWL_API_KEY%22%3A%22%24%7Binput%3AapiKey%7D%22%7D%7D) [![Install with NPX in VS Code Insiders](https://img.shields.io/badge/VS_Code_Insiders-NPM-24bfa5?style=flat-square&logo=visualstudiocode&logoColor=white)](https://insiders.vscode.dev/redirect/mcp/install?name=firecrawl&inputs=%5B%7B%22type%22%3A%22promptString%22%2C%22id%22%3A%22apiKey%22%2C%22description%22%3A%22Firecrawl%20API%20Key%22%2C%22password%22%3Atrue%7D%5D&config=%7B%22command%22%3A%22npx%22%2C%22args%22%3A%5B%22-y%22%2C%22firecrawl-mcp%22%5D%2C%22env%22%3A%7B%22FIRECRAWL_API_KEY%22%3A%22%24%7Binput%3AapiKey%7D%22%7D%7D&quality=insiders) 118 | 119 | For manual installation, add the following JSON block to your User Settings (JSON) file in VS Code. You can do this by pressing `Ctrl + Shift + P` and typing `Preferences: Open User Settings (JSON)`. 120 | 121 | ```json 122 | { 123 | "mcp": { 124 | "inputs": [ 125 | { 126 | "type": "promptString", 127 | "id": "apiKey", 128 | "description": "Firecrawl API Key", 129 | "password": true 130 | } 131 | ], 132 | "servers": { 133 | "firecrawl": { 134 | "command": "npx", 135 | "args": ["-y", "firecrawl-mcp"], 136 | "env": { 137 | "FIRECRAWL_API_KEY": "${input:apiKey}" 138 | } 139 | } 140 | } 141 | } 142 | } 143 | ``` 144 | 145 | Optionally, you can add it to a file called `.vscode/mcp.json` in your workspace. 
This will allow you to share the configuration with others: 146 | 147 | ```json 148 | { 149 | "inputs": [ 150 | { 151 | "type": "promptString", 152 | "id": "apiKey", 153 | "description": "Firecrawl API Key", 154 | "password": true 155 | } 156 | ], 157 | "servers": { 158 | "firecrawl": { 159 | "command": "npx", 160 | "args": ["-y", "firecrawl-mcp"], 161 | "env": { 162 | "FIRECRAWL_API_KEY": "${input:apiKey}" 163 | } 164 | } 165 | } 166 | } 167 | ``` 168 | 169 | ## Configuration 170 | 171 | ### Environment Variables 172 | 173 | #### Required for Cloud API 174 | 175 | - `FIRECRAWL_API_KEY`: Your Firecrawl API key 176 | - Required when using cloud API (default) 177 | - Optional when using self-hosted instance with `FIRECRAWL_API_URL` 178 | - `FIRECRAWL_API_URL` (Optional): Custom API endpoint for self-hosted instances 179 | - Example: `https://firecrawl.your-domain.com` 180 | - If not provided, the cloud API will be used (requires API key) 181 | 182 | #### Optional Configuration 183 | 184 | ##### Retry Configuration 185 | 186 | - `FIRECRAWL_RETRY_MAX_ATTEMPTS`: Maximum number of retry attempts (default: 3) 187 | - `FIRECRAWL_RETRY_INITIAL_DELAY`: Initial delay in milliseconds before first retry (default: 1000) 188 | - `FIRECRAWL_RETRY_MAX_DELAY`: Maximum delay in milliseconds between retries (default: 10000) 189 | - `FIRECRAWL_RETRY_BACKOFF_FACTOR`: Exponential backoff multiplier (default: 2) 190 | 191 | ##### Credit Usage Monitoring 192 | 193 | - `FIRECRAWL_CREDIT_WARNING_THRESHOLD`: Credit usage warning threshold (default: 1000) 194 | - `FIRECRAWL_CREDIT_CRITICAL_THRESHOLD`: Credit usage critical threshold (default: 100) 195 | 196 | ### Configuration Examples 197 | 198 | For cloud API usage with custom retry and credit monitoring: 199 | 200 | ```bash 201 | # Required for cloud API 202 | export FIRECRAWL_API_KEY=your-api-key 203 | 204 | # Optional retry configuration 205 | export FIRECRAWL_RETRY_MAX_ATTEMPTS=5 # Increase max retry attempts 206 | export FIRECRAWL_RETRY_INITIAL_DELAY=2000 # Start with 2s delay 207 | export FIRECRAWL_RETRY_MAX_DELAY=30000 # Maximum 30s delay 208 | export FIRECRAWL_RETRY_BACKOFF_FACTOR=3 # More aggressive backoff 209 | 210 | # Optional credit monitoring 211 | export FIRECRAWL_CREDIT_WARNING_THRESHOLD=2000 # Warning at 2000 credits 212 | export FIRECRAWL_CREDIT_CRITICAL_THRESHOLD=500 # Critical at 500 credits 213 | ``` 214 | 215 | For self-hosted instance: 216 | 217 | ```bash 218 | # Required for self-hosted 219 | export FIRECRAWL_API_URL=https://firecrawl.your-domain.com 220 | 221 | # Optional authentication for self-hosted 222 | export FIRECRAWL_API_KEY=your-api-key # If your instance requires auth 223 | 224 | # Custom retry configuration 225 | export FIRECRAWL_RETRY_MAX_ATTEMPTS=10 226 | export FIRECRAWL_RETRY_INITIAL_DELAY=500 # Start with faster retries 227 | ``` 228 | 229 | ### Usage with Claude Desktop 230 | 231 | Add this to your `claude_desktop_config.json`: 232 | 233 | ```json 234 | { 235 | "mcpServers": { 236 | "mcp-server-firecrawl": { 237 | "command": "npx", 238 | "args": ["-y", "firecrawl-mcp"], 239 | "env": { 240 | "FIRECRAWL_API_KEY": "YOUR_API_KEY_HERE", 241 | 242 | "FIRECRAWL_RETRY_MAX_ATTEMPTS": "5", 243 | "FIRECRAWL_RETRY_INITIAL_DELAY": "2000", 244 | "FIRECRAWL_RETRY_MAX_DELAY": "30000", 245 | "FIRECRAWL_RETRY_BACKOFF_FACTOR": "3", 246 | 247 | "FIRECRAWL_CREDIT_WARNING_THRESHOLD": "2000", 248 | "FIRECRAWL_CREDIT_CRITICAL_THRESHOLD": "500" 249 | } 250 | } 251 | } 252 | } 253 | ``` 254 | 255 | ### System Configuration 256 | 257 | The server 
includes several configurable parameters that can be set via environment variables. Here are the default values if not configured: 258 | 259 | ```typescript 260 | const CONFIG = { 261 | retry: { 262 | maxAttempts: 3, // Number of retry attempts for rate-limited requests 263 | initialDelay: 1000, // Initial delay before first retry (in milliseconds) 264 | maxDelay: 10000, // Maximum delay between retries (in milliseconds) 265 | backoffFactor: 2, // Multiplier for exponential backoff 266 | }, 267 | credit: { 268 | warningThreshold: 1000, // Warn when credit usage reaches this level 269 | criticalThreshold: 100, // Critical alert when credit usage reaches this level 270 | }, 271 | }; 272 | ``` 273 | 274 | These configurations control: 275 | 276 | 1. **Retry Behavior** 277 | 278 | - Automatically retries failed requests due to rate limits 279 | - Uses exponential backoff to avoid overwhelming the API 280 | - Example: With default settings, retries will be attempted at: 281 | - 1st retry: 1 second delay 282 | - 2nd retry: 2 seconds delay 283 | - 3rd retry: 4 seconds delay (capped at maxDelay) 284 | 285 | 2. **Credit Usage Monitoring** 286 | - Tracks API credit consumption for cloud API usage 287 | - Provides warnings at specified thresholds 288 | - Helps prevent unexpected service interruption 289 | - Example: With default settings: 290 | - Warning at 1000 credits remaining 291 | - Critical alert at 100 credits remaining 292 | 293 | ### Rate Limiting and Batch Processing 294 | 295 | The server utilizes Firecrawl's built-in rate limiting and batch processing capabilities: 296 | 297 | - Automatic rate limit handling with exponential backoff 298 | - Efficient parallel processing for batch operations 299 | - Smart request queuing and throttling 300 | - Automatic retries for transient errors 301 | 302 | ## How to Choose a Tool 303 | 304 | Use this guide to select the right tool for your task: 305 | 306 | - **If you know the exact URL(s) you want:** 307 | - For one: use **scrape** 308 | - For many: use **batch_scrape** 309 | - **If you need to discover URLs on a site:** use **map** 310 | - **If you want to search the web for info:** use **search** 311 | - **If you want to extract structured data:** use **extract** 312 | - **If you want to analyze a whole site or section:** use **crawl** (with limits!) 313 | - **If you want to do in-depth research:** use **deep_research** 314 | - **If you want to generate LLMs.txt:** use **generate_llmstxt** 315 | 316 | ### Quick Reference Table 317 | 318 | | Tool | Best for | Returns | 319 | |---------------------|------------------------------------------|-----------------| 320 | | scrape | Single page content | markdown/html | 321 | | batch_scrape | Multiple known URLs | markdown/html[] | 322 | | map | Discovering URLs on a site | URL[] | 323 | | crawl | Multi-page extraction (with limits) | markdown/html[] | 324 | | search | Web search for info | results[] | 325 | | extract | Structured data from pages | JSON | 326 | | deep_research | In-depth, multi-source research | summary, sources| 327 | | generate_llmstxt | LLMs.txt for a domain | text | 328 | 329 | ## Available Tools 330 | 331 | ### 1. Scrape Tool (`firecrawl_scrape`) 332 | 333 | Scrape content from a single URL with advanced options. 334 | 335 | **Best for:** 336 | - Single page content extraction, when you know exactly which page contains the information. 
337 | 338 | **Not recommended for:** 339 | - Extracting content from multiple pages (use batch_scrape for known URLs, or map + batch_scrape to discover URLs first, or crawl for full page content) 340 | - When you're unsure which page contains the information (use search) 341 | - When you need structured data (use extract) 342 | 343 | **Common mistakes:** 344 | - Using scrape for a list of URLs (use batch_scrape instead). 345 | 346 | **Prompt Example:** 347 | > "Get the content of the page at https://example.com." 348 | 349 | **Usage Example:** 350 | ```json 351 | { 352 | "name": "firecrawl_scrape", 353 | "arguments": { 354 | "url": "https://example.com", 355 | "formats": ["markdown"], 356 | "onlyMainContent": true, 357 | "waitFor": 1000, 358 | "timeout": 30000, 359 | "mobile": false, 360 | "includeTags": ["article", "main"], 361 | "excludeTags": ["nav", "footer"], 362 | "skipTlsVerification": false 363 | } 364 | } 365 | ``` 366 | 367 | **Returns:** 368 | - Markdown, HTML, or other formats as specified. 369 | 370 | ### 2. Batch Scrape Tool (`firecrawl_batch_scrape`) 371 | 372 | Scrape multiple URLs efficiently with built-in rate limiting and parallel processing. 373 | 374 | **Best for:** 375 | - Retrieving content from multiple pages, when you know exactly which pages to scrape. 376 | 377 | **Not recommended for:** 378 | - Discovering URLs (use map first if you don't know the URLs) 379 | - Scraping a single page (use scrape) 380 | 381 | **Common mistakes:** 382 | - Using batch_scrape with too many URLs at once (may hit rate limits or token overflow) 383 | 384 | **Prompt Example:** 385 | > "Get the content of these three blog posts: [url1, url2, url3]." 386 | 387 | **Usage Example:** 388 | ```json 389 | { 390 | "name": "firecrawl_batch_scrape", 391 | "arguments": { 392 | "urls": ["https://example1.com", "https://example2.com"], 393 | "options": { 394 | "formats": ["markdown"], 395 | "onlyMainContent": true 396 | } 397 | } 398 | } 399 | ``` 400 | 401 | **Returns:** 402 | - Response includes operation ID for status checking: 403 | 404 | ```json 405 | { 406 | "content": [ 407 | { 408 | "type": "text", 409 | "text": "Batch operation queued with ID: batch_1. Use firecrawl_check_batch_status to check progress." 410 | } 411 | ], 412 | "isError": false 413 | } 414 | ``` 415 | 416 | ### 3. Check Batch Status (`firecrawl_check_batch_status`) 417 | 418 | Check the status of a batch operation. 419 | 420 | ```json 421 | { 422 | "name": "firecrawl_check_batch_status", 423 | "arguments": { 424 | "id": "batch_1" 425 | } 426 | } 427 | ``` 428 | 429 | ### 4. Map Tool (`firecrawl_map`) 430 | 431 | Map a website to discover all indexed URLs on the site. 432 | 433 | **Best for:** 434 | - Discovering URLs on a website before deciding what to scrape 435 | - Finding specific sections of a website 436 | 437 | **Not recommended for:** 438 | - When you already know which specific URL you need (use scrape or batch_scrape) 439 | - When you need the content of the pages (use scrape after mapping) 440 | 441 | **Common mistakes:** 442 | - Using crawl to discover URLs instead of map 443 | 444 | **Prompt Example:** 445 | > "List all URLs on example.com." 446 | 447 | **Usage Example:** 448 | ```json 449 | { 450 | "name": "firecrawl_map", 451 | "arguments": { 452 | "url": "https://example.com" 453 | } 454 | } 455 | ``` 456 | 457 | **Returns:** 458 | - Array of URLs found on the site 459 | 460 | ### 5. Search Tool (`firecrawl_search`) 461 | 462 | Search the web and optionally extract content from search results. 
463 | 464 | **Best for:** 465 | - Finding specific information across multiple websites, when you don't know which website has the information. 466 | - When you need the most relevant content for a query 467 | 468 | **Not recommended for:** 469 | - When you already know which website to scrape (use scrape) 470 | - When you need comprehensive coverage of a single website (use map or crawl) 471 | 472 | **Common mistakes:** 473 | - Using crawl or map for open-ended questions (use search instead) 474 | 475 | **Usage Example:** 476 | ```json 477 | { 478 | "name": "firecrawl_search", 479 | "arguments": { 480 | "query": "latest AI research papers 2023", 481 | "limit": 5, 482 | "lang": "en", 483 | "country": "us", 484 | "scrapeOptions": { 485 | "formats": ["markdown"], 486 | "onlyMainContent": true 487 | } 488 | } 489 | } 490 | ``` 491 | 492 | **Returns:** 493 | - Array of search results (with optional scraped content) 494 | 495 | **Prompt Example:** 496 | > "Find the latest research papers on AI published in 2023." 497 | 498 | ### 6. Crawl Tool (`firecrawl_crawl`) 499 | 500 | Starts an asynchronous crawl job on a website and extract content from all pages. 501 | 502 | **Best for:** 503 | - Extracting content from multiple related pages, when you need comprehensive coverage. 504 | 505 | **Not recommended for:** 506 | - Extracting content from a single page (use scrape) 507 | - When token limits are a concern (use map + batch_scrape) 508 | - When you need fast results (crawling can be slow) 509 | 510 | **Warning:** Crawl responses can be very large and may exceed token limits. Limit the crawl depth and number of pages, or use map + batch_scrape for better control. 511 | 512 | **Common mistakes:** 513 | - Setting limit or maxDepth too high (causes token overflow) 514 | - Using crawl for a single page (use scrape instead) 515 | 516 | **Prompt Example:** 517 | > "Get all blog posts from the first two levels of example.com/blog." 518 | 519 | **Usage Example:** 520 | ```json 521 | { 522 | "name": "firecrawl_crawl", 523 | "arguments": { 524 | "url": "https://example.com/blog/*", 525 | "maxDepth": 2, 526 | "limit": 100, 527 | "allowExternalLinks": false, 528 | "deduplicateSimilarURLs": true 529 | } 530 | } 531 | ``` 532 | 533 | **Returns:** 534 | - Response includes operation ID for status checking: 535 | 536 | ```json 537 | { 538 | "content": [ 539 | { 540 | "type": "text", 541 | "text": "Started crawl for: https://example.com/* with job ID: 550e8400-e29b-41d4-a716-446655440000. Use firecrawl_check_crawl_status to check progress." 542 | } 543 | ], 544 | "isError": false 545 | } 546 | ``` 547 | 548 | ### 7. Check Crawl Status (`firecrawl_check_crawl_status`) 549 | 550 | Check the status of a crawl job. 551 | 552 | ```json 553 | { 554 | "name": "firecrawl_check_crawl_status", 555 | "arguments": { 556 | "id": "550e8400-e29b-41d4-a716-446655440000" 557 | } 558 | } 559 | ``` 560 | 561 | **Returns:** 562 | - Response includes the status of the crawl job: 563 | 564 | ### 8. Extract Tool (`firecrawl_extract`) 565 | 566 | Extract structured information from web pages using LLM capabilities. Supports both cloud AI and self-hosted LLM extraction. 567 | 568 | **Best for:** 569 | - Extracting specific structured data like prices, names, details. 
570 | 571 | **Not recommended for:** 572 | - When you need the full content of a page (use scrape) 573 | - When you're not looking for specific structured data 574 | 575 | **Arguments:** 576 | - `urls`: Array of URLs to extract information from 577 | - `prompt`: Custom prompt for the LLM extraction 578 | - `systemPrompt`: System prompt to guide the LLM 579 | - `schema`: JSON schema for structured data extraction 580 | - `allowExternalLinks`: Allow extraction from external links 581 | - `enableWebSearch`: Enable web search for additional context 582 | - `includeSubdomains`: Include subdomains in extraction 583 | 584 | When using a self-hosted instance, the extraction will use your configured LLM. For cloud API, it uses Firecrawl's managed LLM service. 585 | **Prompt Example:** 586 | > "Extract the product name, price, and description from these product pages." 587 | 588 | **Usage Example:** 589 | ```json 590 | { 591 | "name": "firecrawl_extract", 592 | "arguments": { 593 | "urls": ["https://example.com/page1", "https://example.com/page2"], 594 | "prompt": "Extract product information including name, price, and description", 595 | "systemPrompt": "You are a helpful assistant that extracts product information", 596 | "schema": { 597 | "type": "object", 598 | "properties": { 599 | "name": { "type": "string" }, 600 | "price": { "type": "number" }, 601 | "description": { "type": "string" } 602 | }, 603 | "required": ["name", "price"] 604 | }, 605 | "allowExternalLinks": false, 606 | "enableWebSearch": false, 607 | "includeSubdomains": false 608 | } 609 | } 610 | ``` 611 | 612 | **Returns:** 613 | - Extracted structured data as defined by your schema 614 | 615 | ```json 616 | { 617 | "content": [ 618 | { 619 | "type": "text", 620 | "text": { 621 | "name": "Example Product", 622 | "price": 99.99, 623 | "description": "This is an example product description" 624 | } 625 | } 626 | ], 627 | "isError": false 628 | } 629 | ``` 630 | 631 | ### 9. Deep Research Tool (`firecrawl_deep_research`) 632 | 633 | Conduct deep web research on a query using intelligent crawling, search, and LLM analysis. 634 | 635 | **Best for:** 636 | - Complex research questions requiring multiple sources, in-depth analysis. 637 | 638 | **Not recommended for:** 639 | - Simple questions that can be answered with a single search 640 | - When you need very specific information from a known page (use scrape) 641 | - When you need results quickly (deep research can take time) 642 | 643 | **Arguments:** 644 | - query (string, required): The research question or topic to explore. 645 | - maxDepth (number, optional): Maximum recursive depth for crawling/search (default: 3). 646 | - timeLimit (number, optional): Time limit in seconds for the research session (default: 120). 647 | - maxUrls (number, optional): Maximum number of URLs to analyze (default: 50). 648 | 649 | **Prompt Example:** 650 | > "Research the environmental impact of electric vehicles versus gasoline vehicles." 651 | 652 | **Usage Example:** 653 | ```json 654 | { 655 | "name": "firecrawl_deep_research", 656 | "arguments": { 657 | "query": "What are the environmental impacts of electric vehicles compared to gasoline vehicles?", 658 | "maxDepth": 3, 659 | "timeLimit": 120, 660 | "maxUrls": 50 661 | } 662 | } 663 | ``` 664 | 665 | **Returns:** 666 | - Final analysis generated by an LLM based on research. (data.finalAnalysis) 667 | - May also include structured activities and sources used in the research process. 668 | 669 | ### 10. 
Generate LLMs.txt Tool (`firecrawl_generate_llmstxt`) 670 | 671 | Generate a standardized llms.txt (and optionally llms-full.txt) file for a given domain. This file defines how large language models should interact 672 | with the site. 673 | 674 | **Best for:** 675 | - Creating machine-readable permission guidelines for AI models. 676 | 677 | **Not recommended for:** 678 | - General content extraction or research 679 | 680 | **Arguments:** 681 | - url (string, required): The base URL of the website to analyze. 682 | - maxUrls (number, optional): Max number of URLs to include (default: 10). 683 | - showFullText (boolean, optional): Whether to include llms-full.txt contents in the response. 684 | 685 | **Prompt Example:** 686 | > "Generate an LLMs.txt file for example.com." 687 | 688 | **Usage Example:** 689 | ```json 690 | { 691 | "name": "firecrawl_generate_llmstxt", 692 | "arguments": { 693 | "url": "https://example.com", 694 | "maxUrls": 20, 695 | "showFullText": true 696 | } 697 | } 698 | ``` 699 | 700 | **Returns:** 701 | - LLMs.txt file contents (and optionally llms-full.txt) 702 | 703 | ## Logging System 704 | 705 | The server includes comprehensive logging: 706 | 707 | - Operation status and progress 708 | - Performance metrics 709 | - Credit usage monitoring 710 | - Rate limit tracking 711 | - Error conditions 712 | 713 | Example log messages: 714 | 715 | ``` 716 | [INFO] Firecrawl MCP Server initialized successfully 717 | [INFO] Starting scrape for URL: https://example.com 718 | [INFO] Batch operation queued with ID: batch_1 719 | [WARNING] Credit usage has reached warning threshold 720 | [ERROR] Rate limit exceeded, retrying in 2s... 721 | ``` 722 | 723 | ## Error Handling 724 | 725 | The server provides robust error handling: 726 | 727 | - Automatic retries for transient errors 728 | - Rate limit handling with backoff 729 | - Detailed error messages 730 | - Credit usage warnings 731 | - Network resilience 732 | 733 | Example error response: 734 | 735 | ```json 736 | { 737 | "content": [ 738 | { 739 | "type": "text", 740 | "text": "Error: Rate limit exceeded. Retrying in 2 seconds..." 741 | } 742 | ], 743 | "isError": true 744 | } 745 | ``` 746 | 747 | ## Development 748 | 749 | ```bash 750 | # Install dependencies 751 | npm install 752 | 753 | # Build 754 | npm run build 755 | 756 | # Run tests 757 | npm test 758 | ``` 759 | 760 | ### Contributing 761 | 762 | 1. Fork the repository 763 | 2. Create your feature branch 764 | 3. Run tests: `npm test` 765 | 4. Submit a pull request 766 | 767 | ### Thanks to contributors 768 | 769 | Thanks to [@vrknetha](https://github.com/vrknetha), [@cawstudios](https://caw.tech) for the initial implementation! 770 | 771 | Thanks to MCP.so and Klavis AI for hosting and [@gstarwd](https://github.com/gstarwd), [@xiangkaiz](https://github.com/xiangkaiz) and [@zihaolin96](https://github.com/zihaolin96) for integrating our server. 
772 | 773 | ## License 774 | 775 | MIT License - see LICENSE file for details 776 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | preset: 'ts-jest/presets/default-esm', 3 | testEnvironment: 'node', 4 | extensionsToTreatAsEsm: ['.ts'], 5 | transform: { 6 | '^.+\\.tsx?$': [ 7 | 'ts-jest', 8 | { 9 | useESM: true, 10 | }, 11 | ], 12 | }, 13 | moduleNameMapper: { 14 | '^(\\.{1,2}/.*)\\.js$': '$1', 15 | }, 16 | testMatch: ['**/*.test.ts'], 17 | setupFilesAfterEnv: ['<rootDir>/jest.setup.ts'], 18 | }; 19 | -------------------------------------------------------------------------------- /jest.setup.ts: -------------------------------------------------------------------------------- 1 | import { jest } from '@jest/globals'; 2 | import FirecrawlApp from '@mendable/firecrawl-js'; 3 | import type { 4 | SearchResponse, 5 | BatchScrapeResponse, 6 | BatchScrapeStatusResponse, 7 | FirecrawlDocument, 8 | } from '@mendable/firecrawl-js'; 9 | 10 | // Set test timeout 11 | jest.setTimeout(30000); 12 | 13 | // Create mock responses 14 | const mockSearchResponse: SearchResponse = { 15 | success: true, 16 | data: [ 17 | { 18 | url: 'https://example.com', 19 | title: 'Test Page', 20 | description: 'Test Description', 21 | markdown: '# Test Content', 22 | actions: null as never, 23 | }, 24 | ] as FirecrawlDocument[], 25 | }; 26 | 27 | const mockBatchScrapeResponse: BatchScrapeResponse = { 28 | success: true, 29 | id: 'test-batch-id', 30 | }; 31 | 32 | const mockBatchStatusResponse: BatchScrapeStatusResponse = { 33 | success: true, 34 | status: 'completed', 35 | completed: 1, 36 | total: 1, 37 | creditsUsed: 1, 38 | expiresAt: new Date(), 39 | data: [ 40 | { 41 | url: 'https://example.com', 42 | title: 'Test Page', 43 | description: 'Test Description', 44 | markdown: '# Test Content', 45 | actions: null as never, 46 | }, 47 | ] as FirecrawlDocument[], 48 | }; 49 | 50 | // Create mock instance methods 51 | const mockSearch = jest.fn().mockImplementation(async () => mockSearchResponse); 52 | const mockAsyncBatchScrapeUrls = jest 53 | .fn() 54 | .mockImplementation(async () => mockBatchScrapeResponse); 55 | const mockCheckBatchScrapeStatus = jest 56 | .fn() 57 | .mockImplementation(async () => mockBatchStatusResponse); 58 | 59 | // Create mock instance 60 | const mockInstance = { 61 | apiKey: 'test-api-key', 62 | apiUrl: 'test-api-url', 63 | search: mockSearch, 64 | asyncBatchScrapeUrls: mockAsyncBatchScrapeUrls, 65 | checkBatchScrapeStatus: mockCheckBatchScrapeStatus, 66 | }; 67 | 68 | // Mock the module 69 | jest.mock('@mendable/firecrawl-js', () => ({ 70 | __esModule: true, 71 | default: jest.fn().mockImplementation(() => mockInstance), 72 | })); 73 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "firecrawl-mcp", 3 | "version": "1.9.0", 4 | "description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances.
Features include web scraping, batch processing, structured data extraction, and LLM-powered content analysis.", 5 | "type": "module", 6 | "bin": { 7 | "firecrawl-mcp": "dist/index.js" 8 | }, 9 | "files": [ 10 | "dist" 11 | ], 12 | "publishConfig": { 13 | "access": "public" 14 | }, 15 | "scripts": { 16 | "build": "tsc && node -e \"require('fs').chmodSync('dist/index.js', '755')\"", 17 | "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js", 18 | "start": "node dist/index.js", 19 | "lint": "eslint src/**/*.ts", 20 | "lint:fix": "eslint src/**/*.ts --fix", 21 | "format": "prettier --write .", 22 | "prepare": "npm run build", 23 | "publish": "npm run build && npm publish" 24 | }, 25 | "license": "MIT", 26 | "dependencies": { 27 | "@mendable/firecrawl-js": "^1.19.0", 28 | "@modelcontextprotocol/sdk": "^1.4.1", 29 | "dotenv": "^16.4.7", 30 | "express": "^5.1.0", 31 | "shx": "^0.3.4", 32 | "ws": "^8.18.1" 33 | }, 34 | "devDependencies": { 35 | "@jest/globals": "^29.7.0", 36 | "@types/express": "^5.0.1", 37 | "@types/jest": "^29.5.14", 38 | "@types/node": "^20.10.5", 39 | "@typescript-eslint/eslint-plugin": "^7.0.0", 40 | "@typescript-eslint/parser": "^7.0.0", 41 | "eslint": "^8.56.0", 42 | "eslint-config-prettier": "^9.1.0", 43 | "jest": "^29.7.0", 44 | "jest-mock-extended": "^4.0.0-beta1", 45 | "prettier": "^3.1.1", 46 | "ts-jest": "^29.1.1", 47 | "typescript": "^5.3.3" 48 | }, 49 | "engines": { 50 | "node": ">=18.0.0" 51 | }, 52 | "keywords": [ 53 | "mcp", 54 | "firecrawl", 55 | "web-scraping", 56 | "crawler", 57 | "content-extraction" 58 | ], 59 | "repository": { 60 | "type": "git", 61 | "url": "git+https://github.com/mendableai/firecrawl-mcp-server.git" 62 | }, 63 | "author": "vrknetha", 64 | "bugs": { 65 | "url": "https://github.com/mendableai/firecrawl-mcp-server/issues" 66 | }, 67 | "homepage": "https://github.com/mendableai/firecrawl-mcp-server#readme" 68 | } 69 | -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml 2 | 3 | startCommand: 4 | type: stdio 5 | configSchema: 6 | # JSON Schema defining the configuration options for the MCP. 7 | type: object 8 | required: 9 | - fireCrawlApiKey 10 | properties: 11 | fireCrawlApiKey: 12 | type: string 13 | description: Your Firecrawl API key. Required for cloud API usage. 14 | fireCrawlApiUrl: 15 | type: string 16 | description: 17 | Custom API endpoint for self-hosted instances. If provided, API key 18 | becomes optional. 19 | commandFunction: 20 | # A function that produces the CLI command to start the MCP on stdio. 
21 | |- 22 | (config) => ({ command: 'node', args: ['dist/index.js'], env: { FIRECRAWL_API_KEY: config.fireCrawlApiKey, FIRECRAWL_API_URL: config.fireCrawlApiUrl || '' } }) 23 | -------------------------------------------------------------------------------- /src/index.test.ts: -------------------------------------------------------------------------------- 1 | import { Server } from '@modelcontextprotocol/sdk/server/index.js'; 2 | import { CallToolRequestSchema } from '@modelcontextprotocol/sdk/types.js'; 3 | import FirecrawlApp from '@mendable/firecrawl-js'; 4 | import type { 5 | SearchResponse, 6 | BatchScrapeResponse, 7 | BatchScrapeStatusResponse, 8 | CrawlResponse, 9 | CrawlStatusResponse, 10 | ScrapeResponse, 11 | FirecrawlDocument, 12 | SearchParams, 13 | } from '@mendable/firecrawl-js'; 14 | import { 15 | describe, 16 | expect, 17 | jest, 18 | test, 19 | beforeEach, 20 | afterEach, 21 | } from '@jest/globals'; 22 | import { mock, MockProxy } from 'jest-mock-extended'; 23 | 24 | // Mock FirecrawlApp 25 | jest.mock('@mendable/firecrawl-js'); 26 | 27 | // Test interfaces 28 | interface RequestParams { 29 | method: string; 30 | params: { 31 | name: string; 32 | arguments?: Record<string, unknown>; 33 | }; 34 | } 35 | 36 | interface BatchScrapeArgs { 37 | urls: string[]; 38 | options?: { 39 | formats?: string[]; 40 | [key: string]: any; 41 | }; 42 | } 43 | 44 | interface StatusCheckArgs { 45 | id: string; 46 | } 47 | 48 | interface SearchArgs { 49 | query: string; 50 | scrapeOptions?: { 51 | formats?: string[]; 52 | onlyMainContent?: boolean; 53 | }; 54 | } 55 | 56 | interface ScrapeArgs { 57 | url: string; 58 | formats?: string[]; 59 | onlyMainContent?: boolean; 60 | } 61 | 62 | interface CrawlArgs { 63 | url: string; 64 | maxDepth?: number; 65 | limit?: number; 66 | } 67 | 68 | // Mock client interface 69 | interface MockFirecrawlClient { 70 | scrapeUrl(url: string, options?: any): Promise<ScrapeResponse>; 71 | search(query: string, params?: SearchParams): Promise<SearchResponse>; 72 | asyncBatchScrapeUrls( 73 | urls: string[], 74 | options?: any 75 | ): Promise<BatchScrapeResponse>; 76 | checkBatchScrapeStatus(id: string): Promise<BatchScrapeStatusResponse>; 77 | asyncCrawlUrl(url: string, options?: any): Promise<CrawlResponse>; 78 | checkCrawlStatus(id: string): Promise<CrawlStatusResponse>; 79 | mapUrl(url: string, options?: any): Promise<{ links: string[] }>; 80 | } 81 | 82 | describe('Firecrawl Tool Tests', () => { 83 | let mockClient: MockProxy<MockFirecrawlClient>; 84 | let requestHandler: (request: RequestParams) => Promise<any>; 85 | 86 | beforeEach(() => { 87 | jest.clearAllMocks(); 88 | mockClient = mock<MockFirecrawlClient>(); 89 | 90 | // Set up mock implementations 91 | const mockInstance = new FirecrawlApp({ apiKey: 'test' }); 92 | Object.assign(mockInstance, mockClient); 93 | 94 | // Create request handler 95 | requestHandler = async (request: RequestParams) => { 96 | const { name, arguments: args } = request.params; 97 | if (!args) { 98 | throw new Error('No arguments provided'); 99 | } 100 | return handleRequest(name, args, mockClient); 101 | }; 102 | }); 103 | 104 | afterEach(() => { 105 | jest.clearAllMocks(); 106 | }); 107 | 108 | // Test scrape functionality 109 | test('should handle scrape request', async () => { 110 | const url = 'https://example.com'; 111 | const options = { formats: ['markdown'] }; 112 | 113 | const mockResponse: ScrapeResponse = { 114 | success: true, 115 | markdown: '# Test Content', 116 | html: undefined, 117 | rawHtml: undefined, 118 | url: 'https://example.com', 119 | actions: undefined as never, 120 | }; 121 | 122 | mockClient.scrapeUrl.mockResolvedValueOnce(mockResponse); 123 | 124 | const response = await
requestHandler({ 125 | method: 'call_tool', 126 | params: { 127 | name: 'firecrawl_scrape', 128 | arguments: { url, ...options }, 129 | }, 130 | }); 131 | 132 | expect(response).toEqual({ 133 | content: [{ type: 'text', text: '# Test Content' }], 134 | isError: false, 135 | }); 136 | expect(mockClient.scrapeUrl).toHaveBeenCalledWith(url, { 137 | formats: ['markdown'], 138 | url, 139 | }); 140 | }); 141 | 142 | // Test batch scrape functionality 143 | test('should handle batch scrape request', async () => { 144 | const urls = ['https://example.com']; 145 | const options = { formats: ['markdown'] }; 146 | 147 | mockClient.asyncBatchScrapeUrls.mockResolvedValueOnce({ 148 | success: true, 149 | id: 'test-batch-id', 150 | }); 151 | 152 | const response = await requestHandler({ 153 | method: 'call_tool', 154 | params: { 155 | name: 'firecrawl_batch_scrape', 156 | arguments: { urls, options }, 157 | }, 158 | }); 159 | 160 | expect(response.content[0].text).toContain( 161 | 'Batch operation queued with ID: batch_' 162 | ); 163 | expect(mockClient.asyncBatchScrapeUrls).toHaveBeenCalledWith(urls, options); 164 | }); 165 | 166 | // Test search functionality 167 | test('should handle search request', async () => { 168 | const query = 'test query'; 169 | const scrapeOptions = { formats: ['markdown'] }; 170 | 171 | const mockSearchResponse: SearchResponse = { 172 | success: true, 173 | data: [ 174 | { 175 | url: 'https://example.com', 176 | title: 'Test Page', 177 | description: 'Test Description', 178 | markdown: '# Test Content', 179 | actions: undefined as never, 180 | }, 181 | ], 182 | }; 183 | 184 | mockClient.search.mockResolvedValueOnce(mockSearchResponse); 185 | 186 | const response = await requestHandler({ 187 | method: 'call_tool', 188 | params: { 189 | name: 'firecrawl_search', 190 | arguments: { query, scrapeOptions }, 191 | }, 192 | }); 193 | 194 | expect(response.isError).toBe(false); 195 | expect(response.content[0].text).toContain('Test Page'); 196 | expect(mockClient.search).toHaveBeenCalledWith(query, scrapeOptions); 197 | }); 198 | 199 | // Test crawl functionality 200 | test('should handle crawl request', async () => { 201 | const url = 'https://example.com'; 202 | const options = { maxDepth: 2 }; 203 | 204 | mockClient.asyncCrawlUrl.mockResolvedValueOnce({ 205 | success: true, 206 | id: 'test-crawl-id', 207 | }); 208 | 209 | const response = await requestHandler({ 210 | method: 'call_tool', 211 | params: { 212 | name: 'firecrawl_crawl', 213 | arguments: { url, ...options }, 214 | }, 215 | }); 216 | 217 | expect(response.isError).toBe(false); 218 | expect(response.content[0].text).toContain('test-crawl-id'); 219 | expect(mockClient.asyncCrawlUrl).toHaveBeenCalledWith(url, { 220 | maxDepth: 2, 221 | url, 222 | }); 223 | }); 224 | 225 | // Test error handling 226 | test('should handle API errors', async () => { 227 | const url = 'https://example.com'; 228 | 229 | mockClient.scrapeUrl.mockRejectedValueOnce(new Error('API Error')); 230 | 231 | const response = await requestHandler({ 232 | method: 'call_tool', 233 | params: { 234 | name: 'firecrawl_scrape', 235 | arguments: { url }, 236 | }, 237 | }); 238 | 239 | expect(response.isError).toBe(true); 240 | expect(response.content[0].text).toContain('API Error'); 241 | }); 242 | 243 | // Test rate limiting 244 | test('should handle rate limits', async () => { 245 | const url = 'https://example.com'; 246 | 247 | // Mock rate limit error 248 | mockClient.scrapeUrl.mockRejectedValueOnce( 249 | new Error('rate limit exceeded') 250 | ); 251 | 
252 | const response = await requestHandler({ 253 | method: 'call_tool', 254 | params: { 255 | name: 'firecrawl_scrape', 256 | arguments: { url }, 257 | }, 258 | }); 259 | 260 | expect(response.isError).toBe(true); 261 | expect(response.content[0].text).toContain('rate limit exceeded'); 262 | }); 263 | }); 264 | 265 | // Helper function to simulate request handling 266 | async function handleRequest( 267 | name: string, 268 | args: any, 269 | client: MockFirecrawlClient 270 | ) { 271 | try { 272 | switch (name) { 273 | case 'firecrawl_scrape': { 274 | const response = await client.scrapeUrl(args.url, args); 275 | if (!response.success) { 276 | throw new Error(response.error || 'Scraping failed'); 277 | } 278 | return { 279 | content: [ 280 | { type: 'text', text: response.markdown || 'No content available' }, 281 | ], 282 | isError: false, 283 | }; 284 | } 285 | 286 | case 'firecrawl_batch_scrape': { 287 | const response = await client.asyncBatchScrapeUrls( 288 | args.urls, 289 | args.options 290 | ); 291 | return { 292 | content: [ 293 | { 294 | type: 'text', 295 | text: `Batch operation queued with ID: batch_1. Use firecrawl_check_batch_status to check progress.`, 296 | }, 297 | ], 298 | isError: false, 299 | }; 300 | } 301 | 302 | case 'firecrawl_search': { 303 | const response = await client.search(args.query, args.scrapeOptions); 304 | if (!response.success) { 305 | throw new Error(response.error || 'Search failed'); 306 | } 307 | const results = response.data 308 | .map( 309 | (result) => 310 | `URL: ${result.url}\nTitle: ${ 311 | result.title || 'No title' 312 | }\nDescription: ${result.description || 'No description'}\n${ 313 | result.markdown ? `\nContent:\n${result.markdown}` : '' 314 | }` 315 | ) 316 | .join('\n\n'); 317 | return { 318 | content: [{ type: 'text', text: results }], 319 | isError: false, 320 | }; 321 | } 322 | 323 | case 'firecrawl_crawl': { 324 | const response = await client.asyncCrawlUrl(args.url, args); 325 | if (!response.success) { 326 | throw new Error(response.error); 327 | } 328 | return { 329 | content: [ 330 | { 331 | type: 'text', 332 | text: `Started crawl for ${args.url} with job ID: ${response.id}`, 333 | }, 334 | ], 335 | isError: false, 336 | }; 337 | } 338 | 339 | default: 340 | throw new Error(`Unknown tool: ${name}`); 341 | } 342 | } catch (error) { 343 | return { 344 | content: [ 345 | { 346 | type: 'text', 347 | text: error instanceof Error ? 
error.message : String(error), 348 | }, 349 | ], 350 | isError: true, 351 | }; 352 | } 353 | } 354 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import { Server } from '@modelcontextprotocol/sdk/server/index.js'; 4 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; 5 | import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js'; 6 | import { 7 | Tool, 8 | CallToolRequestSchema, 9 | ListToolsRequestSchema, 10 | } from '@modelcontextprotocol/sdk/types.js'; 11 | import FirecrawlApp, { 12 | type ScrapeParams, 13 | type MapParams, 14 | type CrawlParams, 15 | type FirecrawlDocument, 16 | } from '@mendable/firecrawl-js'; 17 | 18 | import express, { Request, Response } from 'express'; 19 | import dotenv from 'dotenv'; 20 | 21 | dotenv.config(); 22 | 23 | // Tool definitions 24 | const SCRAPE_TOOL: Tool = { 25 | name: 'firecrawl_scrape', 26 | description: ` 27 | Scrape content from a single URL with advanced options. 28 | 29 | **Best for:** Single page content extraction, when you know exactly which page contains the information. 30 | **Not recommended for:** Multiple pages (use batch_scrape), unknown page (use search), structured data (use extract). 31 | **Common mistakes:** Using scrape for a list of URLs (use batch_scrape instead). 32 | **Prompt Example:** "Get the content of the page at https://example.com." 33 | **Usage Example:** 34 | \`\`\`json 35 | { 36 | "name": "firecrawl_scrape", 37 | "arguments": { 38 | "url": "https://example.com", 39 | "formats": ["markdown"] 40 | } 41 | } 42 | \`\`\` 43 | **Returns:** Markdown, HTML, or other formats as specified. 
44 | `, 45 | inputSchema: { 46 | type: 'object', 47 | properties: { 48 | url: { 49 | type: 'string', 50 | description: 'The URL to scrape', 51 | }, 52 | formats: { 53 | type: 'array', 54 | items: { 55 | type: 'string', 56 | enum: [ 57 | 'markdown', 58 | 'html', 59 | 'rawHtml', 60 | 'screenshot', 61 | 'links', 62 | 'screenshot@fullPage', 63 | 'extract', 64 | ], 65 | }, 66 | default: ['markdown'], 67 | description: "Content formats to extract (default: ['markdown'])", 68 | }, 69 | onlyMainContent: { 70 | type: 'boolean', 71 | description: 72 | 'Extract only the main content, filtering out navigation, footers, etc.', 73 | }, 74 | includeTags: { 75 | type: 'array', 76 | items: { type: 'string' }, 77 | description: 'HTML tags to specifically include in extraction', 78 | }, 79 | excludeTags: { 80 | type: 'array', 81 | items: { type: 'string' }, 82 | description: 'HTML tags to exclude from extraction', 83 | }, 84 | waitFor: { 85 | type: 'number', 86 | description: 'Time in milliseconds to wait for dynamic content to load', 87 | }, 88 | timeout: { 89 | type: 'number', 90 | description: 91 | 'Maximum time in milliseconds to wait for the page to load', 92 | }, 93 | actions: { 94 | type: 'array', 95 | items: { 96 | type: 'object', 97 | properties: { 98 | type: { 99 | type: 'string', 100 | enum: [ 101 | 'wait', 102 | 'click', 103 | 'screenshot', 104 | 'write', 105 | 'press', 106 | 'scroll', 107 | 'scrape', 108 | 'executeJavascript', 109 | ], 110 | description: 'Type of action to perform', 111 | }, 112 | selector: { 113 | type: 'string', 114 | description: 'CSS selector for the target element', 115 | }, 116 | milliseconds: { 117 | type: 'number', 118 | description: 'Time to wait in milliseconds (for wait action)', 119 | }, 120 | text: { 121 | type: 'string', 122 | description: 'Text to write (for write action)', 123 | }, 124 | key: { 125 | type: 'string', 126 | description: 'Key to press (for press action)', 127 | }, 128 | direction: { 129 | type: 'string', 130 | enum: ['up', 'down'], 131 | description: 'Scroll direction', 132 | }, 133 | script: { 134 | type: 'string', 135 | description: 'JavaScript code to execute', 136 | }, 137 | fullPage: { 138 | type: 'boolean', 139 | description: 'Take full page screenshot', 140 | }, 141 | }, 142 | required: ['type'], 143 | }, 144 | description: 'List of actions to perform before scraping', 145 | }, 146 | extract: { 147 | type: 'object', 148 | properties: { 149 | schema: { 150 | type: 'object', 151 | description: 'Schema for structured data extraction', 152 | }, 153 | systemPrompt: { 154 | type: 'string', 155 | description: 'System prompt for LLM extraction', 156 | }, 157 | prompt: { 158 | type: 'string', 159 | description: 'User prompt for LLM extraction', 160 | }, 161 | }, 162 | description: 'Configuration for structured data extraction', 163 | }, 164 | mobile: { 165 | type: 'boolean', 166 | description: 'Use mobile viewport', 167 | }, 168 | skipTlsVerification: { 169 | type: 'boolean', 170 | description: 'Skip TLS certificate verification', 171 | }, 172 | removeBase64Images: { 173 | type: 'boolean', 174 | description: 'Remove base64 encoded images from output', 175 | }, 176 | location: { 177 | type: 'object', 178 | properties: { 179 | country: { 180 | type: 'string', 181 | description: 'Country code for geolocation', 182 | }, 183 | languages: { 184 | type: 'array', 185 | items: { type: 'string' }, 186 | description: 'Language codes for content', 187 | }, 188 | }, 189 | description: 'Location settings for scraping', 190 | }, 191 | }, 192 | required: ['url'], 193 
| }, 194 | }; 195 | 196 | const MAP_TOOL: Tool = { 197 | name: 'firecrawl_map', 198 | description: ` 199 | Map a website to discover all indexed URLs on the site. 200 | 201 | **Best for:** Discovering URLs on a website before deciding what to scrape; finding specific sections of a website. 202 | **Not recommended for:** When you already know which specific URL you need (use scrape or batch_scrape); when you need the content of the pages (use scrape after mapping). 203 | **Common mistakes:** Using crawl to discover URLs instead of map. 204 | **Prompt Example:** "List all URLs on example.com." 205 | **Usage Example:** 206 | \`\`\`json 207 | { 208 | "name": "firecrawl_map", 209 | "arguments": { 210 | "url": "https://example.com" 211 | } 212 | } 213 | \`\`\` 214 | **Returns:** Array of URLs found on the site. 215 | `, 216 | inputSchema: { 217 | type: 'object', 218 | properties: { 219 | url: { 220 | type: 'string', 221 | description: 'Starting URL for URL discovery', 222 | }, 223 | search: { 224 | type: 'string', 225 | description: 'Optional search term to filter URLs', 226 | }, 227 | ignoreSitemap: { 228 | type: 'boolean', 229 | description: 'Skip sitemap.xml discovery and only use HTML links', 230 | }, 231 | sitemapOnly: { 232 | type: 'boolean', 233 | description: 'Only use sitemap.xml for discovery, ignore HTML links', 234 | }, 235 | includeSubdomains: { 236 | type: 'boolean', 237 | description: 'Include URLs from subdomains in results', 238 | }, 239 | limit: { 240 | type: 'number', 241 | description: 'Maximum number of URLs to return', 242 | }, 243 | }, 244 | required: ['url'], 245 | }, 246 | }; 247 | 248 | const CRAWL_TOOL: Tool = { 249 | name: 'firecrawl_crawl', 250 | description: ` 251 | Starts an asynchronous crawl job on a website and extracts content from all pages. 252 | 253 | **Best for:** Extracting content from multiple related pages, when you need comprehensive coverage. 254 | **Not recommended for:** Extracting content from a single page (use scrape); when token limits are a concern (use map + batch_scrape); when you need fast results (crawling can be slow). 255 | **Warning:** Crawl responses can be very large and may exceed token limits. Limit the crawl depth and number of pages, or use map + batch_scrape for better control. 256 | **Common mistakes:** Setting limit or maxDepth too high (causes token overflow); using crawl for a single page (use scrape instead). 257 | **Prompt Example:** "Get all blog posts from the first two levels of example.com/blog." 258 | **Usage Example:** 259 | \`\`\`json 260 | { 261 | "name": "firecrawl_crawl", 262 | "arguments": { 263 | "url": "https://example.com/blog/*", 264 | "maxDepth": 2, 265 | "limit": 100, 266 | "allowExternalLinks": false, 267 | "deduplicateSimilarURLs": true 268 | } 269 | } 270 | \`\`\` 271 | **Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress. 
272 | `, 273 | inputSchema: { 274 | type: 'object', 275 | properties: { 276 | url: { 277 | type: 'string', 278 | description: 'Starting URL for the crawl', 279 | }, 280 | excludePaths: { 281 | type: 'array', 282 | items: { type: 'string' }, 283 | description: 'URL paths to exclude from crawling', 284 | }, 285 | includePaths: { 286 | type: 'array', 287 | items: { type: 'string' }, 288 | description: 'Only crawl these URL paths', 289 | }, 290 | maxDepth: { 291 | type: 'number', 292 | description: 'Maximum link depth to crawl', 293 | }, 294 | ignoreSitemap: { 295 | type: 'boolean', 296 | description: 'Skip sitemap.xml discovery', 297 | }, 298 | limit: { 299 | type: 'number', 300 | description: 'Maximum number of pages to crawl', 301 | }, 302 | allowBackwardLinks: { 303 | type: 'boolean', 304 | description: 'Allow crawling links that point to parent directories', 305 | }, 306 | allowExternalLinks: { 307 | type: 'boolean', 308 | description: 'Allow crawling links to external domains', 309 | }, 310 | webhook: { 311 | oneOf: [ 312 | { 313 | type: 'string', 314 | description: 'Webhook URL to notify when crawl is complete', 315 | }, 316 | { 317 | type: 'object', 318 | properties: { 319 | url: { 320 | type: 'string', 321 | description: 'Webhook URL', 322 | }, 323 | headers: { 324 | type: 'object', 325 | description: 'Custom headers for webhook requests', 326 | }, 327 | }, 328 | required: ['url'], 329 | }, 330 | ], 331 | }, 332 | deduplicateSimilarURLs: { 333 | type: 'boolean', 334 | description: 'Remove similar URLs during crawl', 335 | }, 336 | ignoreQueryParameters: { 337 | type: 'boolean', 338 | description: 'Ignore query parameters when comparing URLs', 339 | }, 340 | scrapeOptions: { 341 | type: 'object', 342 | properties: { 343 | formats: { 344 | type: 'array', 345 | items: { 346 | type: 'string', 347 | enum: [ 348 | 'markdown', 349 | 'html', 350 | 'rawHtml', 351 | 'screenshot', 352 | 'links', 353 | 'screenshot@fullPage', 354 | 'extract', 355 | ], 356 | }, 357 | }, 358 | onlyMainContent: { 359 | type: 'boolean', 360 | }, 361 | includeTags: { 362 | type: 'array', 363 | items: { type: 'string' }, 364 | }, 365 | excludeTags: { 366 | type: 'array', 367 | items: { type: 'string' }, 368 | }, 369 | waitFor: { 370 | type: 'number', 371 | }, 372 | }, 373 | description: 'Options for scraping each page', 374 | }, 375 | }, 376 | required: ['url'], 377 | }, 378 | }; 379 | 380 | const CHECK_CRAWL_STATUS_TOOL: Tool = { 381 | name: 'firecrawl_check_crawl_status', 382 | description: ` 383 | Check the status of a crawl job. 384 | 385 | **Usage Example:** 386 | \`\`\`json 387 | { 388 | "name": "firecrawl_check_crawl_status", 389 | "arguments": { 390 | "id": "550e8400-e29b-41d4-a716-446655440000" 391 | } 392 | } 393 | \`\`\` 394 | **Returns:** Status and progress of the crawl job, including results if available. 395 | `, 396 | inputSchema: { 397 | type: 'object', 398 | properties: { 399 | id: { 400 | type: 'string', 401 | description: 'Crawl job ID to check', 402 | }, 403 | }, 404 | required: ['id'], 405 | }, 406 | }; 407 | 408 | const SEARCH_TOOL: Tool = { 409 | name: 'firecrawl_search', 410 | description: ` 411 | Search the web and optionally extract content from search results. 412 | 413 | **Best for:** Finding specific information across multiple websites, when you don't know which website has the information; when you need the most relevant content for a query. 
414 | **Not recommended for:** When you already know which website to scrape (use scrape); when you need comprehensive coverage of a single website (use map or crawl). 415 | **Common mistakes:** Using crawl or map for open-ended questions (use search instead). 416 | **Prompt Example:** "Find the latest research papers on AI published in 2023." 417 | **Usage Example:** 418 | \`\`\`json 419 | { 420 | "name": "firecrawl_search", 421 | "arguments": { 422 | "query": "latest AI research papers 2023", 423 | "limit": 5, 424 | "lang": "en", 425 | "country": "us", 426 | "scrapeOptions": { 427 | "formats": ["markdown"], 428 | "onlyMainContent": true 429 | } 430 | } 431 | } 432 | \`\`\` 433 | **Returns:** Array of search results (with optional scraped content). 434 | `, 435 | inputSchema: { 436 | type: 'object', 437 | properties: { 438 | query: { 439 | type: 'string', 440 | description: 'Search query string', 441 | }, 442 | limit: { 443 | type: 'number', 444 | description: 'Maximum number of results to return (default: 5)', 445 | }, 446 | lang: { 447 | type: 'string', 448 | description: 'Language code for search results (default: en)', 449 | }, 450 | country: { 451 | type: 'string', 452 | description: 'Country code for search results (default: us)', 453 | }, 454 | tbs: { 455 | type: 'string', 456 | description: 'Time-based search filter', 457 | }, 458 | filter: { 459 | type: 'string', 460 | description: 'Search filter', 461 | }, 462 | location: { 463 | type: 'object', 464 | properties: { 465 | country: { 466 | type: 'string', 467 | description: 'Country code for geolocation', 468 | }, 469 | languages: { 470 | type: 'array', 471 | items: { type: 'string' }, 472 | description: 'Language codes for content', 473 | }, 474 | }, 475 | description: 'Location settings for search', 476 | }, 477 | scrapeOptions: { 478 | type: 'object', 479 | properties: { 480 | formats: { 481 | type: 'array', 482 | items: { 483 | type: 'string', 484 | enum: ['markdown', 'html', 'rawHtml'], 485 | }, 486 | description: 'Content formats to extract from search results', 487 | }, 488 | onlyMainContent: { 489 | type: 'boolean', 490 | description: 'Extract only the main content from results', 491 | }, 492 | waitFor: { 493 | type: 'number', 494 | description: 'Time in milliseconds to wait for dynamic content', 495 | }, 496 | }, 497 | description: 'Options for scraping search results', 498 | }, 499 | }, 500 | required: ['query'], 501 | }, 502 | }; 503 | 504 | const EXTRACT_TOOL: Tool = { 505 | name: 'firecrawl_extract', 506 | description: ` 507 | Extract structured information from web pages using LLM capabilities. Supports both cloud AI and self-hosted LLM extraction. 508 | 509 | **Best for:** Extracting specific structured data like prices, names, details. 510 | **Not recommended for:** When you need the full content of a page (use scrape); when you're not looking for specific structured data. 511 | **Arguments:** 512 | - urls: Array of URLs to extract information from 513 | - prompt: Custom prompt for the LLM extraction 514 | - systemPrompt: System prompt to guide the LLM 515 | - schema: JSON schema for structured data extraction 516 | - allowExternalLinks: Allow extraction from external links 517 | - enableWebSearch: Enable web search for additional context 518 | - includeSubdomains: Include subdomains in extraction 519 | **Prompt Example:** "Extract the product name, price, and description from these product pages." 
520 | **Usage Example:** 521 | \`\`\`json 522 | { 523 | "name": "firecrawl_extract", 524 | "arguments": { 525 | "urls": ["https://example.com/page1", "https://example.com/page2"], 526 | "prompt": "Extract product information including name, price, and description", 527 | "systemPrompt": "You are a helpful assistant that extracts product information", 528 | "schema": { 529 | "type": "object", 530 | "properties": { 531 | "name": { "type": "string" }, 532 | "price": { "type": "number" }, 533 | "description": { "type": "string" } 534 | }, 535 | "required": ["name", "price"] 536 | }, 537 | "allowExternalLinks": false, 538 | "enableWebSearch": false, 539 | "includeSubdomains": false 540 | } 541 | } 542 | \`\`\` 543 | **Returns:** Extracted structured data as defined by your schema. 544 | `, 545 | inputSchema: { 546 | type: 'object', 547 | properties: { 548 | urls: { 549 | type: 'array', 550 | items: { type: 'string' }, 551 | description: 'List of URLs to extract information from', 552 | }, 553 | prompt: { 554 | type: 'string', 555 | description: 'Prompt for the LLM extraction', 556 | }, 557 | systemPrompt: { 558 | type: 'string', 559 | description: 'System prompt for LLM extraction', 560 | }, 561 | schema: { 562 | type: 'object', 563 | description: 'JSON schema for structured data extraction', 564 | }, 565 | allowExternalLinks: { 566 | type: 'boolean', 567 | description: 'Allow extraction from external links', 568 | }, 569 | enableWebSearch: { 570 | type: 'boolean', 571 | description: 'Enable web search for additional context', 572 | }, 573 | includeSubdomains: { 574 | type: 'boolean', 575 | description: 'Include subdomains in extraction', 576 | }, 577 | }, 578 | required: ['urls'], 579 | }, 580 | }; 581 | 582 | const DEEP_RESEARCH_TOOL: Tool = { 583 | name: 'firecrawl_deep_research', 584 | description: ` 585 | Conduct deep web research on a query using intelligent crawling, search, and LLM analysis. 586 | 587 | **Best for:** Complex research questions requiring multiple sources, in-depth analysis. 588 | **Not recommended for:** Simple questions that can be answered with a single search; when you need very specific information from a known page (use scrape); when you need results quickly (deep research can take time). 589 | **Arguments:** 590 | - query (string, required): The research question or topic to explore. 591 | - maxDepth (number, optional): Maximum recursive depth for crawling/search (default: 3). 592 | - timeLimit (number, optional): Time limit in seconds for the research session (default: 120). 593 | - maxUrls (number, optional): Maximum number of URLs to analyze (default: 50). 594 | **Prompt Example:** "Research the environmental impact of electric vehicles versus gasoline vehicles." 595 | **Usage Example:** 596 | \`\`\`json 597 | { 598 | "name": "firecrawl_deep_research", 599 | "arguments": { 600 | "query": "What are the environmental impacts of electric vehicles compared to gasoline vehicles?", 601 | "maxDepth": 3, 602 | "timeLimit": 120, 603 | "maxUrls": 50 604 | } 605 | } 606 | \`\`\` 607 | **Returns:** Final analysis generated by an LLM based on research. (data.finalAnalysis); may also include structured activities and sources used in the research process. 
608 | `, 609 | inputSchema: { 610 | type: 'object', 611 | properties: { 612 | query: { 613 | type: 'string', 614 | description: 'The query to research', 615 | }, 616 | maxDepth: { 617 | type: 'number', 618 | description: 'Maximum depth of research iterations (1-10)', 619 | }, 620 | timeLimit: { 621 | type: 'number', 622 | description: 'Time limit in seconds (30-300)', 623 | }, 624 | maxUrls: { 625 | type: 'number', 626 | description: 'Maximum number of URLs to analyze (1-1000)', 627 | }, 628 | }, 629 | required: ['query'], 630 | }, 631 | }; 632 | 633 | const GENERATE_LLMSTXT_TOOL: Tool = { 634 | name: 'firecrawl_generate_llmstxt', 635 | description: ` 636 | Generate a standardized llms.txt (and optionally llms-full.txt) file for a given domain. This file defines how large language models should interact with the site. 637 | 638 | **Best for:** Creating machine-readable permission guidelines for AI models. 639 | **Not recommended for:** General content extraction or research. 640 | **Arguments:** 641 | - url (string, required): The base URL of the website to analyze. 642 | - maxUrls (number, optional): Max number of URLs to include (default: 10). 643 | - showFullText (boolean, optional): Whether to include llms-full.txt contents in the response. 644 | **Prompt Example:** "Generate an LLMs.txt file for example.com." 645 | **Usage Example:** 646 | \`\`\`json 647 | { 648 | "name": "firecrawl_generate_llmstxt", 649 | "arguments": { 650 | "url": "https://example.com", 651 | "maxUrls": 20, 652 | "showFullText": true 653 | } 654 | } 655 | \`\`\` 656 | **Returns:** LLMs.txt file contents (and optionally llms-full.txt). 657 | `, 658 | inputSchema: { 659 | type: 'object', 660 | properties: { 661 | url: { 662 | type: 'string', 663 | description: 'The URL to generate LLMs.txt from', 664 | }, 665 | maxUrls: { 666 | type: 'number', 667 | description: 'Maximum number of URLs to process (1-100, default: 10)', 668 | }, 669 | showFullText: { 670 | type: 'boolean', 671 | description: 'Whether to show the full LLMs-full.txt in the response', 672 | }, 673 | }, 674 | required: ['url'], 675 | }, 676 | }; 677 | 678 | /** 679 | * Parameters for LLMs.txt generation operations. 680 | */ 681 | interface GenerateLLMsTextParams { 682 | /** 683 | * Maximum number of URLs to process (1-100) 684 | * @default 10 685 | */ 686 | maxUrls?: number; 687 | /** 688 | * Whether to show the full LLMs-full.txt in the response 689 | * @default false 690 | */ 691 | showFullText?: boolean; 692 | /** 693 | * Experimental flag for streaming 694 | */ 695 | __experimental_stream?: boolean; 696 | } 697 | 698 | /** 699 | * Response interface for LLMs.txt generation operations. 700 | */ 701 | // interface GenerateLLMsTextResponse { 702 | // success: boolean; 703 | // id: string; 704 | // } 705 | 706 | /** 707 | * Status response interface for LLMs.txt generation operations. 
708 | */ 709 | // interface GenerateLLMsTextStatusResponse { 710 | // success: boolean; 711 | // data: { 712 | // llmstxt: string; 713 | // llmsfulltxt?: string; 714 | // }; 715 | // status: 'processing' | 'completed' | 'failed'; 716 | // error?: string; 717 | // expiresAt: string; 718 | // } 719 | 720 | interface StatusCheckOptions { 721 | id: string; 722 | } 723 | 724 | interface SearchOptions { 725 | query: string; 726 | limit?: number; 727 | lang?: string; 728 | country?: string; 729 | tbs?: string; 730 | filter?: string; 731 | location?: { 732 | country?: string; 733 | languages?: string[]; 734 | }; 735 | scrapeOptions?: { 736 | formats?: string[]; 737 | onlyMainContent?: boolean; 738 | waitFor?: number; 739 | includeTags?: string[]; 740 | excludeTags?: string[]; 741 | timeout?: number; 742 | }; 743 | } 744 | 745 | // Add after other interfaces 746 | interface ExtractParams<T = any> { 747 | prompt?: string; 748 | systemPrompt?: string; 749 | schema?: T | object; 750 | allowExternalLinks?: boolean; 751 | enableWebSearch?: boolean; 752 | includeSubdomains?: boolean; 753 | origin?: string; 754 | } 755 | 756 | interface ExtractArgs { 757 | urls: string[]; 758 | prompt?: string; 759 | systemPrompt?: string; 760 | schema?: object; 761 | allowExternalLinks?: boolean; 762 | enableWebSearch?: boolean; 763 | includeSubdomains?: boolean; 764 | origin?: string; 765 | } 766 | 767 | interface ExtractResponse<T = any> { 768 | success: boolean; 769 | data: T; 770 | error?: string; 771 | warning?: string; 772 | creditsUsed?: number; 773 | } 774 | 775 | // Type guards 776 | function isScrapeOptions( 777 | args: unknown 778 | ): args is ScrapeParams & { url: string } { 779 | return ( 780 | typeof args === 'object' && 781 | args !== null && 782 | 'url' in args && 783 | typeof (args as { url: unknown }).url === 'string' 784 | ); 785 | } 786 | 787 | function isMapOptions(args: unknown): args is MapParams & { url: string } { 788 | return ( 789 | typeof args === 'object' && 790 | args !== null && 791 | 'url' in args && 792 | typeof (args as { url: unknown }).url === 'string' 793 | ); 794 | } 795 | 796 | function isCrawlOptions(args: unknown): args is CrawlParams & { url: string } { 797 | return ( 798 | typeof args === 'object' && 799 | args !== null && 800 | 'url' in args && 801 | typeof (args as { url: unknown }).url === 'string' 802 | ); 803 | } 804 | 805 | function isStatusCheckOptions(args: unknown): args is StatusCheckOptions { 806 | return ( 807 | typeof args === 'object' && 808 | args !== null && 809 | 'id' in args && 810 | typeof (args as { id: unknown }).id === 'string' 811 | ); 812 | } 813 | 814 | function isSearchOptions(args: unknown): args is SearchOptions { 815 | return ( 816 | typeof args === 'object' && 817 | args !== null && 818 | 'query' in args && 819 | typeof (args as { query: unknown }).query === 'string' 820 | ); 821 | } 822 | 823 | function isExtractOptions(args: unknown): args is ExtractArgs { 824 | if (typeof args !== 'object' || args === null) return false; 825 | const { urls } = args as { urls?: unknown }; 826 | return ( 827 | Array.isArray(urls) && 828 | urls.every((url): url is string => typeof url === 'string') 829 | ); 830 | } 831 | 832 | function isGenerateLLMsTextOptions( 833 | args: unknown 834 | ): args is { url: string } & Partial<GenerateLLMsTextParams> { 835 | return ( 836 | typeof args === 'object' && 837 | args !== null && 838 | 'url' in args && 839 | typeof (args as { url: unknown }).url === 'string' 840 | ); 841 | } 842 | 843 | // Server implementation 844 | const server = new Server( 845 | { 846 | name: 
'firecrawl-mcp', 847 | version: '1.7.0', 848 | }, 849 | { 850 | capabilities: { 851 | tools: {}, 852 | logging: {}, 853 | }, 854 | } 855 | ); 856 | 857 | // Get optional API URL 858 | const FIRECRAWL_API_URL = process.env.FIRECRAWL_API_URL; 859 | const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY; 860 | 861 | // Check if API key is required (only for cloud service) 862 | if ( 863 | process.env.CLOUD_SERVICE !== 'true' && 864 | !FIRECRAWL_API_URL && 865 | !FIRECRAWL_API_KEY 866 | ) { 867 | console.error( 868 | 'Error: FIRECRAWL_API_KEY environment variable is required when using the cloud service' 869 | ); 870 | process.exit(1); 871 | } 872 | 873 | // Initialize Firecrawl client with optional API URL 874 | 875 | // Configuration for retries and monitoring 876 | const CONFIG = { 877 | retry: { 878 | maxAttempts: Number(process.env.FIRECRAWL_RETRY_MAX_ATTEMPTS) || 3, 879 | initialDelay: Number(process.env.FIRECRAWL_RETRY_INITIAL_DELAY) || 1000, 880 | maxDelay: Number(process.env.FIRECRAWL_RETRY_MAX_DELAY) || 10000, 881 | backoffFactor: Number(process.env.FIRECRAWL_RETRY_BACKOFF_FACTOR) || 2, 882 | }, 883 | credit: { 884 | warningThreshold: 885 | Number(process.env.FIRECRAWL_CREDIT_WARNING_THRESHOLD) || 1000, 886 | criticalThreshold: 887 | Number(process.env.FIRECRAWL_CREDIT_CRITICAL_THRESHOLD) || 100, 888 | }, 889 | }; 890 | 891 | // Add utility function for delay 892 | function delay(ms: number): Promise<void> { 893 | return new Promise((resolve) => setTimeout(resolve, ms)); 894 | } 895 | 896 | let isStdioTransport = false; 897 | 898 | function safeLog( 899 | level: 900 | | 'error' 901 | | 'debug' 902 | | 'info' 903 | | 'notice' 904 | | 'warning' 905 | | 'critical' 906 | | 'alert' 907 | | 'emergency', 908 | data: any 909 | ): void { 910 | if (isStdioTransport) { 911 | // For stdio transport, log to stderr to avoid protocol interference 912 | console.error( 913 | `[${level}] ${typeof data === 'object' ? JSON.stringify(data) : data}` 914 | ); 915 | } else { 916 | // For other transport types, use the normal logging mechanism 917 | server.sendLoggingMessage({ level, data }); 918 | } 919 | } 920 | 921 | // Add retry logic with exponential backoff 922 | async function withRetry<T>( 923 | operation: () => Promise<T>, 924 | context: string, 925 | attempt = 1 926 | ): Promise<T> { 927 | try { 928 | return await operation(); 929 | } catch (error) { 930 | const isRateLimit = 931 | error instanceof Error && 932 | (error.message.includes('rate limit') || error.message.includes('429')); 933 | 934 | if (isRateLimit && attempt < CONFIG.retry.maxAttempts) { 935 | const delayMs = Math.min( 936 | CONFIG.retry.initialDelay * 937 | Math.pow(CONFIG.retry.backoffFactor, attempt - 1), 938 | CONFIG.retry.maxDelay 939 | ); 940 | 941 | safeLog( 942 | 'warning', 943 | `Rate limit hit for ${context}. Attempt ${attempt}/${CONFIG.retry.maxAttempts}. 
Retrying in ${delayMs}ms` 944 | ); 945 | 946 | await delay(delayMs); 947 | return withRetry(operation, context, attempt + 1); 948 | } 949 | 950 | throw error; 951 | } 952 | } 953 | 954 | // Tool handlers 955 | server.setRequestHandler(ListToolsRequestSchema, async () => ({ 956 | tools: [ 957 | SCRAPE_TOOL, 958 | MAP_TOOL, 959 | CRAWL_TOOL, 960 | CHECK_CRAWL_STATUS_TOOL, 961 | SEARCH_TOOL, 962 | EXTRACT_TOOL, 963 | DEEP_RESEARCH_TOOL, 964 | GENERATE_LLMSTXT_TOOL, 965 | ], 966 | })); 967 | 968 | server.setRequestHandler(CallToolRequestSchema, async (request) => { 969 | const startTime = Date.now(); 970 | try { 971 | const { name, arguments: args } = request.params; 972 | 973 | const apiKey = process.env.CLOUD_SERVICE 974 | ? (request.params._meta?.apiKey as string) 975 | : FIRECRAWL_API_KEY; 976 | if (process.env.CLOUD_SERVICE && !apiKey) { 977 | throw new Error('No API key provided'); 978 | } 979 | 980 | const client = new FirecrawlApp({ 981 | apiKey, 982 | ...(FIRECRAWL_API_URL ? { apiUrl: FIRECRAWL_API_URL } : {}), 983 | }); 984 | // Log incoming request with timestamp 985 | safeLog( 986 | 'info', 987 | `[${new Date().toISOString()}] Received request for tool: ${name}` 988 | ); 989 | 990 | if (!args) { 991 | throw new Error('No arguments provided'); 992 | } 993 | 994 | switch (name) { 995 | case 'firecrawl_scrape': { 996 | if (!isScrapeOptions(args)) { 997 | throw new Error('Invalid arguments for firecrawl_scrape'); 998 | } 999 | const { url, ...options } = args; 1000 | try { 1001 | const scrapeStartTime = Date.now(); 1002 | safeLog( 1003 | 'info', 1004 | `Starting scrape for URL: ${url} with options: ${JSON.stringify(options)}` 1005 | ); 1006 | 1007 | const response = await client.scrapeUrl(url, { 1008 | ...options, 1009 | // @ts-expect-error Extended API options including origin 1010 | origin: 'mcp-server', 1011 | }); 1012 | 1013 | // Log performance metrics 1014 | safeLog( 1015 | 'info', 1016 | `Scrape completed in ${Date.now() - scrapeStartTime}ms` 1017 | ); 1018 | 1019 | if ('success' in response && !response.success) { 1020 | throw new Error(response.error || 'Scraping failed'); 1021 | } 1022 | 1023 | // Format content based on requested formats 1024 | const contentParts = []; 1025 | 1026 | if (options.formats?.includes('markdown') && response.markdown) { 1027 | contentParts.push(response.markdown); 1028 | } 1029 | if (options.formats?.includes('html') && response.html) { 1030 | contentParts.push(response.html); 1031 | } 1032 | if (options.formats?.includes('rawHtml') && response.rawHtml) { 1033 | contentParts.push(response.rawHtml); 1034 | } 1035 | if (options.formats?.includes('links') && response.links) { 1036 | contentParts.push(response.links.join('\n')); 1037 | } 1038 | if (options.formats?.includes('screenshot') && response.screenshot) { 1039 | contentParts.push(response.screenshot); 1040 | } 1041 | if (options.formats?.includes('extract') && response.extract) { 1042 | contentParts.push(JSON.stringify(response.extract, null, 2)); 1043 | } 1044 | 1045 | // If options.formats is empty, default to markdown 1046 | if (!options.formats || options.formats.length === 0) { 1047 | options.formats = ['markdown']; 1048 | } 1049 | 1050 | // Add warning to response if present 1051 | if (response.warning) { 1052 | safeLog('warning', response.warning); 1053 | } 1054 | 1055 | return { 1056 | content: [ 1057 | { 1058 | type: 'text', 1059 | text: trimResponseText( 1060 | contentParts.join('\n\n') || 'No content available' 1061 | ), 1062 | }, 1063 | ], 1064 | isError: false, 1065 | }; 1066 | } 
catch (error) { 1067 | const errorMessage = 1068 | error instanceof Error ? error.message : String(error); 1069 | return { 1070 | content: [{ type: 'text', text: trimResponseText(errorMessage) }], 1071 | isError: true, 1072 | }; 1073 | } 1074 | } 1075 | 1076 | case 'firecrawl_map': { 1077 | if (!isMapOptions(args)) { 1078 | throw new Error('Invalid arguments for firecrawl_map'); 1079 | } 1080 | const { url, ...options } = args; 1081 | const response = await client.mapUrl(url, { 1082 | ...options, 1083 | // @ts-expect-error Extended API options including origin 1084 | origin: 'mcp-server', 1085 | }); 1086 | if ('error' in response) { 1087 | throw new Error(response.error); 1088 | } 1089 | if (!response.links) { 1090 | throw new Error('No links received from Firecrawl API'); 1091 | } 1092 | return { 1093 | content: [ 1094 | { type: 'text', text: trimResponseText(response.links.join('\n')) }, 1095 | ], 1096 | isError: false, 1097 | }; 1098 | } 1099 | 1100 | case 'firecrawl_crawl': { 1101 | if (!isCrawlOptions(args)) { 1102 | throw new Error('Invalid arguments for firecrawl_crawl'); 1103 | } 1104 | const { url, ...options } = args; 1105 | const response = await withRetry( 1106 | async () => 1107 | // @ts-expect-error Extended API options including origin 1108 | client.asyncCrawlUrl(url, { ...options, origin: 'mcp-server' }), 1109 | 'crawl operation' 1110 | ); 1111 | 1112 | if (!response.success) { 1113 | throw new Error(response.error); 1114 | } 1115 | 1116 | return { 1117 | content: [ 1118 | { 1119 | type: 'text', 1120 | text: trimResponseText( 1121 | `Started crawl for ${url} with job ID: ${response.id}. Use firecrawl_check_crawl_status to check progress.` 1122 | ), 1123 | }, 1124 | ], 1125 | isError: false, 1126 | }; 1127 | } 1128 | 1129 | case 'firecrawl_check_crawl_status': { 1130 | if (!isStatusCheckOptions(args)) { 1131 | throw new Error('Invalid arguments for firecrawl_check_crawl_status'); 1132 | } 1133 | const response = await client.checkCrawlStatus(args.id); 1134 | if (!response.success) { 1135 | throw new Error(response.error); 1136 | } 1137 | const status = `Crawl Status: 1138 | Status: ${response.status} 1139 | Progress: ${response.completed}/${response.total} 1140 | Credits Used: ${response.creditsUsed} 1141 | Expires At: ${response.expiresAt} 1142 | ${ 1143 | response.data.length > 0 ? '\nResults:\n' + formatResults(response.data) : '' 1144 | }`; 1145 | return { 1146 | content: [{ type: 'text', text: trimResponseText(status) }], 1147 | isError: false, 1148 | }; 1149 | } 1150 | 1151 | case 'firecrawl_search': { 1152 | if (!isSearchOptions(args)) { 1153 | throw new Error('Invalid arguments for firecrawl_search'); 1154 | } 1155 | try { 1156 | const response = await withRetry( 1157 | async () => 1158 | client.search(args.query, { ...args, origin: 'mcp-server' }), 1159 | 'search operation' 1160 | ); 1161 | 1162 | if (!response.success) { 1163 | throw new Error( 1164 | `Search failed: ${response.error || 'Unknown error'}` 1165 | ); 1166 | } 1167 | 1168 | // Format the results 1169 | const results = response.data 1170 | .map( 1171 | (result) => 1172 | `URL: ${result.url} 1173 | Title: ${result.title || 'No title'} 1174 | Description: ${result.description || 'No description'} 1175 | ${result.markdown ? `\nContent:\n${result.markdown}` : ''}` 1176 | ) 1177 | .join('\n\n'); 1178 | 1179 | return { 1180 | content: [{ type: 'text', text: trimResponseText(results) }], 1181 | isError: false, 1182 | }; 1183 | } catch (error) { 1184 | const errorMessage = 1185 | error instanceof Error 1186 | ? 
error.message 1187 | : `Search failed: ${JSON.stringify(error)}`; 1188 | return { 1189 | content: [{ type: 'text', text: trimResponseText(errorMessage) }], 1190 | isError: true, 1191 | }; 1192 | } 1193 | } 1194 | 1195 | case 'firecrawl_extract': { 1196 | if (!isExtractOptions(args)) { 1197 | throw new Error('Invalid arguments for firecrawl_extract'); 1198 | } 1199 | 1200 | try { 1201 | const extractStartTime = Date.now(); 1202 | 1203 | safeLog( 1204 | 'info', 1205 | `Starting extraction for URLs: ${args.urls.join(', ')}` 1206 | ); 1207 | 1208 | // Log if using self-hosted instance 1209 | if (FIRECRAWL_API_URL) { 1210 | safeLog('info', 'Using self-hosted instance for extraction'); 1211 | } 1212 | 1213 | const extractResponse = await withRetry( 1214 | async () => 1215 | client.extract(args.urls, { 1216 | prompt: args.prompt, 1217 | systemPrompt: args.systemPrompt, 1218 | schema: args.schema, 1219 | allowExternalLinks: args.allowExternalLinks, 1220 | enableWebSearch: args.enableWebSearch, 1221 | includeSubdomains: args.includeSubdomains, 1222 | origin: 'mcp-server', 1223 | } as ExtractParams), 1224 | 'extract operation' 1225 | ); 1226 | 1227 | // Type guard for successful response 1228 | if (!('success' in extractResponse) || !extractResponse.success) { 1229 | throw new Error(extractResponse.error || 'Extraction failed'); 1230 | } 1231 | 1232 | const response = extractResponse as ExtractResponse; 1233 | 1234 | // Log performance metrics 1235 | safeLog( 1236 | 'info', 1237 | `Extraction completed in ${Date.now() - extractStartTime}ms` 1238 | ); 1239 | 1240 | // Add warning to response if present 1241 | const result = { 1242 | content: [ 1243 | { 1244 | type: 'text', 1245 | text: trimResponseText(JSON.stringify(response.data, null, 2)), 1246 | }, 1247 | ], 1248 | isError: false, 1249 | }; 1250 | 1251 | if (response.warning) { 1252 | safeLog('warning', response.warning); 1253 | } 1254 | 1255 | return result; 1256 | } catch (error) { 1257 | const errorMessage = 1258 | error instanceof Error ? error.message : String(error); 1259 | 1260 | // Special handling for self-hosted instance errors 1261 | if ( 1262 | FIRECRAWL_API_URL && 1263 | errorMessage.toLowerCase().includes('not supported') 1264 | ) { 1265 | safeLog( 1266 | 'error', 1267 | 'Extraction is not supported by this self-hosted instance' 1268 | ); 1269 | return { 1270 | content: [ 1271 | { 1272 | type: 'text', 1273 | text: trimResponseText( 1274 | 'Extraction is not supported by this self-hosted instance. Please ensure LLM support is configured.' 
1275 | ), 1276 | }, 1277 | ], 1278 | isError: true, 1279 | }; 1280 | } 1281 | 1282 | return { 1283 | content: [{ type: 'text', text: trimResponseText(errorMessage) }], 1284 | isError: true, 1285 | }; 1286 | } 1287 | } 1288 | 1289 | case 'firecrawl_deep_research': { 1290 | if (!args || typeof args !== 'object' || !('query' in args)) { 1291 | throw new Error('Invalid arguments for firecrawl_deep_research'); 1292 | } 1293 | 1294 | try { 1295 | const researchStartTime = Date.now(); 1296 | safeLog('info', `Starting deep research for query: ${args.query}`); 1297 | 1298 | const response = await client.deepResearch( 1299 | args.query as string, 1300 | { 1301 | maxDepth: args.maxDepth as number, 1302 | timeLimit: args.timeLimit as number, 1303 | maxUrls: args.maxUrls as number, 1304 | // @ts-expect-error Extended API options including origin 1305 | origin: 'mcp-server', 1306 | }, 1307 | // Activity callback 1308 | (activity) => { 1309 | safeLog( 1310 | 'info', 1311 | `Research activity: ${activity.message} (Depth: ${activity.depth})` 1312 | ); 1313 | }, 1314 | // Source callback 1315 | (source) => { 1316 | safeLog( 1317 | 'info', 1318 | `Research source found: ${source.url}${source.title ? ` - ${source.title}` : ''}` 1319 | ); 1320 | } 1321 | ); 1322 | 1323 | // Log performance metrics 1324 | safeLog( 1325 | 'info', 1326 | `Deep research completed in ${Date.now() - researchStartTime}ms` 1327 | ); 1328 | 1329 | if (!response.success) { 1330 | throw new Error(response.error || 'Deep research failed'); 1331 | } 1332 | 1333 | // Format the results 1334 | const formattedResponse = { 1335 | finalAnalysis: response.data.finalAnalysis, 1336 | activities: response.data.activities, 1337 | sources: response.data.sources, 1338 | }; 1339 | 1340 | return { 1341 | content: [ 1342 | { 1343 | type: 'text', 1344 | text: trimResponseText(formattedResponse.finalAnalysis), 1345 | }, 1346 | ], 1347 | isError: false, 1348 | }; 1349 | } catch (error) { 1350 | const errorMessage = 1351 | error instanceof Error ? 
error.message : String(error); 1352 | return { 1353 | content: [{ type: 'text', text: trimResponseText(errorMessage) }], 1354 | isError: true, 1355 | }; 1356 | } 1357 | } 1358 | 1359 | case 'firecrawl_generate_llmstxt': { 1360 | if (!isGenerateLLMsTextOptions(args)) { 1361 | throw new Error('Invalid arguments for firecrawl_generate_llmstxt'); 1362 | } 1363 | 1364 | try { 1365 | const { url, ...params } = args; 1366 | const generateStartTime = Date.now(); 1367 | 1368 | safeLog('info', `Starting LLMs.txt generation for URL: ${url}`); 1369 | 1370 | // Start the generation process 1371 | const response = await withRetry( 1372 | async () => 1373 | // @ts-expect-error Extended API options including origin 1374 | client.generateLLMsText(url, { ...params, origin: 'mcp-server' }), 1375 | 'LLMs.txt generation' 1376 | ); 1377 | 1378 | if (!response.success) { 1379 | throw new Error(response.error || 'LLMs.txt generation failed'); 1380 | } 1381 | 1382 | // Log performance metrics 1383 | safeLog( 1384 | 'info', 1385 | `LLMs.txt generation completed in ${Date.now() - generateStartTime}ms` 1386 | ); 1387 | 1388 | // Format the response 1389 | let resultText = ''; 1390 | 1391 | if ('data' in response) { 1392 | resultText = `LLMs.txt content:\n\n${response.data.llmstxt}`; 1393 | 1394 | if (args.showFullText && response.data.llmsfulltxt) { 1395 | resultText += `\n\nLLMs-full.txt content:\n\n${response.data.llmsfulltxt}`; 1396 | } 1397 | } 1398 | 1399 | return { 1400 | content: [{ type: 'text', text: trimResponseText(resultText) }], 1401 | isError: false, 1402 | }; 1403 | } catch (error) { 1404 | const errorMessage = 1405 | error instanceof Error ? error.message : String(error); 1406 | return { 1407 | content: [{ type: 'text', text: trimResponseText(errorMessage) }], 1408 | isError: true, 1409 | }; 1410 | } 1411 | } 1412 | 1413 | default: 1414 | return { 1415 | content: [ 1416 | { type: 'text', text: trimResponseText(`Unknown tool: ${name}`) }, 1417 | ], 1418 | isError: true, 1419 | }; 1420 | } 1421 | } catch (error) { 1422 | // Log detailed error information 1423 | safeLog('error', { 1424 | message: `Request failed: ${ 1425 | error instanceof Error ? error.message : String(error) 1426 | }`, 1427 | tool: request.params.name, 1428 | arguments: request.params.arguments, 1429 | timestamp: new Date().toISOString(), 1430 | duration: Date.now() - startTime, 1431 | }); 1432 | return { 1433 | content: [ 1434 | { 1435 | type: 'text', 1436 | text: trimResponseText( 1437 | `Error: ${error instanceof Error ? error.message : String(error)}` 1438 | ), 1439 | }, 1440 | ], 1441 | isError: true, 1442 | }; 1443 | } finally { 1444 | // Log request completion with performance metrics 1445 | safeLog('info', `Request completed in ${Date.now() - startTime}ms`); 1446 | } 1447 | }); 1448 | 1449 | // Helper function to format results 1450 | function formatResults(data: FirecrawlDocument[]): string { 1451 | return data 1452 | .map((doc) => { 1453 | const content = doc.markdown || doc.html || doc.rawHtml || 'No content'; 1454 | return `URL: ${doc.url || 'Unknown URL'} 1455 | Content: ${content.substring(0, 100)}${content.length > 100 ? '...' : ''} 1456 | ${doc.metadata?.title ? 
`Title: ${doc.metadata.title}` : ''}`; 1457 | }) 1458 | .join('\n\n'); 1459 | } 1460 | 1461 | // Utility function to trim trailing whitespace from text responses 1462 | // This prevents Claude API errors with "final assistant content cannot end with trailing whitespace" 1463 | function trimResponseText(text: string): string { 1464 | return text.trim(); 1465 | } 1466 | 1467 | // Server startup 1468 | async function runLocalServer() { 1469 | try { 1470 | console.error('Initializing Firecrawl MCP Server...'); 1471 | 1472 | const transport = new StdioServerTransport(); 1473 | 1474 | // Detect if we're using stdio transport 1475 | isStdioTransport = transport instanceof StdioServerTransport; 1476 | if (isStdioTransport) { 1477 | console.error( 1478 | 'Running in stdio mode, logging will be directed to stderr' 1479 | ); 1480 | } 1481 | 1482 | await server.connect(transport); 1483 | 1484 | // Now that we're connected, we can send logging messages 1485 | safeLog('info', 'Firecrawl MCP Server initialized successfully'); 1486 | safeLog( 1487 | 'info', 1488 | `Configuration: API URL: ${FIRECRAWL_API_URL || 'default'}` 1489 | ); 1490 | 1491 | console.error('Firecrawl MCP Server running on stdio'); 1492 | } catch (error) { 1493 | console.error('Fatal error running server:', error); 1494 | process.exit(1); 1495 | } 1496 | } 1497 | async function runSSELocalServer() { 1498 | let transport: SSEServerTransport | null = null; 1499 | const app = express(); 1500 | 1501 | app.get('/sse', async (req, res) => { 1502 | transport = new SSEServerTransport(`/messages`, res); 1503 | res.on('close', () => { 1504 | transport = null; 1505 | }); 1506 | await server.connect(transport); 1507 | }); 1508 | 1509 | // Endpoint for the client to POST messages 1510 | // Remove express.json() middleware - let the transport handle the body 1511 | app.post('/messages', (req, res) => { 1512 | if (transport) { 1513 | transport.handlePostMessage(req, res); 1514 | } 1515 | }); 1516 | 1517 | const PORT = process.env.PORT || 3000; 1518 | console.log('Starting server on port', PORT); 1519 | try { 1520 | app.listen(PORT, () => { 1521 | console.log(`MCP SSE Server listening on http://localhost:${PORT}`); 1522 | console.log(`SSE endpoint: http://localhost:${PORT}/sse`); 1523 | console.log(`Message endpoint: http://localhost:${PORT}/messages`); 1524 | }); 1525 | } catch (error) { 1526 | console.error('Error starting server:', error); 1527 | } 1528 | } 1529 | 1530 | async function runSSECloudServer() { 1531 | const transports: { [sessionId: string]: SSEServerTransport } = {}; 1532 | const app = express(); 1533 | 1534 | app.get('/health', (req, res) => { 1535 | res.status(200).send('OK'); 1536 | }); 1537 | 1538 | app.get('/:apiKey/sse', async (req, res) => { 1539 | const apiKey = req.params.apiKey; 1540 | const transport = new SSEServerTransport(`/${apiKey}/messages`, res); 1541 | 1542 | //todo: validate api key, close if invalid 1543 | const compositeKey = `${apiKey}-${transport.sessionId}`; 1544 | transports[compositeKey] = transport; 1545 | res.on('close', () => { 1546 | delete transports[compositeKey]; 1547 | }); 1548 | await server.connect(transport); 1549 | }); 1550 | 1551 | // Endpoint for the client to POST messages 1552 | // Remove express.json() middleware - let the transport handle the body 1553 | app.post( 1554 | '/:apiKey/messages', 1555 | express.json(), 1556 | async (req: Request, res: Response) => { 1557 | const apiKey = req.params.apiKey; 1558 | const body = req.body; 1559 | const enrichedBody = { 1560 | ...body, 1561 | }; 1562 
| 1563 | if (enrichedBody && enrichedBody.params && !enrichedBody.params._meta) { 1564 | enrichedBody.params._meta = { apiKey }; 1565 | } else if ( 1566 | enrichedBody && 1567 | enrichedBody.params && 1568 | enrichedBody.params._meta 1569 | ) { 1570 | enrichedBody.params._meta.apiKey = apiKey; 1571 | } 1572 | 1573 | console.log('enrichedBody', enrichedBody); 1574 | 1575 | const sessionId = req.query.sessionId as string; 1576 | const compositeKey = `${apiKey}-${sessionId}`; 1577 | const transport = transports[compositeKey]; 1578 | if (transport) { 1579 | await transport.handlePostMessage(req, res, enrichedBody); 1580 | } else { 1581 | res.status(400).send('No transport found for sessionId'); 1582 | } 1583 | } 1584 | ); 1585 | 1586 | const PORT = 3000; 1587 | app.listen(PORT, () => { 1588 | console.log(`MCP SSE Server listening on http://localhost:${PORT}`); 1589 | console.log(`SSE endpoint: http://localhost:${PORT}/sse`); 1590 | console.log(`Message endpoint: http://localhost:${PORT}/messages`); 1591 | }); 1592 | } 1593 | 1594 | if (process.env.CLOUD_SERVICE === 'true') { 1595 | runSSECloudServer().catch((error: any) => { 1596 | console.error('Fatal error running server:', error); 1597 | process.exit(1); 1598 | }); 1599 | } else if (process.env.SSE_LOCAL === 'true') { 1600 | runSSELocalServer().catch((error: any) => { 1601 | console.error('Fatal error running server:', error); 1602 | process.exit(1); 1603 | }); 1604 | } else { 1605 | runLocalServer().catch((error: any) => { 1606 | console.error('Fatal error running server:', error); 1607 | process.exit(1); 1608 | }); 1609 | } 1610 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "NodeNext", 5 | "moduleResolution": "NodeNext", 6 | "outDir": "./dist", 7 | "rootDir": "./src", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true 12 | }, 13 | "include": ["src/**/*"], 14 | "exclude": ["node_modules", "dist", "tests"] 15 | } 16 | --------------------------------------------------------------------------------