├── .gitignore
├── types.d.ts
├── tsconfig.json
├── biome.json
├── .github
│   └── workflows
│       ├── ci.yml
│       ├── release.yml
│       └── publish.yml
├── RELEASE_NOTES_v1.5.2.md
├── LICENSE
├── CONTRIBUTING.md
├── RELEASE_NOTES_v1.5.1.md
├── package.json
├── tests
│   └── image-fetch.test.ts
├── CLAUDE.md
├── README.md
└── index.ts

/.gitignore:
--------------------------------------------------------------------------------
dist
node_modules
--------------------------------------------------------------------------------
/types.d.ts:
--------------------------------------------------------------------------------
declare module "applescript" {
  export function execString(
    script: string,
    callback: (err: Error | null, result: unknown) => void
  ): void;
}

declare module "robots-parser" {
  interface RobotsParser {
    isAllowed(url: string, userAgent: string): boolean;
  }
  export default function (robotsUrl: string, robotsTxt: string): RobotsParser;
}
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
{
  "compilerOptions": {
    "target": "ES2022",
    "strict": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true,
    "resolveJsonModule": true,
    "outDir": "./dist",
    "rootDir": ".",
    "moduleResolution": "NodeNext",
    "module": "NodeNext"
  },
  "exclude": ["node_modules"],
  "include": ["./**/*.ts"]
}
--------------------------------------------------------------------------------
/biome.json:
--------------------------------------------------------------------------------
{
  "formatter": {
    "enabled": true,
    "indentStyle": "space",
    "indentWidth": 2,
    "lineWidth": 80
  },
  "linter": {
    "enabled": true,
    "rules": {
      "recommended": true
    }
  },
  "files": {
    "includes": ["./**/*.ts", "./**/*.json", "./**/*.md"],
    "experimentalScannerIgnores": ["dist/**", "node_modules/**"]
  },
  "javascript": {
    "formatter": {
      "quoteStyle": "double",
      "trailingCommas": "es5"
    }
  }
}
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
name: CI

on:
  pull_request:
  push:
    branches: [ main ]

permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Use Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 22
          cache: 'npm'

      - name: Install deps
        run: npm ci --ignore-scripts

      - name: Run tests (unit + typecheck + biome)
        run: npm test
--------------------------------------------------------------------------------
/RELEASE_NOTES_v1.5.2.md:
--------------------------------------------------------------------------------
# v1.5.2 — Fix: restore cross‑origin image fetching by default

Release date: 2025-09-24

## Fixed
- Regression in v1.5.1 where images hosted on different origins (e.g. CDNs) were blocked by default.
- `allowCrossOriginImages` default is restored to `true` for backwards compatibility.

## Added
- Unit tests (Vitest) to verify that:
  - Base64 image data is returned when requested
  - Images are saved to disk when enabled
  - Cross-origin images are blocked when `allowCrossOriginImages: false`

## Security
- v1.5.1 hardening (SSRF guard, timeouts, redirect limits, size limits) remains in place.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 kazuph

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing

## Development Rules (Required Reading)
- Do not push directly to main; all changes land via a Pull Request (PR).
- Work on topic branches; names such as `feat/*`, `fix/*`, `chore/*`, `release/*` are recommended.
- Releases follow this flow:
  1. Bump the version in `package.json` and in the server metadata (`version` in `index.ts`)
  2. Add `RELEASE_NOTES_vX.Y.Z.md`
  3. Create a `release/vX.Y.Z` branch, commit, and push
  4. Open a PR (base: `main`, head: `release/vX.Y.Z`)
  5. Merge into `main` after CI passes and the PR is reviewed
  6. Only when the PR is merged into `main` does GitHub Actions publish to npm (already-published versions are skipped automatically)
- Publishing via direct tag pushes is not supported (`publish.yml` fires only on `push` to `main`).
- When a PR is merged into `main`, the following run automatically:
  - npm publish (only for not-yet-published versions)
  - Tag creation and GitHub Release publication (only if missing)
- Commit messages should follow Conventional Commits (e.g. `fix: correct image fetch default`).
- Test policy: `npm test` must pass unit → typecheck → format → biome.
- Because the tests run local HTTP servers, the following environment variables disable server startup and the SSRF guard (never set these in production); see the sketch after this list.
  - `MCP_FETCH_DISABLE_SERVER=1`
  - `MCP_FETCH_DISABLE_SSRF_GUARD=1`
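
A minimal sketch of how the test suite uses them, mirroring `tests/image-fetch.test.ts` (the variables must be set before the compiled module is imported, in an ESM test file):

```typescript
// Test-only: disable server startup and the SSRF guard BEFORE importing
// dist/index.js, because the module reads these values at import time.
process.env.MCP_FETCH_DISABLE_SERVER = "1";
process.env.MCP_FETCH_DISABLE_SSRF_GUARD = "1";

const { fetchUrl } = await import("../dist/index.js");
// fetchUrl can now target http://127.0.0.1:... test servers without being blocked.
```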

## PR Template
- Purpose / background
- Changes (user impact / compatibility)
- Security considerations (call out any changes to networking, file I/O, etc.)
- Verification (screenshots / logs / test results)
- Release notes (if applicable)
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
name: Create Tag and GitHub Release on main

on:
  push:
    branches:
      - main

permissions:
  contents: write

jobs:
  release:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Use Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 22

      - name: Create tag and GitHub Release if missing
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          VERSION=$(node -p "require('./package.json').version")
          TAG="v${VERSION}"
          NOTES_FILE="RELEASE_NOTES_${TAG}.md"
          echo "Version: $VERSION | Tag: $TAG"
          if gh release view "$TAG" >/dev/null 2>&1; then
            echo "Release $TAG already exists. Skipping."
            exit 0
          fi
          if [ -f "$NOTES_FILE" ]; then
            gh release create "$TAG" -F "$NOTES_FILE" -t "$TAG" --latest
          else
            gh release create "$TAG" --generate-notes -t "$TAG" --latest
          fi
--------------------------------------------------------------------------------
/RELEASE_NOTES_v1.5.1.md:
--------------------------------------------------------------------------------
# v1.5.1 — Security hardening and safe fetch improvements

Release date: 2025-09-24

## Highlights
- Strong SSRF and DoS protections:
  - Only `http://` and `https://` URLs are allowed (pages and images)
  - Block loopback, private, link-local and multicast IPs; block `localhost`/`.local` hostnames
  - DNS resolution is checked to prevent private IPs via DNS
  - Manual redirect handling with validation (max 3 hops)
  - Request timeout (default 12s)
  - Response size limits: HTML up to 2MB, images up to 10MB
- robots.txt fetch now uses the same safe pipeline and is size-limited
- Same-origin image fetching by default; cross-origin can be explicitly enabled

## Added
- `allowCrossOriginImages` (boolean, default `false`) to fetch images from different origins when needed.

## Changed
- Default image policy is same-origin only for defense in depth.

## Configuration (env vars)
- `MCP_FETCH_TIMEOUT_MS` (default: `12000`)
- `MCP_FETCH_MAX_REDIRECTS` (default: `3`)
- `MCP_FETCH_MAX_HTML_BYTES` (default: `2000000`)
- `MCP_FETCH_MAX_IMAGE_BYTES` (default: `10000000`)

## Docs & QA
- README updated with Security Hardening and env vars
- Typecheck/build/audit: all passing; Biome lint/format integrated

## Compatibility notes
- No breaking API changes.
  If your pages rely on CDN or third-party image hosts, pass `allowCrossOriginImages: true` in tool arguments.
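
For example, a hypothetical tool call (using the same argument format as the README examples):

```json
{
  "url": "https://example.com",
  "enableFetchImages": true,
  "allowCrossOriginImages": true,
  "imageMaxCount": 3
}
```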
--write", 13 | "lint": "biome lint .", 14 | "typecheck": "tsc --noEmit", 15 | "unit": "npm run build && vitest run --reporter=dot", 16 | "test": "npm run unit && npm run typecheck && npm run format && npm run check" 17 | }, 18 | "dependencies": { 19 | "@modelcontextprotocol/sdk": "^1.0.0", 20 | "@mozilla/readability": "^0.6.0", 21 | "@types/sharp": "^0.31.1", 22 | "jsdom": "^24.0.0", 23 | "node-fetch": "^3.3.2", 24 | "robots-parser": "^3.0.1", 25 | "sharp": "^0.33.5", 26 | "turndown": "^7.1.2", 27 | "zod": "^3.22.4", 28 | "zod-to-json-schema": "^3.22.4" 29 | }, 30 | "devDependencies": { 31 | "@biomejs/biome": "^2.2.4", 32 | "@types/jsdom": "^21.1.6", 33 | "@types/node": "^20.10.5", 34 | "@types/turndown": "^5.0.4", 35 | "typescript": "^5.3.3", 36 | "vitest": "^2.0.5" 37 | }, 38 | "author": "kazuph", 39 | "license": "MIT", 40 | "publishConfig": { 41 | "access": "public" 42 | }, 43 | "files": [ 44 | "dist", 45 | "dist/**/*.map", 46 | "README.md" 47 | ], 48 | "repository": { 49 | "type": "git", 50 | "url": "git+https://github.com/kazuph/mcp-fetch.git" 51 | }, 52 | "keywords": [ 53 | "mcp", 54 | "fetch", 55 | "web", 56 | "content", 57 | "image", 58 | "processing", 59 | "claude", 60 | "ai" 61 | ], 62 | "bugs": { 63 | "url": "https://github.com/kazuph/mcp-fetch/issues" 64 | }, 65 | "homepage": "https://github.com/kazuph/mcp-fetch#readme", 66 | "bin": { 67 | "mcp-fetch": "dist/index.js" 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /tests/image-fetch.test.ts: -------------------------------------------------------------------------------- 1 | import { promises as fs } from "node:fs"; 2 | import http from "node:http"; 3 | import sharp from "sharp"; 4 | import { afterAll, beforeAll, describe, expect, it } from "vitest"; 5 | 6 | // Disable server startup and SSRF guard for local test servers 7 | process.env.MCP_FETCH_DISABLE_SERVER = "1"; 8 | process.env.MCP_FETCH_DISABLE_SSRF_GUARD = "1"; 9 | // Import after setting env so guards read the right values 10 | // @ts-expect-error importing compiled file without types 11 | const { fetchUrl } = await import("../dist/index.js"); 12 | 13 | function startServer( 14 | port: number, 15 | handler: http.RequestListener 16 | ): Promise { 17 | return new Promise((resolve) => { 18 | const srv = http.createServer(handler); 19 | srv.listen(port, "127.0.0.1", () => resolve(srv)); 20 | }); 21 | } 22 | 23 | let IMG_BUF: Buffer; 24 | 25 | describe("imageFetch pipeline", () => { 26 | const PORT_PAGE = 19081; 27 | const PORT_IMG = 19082; 28 | let pageSrv: http.Server; 29 | let imgSrv: http.Server; 30 | 31 | beforeAll(async () => { 32 | IMG_BUF = await sharp({ 33 | create: { 34 | width: 1, 35 | height: 1, 36 | channels: 3, 37 | background: { r: 255, g: 0, b: 0 }, 38 | }, 39 | }) 40 | .jpeg({ quality: 80 }) 41 | .toBuffer(); 42 | 43 | imgSrv = await startServer(PORT_IMG, (req, res) => { 44 | if (req.url === "/img.jpg") { 45 | res.writeHead(200, { 46 | "Content-Type": "image/jpeg", 47 | "Content-Length": IMG_BUF.length, 48 | }); 49 | res.end(IMG_BUF); 50 | } else { 51 | res.writeHead(404).end(); 52 | } 53 | }); 54 | 55 | pageSrv = await startServer(PORT_PAGE, (_req, res) => { 56 | const html = `T 57 |

Title

Hello

58 | r 59 |
60 | `; 61 | res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" }); 62 | res.end(html); 63 | }); 64 | }); 65 | 66 | afterAll(async () => { 67 | await new Promise((r) => pageSrv.close(() => r(null))); 68 | await new Promise((r) => imgSrv.close(() => r(null))); 69 | }); 70 | 71 | it("returns base64 image and saves file by default (cross-origin allowed)", async () => { 72 | const result = await fetchUrl( 73 | `http://127.0.0.1:${PORT_PAGE}/`, 74 | "test-agent", 75 | false, 76 | { 77 | enableFetchImages: true, 78 | imageMaxCount: 1, 79 | startIndex: 0, 80 | maxLength: 1000, 81 | imageStartIndex: 0, 82 | imageMaxHeight: 4000, 83 | imageMaxWidth: 1000, 84 | imageQuality: 80, 85 | returnBase64: true, 86 | saveImages: true, 87 | allowCrossOriginImages: true, // default true but be explicit 88 | } 89 | ); 90 | 91 | expect(result.images.length).toBe(1); 92 | expect(result.images[0].mimeType).toBe("image/jpeg"); 93 | expect(result.images[0].data.length).toBeGreaterThan(10); 94 | expect(result.images[0].filePath).toBeTruthy(); 95 | const pth = result.images[0].filePath || ""; 96 | const stat = await fs.stat(pth); 97 | expect(stat.isFile()).toBe(true); 98 | }); 99 | 100 | it("blocks cross-origin when explicitly disabled", async () => { 101 | const result = await fetchUrl( 102 | `http://127.0.0.1:${PORT_PAGE}/`, 103 | "test-agent", 104 | false, 105 | { 106 | enableFetchImages: true, 107 | imageMaxCount: 1, 108 | startIndex: 0, 109 | maxLength: 1000, 110 | imageStartIndex: 0, 111 | imageMaxHeight: 4000, 112 | imageMaxWidth: 1000, 113 | imageQuality: 80, 114 | returnBase64: true, 115 | saveImages: false, 116 | allowCrossOriginImages: false, 117 | } 118 | ); 119 | expect(result.images.length).toBe(0); 120 | }); 121 | }); 122 | -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- 1 | # CLAUDE.md 2 | 3 | This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 4 | 5 | ## Project Overview 6 | 7 | **mcp-fetch** is a Model Context Protocol (MCP) server that provides web content fetching capabilities for AI assistants. It converts HTML pages to clean markdown using Mozilla Readability and optionally processes images using Sharp. 
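
The conversion itself is the Readability → Turndown chain from `extractContentFromHtml` in `index.ts`; a condensed sketch (the helper name here is ours, not from the source):

```typescript
import { Readability } from "@mozilla/readability";
import { JSDOM } from "jsdom";
import TurndownService from "turndown";

// Condensed from extractContentFromHtml in index.ts: parse the page,
// extract the main article, then convert its HTML to markdown.
function htmlToMarkdown(html: string, url: string): string {
  const dom = new JSDOM(html, { url });
  const article = new Readability(dom.window.document).parse();
  if (!article?.content) {
    throw new Error("Page failed to be simplified from HTML");
  }
  const turndown = new TurndownService({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
  });
  return turndown.turndown(article.content);
}
```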

## Development Commands

```bash
# Build TypeScript to JavaScript
npm run build

# Type checking only (recommended before commits)
npm run typecheck

# Run all quality checks (unit tests + typecheck + Biome format/lint)
npm test

# Format code using Biome
npm run format

# Lint code using Biome
npm run lint

# Build and run the server
npm run dev

# Run the compiled server
npm start
```

## Architecture

### Single-File Design
- **Core logic**: All functionality is in `index.ts`
- **Type definitions**: External module types in `types.d.ts`
- This is intentional - the tool has focused scope and benefits from centralized logic

### Key Components
- **MCP Server**: Uses `@modelcontextprotocol/sdk` for protocol implementation
- **Content Pipeline**: HTML → Readability → Markdown → Pagination
- **Image Pipeline**: Fetch → JPEG conversion → Vertical merging → File saving → Optional Base64 encoding
- **Parameter Validation**: Zod schemas with automatic type conversion from string/number unions

### Dependencies Architecture
- **Content Processing**: `@mozilla/readability` + `jsdom` + `turndown` chain
- **Image Processing**: `sharp` for high-performance image operations
- **HTTP**: `node-fetch` for web requests
- **Compliance**: `robots-parser` for robots.txt checking

## Code Patterns

### Parameter Handling
Parameters use union types (`string | number`, `string | boolean`) with Zod transforms for automatic type conversion (a usage sketch follows the snippet):

```typescript
url: z.string(),
maxLength: z.union([z.string(), z.number()]).transform(Number).default(20000),
enableFetchImages: z
  .union([z.string(), z.boolean()])
  .transform((val) => (typeof val === "string" ? val.toLowerCase() === "true" : val))
  .default(false)
```
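
A hedged usage sketch of this coercion (the field names are real schema keys; the values are illustrative):

```typescript
// String values arriving from an MCP client are coerced by the schema.
const args = FetchArgsSchema.parse({
  url: "https://example.com",
  maxLength: "5000",         // coerced to the number 5000
  enableFetchImages: "true", // coerced to the boolean true
});
```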

### Error Handling
Network operations include comprehensive error handling with specific error types for different failure scenarios.

### Image Optimization and File Saving
- Images are always converted to JPEG format with configurable quality (default 80)
- Multiple images are merged vertically when present
- **Default behavior**: Images are automatically saved to `~/Downloads/mcp-fetch/YYYY-MM-DD/` directory
- **Optional**: Base64 encoding for Claude Desktop display (enabled with `returnBase64: true`)
- **Filename format**: `hostname_HHMMSS_index.jpg`

## Configuration

### Biome (Linting/Formatting)
- 2-space indentation
- Double quotes
- 80-character line width
- ES5 trailing commas
- Uses modern Biome instead of ESLint + Prettier

### TypeScript
- Target: ES2022
- Module: NodeNext (ESM)
- Strict mode enabled
- Output: `./dist`

## Testing Strategy

Current approach relies on:
1. Vitest unit tests (`tests/image-fetch.test.ts`) run against local HTTP servers
2. TypeScript compilation as type-level validation
3. Biome for code quality
4. Manual testing via Claude Desktop integration

**Note**: The `npm test` command runs the unit tests, then typecheck, then Biome format and check.

## Deployment

The tool is designed for npx usage:
```bash
npx -y @kazuph/mcp-fetch
```

For Claude Desktop integration, add to MCP tools configuration:
```json
{
  "tools": {
    "imageFetch": {
      "command": "npx",
      "args": ["-y", "@kazuph/mcp-fetch"]
    }
  }
}
```

## Important Implementation Details

### Platform Specificity
- Designed for macOS (mentioned in README)
- Sharp binaries include Darwin ARM64 support

### Content Processing Limits
- Default maxLength: 20,000 characters
- Supports pagination via startIndex parameter
- Image processing disabled by default (performance consideration)

### Robots.txt Compliance
- Enabled by default for ethical web scraping
- Can be disabled with `ignoreRobotsTxt: true` parameter

## Common Development Workflow

1. Make code changes in `index.ts`
2. Run `npm run typecheck` to verify TypeScript
3. Run `npm run format` to ensure consistent formatting
4. Run `npm test` to run all validations
5. Test manually with `npm run dev` or via Claude Desktop integration
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# MCP Fetch

Model Context Protocol server for fetching web content and processing images. This allows Claude Desktop (or any MCP client) to fetch web content and handle images appropriately.

@kazuph/mcp-fetch MCP server

## Quick Start (For Users)

To use this tool with Claude Desktop, simply add the following to your Claude Desktop configuration (`~/Library/Application Support/Claude/claude_desktop_config.json`):

```json
{
  "tools": {
    "imageFetch": {
      "command": "npx",
      "args": ["-y", "@kazuph/mcp-fetch"]
    }
  }
}
```

This will automatically download and run the latest version of the tool when needed.

### Required Setup

1. Enable Accessibility for Claude:
   - Open System Settings
   - Go to Privacy & Security > Accessibility
   - Click the "+" button
   - Add Claude from your Applications folder
   - Turn ON the toggle for Claude

This accessibility setting is required for automated clipboard operations (Cmd+V) to work properly.

## Features

- **Web Content Extraction**: Automatically extracts and formats web content as markdown
- **Article Title Extraction**: Extracts and displays the title of the article
- **Image Processing**: Optional processing of images from web pages with optimization (disabled by default, enable with `enableFetchImages: true`)
- **File Saving**: Images are automatically saved to `~/Downloads/mcp-fetch/YYYY-MM-DD/` directory when processed
- **Dual Output**: Both file saving and optional Base64 encoding for AI display
- **Pagination Support**: Supports pagination for both text and images
- **JPEG Optimization**: Automatically optimizes images as JPEG for better performance
- **GIF Support**: Extracts first frame from animated GIFs

## For Developers

The following sections are for those who want to develop or modify the tool.

## Prerequisites

- Node.js 18+
- macOS (for clipboard operations)
- Claude Desktop (install from https://claude.ai/desktop)
- tsx (install via `npm install -g tsx`)

## Installation

```bash
git clone https://github.com/kazuph/mcp-fetch.git
cd mcp-fetch
npm install
npm run build
```

## Image Processing Specifications

When processing images from web content, the following optimizations are applied (see the sketch after this list):

- Images are converted to JPEG format with quality control
- Maximum width limited to 1200px by default
- Maximum height limited to 1600px by default
- Chroma subsampling (4:2:0) for better compression
- MozJPEG optimization for smaller file sizes
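
These limits mirror the merged-image optimization step in `index.ts`; a condensed sketch (the function name and parameters here are illustrative):

```typescript
import sharp from "sharp";

// Mirrors the optimization in index.ts: clamp dimensions, then re-encode
// as mozjpeg with 4:2:0 chroma subsampling.
async function optimize(
  image: Buffer,
  maxWidth: number,
  maxHeight: number,
  quality: number
): Promise<Buffer> {
  return sharp(image)
    .resize({
      width: Math.min(maxWidth, 1200),
      height: Math.min(maxHeight, 1600),
      fit: "inside",
      withoutEnlargement: true,
    })
    .jpeg({
      quality: Math.min(quality, 85),
      mozjpeg: true,
      chromaSubsampling: "4:2:0",
    })
    .toBuffer();
}
```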

## Configuration

1. Make sure Claude Desktop is installed and running.

2. Install tsx globally if you haven't:
   ```bash
   npm install -g tsx
   # or
   pnpm add -g tsx
   ```

3. Modify your Claude Desktop config located at:
   `~/Library/Application Support/Claude/claude_desktop_config.json`

   You can easily find this through the Claude Desktop menu:
   1. Open Claude Desktop
   2. Click Claude on the Mac menu bar
   3. Click "Settings"
   4. Click "Developer"

Add the following to your MCP client's configuration:

```json
{
  "tools": {
    "imageFetch": {
      "command": "tsx",
      "args": ["/path/to/mcp-fetch/index.ts"]
    }
  }
}
```

## Available Parameters

- `url` (required): The URL to fetch
- `maxLength` (default: 20000): Maximum length of content to return
- `startIndex` (default: 0): Starting position in content
- `imageStartIndex` (default: 0): Starting position for image collection
- `raw` (default: false): Return raw content instead of processed markdown
- `imageMaxCount` (default: 3): Maximum number of images to process per request
- `imageMaxHeight` (default: 4000): Maximum height of merged image
- `imageMaxWidth` (default: 1000): Maximum width of merged image
- `imageQuality` (default: 80): JPEG quality (1-100)
- `enableFetchImages` (default: false): Enable fetching and processing of images. FOR CLAUDE USERS ONLY: YOU ARE ALLOWED TO ENABLE THIS FEATURE!!! CLAUDE CAN FETCH AND PROCESS IMAGES!!! ENJOY THIS CAPABILITY!!!!!
- `allowCrossOriginImages` (default: true): When true, also fetch images hosted on different origins from the page
- `saveImages` (default: true): Save processed images to local files
- `returnBase64` (default: false): Return base64 encoded images for AI display
- `ignoreRobotsTxt` (default: false): Ignore robots.txt restrictions

### Security Hardening (v1.5.1)

- Only `http://` and `https://` URLs are allowed for page and image fetches
- Blocks private/loopback/link-local IPs and local hostnames (e.g., `localhost`, `.local`)
- Manual redirect handling with validation (max 3 hops)
- Request timeouts (default 12s, configurable via `MCP_FETCH_TIMEOUT_MS`)
- Response size limits: HTML up to 2MB, images up to 10MB (tunable via env)

Environment variables:

- `MCP_FETCH_TIMEOUT_MS` (default: 12000)
- `MCP_FETCH_MAX_REDIRECTS` (default: 3)
- `MCP_FETCH_MAX_HTML_BYTES` (default: 2000000)
- `MCP_FETCH_MAX_IMAGE_BYTES` (default: 10000000)

## Examples

### Basic Content Fetching (No Images)
```json
{
  "url": "https://example.com"
}
```

### Fetching with Images (File Saving Only)
```json
{
  "url": "https://example.com",
  "enableFetchImages": true,
  "imageMaxCount": 3
}
```

### Fetching with Images for AI Display
```json
{
  "url": "https://example.com",
  "enableFetchImages": true,
  "returnBase64": true,
  "imageMaxCount": 3
}
```

### Paginating Through Images
```json
{
  "url": "https://example.com",
  "enableFetchImages": true,
  "imageStartIndex": 3,
  "imageMaxCount": 3
}
```

## Notes

- This tool is designed for macOS only due to its dependency on macOS-specific clipboard operations.
- Images are processed using Sharp for optimal performance and quality.
- When multiple images are found, they are merged vertically with consideration for size limits.
- Animated GIFs are automatically handled by extracting their first frame.
- **File Saving**: Images are automatically saved to `~/Downloads/mcp-fetch/YYYY-MM-DD/` with filename format `hostname_HHMMSS_index.jpg` (see the sketch after this list)
- **Tool Name**: The tool name has been changed from `fetch` to `imageFetch` to avoid conflicts with native fetch functions.
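
For reference, a condensed sketch of how the save path is derived (adapted from `saveImageToFile` in `index.ts`; the helper name is ours):

```typescript
import path from "node:path";
import { URL } from "node:url";

// ~/Downloads/mcp-fetch/<YYYY-MM-DD>/merged/<hostname>_<time>_<index>.jpg
function buildSavePath(sourceUrl: string, imageIndex: number): string {
  const now = new Date();
  const dateStr = now.toISOString().split("T")[0];
  const hostname = new URL(sourceUrl).hostname.replace(/[^a-zA-Z0-9]/g, "_");
  const timestamp = now
    .toISOString()
    .replace(/[:.]/g, "-")
    .split("T")[1]
    .split(".")[0];
  const home = process.env.HOME || process.env.USERPROFILE || "";
  return path.join(
    home,
    "Downloads",
    "mcp-fetch",
    dateStr,
    "merged",
    `${hostname}_${timestamp}_${imageIndex}.jpg`
  );
}
```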

## Changelog

### v1.2.0
- **BREAKING CHANGE**: Tool name changed from `fetch` to `imageFetch` to avoid conflicts
- **NEW**: Automatic file saving - Images are now saved to `~/Downloads/mcp-fetch/YYYY-MM-DD/` by default
- **NEW**: Added `saveImages` parameter (default: true) to control file saving
- **NEW**: Added `returnBase64` parameter (default: false) for AI image display
- **BEHAVIOR CHANGE**: Default behavior now saves files instead of only returning base64
- Improved AI assistant integration with clear instructions for base64 option
- Enhanced file organization with date-based directories and structured naming

### v1.1.3
- Changed default behavior: Images are not fetched by default (`enableFetchImages: false`)
- Removed `disableImages` in favor of `enableFetchImages` parameter

### v1.1.0
- Added article title extraction feature
- Improved response formatting to include article titles
- Fixed type issues with MCP response content

### v1.0.0
- Initial release
- Web content extraction
- Image processing and optimization
- Pagination support
--------------------------------------------------------------------------------
/index.ts:
--------------------------------------------------------------------------------
#!/usr/bin/env node

import dns from "node:dns";
import { promises as fs } from "node:fs";
import net from "node:net";
import path from "node:path";
import type { Readable } from "node:stream";
import { URL } from "node:url";
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { Readability } from "@mozilla/readability";
import { JSDOM } from "jsdom";
import type { RequestInit } from "node-fetch";
import fetch, { type Response as FetchResponse } from "node-fetch";
import robotsParser from "robots-parser";
import sharp from "sharp";
import TurndownService from "turndown";
import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";

interface Image {
  src: string;
  alt: string;
  data?: Buffer;
  filename?: string;
}

interface ExtractedContent {
  markdown: string;
  images: Image[];
  title?: string;
}

interface ImageResource {
  uri: string;
  name: string;
  description: string;
  mimeType: string;
  filePath: string;
}

// Global resource registry for images
const imageResources = new Map<string, ImageResource>();

// Server instance to send notifications
let serverInstance: Server;
let serverConnected = false;

// --------------------
// Security hardening
// --------------------
// Defaults (can be overridden by env vars)
const FETCH_TIMEOUT_MS = Number(process.env.MCP_FETCH_TIMEOUT_MS || 12000);
const MAX_REDIRECTS = Number(process.env.MCP_FETCH_MAX_REDIRECTS || 3);
const MAX_HTML_BYTES = Number(
  process.env.MCP_FETCH_MAX_HTML_BYTES || 2_000_000
); // 2MB
const MAX_IMAGE_BYTES = Number(
  process.env.MCP_FETCH_MAX_IMAGE_BYTES || 10_000_000
); // 10MB
const DISABLE_SSRF_GUARD = process.env.MCP_FETCH_DISABLE_SSRF_GUARD === "1";

function isPrivateIPv4(ip: string): boolean {
  const parts = ip.split(".").map((v) => Number(v));
  if (
    parts.length !== 4 ||
    parts.some((n) => Number.isNaN(n) || n < 0 || n > 255)
  )
    return false;
  const [a, b] = parts;
  if (a === 10) return true; // 10.0.0.0/8
  if (a === 172 && b >= 16 && b <= 31) return true; // 172.16.0.0/12
  if (a === 192 && b === 168) return true; // 192.168.0.0/16
  if (a === 127) return true; // loopback
  if (a === 169 && b === 254) return true; // link-local
  if (a === 0) return true; // non-routable
  if (a >= 224 && a <= 239) return true; // multicast
  if (a >= 240) return true; // reserved
  return false;
}

function isPrivateIPv6(ip: string): boolean {
  const lower = ip.toLowerCase();
  return (
    lower === "::" ||
    lower === "::1" ||
    lower.startsWith("fe80:") || // link-local
    lower.startsWith("fc") || // fc00::/7 (fc/fd)
    lower.startsWith("fd") ||
    lower.startsWith("ff") // multicast
  );
}

function isNodeErrorWithCode(error: unknown): error is NodeJS.ErrnoException {
  return (
    error instanceof Error &&
    typeof (error as NodeJS.ErrnoException).code === "string"
  );
}

async function resolveAllIps(hostname: string): Promise<string[]> {
  try {
    const records = await dns.promises.lookup(hostname, {
      all: true,
      verbatim: true,
    });
    return records.map((r) => r.address);
  } catch {
    return [];
  }
}

async function isSafeUrl(
  input: string
): Promise<{ ok: true; url: URL } | { ok: false; reason: string }> {
  let u: URL;
  try {
    u = new URL(input);
  } catch {
    return { ok: false, reason: "Invalid URL" };
  }
  if (!(u.protocol === "http:" || u.protocol === "https:")) {
    return { ok: false, reason: "Only http/https schemes are allowed" };
  }
  if (DISABLE_SSRF_GUARD) {
    return { ok: true, url: u };
  }
  const hostname = u.hostname;
  if (!hostname) return { ok: false, reason: "Missing hostname" };
  const isIp = net.isIP(hostname) !== 0;
  if (isIp) {
    if (net.isIP(hostname) === 4 && isPrivateIPv4(hostname)) {
      return { ok: false, reason: "IPv4 address is private/reserved" };
    }
    if (net.isIP(hostname) === 6 && isPrivateIPv6(hostname)) {
      return { ok: false, reason: "IPv6 address is private/reserved" };
    }
  } else {
    const lower = hostname.toLowerCase();
    if (
      lower === "localhost" ||
      lower.endsWith(".localhost") ||
      lower.endsWith(".local")
    ) {
      return { ok: false, reason: "Local hostnames are not allowed" };
    }
    const ips = await resolveAllIps(hostname);
    for (const ip of ips) {
      if (
        (net.isIP(ip) === 4 && isPrivateIPv4(ip)) ||
        (net.isIP(ip) === 6 && isPrivateIPv6(ip))
      ) {
        return {
          ok: false,
          reason: "Hostname resolves to private/reserved address",
        };
      }
    }
  }
  return { ok: true, url: u };
}

function withTimeout<T>(
  p: Promise<T>,
  ms: number,
  label = "request"
): Promise<T> {
  if (!ms || ms <= 0) return p;
  return new Promise<T>((resolve, reject) => {
    const t = setTimeout(
      () => reject(new Error(`${label} timed out after ${ms}ms`)),
      ms
    );
    p.then(
      (v) => {
        clearTimeout(t);
        resolve(v);
      },
      (e) => {
        clearTimeout(t);
        reject(e);
      }
    );
  });
}

async function safeFollowFetch(
  inputUrl: string,
  init: RequestInit = {},
  opts: { maxRedirects?: number; timeoutMs?: number } = {}
): Promise<{ response: FetchResponse; finalUrl: string }> {
  const maxRedirects = opts.maxRedirects ?? MAX_REDIRECTS;
  const timeoutMs = opts.timeoutMs ?? FETCH_TIMEOUT_MS;

  let current = inputUrl;
  for (let i = 0; i <= maxRedirects; i++) {
    const safe = await isSafeUrl(current);
    if (!safe.ok) throw new Error(`Blocked URL: ${safe.reason}`);
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
      const reqInit: RequestInit = {
        ...(init || {}),
        redirect: "manual",
        signal: controller.signal,
      };
      const resp: FetchResponse = await fetch(current, reqInit);
      clearTimeout(timer);
      if ([301, 302, 303, 307, 308].includes(resp.status)) {
        const loc = resp.headers.get("location");
        if (!loc)
          throw new Error(
            `Redirect status ${resp.status} without Location header`
          );
        const next = new URL(loc, current).toString();
        current = next;
        continue;
      }
      return { response: resp, finalUrl: current };
    } catch (e) {
      clearTimeout(timer);
      throw e;
    }
  }
  throw new Error("Too many redirects");
}

async function readTextLimited(
  resp: FetchResponse,
  maxBytes: number
): Promise<{ text: string; contentType: string }> {
  const ct = resp.headers.get("content-type") || "";
  const cl = resp.headers.get("content-length");
  if (cl && Number(cl) > maxBytes) {
    throw new Error(`Response too large (${cl} bytes > ${maxBytes})`);
  }
  const body = resp.body as Readable | null;
  if (!body || typeof body.on !== "function") {
    const text = await withTimeout(resp.text(), FETCH_TIMEOUT_MS, "read text");
    return { text, contentType: ct };
  }
  let size = 0;
  const chunks: Buffer[] = [];
  await new Promise<void>((resolve, reject) => {
    body.on("data", (chunk: Buffer) => {
      size += chunk.length;
      if (size > maxBytes) {
        body.destroy();
        reject(new Error(`Response exceeded limit (${maxBytes} bytes)`));
        return;
      }
      chunks.push(chunk);
    });
    body.on("end", () => resolve());
    body.on("error", (err: Error) => reject(err));
  });
  return { text: Buffer.concat(chunks).toString("utf8"), contentType: ct };
}

async function readBufferLimited(
  resp: FetchResponse,
  maxBytes: number
): Promise<Buffer> {
  const cl = resp.headers.get("content-length");
  if (cl && Number(cl) > maxBytes) {
    throw new Error(`Response too large (${cl} bytes > ${maxBytes})`);
  }
  const body = resp.body as Readable | null;
  if (!body || typeof body.on !== "function") {
    const ab = await withTimeout(
      resp.arrayBuffer(),
      FETCH_TIMEOUT_MS,
      "read buffer"
    );
    const buf = Buffer.from(ab);
    if (buf.length > maxBytes)
      throw new Error(`Response exceeded limit (${maxBytes} bytes)`);
    return buf;
  }
  let size = 0;
  const chunks: Buffer[] = [];
  await new Promise<void>((resolve, reject) => {
    body.on("data", (chunk: Buffer) => {
      size += chunk.length;
      if (size > maxBytes) {
        body.destroy();
        reject(new Error(`Response exceeded limit (${maxBytes} bytes)`));
        return;
      }
      chunks.push(chunk);
    });
    body.on("end", () => resolve());
    body.on("error", (err: Error) => reject(err));
  });
  return Buffer.concat(chunks);
}

/**
 * Notify the client that the resource list has changed
 */
async function notifyResourcesChanged(): Promise<void> {
  if (!serverInstance || !serverConnected) return;
  try {
    await serverInstance.sendResourceListChanged();
  } catch (error) {
    // When not connected to an MCP client, avoid noisy warnings in CI/tests
    if (serverConnected) {
      console.warn("Failed to notify resource list changed:", error);
    }
  }
}

/**
 * Scan existing downloaded files and register them as resources
 */
async function scanAndRegisterExistingFiles(): Promise<void> {
  const homeDir = process.env.HOME || process.env.USERPROFILE || "";
  const baseDir = path.join(homeDir, "Downloads", "mcp-fetch");

  try {
    // Scan the date directories
    const dateDirs = await fs.readdir(baseDir);

    for (const dateDir of dateDirs) {
      if (dateDir.startsWith(".")) continue; // skip .DS_Store and the like

      const datePath = path.join(baseDir, dateDir);
      const stats = await fs.stat(datePath);

      if (!stats.isDirectory()) continue;

      try {
        // Check files directly under the date directory
        const files = await fs.readdir(datePath);

        for (const file of files) {
          if (!file.toLowerCase().endsWith(".jpg")) continue;

          const filePath = path.join(datePath, file);
          const fileStats = await fs.stat(filePath);

          if (!fileStats.isFile()) continue;

          // Build the resource URI (file:// scheme)
          const resourceUri = `file://${filePath}`;

          // Extract information from the filename
          const baseName = path.basename(file, ".jpg");
          const isIndividual = file.includes("individual");

          const resourceName = `${dateDir}/${baseName}`;
          const description = `${isIndividual ? "Individual" : "Merged"} image from ${dateDir}`;

          const resource: ImageResource = {
            uri: resourceUri,
            name: resourceName,
            description,
            mimeType: "image/jpeg",
            filePath,
          };

          imageResources.set(resourceUri, resource);
        }

        // Also check subdirectories (when individual/merged exist)
        const subDirs = ["individual", "merged"];

        for (const subDir of subDirs) {
          const subDirPath = path.join(datePath, subDir);

          try {
            const subFiles = await fs.readdir(subDirPath);

            for (const file of subFiles) {
              if (!file.toLowerCase().endsWith(".jpg")) continue;

              const filePath = path.join(subDirPath, file);
              const fileStats = await fs.stat(filePath);

              if (!fileStats.isFile()) continue;

              // Build the resource URI (file:// scheme)
              const resourceUri = `file://${filePath}`;

              // Extract information from the filename
              const baseName = path.basename(file, ".jpg");
              const resourceName = `${dateDir}/${subDir}/${baseName}`;
              const description = `${subDir === "individual" ? "Individual" : "Merged"} image from ${dateDir}`;
"Individual" : "Merged"} image from ${dateDir}`; 389 | 390 | const resource: ImageResource = { 391 | uri: resourceUri, 392 | name: resourceName, 393 | description, 394 | mimeType: "image/jpeg", 395 | filePath, 396 | }; 397 | 398 | imageResources.set(resourceUri, resource); 399 | } 400 | } catch (_error) { 401 | // サブディレクトリが存在しない場合はスキップ 402 | } 403 | } 404 | } catch (error) { 405 | console.warn(`Failed to scan directory ${datePath}:`, error); 406 | } 407 | } 408 | 409 | console.error(`Registered ${imageResources.size} existing image resources`); 410 | } catch (error) { 411 | if (isNodeErrorWithCode(error) && error.code === "ENOENT") { 412 | // No downloads directory yet; nothing to register on startup 413 | return; 414 | } 415 | console.warn("Failed to scan existing downloads:", error); 416 | } 417 | } 418 | 419 | const DEFAULT_USER_AGENT_AUTONOMOUS = 420 | "ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)"; 421 | // const DEFAULT_USER_AGENT_MANUAL = 422 | // "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)"; 423 | 424 | /** 425 | * URLから元のファイル名を抽出 426 | */ 427 | function extractFilenameFromUrl(url: string): string { 428 | try { 429 | const urlObj = new URL(url); 430 | const pathname = urlObj.pathname; 431 | const filename = path.basename(pathname); 432 | 433 | // ファイル名が空の場合や拡張子がない場合のデフォルト処理 434 | if (!filename || !filename.includes(".")) { 435 | return "image.jpg"; 436 | } 437 | 438 | return filename; 439 | } catch { 440 | return "image.jpg"; 441 | } 442 | } 443 | 444 | // New structured API (optional) 445 | const NewImagesSchema = z 446 | .union([ 447 | z.boolean(), 448 | z.object({ 449 | output: z.enum(["base64", "file", "both"]).optional(), 450 | layout: z.enum(["merged", "individual", "both"]).optional(), 451 | maxCount: z.number().int().min(0).max(10).optional(), 452 | startIndex: z.number().int().min(0).optional(), 453 | size: z 454 | .object({ 455 | maxWidth: z.number().int().min(100).max(10000).optional(), 456 | maxHeight: z.number().int().min(100).max(10000).optional(), 457 | quality: z.number().int().min(1).max(100).optional(), 458 | }) 459 | .optional(), 460 | originPolicy: z.enum(["cross-origin", "same-origin"]).optional(), 461 | saveDir: z.string().optional(), 462 | }), 463 | ]) 464 | .optional(); 465 | 466 | const NewTextSchema = z 467 | .object({ 468 | maxLength: z.number().int().positive().max(1000000).optional(), 469 | startIndex: z.number().int().min(0).optional(), 470 | raw: z.boolean().optional(), 471 | }) 472 | .optional(); 473 | 474 | const NewSecuritySchema = z 475 | .object({ 476 | ignoreRobotsTxt: z.boolean().optional(), 477 | }) 478 | .optional(); 479 | 480 | const FetchArgsSchema = z.object({ 481 | url: z 482 | .string() 483 | .url() 484 | .refine( 485 | (val) => { 486 | try { 487 | const u = new URL(val); 488 | return u.protocol === "http:" || u.protocol === "https:"; 489 | } catch { 490 | return false; 491 | } 492 | }, 493 | { message: "Only http/https URLs are allowed" } 494 | ), 495 | // legacy flat params (kept for backward compatibility) 496 | maxLength: z 497 | .union([z.number(), z.string()]) 498 | .transform((val) => Number(val)) 499 | .pipe(z.number().positive().max(1000000)) 500 | .default(20000), 501 | startIndex: z 502 | .union([z.number(), z.string()]) 503 | .transform((val) => Number(val)) 504 | .pipe(z.number().min(0)) 505 | .default(0), 506 | imageStartIndex: z 507 | .union([z.number(), z.string()]) 508 | .transform((val) => Number(val)) 509 | 
    .pipe(z.number().min(0))
    .default(0),
  raw: z
    .union([z.boolean(), z.string()])
    .transform((val) =>
      typeof val === "string" ? val.toLowerCase() === "true" : val
    )
    .default(false),
  imageMaxCount: z
    .union([z.number(), z.string()])
    .transform((val) => Number(val))
    .pipe(z.number().min(0).max(10))
    .default(3),
  imageMaxHeight: z
    .union([z.number(), z.string()])
    .transform((val) => Number(val))
    .pipe(z.number().min(100).max(10000))
    .default(4000),
  imageMaxWidth: z
    .union([z.number(), z.string()])
    .transform((val) => Number(val))
    .pipe(z.number().min(100).max(10000))
    .default(1000),
  imageQuality: z
    .union([z.number(), z.string()])
    .transform((val) => Number(val))
    .pipe(z.number().min(1).max(100))
    .default(80),
  enableFetchImages: z
    .union([z.boolean(), z.string()])
    .transform((val) =>
      typeof val === "string" ? val.toLowerCase() === "true" : val
    )
    .default(false),
  allowCrossOriginImages: z
    .union([z.boolean(), z.string()])
    .transform((val) =>
      typeof val === "string" ? val.toLowerCase() === "true" : val
    )
    .default(true),
  ignoreRobotsTxt: z
    .union([z.boolean(), z.string()])
    .transform((val) =>
      typeof val === "string" ? val.toLowerCase() === "true" : val
    )
    .default(false),
  saveImages: z
    .union([z.boolean(), z.string()])
    .transform((val) =>
      typeof val === "string" ? val.toLowerCase() === "true" : val
    )
    .default(true),
  returnBase64: z
    .union([z.boolean(), z.string()])
    .transform((val) =>
      typeof val === "string" ? val.toLowerCase() === "true" : val
    )
    .default(false),
  // new structured params (optional)
  images: NewImagesSchema,
  text: NewTextSchema,
  security: NewSecuritySchema,
});

const ListToolsSchema = z.object({
  method: z.literal("tools/list"),
});

const CallToolSchema = z.object({
  method: z.literal("tools/call"),
  params: z.object({
    name: z.string(),
    arguments: z.record(z.unknown()).optional(),
  }),
});

function extractContentFromHtml(
  html: string,
  url: string
): ExtractedContent | string {
  const dom = new JSDOM(html, { url });
  const reader = new Readability(dom.window.document);
  const article = reader.parse();

  if (!article || !article.content) {
    return "Page failed to be simplified from HTML";
  }

  // Extract images from the article content only
  const articleDom = new JSDOM(article.content);
  const imgElements = Array.from(
    articleDom.window.document.querySelectorAll("img")
  );

  const images: Image[] = imgElements.map((img) => {
    const src = img.src;
    const alt = img.alt || "";
    const filename = extractFilenameFromUrl(src);
    return { src, alt, filename };
  });

  const turndownService = new TurndownService({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
  });
  const markdown = turndownService.turndown(article.content);

  return { markdown, images, title: article.title ?? undefined };
}

async function fetchImages(
  images: Image[],
  baseOrigin: string,
  allowCrossOrigin: boolean
): Promise<(Image & { data: Buffer })[]> {
  const fetchedImages = [];
  for (const img of images) {
    try {
      const safe = await isSafeUrl(img.src);
      if (!safe.ok) continue;
      const srcOrigin = new URL(img.src).origin;
      if (!allowCrossOrigin && srcOrigin !== baseOrigin) continue;
      const { response } = await safeFollowFetch(
        img.src,
        {},
        { timeoutMs: FETCH_TIMEOUT_MS }
      );
      const imageBuffer = await readBufferLimited(response, MAX_IMAGE_BYTES);

      // For GIF images, only the first frame is extracted
      if (img.src.toLowerCase().endsWith(".gif")) {
        // GIF handling logic
      }

      fetchedImages.push({
        ...img,
        data: imageBuffer,
      });
    } catch (error) {
      console.warn(`Failed to process image ${img.src}:`, error);
    }
  }
  return fetchedImages;
}

/**
 * Merge multiple images vertically and return them as a single image
 */
async function mergeImagesVertically(
  images: Buffer[],
  maxWidth: number,
  maxHeight: number,
  quality: number
): Promise<Buffer> {
  if (images.length === 0) {
    throw new Error("No images to merge");
  }

  // Read each image's metadata
  const imageMetas = await Promise.all(
    images.map(async (buffer) => {
      const metadata = await sharp(buffer).metadata();
      return {
        width: metadata.width || 0,
        height: metadata.height || 0,
        buffer,
      };
    })
  );

  // Compute the target width
  const width = Math.min(
    maxWidth,
    Math.max(...imageMetas.map((meta) => meta.width))
  );

  // Sum the image heights
  const totalHeight = Math.min(
    maxHeight,
    imageMetas.reduce((sum, meta) => sum + meta.height, 0)
  );

  // Create the output canvas
  const composite = sharp({
    create: {
      width,
      height: totalHeight,
      channels: 4,
      background: { r: 255, g: 255, b: 255, alpha: 1 },
    },
  });

  // Place each image
  let currentY = 0;
  const overlays = [];

  for (const meta of imageMetas) {
    // Stop before images would overflow the canvas height
    if (currentY >= maxHeight) break;

    // Resize the image (only when needed)
    let processedImage = sharp(meta.buffer);
    if (meta.width > width) {
      processedImage = processedImage.resize(width);
    }

    const resizedBuffer = await processedImage.toBuffer();
    const resizedMeta = await sharp(resizedBuffer).metadata();

    overlays.push({
      input: resizedBuffer,
      top: currentY,
      left: 0,
    });

    currentY += resizedMeta.height || 0;
  }

  // Output at the requested quality (JPEG instead of PNG)
  return composite
    .composite(overlays)
    .jpeg({
      quality, // JPEG quality (1-100)
      mozjpeg: true, // use mozjpeg for further optimization
    })
    .toBuffer();
}

// removed unused getImageDimensions helper to satisfy linter

/**
 * Save an image into a date-based directory and return the file path
 */
async function saveImageToFile(
  imageBuffer: Buffer,
  sourceUrl: string,
  imageIndex: number = 0
): Promise<string> {
  // Current date as YYYY-MM-DD
  const now = new Date();
  const dateStr = now.toISOString().split("T")[0];

  // Target directory: ~/Downloads/mcp-fetch/YYYY-MM-DD/merged/
  const homeDir = process.env.HOME || process.env.USERPROFILE || "";
  const baseDir = path.join(
    homeDir,
    "Downloads",
    "mcp-fetch",
    dateStr,
    "merged"
  );
"mcp-fetch", 757 | dateStr, 758 | "merged" 759 | ); 760 | 761 | // ディレクトリが存在しない場合は作成 762 | await fs.mkdir(baseDir, { recursive: true }); 763 | 764 | // ファイル名を生成(URLのホスト名 + タイムスタンプ + インデックス) 765 | const urlObj = new URL(sourceUrl); 766 | const hostname = urlObj.hostname.replace(/[^a-zA-Z0-9]/g, "_"); 767 | const timestamp = now 768 | .toISOString() 769 | .replace(/[:.]/g, "-") 770 | .split("T")[1] 771 | .split(".")[0]; 772 | const filename = `${hostname}_${timestamp}_${imageIndex}.jpg`; 773 | 774 | const filePath = path.join(baseDir, filename); 775 | 776 | // ファイルに保存 777 | await fs.writeFile(filePath, imageBuffer); 778 | 779 | // リソースとして登録 780 | const resourceUri = `file://${filePath}`; 781 | const resourceName = `${dateStr}/merged/${filename}`; 782 | const description = `Merged image from ${sourceUrl} saved on ${dateStr}`; 783 | 784 | const resource: ImageResource = { 785 | uri: resourceUri, 786 | name: resourceName, 787 | description, 788 | mimeType: "image/jpeg", 789 | filePath, 790 | }; 791 | 792 | imageResources.set(resourceUri, resource); 793 | 794 | // クライアントにリソース変更を通知 795 | await notifyResourcesChanged(); 796 | 797 | return filePath; 798 | } 799 | 800 | /** 801 | * 個別画像を保存してリソースとして登録 802 | */ 803 | async function saveIndividualImageAndRegisterResource( 804 | imageBuffer: Buffer, 805 | sourceUrl: string, 806 | imageIndex: number, 807 | altText: string = "", 808 | originalFilename: string = "image.jpg" 809 | ): Promise { 810 | // 現在の日付をYYYY-MM-DD形式で取得 811 | const now = new Date(); 812 | const dateStr = now.toISOString().split("T")[0]; 813 | 814 | // 保存先ディレクトリ: ~/Downloads/mcp-fetch/YYYY-MM-DD/individual/ 815 | const homeDir = process.env.HOME || process.env.USERPROFILE || ""; 816 | const baseDir = path.join( 817 | homeDir, 818 | "Downloads", 819 | "mcp-fetch", 820 | dateStr, 821 | "individual" 822 | ); 823 | 824 | // ディレクトリが存在しない場合は作成 825 | await fs.mkdir(baseDir, { recursive: true }); 826 | 827 | // 元のファイル名を使用してユニークファイル名を生成 828 | const ext = path.extname(originalFilename); 829 | const baseName = path.basename(originalFilename, ext); 830 | const safeBaseName = baseName.replace(/[^a-zA-Z0-9\-_]/g, "_"); 831 | const filename = `${imageIndex}_${safeBaseName}${ext || ".jpg"}`; 832 | 833 | const filePath = path.join(baseDir, filename); 834 | 835 | // ファイルに保存 836 | await fs.writeFile(filePath, imageBuffer); 837 | 838 | // リソースとして登録 839 | const resourceUri = `file://${filePath}`; 840 | const resourceName = `${safeBaseName}_${imageIndex}`; 841 | const description = `${originalFilename}${altText ? 

  const resource: ImageResource = {
    uri: resourceUri,
    name: resourceName,
    description,
    mimeType: "image/jpeg",
    filePath,
  };

  imageResources.set(resourceUri, resource);

  // Notify the client that resources changed
  await notifyResourcesChanged();

  return filePath;
}

async function checkRobotsTxt(
  url: string,
  userAgent: string
): Promise<boolean> {
  const { protocol, host } = new URL(url);
  const robotsUrl = `${protocol}//${host}/robots.txt`;

  try {
    const { response } = await safeFollowFetch(
      robotsUrl,
      { headers: { "User-Agent": userAgent } },
      { timeoutMs: Math.min(FETCH_TIMEOUT_MS, 8000) }
    );
    if (!response.ok) {
      if (response.status === 401 || response.status === 403) {
        throw new Error(
          "Autonomous fetching not allowed based on robots.txt response"
        );
      }
      return true; // Allow if no robots.txt
    }

    const { text: robotsTxt } = await readTextLimited(response, 100_000);
    const robots = robotsParser(robotsUrl, robotsTxt);

    if (!robots.isAllowed(url, userAgent)) {
      throw new Error(
        "The site's robots.txt specifies that autonomous fetching is not allowed. " +
          "Try manually fetching the page using the fetch prompt."
      );
    }
    return true;
  } catch (error) {
    // If robots.txt itself cannot be fetched, allow access
    if (error instanceof Error && error.message.includes("robots.txt")) {
      throw error;
    }
    return true;
  }
}

interface FetchResult {
  content: string;
  images: { data: string; mimeType: string; filePath?: string }[];
  remainingContent: number;
  remainingImages: number;
  title?: string;
}

async function fetchUrl(
  url: string,
  userAgent: string,
  forceRaw = false,
  options = {
    imageMaxCount: 3,
    imageMaxHeight: 4000,
    imageMaxWidth: 1000,
    imageQuality: 80,
    imageStartIndex: 0,
    startIndex: 0,
    maxLength: 20000,
    enableFetchImages: false,
    allowCrossOriginImages: true,
    saveImages: true,
    returnBase64: false,
  }
): Promise<FetchResult> {
  const { response, finalUrl } = await safeFollowFetch(url, {
    headers: { "User-Agent": userAgent },
  });

  if (!response.ok) {
    throw new Error(`Failed to fetch ${url} - status code ${response.status}`);
  }

  const { text, contentType } = await readTextLimited(response, MAX_HTML_BYTES);
  const isHtml =
    text.toLowerCase().includes("<html") || contentType.includes("text/html");

  if (isHtml && !forceRaw) {
    const result = extractContentFromHtml(text, finalUrl);
    if (typeof result === "string") {
      return {
        content: result,
        images: [],
        remainingContent: 0,
        remainingImages: 0,
        title: undefined,
      };
    }

    const { markdown, images, title } = result;
    const processedImages: { data: string; mimeType: string; filePath?: string }[] = [];

    if (
      options.enableFetchImages &&
      options.imageMaxCount > 0 &&
      images.length > 0
    ) {
      try {
        const startIdx = options.imageStartIndex;
        const baseOrigin = new URL(finalUrl).origin;
        let fetchedImages = await fetchImages(
          images.slice(startIdx),
          baseOrigin,
          options.allowCrossOriginImages ?? false
        );
965 |         fetchedImages = fetchedImages.slice(0, options.imageMaxCount);
966 | 
967 |         if (fetchedImages.length > 0) {
968 |           const imageBuffers = fetchedImages.map((img) => img.data);
969 | 
970 |           // Individual image saving (new API: only when layout is individual/both and output is file/both)
971 |           type Layout = undefined | "merged" | "individual" | "both";
972 |           type Output = undefined | "base64" | "file" | "both";
973 |           const layout = (options as { layout?: Layout }).layout;
974 |           const output = (options as { output?: Output }).output;
975 |           const legacyMode =
976 |             (options as { output?: Output }).output === undefined &&
977 |             (options as { layout?: Layout }).layout === undefined;
978 |           const shouldSaveIndividual = legacyMode
979 |             ? true // legacy calls always saved individual images; kept for compatibility
980 |             : (layout === "individual" || layout === "both") &&
981 |               (output === "file" || output === "both");
982 | 
983 |           if (shouldSaveIndividual) {
984 |             for (let i = 0; i < fetchedImages.length; i++) {
985 |               try {
986 |                 const img = fetchedImages[i];
987 |                 const optimizedIndividualImage = await sharp(img.data)
988 |                   .jpeg({ quality: 80, mozjpeg: true })
989 |                   .toBuffer();
990 |                 await saveIndividualImageAndRegisterResource(
991 |                   optimizedIndividualImage,
992 |                   finalUrl,
993 |                   startIdx + i,
994 |                   img.alt,
995 |                   img.filename || "image.jpg"
996 |                 );
997 |               } catch (error) {
998 |                 console.warn(`Failed to save individual image ${i}:`, error);
999 |               }
1000 |             }
1001 |           }
1002 | 
1003 |           const mergedImage = await mergeImagesVertically(
1004 |             imageBuffers,
1005 |             options.imageMaxWidth,
1006 |             options.imageMaxHeight,
1007 |             options.imageQuality
1008 |           );
1009 | 
1010 |           // Optimize the merged image before base64 encoding
1011 |           const optimizedImage = await sharp(mergedImage)
1012 |             .resize({
1013 |               width: Math.min(options.imageMaxWidth, 1200), // cap width at 1200px
1014 |               height: Math.min(options.imageMaxHeight, 1600), // cap height at 1600px
1015 |               fit: "inside",
1016 |               withoutEnlargement: true,
1017 |             })
1018 |             .jpeg({
1019 |               quality: Math.min(options.imageQuality, 85), // cap JPEG quality
1020 |               mozjpeg: true,
1021 |               chromaSubsampling: "4:2:0", // use chroma subsampling
1022 |             })
1023 |             .toBuffer();
1024 | 
1025 |           const base64Image = optimizedImage.toString("base64");
1026 | 
1027 |           // File saving (new API: only when output is file/both)
1028 |           let filePath: string | undefined;
1029 |           const shouldSaveMerged = legacyMode
1030 |             ? options.saveImages
1031 |             : output === "file" || output === "both";
1032 |           if (shouldSaveMerged) {
1033 |             try {
1034 |               filePath = await saveImageToFile(
1035 |                 optimizedImage,
1036 |                 finalUrl,
1037 |                 options.imageStartIndex
1038 |               );
1039 |               if (serverConnected) {
1040 |                 console.error(`Image saved to: ${filePath}`);
1041 |               } else {
1042 |                 console.log(`Image saved to: ${filePath}`);
1043 |               }
1044 |             } catch (error) {
1045 |               console.warn("Failed to save image to file:", error);
1046 |             }
1047 |           }
1048 | 
1049 |           processedImages.push({
1050 |             data:
1051 |               (legacyMode && options.returnBase64) ||
1052 |               (!legacyMode && (output === "base64" || output === "both"))
1053 |                 ? base64Image
1054 |                 : "",
1055 |             mimeType: "image/jpeg", // always JPEG after optimization
1056 |             filePath,
1057 |           });
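          // Worked example of the output/layout switches above (illustrative
          // values): images: { output: "file", layout: "both" } yields
          // shouldSaveIndividual=true and shouldSaveMerged=true with data set
          // to "" (files only, no base64); legacy arguments
          // { saveImages: true, returnBase64: false } resolve the same way.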
1058 |         }
1059 |       } catch (err) {
1060 |         console.error("Error processing images:", err);
1061 |       }
1062 |     }
1063 | 
1064 |     return {
1065 |       content: markdown,
1066 |       images: processedImages,
1067 |       remainingContent: text.length - (options.startIndex + options.maxLength),
1068 |       remainingImages: Math.max(
1069 |         0,
1070 |         images.length - (options.imageStartIndex + options.imageMaxCount)
1071 |       ),
1072 |       title,
1073 |     };
1074 |   }
1075 | 
1076 |   return {
1077 |     content: `Content type ${contentType} cannot be simplified to markdown, but here is the raw content:\n${text}`,
1078 |     images: [],
1079 |     remainingContent: 0,
1080 |     remainingImages: 0,
1081 |     title: undefined,
1082 |   };
1083 | }
1084 | 
1085 | // Parse command-line arguments
1086 | const args = process.argv.slice(2);
1087 | const IGNORE_ROBOTS_TXT = args.includes("--ignore-robots-txt");
1088 | 
1089 | // Server setup
1090 | const server = new Server(
1091 |   {
1092 |     name: "mcp-fetch",
1093 |     version: "1.6.2",
1094 |   },
1095 |   {
1096 |     capabilities: {
1097 |       tools: {},
1098 |       resources: {
1099 |         subscribe: true,
1100 |         listChanged: true,
1101 |       },
1102 |     },
1103 |   }
1104 | );
1105 | 
1106 | // Store server instance for notifications
1107 | serverInstance = server;
1108 | 
1109 | // Log which command-line options are in effect
1110 | console.error(
1111 |   `Server started with options: ${IGNORE_ROBOTS_TXT ? "ignore-robots-txt" : "respect-robots-txt"}`
1112 | );
1113 | 
1114 | interface RequestHandlerExtra {
1115 |   signal: AbortSignal;
1116 | }
1117 | 
1118 | server.setRequestHandler(
1119 |   ListToolsSchema,
1120 |   async (_request: { method: "tools/list" }, _extra: RequestHandlerExtra) => {
1121 |     const tools = [
1122 |       {
1123 |         name: "imageFetch",
1124 |         description: `
1125 | An MCP fetch tool with strong image support. Converts the article body to Markdown and extracts, optimizes, and returns the images on the page.
1126 | 
1127 | New-API defaults (when "images" is specified)
1128 | - Images: fetched and returned as base64 (up to 3 images merged vertically into one JPEG)
1129 | - Saving: disabled (opt-in)
1130 | - Cross-origin: allowed (CDNs expected)
1131 | 
1132 | Parameters (new API)
1133 | - url: URL to fetch (required)
1134 | - images: true | { output, layout, maxCount, startIndex, size, originPolicy, saveDir }
1135 |   - output: "base64" | "file" | "both" (default: base64)
1136 |   - layout: "merged" | "individual" | "both" (default: merged)
1137 |   - maxCount/startIndex (defaults: 3 / 0)
1138 |   - size: { maxWidth, maxHeight, quality } (defaults: 1000/1600/80)
1139 |   - originPolicy: "cross-origin" | "same-origin" (default: cross-origin)
1140 | - text: { maxLength, startIndex, raw } (defaults: 20000/0/false)
1141 | - security: { ignoreRobotsTxt } (default: false)
1142 | 
1143 | Legacy API keys (enableFetchImages, returnBase64, saveImages, imageMax*, imageStartIndex, etc.) are still accepted for backwards compatibility (deprecated).
1144 | 
1145 | Examples (new API)
1146 | {
1147 |   "url": "https://example.com",
1148 |   "images": true
1149 | }
1150 | 
1151 | {
1152 |   "url": "https://example.com",
1153 |   "images": { "output": "both", "layout": "both", "maxCount": 4 }
1154 | }
1155 | 
1156 | Examples (legacy API, backwards compatible)
1157 | {
1158 |   "url": "https://example.com",
1159 |   "enableFetchImages": true,
1160 |   "returnBase64": true,
1161 |   "imageMaxCount": 2
1162 | }`,
1163 |         inputSchema: zodToJsonSchema(FetchArgsSchema),
1164 |       },
1165 |     ];
1166 |     return { tools };
1167 |   }
1168 | );
1169 | 
1170 | // Type definitions for MCP response content
1171 | type MCPResponseContent =
1172 |   | { type: "text"; text: string }
1173 |   | { type: "image"; mimeType: string; data: string };
1174 | 
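// Illustrative tools/call envelope for the imageFetch tool (values are
// examples only, not from this file): a client would send
//   { "jsonrpc": "2.0", "id": 1, "method": "tools/call",
//     "params": { "name": "imageFetch", "arguments": {
//       "url": "https://example.com", "images": { "output": "both" } } } }
// and the handler below validates params.arguments against FetchArgsSchema
// before fetching anything.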
1175 | server.setRequestHandler(
1176 |   CallToolSchema,
1177 |   async (
1178 |     request: {
1179 |       method: "tools/call";
1180 |       params: { name: string; arguments?: Record<string, unknown> };
1181 |     },
1182 |     _extra: RequestHandlerExtra
1183 |   ) => {
1184 |     try {
1185 |       const { name, arguments: args } = request.params;
1186 | 
1187 |       if (name !== "imageFetch") {
1188 |         throw new Error(`Unknown tool: ${name}`);
1189 |       }
1190 | 
1191 |       const parsed = FetchArgsSchema.safeParse(args || {});
1192 |       if (!parsed.success) {
1193 |         throw new Error(`Invalid arguments: ${parsed.error}`);
1194 |       }
1195 | 
1196 |       const a = parsed.data as Record<string, unknown> & {
1197 |         url: string;
1198 |         images?: unknown;
1199 |         text?: { maxLength?: number; startIndex?: number; raw?: boolean };
1200 |         security?: { ignoreRobotsTxt?: boolean };
1201 |         // legacy fields (all optional)
1202 |         enableFetchImages?: boolean;
1203 |         saveImages?: boolean;
1204 |         returnBase64?: boolean;
1205 |         imageMaxWidth?: number;
1206 |         imageMaxHeight?: number;
1207 |         imageQuality?: number;
1208 |         imageStartIndex?: number;
1209 |         allowCrossOriginImages?: boolean;
1210 |         startIndex?: number;
1211 |         maxLength?: number;
1212 |         raw?: boolean;
1213 |         ignoreRobotsTxt?: boolean;
1214 |       };
1215 | 
1216 |       // Legacy mode detection: no new keys and/or legacy keys present
1217 |       const hasNewKeys =
1218 |         a.images !== undefined ||
1219 |         a.text !== undefined ||
1220 |         a.security !== undefined;
1221 |       const hasLegacyKeys =
1222 |         a.enableFetchImages !== undefined ||
1223 |         a.saveImages !== undefined ||
1224 |         a.returnBase64 !== undefined ||
1225 |         a.imageMaxWidth !== undefined ||
1226 |         a.imageMaxHeight !== undefined ||
1227 |         a.imageQuality !== undefined ||
1228 |         a.imageStartIndex !== undefined ||
1229 |         a.allowCrossOriginImages !== undefined ||
1230 |         a.startIndex !== undefined ||
1231 |         a.maxLength !== undefined ||
1232 |         a.raw !== undefined;
1233 | 
1234 |       const legacyMode =
1235 |         (!hasNewKeys && hasLegacyKeys) || (!hasNewKeys && !hasLegacyKeys);
1236 | 
1237 |       // Build fetch options with backward compatibility
1238 |       const fetchOptions: {
1239 |         imageMaxCount: number;
1240 |         imageMaxHeight: number;
1241 |         imageMaxWidth: number;
1242 |         imageQuality: number;
1243 |         imageStartIndex: number;
1244 |         startIndex: number;
1245 |         maxLength: number;
1246 |         enableFetchImages: boolean;
1247 |         allowCrossOriginImages: boolean;
1248 |         saveImages: boolean;
1249 |         returnBase64: boolean;
1250 |         raw?: boolean;
1251 |         output?: "base64" | "file" | "both";
1252 |         layout?: "merged" | "individual" | "both";
1253 |       } = {
1254 |         imageMaxCount: 3,
1255 |         imageMaxHeight: 4000,
1256 |         imageMaxWidth: 1000,
1257 |         imageQuality: 80,
1258 |         imageStartIndex: 0,
1259 |         startIndex: 0,
1260 |         maxLength: 20000,
1261 |         enableFetchImages: false,
1262 |         allowCrossOriginImages: true,
1263 |         saveImages: false,
1264 |         returnBase64: false,
1265 |         // new API additions (optional)
1266 |         output: undefined,
1267 |         layout: undefined,
1268 |       };
1269 | 
1270 |       if (legacyMode) {
1271 |         // Legacy defaults
1272 |         fetchOptions.startIndex =
1273 |           (a.startIndex as number | undefined) ?? fetchOptions.startIndex;
1274 |         fetchOptions.maxLength =
1275 |           (a.maxLength as number | undefined) ?? fetchOptions.maxLength;
1276 |         fetchOptions.raw = a.raw ?? false;
1277 |         fetchOptions.imageMaxCount =
1278 |           (a.imageMaxCount as number | undefined) ?? fetchOptions.imageMaxCount;
1279 |         fetchOptions.imageMaxHeight =
1280 |           (a.imageMaxHeight as number | undefined) ??
1281 |           fetchOptions.imageMaxHeight;
1282 |         fetchOptions.imageMaxWidth =
1283 |           (a.imageMaxWidth as number | undefined) ?? fetchOptions.imageMaxWidth;
1284 |         fetchOptions.imageQuality =
1285 |           (a.imageQuality as number | undefined) ?? fetchOptions.imageQuality;
1286 |         fetchOptions.imageStartIndex =
1287 |           (a.imageStartIndex as number | undefined) ??
1288 |           fetchOptions.imageStartIndex;
1289 |         fetchOptions.enableFetchImages = a.enableFetchImages ?? false;
1290 |         fetchOptions.allowCrossOriginImages = a.allowCrossOriginImages ?? true;
1291 |         fetchOptions.saveImages = a.saveImages ?? true; // keep previous default behavior
1292 |         fetchOptions.returnBase64 = a.returnBase64 ?? false;
1293 |         // In legacy mode we preserve prior implicit behavior: individual images saved when any saving occurs
1294 |         fetchOptions.output =
1295 |           fetchOptions.saveImages && fetchOptions.returnBase64
1296 |             ? "both"
1297 |             : fetchOptions.returnBase64
1298 |               ? "base64"
1299 |               : fetchOptions.saveImages
1300 |                 ? "file"
1301 |                 : undefined;
1302 |         fetchOptions.layout = "merged"; // merged remains primary; individual saving handled inside legacy path
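        // Example of the legacy mapping above (hypothetical arguments):
        // { enableFetchImages: true, returnBase64: true } leaves saveImages at
        // its legacy default of true, so output resolves to "both"; adding
        // saveImages: false would resolve it to "base64" instead.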
1303 |       } else {
1304 |         // New API mode
1305 |         const imagesCfg = a.images;
1306 |         const textCfg = a.text || {};
1307 |         const securityCfg = a.security || {};
1308 | 
1309 |         fetchOptions.startIndex = textCfg.startIndex ?? fetchOptions.startIndex;
1310 |         fetchOptions.maxLength = textCfg.maxLength ?? fetchOptions.maxLength;
1311 |         fetchOptions.raw = textCfg.raw ?? false;
1312 | 
1313 |         // images: true | object | undefined (image fetching is off when omitted)
1314 |         const imagesEnabled =
1315 |           imagesCfg === undefined
1316 |             ? false
1317 |             : typeof imagesCfg === "boolean"
1318 |               ? imagesCfg
1319 |               : true;
1320 |         fetchOptions.enableFetchImages = imagesEnabled;
1321 | 
1322 |         if (imagesEnabled) {
1323 |           const cfg = (
1324 |             typeof imagesCfg === "object" && imagesCfg !== null
1325 |               ? (imagesCfg as Record<string, unknown>)
1326 |               : {}
1327 |           ) as {
1328 |             output?: "base64" | "file" | "both";
1329 |             layout?: "merged" | "individual" | "both";
1330 |             maxCount?: number;
1331 |             startIndex?: number;
1332 |             size?: { maxWidth?: number; maxHeight?: number; quality?: number };
1333 |             originPolicy?: "cross-origin" | "same-origin";
1334 |             saveDir?: string;
1335 |           };
1336 |           fetchOptions.imageMaxCount =
1337 |             cfg.maxCount ?? fetchOptions.imageMaxCount;
1338 |           fetchOptions.imageStartIndex =
1339 |             cfg.startIndex ?? fetchOptions.imageStartIndex;
1340 |           const size = cfg.size || {};
1341 |           fetchOptions.imageMaxWidth =
1342 |             size.maxWidth ?? fetchOptions.imageMaxWidth;
1343 |           fetchOptions.imageMaxHeight =
1344 |             size.maxHeight ?? fetchOptions.imageMaxHeight;
1345 |           fetchOptions.imageQuality = size.quality ?? fetchOptions.imageQuality;
1346 |           fetchOptions.allowCrossOriginImages =
1347 |             (cfg.originPolicy ?? "cross-origin") === "cross-origin";
1348 |           fetchOptions.saveImages =
1349 |             (cfg.output ?? "base64") === "file" ||
1350 |             (cfg.output ?? "base64") === "both";
1351 |           fetchOptions.returnBase64 =
1352 |             (cfg.output ?? "base64") === "base64" ||
1353 |             (cfg.output ?? "base64") === "both";
1354 |           fetchOptions.output = cfg.output ?? "base64";
1355 |           fetchOptions.layout = cfg.layout ?? "merged";
1356 |           // NOTE: saveDir (cfg.saveDir) is respected in save functions when implemented (future)
1357 |         }
1358 |         // security
1359 |         a.ignoreRobotsTxt = securityCfg.ignoreRobotsTxt ?? false;
1360 |       }
1361 | 
1362 |       // Respect robots.txt unless explicitly ignored
1363 |       if (!a.ignoreRobotsTxt && !IGNORE_ROBOTS_TXT) {
1364 |         await checkRobotsTxt(a.url, DEFAULT_USER_AGENT_AUTONOMOUS);
1365 |       }
1366 | 
1367 |       const { content, images, remainingContent, remainingImages, title } =
1368 |         await fetchUrl(
1369 |           a.url,
1370 |           DEFAULT_USER_AGENT_AUTONOMOUS,
1371 |           fetchOptions.raw ?? false,
1372 |           fetchOptions
1373 |         );
1374 | 
1375 |       let finalContent = content.slice(
1376 |         fetchOptions.startIndex,
1377 |         fetchOptions.startIndex + fetchOptions.maxLength
1378 |       );
1379 | 
1380 |       // Append information about remaining content
1381 |       const remainingInfo: string[] = [];
1382 |       if (remainingContent > 0) {
1383 |         remainingInfo.push(`${remainingContent} characters of text remaining`);
1384 |       }
1385 |       if (remainingImages > 0) {
1386 |         remainingInfo.push(
1387 |           `${remainingImages} more images available (${fetchOptions.imageStartIndex + images.length}/${fetchOptions.imageStartIndex + images.length + remainingImages} shown)`
1388 |         );
1389 |       }
1390 | 
1391 |       if (remainingInfo.length > 0) {
1392 |         finalContent += `\n\nContent truncated. ${remainingInfo.join(", ")}. Call the imageFetch tool with startIndex=${
1393 |           fetchOptions.startIndex + fetchOptions.maxLength
1394 |         } and/or imageStartIndex=${fetchOptions.imageStartIndex + images.length} to get more content.`;
1395 |       }
1396 | 
1397 |       // Build the MCP response
1398 |       const responseContent: MCPResponseContent[] = [
1399 |         {
1400 |           type: "text",
1401 |           text: `Contents of ${parsed.data.url}${title ? `: ${title}` : ""}:\n${finalContent}`,
1402 |         },
1403 |       ];
1404 | 
1405 |       // Attach images (only when base64 data is present)
1406 |       for (const image of images) {
1407 |         if (image.data) {
1408 |           responseContent.push({
1409 |             type: "image",
1410 |             mimeType: image.mimeType,
1411 |             data: image.data,
1412 |           });
1413 |         }
1414 |       }
1415 | 
1416 |       // Report any files that were saved to disk
1417 |       const savedFiles = images.filter((img) => img.filePath);
1418 |       if (savedFiles.length > 0) {
1419 |         const fileInfoText = savedFiles
1420 |           .map((img, index) => `Image ${index + 1} saved to: ${img.filePath}`)
1421 |           .join("\n");
1422 | 
1423 |         responseContent.push({
1424 |           type: "text",
1425 |           text: `\n📁 Saved Images:\n${fileInfoText}`,
1426 |         });
1427 |       }
1428 | 
1429 |       return {
1430 |         content: responseContent,
1431 |       };
1432 |     } catch (error) {
1433 |       return {
1434 |         content: [
1435 |           {
1436 |             type: "text",
1437 |             text: `Error: ${error instanceof Error ? error.message : String(error)}`,
1438 |           },
1439 |         ],
1440 |         isError: true,
1441 |       };
1442 |     }
1443 |   }
1444 | );
1445 | 
1446 | // Resources handlers
1447 | const ListResourcesSchema = z.object({
1448 |   method: z.literal("resources/list"),
1449 | });
1450 | 
1451 | const ReadResourceSchema = z.object({
1452 |   method: z.literal("resources/read"),
1453 |   params: z.object({
1454 |     uri: z.string(),
1455 |   }),
1456 | });
1457 | 
1458 | server.setRequestHandler(
1459 |   ListResourcesSchema,
1460 |   async (_request: { method: "resources/list" }) => {
1461 |     const resources = Array.from(imageResources.values()).map((resource) => ({
1462 |       uri: resource.uri,
1463 |       name: resource.name,
1464 |       description: resource.description,
1465 |       mimeType: resource.mimeType,
1466 |     }));
1467 | 
1468 |     return {
1469 |       resources,
1470 |     };
1471 |   }
1472 | );
1473 | 
1474 | server.setRequestHandler(
1475 |   ReadResourceSchema,
1476 |   async (request: { method: "resources/read"; params: { uri: string } }) => {
1477 |     const resource = imageResources.get(request.params.uri);
1478 | 
1479 |     if (!resource) {
1480 |       throw new Error(`Resource not found: ${request.params.uri}`);
1481 |     }
1482 | 
1483 |     try {
1484 |       const fileData = await fs.readFile(resource.filePath);
1485 |       const base64Data = fileData.toString("base64");
1486 | 
1487 |       return {
1488 |         contents: [
1489 |           {
1490 |             uri: resource.uri,
1491 |             mimeType: resource.mimeType,
1492 |             blob: base64Data,
1493 |           },
1494 |         ],
1495 |       };
1496 |     } catch (error) {
1497 |       throw new Error(`Failed to read resource file: ${error}`);
1498 |     }
1499 |   }
1500 | );
1501 | 
1502 | // Start server
1503 | async function runServer() {
1504 |   // Register previously saved files as resources at startup
1505 |   await scanAndRegisterExistingFiles();
1506 | 
1507 |   const transport = new StdioServerTransport();
1508 |   await server.connect(transport);
1509 |   serverConnected = true;
1510 | }
1511 | 
1512 | if (process.env.MCP_FETCH_DISABLE_SERVER !== "1") {
1513 |   runServer().catch((error) => {
1514 |     process.stderr.write(`Fatal error running server: ${error}\n`);
1515 |     process.exit(1);
1516 |   });
1517 | }
1518 | 
1519 | export { fetchUrl };
1520 | 
--------------------------------------------------------------------------------
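For reference, a minimal sketch (not part of the repo) of exercising the exported `fetchUrl` from a Vitest test. The `"../index.js"` specifier, the user-agent string, and the network call to example.com are assumptions; setting `MCP_FETCH_DISABLE_SERVER=1` before the import uses the guard above so importing `index.ts` does not start the stdio server.

```ts
import { describe, expect, it } from "vitest";

describe("fetchUrl (sketch)", () => {
  it("returns markdown content and image metadata", async () => {
    // Must be set before importing the module, or the server would start.
    process.env.MCP_FETCH_DISABLE_SERVER = "1";
    const { fetchUrl } = await import("../index.js"); // assumed relative path

    // Fetch with default options: images disabled, first 20000 characters.
    const result = await fetchUrl("https://example.com", "test-agent/1.0");

    expect(result.content.length).toBeGreaterThan(0);
    expect(Array.isArray(result.images)).toBe(true);
  });
});
```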