├── .gitignore
├── types.d.ts
├── tsconfig.json
├── biome.json
├── .github
│   └── workflows
│       ├── ci.yml
│       ├── release.yml
│       └── publish.yml
├── RELEASE_NOTES_v1.5.2.md
├── LICENSE
├── CONTRIBUTING.md
├── RELEASE_NOTES_v1.5.1.md
├── package.json
├── tests
│   └── image-fetch.test.ts
├── CLAUDE.md
├── README.md
└── index.ts

/.gitignore:
--------------------------------------------------------------------------------
dist
node_modules
--------------------------------------------------------------------------------
/types.d.ts:
--------------------------------------------------------------------------------
declare module "applescript" {
  export function execString(
    script: string,
    callback: (err: Error | null, result: unknown) => void
  ): void;
}

declare module "robots-parser" {
  interface RobotsParser {
    isAllowed(url: string, userAgent: string): boolean;
  }
  export default function (robotsUrl: string, robotsTxt: string): RobotsParser;
}
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
{
  "compilerOptions": {
    "target": "ES2022",
    "strict": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true,
    "resolveJsonModule": true,
    "outDir": "./dist",
    "rootDir": ".",
    "moduleResolution": "NodeNext",
    "module": "NodeNext"
  },
  "exclude": ["node_modules"],
  "include": ["./**/*.ts"]
}
--------------------------------------------------------------------------------
/biome.json:
--------------------------------------------------------------------------------
{
  "formatter": {
    "enabled": true,
    "indentStyle": "space",
    "indentWidth": 2,
    "lineWidth": 80
  },
  "linter": {
    "enabled": true,
    "rules": {
      "recommended": true
    }
  },
  "files": {
    "includes": ["./**/*.ts", "./**/*.json", "./**/*.md"],
    "experimentalScannerIgnores": ["dist/**", "node_modules/**"]
  },
  "javascript": {
    "formatter": {
      "quoteStyle": "double",
      "trailingCommas": "es5"
    }
  }
}
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
name: CI

on:
  pull_request:
  push:
    branches: [ main ]

permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Use Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 22
          cache: 'npm'

      - name: Install deps
        run: npm ci --ignore-scripts

      - name: Run tests (unit + typecheck + biome)
        run: npm test
--------------------------------------------------------------------------------
/RELEASE_NOTES_v1.5.2.md:
--------------------------------------------------------------------------------
# v1.5.2 — Fix: restore cross‑origin image fetching by default

Release date: 2025-09-24

## Fixed
- Regression in v1.5.1 where images hosted on different origins (e.g. CDNs) were blocked by default.
- `allowCrossOriginImages` default is restored to `true` for backwards compatibility.

## Added
- Unit tests (Vitest) to verify that:
  - Base64 image data is returned when requested
  - Images are saved to disk when enabled
  - Cross-origin images are blocked when `allowCrossOriginImages: false`

## Security
- v1.5.1 hardening (SSRF guard, timeouts, redirect limits, size limits) remains in place.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 kazuph

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing

## Development Rules (Required Reading)
- Do not push directly to main; all changes land via a Pull Request (PR).
- Work on topic branches; names such as `feat/*`, `fix/*`, `chore/*`, `release/*` are recommended.
- Releases follow this flow:
  1. Bump the version in `package.json` and in the server metadata (`version` in `index.ts`)
  2. Add `RELEASE_NOTES_vX.Y.Z.md`
  3. Create a `release/vX.Y.Z` branch, commit, and push
  4. Open a PR (base: `main`, head: `release/vX.Y.Z`)
  5. Merge into `main` after CI passes and the PR is reviewed
  6. Only when the PR is merged into `main` does GitHub Actions publish to npm (already-published versions are skipped automatically)
- Publishing via direct tag pushes is not supported (`publish.yml` fires only on `push` to `main`).
- When a PR is merged into `main`, the following run automatically:
  - npm publish (only for not-yet-published versions)
  - Tag creation and GitHub Release publication (only if missing)
- Commit messages should follow Conventional Commits (e.g. `fix: correct image fetch default`).
- Test policy: `npm test` must pass unit → typecheck → format → biome.
- Because the tests run local HTTP servers, the following environment variables disable server startup and the SSRF guard (never set these in production); see the sketch after this list.
  - `MCP_FETCH_DISABLE_SERVER=1`
  - `MCP_FETCH_DISABLE_SSRF_GUARD=1`
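
A minimal sketch of how the test suite uses them, mirroring `tests/image-fetch.test.ts` (the variables must be set before the compiled module is imported, in an ESM test file):

```typescript
// Test-only: disable server startup and the SSRF guard BEFORE importing
// dist/index.js, because the module reads these values at import time.
process.env.MCP_FETCH_DISABLE_SERVER = "1";
process.env.MCP_FETCH_DISABLE_SSRF_GUARD = "1";

const { fetchUrl } = await import("../dist/index.js");
// fetchUrl can now target http://127.0.0.1:... test servers without being blocked.
```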

## PR Template
- Purpose / background
- Changes (user impact / compatibility)
- Security considerations (call out any changes to networking, file I/O, etc.)
- Verification (screenshots / logs / test results)
- Release notes (if applicable)
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
name: Create Tag and GitHub Release on main

on:
  push:
    branches:
      - main

permissions:
  contents: write

jobs:
  release:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Use Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 22

      - name: Create tag and GitHub Release if missing
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          VERSION=$(node -p "require('./package.json').version")
          TAG="v${VERSION}"
          NOTES_FILE="RELEASE_NOTES_${TAG}.md"
          echo "Version: $VERSION | Tag: $TAG"
          if gh release view "$TAG" >/dev/null 2>&1; then
            echo "Release $TAG already exists. Skipping."
            exit 0
          fi
          if [ -f "$NOTES_FILE" ]; then
            gh release create "$TAG" -F "$NOTES_FILE" -t "$TAG" --latest
          else
            gh release create "$TAG" --generate-notes -t "$TAG" --latest
          fi
--------------------------------------------------------------------------------
/RELEASE_NOTES_v1.5.1.md:
--------------------------------------------------------------------------------
# v1.5.1 — Security hardening and safe fetch improvements

Release date: 2025-09-24

## Highlights
- Strong SSRF and DoS protections:
  - Only `http://` and `https://` URLs are allowed (pages and images)
  - Block loopback, private, link-local and multicast IPs; block `localhost`/`.local` hostnames
  - DNS resolution is checked to prevent private IPs via DNS
  - Manual redirect handling with validation (max 3 hops)
  - Request timeout (default 12s)
  - Response size limits: HTML up to 2MB, images up to 10MB
- robots.txt fetch now uses the same safe pipeline and is size-limited
- Same-origin image fetching by default; cross-origin can be explicitly enabled

## Added
- `allowCrossOriginImages` (boolean, default `false`) to fetch images from different origins when needed.

## Changed
- Default image policy is same-origin only for defense in depth.

## Configuration (env vars)
- `MCP_FETCH_TIMEOUT_MS` (default: `12000`)
- `MCP_FETCH_MAX_REDIRECTS` (default: `3`)
- `MCP_FETCH_MAX_HTML_BYTES` (default: `2000000`)
- `MCP_FETCH_MAX_IMAGE_BYTES` (default: `10000000`)

## Docs & QA
- README updated with Security Hardening and env vars
- Typecheck/build/audit: all passing; Biome lint/format integrated

## Compatibility notes
- No breaking API changes.
  If your pages rely on CDN or third-party image hosts, pass `allowCrossOriginImages: true` in tool arguments.
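
For example, a hypothetical tool call (using the same argument format as the README examples):

```json
{
  "url": "https://example.com",
  "enableFetchImages": true,
  "allowCrossOriginImages": true,
  "imageMaxCount": 3
}
```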
--write", 13 | "lint": "biome lint .", 14 | "typecheck": "tsc --noEmit", 15 | "unit": "npm run build && vitest run --reporter=dot", 16 | "test": "npm run unit && npm run typecheck && npm run format && npm run check" 17 | }, 18 | "dependencies": { 19 | "@modelcontextprotocol/sdk": "^1.0.0", 20 | "@mozilla/readability": "^0.6.0", 21 | "@types/sharp": "^0.31.1", 22 | "jsdom": "^24.0.0", 23 | "node-fetch": "^3.3.2", 24 | "robots-parser": "^3.0.1", 25 | "sharp": "^0.33.5", 26 | "turndown": "^7.1.2", 27 | "zod": "^3.22.4", 28 | "zod-to-json-schema": "^3.22.4" 29 | }, 30 | "devDependencies": { 31 | "@biomejs/biome": "^2.2.4", 32 | "@types/jsdom": "^21.1.6", 33 | "@types/node": "^20.10.5", 34 | "@types/turndown": "^5.0.4", 35 | "typescript": "^5.3.3", 36 | "vitest": "^2.0.5" 37 | }, 38 | "author": "kazuph", 39 | "license": "MIT", 40 | "publishConfig": { 41 | "access": "public" 42 | }, 43 | "files": [ 44 | "dist", 45 | "dist/**/*.map", 46 | "README.md" 47 | ], 48 | "repository": { 49 | "type": "git", 50 | "url": "git+https://github.com/kazuph/mcp-fetch.git" 51 | }, 52 | "keywords": [ 53 | "mcp", 54 | "fetch", 55 | "web", 56 | "content", 57 | "image", 58 | "processing", 59 | "claude", 60 | "ai" 61 | ], 62 | "bugs": { 63 | "url": "https://github.com/kazuph/mcp-fetch/issues" 64 | }, 65 | "homepage": "https://github.com/kazuph/mcp-fetch#readme", 66 | "bin": { 67 | "mcp-fetch": "dist/index.js" 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /tests/image-fetch.test.ts: -------------------------------------------------------------------------------- 1 | import { promises as fs } from "node:fs"; 2 | import http from "node:http"; 3 | import sharp from "sharp"; 4 | import { afterAll, beforeAll, describe, expect, it } from "vitest"; 5 | 6 | // Disable server startup and SSRF guard for local test servers 7 | process.env.MCP_FETCH_DISABLE_SERVER = "1"; 8 | process.env.MCP_FETCH_DISABLE_SSRF_GUARD = "1"; 9 | // Import after setting env so guards read the right values 10 | // @ts-expect-error importing compiled file without types 11 | const { fetchUrl } = await import("../dist/index.js"); 12 | 13 | function startServer( 14 | port: number, 15 | handler: http.RequestListener 16 | ): Promise { 17 | return new Promise((resolve) => { 18 | const srv = http.createServer(handler); 19 | srv.listen(port, "127.0.0.1", () => resolve(srv)); 20 | }); 21 | } 22 | 23 | let IMG_BUF: Buffer; 24 | 25 | describe("imageFetch pipeline", () => { 26 | const PORT_PAGE = 19081; 27 | const PORT_IMG = 19082; 28 | let pageSrv: http.Server; 29 | let imgSrv: http.Server; 30 | 31 | beforeAll(async () => { 32 | IMG_BUF = await sharp({ 33 | create: { 34 | width: 1, 35 | height: 1, 36 | channels: 3, 37 | background: { r: 255, g: 0, b: 0 }, 38 | }, 39 | }) 40 | .jpeg({ quality: 80 }) 41 | .toBuffer(); 42 | 43 | imgSrv = await startServer(PORT_IMG, (req, res) => { 44 | if (req.url === "/img.jpg") { 45 | res.writeHead(200, { 46 | "Content-Type": "image/jpeg", 47 | "Content-Length": IMG_BUF.length, 48 | }); 49 | res.end(IMG_BUF); 50 | } else { 51 | res.writeHead(404).end(); 52 | } 53 | }); 54 | 55 | pageSrv = await startServer(PORT_PAGE, (_req, res) => { 56 | const html = `T 57 |

Title

Hello

58 | r 59 |
60 | `; 61 | res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" }); 62 | res.end(html); 63 | }); 64 | }); 65 | 66 | afterAll(async () => { 67 | await new Promise((r) => pageSrv.close(() => r(null))); 68 | await new Promise((r) => imgSrv.close(() => r(null))); 69 | }); 70 | 71 | it("returns base64 image and saves file by default (cross-origin allowed)", async () => { 72 | const result = await fetchUrl( 73 | `http://127.0.0.1:${PORT_PAGE}/`, 74 | "test-agent", 75 | false, 76 | { 77 | enableFetchImages: true, 78 | imageMaxCount: 1, 79 | startIndex: 0, 80 | maxLength: 1000, 81 | imageStartIndex: 0, 82 | imageMaxHeight: 4000, 83 | imageMaxWidth: 1000, 84 | imageQuality: 80, 85 | returnBase64: true, 86 | saveImages: true, 87 | allowCrossOriginImages: true, // default true but be explicit 88 | } 89 | ); 90 | 91 | expect(result.images.length).toBe(1); 92 | expect(result.images[0].mimeType).toBe("image/jpeg"); 93 | expect(result.images[0].data.length).toBeGreaterThan(10); 94 | expect(result.images[0].filePath).toBeTruthy(); 95 | const pth = result.images[0].filePath || ""; 96 | const stat = await fs.stat(pth); 97 | expect(stat.isFile()).toBe(true); 98 | }); 99 | 100 | it("blocks cross-origin when explicitly disabled", async () => { 101 | const result = await fetchUrl( 102 | `http://127.0.0.1:${PORT_PAGE}/`, 103 | "test-agent", 104 | false, 105 | { 106 | enableFetchImages: true, 107 | imageMaxCount: 1, 108 | startIndex: 0, 109 | maxLength: 1000, 110 | imageStartIndex: 0, 111 | imageMaxHeight: 4000, 112 | imageMaxWidth: 1000, 113 | imageQuality: 80, 114 | returnBase64: true, 115 | saveImages: false, 116 | allowCrossOriginImages: false, 117 | } 118 | ); 119 | expect(result.images.length).toBe(0); 120 | }); 121 | }); 122 | -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- 1 | # CLAUDE.md 2 | 3 | This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 4 | 5 | ## Project Overview 6 | 7 | **mcp-fetch** is a Model Context Protocol (MCP) server that provides web content fetching capabilities for AI assistants. It converts HTML pages to clean markdown using Mozilla Readability and optionally processes images using Sharp. 
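
The conversion itself is the Readability → Turndown chain from `extractContentFromHtml` in `index.ts`; a condensed sketch (the helper name here is ours, not from the source):

```typescript
import { Readability } from "@mozilla/readability";
import { JSDOM } from "jsdom";
import TurndownService from "turndown";

// Condensed from extractContentFromHtml in index.ts: parse the page,
// extract the main article, then convert its HTML to markdown.
function htmlToMarkdown(html: string, url: string): string {
  const dom = new JSDOM(html, { url });
  const article = new Readability(dom.window.document).parse();
  if (!article?.content) {
    throw new Error("Page failed to be simplified from HTML");
  }
  const turndown = new TurndownService({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
  });
  return turndown.turndown(article.content);
}
```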

## Development Commands

```bash
# Build TypeScript to JavaScript
npm run build

# Type checking only (recommended before commits)
npm run typecheck

# Run all quality checks (unit tests + typecheck + Biome format/lint)
npm test

# Format code using Biome
npm run format

# Lint code using Biome
npm run lint

# Build and run the server
npm run dev

# Run the compiled server
npm start
```

## Architecture

### Single-File Design
- **Core logic**: All functionality is in `index.ts`
- **Type definitions**: External module types in `types.d.ts`
- This is intentional - the tool has focused scope and benefits from centralized logic

### Key Components
- **MCP Server**: Uses `@modelcontextprotocol/sdk` for protocol implementation
- **Content Pipeline**: HTML → Readability → Markdown → Pagination
- **Image Pipeline**: Fetch → JPEG conversion → Vertical merging → File saving → Optional Base64 encoding
- **Parameter Validation**: Zod schemas with automatic type conversion from string/number unions

### Dependencies Architecture
- **Content Processing**: `@mozilla/readability` + `jsdom` + `turndown` chain
- **Image Processing**: `sharp` for high-performance image operations
- **HTTP**: `node-fetch` for web requests
- **Compliance**: `robots-parser` for robots.txt checking

## Code Patterns

### Parameter Handling
Parameters use union types (`string | number`, `string | boolean`) with Zod transforms for automatic type conversion (a usage sketch follows the snippet):

```typescript
url: z.string(),
maxLength: z.union([z.string(), z.number()]).transform(Number).default(20000),
enableFetchImages: z
  .union([z.string(), z.boolean()])
  .transform((val) => (typeof val === "string" ? val.toLowerCase() === "true" : val))
  .default(false)
```
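
A hedged usage sketch of this coercion (the field names are real schema keys; the values are illustrative):

```typescript
// String values arriving from an MCP client are coerced by the schema.
const args = FetchArgsSchema.parse({
  url: "https://example.com",
  maxLength: "5000",         // coerced to the number 5000
  enableFetchImages: "true", // coerced to the boolean true
});
```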

### Error Handling
Network operations include comprehensive error handling with specific error types for different failure scenarios.

### Image Optimization and File Saving
- Images are always converted to JPEG format with configurable quality (default 80)
- Multiple images are merged vertically when present
- **Default behavior**: Images are automatically saved to `~/Downloads/mcp-fetch/YYYY-MM-DD/` directory
- **Optional**: Base64 encoding for Claude Desktop display (enabled with `returnBase64: true`)
- **Filename format**: `hostname_HHMMSS_index.jpg`

## Configuration

### Biome (Linting/Formatting)
- 2-space indentation
- Double quotes
- 80-character line width
- ES5 trailing commas
- Uses modern Biome instead of ESLint + Prettier

### TypeScript
- Target: ES2022
- Module: NodeNext (ESM)
- Strict mode enabled
- Output: `./dist`

## Testing Strategy

Current approach relies on:
1. Vitest unit tests (`tests/image-fetch.test.ts`) run against local HTTP servers
2. TypeScript compilation as type-level validation
3. Biome for code quality
4. Manual testing via Claude Desktop integration

**Note**: The `npm test` command runs the unit tests, then typecheck, then Biome format and check.

## Deployment

The tool is designed for npx usage:
```bash
npx -y @kazuph/mcp-fetch
```

For Claude Desktop integration, add to MCP tools configuration:
```json
{
  "tools": {
    "imageFetch": {
      "command": "npx",
      "args": ["-y", "@kazuph/mcp-fetch"]
    }
  }
}
```

## Important Implementation Details

### Platform Specificity
- Designed for macOS (mentioned in README)
- Sharp binaries include Darwin ARM64 support

### Content Processing Limits
- Default maxLength: 20,000 characters
- Supports pagination via startIndex parameter
- Image processing disabled by default (performance consideration)

### Robots.txt Compliance
- Enabled by default for ethical web scraping
- Can be disabled with `ignoreRobotsTxt: true` parameter

## Common Development Workflow

1. Make code changes in `index.ts`
2. Run `npm run typecheck` to verify TypeScript
3. Run `npm run format` to ensure consistent formatting
4. Run `npm test` to run all validations
5. Test manually with `npm run dev` or via Claude Desktop integration
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# MCP Fetch

Model Context Protocol server for fetching web content and processing images. This allows Claude Desktop (or any MCP client) to fetch web content and handle images appropriately.

@kazuph/mcp-fetch MCP server

## Quick Start (For Users)

To use this tool with Claude Desktop, simply add the following to your Claude Desktop configuration (`~/Library/Application Support/Claude/claude_desktop_config.json`):

```json
{
  "tools": {
    "imageFetch": {
      "command": "npx",
      "args": ["-y", "@kazuph/mcp-fetch"]
    }
  }
}
```

This will automatically download and run the latest version of the tool when needed.

### Required Setup

1. Enable Accessibility for Claude:
   - Open System Settings
   - Go to Privacy & Security > Accessibility
   - Click the "+" button
   - Add Claude from your Applications folder
   - Turn ON the toggle for Claude

This accessibility setting is required for automated clipboard operations (Cmd+V) to work properly.

## Features

- **Web Content Extraction**: Automatically extracts and formats web content as markdown
- **Article Title Extraction**: Extracts and displays the title of the article
- **Image Processing**: Optional processing of images from web pages with optimization (disabled by default, enable with `enableFetchImages: true`)
- **File Saving**: Images are automatically saved to `~/Downloads/mcp-fetch/YYYY-MM-DD/` directory when processed
- **Dual Output**: Both file saving and optional Base64 encoding for AI display
- **Pagination Support**: Supports pagination for both text and images
- **JPEG Optimization**: Automatically optimizes images as JPEG for better performance
- **GIF Support**: Extracts first frame from animated GIFs

## For Developers

The following sections are for those who want to develop or modify the tool.

## Prerequisites

- Node.js 18+
- macOS (for clipboard operations)
- Claude Desktop (install from https://claude.ai/desktop)
- tsx (install via `npm install -g tsx`)

## Installation

```bash
git clone https://github.com/kazuph/mcp-fetch.git
cd mcp-fetch
npm install
npm run build
```

## Image Processing Specifications

When processing images from web content, the following optimizations are applied (see the sketch after this list):

- Images are converted to JPEG format with quality control
- Maximum width limited to 1200px by default
- Maximum height limited to 1600px by default
- Chroma subsampling (4:2:0) for better compression
- MozJPEG optimization for smaller file sizes
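
These limits mirror the merged-image optimization step in `index.ts`; a condensed sketch (the function name and parameters here are illustrative):

```typescript
import sharp from "sharp";

// Mirrors the optimization in index.ts: clamp dimensions, then re-encode
// as mozjpeg with 4:2:0 chroma subsampling.
async function optimize(
  image: Buffer,
  maxWidth: number,
  maxHeight: number,
  quality: number
): Promise<Buffer> {
  return sharp(image)
    .resize({
      width: Math.min(maxWidth, 1200),
      height: Math.min(maxHeight, 1600),
      fit: "inside",
      withoutEnlargement: true,
    })
    .jpeg({
      quality: Math.min(quality, 85),
      mozjpeg: true,
      chromaSubsampling: "4:2:0",
    })
    .toBuffer();
}
```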

## Configuration

1. Make sure Claude Desktop is installed and running.

2. Install tsx globally if you haven't:
   ```bash
   npm install -g tsx
   # or
   pnpm add -g tsx
   ```

3. Modify your Claude Desktop config located at:
   `~/Library/Application Support/Claude/claude_desktop_config.json`

   You can easily find this through the Claude Desktop menu:
   1. Open Claude Desktop
   2. Click Claude on the Mac menu bar
   3. Click "Settings"
   4. Click "Developer"

Add the following to your MCP client's configuration:

```json
{
  "tools": {
    "imageFetch": {
      "command": "tsx",
      "args": ["/path/to/mcp-fetch/index.ts"]
    }
  }
}
```

## Available Parameters

- `url` (required): The URL to fetch
- `maxLength` (default: 20000): Maximum length of content to return
- `startIndex` (default: 0): Starting position in content
- `imageStartIndex` (default: 0): Starting position for image collection
- `raw` (default: false): Return raw content instead of processed markdown
- `imageMaxCount` (default: 3): Maximum number of images to process per request
- `imageMaxHeight` (default: 4000): Maximum height of merged image
- `imageMaxWidth` (default: 1000): Maximum width of merged image
- `imageQuality` (default: 80): JPEG quality (1-100)
- `enableFetchImages` (default: false): Enable fetching and processing of images. FOR CLAUDE USERS ONLY: YOU ARE ALLOWED TO ENABLE THIS FEATURE!!! CLAUDE CAN FETCH AND PROCESS IMAGES!!! ENJOY THIS CAPABILITY!!!!!
- `allowCrossOriginImages` (default: true): When true, also fetch images hosted on different origins from the page
- `saveImages` (default: true): Save processed images to local files
- `returnBase64` (default: false): Return base64 encoded images for AI display
- `ignoreRobotsTxt` (default: false): Ignore robots.txt restrictions

### Security Hardening (v1.5.1)

- Only `http://` and `https://` URLs are allowed for page and image fetches
- Blocks private/loopback/link-local IPs and local hostnames (e.g., `localhost`, `.local`)
- Manual redirect handling with validation (max 3 hops)
- Request timeouts (default 12s, configurable via `MCP_FETCH_TIMEOUT_MS`)
- Response size limits: HTML up to 2MB, images up to 10MB (tunable via env)

Environment variables:

- `MCP_FETCH_TIMEOUT_MS` (default: 12000)
- `MCP_FETCH_MAX_REDIRECTS` (default: 3)
- `MCP_FETCH_MAX_HTML_BYTES` (default: 2000000)
- `MCP_FETCH_MAX_IMAGE_BYTES` (default: 10000000)

## Examples

### Basic Content Fetching (No Images)
```json
{
  "url": "https://example.com"
}
```

### Fetching with Images (File Saving Only)
```json
{
  "url": "https://example.com",
  "enableFetchImages": true,
  "imageMaxCount": 3
}
```

### Fetching with Images for AI Display
```json
{
  "url": "https://example.com",
  "enableFetchImages": true,
  "returnBase64": true,
  "imageMaxCount": 3
}
```

### Paginating Through Images
```json
{
  "url": "https://example.com",
  "enableFetchImages": true,
  "imageStartIndex": 3,
  "imageMaxCount": 3
}
```

## Notes

- This tool is designed for macOS only due to its dependency on macOS-specific clipboard operations.
- Images are processed using Sharp for optimal performance and quality.
- When multiple images are found, they are merged vertically with consideration for size limits.
- Animated GIFs are automatically handled by extracting their first frame.
- **File Saving**: Images are automatically saved to `~/Downloads/mcp-fetch/YYYY-MM-DD/` with filename format `hostname_HHMMSS_index.jpg` (see the sketch after this list)
- **Tool Name**: The tool name has been changed from `fetch` to `imageFetch` to avoid conflicts with native fetch functions.
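
For reference, a condensed sketch of how the save path is derived (adapted from `saveImageToFile` in `index.ts`; the helper name is ours):

```typescript
import path from "node:path";
import { URL } from "node:url";

// ~/Downloads/mcp-fetch/<YYYY-MM-DD>/merged/<hostname>_<time>_<index>.jpg
function buildSavePath(sourceUrl: string, imageIndex: number): string {
  const now = new Date();
  const dateStr = now.toISOString().split("T")[0];
  const hostname = new URL(sourceUrl).hostname.replace(/[^a-zA-Z0-9]/g, "_");
  const timestamp = now
    .toISOString()
    .replace(/[:.]/g, "-")
    .split("T")[1]
    .split(".")[0];
  const home = process.env.HOME || process.env.USERPROFILE || "";
  return path.join(
    home,
    "Downloads",
    "mcp-fetch",
    dateStr,
    "merged",
    `${hostname}_${timestamp}_${imageIndex}.jpg`
  );
}
```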

## Changelog

### v1.2.0
- **BREAKING CHANGE**: Tool name changed from `fetch` to `imageFetch` to avoid conflicts
- **NEW**: Automatic file saving - Images are now saved to `~/Downloads/mcp-fetch/YYYY-MM-DD/` by default
- **NEW**: Added `saveImages` parameter (default: true) to control file saving
- **NEW**: Added `returnBase64` parameter (default: false) for AI image display
- **BEHAVIOR CHANGE**: Default behavior now saves files instead of only returning base64
- Improved AI assistant integration with clear instructions for base64 option
- Enhanced file organization with date-based directories and structured naming

### v1.1.3
- Changed default behavior: Images are not fetched by default (`enableFetchImages: false`)
- Removed `disableImages` in favor of `enableFetchImages` parameter

### v1.1.0
- Added article title extraction feature
- Improved response formatting to include article titles
- Fixed type issues with MCP response content

### v1.0.0
- Initial release
- Web content extraction
- Image processing and optimization
- Pagination support
--------------------------------------------------------------------------------
/index.ts:
--------------------------------------------------------------------------------
#!/usr/bin/env node

import dns from "node:dns";
import { promises as fs } from "node:fs";
import net from "node:net";
import path from "node:path";
import type { Readable } from "node:stream";
import { URL } from "node:url";
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { Readability } from "@mozilla/readability";
import { JSDOM } from "jsdom";
import type { RequestInit } from "node-fetch";
import fetch, { type Response as FetchResponse } from "node-fetch";
import robotsParser from "robots-parser";
import sharp from "sharp";
import TurndownService from "turndown";
import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";

interface Image {
  src: string;
  alt: string;
  data?: Buffer;
  filename?: string;
}

interface ExtractedContent {
  markdown: string;
  images: Image[];
  title?: string;
}

interface ImageResource {
  uri: string;
  name: string;
  description: string;
  mimeType: string;
  filePath: string;
}

// Global resource registry for images
const imageResources = new Map<string, ImageResource>();

// Server instance to send notifications
let serverInstance: Server;
let serverConnected = false;

// --------------------
// Security hardening
// --------------------
// Defaults (can be overridden by env vars)
const FETCH_TIMEOUT_MS = Number(process.env.MCP_FETCH_TIMEOUT_MS || 12000);
const MAX_REDIRECTS = Number(process.env.MCP_FETCH_MAX_REDIRECTS || 3);
const MAX_HTML_BYTES = Number(
  process.env.MCP_FETCH_MAX_HTML_BYTES || 2_000_000
); // 2MB
const MAX_IMAGE_BYTES = Number(
  process.env.MCP_FETCH_MAX_IMAGE_BYTES || 10_000_000
); // 10MB
const DISABLE_SSRF_GUARD = process.env.MCP_FETCH_DISABLE_SSRF_GUARD === "1";

function isPrivateIPv4(ip: string): boolean {
  const parts = ip.split(".").map((v) => Number(v));
  if (
    parts.length !== 4 ||
    parts.some((n) => Number.isNaN(n) || n < 0 || n > 255)
  )
    return false;
  const [a, b] = parts;
  if (a === 10) return true; // 10.0.0.0/8
  if (a === 172 && b >= 16 && b <= 31) return true; // 172.16.0.0/12
  if (a === 192 && b === 168) return true; // 192.168.0.0/16
  if (a === 127) return true; // loopback
  if (a === 169 && b === 254) return true; // link-local
  if (a === 0) return true; // non-routable
  if (a >= 224 && a <= 239) return true; // multicast
  if (a >= 240) return true; // reserved
  return false;
}

function isPrivateIPv6(ip: string): boolean {
  const lower = ip.toLowerCase();
  return (
    lower === "::" ||
    lower === "::1" ||
    lower.startsWith("fe80:") || // link-local
    lower.startsWith("fc") || // fc00::/7 (fc/fd)
    lower.startsWith("fd") ||
    lower.startsWith("ff") // multicast
  );
}

function isNodeErrorWithCode(error: unknown): error is NodeJS.ErrnoException {
  return (
    error instanceof Error &&
    typeof (error as NodeJS.ErrnoException).code === "string"
  );
}

async function resolveAllIps(hostname: string): Promise<string[]> {
  try {
    const records = await dns.promises.lookup(hostname, {
      all: true,
      verbatim: true,
    });
    return records.map((r) => r.address);
  } catch {
    return [];
  }
}

async function isSafeUrl(
  input: string
): Promise<{ ok: true; url: URL } | { ok: false; reason: string }> {
  let u: URL;
  try {
    u = new URL(input);
  } catch {
    return { ok: false, reason: "Invalid URL" };
  }
  if (!(u.protocol === "http:" || u.protocol === "https:")) {
    return { ok: false, reason: "Only http/https schemes are allowed" };
  }
  if (DISABLE_SSRF_GUARD) {
    return { ok: true, url: u };
  }
  const hostname = u.hostname;
  if (!hostname) return { ok: false, reason: "Missing hostname" };
  const isIp = net.isIP(hostname) !== 0;
  if (isIp) {
    if (net.isIP(hostname) === 4 && isPrivateIPv4(hostname)) {
      return { ok: false, reason: "IPv4 address is private/reserved" };
    }
    if (net.isIP(hostname) === 6 && isPrivateIPv6(hostname)) {
      return { ok: false, reason: "IPv6 address is private/reserved" };
    }
  } else {
    const lower = hostname.toLowerCase();
    if (
      lower === "localhost" ||
      lower.endsWith(".localhost") ||
      lower.endsWith(".local")
    ) {
      return { ok: false, reason: "Local hostnames are not allowed" };
    }
    const ips = await resolveAllIps(hostname);
    for (const ip of ips) {
      if (
        (net.isIP(ip) === 4 && isPrivateIPv4(ip)) ||
        (net.isIP(ip) === 6 && isPrivateIPv6(ip))
      ) {
        return {
          ok: false,
          reason: "Hostname resolves to private/reserved address",
        };
      }
    }
  }
  return { ok: true, url: u };
}

function withTimeout<T>(
  p: Promise<T>,
  ms: number,
  label = "request"
): Promise<T> {
  if (!ms || ms <= 0) return p;
  return new Promise<T>((resolve, reject) => {
    const t = setTimeout(
      () => reject(new Error(`${label} timed out after ${ms}ms`)),
      ms
    );
    p.then(
      (v) => {
        clearTimeout(t);
        resolve(v);
      },
      (e) => {
        clearTimeout(t);
        reject(e);
      }
    );
  });
}

async function safeFollowFetch(
  inputUrl: string,
  init: RequestInit = {},
  opts: { maxRedirects?: number; timeoutMs?: number } = {}
): Promise<{ response: FetchResponse; finalUrl: string }> {
  const maxRedirects = opts.maxRedirects ?? MAX_REDIRECTS;
  const timeoutMs = opts.timeoutMs ?? FETCH_TIMEOUT_MS;

  let current = inputUrl;
  for (let i = 0; i <= maxRedirects; i++) {
    const safe = await isSafeUrl(current);
    if (!safe.ok) throw new Error(`Blocked URL: ${safe.reason}`);
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
      const reqInit: RequestInit = {
        ...(init || {}),
        redirect: "manual",
        signal: controller.signal,
      };
      const resp: FetchResponse = await fetch(current, reqInit);
      clearTimeout(timer);
      if ([301, 302, 303, 307, 308].includes(resp.status)) {
        const loc = resp.headers.get("location");
        if (!loc)
          throw new Error(
            `Redirect status ${resp.status} without Location header`
          );
        const next = new URL(loc, current).toString();
        current = next;
        continue;
      }
      return { response: resp, finalUrl: current };
    } catch (e) {
      clearTimeout(timer);
      throw e;
    }
  }
  throw new Error("Too many redirects");
}

async function readTextLimited(
  resp: FetchResponse,
  maxBytes: number
): Promise<{ text: string; contentType: string }> {
  const ct = resp.headers.get("content-type") || "";
  const cl = resp.headers.get("content-length");
  if (cl && Number(cl) > maxBytes) {
    throw new Error(`Response too large (${cl} bytes > ${maxBytes})`);
  }
  const body = resp.body as Readable | null;
  if (!body || typeof body.on !== "function") {
    const text = await withTimeout(resp.text(), FETCH_TIMEOUT_MS, "read text");
    return { text, contentType: ct };
  }
  let size = 0;
  const chunks: Buffer[] = [];
  await new Promise<void>((resolve, reject) => {
    body.on("data", (chunk: Buffer) => {
      size += chunk.length;
      if (size > maxBytes) {
        body.destroy();
        reject(new Error(`Response exceeded limit (${maxBytes} bytes)`));
        return;
      }
      chunks.push(chunk);
    });
    body.on("end", () => resolve());
    body.on("error", (err: Error) => reject(err));
  });
  return { text: Buffer.concat(chunks).toString("utf8"), contentType: ct };
}

async function readBufferLimited(
  resp: FetchResponse,
  maxBytes: number
): Promise<Buffer> {
  const cl = resp.headers.get("content-length");
  if (cl && Number(cl) > maxBytes) {
    throw new Error(`Response too large (${cl} bytes > ${maxBytes})`);
  }
  const body = resp.body as Readable | null;
  if (!body || typeof body.on !== "function") {
    const ab = await withTimeout(
      resp.arrayBuffer(),
      FETCH_TIMEOUT_MS,
      "read buffer"
    );
    const buf = Buffer.from(ab);
    if (buf.length > maxBytes)
      throw new Error(`Response exceeded limit (${maxBytes} bytes)`);
    return buf;
  }
  let size = 0;
  const chunks: Buffer[] = [];
  await new Promise<void>((resolve, reject) => {
    body.on("data", (chunk: Buffer) => {
      size += chunk.length;
      if (size > maxBytes) {
        body.destroy();
        reject(new Error(`Response exceeded limit (${maxBytes} bytes)`));
        return;
      }
      chunks.push(chunk);
    });
    body.on("end", () => resolve());
    body.on("error", (err: Error) => reject(err));
  });
  return Buffer.concat(chunks);
}

/**
 * Notify the client that the resource list has changed
 */
async function notifyResourcesChanged(): Promise<void> {
  if (!serverInstance || !serverConnected) return;
  try {
    await serverInstance.sendResourceListChanged();
  } catch (error) {
    // When not connected to an MCP client, avoid noisy warnings in CI/tests
    if (serverConnected) {
      console.warn("Failed to notify resource list changed:", error);
    }
  }
}

/**
 * Scan existing downloaded files and register them as resources
 */
async function scanAndRegisterExistingFiles(): Promise<void> {
  const homeDir = process.env.HOME || process.env.USERPROFILE || "";
  const baseDir = path.join(homeDir, "Downloads", "mcp-fetch");

  try {
    // Scan the date directories
    const dateDirs = await fs.readdir(baseDir);

    for (const dateDir of dateDirs) {
      if (dateDir.startsWith(".")) continue; // skip .DS_Store and the like

      const datePath = path.join(baseDir, dateDir);
      const stats = await fs.stat(datePath);

      if (!stats.isDirectory()) continue;

      try {
        // Check files directly under the date directory
        const files = await fs.readdir(datePath);

        for (const file of files) {
          if (!file.toLowerCase().endsWith(".jpg")) continue;

          const filePath = path.join(datePath, file);
          const fileStats = await fs.stat(filePath);

          if (!fileStats.isFile()) continue;

          // Build the resource URI (file:// scheme)
          const resourceUri = `file://${filePath}`;

          // Extract information from the filename
          const baseName = path.basename(file, ".jpg");
          const isIndividual = file.includes("individual");

          const resourceName = `${dateDir}/${baseName}`;
          const description = `${isIndividual ? "Individual" : "Merged"} image from ${dateDir}`;

          const resource: ImageResource = {
            uri: resourceUri,
            name: resourceName,
            description,
            mimeType: "image/jpeg",
            filePath,
          };

          imageResources.set(resourceUri, resource);
        }

        // Also check subdirectories (when individual/merged exist)
        const subDirs = ["individual", "merged"];

        for (const subDir of subDirs) {
          const subDirPath = path.join(datePath, subDir);

          try {
            const subFiles = await fs.readdir(subDirPath);

            for (const file of subFiles) {
              if (!file.toLowerCase().endsWith(".jpg")) continue;

              const filePath = path.join(subDirPath, file);
              const fileStats = await fs.stat(filePath);

              if (!fileStats.isFile()) continue;

              // Build the resource URI (file:// scheme)
              const resourceUri = `file://${filePath}`;

              // Extract information from the filename
              const baseName = path.basename(file, ".jpg");
              const resourceName = `${dateDir}/${subDir}/${baseName}`;
              const description = `${subDir === "individual" ? "Individual" : "Merged"} image from ${dateDir}`;
"Individual" : "Merged"} image from ${dateDir}`; 389 | 390 | const resource: ImageResource = { 391 | uri: resourceUri, 392 | name: resourceName, 393 | description, 394 | mimeType: "image/jpeg", 395 | filePath, 396 | }; 397 | 398 | imageResources.set(resourceUri, resource); 399 | } 400 | } catch (_error) { 401 | // サブディレクトリが存在しない場合はスキップ 402 | } 403 | } 404 | } catch (error) { 405 | console.warn(`Failed to scan directory ${datePath}:`, error); 406 | } 407 | } 408 | 409 | console.error(`Registered ${imageResources.size} existing image resources`); 410 | } catch (error) { 411 | if (isNodeErrorWithCode(error) && error.code === "ENOENT") { 412 | // No downloads directory yet; nothing to register on startup 413 | return; 414 | } 415 | console.warn("Failed to scan existing downloads:", error); 416 | } 417 | } 418 | 419 | const DEFAULT_USER_AGENT_AUTONOMOUS = 420 | "ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)"; 421 | // const DEFAULT_USER_AGENT_MANUAL = 422 | // "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)"; 423 | 424 | /** 425 | * URLから元のファイル名を抽出 426 | */ 427 | function extractFilenameFromUrl(url: string): string { 428 | try { 429 | const urlObj = new URL(url); 430 | const pathname = urlObj.pathname; 431 | const filename = path.basename(pathname); 432 | 433 | // ファイル名が空の場合や拡張子がない場合のデフォルト処理 434 | if (!filename || !filename.includes(".")) { 435 | return "image.jpg"; 436 | } 437 | 438 | return filename; 439 | } catch { 440 | return "image.jpg"; 441 | } 442 | } 443 | 444 | // New structured API (optional) 445 | const NewImagesSchema = z 446 | .union([ 447 | z.boolean(), 448 | z.object({ 449 | output: z.enum(["base64", "file", "both"]).optional(), 450 | layout: z.enum(["merged", "individual", "both"]).optional(), 451 | maxCount: z.number().int().min(0).max(10).optional(), 452 | startIndex: z.number().int().min(0).optional(), 453 | size: z 454 | .object({ 455 | maxWidth: z.number().int().min(100).max(10000).optional(), 456 | maxHeight: z.number().int().min(100).max(10000).optional(), 457 | quality: z.number().int().min(1).max(100).optional(), 458 | }) 459 | .optional(), 460 | originPolicy: z.enum(["cross-origin", "same-origin"]).optional(), 461 | saveDir: z.string().optional(), 462 | }), 463 | ]) 464 | .optional(); 465 | 466 | const NewTextSchema = z 467 | .object({ 468 | maxLength: z.number().int().positive().max(1000000).optional(), 469 | startIndex: z.number().int().min(0).optional(), 470 | raw: z.boolean().optional(), 471 | }) 472 | .optional(); 473 | 474 | const NewSecuritySchema = z 475 | .object({ 476 | ignoreRobotsTxt: z.boolean().optional(), 477 | }) 478 | .optional(); 479 | 480 | const FetchArgsSchema = z.object({ 481 | url: z 482 | .string() 483 | .url() 484 | .refine( 485 | (val) => { 486 | try { 487 | const u = new URL(val); 488 | return u.protocol === "http:" || u.protocol === "https:"; 489 | } catch { 490 | return false; 491 | } 492 | }, 493 | { message: "Only http/https URLs are allowed" } 494 | ), 495 | // legacy flat params (kept for backward compatibility) 496 | maxLength: z 497 | .union([z.number(), z.string()]) 498 | .transform((val) => Number(val)) 499 | .pipe(z.number().positive().max(1000000)) 500 | .default(20000), 501 | startIndex: z 502 | .union([z.number(), z.string()]) 503 | .transform((val) => Number(val)) 504 | .pipe(z.number().min(0)) 505 | .default(0), 506 | imageStartIndex: z 507 | .union([z.number(), z.string()]) 508 | .transform((val) => Number(val)) 509 | 
    .pipe(z.number().min(0))
    .default(0),
  raw: z
    .union([z.boolean(), z.string()])
    .transform((val) =>
      typeof val === "string" ? val.toLowerCase() === "true" : val
    )
    .default(false),
  imageMaxCount: z
    .union([z.number(), z.string()])
    .transform((val) => Number(val))
    .pipe(z.number().min(0).max(10))
    .default(3),
  imageMaxHeight: z
    .union([z.number(), z.string()])
    .transform((val) => Number(val))
    .pipe(z.number().min(100).max(10000))
    .default(4000),
  imageMaxWidth: z
    .union([z.number(), z.string()])
    .transform((val) => Number(val))
    .pipe(z.number().min(100).max(10000))
    .default(1000),
  imageQuality: z
    .union([z.number(), z.string()])
    .transform((val) => Number(val))
    .pipe(z.number().min(1).max(100))
    .default(80),
  enableFetchImages: z
    .union([z.boolean(), z.string()])
    .transform((val) =>
      typeof val === "string" ? val.toLowerCase() === "true" : val
    )
    .default(false),
  allowCrossOriginImages: z
    .union([z.boolean(), z.string()])
    .transform((val) =>
      typeof val === "string" ? val.toLowerCase() === "true" : val
    )
    .default(true),
  ignoreRobotsTxt: z
    .union([z.boolean(), z.string()])
    .transform((val) =>
      typeof val === "string" ? val.toLowerCase() === "true" : val
    )
    .default(false),
  saveImages: z
    .union([z.boolean(), z.string()])
    .transform((val) =>
      typeof val === "string" ? val.toLowerCase() === "true" : val
    )
    .default(true),
  returnBase64: z
    .union([z.boolean(), z.string()])
    .transform((val) =>
      typeof val === "string" ? val.toLowerCase() === "true" : val
    )
    .default(false),
  // new structured params (optional)
  images: NewImagesSchema,
  text: NewTextSchema,
  security: NewSecuritySchema,
});

const ListToolsSchema = z.object({
  method: z.literal("tools/list"),
});

const CallToolSchema = z.object({
  method: z.literal("tools/call"),
  params: z.object({
    name: z.string(),
    arguments: z.record(z.unknown()).optional(),
  }),
});

function extractContentFromHtml(
  html: string,
  url: string
): ExtractedContent | string {
  const dom = new JSDOM(html, { url });
  const reader = new Readability(dom.window.document);
  const article = reader.parse();

  if (!article || !article.content) {
    return "Page failed to be simplified from HTML";
  }

  // Extract images from the article content only
  const articleDom = new JSDOM(article.content);
  const imgElements = Array.from(
    articleDom.window.document.querySelectorAll("img")
  );

  const images: Image[] = imgElements.map((img) => {
    const src = img.src;
    const alt = img.alt || "";
    const filename = extractFilenameFromUrl(src);
    return { src, alt, filename };
  });

  const turndownService = new TurndownService({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
  });
  const markdown = turndownService.turndown(article.content);

  return { markdown, images, title: article.title ?? undefined };
}

async function fetchImages(
  images: Image[],
  baseOrigin: string,
  allowCrossOrigin: boolean
): Promise<(Image & { data: Buffer })[]> {
  const fetchedImages = [];
  for (const img of images) {
    try {
      const safe = await isSafeUrl(img.src);
      if (!safe.ok) continue;
      const srcOrigin = new URL(img.src).origin;
      if (!allowCrossOrigin && srcOrigin !== baseOrigin) continue;
      const { response } = await safeFollowFetch(
        img.src,
        {},
        { timeoutMs: FETCH_TIMEOUT_MS }
      );
      const imageBuffer = await readBufferLimited(response, MAX_IMAGE_BYTES);

      // For GIF images, only the first frame is extracted
      if (img.src.toLowerCase().endsWith(".gif")) {
        // GIF handling logic
      }

      fetchedImages.push({
        ...img,
        data: imageBuffer,
      });
    } catch (error) {
      console.warn(`Failed to process image ${img.src}:`, error);
    }
  }
  return fetchedImages;
}

/**
 * Merge multiple images vertically and return them as a single image
 */
async function mergeImagesVertically(
  images: Buffer[],
  maxWidth: number,
  maxHeight: number,
  quality: number
): Promise<Buffer> {
  if (images.length === 0) {
    throw new Error("No images to merge");
  }

  // Read each image's metadata
  const imageMetas = await Promise.all(
    images.map(async (buffer) => {
      const metadata = await sharp(buffer).metadata();
      return {
        width: metadata.width || 0,
        height: metadata.height || 0,
        buffer,
      };
    })
  );

  // Compute the target width
  const width = Math.min(
    maxWidth,
    Math.max(...imageMetas.map((meta) => meta.width))
  );

  // Sum the image heights
  const totalHeight = Math.min(
    maxHeight,
    imageMetas.reduce((sum, meta) => sum + meta.height, 0)
  );

  // Create the output canvas
  const composite = sharp({
    create: {
      width,
      height: totalHeight,
      channels: 4,
      background: { r: 255, g: 255, b: 255, alpha: 1 },
    },
  });

  // Place each image
  let currentY = 0;
  const overlays = [];

  for (const meta of imageMetas) {
    // Stop before images would overflow the canvas height
    if (currentY >= maxHeight) break;

    // Resize the image (only when needed)
    let processedImage = sharp(meta.buffer);
    if (meta.width > width) {
      processedImage = processedImage.resize(width);
    }

    const resizedBuffer = await processedImage.toBuffer();
    const resizedMeta = await sharp(resizedBuffer).metadata();

    overlays.push({
      input: resizedBuffer,
      top: currentY,
      left: 0,
    });

    currentY += resizedMeta.height || 0;
  }

  // Output at the requested quality (JPEG instead of PNG)
  return composite
    .composite(overlays)
    .jpeg({
      quality, // JPEG quality (1-100)
      mozjpeg: true, // use mozjpeg for further optimization
    })
    .toBuffer();
}

// removed unused getImageDimensions helper to satisfy linter

/**
 * Save an image into a date-based directory and return the file path
 */
async function saveImageToFile(
  imageBuffer: Buffer,
  sourceUrl: string,
  imageIndex: number = 0
): Promise<string> {
  // Current date as YYYY-MM-DD
  const now = new Date();
  const dateStr = now.toISOString().split("T")[0];

  // Target directory: ~/Downloads/mcp-fetch/YYYY-MM-DD/merged/
  const homeDir = process.env.HOME || process.env.USERPROFILE || "";
  const baseDir = path.join(
    homeDir,
    "Downloads",
    "mcp-fetch",
    dateStr,
    "merged"
  );
"mcp-fetch", 757 | dateStr, 758 | "merged" 759 | ); 760 | 761 | // ディレクトリが存在しない場合は作成 762 | await fs.mkdir(baseDir, { recursive: true }); 763 | 764 | // ファイル名を生成(URLのホスト名 + タイムスタンプ + インデックス) 765 | const urlObj = new URL(sourceUrl); 766 | const hostname = urlObj.hostname.replace(/[^a-zA-Z0-9]/g, "_"); 767 | const timestamp = now 768 | .toISOString() 769 | .replace(/[:.]/g, "-") 770 | .split("T")[1] 771 | .split(".")[0]; 772 | const filename = `${hostname}_${timestamp}_${imageIndex}.jpg`; 773 | 774 | const filePath = path.join(baseDir, filename); 775 | 776 | // ファイルに保存 777 | await fs.writeFile(filePath, imageBuffer); 778 | 779 | // リソースとして登録 780 | const resourceUri = `file://${filePath}`; 781 | const resourceName = `${dateStr}/merged/${filename}`; 782 | const description = `Merged image from ${sourceUrl} saved on ${dateStr}`; 783 | 784 | const resource: ImageResource = { 785 | uri: resourceUri, 786 | name: resourceName, 787 | description, 788 | mimeType: "image/jpeg", 789 | filePath, 790 | }; 791 | 792 | imageResources.set(resourceUri, resource); 793 | 794 | // クライアントにリソース変更を通知 795 | await notifyResourcesChanged(); 796 | 797 | return filePath; 798 | } 799 | 800 | /** 801 | * 個別画像を保存してリソースとして登録 802 | */ 803 | async function saveIndividualImageAndRegisterResource( 804 | imageBuffer: Buffer, 805 | sourceUrl: string, 806 | imageIndex: number, 807 | altText: string = "", 808 | originalFilename: string = "image.jpg" 809 | ): Promise { 810 | // 現在の日付をYYYY-MM-DD形式で取得 811 | const now = new Date(); 812 | const dateStr = now.toISOString().split("T")[0]; 813 | 814 | // 保存先ディレクトリ: ~/Downloads/mcp-fetch/YYYY-MM-DD/individual/ 815 | const homeDir = process.env.HOME || process.env.USERPROFILE || ""; 816 | const baseDir = path.join( 817 | homeDir, 818 | "Downloads", 819 | "mcp-fetch", 820 | dateStr, 821 | "individual" 822 | ); 823 | 824 | // ディレクトリが存在しない場合は作成 825 | await fs.mkdir(baseDir, { recursive: true }); 826 | 827 | // 元のファイル名を使用してユニークファイル名を生成 828 | const ext = path.extname(originalFilename); 829 | const baseName = path.basename(originalFilename, ext); 830 | const safeBaseName = baseName.replace(/[^a-zA-Z0-9\-_]/g, "_"); 831 | const filename = `${imageIndex}_${safeBaseName}${ext || ".jpg"}`; 832 | 833 | const filePath = path.join(baseDir, filename); 834 | 835 | // ファイルに保存 836 | await fs.writeFile(filePath, imageBuffer); 837 | 838 | // リソースとして登録 839 | const resourceUri = `file://${filePath}`; 840 | const resourceName = `${safeBaseName}_${imageIndex}`; 841 | const description = `${originalFilename}${altText ? 

  const resource: ImageResource = {
    uri: resourceUri,
    name: resourceName,
    description,
    mimeType: "image/jpeg",
    filePath,
  };

  imageResources.set(resourceUri, resource);

  // Notify the client that resources changed
  await notifyResourcesChanged();

  return filePath;
}

async function checkRobotsTxt(
  url: string,
  userAgent: string
): Promise<boolean> {
  const { protocol, host } = new URL(url);
  const robotsUrl = `${protocol}//${host}/robots.txt`;

  try {
    const { response } = await safeFollowFetch(
      robotsUrl,
      { headers: { "User-Agent": userAgent } },
      { timeoutMs: Math.min(FETCH_TIMEOUT_MS, 8000) }
    );
    if (!response.ok) {
      if (response.status === 401 || response.status === 403) {
        throw new Error(
          "Autonomous fetching not allowed based on robots.txt response"
        );
      }
      return true; // Allow if no robots.txt
    }

    const { text: robotsTxt } = await readTextLimited(response, 100_000);
    const robots = robotsParser(robotsUrl, robotsTxt);

    if (!robots.isAllowed(url, userAgent)) {
      throw new Error(
        "The site's robots.txt specifies that autonomous fetching is not allowed. " +
          "Try manually fetching the page using the fetch prompt."
      );
    }
    return true;
  } catch (error) {
    // If robots.txt itself cannot be fetched, allow access
    if (error instanceof Error && error.message.includes("robots.txt")) {
      throw error;
    }
    return true;
  }
}

interface FetchResult {
  content: string;
  images: { data: string; mimeType: string; filePath?: string }[];
  remainingContent: number;
  remainingImages: number;
  title?: string;
}

async function fetchUrl(
  url: string,
  userAgent: string,
  forceRaw = false,
  options = {
    imageMaxCount: 3,
    imageMaxHeight: 4000,
    imageMaxWidth: 1000,
    imageQuality: 80,
    imageStartIndex: 0,
    startIndex: 0,
    maxLength: 20000,
    enableFetchImages: false,
    allowCrossOriginImages: true,
    saveImages: true,
    returnBase64: false,
  }
): Promise<FetchResult> {
  const { response, finalUrl } = await safeFollowFetch(url, {
    headers: { "User-Agent": userAgent },
  });

  if (!response.ok) {
    throw new Error(`Failed to fetch ${url} - status code ${response.status}`);
  }

  const { text, contentType } = await readTextLimited(response, MAX_HTML_BYTES);
  const isHtml =
    text.toLowerCase().includes("<html") || contentType.includes("text/html");

  if (isHtml && !forceRaw) {
    const result = extractContentFromHtml(text, finalUrl);
    if (typeof result === "string") {
      return {
        content: result,
        images: [],
        remainingContent: 0,
        remainingImages: 0,
        title: undefined,
      };
    }

    const { markdown, images, title } = result;
    const processedImages: { data: string; mimeType: string; filePath?: string }[] = [];

    if (
      options.enableFetchImages &&
      options.imageMaxCount > 0 &&
      images.length > 0
    ) {
      try {
        const startIdx = options.imageStartIndex;
        const baseOrigin = new URL(finalUrl).origin;
        let fetchedImages = await fetchImages(
          images.slice(startIdx),
          baseOrigin,
          options.allowCrossOriginImages ?? false
        );
965 |         fetchedImages = fetchedImages.slice(0, options.imageMaxCount);
966 | 
967 |         if (fetchedImages.length > 0) {
968 |           const imageBuffers = fetchedImages.map((img) => img.data);
969 | 
970 |           // Individual image saving (new API: only when layout is individual/both and output is file/both)
971 |           type Layout = undefined | "merged" | "individual" | "both";
972 |           type Output = undefined | "base64" | "file" | "both";
973 |           const layout = (options as { layout?: Layout }).layout;
974 |           const output = (options as { output?: Output }).output;
975 |           const legacyMode =
976 |             (options as { output?: Output }).output === undefined &&
977 |             (options as { layout?: Layout }).layout === undefined;
978 |           const shouldSaveIndividual = legacyMode
979 |             ? true // legacy calls always saved individual images; kept for compatibility
980 |             : (layout === "individual" || layout === "both") &&
981 |               (output === "file" || output === "both");
982 | 
983 |           if (shouldSaveIndividual) {
984 |             for (let i = 0; i < fetchedImages.length; i++) {
985 |               try {
986 |                 const img = fetchedImages[i];
987 |                 const optimizedIndividualImage = await sharp(img.data)
988 |                   .jpeg({ quality: 80, mozjpeg: true })
989 |                   .toBuffer();
990 |                 await saveIndividualImageAndRegisterResource(
991 |                   optimizedIndividualImage,
992 |                   finalUrl,
993 |                   startIdx + i,
994 |                   img.alt,
995 |                   img.filename || "image.jpg"
996 |                 );
997 |               } catch (error) {
998 |                 console.warn(`Failed to save individual image ${i}:`, error);
999 |               }
1000 |             }
1001 |           }
1002 | 
1003 |           const mergedImage = await mergeImagesVertically(
1004 |             imageBuffers,
1005 |             options.imageMaxWidth,
1006 |             options.imageMaxHeight,
1007 |             options.imageQuality
1008 |           );
1009 | 
1010 |           // Optimize the merged image before base64 encoding
1011 |           const optimizedImage = await sharp(mergedImage)
1012 |             .resize({
1013 |               width: Math.min(options.imageMaxWidth, 1200), // cap width at 1200px
1014 |               height: Math.min(options.imageMaxHeight, 1600), // cap height at 1600px
1015 |               fit: "inside",
1016 |               withoutEnlargement: true,
1017 |             })
1018 |             .jpeg({
1019 |               quality: Math.min(options.imageQuality, 85), // cap JPEG quality
1020 |               mozjpeg: true,
1021 |               chromaSubsampling: "4:2:0", // use chroma subsampling
1022 |             })
1023 |             .toBuffer();
1024 | 
1025 |           const base64Image = optimizedImage.toString("base64");
1026 | 
1027 |           // File saving (new API: only when output is file/both)
1028 |           let filePath: string | undefined;
1029 |           const shouldSaveMerged = legacyMode
1030 |             ? options.saveImages
1031 |             : output === "file" || output === "both";
1032 |           if (shouldSaveMerged) {
1033 |             try {
1034 |               filePath = await saveImageToFile(
1035 |                 optimizedImage,
1036 |                 finalUrl,
1037 |                 options.imageStartIndex
1038 |               );
1039 |               if (serverConnected) {
1040 |                 console.error(`Image saved to: ${filePath}`);
1041 |               } else {
1042 |                 console.log(`Image saved to: ${filePath}`);
1043 |               }
1044 |             } catch (error) {
1045 |               console.warn("Failed to save image to file:", error);
1046 |             }
1047 |           }
1048 | 
1049 |           processedImages.push({
1050 |             data:
1051 |               (legacyMode && options.returnBase64) ||
1052 |               (!legacyMode && (output === "base64" || output === "both"))
1053 |                 ? base64Image
1054 |                 : "",
1055 |             mimeType: "image/jpeg", // always JPEG after optimization
1056 |             filePath,
1057 |           });
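          // Worked example of the output/layout switches above (illustrative
          // values): images: { output: "file", layout: "both" } yields
          // shouldSaveIndividual=true and shouldSaveMerged=true with data set
          // to "" (files only, no base64); legacy arguments
          // { saveImages: true, returnBase64: false } resolve the same way.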
1058 |         }
1059 |       } catch (err) {
1060 |         console.error("Error processing images:", err);
1061 |       }
1062 |     }
1063 | 
1064 |     return {
1065 |       content: markdown,
1066 |       images: processedImages,
1067 |       remainingContent: text.length - (options.startIndex + options.maxLength),
1068 |       remainingImages: Math.max(
1069 |         0,
1070 |         images.length - (options.imageStartIndex + options.imageMaxCount)
1071 |       ),
1072 |       title,
1073 |     };
1074 |   }
1075 | 
1076 |   return {
1077 |     content: `Content type ${contentType} cannot be simplified to markdown, but here is the raw content:\n${text}`,
1078 |     images: [],
1079 |     remainingContent: 0,
1080 |     remainingImages: 0,
1081 |     title: undefined,
1082 |   };
1083 | }
1084 | 
1085 | // Parse command-line arguments
1086 | const args = process.argv.slice(2);
1087 | const IGNORE_ROBOTS_TXT = args.includes("--ignore-robots-txt");
1088 | 
1089 | // Server setup
1090 | const server = new Server(
1091 |   {
1092 |     name: "mcp-fetch",
1093 |     version: "1.6.2",
1094 |   },
1095 |   {
1096 |     capabilities: {
1097 |       tools: {},
1098 |       resources: {
1099 |         subscribe: true,
1100 |         listChanged: true,
1101 |       },
1102 |     },
1103 |   }
1104 | );
1105 | 
1106 | // Store server instance for notifications
1107 | serverInstance = server;
1108 | 
1109 | // Log which command-line options are in effect
1110 | console.error(
1111 |   `Server started with options: ${IGNORE_ROBOTS_TXT ? "ignore-robots-txt" : "respect-robots-txt"}`
1112 | );
1113 | 
1114 | interface RequestHandlerExtra {
1115 |   signal: AbortSignal;
1116 | }
1117 | 
1118 | server.setRequestHandler(
1119 |   ListToolsSchema,
1120 |   async (_request: { method: "tools/list" }, _extra: RequestHandlerExtra) => {
1121 |     const tools = [
1122 |       {
1123 |         name: "imageFetch",
1124 |         description: `
1125 | An MCP fetch tool with strong image support. Converts the article body to Markdown and extracts, optimizes, and returns the images on the page.
1126 | 
1127 | New-API defaults (when "images" is specified)
1128 | - Images: fetched and returned as base64 (up to 3 images merged vertically into one JPEG)
1129 | - Saving: disabled (opt-in)
1130 | - Cross-origin: allowed (CDNs expected)
1131 | 
1132 | Parameters (new API)
1133 | - url: URL to fetch (required)
1134 | - images: true | { output, layout, maxCount, startIndex, size, originPolicy, saveDir }
1135 |   - output: "base64" | "file" | "both" (default: base64)
1136 |   - layout: "merged" | "individual" | "both" (default: merged)
1137 |   - maxCount/startIndex (defaults: 3 / 0)
1138 |   - size: { maxWidth, maxHeight, quality } (defaults: 1000/1600/80)
1139 |   - originPolicy: "cross-origin" | "same-origin" (default: cross-origin)
1140 | - text: { maxLength, startIndex, raw } (defaults: 20000/0/false)
1141 | - security: { ignoreRobotsTxt } (default: false)
1142 | 
1143 | Legacy API keys (enableFetchImages, returnBase64, saveImages, imageMax*, imageStartIndex, etc.) are still accepted for backwards compatibility (deprecated).
1144 | 
1145 | Examples (new API)
1146 | {
1147 |   "url": "https://example.com",
1148 |   "images": true
1149 | }
1150 | 
1151 | {
1152 |   "url": "https://example.com",
1153 |   "images": { "output": "both", "layout": "both", "maxCount": 4 }
1154 | }
1155 | 
1156 | Examples (legacy API, backwards compatible)
1157 | {
1158 |   "url": "https://example.com",
1159 |   "enableFetchImages": true,
1160 |   "returnBase64": true,
1161 |   "imageMaxCount": 2
1162 | }`,
1163 |         inputSchema: zodToJsonSchema(FetchArgsSchema),
1164 |       },
1165 |     ];
1166 |     return { tools };
1167 |   }
1168 | );
1169 | 
1170 | // Type definitions for MCP response content
1171 | type MCPResponseContent =
1172 |   | { type: "text"; text: string }
1173 |   | { type: "image"; mimeType: string; data: string };
1174 | 
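// Illustrative tools/call envelope for the imageFetch tool (values are
// examples only, not from this file): a client would send
//   { "jsonrpc": "2.0", "id": 1, "method": "tools/call",
//     "params": { "name": "imageFetch", "arguments": {
//       "url": "https://example.com", "images": { "output": "both" } } } }
// and the handler below validates params.arguments against FetchArgsSchema
// before fetching anything.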
1175 | server.setRequestHandler(
1176 |   CallToolSchema,
1177 |   async (
1178 |     request: {
1179 |       method: "tools/call";
1180 |       params: { name: string; arguments?: Record<string, unknown> };
1181 |     },
1182 |     _extra: RequestHandlerExtra
1183 |   ) => {
1184 |     try {
1185 |       const { name, arguments: args } = request.params;
1186 | 
1187 |       if (name !== "imageFetch") {
1188 |         throw new Error(`Unknown tool: ${name}`);
1189 |       }
1190 | 
1191 |       const parsed = FetchArgsSchema.safeParse(args || {});
1192 |       if (!parsed.success) {
1193 |         throw new Error(`Invalid arguments: ${parsed.error}`);
1194 |       }
1195 | 
1196 |       const a = parsed.data as Record<string, unknown> & {
1197 |         url: string;
1198 |         images?: unknown;
1199 |         text?: { maxLength?: number; startIndex?: number; raw?: boolean };
1200 |         security?: { ignoreRobotsTxt?: boolean };
1201 |         // legacy fields (all optional)
1202 |         enableFetchImages?: boolean;
1203 |         saveImages?: boolean;
1204 |         returnBase64?: boolean;
1205 |         imageMaxWidth?: number;
1206 |         imageMaxHeight?: number;
1207 |         imageQuality?: number;
1208 |         imageStartIndex?: number;
1209 |         allowCrossOriginImages?: boolean;
1210 |         startIndex?: number;
1211 |         maxLength?: number;
1212 |         raw?: boolean;
1213 |         ignoreRobotsTxt?: boolean;
1214 |       };
1215 | 
1216 |       // Legacy mode detection: no new keys and/or legacy keys present
1217 |       const hasNewKeys =
1218 |         a.images !== undefined ||
1219 |         a.text !== undefined ||
1220 |         a.security !== undefined;
1221 |       const hasLegacyKeys =
1222 |         a.enableFetchImages !== undefined ||
1223 |         a.saveImages !== undefined ||
1224 |         a.returnBase64 !== undefined ||
1225 |         a.imageMaxWidth !== undefined ||
1226 |         a.imageMaxHeight !== undefined ||
1227 |         a.imageQuality !== undefined ||
1228 |         a.imageStartIndex !== undefined ||
1229 |         a.allowCrossOriginImages !== undefined ||
1230 |         a.startIndex !== undefined ||
1231 |         a.maxLength !== undefined ||
1232 |         a.raw !== undefined;
1233 | 
1234 |       const legacyMode =
1235 |         (!hasNewKeys && hasLegacyKeys) || (!hasNewKeys && !hasLegacyKeys);
1236 | 
1237 |       // Build fetch options with backward compatibility
1238 |       const fetchOptions: {
1239 |         imageMaxCount: number;
1240 |         imageMaxHeight: number;
1241 |         imageMaxWidth: number;
1242 |         imageQuality: number;
1243 |         imageStartIndex: number;
1244 |         startIndex: number;
1245 |         maxLength: number;
1246 |         enableFetchImages: boolean;
1247 |         allowCrossOriginImages: boolean;
1248 |         saveImages: boolean;
1249 |         returnBase64: boolean;
1250 |         raw?: boolean;
1251 |         output?: "base64" | "file" | "both";
1252 |         layout?: "merged" | "individual" | "both";
1253 |       } = {
1254 |         imageMaxCount: 3,
1255 |         imageMaxHeight: 4000,
1256 |         imageMaxWidth: 1000,
1257 |         imageQuality: 80,
1258 |         imageStartIndex: 0,
1259 |         startIndex: 0,
1260 |         maxLength: 20000,
1261 |         enableFetchImages: false,
1262 |         allowCrossOriginImages: true,
1263 |         saveImages: false,
1264 |         returnBase64: false,
1265 |         // new API additions (optional)
1266 |         output: undefined,
1267 |         layout: undefined,
1268 |       };
1269 | 
1270 |       if (legacyMode) {
1271 |         // Legacy defaults
1272 |         fetchOptions.startIndex =
1273 |           (a.startIndex as number | undefined) ?? fetchOptions.startIndex;
1274 |         fetchOptions.maxLength =
1275 |           (a.maxLength as number | undefined) ?? fetchOptions.maxLength;
1276 |         fetchOptions.raw = a.raw ?? false;
1277 |         fetchOptions.imageMaxCount =
1278 |           (a.imageMaxCount as number | undefined) ?? fetchOptions.imageMaxCount;
1279 |         fetchOptions.imageMaxHeight =
1280 |           (a.imageMaxHeight as number | undefined) ??
1281 |           fetchOptions.imageMaxHeight;
1282 |         fetchOptions.imageMaxWidth =
1283 |           (a.imageMaxWidth as number | undefined) ?? fetchOptions.imageMaxWidth;
1284 |         fetchOptions.imageQuality =
1285 |           (a.imageQuality as number | undefined) ?? fetchOptions.imageQuality;
1286 |         fetchOptions.imageStartIndex =
1287 |           (a.imageStartIndex as number | undefined) ??
1288 |           fetchOptions.imageStartIndex;
1289 |         fetchOptions.enableFetchImages = a.enableFetchImages ?? false;
1290 |         fetchOptions.allowCrossOriginImages = a.allowCrossOriginImages ?? true;
1291 |         fetchOptions.saveImages = a.saveImages ?? true; // keep previous default behavior
1292 |         fetchOptions.returnBase64 = a.returnBase64 ?? false;
1293 |         // In legacy mode we preserve prior implicit behavior: individual images saved when any saving occurs
1294 |         fetchOptions.output =
1295 |           fetchOptions.saveImages && fetchOptions.returnBase64
1296 |             ? "both"
1297 |             : fetchOptions.returnBase64
1298 |               ? "base64"
1299 |               : fetchOptions.saveImages
1300 |                 ? "file"
1301 |                 : undefined;
1302 |         fetchOptions.layout = "merged"; // merged remains primary; individual saving handled inside legacy path
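        // Example of the legacy mapping above (hypothetical arguments):
        // { enableFetchImages: true, returnBase64: true } leaves saveImages at
        // its legacy default of true, so output resolves to "both"; adding
        // saveImages: false would resolve it to "base64" instead.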
1303 |       } else {
1304 |         // New API mode
1305 |         const imagesCfg = a.images;
1306 |         const textCfg = a.text || {};
1307 |         const securityCfg = a.security || {};
1308 | 
1309 |         fetchOptions.startIndex = textCfg.startIndex ?? fetchOptions.startIndex;
1310 |         fetchOptions.maxLength = textCfg.maxLength ?? fetchOptions.maxLength;
1311 |         fetchOptions.raw = textCfg.raw ?? false;
1312 | 
1313 |         // images: true | object | undefined (image fetching is off when omitted)
1314 |         const imagesEnabled =
1315 |           imagesCfg === undefined
1316 |             ? false
1317 |             : typeof imagesCfg === "boolean"
1318 |               ? imagesCfg
1319 |               : true;
1320 |         fetchOptions.enableFetchImages = imagesEnabled;
1321 | 
1322 |         if (imagesEnabled) {
1323 |           const cfg = (
1324 |             typeof imagesCfg === "object" && imagesCfg !== null
1325 |               ? (imagesCfg as Record<string, unknown>)
1326 |               : {}
1327 |           ) as {
1328 |             output?: "base64" | "file" | "both";
1329 |             layout?: "merged" | "individual" | "both";
1330 |             maxCount?: number;
1331 |             startIndex?: number;
1332 |             size?: { maxWidth?: number; maxHeight?: number; quality?: number };
1333 |             originPolicy?: "cross-origin" | "same-origin";
1334 |             saveDir?: string;
1335 |           };
1336 |           fetchOptions.imageMaxCount =
1337 |             cfg.maxCount ?? fetchOptions.imageMaxCount;
1338 |           fetchOptions.imageStartIndex =
1339 |             cfg.startIndex ?? fetchOptions.imageStartIndex;
1340 |           const size = cfg.size || {};
1341 |           fetchOptions.imageMaxWidth =
1342 |             size.maxWidth ?? fetchOptions.imageMaxWidth;
1343 |           fetchOptions.imageMaxHeight =
1344 |             size.maxHeight ?? fetchOptions.imageMaxHeight;
1345 |           fetchOptions.imageQuality = size.quality ?? fetchOptions.imageQuality;
1346 |           fetchOptions.allowCrossOriginImages =
1347 |             (cfg.originPolicy ?? "cross-origin") === "cross-origin";
1348 |           fetchOptions.saveImages =
1349 |             (cfg.output ?? "base64") === "file" ||
1350 |             (cfg.output ?? "base64") === "both";
1351 |           fetchOptions.returnBase64 =
1352 |             (cfg.output ?? "base64") === "base64" ||
1353 |             (cfg.output ?? "base64") === "both";
1354 |           fetchOptions.output = cfg.output ?? "base64";
1355 |           fetchOptions.layout = cfg.layout ?? "merged";
1356 |           // NOTE: saveDir (cfg.saveDir) is respected in save functions when implemented (future)
1357 |         }
1358 |         // security
1359 |         a.ignoreRobotsTxt = securityCfg.ignoreRobotsTxt ?? false;
1360 |       }
1361 | 
1362 |       // Respect robots.txt unless explicitly ignored
1363 |       if (!a.ignoreRobotsTxt && !IGNORE_ROBOTS_TXT) {
1364 |         await checkRobotsTxt(a.url, DEFAULT_USER_AGENT_AUTONOMOUS);
1365 |       }
1366 | 
1367 |       const { content, images, remainingContent, remainingImages, title } =
1368 |         await fetchUrl(
1369 |           a.url,
1370 |           DEFAULT_USER_AGENT_AUTONOMOUS,
1371 |           fetchOptions.raw ?? false,
1372 |           fetchOptions
1373 |         );
1374 | 
1375 |       let finalContent = content.slice(
1376 |         fetchOptions.startIndex,
1377 |         fetchOptions.startIndex + fetchOptions.maxLength
1378 |       );
1379 | 
1380 |       // Append information about remaining content
1381 |       const remainingInfo: string[] = [];
1382 |       if (remainingContent > 0) {
1383 |         remainingInfo.push(`${remainingContent} characters of text remaining`);
1384 |       }
1385 |       if (remainingImages > 0) {
1386 |         remainingInfo.push(
1387 |           `${remainingImages} more images available (${fetchOptions.imageStartIndex + images.length}/${fetchOptions.imageStartIndex + images.length + remainingImages} shown)`
1388 |         );
1389 |       }
1390 | 
1391 |       if (remainingInfo.length > 0) {
1392 |         finalContent += `\n\nContent truncated. ${remainingInfo.join(", ")}. Call the imageFetch tool with startIndex=${
1393 |           fetchOptions.startIndex + fetchOptions.maxLength
1394 |         } and/or imageStartIndex=${fetchOptions.imageStartIndex + images.length} to get more content.`;
1395 |       }
1396 | 
1397 |       // Build the MCP response
1398 |       const responseContent: MCPResponseContent[] = [
1399 |         {
1400 |           type: "text",
1401 |           text: `Contents of ${parsed.data.url}${title ? `: ${title}` : ""}:\n${finalContent}`,
1402 |         },
1403 |       ];
1404 | 
1405 |       // Attach images (only when base64 data is present)
1406 |       for (const image of images) {
1407 |         if (image.data) {
1408 |           responseContent.push({
1409 |             type: "image",
1410 |             mimeType: image.mimeType,
1411 |             data: image.data,
1412 |           });
1413 |         }
1414 |       }
1415 | 
1416 |       // Report any files that were saved to disk
1417 |       const savedFiles = images.filter((img) => img.filePath);
1418 |       if (savedFiles.length > 0) {
1419 |         const fileInfoText = savedFiles
1420 |           .map((img, index) => `Image ${index + 1} saved to: ${img.filePath}`)
1421 |           .join("\n");
1422 | 
1423 |         responseContent.push({
1424 |           type: "text",
1425 |           text: `\n📁 Saved Images:\n${fileInfoText}`,
1426 |         });
1427 |       }
1428 | 
1429 |       return {
1430 |         content: responseContent,
1431 |       };
1432 |     } catch (error) {
1433 |       return {
1434 |         content: [
1435 |           {
1436 |             type: "text",
1437 |             text: `Error: ${error instanceof Error ? error.message : String(error)}`,
1438 |           },
1439 |         ],
1440 |         isError: true,
1441 |       };
1442 |     }
1443 |   }
1444 | );
1445 | 
1446 | // Resources handlers
1447 | const ListResourcesSchema = z.object({
1448 |   method: z.literal("resources/list"),
1449 | });
1450 | 
1451 | const ReadResourceSchema = z.object({
1452 |   method: z.literal("resources/read"),
1453 |   params: z.object({
1454 |     uri: z.string(),
1455 |   }),
1456 | });
1457 | 
1458 | server.setRequestHandler(
1459 |   ListResourcesSchema,
1460 |   async (_request: { method: "resources/list" }) => {
1461 |     const resources = Array.from(imageResources.values()).map((resource) => ({
1462 |       uri: resource.uri,
1463 |       name: resource.name,
1464 |       description: resource.description,
1465 |       mimeType: resource.mimeType,
1466 |     }));
1467 | 
1468 |     return {
1469 |       resources,
1470 |     };
1471 |   }
1472 | );
1473 | 
1474 | server.setRequestHandler(
1475 |   ReadResourceSchema,
1476 |   async (request: { method: "resources/read"; params: { uri: string } }) => {
1477 |     const resource = imageResources.get(request.params.uri);
1478 | 
1479 |     if (!resource) {
1480 |       throw new Error(`Resource not found: ${request.params.uri}`);
1481 |     }
1482 | 
1483 |     try {
1484 |       const fileData = await fs.readFile(resource.filePath);
1485 |       const base64Data = fileData.toString("base64");
1486 | 
1487 |       return {
1488 |         contents: [
1489 |           {
1490 |             uri: resource.uri,
1491 |             mimeType: resource.mimeType,
1492 |             blob: base64Data,
1493 |           },
1494 |         ],
1495 |       };
1496 |     } catch (error) {
1497 |       throw new Error(`Failed to read resource file: ${error}`);
1498 |     }
1499 |   }
1500 | );
1501 | 
1502 | // Start server
1503 | async function runServer() {
1504 |   // Register previously saved files as resources at startup
1505 |   await scanAndRegisterExistingFiles();
1506 | 
1507 |   const transport = new StdioServerTransport();
1508 |   await server.connect(transport);
1509 |   serverConnected = true;
1510 | }
1511 | 
1512 | if (process.env.MCP_FETCH_DISABLE_SERVER !== "1") {
1513 |   runServer().catch((error) => {
1514 |     process.stderr.write(`Fatal error running server: ${error}\n`);
1515 |     process.exit(1);
1516 |   });
1517 | }
1518 | 
1519 | export { fetchUrl };
1520 | 
--------------------------------------------------------------------------------
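For reference, a minimal sketch (not part of the repo) of exercising the exported `fetchUrl` from a Vitest test. The `"../index.js"` specifier, the user-agent string, and the network call to example.com are assumptions; setting `MCP_FETCH_DISABLE_SERVER=1` before the import uses the guard above so importing `index.ts` does not start the stdio server.

```ts
import { describe, expect, it } from "vitest";

describe("fetchUrl (sketch)", () => {
  it("returns markdown content and image metadata", async () => {
    // Must be set before importing the module, or the server would start.
    process.env.MCP_FETCH_DISABLE_SERVER = "1";
    const { fetchUrl } = await import("../index.js"); // assumed relative path

    // Fetch with default options: images disabled, first 20000 characters.
    const result = await fetchUrl("https://example.com", "test-agent/1.0");

    expect(result.content.length).toBeGreaterThan(0);
    expect(Array.isArray(result.images)).toBe(true);
  });
});
```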