├── .gitignore ├── README.md ├── package-lock.json ├── package.json ├── src └── index.ts └── tsconfig.json /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | build/ 3 | *.log 4 | .env* -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Website Downloader MCP Server 2 | 3 | This MCP server provides a tool to download entire websites using wget. It preserves the website structure and converts links to work locally. 4 | 5 | Google Workspace Server MCP server 6 | 7 | ## Prerequisites 8 | 9 | The server requires `wget` to be installed on your system. 10 | 11 | ### Installing wget 12 | 13 | #### macOS 14 | Using Homebrew: 15 | ```bash 16 | brew install wget 17 | ``` 18 | 19 | #### Linux (Debian/Ubuntu) 20 | ```bash 21 | sudo apt-get update 22 | sudo apt-get install wget 23 | ``` 24 | 25 | #### Linux (Red Hat/Fedora) 26 | ```bash 27 | sudo dnf install wget 28 | ``` 29 | 30 | #### Windows 31 | 1. Using [Chocolatey](https://chocolatey.org/): 32 | ```bash 33 | choco install wget 34 | ``` 35 | 36 | 2. Or download the binary from: https://eternallybored.org/misc/wget/ 37 | - Download the latest wget.exe 38 | - Place it in a directory that's in your PATH (e.g., C:\Windows\System32) 39 | 40 | ## Usage 41 | 42 | The server provides a tool called `download_website` with the following parameters: 43 | 44 | - `url` (required): The URL of the website to download 45 | - `outputPath` (optional): The directory where the website should be downloaded. Defaults to the current directory. 46 | - `depth` (optional): Maximum depth level for recursive downloading. Defaults to infinite. Set to 0 for just the specified page, 1 for direct links, etc. 47 | 48 | ### Example 49 | 50 | ```json 51 | { 52 | "url": "https://example.com", 53 | "outputPath": "/path/to/output", 54 | "depth": 2 // Optional: Download up to 2 levels deep 55 | } 56 | ``` 57 | 58 | ## Features 59 | 60 | The website downloader: 61 | - Downloads recursively with infinite depth 62 | - Includes all page requisites (CSS, images, etc.) 63 | - Converts links to work locally 64 | - Adds appropriate extensions to files 65 | - Restricts downloads to the same domain 66 | - Preserves the website structure 67 | 68 | ## Installation 69 | 70 | 1. Build the server: 71 | ```bash 72 | npm install 73 | npm run build 74 | ``` 75 | 76 | 2. Add to MCP settings: 77 | ```json 78 | { 79 | "mcpServers": { 80 | "website-downloader": { 81 | "command": "node", 82 | "args": ["/path/to/website-downloader/build/index.js"] 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "website-downloader", 3 | "version": "0.1.0", 4 | "lockfileVersion": 3, 5 | "requires": true, 6 | "packages": { 7 | "": { 8 | "name": "website-downloader", 9 | "version": "0.1.0", 10 | "dependencies": { 11 | "@modelcontextprotocol/sdk": "^0.6.0" 12 | }, 13 | "bin": { 14 | "website-downloader": "build/index.js" 15 | }, 16 | "devDependencies": { 17 | "@types/node": "^20.17.10", 18 | "typescript": "^5.3.3" 19 | } 20 | }, 21 | "node_modules/@modelcontextprotocol/sdk": { 22 | "version": "0.6.0", 23 | "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-0.6.0.tgz", 24 | "integrity": "sha512-9rsDudGhDtMbvxohPoMMyAUOmEzQsOK+XFchh6gZGqo8sx9sBuZQs+CUttXqa8RZXKDaJRCN2tUtgGof7jRkkw==", 25 | "dependencies": { 26 | "content-type": "^1.0.5", 27 | "raw-body": "^3.0.0", 28 | "zod": "^3.23.8" 29 | } 30 | }, 31 | "node_modules/@types/node": { 32 | "version": "20.17.10", 33 | "resolved": "https://registry.npmjs.org/@types/node/-/node-20.17.10.tgz", 34 | "integrity": "sha512-/jrvh5h6NXhEauFFexRin69nA0uHJ5gwk4iDivp/DeoEua3uwCUto6PC86IpRITBOs4+6i2I56K5x5b6WYGXHA==", 35 | "dev": true, 36 | "dependencies": { 37 | "undici-types": "~6.19.2" 38 | } 39 | }, 40 | "node_modules/bytes": { 41 | "version": "3.1.2", 42 | "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", 43 | "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==", 44 | "engines": { 45 | "node": ">= 0.8" 46 | } 47 | }, 48 | "node_modules/content-type": { 49 | "version": "1.0.5", 50 | "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", 51 | "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==", 52 | "engines": { 53 | "node": ">= 0.6" 54 | } 55 | }, 56 | "node_modules/depd": { 57 | "version": "2.0.0", 58 | "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", 59 | "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", 60 | "engines": { 61 | "node": ">= 0.8" 62 | } 63 | }, 64 | "node_modules/http-errors": { 65 | "version": "2.0.0", 66 | "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz", 67 | "integrity": "sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==", 68 | "dependencies": { 69 | "depd": "2.0.0", 70 | "inherits": "2.0.4", 71 | "setprototypeof": "1.2.0", 72 | "statuses": "2.0.1", 73 | "toidentifier": "1.0.1" 74 | }, 75 | "engines": { 76 | "node": ">= 0.8" 77 | } 78 | }, 79 | "node_modules/iconv-lite": { 80 | "version": "0.6.3", 81 | "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", 82 | "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", 83 | "dependencies": { 84 | "safer-buffer": ">= 2.1.2 < 3.0.0" 85 | }, 86 | "engines": { 87 | "node": ">=0.10.0" 88 | } 89 | }, 90 | "node_modules/inherits": { 91 | "version": "2.0.4", 92 | "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", 93 | "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" 94 | }, 95 | "node_modules/raw-body": { 96 | "version": "3.0.0", 97 | "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-3.0.0.tgz", 98 | "integrity": "sha512-RmkhL8CAyCRPXCE28MMH0z2PNWQBNk2Q09ZdxM9IOOXwxwZbN+qbWaatPkdkWIKL2ZVDImrN/pK5HTRz2PcS4g==", 99 | "dependencies": { 100 | "bytes": "3.1.2", 101 | "http-errors": "2.0.0", 102 | "iconv-lite": "0.6.3", 103 | "unpipe": "1.0.0" 104 | }, 105 | "engines": { 106 | "node": ">= 0.8" 107 | } 108 | }, 109 | "node_modules/safer-buffer": { 110 | "version": "2.1.2", 111 | "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", 112 | "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" 113 | }, 114 | "node_modules/setprototypeof": { 115 | "version": "1.2.0", 116 | "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", 117 | "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==" 118 | }, 119 | "node_modules/statuses": { 120 | "version": "2.0.1", 121 | "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz", 122 | "integrity": "sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==", 123 | "engines": { 124 | "node": ">= 0.8" 125 | } 126 | }, 127 | "node_modules/toidentifier": { 128 | "version": "1.0.1", 129 | "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", 130 | "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", 131 | "engines": { 132 | "node": ">=0.6" 133 | } 134 | }, 135 | "node_modules/typescript": { 136 | "version": "5.7.2", 137 | "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.2.tgz", 138 | "integrity": "sha512-i5t66RHxDvVN40HfDd1PsEThGNnlMCMT3jMUuoh9/0TaqWevNontacunWyN02LA9/fIbEWlcHZcgTKb9QoaLfg==", 139 | "dev": true, 140 | "bin": { 141 | "tsc": "bin/tsc", 142 | "tsserver": "bin/tsserver" 143 | }, 144 | "engines": { 145 | "node": ">=14.17" 146 | } 147 | }, 148 | "node_modules/undici-types": { 149 | "version": "6.19.8", 150 | "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz", 151 | "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==", 152 | "dev": true 153 | }, 154 | "node_modules/unpipe": { 155 | "version": "1.0.0", 156 | "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", 157 | "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==", 158 | "engines": { 159 | "node": ">= 0.8" 160 | } 161 | }, 162 | "node_modules/zod": { 163 | "version": "3.24.1", 164 | "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.1.tgz", 165 | "integrity": "sha512-muH7gBL9sI1nciMZV67X5fTKKBLtwpZ5VBp1vsOQzj1MhrBZ4wlVCm3gedKZWLp0Oyel8sIGfeiz54Su+OVT+A==", 166 | "funding": { 167 | "url": "https://github.com/sponsors/colinhacks" 168 | } 169 | } 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "website-downloader", 3 | "version": "0.1.0", 4 | "description": "Download entire websites.", 5 | "private": true, 6 | "type": "module", 7 | "bin": { 8 | "website-downloader": "./build/index.js" 9 | }, 10 | "files": [ 11 | "build" 12 | ], 13 | "scripts": { 14 | "build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"", 15 | "prepare": "npm run build", 16 | "watch": "tsc --watch", 17 | "inspector": "npx @modelcontextprotocol/inspector build/index.js" 18 | }, 19 | "dependencies": { 20 | "@modelcontextprotocol/sdk": "^0.6.0" 21 | }, 22 | "devDependencies": { 23 | "@types/node": "^20.17.10", 24 | "typescript": "^5.3.3" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import { Server } from '@modelcontextprotocol/sdk/server/index.js'; 3 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; 4 | import { 5 | CallToolRequestSchema, 6 | ErrorCode, 7 | ListToolsRequestSchema, 8 | McpError, 9 | } from '@modelcontextprotocol/sdk/types.js'; 10 | import { exec } from 'child_process'; 11 | import { promisify } from 'util'; 12 | import { existsSync } from 'fs'; 13 | 14 | const execAsync = promisify(exec); 15 | 16 | interface DownloadWebsiteArgs { 17 | url: string; 18 | outputPath?: string; 19 | depth?: number; 20 | } 21 | 22 | const isValidDownloadArgs = (args: any): args is DownloadWebsiteArgs => 23 | typeof args === 'object' && 24 | args !== null && 25 | typeof args.url === 'string' && 26 | (args.outputPath === undefined || typeof args.outputPath === 'string') && 27 | (args.depth === undefined || (typeof args.depth === 'number' && args.depth >= 0)); 28 | 29 | class WebsiteDownloaderServer { 30 | private server: Server; 31 | 32 | constructor() { 33 | this.server = new Server( 34 | { 35 | name: 'website-downloader', 36 | version: '0.1.0', 37 | }, 38 | { 39 | capabilities: { 40 | tools: {}, 41 | }, 42 | } 43 | ); 44 | 45 | this.setupToolHandlers(); 46 | 47 | this.server.onerror = (error) => console.error('[MCP Error]', error); 48 | process.on('SIGINT', async () => { 49 | await this.server.close(); 50 | process.exit(0); 51 | }); 52 | } 53 | 54 | private setupToolHandlers() { 55 | this.server.setRequestHandler(ListToolsRequestSchema, async () => ({ 56 | tools: [ 57 | { 58 | name: 'download_website', 59 | description: 'Download an entire website using wget', 60 | inputSchema: { 61 | type: 'object', 62 | properties: { 63 | url: { 64 | type: 'string', 65 | description: 'URL of the website to download', 66 | }, 67 | outputPath: { 68 | type: 'string', 69 | description: 'Path where the website should be downloaded (optional, defaults to current directory)', 70 | }, 71 | depth: { 72 | type: 'number', 73 | description: 'Maximum depth level for recursive downloading (optional, defaults to infinite)', 74 | minimum: 0 75 | } 76 | }, 77 | required: ['url'], 78 | }, 79 | }, 80 | ], 81 | })); 82 | 83 | this.server.setRequestHandler(CallToolRequestSchema, async (request) => { 84 | if (request.params.name !== 'download_website') { 85 | throw new McpError( 86 | ErrorCode.MethodNotFound, 87 | `Unknown tool: ${request.params.name}` 88 | ); 89 | } 90 | 91 | if (!isValidDownloadArgs(request.params.arguments)) { 92 | throw new McpError( 93 | ErrorCode.InvalidParams, 94 | 'Invalid download arguments' 95 | ); 96 | } 97 | 98 | const { url, outputPath = process.cwd(), depth } = request.params.arguments; 99 | 100 | try { 101 | // Check if wget is installed 102 | await execAsync('which wget'); 103 | } catch (error: any) { 104 | return { 105 | content: [ 106 | { 107 | type: 'text', 108 | text: `Error downloading website: ${error.message || 'Unknown error'}` 109 | }, 110 | ], 111 | isError: true, 112 | }; 113 | } 114 | 115 | try { 116 | // Create wget command with options for downloading website 117 | const wgetCommand = [ 118 | 'wget', 119 | '--recursive', // Download recursively 120 | `--level=${depth !== undefined ? depth : 'inf'}`, // Recursion depth (infinite if not specified) 121 | '--page-requisites', // Get all assets needed to display the page 122 | '--convert-links', // Convert links to work locally 123 | '--adjust-extension', // Add appropriate extensions to files 124 | '--span-hosts', // Include necessary resources from other hosts 125 | '--domains=' + new URL(url).hostname, // Restrict to same domain 126 | '--no-parent', // Don't follow links to parent directory 127 | '--directory-prefix=' + outputPath, // Output directory 128 | url 129 | ].join(' '); 130 | 131 | const { stdout, stderr } = await execAsync(wgetCommand); 132 | 133 | return { 134 | content: [ 135 | { 136 | type: 'text', 137 | text: `Website downloaded successfully to ${outputPath}\n\nOutput:\n${stdout}\n${stderr}`, 138 | }, 139 | ], 140 | }; 141 | } catch (error: any) { 142 | return { 143 | content: [ 144 | { 145 | type: 'text', 146 | text: `Error downloading website: ${error.message || 'Unknown error'}`, 147 | }, 148 | ], 149 | isError: true, 150 | }; 151 | } 152 | }); 153 | } 154 | 155 | async run() { 156 | const transport = new StdioServerTransport(); 157 | await this.server.connect(transport); 158 | console.error('Website Downloader MCP server running on stdio'); 159 | } 160 | } 161 | 162 | const server = new WebsiteDownloaderServer(); 163 | server.run().catch(console.error); 164 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "Node16", 5 | "moduleResolution": "Node16", 6 | "outDir": "./build", 7 | "rootDir": "./src", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true 12 | }, 13 | "include": ["src/**/*"], 14 | "exclude": ["node_modules"] 15 | } 16 | --------------------------------------------------------------------------------