├── .gitignore ├── tsconfig.json ├── package.json ├── README.md └── src └── index.ts /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | build/ 3 | *.log 4 | .env* -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "Node16", 5 | "moduleResolution": "Node16", 6 | "outDir": "./build", 7 | "rootDir": "./src", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true 12 | }, 13 | "include": ["src/**/*"], 14 | "exclude": ["node_modules"] 15 | } 16 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "website-downloader", 3 | "version": "0.1.0", 4 | "description": "Download entire websites.", 5 | "private": true, 6 | "type": "module", 7 | "bin": { 8 | "website-downloader": "./build/index.js" 9 | }, 10 | "files": [ 11 | "build" 12 | ], 13 | "scripts": { 14 | "build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"", 15 | "prepare": "npm run build", 16 | "watch": "tsc --watch", 17 | "inspector": "npx @modelcontextprotocol/inspector build/index.js" 18 | }, 19 | "dependencies": { 20 | "@modelcontextprotocol/sdk": "^0.6.0" 21 | }, 22 | "devDependencies": { 23 | "@types/node": "^20.17.10", 24 | "typescript": "^5.3.3" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Website Downloader MCP Server 2 | 3 | This MCP server provides a tool to download entire websites using wget. It preserves the website structure and converts links to work locally. 
4 | 
5 | 
6 | 
7 | ## Prerequisites
8 | 
9 | The server requires `wget` to be installed on your system.
10 | 
11 | ### Installing wget
12 | 
13 | #### macOS
14 | Using Homebrew:
15 | ```bash
16 | brew install wget
17 | ```
18 | 
19 | #### Linux (Debian/Ubuntu)
20 | ```bash
21 | sudo apt-get update
22 | sudo apt-get install wget
23 | ```
24 | 
25 | #### Linux (Red Hat/Fedora)
26 | ```bash
27 | sudo dnf install wget
28 | ```
29 | 
30 | #### Windows
31 | 1. Using [Chocolatey](https://chocolatey.org/):
32 | ```bash
33 | choco install wget
34 | ```
35 | 
36 | 2. Or download the binary from: https://eternallybored.org/misc/wget/
37 |    - Download the latest wget.exe
38 |    - Place it in a directory that's in your PATH (e.g., C:\Windows\System32)
39 | 
40 | ## Usage
41 | 
42 | The server provides a tool called `download_website` with the following parameters:
43 | 
44 | - `url` (required): The URL of the website to download
45 | - `outputPath` (optional): The directory where the website should be downloaded. Defaults to the current directory.
46 | - `depth` (optional): Maximum depth level for recursive downloading. Defaults to infinite. Set to 0 for just the specified page, 1 for direct links, etc.
47 | 
48 | ### Example
49 | 
50 | ```json
51 | {
52 |   "url": "https://example.com",
53 |   "outputPath": "/path/to/output",
54 |   "depth": 2
55 | }
56 | ```
57 | 
58 | ## Features
59 | 
60 | The website downloader:
61 | - Downloads recursively with infinite depth
62 | - Includes all page requisites (CSS, images, etc.)
63 | - Converts links to work locally
64 | - Adds appropriate extensions to files
65 | - Restricts downloads to the same domain
66 | - Preserves the website structure
67 | 
68 | ## Installation
69 | 
70 | 1. Build the server:
71 | ```bash
72 | npm install
73 | npm run build
74 | ```
75 | 
76 | 2. 
Add to MCP settings: 77 | ```json 78 | { 79 | "mcpServers": { 80 | "website-downloader": { 81 | "command": "node", 82 | "args": ["/path/to/website-downloader/build/index.js"] 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import { Server } from '@modelcontextprotocol/sdk/server/index.js'; 3 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; 4 | import { 5 | CallToolRequestSchema, 6 | ErrorCode, 7 | ListToolsRequestSchema, 8 | McpError, 9 | } from '@modelcontextprotocol/sdk/types.js'; 10 | import { exec } from 'child_process'; 11 | import { promisify } from 'util'; 12 | import { existsSync } from 'fs'; 13 | 14 | const execAsync = promisify(exec); 15 | 16 | interface DownloadWebsiteArgs { 17 | url: string; 18 | outputPath?: string; 19 | depth?: number; 20 | } 21 | 22 | const isValidDownloadArgs = (args: any): args is DownloadWebsiteArgs => 23 | typeof args === 'object' && 24 | args !== null && 25 | typeof args.url === 'string' && 26 | (args.outputPath === undefined || typeof args.outputPath === 'string') && 27 | (args.depth === undefined || (typeof args.depth === 'number' && args.depth >= 0)); 28 | 29 | class WebsiteDownloaderServer { 30 | private server: Server; 31 | 32 | constructor() { 33 | this.server = new Server( 34 | { 35 | name: 'website-downloader', 36 | version: '0.1.0', 37 | }, 38 | { 39 | capabilities: { 40 | tools: {}, 41 | }, 42 | } 43 | ); 44 | 45 | this.setupToolHandlers(); 46 | 47 | this.server.onerror = (error) => console.error('[MCP Error]', error); 48 | process.on('SIGINT', async () => { 49 | await this.server.close(); 50 | process.exit(0); 51 | }); 52 | } 53 | 54 | private setupToolHandlers() { 55 | this.server.setRequestHandler(ListToolsRequestSchema, async () => ({ 56 | tools: [ 57 | { 58 | name: 'download_website', 59 | 
description: 'Download an entire website using wget', 60 | inputSchema: { 61 | type: 'object', 62 | properties: { 63 | url: { 64 | type: 'string', 65 | description: 'URL of the website to download', 66 | }, 67 | outputPath: { 68 | type: 'string', 69 | description: 'Path where the website should be downloaded (optional, defaults to current directory)', 70 | }, 71 | depth: { 72 | type: 'number', 73 | description: 'Maximum depth level for recursive downloading (optional, defaults to infinite)', 74 | minimum: 0 75 | } 76 | }, 77 | required: ['url'], 78 | }, 79 | }, 80 | ], 81 | })); 82 | 83 | this.server.setRequestHandler(CallToolRequestSchema, async (request) => { 84 | if (request.params.name !== 'download_website') { 85 | throw new McpError( 86 | ErrorCode.MethodNotFound, 87 | `Unknown tool: ${request.params.name}` 88 | ); 89 | } 90 | 91 | if (!isValidDownloadArgs(request.params.arguments)) { 92 | throw new McpError( 93 | ErrorCode.InvalidParams, 94 | 'Invalid download arguments' 95 | ); 96 | } 97 | 98 | const { url, outputPath = process.cwd(), depth } = request.params.arguments; 99 | 100 | try { 101 | // Check if wget is installed 102 | await execAsync('which wget'); 103 | } catch (error: any) { 104 | return { 105 | content: [ 106 | { 107 | type: 'text', 108 | text: `Error downloading website: ${error.message || 'Unknown error'}` 109 | }, 110 | ], 111 | isError: true, 112 | }; 113 | } 114 | 115 | try { 116 | // Create wget command with options for downloading website 117 | const wgetCommand = [ 118 | 'wget', 119 | '--recursive', // Download recursively 120 | `--level=${depth !== undefined ? 
depth : 'inf'}`, // Recursion depth (infinite if not specified) 121 | '--page-requisites', // Get all assets needed to display the page 122 | '--convert-links', // Convert links to work locally 123 | '--adjust-extension', // Add appropriate extensions to files 124 | '--span-hosts', // Include necessary resources from other hosts 125 | '--domains=' + new URL(url).hostname, // Restrict to same domain 126 | '--no-parent', // Don't follow links to parent directory 127 | '--directory-prefix=' + outputPath, // Output directory 128 | url 129 | ].join(' '); 130 | 131 | const { stdout, stderr } = await execAsync(wgetCommand); 132 | 133 | return { 134 | content: [ 135 | { 136 | type: 'text', 137 | text: `Website downloaded successfully to ${outputPath}\n\nOutput:\n${stdout}\n${stderr}`, 138 | }, 139 | ], 140 | }; 141 | } catch (error: any) { 142 | return { 143 | content: [ 144 | { 145 | type: 'text', 146 | text: `Error downloading website: ${error.message || 'Unknown error'}`, 147 | }, 148 | ], 149 | isError: true, 150 | }; 151 | } 152 | }); 153 | } 154 | 155 | async run() { 156 | const transport = new StdioServerTransport(); 157 | await this.server.connect(transport); 158 | console.error('Website Downloader MCP server running on stdio'); 159 | } 160 | } 161 | 162 | const server = new WebsiteDownloaderServer(); 163 | server.run().catch(console.error); 164 | --------------------------------------------------------------------------------