├── .gitignore ├── .npmignore ├── LICENSE ├── README.md ├── docs ├── google-search-spec.md └── publish-npm.md ├── jest.config.mjs ├── package-lock.json ├── package.json ├── src ├── index.ts ├── tools │ ├── fetch.ts │ └── search.ts ├── toolsImpl │ ├── searchTool │ │ └── index.ts │ └── webFetchTool │ │ ├── htmlParser.ts │ │ ├── index.ts │ │ ├── scriptGenerator.ts │ │ └── types.ts ├── types │ └── search.ts └── utils │ ├── osascript.ts │ └── url.ts ├── test ├── data │ └── wikipedia.home.html ├── integration │ ├── web-fetch.test.ts │ └── web-search-tool.test.ts └── unit │ ├── web-fetch.test.ts │ └── web-search-url.test.ts └── tsconfig.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | node_modules/ 3 | .pnp/ 4 | .pnp.js 5 | 6 | # Production build 7 | dist/ 8 | build/ 9 | 10 | # TypeScript cache 11 | *.tsbuildinfo 12 | 13 | # Environment variables 14 | .env 15 | .env.local 16 | .env.development.local 17 | .env.test.local 18 | .env.production.local 19 | 20 | # Logs 21 | logs/ 22 | *.log 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | 27 | # Editor directories and files 28 | .idea/ 29 | .vscode/ 30 | *.swp 31 | *.swo 32 | .DS_Store 33 | *.sublime-workspace 34 | *.sublime-project 35 | 36 | # Testing 37 | coverage/ 38 | 39 | # Temporary files 40 | *.tmp 41 | *.temp 42 | 43 | # Debug 44 | .debug/ 45 | 46 | # OS generated files 47 | .DS_Store 48 | .DS_Store? 
49 | ._* 50 | .Spotlight-V100 51 | .Trashes 52 | ehthumbs.db 53 | Thumbs.db 54 | .aider* 55 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | # Source 2 | src/ 3 | 4 | # Tests 5 | **/*.test.ts 6 | **/*.spec.ts 7 | coverage/ 8 | jest.config.js 9 | 10 | # Development configs 11 | .eslintrc 12 | .prettierrc 13 | tsconfig.json 14 | .editorconfig 15 | .git* 16 | 17 | # IDE 18 | .vscode/ 19 | .idea/ 20 | 21 | # Logs 22 | *.log 23 | npm-debug.log* 24 | 25 | # Dependencies 26 | node_modules/ 27 | 28 | # Misc 29 | .DS_Store 30 | *.env 31 | .env.* -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MCP Chrome Google Search Tool 2 | 3 | MCP tool for Google search and webpage content extraction using Chrome browser. Works with Claude to enable Google search and content fetching capabilities. 4 | 5 | ## Quick Installation 6 | 7 | 1. **Configure Claude Desktop** 8 | - Open Claude Desktop on Mac 9 | - Go to Claude > Settings > Developer > Edit Config 10 | - Add the following to your config file: 11 | ```json 12 | { 13 | "mcpServers": { 14 | "mcp-chrome-google-search": { 15 | "command": "npx", 16 | "args": [ 17 | "-y", 18 | "@cmann50/mcp-chrome-google-search" 19 | ] 20 | } 21 | } 22 | } 23 | ``` 24 | - Restart Claude Desktop 25 | 26 | 2. **First Time Setup** 27 | - **Grant Accessibility Permissions** 28 | - On first run, approve the macOS accessibility permissions prompt 29 | - Navigate to: System Preferences > Security & Privacy > Privacy > Accessibility 30 | - Add and enable permissions for your terminal app 31 | 32 | - **Enable Chrome JavaScript from Apple Events** 33 | - Open Chrome 34 | - Navigate to: View > Developer > Allow JavaScript from Apple Events 35 | - One-time setup only 36 | 37 | Once configured, Claude will be able to perform Google searches and extract webpage content through Chrome when you make requests. 38 | 39 | ## Key Advantages 40 | 41 | - Searches Google for free 42 | - Opens a small window in your own Chrome browser, so requests should not get blocked 43 | - Because it uses your own Chrome window, it can access authenticated content. Claude can just open the URL in your browser. 44 | 45 | ## Platform Support 46 | - ✅ macOS 47 | - ❌ Windows (not supported) 48 | - ❌ Linux (not supported) 49 | 50 | ## Requirements 51 | 1. macOS 52 | 2. Google Chrome 53 | 3. 
Node.js 20 or higher 54 | 55 | ## Alternative Installation Methods 56 | 57 | ### NPX Installation 58 | ```bash 59 | npx @cmann50/mcp-chrome-google-search 60 | ``` 61 | 62 | 63 | ### Custom Installation 64 | 1. Checkout from git 65 | 2. Run `npm run build` 66 | 3. Add to Claude config (use absolute path): 67 | ```json 68 | { 69 | "google-tools": { 70 | "command": "node", 71 | "args": [ 72 | "/your/checkout/path/mcp/mcp-chrome-google-search/dist/index.js" 73 | ] 74 | } 75 | } 76 | ``` 77 | 78 | ## Local development 79 | 80 | To test changes locally, bump the version in package.json and run the following 81 | to install your checkout globally in edit mode: 82 | ``` 83 | npm install -g . 84 | ``` 85 | Then run `npm run build`; the compiled files go into `dist`, which Claude is monitoring 86 | 87 | Then press Ctrl-R in Claude Desktop to reload; there is no need to restart it 88 | 89 | ## Debugging 90 | 91 | ### Log Monitoring 92 | ```bash 93 | # Follow logs in real-time 94 | tail -n 20 -F ~/Library/Logs/Claude/mcp*.log 95 | ``` 96 | 97 | ### Dev Tools Access 98 | 1. Enable developer settings: 99 | ```bash 100 | echo '{"allowDevTools": true}' > ~/Library/Application\ Support/Claude/developer_settings.json 101 | ``` 102 | 2. Open DevTools: press Command-Option-Shift-I in Claude Desktop 103 | 3. Press Ctrl-R in Claude Desktop while tailing the logs to get more detailed errors 104 | 105 | ## Troubleshooting 106 | 107 | ### Chrome JavaScript Error 108 | If you see: 109 | ``` 110 | execution error: Google Chrome got an error: Executing JavaScript through AppleScript 111 | is turned off. For more information: https://support.google.com/chrome/?p=applescript (12) 112 | ``` 113 | 114 | Solution: 115 | 1. Open Chrome 116 | 2. View > Developer > Allow JavaScript from Apple Events 117 | 118 | ### Accessibility Permission Issues 119 | If Chrome control fails: 120 | 1. Open System Preferences 121 | 2. Security & Privacy > Privacy > Accessibility 122 | 3. Ensure your terminal app is listed and enabled 123 | 4. 
Use lock icon to make changes if needed 124 | 125 | ## Implementation Details 126 | 127 | - Uses AppleScript for Chrome control 128 | - Visible automation - Chrome windows will open/navigate 129 | - Each request opens a new Chrome tab 130 | - Close unused tabs periodically for optimal performance 131 | - Only use with trusted Claude instances (has Chrome control access) 132 | 133 | ## Support 134 | 135 | - Create GitHub issues for problems 136 | - Include macOS and Chrome version details 137 | 138 | ## License 139 | 140 | MIT License - see LICENSE file for details -------------------------------------------------------------------------------- /docs/google-search-spec.md: -------------------------------------------------------------------------------- 1 | # Google Search URL Specification 2 | 3 | ## Basic Search URLs 4 | 5 | ``` 6 | # Basic search 7 | https://www.google.com/search?q=news 8 | 9 | # Site-specific search 10 | https://www.google.com/search?q=site:apple.com+news 11 | 12 | # Site-specific search with time filter 13 | https://www.google.com/search?q=site:apple.com+news&tbs=qdr:d 14 | ``` 15 | 16 | ## Time Filter Parameters 17 | 18 | Relative time filters using `tbs=qdr:X`: 19 | ``` 20 | h : past hour 21 | d : past 24 hours 22 | w : past week 23 | m : past month 24 | y : past year 25 | ``` 26 | 27 | Custom date range using `tbs=cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY`: 28 | ``` 29 | Example: tbs=cdr:1,cd_min:12/1/2024,cd_max:12/31/2024 30 | ``` 31 | 32 | ## TypeScript Interface 33 | 34 | ```typescript 35 | interface SearchParams { 36 | query_text: string; // Plain text to search for (no Google operators) 37 | 38 | site?: string; // Optional site restriction (e.g. 
"apple.com") 39 | timeframe?: { 40 | type: 'relative'; // For qdr: filters 41 | period: 'h' | 'd' | 'w' | 'm' | 'y'; 42 | } | { 43 | type: 'custom'; // For custom date range 44 | startDate: Date; // Will be formatted as MM/DD/YYYY 45 | endDate: Date; 46 | }; 47 | } 48 | ``` 49 | 50 | Note: The query_text parameter and date portions in custom date ranges require URL encoding. -------------------------------------------------------------------------------- /docs/publish-npm.md: -------------------------------------------------------------------------------- 1 | # Publishing Guide 2 | 3 | ## NPM 4 | ```bash 5 | # Update version 6 | npm version patch|minor|major 7 | 8 | # Build and publish 9 | npm run build 10 | npm publish --access public 11 | ``` 12 | 13 | ## GitHub 14 | ```bash 15 | # Push changes 16 | git add . 17 | git commit -m "feat: description" 18 | git push origin main 19 | 20 | # Push tags 21 | git push --tags 22 | ``` 23 | 24 | Note: Ensure you're logged into npm (`npm login`) and GitHub before publishing. 
-------------------------------------------------------------------------------- /jest.config.mjs: -------------------------------------------------------------------------------- 1 | export default { 2 | preset: 'ts-jest', 3 | testEnvironment: 'node', 4 | moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'], 5 | testMatch: ['**/__tests__/**/*.[jt]s?(x)', '**/?(*.)+(spec|test).[tj]s?(x)'], 6 | extensionsToTreatAsEsm: ['.ts'], 7 | transform: { 8 | '^.+\\.tsx?$': ['ts-jest', { 9 | useESM: true, 10 | }] 11 | }, 12 | moduleNameMapper: { 13 | '^(\\.{1,2}/.*)\\.js$': '$1', 14 | } 15 | }; -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@cmann50/mcp-chrome-google-search", 3 | "version": "1.0.6", 4 | "type": "module", 5 | "bin": { 6 | "mcp-chrome-google-search": "./dist/index.js" 7 | }, 8 | "files": [ 9 | "dist" 10 | ], 11 | "main": "dist/index.js", 12 | "scripts": { 13 | "build": "tsc && chmod +x dist/*.js", 14 | "prepare": "npm run build", 15 | "start": "node dist/index.js", 16 | "test": "NODE_OPTIONS=--experimental-vm-modules jest --config jest.config.mjs", 17 | "test:watch": "NODE_OPTIONS=--experimental-vm-modules jest --config jest.config.mjs --watch", 18 | "test:coverage": "NODE_OPTIONS=--experimental-vm-modules jest --config jest.config.mjs --coverage" 19 | }, 20 | "dependencies": { 21 | "@modelcontextprotocol/sdk": "^1.0.0", 22 | "@types/cheerio": "^0.22.35", 23 | "cheerio": "^1.0.0", 24 | "zod": "^3.22.0" 25 | }, 26 | "devDependencies": { 27 | "@types/jest": "^29.5.14", 28 | "@types/node": "^20.0.0", 29 | "jest": "^29.7.0", 30 | "ts-jest": "^29.2.5", 31 | "typescript": "^5.0.0" 32 | }, 33 | "types": "./dist/index.d.ts", 34 | "description": "MCP tool for Google search and webpage content extraction using Chrome browser. 
Works with Claude to enable Google search and content fetching capabilities.", 35 | "repository": { 36 | "type": "git", 37 | "url": "git+https://github.com/cmann50/mcp-chrome-google-search.git" 38 | }, 39 | "keywords": [ 40 | "mcp", 41 | "model-context-protocol", 42 | "claude-desktop-tool", 43 | "google-search", 44 | "chrome", 45 | "web-search", 46 | "macos", 47 | "browser-automation" 48 | ], 49 | "author": "Chris Mann", 50 | "license": "MIT", 51 | "bugs": { 52 | "url": "https://github.com/cmann50/mcp-chrome-google-search/issues" 53 | }, 54 | "homepage": "https://github.com/cmann50/mcp-chrome-google-search#readme" 55 | } 56 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; 4 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; 5 | import { registerSearchTool } from './tools/search.js'; 6 | import { registerFetchTool } from './tools/fetch.js'; 7 | 8 | const server = new McpServer({ 9 | name: "mcp-chrome-google-search", 10 | version: "1.0.0", 11 | }); 12 | 13 | registerSearchTool(server); 14 | registerFetchTool(server); 15 | 16 | async function main() { 17 | const transport = new StdioServerTransport(); 18 | await server.connect(transport); 19 | console.error("MCP Chrome Google Search Server running on stdio"); 20 | } 21 | 22 | main().catch(error => { 23 | console.error("Fatal error:", error); 24 | process.exit(1); 25 | }); -------------------------------------------------------------------------------- /src/tools/fetch.ts: -------------------------------------------------------------------------------- 1 | import { z } from "zod"; 2 | import { getWebContent } from '../toolsImpl/webFetchTool/index.js'; 3 | import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; 4 | /** Registers the "web_fetch" tool on the given MCP server; the handler returns the page text, and failures come back as text results flagged with isError. */ 5 | export function 
registerFetchTool(server: McpServer) { 6 | server.tool( 7 | "web_fetch", 8 | "Extract readable text content from a webpage using Chrome browser automation.\n\nKey Features:\n- Returns main content text and optionally links", 9 | { 10 | url: z.string().url() 11 | .describe("Webpage URL to fetch (must include http:// or https://)"), 12 | 13 | includeLinks: z.boolean().optional().default(false) 14 | .describe("Whether to include extracted links in the output") 15 | }, 16 | async ({ url, includeLinks }) => { 17 | try { 18 | const content = await getWebContent(url, { includeLinks }); 19 | if (!content) { 20 | return { 21 | content: [{ 22 | type: "text", 23 | text: "Failed to retrieve web content" 24 | }], isError: true // mark the result as a failure, as the web-search tool does 25 | }; 26 | } 27 | 28 | // Ensure the content is properly formatted and trimmed 29 | const formattedContent = content.trim(); 30 | 31 | return { 32 | content: [{ 33 | type: "text", 34 | text: formattedContent 35 | }] 36 | }; 37 | } catch (error) { 38 | const errorMessage = error instanceof Error ? error.message : String(error); 39 | return { 40 | content: [{ 41 | type: "text", 42 | text: `Content fetch failed - please try again: ${errorMessage}`.trim() 43 | }], isError: true // mark the result as a failure, as the web-search tool does 44 | }; 45 | } 46 | } 47 | ); 48 | } -------------------------------------------------------------------------------- /src/tools/search.ts: -------------------------------------------------------------------------------- 1 | import { z } from "zod"; 2 | import { performGoogleSearch } from '../toolsImpl/searchTool/index.js'; 3 | import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; 4 | 5 | export function registerSearchTool(server: McpServer) { 6 | server.tool( 7 | "web-search", 8 | "Search webpages and get a specific page of results (each page has ~10 results). 
Optionally filter by site and timeframe.", 9 | { 10 | query_text: z.string().min(1).describe("Plain text to search for (no Google operators plain text only - use other parameters for site/date filtering)"), 11 | site: z.string().optional().describe("Limit search to specific domain (e.g. 'github.com' or 'docs.python.org')"), 12 | timeframe: z.enum(['h', 'd', 'w', 'm', 'y']).optional().describe("Time range filter (h=hour, d=day, w=week, m=month, y=year)"), 13 | pageNumber: z.number().min(1).max(5).optional().default(1).describe( 14 | "Which page of results to fetch (1-5). Each page contains ~10 results" 15 | ) 16 | }, 17 | async ({ query_text, site, timeframe, pageNumber }) => { 18 | console.error(`Executing Google search for: ${query_text} (page ${pageNumber})`); 19 | try { 20 | const searchParams = { query_text, site, timeframe }; 21 | const results = await performGoogleSearch(searchParams, pageNumber); 22 | 23 | return { 24 | content: [{ 25 | type: "text" as const, 26 | text: results 27 | }] 28 | }; 29 | } catch (error) { 30 | return { 31 | content: [{ 32 | type: "text" as const, 33 | text: `Search failed - please try again: ${error instanceof Error ? 
error.message : String(error)}` 34 | }], 35 | isError: true 36 | }; 37 | } 38 | } 39 | ); 40 | } 41 | -------------------------------------------------------------------------------- /src/toolsImpl/searchTool/index.ts: -------------------------------------------------------------------------------- 1 | import { runOsascript } from '../../utils/osascript.js'; 2 | import { buildGoogleSearchUrl } from '../../utils/url.js'; 3 | import type { SearchParams, SearchResult } from '../../types/search.js'; 4 | import * as cheerio from 'cheerio'; 5 | 6 | function parseHtml(html: string): SearchResult[] { 7 | const $ = cheerio.load(html); 8 | const results: SearchResult[] = []; 9 | 10 | // Find all main result containers 11 | $('.g').each((_, resultDiv) => { 12 | // Look for the first link in this container 13 | const link = $(resultDiv).find('a').first(); 14 | const href = link.attr('href'); 15 | 16 | // Find the description - it's typically the last text block in the container 17 | const description = $(resultDiv).find('div[style*="-webkit-line-clamp"], div.VwiC3b, .aCOpRe').text(); 18 | 19 | if (href?.startsWith('http') && 20 | !href.includes('google.com') && 21 | description.trim().length > 0) { 22 | results.push({ 23 | url: href, 24 | description: description.trim() 25 | }); 26 | } 27 | }); 28 | 29 | return results; 30 | } 31 | 32 | async function fetchSearchPage(searchParams: SearchParams, pageNumber: number): Promise { 33 | // Convert 1-based page number to 0-based for URL 34 | const pageIndex = pageNumber - 1; 35 | const searchUrl = buildGoogleSearchUrl(searchParams, pageIndex); 36 | 37 | const script = ` 38 | tell application "Google Chrome" 39 | make new window with properties {bounds:{50, 50, 425, 717}} 40 | set newWindow to window 1 41 | 42 | tell newWindow 43 | set URL of active tab to "${searchUrl}" 44 | end tell 45 | 46 | -- Return focus to Claude 47 | tell application "Claude" to activate 48 | 49 | -- Wait for page to load 50 | tell active tab of newWindow 51 | 
repeat until (loading is false) 52 | delay 0.1 53 | end repeat 54 | end tell 55 | 56 | -- Get page content 57 | tell active tab of newWindow 58 | set pageContent to (execute javascript "document.documentElement.outerHTML;") 59 | end tell 60 | 61 | -- Close the window 62 | close newWindow 63 | end tell 64 | 65 | return pageContent 66 | `; 67 | 68 | const html = await runOsascript(script); 69 | return parseHtml(html); 70 | } 71 | 72 | export async function performGoogleSearch(searchParams: SearchParams, pages: number = 1): Promise { 73 | try { 74 | const allResults: SearchResult[] = []; 75 | 76 | // Fetch results from multiple pages 77 | for (let page = 1; page <= pages; page++) { 78 | const pageResults = await fetchSearchPage(searchParams, page); 79 | allResults.push(...pageResults); 80 | 81 | // Add a small delay between page fetches 82 | if (page < pages) { 83 | await new Promise(resolve => setTimeout(resolve, 1000)); 84 | } 85 | } 86 | 87 | return allResults.map(r => `${r.url}\n${r.description}`).join('\n\n'); 88 | } catch (error: unknown) { 89 | const errorMessage = error instanceof Error ? 
error.message : String(error); 90 | throw new Error(`Failed to perform Google search: ${errorMessage}`); 91 | } 92 | } -------------------------------------------------------------------------------- /src/toolsImpl/webFetchTool/htmlParser.ts: -------------------------------------------------------------------------------- 1 | import * as cheerio from 'cheerio'; 2 | import type { ParsedContent, Link } from './types.js'; 3 | /** Extracts the text content and up to 50 de-duplicated links from an HTML string; text sections are whitespace-normalized and joined with newlines. */ 4 | export function parseHtml(htmlContent: string): ParsedContent { 5 | const $ = cheerio.load(htmlContent); 6 | 7 | // Only remove script and style elements 8 | $('script, style').remove(); 9 | 10 | // Get meaningful content from the body 11 | const mainContent: string[] = []; 12 | 13 | // Process main content areas 14 | $('body').find('*').each((_, elem) => { 15 | const $elem = $(elem); 16 | 17 | // Skip hidden elements 18 | if ($elem.css('display') === 'none' || $elem.css('visibility') === 'hidden') { 19 | return; 20 | } 21 | 22 | // Get direct text nodes only (not nested text) 23 | const directText = $elem.clone().children().remove().end().text().trim(); 24 | if (directText && directText.length > 0) { 25 | mainContent.push(directText); 26 | } 27 | }); 28 | 29 | // Extract links 30 | const links: Link[] = []; 31 | const seenUrls = new Set(); 32 | const seenTexts = new Set(); 33 | 34 | $('a[href]').each((_, elem) => { 35 | const $elem = $(elem); 36 | const url = $elem.attr('href')?.trim(); 37 | const text = $elem.text().trim(); 38 | 39 | if (!url || !text || seenUrls.has(url) || seenTexts.has(text)) return; 40 | 41 | // Skip javascript: and other non-http links 42 | if (!url.startsWith('javascript:') && 43 | !url.startsWith('tel:') && 44 | !url.startsWith('mailto:') && 45 | text.length > 2) { 46 | 47 | // Normalize URLs 48 | let finalUrl = url; 49 | if (url.startsWith('//')) { 50 | finalUrl = 'https:' + url; 51 | } else if (url.startsWith('/')) { 52 | // Handle relative URLs later when we have base URL 53 | finalUrl = url; 54 | } 55 | 56 | 
links.push({ text, url: finalUrl }); 57 | seenUrls.add(finalUrl); 58 | seenTexts.add(text); 59 | } 60 | }); 61 | 62 | // Clean and format the content 63 | const text = mainContent 64 | .filter(section => section.length > 0) 65 | .map(section => section 66 | .replace(/\s+/g, ' ') // Normalize whitespace 67 | .trim()) 68 | .join('\n') 69 | .replace(/\n{3,}/g, '\n\n') // Max 2 newlines in a row 70 | .trim(); 71 | 72 | return { 73 | text: text || 'No content found on the page.', 74 | links: links.slice(0, 50) // Include more links 75 | }; 76 | } -------------------------------------------------------------------------------- /src/toolsImpl/webFetchTool/index.ts: -------------------------------------------------------------------------------- 1 | import { runOsascript } from '../../utils/osascript.js'; 2 | import { generateAppleScript } from './scriptGenerator.js'; 3 | import { parseHtml } from './htmlParser.js'; 4 | import type { WebContentOptions } from './types.js'; 5 | 6 | export async function getWebContent(url: string, options: WebContentOptions = {}): Promise { 7 | try { 8 | const script = generateAppleScript(url); 9 | const rawContent = await runOsascript(script); 10 | 11 | if (!rawContent) { 12 | throw new Error('No content received from page'); 13 | } 14 | 15 | const { text, links } = parseHtml(rawContent); 16 | 17 | if (!options.includeLinks || links.length === 0) { 18 | return text; 19 | } 20 | 21 | return `${text}\n\n=== Links ===\n${links.map(link => 22 | `${link.text} (${link.url})`).join('\n')}`; 23 | 24 | } catch (error: unknown) { 25 | throw new Error(`Failed to get web content: ${error instanceof Error ? 
error.message : String(error)}`); 26 | } 27 | } -------------------------------------------------------------------------------- /src/toolsImpl/webFetchTool/scriptGenerator.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Builds the AppleScript that loads `url` in a new Chrome window, waits for loading to finish, and returns the page's outer HTML. 3 | * 4 | * Backslashes and double quotes in `url` are escaped first: the value is interpolated into an AppleScript string literal, and an unescaped quote would end that literal and let the rest of the URL run as script. 5 | * 6 | * @param url - Address to open in the new window's active tab. 7 | * @returns The AppleScript source text. 8 | */ 9 | export function generateAppleScript(url: string): string { 10 | const escapedUrl = url.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); 11 | const script = ` 12 | tell application "Google Chrome" 13 | make new window with properties {bounds:{50, 50, 425, 717}} 14 | set newWindow to window 1 15 | 16 | tell newWindow 17 | set URL of active tab to "${escapedUrl}" 18 | end tell 19 | 20 | -- Return focus to Claude 21 | tell application "Claude" to activate 22 | 23 | -- Wait for page to load 24 | tell active tab of newWindow 25 | repeat until (loading is false) 26 | delay 0.1 27 | end repeat 28 | end tell 29 | 30 | -- Get page content 31 | tell active tab of newWindow 32 | set pageContent to (execute javascript "document.documentElement.outerHTML;") 33 | end tell 34 | 35 | -- Close the window 36 | close newWindow 37 | end tell 38 | 39 | return pageContent 40 | `; 41 | return script; 42 | } -------------------------------------------------------------------------------- /src/toolsImpl/webFetchTool/types.ts: -------------------------------------------------------------------------------- 1 | export interface Link { 2 | text: string; 3 | url: string; 4 | } 5 | 6 | export interface ParsedContent { 7 | text: string; 8 | links: Link[]; 9 | } 10 | 11 | export interface WebContentOptions { 12 | includeLinks?: boolean; 13 | } -------------------------------------------------------------------------------- /src/types/search.ts: -------------------------------------------------------------------------------- 1 | export interface SearchParams { 2 | query_text: string; 3 | site?: string; 4 | timeframe?: 'h' | 'd' | 'w' | 'm' | 'y'; 5 | } 6 | 7 | export interface SearchResult { 8 | url: string; 9 | description: string; 10 | } -------------------------------------------------------------------------------- 
/src/utils/osascript.ts: -------------------------------------------------------------------------------- 1 | import { execFile } from 'node:child_process'; 2 | import { promisify } from 'node:util'; 3 | 4 | const execFileAsync = promisify(execFile); 5 | 6 | export async function runOsascript(script: string): Promise { 7 | // Set maxBuffer to 10MB (10 * 1024 * 1024) 8 | const { stdout } = await execFileAsync('osascript', ['-e', script], { 9 | maxBuffer: 10 * 1024 * 1024 10 | }); 11 | return stdout; 12 | } -------------------------------------------------------------------------------- /src/utils/url.ts: -------------------------------------------------------------------------------- 1 | import type { SearchParams } from '../types/search.js'; 2 | 3 | function formatDate(date: Date): string { 4 | return `${date.getMonth() + 1}/${date.getDate()}/${date.getFullYear()}`; 5 | } 6 | 7 | export function buildGoogleSearchUrl(params: SearchParams, pageNum: number = 0): string { 8 | // Build base query 9 | let searchQuery = params.query_text; 10 | 11 | // Add site restriction if specified 12 | if (params.site) { 13 | searchQuery = `site:${params.site} ${searchQuery}`; 14 | } 15 | 16 | // Base URL with encoded query and page number 17 | let url = `https://www.google.com/search?q=${encodeURIComponent(searchQuery)}&hl=en&start=${pageNum * 10}`; 18 | 19 | // Add time restriction if specified 20 | if (params.timeframe) { 21 | url += `&tbs=qdr:${params.timeframe}`; 22 | } 23 | 24 | return url; 25 | } -------------------------------------------------------------------------------- /test/data/wikipedia.home.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Wikipedia 6 | 7 | 10 | 11 | 12 | 13 | 14 | 17 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 |
30 | 39 | 111 | 208 | 641 |
642 |
643 | 837 | 870 | 871 | 872 | 877 | 878 | 879 | -------------------------------------------------------------------------------- /test/integration/web-fetch.test.ts: -------------------------------------------------------------------------------- 1 | import {getWebContent} from '../../src/toolsImpl/webFetchTool'; 2 | 3 | describe('web-fetch Tool', () => { 4 | it('should fetch and parse content from a URL', async () => { 5 | const url = 'https://apple.com'; 6 | 7 | try { 8 | console.log('Test: Calling getWebContent with URL:', url); 9 | const result = await getWebContent(url); 10 | console.log('Test: Received result type:', typeof result); 11 | console.log('Test: Result length:', result?.length); 12 | console.log('Test: Raw result:', result); 13 | 14 | // Check that we got content 15 | expect(result).toBeTruthy(); 16 | expect(result.length).toBeGreaterThan(0); 17 | 18 | // Basic content validation - apple.com should contain Apple-related content 19 | expect(result.toLowerCase()).toMatch(/apple|iphone|mac|ipad/); 20 | 21 | // Log the results for inspection 22 | console.log('Web Content Results:'); 23 | console.log('-----------------'); 24 | console.log(result); 25 | console.log('-----------------'); 26 | 27 | } catch (error: unknown) { 28 | const errorMessage = error instanceof Error ? 
error.message : String(error);
29 |       throw new Error('Should not throw an error: ' + errorMessage);
30 |     }
31 |   }, 30000);
32 | 
33 |   it('should include links when requested', async () => {
34 |     const url = 'https://apple.com';
35 | 
36 |     const result = await getWebContent(url, {includeLinks: true});
37 | 
38 |     // Check for links section and content
39 |     expect(result).toBeTruthy();
40 |     expect(result.length).toBeGreaterThan(0);
41 | 
42 |     // If we have links, they should be properly formatted
43 |     if (result.includes('=== Links ===')) {
44 |       expect(result).toMatch(/\([^)]+\)/);
45 |     }
46 | 
47 |     console.log('Content with Links:');
48 |     console.log('-----------------');
49 |     console.log(result);
50 |     console.log('-----------------');
51 |   }, 30000);
52 | 
53 | 
54 | });
--------------------------------------------------------------------------------
/test/integration/web-search-tool.test.ts:
--------------------------------------------------------------------------------
1 | import { performGoogleSearch } from '../../src/toolsImpl/searchTool';
2 | import type { SearchParams } from '../../src/types/search';
3 | 
4 | /**
5 |  * Integration tests for `performGoogleSearch`, which is imported unmocked.
6 |  *
7 |  * The assertions expect the result string to be made of blocks separated by
8 |  * blank lines, where a block's first line is a URL and its second line is a
9 |  * description.
10 |  *
11 |  * Errors are deliberately not caught and re-wrapped: letting them propagate
12 |  * keeps the original stack trace and Jest's assertion diff in the report.
13 |  */
14 | describe('web-search Tool', () => {
15 |   it('should perform a basic search and return results', async () => {
16 |     const searchParams: SearchParams = {
17 |       query_text: 'integration test search'
18 |     };
19 | 
20 |     const result = await performGoogleSearch(searchParams, 1);
21 |     expect(result.length).toBeGreaterThan(0);
22 | 
23 |     const blocks = result.split('\n\n');
24 |     expect(blocks.length).toBeGreaterThan(1);
25 | 
26 |     // Every block must start with an http(s) URL followed by a non-empty line.
27 |     blocks.forEach((block: string) => {
28 |       const [url, description] = block.split('\n');
29 |       expect(url).toMatch(/^https?:\/\/.+/);
30 |       expect(description?.length).toBeGreaterThan(0);
31 |     });
32 |   }, 30000);
33 | 
34 |   it('should handle site filtering', async () => {
35 |     const searchParams: SearchParams = {
36 |       query_text: 'documentation',
37 |       site: 'nodejs.org'
38 |     };
39 | 
40 |     const result = await performGoogleSearch(searchParams, 1);
41 |     expect(result.length).toBeGreaterThan(0);
42 | 
43 |     // Each block's URL line must point at the requested site.
44 |     const blocks = result.split('\n\n');
45 |     blocks.forEach((block: string) => {
46 |       const [url] = block.split('\n');
47 |       expect(url).toContain('nodejs.org');
48 |     });
49 |   }, 30000);
50 | 
51 |   it('should handle time filtering', async () => {
52 |     const searchParams: SearchParams = {
53 |       query_text: 'news',
54 |       timeframe: 'd'
55 |     };
56 | 
57 |     const result = await performGoogleSearch(searchParams, 1);
58 |     expect(result.length).toBeGreaterThan(0);
59 | 
60 |     const blocks = result.split('\n\n');
61 |     expect(blocks.length).toBeGreaterThan(1);
62 |   }, 30000);
63 | 
64 |   it('should successfully fetch multiple pages of results', async () => {
65 |     const searchParams: SearchParams = {
66 |       query_text: 'latest news'
67 |     };
68 | 
69 |     // Test fetching 3 different pages
70 |     for (let page = 1; page <= 3; page++) {
71 |       const results = await performGoogleSearch(searchParams, page);
72 | 
73 |       // Basic validation that we got results
74 |       expect(results.length).toBeGreaterThan(0);
75 |       expect(results.split('\n\n').length).toBeGreaterThan(1);
76 |     }
77 |     // Three sequential searches get three times the single-search budget.
78 |   }, 90000);
79 | });
--------------------------------------------------------------------------------
/test/unit/web-fetch.test.ts:
--------------------------------------------------------------------------------
1 | import { jest } from '@jest/globals';
2 | import fs from 'fs';
3 | import path from 'path';
4 | import { registerFetchTool } from '../../src/tools/fetch';
5 | import { getWebContent } from '../../src/toolsImpl/webFetchTool';
6 | import { generateAppleScript } from '../../src/toolsImpl/webFetchTool/scriptGenerator';
7 | import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
8 | 
9 | // Define the type for the tool callback
10 | type WebFetchCallback = (params: { url: string; includeLinks?: boolean }) => Promise<{
11 |   content: Array<{ type: string; text: string }>;
12 | }>;
13 | 
14 | // Mock the script generator
15 | jest.mock('../../src/toolsImpl/webFetchTool/scriptGenerator');
16 | 
17 | // Mock utils/osascript so that runOsascript returns the Wikipedia fixture HTML.
18 | // NOTE(review): `__dirname` and hoisted `jest.mock` only exist when Jest runs
19 | // the tests as CommonJS; if this project runs Jest in native ESM mode this
20 | // would need `import.meta.url` and `jest.unstable_mockModule` - confirm.
21 | jest.mock('../../src/utils/osascript', () => ({
22 |   runOsascript: jest.fn().mockImplementation(async () => {
23 |     // Read the fixture file
24 |     const fixturePath = path.join(__dirname, '../data/wikipedia.home.html');
25 |     return fs.readFileSync(fixturePath, 'utf-8');
26 |   })
27 | }));
28 | 
29 | /**
30 |  * Unit test for the fetch tool registration: registers the tool on a stub
31 |  * server, pulls the registered callback back out of the stub, and invokes it.
32 |  */
33 | describe('Web Fetch Tool', () => {
34 |   const mockServer = {
35 |     tool: jest.fn()
36 |   };
37 | 
38 |   beforeEach(() => {
39 |     jest.clearAllMocks();
40 |   });
41 | 
42 |   it('should fetch and process web content', async () => {
43 |     // Register the tool
44 |     registerFetchTool(mockServer as unknown as McpServer);
45 | 
46 |     // Fail with a clear message if nothing was registered, instead of a
47 |     // TypeError when indexing into an empty call list below.
48 |     expect(mockServer.tool).toHaveBeenCalledTimes(1);
49 | 
50 |     // Get the callback function that was passed to mockServer.tool
51 |     // (read from index 3 of the first call's argument list).
52 |     const toolCallback = mockServer.tool.mock.calls[0][3] as WebFetchCallback;
53 | 
54 |     // Call the callback with test parameters
55 |     const result = await toolCallback({
56 |       url: 'https://wikipedia.org',
57 |       includeLinks: true
58 |     });
59 | 
60 |     // Print the returned text for now
61 |     console.log('Returned content:', result.content[0].text);
62 | 
63 |     // Basic assertions
64 |     expect(result).toBeDefined();
65 |     expect(result.content).toBeInstanceOf(Array);
66 |     expect(result.content[0].type).toBe('text');
67 |     expect(result.content[0].text).toBeTruthy();
68 |   });
69 | 
70 |   // Add more test cases as needed
71 | });
72 | 
--------------------------------------------------------------------------------
/test/unit/web-search-url.test.ts:
--------------------------------------------------------------------------------
1 | import { buildGoogleSearchUrl } from '../../src/utils/url';
2 | import type { SearchParams } from '../../src/types/search';
3 | 
4 | /**
5 |  * Unit tests pinning the exact URL strings produced by `buildGoogleSearchUrl`
6 |  * for plain queries, site filters, time filters and page numbers.
7 |  */
8 | describe('web-search URL Generation', () => {
9 |   it('generates basic search URL', () => {
10 |     const params: SearchParams = {
11 |       query_text: 'plain search text'
12 |     };
13 |     const url = buildGoogleSearchUrl(params);
14 |     expect(url).toBe('https://www.google.com/search?q=plain%20search%20text&hl=en&start=0');
15 |   });
16 | 
17 |   it('generates URL with site filter', () => {
18 |     const params: SearchParams = {
19 |       query_text: 'search text',
20 |       site: 'example.com'
21 |     };
22 |     const url = buildGoogleSearchUrl(params);
23 |     expect(url).toBe('https://www.google.com/search?q=site%3Aexample.com%20search%20text&hl=en&start=0');
24 |   });
25 | 
26 |   it('generates URLs with time filters', () => {
27 |     const periods: Array<'h' | 'd' | 'w' | 'm' | 'y'> = ['h', 'd', 'w', 'm', 'y'];
28 | 
29 |     // Each timeframe code is expected verbatim after `tbs=qdr:`.
30 |     periods.forEach(period => {
31 |       const params: SearchParams = {
32 |         query_text: 'news',
33 |         timeframe: period
34 |       };
35 |       const url = buildGoogleSearchUrl(params);
36 |       expect(url).toBe(`https://www.google.com/search?q=news&hl=en&start=0&tbs=qdr:${period}`);
37 |     });
38 |   });
39 | 
40 |   it('generates URLs for different pages', () => {
41 |     const params: SearchParams = {
42 |       query_text: 'test search'
43 |     };
44 | 
45 |     // Test zero-based page numbers (0 = first page, 1 = second page, etc)
46 |     const testCases = [
47 |       { pageNum: 0, expected: 'https://www.google.com/search?q=test%20search&hl=en&start=0' },
48 |       { pageNum: 1, expected: 'https://www.google.com/search?q=test%20search&hl=en&start=10' },
49 |       { pageNum: 2, expected: 'https://www.google.com/search?q=test%20search&hl=en&start=20' }
50 |     ];
51 | 
52 |     testCases.forEach(({ pageNum, expected }) => {
53 |       const url = buildGoogleSearchUrl(params, pageNum);
54 |       expect(url).toBe(expected);
55 |     });
56 |   });
57 | 
58 |   it('combines site filter with time filter', () => {
59 |     const params: SearchParams = {
60 |       query_text: 'release notes',
61 |       site: 'github.com',
62 |       timeframe: 'm'
63 |     };
64 |     const url = buildGoogleSearchUrl(params);
65 |     expect(url).toBe('https://www.google.com/search?q=site%3Agithub.com%20release%20notes&hl=en&start=0&tbs=qdr:m');
66 |   });
67 | });
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "compilerOptions": {
3 |     "target": "ES2020",
4 |     "module": "ES2022",
5 |     "moduleResolution": "node",
6 |     "esModuleInterop": true,
7 |     "outDir": "./dist",
8 |     "rootDir": "./src",
9 |     "strict": true
10 |   },
11 |   "include": ["src/**/*"]
12 | }
--------------------------------------------------------------------------------