├── public └── assets │ └── preview.png ├── docker-compose.yml ├── tsconfig.json ├── jest.config.js ├── .github └── FUNDING.yml ├── smithery.yaml ├── src ├── tests │ ├── docker-test.js │ ├── basic.test.ts │ └── browser-error.test.ts ├── handlers.ts ├── index.ts ├── tools.ts └── executor.ts ├── Dockerfile ├── tests ├── basic.test.ts └── browser-error.test.ts ├── package.json ├── SECURITY.md ├── .gitignore ├── LICENSE └── README.md /public/assets/preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imprvhub/mcp-browser-agent/HEAD/public/assets/preview.png -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | mcp-browser-agent: 4 | build: 5 | context: . 6 | dockerfile: Dockerfile 7 | volumes: 8 | - /tmp:/tmp # For file sharing if needed 9 | environment: 10 | - MCP_BROWSER_TYPE=chromium # Use chromium by default 11 | - MCP_VIEWPORT_WIDTH=1280 12 | - MCP_VIEWPORT_HEIGHT=800 13 | - MCP_DEVICE_SCALE_FACTOR=1.25 14 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "NodeNext", 5 | "moduleResolution": "NodeNext", 6 | "esModuleInterop": true, 7 | "forceConsistentCasingInFileNames": true, 8 | "strict": true, 9 | "skipLibCheck": true, 10 | "isolatedModules": true, 11 | "rootDir": "src", 12 | "outDir": "dist", 13 | "declaration": true 14 | }, 15 | "include": ["src/**/*", "src/tests/**/*"], 16 | "exclude": ["node_modules", "dist"] 17 | } -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | preset: 
'ts-jest', 3 | testEnvironment: 'node', 4 | extensionsToTreatAsEsm: ['.ts'], 5 | testMatch: ['**/tests/**/*.test.ts'], 6 | clearMocks: true, 7 | resetMocks: true, 8 | moduleNameMapper: { 9 | '^(\\.{1,2}/.*)\\.js$': '$1', 10 | }, 11 | transformIgnorePatterns: ['/node_modules/(?!(@modelcontextprotocol)/)'], 12 | transform: { 13 | '^.+\\.tsx?$': [ 14 | 'ts-jest', 15 | { 16 | useESM: true, 17 | }, 18 | ], 19 | }, 20 | } 21 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: imprvhub 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 12 | polar: # Replace with a single Polar username 13 | buy_me_a_coffee: ivanlunadev 14 | thanks_dev: # Replace with a single thanks.dev username 15 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 16 | -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- 1 | name: mcp-browser-agent 2 | displayName: Browser Agent MCP 3 | description: A Model Context Protocol (MCP) integration that provides Claude Desktop with autonomous browser automation capabilities. 
4 | visibility: public 5 | type: mcp 6 | author: 7 | name: Iván Luna 8 | url: https://github.com/imprvhub 9 | repository: https://github.com/imprvhub/mcp-browser-agent 10 | keywords: 11 | - browser 12 | - agent 13 | - orchestration 14 | - automation 15 | files: 16 | - README.md 17 | - package.json 18 | - tsconfig.json 19 | - Dockerfile 20 | - src/index.ts 21 | - src/tools.ts 22 | - src/handlers.ts 23 | - src/executor.ts 24 | startCommand: 25 | type: stdio 26 | configSchema: 27 | type: object 28 | properties: {} 29 | commandFunction: |- 30 | (config) => ({ 31 | command: 'node', 32 | args: ['dist/index.js'], 33 | env: {} 34 | }) 35 | exampleConfig: {} -------------------------------------------------------------------------------- /src/tests/docker-test.js: -------------------------------------------------------------------------------- 1 | const { chromium } = require('playwright'); 2 | 3 | async function test() { 4 | console.log('Starting browser test in Docker environment...'); 5 | 6 | const isDocker = require('fs').existsSync('/.dockerenv') || 7 | (require('fs').existsSync('/proc/1/cgroup') && 8 | require('fs').readFileSync('/proc/1/cgroup', 'utf8').includes('docker')); 9 | 10 | console.log(`Running in Docker environment: ${isDocker}`); 11 | 12 | try { 13 | const browser = await chromium.launch({ headless: true }); 14 | console.log('Browser launched successfully'); 15 | const context = await browser.newContext(); 16 | const page = await context.newPage(); 17 | console.log('Browser page created successfully'); 18 | await page.goto('https://example.com'); 19 | console.log('Navigation successful'); 20 | await page.screenshot({ path: '/tmp/example.png' }); 21 | console.log('Screenshot saved to /tmp/example.png'); 22 | const title = await page.title(); 23 | console.log(`Page title: ${title}`); 24 | await browser.close(); 25 | console.log('Browser closed successfully'); 26 | console.log('Test completed successfully!'); 27 | } catch (error) { 28 | console.error('Test 
failed:', error); 29 | } 30 | } 31 | 32 | test(); 33 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile 2 | # Builder stage 3 | FROM node:lts AS builder 4 | WORKDIR /app 5 | 6 | # Copy project files 7 | COPY package.json tsconfig.json ./ 8 | COPY src ./src 9 | 10 | # Install dependencies and build TypeScript 11 | RUN npm install && \ 12 | npm run build 13 | 14 | # Final image 15 | FROM node:lts 16 | WORKDIR /app 17 | 18 | COPY --from=builder /app/dist ./dist 19 | COPY --from=builder /app/node_modules ./node_modules 20 | COPY package.json ./ 21 | 22 | # Install Playwright browser dependencies 23 | RUN apt-get update && apt-get install -y --no-install-recommends \ 24 | libglib2.0-0 \ 25 | libnss3 \ 26 | libnspr4 \ 27 | libatk1.0-0 \ 28 | libatk-bridge2.0-0 \ 29 | libcups2 \ 30 | libdrm2 \ 31 | libdbus-1-3 \ 32 | libxcb1 \ 33 | libxkbcommon0 \ 34 | libx11-6 \ 35 | libxcomposite1 \ 36 | libxdamage1 \ 37 | libxext6 \ 38 | libxfixes3 \ 39 | libxrandr2 \ 40 | libgbm1 \ 41 | libpango-1.0-0 \ 42 | libcairo2 \ 43 | libasound2 \ 44 | libatspi2.0-0 \ 45 | && rm -rf /var/lib/apt/lists/* 46 | 47 | # Install Playwright browsers 48 | RUN npx playwright install chromium firefox webkit --with-deps 49 | 50 | # Expose stdio (no ports) and run the MCP server 51 | CMD ["node", "dist/index.js"] -------------------------------------------------------------------------------- /tests/basic.test.ts: -------------------------------------------------------------------------------- 1 | import { test, expect, describe, jest } from '@jest/globals'; 2 | import fs from 'node:fs'; 3 | import path from 'node:path'; 4 | import { fileURLToPath } from 'node:url'; 5 | 6 | const __filename = fileURLToPath(import.meta.url); 7 | const __dirname = path.dirname(__filename); 8 | const rootDir = 
path.resolve(__dirname, '..'); 9 | 10 | describe('MCP Browser Agent - Basic Tests', () => { 11 | test('Package should be properly configured', async () => { 12 | const packageJsonPath = path.join(rootDir, 'package.json'); 13 | const packageJsonContent = await fs.promises.readFile(packageJsonPath, 'utf8'); 14 | const packageJson = JSON.parse(packageJsonContent); 15 | 16 | expect(packageJson.name).toBe('mcp-browser-agent'); 17 | expect(packageJson.type).toBe('module'); 18 | expect(typeof packageJson.scripts.test).toBe('string'); 19 | }); 20 | 21 | test('Project should have required files', async () => { 22 | expect(fs.existsSync(path.join(rootDir, 'src/index.ts'))).toBeTruthy(); 23 | expect(fs.existsSync(path.join(rootDir, 'src/executor.ts'))).toBeTruthy(); 24 | expect(fs.existsSync(path.join(rootDir, 'src/tools.ts'))).toBeTruthy(); 25 | expect(fs.existsSync(path.join(rootDir, 'src/handlers.ts'))).toBeTruthy(); 26 | }); 27 | 28 | test('Tools module should exist', async () => { 29 | const toolsPath = path.join(rootDir, 'src/tools.ts'); 30 | expect(fs.existsSync(toolsPath)).toBeTruthy(); 31 | 32 | const toolsContent = await fs.promises.readFile(toolsPath, 'utf8'); 33 | expect(toolsContent).toContain('BROWSER_TOOLS'); 34 | expect(toolsContent).toContain('browser_navigate'); 35 | expect(toolsContent).toContain('browser_screenshot'); 36 | expect(toolsContent).toContain('API_TOOLS'); 37 | expect(toolsContent).toContain('api_get'); 38 | expect(toolsContent).toContain('api_post'); 39 | }); 40 | }); 41 | -------------------------------------------------------------------------------- /src/tests/basic.test.ts: -------------------------------------------------------------------------------- 1 | import { test, expect, describe, jest } from '@jest/globals'; 2 | import fs from 'node:fs'; 3 | import path from 'node:path'; 4 | import { fileURLToPath } from 'node:url'; 5 | 6 | const __filename = fileURLToPath(import.meta.url); 7 | const __dirname = path.dirname(__filename); 8 | const 
rootDir = path.resolve(__dirname, '..'); 9 | 10 | describe('MCP Browser Agent - Basic Tests', () => { 11 | test('Package should be properly configured', async () => { 12 | const packageJsonPath = path.join(rootDir, 'package.json'); 13 | const packageJsonContent = await fs.promises.readFile(packageJsonPath, 'utf8'); 14 | const packageJson = JSON.parse(packageJsonContent); 15 | 16 | expect(packageJson.name).toBe('mcp-browser-agent'); 17 | expect(packageJson.type).toBe('module'); 18 | expect(typeof packageJson.scripts.test).toBe('string'); 19 | }); 20 | 21 | test('Project should have required files', async () => { 22 | expect(fs.existsSync(path.join(rootDir, 'src/index.ts'))).toBeTruthy(); 23 | expect(fs.existsSync(path.join(rootDir, 'src/executor.ts'))).toBeTruthy(); 24 | expect(fs.existsSync(path.join(rootDir, 'src/tools.ts'))).toBeTruthy(); 25 | expect(fs.existsSync(path.join(rootDir, 'src/handlers.ts'))).toBeTruthy(); 26 | }); 27 | 28 | test('Tools module should exist', async () => { 29 | const toolsPath = path.join(rootDir, 'src/tools.ts'); 30 | expect(fs.existsSync(toolsPath)).toBeTruthy(); 31 | 32 | const toolsContent = await fs.promises.readFile(toolsPath, 'utf8'); 33 | expect(toolsContent).toContain('BROWSER_TOOLS'); 34 | expect(toolsContent).toContain('browser_navigate'); 35 | expect(toolsContent).toContain('browser_screenshot'); 36 | expect(toolsContent).toContain('API_TOOLS'); 37 | expect(toolsContent).toContain('api_get'); 38 | expect(toolsContent).toContain('api_post'); 39 | }); 40 | }); 41 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mcp-browser-agent", 3 | "version": "0.8.0", 4 | "author":{ 5 | "name": "Iván Luna", 6 | "email": "contact@ivanluna.dev", 7 | "url": "https://github.com/imprvhub" 8 | }, 9 | "description": "A Model Context Protocol (MCP) integration that provides Claude Desktop with 
autonomous browser automation capabilities.", 10 | "type": "module", 11 | "bin": { 12 | "mcp-browser-agent": "dist/index.js" 13 | }, 14 | "files": [ 15 | "dist" 16 | ], 17 | "scripts": { 18 | "build": "tsc && shx chmod +x dist/*.js", 19 | "start": "npm run build && node dist/index.js", 20 | "start:chrome": "npm run build && node dist/index.js --browser chrome", 21 | "start:firefox": "npm run build && node dist/index.js --browser firefox", 22 | "start:webkit": "npm run build && node dist/index.js --browser webkit", 23 | "start:edge": "npm run build && node dist/index.js --browser edge", 24 | "watch": "tsc --watch", 25 | "test": "cross-env NODE_OPTIONS=--experimental-vm-modules jest", 26 | "test:watch": "cross-env NODE_OPTIONS=--experimental-vm-modules jest --watch", 27 | "test:coverage": "cross-env NODE_OPTIONS=--experimental-vm-modules jest --coverage" 28 | }, 29 | "dependencies": { 30 | "@modelcontextprotocol/sdk": "1.24.0", 31 | "@playwright/browser-chromium": "1.49.1", 32 | "@playwright/browser-firefox": "1.49.1", 33 | "@playwright/browser-webkit": "1.49.1", 34 | "playwright": "1.55.1" 35 | }, 36 | "devDependencies": { 37 | "@types/jest": "^29.5.14", 38 | "@types/node": "^20.10.5", 39 | "@types/sinon": "^17.0.4", 40 | "cross-env": "^7.0.3", 41 | "jest": "^29.7.0", 42 | "shx": "^0.3.4", 43 | "sinon": "^17.0.1", 44 | "ts-jest": "^29.3.2", 45 | "typescript": "^5.6.2" 46 | }, 47 | "keywords": [ 48 | "playwright", 49 | "browser-automation", 50 | "api-client", 51 | "model-context-protocol", 52 | "mcp" 53 | ], 54 | "license": "Mozilla Public License Version 2.0" 55 | } 56 | -------------------------------------------------------------------------------- /src/handlers.ts: -------------------------------------------------------------------------------- 1 | import { Server } from "@modelcontextprotocol/sdk/server/index.js"; 2 | import { 3 | ListResourcesRequestSchema, 4 | ReadResourceRequestSchema, 5 | ListToolsRequestSchema, 6 | CallToolRequestSchema, 7 | Tool 8 | } from 
"@modelcontextprotocol/sdk/types.js"; 9 | import { executeToolCall, getBrowserLogs, getScreenshotRegistry } from "./executor.js"; 10 | 11 | export function setupHandlers(server: Server, tools: Tool[]) { 12 | server.setRequestHandler(ListResourcesRequestSchema, async () => { 13 | const resources = [ 14 | { 15 | uri: "browser://logs", 16 | mimeType: "text/plain", 17 | name: "Browser console logs", 18 | } 19 | ]; 20 | 21 | const screenshots = getScreenshotRegistry(); 22 | for (const name of screenshots.keys()) { 23 | resources.push({ 24 | uri: `screenshot://${name}`, 25 | mimeType: "image/png", 26 | name: `Screenshot: ${name}`, 27 | }); 28 | } 29 | return { resources }; 30 | }); 31 | 32 | server.setRequestHandler(ReadResourceRequestSchema, async (request) => { 33 | const uri = request.params.uri.toString(); 34 | if (uri === "browser://logs") { 35 | return { 36 | contents: [{ 37 | uri, 38 | mimeType: "text/plain", 39 | text: getBrowserLogs().join("\n"), 40 | }], 41 | }; 42 | } 43 | 44 | if (uri.startsWith("screenshot://")) { 45 | const name = uri.split("://")[1]; 46 | const screenshot = getScreenshotRegistry().get(name); 47 | if (screenshot) { 48 | return { 49 | contents: [{ 50 | uri, 51 | mimeType: "image/png", 52 | blob: screenshot, 53 | }], 54 | }; 55 | } 56 | } 57 | 58 | throw new Error(`Resource not found: ${uri}`); 59 | }); 60 | 61 | server.setRequestHandler(ListToolsRequestSchema, async () => ({ 62 | tools: tools, 63 | })); 64 | 65 | server.setRequestHandler(CallToolRequestSchema, async (request) => { 66 | return executeToolCall( 67 | request.params.name, 68 | request.params.arguments ?? 
{}, 69 | server 70 | ); 71 | }); 72 | } -------------------------------------------------------------------------------- /tests/browser-error.test.ts: -------------------------------------------------------------------------------- 1 | import { test, expect, describe, jest, beforeAll } from '@jest/globals'; 2 | import fs from 'node:fs'; 3 | import path from 'node:path'; 4 | import { fileURLToPath } from 'node:url'; 5 | 6 | const __filename = fileURLToPath(import.meta.url); 7 | const __dirname = path.dirname(__filename); 8 | const rootDir = path.resolve(__dirname, '..'); 9 | const processEvents: Record void>> = { 10 | 'SIGINT': [], 11 | 'SIGTERM': [] 12 | }; 13 | 14 | let cleanupFn: boolean | undefined; 15 | 16 | beforeAll(() => { 17 | const executorPath = path.join(rootDir, 'src/executor.ts'); 18 | const executorContent = fs.readFileSync(executorPath, 'utf8'); 19 | if (executorContent.includes('process.on(\'SIGINT\'')) { 20 | processEvents['SIGINT'] = [() => {}]; 21 | } 22 | 23 | if (executorContent.includes('process.on(\'SIGTERM\'')) { 24 | processEvents['SIGTERM'] = [() => {}]; 25 | } 26 | 27 | if (executorContent.includes('cleanupBrowser')) { 28 | cleanupFn = true; 29 | } 30 | }); 31 | 32 | describe('Browser Error Handling Tests', () => { 33 | test('Executor should contain process cleanup handlers', async () => { 34 | const executorPath = path.join(rootDir, 'src/executor.ts'); 35 | const executorContent = fs.readFileSync(executorPath, 'utf8'); 36 | expect(executorContent.includes('process.on(\'SIGINT\'')).toBeTruthy(); 37 | expect(executorContent.includes('process.on(\'SIGTERM\'')).toBeTruthy(); 38 | expect(executorContent.includes('cleanupBrowser')).toBeTruthy(); 39 | }); 40 | 41 | test('README should contain browser process cleanup documentation', () => { 42 | const readmePath = path.join(rootDir, 'README.md'); 43 | const readmeContent = fs.readFileSync(readmePath, 'utf8'); 44 | expect(readmeContent).toContain('Browser process not closing properly'); 45 | 
expect(readmeContent).toContain('Windows'); 46 | expect(readmeContent).toContain('macOS'); 47 | expect(readmeContent).toContain('Linux'); 48 | expect(readmeContent).toContain('Playwright'); 49 | expect(readmeContent).toContain('issues'); 50 | expect(readmeContent).toContain('github.com/microsoft/playwright/issues'); 51 | }); 52 | }); 53 | -------------------------------------------------------------------------------- /src/tests/browser-error.test.ts: -------------------------------------------------------------------------------- 1 | import { test, expect, describe, jest, beforeAll } from '@jest/globals'; 2 | import fs from 'node:fs'; 3 | import path from 'node:path'; 4 | import { fileURLToPath } from 'node:url'; 5 | 6 | const __filename = fileURLToPath(import.meta.url); 7 | const __dirname = path.dirname(__filename); 8 | const rootDir = path.resolve(__dirname, '..'); 9 | const processEvents: Record<string, Array<() => void>> = { 10 | 'SIGINT': [], 11 | 'SIGTERM': [] 12 | }; 13 | 14 | let cleanupFn: boolean | undefined; 15 | 16 | beforeAll(() => { 17 | const executorPath = path.join(rootDir, 'src/executor.ts'); 18 | const executorContent = fs.readFileSync(executorPath, 'utf8'); 19 | if (executorContent.includes('process.on(\'SIGINT\'')) { 20 | processEvents['SIGINT'] = [() => {}]; 21 | } 22 | 23 | if (executorContent.includes('process.on(\'SIGTERM\'')) { 24 | processEvents['SIGTERM'] = [() => {}]; 25 | } 26 | 27 | if (executorContent.includes('cleanupBrowser')) { 28 | cleanupFn = true; 29 | } 30 | }); 31 | 32 | describe('Browser Error Handling Tests', () => { 33 | test('Executor should contain process cleanup handlers', async () => { 34 | const executorPath = path.join(rootDir, 'src/executor.ts'); 35 | const executorContent = fs.readFileSync(executorPath, 'utf8'); 36 | expect(executorContent.includes('process.on(\'SIGINT\'')).toBeTruthy(); 37 | expect(executorContent.includes('process.on(\'SIGTERM\'')).toBeTruthy(); 38 | 
/** Settings read from the command line; `null` means the flag was absent or its value was rejected. */
interface CliOverrides {
  browserType: string | null;
  viewportWidth: number | null;
  viewportHeight: number | null;
  deviceScaleFactor: number | null;
}

/** Returns `value` when it is a finite number greater than zero, otherwise `null`. */
function positiveOrNull(value: number): number | null {
  return Number.isFinite(value) && value > 0 ? value : null;
}

/**
 * Scans `args` for the supported `--flag value` pairs.
 *
 * A flag in the last position (with no value after it) is ignored, and when a
 * flag is repeated the last occurrence wins.
 */
function readCliOverrides(args: readonly string[]): CliOverrides {
  const overrides: CliOverrides = {
    browserType: null,
    viewportWidth: null,
    viewportHeight: null,
    deviceScaleFactor: null,
  };

  // Stop one short of the end: every supported flag needs a value after it.
  for (let i = 0; i + 1 < args.length; i++) {
    const value = args[i + 1];
    switch (args[i]) {
      case '--browser':
        overrides.browserType = value.toLowerCase();
        break;
      case '--viewport-width':
        overrides.viewportWidth = positiveOrNull(parseInt(value, 10));
        break;
      case '--viewport-height':
        overrides.viewportHeight = positiveOrNull(parseInt(value, 10));
        break;
      case '--device-scale-factor':
        overrides.deviceScaleFactor = positiveOrNull(parseFloat(value));
        break;
      default:
        break;
    }
  }

  return overrides;
}

/**
 * Reads the persisted configuration object.
 *
 * @returns The parsed object, or `{}` when the file does not exist.
 * @throws When the file is not valid JSON or its top-level value is not a plain object.
 */
function loadStoredConfig(configPath: string): Record<string, unknown> {
  if (!fs.existsSync(configPath)) {
    return {};
  }
  const parsed: unknown = JSON.parse(fs.readFileSync(configPath, 'utf8'));
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error(`Config file ${configPath} does not contain a JSON object`);
  }
  return parsed as Record<string, unknown>;
}

/**
 * Applies command-line overrides to the process environment and persists them.
 *
 * Recognised flags: `--browser`, `--viewport-width`, `--viewport-height` and
 * `--device-scale-factor`. Each accepted value is written to the matching
 * `MCP_*` environment variable and merged into
 * `~/.mcp_browser_agent_config.json`.
 *
 * The environment is updated before the config file is touched, so an
 * unreadable or malformed config file cannot cause the flags of the current
 * run to be ignored. Persistence stays best-effort: any failure is logged and
 * the existing file is left as it was.
 */
const parseArgs = () => {
  const { browserType, viewportWidth, viewportHeight, deviceScaleFactor } =
    readCliOverrides(process.argv.slice(2));

  // NOTE(review): these variables are presumably read by the executor — not visible here.
  if (browserType) {
    process.env.MCP_BROWSER_TYPE = browserType;
  }
  if (viewportWidth !== null) {
    process.env.MCP_VIEWPORT_WIDTH = viewportWidth.toString();
  }
  if (viewportHeight !== null) {
    process.env.MCP_VIEWPORT_HEIGHT = viewportHeight.toString();
  }
  if (deviceScaleFactor !== null) {
    process.env.MCP_DEVICE_SCALE_FACTOR = deviceScaleFactor.toString();
  }

  try {
    const configPath = path.join(os.homedir(), '.mcp_browser_agent_config.json');
    const config = loadStoredConfig(configPath);

    if (browserType) {
      config.browserType = browserType;
    }
    if (viewportWidth !== null) {
      config.viewportWidth = viewportWidth;
    }
    if (viewportHeight !== null) {
      config.viewportHeight = viewportHeight;
    }
    if (deviceScaleFactor !== null) {
      config.deviceScaleFactor = deviceScaleFactor;
    }

    fs.writeFileSync(configPath, JSON.stringify(config, null, 2));
  } catch (error) {
    console.error('Error saving config:', error);
  }
};
6 | 7 | | Version | Supported | 8 | | ------- | ------------------ | 9 | | 0.8.x | :white_check_mark: | 10 | | < 0.8.0 | :x: | 11 | 12 | ## Reporting a Vulnerability 13 | 14 | If you discover a security vulnerability in the MCP Browser Agent, please follow these steps: 15 | 16 | 1. **Do not disclose the vulnerability publicly** until it has been addressed by the maintainers. 17 | 2. Email the details to contact@ivanluna.dev with "MCP Browser Agent Security Vulnerability" in the subject line. 18 | 3. Include a detailed description of the vulnerability and steps to reproduce it if possible. 19 | 4. You can expect an initial response within 48 hours acknowledging receipt of your report. 20 | 5. We will keep you informed about the progress of addressing the vulnerability. 21 | 6. Once the vulnerability is fixed, we will credit you (if desired) in the release notes. 22 | 23 | ## Prohibited Uses and Security Warnings 24 | 25 | The MCP Browser Agent is a powerful tool that provides Claude with autonomous browser control capabilities. Due to its nature, there are significant security implications to be aware of: 26 | 27 | ### Prohibited Uses 28 | 29 | The MCP Browser Agent must NOT be used for: 30 | 31 | - **Unauthorized Data Collection**: Scraping data from websites without permission or in violation of terms of service. 32 | - **Privacy Invasion**: Capturing screenshots or data containing personal information without proper consent. 33 | - **Security Exploitation**: Executing malicious code, performing XSS attacks, or exploiting vulnerabilities. 34 | - **Automated Abuse**: Credential stuffing, brute force attacks, spam creation, or DDoS attacks. 35 | - **Authentication Bypass**: Circumventing authentication systems or accessing unauthorized content. 36 | - **Social Engineering**: Creating deceptive content or conducting phishing campaigns. 37 | - **API Abuse**: Overwhelming services with excessive requests or manipulating APIs without authorization. 
38 | - **Impersonation**: Interacting with websites while impersonating real users for deceptive purposes. 39 | - **Scraping Restrictions**: Users must respect robots.txt files and website scraping policies. 40 | 41 | ### Security Implications 42 | 43 | Users should be aware of the following security implications: 44 | 45 | 1. **Browser Context Exposure**: The browser instance controlled by the MCP Agent can execute arbitrary JavaScript, potentially exposing the host system to risks. 46 | 2. **Credential Security**: Any credentials used within the browser session could potentially be exposed through logs or screenshots. 47 | 3. **Network Exposure**: The agent can make HTTP requests to any endpoint, potentially accessing internal network resources. 48 | 4. **Resource Consumption**: Automated browsing can consume significant system resources, especially with multiple concurrent sessions. 49 | 5. **Data Leakage**: Screenshots and logs may contain sensitive information that should be handled securely. 50 | 51 | ### Security Best Practices 52 | 53 | To mitigate these risks: 54 | 55 | - Use the agent in an isolated environment whenever possible. 56 | - Never expose the agent to untrusted inputs or instructions. 57 | - Regularly review browser logs and screenshots to monitor usage. 58 | - Do not use the agent with sensitive accounts or on networks containing sensitive information. 59 | - Limit the agent's access to specific domains when possible. 60 | - Treat the browser instance as potentially compromised - do not use it for personal or sensitive activities. 61 | 62 | ## Legal Compliance 63 | 64 | The user is solely responsible for ensuring their use of this tool complies with: 65 | - Website terms of service and usage policies 66 | - Data protection regulations (such as GDPR, CCPA, etc.) 
67 | - Computer fraud and abuse laws 68 | - Intellectual property rights 69 | 70 | ## Disclaimer 71 | 72 | The developers of the MCP Browser Agent assume no liability for any misuse, damage, or legal consequences arising from the use of this tool. Users assume all responsibility and risk associated with deploying and using this agent. -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Node .gitignore 2 | # Logs 3 | logs 4 | *.log 5 | npm-debug.log* 6 | yarn-debug.log* 7 | yarn-error.log* 8 | lerna-debug.log* 9 | .pnpm-debug.log* 10 | 11 | # Diagnostic reports (https://nodejs.org/api/report.html) 12 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 13 | 14 | # Runtime data 15 | pids 16 | *.pid 17 | *.seed 18 | *.pid.lock 19 | 20 | # Directory for instrumented libs generated by jscoverage/JSCover 21 | lib-cov 22 | 23 | # Coverage directory used by tools like istanbul 24 | coverage 25 | *.lcov 26 | 27 | # nyc test coverage 28 | .nyc_output 29 | 30 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 31 | .grunt 32 | 33 | # Bower dependency directory (https://bower.io/) 34 | bower_components 35 | 36 | # node-waf configuration 37 | .lock-wscript 38 | 39 | # Compiled binary addons (https://nodejs.org/api/addons.html) 40 | build/Release 41 | 42 | # Dependency directories 43 | node_modules/ 44 | jspm_packages/ 45 | 46 | # Snowpack dependency directory (https://snowpack.dev/) 47 | web_modules/ 48 | 49 | # TypeScript cache 50 | *.tsbuildinfo 51 | 52 | # Optional npm cache directory 53 | .npm 54 | 55 | # Optional eslint cache 56 | .eslintcache 57 | 58 | # Optional stylelint cache 59 | .stylelintcache 60 | 61 | # Microbundle cache 62 | .rpt2_cache/ 63 | .rts2_cache_cjs/ 64 | .rts2_cache_es/ 65 | .rts2_cache_umd/ 66 | 67 | # Optional REPL history 68 | .node_repl_history 69 | 70 | # Output of 'npm pack' 71 | *.tgz 72 | 73 | 
# Yarn Integrity file 74 | .yarn-integrity 75 | 76 | # dotenv environment variable files 77 | .env 78 | .env.development.local 79 | .env.test.local 80 | .env.production.local 81 | .env.local 82 | 83 | # parcel-bundler cache (https://parceljs.org/) 84 | .cache 85 | .parcel-cache 86 | 87 | # Next.js build output 88 | .next 89 | out 90 | 91 | # Nuxt.js build / generate output 92 | .nuxt 93 | dist 94 | 95 | # Gatsby files 96 | .cache/ 97 | # Comment in the public line in if your project uses Gatsby and not Next.js 98 | # https://nextjs.org/blog/next-9-1#public-directory-support 99 | # public 100 | 101 | # vuepress build output 102 | .vuepress/dist 103 | 104 | # vuepress v2.x temp and cache directory 105 | .temp 106 | .cache 107 | 108 | # Docusaurus cache and generated files 109 | .docusaurus 110 | 111 | # Serverless directories 112 | .serverless/ 113 | 114 | # FuseBox cache 115 | .fusebox/ 116 | 117 | # DynamoDB Local files 118 | .dynamodb/ 119 | 120 | # TernJS port file 121 | .tern-port 122 | 123 | # Stores VSCode versions used for testing VSCode extensions 124 | .vscode-test 125 | 126 | # yarn v2 127 | .yarn/cache 128 | .yarn/unplugged 129 | .yarn/build-state.yml 130 | .yarn/install-state.gz 131 | .pnp.* 132 | 133 | # macOS .gitignore 134 | # General 135 | .DS_Store 136 | .AppleDouble 137 | .LSOverride 138 | 139 | # Icon must end with two \r 140 | Icon 141 | Icon? 
142 | 143 | # Thumbnails 144 | ._* 145 | 146 | # Files that might appear in the root of a volume 147 | .DocumentRevisions-V100 148 | .fseventsd 149 | .Spotlight-V100 150 | .TemporaryItems 151 | .Trashes 152 | .VolumeIcon.icns 153 | .com.apple.timemachine.donotpresent 154 | 155 | # Directories potentially created on remote AFP share 156 | .AppleDB 157 | .AppleDesktop 158 | Network Trash Folder 159 | Temporary Items 160 | .apdisk 161 | 162 | # Windows .gitignore 163 | # Windows thumbnail cache files 164 | Thumbs.db 165 | Thumbs.db:encryptable 166 | ehthumbs.db 167 | ehthumbs_vista.db 168 | 169 | # Dump file 170 | *.stackdump 171 | 172 | # Folder config file 173 | [Dd]esktop.ini 174 | 175 | # Recycle Bin used on file shares 176 | $RECYCLE.BIN/ 177 | 178 | # Windows Installer files 179 | *.cab 180 | *.msi 181 | *.msix 182 | *.msm 183 | *.msp 184 | 185 | # Windows shortcuts 186 | *.lnk 187 | 188 | # Linux .gitignore 189 | # gitignore template for creating Snap packages 190 | # website: https://snapcraft.io/ 191 | 192 | parts/ 193 | prime/ 194 | stage/ 195 | *.snap 196 | 197 | # Snapcraft global state tracking data (automatically generated) 198 | # https://forum.snapcraft.io/t/location-to-save-global-state/768 199 | /snap/.snapcraft/ 200 | 201 | # Source archive packed by `snapcraft cleanbuild` before pushing to the LXD container 202 | /*_source.tar.bz2 203 | -------------------------------------------------------------------------------- /src/tools.ts: -------------------------------------------------------------------------------- 1 | import { Tool } from "@modelcontextprotocol/sdk/types.js"; 2 | 3 | export const BROWSER_TOOLS = [ 4 | "browser_navigate", 5 | "browser_screenshot", 6 | "browser_click", 7 | "browser_fill", 8 | "browser_select", 9 | "browser_hover", 10 | "browser_evaluate", 11 | "browser_set_viewport" 12 | ]; 13 | 14 | export const API_TOOLS = [ 15 | "api_get", 16 | "api_post", 17 | "api_put", 18 | "api_patch", 19 | "api_delete" 20 | ]; 21 | 22 | export 
/**
 * Builds the list of tool definitions (name, description and JSON-Schema
 * `inputSchema`) that this module exposes.
 *
 * The list contains eight `browser_*` tools followed by five `api_*` tools.
 * Every call returns freshly created objects, so callers may mutate the result
 * without affecting later calls.
 *
 * @returns The tool definitions, browser tools first, then API tools.
 */
function registerTools(): Tool[] {
  return [
    {
      name: "browser_set_viewport",
      description: "Change the browser's viewport size and scale factor",
      inputSchema: {
        type: "object",
        properties: {
          width: { type: "number", description: "Viewport width in pixels" },
          height: { type: "number", description: "Viewport height in pixels" },
          deviceScaleFactor: { type: "number", description: "Device scale factor (affects how content is scaled)" }
        },
        // All viewport fields are optional.
        required: []
      }
    },
    {
      name: "browser_navigate",
      description: "Navigate to a specific URL",
      inputSchema: {
        type: "object",
        properties: {
          url: { type: "string", description: "URL to navigate to" },
          timeout: { type: "number", description: "Navigation timeout in milliseconds" },
          waitUntil: {
            type: "string",
            description: "Navigation wait criteria",
            enum: ["load", "domcontentloaded", "networkidle", "commit"]
          }
        },
        required: ["url"]
      }
    },
    {
      name: "browser_screenshot",
      description: "Capture a screenshot of the current page or a specific element",
      inputSchema: {
        type: "object",
        properties: {
          name: { type: "string", description: "Identifier for the screenshot" },
          selector: { type: "string", description: "CSS selector for element to capture" },
          fullPage: { type: "boolean", description: "Capture full page height" },
          mask: {
            type: "array",
            description: "Selectors for elements to mask",
            items: { type: "string" }
          },
          savePath: { type: "string", description: "Path to save screenshot (default: user's Downloads folder)" }
        },
        required: ["name"]
      }
    },
    {
      name: "browser_click",
      description: "Click an element on the page",
      inputSchema: {
        type: "object",
        properties: {
          selector: { type: "string", description: "CSS selector for element to click" }
        },
        required: ["selector"]
      }
    },
    {
      name: "browser_fill",
      description: "Fill a form input with text",
      inputSchema: {
        type: "object",
        properties: {
          selector: { type: "string", description: "CSS selector for input field" },
          value: { type: "string", description: "Text to enter in the field" }
        },
        required: ["selector", "value"]
      }
    },
    {
      name: "browser_select",
      description: "Select an option from a dropdown menu",
      inputSchema: {
        type: "object",
        properties: {
          selector: { type: "string", description: "CSS selector for select element" },
          value: { type: "string", description: "Value or label to select" }
        },
        required: ["selector", "value"]
      }
    },
    {
      name: "browser_hover",
      description: "Hover over an element on the page",
      inputSchema: {
        type: "object",
        properties: {
          selector: { type: "string", description: "CSS selector for element to hover over" }
        },
        required: ["selector"]
      }
    },
    {
      name: "browser_evaluate",
      description: "Execute JavaScript in the browser context",
      inputSchema: {
        type: "object",
        properties: {
          script: { type: "string", description: "JavaScript code to execute" }
        },
        required: ["script"]
      }
    },

    // The API tools differ only by HTTP verb and by whether a body is sent,
    // so they are generated from a single template.
    buildApiTool("api_get", "GET", false),
    buildApiTool("api_post", "POST", true),
    buildApiTool("api_put", "PUT", true),
    buildApiTool("api_patch", "PATCH", true),
    buildApiTool("api_delete", "DELETE", false)
  ];
}

/**
 * Creates the definition of one HTTP API tool.
 *
 * Declared as a hoisted function (not a `const`) so it is available no matter
 * when `registerTools` is first invoked.
 *
 * @param name - Tool name, e.g. `"api_post"`.
 * @param method - HTTP verb inserted into the description, e.g. `"POST"`.
 * @param hasBody - When `true`, the schema includes a required `data` string.
 * @returns A tool definition with a `url` (required), optional `headers`, and,
 *   for body-carrying verbs, a required `data` property.
 */
function buildApiTool(name: string, method: string, hasBody: boolean): Tool {
  // Fresh objects per tool: no schema fragment is shared between definitions.
  const url = { type: "string", description: "API endpoint URL" };
  const data = { type: "string", description: "Request body data (JSON string)" };
  const headers = {
    type: "object",
    description: "Request headers",
    additionalProperties: { type: "string" }
  };

  return {
    name,
    description: `Perform a ${method} request to an API endpoint`,
    inputSchema: {
      type: "object",
      // Key order (url, data, headers) matches the published schema order.
      properties: hasBody ? { url, data, headers } : { url, headers },
      required: hasBody ? ["url", "data"] : ["url"]
    }
  };
}
"Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. "Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. "Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. "Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. "Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. "License" 42 | means this document. 43 | 44 | 1.9. "Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. "Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 58 | 59 | 1.11. 
"Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. "Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. "You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. 
Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. 
Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. 
Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. 
Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. Termination 233 | -------------- 234 | 235 | 5.1. The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. 
However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. 
Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. 
This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. 
Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. 
374 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MCP Browser Agent 2 | 3 | [![Trust Score](https://archestra.ai/mcp-catalog/api/badge/quality/imprvhub/mcp-browser-agent)](https://archestra.ai/mcp-catalog/imprvhub__mcp-browser-agent) 4 | [![smithery badge](https://smithery.ai/badge/@imprvhub/mcp-browser-agent)](https://smithery.ai/server/@imprvhub/mcp-browser-agent) 5 | 6 | 7 | 8 | 9 | 12 | 13 |
A powerful Model Context Protocol (MCP) integration that provides Claude Desktop with autonomous browser automation capabilities. 10 | Browser Agent MCP server 11 |
14 | 15 | ## Features 16 | 17 | - **Advanced Browser Automation** 18 | - Navigate to any URL with customizable load strategies 19 | - Capture full-page or element-specific screenshots 20 | - Perform precise DOM interactions (click, fill, select, hover) 21 | - Execute arbitrary JavaScript in browser context with console logs capture 22 | 23 | - **Powerful API Client** 24 | - Execute HTTP requests (GET, POST, PUT, PATCH, DELETE) 25 | - Configure request headers and body content 26 | - Process response data with JSON formatting 27 | - Error handling with detailed feedback 28 | 29 | - **MCP Resource Management** 30 | - Access browser console logs as resources 31 | - Retrieve screenshots through MCP resource interface 32 | - Persistent session with headful browser instance 33 | 34 | - **AI Agent Capabilities** 35 | - Chain multiple browser operations for complex tasks 36 | - Follow multi-step instructions with intelligent error recovery 37 | - Technical task automation through natural language instructions 38 | 39 | ## Demo 40 | 41 |

42 | 43 | Browser Agent MCP server demo 44 | 45 |

46 | 47 |
48 | Timestamps: 49 | 50 | Click on any timestamp to jump to that section of the video 51 | 52 | [**00:00**](https://www.youtube.com/watch?v=0lMsKiTy7TE&t=0s) - **Google Search for MCP** 53 | Navigation to Google homepage and search for "Model Context Protocol". Demonstration of Claude Desktop using the MCP integration to perform a basic web search and process the results. 54 | 55 | [**00:33**](https://www.youtube.com/watch?v=0lMsKiTy7TE&t=33s) - **Screenshot Capture** 56 | Taking a screenshot of the search results with a custom filename and showcasing it in Finder. Shows how Claude can capture and save visual content from web pages during browser automation. 57 | 58 | [**01:00**](https://www.youtube.com/watch?v=0lMsKiTy7TE&t=60s) - **Wikipedia Search** 59 | Navigation to Wikipedia.org and search for "Model Context Protocol". Illustrates Claude's ability to interact with different websites and their search functionality through the MCP integration. 60 | 61 | [**01:38**](https://www.youtube.com/watch?v=0lMsKiTy7TE&t=98s) - **Dropdown Menu Interaction I** 62 | Navigation to a test website (the-internet.herokuapp.com/dropdown) and selection of "Option 1" from a dropdown menu. Demonstrates Claude's capability to interact with form elements and make selections. 63 | 64 | [**01:56**](https://www.youtube.com/watch?v=0lMsKiTy7TE&t=116s) - **Dropdown Menu Interaction II** 65 | Changing the selection to "Option 2" from the same dropdown menu. Shows Claude's ability to manipulate the same form element multiple times and make different selections. 66 | 67 | [**02:09**](https://www.youtube.com/watch?v=0lMsKiTy7TE&t=129s) - **Login Form Completion** 68 | Navigation to a login page (the-internet.herokuapp.com/login) and filling in the username field with "tomsmith" and password field with "SuperSecretPassword!". Demonstrates form filling automation. 
69 | 70 | [**02:28**](https://www.youtube.com/watch?v=0lMsKiTy7TE&t=148s) - **Login Submission** 71 | Submitting the login credentials and completing the authentication process. Shows Claude's ability to trigger form submissions and navigate through multi-step processes. 72 | 73 | [**02:36**](https://www.youtube.com/watch?v=0lMsKiTy7TE&t=156s) - **API Request Execution** 74 | Performing a GET request to JSONPlaceholder API endpoint. Demonstrates Claude's capability to make direct API calls and process the returned data through the MCP integration. 75 |
76 | 77 | ## Requirements 78 | 79 | - Node.js 16 or higher 80 | - Claude Desktop 81 | - Playwright dependencies 82 | 83 | ### Browser Support 84 | 85 | ```bash 86 | npm init playwright@latest 87 | ``` 88 | 89 | This package includes Playwright and the necessary dependencies for running browser automation. When you run `npm install`, the required Playwright dependencies will be installed. The package supports the following browsers: 90 | 91 | - Chrome (default) 92 | - Firefox 93 | - Microsoft Edge 94 | - WebKit (Safari engine) 95 | 96 | When you first use a browser type, Playwright will automatically install the corresponding browser drivers as needed. You can also install them manually with the following commands: 97 | 98 | ``` 99 | npx playwright install chrome 100 | npx playwright install firefox 101 | npx playwright install webkit 102 | npx playwright install msedge 103 | ``` 104 | 105 | > **Note about Safari**: Playwright doesn't provide direct support for Safari browser. Instead, it uses WebKit, which is the browser engine that powers Safari. 106 | > 107 | > **Note about Edge**: When selecting Edge as the browser type, the agent will actually launch Microsoft Edge (not Chromium). Technically, in Playwright, Edge is launched using the Chromium browser instance with the 'msedge' channel parameter because Microsoft Edge is based on Chromium. 108 | ## Installation 109 | 110 | ### Installing Manually 111 | 1. Clone or download this repository: 112 | ``` 113 | git clone https://github.com/imprvhub/mcp-browser-agent 114 | cd mcp-browser-agent 115 | ``` 116 | 117 | 2. Install dependencies: 118 | ``` 119 | npm install 120 | ``` 121 | 122 | 3. Build the project: 123 | ``` 124 | npm run build 125 | ``` 126 | 127 | ## Running the MCP Server 128 | 129 | There are two ways to run the MCP server: 130 | 131 | ### Option 1: Running manually 132 | 133 | 1. Open a terminal or command prompt 134 | 2. Navigate to the project directory 135 | 3. 
Run the server directly: 136 | 137 | ``` 138 | node dist/index.js 139 | ``` 140 | 141 | Keep this terminal window open while using Claude Desktop. The server will run until you close the terminal. 142 | 143 | ### Option 2: Auto-starting with Claude Desktop (recommended for regular use) 144 | 145 | Claude Desktop can automatically start the MCP server when needed. To set this up: 146 | 147 | #### Configuration 148 | 149 | The Claude Desktop configuration file is located at: 150 | 151 | - **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json` 152 | - **Windows**: `%APPDATA%\Claude\claude_desktop_config.json` 153 | - **Linux**: `~/.config/Claude/claude_desktop_config.json` 154 | 155 | Edit this file to add the Browser Agent MCP configuration. If the file doesn't exist, create it: 156 | 157 | ```json 158 | { 159 | "mcpServers": { 160 | "browserAgent": { 161 | "command": "node", 162 | "args": ["ABSOLUTE_PATH_TO_DIRECTORY/mcp-browser-agent/dist/index.js", 163 | "--browser", 164 | "chrome" 165 | ] 166 | } 167 | } 168 | } 169 | ``` 170 | 171 | **Important**: Replace `ABSOLUTE_PATH_TO_DIRECTORY` with the **complete absolute path** of the directory that contains the cloned `mcp-browser-agent` folder (the placeholder is followed by `/mcp-browser-agent/dist/index.js`): 172 | - macOS/Linux example: `/Users/username` 173 | - Windows example: `C:\\Users\\username` 174 | 175 | If you already have other MCPs configured, simply add the "browserAgent" section inside the "mcpServers" object. 
Here's an example of a configuration with multiple MCPs: 176 | 177 | ```json 178 | { 179 | "mcpServers": { 180 | "otherMcp1": { 181 | "command": "...", 182 | "args": ["..."] 183 | }, 184 | "otherMcp2": { 185 | "command": "...", 186 | "args": ["..."] 187 | }, 188 | "browserAgent": { 189 | "command": "node", 190 | "args": [ 191 | "ABSOLUTE_PATH_TO_DIRECTORY/mcp-browser-agent/dist/index.js", 192 | "--browser", 193 | "chrome" 194 | ] 195 | } 196 | } 197 | } 198 | ``` 199 | 200 | ## Browser Selection 201 | 202 | The MCP Browser Agent supports multiple browser types. By default, it uses Chrome, but you can specify a different browser in several ways: 203 | 204 | ### Option 1: Configuration File 205 | 206 | Create or edit the file `.mcp_browser_agent_config.json` in your home directory: 207 | 208 | ```json 209 | { 210 | "browserType": "chrome" 211 | } 212 | ``` 213 | 214 | Supported values for `browserType` are: 215 | - `chrome` - Uses installed Chrome (default) 216 | - `firefox` - Uses Firefox 'Nightly' browser 217 | - `webkit` - Uses WebKit engine (Note: This is not Safari itself but the WebKit rendering engine that powers Safari) 218 | - `edge` - Uses Microsoft Edge 219 | 220 | > **Note about Safari**: Playwright doesn't provide direct support for Safari browser. Instead, it uses WebKit, which is the browser engine that powers Safari. The WebKit implementation in Playwright provides similar functionality but is not identical to the Safari browser experience. 
221 | 222 | ### Option 2: Command Line Argument 223 | 224 | When starting the MCP server manually, you can specify the browser type: 225 | 226 | ``` 227 | node dist/index.js --browser firefox 228 | ``` 229 | 230 | ### Option 3: Environment Variable 231 | 232 | Set the `MCP_BROWSER_TYPE` environment variable: 233 | 234 | ``` 235 | MCP_BROWSER_TYPE=firefox node dist/index.js 236 | ``` 237 | 238 | ### Option 4: Claude Desktop Configuration 239 | 240 | When configuring the MCP in Claude Desktop's `claude_desktop_config.json`, you can specify the browser type: 241 | 242 | ```json 243 | { 244 | "mcpServers": { 245 | "browserAgent": { 246 | "command": "node", 247 | "args": [ 248 | "ABSOLUTE_PATH_TO_DIRECTORY/mcp-browser-agent/dist/index.js", 249 | "--browser", 250 | "chrome" 251 | ] 252 | } 253 | } 254 | } 255 | ``` 256 | 257 | ## Technical Implementation 258 | 259 | MCP Browser Agent is built on the Model Context Protocol, enabling Claude to interact with a headful browser through Playwright. The implementation consists of four main components: 260 | 261 | 1. **Server (index.ts)** 262 | - Initializes the MCP server using the standard Model Context Protocol 263 | - Configures server capabilities for tools and resources 264 | - Establishes communication with Claude through the stdio transport 265 | 266 | 2. **Tools Registry (tools.ts)** 267 | - Defines browser and API tool schemas 268 | - Specifies parameters, validation rules, and descriptions 269 | - Registers tools with the MCP server for Claude's discovery 270 | 271 | 3. **Request Handlers (handlers.ts)** 272 | - Manages MCP protocol requests for tools and resources 273 | - Exposes browser logs and screenshots as queryable resources 274 | - Routes tool execution requests to the appropriate handlers 275 | 276 | 4. 
**Executor (executor.ts)** 277 | - Manages browser and API client lifecycle 278 | - Implements browser automation functions using Playwright 279 | - Handles API requests with proper error handling and response parsing 280 | - Maintains stateful browser session between commands 281 | 282 | ### Agent Capabilities 283 | 284 | Unlike basic integrations, MCP Browser Agent functions as a true AI agent by: 285 | 286 | - Maintaining persistent browser state across multiple commands 287 | - Capturing detailed console logs for debugging 288 | - Storing screenshots for reference and review 289 | - Managing complex interaction sequences 290 | - Providing detailed error information for recovery 291 | - Supporting chained operations for complex workflows 292 | 293 | ## Available Tools 294 | 295 | ### Browser Tools 296 | 297 | | Tool Name | Description | Parameters | 298 | |-----------|-------------|------------| 299 | | `browser_navigate` | Navigate to a URL | `url` (required), `timeout`, `waitUntil` | 300 | | `browser_screenshot` | Capture screenshot | `name` (required), `selector`, `fullPage`, `mask`, `savePath` | 301 | | `browser_click` | Click element | `selector` (required) | 302 | | `browser_fill` | Fill form input | `selector` (required), `value` (required) | 303 | | `browser_select` | Select dropdown option | `selector` (required), `value` (required) | 304 | | `browser_hover` | Hover over element | `selector` (required) | 305 | | `browser_evaluate` | Execute JavaScript | `script` (required) | 306 | 307 | ### API Tools 308 | 309 | | Tool Name | Description | Parameters | 310 | |-----------|-------------|------------| 311 | | `api_get` | GET request | `url` (required), `headers` | 312 | | `api_post` | POST request | `url` (required), `data` (required), `headers` | 313 | | `api_put` | PUT request | `url` (required), `data` (required), `headers` | 314 | | `api_patch` | PATCH request | `url` (required), `data` (required), `headers` | 315 | | `api_delete` | DELETE request | 
`url` (required), `headers` | 316 | 317 | ## Resource Access 318 | 319 | The MCP Browser Agent exposes the following resources: 320 | 321 | - `browser://logs` - Access browser console logs 322 | - `screenshot://[name]` - Access screenshots by name 323 | 324 | 325 | ## Example Usage 326 | 327 | Here are some realistic examples of how to use the MCP Browser Agent with Claude: 328 | 329 | ### Basic Browser Navigation 330 | 331 | ``` 332 | Navigate to the Google homepage at https://www.google.com 333 | ``` 334 | 335 | ``` 336 | Take a screenshot of the current page and name it "google-homepage" 337 | ``` 338 | 339 | ``` 340 | Type "weather forecast" in the search box 341 | ``` 342 | 343 | ### Simple Interactions 344 | 345 | ``` 346 | Navigate to https://www.wikipedia.org and search for "Model Context Protocol" 347 | ``` 348 | 349 | ``` 350 | Go to https://the-internet.herokuapp.com/dropdown and select the option "Option 1" from the dropdown 351 | ``` 352 | 353 | ### Basic Form Filling 354 | 355 | ``` 356 | Navigate to https://the-internet.herokuapp.com/login and fill in the username field with "tomsmith" and the password field with "SuperSecretPassword!" 
357 | ``` 358 | 359 | ``` 360 | Go to https://the-internet.herokuapp.com/login, fill in the username and password fields, then click the login button 361 | ``` 362 | 363 | ### Simple JavaScript Execution 364 | 365 | ``` 366 | Go to https://example.com and execute a JavaScript script to return the page title 367 | ``` 368 | 369 | ``` 370 | Navigate to https://www.google.com and execute a JavaScript script to count the number of links on the page 371 | ``` 372 | 373 | ### Basic API Requests 374 | 375 | ``` 376 | Perform a GET request to https://jsonplaceholder.typicode.com/todos/1 377 | ``` 378 | 379 | ``` 380 | Make a POST request to https://jsonplaceholder.typicode.com/posts with appropriate JSON data 381 | ``` 382 | 383 | These examples reflect what the MCP Browser Agent can realistically accomplish in its current state. 384 | 385 | ## Troubleshooting 386 | 387 | ### "Server disconnected" error 388 | If you see the error "MCP Browser Agent: Server disconnected" in Claude Desktop: 389 | 390 | 1. **Verify the server is running**: 391 | - Open a terminal and manually run `node dist/index.js` from the project directory 392 | - If the server starts successfully, use Claude while keeping this terminal open 393 | 394 | 2. **Check your configuration**: 395 | - Ensure the absolute path in `claude_desktop_config.json` is correct for your system 396 | - Double-check that you've used double backslashes (`\\`) for Windows paths 397 | - Verify you're using the complete path from the root of your filesystem 398 | 399 | ### Browser not appearing 400 | If the browser doesn't launch or you don't see it: 401 | 402 | 1. **Check if the specified browser is installed** 403 | - Verify that you have the browser (Chrome, Firefox, Edge, or Safari/WebKit) installed on your system 404 | - The browser drivers are handled automatically by Playwright 405 | 406 | 2. 
**Restart the server and Claude Desktop** 407 | - Kill any existing node processes that might be running the server 408 | - Restart Claude Desktop to establish a fresh connection 409 | 410 | ### Browser process not closing properly 411 | There are known issues with Chromium and Chrome browsers where the process sometimes doesn't terminate properly after use. If you experience this issue: 412 | 413 | 1. **Manually close the browser process**: 414 | - **Windows**: Press Ctrl+Shift+Esc to open Task Manager, find the Chrome/Chromium process and end it 415 | - **macOS**: Open Activity Monitor (Applications > Utilities > Activity Monitor), find the Chrome/Chromium process and click the X to terminate it 416 | - **Linux**: Run `ps aux | grep chrome` or `ps aux | grep chromium` to find the process ID, then `kill <PID>` to terminate it 417 | 418 | 2. **Note about browser compatibility**: 419 | - This issue has been observed primarily with Chromium and Chrome 420 | - Firefox and Playwright's built-in browser don't typically experience this problem 421 | 422 | > [!CAUTION] 423 | > This MCP integration is built on Playwright, which has known issues and bugs that may affect its operation. Please report any issues you encounter with the browser automation to [Playwright's GitHub issues](https://github.com/microsoft/playwright/issues). The Playwright team is continuously working to address these issues, but this agent provides a foundation for browser automation capabilities with Claude Desktop despite these limitations. 
424 | 425 | ## Development 426 | 427 | ### Project Structure 428 | 429 | - `src/index.ts`: Main entry point and MCP server initialization 430 | - `src/tools.ts`: Tool schemas and registration 431 | - `src/handlers.ts`: MCP request handlers for tools and resources 432 | - `src/executor.ts`: Tool implementation logic using Playwright 433 | 434 | ### Building 435 | 436 | ``` 437 | npm run build 438 | ``` 439 | 440 | ### Watching for Changes 441 | 442 | ``` 443 | npm run watch 444 | ``` 445 | 446 | ## Testing 447 | 448 | The project includes tests to verify core functionality and browser handling. 449 | 450 | ``` 451 | npm test # Run tests 452 | npm run test:watch # Watch mode 453 | npm run test:coverage # Coverage report 454 | ``` 455 | 456 | Tests verify configuration integrity, browser automation features, error handling, and process cleanup. The test suite focuses particularly on ensuring proper handling of browser processes due to known issues with Chrome/Chromium termination. 457 | 458 | ## Security Considerations 459 | 460 | > [!IMPORTANT] 461 | > This MCP integration provides Claude with autonomous browser control capabilities. Please review our [Security Policy](SECURITY.md) for important information about prohibited uses, security implications, and best practices. 462 | 463 | The MCP Browser Agent is designed for legitimate automation tasks but could potentially be misused. Users are responsible for ensuring their use complies with all applicable laws, terms of service, and ethical guidelines. See our detailed [Security Policy](SECURITY.md) for more information. 464 | 465 | ## Contributing 466 | 467 | Contributions to the MCP Browser Agent are welcome! 
Here are some areas where you can help: 468 | 469 | - Adding new browser automation capabilities 470 | - Improving error handling and recovery 471 | - Enhancing screenshot and resource management 472 | - Creating useful workflows and examples 473 | - Optimizing performance for complex operations 474 | 475 | ## License 476 | 477 | This project is licensed under the Mozilla Public License 2.0 - see the [LICENSE](https://github.com/imprvhub/mcp-browser-agent/blob/main/LICENSE) file for details. 478 | 479 | ## Related Links 480 | 481 | - [Model Context Protocol](https://modelcontextprotocol.io/) 482 | - [Claude Desktop](https://claude.ai/download) 483 | - [Playwright Documentation](https://playwright.dev/docs/intro) 484 | - [MCP Series](https://github.com/mcp-series) 485 | -------------------------------------------------------------------------------- /src/executor.ts: --------------------------------------------------------------------------------
import { chromium, firefox, webkit, Browser, Page, request, APIRequestContext, BrowserType } from "playwright";
import { CallToolResult, TextContent, ImageContent } from "@modelcontextprotocol/sdk/types.js";
import { BROWSER_TOOLS, API_TOOLS } from "./tools.js";
import path from 'node:path';
import fs from 'node:fs';
import os from 'node:os';

/** Envelope every tool handler returns to the MCP request handler. */
type ToolOutcome = { toolResult: CallToolResult };

/** Response type produced by Playwright's APIRequestContext verbs. */
type ApiResponse = Awaited<ReturnType<APIRequestContext["get"]>>;

/** Effective launch/viewport settings after merging defaults, config file and environment. */
interface AgentConfig {
  browserType: string;
  viewportWidth: number;
  viewportHeight: number;
  deviceScaleFactor: number;
}

// Module-level session state: one browser and one page are shared by all tool calls.
let browser: Browser | null = null;
let page: Page | null = null;
const browserLogs: string[] = [];
const screenshotRegistry = new Map<string, string>();
const defaultDownloadsPath = path.join(os.homedir(), 'Downloads');

/** Location of the per-user JSON config file read by getConfig and written by browser_set_viewport. */
function configFilePath(): string {
  return path.join(os.homedir(), '.mcp_browser_agent_config.json');
}

/** Returns `value` when it is a finite number greater than zero, otherwise `undefined`. */
function positiveNumber(value: unknown): number | undefined {
  return typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : undefined;
}

/** Reads a numeric environment variable; unset, empty or unparsable values yield `undefined`. */
function readEnvNumber(name: string, parse: (raw: string) => number): number | undefined {
  const raw = process.env[name];
  return raw ? positiveNumber(parse(raw)) : undefined;
}

/** Extracts a printable message from anything that may have been thrown. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Builds a single-text-item tool result. */
function textOutcome(text: string, isError = false): ToolOutcome {
  return {
    toolResult: {
      content: [{ type: "text", text }],
      isError,
    },
  };
}

/** Builds an error tool result of the form `<prefix>: <error message>`. */
function failure(prefix: string, error: unknown): ToolOutcome {
  return textOutcome(`${prefix}: ${describeError(error)}`, true);
}

/**
 * Resolves the effective configuration.
 *
 * Precedence per setting: environment variable, then the config file in the
 * user's home directory, then the built-in default. Values that are not
 * usable (non-numeric or non-positive numbers, non-string browser type) are
 * skipped so a malformed entry cannot produce a NaN viewport.
 */
const getConfig = (): AgentConfig => {
  const config: AgentConfig = {
    browserType: 'chrome',
    viewportWidth: 1280,
    viewportHeight: 800,
    deviceScaleFactor: 1.25
  };

  let fileConfig: Record<string, unknown> = {};
  try {
    const configPath = configFilePath();
    if (fs.existsSync(configPath)) {
      const parsed: unknown = JSON.parse(fs.readFileSync(configPath, 'utf8'));
      if (parsed !== null && typeof parsed === 'object') {
        fileConfig = parsed as Record<string, unknown>;
      }
    }
  } catch (error) {
    // A broken config file must not prevent the server from starting.
    console.error('Error reading config file:', error);
  }

  const envBrowserType = process.env.MCP_BROWSER_TYPE;
  if (envBrowserType) {
    config.browserType = envBrowserType.toLowerCase();
  } else if (typeof fileConfig.browserType === 'string' && fileConfig.browserType) {
    config.browserType = fileConfig.browserType.toLowerCase();
  }

  config.viewportWidth =
    readEnvNumber('MCP_VIEWPORT_WIDTH', (raw) => parseInt(raw, 10)) ??
    positiveNumber(fileConfig.viewportWidth) ??
    config.viewportWidth;

  config.viewportHeight =
    readEnvNumber('MCP_VIEWPORT_HEIGHT', (raw) => parseInt(raw, 10)) ??
    positiveNumber(fileConfig.viewportHeight) ??
    config.viewportHeight;

  config.deviceScaleFactor =
    readEnvNumber('MCP_DEVICE_SCALE_FACTOR', (raw) => parseFloat(raw)) ??
    positiveNumber(fileConfig.deviceScaleFactor) ??
    config.deviceScaleFactor;

  return config;
};

/** Returns the live array of captured browser console entries (`[type] text`). */
export function getBrowserLogs(): string[] {
  return browserLogs;
}

/** Returns the live map of screenshot name to base64-encoded PNG data. */
export function getScreenshotRegistry(): Map<string, string> {
  return screenshotRegistry;
}

// Close the browser before exiting so no browser process is left behind.
for (const signal of ['SIGINT', 'SIGTERM'] as const) {
  process.on(signal, async () => {
    await cleanupBrowser();
    process.exit(0);
  });
}

/**
 * Closes the shared browser, if any, and clears the session state.
 *
 * State is cleared before closing so a failed close cannot leave a stale
 * handle behind. Messages go to stderr because stdout carries the MCP
 * stdio protocol stream.
 */
async function cleanupBrowser(): Promise<void> {
  if (!browser) {
    return;
  }
  const closing = browser;
  browser = null;
  page = null;
  try {
    await closing.close();
    console.error('Browser instance closed successfully');
  } catch (error) {
    console.error('Error closing browser:', error);
  }
}

/** Detects a Docker container via `/.dockerenv` or a `docker` entry in `/proc/1/cgroup`. */
function isRunningInDocker(): boolean {
  if (fs.existsSync('/.dockerenv')) {
    return true;
  }
  try {
    return fs.readFileSync('/proc/1/cgroup', 'utf8').includes('docker');
  } catch {
    // File missing or unreadable (e.g. non-Linux host): treat as "not Docker".
    return false;
  }
}

/**
 * Maps a configured browser name to the Playwright engine and channel to launch.
 * Branded channels (`chrome`, `msedge`) are only requested outside Docker;
 * unknown names fall back to bundled Chromium.
 */
function resolveBrowserLaunch(
  browserType: string,
  isDocker: boolean
): { engine: BrowserType; channel: string | undefined } {
  switch (browserType) {
    case 'firefox':
      return { engine: firefox, channel: undefined };
    case 'webkit':
    case 'safari':
      return { engine: webkit, channel: undefined };
    case 'edge':
    case 'msedge':
      return { engine: chromium, channel: isDocker ? undefined : 'msedge' };
    case 'chrome':
      return { engine: chromium, channel: isDocker ? undefined : 'chrome' };
    case 'chromium':
    default:
      return { engine: chromium, channel: undefined };
  }
}

/**
 * Returns the shared page, launching the browser on first use.
 *
 * If the previous browser was disconnected or its page was closed (for
 * example the user closed the window), the stale session is discarded and a
 * fresh one is launched. Session state is only published once the page exists,
 * so a failed launch never leaves a half-initialized session.
 */
async function initBrowser(): Promise<Page> {
  if (browser && browser.isConnected() && page && !page.isClosed()) {
    return page;
  }
  await cleanupBrowser();

  const config = getConfig();
  const isDocker = isRunningInDocker();
  const { engine, channel } = resolveBrowserLaunch(config.browserType, isDocker);

  // Headless only inside Docker, where no display is available.
  const launched = await engine.launch({ headless: isDocker, channel });
  try {
    const context = await launched.newContext({
      viewport: {
        width: config.viewportWidth,
        height: config.viewportHeight
      },
      deviceScaleFactor: config.deviceScaleFactor,
    });

    const freshPage = await context.newPage();
    freshPage.on("console", (msg) => {
      browserLogs.push(`[${msg.type()}] ${msg.text()}`);
    });

    browser = launched;
    page = freshPage;
    return freshPage;
  } catch (error) {
    // Best-effort cleanup; the original failure is what the caller needs to see.
    await launched.close().catch(() => undefined);
    throw error;
  }
}

/** Creates a Playwright request context whose base URL is `baseUrl`. */
async function initApiClient(baseUrl: string): Promise<APIRequestContext> {
  return await request.newContext({
    baseURL: baseUrl,
  });
}

/**
 * Renders a response body as a single text item. JSON bodies are
 * pretty-printed; if JSON parsing fails, the raw text is used instead.
 */
async function getResponseData(response: ApiResponse): Promise<TextContent[]> {
  const contentType = response.headers()['content-type'] || '';
  let responseText: string;
  if (contentType.includes('application/json')) {
    try {
      responseText = JSON.stringify(await response.json(), null, 2);
    } catch {
      responseText = await response.text();
    }
  } else {
    responseText = await response.text();
  }
  return [{
    type: "text",
    text: `Response body:\n${responseText}`,
  }];
}

/**
 * Dispatches a tool call to its handler.
 *
 * @param toolName - Name of the tool to run.
 * @param args - Tool arguments as received from the client.
 * @param server - MCP server instance, used to send resource notifications.
 * @returns The tool result; failures are reported with `isError: true`
 *   rather than thrown.
 */
export async function executeToolCall(
  toolName: string,
  args: any,
  server: any
): Promise<{ toolResult: CallToolResult }> {
  let apiClient: APIRequestContext | null = null;
  try {
    const isBrowserTool = BROWSER_TOOLS.includes(toolName);
    const isApiTool = API_TOOLS.includes(toolName);

    let activePage: Page | null = null;

    if (isBrowserTool) {
      activePage = await initBrowser();
    }

    if (isApiTool) {
      apiClient = await initApiClient(args.url);
    }

    switch (toolName) {
      case "browser_set_viewport":
        return await handleBrowserSetViewport(activePage!, args);

      case "browser_navigate":
        return await handleBrowserNavigate(activePage!, args);

      case "browser_screenshot":
        return await handleBrowserScreenshot(activePage!, args, server);

      case "browser_click":
        return await handleBrowserClick(activePage!, args);

      case "browser_fill":
        return await handleBrowserFill(activePage!, args);

      case "browser_select":
        return await handleBrowserSelect(activePage!, args);

      case "browser_hover":
        return await handleBrowserHover(activePage!, args);

      case "browser_evaluate":
        return await handleBrowserEvaluate(activePage!, args);

      case "api_get":
        return await handleApiGet(apiClient!, args);

      case "api_post":
        return await handleApiPost(apiClient!, args);

      case "api_put":
        return await handleApiPut(apiClient!, args);

      case "api_patch":
        return await handleApiPatch(apiClient!, args);

      case "api_delete":
        return await handleApiDelete(apiClient!, args);

      default:
        return textOutcome(`Unknown tool: ${toolName}`, true);
    }
  } catch (error) {
    return failure("Tool execution error", error);
  } finally {
    // A request context is created per call; dispose it so its resources are
    // released. Handlers have already read the response body by this point.
    if (apiClient) {
      await apiClient.dispose().catch((error: unknown) => {
        console.error('Error disposing API client:', error);
      });
    }
  }
}

/** Navigates the page to `args.url` (default timeout 30000 ms, default waitUntil "load"). */
async function handleBrowserNavigate(page: Page, args: any): Promise<{ toolResult: CallToolResult }> {
  try {
    await page.goto(args.url, {
      timeout: args.timeout || 30000,
      waitUntil: args.waitUntil || "load"
    });
    return textOutcome(`Navigated to ${args.url}`);
  } catch (error) {
    return failure("Navigation failed", error);
  }
}

/**
 * Captures a PNG screenshot, saves it to disk, registers it under `args.name`
 * and returns both the saved path and the image data.
 *
 * With `args.selector` only that element is captured; otherwise the page is
 * captured (entire scrollable page when `args.fullPage` is truthy). Selectors
 * in `args.mask` are converted to locators, which is what Playwright's `mask`
 * option expects.
 */
async function handleBrowserScreenshot(page: Page, args: any, server: any): Promise<{ toolResult: CallToolResult }> {
  try {
    const mask = Array.isArray(args.mask)
      ? args.mask.map((selector: string) => page.locator(selector))
      : undefined;

    let screenshot: Buffer;
    if (args.selector) {
      const element = await page.$(args.selector);
      if (!element) {
        return textOutcome(`Element not found: ${args.selector}`, true);
      }
      // page.screenshot has no "element" option; capture through the handle itself.
      screenshot = await element.screenshot({ type: "png", mask });
    } else {
      screenshot = await page.screenshot({ type: "png", fullPage: !!args.fullPage, mask });
    }

    const base64Screenshot = screenshot.toString('base64');
    const savePath = args.savePath || defaultDownloadsPath;
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const filePath = path.join(savePath, `${args.name}-${timestamp}.png`);

    // recursive mkdir is a no-op when the directory already exists.
    fs.mkdirSync(path.dirname(filePath), { recursive: true });
    fs.writeFileSync(filePath, screenshot);

    screenshotRegistry.set(args.name, base64Screenshot);
    try {
      await server.notification({
        method: "notifications/resources/list_changed",
      });
    } catch (error) {
      // The screenshot itself succeeded; a failed notification is only logged.
      console.error('Error sending resource list notification:', error);
    }

    const responseContent: (TextContent | ImageContent)[] = [
      {
        type: "text",
        text: `Screenshot saved to: ${filePath}`,
      },
      {
        type: "image",
        data: base64Screenshot,
        mimeType: "image/png",
      },
    ];

    return {
      toolResult: {
        content: responseContent,
        isError: false,
      },
    };
  } catch (error) {
    return failure("Screenshot failed", error);
  }
}

/** Clicks the element matching `args.selector`. */
async function handleBrowserClick(page: Page, args: any): Promise<{ toolResult: CallToolResult }> {
  try {
    await page.click(args.selector);
    return textOutcome(`Clicked element: ${args.selector}`);
  } catch (error) {
    return failure(`Click failed on ${args.selector}`, error);
  }
}

/** Waits for `args.selector`, then fills it with `args.value`. */
async function handleBrowserFill(page: Page, args: any): Promise<{ toolResult: CallToolResult }> {
  try {
    await page.waitForSelector(args.selector);
    await page.fill(args.selector, args.value);
    return textOutcome(`Filled ${args.selector} with: ${args.value}`);
  } catch (error) {
    return failure(`Fill operation failed on ${args.selector}`, error);
  }
}

/** Waits for `args.selector`, then selects the option `args.value` in it. */
async function handleBrowserSelect(page: Page, args: any): Promise<{ toolResult: CallToolResult }> {
  try {
    await page.waitForSelector(args.selector);
    await page.selectOption(args.selector, args.value);
    return textOutcome(`Selected option ${args.value} in ${args.selector}`);
  } catch (error) {
    return failure(`Selection failed on ${args.selector}`, error);
  }
}

/** Waits for `args.selector`, then hovers over it. */
async function handleBrowserHover(page: Page, args: any): Promise<{ toolResult: CallToolResult }> {
  try {
    await page.waitForSelector(args.selector);
    await page.hover(args.selector);
    return textOutcome(`Hovered over element: ${args.selector}`);
  } catch (error) {
    return failure(`Hover failed on ${args.selector}`, error);
  }
}

/**
 * Evaluates `args.script` in the page and returns its result together with
 * the console output it produced.
 *
 * NOTE(review): the script is passed to `eval` inside the page. That is the
 * purpose of this tool, but it means the caller's script runs with full
 * access to the loaded page.
 */
async function handleBrowserEvaluate(page: Page, args: any): Promise<{ toolResult: CallToolResult }> {
  try {
    const result = await page.evaluate(async (script) => {
      const logs: string[] = [];
      const originalConsole = { ...console };

      // Temporarily wrap console methods to record what the script prints.
      ['log', 'info', 'warn', 'error'].forEach(method => {
        (console as any)[method] = (...args: any[]) => {
          logs.push(`[${method}] ${args.join(' ')}`);
          (originalConsole as any)[method](...args);
        };
      });

      try {
        // await so scripts that evaluate to a promise yield their settled value.
        const result = await eval(script);
        return { result, logs };
      } finally {
        // Always restore the page's console, whether the script succeeded or threw.
        Object.assign(console, originalConsole);
      }
    }, args.script);

    return {
      toolResult: {
        content: [
          {
            type: "text",
            text: `Script result: ${JSON.stringify(result.result, null, 2)}`,
          },
          {
            type: "text",
            text: `Console output:\n${result.logs.join('\n')}`,
          }
        ],
        isError: false,
      },
    };
  } catch (error) {
    return failure("Script execution failed", error);
  }
}

/**
 * Runs one API request and formats the outcome.
 *
 * @param verb - HTTP verb used in the status and error messages.
 * @param url - Request URL, echoed in the status line.
 * @param includeBody - Whether to append the response body to the result.
 * @param send - Performs the actual request.
 */
async function reportApiCall(
  verb: string,
  url: string,
  includeBody: boolean,
  send: () => Promise<ApiResponse>
): Promise<ToolOutcome> {
  try {
    const response = await send();
    const content: TextContent[] = [{
      type: "text",
      text: `${verb} ${url} - Status: ${response.status()}`,
    }];
    if (includeBody) {
      content.push(...(await getResponseData(response)));
    }
    return {
      toolResult: {
        content,
        isError: false,
      },
    };
  } catch (error) {
    return failure(`${verb} request failed`, error);
  }
}

/** Request options for verbs that carry a body; defaults to a JSON content type. */
function bodyRequestOptions(args: any): { data: unknown; headers: Record<string, string> } {
  return {
    data: args.data,
    headers: args.headers || { 'Content-Type': 'application/json' }
  };
}

/** Sends a GET request to `args.url` with optional `args.headers`. */
async function handleApiGet(client: APIRequestContext, args: any): Promise<{ toolResult: CallToolResult }> {
  const options = args.headers ? { headers: args.headers } : undefined;
  return reportApiCall("GET", args.url, true, () => client.get(args.url, options));
}

/** Sends a POST request to `args.url` with `args.data` as the body. */
async function handleApiPost(client: APIRequestContext, args: any): Promise<{ toolResult: CallToolResult }> {
  return reportApiCall("POST", args.url, true, () => client.post(args.url, bodyRequestOptions(args)));
}

/** Sends a PUT request to `args.url` with `args.data` as the body. */
async function handleApiPut(client: APIRequestContext, args: any): Promise<{ toolResult: CallToolResult }> {
  return reportApiCall("PUT", args.url, true, () => client.put(args.url, bodyRequestOptions(args)));
}

/** Sends a PATCH request to `args.url` with `args.data` as the body. */
async function handleApiPatch(client: APIRequestContext, args: any): Promise<{ toolResult: CallToolResult }> {
  return reportApiCall("PATCH", args.url, true, () => client.patch(args.url, bodyRequestOptions(args)));
}

/**
 * Resizes the current page's viewport and persists the requested values.
 *
 * Omitted dimensions fall back to the current configuration. Values the
 * caller supplied are written to the config file and mirrored into the
 * environment variables that getConfig reads first, so they also apply to
 * the next browser launch.
 *
 * NOTE(review): only width and height are applied to the live page here;
 * the scale factor is persisted and takes effect when a new browser context
 * is created.
 */
async function handleBrowserSetViewport(page: Page, args: any): Promise<{ toolResult: CallToolResult }> {
  try {
    const config = getConfig();

    const width = args.width || config.viewportWidth;
    const height = args.height || config.viewportHeight;
    const deviceScaleFactor = args.deviceScaleFactor || config.deviceScaleFactor;

    await page.setViewportSize({ width, height });

    // Persistence is best-effort: the viewport has already been changed.
    try {
      const configPath = configFilePath();
      const persisted = fs.existsSync(configPath)
        ? JSON.parse(fs.readFileSync(configPath, 'utf8'))
        : {};

      if (args.width) {
        persisted.viewportWidth = width;
        process.env.MCP_VIEWPORT_WIDTH = width.toString();
      }

      if (args.height) {
        persisted.viewportHeight = height;
        process.env.MCP_VIEWPORT_HEIGHT = height.toString();
      }

      if (args.deviceScaleFactor) {
        persisted.deviceScaleFactor = deviceScaleFactor;
        process.env.MCP_DEVICE_SCALE_FACTOR = deviceScaleFactor.toString();
      }

      fs.writeFileSync(configPath, JSON.stringify(persisted, null, 2));
    } catch (error) {
      console.error('Error saving viewport config:', error);
    }

    return textOutcome(
      `Set viewport to width: ${width}, height: ${height}, scale factor: ${deviceScaleFactor}`
    );
  } catch (error) {
    return failure("Failed to set viewport", error);
  }
}

/** Sends a DELETE request to `args.url`; only the status line is reported. */
async function handleApiDelete(client: APIRequestContext, args: any): Promise<{ toolResult: CallToolResult }> {
  const options = args.headers ? { headers: args.headers } : undefined;
  return reportApiCall("DELETE", args.url, false, () => client.delete(args.url, options));
}
--------------------------------------------------------------------------------