├── smithery.yaml ├── package.json ├── .gitignore ├── Dockerfile ├── bin └── mcp-selenium.js ├── LICENSE ├── README.md └── src └── lib └── server.js /smithery.yaml: -------------------------------------------------------------------------------- 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml 2 | 3 | startCommand: 4 | type: stdio 5 | configSchema: 6 | # JSON Schema defining the configuration options for the MCP. 7 | type: object 8 | required: [] 9 | properties: {} 10 | commandFunction: 11 | # A function that produces the CLI command to start the MCP on stdio. 12 | |- 13 | (config) => ({command:'node', args:['src/lib/server.js'], env:{}}) -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@angiejones/mcp-selenium", 3 | "version": "0.1.21", 4 | "description": "Selenium WebDriver MCP Server", 5 | "type": "module", 6 | "main": "src/lib/server.js", 7 | "bin": { 8 | "mcp-selenium": "./src/lib/server.js" 9 | }, 10 | "scripts": { 11 | "test": "echo \"Error: no test specified\" && exit 1" 12 | }, 13 | "keywords": [], 14 | "author": "", 15 | "license": "ISC", 16 | "dependencies": { 17 | "@modelcontextprotocol/sdk": "^1.7.0", 18 | "selenium-webdriver": "^4.18.1" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib/ 14 | lib64/ 15 | parts/ 16 | sdist/ 17 | var/ 18 | wheels/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Virtual Environment 24 | venv/ 25 | ENV/ 26 | env/ 27 | 28 | # IDE 29 | .idea/ 30 | .vscode/ 31 | *.swp 32 | *.swo 33 | 34 | # Node 35 | node_modules/ 36 | npm-debug.log* 
37 | 38 | # Misc 39 | .DS_Store 40 | .env 41 | .env.local 42 | .env.*.local 43 | 44 | # Selenium 45 | geckodriver.log 46 | chromedriver.log 47 | .goose/ -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:18-alpine 2 | 3 | # Install Chrome and dependencies 4 | RUN apk update && apk add --no-cache \ 5 | chromium \ 6 | chromium-chromedriver \ 7 | nss \ 8 | freetype \ 9 | freetype-dev \ 10 | harfbuzz \ 11 | ca-certificates \ 12 | ttf-freefont \ 13 | udev \ 14 | ttf-opensans \ 15 | chromium-chromedriver 16 | 17 | # Set Chrome environment variables 18 | ENV CHROME_BIN=/usr/bin/chromium-browser 19 | ENV CHROME_PATH=/usr/lib/chromium/ 20 | ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true 21 | 22 | WORKDIR /app 23 | 24 | # Copy package files 25 | COPY package*.json ./ 26 | 27 | # Install dependencies 28 | RUN npm install 29 | 30 | # Copy application code 31 | COPY . . 
#!/usr/bin/env node

/**
 * CLI launcher for the MCP Selenium server.
 *
 * Spawns `src/lib/server.js` as a child process that shares this process's
 * stdio, forwards termination signals to it, and exits with the child's
 * exit status so callers can tell whether the server failed.
 */

import { fileURLToPath } from 'url';
import { dirname, resolve } from 'path';
import { spawn } from 'child_process';

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

const serverPath = resolve(__dirname, '../src/lib/server.js');

// Start the server with the same Node binary that is running this launcher,
// rather than whichever `node` happens to be first on PATH.
const child = spawn(process.execPath, [serverPath], {
  stdio: 'inherit',
});

child.on('error', (error) => {
  console.error(`Error starting server: ${error.message}`);
  process.exit(1);
});

// Mirror the child's outcome: `code` is null when the child was terminated
// by a signal, which is reported as a generic failure.
child.on('exit', (code) => {
  process.exit(code ?? 1);
});

// Handle process termination by relaying the signal to the server.
for (const signal of ['SIGTERM', 'SIGINT']) {
  process.on(signal, () => {
    child.kill(signal);
  });
}
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![MseeP.ai Security Assessment Badge](https://mseep.net/pr/angiejones-mcp-selenium-badge.png)](https://mseep.ai/app/angiejones-mcp-selenium) 2 | 3 | # MCP Selenium Server 4 | 5 | A Model Context Protocol (MCP) server implementation for Selenium WebDriver, enabling browser automation through standardized MCP clients. 
6 | 7 | ## Video Demo (Click to Watch) 8 | 9 | [![Watch the video](https://img.youtube.com/vi/mRV0N8hcgYA/sddefault.jpg)](https://youtu.be/mRV0N8hcgYA) 10 | 11 | 12 | ## Features 13 | 14 | - Start browser sessions with customizable options 15 | - Navigate to URLs 16 | - Find elements using various locator strategies 17 | - Click, type, and interact with elements 18 | - Perform mouse actions (hover, drag and drop) 19 | - Handle keyboard input 20 | - Take screenshots 21 | - Upload files 22 | - Support for headless mode 23 | 24 | ## Supported Browsers 25 | 26 | - Chrome 27 | - Firefox 28 | - MS Edge 29 | 30 | ## Use with Goose 31 | 32 | ### Option 1: One-click install 33 | Copy and paste the link below into a browser address bar to add this extension to goose desktop: 34 | 35 | ``` 36 | goose://extension?cmd=npx&arg=-y&arg=%40angiejones%2Fmcp-selenium&id=selenium-mcp&name=Selenium%20MCP&description=automates%20browser%20interactions 37 | ``` 38 | 39 | 40 | ### Option 2: Add manually to desktop or CLI 41 | 42 | * Name: `Selenium MCP` 43 | * Description: `automates browser interactions` 44 | * Command: `npx -y @angiejones/mcp-selenium` 45 | 46 | ## Use with other MCP clients (e.g. Claude Desktop, etc) 47 | ```json 48 | { 49 | "mcpServers": { 50 | "selenium": { 51 | "command": "npx", 52 | "args": ["-y", "@angiejones/mcp-selenium"] 53 | } 54 | } 55 | } 56 | ``` 57 | 58 | --- 59 | 60 | ## Development 61 | 62 | To work on this project: 63 | 64 | 1. Clone the repository 65 | 2. Install dependencies: `npm install` 66 | 3. 
Run the server: `node src/lib/server.js` 67 | 68 | ### Installation 69 | 70 | #### Installing via Smithery 71 | 72 | To install MCP Selenium for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@angiejones/mcp-selenium): 73 | 74 | ```bash 75 | npx -y @smithery/cli install @angiejones/mcp-selenium --client claude 76 | ``` 77 | 78 | #### Manual Installation 79 | ```bash 80 | npm install -g @angiejones/mcp-selenium 81 | ``` 82 | 83 | 84 | ### Usage 85 | 86 | Start the server by running: 87 | 88 | ```bash 89 | mcp-selenium 90 | ``` 91 | 92 | Or use with NPX in your MCP configuration: 93 | 94 | ```json 95 | { 96 | "mcpServers": { 97 | "selenium": { 98 | "command": "npx", 99 | "args": [ 100 | "-y", 101 | "@angiejones/mcp-selenium" 102 | ] 103 | } 104 | } 105 | } 106 | ``` 107 | 108 | 109 | 110 | ## Tools 111 | 112 | ### start_browser 113 | Launches a browser session. 114 | 115 | **Parameters:** 116 | - `browser` (required): Browser to launch 117 | - Type: string 118 | - Enum: ["chrome", "firefox", "edge"] 119 | - `options`: Browser configuration options 120 | - Type: object 121 | - Properties: 122 | - `headless`: Run browser in headless mode 123 | - Type: boolean 124 | - `arguments`: Additional browser arguments 125 | - Type: array of strings 126 | 127 | **Example:** 128 | ```json 129 | { 130 | "tool": "start_browser", 131 | "parameters": { 132 | "browser": "chrome", 133 | "options": { 134 | "headless": true, 135 | "arguments": ["--no-sandbox"] 136 | } 137 | } 138 | } 139 | ``` 140 | 141 | ### navigate 142 | Navigates to a URL. 143 | 144 | **Parameters:** 145 | - `url` (required): URL to navigate to 146 | - Type: string 147 | 148 | **Example:** 149 | ```json 150 | { 151 | "tool": "navigate", 152 | "parameters": { 153 | "url": "https://www.example.com" 154 | } 155 | } 156 | ``` 157 | 158 | ### find_element 159 | Finds an element on the page. 
160 | 161 | **Parameters:** 162 | - `by` (required): Locator strategy 163 | - Type: string 164 | - Enum: ["id", "css", "xpath", "name", "tag", "class"] 165 | - `value` (required): Value for the locator strategy 166 | - Type: string 167 | - `timeout`: Maximum time to wait for element in milliseconds 168 | - Type: number 169 | - Default: 10000 170 | 171 | **Example:** 172 | ```json 173 | { 174 | "tool": "find_element", 175 | "parameters": { 176 | "by": "id", 177 | "value": "search-input", 178 | "timeout": 5000 179 | } 180 | } 181 | ``` 182 | 183 | ### click_element 184 | Clicks an element. 185 | 186 | **Parameters:** 187 | - `by` (required): Locator strategy 188 | - Type: string 189 | - Enum: ["id", "css", "xpath", "name", "tag", "class"] 190 | - `value` (required): Value for the locator strategy 191 | - Type: string 192 | - `timeout`: Maximum time to wait for element in milliseconds 193 | - Type: number 194 | - Default: 10000 195 | 196 | **Example:** 197 | ```json 198 | { 199 | "tool": "click_element", 200 | "parameters": { 201 | "by": "css", 202 | "value": ".submit-button" 203 | } 204 | } 205 | ``` 206 | 207 | ### send_keys 208 | Sends keys to an element (typing). 209 | 210 | **Parameters:** 211 | - `by` (required): Locator strategy 212 | - Type: string 213 | - Enum: ["id", "css", "xpath", "name", "tag", "class"] 214 | - `value` (required): Value for the locator strategy 215 | - Type: string 216 | - `text` (required): Text to enter into the element 217 | - Type: string 218 | - `timeout`: Maximum time to wait for element in milliseconds 219 | - Type: number 220 | - Default: 10000 221 | 222 | **Example:** 223 | ```json 224 | { 225 | "tool": "send_keys", 226 | "parameters": { 227 | "by": "name", 228 | "value": "username", 229 | "text": "testuser" 230 | } 231 | } 232 | ``` 233 | 234 | ### get_element_text 235 | Gets the text() of an element. 
236 | 237 | **Parameters:** 238 | - `by` (required): Locator strategy 239 | - Type: string 240 | - Enum: ["id", "css", "xpath", "name", "tag", "class"] 241 | - `value` (required): Value for the locator strategy 242 | - Type: string 243 | - `timeout`: Maximum time to wait for element in milliseconds 244 | - Type: number 245 | - Default: 10000 246 | 247 | **Example:** 248 | ```json 249 | { 250 | "tool": "get_element_text", 251 | "parameters": { 252 | "by": "css", 253 | "value": ".message" 254 | } 255 | } 256 | ``` 257 | 258 | ### hover 259 | Moves the mouse to hover over an element. 260 | 261 | **Parameters:** 262 | - `by` (required): Locator strategy 263 | - Type: string 264 | - Enum: ["id", "css", "xpath", "name", "tag", "class"] 265 | - `value` (required): Value for the locator strategy 266 | - Type: string 267 | - `timeout`: Maximum time to wait for element in milliseconds 268 | - Type: number 269 | - Default: 10000 270 | 271 | **Example:** 272 | ```json 273 | { 274 | "tool": "hover", 275 | "parameters": { 276 | "by": "css", 277 | "value": ".dropdown-menu" 278 | } 279 | } 280 | ``` 281 | 282 | ### drag_and_drop 283 | Drags an element and drops it onto another element. 
284 | 285 | **Parameters:** 286 | - `by` (required): Locator strategy for source element 287 | - Type: string 288 | - Enum: ["id", "css", "xpath", "name", "tag", "class"] 289 | - `value` (required): Value for the source locator strategy 290 | - Type: string 291 | - `targetBy` (required): Locator strategy for target element 292 | - Type: string 293 | - Enum: ["id", "css", "xpath", "name", "tag", "class"] 294 | - `targetValue` (required): Value for the target locator strategy 295 | - Type: string 296 | - `timeout`: Maximum time to wait for elements in milliseconds 297 | - Type: number 298 | - Default: 10000 299 | 300 | **Example:** 301 | ```json 302 | { 303 | "tool": "drag_and_drop", 304 | "parameters": { 305 | "by": "id", 306 | "value": "draggable", 307 | "targetBy": "id", 308 | "targetValue": "droppable" 309 | } 310 | } 311 | ``` 312 | 313 | ### double_click 314 | Performs a double click on an element. 315 | 316 | **Parameters:** 317 | - `by` (required): Locator strategy 318 | - Type: string 319 | - Enum: ["id", "css", "xpath", "name", "tag", "class"] 320 | - `value` (required): Value for the locator strategy 321 | - Type: string 322 | - `timeout`: Maximum time to wait for element in milliseconds 323 | - Type: number 324 | - Default: 10000 325 | 326 | **Example:** 327 | ```json 328 | { 329 | "tool": "double_click", 330 | "parameters": { 331 | "by": "css", 332 | "value": ".editable-text" 333 | } 334 | } 335 | ``` 336 | 337 | ### right_click 338 | Performs a right click (context click) on an element. 
339 | 340 | **Parameters:** 341 | - `by` (required): Locator strategy 342 | - Type: string 343 | - Enum: ["id", "css", "xpath", "name", "tag", "class"] 344 | - `value` (required): Value for the locator strategy 345 | - Type: string 346 | - `timeout`: Maximum time to wait for element in milliseconds 347 | - Type: number 348 | - Default: 10000 349 | 350 | **Example:** 351 | ```json 352 | { 353 | "tool": "right_click", 354 | "parameters": { 355 | "by": "css", 356 | "value": ".context-menu-trigger" 357 | } 358 | } 359 | ``` 360 | 361 | ### press_key 362 | Simulates pressing a keyboard key. 363 | 364 | **Parameters:** 365 | - `key` (required): Key to press (e.g., 'Enter', 'Tab', 'a', etc.) 366 | - Type: string 367 | 368 | **Example:** 369 | ```json 370 | { 371 | "tool": "press_key", 372 | "parameters": { 373 | "key": "Enter" 374 | } 375 | } 376 | ``` 377 | 378 | ### upload_file 379 | Uploads a file using a file input element. 380 | 381 | **Parameters:** 382 | - `by` (required): Locator strategy 383 | - Type: string 384 | - Enum: ["id", "css", "xpath", "name", "tag", "class"] 385 | - `value` (required): Value for the locator strategy 386 | - Type: string 387 | - `filePath` (required): Absolute path to the file to upload 388 | - Type: string 389 | - `timeout`: Maximum time to wait for element in milliseconds 390 | - Type: number 391 | - Default: 10000 392 | 393 | **Example:** 394 | ```json 395 | { 396 | "tool": "upload_file", 397 | "parameters": { 398 | "by": "id", 399 | "value": "file-input", 400 | "filePath": "/path/to/file.pdf" 401 | } 402 | } 403 | ``` 404 | 405 | ### take_screenshot 406 | Captures a screenshot of the current page. 407 | 408 | **Parameters:** 409 | - `outputPath` (optional): Path where to save the screenshot. If not provided, returns base64 data. 
#!/usr/bin/env node

/**
 * MCP Selenium server.
 *
 * Exposes Selenium WebDriver operations (browser lifecycle, navigation,
 * element interaction, mouse/keyboard actions, screenshots) as MCP tools
 * over a stdio transport. Browser sessions are tracked in module-level
 * state; every tool operates on the most recently started session.
 */

import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import pkg from 'selenium-webdriver';
const { Builder, By, Key, until } = pkg;
import { Options as ChromeOptions } from 'selenium-webdriver/chrome.js';
import { Options as FirefoxOptions } from 'selenium-webdriver/firefox.js';
import { Options as EdgeOptions } from 'selenium-webdriver/edge.js';


// Create an MCP server
const server = new McpServer({
  name: "MCP Selenium",
  version: "1.0.0"
});

// Server state: every started driver keyed by session id, plus the id of the
// session that tools currently act on.
const state = {
  drivers: new Map(),
  currentSession: null
};

// Default wait for an element to appear, in milliseconds.
const DEFAULT_TIMEOUT_MS = 10000;

// ---------------------------------------------------------------------------
// Helper functions
// ---------------------------------------------------------------------------

/**
 * Builds a successful single-text tool result.
 * @param {string} text - Message returned to the client.
 */
const textResult = (text) => ({
  content: [{ type: 'text', text }]
});

/**
 * Builds a failed tool result. The text keeps the `<prefix>: <message>` shape
 * and the result is additionally flagged with `isError` so clients can tell
 * failures from successes without parsing the message.
 * @param {string} prefix - Human-readable description of the failed action.
 * @param {unknown} e - The thrown value.
 */
const errorResult = (prefix, e) => ({
  content: [{ type: 'text', text: `${prefix}: ${e?.message ?? String(e)}` }],
  isError: true
});

/**
 * Wraps a tool handler so that anything it throws is converted into an
 * error result instead of escaping to the MCP server.
 * @param {string} prefix - Error message prefix for this tool.
 * @param {Function} handler - Async tool implementation.
 */
const guarded = (prefix, handler) => async (...callArgs) => {
  try {
    return await handler(...callArgs);
  } catch (e) {
    return errorResult(prefix, e);
  }
};

/**
 * Returns the driver of the current session.
 * @throws {Error} When no browser session is active.
 */
const getDriver = () => {
  const driver = state.drivers.get(state.currentSession);
  if (!driver) {
    throw new Error('No active browser session');
  }
  return driver;
};

/**
 * Translates a locator strategy name and value into a Selenium locator.
 * @param {string} by - One of id, css, xpath, name, tag, class (case-insensitive).
 * @param {string} value - Value for the chosen strategy.
 * @throws {Error} When the strategy is not supported.
 */
const getLocator = (by, value) => {
  switch (by.toLowerCase()) {
    case 'id': return By.id(value);
    case 'css': return By.css(value);
    case 'xpath': return By.xpath(value);
    case 'name': return By.name(value);
    // A bare tag name is itself a CSS type selector.
    case 'tag': return By.css(value);
    case 'class': return By.className(value);
    default: throw new Error(`Unsupported locator strategy: ${by}`);
  }
};

/**
 * Resolves the current driver and waits until the described element is
 * located, returning both.
 * @param {string} by - Locator strategy.
 * @param {string} value - Locator value.
 * @param {number} timeout - Maximum wait in milliseconds.
 */
const locateElement = async (by, value, timeout) => {
  const driver = getDriver();
  const locator = getLocator(by, value);
  const element = await driver.wait(until.elementLocated(locator), timeout);
  return { driver, element };
};

// Common spellings that do not match the names used by Selenium's `Key` table.
const KEY_ALIASES = new Map([
  ['BACKSPACE', 'BACK_SPACE'],
  ['ESC', 'ESCAPE'],
  ['DEL', 'DELETE'],
  ['CTRL', 'CONTROL'],
  ['CMD', 'COMMAND']
]);

/**
 * Converts a user-supplied key into the value handed to the key actions.
 *
 * A single character is passed through unchanged. Longer names such as
 * 'Enter', 'Tab' or 'ArrowDown' are looked up in Selenium's `Key` table,
 * because keyDown/keyUp take one code point rather than a key name
 * (NOTE(review): per the selenium-webdriver input API — confirm against the
 * installed version).
 * @param {string} key - Key name or single character.
 * @throws {Error} When the name does not correspond to a known key.
 */
const resolveKey = (key) => {
  if ([...key].length === 1) {
    return key;
  }
  // 'ArrowDown' / 'page up' / 'page-up' -> 'ARROW_DOWN' / 'PAGE_UP'
  const normalized = key
    .trim()
    .replace(/([a-z0-9])([A-Z])/g, '$1_$2')
    .replace(/[\s-]+/g, '_')
    .toUpperCase();
  const name = KEY_ALIASES.get(normalized) ?? normalized;
  const resolved = Object.hasOwn(Key, name) ? Key[name] : undefined;
  // `Key` also carries helper functions (e.g. chord); only string entries are keys.
  if (typeof resolved !== 'string') {
    throw new Error(`Unsupported key: ${key}`);
  }
  return resolved;
};

// Per-browser wiring: options class, headless flag, and the Builder setter.
const BROWSERS = new Map([
  ['chrome', {
    Options: ChromeOptions,
    headlessArgument: '--headless=new',
    configure: (builder, browserOptions) => builder.setChromeOptions(browserOptions)
  }],
  ['edge', {
    Options: EdgeOptions,
    headlessArgument: '--headless=new',
    configure: (builder, browserOptions) => builder.setEdgeOptions(browserOptions)
  }],
  ['firefox', {
    Options: FirefoxOptions,
    headlessArgument: '--headless',
    configure: (builder, browserOptions) => builder.setFirefoxOptions(browserOptions)
  }]
]);

/**
 * Builds a WebDriver for the requested browser.
 * @param {string} browser - chrome, edge or firefox.
 * @param {{headless?: boolean, arguments?: string[]}} options - Launch options.
 * @throws {Error} When the browser is not supported.
 */
const buildDriver = async (browser, options) => {
  const spec = BROWSERS.get(browser);
  if (!spec) {
    throw new Error(`Unsupported browser: ${browser}`);
  }
  const browserOptions = new spec.Options();
  if (options.headless) {
    browserOptions.addArguments(spec.headlessArgument);
  }
  for (const arg of options.arguments ?? []) {
    browserOptions.addArguments(arg);
  }
  const builder = new Builder().forBrowser(browser);
  return spec.configure(builder, browserOptions).build();
};

// ---------------------------------------------------------------------------
// Common schemas
// ---------------------------------------------------------------------------

const browserOptionsSchema = z.object({
  headless: z.boolean().optional().describe("Run browser in headless mode"),
  arguments: z.array(z.string()).optional().describe("Additional browser arguments")
}).optional();

const locatorSchema = {
  by: z.enum(["id", "css", "xpath", "name", "tag", "class"]).describe("Locator strategy to find element"),
  value: z.string().describe("Value for the locator strategy"),
  timeout: z.number().optional().describe("Maximum time to wait for element in milliseconds")
};

// ---------------------------------------------------------------------------
// Browser Management Tools
// ---------------------------------------------------------------------------

server.tool(
  "start_browser",
  "launches browser",
  {
    browser: z.enum(["chrome", "firefox", "edge"]).describe("Browser to launch (chrome or firefox or microsoft edge)"),
    options: browserOptionsSchema
  },
  guarded('Error starting browser', async ({ browser, options = {} }) => {
    const driver = await buildDriver(browser, options);
    const sessionId = `${browser}_${Date.now()}`;
    state.drivers.set(sessionId, driver);
    state.currentSession = sessionId;
    return textResult(`Browser started with session_id: ${sessionId}`);
  })
);

server.tool(
  "navigate",
  "navigates to a URL",
  {
    url: z.string().describe("URL to navigate to")
  },
  guarded('Error navigating', async ({ url }) => {
    const driver = getDriver();
    await driver.get(url);
    return textResult(`Navigated to ${url}`);
  })
);

// ---------------------------------------------------------------------------
// Element Interaction Tools
// ---------------------------------------------------------------------------

server.tool(
  "find_element",
  "finds an element",
  {
    ...locatorSchema
  },
  guarded('Error finding element', async ({ by, value, timeout = DEFAULT_TIMEOUT_MS }) => {
    await locateElement(by, value, timeout);
    return textResult('Element found');
  })
);

server.tool(
  "click_element",
  "clicks an element",
  {
    ...locatorSchema
  },
  guarded('Error clicking element', async ({ by, value, timeout = DEFAULT_TIMEOUT_MS }) => {
    const { element } = await locateElement(by, value, timeout);
    await element.click();
    return textResult('Element clicked');
  })
);

server.tool(
  "send_keys",
  "sends keys to an element, aka typing",
  {
    ...locatorSchema,
    text: z.string().describe("Text to enter into the element")
  },
  guarded('Error entering text', async ({ by, value, text, timeout = DEFAULT_TIMEOUT_MS }) => {
    const { element } = await locateElement(by, value, timeout);
    // Existing content is cleared first so the element ends up holding only `text`.
    await element.clear();
    await element.sendKeys(text);
    return textResult(`Text "${text}" entered into element`);
  })
);

server.tool(
  "get_element_text",
  "gets the text() of an element",
  {
    ...locatorSchema
  },
  guarded('Error getting element text', async ({ by, value, timeout = DEFAULT_TIMEOUT_MS }) => {
    const { element } = await locateElement(by, value, timeout);
    const text = await element.getText();
    return textResult(text);
  })
);

server.tool(
  "hover",
  "moves the mouse to hover over an element",
  {
    ...locatorSchema
  },
  guarded('Error hovering over element', async ({ by, value, timeout = DEFAULT_TIMEOUT_MS }) => {
    const { driver, element } = await locateElement(by, value, timeout);
    await driver.actions({ bridge: true }).move({ origin: element }).perform();
    return textResult('Hovered over element');
  })
);

server.tool(
  "drag_and_drop",
  "drags an element and drops it onto another element",
  {
    ...locatorSchema,
    targetBy: z.enum(["id", "css", "xpath", "name", "tag", "class"]).describe("Locator strategy to find target element"),
    targetValue: z.string().describe("Value for the target locator strategy")
  },
  guarded('Error performing drag and drop', async ({ by, value, targetBy, targetValue, timeout = DEFAULT_TIMEOUT_MS }) => {
    const driver = getDriver();
    // Both locators are validated before any waiting starts.
    const sourceLocator = getLocator(by, value);
    const targetLocator = getLocator(targetBy, targetValue);
    const sourceElement = await driver.wait(until.elementLocated(sourceLocator), timeout);
    const targetElement = await driver.wait(until.elementLocated(targetLocator), timeout);
    await driver.actions({ bridge: true }).dragAndDrop(sourceElement, targetElement).perform();
    return textResult('Drag and drop completed');
  })
);

server.tool(
  "double_click",
  "performs a double click on an element",
  {
    ...locatorSchema
  },
  guarded('Error performing double click', async ({ by, value, timeout = DEFAULT_TIMEOUT_MS }) => {
    const { driver, element } = await locateElement(by, value, timeout);
    await driver.actions({ bridge: true }).doubleClick(element).perform();
    return textResult('Double click performed');
  })
);

server.tool(
  "right_click",
  "performs a right click (context click) on an element",
  {
    ...locatorSchema
  },
  guarded('Error performing right click', async ({ by, value, timeout = DEFAULT_TIMEOUT_MS }) => {
    const { driver, element } = await locateElement(by, value, timeout);
    await driver.actions({ bridge: true }).contextClick(element).perform();
    return textResult('Right click performed');
  })
);

server.tool(
  "press_key",
  "simulates pressing a keyboard key",
  {
    key: z.string().describe("Key to press (e.g., 'Enter', 'Tab', 'a', etc.)")
  },
  guarded('Error pressing key', async ({ key }) => {
    const driver = getDriver();
    // Named keys ('Enter', 'Tab', ...) are mapped to their key codes first.
    const keyCode = resolveKey(key);
    await driver.actions({ bridge: true }).keyDown(keyCode).keyUp(keyCode).perform();
    return textResult(`Key '${key}' pressed`);
  })
);

server.tool(
  "upload_file",
  "uploads a file using a file input element",
  {
    ...locatorSchema,
    filePath: z.string().describe("Absolute path to the file to upload")
  },
  guarded('Error uploading file', async ({ by, value, filePath, timeout = DEFAULT_TIMEOUT_MS }) => {
    const { element } = await locateElement(by, value, timeout);
    // The path is typed into the located file input element.
    await element.sendKeys(filePath);
    return textResult('File upload initiated');
  })
);

server.tool(
  "take_screenshot",
  "captures a screenshot of the current page",
  {
    outputPath: z.string().optional().describe("Optional path where to save the screenshot. If not provided, returns base64 data.")
  },
  guarded('Error taking screenshot', async ({ outputPath }) => {
    const driver = getDriver();
    const screenshot = await driver.takeScreenshot();
    if (!outputPath) {
      return {
        content: [
          { type: 'text', text: 'Screenshot captured as base64:' },
          { type: 'text', text: screenshot }
        ]
      };
    }
    // The screenshot string is decoded as base64 when written to disk.
    const fs = await import('fs');
    await fs.promises.writeFile(outputPath, screenshot, 'base64');
    return textResult(`Screenshot saved to ${outputPath}`);
  })
);

server.tool(
  "close_session",
  "closes the current browser session",
  {},
  guarded('Error closing session', async () => {
    const driver = getDriver();
    const sessionId = state.currentSession;
    try {
      await driver.quit();
    } finally {
      // Forget the session even when quit() fails; otherwise every later tool
      // call would keep targeting a driver that could not be shut down.
      state.drivers.delete(sessionId);
      state.currentSession = null;
    }
    return textResult(`Browser session ${sessionId} closed`);
  })
);

// ---------------------------------------------------------------------------
// Resources
// ---------------------------------------------------------------------------

// The URI has no template variables, so it is registered as a fixed resource.
server.resource(
  "browser-status",
  "browser-status://current",
  async (uri) => ({
    contents: [{
      uri: uri.href,
      text: state.currentSession
        ? `Active browser session: ${state.currentSession}`
        : "No active browser session"
    }]
  })
);

// ---------------------------------------------------------------------------
// Cleanup handler
// ---------------------------------------------------------------------------

let isShuttingDown = false;

/**
 * Quits every tracked browser, clears the session state and exits the
 * process. Failures to quit an individual browser are logged and do not
 * prevent the others from being closed. A second signal received while
 * shutdown is in progress is ignored.
 */
async function cleanup() {
  if (isShuttingDown) {
    return;
  }
  isShuttingDown = true;

  const sessions = [...state.drivers];
  const outcomes = await Promise.allSettled(
    sessions.map(async ([, driver]) => driver.quit())
  );
  outcomes.forEach((outcome, index) => {
    if (outcome.status === 'rejected') {
      const [sessionId] = sessions[index];
      console.error(`Error closing browser session ${sessionId}:`, outcome.reason);
    }
  });

  state.drivers.clear();
  state.currentSession = null;
  process.exit(0);
}

process.on('SIGTERM', cleanup);
process.on('SIGINT', cleanup);

// Start the server
const transport = new StdioServerTransport();
await server.connect(transport);