├── smithery.yaml
├── package.json
├── .gitignore
├── Dockerfile
├── bin
    └── mcp-selenium.js
├── LICENSE
├── README.md
└── src
    └── lib
        └── server.js


/smithery.yaml:
--------------------------------------------------------------------------------
 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
 2 | 
 3 | startCommand:
 4 |   type: stdio
 5 |   configSchema:
 6 |     # JSON Schema defining the configuration options for the MCP.
 7 |     type: object
 8 |     required: []
 9 |     properties: {}
10 |   commandFunction:
11 |     # A function that produces the CLI command to start the MCP on stdio.
12 |     |-
13 |     (config) => ({command:'node', args:['src/lib/server.js'], env:{}})


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@angiejones/mcp-selenium",
 3 |   "version": "0.1.21",
 4 |   "description": "Selenium WebDriver MCP Server",
 5 |   "type": "module",
 6 |   "main": "src/lib/server.js",
 7 |   "bin": {
 8 |     "mcp-selenium": "./src/lib/server.js"
 9 |   },
10 |   "scripts": {
11 |     "test": "echo \"Error: no test specified\" && exit 1"
12 |   },
13 |   "keywords": [],
14 |   "author": "",
15 |   "license": "MIT",
16 |   "dependencies": {
17 |     "@modelcontextprotocol/sdk": "^1.7.0",
18 |     "selenium-webdriver": "^4.18.1"
19 |   }
20 | }
21 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Python
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | *.so
 6 | .Python
 7 | build/
 8 | develop-eggs/
 9 | dist/
10 | downloads/
11 | eggs/
12 | .eggs/
13 | /lib/
14 | lib64/
15 | parts/
16 | sdist/
17 | var/
18 | wheels/
19 | *.egg-info/
20 | .installed.cfg
21 | *.egg
22 | 
23 | # Virtual Environment
24 | venv/
25 | ENV/
26 | env/
27 | 
28 | # IDE
29 | .idea/
30 | .vscode/
31 | *.swp
32 | *.swo
33 | 
34 | # Node
35 | node_modules/
36 | npm-debug.log*
37 | 
38 | # Misc
39 | .DS_Store
40 | .env
41 | .env.local
42 | .env.*.local
43 | 
44 | # Selenium
45 | geckodriver.log
46 | chromedriver.log
47 | .goose/


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM node:18-alpine
 2 | 
 3 | # Install Chromium, its WebDriver, and the fonts and libraries installed alongside them
 4 | RUN apk add --no-cache \
 5 |     chromium \
 6 |     chromium-chromedriver \
 7 |     nss \
 8 |     freetype \
 9 |     freetype-dev \
10 |     harfbuzz \
11 |     ca-certificates \
12 |     ttf-freefont \
13 |     udev \
14 |     ttf-opensans
16 | 
17 | # Set Chrome environment variables
18 | ENV CHROME_BIN=/usr/bin/chromium-browser
19 | ENV CHROME_PATH=/usr/lib/chromium/
20 | ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
21 | 
22 | WORKDIR /app
23 | 
24 | # Copy package files
25 | COPY package*.json ./
26 | 
27 | # Install dependencies
28 | RUN npm install
29 | 
30 | # Copy application code
31 | COPY . .
32 | 
33 | # Start the MCP server
34 | CMD ["node", "src/lib/server.js"]


--------------------------------------------------------------------------------
/bin/mcp-selenium.js:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | 
 3 | import { fileURLToPath } from 'url';
 4 | import { dirname, resolve } from 'path';
 5 | import { spawn } from 'child_process';
 6 | 
 7 | const __filename = fileURLToPath(import.meta.url);
 8 | const __dirname = dirname(__filename);
 9 | 
10 | const serverPath = resolve(__dirname, '../src/lib/server.js');
11 | 
12 | // Start the server
13 | const child = spawn('node', [serverPath], {
14 |     stdio: 'inherit'
15 | });
16 | 
17 | child.on('error', (error) => {
18 |     console.error(`Error starting server: ${error.message}`);
19 |     process.exit(1);
20 | });
21 | 
22 | // Handle process termination
23 | process.on('SIGTERM', () => {
24 |     child.kill('SIGTERM');
25 | });
26 | 
27 | process.on('SIGINT', () => {
28 |     child.kill('SIGINT');
29 | });


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Angie Jones
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![MseeP.ai Security Assessment Badge](https://mseep.net/pr/angiejones-mcp-selenium-badge.png)](https://mseep.ai/app/angiejones-mcp-selenium)
  2 | 
  3 | # MCP Selenium Server
  4 | 
  5 | A Model Context Protocol (MCP) server implementation for Selenium WebDriver, enabling browser automation through standardized MCP clients.
  6 | 
  7 | ## Video Demo (Click to Watch)
  8 | 
  9 | [![Watch the video](https://img.youtube.com/vi/mRV0N8hcgYA/sddefault.jpg)](https://youtu.be/mRV0N8hcgYA)
 10 | 
 11 | 
 12 | ## Features
 13 | 
 14 | - Start browser sessions with customizable options
 15 | - Navigate to URLs
 16 | - Find elements using various locator strategies
 17 | - Click, type, and interact with elements
 18 | - Perform mouse actions (hover, drag and drop)
 19 | - Handle keyboard input
 20 | - Take screenshots
 21 | - Upload files
 22 | - Support for headless mode
 23 | 
 24 | ## Supported Browsers
 25 | 
 26 | - Chrome
 27 | - Firefox
 28 | - MS Edge
 29 | 
 30 | ## Use with Goose
 31 | 
 32 | ### Option 1: One-click install
 33 | Copy and paste the link below into a browser address bar to add this extension to goose desktop:
 34 | 
 35 | ```
 36 | goose://extension?cmd=npx&arg=-y&arg=%40angiejones%2Fmcp-selenium&id=selenium-mcp&name=Selenium%20MCP&description=automates%20browser%20interactions
 37 | ```
 38 | 
 39 | 
 40 | ### Option 2: Add manually to desktop or CLI
 41 | 
 42 | * Name: `Selenium MCP`
 43 | * Description: `automates browser interactions`
 44 | * Command: `npx -y @angiejones/mcp-selenium`
 45 | 
 46 | ## Use with other MCP clients (e.g. Claude Desktop, etc)
 47 | ```json
 48 | {
 49 |   "mcpServers": {
 50 |     "selenium": {
 51 |       "command": "npx",
 52 |       "args": ["-y", "@angiejones/mcp-selenium"]
 53 |     }
 54 |   }
 55 | }
 56 | ```
 57 | 
 58 | ---
 59 | 
 60 | ## Development
 61 | 
 62 | To work on this project:
 63 | 
 64 | 1. Clone the repository
 65 | 2. Install dependencies: `npm install`
 66 | 3. Run the server: `node src/lib/server.js`
 67 | 
 68 | ### Installation
 69 | 
 70 | #### Installing via Smithery
 71 | 
 72 | To install MCP Selenium for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@angiejones/mcp-selenium):
 73 | 
 74 | ```bash
 75 | npx -y @smithery/cli install @angiejones/mcp-selenium --client claude
 76 | ```
 77 | 
 78 | #### Manual Installation
 79 | ```bash
 80 | npm install -g @angiejones/mcp-selenium
 81 | ```
 82 | 
 83 | 
 84 | ### Usage
 85 | 
 86 | Start the server by running:
 87 | 
 88 | ```bash
 89 | mcp-selenium
 90 | ```
 91 | 
 92 | Or use with NPX in your MCP configuration:
 93 | 
 94 | ```json
 95 | {
 96 |   "mcpServers": {
 97 |     "selenium": {
 98 |       "command": "npx",
 99 |       "args": [
100 |         "-y",
101 |         "@angiejones/mcp-selenium"
102 |       ]
103 |     }
104 |   }
105 | }
106 | ```
107 | 
108 | 
109 | 
110 | ## Tools
111 | 
112 | ### start_browser
113 | Launches a browser session.
114 | 
115 | **Parameters:**
116 | - `browser` (required): Browser to launch
117 |   - Type: string
118 |   - Enum: ["chrome", "firefox", "edge"]
119 | - `options`: Browser configuration options
120 |   - Type: object
121 |   - Properties:
122 |     - `headless`: Run browser in headless mode
123 |       - Type: boolean
124 |     - `arguments`: Additional browser arguments
125 |       - Type: array of strings
126 | 
127 | **Example:**
128 | ```json
129 | {
130 |   "tool": "start_browser",
131 |   "parameters": {
132 |     "browser": "chrome",
133 |     "options": {
134 |       "headless": true,
135 |       "arguments": ["--no-sandbox"]
136 |     }
137 |   }
138 | }
139 | ```
140 | 
141 | ### navigate
142 | Navigates to a URL.
143 | 
144 | **Parameters:**
145 | - `url` (required): URL to navigate to
146 |   - Type: string
147 | 
148 | **Example:**
149 | ```json
150 | {
151 |   "tool": "navigate",
152 |   "parameters": {
153 |     "url": "https://www.example.com"
154 |   }
155 | }
156 | ```
157 | 
158 | ### find_element
159 | Finds an element on the page.
160 | 
161 | **Parameters:**
162 | - `by` (required): Locator strategy
163 |   - Type: string
164 |   - Enum: ["id", "css", "xpath", "name", "tag", "class"]
165 | - `value` (required): Value for the locator strategy
166 |   - Type: string
167 | - `timeout`: Maximum time to wait for element in milliseconds
168 |   - Type: number
169 |   - Default: 10000
170 | 
171 | **Example:**
172 | ```json
173 | {
174 |   "tool": "find_element",
175 |   "parameters": {
176 |     "by": "id",
177 |     "value": "search-input",
178 |     "timeout": 5000
179 |   }
180 | }
181 | ```
182 | 
183 | ### click_element
184 | Clicks an element.
185 | 
186 | **Parameters:**
187 | - `by` (required): Locator strategy
188 |   - Type: string
189 |   - Enum: ["id", "css", "xpath", "name", "tag", "class"]
190 | - `value` (required): Value for the locator strategy
191 |   - Type: string
192 | - `timeout`: Maximum time to wait for element in milliseconds
193 |   - Type: number
194 |   - Default: 10000
195 | 
196 | **Example:**
197 | ```json
198 | {
199 |   "tool": "click_element",
200 |   "parameters": {
201 |     "by": "css",
202 |     "value": ".submit-button"
203 |   }
204 | }
205 | ```
206 | 
207 | ### send_keys
208 | Sends keys to an element (typing).
209 | 
210 | **Parameters:**
211 | - `by` (required): Locator strategy
212 |   - Type: string
213 |   - Enum: ["id", "css", "xpath", "name", "tag", "class"]
214 | - `value` (required): Value for the locator strategy
215 |   - Type: string
216 | - `text` (required): Text to enter into the element
217 |   - Type: string
218 | - `timeout`: Maximum time to wait for element in milliseconds
219 |   - Type: number
220 |   - Default: 10000
221 | 
222 | **Example:**
223 | ```json
224 | {
225 |   "tool": "send_keys",
226 |   "parameters": {
227 |     "by": "name",
228 |     "value": "username",
229 |     "text": "testuser"
230 |   }
231 | }
232 | ```
233 | 
234 | ### get_element_text
235 | Gets the text() of an element.
236 | 
237 | **Parameters:**
238 | - `by` (required): Locator strategy
239 |   - Type: string
240 |   - Enum: ["id", "css", "xpath", "name", "tag", "class"]
241 | - `value` (required): Value for the locator strategy
242 |   - Type: string
243 | - `timeout`: Maximum time to wait for element in milliseconds
244 |   - Type: number
245 |   - Default: 10000
246 | 
247 | **Example:**
248 | ```json
249 | {
250 |   "tool": "get_element_text",
251 |   "parameters": {
252 |     "by": "css",
253 |     "value": ".message"
254 |   }
255 | }
256 | ```
257 | 
258 | ### hover
259 | Moves the mouse to hover over an element.
260 | 
261 | **Parameters:**
262 | - `by` (required): Locator strategy
263 |   - Type: string
264 |   - Enum: ["id", "css", "xpath", "name", "tag", "class"]
265 | - `value` (required): Value for the locator strategy
266 |   - Type: string
267 | - `timeout`: Maximum time to wait for element in milliseconds
268 |   - Type: number
269 |   - Default: 10000
270 | 
271 | **Example:**
272 | ```json
273 | {
274 |   "tool": "hover",
275 |   "parameters": {
276 |     "by": "css",
277 |     "value": ".dropdown-menu"
278 |   }
279 | }
280 | ```
281 | 
282 | ### drag_and_drop
283 | Drags an element and drops it onto another element.
284 | 
285 | **Parameters:**
286 | - `by` (required): Locator strategy for source element
287 |   - Type: string
288 |   - Enum: ["id", "css", "xpath", "name", "tag", "class"]
289 | - `value` (required): Value for the source locator strategy
290 |   - Type: string
291 | - `targetBy` (required): Locator strategy for target element
292 |   - Type: string
293 |   - Enum: ["id", "css", "xpath", "name", "tag", "class"]
294 | - `targetValue` (required): Value for the target locator strategy
295 |   - Type: string
296 | - `timeout`: Maximum time to wait for elements in milliseconds
297 |   - Type: number
298 |   - Default: 10000
299 | 
300 | **Example:**
301 | ```json
302 | {
303 |   "tool": "drag_and_drop",
304 |   "parameters": {
305 |     "by": "id",
306 |     "value": "draggable",
307 |     "targetBy": "id",
308 |     "targetValue": "droppable"
309 |   }
310 | }
311 | ```
312 | 
313 | ### double_click
314 | Performs a double click on an element.
315 | 
316 | **Parameters:**
317 | - `by` (required): Locator strategy
318 |   - Type: string
319 |   - Enum: ["id", "css", "xpath", "name", "tag", "class"]
320 | - `value` (required): Value for the locator strategy
321 |   - Type: string
322 | - `timeout`: Maximum time to wait for element in milliseconds
323 |   - Type: number
324 |   - Default: 10000
325 | 
326 | **Example:**
327 | ```json
328 | {
329 |   "tool": "double_click",
330 |   "parameters": {
331 |     "by": "css",
332 |     "value": ".editable-text"
333 |   }
334 | }
335 | ```
336 | 
337 | ### right_click
338 | Performs a right click (context click) on an element.
339 | 
340 | **Parameters:**
341 | - `by` (required): Locator strategy
342 |   - Type: string
343 |   - Enum: ["id", "css", "xpath", "name", "tag", "class"]
344 | - `value` (required): Value for the locator strategy
345 |   - Type: string
346 | - `timeout`: Maximum time to wait for element in milliseconds
347 |   - Type: number
348 |   - Default: 10000
349 | 
350 | **Example:**
351 | ```json
352 | {
353 |   "tool": "right_click",
354 |   "parameters": {
355 |     "by": "css",
356 |     "value": ".context-menu-trigger"
357 |   }
358 | }
359 | ```
360 | 
361 | ### press_key
362 | Simulates pressing a keyboard key.
363 | 
364 | **Parameters:**
365 | - `key` (required): Key to press (e.g., 'Enter', 'Tab', 'a', etc.)
366 |   - Type: string
367 | 
368 | **Example:**
369 | ```json
370 | {
371 |   "tool": "press_key",
372 |   "parameters": {
373 |     "key": "Enter"
374 |   }
375 | }
376 | ```
377 | 
378 | ### upload_file
379 | Uploads a file using a file input element.
380 | 
381 | **Parameters:**
382 | - `by` (required): Locator strategy
383 |   - Type: string
384 |   - Enum: ["id", "css", "xpath", "name", "tag", "class"]
385 | - `value` (required): Value for the locator strategy
386 |   - Type: string
387 | - `filePath` (required): Absolute path to the file to upload
388 |   - Type: string
389 | - `timeout`: Maximum time to wait for element in milliseconds
390 |   - Type: number
391 |   - Default: 10000
392 | 
393 | **Example:**
394 | ```json
395 | {
396 |   "tool": "upload_file",
397 |   "parameters": {
398 |     "by": "id",
399 |     "value": "file-input",
400 |     "filePath": "/path/to/file.pdf"
401 |   }
402 | }
403 | ```
404 | 
405 | ### take_screenshot
406 | Captures a screenshot of the current page.
407 | 
408 | **Parameters:**
409 | - `outputPath` (optional): Path where the screenshot is saved. If omitted, the screenshot is returned as base64 data.
410 |   - Type: string
411 | 
412 | **Example:**
413 | ```json
414 | {
415 |   "tool": "take_screenshot",
416 |   "parameters": {
417 |     "outputPath": "/path/to/screenshot.png"
418 |   }
419 | }
420 | ```
421 | 
422 | ### close_session
423 | Closes the current browser session and cleans up resources.
424 | 
425 | **Parameters:**
426 | None required
427 | 
428 | **Example:**
429 | ```json
430 | {
431 |   "tool": "close_session",
432 |   "parameters": {}
433 | }
434 | ```
435 | 
436 | 
437 | ## License
438 | 
439 | MIT
440 | 


--------------------------------------------------------------------------------
/src/lib/server.js:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env node
  2 | 
  3 | import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
  4 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
  5 | import { z } from "zod";
  6 | import pkg from 'selenium-webdriver';
  7 | const { Builder, By, Key, until, Actions } = pkg;
  8 | import { Options as ChromeOptions } from 'selenium-webdriver/chrome.js';
  9 | import { Options as FirefoxOptions } from 'selenium-webdriver/firefox.js';
 10 | import { Options as EdgeOptions } from 'selenium-webdriver/edge.js';
 11 | 
 12 | 
 13 | // MCP server instance; every tool and resource below is registered on it.
 14 | const server = new McpServer({
 15 |     name: "MCP Selenium",
 16 |     version: "1.0.0"
 17 | });
 18 | 
 19 | // Server state: drivers maps session id -> driver; currentSession is the active id, or null.
 20 | const state = {
 21 |     drivers: new Map(),
 22 |     currentSession: null
 23 | };
 24 | 
 25 | // Helper functions
 26 | const getDriver = () => {
 27 |     const driver = state.drivers.get(state.currentSession);
 28 |     if (!driver) {
 29 |         throw new Error('No active browser session');
 30 |     }
 31 |     return driver;
 32 | };
 33 | 
 34 | const getLocator = (by, value) => {
 35 |     switch (by.toLowerCase()) {
 36 |         case 'id': return By.id(value);
 37 |         case 'css': return By.css(value);
 38 |         case 'xpath': return By.xpath(value);
 39 |         case 'name': return By.name(value);
 40 |         case 'tag': return By.css(value);
 41 |         case 'class': return By.className(value);
 42 |         default: throw new Error(`Unsupported locator strategy: ${by}`);
 43 |     }
 44 | };
 45 | 
 46 | // Common schemas. browserOptionsSchema: optional object of launch settings accepted by start_browser.
 47 | const browserOptionsSchema = z.object({
 48 |     headless: z.boolean().optional().describe("Run browser in headless mode"),
 49 |     arguments: z.array(z.string()).optional().describe("Additional browser arguments")
 50 | }).optional();
 51 | 
 52 | const locatorSchema = { // spread into the parameter shape of every element-targeting tool
 53 |     by: z.enum(["id", "css", "xpath", "name", "tag", "class"]).describe("Locator strategy to find element"),
 54 |     value: z.string().describe("Value for the locator strategy"),
 55 |     timeout: z.number().optional().describe("Maximum time to wait for element in milliseconds")
 56 | };
 57 | 
 58 | // Browser Management Tools
 59 | server.tool(
 60 |     "start_browser",
 61 |     "launches browser",
 62 |     {
 63 |         browser: z.enum(["chrome", "firefox", "edge"]).describe("Browser to launch (chrome or firefox or microsoft edge)"),
 64 |         options: browserOptionsSchema
 65 |     },
 66 |     async ({ browser, options = {} }) => {
 67 |         try {
 68 |             let builder = new Builder();
 69 |             let driver;
 70 |             switch (browser) {
 71 |                 case 'chrome': {
 72 |                     const chromeOptions = new ChromeOptions();
 73 |                     if (options.headless) {
 74 |                         chromeOptions.addArguments('--headless=new');
 75 |                     }
 76 |                     if (options.arguments) {
 77 |                         options.arguments.forEach(arg => chromeOptions.addArguments(arg));
 78 |                     }
 79 |                     driver = await builder
 80 |                         .forBrowser('chrome')
 81 |                         .setChromeOptions(chromeOptions)
 82 |                         .build();
 83 |                     break;
 84 |                 }
 85 |                 case 'edge': {
 86 |                     const edgeOptions = new EdgeOptions();
 87 |                     if (options.headless) {
 88 |                         edgeOptions.addArguments('--headless=new');
 89 |                     }
 90 |                     if (options.arguments) {
 91 |                         options.arguments.forEach(arg => edgeOptions.addArguments(arg));
 92 |                     }
 93 |                     driver = await builder
 94 |                         .forBrowser('edge')
 95 |                         .setEdgeOptions(edgeOptions)
 96 |                         .build();
 97 |                     break;
 98 |                 }
 99 |                 case 'firefox': {
100 |                     const firefoxOptions = new FirefoxOptions();
101 |                     if (options.headless) {
102 |                         firefoxOptions.addArguments('--headless');
103 |                     }
104 |                     if (options.arguments) {
105 |                         options.arguments.forEach(arg => firefoxOptions.addArguments(arg));
106 |                     }
107 |                     driver = await builder
108 |                         .forBrowser('firefox')
109 |                         .setFirefoxOptions(firefoxOptions)
110 |                         .build();
111 |                     break;
112 |                 }
113 |                 default: {
114 |                     throw new Error(`Unsupported browser: ${browser}`);
115 |                 }
116 |             }
117 |             const sessionId = `${browser}_${Date.now()}`;
118 |             state.drivers.set(sessionId, driver);
119 |             state.currentSession = sessionId;
120 | 
121 |             return {
122 |                 content: [{ type: 'text', text: `Browser started with session_id: ${sessionId}` }]
123 |             };
124 |         } catch (e) {
125 |             return {
126 |                 content: [{ type: 'text', text: `Error starting browser: ${e.message}` }]
127 |             };
128 |         }
129 |     }
130 | );
131 | 
132 | server.tool(
133 |     "navigate",
134 |     "navigates to a URL",
135 |     {
136 |         url: z.string().describe("URL to navigate to")
137 |     },
138 |     async ({ url }) => {
139 |         try {
140 |             const driver = getDriver();
141 |             await driver.get(url);
142 |             return {
143 |                 content: [{ type: 'text', text: `Navigated to ${url}` }]
144 |             };
145 |         } catch (e) {
146 |             return {
147 |                 content: [{ type: 'text', text: `Error navigating: ${e.message}` }]
148 |             };
149 |         }
150 |     }
151 | );
152 | 
153 | // Element Interaction Tools
154 | server.tool(
155 |     "find_element",
156 |     "finds an element",
157 |     {
158 |         ...locatorSchema
159 |     },
160 |     async ({ by, value, timeout = 10000 }) => {
161 |         try {
162 |             const driver = getDriver();
163 |             const locator = getLocator(by, value);
164 |             await driver.wait(until.elementLocated(locator), timeout);
165 |             return {
166 |                 content: [{ type: 'text', text: 'Element found' }]
167 |             };
168 |         } catch (e) {
169 |             return {
170 |                 content: [{ type: 'text', text: `Error finding element: ${e.message}` }]
171 |             };
172 |         }
173 |     }
174 | );
175 | 
176 | server.tool(
177 |     "click_element",
178 |     "clicks an element",
179 |     {
180 |         ...locatorSchema
181 |     },
182 |     async ({ by, value, timeout = 10000 }) => {
183 |         try {
184 |             const driver = getDriver();
185 |             const locator = getLocator(by, value);
186 |             const element = await driver.wait(until.elementLocated(locator), timeout);
187 |             await element.click();
188 |             return {
189 |                 content: [{ type: 'text', text: 'Element clicked' }]
190 |             };
191 |         } catch (e) {
192 |             return {
193 |                 content: [{ type: 'text', text: `Error clicking element: ${e.message}` }]
194 |             };
195 |         }
196 |     }
197 | );
198 | 
199 | server.tool(
200 |     "send_keys",
201 |     "sends keys to an element, aka typing",
202 |     {
203 |         ...locatorSchema,
204 |         text: z.string().describe("Text to enter into the element")
205 |     },
206 |     async ({ by, value, text, timeout = 10000 }) => {
207 |         try {
208 |             const driver = getDriver();
209 |             const locator = getLocator(by, value);
210 |             const element = await driver.wait(until.elementLocated(locator), timeout);
211 |             await element.clear();
212 |             await element.sendKeys(text);
213 |             return {
214 |                 content: [{ type: 'text', text: `Text "${text}" entered into element` }]
215 |             };
216 |         } catch (e) {
217 |             return {
218 |                 content: [{ type: 'text', text: `Error entering text: ${e.message}` }]
219 |             };
220 |         }
221 |     }
222 | );
223 | 
224 | server.tool(
225 |     "get_element_text",
226 |     "gets the text() of an element",
227 |     {
228 |         ...locatorSchema
229 |     },
230 |     async ({ by, value, timeout = 10000 }) => {
231 |         try {
232 |             const driver = getDriver();
233 |             const locator = getLocator(by, value);
234 |             const element = await driver.wait(until.elementLocated(locator), timeout);
235 |             const text = await element.getText();
236 |             return {
237 |                 content: [{ type: 'text', text }]
238 |             };
239 |         } catch (e) {
240 |             return {
241 |                 content: [{ type: 'text', text: `Error getting element text: ${e.message}` }]
242 |             };
243 |         }
244 |     }
245 | );
246 | 
247 | server.tool(
248 |     "hover",
249 |     "moves the mouse to hover over an element",
250 |     {
251 |         ...locatorSchema
252 |     },
253 |     async ({ by, value, timeout = 10000 }) => {
254 |         try {
255 |             const driver = getDriver();
256 |             const locator = getLocator(by, value);
257 |             const element = await driver.wait(until.elementLocated(locator), timeout);
258 |             const actions = driver.actions({ bridge: true });
259 |             await actions.move({ origin: element }).perform();
260 |             return {
261 |                 content: [{ type: 'text', text: 'Hovered over element' }]
262 |             };
263 |         } catch (e) {
264 |             return {
265 |                 content: [{ type: 'text', text: `Error hovering over element: ${e.message}` }]
266 |             };
267 |         }
268 |     }
269 | );
270 | 
271 | server.tool(
272 |     "drag_and_drop",
273 |     "drags an element and drops it onto another element",
274 |     {
275 |         ...locatorSchema,
276 |         targetBy: z.enum(["id", "css", "xpath", "name", "tag", "class"]).describe("Locator strategy to find target element"),
277 |         targetValue: z.string().describe("Value for the target locator strategy")
278 |     },
279 |     async ({ by, value, targetBy, targetValue, timeout = 10000 }) => {
280 |         try {
281 |             const driver = getDriver();
282 |             const sourceLocator = getLocator(by, value);
283 |             const targetLocator = getLocator(targetBy, targetValue);
284 |             const sourceElement = await driver.wait(until.elementLocated(sourceLocator), timeout);
285 |             const targetElement = await driver.wait(until.elementLocated(targetLocator), timeout);
286 |             const actions = driver.actions({ bridge: true });
287 |             await actions.dragAndDrop(sourceElement, targetElement).perform();
288 |             return {
289 |                 content: [{ type: 'text', text: 'Drag and drop completed' }]
290 |             };
291 |         } catch (e) {
292 |             return {
293 |                 content: [{ type: 'text', text: `Error performing drag and drop: ${e.message}` }]
294 |             };
295 |         }
296 |     }
297 | );
298 | 
299 | server.tool(
300 |     "double_click",
301 |     "performs a double click on an element",
302 |     {
303 |         ...locatorSchema
304 |     },
305 |     async ({ by, value, timeout = 10000 }) => {
306 |         try {
307 |             const driver = getDriver();
308 |             const locator = getLocator(by, value);
309 |             const element = await driver.wait(until.elementLocated(locator), timeout);
310 |             const actions = driver.actions({ bridge: true });
311 |             await actions.doubleClick(element).perform();
312 |             return {
313 |                 content: [{ type: 'text', text: 'Double click performed' }]
314 |             };
315 |         } catch (e) {
316 |             return {
317 |                 content: [{ type: 'text', text: `Error performing double click: ${e.message}` }]
318 |             };
319 |         }
320 |     }
321 | );
322 | 
323 | server.tool(
324 |     "right_click",
325 |     "performs a right click (context click) on an element",
326 |     {
327 |         ...locatorSchema
328 |     },
329 |     async ({ by, value, timeout = 10000 }) => {
330 |         try {
331 |             const driver = getDriver();
332 |             const locator = getLocator(by, value);
333 |             const element = await driver.wait(until.elementLocated(locator), timeout);
334 |             const actions = driver.actions({ bridge: true });
335 |             await actions.contextClick(element).perform();
336 |             return {
337 |                 content: [{ type: 'text', text: 'Right click performed' }]
338 |             };
339 |         } catch (e) {
340 |             return {
341 |                 content: [{ type: 'text', text: `Error performing right click: ${e.message}` }]
342 |             };
343 |         }
344 |     }
345 | );
346 | 
347 | server.tool(
348 |     "press_key",
349 |     "simulates pressing a keyboard key",
350 |     {
351 |         key: z.string().describe("Key to press (e.g., 'Enter', 'Tab', 'a', etc.)")
352 |     },
353 |     async ({ key }) => {
354 |         try {
355 |             const driver = getDriver();
356 |             const actions = driver.actions({ bridge: true });
357 |             await actions.keyDown(key).keyUp(key).perform();
358 |             return {
359 |                 content: [{ type: 'text', text: `Key '${key}' pressed` }]
360 |             };
361 |         } catch (e) {
362 |             return {
363 |                 content: [{ type: 'text', text: `Error pressing key: ${e.message}` }]
364 |             };
365 |         }
366 |     }
367 | );
368 | 
369 | server.tool(
370 |     "upload_file",
371 |     "uploads a file using a file input element",
372 |     {
373 |         ...locatorSchema,
374 |         filePath: z.string().describe("Absolute path to the file to upload")
375 |     },
376 |     async ({ by, value, filePath, timeout = 10000 }) => {
377 |         try {
378 |             const driver = getDriver();
379 |             const locator = getLocator(by, value);
380 |             const element = await driver.wait(until.elementLocated(locator), timeout);
381 |             await element.sendKeys(filePath);
382 |             return {
383 |                 content: [{ type: 'text', text: 'File upload initiated' }]
384 |             };
385 |         } catch (e) {
386 |             return {
387 |                 content: [{ type: 'text', text: `Error uploading file: ${e.message}` }]
388 |             };
389 |         }
390 |     }
391 | );
392 | 
393 | server.tool(
394 |     "take_screenshot",
395 |     "captures a screenshot of the current page",
396 |     {
397 |         outputPath: z.string().optional().describe("Optional path where to save the screenshot. If not provided, returns base64 data.")
398 |     },
399 |     async ({ outputPath }) => {
400 |         try {
401 |             const driver = getDriver();
402 |             const screenshot = await driver.takeScreenshot();
403 |             if (outputPath) {
404 |                 const fs = await import('fs');
405 |                 await fs.promises.writeFile(outputPath, screenshot, 'base64');
406 |                 return {
407 |                     content: [{ type: 'text', text: `Screenshot saved to ${outputPath}` }]
408 |                 };
409 |             } else {
410 |                 return {
411 |                     content: [
412 |                         { type: 'text', text: 'Screenshot captured as base64:' },
413 |                         { type: 'text', text: screenshot }
414 |                     ]
415 |                 };
416 |             }
417 |         } catch (e) {
418 |             return {
419 |                 content: [{ type: 'text', text: `Error taking screenshot: ${e.message}` }]
420 |             };
421 |         }
422 |     }
423 | );
424 | 
425 | server.tool(
426 |     "close_session",
427 |     "closes the current browser session",
428 |     {},
429 |     async () => {
430 |         try {
431 |             const driver = getDriver();
432 |             await driver.quit();
433 |             state.drivers.delete(state.currentSession);
434 |             const sessionId = state.currentSession;
435 |             state.currentSession = null;
436 |             return {
437 |                 content: [{ type: 'text', text: `Browser session ${sessionId} closed` }]
438 |             };
439 |         } catch (e) {
440 |             return {
441 |                 content: [{ type: 'text', text: `Error closing session: ${e.message}` }]
442 |             };
443 |         }
444 |     }
445 | );
446 | 
447 | // Resources
448 | server.resource(
449 |     "browser-status",
450 |     new ResourceTemplate("browser-status://current"),
451 |     async (uri) => ({
452 |         contents: [{
453 |             uri: uri.href,
454 |             text: state.currentSession
455 |                 ? `Active browser session: ${state.currentSession}`
456 |                 : "No active browser session"
457 |         }]
458 |     })
459 | );
460 | 
461 | // Cleanup handler
462 | async function cleanup() {
463 |     for (const [sessionId, driver] of state.drivers) {
464 |         try {
465 |             await driver.quit();
466 |         } catch (e) {
467 |             console.error(`Error closing browser session ${sessionId}:`, e);
468 |         }
469 |     }
470 |     state.drivers.clear();
471 |     state.currentSession = null;
472 |     process.exit(0);
473 | }
474 | 
475 | process.on('SIGTERM', cleanup);
476 | process.on('SIGINT', cleanup);
477 | 
478 | // Start the server
479 | const transport = new StdioServerTransport();
480 | await server.connect(transport);


--------------------------------------------------------------------------------