├── .github └── workflows │ ├── ci.yml │ └── publish.yml ├── .gitignore ├── .npmignore ├── LICENSE ├── PLAN.md ├── README.md ├── SECURITY.md ├── cli.js ├── eslint.config.mjs ├── index.d.ts ├── index.js ├── package-lock.json ├── package.json ├── playwright.config.ts ├── pnpm-lock.yaml ├── src ├── context.ts ├── httpServer.ts ├── index.ts ├── program.ts ├── resources │ ├── console.ts │ └── resource.ts ├── server.ts └── tools │ ├── common.ts │ ├── extractContent.ts │ ├── files.ts │ ├── install.ts │ ├── keyboard.ts │ ├── navigate.ts │ ├── pdf.ts │ ├── screen.ts │ ├── snapshot.ts │ ├── tabs.ts │ ├── tool.ts │ └── utils.ts ├── tests ├── basic.spec.ts ├── capabilities.spec.ts ├── cdp.spec.ts ├── console.spec.ts ├── fixtures.ts ├── httpApi.spec.ts ├── iframes.spec.ts ├── launch.spec.ts ├── pdf.spec.ts ├── sse.spec.ts └── tabs.spec.ts ├── tsconfig.json └── utils └── copyright.js /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build-and-test: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v4 15 | 16 | - name: Use Node.js 18 17 | uses: actions/setup-node@v4 18 | with: 19 | node-version: '18' 20 | cache: 'npm' 21 | 22 | - name: Install dependencies 23 | run: npm ci 24 | 25 | - name: Run linting 26 | run: npm run lint 27 | 28 | - name: Build 29 | run: npm run build 30 | 31 | - name: Install Playwright browsers 32 | run: npx playwright install --with-deps 33 | 34 | - name: Run tests 35 | run: npm test 36 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | on: 3 | release: 4 | types: [published] 5 | jobs: 6 | publish-npm: 7 | runs-on: ubuntu-latest 8 | permissions: 9 | contents: read 10 | id-token: write 11 | 
steps: 12 | - uses: actions/checkout@v4 13 | - uses: actions/setup-node@v4 14 | with: 15 | node-version: 18 16 | registry-url: https://registry.npmjs.org/ 17 | - run: npm ci 18 | - run: npm run build 19 | - run: npm run lint 20 | - run: npm run test 21 | - run: npm publish --provenance 22 | env: 23 | NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}} 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | lib/ 2 | node_modules/ 3 | test-results/ 4 | 5 | 6 | dify-docs/ 7 | .aider* 8 | .prompt 9 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | **/* 2 | README.md 3 | LICENSE 4 | !lib/**/*.js 5 | !cli.js 6 | !index.* 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Portions Copyright (c) Microsoft Corporation. 190 | Portions Copyright 2017 Google Inc. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /PLAN.md: -------------------------------------------------------------------------------- 1 | # Playwright MCP 改造计划 2 | 3 | ## 项目背景 4 | 5 | Playwright MCP 是一个 Model Context Protocol (MCP) 服务器,提供基于 Playwright 的浏览器自动化能力。目前,该服务器支持两种通信方式: 6 | 7 | 1. **Stdio 传输**:通过标准输入/输出进行通信(默认模式) 8 | 2. **SSE 传输**:当使用 `--port` 参数时,启动一个基于 HTTP 的 Server-Sent Events (SSE) 服务 9 | 10 | ## 改造目标 11 | 12 | 本次改造的主要目标是: 13 | 14 | 1. **增加标准 HTTP API**:实现一个基于请求/响应模式的 HTTP API,允许客户端通过发送单个 HTTP 请求来执行 MCP 工具,并直接在 HTTP 响应中接收结果 15 | 2. **保持现有功能**:确保现有的 Stdio 和 SSE 传输方式继续正常工作 16 | 17 | ## 技术方案 18 | 19 | ### 1. 命令行参数 20 | 21 | 添加一个新的命令行参数 `--http-port <port>`,用于指定 HTTP API 的监听端口: 22 | 23 | ``` 24 | npx playwright-mcp-bypass@latest --http-port 8080 25 | ``` 26 | 27 | ### 2. 
HTTP API 设计 28 | 29 | #### 端点结构 30 | 31 | - 基本路径:`/tools/{tool_name}` 32 | - 例如: 33 | - `/tools/browser_navigate` 34 | - `/tools/browser_click` 35 | - `/tools/browser_type` 36 | 37 | #### HTTP 方法 38 | 39 | - 主要使用 `POST` 方法执行工具操作 40 | - 可选:对于只读操作(如 `browser_tab_list`)可以支持 `GET` 方法 41 | 42 | #### 请求格式 43 | 44 | - Content-Type: `application/json` 45 | - 请求体:包含工具所需的参数,格式为 JSON 46 | 47 | 示例(调用 `browser_navigate`): 48 | ```json 49 | { 50 | "url": "https://example.com" 51 | } 52 | ``` 53 | 54 | 示例(调用 `browser_click`): 55 | ```json 56 | { 57 | "element": "Login Button", 58 | "ref": "button#login" 59 | } 60 | ``` 61 | 62 | #### 响应格式 63 | 64 | - Content-Type: `application/json` 65 | - 成功响应(HTTP 200): 66 | ```json 67 | { 68 | "success": true, 69 | "result": { ... } // 工具执行结果 70 | } 71 | ``` 72 | - 错误响应(HTTP 4xx/5xx): 73 | ```json 74 | { 75 | "success": false, 76 | "error": "错误信息" 77 | } 78 | ``` 79 | 80 | ### 3. 实现步骤 81 | 82 | #### 3.1 修改 `src/program.ts` 83 | 84 | 1. 添加新的命令行选项 `--http-port <port>`: 85 | ```typescript 86 | .option('--http-port <port>', 'Port to listen on for HTTP API.') 87 | ``` 88 | 89 | 2. 引入 Koa.js 及相关中间件: 90 | ```typescript 91 | import Koa from 'koa'; 92 | import Router from '@koa/router'; 93 | import bodyParser from 'koa-bodyparser'; 94 | import cors from '@koa/cors'; 95 | import http from 'http'; // 仍然需要 http 来创建服务器实例 96 | ``` 97 | 98 | 3. 
创建新的函数 `startHttpServer`,使用 Koa.js: 99 | ```typescript 100 | async function startHttpServer(port: number, serverList: ServerList) { 101 | // 会话管理 102 | const sessions = new Map(); 103 | const sessionTimers = new Map(); // 用于会话超时 104 | const sessionTimeout = 30 * 60 * 1000; // 30 分钟 105 | 106 | const app = new Koa(); 107 | const router = new Router(); 108 | 109 | // 中间件 110 | app.use(cors({ 111 | allowHeaders: ['Content-Type', 'Session-Id'], // 允许 Session-Id 头 112 | exposeHeaders: [], // 根据需要暴露头 113 | })); 114 | app.use(bodyParser()); 115 | 116 | // 会话处理中间件 117 | app.use(async (ctx, next) => { 118 | const sessionId = ctx.get('session-id') || 'default'; 119 | let server = sessions.get(sessionId); 120 | 121 | if (!server) { 122 | console.log(`Creating new session: ${sessionId}`); 123 | server = await serverList.create(); 124 | sessions.set(sessionId, server); 125 | } else { 126 | console.log(`Reusing session: ${sessionId}`); 127 | } 128 | 129 | // 更新会话超时 130 | if (sessionTimers.has(sessionId)) { 131 | clearTimeout(sessionTimers.get(sessionId)!); 132 | } 133 | const timer = setTimeout(async () => { 134 | console.log(`Session timed out: ${sessionId}`); 135 | const serverToClose = sessions.get(sessionId); 136 | if (serverToClose) { 137 | await serverList.close(serverToClose); 138 | sessions.delete(sessionId); 139 | } 140 | sessionTimers.delete(sessionId); 141 | }, sessionTimeout); 142 | sessionTimers.set(sessionId, timer); 143 | 144 | ctx.state.server = server; // 将 server 实例传递给后续中间件/路由 145 | ctx.state.sessionId = sessionId; 146 | await next(); 147 | }); 148 | 149 | // 路由定义 150 | router.post('/tools/:toolName', async (ctx) => { 151 | const { toolName } = ctx.params; 152 | const params = ctx.request.body || {}; 153 | const server: Server = ctx.state.server; 154 | 155 | try { 156 | // 可选:检查工具是否存在 157 | const tools = await server.listTools(); 158 | const toolExists = tools.tools.some(tool => tool.name === toolName); 159 | if (!toolExists) { 160 | ctx.status = 404; 161 | 
ctx.body = { success: false, error: `Tool "${toolName}" not found` }; 162 | return; 163 | } 164 | 165 | console.log(`Calling tool "${toolName}" for session ${ctx.state.sessionId} with params:`, params); 166 | const result = await server.callTool(toolName, params); 167 | ctx.status = 200; 168 | ctx.body = { success: true, result }; 169 | } catch (error: any) { 170 | console.error(`Error calling tool "${toolName}" for session ${ctx.state.sessionId}:`, error); 171 | ctx.status = 500; 172 | ctx.body = { success: false, error: String(error.message || error) }; 173 | } 174 | }); 175 | 176 | // 特殊处理 GET /tools/browser_tab_list 177 | router.get('/tools/browser_tab_list', async (ctx) => { 178 | const server: Server = ctx.state.server; 179 | try { 180 | console.log(`Calling tool "browser_tab_list" for session ${ctx.state.sessionId}`); 181 | const result = await server.callTool('browser_tab_list', {}); 182 | ctx.status = 200; 183 | ctx.body = { success: true, result }; 184 | } catch (error: any) { 185 | console.error(`Error calling tool "browser_tab_list" for session ${ctx.state.sessionId}:`, error); 186 | ctx.status = 500; 187 | ctx.body = { success: false, error: String(error.message || error) }; 188 | } 189 | }); 190 | 191 | app.use(router.routes()).use(router.allowedMethods()); 192 | 193 | // 启动服务器 194 | const httpServer = http.createServer(app.callback()); 195 | 196 | httpServer.listen(port, () => { 197 | console.log(`HTTP API server listening on port ${port}`); 198 | }); 199 | 200 | // 添加优雅关闭处理 201 | process.on('SIGINT', async () => { 202 | console.log('Closing HTTP server...'); 203 | httpServer.close(); 204 | 205 | // 清理所有会话 206 | for (const [sessionId, server] of sessions.entries()) { 207 | console.log(`Closing session: ${sessionId}`); 208 | await serverList.close(server); 209 | } 210 | sessions.clear(); 211 | 212 | // 清理所有定时器 213 | for (const timer of sessionTimers.values()) { 214 | clearTimeout(timer); 215 | } 216 | sessionTimers.clear(); 217 | 218 | 
process.exit(0); 219 | }); 220 | } 221 | ``` 222 | 223 | 4. 在 `program.ts` 的 action 回调中添加对 `--http-port` 的处理: 224 | ```typescript 225 | if (options.httpPort) { 226 | startHttpServer(+options.httpPort, serverList); 227 | } else if (options.port) { 228 | startSSEServer(+options.port, serverList); 229 | } else { 230 | const server = await serverList.create(); 231 | await server.connect(new StdioServerTransport()); 232 | } 233 | ``` 234 | 235 | #### 3.2 实现 HTTP 请求处理 (使用 Koa.js) 236 | 237 | 在 `startHttpServer` 函数中,我们使用 Koa.js 及其路由和中间件来处理请求: 238 | 239 | 1. **Koa 实例创建**:`const app = new Koa();` 240 | 2. **中间件使用**: 241 | - `cors()`: 处理跨域请求。 242 | - `bodyParser()`: 解析 POST 请求的 JSON 或表单数据。 243 | - **自定义会话中间件**: 244 | - 从 `ctx.get('session-id')` 获取会话 ID。 245 | - 使用 `sessions` Map 获取或创建 `Server` 实例。 246 | - 更新会话超时定时器 (`sessionTimers`)。 247 | - 将 `server` 实例和 `sessionId` 存储在 `ctx.state` 中,以便后续路由访问。 248 | 3. **路由定义 (`@koa/router`)**: 249 | - `router.post('/tools/:toolName', ...)`: 处理工具调用请求。 250 | - 从 `ctx.params` 获取 `toolName`。 251 | - 从 `ctx.request.body` 获取参数。 252 | - 从 `ctx.state.server` 获取 `Server` 实例。 253 | - 调用 `server.callTool(toolName, params)`。 254 | - 根据结果设置 `ctx.status` 和 `ctx.body`。 255 | - `router.get('/tools/browser_tab_list', ...)`: 处理特定的 GET 请求。 256 | 4. **启动服务器**: 257 | - `const httpServer = http.createServer(app.callback());` 258 | - `httpServer.listen(port, ...)` 259 | 260 | #### 3.3 错误处理 261 | 262 | Koa 的错误处理通常通过 `try...catch` 块或专门的错误处理中间件完成。在我们的路由处理函数中: 263 | 264 | 1. 无效的 URL 路径:返回 404 Not Found 265 | 2. 无效的 JSON 格式:返回 400 Bad Request 266 | 3. 工具执行错误:返回 500 Internal Server Error 267 | 4. 
不支持的 HTTP 方法:返回 405 Method Not Allowed 268 | 269 | 可以进一步增强错误处理: 270 | 271 | ```typescript 272 | // 在 server.callTool 之前添加工具存在性检查 273 | const tools = await server.listTools(); 274 | const toolExists = tools.tools.some(tool => tool.name === toolName); 275 | if (!toolExists) { 276 | ctx.status = 404; 277 | ctx.type = 'application/json'; 278 | ctx.body = { success: false, error: `Tool "${toolName}" not found` }; 279 | return; 280 | } 281 | ``` 282 | 283 | #### 3.4 会话管理 (使用 Koa 中间件) 284 | 285 | 我们使用一个自定义的 Koa 中间件来处理会话: 286 | 287 | ```typescript 288 | // 在 startHttpServer 函数中 289 | const sessions = new Map(); 290 | const sessionTimers = new Map(); 291 | const sessionTimeout = 30 * 60 * 1000; // 30 分钟 292 | 293 | app.use(async (ctx, next) => { 294 | const sessionId = ctx.get('session-id') || 'default'; 295 | let server = sessions.get(sessionId); 296 | 297 | if (!server) { 298 | console.log(`Creating new session: ${sessionId}`); 299 | server = await serverList.create(); 300 | sessions.set(sessionId, server); 301 | } else { 302 | console.log(`Reusing session: ${sessionId}`); 303 | } 304 | 305 | // 更新会话超时 306 | if (sessionTimers.has(sessionId)) { 307 | clearTimeout(sessionTimers.get(sessionId)!); 308 | } 309 | const timer = setTimeout(async () => { 310 | console.log(`Session timed out: ${sessionId}`); 311 | const serverToClose = sessions.get(sessionId); 312 | if (serverToClose) { 313 | await serverList.close(serverToClose); 314 | sessions.delete(sessionId); 315 | } 316 | sessionTimers.delete(sessionId); 317 | }, sessionTimeout); 318 | sessionTimers.set(sessionId, timer); 319 | 320 | ctx.state.server = server; // 传递给后续处理 321 | ctx.state.sessionId = sessionId; 322 | await next(); // 调用下一个中间件或路由 323 | }); 324 | ``` 325 | 326 | 优雅关闭处理(`process.on('SIGINT', ...)`)保持不变,确保在服务器停止时正确关闭所有浏览器实例和清理资源。 327 | 328 | ### 4. 
依赖项 329 | 330 | 需要安装以下依赖项: 331 | 332 | ```bash 333 | pnpm add koa @koa/router koa-bodyparser @koa/cors 334 | # 同时需要安装它们的类型定义(如果使用 TypeScript) 335 | pnpm add -D @types/koa @types/koa__router @types/koa-bodyparser @types/koa__cors 336 | ``` 337 | 338 | ### 5. 文档更新 339 | 340 | 更新 `README.md`,添加以下内容: 341 | 342 | 1. 新的命令行参数 `--http-port` 的说明 343 | 2. HTTP API 的使用方法和示例 344 | 3. 会话管理的说明 345 | 4. 与现有 SSE 传输的区别 346 | 347 | ## 调用示例 348 | 349 | ### 使用 curl 350 | 351 | ```bash 352 | # 导航到指定 URL 353 | curl -X POST http://localhost:8080/tools/browser_navigate \ 354 | -H "Content-Type: application/json" \ 355 | -d '{ 356 | "url": "https://example.com" 357 | }' 358 | 359 | # 点击元素 360 | curl -X POST http://localhost:8080/tools/browser_click \ 361 | -H "Content-Type: application/json" \ 362 | -d '{ 363 | "element": "Login Button", 364 | "ref": "button#login" 365 | }' 366 | 367 | # 在输入框中输入文本 368 | curl -X POST http://localhost:8080/tools/browser_type \ 369 | -H "Content-Type: application/json" \ 370 | -d '{ 371 | "element": "Username Input", 372 | "ref": "input#username", 373 | "text": "myUsername", 374 | "submit": false 375 | }' 376 | 377 | # 获取标签列表 378 | curl -X GET http://localhost:8080/tools/browser_tab_list 379 | ``` 380 | 381 | ### 使用 JavaScript 382 | 383 | ```javascript 384 | // 导航到指定 URL 385 | fetch('http://localhost:8080/tools/browser_navigate', { 386 | method: 'POST', 387 | headers: { 388 | 'Content-Type': 'application/json' 389 | }, 390 | body: JSON.stringify({ 391 | url: 'https://example.com' 392 | }) 393 | }) 394 | .then(response => response.json()) 395 | .then(data => console.log(data)); 396 | ``` 397 | 398 | ### 使用 Python 399 | 400 | ```python 401 | import requests 402 | import json 403 | 404 | # 导航到指定 URL 405 | response = requests.post( 406 | 'http://localhost:8080/tools/browser_navigate', 407 | headers={'Content-Type': 'application/json'}, 408 | data=json.dumps({'url': 'https://example.com'}) 409 | ) 410 | print(response.json()) 411 | ``` 412 | 413 | ## 技术架构图 414 | 
415 | ``` 416 | +------------------+ HTTP Request +-------------------------+ 417 | | | ------------------------> | | 418 | | HTTP Client | | Playwright MCP Server | 419 | | (curl, script) | <------------------------ | (with HTTP API) | 420 | | | HTTP Response | | 421 | +------------------+ +-------------------------+ 422 | | 423 | | Controls 424 | v 425 | +-------------------------+ 426 | | | 427 | | Browser Instance | 428 | | (Chrome, Firefox) | 429 | | | 430 | +-------------------------+ 431 | ``` 432 | 433 | ## 技术实现细节 (Koa) 434 | 435 | ### Koa 中间件流程 436 | 437 | 1. **CORS 中间件**: 处理跨域请求头。 438 | 2. **Body Parser 中间件**: 解析请求体 (`ctx.request.body`)。 439 | 3. **会话管理中间件**: 440 | - 获取 `session-id`。 441 | - 获取或创建 `Server` 实例。 442 | - 更新会话超时。 443 | - 将 `server` 和 `sessionId` 存入 `ctx.state`。 444 | - 调用 `next()`。 445 | 4. **Router 中间件**: 446 | - 匹配路由 (`/tools/:toolName`)。 447 | - 执行对应的路由处理函数。 448 | - 路由处理函数从 `ctx.state` 获取 `server`,从 `ctx.params` 获取 `toolName`,从 `ctx.request.body` 获取参数。 449 | - 调用 `server.callTool()`。 450 | - 设置响应 `ctx.status` 和 `ctx.body`。 451 | 452 | ### Server 类扩展 453 | 454 | 为了支持 HTTP API,我们需要在 `Server` 类中添加一个便捷方法来调用工具(这部分与原计划相同): 455 | 456 | ```typescript 457 | // 在 src/server.ts 中扩展 Server 类 458 | Server.prototype.callTool = async function(name: string, args: any) { 459 | const result = await this.handleRequest({ 460 | jsonrpc: '2.0', 461 | id: String(Date.now()), 462 | method: 'callTool', 463 | params: { 464 | name, 465 | arguments: args 466 | } 467 | }); 468 | 469 | if (result.error) { 470 | throw new Error(result.error.message); 471 | } 472 | 473 | return result.result; 474 | }; 475 | ``` 476 | 477 | ### 工具调用流程 478 | 479 | 当通过 HTTP API 调用工具时,完整的流程如下: 480 | 481 | 1. 客户端发送 HTTP 请求到 `/tools/{tool_name}` 482 | 2. HTTP 服务器解析请求,提取工具名称和参数 483 | 3. 服务器根据会话 ID 获取或创建 `Server` 实例 484 | 4. 服务器调用 `server.callTool(toolName, params)` 485 | 5. `callTool` 方法创建一个 MCP 请求并调用 `server.handleRequest` 486 | 6. 
`handleRequest` 方法将请求分发给相应的请求处理程序(在这里是 `CallToolRequestSchema` 处理程序) 487 | 7. 请求处理程序查找匹配的工具并调用其 `handle` 方法 488 | 8. 工具的 `handle` 方法使用 `Context` 实例执行操作(如导航、点击等) 489 | 9. 结果返回给客户端 490 | 491 | ### 会话管理详解 (Koa 中间件) 492 | 493 | 会话管理通过自定义的 Koa 中间件实现: 494 | 495 | 1. **会话标识**:从请求头 `ctx.get('session-id')` 读取。 496 | 2. **会话存储**:使用 `sessions` Map。 497 | 3. **会话创建/复用**:在中间件中处理。 498 | 4. **会话超时**:使用 `sessionTimers` Map 和 `setTimeout` 实现。每次访问时重置定时器。 499 | 5. **会话传递**:通过 `ctx.state.server` 将 `Server` 实例传递给路由处理函数。 500 | 6. **会话清理**:通过 `process.on('SIGINT', ...)` 处理。 501 | 502 | ## 后续详细步骤及验证方法 503 | 504 | ### 阶段一:基础框架搭建与验证 505 | 506 | 1. **安装依赖项** 507 | * **操作**: 执行 `pnpm add koa @koa/router koa-bodyparser @koa/cors && pnpm add -D @types/koa @types/koa__router @types/koa-bodyparser @types/koa__cors` 508 | * **验证**: 509 | * 检查 `package.json` 的 `dependencies` 和 `devDependencies` 是否包含新添加的包。 510 | * 检查 `pnpm-lock.yaml` 文件是否已更新。 511 | * 运行 `pnpm install` 确保没有报错。 512 | 513 | 2. **修改 `src/program.ts`** 514 | * **操作**: 515 | * 使用 `commander` 添加 `--http-port <port>` 选项。 516 | * 导入 `startHttpServer` 函数 (稍后创建)。 517 | * 在 `program.action` 的逻辑中,添加对 `options.httpPort` 的判断,如果存在则调用 `startHttpServer`。 518 | * **验证**: 519 | * 运行 `node cli.js --help`,检查输出是否包含 `--http-port` 选项。 520 | * (暂时无法完全验证,需等待 `startHttpServer` 实现) 尝试运行 `node cli.js --http-port 8080`,预期不应立即报错(可能因 `startHttpServer` 未定义而失败,这是正常的)。 521 | 522 | 3. 
**创建并实现 `src/httpServer.ts` (基础结构)** 523 | * **操作**: 524 | * 创建新文件 `src/httpServer.ts`。 525 | * 实现 `startHttpServer` 函数的基本框架: 526 | * 导入 `Koa`, `Router`, `bodyParser`, `cors`, `http`。 527 | * 创建 Koa 实例 (`app`) 和 Router 实例 (`router`)。 528 | * 使用 `cors()` 和 `bodyParser()` 中间件。 529 | * 添加一个临时的根路由 (`router.get('/', ctx => { ctx.body = 'OK'; })`) 用于测试。 530 | * 应用路由 (`app.use(router.routes()).use(router.allowedMethods())`)。 531 | * 创建 HTTP 服务器 (`http.createServer(app.callback())`)。 532 | * 启动服务器监听指定端口,并打印日志。 533 | * 在 `src/program.ts` 中正确导入 `startHttpServer`。 534 | * **验证**: 535 | * 运行 `node cli.js --http-port 8080`。 536 | * 检查控制台是否输出 "HTTP API server listening on port 8080"。 537 | * 使用 `curl http://localhost:8080/`,预期收到 "OK"。 538 | * 使用 `curl -X OPTIONS http://localhost:8080/` -v,检查响应头是否包含正确的 CORS 头 (e.g., `Access-Control-Allow-Origin: *`)。 539 | * 使用 `curl -X POST http://localhost:8080/ -H "Content-Type: application/json" -d '{"test":1}'`,预期不会报错(即使路由不存在,body-parser 应该能处理)。 540 | 541 | ### 阶段二:核心功能实现与验证 542 | 543 | 4. **扩展 `Server` 类添加 `callTool` 方法** 544 | * **操作**: 545 | * 打开 `src/server.ts`。 546 | * 在 `Server` 类或其原型上添加 `async callTool(name: string, args: any)` 方法,实现如 `PLAN.md` 中所示的逻辑(构造 MCP 请求,调用 `handleRequest`,处理结果/错误)。 547 | * **验证**: 548 | * **单元测试**: 编写针对 `Server.prototype.callTool` 的单元测试。模拟 `handleRequest` 方法,测试 `callTool` 在不同输入(有效工具名/参数,无效工具名,`handleRequest` 返回错误等)下的行为。运行单元测试并确保通过。 549 | 550 | 5. **实现会话管理中间件** 551 | * **操作**: 552 | * 在 `src/httpServer.ts` 的 `startHttpServer` 函数中,在 `bodyParser` 之后、路由之前,添加会话管理中间件 (`app.use(async (ctx, next) => { ... 
})`)。 553 | * 实现中间件逻辑:获取 `session-id`,管理 `sessions` Map 和 `sessionTimers` Map,创建/复用 `Server` 实例,更新超时,将 `server` 和 `sessionId` 存入 `ctx.state`。 554 | * **验证**: 555 | * 运行 `node cli.js --http-port 8080`。 556 | * **首次请求**: 使用 `curl http://localhost:8080/` (或其他已定义的路由),检查服务器日志是否输出 "Creating new session: default"。 557 | * **带 ID 的首次请求**: 使用 `curl -H "Session-Id: test1234" http://localhost:8080/`,检查日志是否输出 "Creating new session: test1234"。 558 | * **会话复用**: 再次发送相同 `Session-Id` 的请求 (`curl -H "Session-Id: test1234" http://localhost:8080/`),检查日志是否输出 "Reusing session: test1234"。 559 | * **会话超时**: (需要将 `sessionTimeout` 临时调小,例如 5 秒) 发送一个请求,等待超过超时时间,检查日志是否输出 "Session timed out: ..."。再次发送相同 `Session-Id` 的请求,检查日志是否输出 "Creating new session: ..."。 560 | 561 | 6. **实现工具调用路由 (`POST /tools/:toolName`)** 562 | * **操作**: 563 | * 在 `src/httpServer.ts` 中,移除临时根路由,添加 `router.post('/tools/:toolName', async (ctx) => { ... })`。 564 | * 实现路由处理逻辑:从 `ctx.params`, `ctx.request.body`, `ctx.state` 获取所需信息,调用 `ctx.state.server.callTool()`,处理成功/错误响应。 565 | * (可选) 添加工具存在性检查。 566 | * **验证**: 567 | * 运行 `node cli.js --http-port 8080`。 568 | * **调用有效工具 (无参数)**: `curl -X POST http://localhost:8080/tools/browser_snapshot`,预期收到 `{"success":true, "result":{...}}` (具体 result 取决于 snapshot 内容)。 569 | * **调用有效工具 (带参数)**: `curl -X POST -H "Content-Type: application/json" -d '{"url":"about:blank"}' http://localhost:8080/tools/browser_navigate`,预期收到 `{"success":true, "result":null}` 或类似成功响应。 570 | * **调用无效工具**: `curl -X POST http://localhost:8080/tools/invalid_tool_name`,预期收到 `{"success":false, "error":"Tool \"invalid_tool_name\" not found"}` (如果做了检查) 或其他 500 错误。 571 | * **调用带无效参数**: `curl -X POST -H "Content-Type: application/json" -d '{"invalid_param":"foo"}' http://localhost:8080/tools/browser_navigate`,预期收到 `{"success":false, "error":"..."}` (具体的错误信息取决于 Playwright 或工具本身的校验)。 572 | * **使用会话**: 573 | * `curl -H "Session-Id: nav-test" -X POST -H "Content-Type: application/json" -d '{"url":"https://example.com"}' 
http://localhost:8080/tools/browser_navigate` 574 | * `curl -H "Session-Id: nav-test" -X POST http://localhost:8080/tools/browser_snapshot` (检查快照是否为 example.com) 575 | 576 | 7. **实现特定路由 (`GET /tools/browser_tab_list`)** 577 | * **操作**: 在 `src/httpServer.ts` 中添加 `router.get('/tools/browser_tab_list', async (ctx) => { ... })`。 578 | * **验证**: 579 | * 运行 `node cli.js --http-port 8080`。 580 | * `curl http://localhost:8080/tools/browser_tab_list`,预期收到 `{"success":true, "result":{ "tabs": [...] }}`。 581 | * (可选) 使用 POST 调用 `browser_tab_new` 创建新标签页,然后再次 GET `browser_tab_list` 验证列表是否更新。 582 | 583 | 8. **实现优雅关闭 (`SIGINT` 处理)** 584 | * **操作**: 在 `src/httpServer.ts` 的 `startHttpServer` 中添加 `process.on('SIGINT', ...)` 逻辑,确保关闭 HTTP 服务器、清理所有会话和定时器。 585 | * **验证**: 586 | * 运行 `node cli.js --http-port 8080`。 587 | * 创建几个会话 (使用不同 `Session-Id` 发送请求)。 588 | * 按 `Ctrl+C` 终止服务器。 589 | * 检查服务器日志是否输出 "Closing HTTP server..." 以及每个活动会话的 "Closing session: ..." 日志。 590 | * 检查进程是否正常退出 (退出码 0)。 591 | 592 | ### 阶段三:测试与文档 593 | 594 | 9. **编写集成测试** 595 | * **操作**: 596 | * 在 `tests/` 目录下创建新的测试文件,例如 `tests/httpApi.spec.ts`。 597 | * 使用测试框架 (如 Playwright Test 自带的) 和 HTTP 请求库 (如 `node-fetch` 或 `axios`) 编写测试用例。 598 | * 测试用例应覆盖: 599 | * 启动带 `--http-port` 的服务器。 600 | * 调用各种工具 (GET 和 POST)。 601 | * 验证成功和失败的响应。 602 | * 测试会话管理(使用不同 `Session-Id`)。 603 | * 测试错误处理 (无效工具、无效参数)。 604 | * **验证**: 运行 `pnpm test` (或具体的测试命令),确保所有 HTTP API 相关测试用例通过。 605 | 606 | 10. **更新文档 (`README.md`)** 607 | * **操作**: 608 | * 添加关于 `--http-port` 命令行参数的说明。 609 | * 添加 HTTP API 的使用方法:端点、请求/响应格式、会话管理 (`Session-Id` 头)。 610 | * 提供 `curl`、JavaScript (`fetch`)、Python (`requests`) 的调用示例。 611 | * 说明与 SSE 传输的区别。 612 | * **验证**: 人工审阅 `README.md`,确保信息准确、清晰、完整,示例可运行。 613 | 614 | ### 阶段四:发布 615 | 616 | 11. 
**准备发布** 617 | * **操作**: 618 | * 确保所有代码已提交,并且所有测试通过。 619 | * 更新 `package.json` 中的 `version` 字段。 620 | * (可选) 更新 `CHANGELOG.md`。 621 | * **验证**: 622 | * 检查 `git status` 是否干净。 623 | * 确认 `pnpm test` 通过。 624 | * 检查 `package.json` 中的版本号。 625 | 626 | 12. **发布到 npm** 627 | * **操作**: 运行 `pnpm publish` (可能需要先登录 npm)。 628 | * **验证**: 在 npmjs.com 上检查新版本是否已发布成功。尝试使用 `npx playwright-mcp-bypass@<version> --http-port 8080` 运行新版本。 629 | ## 当前进度 630 | 631 | **当前阶段**: 阶段一:基础框架搭建与验证 632 | **当前步骤**: 1. 安装依赖项 (待开始) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## playwright-mcp-bypass 2 | 3 | A Model Context Protocol (MCP) server that provides browser automation capabilities using [Playwright](https://playwright.dev), forked to enhance anti-bot detection bypass. This server enables LLMs to interact with web pages through structured accessibility snapshots. 4 | 5 | ### Key Features 6 | 7 | - **Fast and lightweight**: Uses Playwright's accessibility tree, not pixel-based input. 8 | - **LLM-friendly**: No vision models needed, operates purely on structured data. 9 | - **Deterministic tool application**: Avoids ambiguity common with screenshot-based approaches. 10 | 11 | ### Project Information 12 | 13 | This project (`playwright-mcp-bypass`) is maintained by [yan5xu](https://x.com/yan5xu) and is available at [yan5xu/playwright-mcp-bypass](https://github.com/yan5xu/playwright-mcp-bypass). 14 | 15 | It originated as a fork of `microsoft/playwright-mcp` with the primary goal of enhancing the ability to bypass anti-bot detection mechanisms employed by some websites. This is achieved by adding the `--disable-blink-features=AutomationControlled` argument to the browser launch options, making the automated browser appear more like a regular user's browser.
16 | ### Use Cases 17 | 18 | - Web navigation and form-filling 19 | - Data extraction from structured content 20 | - Automated testing driven by LLMs 21 | - General-purpose browser interaction for agents 22 | 23 | ### Example config 24 | 25 | ```js 26 | { 27 | "mcpServers": { 28 | "playwright-mcp-bypass": { 29 | "command": "npx", 30 | "args": [ 31 | "playwright-mcp-bypass@latest" 32 | ] 33 | } 34 | } 35 | } 36 | ``` 37 | 38 | 39 | #### Installation in VS Code 40 | 41 | Install the Playwright MCP server in VS Code using one of these buttons: 42 | 43 | 50 | 51 | [Install in VS Code](https://insiders.vscode.dev/redirect?url=vscode%3Amcp%2Finstall%3F%257B%2522name%2522%253A%2522playwright-mcp-bypass%2522%252C%2522command%2522%253A%2522npx%2522%252C%2522args%2522%253A%255B%2522-y%2522%252C%2522playwright-mcp-bypass%2540latest%2522%255D%257D) [Install in VS Code Insiders](https://insiders.vscode.dev/redirect?url=vscode-insiders%3Amcp%2Finstall%3F%257B%2522name%2522%253A%2522playwright-mcp-bypass%2522%252C%2522command%2522%253A%2522npx%2522%252C%2522args%2522%253A%255B%2522-y%2522%252C%2522playwright-mcp-bypass%2540latest%2522%255D%257D) 52 | 53 | Alternatively, you can install the Playwright MCP server using the VS Code CLI: 54 | 55 | ```bash 56 | # For VS Code 57 | code --add-mcp '{"name":"playwright-mcp-bypass","command":"npx","args":["playwright-mcp-bypass@latest"]}' 58 | ``` 59 | 60 | ```bash 61 | # For VS Code Insiders 62 | code-insiders --add-mcp '{"name":"playwright-mcp-bypass","command":"npx","args":["playwright-mcp-bypass@latest"]}' 63 | ``` 64 | 65 | After installation, the Playwright MCP server will be available for use with your GitHub Copilot agent in VS Code. 66 | 67 | ### CLI Options 68 | 69 | The Playwright MCP server supports the following command-line options: 70 | 71 | - `--browser <browser>`: Browser or chrome channel to use.
Possible values: 72 | - `chrome`, `firefox`, `webkit`, `msedge` 73 | - Chrome channels: `chrome-beta`, `chrome-canary`, `chrome-dev` 74 | - Edge channels: `msedge-beta`, `msedge-canary`, `msedge-dev` 75 | - Default: `chrome` 76 | - `--caps <caps>`: Comma-separated list of capabilities to enable, possible values: tabs, pdf, history, wait, files, install. Default is all. 77 | - `--cdp-endpoint <endpoint>`: CDP endpoint to connect to 78 | - `--executable-path <path>`: Path to the browser executable 79 | - `--headless`: Run browser in headless mode (headed by default) 80 | - `--port <port>`: Port to listen on for SSE transport 81 | - `--http-port <port>`: Port to listen on for the request/response HTTP API. 82 | - `--user-data-dir <path>`: Path to the user data directory 83 | - `--vision`: Run server that uses screenshots (Aria snapshots are used by default) 84 | 85 | ### User data directory 86 | 87 | Playwright MCP will launch the browser with the new profile, located at 88 | 89 | ``` 90 | - `%USERPROFILE%\AppData\Local\ms-playwright\mcp-chrome-profile` on Windows 91 | - `~/Library/Caches/ms-playwright/mcp-chrome-profile` on macOS 92 | - `~/.cache/ms-playwright/mcp-chrome-profile` on Linux 93 | ``` 94 | 95 | All login information is stored in that profile; you can delete it between sessions if you'd like to clear the offline state. 96 | 97 | 98 | ### Running headless browser (Browser without GUI). 99 | 100 | This mode is useful for background or batch operations. 101 | 102 | ```js 103 | { 104 | "mcpServers": { 105 | "playwright-mcp-bypass": { 106 | "command": "npx", 107 | "args": [ 108 | "playwright-mcp-bypass@latest", 109 | "--headless" 110 | ] 111 | } 112 | } 113 | } 114 | ``` 115 | 116 | ### Running headed browser on Linux w/o DISPLAY 117 | 118 | When running headed browser on system w/o display or from worker processes of the IDEs, 119 | run the MCP server from environment with the DISPLAY and pass the `--port` flag to enable SSE transport.
120 | 121 | ```bash 122 | npx playwright-mcp-bypass@latest --port 8931 123 | ``` 124 | 125 | And then in MCP client config, set the `url` to the SSE endpoint: 126 | 127 | ```js 128 | { 129 | "mcpServers": { 130 | "playwright-mcp-bypass": { 131 | "url": "http://localhost:8931/sse" 132 | } 133 | } 134 | } 135 | ``` 136 | ### HTTP API Usage (Request/Response) 137 | In addition to the default Stdio transport and the SSE transport (`--port`), this server provides a standard HTTP API for request/response interactions. This is useful for clients that prefer simple HTTP calls over persistent connections. 138 | 139 | #### Enabling the HTTP API 140 | 141 | To enable the HTTP API, use the `--http-port` command-line option: 142 | 143 | ```bash 144 | npx playwright-mcp-bypass@latest --http-port 8080 145 | ``` 146 | 147 | The server will then listen on the specified port (e.g., 8080) for incoming HTTP requests. 148 | 149 | #### Endpoints 150 | 151 | - **Base Path**: `/tools/{tool_name}` 152 | - **Method**: 153 | - `POST`: Used for executing most tools. Tool parameters are sent in the JSON request body. 154 | - `GET`: Can be used for specific read-only tools like `browser_tab_list`. No request body is needed. 155 | - **Examples**: 156 | - `POST /tools/browser_navigate` 157 | - `POST /tools/browser_click` 158 | - `GET /tools/browser_tab_list` 159 | 160 | #### Request Format (POST) 161 | 162 | - **Headers**: 163 | - `Content-Type: application/json` 164 | - `Session-Id: <session-id>` (Optional, see Session Management) 165 | - **Body**: A JSON object containing the parameters required by the specific tool.
166 | 167 | Example (`browser_navigate`): 168 | ```json 169 | { 170 | "url": "https://example.com" 171 | } 172 | ``` 173 | 174 | Example (`browser_click`): 175 | ```json 176 | { 177 | "element": "Login Button", 178 | "ref": "button#login" 179 | } 180 | ``` 181 | 182 | #### Response Format 183 | 184 | - **Content-Type**: `application/json` 185 | - **Success (HTTP 200)**: 186 | ```json 187 | { 188 | "success": true, 189 | "result": { ... } // The result returned by the tool execution 190 | } 191 | ``` 192 | - **Error (HTTP 4xx/5xx)**: 193 | ```json 194 | { 195 | "success": false, 196 | "error": "Error message describing the failure" 197 | } 198 | ``` 199 | 200 | #### Session Management 201 | 202 | The HTTP API manages browser state using sessions. Each session corresponds to an independent browser instance with its own context (unless a global `--user-data-dir` is specified). 203 | 204 | - **Session ID**: Sessions are identified by the `Session-Id` HTTP header in the request. 205 | - **Default Session**: If the `Session-Id` header is not provided, a default session named `"default"` is used. 206 | - **Session Creation**: A new browser instance is automatically created when a request with a previously unseen `Session-Id` (or no ID for the default session) is received. 207 | - **Session Reuse**: Subsequent requests with the same `Session-Id` will reuse the existing browser instance for that session. 208 | - **Session Timeout**: Sessions automatically time out and close after 30 minutes of inactivity to conserve resources. Any request to an active session resets the timer. 209 | - **User Data Directory**: By default, each session gets its own isolated user data directory (e.g., `~/.cache/ms-playwright/mcp-chromium-profile-`). If you specify `--user-data-dir` when starting the server, *all* HTTP sessions will share that single directory, which can lead to conflicts and is generally not recommended for concurrent sessions. 
210 | 211 | #### Comparison with SSE Transport (`--port`) 212 | 213 | - **SSE (`--port`)**: Establishes a persistent connection per client. State (browser instance) is tied to the connection lifetime. Communication is typically streaming (server sends events). 214 | - **HTTP API (`--http-port`)**: Uses standard request/response cycles. State is managed via the `Session-Id` header and has a timeout. Simpler for clients that don't need persistent connections. 215 | 216 | #### Examples 217 | 218 | ##### curl 219 | 220 | ```bash 221 | # Navigate (uses default session if Session-Id header is omitted) 222 | curl -X POST http://localhost:8080/tools/browser_navigate \ 223 | -H "Content-Type: application/json" \ 224 | -d '{ "url": "https://example.com" }' 225 | 226 | # Click an element in a specific session 227 | curl -X POST http://localhost:8080/tools/browser_click \ 228 | -H "Content-Type: application/json" \ 229 | -H "Session-Id: my-session-123" \ 230 | -d '{ "element": "Login Button", "ref": "button#login" }' 231 | 232 | # Get tab list (GET request, uses default session) 233 | curl http://localhost:8080/tools/browser_tab_list 234 | 235 | # Get tab list for a specific session 236 | curl -H "Session-Id: my-session-123" http://localhost:8080/tools/browser_tab_list 237 | ``` 238 | 239 | ##### JavaScript (fetch) 240 | 241 | ```javascript 242 | // Navigate in default session 243 | fetch('http://localhost:8080/tools/browser_navigate', { 244 | method: 'POST', 245 | headers: { 246 | 'Content-Type': 'application/json' 247 | }, 248 | body: JSON.stringify({ url: 'https://example.com' }) 249 | }) 250 | .then(response => response.json()) 251 | .then(data => console.log(data)); 252 | 253 | // Type text in a specific session 254 | fetch('http://localhost:8080/tools/browser_type', { 255 | method: 'POST', 256 | headers: { 257 | 'Content-Type': 'application/json', 258 | 'Session-Id': 'user-abc-session' 259 | }, 260 | body: JSON.stringify({ 261 | element: "Search Input", 262 | ref:
"input[name='q']", 263 | text: "Playwright MCP" 264 | }) 265 | }) 266 | .then(response => response.json()) 267 | .then(data => console.log(data)); 268 | 269 | // Get tab list for a specific session 270 | fetch('http://localhost:8080/tools/browser_tab_list', { 271 | headers: { 272 | 'Session-Id': 'user-abc-session' 273 | } 274 | }) 275 | .then(response => response.json()) 276 | .then(data => console.log(data)); 277 | ``` 278 | 279 | ##### Python (requests) 280 | 281 | ```python 282 | import requests 283 | import json 284 | 285 | base_url = 'http://localhost:8080/tools' 286 | session_id = 'python-session-456' 287 | 288 | # Navigate in a specific session 289 | headers = { 290 | 'Content-Type': 'application/json', 291 | 'Session-Id': session_id 292 | } 293 | payload = {'url': 'https://github.com'} 294 | response = requests.post(f'{base_url}/browser_navigate', headers=headers, data=json.dumps(payload)) 295 | print(response.json()) 296 | 297 | # Get tab list for the same session 298 | headers_no_content = {'Session-Id': session_id} 299 | response = requests.get(f'{base_url}/browser_tab_list', headers=headers_no_content) 300 | print(response.json()) 301 | ``` 302 | 303 | 304 | ### Tool Modes 305 | 306 | The tools are available in two modes: 307 | 308 | 1. **Snapshot Mode** (default): Uses accessibility snapshots for better performance and reliability 309 | 2. **Vision Mode**: Uses screenshots for visual-based interactions 310 | 311 | To use Vision Mode, add the `--vision` flag when starting the server: 312 | 313 | ```js 314 | { 315 | "mcpServers": { 316 | "playwright-mcp-bypass": { 317 | "command": "npx", 318 | "args": [ 319 | "playwright-mcp-bypass@latest", 320 | "--vision" 321 | ] 322 | } 323 | } 324 | } 325 | ``` 326 | 327 | Vision Mode works best with the computer use models that are able to interact with elements using 328 | X Y coordinate space, based on the provided screenshot.
329 | 330 | ### Programmatic usage with custom transports 331 | 332 | ```js 333 | import { createServer } from 'playwright-mcp-bypass'; 334 | 335 | // ... 336 | 337 | const server = createServer({ 338 | launchOptions: { headless: true } 339 | }); 340 | transport = new SSEServerTransport("/messages", res); 341 | server.connect(transport); 342 | ``` 343 | 344 | ### Snapshot-based Interactions 345 | 346 | - **browser_click** 347 | - Description: Perform click on a web page 348 | - Parameters: 349 | - `element` (string): Human-readable element description used to obtain permission to interact with the element 350 | - `ref` (string): Exact target element reference from the page snapshot 351 | 352 | - **browser_hover** 353 | - Description: Hover over element on page 354 | - Parameters: 355 | - `element` (string): Human-readable element description used to obtain permission to interact with the element 356 | - `ref` (string): Exact target element reference from the page snapshot 357 | 358 | - **browser_drag** 359 | - Description: Perform drag and drop between two elements 360 | - Parameters: 361 | - `startElement` (string): Human-readable source element description used to obtain permission to interact with the element 362 | - `startRef` (string): Exact source element reference from the page snapshot 363 | - `endElement` (string): Human-readable target element description used to obtain permission to interact with the element 364 | - `endRef` (string): Exact target element reference from the page snapshot 365 | 366 | - **browser_type** 367 | - Description: Type text into editable element 368 | - Parameters: 369 | - `element` (string): Human-readable element description used to obtain permission to interact with the element 370 | - `ref` (string): Exact target element reference from the page snapshot 371 | - `text` (string): Text to type into the element 372 | - `submit` (boolean, optional): Whether to submit entered text (press Enter after) 373 | - `slowly` (boolean, optional):
Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once. 374 | 375 | - **browser_select_option** 376 | - Description: Select an option in a dropdown 377 | - Parameters: 378 | - `element` (string): Human-readable element description used to obtain permission to interact with the element 379 | - `ref` (string): Exact target element reference from the page snapshot 380 | - `values` (array): Array of values to select in the dropdown. This can be a single value or multiple values. 381 | 382 | - **browser_snapshot** 383 | - Description: Capture accessibility snapshot of the current page, this is better than screenshot 384 | - Parameters: None 385 | 386 | - **browser_take_screenshot** 387 | - Description: Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions. 388 | - Parameters: 389 | - `raw` (boolean, optional): Whether to return without compression (in PNG format). Default is false, which returns a JPEG image. 
390 | 391 | ### Vision-based Interactions 392 | 393 | - **browser_screen_move_mouse** 394 | - Description: Move mouse to a given position 395 | - Parameters: 396 | - `element` (string): Human-readable element description used to obtain permission to interact with the element 397 | - `x` (number): X coordinate 398 | - `y` (number): Y coordinate 399 | 400 | - **browser_screen_capture** 401 | - Description: Take a screenshot of the current page 402 | - Parameters: None 403 | 404 | - **browser_screen_click** 405 | - Description: Click left mouse button 406 | - Parameters: 407 | - `element` (string): Human-readable element description used to obtain permission to interact with the element 408 | - `x` (number): X coordinate 409 | - `y` (number): Y coordinate 410 | 411 | - **browser_screen_drag** 412 | - Description: Drag left mouse button 413 | - Parameters: 414 | - `element` (string): Human-readable element description used to obtain permission to interact with the element 415 | - `startX` (number): Start X coordinate 416 | - `startY` (number): Start Y coordinate 417 | - `endX` (number): End X coordinate 418 | - `endY` (number): End Y coordinate 419 | 420 | - **browser_screen_type** 421 | - Description: Type text 422 | - Parameters: 423 | - `text` (string): Text to type 424 | - `submit` (boolean, optional): Whether to submit entered text (press Enter after) 425 | 426 | - **browser_press_key** 427 | - Description: Press a key on the keyboard 428 | - Parameters: 429 | - `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a` 430 | 431 | ### Tab Management 432 | 433 | - **browser_tab_list** 434 | - Description: List browser tabs 435 | - Parameters: None 436 | 437 | - **browser_tab_new** 438 | - Description: Open a new tab 439 | - Parameters: 440 | - `url` (string, optional): The URL to navigate to in the new tab. If not provided, the new tab will be blank. 
441 | 442 | - **browser_tab_select** 443 | - Description: Select a tab by index 444 | - Parameters: 445 | - `index` (number): The index of the tab to select 446 | 447 | - **browser_tab_close** 448 | - Description: Close a tab 449 | - Parameters: 450 | - `index` (number, optional): The index of the tab to close. Closes current tab if not provided. 451 | 452 | ### Navigation 453 | 454 | - **browser_navigate** 455 | - Description: Navigate to a URL 456 | - Parameters: 457 | - `url` (string): The URL to navigate to 458 | 459 | - **browser_navigate_back** 460 | - Description: Go back to the previous page 461 | - Parameters: None 462 | 463 | - **browser_navigate_forward** 464 | - Description: Go forward to the next page 465 | - Parameters: None 466 | 467 | ### Keyboard 468 | 469 | - **browser_press_key** 470 | - Description: Press a key on the keyboard 471 | - Parameters: 472 | - `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a` 473 | 474 | ### Files and Media 475 | 476 | - **browser_file_upload** 477 | - Description: Choose one or multiple files to upload 478 | - Parameters: 479 | - `paths` (array): The absolute paths to the files to upload. Can be a single file or multiple files. 480 | 481 | - **browser_pdf_save** 482 | - Description: Save page as PDF 483 | - Parameters: None 484 | 485 | ### Utilities 486 | 487 | - **browser_wait** 488 | - Description: Wait for a specified time in seconds 489 | - Parameters: 490 | - `time` (number): The time to wait in seconds (capped at 10 seconds) 491 | 492 | - **browser_close** 493 | - Description: Close the page 494 | - Parameters: None 495 | 496 | - **browser_install** 497 | - Description: Install the browser specified in the config. Call this if you get an error about the browser not being installed. 
498 | - Parameters: None 499 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 
22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /cli.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /** 3 | * Copyright (c) Microsoft Corporation. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | require('./lib/program'); 19 | -------------------------------------------------------------------------------- /eslint.config.mjs: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import typescriptEslint from "@typescript-eslint/eslint-plugin"; 18 | import tsParser from "@typescript-eslint/parser"; 19 | import notice from "eslint-plugin-notice"; 20 | import path from "path"; 21 | import { fileURLToPath } from "url"; 22 | import stylistic from "@stylistic/eslint-plugin"; 23 | import importRules from "eslint-plugin-import"; 24 | 25 | const __filename = fileURLToPath(import.meta.url); 26 | const __dirname = path.dirname(__filename); 27 | 28 | const plugins = { 29 | "@stylistic": stylistic, 30 | "@typescript-eslint": typescriptEslint, 31 | notice, 32 | import: importRules, 33 | }; 34 | 35 | export const baseRules = { 36 | "@typescript-eslint/no-unused-vars": [ 37 | 2, 38 | { args: "none", caughtErrors: "none" }, 39 | ], 40 | 41 | /** 42 | * Enforced rules 43 | */ 44 | // syntax preferences 45 | "object-curly-spacing": ["error", "always"], 46 | quotes: [ 47 | 2, 48 | "single", 49 | { 50 | avoidEscape: true, 51 | allowTemplateLiterals: true, 52 | }, 53 | ], 54 | "jsx-quotes": [2, "prefer-single"], 55 | "no-extra-semi": 2, 56 | "@stylistic/semi": [2], 57 | "comma-style": 
[2, "last"], 58 | "wrap-iife": [2, "inside"], 59 | "spaced-comment": [ 60 | 2, 61 | "always", 62 | { 63 | markers: ["*"], 64 | }, 65 | ], 66 | eqeqeq: [2], 67 | "accessor-pairs": [ 68 | 2, 69 | { 70 | getWithoutSet: false, 71 | setWithoutGet: false, 72 | }, 73 | ], 74 | "brace-style": [2, "1tbs", { allowSingleLine: true }], 75 | curly: [2, "multi-or-nest", "consistent"], 76 | "new-parens": 2, 77 | "arrow-parens": [2, "as-needed"], 78 | "prefer-const": 2, 79 | "quote-props": [2, "consistent"], 80 | "nonblock-statement-body-position": [2, "below"], 81 | 82 | // anti-patterns 83 | "no-var": 2, 84 | "no-with": 2, 85 | "no-multi-str": 2, 86 | "no-caller": 2, 87 | "no-implied-eval": 2, 88 | "no-labels": 2, 89 | "no-new-object": 2, 90 | "no-octal-escape": 2, 91 | "no-self-compare": 2, 92 | "no-shadow-restricted-names": 2, 93 | "no-cond-assign": 2, 94 | "no-debugger": 2, 95 | "no-dupe-keys": 2, 96 | "no-duplicate-case": 2, 97 | "no-empty-character-class": 2, 98 | "no-unreachable": 2, 99 | "no-unsafe-negation": 2, 100 | radix: 2, 101 | "valid-typeof": 2, 102 | "no-implicit-globals": [2], 103 | "no-unused-expressions": [ 104 | 2, 105 | { allowShortCircuit: true, allowTernary: true, allowTaggedTemplates: true }, 106 | ], 107 | "no-proto": 2, 108 | 109 | // es2015 features 110 | "require-yield": 2, 111 | "template-curly-spacing": [2, "never"], 112 | 113 | // spacing details 114 | "space-infix-ops": 2, 115 | "space-in-parens": [2, "never"], 116 | "array-bracket-spacing": [2, "never"], 117 | "comma-spacing": [2, { before: false, after: true }], 118 | "keyword-spacing": [2, "always"], 119 | "space-before-function-paren": [ 120 | 2, 121 | { 122 | anonymous: "never", 123 | named: "never", 124 | asyncArrow: "always", 125 | }, 126 | ], 127 | "no-whitespace-before-property": 2, 128 | "keyword-spacing": [ 129 | 2, 130 | { 131 | overrides: { 132 | if: { after: true }, 133 | else: { after: true }, 134 | for: { after: true }, 135 | while: { after: true }, 136 | do: { after: true }, 137 | 
switch: { after: true }, 138 | return: { after: true }, 139 | }, 140 | }, 141 | ], 142 | "arrow-spacing": [ 143 | 2, 144 | { 145 | after: true, 146 | before: true, 147 | }, 148 | ], 149 | "@stylistic/func-call-spacing": 2, 150 | "@stylistic/type-annotation-spacing": 2, 151 | 152 | // file whitespace 153 | "no-multiple-empty-lines": [2, { max: 2, maxEOF: 0 }], 154 | "no-mixed-spaces-and-tabs": 2, 155 | "no-trailing-spaces": 2, 156 | "linebreak-style": [process.platform === "win32" ? 0 : 2, "unix"], 157 | indent: [ 158 | 2, 159 | 2, 160 | { SwitchCase: 1, CallExpression: { arguments: 2 }, MemberExpression: 2 }, 161 | ], 162 | "key-spacing": [ 163 | 2, 164 | { 165 | beforeColon: false, 166 | }, 167 | ], 168 | "eol-last": 2, 169 | 170 | // copyright 171 | "notice/notice": [ 172 | 2, 173 | { 174 | mustMatch: "Copyright", 175 | templateFile: path.join(__dirname, "utils", "copyright.js"), 176 | }, 177 | ], 178 | 179 | // react 180 | "react/react-in-jsx-scope": 0, 181 | }; 182 | 183 | const languageOptions = { 184 | parser: tsParser, 185 | ecmaVersion: 9, 186 | sourceType: "module", 187 | }; 188 | 189 | export default [ 190 | { 191 | ignores: ["**/*.js"], 192 | }, 193 | { 194 | files: ["**/*.ts", "**/*.tsx"], 195 | plugins, 196 | languageOptions, 197 | rules: baseRules, 198 | }, 199 | ]; 200 | -------------------------------------------------------------------------------- /index.d.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /** 3 | * Copyright (c) Microsoft Corporation. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | import type { LaunchOptions } from 'playwright'; 19 | import type { Server } from '@modelcontextprotocol/sdk/server/index.js'; 20 | 21 | type ToolCapability = 'core' | 'tabs' | 'pdf' | 'history' | 'wait' | 'files' | 'install'; 22 | 23 | type Options = { 24 | /** 25 | * Path to the user data directory. 26 | */ 27 | userDataDir?: string; 28 | 29 | /** 30 | * Launch options for the browser. 31 | */ 32 | launchOptions?: LaunchOptions; 33 | 34 | /** 35 | * Use screenshots instead of snapshots. Less accurate, reliable and overall 36 | * slower, but contains visual representation of the page. 37 | * @default false 38 | */ 39 | vision?: boolean; 40 | 41 | /** 42 | * Capabilities to enable. 43 | */ 44 | capabilities?: ToolCapability[]; 45 | }; 46 | 47 | export function createServer(options?: Options): Server; 48 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /** 3 | * Copyright (c) Microsoft Corporation. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
// CommonJS entry point: re-exports `createServer` from the compiled output in ./lib.
const compiled = require('./lib/index');

module.exports = { createServer: compiled.createServer };
"devDependencies": { 48 | "@eslint/eslintrc": "^3.2.0", 49 | "@eslint/js": "^9.19.0", 50 | "@playwright/test": "^1.52.0-alpha-1743163434000", 51 | "@stylistic/eslint-plugin": "^3.0.1", 52 | "@types/jsdom": "^21.1.7", 53 | "@types/koa": "^2.15.0", 54 | "@types/koa-bodyparser": "^4.3.12", 55 | "@types/koa__cors": "^5.0.0", 56 | "@types/koa__router": "^12.0.4", 57 | "@types/node": "^22.13.10", 58 | "@types/turndown": "^5.0.5", 59 | "@typescript-eslint/eslint-plugin": "^8.26.1", 60 | "@typescript-eslint/parser": "^8.26.1", 61 | "@typescript-eslint/utils": "^8.26.1", 62 | "eslint": "^9.19.0", 63 | "eslint-plugin-import": "^2.31.0", 64 | "eslint-plugin-notice": "^1.0.0", 65 | "get-port": "^7.1.0", 66 | "typescript": "^5.8.2" 67 | }, 68 | "bin": { 69 | "mcp-server-playwright": "cli.js" 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /playwright.config.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import { defineConfig } from '@playwright/test'; 18 | 19 | export default defineConfig({ 20 | testDir: './tests', 21 | fullyParallel: true, 22 | forbidOnly: !!process.env.CI, 23 | retries: process.env.CI ? 2 : 0, 24 | workers: process.env.CI ? 
/**
 * Settings that decide how the browser is obtained: connected to through
 * `remoteEndpoint`, attached to over CDP through `cdpEndpoint`, or launched
 * as a persistent context stored in `userDataDir`.
 */
export type ContextOptions = {
  browserName?: 'chromium' | 'firefox' | 'webkit';
  userDataDir: string;
  launchOptions?: playwright.LaunchOptions;
  cdpEndpoint?: string;
  remoteEndpoint?: string;
};

type PageOrFrameLocator = playwright.Page | playwright.FrameLocator;

/**
 * Flags understood by `Tab.run`.
 */
type RunOptions = {
  // Take a fresh page snapshot once the callback has finished.
  captureSnapshot?: boolean;
  // Run the callback through the `waitForCompletion` helper.
  waitForCompletion?: boolean;
  // Status line to put in front of the result text.
  status?: string;
  // Keep a pending file chooser instead of discarding it before the callback runs.
  noClearFileChooser?: boolean;
};

/**
 * Owns the browser context and the list of tabs (pages) opened in it.
 * The browser context is created lazily on first use and torn down again
 * once the last tab has been closed.
 */
export class Context {
  readonly options: ContextOptions;
  private _browser: playwright.Browser | undefined;
  private _browserContext: playwright.BrowserContext | undefined;
  private _tabs: Tab[] = [];
  private _currentTab: Tab | undefined;

  constructor(options: ContextOptions) {
    this.options = options;
  }

  /** Returns the live list of open tabs. */
  tabs(): Tab[] {
    return this._tabs;
  }

  /**
   * Returns the current tab.
   * @throws Error when no tab has been created yet.
   */
  currentTab(): Tab {
    if (!this._currentTab)
      throw new Error('Navigate to a location to create a tab');
    return this._currentTab;
  }

  /** Opens a new page and makes its tab the current one. */
  async newTab(): Promise<Tab> {
    const browserContext = await this._ensureBrowserContext();
    const page = await browserContext.newPage();
    // The tab for this page is registered by the context's 'page' listener.
    this._currentTab = this._tabs.find(t => t.page === page)!;
    return this._currentTab;
  }

  /**
   * Makes the tab at the given 1-based index current and brings it to front.
   * @throws Error when no tab exists at that index; the current tab is left untouched.
   */
  async selectTab(index: number) {
    const tab = this._tabAt(index);
    this._currentTab = tab;
    await tab.page.bringToFront();
  }

  /** Returns the current tab, creating the browser context (and a page) if needed. */
  async ensureTab(): Promise<Tab> {
    const context = await this._ensureBrowserContext();
    if (!this._currentTab)
      await context.newPage();
    return this._currentTab!;
  }

  /** Produces a human-readable, 1-based listing of the open tabs. */
  async listTabs(): Promise<string> {
    if (!this._tabs.length)
      return 'No tabs open';
    const lines: string[] = ['Open tabs:'];
    for (let i = 0; i < this._tabs.length; i++) {
      const tab = this._tabs[i];
      const title = await tab.page.title();
      const url = tab.page.url();
      const current = tab === this._currentTab ? ' (current)' : '';
      lines.push(`- ${i + 1}:${current} [${title}] (${url})`);
    }
    return lines.join('\n');
  }

  /**
   * Closes the tab at the given 1-based index, or the current tab when the
   * index is omitted, and returns the updated tab listing.
   * @throws Error when the requested tab does not exist.
   */
  async closeTab(index: number | undefined) {
    const tab = index === undefined ? this.currentTab() : this._tabAt(index);
    await tab.page.close();
    return await this.listTabs();
  }

  /**
   * Looks up a tab by its 1-based index.
   * @throws Error when there is no tab at that index.
   */
  private _tabAt(index: number): Tab {
    const tab = this._tabs[index - 1];
    if (!tab)
      throw new Error(`Tab ${index} does not exist`);
    return tab;
  }

  private _onPageCreated(page: playwright.Page) {
    const tab = new Tab(this, page, tab => this._onPageClosed(tab));
    this._tabs.push(tab);
    // The first page ever seen becomes the current tab.
    if (!this._currentTab)
      this._currentTab = tab;
  }

  private _onPageClosed(tab: Tab) {
    const index = this._tabs.indexOf(tab);
    if (index === -1)
      return;
    this._tabs.splice(index, 1);

    // Fall back to the tab that now sits at the same position, or the last one.
    if (this._currentTab === tab)
      this._currentTab = this._tabs[Math.min(index, this._tabs.length - 1)];
    const browser = this._browser;
    if (this._browserContext && !this._tabs.length) {
      // Last tab is gone: close context and browser in the background and drop
      // the references so that the next call creates a fresh browser context.
      void this._browserContext.close().then(() => browser?.close()).catch(() => {});
      this._browser = undefined;
      this._browserContext = undefined;
    }
  }

  /** Closes the browser context if one has been created. */
  async close() {
    if (!this._browserContext)
      return;
    await this._browserContext.close();
  }

  private async _ensureBrowserContext() {
    if (!this._browserContext) {
      const context = await this._createBrowserContext();
      this._browser = context.browser;
      this._browserContext = context.browserContext;
      for (const page of this._browserContext.pages())
        this._onPageCreated(page);
      this._browserContext.on('page', page => this._onPageCreated(page));

      // Ensure there is at least one tab after initialization
      if (this._tabs.length === 0) {
        // Diagnostics go to stderr: stdout is the protocol channel when the
        // server is run over the stdio transport.
        console.error('[Context] No initial tabs found, creating a new one.');
        await this._browserContext.newPage(); // This will trigger _onPageCreated
      }
    }
    return this._browserContext;
  }

  private async _createBrowserContext(): Promise<{ browser?: playwright.Browser, browserContext: playwright.BrowserContext }> {
    if (this.options.remoteEndpoint) {
      const url = new URL(this.options.remoteEndpoint);
      if (this.options.browserName)
        url.searchParams.set('browser', this.options.browserName);
      if (this.options.launchOptions)
        url.searchParams.set('launch-options', JSON.stringify(this.options.launchOptions));
      const browser = await playwright[this.options.browserName ?? 'chromium'].connect(String(url));
      const browserContext = await browser.newContext();
      return { browser, browserContext };
    }

    // If a CDP endpoint is provided in the options, connect using it.
    if (this.options.cdpEndpoint) {
      const browser = await playwright.chromium.connectOverCDP(this.options.cdpEndpoint);
      // Prefer the first existing context; create one if the browser exposes none.
      const browserContext = browser.contexts()[0] ?? await browser.newContext();
      return { browser, browserContext };
    }

    // Otherwise, launch a new persistent browser context.
    const browserContext = await this._launchPersistentContext();
    return { browserContext };
  }

  private async _launchPersistentContext(): Promise<playwright.BrowserContext> {
    try {
      const browserType = this.options.browserName ? playwright[this.options.browserName] : playwright.chromium;
      const launchOptions = {
        ...(this.options.launchOptions ?? {}),
        // Always pass the AutomationControlled switch; the Set removes a
        // duplicate when the caller already supplied it.
        args: Array.from(new Set([
          ...(this.options.launchOptions?.args ?? []),
          '--disable-blink-features=AutomationControlled'
        ]))
      };
      return await browserType.launchPersistentContext(this.options.userDataDir, launchOptions);
    } catch (error: any) {
      // Optional chaining keeps the original error intact when the thrown
      // value carries no string message.
      if (error?.message?.includes?.('Executable doesn\'t exist'))
        throw new Error(`Browser specified in your config is not installed. Either install it (likely) or change the config.`);
      throw error;
    }
  }
}

/**
 * A single page together with the state collected for it: console messages,
 * a pending file chooser and the most recent snapshot.
 */
class Tab {
  readonly context: Context;
  readonly page: playwright.Page;
  private _console: playwright.ConsoleMessage[] = [];
  private _fileChooser: playwright.FileChooser | undefined;
  private _snapshot: PageSnapshot | undefined;
  private _onPageClose: (tab: Tab) => void;

  constructor(context: Context, page: playwright.Page, onPageClose: (tab: Tab) => void) {
    this.context = context;
    this.page = page;
    this._onPageClose = onPageClose;
    page.on('console', event => this._console.push(event));
    page.on('framenavigated', frame => {
      // A main-frame navigation starts a new document: drop old console messages.
      if (!frame.parentFrame())
        this._console.length = 0;
    });
    page.on('close', () => this._onClose());
    page.on('filechooser', chooser => this._fileChooser = chooser);
    page.setDefaultNavigationTimeout(60000);
    page.setDefaultTimeout(5000);
  }

  private _onClose() {
    this._fileChooser = undefined;
    this._console.length = 0;
    this._onPageClose(this);
  }

  /** Navigates the page and waits briefly for the load event. */
  async navigate(url: string) {
    await this.page.goto(url, { waitUntil: 'domcontentloaded' });
    // Cap load event to 5 seconds, the page is operational at this point.
    await this.page.waitForLoadState('load', { timeout: 5000 }).catch(() => {});
  }

  /**
   * Runs `callback` against this tab and builds the textual tool result.
   *
   * @param callback Action to perform on the tab.
   * @param options See `RunOptions`.
   * @returns A text result made of the tab list (only when more than one tab
   *   is open) followed by the snapshot text, or the status when no snapshot
   *   is available.
   */
  async run(callback: (tab: Tab) => Promise<void>, options?: RunOptions): Promise<ToolResult> {
    try {
      if (!options?.noClearFileChooser)
        this._fileChooser = undefined;
      if (options?.waitForCompletion)
        await waitForCompletion(this.page, () => callback(this));
      else
        await callback(this);
    } finally {
      // Runs even when the callback throws, so the stored snapshot is refreshed.
      if (options?.captureSnapshot)
        this._snapshot = await PageSnapshot.create(this.page);
    }
    const tabList = this.context.tabs().length > 1 ? await this.context.listTabs() + '\n\nCurrent tab:' + '\n' : '';
    const snapshot = this._snapshot?.text({ status: options?.status, hasFileChooser: !!this._fileChooser }) ?? options?.status ?? '';
    return {
      content: [{
        type: 'text',
        text: tabList + snapshot,
      }],
    };
  }

  /** Same as `run`, with `waitForCompletion` enabled unless overridden. */
  async runAndWait(callback: (tab: Tab) => Promise<void>, options?: RunOptions): Promise<ToolResult> {
    return await this.run(callback, {
      waitForCompletion: true,
      ...options,
    });
  }

  /** Same as `run`, with `waitForCompletion` and `captureSnapshot` enabled unless overridden. */
  async runAndWaitWithSnapshot(callback: (tab: Tab) => Promise<void>, options?: RunOptions): Promise<ToolResult> {
    return await this.run(callback, {
      captureSnapshot: true,
      waitForCompletion: true,
      ...options,
    });
  }

  /**
   * Returns the most recently captured snapshot.
   * @throws Error when no snapshot has been captured yet.
   */
  lastSnapshot(): PageSnapshot {
    if (!this._snapshot)
      throw new Error('No snapshot available');
    return this._snapshot;
  }

  /** Returns the console messages collected since the last main-frame navigation. */
  async console(): Promise<playwright.ConsoleMessage[]> {
    return this._console;
  }

  /**
   * Supplies files to the pending file chooser and forgets it.
   * @throws Error when no file chooser is pending.
   */
  async submitFileChooser(paths: string[]) {
    if (!this._fileChooser)
      throw new Error('No file chooser visible');
    await this._fileChooser.setFiles(paths);
    this._fileChooser = undefined;
  }
}

/**
 * Textual ARIA snapshot of a page, including nested iframes, plus the frame
 * locators needed to resolve element refs from that snapshot.
 */
class PageSnapshot {
  // Index 0 is the page itself; every snapshotted iframe is appended after it.
  private _frameLocators: PageOrFrameLocator[] = [];
  private _text!: string;

  constructor() {
  }

  /** Builds a snapshot of the given page. */
  static async create(page: playwright.Page): Promise<PageSnapshot> {
    const snapshot = new PageSnapshot();
    await snapshot._build(page);
    return snapshot;
  }

  /**
   * Returns the snapshot text, optionally preceded by a status line and a
   * notice about a pending file chooser.
   */
  text(options?: { status?: string, hasFileChooser?: boolean }): string {
    const results: string[] = [];
    if (options?.status) {
      results.push(options.status);
      results.push('');
    }
    if (options?.hasFileChooser) {
      results.push('- There is a file chooser visible that requires browser_file_upload to be called');
      results.push('');
    }
    results.push(this._text);
    return results.join('\n');
  }

  private async _build(page: playwright.Page) {
    const yamlDocument = await this._snapshotFrame(page);
    const lines = [];
    lines.push(
        `- Page URL: ${page.url()}`,
        `- Page Title: ${await page.title()}`
    );
    lines.push(
        `- Page Snapshot`,
        '```yaml',
        yamlDocument.toString().trim(),
        '```',
        ''
    );
    this._text = lines.join('\n');
  }

  private async _snapshotFrame(frame: playwright.Page | playwright.FrameLocator) {
    // Register the frame first; its position becomes the `f<N>` prefix of its refs.
    const frameIndex = this._frameLocators.push(frame) - 1;
    const snapshotString = await frame.locator('body').ariaSnapshot({ ref: true });
    const snapshot = yaml.parseDocument(snapshotString);

    // Walks the YAML tree, prefixing refs of non-root frames and replacing each
    // iframe scalar with a pair whose value is the child frame's snapshot.
    const visit = async (node: any): Promise<unknown> => {
      if (yaml.isPair(node)) {
        await Promise.all([
          visit(node.key).then(k => node.key = k),
          visit(node.value).then(v => node.value = v)
        ]);
      } else if (yaml.isSeq(node) || yaml.isMap(node)) {
        node.items = await Promise.all(node.items.map(visit));
      } else if (yaml.isScalar(node)) {
        if (typeof node.value === 'string') {
          const value = node.value;
          if (frameIndex > 0)
            node.value = value.replace('[ref=', `[ref=f${frameIndex}`);
          if (value.startsWith('iframe ')) {
            // The un-prefixed ref is used here: it is resolved inside `frame` itself.
            const ref = value.match(/\[ref=(.*)\]/)?.[1];
            if (ref) {
              try {
                const childSnapshot = await this._snapshotFrame(frame.frameLocator(`aria-ref=${ref}`));
                return snapshot.createPair(node.value, childSnapshot);
              } catch (error) {
                // Best effort: a frame that cannot be snapshotted is shown as empty.
                return snapshot.createPair(node.value, '');
              }
            }
          }
        }
      }

      return node;
    };
    await visit(snapshot.contents);
    return snapshot;
  }

  /**
   * Resolves a ref from this snapshot to a locator. Refs of the form
   * `f<N><rest>` are looked up in the N-th registered frame.
   * @throws Error when the referenced frame is not part of this snapshot.
   */
  refLocator(ref: string): playwright.Locator {
    let frame = this._frameLocators[0];
    const match = ref.match(/^f(\d+)(.*)/);
    if (match) {
      const frameIndex = parseInt(match[1], 10);
      frame = this._frameLocators[frameIndex];
      ref = match[2];
    }

    if (!frame)
      throw new Error(`Frame does not exist. Provide ref from the most current snapshot.`);

    return frame.locator(`aria-ref=${ref}`);
  }
}
/**
 * Starts the HTTP API in front of the MCP servers.
 *
 * Every request is bound to a session taken from the `Session-Id` header
 * ('default' when absent). A session owns one server created through
 * `serverList` and is closed after 30 minutes without requests.
 *
 * Routes:
 * - `POST /tools/:toolName` calls a tool with the JSON body as parameters.
 * - `GET /tools/browser_tab_list` calls that tool without parameters.
 * - `GET /openapi.json` describes the registered tools as an OpenAPI document.
 *
 * SIGINT and SIGTERM trigger a shutdown that closes the HTTP server and all
 * sessions and then exits the process.
 *
 * @param port TCP port to listen on.
 * @param serverList Factory/registry used to create and close session servers.
 */
export async function startHttpServer(port: number, serverList: ServerList) {
  // Session management
  const sessions = new Map<string, Server>();
  const sessionTimers = new Map<string, NodeJS.Timeout>(); // For session timeout
  const sessionTimeout = 30 * 60 * 1000; // 30 minutes (in milliseconds)

  const app = new Koa();
  const router = new Router();

  // Closes a session whose inactivity timer fired. The session is unregistered
  // before the close is awaited so that a request arriving meanwhile gets a
  // fresh session (and timer) instead of a server that is shutting down.
  const expireSession = async (sessionId: string) => {
    console.log(`[HTTP Server] Session timed out: ${sessionId}`);
    const serverToClose = sessions.get(sessionId);
    sessions.delete(sessionId);
    sessionTimers.delete(sessionId);
    if (!serverToClose) {
      return;
    }
    try {
      await serverList.close(serverToClose); // Use serverList to close
    } catch (error) {
      // Runs from a timer: report the failure instead of leaving an unhandled rejection.
      console.error(`[HTTP Server] Error closing timed-out session ${sessionId}:`, error);
    }
  };

  // Middleware
  app.use(cors({
    allowHeaders: ['Content-Type', 'Session-Id'], // Allow Session-Id header
    exposeHeaders: [], // Adjust as needed
  }));
  app.use(bodyParser());

  // Session handling middleware
  app.use(async (ctx, next) => {
    const sessionId = ctx.get('session-id') || 'default'; // Get session ID from header or use 'default'
    let server = sessions.get(sessionId);

    if (!server) {
      console.log(`[HTTP Server] Creating new session: ${sessionId}`);
      // Pass the sessionId to create() so the factory can generate a unique user data dir
      server = await serverList.create(sessionId);
      sessions.set(sessionId, server);
    } else {
      console.log(`[HTTP Server] Reusing session: ${sessionId}`);
    }

    // Reset session timeout on activity
    const previousTimer = sessionTimers.get(sessionId);
    if (previousTimer) {
      clearTimeout(previousTimer);
    }
    const timer = setTimeout(() => {
      void expireSession(sessionId); // Errors are handled inside expireSession
    }, sessionTimeout);
    sessionTimers.set(sessionId, timer);

    // Pass the server instance and sessionId to the route handlers via context state
    ctx.state.server = server;
    ctx.state.sessionId = sessionId;

    await next(); // Proceed to the next middleware (router)
  });

  // Tool calling route
  router.post('/tools/:toolName', async (ctx) => {
    const { toolName } = ctx.params;
    const params = ctx.request.body || {}; // Get params from request body
    const server: Server = ctx.state.server; // Get Server instance from session middleware
    const sessionId: string = ctx.state.sessionId; // Get sessionId for logging

    try {
      // Unknown tool names are not checked here; callTool is relied upon to reject them.
      console.log(`[HTTP Server] Calling tool "${toolName}" for session ${sessionId} with params:`, params);
      // callTool is not part of the SDK's Server type, hence the cast.
      const result = await (server as any).callTool(toolName, params);

      ctx.status = 200; // OK
      ctx.body = { success: true, result }; // Return success and result
    } catch (error: any) {
      console.error(`[HTTP Server] Error calling tool "${toolName}" for session ${sessionId}:`, error);
      ctx.status = 500; // Internal Server Error (or potentially 400/404 depending on error type)
      // Respond with error details
      ctx.body = { success: false, error: String(error.message || error) };
    }
  });

  // Specific route for GET /tools/browser_tab_list
  router.get('/tools/browser_tab_list', async (ctx) => {
    const server: Server = ctx.state.server;
    const sessionId: string = ctx.state.sessionId;
    const toolName = 'browser_tab_list';

    try {
      console.log(`[HTTP Server] Calling tool "${toolName}" for session ${sessionId}`);
      const result = await (server as any).callTool(toolName, {}); // No parameters needed
      ctx.status = 200;
      ctx.body = { success: true, result };
    } catch (error: any) {
      console.error(`[HTTP Server] Error calling tool "${toolName}" for session ${sessionId}:`, error);
      ctx.status = 500;
      ctx.body = { success: false, error: String(error.message || error) };
    }
  });

  // Route to generate OpenAPI specification
  router.get('/openapi.json', async (ctx) => {
    try {
      // The session middleware has already attached a server for this request.
      const server: Server = ctx.state.server;
      const serverUrl = `http://localhost:${port}`; // Assuming localhost for spec

      // _registeredTools is not part of the SDK's Server type, hence the cast.
      const registeredTools = (server as any)._registeredTools || [];
      const toolSchemas = registeredTools.map((tool: any) => tool.schema);

      // Basic OpenAPI structure (without components)
      const openApiSpec: any = {
        openapi: '3.1.0',
        info: {
          title: 'Playwright MCP HTTP API',
          version: require('../package.json').version,
          description: 'HTTP API for interacting with the Playwright MCP server.',
        },
        servers: [
          { url: serverUrl, description: 'Local development server' }
        ],
        paths: {}, // Paths will be populated below
      };

      // Add paths for each tool
      for (const tool of toolSchemas) {
        const path = `/tools/${tool.name}`;
        const isGetOperation = tool.name === 'browser_tab_list'; // Special case for GET
        const method = isGetOperation ? 'get' : 'post';

        openApiSpec.paths[path] = {
          [method]: {
            tags: ['Tools'],
            summary: tool.description || `Execute ${tool.name}`,
            operationId: tool.name,
            parameters: [
              // Inline Session-Id parameter definition
              {
                name: 'Session-Id',
                in: 'header',
                required: false,
                description: 'Optional session identifier. If not provided, uses the "default" session.',
                schema: { type: 'string' }
              }
            ],
            responses: {
              '200': {
                description: 'Successful operation',
                content: {
                  'application/json': {
                    // Inline SuccessResponse schema definition
                    schema: {
                      type: 'object',
                      properties: {
                        success: { type: 'boolean', example: true },
                        result: { type: 'object', description: 'Result from the tool execution' }
                      }
                    }
                  }
                }
              },
              '500': {
                description: 'Internal server error or tool execution error',
                content: {
                  'application/json': {
                    // Inline ErrorResponse schema definition
                    schema: {
                      type: 'object',
                      properties: {
                        success: { type: 'boolean', example: false },
                        error: { type: 'string', description: 'Error message' }
                      }
                    }
                  }
                }
              }
            }
          }
        };

        // Add requestBody for POST operations only if inputSchema exists and has properties or is required
        if (!isGetOperation && tool.inputSchema) {
          const inputSchema = tool.inputSchema as any; // Cast to any to access properties easily
          const hasProperties = inputSchema.properties && Object.keys(inputSchema.properties).length > 0;
          const isRequired = inputSchema.required && inputSchema.required.length > 0;

          if (hasProperties || isRequired) {
            // Clone the schema and remove the $schema property
            const schemaForRequestBody = { ...inputSchema };
            delete schemaForRequestBody.$schema; // Remove $schema

            openApiSpec.paths[path][method].requestBody = {
              description: 'Parameters for the tool',
              // Set required based on whether the schema has any required properties
              required: isRequired,
              content: {
                'application/json': {
                  schema: schemaForRequestBody // Use the cleaned schema
                }
              }
            };
          }
          // If no properties and not required, omit requestBody entirely
        }
      }

      ctx.status = 200;
      ctx.type = 'application/json';
      ctx.body = openApiSpec;

    } catch (error: any) {
      console.error('[HTTP Server] Error generating OpenAPI spec:', error);
      ctx.status = 500;
      ctx.body = { success: false, error: `Failed to generate OpenAPI spec: ${error.message}` };
    }
  });


  // Apply routes
  app.use(router.routes()).use(router.allowedMethods());

  // Create and start the HTTP server
  const httpServer = http.createServer(app.callback());

  httpServer.listen(port, () => {
    console.log(`[HTTP Server] HTTP API server listening on port ${port}`);
  });

  // Graceful shutdown handling
  let shuttingDown = false;
  const gracefulShutdown = () => {
    // Both SIGINT and SIGTERM land here; only the first signal starts the shutdown.
    if (shuttingDown) {
      return;
    }
    shuttingDown = true;

    console.log('[HTTP Server] Closing HTTP server...');
    httpServer.close(async (err) => {
      if (err) {
        console.error('[HTTP Server] Error closing HTTP server:', err);
      } else {
        console.log('[HTTP Server] HTTP server closed.');
      }

      // Clean up all sessions
      console.log('[HTTP Server] Closing all browser sessions...');
      let failedToClose = false;
      const closingPromises = Array.from(sessions.entries()).map(async ([sessionId, server]) => {
        console.log(`[HTTP Server] Closing session: ${sessionId}`);
        // Clear associated timer
        const timer = sessionTimers.get(sessionId);
        if (timer) {
          clearTimeout(timer);
          sessionTimers.delete(sessionId);
        }
        try {
          await serverList.close(server); // Use serverList to close
        } catch (closeError) {
          // Handled per session so one failure does not stop waiting for the others.
          failedToClose = true;
          console.error('[HTTP Server] Error closing sessions:', closeError);
        }
      });
      sessions.clear(); // Clear the sessions map

      try {
        await Promise.all(closingPromises);
        if (!failedToClose) {
          console.log('[HTTP Server] All sessions closed.');
        }
      } finally {
        process.exit(err ? 1 : 0); // Exit with appropriate code
      }
    });

    // Force close after a timeout if graceful shutdown fails
    setTimeout(() => {
      console.error('[HTTP Server] Graceful shutdown timed out. Forcing exit.');
      process.exit(1);
    }, 15000); // 15 seconds timeout
  };

  // Listen for termination signals
  process.on('SIGINT', gracefulShutdown);
  process.on('SIGTERM', gracefulShutdown);
}
/**
 * Assembles the full tool list for one mode. The two modes differ only in the
 * flag handed to the tool factories and in whether the snapshot or the screen
 * tools are included.
 */
function assembleTools(useSnapshots: boolean): Tool[] {
  return [
    ...common,
    ...files(useSnapshots),
    ...install,
    ...keyboard(useSnapshots),
    ...navigate(useSnapshots),
    ...pdf,
    ...(useSnapshots ? snapshot : screen),
    ...tabs(useSnapshots),
    ...extractContent(useSnapshots),
  ];
}

// Tools offered in the default (ARIA snapshot) mode.
const snapshotTools: Tool[] = assembleTools(true);

// Tools offered in vision (screenshot) mode.
const screenshotTools: Tool[] = assembleTools(false);

const resources: Resource[] = [
  consoleResource,
];

type Options = {
  browserName?: 'chromium' | 'firefox' | 'webkit';
  userDataDir?: string;
  launchOptions?: LaunchOptions;
  cdpEndpoint?: string;
  vision?: boolean;
  capabilities?: ToolCapability[];
};

const packageJSON = require('../package.json');

/**
 * Builds the MCP server with the tool set selected by `options`.
 *
 * `options.vision` switches from the snapshot tools to the screenshot tools.
 * When `options.capabilities` is given, only 'core' tools and tools whose
 * capability is listed are kept; otherwise every tool is kept.
 */
export function createServer(options?: Options): Server {
  const requested = options?.capabilities;
  const candidates = options?.vision ? screenshotTools : snapshotTools;

  const isEnabled = (tool: Tool): boolean => {
    if (!requested)
      return true;
    return tool.capability === 'core' || requested.includes(tool.capability);
  };

  return createServerWithTools({
    name: 'Playwright',
    version: packageJSON.version,
    tools: candidates.filter(isEnabled),
    resources,
    browserName: options?.browserName,
    userDataDir: options?.userDataDir ?? '',
    launchOptions: options?.launchOptions,
    cdpEndpoint: options?.cdpEndpoint,
  });
}
15 | */ 16 | 17 | import http from 'http'; 18 | import fs from 'fs'; 19 | import os from 'os'; 20 | import path from 'path'; 21 | 22 | import { program } from 'commander'; 23 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; 24 | import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js'; 25 | 26 | 27 | import { createServer } from './index'; 28 | import { ServerList } from './server'; 29 | import { startHttpServer } from './httpServer'; // Import the new function 30 | 31 | import type { LaunchOptions } from 'playwright'; 32 | import assert from 'assert'; 33 | import { ToolCapability } from './tools/tool'; 34 | 35 | const packageJSON = require('../package.json'); 36 | 37 | program 38 | .version('Version ' + packageJSON.version) 39 | .name(packageJSON.name) 40 | .option('--browser ', 'Browser or chrome channel to use, possible values: chrome, firefox, webkit, msedge.') 41 | .option('--caps ', 'Comma-separated list of capabilities to enable, possible values: tabs, pdf, history, wait, files, install. 
Default is all.') 42 | .option('--cdp-endpoint ', 'CDP endpoint to connect to.') 43 | .option('--executable-path ', 'Path to the browser executable.') 44 | .option('--headless', 'Run browser in headless mode, headed by default') 45 | .option('--port ', 'Port to listen on for SSE transport.') 46 | .option('--user-data-dir ', 'Path to the user data directory') 47 | .option('--vision', 'Run server that uses screenshots (Aria snapshots are used by default)') 48 | .option('--http-port ', 'Port to listen on for HTTP API.') // Add the new option 49 | .action(async options => { 50 | let browserName: 'chromium' | 'firefox' | 'webkit'; 51 | let channel: string | undefined; 52 | switch (options.browser) { 53 | case 'chrome': 54 | case 'chrome-beta': 55 | case 'chrome-canary': 56 | case 'chrome-dev': 57 | case 'msedge': 58 | case 'msedge-beta': 59 | case 'msedge-canary': 60 | case 'msedge-dev': 61 | browserName = 'chromium'; 62 | channel = options.browser; 63 | break; 64 | case 'chromium': 65 | browserName = 'chromium'; 66 | break; 67 | case 'firefox': 68 | browserName = 'firefox'; 69 | break; 70 | case 'webkit': 71 | browserName = 'webkit'; 72 | break; 73 | default: 74 | browserName = 'chromium'; 75 | channel = 'chrome'; 76 | } 77 | 78 | const launchOptions: LaunchOptions = { 79 | headless: !!options.headless, 80 | channel, 81 | executablePath: options.executablePath, 82 | }; 83 | 84 | // Define the server factory function that now accepts an optional sessionId 85 | const serverFactory = async (sessionId?: string) => { 86 | // Determine the user data directory: 87 | // 1. Use the one provided via CLI if available. 88 | // 2. Otherwise, create a session-specific one. 89 | // Note: If a CLI path is provided, all sessions will share it, potentially causing conflicts. 90 | const effectiveUserDataDir = options.userDataDir ?? 
await createUserDataDir(browserName, sessionId); 91 | 92 | return createServer({ 93 | browserName, 94 | userDataDir: effectiveUserDataDir, // Use the determined directory 95 | launchOptions, 96 | vision: !!options.vision, 97 | cdpEndpoint: options.cdpEndpoint, 98 | capabilities: options.caps?.split(',').map((c: string) => c.trim() as ToolCapability), 99 | }); 100 | }; 101 | 102 | // Pass the factory function to ServerList 103 | const serverList = new ServerList(serverFactory); 104 | setupExitWatchdog(serverList); 105 | 106 | if (options.httpPort) { // Check for httpPort first 107 | startHttpServer(+options.httpPort, serverList); 108 | } else if (options.port) { // Then check for port (SSE) 109 | startSSEServer(+options.port, serverList); 110 | } else { // Default to Stdio (doesn't support multiple sessions, uses default profile) 111 | const server = await serverList.create(); // Create without sessionId for stdio 112 | await server.connect(new StdioServerTransport()); 113 | } 114 | }); 115 | 116 | function setupExitWatchdog(serverList: ServerList) { 117 | const handleExit = async () => { 118 | setTimeout(() => process.exit(0), 15000); 119 | await serverList.closeAll(); 120 | process.exit(0); 121 | }; 122 | 123 | process.stdin.on('close', handleExit); 124 | process.on('SIGINT', handleExit); 125 | process.on('SIGTERM', handleExit); 126 | } 127 | 128 | program.parse(process.argv); 129 | 130 | // Modified to accept an optional sessionId 131 | async function createUserDataDir(browserName: 'chromium' | 'firefox' | 'webkit', sessionId?: string) { 132 | let cacheDirectory: string; 133 | if (process.platform === 'linux') 134 | cacheDirectory = process.env.XDG_CACHE_HOME || path.join(os.homedir(), '.cache'); 135 | else if (process.platform === 'darwin') 136 | cacheDirectory = path.join(os.homedir(), 'Library', 'Caches'); 137 | else if (process.platform === 'win32') 138 | cacheDirectory = process.env.LOCALAPPDATA || path.join(os.homedir(), 'AppData', 'Local'); 139 | else 140 
| throw new Error('Unsupported platform: ' + process.platform); 141 | 142 | // Append sessionId if provided and not empty/default, otherwise use the default profile name 143 | const profileSuffix = sessionId && sessionId !== 'default' ? `-${sessionId}` : ''; 144 | const profileDirName = `mcp-${browserName}-profile${profileSuffix}`; 145 | 146 | const result = path.join(cacheDirectory, 'ms-playwright', profileDirName); 147 | await fs.promises.mkdir(result, { recursive: true }); 148 | return result; 149 | } 150 | 151 | async function startSSEServer(port: number, serverList: ServerList) { 152 | const sessions = new Map(); 153 | const httpServer = http.createServer(async (req, res) => { 154 | if (req.method === 'POST') { 155 | const searchParams = new URL(`http://localhost${req.url}`).searchParams; 156 | const sessionId = searchParams.get('sessionId'); 157 | if (!sessionId) { 158 | res.statusCode = 400; 159 | res.end('Missing sessionId'); 160 | return; 161 | } 162 | const transport = sessions.get(sessionId); 163 | if (!transport) { 164 | res.statusCode = 404; 165 | res.end('Session not found'); 166 | return; 167 | } 168 | 169 | await transport.handlePostMessage(req, res); 170 | return; 171 | } else if (req.method === 'GET') { 172 | const transport = new SSEServerTransport('/sse', res); 173 | const sessionId = transport.sessionId; // Get sessionId from transport 174 | sessions.set(sessionId, transport); 175 | // Pass sessionId when creating server for SSE 176 | const server = await serverList.create(sessionId); 177 | res.on('close', () => { 178 | sessions.delete(sessionId); 179 | serverList.close(server).catch(e => console.error(e)); 180 | }); 181 | await server.connect(transport); 182 | return; 183 | } else { 184 | res.statusCode = 405; 185 | res.end('Method not allowed'); 186 | } 187 | }); 188 | 189 | httpServer.listen(port, () => { 190 | const address = httpServer.address(); 191 | assert(address, 'Could not bind server socket'); 192 | let url: string; 193 | if (typeof 
address === 'string') { 194 | url = address; 195 | } else { 196 | const resolvedPort = address.port; 197 | let resolvedHost = address.family === 'IPv4' ? address.address : `[${address.address}]`; 198 | if (resolvedHost === '0.0.0.0' || resolvedHost === '[::]') 199 | resolvedHost = 'localhost'; 200 | url = `http://${resolvedHost}:${resolvedPort}`; 201 | } 202 | console.log(`Listening on ${url}`); 203 | console.log('Put this in your client config:'); 204 | console.log(JSON.stringify({ 205 | 'mcpServers': { 206 | 'playwright': { 207 | 'url': `${url}/sse` 208 | } 209 | } 210 | }, undefined, 2)); 211 | }); 212 | } 213 | -------------------------------------------------------------------------------- /src/resources/console.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | import type { Resource } from './resource'; 18 | 19 | export const console: Resource = { 20 | schema: { 21 | uri: 'browser://console', 22 | name: 'Page console', 23 | mimeType: 'text/plain', 24 | }, 25 | 26 | read: async (context, uri) => { 27 | const messages = await context.currentTab().console(); 28 | const log = messages.map(message => `[${message.type().toUpperCase()}] ${message.text()}`).join('\n'); 29 | return [{ 30 | uri, 31 | mimeType: 'text/plain', 32 | text: log 33 | }]; 34 | }, 35 | }; 36 | -------------------------------------------------------------------------------- /src/resources/resource.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | import type { Context } from '../context'; 18 | 19 | export type ResourceSchema = { 20 | uri: string; 21 | name: string; 22 | description?: string; 23 | mimeType?: string; 24 | }; 25 | 26 | export type ResourceResult = { 27 | uri: string; 28 | mimeType?: string; 29 | text?: string; 30 | blob?: string; 31 | }; 32 | 33 | export type Resource = { 34 | schema: ResourceSchema; 35 | read: (context: Context, uri: string) => Promise; 36 | }; 37 | -------------------------------------------------------------------------------- /src/server.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | import { Server } from '@modelcontextprotocol/sdk/server/index.js'; 18 | import { CallToolRequestSchema, ListResourcesRequestSchema, ListToolsRequestSchema, ReadResourceRequestSchema } from '@modelcontextprotocol/sdk/types.js'; 19 | 20 | import { Context } from './context'; 21 | 22 | import type { Tool } from './tools/tool'; 23 | import type { Resource } from './resources/resource'; 24 | import type { ContextOptions } from './context'; 25 | 26 | type Options = ContextOptions & { 27 | name: string; 28 | version: string; 29 | tools: Tool[]; 30 | resources: Resource[], 31 | }; 32 | 33 | export function createServerWithTools(options: Options): Server { 34 | const { name, version, tools, resources } = options; 35 | const context = new Context(options); 36 | const server = new Server({ name, version }, { 37 | capabilities: { 38 | tools: {}, 39 | resources: {}, 40 | } 41 | }); 42 | 43 | server.setRequestHandler(ListToolsRequestSchema, async () => { 44 | return { tools: tools.map(tool => tool.schema) }; 45 | }); 46 | 47 | server.setRequestHandler(ListResourcesRequestSchema, async () => { 48 | return { resources: resources.map(resource => resource.schema) }; 49 | }); 50 | 51 | server.setRequestHandler(CallToolRequestSchema, async request => { 52 | const tool = tools.find(tool => tool.schema.name === request.params.name); 53 | if (!tool) { 54 | return { 55 | content: [{ type: 'text', text: `Tool "${request.params.name}" not found` }], 56 | isError: true, 57 | }; 58 | } 59 | 60 | try { 61 | const result = await tool.handle(context, request.params.arguments); 62 | return result; 63 | } catch (error) { 64 | return { 65 | content: [{ type: 'text', text: String(error) }], 66 | isError: true, 67 | }; 68 | } 69 | }); 70 | 71 | server.setRequestHandler(ReadResourceRequestSchema, async request => { 72 | const resource = resources.find(resource => resource.schema.uri === request.params.uri); 73 | if (!resource) 74 | return { contents: [] }; 75 | 76 | const contents = 
await resource.read(context, request.params.uri); 77 | return { contents }; 78 | }); 79 | 80 | const oldClose = server.close.bind(server); 81 | 82 | server.close = async () => { 83 | await oldClose(); 84 | await context.close(); 85 | }; 86 | 87 | // Add the callTool method directly to the server instance 88 | (server as any).callTool = async (name: string, args: any) => { 89 | // Find the registered handler for CallToolRequestSchema 90 | // This relies on the internal structure of the SDK's Server class, 91 | // specifically how request handlers are stored. This might be fragile. 92 | // A potentially safer approach would be to directly invoke the logic 93 | // defined in the setRequestHandler call above. 94 | 95 | // Let's try invoking the logic directly: 96 | const tool = tools.find(tool => tool.schema.name === name); 97 | if (!tool) { 98 | throw new Error(`Tool "${name}" not found`); 99 | } 100 | 101 | try { 102 | // Simulate the result structure expected by the HTTP handler 103 | const result = await tool.handle(context, args); 104 | // Assuming tool.handle returns the direct result or throws an error 105 | // We need to check the structure of 'result' if it matches MCP response format 106 | if (result && (result as any).isError) { 107 | // Attempt to extract a meaningful error message 108 | const errorContent = (result as any).content?.find((c: any) => c.type === 'text')?.text; 109 | throw new Error(errorContent || `Tool "${name}" execution failed`); 110 | } 111 | return result; // Return the direct result 112 | } catch (error: any) { 113 | // Re-throw the error to be caught by the HTTP handler 114 | throw new Error(String(error.message || error)); 115 | } 116 | }; 117 | 118 | // Attach the actual tools array to the server instance for later retrieval 119 | (server as any)._registeredTools = tools; 120 | 121 | return server; 122 | } 123 | 124 | export class ServerList { 125 | private _servers: Server[] = []; 126 | // Update factory function type to accept 
optional sessionId and return a Promise 127 | private _serverFactory: (sessionId?: string) => Promise; 128 | 129 | constructor(serverFactory: (sessionId?: string) => Promise) { 130 | this._serverFactory = serverFactory; 131 | } 132 | 133 | // Update create method to accept optional sessionId 134 | async create(sessionId?: string) { 135 | // Call the factory with the sessionId 136 | const server = await this._serverFactory(sessionId); 137 | this._servers.push(server); 138 | return server; 139 | } 140 | 141 | async close(server: Server) { 142 | const index = this._servers.indexOf(server); 143 | if (index !== -1) 144 | this._servers.splice(index, 1); 145 | await server.close(); 146 | } 147 | 148 | async closeAll() { 149 | await Promise.all(this._servers.map(server => server.close())); 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /src/tools/common.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | import { z } from 'zod'; 18 | import { zodToJsonSchema } from 'zod-to-json-schema'; 19 | 20 | import type { Tool } from './tool'; 21 | 22 | const waitSchema = z.object({ 23 | time: z.number().describe('The time to wait in seconds'), 24 | }); 25 | 26 | const wait: Tool = { 27 | capability: 'wait', 28 | schema: { 29 | name: 'browser_wait', 30 | description: 'Wait for a specified time in seconds', 31 | inputSchema: zodToJsonSchema(waitSchema), 32 | }, 33 | handle: async (context, params) => { 34 | const validatedParams = waitSchema.parse(params); 35 | await new Promise(f => setTimeout(f, Math.min(10000, validatedParams.time * 1000))); 36 | return { 37 | content: [{ 38 | type: 'text', 39 | text: `Waited for ${validatedParams.time} seconds`, 40 | }], 41 | }; 42 | }, 43 | }; 44 | 45 | const closeSchema = z.object({}); 46 | 47 | const close: Tool = { 48 | capability: 'core', 49 | schema: { 50 | name: 'browser_close', 51 | description: 'Close the page', 52 | inputSchema: zodToJsonSchema(closeSchema), 53 | }, 54 | handle: async context => { 55 | await context.close(); 56 | return { 57 | content: [{ 58 | type: 'text', 59 | text: `Page closed`, 60 | }], 61 | }; 62 | }, 63 | }; 64 | 65 | export default [ 66 | close, 67 | wait, 68 | ]; 69 | -------------------------------------------------------------------------------- /src/tools/extractContent.ts: -------------------------------------------------------------------------------- 1 | import { Page } from 'playwright'; 2 | import { JSDOM } from 'jsdom'; 3 | // @ts-ignore - No official types for readability 4 | import { Readability } from '@mozilla/readability'; 5 | import TurndownService from 'turndown'; 6 | import { z } from 'zod'; 7 | import { zodToJsonSchema } from 'zod-to-json-schema'; 8 | import type { ToolFactory, ToolResult, ToolSchema } from './tool.js'; // Corrected import 9 | import type { Context } from '../context.js'; // Corrected import 10 | 11 | // Define the input schema using Zod 12 | const 
ExtractContentInputSchema = z.object({ 13 | url: z.string().url().describe('The URL of the web page to extract content from.'), 14 | // Optional: Add timeout, waitUntil etc. if needed 15 | }); 16 | 17 | // Convert Zod schema to JSON schema for MCP 18 | const extractContentInputJsonSchema = zodToJsonSchema(ExtractContentInputSchema); 19 | 20 | // Tool Factory function 21 | const extractContent: ToolFactory = (captureSnapshot) => ({ 22 | capability: 'core', // Assign a capability 23 | schema: { // Define schema property 24 | name: 'browser_extract_content', 25 | description: 'Navigate to a URL and extract the main readable content as Markdown.', 26 | inputSchema: extractContentInputJsonSchema, 27 | }, 28 | // Correct handle signature 29 | handle: async (context: Context, params?: Record): Promise => { 30 | const { url } = ExtractContentInputSchema.parse(params); 31 | const currentTab = await context.ensureTab(); 32 | const page = currentTab.page; 33 | 34 | try { 35 | console.log(`[browser_extract_content] Navigating to: ${url}`); 36 | // Navigate and wait for the page to load 37 | await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 }); 38 | // Optional: Add a small delay or wait for network idle if needed for dynamic content 39 | await page.waitForLoadState('load', { timeout: 5000 }).catch(() => { 40 | console.log('[browser_extract_content] Page load timeout after domcontentloaded, proceeding anyway.'); 41 | }); 42 | console.log(`[browser_extract_content] Navigation successful.`); 43 | 44 | // Get page HTML content 45 | const html = await page.content(); 46 | console.log(`[browser_extract_content] Retrieved HTML content (length: ${html.length}).`); 47 | 48 | if (!html) { 49 | throw new Error('Failed to retrieve HTML content from the page.'); 50 | } 51 | 52 | // Process content using Readability and Turndown 53 | const processedContent = processHtmlContent(html, url); 54 | console.log(`[browser_extract_content] Processed content (length: 
${processedContent.length}).`); 55 | 56 | // Return the extracted content directly 57 | return { 58 | content: [{ type: 'text', text: processedContent }], 59 | // isError can be omitted if success 60 | }; 61 | 62 | } catch (error: any) { 63 | console.error(`[browser_extract_content] Error processing ${url}: ${error.message}`); 64 | // Return a structured error message 65 | const errorMessage = `Failed to extract content from ${url}: ${error.message}`; 66 | return { 67 | content: [{ type: 'text', text: errorMessage }], 68 | isError: true, // Mark as error 69 | }; 70 | } 71 | }, 72 | }); 73 | 74 | /** 75 | * Helper function to process HTML content using Readability and Turndown. 76 | * @param html The HTML content string. 77 | * @param url The base URL for resolving relative links (optional). 78 | * @returns The processed content as Markdown string. 79 | */ 80 | function processHtmlContent(html: string, url?: string): string { 81 | try { 82 | const dom = new JSDOM(html, { url }); 83 | // @ts-ignore 84 | const reader = new Readability(dom.window.document); 85 | const article = reader.parse(); 86 | 87 | if (!article || !article.content) { 88 | console.warn('[browser_extract_content] Readability could not extract main content. 
Returning warning message.'); 89 | // Return a more informative message instead of empty string 90 | return 'Readability could not extract main content from this page.'; 91 | } 92 | 93 | console.log(`[browser_extract_content] Readability extracted content (length: ${article.content.length}).`); 94 | 95 | // Convert extracted HTML to Markdown 96 | const turndownService = new TurndownService({ 97 | headingStyle: 'atx', // Use '#' for headings 98 | codeBlockStyle: 'fenced', // Use ``` for code blocks 99 | }); 100 | // Add a rule to handle preformatted text better 101 | turndownService.addRule('pre', { 102 | filter: 'pre', 103 | replacement: function (content, node) { 104 | // Trim leading/trailing newlines often added by turndown 105 | const code = content.replace(/^\n+|\n+$/g, ''); 106 | // Attempt to get language from class attribute 107 | const language = (node as HTMLElement).getAttribute('class')?.match(/language-(\S+)/)?.[1] || ''; 108 | return '\n```' + language + '\n' + code + '\n```\n'; 109 | } 110 | }); 111 | 112 | const markdown = turndownService.turndown(article.content); 113 | 114 | console.log(`[browser_extract_content] Converted to Markdown (length: ${markdown.length}).`); 115 | // Trim potential excessive newlines from the final markdown 116 | return markdown.replace(/\n{3,}/g, '\n\n').trim(); 117 | 118 | } catch (error: any) { 119 | console.error(`[browser_extract_content] Error during HTML processing: ${error.message}`); 120 | // Return error message formatted as markdown error 121 | return `Error processing HTML content: ${error.message}`; 122 | } 123 | } 124 | 125 | 126 | // Export the factory function, likely in an array like other tools 127 | export default (captureSnapshot: boolean) => [ 128 | extractContent(captureSnapshot), 129 | ]; -------------------------------------------------------------------------------- /src/tools/files.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 
Microsoft Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';

import type { ToolFactory } from './tool';

const uploadFileSchema = z.object({
  paths: z.array(z.string()).describe('The absolute paths to the files to upload. Can be a single file or multiple files.'),
});

/**
 * Tool `browser_file_upload`: submits the given paths to the current tab's
 * file chooser.
 */
const uploadFile: ToolFactory = captureSnapshot => ({
  capability: 'files',
  schema: {
    name: 'browser_file_upload',
    description: 'Upload one or multiple files',
    inputSchema: zodToJsonSchema(uploadFileSchema),
  },
  handle: async (context, params) => {
    const { paths } = uploadFileSchema.parse(params);
    const tab = context.currentTab();
    const submitFiles = async () => {
      await tab.submitFileChooser(paths);
    };
    return await tab.runAndWait(submitFiles, {
      status: `Chose files ${paths.join(', ')}`,
      captureSnapshot,
      noClearFileChooser: true,
    });
  },
});

export default (captureSnapshot: boolean) => [
  uploadFile(captureSnapshot),
];

--------------------------------------------------------------------------------
/src/tools/install.ts:
--------------------------------------------------------------------------------
/**
 * Copyright (c) Microsoft Corporation.
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import { fork } from 'child_process'; 18 | import path from 'path'; 19 | 20 | import { z } from 'zod'; 21 | import { zodToJsonSchema } from 'zod-to-json-schema'; 22 | 23 | import type { Tool } from './tool'; 24 | 25 | const install: Tool = { 26 | capability: 'install', 27 | schema: { 28 | name: 'browser_install', 29 | description: 'Install the browser specified in the config. Call this if you get an error about the browser not being installed.', 30 | inputSchema: zodToJsonSchema(z.object({})), 31 | }, 32 | 33 | handle: async context => { 34 | const channel = context.options.launchOptions?.channel ?? context.options.browserName ?? 
'chrome'; 35 | const cli = path.join(require.resolve('playwright/package.json'), '..', 'cli.js'); 36 | const child = fork(cli, ['install', channel], { 37 | stdio: 'pipe', 38 | }); 39 | const output: string[] = []; 40 | child.stdout?.on('data', data => output.push(data.toString())); 41 | child.stderr?.on('data', data => output.push(data.toString())); 42 | await new Promise((resolve, reject) => { 43 | child.on('close', code => { 44 | if (code === 0) 45 | resolve(); 46 | else 47 | reject(new Error(`Failed to install browser: ${output.join('')}`)); 48 | }); 49 | }); 50 | return { 51 | content: [{ 52 | type: 'text', 53 | text: `Browser ${channel} installed`, 54 | }], 55 | }; 56 | }, 57 | }; 58 | 59 | export default [ 60 | install, 61 | ]; 62 | -------------------------------------------------------------------------------- /src/tools/keyboard.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | import { z } from 'zod'; 18 | import zodToJsonSchema from 'zod-to-json-schema'; 19 | 20 | import type { ToolFactory } from './tool'; 21 | 22 | const pressKeySchema = z.object({ 23 | key: z.string().describe('Name of the key to press or a character to generate, such as `ArrowLeft` or `a`'), 24 | }); 25 | 26 | const pressKey: ToolFactory = captureSnapshot => ({ 27 | capability: 'core', 28 | schema: { 29 | name: 'browser_press_key', 30 | description: 'Press a key on the keyboard', 31 | inputSchema: zodToJsonSchema(pressKeySchema), 32 | }, 33 | handle: async (context, params) => { 34 | const validatedParams = pressKeySchema.parse(params); 35 | return await context.currentTab().runAndWait(async tab => { 36 | await tab.page.keyboard.press(validatedParams.key); 37 | }, { 38 | status: `Pressed key ${validatedParams.key}`, 39 | captureSnapshot, 40 | }); 41 | }, 42 | }); 43 | 44 | export default (captureSnapshot: boolean) => [ 45 | pressKey(captureSnapshot), 46 | ]; 47 | -------------------------------------------------------------------------------- /src/tools/navigate.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | import { z } from 'zod'; 18 | import { zodToJsonSchema } from 'zod-to-json-schema'; 19 | 20 | import type { ToolFactory } from './tool'; 21 | 22 | const navigateSchema = z.object({ 23 | url: z.string().describe('The URL to navigate to'), 24 | }); 25 | 26 | const navigate: ToolFactory = captureSnapshot => ({ 27 | capability: 'core', 28 | schema: { 29 | name: 'browser_navigate', 30 | description: 'Navigate to a URL', 31 | inputSchema: zodToJsonSchema(navigateSchema), 32 | }, 33 | handle: async (context, params) => { 34 | const validatedParams = navigateSchema.parse(params); 35 | const currentTab = await context.ensureTab(); 36 | return await currentTab.run(async tab => { 37 | await tab.navigate(validatedParams.url); 38 | }, { 39 | status: `Navigated to ${validatedParams.url}`, 40 | captureSnapshot, 41 | }); 42 | }, 43 | }); 44 | 45 | const goBackSchema = z.object({}); 46 | 47 | const goBack: ToolFactory = snapshot => ({ 48 | capability: 'history', 49 | schema: { 50 | name: 'browser_navigate_back', 51 | description: 'Go back to the previous page', 52 | inputSchema: zodToJsonSchema(goBackSchema), 53 | }, 54 | handle: async context => { 55 | return await context.currentTab().runAndWait(async tab => { 56 | await tab.page.goBack(); 57 | }, { 58 | status: 'Navigated back', 59 | captureSnapshot: snapshot, 60 | }); 61 | }, 62 | }); 63 | 64 | const goForwardSchema = z.object({}); 65 | 66 | const goForward: ToolFactory = snapshot => ({ 67 | capability: 'history', 68 | schema: { 69 | name: 'browser_navigate_forward', 70 | description: 'Go forward to the next page', 71 | inputSchema: zodToJsonSchema(goForwardSchema), 72 | }, 73 | handle: async context => { 74 | return await context.currentTab().runAndWait(async tab => { 75 | await tab.page.goForward(); 76 | }, { 77 | status: 'Navigated forward', 78 | captureSnapshot: snapshot, 79 | }); 80 | }, 81 | }); 82 | 83 | export default (captureSnapshot: boolean) => [ 84 | navigate(captureSnapshot), 85 | 
goBack(captureSnapshot), 86 | goForward(captureSnapshot), 87 | ]; 88 | -------------------------------------------------------------------------------- /src/tools/pdf.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import os from 'os'; 18 | import path from 'path'; 19 | 20 | import { z } from 'zod'; 21 | import { zodToJsonSchema } from 'zod-to-json-schema'; 22 | 23 | import { sanitizeForFilePath } from './utils'; 24 | 25 | import type { Tool } from './tool'; 26 | 27 | const pdfSchema = z.object({}); 28 | 29 | const pdf: Tool = { 30 | capability: 'pdf', 31 | schema: { 32 | name: 'browser_pdf_save', 33 | description: 'Save page as PDF', 34 | inputSchema: zodToJsonSchema(pdfSchema), 35 | }, 36 | handle: async context => { 37 | const tab = context.currentTab(); 38 | const fileName = path.join(os.tmpdir(), sanitizeForFilePath(`page-${new Date().toISOString()}`)) + '.pdf'; 39 | await tab.page.pdf({ path: fileName }); 40 | return { 41 | content: [{ 42 | type: 'text', 43 | text: `Saved as ${fileName}`, 44 | }], 45 | }; 46 | }, 47 | }; 48 | 49 | export default [ 50 | pdf, 51 | ]; 52 | -------------------------------------------------------------------------------- /src/tools/screen.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 
Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import { z } from 'zod'; 18 | import { zodToJsonSchema } from 'zod-to-json-schema'; 19 | 20 | import type { Tool } from './tool'; 21 | 22 | const screenshot: Tool = { 23 | capability: 'core', 24 | schema: { 25 | name: 'browser_screen_capture', 26 | description: 'Take a screenshot of the current page', 27 | inputSchema: zodToJsonSchema(z.object({})), 28 | }, 29 | 30 | handle: async context => { 31 | const tab = context.currentTab(); 32 | const screenshot = await tab.page.screenshot({ type: 'jpeg', quality: 50, scale: 'css' }); 33 | return { 34 | content: [{ type: 'image', data: screenshot.toString('base64'), mimeType: 'image/jpeg' }], 35 | }; 36 | }, 37 | }; 38 | 39 | const elementSchema = z.object({ 40 | element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'), 41 | }); 42 | 43 | const moveMouseSchema = elementSchema.extend({ 44 | x: z.number().describe('X coordinate'), 45 | y: z.number().describe('Y coordinate'), 46 | }); 47 | 48 | const moveMouse: Tool = { 49 | capability: 'core', 50 | schema: { 51 | name: 'browser_screen_move_mouse', 52 | description: 'Move mouse to a given position', 53 | inputSchema: zodToJsonSchema(moveMouseSchema), 54 | }, 55 | 56 | handle: async (context, params) => { 57 | const validatedParams = moveMouseSchema.parse(params); 58 | const 
tab = context.currentTab(); 59 | await tab.page.mouse.move(validatedParams.x, validatedParams.y); 60 | return { 61 | content: [{ type: 'text', text: `Moved mouse to (${validatedParams.x}, ${validatedParams.y})` }], 62 | }; 63 | }, 64 | }; 65 | 66 | const clickSchema = elementSchema.extend({ 67 | x: z.number().describe('X coordinate'), 68 | y: z.number().describe('Y coordinate'), 69 | }); 70 | 71 | const click: Tool = { 72 | capability: 'core', 73 | schema: { 74 | name: 'browser_screen_click', 75 | description: 'Click left mouse button', 76 | inputSchema: zodToJsonSchema(clickSchema), 77 | }, 78 | 79 | handle: async (context, params) => { 80 | return await context.currentTab().runAndWait(async tab => { 81 | const validatedParams = clickSchema.parse(params); 82 | await tab.page.mouse.move(validatedParams.x, validatedParams.y); 83 | await tab.page.mouse.down(); 84 | await tab.page.mouse.up(); 85 | }, { 86 | status: 'Clicked mouse', 87 | }); 88 | }, 89 | }; 90 | 91 | const dragSchema = elementSchema.extend({ 92 | startX: z.number().describe('Start X coordinate'), 93 | startY: z.number().describe('Start Y coordinate'), 94 | endX: z.number().describe('End X coordinate'), 95 | endY: z.number().describe('End Y coordinate'), 96 | }); 97 | 98 | const drag: Tool = { 99 | capability: 'core', 100 | schema: { 101 | name: 'browser_screen_drag', 102 | description: 'Drag left mouse button', 103 | inputSchema: zodToJsonSchema(dragSchema), 104 | }, 105 | 106 | handle: async (context, params) => { 107 | const validatedParams = dragSchema.parse(params); 108 | return await context.currentTab().runAndWait(async tab => { 109 | await tab.page.mouse.move(validatedParams.startX, validatedParams.startY); 110 | await tab.page.mouse.down(); 111 | await tab.page.mouse.move(validatedParams.endX, validatedParams.endY); 112 | await tab.page.mouse.up(); 113 | }, { 114 | status: `Dragged mouse from (${validatedParams.startX}, ${validatedParams.startY}) to (${validatedParams.endX}, 
${validatedParams.endY})`, 115 | }); 116 | }, 117 | }; 118 | 119 | const typeSchema = z.object({ 120 | text: z.string().describe('Text to type into the element'), 121 | submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'), 122 | }); 123 | 124 | const type: Tool = { 125 | capability: 'core', 126 | schema: { 127 | name: 'browser_screen_type', 128 | description: 'Type text', 129 | inputSchema: zodToJsonSchema(typeSchema), 130 | }, 131 | 132 | handle: async (context, params) => { 133 | const validatedParams = typeSchema.parse(params); 134 | return await context.currentTab().runAndWait(async tab => { 135 | await tab.page.keyboard.type(validatedParams.text); 136 | if (validatedParams.submit) 137 | await tab.page.keyboard.press('Enter'); 138 | }, { 139 | status: `Typed text "${validatedParams.text}"`, 140 | }); 141 | }, 142 | }; 143 | 144 | export default [ 145 | screenshot, 146 | moveMouse, 147 | click, 148 | drag, 149 | type, 150 | ]; 151 | -------------------------------------------------------------------------------- /src/tools/snapshot.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | import { z } from 'zod'; 18 | import zodToJsonSchema from 'zod-to-json-schema'; 19 | 20 | import type * as playwright from 'playwright'; 21 | import type { Tool } from './tool'; 22 | 23 | const snapshot: Tool = { 24 | capability: 'core', 25 | schema: { 26 | name: 'browser_snapshot', 27 | description: 'Capture accessibility snapshot of the current page, this is better than screenshot', 28 | inputSchema: zodToJsonSchema(z.object({})), 29 | }, 30 | 31 | handle: async context => { 32 | // Use ensureTab() instead of currentTab() to guarantee a tab exists 33 | const tab = await context.ensureTab(); 34 | return await tab.run(async () => {}, { captureSnapshot: true }); 35 | }, 36 | }; 37 | 38 | const elementSchema = z.object({ 39 | element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'), 40 | ref: z.string().describe('Exact target element reference from the page snapshot'), 41 | }); 42 | 43 | const click: Tool = { 44 | capability: 'core', 45 | schema: { 46 | name: 'browser_click', 47 | description: 'Perform click on a web page', 48 | inputSchema: zodToJsonSchema(elementSchema), 49 | }, 50 | 51 | handle: async (context, params) => { 52 | const validatedParams = elementSchema.parse(params); 53 | return await context.currentTab().runAndWaitWithSnapshot(async tab => { 54 | const locator = tab.lastSnapshot().refLocator(validatedParams.ref); 55 | await locator.click(); 56 | }, { 57 | status: `Clicked "${validatedParams.element}"`, 58 | }); 59 | }, 60 | }; 61 | 62 | const dragSchema = z.object({ 63 | startElement: z.string().describe('Human-readable source element description used to obtain the permission to interact with the element'), 64 | startRef: z.string().describe('Exact source element reference from the page snapshot'), 65 | endElement: z.string().describe('Human-readable target element description used to obtain the permission to interact with the element'), 66 | endRef: 
z.string().describe('Exact target element reference from the page snapshot'), 67 | }); 68 | 69 | const drag: Tool = { 70 | capability: 'core', 71 | schema: { 72 | name: 'browser_drag', 73 | description: 'Perform drag and drop between two elements', 74 | inputSchema: zodToJsonSchema(dragSchema), 75 | }, 76 | 77 | handle: async (context, params) => { 78 | const validatedParams = dragSchema.parse(params); 79 | return await context.currentTab().runAndWaitWithSnapshot(async tab => { 80 | const startLocator = tab.lastSnapshot().refLocator(validatedParams.startRef); 81 | const endLocator = tab.lastSnapshot().refLocator(validatedParams.endRef); 82 | await startLocator.dragTo(endLocator); 83 | }, { 84 | status: `Dragged "${validatedParams.startElement}" to "${validatedParams.endElement}"`, 85 | }); 86 | }, 87 | }; 88 | 89 | const hover: Tool = { 90 | capability: 'core', 91 | schema: { 92 | name: 'browser_hover', 93 | description: 'Hover over element on page', 94 | inputSchema: zodToJsonSchema(elementSchema), 95 | }, 96 | 97 | handle: async (context, params) => { 98 | const validatedParams = elementSchema.parse(params); 99 | return await context.currentTab().runAndWaitWithSnapshot(async tab => { 100 | const locator = tab.lastSnapshot().refLocator(validatedParams.ref); 101 | await locator.hover(); 102 | }, { 103 | status: `Hovered over "${validatedParams.element}"`, 104 | }); 105 | }, 106 | }; 107 | 108 | const typeSchema = elementSchema.extend({ 109 | text: z.string().describe('Text to type into the element'), 110 | submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'), 111 | slowly: z.boolean().optional().describe('Whether to type one character at a time. Useful for triggering key handlers in the page. 
By default entire text is filled in at once.'), 112 | }); 113 | 114 | const type: Tool = { 115 | capability: 'core', 116 | schema: { 117 | name: 'browser_type', 118 | description: 'Type text into editable element', 119 | inputSchema: zodToJsonSchema(typeSchema), 120 | }, 121 | 122 | handle: async (context, params) => { 123 | const validatedParams = typeSchema.parse(params); 124 | return await context.currentTab().runAndWaitWithSnapshot(async tab => { 125 | const locator = tab.lastSnapshot().refLocator(validatedParams.ref); 126 | if (validatedParams.slowly) 127 | await locator.pressSequentially(validatedParams.text); 128 | else 129 | await locator.fill(validatedParams.text); 130 | if (validatedParams.submit) 131 | await locator.press('Enter'); 132 | }, { 133 | status: `Typed "${validatedParams.text}" into "${validatedParams.element}"`, 134 | }); 135 | }, 136 | }; 137 | 138 | const selectOptionSchema = elementSchema.extend({ 139 | values: z.array(z.string()).describe('Array of values to select in the dropdown. This can be a single value or multiple values.'), 140 | }); 141 | 142 | const selectOption: Tool = { 143 | capability: 'core', 144 | schema: { 145 | name: 'browser_select_option', 146 | description: 'Select an option in a dropdown', 147 | inputSchema: zodToJsonSchema(selectOptionSchema), 148 | }, 149 | 150 | handle: async (context, params) => { 151 | const validatedParams = selectOptionSchema.parse(params); 152 | return await context.currentTab().runAndWaitWithSnapshot(async tab => { 153 | const locator = tab.lastSnapshot().refLocator(validatedParams.ref); 154 | await locator.selectOption(validatedParams.values); 155 | }, { 156 | status: `Selected option in "${validatedParams.element}"`, 157 | }); 158 | }, 159 | }; 160 | 161 | const screenshotSchema = z.object({ 162 | raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). 
Default is false, which returns a JPEG image.'), 163 | }); 164 | 165 | const screenshot: Tool = { 166 | capability: 'core', 167 | schema: { 168 | name: 'browser_take_screenshot', 169 | description: `Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.`, 170 | inputSchema: zodToJsonSchema(screenshotSchema), 171 | }, 172 | 173 | handle: async (context, params) => { 174 | const validatedParams = screenshotSchema.parse(params); 175 | const tab = context.currentTab(); 176 | const options: playwright.PageScreenshotOptions = validatedParams.raw ? { type: 'png', scale: 'css' } : { type: 'jpeg', quality: 50, scale: 'css' }; 177 | const screenshot = await tab.page.screenshot(options); 178 | return { 179 | content: [{ type: 'image', data: screenshot.toString('base64'), mimeType: validatedParams.raw ? 'image/png' : 'image/jpeg' }], 180 | }; 181 | }, 182 | }; 183 | 184 | export default [ 185 | snapshot, 186 | click, 187 | drag, 188 | hover, 189 | type, 190 | selectOption, 191 | screenshot, 192 | ]; 193 | -------------------------------------------------------------------------------- /src/tools/tabs.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | import { z } from 'zod'; 18 | import { zodToJsonSchema } from 'zod-to-json-schema'; 19 | 20 | import type { ToolFactory, Tool } from './tool'; 21 | 22 | const listTabs: Tool = { 23 | capability: 'tabs', 24 | schema: { 25 | name: 'browser_tab_list', 26 | description: 'List browser tabs', 27 | inputSchema: zodToJsonSchema(z.object({})), 28 | }, 29 | handle: async context => { 30 | return { 31 | content: [{ 32 | type: 'text', 33 | text: await context.listTabs(), 34 | }], 35 | }; 36 | }, 37 | }; 38 | 39 | const selectTabSchema = z.object({ 40 | index: z.number().describe('The index of the tab to select'), 41 | }); 42 | 43 | const selectTab: ToolFactory = captureSnapshot => ({ 44 | capability: 'tabs', 45 | schema: { 46 | name: 'browser_tab_select', 47 | description: 'Select a tab by index', 48 | inputSchema: zodToJsonSchema(selectTabSchema), 49 | }, 50 | handle: async (context, params) => { 51 | const validatedParams = selectTabSchema.parse(params); 52 | await context.selectTab(validatedParams.index); 53 | const currentTab = await context.ensureTab(); 54 | return await currentTab.run(async () => {}, { captureSnapshot }); 55 | }, 56 | }); 57 | 58 | const newTabSchema = z.object({ 59 | url: z.string().optional().describe('The URL to navigate to in the new tab. If not provided, the new tab will be blank.'), 60 | }); 61 | 62 | const newTab: Tool = { 63 | capability: 'tabs', 64 | schema: { 65 | name: 'browser_tab_new', 66 | description: 'Open a new tab', 67 | inputSchema: zodToJsonSchema(newTabSchema), 68 | }, 69 | handle: async (context, params) => { 70 | const validatedParams = newTabSchema.parse(params); 71 | await context.newTab(); 72 | if (validatedParams.url) 73 | await context.currentTab().navigate(validatedParams.url); 74 | return await context.currentTab().run(async () => {}, { captureSnapshot: true }); 75 | }, 76 | }; 77 | 78 | const closeTabSchema = z.object({ 79 | index: z.number().optional().describe('The index of the tab to close. 
Closes current tab if not provided.'), 80 | }); 81 | 82 | const closeTab: ToolFactory = captureSnapshot => ({ 83 | capability: 'tabs', 84 | schema: { 85 | name: 'browser_tab_close', 86 | description: 'Close a tab', 87 | inputSchema: zodToJsonSchema(closeTabSchema), 88 | }, 89 | handle: async (context, params) => { 90 | const validatedParams = closeTabSchema.parse(params); 91 | await context.closeTab(validatedParams.index); 92 | const currentTab = await context.currentTab(); 93 | if (currentTab) 94 | return await currentTab.run(async () => {}, { captureSnapshot }); 95 | return { 96 | content: [{ 97 | type: 'text', 98 | text: await context.listTabs(), 99 | }], 100 | }; 101 | }, 102 | }); 103 | 104 | export default (captureSnapshot: boolean) => [ 105 | listTabs, 106 | newTab, 107 | selectTab(captureSnapshot), 108 | closeTab(captureSnapshot), 109 | ]; 110 | -------------------------------------------------------------------------------- /src/tools/tool.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types'; 18 | import type { JsonSchema7Type } from 'zod-to-json-schema'; 19 | import type { Context } from '../context'; 20 | 21 | export type ToolCapability = 'core' | 'tabs' | 'pdf' | 'history' | 'wait' | 'files' | 'install'; 22 | 23 | export type ToolSchema = { 24 | name: string; 25 | description: string; 26 | inputSchema: JsonSchema7Type; 27 | }; 28 | 29 | export type ToolResult = { 30 | content: (ImageContent | TextContent)[]; 31 | isError?: boolean; 32 | }; 33 | 34 | export type Tool = { 35 | capability: ToolCapability; 36 | schema: ToolSchema; 37 | handle: (context: Context, params?: Record) => Promise; 38 | }; 39 | 40 | export type ToolFactory = (snapshot: boolean) => Tool; 41 | -------------------------------------------------------------------------------- /src/tools/utils.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | import type * as playwright from 'playwright'; 18 | 19 | export async function waitForCompletion(page: playwright.Page, callback: () => Promise): Promise { 20 | const requests = new Set(); 21 | let frameNavigated = false; 22 | let waitCallback: () => void = () => {}; 23 | const waitBarrier = new Promise(f => { waitCallback = f; }); 24 | 25 | const requestListener = (request: playwright.Request) => requests.add(request); 26 | const requestFinishedListener = (request: playwright.Request) => { 27 | requests.delete(request); 28 | if (!requests.size) 29 | waitCallback(); 30 | }; 31 | 32 | const frameNavigateListener = (frame: playwright.Frame) => { 33 | if (frame.parentFrame()) 34 | return; 35 | frameNavigated = true; 36 | dispose(); 37 | clearTimeout(timeout); 38 | void frame.waitForLoadState('load').then(() => { 39 | waitCallback(); 40 | }); 41 | }; 42 | 43 | const onTimeout = () => { 44 | dispose(); 45 | waitCallback(); 46 | }; 47 | 48 | page.on('request', requestListener); 49 | page.on('requestfinished', requestFinishedListener); 50 | page.on('framenavigated', frameNavigateListener); 51 | const timeout = setTimeout(onTimeout, 10000); 52 | 53 | const dispose = () => { 54 | page.off('request', requestListener); 55 | page.off('requestfinished', requestFinishedListener); 56 | page.off('framenavigated', frameNavigateListener); 57 | clearTimeout(timeout); 58 | }; 59 | 60 | try { 61 | const result = await callback(); 62 | if (!requests.size && !frameNavigated) 63 | waitCallback(); 64 | await waitBarrier; 65 | await page.evaluate(() => new Promise(f => setTimeout(f, 1000))); 66 | return result; 67 | } finally { 68 | dispose(); 69 | } 70 | } 71 | 72 | export function sanitizeForFilePath(s: string) { 73 | return s.replace(/[\x00-\x2C\x2E-\x2F\x3A-\x40\x5B-\x60\x7B-\x7F]+/g, '-'); 74 | } 75 | -------------------------------------------------------------------------------- /tests/basic.spec.ts: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import fs from 'fs/promises'; 18 | import { test, expect } from './fixtures'; 19 | 20 | test('browser_navigate', async ({ client }) => { 21 | expect(await client.callTool({ 22 | name: 'browser_navigate', 23 | arguments: { 24 | url: 'data:text/html,TitleHello, world!', 25 | }, 26 | })).toHaveTextContent(` 27 | Navigated to data:text/html,TitleHello, world! 28 | 29 | - Page URL: data:text/html,TitleHello, world! 30 | - Page Title: Title 31 | - Page Snapshot 32 | \`\`\`yaml 33 | - text: Hello, world! 
34 | \`\`\` 35 | ` 36 | ); 37 | }); 38 | 39 | test('browser_click', async ({ client }) => { 40 | await client.callTool({ 41 | name: 'browser_navigate', 42 | arguments: { 43 | url: 'data:text/html,Title', 44 | }, 45 | }); 46 | 47 | expect(await client.callTool({ 48 | name: 'browser_click', 49 | arguments: { 50 | element: 'Submit button', 51 | ref: 's1e3', 52 | }, 53 | })).toHaveTextContent(`Clicked "Submit button" 54 | 55 | - Page URL: data:text/html,Title 56 | - Page Title: Title 57 | - Page Snapshot 58 | \`\`\`yaml 59 | - button "Submit" [ref=s2e3] 60 | \`\`\` 61 | `); 62 | }); 63 | 64 | 65 | test('browser_select_option', async ({ client }) => { 66 | await client.callTool({ 67 | name: 'browser_navigate', 68 | arguments: { 69 | url: 'data:text/html,Title', 70 | }, 71 | }); 72 | 73 | expect(await client.callTool({ 74 | name: 'browser_select_option', 75 | arguments: { 76 | element: 'Select', 77 | ref: 's1e3', 78 | values: ['bar'], 79 | }, 80 | })).toHaveTextContent(`Selected option in "Select" 81 | 82 | - Page URL: data:text/html,Title 83 | - Page Title: Title 84 | - Page Snapshot 85 | \`\`\`yaml 86 | - combobox [ref=s2e3]: 87 | - option "Foo" [ref=s2e4] 88 | - option "Bar" [selected] [ref=s2e5] 89 | \`\`\` 90 | `); 91 | }); 92 | 93 | test('browser_select_option (multiple)', async ({ client }) => { 94 | await client.callTool({ 95 | name: 'browser_navigate', 96 | arguments: { 97 | url: 'data:text/html,Title', 98 | }, 99 | }); 100 | 101 | expect(await client.callTool({ 102 | name: 'browser_select_option', 103 | arguments: { 104 | element: 'Select', 105 | ref: 's1e3', 106 | values: ['bar', 'baz'], 107 | }, 108 | })).toHaveTextContent(`Selected option in "Select" 109 | 110 | - Page URL: data:text/html,Title 111 | - Page Title: Title 112 | - Page Snapshot 113 | \`\`\`yaml 114 | - listbox [ref=s2e3]: 115 | - option "Foo" [ref=s2e4] 116 | - option "Bar" [selected] [ref=s2e5] 117 | - option "Baz" [selected] [ref=s2e6] 118 | \`\`\` 119 | `); 120 | }); 121 | 122 | 
test('browser_file_upload', async ({ client }) => { 123 | expect(await client.callTool({ 124 | name: 'browser_navigate', 125 | arguments: { 126 | url: 'data:text/html,Title', 127 | }, 128 | })).toContainTextContent('- textbox [ref=s1e3]'); 129 | 130 | expect(await client.callTool({ 131 | name: 'browser_click', 132 | arguments: { 133 | element: 'Textbox', 134 | ref: 's1e3', 135 | }, 136 | })).toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called'); 137 | 138 | const filePath = test.info().outputPath('test.txt'); 139 | await fs.writeFile(filePath, 'Hello, world!'); 140 | 141 | { 142 | const response = await client.callTool({ 143 | name: 'browser_file_upload', 144 | arguments: { 145 | paths: [filePath], 146 | }, 147 | }); 148 | 149 | expect(response).not.toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called'); 150 | expect(response).toContainTextContent('textbox [ref=s3e3]: C:\\fakepath\\test.txt'); 151 | } 152 | 153 | { 154 | const response = await client.callTool({ 155 | name: 'browser_click', 156 | arguments: { 157 | element: 'Textbox', 158 | ref: 's3e3', 159 | }, 160 | }); 161 | 162 | expect(response).toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called'); 163 | expect(response).toContainTextContent('button "Button" [ref=s4e4]'); 164 | } 165 | 166 | { 167 | const response = await client.callTool({ 168 | name: 'browser_click', 169 | arguments: { 170 | element: 'Button', 171 | ref: 's4e4', 172 | }, 173 | }); 174 | 175 | expect(response, 'not submitting browser_file_upload dismisses file chooser').not.toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called'); 176 | } 177 | }); 178 | 179 | test('browser_type', async ({ client }) => { 180 | await client.callTool({ 181 | name: 'browser_navigate', 182 | arguments: { 183 | url: `data:text/html,`, 184 | }, 185 | }); 186 | await 
client.callTool({ 187 | name: 'browser_type', 188 | arguments: { 189 | element: 'textbox', 190 | ref: 's1e3', 191 | text: 'Hi!', 192 | submit: true, 193 | }, 194 | }); 195 | const resource = await client.readResource({ 196 | uri: 'browser://console', 197 | }); 198 | expect(resource.contents).toEqual([{ 199 | uri: 'browser://console', 200 | mimeType: 'text/plain', 201 | text: '[LOG] Key pressed: Enter , Text: Hi!', 202 | }]); 203 | }); 204 | 205 | test('browser_type (slowly)', async ({ client }) => { 206 | await client.callTool({ 207 | name: 'browser_navigate', 208 | arguments: { 209 | url: `data:text/html,`, 210 | }, 211 | }); 212 | await client.callTool({ 213 | name: 'browser_type', 214 | arguments: { 215 | element: 'textbox', 216 | ref: 's1e3', 217 | text: 'Hi!', 218 | submit: true, 219 | slowly: true, 220 | }, 221 | }); 222 | const resource = await client.readResource({ 223 | uri: 'browser://console', 224 | }); 225 | expect(resource.contents).toEqual([{ 226 | uri: 'browser://console', 227 | mimeType: 'text/plain', 228 | text: [ 229 | '[LOG] Key pressed: H Text: ', 230 | '[LOG] Key pressed: i Text: H', 231 | '[LOG] Key pressed: ! Text: Hi', 232 | '[LOG] Key pressed: Enter Text: Hi!', 233 | ].join('\n'), 234 | }]); 235 | }); 236 | -------------------------------------------------------------------------------- /tests/capabilities.spec.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */

import { test, expect } from './fixtures';

// A client started with default arguments must expose exactly this tool set.
test('test snapshot tool list', async ({ client }) => {
  const { tools } = await client.listTools();
  expect(new Set(tools.map(t => t.name))).toEqual(new Set([
    'browser_click',
    'browser_drag',
    'browser_file_upload',
    'browser_hover',
    'browser_select_option',
    'browser_type',
    'browser_close',
    'browser_install',
    'browser_navigate_back',
    'browser_navigate_forward',
    'browser_navigate',
    'browser_pdf_save',
    'browser_press_key',
    'browser_snapshot',
    'browser_tab_close',
    'browser_tab_list',
    'browser_tab_new',
    'browser_tab_select',
    'browser_take_screenshot',
    'browser_wait',
  ]));
});

// A client started with `--vision` must expose exactly this tool set.
test('test vision tool list', async ({ visionClient }) => {
  const { tools: visionTools } = await visionClient.listTools();
  expect(new Set(visionTools.map(t => t.name))).toEqual(new Set([
    'browser_close',
    'browser_file_upload',
    'browser_install',
    'browser_navigate_back',
    'browser_navigate_forward',
    'browser_navigate',
    'browser_pdf_save',
    'browser_press_key',
    'browser_screen_capture',
    'browser_screen_click',
    'browser_screen_drag',
    'browser_screen_move_mouse',
    'browser_screen_type',
    'browser_tab_close',
    'browser_tab_list',
    'browser_tab_new',
    'browser_tab_select',
    'browser_wait',
  ]));
});

// The only resource listed is the plain-text console resource.
test('test resources list', async ({ client }) => {
  const { resources } = await client.listResources();
  expect(resources).toEqual([
    expect.objectContaining({
      uri: 'browser://console',
      mimeType: 'text/plain',
    }),
  ]);
});

// With `--caps="core"` the file-upload, PDF and screen tools must not be listed.
test('test capabilities', async ({ startClient }) => {
  const client = await startClient({
    args: ['--caps="core"'],
  });
  const { tools } = await client.listTools();
  const toolNames = tools.map(t => t.name);
  expect(toolNames).not.toContain('browser_file_upload');
  expect(toolNames).not.toContain('browser_pdf_save');
  expect(toolNames).not.toContain('browser_screen_capture');
  expect(toolNames).not.toContain('browser_screen_click');
  expect(toolNames).not.toContain('browser_screen_drag');
  expect(toolNames).not.toContain('browser_screen_move_mouse');
  expect(toolNames).not.toContain('browser_screen_type');
});

--------------------------------------------------------------------------------
/tests/cdp.spec.ts:
--------------------------------------------------------------------------------
/**
 * Copyright (c) Microsoft Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import { test, expect } from './fixtures';

// Navigating through a server attached via `--cdp-endpoint` reports the
// page URL, title and snapshot.
test('cdp server', async ({ cdpEndpoint, startClient }) => {
  const client = await startClient({ args: [`--cdp-endpoint=${cdpEndpoint}`] });
  expect(await client.callTool({
    name: 'browser_navigate',
    arguments: {
      url: 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>',
    },
  })).toHaveTextContent(`
Navigated to data:text/html,<html><title>Title</title><body>Hello, world!</body></html>

- Page URL: data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
- Page Title: Title
- Page Snapshot
\`\`\`yaml
- text: Hello, world!
\`\`\`
`
  );
});

--------------------------------------------------------------------------------
/tests/console.spec.ts:
--------------------------------------------------------------------------------
/**
 * Copyright (c) Microsoft Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import { test, expect } from './fixtures';

// Console messages emitted by the page are readable through the
// `browser://console` resource, one `[LEVEL] text` line per message.
test('browser://console', async ({ client }) => {
  await client.callTool({
    name: 'browser_navigate',
    arguments: {
      // The page has to log one message and one error for the assertion below to hold.
      url: 'data:text/html,<html><script>console.log("Hello, world!");console.error("Error");</script></html>',
    },
  });

  const resource = await client.readResource({
    uri: 'browser://console',
  });
  expect(resource.contents).toEqual([{
    uri: 'browser://console',
    mimeType: 'text/plain',
    text: '[LOG] Hello, world!\n[ERROR] Error',
  }]);
});

--------------------------------------------------------------------------------
/tests/fixtures.ts:
--------------------------------------------------------------------------------
1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */

import path from 'path';
import { chromium } from 'playwright';

import { test as baseTest, expect as baseExpect } from '@playwright/test';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
import { Client } from '@modelcontextprotocol/sdk/client/index.js';

type Fixtures = {
  client: Client;
  visionClient: Client;
  startClient: (options?: { args?: string[] }) => Promise<Client>;
  wsEndpoint: string;
  cdpEndpoint: string;
};

export const test = baseTest.extend<Fixtures>({

  // MCP client connected to a server started with the default arguments.
  client: async ({ startClient }, use) => {
    await use(await startClient());
  },

  // MCP client connected to a server started with `--vision`.
  visionClient: async ({ startClient }, use) => {
    await use(await startClient({ args: ['--vision'] }));
  },

  // Factory that spawns `cli.js` over stdio (headless, per-test user data dir)
  // and returns a connected client. Every client it created is closed on teardown.
  startClient: async ({ }, use, testInfo) => {
    const userDataDir = testInfo.outputPath('user-data-dir');
    const startedClients: Client[] = [];

    // `use` must be awaited: teardown below may only run once the test is done.
    await use(async options => {
      const args = ['--headless', '--user-data-dir', userDataDir];
      if (options?.args)
        args.push(...options.args);
      const transport = new StdioClientTransport({
        command: 'node',
        args: [path.join(__dirname, '../cli.js'), ...args],
      });
      const client = new Client({ name: 'test', version: '1.0.0' });
      // Track before connecting so a client that fails half-way is still closed.
      startedClients.push(client);
      await client.connect(transport);
      await client.ping();
      return client;
    });

    await Promise.all(startedClients.map(client => client.close()));
  },

  // WebSocket endpoint of a freshly launched Chromium browser server.
  wsEndpoint: async ({ }, use) => {
    const browserServer = await chromium.launchServer();
    await use(browserServer.wsEndpoint());
    await browserServer.close();
  },

  // HTTP CDP endpoint of a Chrome instance; the debugging port is derived
  // from TEST_PARALLEL_INDEX so parallel workers do not collide.
  cdpEndpoint: async ({ }, use, testInfo) => {
    const port = 3200 + (+process.env.TEST_PARALLEL_INDEX!);
    const browserContext = await chromium.launchPersistentContext(testInfo.outputPath('user-data-dir'), {
      channel: 'chrome',
      args: [`--remote-debugging-port=${port}`],
    });
    await use(`http://localhost:${port}`);
    await browserContext.close();
  },
});

type Response = Awaited<ReturnType<Client['callTool']>>;

export const expect = baseExpect.extend({
  /**
   * Checks the text of the FIRST content item of a tool response.
   * A string is compared for equality after trimming both sides;
   * a RegExp is matched against the untrimmed text.
   */
  toHaveTextContent(response: Response, content: string | RegExp) {
    const isNot = this.isNot;
    try {
      const text = (response.content as any)[0].text;
      if (typeof content === 'string') {
        if (isNot)
          baseExpect(text.trim()).not.toBe(content.trim());
        else
          baseExpect(text.trim()).toBe(content.trim());
      } else {
        if (isNot)
          baseExpect(text).not.toMatch(content);
        else
          baseExpect(text).toMatch(content);
      }
    } catch (e) {
      // The inner assertion failed: report a failure in either polarity.
      return {
        pass: isNot,
        message: () => e.message,
      };
    }
    return {
      pass: !isNot,
      message: () => ``,
    };
  },

  /**
   * Checks that the i-th content item of a tool response contains the i-th
   * expected string. A single string is treated as a one-element list.
   */
  toContainTextContent(response: Response, content: string | string[]) {
    const isNot = this.isNot;
    try {
      const expected = Array.isArray(content) ? content : [content];
      const texts = (response.content as any).map(c => c.text);
      for (let i = 0; i < texts.length; i++) {
        if (isNot)
          baseExpect(texts[i]).not.toContain(expected[i]);
        else
          baseExpect(texts[i]).toContain(expected[i]);
      }
    } catch (e) {
      return {
        pass: isNot,
        message: () => e.message,
      };
    }
    return {
      pass: !isNot,
      message: () => ``,
    };
  },
});

--------------------------------------------------------------------------------
/tests/httpApi.spec.ts:
--------------------------------------------------------------------------------
import path from 'path';
import { test, expect } from '@playwright/test';
import { spawn, ChildProcessWithoutNullStreams } from 'child_process';
import getPort from 'get-port'; // Utility to find an available port

// How long to wait for the server to announce that it is listening.
const SERVER_START_TIMEOUT_MS = 15000;

/**
 * Starts `cli.js --http-port <port>` on a free port and resolves once the
 * server has printed its "listening" message.
 *
 * Rejects if the process exits first or if nothing is announced within
 * SERVER_START_TIMEOUT_MS; on timeout the spawned process is killed.
 */
async function startServer(): Promise<{ serverProcess: ChildProcessWithoutNullStreams, port: number, url: string }> {
  const port = await getPort();
  const url = `http://localhost:${port}`;
  console.log(`Starting HTTP API server on port ${port}...`);

  // Use node directly to run the compiled cli.js; resolve it relative to this
  // file so the test does not depend on the current working directory.
  const serverProcess = spawn('node', [path.join(__dirname, '../cli.js'), '--http-port', String(port)], {
    stdio: ['pipe', 'pipe', 'pipe'], // Pipe all streams to satisfy ChildProcessWithoutNullStreams type
    detached: true, // Allows killing the process group
  });

  let stderrOutput = '';
  serverProcess.stderr.on('data', data => {
    stderrOutput += data.toString();
    console.error(`[Server STDERR]: ${data}`);
  });

  // Wait for the server to log the listening message or exit
  await new Promise<void>((resolve, reject) => {
    // Detach everything registered below so nothing fires after settling.
    const cleanup = () => {
      clearTimeout(timer);
      serverProcess.stdout.removeListener('data', handleData);
      serverProcess.stderr.removeListener('data', handleData);
      serverProcess.removeListener('exit', handleExit);
    };

    const handleData = (data: Buffer) => {
      const output = data.toString();
      console.log(`[Server STDOUT]: ${output}`);
      if (output.includes(`HTTP API server listening on port ${port}`)) {
        cleanup();
        resolve();
      }
    };

    const handleExit = (code: number | null) => {
      cleanup();
      reject(new Error(`Server process exited prematurely with code ${code}. Stderr: ${stderrOutput}`));
    };

    // Timeout for server start
    const timer = setTimeout(() => {
      cleanup();
      serverProcess.kill(); // Do not leave a half-started server behind.
      reject(new Error(`Server failed to start within timeout. Stderr: ${stderrOutput}`));
    }, SERVER_START_TIMEOUT_MS);

    serverProcess.stdout.on('data', handleData);
    serverProcess.stderr.on('data', handleData); // Also listen on stderr for potential errors during startup
    serverProcess.once('exit', handleExit);
  });

  console.log(`Server started successfully on ${url}`);
  return { serverProcess, port, url };
}

// Test suite for HTTP API
test.describe('HTTP API', () => {
  let serverProcess: ChildProcessWithoutNullStreams;
  let serverUrl: string;

  // Start server before all tests in this suite
  test.beforeAll(async () => {
    const { serverProcess: proc, url } = await startServer();
    serverProcess = proc;
    serverUrl = url;
  });

  // Stop server after all tests in this suite
  test.afterAll(async () => {
    console.log('Stopping HTTP API server...');
    // Nothing to stop if startup failed or the process is already gone;
    // signalling a dead process group would throw and 'close' would never fire.
    if (!serverProcess || serverProcess.killed || serverProcess.exitCode !== null || serverProcess.signalCode !== null)
      return;
    // Subscribe before signalling so the 'close' event cannot be missed.
    const closed = new Promise(resolve => serverProcess.once('close', resolve));
    // Kill the process group to ensure child processes are also terminated
    process.kill(-serverProcess.pid!, 'SIGINT');
    await closed;
    console.log('Server stopped.');
  });

  // Test case 1: Basic GET request for browser_tab_list
  test('should handle GET /tools/browser_tab_list', async () => {
    const response = await fetch(`${serverUrl}/tools/browser_tab_list`);
    expect(response.status).toBe(200);

    const body = await response.json();
    expect(body.success).toBe(true);
    expect(body.result).toBeDefined();
    // Since it's a fresh server, expect "No tabs open" or an initial about:blank tab
    expect(body.result.content[0].type).toBe('text');
    // The exact text might vary slightly, check for key parts
    expect(body.result.content[0].text).toContain('tabs');
  });

  // Test case 2: Basic POST request for browser_navigate
  test('should handle POST /tools/browser_navigate', async () => {
    const sessionId = 'test-nav-session';
    const targetUrl = 'https://example.com';

    const response = await fetch(`${serverUrl}/tools/browser_navigate`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Session-Id': sessionId,
      },
      body: JSON.stringify({ url: targetUrl }),
    });

    expect(response.status).toBe(200);
    const body = await response.json();
    expect(body.success).toBe(true);
    expect(body.result).toBeDefined();
    expect(body.result.content[0].type).toBe('text');
    expect(body.result.content[0].text).toContain(`Navigated to ${targetUrl}`);
    expect(body.result.content[0].text).toContain('Example Domain'); // Check for page title/content
  });

  // Test case 3: POST request with session reuse
  test('should reuse session for subsequent POST requests', async () => {
    const sessionId = 'test-reuse-session';
    const url1 = 'https://example.com';
    const url2 = 'about:blank';

    // First request (creates session)
    await fetch(`${serverUrl}/tools/browser_navigate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'Session-Id': sessionId },
      body: JSON.stringify({ url: url1 }),
    });

    // Second request (reuses session)
    const response = await fetch(`${serverUrl}/tools/browser_navigate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'Session-Id': sessionId },
      body: JSON.stringify({ url: url2 }),
    });

    expect(response.status).toBe(200);
    const body = await response.json();
    expect(body.success).toBe(true);
    expect(body.result.content[0].text).toContain(`Navigated to ${url2}`);
    // Check that it's not the previous page's content
    expect(body.result.content[0].text).not.toContain('Example Domain');
  });

  // Test case 4: Error handling for non-existent tool
  test('should return error for non-existent tool', async () => {
    const response = await fetch(`${serverUrl}/tools/invalid_tool_name`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({}),
    });

    expect(response.status).toBe(500); // Or 404 if we implement specific check
    const body = await response.json();
    expect(body.success).toBe(false);
    expect(body.error).toContain('Tool "invalid_tool_name" not found');
  });

  // Test case 5: Error handling for missing required parameter
  test('should return error for missing required parameter', async () => {
    const response = await fetch(`${serverUrl}/tools/browser_navigate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({}), // Missing 'url'
    });

    expect(response.status).toBe(500);
    const body = await response.json();
    expect(body.success).toBe(false);
    expect(body.error).toContain('Required'); // Check for Zod validation error message
    // Make the check less sensitive to whitespace around brackets/colons
    expect(body.error).toContain('"path":');
    expect(body.error).toContain('"url"');
  });

  // Add more tests here for:
  // - Different tools (click, type, snapshot)
  // - Different sessions interacting concurrently (might be harder to test reliably)
  // - GET /tools/browser_tab_list with specific session
  // - Error cases (invalid ref, etc.)

});

--------------------------------------------------------------------------------
/tests/iframes.spec.ts:
--------------------------------------------------------------------------------
/**
 * Copyright (c) Microsoft Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import { test, expect } from './fixtures';

// The snapshot of a page with nested iframes is stitched into one tree, and
// refs that point into a child frame (`f1…`) can be used for interaction.
test('stitched aria frames', async ({ client }) => {
  expect(await client.callTool({
    name: 'browser_navigate',
    arguments: {
      // NOTE(review): the markup of this data URL and the indentation of the
      // expected snapshot below were rebuilt from the assertions (a heading,
      // an iframe holding a button and a <main> with a nested iframe holding
      // a paragraph) - confirm both against the server's actual output.
      url: `data:text/html,<h1>Hello</h1><iframe src="data:text/html,<button>World</button><main><iframe src='data:text/html,<p>Nested</p>'></iframe></main>"></iframe>`,
    },
  })).toContainTextContent(`
\`\`\`yaml
- heading "Hello" [level=1] [ref=s1e3]
- iframe [ref=s1e4]:
    - button "World" [ref=f1s1e3]
    - main [ref=f1s1e4]:
        - iframe [ref=f1s1e5]:
            - paragraph [ref=f2s1e3]: Nested
\`\`\`
`);

  expect(await client.callTool({
    name: 'browser_click',
    arguments: {
      element: 'World',
      ref: 'f1s1e3',
    },
  })).toContainTextContent('Clicked "World"');
});

--------------------------------------------------------------------------------
/tests/launch.spec.ts:
--------------------------------------------------------------------------------
/**
 * Copyright (c) Microsoft Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import { test, expect } from './fixtures';

// After `browser_close`, the next navigation must work again and report the page.
test('test reopen browser', async ({ client }) => {
  await client.callTool({
    name: 'browser_navigate',
    arguments: {
      url: 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>',
    },
  });

  expect(await client.callTool({
    name: 'browser_close',
  })).toHaveTextContent('Page closed');

  expect(await client.callTool({
    name: 'browser_navigate',
    arguments: {
      url: 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>',
    },
  })).toHaveTextContent(`
Navigated to data:text/html,<html><title>Title</title><body>Hello, world!</body></html>

- Page URL: data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
- Page Title: Title
- Page Snapshot
\`\`\`yaml
- text: Hello, world!
\`\`\`
`);
});

// A bogus `--executable-path` surfaces as an error in the tool response.
test('executable path', async ({ startClient }) => {
  const client = await startClient({ args: [`--executable-path=bogus`] });
  const response = await client.callTool({
    name: 'browser_navigate',
    arguments: {
      url: 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>',
    },
  });
  expect(response).toContainTextContent(`executable doesn't exist`);
});

--------------------------------------------------------------------------------
/tests/pdf.spec.ts:
--------------------------------------------------------------------------------
1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */

import { test, expect } from './fixtures';

// With `--caps="no-pdf"` the PDF tool is not registered, so calling it
// must report that the tool was not found.
test('save as pdf unavailable', async ({ startClient }) => {
  const client = await startClient({ args: ['--caps="no-pdf"'] });
  await client.callTool({
    name: 'browser_navigate',
    arguments: {
      url: 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>',
    },
  });

  expect(await client.callTool({
    name: 'browser_pdf_save',
  })).toHaveTextContent(/Tool "browser_pdf_save" not found/);
});

// With the default capabilities the current page can be saved as a PDF.
test('save as pdf', async ({ client }) => {
  expect(await client.callTool({
    name: 'browser_navigate',
    arguments: {
      url: 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>',
    },
  })).toHaveTextContent(`
Navigated to data:text/html,<html><title>Title</title><body>Hello, world!</body></html>

- Page URL: data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
- Page Title: Title
- Page Snapshot
\`\`\`yaml
- text: Hello, world!
\`\`\`
`
  );

  const response = await client.callTool({
    name: 'browser_pdf_save',
  });
  // The dot is escaped so that only a literal ".pdf" extension is accepted.
  expect(response).toHaveTextContent(/^Saved as.*page-[^:]+\.pdf$/);
});

--------------------------------------------------------------------------------
/tests/sse.spec.ts:
--------------------------------------------------------------------------------
1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */

import { spawn } from 'node:child_process';
import path from 'node:path';
import { test } from './fixtures';

// Starts the server with `--port 0`, reads the announced URL from stdout and
// checks that an MCP client can connect and ping over the SSE transport.
test('sse transport', async () => {
  const cp = spawn('node', [path.join(__dirname, '../cli.js'), '--port', '0'], { stdio: 'pipe' });
  try {
    let stdout = '';
    const url = await new Promise<string>((resolve, reject) => {
      cp.stdout?.on('data', data => {
        stdout += data.toString();
        const match = stdout.match(/Listening on (http:\/\/.*)/);
        if (match)
          resolve(match[1]);
      });
      // Fail fast instead of waiting for the test timeout when the server
      // cannot be spawned or dies before announcing its URL. Once the promise
      // has resolved these rejections are no-ops.
      cp.once('error', reject);
      cp.once('exit', code => reject(new Error(`Server exited with code ${code} before listening. Output: ${stdout}`)));
    });

    // need dynamic import b/c of some ESM nonsense
    const { SSEClientTransport } = await import('@modelcontextprotocol/sdk/client/sse.js');
    const { Client } = await import('@modelcontextprotocol/sdk/client/index.js');
    const transport = new SSEClientTransport(new URL(url));
    const client = new Client({ name: 'test', version: '1.0.0' });
    await client.connect(transport);
    await client.ping();
  } finally {
    cp.kill();
  }
});

--------------------------------------------------------------------------------
/tests/tabs.spec.ts:
--------------------------------------------------------------------------------
1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */

import { chromium } from 'playwright';

import { test, expect } from './fixtures';

import type { Client } from '@modelcontextprotocol/sdk/client/index.js';

/**
 * Opens a new tab on a data URL page with the given title and body text
 * and returns the `browser_tab_new` tool response.
 */
async function createTab(client: Client, title: string, body: string) {
  return await client.callTool({
    name: 'browser_tab_new',
    arguments: {
      url: `data:text/html,<title>${title}</title><body>${body}</body>`,
    },
  });
}

// Each new tab is appended to the list and becomes the current tab.
test('create new tab', async ({ client }) => {
  expect(await createTab(client, 'Tab one', 'Body one')).toHaveTextContent(`
Open tabs:
- 1: [] (about:blank)
- 2: (current) [Tab one] (data:text/html,<title>Tab one</title><body>Body one</body>)

Current tab:
- Page URL: data:text/html,<title>Tab one</title><body>Body one</body>
- Page Title: Tab one
- Page Snapshot
\`\`\`yaml
- text: Body one
\`\`\``);

  expect(await createTab(client, 'Tab two', 'Body two')).toHaveTextContent(`
Open tabs:
- 1: [] (about:blank)
- 2: [Tab one] (data:text/html,<title>Tab one</title><body>Body one</body>)
- 3: (current) [Tab two] (data:text/html,<title>Tab two</title><body>Body two</body>)

Current tab:
- Page URL: data:text/html,<title>Tab two</title><body>Body two</body>
- Page Title: Tab two
- Page Snapshot
\`\`\`yaml
- text: Body two
\`\`\``);
});

// Selecting tab 2 makes it current without changing the tab list.
test('select tab', async ({ client }) => {
  await createTab(client, 'Tab one', 'Body one');
  await createTab(client, 'Tab two', 'Body two');
  expect(await client.callTool({
    name: 'browser_tab_select',
    arguments: {
      index: 2,
    },
  })).toHaveTextContent(`
Open tabs:
- 1: [] (about:blank)
- 2: (current) [Tab one] (data:text/html,<title>Tab one</title><body>Body one</body>)
- 3: [Tab two] (data:text/html,<title>Tab two</title><body>Body two</body>)

Current tab:
- Page URL: data:text/html,<title>Tab one</title><body>Body one</body>
- Page Title: Tab one
- Page Snapshot
\`\`\`yaml
- text: Body one
\`\`\``);
});

// Closing tab 3 removes it from the list; tab 2 is reported as current.
test('close tab', async ({ client }) => {
  await createTab(client, 'Tab one', 'Body one');
  await createTab(client, 'Tab two', 'Body two');
  expect(await client.callTool({
    name: 'browser_tab_close',
    arguments: {
      index: 3,
    },
  })).toHaveTextContent(`
Open tabs:
- 1: [] (about:blank)
- 2: (current) [Tab one] (data:text/html,<title>Tab one</title><body>Body one</body>)

Current tab:
- Page URL: data:text/html,<title>Tab one</title><body>Body one</body>
- Page Title: Tab one
- Page Snapshot
\`\`\`yaml
- text: Body one
\`\`\``);
});

// When attached over CDP, navigation must reuse the existing page.
test('reuse first tab when navigating', async ({ startClient, cdpEndpoint }) => {
  const browser = await chromium.connectOverCDP(cdpEndpoint);
  const [context] = browser.contexts();
  // Captured before navigating: the assertions below inspect this same array
  // afterwards and expect it to hold a single, now-navigated page.
  const pages = context.pages();

  const client = await startClient({ args: [`--cdp-endpoint=${cdpEndpoint}`] });
  await client.callTool({
    name: 'browser_navigate',
    arguments: {
      url: 'data:text/html,<title>Title</title><body>Body</body>',
    },
  });

  expect(pages.length).toBe(1);
  expect(await pages[0].title()).toBe('Title');
});

--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
{
  "compilerOptions": {
    "target": "ESNext",
    "skipLibCheck": true,
    "esModuleInterop": true,
    "moduleResolution": "node",
    "strict": true,
    "module": "CommonJS",
    "outDir": "./lib"
  },
  "include": [
    "src",
  ],
}

--------------------------------------------------------------------------------
/utils/copyright.js:
--------------------------------------------------------------------------------
1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | --------------------------------------------------------------------------------