├── .github
    └── workflows
    │   ├── ci.yml
    │   └── publish.yml
├── .gitignore
├── .npmignore
├── LICENSE
├── PLAN.md
├── README.md
├── SECURITY.md
├── cli.js
├── eslint.config.mjs
├── index.d.ts
├── index.js
├── package-lock.json
├── package.json
├── playwright.config.ts
├── pnpm-lock.yaml
├── src
    ├── context.ts
    ├── httpServer.ts
    ├── index.ts
    ├── program.ts
    ├── resources
    │   ├── console.ts
    │   └── resource.ts
    ├── server.ts
    └── tools
    │   ├── common.ts
    │   ├── extractContent.ts
    │   ├── files.ts
    │   ├── install.ts
    │   ├── keyboard.ts
    │   ├── navigate.ts
    │   ├── pdf.ts
    │   ├── screen.ts
    │   ├── snapshot.ts
    │   ├── tabs.ts
    │   ├── tool.ts
    │   └── utils.ts
├── tests
    ├── basic.spec.ts
    ├── capabilities.spec.ts
    ├── cdp.spec.ts
    ├── console.spec.ts
    ├── fixtures.ts
    ├── httpApi.spec.ts
    ├── iframes.spec.ts
    ├── launch.spec.ts
    ├── pdf.spec.ts
    ├── sse.spec.ts
    └── tabs.spec.ts
├── tsconfig.json
└── utils
    └── copyright.js


/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 |     branches: [ main ]
 8 | 
 9 | jobs:
10 |   build-and-test:
11 |     runs-on: ubuntu-latest
12 | 
13 |     steps:
14 |     - uses: actions/checkout@v4
15 | 
16 |     - name: Use Node.js 18
17 |       uses: actions/setup-node@v4
18 |       with:
19 |         node-version: '18'
20 |         cache: 'npm'
21 | 
22 |     - name: Install dependencies
23 |       run: npm ci
24 | 
25 |     - name: Run linting
26 |       run: npm run lint
27 | 
28 |     - name: Build
29 |       run: npm run build
30 | 
31 |     - name: Install Playwright browsers
32 |       run: npx playwright install --with-deps
33 | 
34 |     - name: Run tests
35 |       run: npm test
36 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: Publish
 2 | on:
 3 |   release:
 4 |     types: [published]
 5 | jobs:
 6 |   publish-npm:
 7 |     runs-on: ubuntu-latest
 8 |     permissions:
 9 |       contents: read
10 |       id-token: write
11 |     steps:
12 |       - uses: actions/checkout@v4
13 |       - uses: actions/setup-node@v4
14 |         with:
15 |           node-version: 18
16 |           registry-url: https://registry.npmjs.org/
17 |       - run: npm ci
18 |       - run: npm run build
19 |       - run: npm run lint
20 |       - run: npm run test
21 |       - run: npm publish --provenance
22 |         env:
23 |           NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}}
24 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | lib/
2 | node_modules/
3 | test-results/
4 | 
5 | 
6 | dify-docs/
7 | .aider*
8 | .prompt
9 | 


--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | **/*
2 | README.md
3 | LICENSE
4 | !lib/**/*.js
5 | !cli.js
6 | !index.*
7 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Portions Copyright (c) Microsoft Corporation.
190 |    Portions Copyright 2017 Google Inc.
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/PLAN.md:
--------------------------------------------------------------------------------
  1 | # Playwright MCP 改造计划
  2 | 
  3 | ## 项目背景
  4 | 
  5 | Playwright MCP 是一个 Model Context Protocol (MCP) 服务器，提供基于 Playwright 的浏览器自动化能力。目前，该服务器支持两种通信方式：
  6 | 
  7 | 1. **Stdio 传输**：通过标准输入/输出进行通信（默认模式）
  8 | 2. **SSE 传输**：当使用 `--port` 参数时，启动一个基于 HTTP 的 Server-Sent Events (SSE) 服务
  9 | 
 10 | ## 改造目标
 11 | 
 12 | 本次改造的主要目标是：
 13 | 
 14 | 1. **增加标准 HTTP API**：实现一个基于请求/响应模式的 HTTP API，允许客户端通过发送单个 HTTP 请求来执行 MCP 工具，并直接在 HTTP 响应中接收结果
 15 | 2. **保持现有功能**：确保现有的 Stdio 和 SSE 传输方式继续正常工作
 16 | 
 17 | ## 技术方案
 18 | 
 19 | ### 1. 命令行参数
 20 | 
 21 | 添加一个新的命令行参数 `--http-port <port>`，用于指定 HTTP API 的监听端口：
 22 | 
 23 | ```
 24 | npx playwright-mcp-bypass@latest --http-port 8080
 25 | ```
 26 | 
 27 | ### 2. HTTP API 设计
 28 | 
 29 | #### 端点结构
 30 | 
 31 | - 基本路径：`/tools/{tool_name}`
 32 | - 例如：
 33 |   - `/tools/browser_navigate`
 34 |   - `/tools/browser_click`
 35 |   - `/tools/browser_type`
 36 | 
 37 | #### HTTP 方法
 38 | 
 39 | - 主要使用 `POST` 方法执行工具操作
 40 | - 可选：对于只读操作（如 `browser_tab_list`）可以支持 `GET` 方法
 41 | 
 42 | #### 请求格式
 43 | 
 44 | - Content-Type: `application/json`
 45 | - 请求体：包含工具所需的参数，格式为 JSON
 46 | 
 47 | 示例（调用 `browser_navigate`）：
 48 | ```json
 49 | {
 50 |   "url": "https://example.com"
 51 | }
 52 | ```
 53 | 
 54 | 示例（调用 `browser_click`）：
 55 | ```json
 56 | {
 57 |   "element": "Login Button",
 58 |   "ref": "button#login"
 59 | }
 60 | ```
 61 | 
 62 | #### 响应格式
 63 | 
 64 | - Content-Type: `application/json`
 65 | - 成功响应（HTTP 200）：
 66 |   ```json
 67 |   {
 68 |     "success": true,
 69 |     "result": { ... } // 工具执行结果
 70 |   }
 71 |   ```
 72 | - 错误响应（HTTP 4xx/5xx）：
 73 |   ```json
 74 |   {
 75 |     "success": false,
 76 |     "error": "错误信息"
 77 |   }
 78 |   ```
 79 | 
 80 | ### 3. 实现步骤
 81 | 
 82 | #### 3.1 修改 `src/program.ts`
 83 | 
 84 | 1. 添加新的命令行选项 `--http-port <port>`：
 85 |    ```typescript
 86 |    .option('--http-port <port>', 'Port to listen on for HTTP API.')
 87 |    ```
 88 | 
 89 | 2. 引入 Koa.js 及相关中间件：
 90 |   ```typescript
 91 |   import Koa from 'koa';
 92 |   import Router from '@koa/router';
 93 |   import bodyParser from 'koa-bodyparser';
 94 |   import cors from '@koa/cors';
 95 |   import http from 'http'; // 仍然需要 http 来创建服务器实例
 96 |   ```
 97 | 
 98 | 3. 创建新的函数 `startHttpServer`，使用 Koa.js：
 99 |   ```typescript
100 |   async function startHttpServer(port: number, serverList: ServerList) {
101 |     // 会话管理
102 |     const sessions = new Map<string, Server>();
103 |     const sessionTimers = new Map<string, NodeJS.Timeout>(); // 用于会话超时
104 |     const sessionTimeout = 30 * 60 * 1000; // 30 分钟
105 | 
106 |     const app = new Koa();
107 |     const router = new Router();
108 | 
109 |     // 中间件
110 |     app.use(cors({
111 |       allowHeaders: ['Content-Type', 'Session-Id'], // 允许 Session-Id 头
112 |       exposeHeaders: [], // 根据需要暴露头
113 |     }));
114 |     app.use(bodyParser());
115 | 
116 |     // 会话处理中间件
117 |     app.use(async (ctx, next) => {
118 |       const sessionId = ctx.get('session-id') || 'default';
119 |       let server = sessions.get(sessionId);
120 | 
121 |       if (!server) {
122 |         console.log(`Creating new session: ${sessionId}`);
123 |         server = await serverList.create();
124 |         sessions.set(sessionId, server);
125 |       } else {
126 |         console.log(`Reusing session: ${sessionId}`);
127 |       }
128 | 
129 |       // 更新会话超时
130 |       if (sessionTimers.has(sessionId)) {
131 |         clearTimeout(sessionTimers.get(sessionId)!);
132 |       }
133 |       const timer = setTimeout(async () => {
134 |         console.log(`Session timed out: ${sessionId}`);
135 |         const serverToClose = sessions.get(sessionId);
136 |         if (serverToClose) {
137 |           await serverList.close(serverToClose);
138 |           sessions.delete(sessionId);
139 |         }
140 |         sessionTimers.delete(sessionId);
141 |       }, sessionTimeout);
142 |       sessionTimers.set(sessionId, timer);
143 | 
144 |       ctx.state.server = server; // 将 server 实例传递给后续中间件/路由
145 |       ctx.state.sessionId = sessionId;
146 |       await next();
147 |     });
148 | 
149 |     // 路由定义
150 |     router.post('/tools/:toolName', async (ctx) => {
151 |       const { toolName } = ctx.params;
152 |       const params = ctx.request.body || {};
153 |       const server: Server = ctx.state.server;
154 | 
155 |       try {
156 |         // 可选：检查工具是否存在
157 |         const tools = await server.listTools();
158 |         const toolExists = tools.tools.some(tool => tool.name === toolName);
159 |         if (!toolExists) {
160 |           ctx.status = 404;
161 |           ctx.body = { success: false, error: `Tool "${toolName}" not found` };
162 |           return;
163 |         }
164 | 
165 |         console.log(`Calling tool "${toolName}" for session ${ctx.state.sessionId} with params:`, params);
166 |         const result = await server.callTool(toolName, params);
167 |         ctx.status = 200;
168 |         ctx.body = { success: true, result };
169 |       } catch (error: any) {
170 |         console.error(`Error calling tool "${toolName}" for session ${ctx.state.sessionId}:`, error);
171 |         ctx.status = 500;
172 |         ctx.body = { success: false, error: String(error.message || error) };
173 |       }
174 |     });
175 | 
176 |     // 特殊处理 GET /tools/browser_tab_list
177 |     router.get('/tools/browser_tab_list', async (ctx) => {
178 |       const server: Server = ctx.state.server;
179 |       try {
180 |         console.log(`Calling tool "browser_tab_list" for session ${ctx.state.sessionId}`);
181 |         const result = await server.callTool('browser_tab_list', {});
182 |         ctx.status = 200;
183 |         ctx.body = { success: true, result };
184 |       } catch (error: any) {
185 |         console.error(`Error calling tool "browser_tab_list" for session ${ctx.state.sessionId}:`, error);
186 |         ctx.status = 500;
187 |         ctx.body = { success: false, error: String(error.message || error) };
188 |       }
189 |     });
190 | 
191 |     app.use(router.routes()).use(router.allowedMethods());
192 | 
193 |     // 启动服务器
194 |     const httpServer = http.createServer(app.callback());
195 | 
196 |     httpServer.listen(port, () => {
197 |       console.log(`HTTP API server listening on port ${port}`);
198 |     });
199 | 
200 |     // 添加优雅关闭处理
201 |     process.on('SIGINT', async () => {
202 |       console.log('Closing HTTP server...');
203 |       httpServer.close();
204 | 
205 |       // 清理所有会话
206 |       for (const [sessionId, server] of sessions.entries()) {
207 |         console.log(`Closing session: ${sessionId}`);
208 |         await serverList.close(server);
209 |       }
210 |       sessions.clear();
211 | 
212 |       // 清理所有定时器
213 |       for (const timer of sessionTimers.values()) {
214 |         clearTimeout(timer);
215 |       }
216 |       sessionTimers.clear();
217 | 
218 |       process.exit(0);
219 |     });
220 |   }
221 |   ```
222 | 
223 | 4. 在 `program.ts` 的 action 回调中添加对 `--http-port` 的处理：
224 |    ```typescript
225 |    if (options.httpPort) {
226 |      startHttpServer(+options.httpPort, serverList);
227 |    } else if (options.port) {
228 |      startSSEServer(+options.port, serverList);
229 |    } else {
230 |      const server = await serverList.create();
231 |      await server.connect(new StdioServerTransport());
232 |    }
233 |    ```
234 | 
235 | #### 3.2 实现 HTTP 请求处理 (使用 Koa.js)
236 | 
237 | 在 `startHttpServer` 函数中，我们使用 Koa.js 及其路由和中间件来处理请求：
238 | 
239 | 1. **Koa 实例创建**：`const app = new Koa();`
240 | 2. **中间件使用**：
241 |   - `cors()`: 处理跨域请求。
242 |   - `bodyParser()`: 解析 POST 请求的 JSON 或表单数据。
243 |   - **自定义会话中间件**:
244 |     - 从 `ctx.get('session-id')` 获取会话 ID。
245 |     - 使用 `sessions` Map 获取或创建 `Server` 实例。
246 |     - 更新会话超时定时器 (`sessionTimers`)。
247 |     - 将 `server` 实例和 `sessionId` 存储在 `ctx.state` 中，以便后续路由访问。
248 | 3. **路由定义 (`@koa/router`)**：
249 |   - `router.post('/tools/:toolName', ...)`: 处理工具调用请求。
250 |     - 从 `ctx.params` 获取 `toolName`。
251 |     - 从 `ctx.request.body` 获取参数。
252 |     - 从 `ctx.state.server` 获取 `Server` 实例。
253 |     - 调用 `server.callTool(toolName, params)`。
254 |     - 根据结果设置 `ctx.status` 和 `ctx.body`。
255 |   - `router.get('/tools/browser_tab_list', ...)`: 处理特定的 GET 请求。
256 | 4. **启动服务器**：
257 |   - `const httpServer = http.createServer(app.callback());`
258 |   - `httpServer.listen(port, ...)`
259 | 
260 | #### 3.3 错误处理
261 | 
262 | Koa 的错误处理通常通过 `try...catch` 块或专门的错误处理中间件完成。在我们的路由处理函数中：
263 | 
264 | 1. 无效的 URL 路径：返回 404 Not Found
265 | 2. 无效的 JSON 格式：返回 400 Bad Request
266 | 3. 工具执行错误：返回 500 Internal Server Error
267 | 4. 不支持的 HTTP 方法：返回 405 Method Not Allowed
268 | 
269 | 可以进一步增强错误处理：
270 | 
271 | ```typescript
272 | // 在 server.callTool 之前添加工具存在性检查
273 | const tools = await server.listTools();
274 | const toolExists = tools.tools.some(tool => tool.name === toolName);
275 | if (!toolExists) {
276 |   ctx.status = 404;
277 |   ctx.type = 'application/json';
278 |   ctx.body = { success: false, error: `Tool "${toolName}" not found` };
279 |   return;
280 | }
281 | ```
282 | 
283 | #### 3.4 会话管理 (使用 Koa 中间件)
284 | 
285 | 我们使用一个自定义的 Koa 中间件来处理会话：
286 | 
287 | ```typescript
288 | // 在 startHttpServer 函数中
289 | const sessions = new Map<string, Server>();
290 | const sessionTimers = new Map<string, NodeJS.Timeout>();
291 | const sessionTimeout = 30 * 60 * 1000; // 30 分钟
292 | 
293 | app.use(async (ctx, next) => {
294 |  const sessionId = ctx.get('session-id') || 'default';
295 |  let server = sessions.get(sessionId);
296 | 
297 |  if (!server) {
298 |    console.log(`Creating new session: ${sessionId}`);
299 |    server = await serverList.create();
300 |    sessions.set(sessionId, server);
301 |  } else {
302 |    console.log(`Reusing session: ${sessionId}`);
303 |  }
304 | 
305 |  // 更新会话超时
306 |  if (sessionTimers.has(sessionId)) {
307 |    clearTimeout(sessionTimers.get(sessionId)!);
308 |  }
309 |  const timer = setTimeout(async () => {
310 |    console.log(`Session timed out: ${sessionId}`);
311 |    const serverToClose = sessions.get(sessionId);
312 |    if (serverToClose) {
313 |      await serverList.close(serverToClose);
314 |      sessions.delete(sessionId);
315 |    }
316 |    sessionTimers.delete(sessionId);
317 |  }, sessionTimeout);
318 |  sessionTimers.set(sessionId, timer);
319 | 
320 |  ctx.state.server = server; // 传递给后续处理
321 |  ctx.state.sessionId = sessionId;
322 |  await next(); // 调用下一个中间件或路由
323 | });
324 | ```
325 | 
326 | 优雅关闭处理（`process.on('SIGINT', ...)`）保持不变，确保在服务器停止时正确关闭所有浏览器实例和清理资源。
327 | 
328 | ### 4. 依赖项
329 | 
330 | 需要安装以下依赖项：
331 | 
332 | ```bash
333 | pnpm add koa @koa/router koa-bodyparser @koa/cors
334 | # 同时需要安装它们的类型定义（如果使用 TypeScript）
335 | pnpm add -D @types/koa @types/koa__router @types/koa-bodyparser @types/koa__cors
336 | ```
337 | 
338 | ### 5. 文档更新
339 | 
340 | 更新 `README.md`，添加以下内容：
341 | 
342 | 1. 新的命令行参数 `--http-port` 的说明
343 | 2. HTTP API 的使用方法和示例
344 | 3. 会话管理的说明
345 | 4. 与现有 SSE 传输的区别
346 | 
347 | ## 调用示例
348 | 
349 | ### 使用 curl
350 | 
351 | ```bash
352 | # 导航到指定 URL
353 | curl -X POST http://localhost:8080/tools/browser_navigate \
354 |      -H "Content-Type: application/json" \
355 |      -d '{
356 |            "url": "https://example.com"
357 |          }'
358 | 
359 | # 点击元素
360 | curl -X POST http://localhost:8080/tools/browser_click \
361 |      -H "Content-Type: application/json" \
362 |      -d '{
363 |            "element": "Login Button",
364 |            "ref": "button#login"
365 |          }'
366 | 
367 | # 在输入框中输入文本
368 | curl -X POST http://localhost:8080/tools/browser_type \
369 |      -H "Content-Type: application/json" \
370 |      -d '{
371 |            "element": "Username Input",
372 |            "ref": "input#username",
373 |            "text": "myUsername",
374 |            "submit": false
375 |          }'
376 | 
377 | # 获取标签列表
378 | curl -X GET http://localhost:8080/tools/browser_tab_list
379 | ```
380 | 
381 | ### 使用 JavaScript
382 | 
383 | ```javascript
384 | // 导航到指定 URL
385 | fetch('http://localhost:8080/tools/browser_navigate', {
386 |   method: 'POST',
387 |   headers: {
388 |     'Content-Type': 'application/json'
389 |   },
390 |   body: JSON.stringify({
391 |     url: 'https://example.com'
392 |   })
393 | })
394 | .then(response => response.json())
395 | .then(data => console.log(data));
396 | ```
397 | 
398 | ### 使用 Python
399 | 
400 | ```python
401 | import requests
402 | import json
403 | 
404 | # 导航到指定 URL
405 | response = requests.post(
406 |     'http://localhost:8080/tools/browser_navigate',
407 |     headers={'Content-Type': 'application/json'},
408 |     data=json.dumps({'url': 'https://example.com'})
409 | )
410 | print(response.json())
411 | ```
412 | 
413 | ## 技术架构图
414 | 
415 | ```
416 | +------------------+        HTTP Request        +-------------------------+
417 | |                  |  ----------------------->  |                         |
418 | |   HTTP Client    |                            |  Playwright MCP Server  |
419 | |  (curl, script)  |  <-----------------------  |   (with HTTP API)       |
420 | |                  |        HTTP Response       |                         |
421 | +------------------+                            +-------------------------+
422 |                                                             |
423 |                                                             | Controls
424 |                                                             v
425 |                                                 +-------------------------+
426 |                                                 |                         |
427 |                                                 |   Browser Instance      |
428 |                                                 |   (Chrome, Firefox)     |
429 |                                                 |                         |
430 |                                                 +-------------------------+
431 | ```
432 | 
433 | ## 技术实现细节 (Koa)
434 | 
435 | ### Koa 中间件流程
436 | 
437 | 1. **CORS 中间件**: 处理跨域请求头。
438 | 2. **Body Parser 中间件**: 解析请求体 (`ctx.request.body`)。
439 | 3. **会话管理中间件**:
440 |   - 获取 `session-id`。
441 |   - 获取或创建 `Server` 实例。
442 |   - 更新会话超时。
443 |   - 将 `server` 和 `sessionId` 存入 `ctx.state`。
444 |   - 调用 `next()`。
445 | 4. **Router 中间件**:
446 |   - 匹配路由 (`/tools/:toolName`)。
447 |   - 执行对应的路由处理函数。
448 |   - 路由处理函数从 `ctx.state` 获取 `server`，从 `ctx.params` 获取 `toolName`，从 `ctx.request.body` 获取参数。
449 |   - 调用 `server.callTool()`。
450 |   - 设置响应 `ctx.status` 和 `ctx.body`。
451 | 
452 | ### Server 类扩展
453 | 
454 | 为了支持 HTTP API，我们需要在 `Server` 类中添加一个便捷方法来调用工具（这部分与原计划相同）：
455 | 
456 | ```typescript
457 | // 在 src/server.ts 中扩展 Server 类
458 | Server.prototype.callTool = async function(name: string, args: any) {
459 |   const result = await this.handleRequest({
460 |     jsonrpc: '2.0',
461 |     id: String(Date.now()),
462 |     method: 'tools/call',
463 |     params: {
464 |       name,
465 |       arguments: args
466 |     }
467 |   });
468 |   
469 |   if (result.error) {
470 |     throw new Error(result.error.message);
471 |   }
472 |   
473 |   return result.result;
474 | };
475 | ```
476 | 
477 | ### 工具调用流程
478 | 
479 | 当通过 HTTP API 调用工具时，完整的流程如下：
480 | 
481 | 1. 客户端发送 HTTP 请求到 `/tools/{tool_name}`
482 | 2. HTTP 服务器解析请求，提取工具名称和参数
483 | 3. 服务器根据会话 ID 获取或创建 `Server` 实例
484 | 4. 服务器调用 `server.callTool(toolName, params)`
485 | 5. `callTool` 方法创建一个 MCP 请求并调用 `server.handleRequest`
486 | 6. `handleRequest` 方法将请求分发给相应的请求处理程序（在这里是 `CallToolRequestSchema` 处理程序）
487 | 7. 请求处理程序查找匹配的工具并调用其 `handle` 方法
488 | 8. 工具的 `handle` 方法使用 `Context` 实例执行操作（如导航、点击等）
489 | 9. 结果返回给客户端
490 | 
491 | ### 会话管理详解 (Koa 中间件)
492 | 
493 | 会话管理通过自定义的 Koa 中间件实现：
494 | 
495 | 1. **会话标识**：从请求头 `ctx.get('session-id')` 读取。
496 | 2. **会话存储**：使用 `sessions` Map。
497 | 3. **会话创建/复用**：在中间件中处理。
498 | 4. **会话超时**：使用 `sessionTimers` Map 和 `setTimeout` 实现。每次访问时重置定时器。
499 | 5. **会话传递**：通过 `ctx.state.server` 将 `Server` 实例传递给路由处理函数。
500 | 6. **会话清理**：通过 `process.on('SIGINT', ...)` 处理。
501 | 
502 | ## 后续详细步骤及验证方法
503 | 
504 | ### 阶段一：基础框架搭建与验证
505 | 
506 | 1.  **安装依赖项**
507 |    *   **操作**: 执行 `pnpm add koa @koa/router koa-bodyparser @koa/cors && pnpm add -D @types/koa @types/koa__router @types/koa-bodyparser @types/koa__cors`
508 |    *   **验证**:
509 |        *   检查 `package.json` 的 `dependencies` 和 `devDependencies` 是否包含新添加的包。
510 |        *   检查 `pnpm-lock.yaml` 文件是否已更新。
511 |        *   运行 `pnpm install` 确保没有报错。
512 | 
513 | 2.  **修改 `src/program.ts`**
514 |    *   **操作**:
515 |        *   使用 `commander` 添加 `--http-port <port>` 选项。
516 |        *   导入 `startHttpServer` 函数 (稍后创建)。
517 |        *   在 `program.action` 的逻辑中，添加对 `options.httpPort` 的判断，如果存在则调用 `startHttpServer`。
518 |    *   **验证**:
519 |        *   运行 `node cli.js --help`，检查输出是否包含 `--http-port` 选项。
520 |        *   (暂时无法完全验证，需等待 `startHttpServer` 实现) 尝试运行 `node cli.js --http-port 8080`，预期不应立即报错（可能因 `startHttpServer` 未定义而失败，这是正常的）。
521 | 
522 | 3.  **创建并实现 `src/httpServer.ts` (基础结构)**
523 |    *   **操作**:
524 |        *   创建新文件 `src/httpServer.ts`。
525 |        *   实现 `startHttpServer` 函数的基本框架：
526 |            *   导入 `Koa`, `Router`, `bodyParser`, `cors`, `http`。
527 |            *   创建 Koa 实例 (`app`) 和 Router 实例 (`router`)。
528 |            *   使用 `cors()` 和 `bodyParser()` 中间件。
529 |            *   添加一个临时的根路由 (`router.get('/', ctx => { ctx.body = 'OK'; })`) 用于测试。
530 |            *   应用路由 (`app.use(router.routes()).use(router.allowedMethods())`)。
531 |            *   创建 HTTP 服务器 (`http.createServer(app.callback())`)。
532 |            *   启动服务器监听指定端口，并打印日志。
533 |        *   在 `src/program.ts` 中正确导入 `startHttpServer`。
534 |    *   **验证**:
535 |        *   运行 `node cli.js --http-port 8080`。
536 |        *   检查控制台是否输出 "HTTP API server listening on port 8080"。
537 |        *   使用 `curl http://localhost:8080/`，预期收到 "OK"。
538 |        *   使用 `curl -v -X OPTIONS http://localhost:8080/`，检查响应头是否包含正确的 CORS 头 (e.g., `Access-Control-Allow-Origin: *`)。
539 |        *   使用 `curl -X POST http://localhost:8080/ -H "Content-Type: application/json" -d '{"test":1}'`，预期不会报错（即使路由不存在，body-parser 应该能处理）。
540 | 
541 | ### 阶段二：核心功能实现与验证
542 | 
543 | 4.  **扩展 `Server` 类添加 `callTool` 方法**
544 |    *   **操作**:
545 |        *   打开 `src/server.ts`。
546 |        *   在 `Server` 类或其原型上添加 `async callTool(name: string, args: any)` 方法，实现如 `PLAN.md` 中所示的逻辑（构造 MCP 请求，调用 `handleRequest`，处理结果/错误）。
547 |    *   **验证**:
548 |        *   **单元测试**: 编写针对 `Server.prototype.callTool` 的单元测试。模拟 `handleRequest` 方法，测试 `callTool` 在不同输入（有效工具名/参数，无效工具名，`handleRequest` 返回错误等）下的行为。运行单元测试并确保通过。
549 | 
550 | 5.  **实现会话管理中间件**
551 |    *   **操作**:
552 |        *   在 `src/httpServer.ts` 的 `startHttpServer` 函数中，在 `bodyParser` 之后、路由之前，添加会话管理中间件 (`app.use(async (ctx, next) => { ... })`)。
553 |        *   实现中间件逻辑：获取 `session-id`，管理 `sessions` Map 和 `sessionTimers` Map，创建/复用 `Server` 实例，更新超时，将 `server` 和 `sessionId` 存入 `ctx.state`。
554 |    *   **验证**:
555 |        *   运行 `node cli.js --http-port 8080`。
556 |        *   **首次请求**: 使用 `curl http://localhost:8080/` (或其他已定义的路由)，检查服务器日志是否输出 "Creating new session: default"。
557 |        *   **带 ID 的首次请求**: 使用 `curl -H "Session-Id: test1234" http://localhost:8080/`，检查日志是否输出 "Creating new session: test1234"。
558 |        *   **会话复用**: 再次发送相同 `Session-Id` 的请求 (`curl -H "Session-Id: test1234" http://localhost:8080/`)，检查日志是否输出 "Reusing session: test1234"。
559 |        *   **会话超时**: (需要将 `sessionTimeout` 临时调小，例如 5 秒) 发送一个请求，等待超过超时时间，检查日志是否输出 "Session timed out: ..."。再次发送相同 `Session-Id` 的请求，检查日志是否输出 "Creating new session: ..."。
560 | 
561 | 6.  **实现工具调用路由 (`POST /tools/:toolName`)**
562 |    *   **操作**:
563 |        *   在 `src/httpServer.ts` 中，移除临时根路由，添加 `router.post('/tools/:toolName', async (ctx) => { ... })`。
564 |        *   实现路由处理逻辑：从 `ctx.params`, `ctx.request.body`, `ctx.state` 获取所需信息，调用 `ctx.state.server.callTool()`，处理成功/错误响应。
565 |        *   (可选) 添加工具存在性检查。
566 |    *   **验证**:
567 |        *   运行 `node cli.js --http-port 8080`。
568 |        *   **调用有效工具 (无参数)**: `curl -X POST http://localhost:8080/tools/browser_snapshot`，预期收到 `{"success":true, "result":{...}}` (具体 result 取决于 snapshot 内容)。
569 |        *   **调用有效工具 (带参数)**: `curl -X POST -H "Content-Type: application/json" -d '{"url":"about:blank"}' http://localhost:8080/tools/browser_navigate`，预期收到 `{"success":true, "result":null}` 或类似成功响应。
570 |        *   **调用无效工具**: `curl -X POST http://localhost:8080/tools/invalid_tool_name`，预期收到 `{"success":false, "error":"Tool \"invalid_tool_name\" not found"}` (如果做了检查) 或其他 500 错误。
571 |        *   **调用带无效参数**: `curl -X POST -H "Content-Type: application/json" -d '{"invalid_param":"foo"}' http://localhost:8080/tools/browser_navigate`，预期收到 `{"success":false, "error":"..."}` (具体的错误信息取决于 Playwright 或工具本身的校验)。
572 |        *   **使用会话**:
573 |            *   `curl -H "Session-Id: nav-test" -X POST -H "Content-Type: application/json" -d '{"url":"https://example.com"}' http://localhost:8080/tools/browser_navigate`
574 |            *   `curl -H "Session-Id: nav-test" -X POST http://localhost:8080/tools/browser_snapshot` (检查快照是否为 example.com)
575 | 
576 | 7.  **实现特定路由 (`GET /tools/browser_tab_list`)**
577 |    *   **操作**: 在 `src/httpServer.ts` 中添加 `router.get('/tools/browser_tab_list', async (ctx) => { ... })`。
578 |    *   **验证**:
579 |        *   运行 `node cli.js --http-port 8080`。
580 |        *   `curl http://localhost:8080/tools/browser_tab_list`，预期收到 `{"success":true, "result":{ "tabs": [...] }}`。
581 |        *   (可选) 使用 POST 调用 `browser_tab_new` 创建新标签页，然后再次 GET `browser_tab_list` 验证列表是否更新。
582 | 
583 | 8.  **实现优雅关闭 (`SIGINT` 处理)**
584 |    *   **操作**: 在 `src/httpServer.ts` 的 `startHttpServer` 中添加 `process.on('SIGINT', ...)` 逻辑，确保关闭 HTTP 服务器、清理所有会话和定时器。
585 |    *   **验证**:
586 |        *   运行 `node cli.js --http-port 8080`。
587 |        *   创建几个会话 (使用不同 `Session-Id` 发送请求)。
588 |        *   按 `Ctrl+C` 终止服务器。
589 |        *   检查服务器日志是否输出 "Closing HTTP server..." 以及每个活动会话的 "Closing session: ..." 日志。
590 |        *   检查进程是否正常退出 (退出码 0)。
591 | 
592 | ### 阶段三：测试与文档
593 | 
594 | 9.  **编写集成测试**
595 |    *   **操作**:
596 |        *   在 `tests/` 目录下创建新的测试文件，例如 `tests/httpApi.spec.ts`。
597 |        *   使用测试框架 (如 Playwright Test 自带的) 和 HTTP 请求库 (如 `node-fetch` 或 `axios`) 编写测试用例。
598 |        *   测试用例应覆盖：
599 |            *   启动带 `--http-port` 的服务器。
600 |            *   调用各种工具 (GET 和 POST)。
601 |            *   验证成功和失败的响应。
602 |            *   测试会话管理（使用不同 `Session-Id`）。
603 |            *   测试错误处理 (无效工具、无效参数)。
604 |    *   **验证**: 运行 `pnpm test` (或具体的测试命令)，确保所有 HTTP API 相关测试用例通过。
605 | 
606 | 10. **更新文档 (`README.md`)**
607 |    *   **操作**:
608 |        *   添加关于 `--http-port` 命令行参数的说明。
609 |        *   添加 HTTP API 的使用方法：端点、请求/响应格式、会话管理 (`Session-Id` 头)。
610 |        *   提供 `curl`、JavaScript (`fetch`)、Python (`requests`) 的调用示例。
611 |        *   说明与 SSE 传输的区别。
612 |    *   **验证**: 人工审阅 `README.md`，确保信息准确、清晰、完整，示例可运行。
613 | 
614 | ### 阶段四：发布
615 | 
616 | 11. **准备发布**
617 |    *   **操作**:
618 |        *   确保所有代码已提交，并且所有测试通过。
619 |        *   更新 `package.json` 中的 `version` 字段。
620 |        *   (可选) 更新 `CHANGELOG.md`。
621 |    *   **验证**:
622 |        *   检查 `git status` 是否干净。
623 |        *   确认 `pnpm test` 通过。
624 |        *   检查 `package.json` 中的版本号。
625 | 
626 | 12. **发布到 npm**
627 |    *   **操作**: 运行 `pnpm publish` (可能需要先登录 npm)。
628 |    *   **验证**: 在 npmjs.com 上检查新版本是否已发布成功。尝试使用 `npx playwright-mcp-bypass@<new_version> --http-port 8080` 运行新版本。
629 | ## 当前进度
630 | 
631 | - **当前阶段**: 阶段一：基础框架搭建与验证
632 | - **当前步骤**: 1. 安装依赖项 (待开始)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## playwright-mcp-bypass
  2 | 
  3 | A Model Context Protocol (MCP) server that provides browser automation capabilities using [Playwright](https://playwright.dev), forked to enhance anti-bot detection bypass. This server enables LLMs to interact with web pages through structured accessibility snapshots.
  4 | 
  5 | ### Key Features
  6 | 
  7 | - **Fast and lightweight**: Uses Playwright's accessibility tree, not pixel-based input.
  8 | - **LLM-friendly**: No vision models needed, operates purely on structured data.
  9 | - **Deterministic tool application**: Avoids ambiguity common with screenshot-based approaches.
 10 | 
 11 | ### Project Information
 12 | 
 13 | This project (`playwright-mcp-bypass`) is maintained by [yan5xu](https://x.com/yan5xu) and is available at [yan5xu/playwright-mcp-bypass](https://github.com/yan5xu/playwright-mcp-bypass).
 14 | 
 15 | It originated as a fork of `microsoft/playwright-mcp` with the primary goal of enhancing the ability to bypass anti-bot detection mechanisms employed by some websites. This is achieved by adding the `--disable-blink-features=AutomationControlled` argument to the browser launch options, making the automated browser appear more like a regular user's browser.
 16 | ### Use Cases
 17 | 
 18 | - Web navigation and form-filling
 19 | - Data extraction from structured content
 20 | - Automated testing driven by LLMs
 21 | - General-purpose browser interaction for agents
 22 | 
 23 | ### Example config
 24 | 
 25 | ```js
 26 | {
 27 |   "mcpServers": {
 28 |     "playwright-mcp-bypass": {
 29 |       "command": "npx",
 30 |       "args": [
 31 |         "playwright-mcp-bypass@latest"
 32 |       ]
 33 |     }
 34 |   }
 35 | }
 36 | ```
 37 | 
 38 | 
 39 | #### Installation in VS Code
 40 | 
 41 | Install the Playwright MCP server in VS Code using one of these buttons:
 42 | 
 43 | <!--
 44 | // Generate using?:
 45 | const config = JSON.stringify({ name: 'playwright-mcp-bypass', command: 'npx', args: ["-y", "playwright-mcp-bypass@latest"] });
 46 | const urlForWebsites = `vscode:mcp/install?${encodeURIComponent(config)}`;
 47 | // Github markdown does not allow linking to `vscode:` directly, so you can use our redirect:
 48 | const urlForGithub = `https://insiders.vscode.dev/redirect?url=${encodeURIComponent(urlForWebsites)}`;
 49 | -->
 50 | 
 51 | [<img src="https://img.shields.io/badge/VS_Code-VS_Code?style=flat-square&label=Install%20Server&color=0098FF" alt="Install in VS Code">](https://insiders.vscode.dev/redirect?url=vscode%3Amcp%2Finstall%3F%257B%2522name%2522%253A%2522playwright-mcp-bypass%2522%252C%2522command%2522%253A%2522npx%2522%252C%2522args%2522%253A%255B%2522-y%2522%252C%2522playwright-mcp-bypass%2540latest%2522%255D%257D)  [<img alt="Install in VS Code Insiders" src="https://img.shields.io/badge/VS_Code_Insiders-VS_Code_Insiders?style=flat-square&label=Install%20Server&color=24bfa5">](https://insiders.vscode.dev/redirect?url=vscode-insiders%3Amcp%2Finstall%3F%257B%2522name%2522%253A%2522playwright-mcp-bypass%2522%252C%2522command%2522%253A%2522npx%2522%252C%2522args%2522%253A%255B%2522-y%2522%252C%2522playwright-mcp-bypass%2540latest%2522%255D%257D)
 52 | 
 53 | Alternatively, you can install the Playwright MCP server using the VS Code CLI:
 54 | 
 55 | ```bash
 56 | # For VS Code
 57 | code --add-mcp '{"name":"playwright-mcp-bypass","command":"npx","args":["playwright-mcp-bypass@latest"]}'
 58 | ```
 59 | 
 60 | ```bash
 61 | # For VS Code Insiders
 62 | code-insiders --add-mcp '{"name":"playwright-mcp-bypass","command":"npx","args":["playwright-mcp-bypass@latest"]}'
 63 | ```
 64 | 
 65 | After installation, the Playwright MCP server will be available for use with your GitHub Copilot agent in VS Code.
 66 | 
 67 | ### CLI Options
 68 | 
 69 | The Playwright MCP server supports the following command-line options:
 70 | 
 71 | - `--browser <browser>`: Browser or chrome channel to use. Possible values:
 72 |   - `chrome`, `firefox`, `webkit`, `msedge`
 73 |   - Chrome channels: `chrome-beta`, `chrome-canary`, `chrome-dev`
 74 |   - Edge channels: `msedge-beta`, `msedge-canary`, `msedge-dev`
 75 |   - Default: `chrome`
 76 | - `--caps <caps>`: Comma-separated list of capabilities to enable, possible values: tabs, pdf, history, wait, files, install. Default is all.
 77 | - `--cdp-endpoint <endpoint>`: CDP endpoint to connect to
 78 | - `--executable-path <path>`: Path to the browser executable
 79 | - `--headless`: Run browser in headless mode (headed by default)
 80 | - `--port <port>`: Port to listen on for SSE transport
 81 | - `--http-port <port>`: Port to listen on for the request/response HTTP API.
 82 | - `--user-data-dir <path>`: Path to the user data directory
 83 | - `--vision`: Run server that uses screenshots (Aria snapshots are used by default)
 84 | 
 85 | ### User data directory
 86 | 
 87 | Playwright MCP will launch the browser with a new profile, located at:
 88 | 
 89 | ```
 90 | - `%USERPROFILE%\AppData\Local\ms-playwright\mcp-chrome-profile` on Windows
 91 | - `~/Library/Caches/ms-playwright/mcp-chrome-profile` on macOS
 92 | - `~/.cache/ms-playwright/mcp-chrome-profile` on Linux
 93 | ```
 94 | 
 95 | All login information is stored in that profile; you can delete it between sessions if you'd like to clear the offline state.
 96 | 
 97 | 
 98 | ### Running headless browser (browser without GUI)
 99 | 
100 | This mode is useful for background or batch operations.
101 | 
102 | ```js
103 | {
104 |   "mcpServers": {
105 |     "playwright-mcp-bypass": {
106 |       "command": "npx",
107 |       "args": [
108 |         "playwright-mcp-bypass@latest",
109 |         "--headless"
110 |       ]
111 |     }
112 |   }
113 | }
114 | ```
115 | 
116 | ### Running headed browser on Linux w/o DISPLAY
117 | 
118 | When running a headed browser on a system without a display, or from worker processes of IDEs,
119 | run the MCP server from an environment that has `DISPLAY` set and pass the `--port` flag to enable SSE transport.
120 | 
121 | ```bash
122 | npx playwright-mcp-bypass@latest --port 8931
123 | ```
124 | 
125 | And then in MCP client config, set the `url` to the SSE endpoint:
126 | 
127 | ```js
128 | {
129 |   "mcpServers": {
130 |     "playwright-mcp-bypass": {
131 |       "url": "http://localhost:8931/sse"
132 |     }
133 |   }
134 | }
135 | ```
136 | ### HTTP API Usage (Request/Response)
137 | In addition to the default Stdio transport and the SSE transport (`--port`), this server provides a standard HTTP API for request/response interactions. This is useful for clients that prefer simple HTTP calls over persistent connections.
138 | 
139 | #### Enabling the HTTP API
140 | 
141 | To enable the HTTP API, use the `--http-port` command-line option:
142 | 
143 | ```bash
144 | npx playwright-mcp-bypass@latest --http-port 8080
145 | ```
146 | 
147 | The server will then listen on the specified port (e.g., 8080) for incoming HTTP requests.
148 | 
149 | #### Endpoints
150 | 
151 | - **Base Path**: `/tools/{tool_name}`
152 | - **Method**:
153 |     - `POST`: Used for executing most tools. Tool parameters are sent in the JSON request body.
154 |     - `GET`: Can be used for specific read-only tools like `browser_tab_list`. No request body is needed.
155 | - **Examples**:
156 |     - `POST /tools/browser_navigate`
157 |     - `POST /tools/browser_click`
158 |     - `GET /tools/browser_tab_list`
159 | 
160 | #### Request Format (POST)
161 | 
162 | - **Headers**:
163 |     - `Content-Type: application/json`
164 |     - `Session-Id: <your_session_id>` (Optional, see Session Management)
165 | - **Body**: A JSON object containing the parameters required by the specific tool.
166 | 
167 | Example (`browser_navigate`):
168 | ```json
169 | {
170 |   "url": "https://example.com"
171 | }
172 | ```
173 | 
174 | Example (`browser_click`):
175 | ```json
176 | {
177 |   "element": "Login Button",
178 |   "ref": "button#login"
179 | }
180 | ```
181 | 
182 | #### Response Format
183 | 
184 | - **Content-Type**: `application/json`
185 | - **Success (HTTP 200)**:
186 |   ```json
187 |   {
188 |     "success": true,
189 |     "result": { ... } // The result returned by the tool execution
190 |   }
191 |   ```
192 | - **Error (HTTP 4xx/5xx)**:
193 |   ```json
194 |   {
195 |     "success": false,
196 |     "error": "Error message describing the failure"
197 |   }
198 |   ```
199 | 
200 | #### Session Management
201 | 
202 | The HTTP API manages browser state using sessions. Each session corresponds to an independent browser instance with its own context (unless a global `--user-data-dir` is specified).
203 | 
204 | - **Session ID**: Sessions are identified by the `Session-Id` HTTP header in the request.
205 | - **Default Session**: If the `Session-Id` header is not provided, a default session named `"default"` is used.
206 | - **Session Creation**: A new browser instance is automatically created when a request with a previously unseen `Session-Id` (or no ID for the default session) is received.
207 | - **Session Reuse**: Subsequent requests with the same `Session-Id` will reuse the existing browser instance for that session.
208 | - **Session Timeout**: Sessions automatically time out and close after 30 minutes of inactivity to conserve resources. Any request to an active session resets the timer.
209 | - **User Data Directory**: By default, each session gets its own isolated user data directory (e.g., `~/.cache/ms-playwright/mcp-chromium-profile-<session_id>`). If you specify `--user-data-dir` when starting the server, *all* HTTP sessions will share that single directory, which can lead to conflicts and is generally not recommended for concurrent sessions.
210 | 
211 | #### Comparison with SSE Transport (`--port`)
212 | 
213 | - **SSE (`--port`)**: Establishes a persistent connection per client. State (browser instance) is tied to the connection lifetime. Communication is typically streaming (server sends events).
214 | - **HTTP API (`--http-port`)**: Uses standard request/response cycles. State is managed via the `Session-Id` header and has a timeout. Simpler for clients that don't need persistent connections.
215 | 
216 | #### Examples
217 | 
218 | ##### curl
219 | 
220 | ```bash
221 | # Navigate (uses default session if Session-Id header is omitted)
222 | curl -X POST http://localhost:8080/tools/browser_navigate \
223 |      -H "Content-Type: application/json" \
224 |      -d '{ "url": "https://example.com" }'
225 | 
226 | # Click an element in a specific session
227 | curl -X POST http://localhost:8080/tools/browser_click \
228 |      -H "Content-Type: application/json" \
229 |      -H "Session-Id: my-session-123" \
230 |      -d '{ "element": "Login Button", "ref": "button#login" }'
231 | 
232 | # Get tab list (GET request, uses default session)
233 | curl http://localhost:8080/tools/browser_tab_list
234 | 
235 | # Get tab list for a specific session
236 | curl -H "Session-Id: my-session-123" http://localhost:8080/tools/browser_tab_list
237 | ```
238 | 
239 | ##### JavaScript (fetch)
240 | 
241 | ```javascript
242 | // Navigate in default session
243 | fetch('http://localhost:8080/tools/browser_navigate', {
244 |   method: 'POST',
245 |   headers: {
246 |     'Content-Type': 'application/json'
247 |   },
248 |   body: JSON.stringify({ url: 'https://example.com' })
249 | })
250 | .then(response => response.json())
251 | .then(data => console.log(data));
252 | 
253 | // Type text in a specific session
254 | fetch('http://localhost:8080/tools/browser_type', {
255 |   method: 'POST',
256 |   headers: {
257 |     'Content-Type': 'application/json',
258 |     'Session-Id': 'user-abc-session'
259 |   },
260 |   body: JSON.stringify({
261 |     element: "Search Input",
262 |     ref: "input[name='q']",
263 |     text: "Playwright MCP"
264 |   })
265 | })
266 | .then(response => response.json())
267 | .then(data => console.log(data));
268 | 
269 | // Get tab list for a specific session
270 | fetch('http://localhost:8080/tools/browser_tab_list', {
271 |   headers: {
272 |     'Session-Id': 'user-abc-session'
273 |   }
274 | })
275 | .then(response => response.json())
276 | .then(data => console.log(data));
277 | ```
278 | 
279 | ##### Python (requests)
280 | 
281 | ```python
282 | import requests
283 | import json
284 | 
285 | base_url = 'http://localhost:8080/tools'
286 | session_id = 'python-session-456'
287 | 
288 | # Navigate in a specific session
289 | headers = {
290 |     'Content-Type': 'application/json',
291 |     'Session-Id': session_id
292 | }
293 | payload = {'url': 'https://github.com'}
294 | response = requests.post(f'{base_url}/browser_navigate', headers=headers, data=json.dumps(payload))
295 | print(response.json())
296 | 
297 | # Get tab list for the same session
298 | headers_no_content = {'Session-Id': session_id}
299 | response = requests.get(f'{base_url}/browser_tab_list', headers=headers_no_content)
300 | print(response.json())
301 | ```
302 | 
303 | 
304 | ### Tool Modes
305 | 
306 | The tools are available in two modes:
307 | 
308 | 1. **Snapshot Mode** (default): Uses accessibility snapshots for better performance and reliability
309 | 2. **Vision Mode**: Uses screenshots for visual-based interactions
310 | 
311 | To use Vision Mode, add the `--vision` flag when starting the server:
312 | 
313 | ```js
314 | {
315 |   "mcpServers": {
316 |     "playwright-mcp-bypass": {
317 |       "command": "npx",
318 |       "args": [
319 |         "playwright-mcp-bypass@latest",
320 |         "--vision"
321 |       ]
322 |     }
323 |   }
324 | }
325 | ```
326 | 
327 | Vision Mode works best with the computer use models that are able to interact with elements using
328 | X Y coordinate space, based on the provided screenshot.
329 | 
330 | ### Programmatic usage with custom transports
331 | 
332 | ```js
333 | import { createServer } from 'playwright-mcp-bypass';
334 | 
335 | // ...
336 | 
337 | const server = createServer({
338 |   launchOptions: { headless: true }
339 | });
340 | transport = new SSEServerTransport("/messages", res);
341 | server.connect(transport);
342 | ```
343 | 
344 | ### Snapshot-based Interactions
345 | 
346 | - **browser_click**
347 |   - Description: Perform click on a web page
348 |   - Parameters:
349 |     - `element` (string): Human-readable element description used to obtain permission to interact with the element
350 |     - `ref` (string): Exact target element reference from the page snapshot
351 | 
352 | - **browser_hover**
353 |   - Description: Hover over element on page
354 |   - Parameters:
355 |     - `element` (string): Human-readable element description used to obtain permission to interact with the element
356 |     - `ref` (string): Exact target element reference from the page snapshot
357 | 
358 | - **browser_drag**
359 |   - Description: Perform drag and drop between two elements
360 |   - Parameters:
361 |     - `startElement` (string): Human-readable source element description used to obtain permission to interact with the element
362 |     - `startRef` (string): Exact source element reference from the page snapshot
363 |     - `endElement` (string): Human-readable target element description used to obtain permission to interact with the element
364 |     - `endRef` (string): Exact target element reference from the page snapshot
365 | 
366 | - **browser_type**
367 |   - Description: Type text into editable element
368 |   - Parameters:
369 |     - `element` (string): Human-readable element description used to obtain permission to interact with the element
370 |     - `ref` (string): Exact target element reference from the page snapshot
371 |     - `text` (string): Text to type into the element
372 |     - `submit` (boolean, optional): Whether to submit entered text (press Enter after)
373 |     - `slowly` (boolean, optional): Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.
374 | 
375 | - **browser_select_option**
376 |   - Description: Select an option in a dropdown
377 |   - Parameters:
378 |     - `element` (string): Human-readable element description used to obtain permission to interact with the element
379 |     - `ref` (string): Exact target element reference from the page snapshot
380 |     - `values` (array): Array of values to select in the dropdown. This can be a single value or multiple values.
381 | 
382 | - **browser_snapshot**
383 |   - Description: Capture accessibility snapshot of the current page, this is better than screenshot
384 |   - Parameters: None
385 | 
386 | - **browser_take_screenshot**
387 |   - Description: Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.
388 |   - Parameters:
389 |     - `raw` (boolean, optional): Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.
390 | 
391 | ### Vision-based Interactions
392 | 
393 | - **browser_screen_move_mouse**
394 |   - Description: Move mouse to a given position
395 |   - Parameters:
396 |     - `element` (string): Human-readable element description used to obtain permission to interact with the element
397 |     - `x` (number): X coordinate
398 |     - `y` (number): Y coordinate
399 | 
400 | - **browser_screen_capture**
401 |   - Description: Take a screenshot of the current page
402 |   - Parameters: None
403 | 
404 | - **browser_screen_click**
405 |   - Description: Click left mouse button
406 |   - Parameters:
407 |     - `element` (string): Human-readable element description used to obtain permission to interact with the element
408 |     - `x` (number): X coordinate
409 |     - `y` (number): Y coordinate
410 | 
411 | - **browser_screen_drag**
412 |   - Description: Drag left mouse button
413 |   - Parameters:
414 |     - `element` (string): Human-readable element description used to obtain permission to interact with the element
415 |     - `startX` (number): Start X coordinate
416 |     - `startY` (number): Start Y coordinate
417 |     - `endX` (number): End X coordinate
418 |     - `endY` (number): End Y coordinate
419 | 
420 | - **browser_screen_type**
421 |   - Description: Type text
422 |   - Parameters:
423 |     - `text` (string): Text to type
424 |     - `submit` (boolean, optional): Whether to submit entered text (press Enter after)
425 | 
426 | - **browser_press_key**
427 |   - Description: Press a key on the keyboard
428 |   - Parameters:
429 |     - `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a`
430 | 
431 | ### Tab Management
432 | 
433 | - **browser_tab_list**
434 |   - Description: List browser tabs
435 |   - Parameters: None
436 | 
437 | - **browser_tab_new**
438 |   - Description: Open a new tab
439 |   - Parameters:
440 |     - `url` (string, optional): The URL to navigate to in the new tab. If not provided, the new tab will be blank.
441 | 
442 | - **browser_tab_select**
443 |   - Description: Select a tab by index
444 |   - Parameters:
445 |     - `index` (number): The index of the tab to select
446 | 
447 | - **browser_tab_close**
448 |   - Description: Close a tab
449 |   - Parameters:
450 |     - `index` (number, optional): The index of the tab to close. Closes current tab if not provided.
451 | 
452 | ### Navigation
453 | 
454 | - **browser_navigate**
455 |   - Description: Navigate to a URL
456 |   - Parameters:
457 |     - `url` (string): The URL to navigate to
458 | 
459 | - **browser_navigate_back**
460 |   - Description: Go back to the previous page
461 |   - Parameters: None
462 | 
463 | - **browser_navigate_forward**
464 |   - Description: Go forward to the next page
465 |   - Parameters: None
466 | 
467 | ### Keyboard
468 | 
469 | - **browser_press_key**
470 |   - Description: Press a key on the keyboard
471 |   - Parameters:
472 |     - `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a`
473 | 
474 | ### Files and Media
475 | 
476 | - **browser_file_upload**
477 |   - Description: Choose one or multiple files to upload
478 |   - Parameters:
479 |     - `paths` (array): The absolute paths to the files to upload. Can be a single file or multiple files.
480 | 
481 | - **browser_pdf_save**
482 |   - Description: Save page as PDF
483 |   - Parameters: None
484 | 
485 | ### Utilities
486 | 
487 | - **browser_wait**
488 |   - Description: Wait for a specified time in seconds
489 |   - Parameters:
490 |     - `time` (number): The time to wait in seconds (capped at 10 seconds)
491 | 
492 | - **browser_close**
493 |   - Description: Close the page
494 |   - Parameters: None
495 | 
496 | - **browser_install**
497 |   - Description: Install the browser specified in the config. Call this if you get an error about the browser not being installed.
498 |   - Parameters: None
499 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | <!-- BEGIN MICROSOFT SECURITY.MD V0.0.9 BLOCK -->
 2 | 
 3 | ## Security
 4 | 
 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin).
 6 | 
 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below.
 8 | 
 9 | ## Reporting Security Issues
10 | 
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 | 
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report).
14 | 
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com).  If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp).
16 | 
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 
18 | 
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 | 
21 |   * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 |   * Full paths of source file(s) related to the manifestation of the issue
23 |   * The location of the affected source code (tag/branch/commit or direct URL)
24 |   * Any special configuration required to reproduce the issue
25 |   * Step-by-step instructions to reproduce the issue
26 |   * Proof-of-concept or exploit code (if possible)
27 |   * Impact of the issue, including how an attacker might exploit the issue
28 | 
29 | This information will help us triage your report more quickly.
30 | 
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs.
32 | 
33 | ## Preferred Languages
34 | 
35 | We prefer all communications to be in English.
36 | 
37 | ## Policy
38 | 
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd).
40 | 
41 | <!-- END MICROSOFT SECURITY.MD BLOCK -->
42 | 


--------------------------------------------------------------------------------
/cli.js:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | /**
 3 |  * Copyright (c) Microsoft Corporation.
 4 |  *
 5 |  * Licensed under the Apache License, Version 2.0 (the "License");
 6 |  * you may not use this file except in compliance with the License.
 7 |  * You may obtain a copy of the License at
 8 |  *
 9 |  * http://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
18 | require('./lib/program');
19 | 


--------------------------------------------------------------------------------
/eslint.config.mjs:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) Microsoft Corporation.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | import typescriptEslint from "@typescript-eslint/eslint-plugin";
 18 | import tsParser from "@typescript-eslint/parser";
 19 | import notice from "eslint-plugin-notice";
 20 | import path from "path";
 21 | import { fileURLToPath } from "url";
 22 | import stylistic from "@stylistic/eslint-plugin";
 23 | import importRules from "eslint-plugin-import";
 24 | 
 25 | const __filename = fileURLToPath(import.meta.url);
 26 | const __dirname = path.dirname(__filename);
 27 | 
 28 | const plugins = {
 29 |   "@stylistic": stylistic,
 30 |   "@typescript-eslint": typescriptEslint,
 31 |   notice,
 32 |   import: importRules,
 33 | };
 34 | 
 35 | export const baseRules = {
 36 |   "@typescript-eslint/no-unused-vars": [
 37 |     2,
 38 |     { args: "none", caughtErrors: "none" },
 39 |   ],
 40 | 
 41 |   /**
 42 |    * Enforced rules
 43 |    */
 44 |   // syntax preferences
 45 |   "object-curly-spacing": ["error", "always"],
 46 |   quotes: [
 47 |     2,
 48 |     "single",
 49 |     {
 50 |       avoidEscape: true,
 51 |       allowTemplateLiterals: true,
 52 |     },
 53 |   ],
 54 |   "jsx-quotes": [2, "prefer-single"],
 55 |   "no-extra-semi": 2,
 56 |   "@stylistic/semi": [2],
 57 |   "comma-style": [2, "last"],
 58 |   "wrap-iife": [2, "inside"],
 59 |   "spaced-comment": [
 60 |     2,
 61 |     "always",
 62 |     {
 63 |       markers: ["*"],
 64 |     },
 65 |   ],
 66 |   eqeqeq: [2],
 67 |   "accessor-pairs": [
 68 |     2,
 69 |     {
 70 |       getWithoutSet: false,
 71 |       setWithoutGet: false,
 72 |     },
 73 |   ],
 74 |   "brace-style": [2, "1tbs", { allowSingleLine: true }],
 75 |   curly: [2, "multi-or-nest", "consistent"],
 76 |   "new-parens": 2,
 77 |   "arrow-parens": [2, "as-needed"],
 78 |   "prefer-const": 2,
 79 |   "quote-props": [2, "consistent"],
 80 |   "nonblock-statement-body-position": [2, "below"],
 81 | 
 82 |   // anti-patterns
 83 |   "no-var": 2,
 84 |   "no-with": 2,
 85 |   "no-multi-str": 2,
 86 |   "no-caller": 2,
 87 |   "no-implied-eval": 2,
 88 |   "no-labels": 2,
 89 |   "no-new-object": 2,
 90 |   "no-octal-escape": 2,
 91 |   "no-self-compare": 2,
 92 |   "no-shadow-restricted-names": 2,
 93 |   "no-cond-assign": 2,
 94 |   "no-debugger": 2,
 95 |   "no-dupe-keys": 2,
 96 |   "no-duplicate-case": 2,
 97 |   "no-empty-character-class": 2,
 98 |   "no-unreachable": 2,
 99 |   "no-unsafe-negation": 2,
100 |   radix: 2,
101 |   "valid-typeof": 2,
102 |   "no-implicit-globals": [2],
103 |   "no-unused-expressions": [
104 |     2,
105 |     { allowShortCircuit: true, allowTernary: true, allowTaggedTemplates: true },
106 |   ],
107 |   "no-proto": 2,
108 | 
109 |   // es2015 features
110 |   "require-yield": 2,
111 |   "template-curly-spacing": [2, "never"],
112 | 
113 |   // spacing details
114 |   "space-infix-ops": 2,
115 |   "space-in-parens": [2, "never"],
116 |   "array-bracket-spacing": [2, "never"],
117 |   "comma-spacing": [2, { before: false, after: true }],
118 |   "keyword-spacing": [2, "always"],
119 |   "space-before-function-paren": [
120 |     2,
121 |     {
122 |       anonymous: "never",
123 |       named: "never",
124 |       asyncArrow: "always",
125 |     },
126 |   ],
127 |   "no-whitespace-before-property": 2,
128 |   "keyword-spacing": [
129 |     2,
130 |     {
131 |       overrides: {
132 |         if: { after: true },
133 |         else: { after: true },
134 |         for: { after: true },
135 |         while: { after: true },
136 |         do: { after: true },
137 |         switch: { after: true },
138 |         return: { after: true },
139 |       },
140 |     },
141 |   ],
142 |   "arrow-spacing": [
143 |     2,
144 |     {
145 |       after: true,
146 |       before: true,
147 |     },
148 |   ],
149 |   "@stylistic/func-call-spacing": 2,
150 |   "@stylistic/type-annotation-spacing": 2,
151 | 
152 |   // file whitespace
153 |   "no-multiple-empty-lines": [2, { max: 2, maxEOF: 0 }],
154 |   "no-mixed-spaces-and-tabs": 2,
155 |   "no-trailing-spaces": 2,
156 |   "linebreak-style": [process.platform === "win32" ? 0 : 2, "unix"],
157 |   indent: [
158 |     2,
159 |     2,
160 |     { SwitchCase: 1, CallExpression: { arguments: 2 }, MemberExpression: 2 },
161 |   ],
162 |   "key-spacing": [
163 |     2,
164 |     {
165 |       beforeColon: false,
166 |     },
167 |   ],
168 |   "eol-last": 2,
169 | 
170 |   // copyright
171 |   "notice/notice": [
172 |     2,
173 |     {
174 |       mustMatch: "Copyright",
175 |       templateFile: path.join(__dirname, "utils", "copyright.js"),
176 |     },
177 |   ],
178 | 
179 |   // react
180 |   "react/react-in-jsx-scope": 0,
181 | };
182 | 
183 | const languageOptions = {
184 |   parser: tsParser,
185 |   ecmaVersion: 9,
186 |   sourceType: "module",
187 | };
188 | 
189 | export default [
190 |   {
191 |     ignores: ["**/*.js"],
192 |   },
193 |   {
194 |     files: ["**/*.ts", "**/*.tsx"],
195 |     plugins,
196 |     languageOptions,
197 |     rules: baseRules,
198 |   },
199 | ];
200 | 


--------------------------------------------------------------------------------
/index.d.ts:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | /**
 3 |  * Copyright (c) Microsoft Corporation.
 4 |  *
 5 |  * Licensed under the Apache License, Version 2.0 (the "License");
 6 |  * you may not use this file except in compliance with the License.
 7 |  * You may obtain a copy of the License at
 8 |  *
 9 |  * http://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
18 | import type { LaunchOptions } from 'playwright';
19 | import type { Server } from '@modelcontextprotocol/sdk/server/index.js';
20 | 
21 | /** Capability names accepted in `Options.capabilities`. */
22 | export type ToolCapability = 'core' | 'tabs' | 'pdf' | 'history' | 'wait' | 'files' | 'install';
23 | 
24 | /** Configuration accepted by `createServer`. Every field is optional. */
25 | export type Options = {
26 |   /** Path to the user data directory. */
27 |   userDataDir?: string;
28 | 
29 |   /** Launch options for the browser. */
30 |   launchOptions?: LaunchOptions;
31 | 
32 |   /**
33 |    * Use screenshots instead of snapshots. Less accurate and less reliable, and
34 |    * slower overall, but provides a visual representation of the page.
35 |    * @default false
36 |    */
37 |   vision?: boolean;
38 | 
39 |   /** Capabilities to enable. */
40 |   capabilities?: ToolCapability[];
41 | };
42 | 
43 | /**
44 |  * Creates an MCP `Server` instance.
45 |  * @param options Optional configuration; see `Options`.
46 |  */
47 | export function createServer(options?: Options): Server;
48 | 


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | /**
 3 |  * Copyright (c) Microsoft Corporation.
 4 |  *
 5 |  * Licensed under the Apache License, Version 2.0 (the "License");
 6 |  * you may not use this file except in compliance with the License.
 7 |  * You may obtain a copy of the License at
 8 |  *
 9 |  * http://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
18 | const { createServer } = require('./lib/index');
19 | module.exports = { createServer };
20 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "playwright-mcp-bypass",
 3 |   "version": "0.0.10",
 4 |   "description": "Playwright Tools for MCP",
 5 |   "repository": {
 6 |     "type": "git",
 7 |     "url": "git+https://github.com/yan5xu/playwright-mcp-bypass.git"
 8 |   },
 9 |   "homepage": "https://github.com/yan5xu/playwright-mcp-bypass",
10 |   "engines": {
11 |     "node": ">=18"
12 |   },
13 |   "author": {
14 |     "name": "yan5xu"
15 |   },
16 |   "license": "Apache-2.0",
17 |   "scripts": {
18 |     "build": "tsc",
19 |     "lint": "eslint .",
20 |     "watch": "tsc --watch",
21 |     "test": "playwright test",
22 |     "clean": "rm -rf lib",
23 |     "npm-publish": "npm run clean && npm run build && npm run test && npm publish"
24 |   },
25 |   "exports": {
26 |     "./package.json": "./package.json",
27 |     ".": {
28 |       "types": "./index.d.ts",
29 |       "default": "./index.js"
30 |     }
31 |   },
32 |   "dependencies": {
33 |     "@koa/cors": "^5.0.0",
34 |     "@koa/router": "^13.1.0",
35 |     "@modelcontextprotocol/sdk": "^1.6.1",
36 |     "@mozilla/readability": "^0.6.0",
37 |     "commander": "^13.1.0",
38 |     "jsdom": "^26.1.0",
39 |     "koa": "^2.16.1",
40 |     "koa-bodyparser": "^4.4.1",
41 |     "playwright": "^1.52.0-alpha-1743163434000",
42 |     "turndown": "^7.2.0",
43 |     "yaml": "^2.7.1",
44 |     "zod": "^3.24.2",
45 |     "zod-to-json-schema": "^3.24.4"
46 |   },
47 |   "devDependencies": {
48 |     "@eslint/eslintrc": "^3.2.0",
49 |     "@eslint/js": "^9.19.0",
50 |     "@playwright/test": "^1.52.0-alpha-1743163434000",
51 |     "@stylistic/eslint-plugin": "^3.0.1",
52 |     "@types/jsdom": "^21.1.7",
53 |     "@types/koa": "^2.15.0",
54 |     "@types/koa-bodyparser": "^4.3.12",
55 |     "@types/koa__cors": "^5.0.0",
56 |     "@types/koa__router": "^12.0.4",
57 |     "@types/node": "^22.13.10",
58 |     "@types/turndown": "^5.0.5",
59 |     "@typescript-eslint/eslint-plugin": "^8.26.1",
60 |     "@typescript-eslint/parser": "^8.26.1",
61 |     "@typescript-eslint/utils": "^8.26.1",
62 |     "eslint": "^9.19.0",
63 |     "eslint-plugin-import": "^2.31.0",
64 |     "eslint-plugin-notice": "^1.0.0",
65 |     "get-port": "^7.1.0",
66 |     "typescript": "^5.8.2"
67 |   },
68 |   "bin": {
69 |     "mcp-server-playwright": "cli.js"
70 |   }
71 | }
72 | 


--------------------------------------------------------------------------------
/playwright.config.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import { defineConfig } from '@playwright/test';
18 | 
19 | // Any non-empty CI environment variable switches on the stricter settings below.
20 | const isCI = Boolean(process.env.CI);
21 | 
22 | export default defineConfig({
23 |   testDir: './tests',
24 |   fullyParallel: true,
25 |   forbidOnly: isCI,
26 |   retries: isCI ? 2 : 0,
27 |   workers: isCI ? 1 : undefined,
28 |   reporter: 'list',
29 |   projects: [{ name: 'default' }],
30 | });
28 | 


--------------------------------------------------------------------------------
/src/context.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) Microsoft Corporation.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | import * as playwright from 'playwright';
 18 | import yaml from 'yaml';
 19 | 
 20 | import { waitForCompletion } from './tools/utils';
 21 | import { ToolResult } from './tools/tool';
 22 | 
 23 | export type ContextOptions = { // How Context obtains its browser context.
 24 |   browserName?: 'chromium' | 'firefox' | 'webkit'; // Browser type to use; chromium when omitted.
 25 |   userDataDir: string; // Directory passed to launchPersistentContext().
 26 |   launchOptions?: playwright.LaunchOptions; // Spread into the persistent-context launch options; sent as the 'launch-options' query parameter when remoteEndpoint is set.
 27 |   cdpEndpoint?: string; // When set, Context connects with chromium.connectOverCDP().
 28 |   remoteEndpoint?: string; // When set, Context connects with connect(); checked before cdpEndpoint.
 29 | };
 30 | // Anything a snapshot is taken from: the page itself or a frame locator inside it.
 31 | type PageOrFrameLocator = playwright.Page | playwright.FrameLocator;
 32 | // Options read by Tab.run().
 33 | type RunOptions = {
 34 |   captureSnapshot?: boolean; // Capture a new PageSnapshot after the callback, even when it throws.
 35 |   waitForCompletion?: boolean; // Run the callback through the waitForCompletion() helper.
 36 |   status?: string; // Placed ahead of the snapshot text, or returned alone when no snapshot exists.
 37 |   noClearFileChooser?: boolean; // Keep the remembered file chooser instead of clearing it before the callback.
 38 | };
 39 | 
 40 | /**
 41 |  * Owns the browser connection and the list of open tabs.
 42 |  *
 43 |  * The browser context is created by the first call that needs it and is
 44 |  * closed again once the last tab has been closed.
 45 |  */
 46 | export class Context {
 47 |   readonly options: ContextOptions;
 48 |   private _browser: playwright.Browser | undefined;
 49 |   private _browserContext: playwright.BrowserContext | undefined;
 50 |   private _tabs: Tab[] = [];
 51 |   private _currentTab: Tab | undefined;
 52 | 
 53 |   constructor(options: ContextOptions) {
 54 |     this.options = options;
 55 |   }
 56 | 
 57 |   /** Returns the internal array of open tabs (not a copy). */
 58 |   tabs(): Tab[] {
 59 |     return this._tabs;
 60 |   }
 61 | 
 62 |   /**
 63 |    * Returns the current tab.
 64 |    * @throws Error when there is no current tab.
 65 |    */
 66 |   currentTab(): Tab {
 67 |     if (!this._currentTab)
 68 |       throw new Error('Navigate to a location to create a tab');
 69 |     return this._currentTab;
 70 |   }
 71 | 
 72 |   /** Opens a new page and makes its tab the current one. */
 73 |   async newTab(): Promise<Tab> {
 74 |     const browserContext = await this._ensureBrowserContext();
 75 |     const page = await browserContext.newPage();
 76 |     this._currentTab = this._tabs.find(t => t.page === page)!;
 77 |     return this._currentTab;
 78 |   }
 79 | 
 80 |   /**
 81 |    * Makes the tab at the given 1-based index current and brings its page to front.
 82 |    * @throws Error when no tab exists at that index; the current tab is then left untouched.
 83 |    */
 84 |   async selectTab(index: number) {
 85 |     const tab = this._tabAt(index);
 86 |     this._currentTab = tab;
 87 |     await tab.page.bringToFront();
 88 |   }
 89 | 
 90 |   /** Returns the current tab, creating the browser context and a first page when needed. */
 91 |   async ensureTab(): Promise<Tab> {
 92 |     const context = await this._ensureBrowserContext();
 93 |     if (!this._currentTab)
 94 |       await context.newPage();
 95 |     return this._currentTab!;
 96 |   }
 97 | 
 98 |   /** Formats the open tabs as a 1-based list with title and URL, marking the current one. */
 99 |   async listTabs(): Promise<string> {
100 |     if (!this._tabs.length)
101 |       return 'No tabs open';
102 |     const lines: string[] = ['Open tabs:'];
103 |     for (let i = 0; i < this._tabs.length; i++) {
104 |       const tab = this._tabs[i];
105 |       const title = await tab.page.title();
106 |       const url = tab.page.url();
107 |       const current = tab === this._currentTab ? ' (current)' : '';
108 |       lines.push(`- ${i + 1}:${current} [${title}] (${url})`);
109 |     }
110 |     return lines.join('\n');
111 |   }
112 | 
113 |   /**
114 |    * Closes the tab at the given 1-based index, or the current tab when index is undefined.
115 |    * @returns The tab listing produced after the page has been closed.
116 |    * @throws Error when the index does not refer to an open tab, or when there is no current tab.
117 |    */
118 |   async closeTab(index: number | undefined) {
119 |     const tab = index === undefined ? this.currentTab() : this._tabAt(index);
120 |     await tab.page.close();
121 |     return await this.listTabs();
122 |   }
123 | 
124 |   // Resolves a 1-based tab index, failing with a descriptive error instead of yielding undefined.
125 |   private _tabAt(index: number): Tab {
126 |     const tab = this._tabs[index - 1];
127 |     if (!tab)
128 |       throw new Error(`Tab ${index} does not exist. Open tabs: ${this._tabs.length}.`);
129 |     return tab;
130 |   }
131 | 
132 |   // Registers a tab for the page; the first registered tab becomes current.
133 |   private _onPageCreated(page: playwright.Page) {
134 |     const tab = new Tab(this, page, tab => this._onPageClosed(tab));
135 |     this._tabs.push(tab);
136 |     if (!this._currentTab)
137 |       this._currentTab = tab;
138 |   }
139 | 
140 |   // Unregisters a closed tab and picks the tab now at the same position (or the last one)
141 |   // as current. Once no tabs remain, the browser context and then the browser are closed
142 |   // in the background and both references are cleared.
143 |   private _onPageClosed(tab: Tab) {
144 |     const index = this._tabs.indexOf(tab);
145 |     if (index === -1)
146 |       return;
147 |     this._tabs.splice(index, 1);
148 | 
149 |     if (this._currentTab === tab)
150 |       this._currentTab = this._tabs[Math.min(index, this._tabs.length - 1)];
151 |     const browser = this._browser;
152 |     if (this._browserContext && !this._tabs.length) {
153 |       void this._browserContext.close().then(() => browser?.close()).catch(() => {});
154 |       this._browser = undefined;
155 |       this._browserContext = undefined;
156 |     }
157 |   }
158 | 
159 |   /** Closes the browser context if one has been created. */
160 |   async close() {
161 |     if (!this._browserContext)
162 |       return;
163 |     await this._browserContext.close();
164 |   }
165 | 
166 |   // Creates the browser context on first use, registers its existing and future
167 |   // pages as tabs, and opens a page when the context starts without any.
168 |   private async _ensureBrowserContext() {
169 |     if (!this._browserContext) {
170 |       const context = await this._createBrowserContext();
171 |       this._browser = context.browser;
172 |       this._browserContext = context.browserContext;
173 |       for (const page of this._browserContext.pages())
174 |         this._onPageCreated(page);
175 |       this._browserContext.on('page', page => this._onPageCreated(page));
176 | 
177 |       // Ensure there is at least one tab after initialization
178 |       if (this._tabs.length === 0) {
179 |         // Diagnostics go to stderr: with the MCP stdio transport, stdout carries protocol messages.
180 |         console.error('[Context] No initial tabs found, creating a new one.');
181 |         await this._browserContext.newPage(); // This will trigger _onPageCreated
182 |       }
183 |     }
184 |     return this._browserContext;
185 |   }
186 | 
187 |   // Obtains a browser context according to the options, in priority order:
188 |   // remoteEndpoint, then cdpEndpoint, then a locally launched persistent context.
189 |   private async _createBrowserContext(): Promise<{ browser?: playwright.Browser, browserContext: playwright.BrowserContext }> {
190 |     if (this.options.remoteEndpoint) {
191 |       const url = new URL(this.options.remoteEndpoint);
192 |       if (this.options.browserName)
193 |         url.searchParams.set('browser', this.options.browserName);
194 |       if (this.options.launchOptions)
195 |         url.searchParams.set('launch-options', JSON.stringify(this.options.launchOptions));
196 |       const browser = await playwright[this.options.browserName ?? 'chromium'].connect(String(url));
197 |       const browserContext = await browser.newContext();
198 |       return { browser, browserContext };
199 |     }
200 | 
201 |     // If a CDP endpoint is provided in the options, connect using it.
202 |     if (this.options.cdpEndpoint) {
203 |       // Use Playwright's function to connect to an existing browser via CDP.
204 |       const browser = await playwright.chromium.connectOverCDP(this.options.cdpEndpoint);
205 |       // Assume the first context is the one we want to use.
206 |       const browserContext = browser.contexts()[0];
207 |       // Fail here with a clear message rather than later on an undefined context.
208 |       if (!browserContext)
209 |         throw new Error('The browser at the CDP endpoint exposes no browser context.');
210 |       return { browser, browserContext };
211 |     }
212 | 
213 |     // Otherwise, launch a new persistent browser context.
214 |     const browserContext = await this._launchPersistentContext();
215 |     return { browserContext };
216 |   }
217 | 
218 |   // Launches a persistent context in options.userDataDir. The argument
219 |   // '--disable-blink-features=AutomationControlled' is always added, without duplicates.
220 |   private async _launchPersistentContext(): Promise<playwright.BrowserContext> {
221 |     try {
222 |       const browserType = this.options.browserName ? playwright[this.options.browserName] : playwright.chromium;
223 |       const launchOptions = {
224 |         ...(this.options.launchOptions ?? {}),
225 |         args: Array.from(new Set([
226 |           ...(this.options.launchOptions?.args ?? []),
227 |           '--disable-blink-features=AutomationControlled'
228 |         ]))
229 |       };
230 |       return await browserType.launchPersistentContext(this.options.userDataDir, launchOptions);
231 |     } catch (error: any) {
232 |       // A thrown value is not guaranteed to be an Error, so read its message defensively.
233 |       const message = String(error?.message ?? error);
234 |       if (message.includes('Executable doesn\'t exist'))
235 |         throw new Error(`Browser specified in your config is not installed. Either install it (likely) or change the config.`);
236 |       throw error;
237 |     }
238 |   }
239 | }
191 | 
192 | /**
193 |  * Wraps one page: buffers its console messages, remembers the latest file
194 |  * chooser and keeps the snapshot captured by run().
195 |  */
196 | class Tab {
197 |   readonly context: Context;
198 |   readonly page: playwright.Page;
199 |   private _console: playwright.ConsoleMessage[] = [];
200 |   private _fileChooser: playwright.FileChooser | undefined;
201 |   private _snapshot: PageSnapshot | undefined;
202 |   private _onPageClose: (tab: Tab) => void;
203 | 
204 |   constructor(context: Context, page: playwright.Page, onPageClose: (tab: Tab) => void) {
205 |     this.context = context;
206 |     this.page = page;
207 |     this._onPageClose = onPageClose;
208 | 
209 |     page.on('console', message => {
210 |       this._console.push(message);
211 |     });
212 |     // A navigation of the main frame (the one without a parent) empties the console buffer.
213 |     page.on('framenavigated', frame => {
214 |       if (frame.parentFrame())
215 |         return;
216 |       this._console.length = 0;
217 |     });
218 |     page.on('close', () => {
219 |       this._onClose();
220 |     });
221 |     page.on('filechooser', chooser => {
222 |       this._fileChooser = chooser;
223 |     });
224 | 
225 |     page.setDefaultNavigationTimeout(60000);
226 |     page.setDefaultTimeout(5000);
227 |   }
228 | 
229 |   // Drops the per-page state and reports the closure through the callback given to the constructor.
230 |   private _onClose() {
231 |     this._fileChooser = undefined;
232 |     this._console.length = 0;
233 |     this._onPageClose(this);
234 |   }
235 | 
236 |   /** Navigates to the URL, waiting for DOMContentLoaded and then at most 5 seconds for the load event. */
237 |   async navigate(url: string) {
238 |     await this.page.goto(url, { waitUntil: 'domcontentloaded' });
239 |     try {
240 |       // Cap load event to 5 seconds, the page is operational at this point.
241 |       await this.page.waitForLoadState('load', { timeout: 5000 });
242 |     } catch {
243 |       // A load event that does not arrive in time is deliberately ignored.
244 |     }
245 |   }
246 | 
247 |   /**
248 |    * Runs the callback against this tab and builds the text result.
249 |    *
250 |    * The remembered file chooser is cleared first unless noClearFileChooser is set.
251 |    * With captureSnapshot, a new snapshot is taken afterwards even when the callback throws.
252 |    * The result text is the tab listing (only when more than one tab is open) followed by
253 |    * the snapshot text, or by the status alone when no snapshot exists.
254 |    */
255 |   async run(callback: (tab: Tab) => Promise<void>, options?: RunOptions): Promise<ToolResult> {
256 |     const { captureSnapshot, waitForCompletion: shouldWait, status, noClearFileChooser } = options ?? {};
257 |     try {
258 |       if (!noClearFileChooser)
259 |         this._fileChooser = undefined;
260 |       if (shouldWait)
261 |         await waitForCompletion(this.page, () => callback(this));
262 |       else
263 |         await callback(this);
264 |     } finally {
265 |       if (captureSnapshot)
266 |         this._snapshot = await PageSnapshot.create(this.page);
267 |     }
268 | 
269 |     let tabList = '';
270 |     if (this.context.tabs().length > 1)
271 |       tabList = `${await this.context.listTabs()}\n\nCurrent tab:\n`;
272 |     const snapshot = this._snapshot
273 |       ? this._snapshot.text({ status, hasFileChooser: !!this._fileChooser })
274 |       : status ?? '';
275 |     return {
276 |       content: [{ type: 'text', text: tabList + snapshot }],
277 |     };
278 |   }
279 | 
280 |   /** Same as run(), with waitForCompletion enabled unless the options override it. */
281 |   async runAndWait(callback: (tab: Tab) => Promise<void>, options?: RunOptions): Promise<ToolResult> {
282 |     const merged: RunOptions = { waitForCompletion: true, ...options };
283 |     return await this.run(callback, merged);
284 |   }
285 | 
286 |   /** Same as run(), with captureSnapshot and waitForCompletion enabled unless the options override them. */
287 |   async runAndWaitWithSnapshot(callback: (tab: Tab) => Promise<void>, options?: RunOptions): Promise<ToolResult> {
288 |     const merged: RunOptions = { captureSnapshot: true, waitForCompletion: true, ...options };
289 |     return await this.run(callback, merged);
290 |   }
291 | 
292 |   /**
293 |    * Returns the most recently captured snapshot.
294 |    * @throws Error when no snapshot has been captured yet.
295 |    */
296 |   lastSnapshot(): PageSnapshot {
297 |     if (this._snapshot)
298 |       return this._snapshot;
299 |     throw new Error('No snapshot available');
300 |   }
301 | 
302 |   /** Returns the buffered console messages (the internal array, not a copy). */
303 |   async console(): Promise<playwright.ConsoleMessage[]> {
304 |     return this._console;
305 |   }
306 | 
307 |   /**
308 |    * Sets the given files on the remembered file chooser and then forgets it.
309 |    * @throws Error when no file chooser is remembered.
310 |    */
311 |   async submitFileChooser(paths: string[]) {
312 |     const chooser = this._fileChooser;
313 |     if (!chooser)
314 |       throw new Error('No file chooser visible');
315 |     await chooser.setFiles(paths);
316 |     this._fileChooser = undefined;
317 |   }
318 | }
281 | 
282 | /**
283 |  * Textual accessibility snapshot of a page and its iframes, together with the
284 |  * frame locators needed to turn a ref from that text back into a locator.
285 |  */
286 | class PageSnapshot {
287 |   // Index 0 is the page itself; every snapshotted iframe appends its locator.
288 |   private _frameLocators: PageOrFrameLocator[] = [];
289 |   private _text!: string;
290 | 
291 |   constructor() {
292 |   }
293 | 
294 |   /** Builds a snapshot of the given page. */
295 |   static async create(page: playwright.Page): Promise<PageSnapshot> {
296 |     const snapshot = new PageSnapshot();
297 |     await snapshot._build(page);
298 |     return snapshot;
299 |   }
300 | 
301 |   /**
302 |    * Returns the snapshot text, preceded by the status and by a file chooser
303 |    * notice when the corresponding options are set.
304 |    */
305 |   text(options?: { status?: string, hasFileChooser?: boolean }): string {
306 |     const results: string[] = [];
307 |     if (options?.status) {
308 |       results.push(options.status);
309 |       results.push('');
310 |     }
311 |     if (options?.hasFileChooser) {
312 |       results.push('- There is a file chooser visible that requires browser_file_upload to be called');
313 |       results.push('');
314 |     }
315 |     results.push(this._text);
316 |     return results.join('\n');
317 |   }
318 | 
319 |   // Renders the page URL, title and YAML accessibility tree into this._text.
320 |   private async _build(page: playwright.Page) {
321 |     const yamlDocument = await this._snapshotFrame(page);
322 |     const lines = [];
323 |     lines.push(
324 |         `- Page URL: ${page.url()}`,
325 |         `- Page Title: ${await page.title()}`
326 |     );
327 |     lines.push(
328 |         `- Page Snapshot`,
329 |         '```yaml',
330 |         yamlDocument.toString().trim(),
331 |         '```',
332 |         ''
333 |     );
334 |     this._text = lines.join('\n');
335 |   }
336 | 
337 |   // Snapshots one frame and, recursively, every iframe found inside it. Refs from
338 |   // frames other than the page get an `f<frameIndex>` prefix so refLocator() can
339 |   // route them back to the right frame locator.
340 |   private async _snapshotFrame(frame: playwright.Page | playwright.FrameLocator) {
341 |     const frameIndex = this._frameLocators.push(frame) - 1;
342 |     // This line captures the accessibility tree of the frame's body as a YAML string.
343 |     // The structure (like 'navigation', 'search', 'link', 'button') comes from standard ARIA roles
344 |     // derived from the HTML structure of the page.
345 |     const snapshotString = await frame.locator('body').ariaSnapshot({ ref: true });
346 |     const snapshot = yaml.parseDocument(snapshotString);
347 | 
348 |     const visit = async (node: any): Promise<unknown> => {
349 |       if (yaml.isPair(node)) {
350 |         await Promise.all([
351 |           visit(node.key).then(k => node.key = k),
352 |           visit(node.value).then(v => node.value = v)
353 |         ]);
354 |       } else if (yaml.isSeq(node) || yaml.isMap(node)) {
355 |         node.items = await Promise.all(node.items.map(visit));
356 |       } else if (yaml.isScalar(node)) {
357 |         if (typeof node.value === 'string') {
358 |           const value = node.value;
359 |           if (frameIndex > 0)
360 |             node.value = value.replace('[ref=', `[ref=f${frameIndex}`);
361 |           if (value.startsWith('iframe ')) {
362 |             // The ref is read from the unprefixed text because it is resolved inside `frame`.
363 |             // Stop at the first ']' so a bracketed attribute after the ref is not swallowed into it.
364 |             const ref = value.match(/\[ref=([^\]]+)\]/)?.[1];
365 |             if (ref) {
366 |               try {
367 |                 const childSnapshot = await this._snapshotFrame(frame.frameLocator(`aria-ref=${ref}`));
368 |                 return snapshot.createPair(node.value, childSnapshot);
369 |               } catch (error) {
370 |                 return snapshot.createPair(node.value, '<could not take iframe snapshot>');
371 |               }
372 |             }
373 |           }
374 |         }
375 |       }
376 | 
377 |       return node;
378 |     };
379 |     await visit(snapshot.contents);
380 |     return snapshot;
381 |   }
382 | 
383 |   /**
384 |    * Resolves a ref taken from the snapshot text to a locator. A leading
385 |    * `f<number>` selects the frame locator with that index; otherwise the page is used.
386 |    * @throws Error when the ref names a frame index that this snapshot does not have.
387 |    */
388 |   refLocator(ref: string): playwright.Locator {
389 |     let frame = this._frameLocators[0];
390 |     const match = ref.match(/^f(\d+)(.*)/);
391 |     if (match) {
392 |       const frameIndex = parseInt(match[1], 10);
393 |       frame = this._frameLocators[frameIndex];
394 |       ref = match[2];
395 |     }
396 | 
397 |     if (!frame)
398 |       throw new Error(`Frame does not exist. Provide ref from the most current snapshot.`);
399 | 
400 |     return frame.locator(`aria-ref=${ref}`);
401 |   }
402 | }
382 | 


--------------------------------------------------------------------------------
/src/httpServer.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) Microsoft Corporation.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | import Koa from 'koa';
 18 | import Router from '@koa/router';
 19 | import bodyParser from 'koa-bodyparser';
 20 | import cors from '@koa/cors';
 21 | import http from 'http';
 22 | import type { ServerList } from './server';
 23 | import { Server } from '@modelcontextprotocol/sdk/server/index.js';
 24 | import { ToolSchema } from '@modelcontextprotocol/sdk/types.js'; // Import ToolSchema type
 25 | 
 26 | export async function startHttpServer(port: number, serverList: ServerList) {
 27 |   // Session management
 28 |   const sessions = new Map<string, Server>();
 29 |   const sessionTimers = new Map<string, NodeJS.Timeout>(); // For session timeout
 30 |   const sessionTimeout = 30 * 60 * 1000; // 30 minutes (in milliseconds)
 31 | 
 32 |   const app = new Koa();
 33 |   const router = new Router();
 34 | 
 35 |   // Middleware
 36 |   app.use(cors({
 37 |     allowHeaders: ['Content-Type', 'Session-Id'], // Allow Session-Id header
 38 |     exposeHeaders: [], // Adjust as needed
 39 |   }));
 40 |   app.use(bodyParser());
 41 | 
 42 |   // Session handling middleware
 43 |   app.use(async (ctx, next) => {
 44 |     const sessionId = ctx.get('session-id') || 'default'; // Get session ID from header or use 'default'
 45 |     let server = sessions.get(sessionId);
 46 | 
 47 |     if (!server) {
 48 |       console.log(`[HTTP Server] Creating new session: ${sessionId}`);
 49 |       // Pass the sessionId to create() so the factory can generate a unique user data dir
 50 |       server = await serverList.create(sessionId);
 51 |       sessions.set(sessionId, server);
 52 |     } else {
 53 |       console.log(`[HTTP Server] Reusing session: ${sessionId}`);
 54 |     }
 55 | 
 56 |     // Reset session timeout on activity
 57 |     if (sessionTimers.has(sessionId)) {
 58 |       clearTimeout(sessionTimers.get(sessionId)!);
 59 |     }
 60 |     const timer = setTimeout(async () => {
 61 |       console.log(`[HTTP Server] Session timed out: ${sessionId}`);
 62 |       const serverToClose = sessions.get(sessionId);
 63 |       if (serverToClose) {
 64 |         await serverList.close(serverToClose); // Use serverList to close
 65 |         sessions.delete(sessionId);
 66 |       }
 67 |       sessionTimers.delete(sessionId);
 68 |     }, sessionTimeout);
 69 |     sessionTimers.set(sessionId, timer);
 70 | 
 71 |     // Pass the server instance and sessionId to the route handlers via context state
 72 |     ctx.state.server = server;
 73 |     ctx.state.sessionId = sessionId;
 74 | 
 75 |     await next(); // Proceed to the next middleware (router)
 76 |   });
 77 | 
 78 |   // Tool calling route
 79 |   router.post('/tools/:toolName', async (ctx) => {
 80 |     const { toolName } = ctx.params;
 81 |     const params = ctx.request.body || {}; // Get params from request body
 82 |     const server: Server = ctx.state.server; // Get Server instance from session middleware
 83 |     const sessionId: string = ctx.state.sessionId; // Get sessionId for logging
 84 | 
 85 |     try {
 86 |       // Optional: Check if the tool exists before calling
 87 |       // Note: server.listTools() might not be available directly on the SDK's Server type.
 88 |       // If needed, we might need to adjust how tools are accessed or skip this check.
 89 |       // For now, we'll rely on callTool to handle non-existent tools.
 90 | 
 91 |       console.log(`[HTTP Server] Calling tool "${toolName}" for session ${sessionId} with params:`, params);
 92 |       // Use the callTool method we added to the Server prototype
 93 |       const result = await (server as any).callTool(toolName, params);
 94 | 
 95 |       ctx.status = 200; // OK
 96 |       ctx.body = { success: true, result }; // Return success and result
 97 |     } catch (error: any) {
 98 |       console.error(`[HTTP Server] Error calling tool "${toolName}" for session ${sessionId}:`, error);
 99 |       ctx.status = 500; // Internal Server Error (or potentially 400/404 depending on error type)
100 |       // Respond with error details
101 |       ctx.body = { success: false, error: String(error.message || error) };
102 |     }
103 |   });
104 | 
105 |   // Specific route for GET /tools/browser_tab_list
106 |   router.get('/tools/browser_tab_list', async (ctx) => {
107 |     const server: Server = ctx.state.server;
108 |     const sessionId: string = ctx.state.sessionId;
109 |     const toolName = 'browser_tab_list';
110 | 
111 |     try {
112 |       console.log(`[HTTP Server] Calling tool "${toolName}" for session ${sessionId}`);
113 |       const result = await (server as any).callTool(toolName, {}); // No parameters needed
114 |       ctx.status = 200;
115 |       ctx.body = { success: true, result };
116 |     } catch (error: any) {
117 |       console.error(`[HTTP Server] Error calling tool "${toolName}" for session ${sessionId}:`, error);
118 |       ctx.status = 500;
119 |       ctx.body = { success: false, error: String(error.message || error) };
120 |     }
121 |   });
122 | 
123 |   // Route to generate OpenAPI specification
124 |   router.get('/openapi.json', async (ctx) => {
125 |     try {
126 |       // Get a server instance (use default session, create if needed)
127 |       // We need to ensure a server instance exists to list tools.
128 |       // The session middleware already handles this, so ctx.state.server should be valid.
129 |       const server: Server = ctx.state.server;
130 |       const serverUrl = `http://localhost:${port}`; // Assuming localhost for spec
131 | 
132 |       // Get the registered tools from the server instance property we added
133 |       const registeredTools = (server as any)._registeredTools || [];
134 |       const toolSchemas = registeredTools.map((tool: any) => tool.schema);
135 | 
136 |       // Basic OpenAPI structure (without components)
137 |       const openApiSpec: any = {
138 |         openapi: '3.1.0',
139 |         info: {
140 |           title: 'Playwright MCP HTTP API',
141 |           version: require('../package.json').version,
142 |           description: 'HTTP API for interacting with the Playwright MCP server.',
143 |         },
144 |         servers: [
145 |           { url: serverUrl, description: 'Local development server' }
146 |         ],
147 |         paths: {}, // Paths will be populated below
148 |       };
149 | 
150 |       // Add paths for each tool
151 |       for (const tool of toolSchemas) {
152 |         const path = `/tools/${tool.name}`;
153 |         const isGetOperation = tool.name === 'browser_tab_list'; // Special case for GET
154 |         const method = isGetOperation ? 'get' : 'post';
155 | 
156 |         openApiSpec.paths[path] = {
157 |           [method]: {
158 |             tags: ['Tools'],
159 |             summary: tool.description || `Execute ${tool.name}`,
160 |             operationId: tool.name,
161 |             parameters: [
162 |               // Inline Session-Id parameter definition
163 |               {
164 |                 name: 'Session-Id',
165 |                 in: 'header',
166 |                 required: false,
167 |                 description: 'Optional session identifier. If not provided, uses the "default" session.',
168 |                 schema: { type: 'string' }
169 |               }
170 |             ],
171 |             responses: {
172 |               '200': {
173 |                 description: 'Successful operation',
174 |                 content: {
175 |                   'application/json': {
176 |                     // Inline SuccessResponse schema definition
177 |                     schema: {
178 |                       type: 'object',
179 |                       properties: {
180 |                         success: { type: 'boolean', example: true },
181 |                         result: { type: 'object', description: 'Result from the tool execution' }
182 |                       }
183 |                     }
184 |                   }
185 |                 }
186 |               },
187 |               '500': {
188 |                 description: 'Internal server error or tool execution error',
189 |                 content: {
190 |                   'application/json': {
191 |                     // Inline ErrorResponse schema definition
192 |                     schema: {
193 |                       type: 'object',
194 |                       properties: {
195 |                         success: { type: 'boolean', example: false },
196 |                         error: { type: 'string', description: 'Error message' }
197 |                       }
198 |                     }
199 |                   }
200 |                 }
201 |               }
202 |             }
203 |           }
204 |         };
205 | 
206 |         // Add requestBody for POST operations only if inputSchema exists and has properties or is required
207 |         if (!isGetOperation && tool.inputSchema) {
208 |           const inputSchema = tool.inputSchema as any; // Cast to any to access properties easily
209 |           const hasProperties = inputSchema.properties && Object.keys(inputSchema.properties).length > 0;
210 |           const isRequired = inputSchema.required && inputSchema.required.length > 0;
211 | 
212 |           if (hasProperties || isRequired) {
213 |             // Clone the schema and remove the $schema property
214 |             const schemaForRequestBody = { ...inputSchema };
215 |             delete schemaForRequestBody.$schema; // Remove $schema
216 | 
217 |             openApiSpec.paths[path][method].requestBody = {
218 |               description: 'Parameters for the tool',
219 |               // Set required based on whether the schema has any required properties
220 |               required: isRequired,
221 |               content: {
222 |                 'application/json': {
223 |                   schema: schemaForRequestBody // Use the cleaned schema
224 |                 }
225 |               }
226 |             };
227 |           }
228 |           // If no properties and not required, omit requestBody entirely
229 |         }
230 |       }
231 | 
232 |       ctx.status = 200;
233 |       ctx.type = 'application/json';
234 |       ctx.body = openApiSpec;
235 | 
236 |     } catch (error: any) {
237 |       console.error('[HTTP Server] Error generating OpenAPI spec:', error);
238 |       ctx.status = 500;
239 |       ctx.body = { success: false, error: `Failed to generate OpenAPI spec: ${error.message}` };
240 |     }
241 |   });
242 | 
243 | 
244 |   // Apply routes
245 |   app.use(router.routes()).use(router.allowedMethods());
246 | 
247 |   // Create and start the HTTP server
248 |   const httpServer = http.createServer(app.callback());
249 | 
250 |   httpServer.listen(port, () => {
251 |     console.log(`[HTTP Server] HTTP API server listening on port ${port}`);
252 |   });
253 | 
254 |   // Graceful shutdown handling
255 |   const gracefulShutdown = async () => {
256 |     console.log('[HTTP Server] Closing HTTP server...');
257 |     httpServer.close(async (err) => {
258 |       if (err) {
259 |         console.error('[HTTP Server] Error closing HTTP server:', err);
260 |       } else {
261 |         console.log('[HTTP Server] HTTP server closed.');
262 |       }
263 | 
264 |       // Clean up all sessions
265 |       console.log('[HTTP Server] Closing all browser sessions...');
266 |       const closingPromises: Promise<void>[] = [];
267 |       for (const [sessionId, server] of sessions.entries()) {
268 |         console.log(`[HTTP Server] Closing session: ${sessionId}`);
269 |         closingPromises.push(serverList.close(server)); // Use serverList to close
270 |         // Clear associated timer
271 |         if (sessionTimers.has(sessionId)) {
272 |           clearTimeout(sessionTimers.get(sessionId)!);
273 |           sessionTimers.delete(sessionId);
274 |         }
275 |       }
276 |       sessions.clear(); // Clear the sessions map
277 | 
278 |       try {
279 |         await Promise.all(closingPromises);
280 |         console.log('[HTTP Server] All sessions closed.');
281 |       } catch (closeError) {
282 |         console.error('[HTTP Server] Error closing sessions:', closeError);
283 |       } finally {
284 |         process.exit(err ? 1 : 0); // Exit with appropriate code
285 |       }
286 |     });
287 | 
288 |     // Force close after a timeout if graceful shutdown fails
289 |     setTimeout(() => {
290 |       console.error('[HTTP Server] Graceful shutdown timed out. Forcing exit.');
291 |       process.exit(1);
292 |     }, 15000); // 15 seconds timeout
293 |   };
294 | 
295 |   // Listen for termination signals
296 |   process.on('SIGINT', gracefulShutdown);
297 |   process.on('SIGTERM', gracefulShutdown);
298 | }


--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import { createServerWithTools } from './server';
18 | import common from './tools/common';
19 | import files from './tools/files';
20 | import install from './tools/install';
21 | import keyboard from './tools/keyboard';
22 | import navigate from './tools/navigate';
23 | import pdf from './tools/pdf';
24 | import snapshot from './tools/snapshot';
25 | import tabs from './tools/tabs';
26 | import screen from './tools/screen';
27 | import extractContent from './tools/extractContent'; // Import the new tool
28 | import { console as consoleResource } from './resources/console';
29 | 
30 | import type { Tool, ToolCapability } from './tools/tool';
31 | import type { Resource } from './resources/resource';
32 | import type { Server } from '@modelcontextprotocol/sdk/server/index.js';
33 | import type { LaunchOptions } from 'playwright';
34 | 
// Tool set served when `options.vision` is not set (see createServer below).
// Several tool modules are factories that take a boolean; this list passes
// `true` to all of them.
// NOTE(review): the flag presumably selects snapshot-based behaviour - confirm
// against the individual tool modules.
const snapshotTools: Tool[] = [
  ...common,
  ...files(true),
  ...install,
  ...keyboard(true),
  ...navigate(true),
  ...pdf,
  ...snapshot,
  ...tabs(true),
  ...extractContent(true), // from ./tools/extractContent
];

// Tool set served when `options.vision` is truthy. It mirrors the list above
// but passes `false` to the tool factories and includes `screen` in place of
// `snapshot`.
const screenshotTools: Tool[] = [
  ...common,
  ...files(false),
  ...install,
  ...keyboard(false),
  ...navigate(false),
  ...pdf,
  ...screen,
  ...tabs(false),
  ...extractContent(false), // from ./tools/extractContent
];

// Resources handed to every server created by createServer().
const resources: Resource[] = [
  consoleResource,
];
62 | 
// Options accepted by createServer(); every field is optional.
type Options = {
  // Forwarded unchanged to createServerWithTools().
  browserName?: 'chromium' | 'firefox' | 'webkit';
  // Forwarded to createServerWithTools(); an empty string is passed when omitted.
  userDataDir?: string;
  // Forwarded unchanged to createServerWithTools().
  launchOptions?: LaunchOptions;
  // Forwarded unchanged to createServerWithTools().
  cdpEndpoint?: string;
  // When truthy, the screenshot tool set is served instead of the snapshot one.
  vision?: boolean;
  // When provided, only tools whose capability is 'core' or is listed here are
  // served; when omitted, every tool of the selected set is served.
  capabilities?: ToolCapability[];
};
71 | 
72 | const packageJSON = require('../package.json');
73 | 
/**
 * Builds a Playwright MCP server.
 *
 * The tool set is chosen by `options.vision` (screenshot tools when truthy,
 * snapshot tools otherwise) and then narrowed by `options.capabilities`:
 * without a capability list every tool is kept, with one only 'core' tools and
 * tools whose capability is listed survive.
 *
 * @param options Optional server configuration.
 * @returns The server produced by createServerWithTools().
 */
export function createServer(options?: Options): Server {
  const requestedCapabilities = options?.capabilities;
  const isEnabled = (tool: Tool): boolean => {
    if (!requestedCapabilities)
      return true;
    return tool.capability === 'core' || requestedCapabilities.includes(tool.capability);
  };

  const candidateTools = options?.vision ? screenshotTools : snapshotTools;

  return createServerWithTools({
    name: 'Playwright',
    version: packageJSON.version,
    tools: candidateTools.filter(isEnabled),
    resources,
    browserName: options?.browserName,
    userDataDir: options?.userDataDir ?? '',
    launchOptions: options?.launchOptions,
    cdpEndpoint: options?.cdpEndpoint,
  });
}
88 | 


--------------------------------------------------------------------------------
/src/program.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) Microsoft Corporation.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | import http from 'http';
 18 | import fs from 'fs';
 19 | import os from 'os';
 20 | import path from 'path';
 21 | 
 22 | import { program } from 'commander';
 23 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
 24 | import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
 25 | 
 26 | 
 27 | import { createServer } from './index';
 28 | import { ServerList } from './server';
 29 | import { startHttpServer } from './httpServer'; // Import the new function
 30 | 
 31 | import type { LaunchOptions } from 'playwright';
 32 | import assert from 'assert';
 33 | import { ToolCapability } from './tools/tool';
 34 | 
 35 | const packageJSON = require('../package.json');
 36 | 
 37 | program
 38 |     .version('Version ' + packageJSON.version)
 39 |     .name(packageJSON.name)
 40 |     .option('--browser <browser>', 'Browser or chrome channel to use, possible values: chrome, firefox, webkit, msedge.')
 41 |     .option('--caps <caps>', 'Comma-separated list of capabilities to enable, possible values: tabs, pdf, history, wait, files, install. Default is all.')
 42 |     .option('--cdp-endpoint <endpoint>', 'CDP endpoint to connect to.')
 43 |     .option('--executable-path <path>', 'Path to the browser executable.')
 44 |     .option('--headless', 'Run browser in headless mode, headed by default')
 45 |     .option('--port <port>', 'Port to listen on for SSE transport.')
 46 |     .option('--user-data-dir <path>', 'Path to the user data directory')
 47 |     .option('--vision', 'Run server that uses screenshots (Aria snapshots are used by default)')
 48 |     .option('--http-port <port>', 'Port to listen on for HTTP API.') // Add the new option
 49 |     .action(async options => {
 50 |       let browserName: 'chromium' | 'firefox' | 'webkit';
 51 |       let channel: string | undefined;
 52 |       switch (options.browser) {
 53 |         case 'chrome':
 54 |         case 'chrome-beta':
 55 |         case 'chrome-canary':
 56 |         case 'chrome-dev':
 57 |         case 'msedge':
 58 |         case 'msedge-beta':
 59 |         case 'msedge-canary':
 60 |         case 'msedge-dev':
 61 |           browserName = 'chromium';
 62 |           channel = options.browser;
 63 |           break;
 64 |         case 'chromium':
 65 |           browserName = 'chromium';
 66 |           break;
 67 |         case 'firefox':
 68 |           browserName = 'firefox';
 69 |           break;
 70 |         case 'webkit':
 71 |           browserName = 'webkit';
 72 |           break;
 73 |         default:
 74 |           browserName = 'chromium';
 75 |           channel = 'chrome';
 76 |       }
 77 | 
 78 |       const launchOptions: LaunchOptions = {
 79 |         headless: !!options.headless,
 80 |         channel,
 81 |         executablePath: options.executablePath,
 82 |       };
 83 | 
 84 |       // Define the server factory function that now accepts an optional sessionId
 85 |       const serverFactory = async (sessionId?: string) => {
 86 |         // Determine the user data directory:
 87 |         // 1. Use the one provided via CLI if available.
 88 |         // 2. Otherwise, create a session-specific one.
 89 |         // Note: If a CLI path is provided, all sessions will share it, potentially causing conflicts.
 90 |         const effectiveUserDataDir = options.userDataDir ?? await createUserDataDir(browserName, sessionId);
 91 | 
 92 |         return createServer({
 93 |           browserName,
 94 |           userDataDir: effectiveUserDataDir, // Use the determined directory
 95 |           launchOptions,
 96 |         vision: !!options.vision,
 97 |           cdpEndpoint: options.cdpEndpoint,
 98 |           capabilities: options.caps?.split(',').map((c: string) => c.trim() as ToolCapability),
 99 |         });
100 |       };
101 | 
102 |       // Pass the factory function to ServerList
103 |       const serverList = new ServerList(serverFactory);
104 |       setupExitWatchdog(serverList);
105 | 
106 |       if (options.httpPort) { // Check for httpPort first
107 |         startHttpServer(+options.httpPort, serverList);
108 |       } else if (options.port) { // Then check for port (SSE)
109 |         startSSEServer(+options.port, serverList);
110 |       } else { // Default to Stdio (doesn't support multiple sessions, uses default profile)
111 |         const server = await serverList.create(); // Create without sessionId for stdio
112 |         await server.connect(new StdioServerTransport());
113 |       }
114 |     });
115 | 
/**
 * Makes the process shut down when stdin closes or SIGINT/SIGTERM arrives.
 *
 * On the first trigger all servers in `serverList` are closed and the process
 * exits with code 0; a 15 second timer forces the exit if closing hangs.
 *
 * @param serverList Registry whose servers are closed before exiting.
 */
function setupExitWatchdog(serverList: ServerList) {
  async function handleExit() {
    // Safety net: exit anyway if closeAll() never settles.
    setTimeout(() => process.exit(0), 15000);
    await serverList.closeAll();
    process.exit(0);
  }

  process.stdin.on('close', handleExit);
  for (const signal of ['SIGINT', 'SIGTERM'] as const)
    process.on(signal, handleExit);
}
127 | 
// The action handler above is async, so parseAsync() must be used: it awaits
// the handler and surfaces its rejection, which is reported here with a
// non-zero exit code instead of becoming an unhandled rejection.
program.parseAsync(process.argv).catch(error => {
  console.error(error);
  process.exit(1);
});
129 | 
/**
 * Creates (if needed) and returns the browser profile directory for a session.
 *
 * The directory lives under the platform cache directory in
 * `ms-playwright/mcp-<browserName>-profile[-<sessionId>]`. No suffix is added
 * when `sessionId` is missing, empty or `'default'`.
 *
 * `sessionId` may originate from an HTTP header, so every character outside
 * `[A-Za-z0-9_.-]` is escaped as `%XXXX` (UTF-16 code unit, upper-case hex).
 * This keeps the result a single path segment: separators or `..` sequences
 * cannot move the profile outside the `ms-playwright` directory. Identifiers
 * made only of safe characters (e.g. UUIDs) are left untouched.
 *
 * @param browserName Browser engine the profile belongs to.
 * @param sessionId Optional session identifier used to isolate profiles.
 * @returns Absolute path of the existing-or-created profile directory.
 * @throws Error on platforms other than linux, darwin and win32.
 */
async function createUserDataDir(browserName: 'chromium' | 'firefox' | 'webkit', sessionId?: string) {
  let cacheDirectory: string;
  if (process.platform === 'linux')
    cacheDirectory = process.env.XDG_CACHE_HOME || path.join(os.homedir(), '.cache');
  else if (process.platform === 'darwin')
    cacheDirectory = path.join(os.homedir(), 'Library', 'Caches');
  else if (process.platform === 'win32')
    cacheDirectory = process.env.LOCALAPPDATA || path.join(os.homedir(), 'AppData', 'Local');
  else
    throw new Error('Unsupported platform: ' + process.platform);

  let profileSuffix = '';
  if (sessionId && sessionId !== 'default') {
    // Escape rather than drop unsafe characters so distinct ids stay distinct.
    const safeSessionId = sessionId.replace(
        /[^A-Za-z0-9_.-]/g,
        ch => `%${ch.charCodeAt(0).toString(16).toUpperCase().padStart(4, '0')}`);
    profileSuffix = `-${safeSessionId}`;
  }
  const profileDirName = `mcp-${browserName}-profile${profileSuffix}`;

  const result = path.join(cacheDirectory, 'ms-playwright', profileDirName);
  await fs.promises.mkdir(result, { recursive: true });
  return result;
}
150 | 
/**
 * Starts the SSE transport: an HTTP server where `GET` opens an event stream
 * backed by a freshly created MCP server and `POST ?sessionId=...` delivers a
 * client message to the matching stream.
 *
 * Each stream owns one server from `serverList`; it is closed when the
 * response closes, including when the client disconnects while the server is
 * still being created. Failures while creating or connecting a server are
 * logged and end that response instead of escaping the request handler.
 *
 * @param port Port passed to `httpServer.listen`.
 * @param serverList Registry used to create and close per-stream servers.
 */
async function startSSEServer(port: number, serverList: ServerList) {
  const sessions = new Map<string, SSEServerTransport>();
  const httpServer = http.createServer(async (req, res) => {
    if (req.method === 'POST') {
      const searchParams = new URL(`http://localhost${req.url}`).searchParams;
      const sessionId = searchParams.get('sessionId');
      if (!sessionId) {
        res.statusCode = 400;
        res.end('Missing sessionId');
        return;
      }
      const transport = sessions.get(sessionId);
      if (!transport) {
        res.statusCode = 404;
        res.end('Session not found');
        return;
      }

      await transport.handlePostMessage(req, res);
      return;
    } else if (req.method === 'GET') {
      const transport = new SSEServerTransport('/sse', res);
      const sessionId = transport.sessionId;
      sessions.set(sessionId, transport);

      // Start creating the server, but attach the cleanup handler before
      // awaiting it so a disconnect during creation is not missed.
      const serverPromise = serverList.create(sessionId);
      let clientGone = false;
      res.on('close', () => {
        clientGone = true;
        sessions.delete(sessionId);
        // A creation failure is reported by the catch block below; only
        // failures while closing are logged here.
        serverPromise.then(
            server => serverList.close(server).catch(e => console.error(e)),
            () => {});
      });

      try {
        const server = await serverPromise;
        // The close handler above already scheduled the server for closing.
        if (clientGone)
          return;
        await server.connect(transport);
      } catch (e) {
        console.error(e);
        sessions.delete(sessionId);
        if (!res.headersSent)
          res.statusCode = 500;
        res.end();
      }
      return;
    } else {
      res.statusCode = 405;
      res.end('Method not allowed');
    }
  });

  httpServer.listen(port, () => {
    const address = httpServer.address();
    assert(address, 'Could not bind server socket');
    let url: string;
    if (typeof address === 'string') {
      url = address;
    } else {
      const resolvedPort = address.port;
      let resolvedHost = address.family === 'IPv4' ? address.address : `[${address.address}]`;
      // Wildcard addresses are not connectable; advertise localhost instead.
      if (resolvedHost === '0.0.0.0' || resolvedHost === '[::]')
        resolvedHost = 'localhost';
      url = `http://${resolvedHost}:${resolvedPort}`;
    }
    console.log(`Listening on ${url}`);
    console.log('Put this in your client config:');
    console.log(JSON.stringify({
      'mcpServers': {
        'playwright': {
          'url': `${url}/sse`
        }
      }
    }, undefined, 2));
  });
}
213 | 


--------------------------------------------------------------------------------
/src/resources/console.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import type { Resource } from './resource';
18 | 
// Resource exposing the console messages of the current tab as plain text,
// one line per message in the form `[TYPE] text`.
export const console: Resource = {
  schema: {
    uri: 'browser://console',
    name: 'Page console',
    mimeType: 'text/plain',
  },

  // Reads the current tab's console messages and returns them as a single
  // text entry addressed by the requested uri.
  async read(context, uri) {
    const lines: string[] = [];
    for (const entry of await context.currentTab().console())
      lines.push(`[${entry.type().toUpperCase()}] ${entry.text()}`);

    return [{ uri, mimeType: 'text/plain', text: lines.join('\n') }];
  },
};
36 | 


--------------------------------------------------------------------------------
/src/resources/resource.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import type { Context } from '../context';
18 | 
// Static description of a resource: its uri and name, plus an optional
// description and MIME type.
export type ResourceSchema = {
  uri: string;
  name: string;
  description?: string;
  mimeType?: string;
};

// One piece of content produced by reading a resource.
// NOTE(review): `text` and `blob` presumably carry textual vs. encoded binary
// payloads respectively - confirm against the MCP specification.
export type ResourceResult = {
  uri: string;
  mimeType?: string;
  text?: string;
  blob?: string;
};

// A readable resource: its static schema plus an async `read` that receives
// the browser Context and a uri and resolves to the resource contents.
export type Resource = {
  schema: ResourceSchema;
  read: (context: Context, uri: string) => Promise<ResourceResult[]>;
};
37 | 


--------------------------------------------------------------------------------
/src/server.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) Microsoft Corporation.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | import { Server } from '@modelcontextprotocol/sdk/server/index.js';
 18 | import { CallToolRequestSchema, ListResourcesRequestSchema, ListToolsRequestSchema, ReadResourceRequestSchema } from '@modelcontextprotocol/sdk/types.js';
 19 | 
 20 | import { Context } from './context';
 21 | 
 22 | import type { Tool } from './tools/tool';
 23 | import type { Resource } from './resources/resource';
 24 | import type { ContextOptions } from './context';
 25 | 
// Options for createServerWithTools(): the Context options (the whole object
// is handed to `new Context(...)`) plus the server identity and the tools and
// resources it serves.
type Options = ContextOptions & {
  // Name and version passed to the MCP Server constructor.
  name: string;
  version: string;
  // Tools listed and callable through the server.
  tools: Tool[];
  // Resources listed and readable through the server.
  resources: Resource[],
};
 32 | 
 33 | export function createServerWithTools(options: Options): Server {
 34 |   const { name, version, tools, resources } = options;
 35 |   const context = new Context(options);
 36 |   const server = new Server({ name, version }, {
 37 |     capabilities: {
 38 |       tools: {},
 39 |       resources: {},
 40 |     }
 41 |   });
 42 | 
 43 |   server.setRequestHandler(ListToolsRequestSchema, async () => {
 44 |     return { tools: tools.map(tool => tool.schema) };
 45 |   });
 46 | 
 47 |   server.setRequestHandler(ListResourcesRequestSchema, async () => {
 48 |     return { resources: resources.map(resource => resource.schema) };
 49 |   });
 50 | 
 51 |   server.setRequestHandler(CallToolRequestSchema, async request => {
 52 |     const tool = tools.find(tool => tool.schema.name === request.params.name);
 53 |     if (!tool) {
 54 |       return {
 55 |         content: [{ type: 'text', text: `Tool "${request.params.name}" not found` }],
 56 |         isError: true,
 57 |       };
 58 |     }
 59 | 
 60 |     try {
 61 |       const result = await tool.handle(context, request.params.arguments);
 62 |       return result;
 63 |     } catch (error) {
 64 |       return {
 65 |         content: [{ type: 'text', text: String(error) }],
 66 |         isError: true,
 67 |       };
 68 |     }
 69 |   });
 70 | 
 71 |   server.setRequestHandler(ReadResourceRequestSchema, async request => {
 72 |     const resource = resources.find(resource => resource.schema.uri === request.params.uri);
 73 |     if (!resource)
 74 |       return { contents: [] };
 75 | 
 76 |     const contents = await resource.read(context, request.params.uri);
 77 |     return { contents };
 78 |   });
 79 | 
 80 |   const oldClose = server.close.bind(server);
 81 | 
 82 |   server.close = async () => {
 83 |     await oldClose();
 84 |     await context.close();
 85 |   };
 86 | 
 87 |   // Add the callTool method directly to the server instance
 88 |   (server as any).callTool = async (name: string, args: any) => {
 89 |     // Find the registered handler for CallToolRequestSchema
 90 |     // This relies on the internal structure of the SDK's Server class,
 91 |     // specifically how request handlers are stored. This might be fragile.
 92 |     // A potentially safer approach would be to directly invoke the logic
 93 |     // defined in the setRequestHandler call above.
 94 | 
 95 |     // Let's try invoking the logic directly:
 96 |     const tool = tools.find(tool => tool.schema.name === name);
 97 |     if (!tool) {
 98 |       throw new Error(`Tool "${name}" not found`);
 99 |     }
100 | 
101 |     try {
102 |       // Simulate the result structure expected by the HTTP handler
103 |       const result = await tool.handle(context, args);
104 |       // Assuming tool.handle returns the direct result or throws an error
105 |       // We need to check the structure of 'result' if it matches MCP response format
106 |       if (result && (result as any).isError) {
107 |          // Attempt to extract a meaningful error message
108 |          const errorContent = (result as any).content?.find((c: any) => c.type === 'text')?.text;
109 |          throw new Error(errorContent || `Tool "${name}" execution failed`);
110 |       }
111 |       return result; // Return the direct result
112 |     } catch (error: any) {
113 |       // Re-throw the error to be caught by the HTTP handler
114 |       throw new Error(String(error.message || error));
115 |     }
116 |   };
117 | 
118 |   // Attach the actual tools array to the server instance for later retrieval
119 |   (server as any)._registeredTools = tools;
120 | 
121 |   return server;
122 | }
123 | 
/**
 * Registry of live servers created through a factory.
 *
 * Servers are tracked from `create()` until they are closed through `close()`
 * or `closeAll()`, so each tracked server is closed at most once by this
 * class.
 */
export class ServerList {
  private _servers: Server[] = [];
  private _serverFactory: (sessionId?: string) => Promise<Server>;

  /**
   * @param serverFactory Async factory invoked for every `create()` call; it
   *   receives the optional session id passed to `create()`.
   */
  constructor(serverFactory: (sessionId?: string) => Promise<Server>) {
    this._serverFactory = serverFactory;
  }

  /**
   * Creates a server through the factory and starts tracking it.
   *
   * @param sessionId Optional session identifier forwarded to the factory.
   * @returns The newly created server.
   */
  async create(sessionId?: string) {
    const server = await this._serverFactory(sessionId);
    this._servers.push(server);
    return server;
  }

  /**
   * Stops tracking `server` (if it is tracked) and closes it.
   *
   * @param server Server to close.
   */
  async close(server: Server) {
    const index = this._servers.indexOf(server);
    if (index !== -1)
      this._servers.splice(index, 1);
    await server.close();
  }

  /**
   * Closes every tracked server in parallel and empties the registry.
   *
   * The list is detached before closing so that a repeated or concurrent
   * `closeAll()` does not close the same servers again.
   */
  async closeAll() {
    const servers = this._servers;
    this._servers = [];
    await Promise.all(servers.map(server => server.close()));
  }
}
152 | 


--------------------------------------------------------------------------------
/src/tools/common.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import { z } from 'zod';
18 | import { zodToJsonSchema } from 'zod-to-json-schema';
19 | 
20 | import type { Tool } from './tool';
21 | 
// Input for browser_wait: the requested wait duration in seconds.
const waitSchema = z.object({
  time: z.number().describe('The time to wait in seconds'),
});

/**
 * `browser_wait` tool: pauses for the requested number of seconds.
 * The pause is clamped to the range [0, 10] seconds, and the returned text
 * reports the clamped duration so that it matches the time actually waited.
 */
const wait: Tool = {
  capability: 'wait',
  schema: {
    name: 'browser_wait',
    description: 'Wait for a specified time in seconds',
    inputSchema: zodToJsonSchema(waitSchema),
  },
  handle: async (context, params) => {
    const validatedParams = waitSchema.parse(params);
    // Never wait longer than 10 seconds and never pass a negative delay to the timer.
    const seconds = Math.min(10, Math.max(0, validatedParams.time));
    await new Promise(f => setTimeout(f, seconds * 1000));
    return {
      content: [{
        type: 'text',
        text: `Waited for ${seconds} seconds`,
      }],
    };
  },
};
44 | 
// browser_close takes no input.
const closeSchema = z.object({});

/**
 * `browser_close` tool: awaits `context.close()` and then reports that the page was closed.
 */
const close: Tool = {
  capability: 'core',
  schema: {
    name: 'browser_close',
    description: 'Close the page',
    inputSchema: zodToJsonSchema(closeSchema),
  },
  async handle(context) {
    await context.close();
    return { content: [{ type: 'text', text: `Page closed` }] };
  },
};
64 | 
// Tools exported by this module, in registration order.
export default [close, wait];
69 | 


--------------------------------------------------------------------------------
/src/tools/extractContent.ts:
--------------------------------------------------------------------------------
  1 | import { Page } from 'playwright';
  2 | import { JSDOM } from 'jsdom';
  3 | // @ts-ignore - No official types for readability
  4 | import { Readability } from '@mozilla/readability';
  5 | import TurndownService from 'turndown';
  6 | import { z } from 'zod';
  7 | import { zodToJsonSchema } from 'zod-to-json-schema';
  8 | import type { ToolFactory, ToolResult, ToolSchema } from './tool.js'; // Corrected import
  9 | import type { Context } from '../context.js'; // Corrected import
 10 | 
 11 | // Define the input schema using Zod
 12 | const ExtractContentInputSchema = z.object({
 13 |   url: z.string().url().describe('The URL of the web page to extract content from.'),
 14 |   // Optional: Add timeout, waitUntil etc. if needed
 15 | });
 16 | 
 17 | // Convert Zod schema to JSON schema for MCP
 18 | const extractContentInputJsonSchema = zodToJsonSchema(ExtractContentInputSchema);
 19 | 
 20 | // Tool Factory function
 21 | const extractContent: ToolFactory = (captureSnapshot) => ({
 22 |   capability: 'core', // Assign a capability
 23 |   schema: { // Define schema property
 24 |     name: 'browser_extract_content',
 25 |     description: 'Navigate to a URL and extract the main readable content as Markdown.',
 26 |     inputSchema: extractContentInputJsonSchema,
 27 |   },
 28 |   // Correct handle signature
 29 |   handle: async (context: Context, params?: Record<string, any>): Promise<ToolResult> => {
 30 |     const { url } = ExtractContentInputSchema.parse(params);
 31 |     const currentTab = await context.ensureTab();
 32 |     const page = currentTab.page;
 33 | 
 34 |     try {
 35 |       console.log(`[browser_extract_content] Navigating to: ${url}`);
 36 |       // Navigate and wait for the page to load
 37 |       await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
 38 |       // Optional: Add a small delay or wait for network idle if needed for dynamic content
 39 |       await page.waitForLoadState('load', { timeout: 5000 }).catch(() => {
 40 |           console.log('[browser_extract_content] Page load timeout after domcontentloaded, proceeding anyway.');
 41 |       });
 42 |       console.log(`[browser_extract_content] Navigation successful.`);
 43 | 
 44 |       // Get page HTML content
 45 |       const html = await page.content();
 46 |       console.log(`[browser_extract_content] Retrieved HTML content (length: ${html.length}).`);
 47 | 
 48 |       if (!html) {
 49 |         throw new Error('Failed to retrieve HTML content from the page.');
 50 |       }
 51 | 
 52 |       // Process content using Readability and Turndown
 53 |       const processedContent = processHtmlContent(html, url);
 54 |       console.log(`[browser_extract_content] Processed content (length: ${processedContent.length}).`);
 55 | 
 56 |       // Return the extracted content directly
 57 |       return {
 58 |         content: [{ type: 'text', text: processedContent }],
 59 |         // isError can be omitted if success
 60 |       };
 61 | 
 62 |     } catch (error: any) {
 63 |       console.error(`[browser_extract_content] Error processing ${url}: ${error.message}`);
 64 |       // Return a structured error message
 65 |       const errorMessage = `<error>Failed to extract content from ${url}: ${error.message}</error>`;
 66 |       return {
 67 |         content: [{ type: 'text', text: errorMessage }],
 68 |         isError: true, // Mark as error
 69 |       };
 70 |     }
 71 |   },
 72 | });
 73 | 
 74 | /**
 75 |  * Helper function to process HTML content using Readability and Turndown.
 76 |  * @param html The HTML content string.
 77 |  * @param url The base URL for resolving relative links (optional).
 78 |  * @returns The processed content as Markdown string.
 79 |  */
 80 | function processHtmlContent(html: string, url?: string): string {
 81 |      try {
 82 |         const dom = new JSDOM(html, { url });
 83 |         // @ts-ignore
 84 |         const reader = new Readability(dom.window.document);
 85 |         const article = reader.parse();
 86 | 
 87 |         if (!article || !article.content) {
 88 |           console.warn('[browser_extract_content] Readability could not extract main content. Returning warning message.');
 89 |           // Return a more informative message instead of empty string
 90 |           return '<warning>Readability could not extract main content from this page.</warning>';
 91 |         }
 92 | 
 93 |         console.log(`[browser_extract_content] Readability extracted content (length: ${article.content.length}).`);
 94 | 
 95 |         // Convert extracted HTML to Markdown
 96 |         const turndownService = new TurndownService({
 97 |             headingStyle: 'atx', // Use '#' for headings
 98 |             codeBlockStyle: 'fenced', // Use ``` for code blocks
 99 |         });
100 |         // Add a rule to handle preformatted text better
101 |         turndownService.addRule('pre', {
102 |             filter: 'pre',
103 |             replacement: function (content, node) {
104 |                 // Trim leading/trailing newlines often added by turndown
105 |                 const code = content.replace(/^\n+|\n+$/g, '');
106 |                 // Attempt to get language from class attribute
107 |                 const language = (node as HTMLElement).getAttribute('class')?.match(/language-(\S+)/)?.[1] || '';
108 |                 return '\n```' + language + '\n' + code + '\n```\n';
109 |             }
110 |         });
111 | 
112 |         const markdown = turndownService.turndown(article.content);
113 | 
114 |         console.log(`[browser_extract_content] Converted to Markdown (length: ${markdown.length}).`);
115 |         // Trim potential excessive newlines from the final markdown
116 |         return markdown.replace(/\n{3,}/g, '\n\n').trim();
117 | 
118 |       } catch (error: any) {
119 |           console.error(`[browser_extract_content] Error during HTML processing: ${error.message}`);
120 |           // Return error message formatted as markdown error
121 |           return `<error>Error processing HTML content: ${error.message}</error>`;
122 |       }
123 | }
124 | 
125 | 
// Default export: builds this module's tool list for the given snapshot mode.
export default (captureSnapshot: boolean) => [extractContent(captureSnapshot)];


--------------------------------------------------------------------------------
/src/tools/files.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import { z } from 'zod';
18 | import { zodToJsonSchema } from 'zod-to-json-schema';
19 | 
20 | import type { ToolFactory } from './tool';
21 | 
// Input for browser_file_upload: one or more file paths.
const uploadFileSchema = z.object({
  paths: z.array(z.string()).describe('The absolute paths to the files to upload. Can be a single file or multiple files.'),
});

/**
 * Builds the `browser_file_upload` tool, which submits the given paths to the
 * current tab's file chooser through `tab.runAndWait`.
 */
const uploadFile: ToolFactory = captureSnapshot => ({
  capability: 'files',
  schema: {
    name: 'browser_file_upload',
    description: 'Upload one or multiple files',
    inputSchema: zodToJsonSchema(uploadFileSchema),
  },
  async handle(context, params) {
    const { paths } = uploadFileSchema.parse(params);
    const tab = context.currentTab();
    const submitFiles = async () => {
      await tab.submitFileChooser(paths);
    };
    return await tab.runAndWait(submitFiles, {
      status: `Chose files ${paths.join(', ')}`,
      captureSnapshot,
      noClearFileChooser: true,
    });
  },
});
45 | 
// Default export: builds this module's tool list for the given snapshot mode.
export default (captureSnapshot: boolean) => [uploadFile(captureSnapshot)];
49 | 


--------------------------------------------------------------------------------
/src/tools/install.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import { fork } from 'child_process';
18 | import path from 'path';
19 | 
20 | import { z } from 'zod';
21 | import { zodToJsonSchema } from 'zod-to-json-schema';
22 | 
23 | import type { Tool } from './tool';
24 | 
/**
 * `browser_install` tool: runs the Playwright CLI (`cli.js install <channel>`) in a
 * forked child process. The channel comes from `launchOptions.channel`, then
 * `browserName`, then falls back to 'chrome'.
 *
 * Rejects with the captured stdout/stderr output when the process exits with a
 * non-zero code, and with the spawn error when the process cannot be started.
 */
const install: Tool = {
  capability: 'install',
  schema: {
    name: 'browser_install',
    description: 'Install the browser specified in the config. Call this if you get an error about the browser not being installed.',
    inputSchema: zodToJsonSchema(z.object({})),
  },

  handle: async context => {
    const channel = context.options.launchOptions?.channel ?? context.options.browserName ?? 'chrome';
    // The CLI entry point sits next to playwright's package.json.
    const cli = path.join(require.resolve('playwright/package.json'), '..', 'cli.js');
    const child = fork(cli, ['install', channel], {
      stdio: 'pipe',
    });
    // Collect both streams so that a failure can report everything the CLI printed.
    const output: string[] = [];
    child.stdout?.on('data', data => output.push(data.toString()));
    child.stderr?.on('data', data => output.push(data.toString()));
    await new Promise<void>((resolve, reject) => {
      // Without an 'error' listener a failed spawn is emitted as an unhandled
      // 'error' event instead of rejecting this promise.
      child.on('error', error => {
        reject(new Error(`Failed to install browser: ${error.message}`));
      });
      child.on('close', code => {
        if (code === 0)
          resolve();
        else
          reject(new Error(`Failed to install browser: ${output.join('')}`));
      });
    });
    return {
      content: [{
        type: 'text',
        text: `Browser ${channel} installed`,
      }],
    };
  },
};
58 | 
// Tools exported by this module.
export default [install];
62 | 


--------------------------------------------------------------------------------
/src/tools/keyboard.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import { z } from 'zod';
18 | import zodToJsonSchema from 'zod-to-json-schema';
19 | 
20 | import type { ToolFactory } from './tool';
21 | 
// Input for browser_press_key: the key name or character to press.
const pressKeySchema = z.object({
  key: z.string().describe('Name of the key to press or a character to generate, such as `ArrowLeft` or `a`'),
});

/**
 * Builds the `browser_press_key` tool, which presses a single key on the
 * current tab's page keyboard through `runAndWait`.
 */
const pressKey: ToolFactory = captureSnapshot => ({
  capability: 'core',
  schema: {
    name: 'browser_press_key',
    description: 'Press a key on the keyboard',
    inputSchema: zodToJsonSchema(pressKeySchema),
  },
  async handle(context, params) {
    const { key } = pressKeySchema.parse(params);
    const currentTab = context.currentTab();
    return await currentTab.runAndWait(async tab => {
      await tab.page.keyboard.press(key);
    }, { status: `Pressed key ${key}`, captureSnapshot });
  },
});
43 | 
// Default export: builds this module's tool list for the given snapshot mode.
export default (captureSnapshot: boolean) => [pressKey(captureSnapshot)];
47 | 


--------------------------------------------------------------------------------
/src/tools/navigate.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import { z } from 'zod';
18 | import { zodToJsonSchema } from 'zod-to-json-schema';
19 | 
20 | import type { ToolFactory } from './tool';
21 | 
// Input for browser_navigate: the destination URL.
const navigateSchema = z.object({
  url: z.string().describe('The URL to navigate to'),
});

/**
 * Builds the `browser_navigate` tool, which navigates a tab (created on demand
 * via `context.ensureTab()`) to the requested URL.
 */
const navigate: ToolFactory = captureSnapshot => ({
  capability: 'core',
  schema: {
    name: 'browser_navigate',
    description: 'Navigate to a URL',
    inputSchema: zodToJsonSchema(navigateSchema),
  },
  async handle(context, params) {
    const { url } = navigateSchema.parse(params);
    const ensuredTab = await context.ensureTab();
    return await ensuredTab.run(async tab => {
      await tab.navigate(url);
    }, { status: `Navigated to ${url}`, captureSnapshot });
  },
});
44 | 
// browser_navigate_back takes no input.
const goBackSchema = z.object({});

/**
 * Builds the `browser_navigate_back` tool, which calls `page.goBack()` on the
 * current tab through `runAndWait`.
 */
const goBack: ToolFactory = captureSnapshot => ({
  capability: 'history',
  schema: {
    name: 'browser_navigate_back',
    description: 'Go back to the previous page',
    inputSchema: zodToJsonSchema(goBackSchema),
  },
  async handle(context) {
    const currentTab = context.currentTab();
    return await currentTab.runAndWait(async tab => {
      await tab.page.goBack();
    }, { status: 'Navigated back', captureSnapshot });
  },
});
63 | 
// browser_navigate_forward takes no input.
const goForwardSchema = z.object({});

/**
 * Builds the `browser_navigate_forward` tool, which calls `page.goForward()` on
 * the current tab through `runAndWait`.
 */
const goForward: ToolFactory = captureSnapshot => ({
  capability: 'history',
  schema: {
    name: 'browser_navigate_forward',
    description: 'Go forward to the next page',
    inputSchema: zodToJsonSchema(goForwardSchema),
  },
  async handle(context) {
    const currentTab = context.currentTab();
    return await currentTab.runAndWait(async tab => {
      await tab.page.goForward();
    }, { status: 'Navigated forward', captureSnapshot });
  },
});
82 | 
// Default export: builds the navigation tools for the given snapshot mode.
export default (captureSnapshot: boolean) => {
  return [navigate, goBack, goForward].map(factory => factory(captureSnapshot));
};
88 | 


--------------------------------------------------------------------------------
/src/tools/pdf.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import os from 'os';
18 | import path from 'path';
19 | 
20 | import { z } from 'zod';
21 | import { zodToJsonSchema } from 'zod-to-json-schema';
22 | 
23 | import { sanitizeForFilePath } from './utils';
24 | 
25 | import type { Tool } from './tool';
26 | 
// browser_pdf_save takes no input.
const pdfSchema = z.object({});

/**
 * `browser_pdf_save` tool: writes the current tab's page as a PDF into the OS
 * temp directory, using a sanitized `page-<ISO timestamp>` base name, and
 * reports the resulting path.
 */
const pdf: Tool = {
  capability: 'pdf',
  schema: {
    name: 'browser_pdf_save',
    description: 'Save page as PDF',
    inputSchema: zodToJsonSchema(pdfSchema),
  },
  async handle(context) {
    const tab = context.currentTab();
    const baseName = sanitizeForFilePath(`page-${new Date().toISOString()}`);
    const fileName = `${path.join(os.tmpdir(), baseName)}.pdf`;
    await tab.page.pdf({ path: fileName });
    return { content: [{ type: 'text', text: `Saved as ${fileName}` }] };
  },
};
48 | 
// Tools exported by this module.
export default [pdf];
52 | 


--------------------------------------------------------------------------------
/src/tools/screen.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) Microsoft Corporation.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | import { z } from 'zod';
 18 | import { zodToJsonSchema } from 'zod-to-json-schema';
 19 | 
 20 | import type { Tool } from './tool';
 21 | 
 22 | const screenshot: Tool = {
 23 |   capability: 'core',
 24 |   schema: {
 25 |     name: 'browser_screen_capture',
 26 |     description: 'Take a screenshot of the current page',
 27 |     inputSchema: zodToJsonSchema(z.object({})),
 28 |   },
 29 | 
 30 |   handle: async context => {
 31 |     const tab = context.currentTab();
 32 |     const screenshot = await tab.page.screenshot({ type: 'jpeg', quality: 50, scale: 'css' });
 33 |     return {
 34 |       content: [{ type: 'image', data: screenshot.toString('base64'), mimeType: 'image/jpeg' }],
 35 |     };
 36 |   },
 37 | };
 38 | 
 39 | const elementSchema = z.object({
 40 |   element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'),
 41 | });
 42 | 
 43 | const moveMouseSchema = elementSchema.extend({
 44 |   x: z.number().describe('X coordinate'),
 45 |   y: z.number().describe('Y coordinate'),
 46 | });
 47 | 
 48 | const moveMouse: Tool = {
 49 |   capability: 'core',
 50 |   schema: {
 51 |     name: 'browser_screen_move_mouse',
 52 |     description: 'Move mouse to a given position',
 53 |     inputSchema: zodToJsonSchema(moveMouseSchema),
 54 |   },
 55 | 
 56 |   handle: async (context, params) => {
 57 |     const validatedParams = moveMouseSchema.parse(params);
 58 |     const tab = context.currentTab();
 59 |     await tab.page.mouse.move(validatedParams.x, validatedParams.y);
 60 |     return {
 61 |       content: [{ type: 'text', text: `Moved mouse to (${validatedParams.x}, ${validatedParams.y})` }],
 62 |     };
 63 |   },
 64 | };
 65 | 
 66 | const clickSchema = elementSchema.extend({
 67 |   x: z.number().describe('X coordinate'),
 68 |   y: z.number().describe('Y coordinate'),
 69 | });
 70 | 
 71 | const click: Tool = {
 72 |   capability: 'core',
 73 |   schema: {
 74 |     name: 'browser_screen_click',
 75 |     description: 'Click left mouse button',
 76 |     inputSchema: zodToJsonSchema(clickSchema),
 77 |   },
 78 | 
 79 |   handle: async (context, params) => {
 80 |     return await context.currentTab().runAndWait(async tab => {
 81 |       const validatedParams = clickSchema.parse(params);
 82 |       await tab.page.mouse.move(validatedParams.x, validatedParams.y);
 83 |       await tab.page.mouse.down();
 84 |       await tab.page.mouse.up();
 85 |     }, {
 86 |       status: 'Clicked mouse',
 87 |     });
 88 |   },
 89 | };
 90 | 
 91 | const dragSchema = elementSchema.extend({
 92 |   startX: z.number().describe('Start X coordinate'),
 93 |   startY: z.number().describe('Start Y coordinate'),
 94 |   endX: z.number().describe('End X coordinate'),
 95 |   endY: z.number().describe('End Y coordinate'),
 96 | });
 97 | 
 98 | const drag: Tool = {
 99 |   capability: 'core',
100 |   schema: {
101 |     name: 'browser_screen_drag',
102 |     description: 'Drag left mouse button',
103 |     inputSchema: zodToJsonSchema(dragSchema),
104 |   },
105 | 
106 |   handle: async (context, params) => {
107 |     const validatedParams = dragSchema.parse(params);
108 |     return await context.currentTab().runAndWait(async tab => {
109 |       await tab.page.mouse.move(validatedParams.startX, validatedParams.startY);
110 |       await tab.page.mouse.down();
111 |       await tab.page.mouse.move(validatedParams.endX, validatedParams.endY);
112 |       await tab.page.mouse.up();
113 |     }, {
114 |       status: `Dragged mouse from (${validatedParams.startX}, ${validatedParams.startY}) to (${validatedParams.endX}, ${validatedParams.endY})`,
115 |     });
116 |   },
117 | };
118 | 
// Input for browser_screen_type: the text and an optional "press Enter afterwards" flag.
const typeSchema = z.object({
  text: z.string().describe('Text to type into the element'),
  submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'),
});

/**
 * `browser_screen_type` tool: types the text on the current tab's page
 * keyboard and presses Enter afterwards when `submit` is set.
 */
const type: Tool = {
  capability: 'core',
  schema: {
    name: 'browser_screen_type',
    description: 'Type text',
    inputSchema: zodToJsonSchema(typeSchema),
  },

  async handle(context, params) {
    const { text, submit } = typeSchema.parse(params);
    const currentTab = context.currentTab();
    return await currentTab.runAndWait(async tab => {
      const { keyboard } = tab.page;
      await keyboard.type(text);
      if (submit)
        await keyboard.press('Enter');
    }, { status: `Typed text "${text}"` });
  },
};
143 | 
// Tools exported by this module, in registration order.
export default [screenshot, moveMouse, click, drag, type];
151 | 


--------------------------------------------------------------------------------
/src/tools/snapshot.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) Microsoft Corporation.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | import { z } from 'zod';
 18 | import zodToJsonSchema from 'zod-to-json-schema';
 19 | 
 20 | import type * as playwright from 'playwright';
 21 | import type { Tool } from './tool';
 22 | 
 23 | const snapshot: Tool = {
 24 |   capability: 'core',
 25 |   schema: {
 26 |     name: 'browser_snapshot',
 27 |     description: 'Capture accessibility snapshot of the current page, this is better than screenshot',
 28 |     inputSchema: zodToJsonSchema(z.object({})),
 29 |   },
 30 | 
 31 |   handle: async context => {
 32 |     // Use ensureTab() instead of currentTab() to guarantee a tab exists
 33 |     const tab = await context.ensureTab();
 34 |     return await tab.run(async () => {}, { captureSnapshot: true });
 35 |   },
 36 | };
 37 | 
 38 | const elementSchema = z.object({
 39 |   element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'),
 40 |   ref: z.string().describe('Exact target element reference from the page snapshot'),
 41 | });
 42 | 
 43 | const click: Tool = {
 44 |   capability: 'core',
 45 |   schema: {
 46 |     name: 'browser_click',
 47 |     description: 'Perform click on a web page',
 48 |     inputSchema: zodToJsonSchema(elementSchema),
 49 |   },
 50 | 
 51 |   handle: async (context, params) => {
 52 |     const validatedParams = elementSchema.parse(params);
 53 |     return await context.currentTab().runAndWaitWithSnapshot(async tab => {
 54 |       const locator = tab.lastSnapshot().refLocator(validatedParams.ref);
 55 |       await locator.click();
 56 |     }, {
 57 |       status: `Clicked "${validatedParams.element}"`,
 58 |     });
 59 |   },
 60 | };
 61 | 
 62 | const dragSchema = z.object({
 63 |   startElement: z.string().describe('Human-readable source element description used to obtain the permission to interact with the element'),
 64 |   startRef: z.string().describe('Exact source element reference from the page snapshot'),
 65 |   endElement: z.string().describe('Human-readable target element description used to obtain the permission to interact with the element'),
 66 |   endRef: z.string().describe('Exact target element reference from the page snapshot'),
 67 | });
 68 | 
 69 | const drag: Tool = {
 70 |   capability: 'core',
 71 |   schema: {
 72 |     name: 'browser_drag',
 73 |     description: 'Perform drag and drop between two elements',
 74 |     inputSchema: zodToJsonSchema(dragSchema),
 75 |   },
 76 | 
 77 |   handle: async (context, params) => {
 78 |     const validatedParams = dragSchema.parse(params);
 79 |     return await context.currentTab().runAndWaitWithSnapshot(async tab => {
 80 |       const startLocator = tab.lastSnapshot().refLocator(validatedParams.startRef);
 81 |       const endLocator = tab.lastSnapshot().refLocator(validatedParams.endRef);
 82 |       await startLocator.dragTo(endLocator);
 83 |     }, {
 84 |       status: `Dragged "${validatedParams.startElement}" to "${validatedParams.endElement}"`,
 85 |     });
 86 |   },
 87 | };
 88 | 
 89 | const hover: Tool = {
 90 |   capability: 'core',
 91 |   schema: {
 92 |     name: 'browser_hover',
 93 |     description: 'Hover over element on page',
 94 |     inputSchema: zodToJsonSchema(elementSchema),
 95 |   },
 96 | 
 97 |   handle: async (context, params) => {
 98 |     const validatedParams = elementSchema.parse(params);
 99 |     return await context.currentTab().runAndWaitWithSnapshot(async tab => {
100 |       const locator = tab.lastSnapshot().refLocator(validatedParams.ref);
101 |       await locator.hover();
102 |     }, {
103 |       status: `Hovered over "${validatedParams.element}"`,
104 |     });
105 |   },
106 | };
107 | 
// Input for browser_type: target element, the text, and optional submit / slow-typing flags.
const typeSchema = elementSchema.extend({
  text: z.string().describe('Text to type into the element'),
  submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'),
  slowly: z.boolean().optional().describe('Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.'),
});

/**
 * `browser_type` tool: resolves `ref` against the tab's last snapshot, then
 * either fills the text at once or presses it sequentially (`slowly`), and
 * presses Enter afterwards when `submit` is set.
 */
const type: Tool = {
  capability: 'core',
  schema: {
    name: 'browser_type',
    description: 'Type text into editable element',
    inputSchema: zodToJsonSchema(typeSchema),
  },

  async handle(context, params) {
    const { element, ref, text, submit, slowly } = typeSchema.parse(params);
    const currentTab = context.currentTab();
    return await currentTab.runAndWaitWithSnapshot(async tab => {
      const target = tab.lastSnapshot().refLocator(ref);
      if (!slowly)
        await target.fill(text);
      else
        await target.pressSequentially(text);
      if (submit)
        await target.press('Enter');
    }, { status: `Typed "${text}" into "${element}"` });
  },
};
137 | 
// Input for browser_select_option: target element plus the values to select.
const selectOptionSchema = elementSchema.extend({
  values: z.array(z.string()).describe('Array of values to select in the dropdown. This can be a single value or multiple values.'),
});

/**
 * `browser_select_option` tool: resolves `ref` against the tab's last snapshot
 * and selects the given values on the resulting locator.
 */
const selectOption: Tool = {
  capability: 'core',
  schema: {
    name: 'browser_select_option',
    description: 'Select an option in a dropdown',
    inputSchema: zodToJsonSchema(selectOptionSchema),
  },

  async handle(context, params) {
    const { element, ref, values } = selectOptionSchema.parse(params);
    const currentTab = context.currentTab();
    return await currentTab.runAndWaitWithSnapshot(async tab => {
      await tab.lastSnapshot().refLocator(ref).selectOption(values);
    }, { status: `Selected option in "${element}"` });
  },
};
160 | 
161 | // Input for browser_take_screenshot: `raw` switches the output from JPEG to PNG.
162 | const screenshotSchema = z.object({
163 |   raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'),
164 | });
165 | 
166 | // Tool that captures the current tab's page and returns it as a base64-encoded image item.
167 | const screenshot: Tool = {
168 |   capability: 'core',
169 |   schema: {
170 |     name: 'browser_take_screenshot',
171 |     description: `Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.`,
172 |     inputSchema: zodToJsonSchema(screenshotSchema),
173 |   },
174 | 
175 |   handle: async (context, params) => {
176 |     const { raw } = screenshotSchema.parse(params);
177 |     const format = raw ? 'png' : 'jpeg';
178 |     const options: playwright.PageScreenshotOptions = { type: format, scale: 'css', ...(raw ? {} : { quality: 50 }) };
179 |     const buffer = await context.currentTab().page.screenshot(options);
180 |     return { content: [{ type: 'image', data: buffer.toString('base64'), mimeType: `image/${format}` }] };
181 |   },
182 | };
183 | 
184 | export default [ // the tools this module exports, as one list
185 |   snapshot,
186 |   click,
187 |   drag,
188 |   hover,
189 |   type,
190 |   selectOption,
191 |   screenshot,
192 | ];
193 | 


--------------------------------------------------------------------------------
/src/tools/tabs.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) Microsoft Corporation.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | import { z } from 'zod';
 18 | import { zodToJsonSchema } from 'zod-to-json-schema';
 19 | 
 20 | import type { ToolFactory, Tool } from './tool';
 21 | 
 22 | // Tool that reports the open tabs as a single text item; it takes no parameters.
 23 | const listTabs: Tool = {
 24 |   capability: 'tabs',
 25 |   schema: {
 26 |     name: 'browser_tab_list',
 27 |     description: 'List browser tabs',
 28 |     inputSchema: zodToJsonSchema(z.object({})),
 29 |   },
 30 |   handle: async context => {
 31 |     // The context produces the listing text; this handler only wraps it.
 32 |     const listing = await context.listTabs();
 33 |     return {
 34 |       content: [{ type: 'text', text: listing }],
 35 |     };
 36 |   },
 37 | };
 38 | 
 39 | // Input for browser_tab_select.
 40 | const selectTabSchema = z.object({ index: z.number().describe('The index of the tab to select') });
 41 | 
 42 | // Factory for the tab-selection tool; `captureSnapshot` is forwarded to the tab's run().
 43 | const selectTab: ToolFactory = captureSnapshot => ({
 44 |   capability: 'tabs',
 45 |   schema: {
 46 |     name: 'browser_tab_select',
 47 |     description: 'Select a tab by index',
 48 |     inputSchema: zodToJsonSchema(selectTabSchema),
 49 |   },
 50 |   handle: async (context, params) => {
 51 |     const { index } = selectTabSchema.parse(params);
 52 |     await context.selectTab(index);
 53 |     // A no-op run() on the tab returned by ensureTab() produces the tool result.
 54 |     return await (await context.ensureTab()).run(async () => {}, { captureSnapshot });
 55 |   },
 56 | });
 57 | 
 58 | // Input for browser_tab_new.
 59 | const newTabSchema = z.object({ url: z.string().optional().describe('The URL to navigate to in the new tab. If not provided, the new tab will be blank.') });
 60 | 
 61 | // Tool that opens a tab, navigates it when a non-empty url is given, then runs a no-op with captureSnapshot on.
 62 | const newTab: Tool = {
 63 |   capability: 'tabs',
 64 |   schema: {
 65 |     name: 'browser_tab_new',
 66 |     description: 'Open a new tab',
 67 |     inputSchema: zodToJsonSchema(newTabSchema),
 68 |   },
 69 |   handle: async (context, params) => {
 70 |     const { url } = newTabSchema.parse(params);
 71 |     await context.newTab();
 72 |     if (url)
 73 |       await context.currentTab().navigate(url);
 74 |     return await context.currentTab().run(async () => {}, { captureSnapshot: true });
 75 |   },
 76 | };
 77 | 
 78 | // Input for browser_tab_close.
 79 | const closeTabSchema = z.object({ index: z.number().optional().describe('The index of the tab to close. Closes current tab if not provided.') });
 80 | 
 81 | // Factory for the tab-closing tool; `captureSnapshot` is forwarded to the remaining tab's run().
 82 | const closeTab: ToolFactory = captureSnapshot => ({
 83 |   capability: 'tabs',
 84 |   schema: {
 85 |     name: 'browser_tab_close',
 86 |     description: 'Close a tab',
 87 |     inputSchema: zodToJsonSchema(closeTabSchema),
 88 |   },
 89 |   handle: async (context, params) => {
 90 |     const { index } = closeTabSchema.parse(params);
 91 |     await context.closeTab(index);
 92 |     // NOTE(review): other handlers call currentTab() without a null check - confirm it can be falsy here.
 93 |     const remaining = await context.currentTab();
 94 |     if (!remaining) {
 95 |       // No current tab to run against: answer with the tab listing text instead.
 96 |       return {
 97 |         content: [{ type: 'text', text: await context.listTabs() }],
 98 |       };
 99 |     }
100 |     return await remaining.run(async () => {}, { captureSnapshot });
101 |   },
102 | });
103 | 
104 | export default (captureSnapshot: boolean) => [ // captureSnapshot reaches only the two factory-built tools
105 |   listTabs,
106 |   newTab,
107 |   selectTab(captureSnapshot),
108 |   closeTab(captureSnapshot),
109 | ];
110 | 


--------------------------------------------------------------------------------
/src/tools/tool.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types';
18 | import type { JsonSchema7Type } from 'zod-to-json-schema';
19 | import type { Context } from '../context';
20 | 
21 | export type ToolCapability = 'core' | 'tabs' | 'pdf' | 'history' | 'wait' | 'files' | 'install'; // allowed values of a tool's `capability` field
22 | 
23 | export type ToolSchema = { // describes a tool: its name, description and input schema
24 |   name: string;
25 |   description: string;
26 |   inputSchema: JsonSchema7Type; // JSON schema of the params the tool accepts
27 | };
28 | 
29 | export type ToolResult = { // what a tool handler resolves to
30 |   content: (ImageContent | TextContent)[]; // image and/or text items
31 |   isError?: boolean;
32 | };
33 | 
34 | export type Tool = { // a tool: capability tag, schema and handler
35 |   capability: ToolCapability;
36 |   schema: ToolSchema;
37 |   handle: (context: Context, params?: Record<string, any>) => Promise<ToolResult>; // params are optional and untyped at this level
38 | };
39 | 
40 | export type ToolFactory = (snapshot: boolean) => Tool; // builds a Tool from a boolean flag
41 | 


--------------------------------------------------------------------------------
/src/tools/utils.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import type * as playwright from 'playwright';
18 | 
19 | // Runs `callback`, then waits for the page to settle before resolving with the callback's result.
20 | // The wait ends when every request seen meanwhile has finished or failed, when a main-frame navigation has loaded, or after 10 seconds.
21 | export async function waitForCompletion<R>(page: playwright.Page, callback: () => Promise<R>): Promise<R> {
22 |   const requests = new Set<playwright.Request>();
23 |   let frameNavigated = false;
24 |   let waitCallback: () => void = () => {};
25 |   const waitBarrier = new Promise<void>(f => { waitCallback = f; });
26 | 
27 |   const requestListener = (request: playwright.Request) => requests.add(request);
28 |   // Shared by 'requestfinished' and 'requestfailed': a failed request never emits
29 |   // 'requestfinished', so it would otherwise pin the barrier until the timeout fires.
30 |   const requestSettledListener = (request: playwright.Request) => {
31 |     requests.delete(request);
32 |     if (!requests.size)
33 |       waitCallback();
34 |   };
35 | 
36 |   const frameNavigateListener = (frame: playwright.Frame) => {
37 |     if (frame.parentFrame())
38 |       return;
39 |     frameNavigated = true;
40 |     dispose(); // also cancels the timeout: from here on only the load state releases the barrier
41 |     // Release on rejection too (e.g. the page was closed) so the caller can never hang here.
42 |     void frame.waitForLoadState('load').catch(() => {}).then(() => waitCallback());
43 |   };
44 | 
45 |   page.on('request', requestListener);
46 |   page.on('requestfinished', requestSettledListener);
47 |   page.on('requestfailed', requestSettledListener);
48 |   page.on('framenavigated', frameNavigateListener);
49 |   const timeout = setTimeout(() => { dispose(); waitCallback(); }, 10000); // upper bound on the wait for requests
50 | 
51 |   const dispose = () => {
52 |     page.off('request', requestListener);
53 |     page.off('requestfinished', requestSettledListener);
54 |     page.off('requestfailed', requestSettledListener);
55 |     page.off('framenavigated', frameNavigateListener);
56 |     clearTimeout(timeout);
57 |   };
58 | 
59 |   try {
60 |     const result = await callback();
61 |     if (!requests.size && !frameNavigated)
62 |       waitCallback();
63 |     await waitBarrier;
64 |     // Fixed one-second pause, timed inside the page, after the barrier has been released.
65 |     await page.evaluate(() => new Promise(f => setTimeout(f, 1000)));
66 |     return result;
67 |   } finally {
68 |     dispose();
69 |   }
70 | }
71 | 
72 | export function sanitizeForFilePath(s: string) { // keeps '-', 0-9, A-Z, a-z and non-ASCII; each run of anything else becomes one '-'
73 |   return s.split(/[\x00-\x2C\x2E-\x2F\x3A-\x40\x5B-\x60\x7B-\x7F]+/g).join('-');
74 | }
75 | 


--------------------------------------------------------------------------------
/tests/basic.spec.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) Microsoft Corporation.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | import fs from 'fs/promises';
 18 | import { test, expect } from './fixtures';
 19 | 
 20 | test('browser_navigate', async ({ client }) => {
 21 |   // The response is the status line followed by the page URL, title and YAML snapshot.
 22 |   const response = await client.callTool({
 23 |     name: 'browser_navigate',
 24 |     arguments: { url: 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>' },
 25 |   });
 26 |   expect(response).toHaveTextContent(`
 27 | Navigated to data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
 28 | 
 29 | - Page URL: data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
 30 | - Page Title: Title
 31 | - Page Snapshot
 32 | \`\`\`yaml
 33 | - text: Hello, world!
 34 | \`\`\`
 35 | `
 36 |   );
 37 | });
 38 | 
 39 | test('browser_click', async ({ client }) => {
 40 |   const url = 'data:text/html,<html><title>Title</title><button>Submit</button></html>';
 41 |   await client.callTool({ name: 'browser_navigate', arguments: { url } });
 42 | 
 43 |   // The ref (s1...) is taken from the snapshot returned by the navigation;
 44 |   // the click response carries a newer snapshot, hence the s2 prefix expected below.
 45 |   const response = await client.callTool({
 46 |     name: 'browser_click',
 47 |     arguments: {
 48 |       element: 'Submit button',
 49 |       ref: 's1e3',
 50 |     },
 51 |   });
 52 | 
 53 |   expect(response).toHaveTextContent(`Clicked "Submit button"
 54 | 
 55 | - Page URL: data:text/html,<html><title>Title</title><button>Submit</button></html>
 56 | - Page Title: Title
 57 | - Page Snapshot
 58 | \`\`\`yaml
 59 | - button "Submit" [ref=s2e3]
 60 | \`\`\`
 61 | `);
 62 | });
 63 | 
 64 | 
 65 | test('browser_select_option', async ({ client }) => {
 66 |   const url = 'data:text/html,<html><title>Title</title><select><option value="foo">Foo</option><option value="bar">Bar</option></select></html>';
 67 |   await client.callTool({ name: 'browser_navigate', arguments: { url } });
 68 | 
 69 |   // Select by option value; the returned snapshot must mark "Bar" as selected.
 70 |   const response = await client.callTool({
 71 |     name: 'browser_select_option',
 72 |     arguments: {
 73 |       element: 'Select',
 74 |       ref: 's1e3',
 75 |       values: ['bar'],
 76 |     },
 77 |   });
 78 | 
 79 |   // The single-select shows up as a combobox in the snapshot.
 80 |   expect(response).toHaveTextContent(`Selected option in "Select"
 81 | 
 82 | - Page URL: data:text/html,<html><title>Title</title><select><option value="foo">Foo</option><option value="bar">Bar</option></select></html>
 83 | - Page Title: Title
 84 | - Page Snapshot
 85 | \`\`\`yaml
 86 | - combobox [ref=s2e3]:
 87 |     - option "Foo" [ref=s2e4]
 88 |     - option "Bar" [selected] [ref=s2e5]
 89 | \`\`\`
 90 | `);
 91 | });
 92 | 
 93 | test('browser_select_option (multiple)', async ({ client }) => {
 94 |   const url = 'data:text/html,<html><title>Title</title><select multiple><option value="foo">Foo</option><option value="bar">Bar</option><option value="baz">Baz</option></select></html>';
 95 |   await client.callTool({ name: 'browser_navigate', arguments: { url } });
 96 | 
 97 |   // Two values on a <select multiple>; both must come back marked as selected.
 98 |   const response = await client.callTool({
 99 |     name: 'browser_select_option',
100 |     arguments: {
101 |       element: 'Select',
102 |       ref: 's1e3',
103 |       values: ['bar', 'baz'],
104 |     },
105 |   });
106 | 
107 |   // The multi-select shows up as a listbox in the snapshot.
108 |   expect(response).toHaveTextContent(`Selected option in "Select"
109 | 
110 | - Page URL: data:text/html,<html><title>Title</title><select multiple><option value="foo">Foo</option><option value="bar">Bar</option><option value="baz">Baz</option></select></html>
111 | - Page Title: Title
112 | - Page Snapshot
113 | \`\`\`yaml
114 | - listbox [ref=s2e3]:
115 |     - option "Foo" [ref=s2e4]
116 |     - option "Bar" [selected] [ref=s2e5]
117 |     - option "Baz" [selected] [ref=s2e6]
118 | \`\`\`
119 | `);
120 | });
121 | 
122 | test('browser_file_upload', async ({ client }) => {
123 |   // Text the responses below are checked for while a file chooser is (or is no longer) pending.
124 |   const chooserNotice = 'There is a file chooser visible that requires browser_file_upload to be called';
125 | 
126 |   const navigated = await client.callTool({
127 |     name: 'browser_navigate',
128 |     arguments: {
129 |       url: 'data:text/html,<html><title>Title</title><input type="file" /><button>Button</button></html>',
130 |     },
131 |   });
132 |   expect(navigated).toContainTextContent('- textbox [ref=s1e3]');
133 | 
134 |   // Clicking the file input must be answered with the chooser notice.
135 |   const opened = await client.callTool({
136 |     name: 'browser_click',
137 |     arguments: {
138 |       element: 'Textbox',
139 |       ref: 's1e3',
140 |     },
141 |   });
142 |   expect(opened).toContainTextContent(chooserNotice);
143 | 
144 |   const filePath = test.info().outputPath('test.txt');
145 |   await fs.writeFile(filePath, 'Hello, world!');
146 | 
147 |   // Uploading clears the notice and the input shows the chosen file name.
148 |   const uploaded = await client.callTool({
149 |     name: 'browser_file_upload',
150 |     arguments: {
151 |       paths: [filePath],
152 |     },
153 |   });
154 |   expect(uploaded).not.toContainTextContent(chooserNotice);
155 |   expect(uploaded).toContainTextContent('textbox [ref=s3e3]: C:\\fakepath\\test.txt');
156 | 
157 |   // Clicking the input again brings the notice back.
158 |   const reopened = await client.callTool({
159 |     name: 'browser_click',
160 |     arguments: {
161 |       element: 'Textbox',
162 |       ref: 's3e3',
163 |     },
164 |   });
165 |   expect(reopened).toContainTextContent(chooserNotice);
166 |   expect(reopened).toContainTextContent('button "Button" [ref=s4e4]');
167 | 
168 |   // A different action instead of an upload: the notice must be gone afterwards.
169 |   const dismissed = await client.callTool({
170 |     name: 'browser_click',
171 |     arguments: {
172 |       element: 'Button',
173 |       ref: 's4e4',
174 |     },
175 |   });
176 |   expect(dismissed, 'not submitting browser_file_upload dismisses file chooser').not.toContainTextContent(chooserNotice);
177 | });
178 | 
179 | test('browser_type', async ({ client }) => {
180 |   // The page logs every keypress, so the console resource shows which keys reached the input.
181 |   const url = `data:text/html,<input type='keypress' onkeypress="console.log('Key pressed:', event.key, ', Text:', event.target.value)"></input>`;
182 |   await client.callTool({ name: 'browser_navigate', arguments: { url } });
183 | 
184 |   // Without `slowly` only the submitting Enter is logged, with the full text already in place.
185 |   await client.callTool({
186 |     name: 'browser_type',
187 |     arguments: {
188 |       element: 'textbox',
189 |       ref: 's1e3',
190 |       text: 'Hi!',
191 |       submit: true,
192 |     },
193 |   });
194 | 
195 |   const { contents } = await client.readResource({ uri: 'browser://console' });
196 | 
197 |   const expectedEntry = {
198 |     uri: 'browser://console',
199 |     mimeType: 'text/plain',
200 |     text: '[LOG] Key pressed: Enter , Text: Hi!',
201 |   };
202 |   expect(contents).toEqual([expectedEntry]);
203 | });
204 | 
205 | test('browser_type (slowly)', async ({ client }) => {
206 |   // The page logs every keydown together with the input's value at that moment.
207 |   const url = `data:text/html,<input type='text' onkeydown="console.log('Key pressed:', event.key, 'Text:', event.target.value)"></input>`;
208 |   await client.callTool({ name: 'browser_navigate', arguments: { url } });
209 | 
210 |   await client.callTool({
211 |     name: 'browser_type',
212 |     arguments: {
213 |       element: 'textbox',
214 |       ref: 's1e3',
215 |       text: 'Hi!',
216 |       submit: true,
217 |       slowly: true,
218 |     },
219 |   });
220 | 
221 |   // One entry per character plus the submitting Enter; each shows the text typed so far.
222 |   const expectedLog = [
223 |     '[LOG] Key pressed: H Text: ',
224 |     '[LOG] Key pressed: i Text: H',
225 |     '[LOG] Key pressed: ! Text: Hi',
226 |     '[LOG] Key pressed: Enter Text: Hi!',
227 |   ].join('\n');
228 | 
229 |   const { contents } = await client.readResource({ uri: 'browser://console' });
230 |   expect(contents).toEqual([{
231 |     uri: 'browser://console',
232 |     mimeType: 'text/plain',
233 |     text: expectedLog,
234 |   }]);
235 | });
236 | 


--------------------------------------------------------------------------------
/tests/capabilities.spec.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import { test, expect } from './fixtures';
18 | 
19 | test('test snapshot tool list', async ({ client }) => {
20 |   const listed = (await client.listTools()).tools.map(tool => tool.name);
21 |   expect(new Set(listed)).toEqual(new Set([ // compared as sets, so the listing order is irrelevant
22 |     'browser_click',
23 |     'browser_drag',
24 |     'browser_file_upload',
25 |     'browser_hover',
26 |     'browser_select_option',
27 |     'browser_type',
28 |     'browser_close',
29 |     'browser_install',
30 |     'browser_navigate_back',
31 |     'browser_navigate_forward',
32 |     'browser_navigate',
33 |     'browser_pdf_save',
34 |     'browser_press_key',
35 |     'browser_snapshot',
36 |     'browser_tab_close',
37 |     'browser_tab_list',
38 |     'browser_tab_new',
39 |     'browser_tab_select',
40 |     'browser_take_screenshot',
41 |     'browser_wait',
42 |   ]));
43 | });
44 | 
45 | test('test vision tool list', async ({ visionClient }) => {
46 |   const listed = (await visionClient.listTools()).tools.map(tool => tool.name);
47 |   expect(new Set(listed)).toEqual(new Set([ // tools of a server started with --vision; order is irrelevant
48 |     'browser_close',
49 |     'browser_file_upload',
50 |     'browser_install',
51 |     'browser_navigate_back',
52 |     'browser_navigate_forward',
53 |     'browser_navigate',
54 |     'browser_pdf_save',
55 |     'browser_press_key',
56 |     'browser_screen_capture',
57 |     'browser_screen_click',
58 |     'browser_screen_drag',
59 |     'browser_screen_move_mouse',
60 |     'browser_screen_type',
61 |     'browser_tab_close',
62 |     'browser_tab_list',
63 |     'browser_tab_new',
64 |     'browser_tab_select',
65 |     'browser_wait',
66 |   ]));
67 | });
68 | 
69 | test('test resources list', async ({ client }) => {
70 |   // Exactly one resource is expected; only its uri and mimeType are pinned.
71 |   const consoleResource = expect.objectContaining({
72 |     uri: 'browser://console',
73 |     mimeType: 'text/plain',
74 |   });
75 |   const { resources } = await client.listResources();
76 |   expect(resources).toEqual([consoleResource]);
77 | });
78 | 
79 | test('test capabilities', async ({ startClient }) => {
80 |   const client = await startClient({ args: ['--caps="core"'] }); // NOTE(review): the quotes are part of the argument value - confirm the --caps parsing expects that
81 |   const { tools } = await client.listTools();
82 |   const toolNames = tools.map(t => t.name);
83 |   // Tools that must not be listed by a server started with the --caps argument above.
84 |   const excluded = [
85 |     'browser_file_upload',
86 |     'browser_pdf_save',
87 |     'browser_screen_capture', 'browser_screen_click', 'browser_screen_drag',
88 |     'browser_screen_move_mouse', 'browser_screen_type',
89 |   ];
90 |   for (const name of excluded)
91 |     expect(toolNames).not.toContain(name);
92 | });
93 | 


--------------------------------------------------------------------------------
/tests/cdp.spec.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import { test, expect } from './fixtures';
18 | 
19 | test('cdp server', async ({ cdpEndpoint, startClient }) => {
20 |   // The server is started with --cdp-endpoint pointing at the browser from the cdpEndpoint fixture.
21 |   const client = await startClient({ args: [`--cdp-endpoint=${cdpEndpoint}`] });
22 |   const response = await client.callTool({
23 |     name: 'browser_navigate',
24 |     arguments: { url: 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>' },
25 |   });
26 |   expect(response).toHaveTextContent(`
27 | Navigated to data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
28 | 
29 | - Page URL: data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
30 | - Page Title: Title
31 | - Page Snapshot
32 | \`\`\`yaml
33 | - text: Hello, world!
34 | \`\`\`
35 | `
36 |   );
37 | });
38 | 


--------------------------------------------------------------------------------
/tests/console.spec.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import { test, expect } from './fixtures';
18 | 
19 | test('browser://console', async ({ client }) => {
20 |   // The page logs one message and one error while loading.
21 |   const url = 'data:text/html,<html><script>console.log("Hello, world!");console.error("Error"); </script></html>';
22 |   await client.callTool({
23 |     name: 'browser_navigate',
24 |     arguments: { url },
25 |   });
26 | 
27 |   // The resource must report both, one per line, each tagged with its level.
28 |   const { contents } = await client.readResource({ uri: 'browser://console' });
29 |   const expectedEntry = {
30 |     uri: 'browser://console',
31 |     mimeType: 'text/plain',
32 |     text: '[LOG] Hello, world!\n[ERROR] Error',
33 |   };
34 |   expect(contents).toEqual([expectedEntry]);
35 | });
36 | 


--------------------------------------------------------------------------------
/tests/fixtures.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) Microsoft Corporation.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | import path from 'path';
 18 | import { chromium } from 'playwright';
 19 | 
 20 | import { test as baseTest, expect as baseExpect } from '@playwright/test';
 21 | import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
 22 | import { Client } from '@modelcontextprotocol/sdk/client/index.js';
 23 | 
 24 | type Fixtures = { // fixtures added to the base Playwright test below
 25 |   client: Client; // client from startClient() with no extra arguments
 26 |   visionClient: Client; // client from startClient() with --vision
 27 |   startClient: (options?: { args?: string[] }) => Promise<Client>; // spawns cli.js over stdio and returns a connected client
 28 |   wsEndpoint: string; // wsEndpoint() of a chromium.launchServer() instance
 29 |   cdpEndpoint: string; // http://localhost:<port> of a Chrome started with --remote-debugging-port
 30 | };
 31 | 
 32 | export const test = baseTest.extend<Fixtures>({ // base Playwright test plus MCP client and browser endpoint fixtures
 33 |   // Client connected to a server started with the default arguments.
 34 |   client: async ({ startClient }, use) => {
 35 |     await use(await startClient());
 36 |   },
 37 |   // Client connected to a server started with --vision.
 38 |   visionClient: async ({ startClient }, use) => {
 39 |     await use(await startClient({ args: ['--vision'] }));
 40 |   },
 41 |   // Factory that spawns cli.js over stdio; every client it hands out is closed at teardown.
 42 |   startClient: async ({ }, use, testInfo) => {
 43 |     const userDataDir = testInfo.outputPath('user-data-dir');
 44 |     const startedClients: Client[] = [];
 45 |     // Awaiting use() keeps the teardown below from running before the test body has finished.
 46 |     await use(async options => {
 47 |       const args = ['--headless', '--user-data-dir', userDataDir, ...(options?.args ?? [])];
 48 |       const transport = new StdioClientTransport({
 49 |         command: 'node',
 50 |         args: [path.join(__dirname, '../cli.js'), ...args],
 51 |       });
 52 |       const client = new Client({ name: 'test', version: '1.0.0' });
 53 |       startedClients.push(client); // registered before connect() so a failed start is closed too
 54 |       await client.connect(transport);
 55 |       await client.ping();
 56 |       return client;
 57 |     });
 58 | 
 59 |     // Teardown: close every client handed out during the test.
 60 |     await Promise.all(startedClients.map(started => started.close()));
 61 |   },
 62 |   // Websocket endpoint of a browser server that is closed again after the test.
 63 |   wsEndpoint: async ({ }, use) => {
 64 |     const browserServer = await chromium.launchServer();
 65 |     await use(browserServer.wsEndpoint());
 66 |     await browserServer.close();
 67 |   },
 68 |   // CDP endpoint of a Chrome instance; the port is 3200 plus TEST_PARALLEL_INDEX.
 69 |   cdpEndpoint: async ({ }, use, testInfo) => {
 70 |     const port = 3200 + (+process.env.TEST_PARALLEL_INDEX!);
 71 |     const browser = await chromium.launchPersistentContext(testInfo.outputPath('user-data-dir'), {
 72 |       channel: 'chrome',
 73 |       args: [`--remote-debugging-port=${port}`],
 74 |     });
 75 |     await use(`http://localhost:${port}`);
 76 |     await browser.close();
 77 |   },
 78 | });
 79 | 
 80 | // Result of Client.callTool(); the matchers below read its `content` items.
 81 | type Response = Awaited<ReturnType<Client['callTool']>>;
 82 | 
 83 | // `expect` extended with matchers for the text of a tool response; both work with `.not`.
 84 | export const expect = baseExpect.extend({
 85 |   // String: the first item's trimmed text must equal the trimmed expectation. RegExp: it must match.
 86 |   toHaveTextContent(response: Response, content: string | RegExp) {
 87 |     const isNot = this.isNot;
 88 |     try {
 89 |       const text = (response.content as any)[0].text;
 90 |       if (typeof content === 'string') {
 91 |         const assertion = isNot ? baseExpect(text.trim()).not : baseExpect(text.trim());
 92 |         assertion.toBe(content.trim());
 93 |       } else {
 94 |         const assertion = isNot ? baseExpect(text).not : baseExpect(text);
 95 |         assertion.toMatch(content);
 96 |       }
 97 |     } catch (e) {
 98 |       return {
 99 |         pass: isNot,
100 |         message: () => e.message,
101 |       };
102 |     }
103 |     return {
104 |       pass: !isNot,
105 |       message: () => ``,
106 |     };
107 |   },
108 | 
109 |   // The i-th expected fragment must be contained in the text of the i-th content item.
110 |   // A single string is treated as a one-element list.
111 |   toContainTextContent(response: Response, content: string | string[]) {
112 |     const isNot = this.isNot;
113 |     try {
114 |       const expected = Array.isArray(content) ? content : [content];
115 |       const texts = (response.content as any).map(c => c.text);
116 |       // Walk the expectations rather than the response items: extra items are ignored, and a
117 |       // missing or non-text item counts as empty text instead of passing without any check.
118 |       expected.forEach((fragment, i) => {
119 |         const actual = texts[i] ?? '';
120 |         if (isNot)
121 |           baseExpect(actual).not.toContain(fragment);
122 |         else
123 |           baseExpect(actual).toContain(fragment);
124 |       });
125 |     } catch (e) {
126 |       return {
127 |         pass: isNot,
128 |         message: () => e.message,
129 |       };
130 |     }
131 |     return {
132 |       pass: !isNot,
133 |       message: () => ``,
134 |     };
135 |   },
136 | });
133 | 


--------------------------------------------------------------------------------
/tests/httpApi.spec.ts:
--------------------------------------------------------------------------------
  1 | import { test, expect } from '@playwright/test';
  2 | import { spawn, ChildProcessWithoutNullStreams } from 'child_process';
  3 | import getPort from 'get-port'; // Utility to find an available port
  4 | 
  5 | // Spawns the compiled cli.js with --http-port on a free port and resolves once the server has logged
  6 | // that it is listening; rejects if the process exits first or is not ready within 15 seconds.
  7 | async function startServer(): Promise<{ serverProcess: ChildProcessWithoutNullStreams, port: number, url: string }> {
  8 |   const port = await getPort();
  9 |   const url = `http://localhost:${port}`;
 10 |   console.log(`Starting HTTP API server on port ${port}...`);
 11 | 
 12 |   const serverProcess = spawn('node', ['cli.js', '--http-port', String(port)], {
 13 |     stdio: ['pipe', 'pipe', 'pipe'], // Pipe all streams to satisfy ChildProcessWithoutNullStreams type
 14 |     detached: true, // Allows killing the process group
 15 |   });
 16 | 
 17 |   let stderrOutput = '';
 18 |   serverProcess.stderr.on('data', (data) => {
 19 |     stderrOutput += data.toString();
 20 |     console.error(`[Server STDERR]: ${data}`);
 21 |   });
 22 | 
 23 |   await new Promise<void>((resolve, reject) => {
 24 |     let startupOutput = '';
 25 |     const settle = (error?: Error) => { // single exit path: the first of ready/exit/timeout detaches the other two
 26 |       clearTimeout(startupTimer);
 27 |       serverProcess.stdout.removeListener('data', handleData);
 28 |       serverProcess.stderr.removeListener('data', handleData);
 29 |       serverProcess.removeListener('exit', handleExit);
 30 |       if (error)
 31 |         return reject(error);
 32 |       resolve();
 33 |     };
 34 |     const handleData = (data: Buffer) => {
 35 |       const output = data.toString();
 36 |       console.log(`[Server STDOUT]: ${output}`);
 37 |       // Accumulate so the message is still found when it arrives split across chunks.
 38 |       startupOutput += output;
 39 |       if (startupOutput.includes(`HTTP API server listening on port ${port}`))
 40 |         settle();
 41 |     };
 42 |     const handleExit = (code: number | null) => settle(new Error(`Server process exited prematurely with code ${code}. Stderr: ${stderrOutput}`));
 43 |     const startupTimer = setTimeout(() => {
 44 |       serverProcess.kill(); // the caller never receives this process, so stop it here
 45 |       settle(new Error(`Server failed to start within timeout. Stderr: ${stderrOutput}`));
 46 |     }, 15000);
 47 |     serverProcess.stdout.on('data', handleData);
 48 |     serverProcess.stderr.on('data', handleData); // the ready message is looked for on both streams
 49 |     serverProcess.once('exit', handleExit);
 50 |   });
 51 | 
 52 |   console.log(`Server started successfully on ${url}`);
 53 |   return { serverProcess, port, url };
 54 | }
 55 | 
 56 | // Test suite for HTTP API
 57 | test.describe('HTTP API', () => {
 58 |   let serverProcess: ChildProcessWithoutNullStreams;
 59 |   let serverUrl: string;
 60 | 
 61 |   // Start server before all tests in this suite
 62 |   test.beforeAll(async () => {
 63 |     const { serverProcess: proc, url } = await startServer();
 64 |     serverProcess = proc;
 65 |     serverUrl = url;
 66 |   });
 67 | 
 68 |   // Stop server after all tests in this suite
 69 |   test.afterAll(async () => {
 70 |     console.log('Stopping HTTP API server...');
 71 |     if (serverProcess && !serverProcess.killed) {
 72 |       // Kill the process group to ensure child processes are also terminated
 73 |       process.kill(-serverProcess.pid!, 'SIGINT');
 74 |       await new Promise<void>(resolve => serverProcess.once('close', resolve));
 75 |       console.log('Server stopped.');
 76 |     }
 77 |   });
 78 | 
 79 |   // Test case 1: Basic GET request for browser_tab_list
 80 |   test('should handle GET /tools/browser_tab_list', async () => {
 81 |     const response = await fetch(`${serverUrl}/tools/browser_tab_list`);
 82 |     expect(response.status).toBe(200);
 83 | 
 84 |     const body = await response.json();
 85 |     expect(body.success).toBe(true);
 86 |     expect(body.result).toBeDefined();
 87 |     // Since it's a fresh server, expect "No tabs open" or an initial about:blank tab
 88 |     expect(body.result.content[0].type).toBe('text');
 89 |     // The exact text might vary slightly, check for key parts
 90 |     expect(body.result.content[0].text).toContain('tabs');
 91 |   });
 92 | 
 93 |   // Test case 2: Basic POST request for browser_navigate
 94 |   test('should handle POST /tools/browser_navigate', async () => {
 95 |     const sessionId = 'test-nav-session';
 96 |     const targetUrl = 'https://example.com';
 97 | 
 98 |     const response = await fetch(`${serverUrl}/tools/browser_navigate`, {
 99 |       method: 'POST',
100 |       headers: {
101 |         'Content-Type': 'application/json',
102 |         'Session-Id': sessionId,
103 |       },
104 |       body: JSON.stringify({ url: targetUrl }),
105 |     });
106 | 
107 |     expect(response.status).toBe(200);
108 |     const body = await response.json();
109 |     expect(body.success).toBe(true);
110 |     expect(body.result).toBeDefined();
111 |     expect(body.result.content[0].type).toBe('text');
112 |     expect(body.result.content[0].text).toContain(`Navigated to ${targetUrl}`);
113 |     expect(body.result.content[0].text).toContain('Example Domain'); // Check for page title/content
114 |   });
115 | 
116 |    // Test case 3: POST request with session reuse
117 |    test('should reuse session for subsequent POST requests', async () => {
118 |     const sessionId = 'test-reuse-session';
119 |     const url1 = 'https://example.com';
120 |     const url2 = 'about:blank';
121 | 
122 |     // First request (creates session)
123 |     await fetch(`${serverUrl}/tools/browser_navigate`, {
124 |       method: 'POST',
125 |       headers: { 'Content-Type': 'application/json', 'Session-Id': sessionId },
126 |       body: JSON.stringify({ url: url1 }),
127 |     });
128 | 
129 |     // Second request (reuses session)
130 |     const response = await fetch(`${serverUrl}/tools/browser_navigate`, {
131 |       method: 'POST',
132 |       headers: { 'Content-Type': 'application/json', 'Session-Id': sessionId },
133 |       body: JSON.stringify({ url: url2 }),
134 |     });
135 | 
136 |     expect(response.status).toBe(200);
137 |     const body = await response.json();
138 |     expect(body.success).toBe(true);
139 |     expect(body.result.content[0].text).toContain(`Navigated to ${url2}`);
140 |     // Check that it's not the previous page's content
141 |     expect(body.result.content[0].text).not.toContain('Example Domain');
142 |   });
143 | 
144 |   // Test case 4: Error handling for non-existent tool
145 |   test('should return error for non-existent tool', async () => {
146 |     const response = await fetch(`${serverUrl}/tools/invalid_tool_name`, {
147 |       method: 'POST',
148 |       headers: { 'Content-Type': 'application/json' },
149 |       body: JSON.stringify({}),
150 |     });
151 | 
152 |     expect(response.status).toBe(500); // Or 404 if we implement specific check
153 |     const body = await response.json();
154 |     expect(body.success).toBe(false);
155 |     expect(body.error).toContain('Tool "invalid_tool_name" not found');
156 |   });
157 | 
158 |   // Test case 5: Error handling for missing required parameter
159 |   test('should return error for missing required parameter', async () => {
160 |     const response = await fetch(`${serverUrl}/tools/browser_navigate`, {
161 |       method: 'POST',
162 |       headers: { 'Content-Type': 'application/json' },
163 |       body: JSON.stringify({}), // Missing 'url'
164 |     });
165 | 
166 |     expect(response.status).toBe(500);
167 |     const body = await response.json();
168 |     expect(body.success).toBe(false);
169 |     expect(body.error).toContain('Required'); // Check for Zod validation error message
170 |     // Make the check less sensitive to whitespace around brackets/colons
171 |     expect(body.error).toContain('"path":');
172 |     expect(body.error).toContain('"url"');
173 |   });
174 | 
175 |   // Add more tests here for:
176 |   // - Different tools (click, type, snapshot)
177 |   // - Different sessions interacting concurrently (might be harder to test reliably)
178 |   // - GET /tools/browser_tab_list with specific session
179 |   // - Error cases (invalid ref, etc.)
180 | 
181 | });


--------------------------------------------------------------------------------
/tests/iframes.spec.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import { test, expect } from './fixtures';
18 | 
// Navigates to a page with nested iframes and checks that the returned text
// contains one snapshot covering the visible frames, then clicks a button
// using a ref taken from inside the first iframe.
test('stitched aria frames', async ({ client }) => {
  const pageUrl = `data:text/html,<h1>Hello</h1><iframe src="data:text/html,<button>World</button><main><iframe src='data:text/html,<p>Nested</p>'></iframe></main>"></iframe><iframe src="data:text/html,<h1>Should be invisible</h1>" style="display: none;"></iframe>`;
  const expectedSnapshot = `
\`\`\`yaml
- heading "Hello" [level=1] [ref=s1e3]
- iframe [ref=s1e4]:
    - button "World" [ref=f1s1e3]
    - main [ref=f1s1e4]:
        - iframe [ref=f1s1e5]:
            - paragraph [ref=f2s1e3]: Nested
\`\`\`
`;

  const navigateResult = await client.callTool({
    name: 'browser_navigate',
    arguments: { url: pageUrl },
  });
  expect(navigateResult).toContainTextContent(expectedSnapshot);

  // The ref below is the one the snapshot assigns to the button inside the iframe.
  const clickResult = await client.callTool({
    name: 'browser_click',
    arguments: { element: 'World', ref: 'f1s1e3' },
  });
  expect(clickResult).toContainTextContent('Clicked "World"');
});
44 | 


--------------------------------------------------------------------------------
/tests/launch.spec.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import { test, expect } from './fixtures';
18 | 
// Navigates, closes the page with `browser_close`, then navigates again and
// expects the usual navigation report for the same URL.
test('test reopen browser', async ({ client }) => {
  const pageUrl = 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>';
  const navigate = () => client.callTool({
    name: 'browser_navigate',
    arguments: { url: pageUrl },
  });

  await navigate();

  const closeResult = await client.callTool({ name: 'browser_close' });
  expect(closeResult).toHaveTextContent('Page closed');

  const reopenResult = await navigate();
  expect(reopenResult).toHaveTextContent(`
Navigated to data:text/html,<html><title>Title</title><body>Hello, world!</body></html>

- Page URL: data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
- Page Title: Title
- Page Snapshot
\`\`\`yaml
- text: Hello, world!
\`\`\`
`);
});
47 | 
// Starts a client with a bogus --executable-path and expects the navigation
// response to report that the executable does not exist.
test('executable path', async ({ startClient }) => {
  const launchArgs = [`--executable-path=bogus`];
  const client = await startClient({ args: launchArgs });

  const pageUrl = 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>';
  const response = await client.callTool({
    name: 'browser_navigate',
    arguments: { url: pageUrl },
  });

  expect(response).toContainTextContent(`executable doesn't exist`);
});
58 | 


--------------------------------------------------------------------------------
/tests/pdf.spec.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import { test, expect } from './fixtures';
18 | 
// Starts a client with the `--caps="no-pdf"` argument and expects the
// `browser_pdf_save` tool to be reported as not found.
test('save as pdf unavailable', async ({ startClient }) => {
  const client = await startClient({ args: ['--caps="no-pdf"'] });

  const pageUrl = 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>';
  await client.callTool({
    name: 'browser_navigate',
    arguments: { url: pageUrl },
  });

  const saveResult = await client.callTool({ name: 'browser_pdf_save' });
  expect(saveResult).toHaveTextContent(/Tool \"browser_pdf_save\" not found/);
});
32 | 
// Navigates to a simple page, checks the navigation report, then saves the
// page as a PDF and checks the reported file name.
test('save as pdf', async ({ client }) => {
  expect(await client.callTool({
    name: 'browser_navigate',
    arguments: {
      url: 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>',
    },
  })).toHaveTextContent(`
Navigated to data:text/html,<html><title>Title</title><body>Hello, world!</body></html>

- Page URL: data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
- Page Title: Title
- Page Snapshot
\`\`\`yaml
- text: Hello, world!
\`\`\`
`
  );

  const response = await client.callTool({
    name: 'browser_pdf_save',
  });
  // The dot before `pdf` is escaped so only a real `.pdf` extension matches;
  // unescaped it would accept any character in that position.
  expect(response).toHaveTextContent(/^Saved as.*page-[^:]+\.pdf$/);
});
56 | 


--------------------------------------------------------------------------------
/tests/sse.spec.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import { spawn } from 'node:child_process';
18 | import path from 'node:path';
19 | import { test } from './fixtures';
20 | 
// Starts the CLI with `--port 0`, reads the announced URL from its stdout,
// then connects an SSE client to that URL and pings the server.
test('sse transport', async () => {
  const cp = spawn('node', [path.join(__dirname, '../cli.js'), '--port', '0'], { stdio: 'pipe' });
  try {
    let stdout = '';
    const url = await new Promise<string>((resolve, reject) => {
      // Fail fast instead of hanging until the test timeout when the server
      // cannot be spawned or dies before announcing its URL.
      cp.once('error', reject);
      cp.once('exit', code => reject(new Error(`Server exited with code ${code} before listening. Stdout: ${stdout}`)));
      cp.stdout?.on('data', data => {
        stdout += data.toString();
        const match = stdout.match(/Listening on (http:\/\/.*)/);
        if (match)
          resolve(match[1]);
      });
    });

    // need dynamic import b/c of some ESM nonsense
    const { SSEClientTransport } = await import('@modelcontextprotocol/sdk/client/sse.js');
    const { Client } = await import('@modelcontextprotocol/sdk/client/index.js');
    const transport = new SSEClientTransport(new URL(url));
    const client = new Client({ name: 'test', version: '1.0.0' });
    await client.connect(transport);
    await client.ping();
  } finally {
    // Always stop the server, whether the test passed or failed.
    cp.kill();
  }
});
43 | 


--------------------------------------------------------------------------------
/tests/tabs.spec.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) Microsoft Corporation.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | import { chromium } from 'playwright';
 18 | 
 19 | import { test, expect } from './fixtures';
 20 | 
 21 | import type { Client } from '@modelcontextprotocol/sdk/client/index.js';
 22 | 
 23 | async function createTab(client: Client, title: string, body: string) {
 24 |   return await client.callTool({
 25 |     name: 'browser_tab_new',
 26 |     arguments: {
 27 |       url: `data:text/html,<title>${title}</title><body>${body}</body>`,
 28 |     },
 29 |   });
 30 | }
 31 | 
 32 | test('create new tab', async ({ client }) => {
 33 |   expect(await createTab(client, 'Tab one', 'Body one')).toHaveTextContent(`
 34 | Open tabs:
 35 | - 1: [] (about:blank)
 36 | - 2: (current) [Tab one] (data:text/html,<title>Tab one</title><body>Body one</body>)
 37 | 
 38 | Current tab:
 39 | - Page URL: data:text/html,<title>Tab one</title><body>Body one</body>
 40 | - Page Title: Tab one
 41 | - Page Snapshot
 42 | \`\`\`yaml
 43 | - text: Body one
 44 | \`\`\``);
 45 | 
 46 |   expect(await createTab(client, 'Tab two', 'Body two')).toHaveTextContent(`
 47 | Open tabs:
 48 | - 1: [] (about:blank)
 49 | - 2: [Tab one] (data:text/html,<title>Tab one</title><body>Body one</body>)
 50 | - 3: (current) [Tab two] (data:text/html,<title>Tab two</title><body>Body two</body>)
 51 | 
 52 | Current tab:
 53 | - Page URL: data:text/html,<title>Tab two</title><body>Body two</body>
 54 | - Page Title: Tab two
 55 | - Page Snapshot
 56 | \`\`\`yaml
 57 | - text: Body two
 58 | \`\`\``);
 59 | });
 60 | 
 61 | test('select tab', async ({ client }) => {
 62 |   await createTab(client, 'Tab one', 'Body one');
 63 |   await createTab(client, 'Tab two', 'Body two');
 64 |   expect(await client.callTool({
 65 |     name: 'browser_tab_select',
 66 |     arguments: {
 67 |       index: 2,
 68 |     },
 69 |   })).toHaveTextContent(`
 70 | Open tabs:
 71 | - 1: [] (about:blank)
 72 | - 2: (current) [Tab one] (data:text/html,<title>Tab one</title><body>Body one</body>)
 73 | - 3: [Tab two] (data:text/html,<title>Tab two</title><body>Body two</body>)
 74 | 
 75 | Current tab:
 76 | - Page URL: data:text/html,<title>Tab one</title><body>Body one</body>
 77 | - Page Title: Tab one
 78 | - Page Snapshot
 79 | \`\`\`yaml
 80 | - text: Body one
 81 | \`\`\``);
 82 | });
 83 | 
 84 | test('close tab', async ({ client }) => {
 85 |   await createTab(client, 'Tab one', 'Body one');
 86 |   await createTab(client, 'Tab two', 'Body two');
 87 |   expect(await client.callTool({
 88 |     name: 'browser_tab_close',
 89 |     arguments: {
 90 |       index: 3,
 91 |     },
 92 |   })).toHaveTextContent(`
 93 | Open tabs:
 94 | - 1: [] (about:blank)
 95 | - 2: (current) [Tab one] (data:text/html,<title>Tab one</title><body>Body one</body>)
 96 | 
 97 | Current tab:
 98 | - Page URL: data:text/html,<title>Tab one</title><body>Body one</body>
 99 | - Page Title: Tab one
100 | - Page Snapshot
101 | \`\`\`yaml
102 | - text: Body one
103 | \`\`\``);
104 | });
105 | 
// Connects to the browser over CDP, starts an MCP client against the same
// endpoint, navigates through the client and checks that the page seen over
// the direct CDP connection now has the navigated page's title.
test('reuse first tab when navigating', async ({ startClient, cdpEndpoint }) => {
  const browser = await chromium.connectOverCDP(cdpEndpoint);
  const existingContext = browser.contexts()[0];
  // NOTE(review): this list is read before the client navigates; if
  // `context.pages()` returns a snapshot, the length check below cannot
  // observe tabs opened afterwards — confirm against the Playwright API.
  const pages = existingContext.pages();

  const client = await startClient({ args: [`--cdp-endpoint=${cdpEndpoint}`] });
  const pageUrl = 'data:text/html,<title>Title</title><body>Body</body>';
  await client.callTool({
    name: 'browser_navigate',
    arguments: { url: pageUrl },
  });

  expect(pages.length).toBe(1);
  const firstPageTitle = await pages[0].title();
  expect(firstPageTitle).toBe('Title');
});
122 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ESNext",
 4 |     "skipLibCheck": true,
 5 |     "esModuleInterop": true,
 6 |     "moduleResolution": "node",
 7 |     "strict": true,
 8 |     "module": "CommonJS",
 9 |     "outDir": "./lib"
10 |   },
11 |   "include": [
12 |     "src",
13 |   ],
14 | }
15 | 


--------------------------------------------------------------------------------
/utils/copyright.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) Microsoft Corporation.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 


--------------------------------------------------------------------------------