├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── assets ├── browserbase-demo.png ├── browserbase-mcp.png ├── cover-mcp.png ├── cover.png ├── session.png └── stagehand-mcp.png ├── browserbase ├── .npmignore ├── Dockerfile ├── README.md ├── cli.js ├── config.d.ts ├── index.d.ts ├── index.js ├── package-lock.json ├── package.json ├── playwright.config.ts ├── smithery.yaml ├── src │ ├── config.ts │ ├── context.ts │ ├── index.ts │ ├── pageSnapshot.ts │ ├── program.ts │ ├── server.ts │ ├── sessionManager.ts │ ├── tools │ │ ├── common.ts │ │ ├── context.ts │ │ ├── getText.ts │ │ ├── hover.ts │ │ ├── keyboard.ts │ │ ├── navigate.ts │ │ ├── selectOption.ts │ │ ├── session.ts │ │ ├── snapshot.ts │ │ ├── tool.ts │ │ ├── toolUtils.ts │ │ └── utils.ts │ └── transport.ts ├── tests │ └── .gitkeep ├── tsconfig.json └── utils │ └── .gitkeep └── stagehand ├── README.md ├── package-lock.json ├── package.json ├── src ├── index.ts ├── logging.ts ├── prompts.ts ├── resources.ts ├── server.ts ├── tools.ts └── utils.ts └── tsconfig.json /.gitattributes: -------------------------------------------------------------------------------- 1 | package-lock.json linguist-generated=true 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | .pnpm-debug.log* 9 | 10 | # Diagnostic reports (https://nodejs.org/api/report.html) 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 12 | 13 | # Runtime data 14 | pids 15 | *.pid 16 | *.seed 17 | *.pid.lock 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # Grunt intermediate storage 
(https://gruntjs.com/creating-plugins#storing-task-files) 30 | .grunt 31 | 32 | # Bower dependency directory (https://bower.io/) 33 | bower_components 34 | 35 | # node-waf configuration 36 | .lock-wscript 37 | 38 | # Compiled binary addons (https://nodejs.org/api/addons.html) 39 | build/Release 40 | 41 | # Dependency directories 42 | node_modules/ 43 | jspm_packages/ 44 | 45 | # Snowpack dependency directory (https://snowpack.dev/) 46 | web_modules/ 47 | 48 | # TypeScript cache 49 | *.tsbuildinfo 50 | 51 | # Optional npm cache directory 52 | .npm 53 | 54 | # Optional eslint cache 55 | .eslintcache 56 | 57 | # Optional stylelint cache 58 | .stylelintcache 59 | 60 | # Microbundle cache 61 | .rpt2_cache/ 62 | .rts2_cache_cjs/ 63 | .rts2_cache_es/ 64 | .rts2_cache_umd/ 65 | 66 | # Optional REPL history 67 | .node_repl_history 68 | 69 | # Output of 'npm pack' 70 | *.tgz 71 | 72 | # Yarn Integrity file 73 | .yarn-integrity 74 | 75 | # dotenv environment variable files 76 | .env 77 | .env.development.local 78 | .env.test.local 79 | .env.production.local 80 | .env.local 81 | 82 | # parcel-bundler cache (https://parceljs.org/) 83 | .cache 84 | .parcel-cache 85 | 86 | # Next.js build output 87 | .next 88 | out 89 | 90 | # Nuxt.js build / generate output 91 | .nuxt 92 | dist 93 | 94 | # Gatsby files 95 | .cache/ 96 | # Comment in the public line in if your project uses Gatsby and not Next.js 97 | # https://nextjs.org/blog/next-9-1#public-directory-support 98 | # public 99 | 100 | # vuepress build output 101 | .vuepress/dist 102 | 103 | # vuepress v2.x temp and cache directory 104 | .temp 105 | .cache 106 | 107 | # Docusaurus cache and generated files 108 | .docusaurus 109 | 110 | # Serverless directories 111 | .serverless/ 112 | 113 | # FuseBox cache 114 | .fusebox/ 115 | 116 | # DynamoDB Local files 117 | .dynamodb/ 118 | 119 | # TernJS port file 120 | .tern-port 121 | 122 | # Stores VSCode versions used for testing VSCode extensions 123 | .vscode-test 124 | 125 | # yarn v2 
126 | .yarn/cache 127 | .yarn/unplugged 128 | .yarn/build-state.yml 129 | .yarn/install-state.gz 130 | .pnp.* 131 | 132 | build/ 133 | 134 | gcp-oauth.keys.json 135 | .*-server-credentials.json 136 | 137 | # Byte-compiled / optimized / DLL files 138 | __pycache__/ 139 | *.py[cod] 140 | *$py.class 141 | 142 | # C extensions 143 | *.so 144 | 145 | # Distribution / packaging 146 | .Python 147 | build/ 148 | develop-eggs/ 149 | dist/ 150 | downloads/ 151 | eggs/ 152 | .eggs/ 153 | lib/ 154 | lib64/ 155 | parts/ 156 | sdist/ 157 | var/ 158 | wheels/ 159 | share/python-wheels/ 160 | *.egg-info/ 161 | .installed.cfg 162 | *.egg 163 | MANIFEST 164 | 165 | # PyInstaller 166 | # Usually these files are written by a python script from a template 167 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 168 | *.manifest 169 | *.spec 170 | 171 | # Installer logs 172 | pip-log.txt 173 | pip-delete-this-directory.txt 174 | 175 | # Unit test / coverage reports 176 | htmlcov/ 177 | .tox/ 178 | .nox/ 179 | .coverage 180 | .coverage.* 181 | .cache 182 | nosetests.xml 183 | coverage.xml 184 | *.cover 185 | *.py,cover 186 | .hypothesis/ 187 | .pytest_cache/ 188 | cover/ 189 | 190 | # Translations 191 | *.mo 192 | *.pot 193 | 194 | # Django stuff: 195 | *.log 196 | local_settings.py 197 | db.sqlite3 198 | db.sqlite3-journal 199 | 200 | # Flask stuff: 201 | instance/ 202 | .webassets-cache 203 | 204 | # Scrapy stuff: 205 | .scrapy 206 | 207 | # Sphinx documentation 208 | docs/_build/ 209 | 210 | # PyBuilder 211 | .pybuilder/ 212 | target/ 213 | 214 | # Jupyter Notebook 215 | .ipynb_checkpoints 216 | 217 | # IPython 218 | profile_default/ 219 | ipython_config.py 220 | 221 | # pyenv 222 | # For a library or package, you might want to ignore these files since the code is 223 | # intended to run in multiple environments; otherwise, check them in: 224 | # .python-version 225 | 226 | # pipenv 227 | # According to pypa/pipenv#598, it is recommended to include 
Pipfile.lock in version control. 228 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 229 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 230 | # install all needed dependencies. 231 | #Pipfile.lock 232 | 233 | # poetry 234 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 235 | # This is especially recommended for binary packages to ensure reproducibility, and is more 236 | # commonly ignored for libraries. 237 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 238 | #poetry.lock 239 | 240 | # pdm 241 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 242 | #pdm.lock 243 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 244 | # in version control. 245 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 246 | .pdm.toml 247 | .pdm-python 248 | .pdm-build/ 249 | 250 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 251 | __pypackages__/ 252 | 253 | # Celery stuff 254 | celerybeat-schedule 255 | celerybeat.pid 256 | 257 | # SageMath parsed files 258 | *.sage.py 259 | 260 | # Environments 261 | .env 262 | .venv 263 | env/ 264 | venv/ 265 | ENV/ 266 | env.bak/ 267 | venv.bak/ 268 | 269 | # Spyder project settings 270 | .spyderproject 271 | .spyproject 272 | 273 | # Rope project settings 274 | .ropeproject 275 | 276 | # mkdocs documentation 277 | /site 278 | 279 | # mypy 280 | .mypy_cache/ 281 | .dmypy.json 282 | dmypy.json 283 | 284 | # Pyre type checker 285 | .pyre/ 286 | 287 | # pytype static type analyzer 288 | .pytype/ 289 | 290 | # Cython debug symbols 291 | cython_debug/ 292 | 293 | .DS_Store 294 | 295 | # PyCharm 296 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 297 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 298 | # and can be added to the global gitignore or merged into this file. For a more nuclear 299 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 300 | #.idea/ 301 | 302 | 303 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Portions Copyright 2025 Browserbase, Inc 190 | Portions Copyright (c) Microsoft Corporation. 191 | Portions Copyright 2017 Google Inc. 192 | 193 | Licensed under the Apache License, Version 2.0 (the "License"); 194 | you may not use this file except in compliance with the License. 195 | You may obtain a copy of the License at 196 | 197 | http://www.apache.org/licenses/LICENSE-2.0 198 | 199 | Unless required by applicable law or agreed to in writing, software 200 | distributed under the License is distributed on an "AS IS" BASIS, 201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 202 | See the License for the specific language governing permissions and 203 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Browserbase MCP Server 2 | 3 | ![cover](assets/cover-mcp.png) 4 | 5 | [The Model Context Protocol (MCP)](https://modelcontextprotocol.io/introduction) is an open protocol that enables seamless integration between LLM applications and external data sources and tools. Whether you’re building an AI-powered IDE, enhancing a chat interface, or creating custom AI workflows, MCP provides a standardized way to connect LLMs with the context they need. 6 | 7 | This server provides cloud browser automation capabilities using [Browserbase](https://www.browserbase.com/) and [Stagehand](https://github.com/browserbase/stagehand). This server enables LLMs to interact with web pages, take screenshots, and execute JavaScript in a cloud browser environment. 
8 | 9 | To learn to get started with Browserbase, check out [Browserbase MCP](./browserbase/README.md) or [Stagehand MCP](./stagehand/README.md). 10 | 11 | ## Getting Started with available MCPs 12 | 13 | 🌐 **Browserbase MCP** - Located in [`browserbase/`](./browserbase/) 14 | 15 | | Feature | Description | 16 | | ------------------ | ----------------------------------------- | 17 | | Browser Automation | Control and orchestrate cloud browsers | 18 | | Data Extraction | Extract structured data from any webpage | 19 | | Console Monitoring | Track and analyze browser console logs | 20 | | Screenshots | Capture full-page and element screenshots | 21 | | Web Interaction | Navigate, click, and fill forms with ease | 22 | 23 | 🤘 **Stagehand MCP** - Located in [`stagehand/`](./stagehand/) 24 | 25 | | Feature | Description | 26 | | ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | 27 | | Atomic Instructions | Execute precise actions like `act("click the login button")` or `extract("find the red shoes")` | 28 | | Model Flexibility | Supports multiple models, including OpenAI's GPT-4 and Anthropic's Claude-3.7 Sonnet | 29 | | Modular Design | Easily integrate new models with minimal changes | 30 | | Vision Support | Use annotated screenshots for complex DOMs | 31 | | Open Source | Contribute to the project and join the [Slack community](https://join.slack.com/t/stagehand-dev/shared_invite/zt-2uvuobu50-~wVSx2Si75CPa3332hwVEw) for support | 32 | 33 | ### Alternative Installation Methods 34 | 35 | [Smithery](https://smithery.ai/server/@browserbasehq/mcp-browserbase) 36 | 37 | ### Credits 38 | 39 | Huge thanks and shoutout to the Playwright team for their contributions to the framework, and their work on the [Playwright MCP Server](https://github.com/microsoft/playwright-mcp) 
-------------------------------------------------------------------------------- /assets/browserbase-demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browserbase/mcp-server-browserbase/b81ffe57d16ccb92b6b128894a05ee39ea822d29/assets/browserbase-demo.png -------------------------------------------------------------------------------- /assets/browserbase-mcp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browserbase/mcp-server-browserbase/b81ffe57d16ccb92b6b128894a05ee39ea822d29/assets/browserbase-mcp.png -------------------------------------------------------------------------------- /assets/cover-mcp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browserbase/mcp-server-browserbase/b81ffe57d16ccb92b6b128894a05ee39ea822d29/assets/cover-mcp.png -------------------------------------------------------------------------------- /assets/cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browserbase/mcp-server-browserbase/b81ffe57d16ccb92b6b128894a05ee39ea822d29/assets/cover.png -------------------------------------------------------------------------------- /assets/session.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browserbase/mcp-server-browserbase/b81ffe57d16ccb92b6b128894a05ee39ea822d29/assets/session.png -------------------------------------------------------------------------------- /assets/stagehand-mcp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browserbase/mcp-server-browserbase/b81ffe57d16ccb92b6b128894a05ee39ea822d29/assets/stagehand-mcp.png -------------------------------------------------------------------------------- 
/browserbase/.npmignore: -------------------------------------------------------------------------------- 1 | # Ignore node_modules, build output, logs, env files, etc. 2 | node_modules 3 | dist 4 | *.log 5 | .env* 6 | 7 | # Ignore IDE/editor files 8 | .vscode 9 | .idea 10 | *.swp 11 | *.swo 12 | 13 | # Ignore OS files 14 | .DS_Store 15 | Thumbs.db -------------------------------------------------------------------------------- /browserbase/Dockerfile: -------------------------------------------------------------------------------- 1 | # Build stage 2 | FROM node:18-alpine AS builder 3 | 4 | # Set the working directory 5 | WORKDIR /app 6 | 7 | # Copy package.json and package-lock.json 8 | COPY package*.json ./ 9 | 10 | # Copy TSConfig 11 | COPY tsconfig.json ./ 12 | 13 | # Copy the source code first 14 | COPY . . 15 | 16 | # Install dependencies 17 | RUN npm install 18 | 19 | # Build the TypeScript source code 20 | RUN npm run build 21 | 22 | # Create the final image from a smaller Node.js runtime 23 | FROM node:18-alpine 24 | 25 | # Set the working directory 26 | WORKDIR /app 27 | 28 | # Copy built files from builder stage 29 | COPY --from=builder /app/dist /app/dist 30 | COPY --from=builder /app/cli.js /app/cli.js 31 | COPY --from=builder /app/index.js /app/index.js 32 | COPY --from=builder /app/index.d.ts /app/index.d.ts 33 | COPY --from=builder /app/config.d.ts /app/config.d.ts 34 | COPY --from=builder /app/package.json /app/package.json 35 | COPY --from=builder /app/package-lock.json /app/package-lock.json 36 | COPY --from=builder /app/node_modules /app/node_modules 37 | 38 | # Set environment variables (to be configured at runtime) 39 | ENV BROWSERBASE_API_KEY= 40 | ENV BROWSERBASE_PROJECT_ID= 41 | 42 | # Command to run the application with absolute path 43 | ENTRYPOINT [ "node", "/app/cli.js" ] -------------------------------------------------------------------------------- /browserbase/README.md: 
-------------------------------------------------------------------------------- 1 | # Playwright Browserbase MCP Server 2 | 3 | ![cover](../assets/browserbase-mcp.png) 4 | 5 | The Model Context Protocol (MCP) is an open protocol that enables seamless integration between LLM applications and external data sources and tools. Whether you’re building an AI-powered IDE, enhancing a chat interface, or creating custom AI workflows, MCP provides a standardized way to connect LLMs with the context they need. 6 | 7 |
8 | 9 | 10 | 11 |
12 | 13 | ## How to set up in your MCP config JSON 14 | 15 | You can either use our Server hosted on NPM or run it completely locally by cloning this repo. 16 | 17 | ### To run on NPM (Recommended) 18 | 19 | Go into your MCP Config JSON and add the Browserbase Server: 20 | 21 | ```json 22 | { 23 | "mcpServers": { 24 | "browserbase": { 25 | "command": "npx", 26 | "args" : ["@browserbasehq/mcp"], 27 | "env": { 28 | "BROWSERBASE_API_KEY": "", 29 | "BROWSERBASE_PROJECT_ID": "" 30 | } 31 | } 32 | } 33 | } 34 | ``` 35 | 36 | That's it! Reload your MCP client and Claude will be able to use Browserbase. 37 | 38 | ### To run 100% local: 39 | 40 | ```bash 41 | # Clone the Repo 42 | git clone https://github.com/browserbase/mcp-server-browserbase.git 43 | 44 | # Install the dependencies in the proper directory and build the project 45 | cd mcp-server-browserbase/browserbase 46 | npm install && npm run build 47 | 48 | ``` 49 | 50 | Then add the server to your MCP Config JSON. When running locally, you can use STDIO or self-host over SSE. 51 | 52 | ### STDIO: 53 | 54 | To your MCP Config JSON file add the following: 55 | 56 | ```json 57 | { 58 | "mcpServers": { 59 | "browserbase": { 60 | "command" : "node", 61 | "args" : ["/path/to/mcp-server-browserbase/browserbase/cli.js"], 62 | "env": { 63 | "BROWSERBASE_API_KEY": "", 64 | "BROWSERBASE_PROJECT_ID": "" 65 | } 66 | } 67 | } 68 | } 69 | ``` 70 | 71 | ### SSE: 72 | 73 | Run the following command in your terminal. You can add any flags (see options below) that you see fit to customize your configuration. 74 | 75 | ```bash 76 | node cli.js --port 8931 77 | ``` 78 | 79 | Then in your MCP Config JSON file put the following: 80 | 81 | ```json 82 | { 83 | "mcpServers": { 84 | "browserbase": { 85 | "url": "http://localhost:8931/sse", 86 | "env": { 87 | "BROWSERBASE_API_KEY": "", 88 | "BROWSERBASE_PROJECT_ID": "" 89 | } 90 | } 91 | } 92 | } 93 | ``` 94 | 95 | Then reload your MCP client and you should be good to go! 
96 | 97 | ## Flags Explained: 98 | 99 | The Browserbase MCP server accepts the following command-line flags: 100 | 101 | | Flag | Description | 102 | |------|-------------| 103 | | `--browserbaseApiKey ` | Your Browserbase API key for authentication | 104 | | `--browserbaseProjectId ` | Your Browserbase project ID | 105 | | `--proxies` | Enable Browserbase proxies for the session | 106 | | `--advancedStealth` | Enable Browserbase Advanced Stealth (Only for Scale Plan Users) | 107 | | `--contextId ` | Specify a Browserbase Context ID to use | 108 | | `--persist [boolean]` | Whether to persist the Browserbase context (default: true) | 109 | | `--port ` | Port to listen on for HTTP/SSE transport | 110 | | `--host ` | Host to bind server to (default: localhost, use 0.0.0.0 for all interfaces) | 111 | | `--cookies [json]` | JSON array of cookies to inject into the browser | 112 | | `--browserWidth ` | Browser viewport width (default: 1024) | 113 | | `--browserHeight ` | Browser viewport height (default: 768) | 114 | 115 | These flags can be passed directly to the CLI or configured in your MCP configuration file. 116 | 117 | ### NOTE: 118 | 119 | Currently, these flags can only be used with the local server (npx @browserbasehq/mcp). 120 | 121 | ____ 122 | 123 | ## Flags & Example Configs 124 | 125 | ### Proxies 126 | 127 | Here are our docs on [Proxies](https://docs.browserbase.com/features/proxies). 128 | 129 | To use proxies in STDIO, set the --proxies flag in your MCP Config: 130 | 131 | ```json 132 | { 133 | "mcpServers": { 134 | "browserbase": { 135 | "command" : "npx", 136 | "args" : ["@browserbasehq/mcp", "--proxies"], 137 | "env": { 138 | "BROWSERBASE_API_KEY": "", 139 | "BROWSERBASE_PROJECT_ID": "" 140 | } 141 | } 142 | } 143 | } 144 | ``` 145 | ### Advanced Stealth 146 | 147 | Here are our docs on [Advanced Stealth](https://docs.browserbase.com/features/stealth-mode#advanced-stealth-mode). 
148 | 149 | To use Advanced Stealth in STDIO, set the --advancedStealth flag in your MCP Config: 150 | 151 | ```json 152 | { 153 | "mcpServers": { 154 | "browserbase": { 155 | "command" : "npx", 156 | "args" : ["@browserbasehq/mcp", "--advancedStealth"], 157 | "env": { 158 | "BROWSERBASE_API_KEY": "", 159 | "BROWSERBASE_PROJECT_ID": "" 160 | } 161 | } 162 | } 163 | } 164 | ``` 165 | 166 | ### Contexts 167 | 168 | Here are our docs on [Contexts](https://docs.browserbase.com/features/contexts) 169 | 170 | To use contexts in STDIO, set the --contextId flag in your MCP Config: 171 | 172 | ```json 173 | { 174 | "mcpServers": { 175 | "browserbase": { 176 | "command" : "npx", 177 | "args" : ["@browserbasehq/mcp", "--contextId", ""], 178 | "env": { 179 | "BROWSERBASE_API_KEY": "", 180 | "BROWSERBASE_PROJECT_ID": "" 181 | } 182 | } 183 | } 184 | } 185 | ``` 186 | 187 | ### Cookie Injection 188 | 189 | Why would you need to inject cookies? Our context API currently works on persistent cookies, but not session cookies. So sometimes our persistent auth might not work (we're working hard to add this functionality). 190 | 191 | You can pass cookies to the MCP server by adding your cookies JSON to your MCP Config. 192 | 193 | To inject cookies in STDIO, set the --cookies flag in your MCP Config. Your cookies JSON must follow the format of [Playwright Cookies](https://playwright.dev/docs/api/class-browsercontext#browser-context-cookies) 194 | 195 | ```json 196 | { 197 | "mcpServers": { 198 | "browserbase": { 199 | "command" : "npx", 200 | "args" : [ 201 | "@browserbasehq/mcp", "--cookies", 202 | '{ 203 | "cookies": json, 204 | }' 205 | ], 206 | "env": { 207 | "BROWSERBASE_API_KEY": "", 208 | "BROWSERBASE_PROJECT_ID": "" 209 | } 210 | } 211 | } 212 | } 213 | ``` 214 | 215 | ### Browser Viewport Sizing 216 | 217 | The default viewport sizing for a browser session is 1024 x 768. You can adjust the Browser viewport sizing with browserWidth and browserHeight flags. 
218 | 219 | Here's how to use it for custom browser sizing. We recommend sticking with standard aspect ratios such as 16:9 (e.g., 1920 x 1080 or 1280 x 720) or 4:3 (e.g., 1024 x 768). 220 | 221 | ```json 222 | { 223 | "mcpServers": { 224 | "browserbase": { 225 | "command" : "npx", 226 | "args" : [ 227 | "@browserbasehq/mcp", 228 | "--browserHeight", "1080", 229 | "--browserWidth", "1920" 230 | ], 231 | "env": { 232 | "BROWSERBASE_API_KEY": "", 233 | "BROWSERBASE_PROJECT_ID": "" 234 | } 235 | } 236 | } 237 | } 238 | ``` 239 | 240 | ## Structure 241 | 242 | * `src/`: TypeScript source code 243 | * `index.ts`: Main entry point, env checks, shutdown 244 | * `server.ts`: MCP Server setup and request routing 245 | * `sessionManager.ts`: Handles Browserbase session creation/management 246 | * `tools/`: Tool definitions and implementations 247 | * `resources/`: Resource (screenshot) handling 248 | * `types.ts`: Shared TypeScript types 249 | * `dist/`: Compiled JavaScript output 250 | * `tests/`: Placeholder for tests 251 | * `utils/`: Placeholder for utility scripts 252 | * `Dockerfile`: For building a Docker image 253 | * Configuration files (`.json`, `.ts`, `.mjs`, `.npmignore`) 254 | 255 | ## Contexts for Persistence 256 | 257 | This server supports Browserbase's Contexts feature, which allows persisting cookies, authentication, and cached data across browser sessions: 258 | 259 | 1. **Creating a Context**: 260 | ``` 261 | browserbase_context_create: Creates a new context, optionally with a friendly name 262 | ``` 263 | 264 | 2. **Using a Context with a Session**: 265 | ``` 266 | browserbase_session_create: Now accepts a 'context' parameter with: 267 | - id: The context ID to use 268 | - name: Alternative to ID, the friendly name of the context 269 | - persist: Whether to save changes (cookies, cache) back to the context (default: true) 270 | ``` 271 | 272 | 3. 
**Deleting a Context**: 273 | ``` 274 | browserbase_context_delete: Deletes a context when you no longer need it 275 | ``` 276 | 277 | Contexts make it much easier to: 278 | - Maintain login state across sessions 279 | - Reduce page load times by preserving cache 280 | - Avoid CAPTCHAs and detection by reusing browser fingerprints 281 | 282 | ## Cookie Management 283 | 284 | This server also provides direct cookie management capabilities: 285 | 286 | 1. **Adding Cookies**: 287 | ``` 288 | browserbase_cookies_add: Add cookies to the current browser session with full control over properties 289 | ``` 290 | 291 | 2. **Getting Cookies**: 292 | ``` 293 | browserbase_cookies_get: View all cookies in the current session (optionally filtered by URLs) 294 | ``` 295 | 296 | 3. **Deleting Cookies**: 297 | ``` 298 | browserbase_cookies_delete: Delete specific cookies or clear all cookies from the session 299 | ``` 300 | 301 | These tools are useful for: 302 | - Setting authentication cookies without navigating to login pages 303 | - Backing up and restoring cookie state 304 | - Debugging cookie-related issues 305 | - Manipulating cookie attributes (expiration, security flags, etc.) 306 | 307 | ## TODO/Roadmap 308 | 309 | * Implement true `ref`-based interaction logic for click, type, drag, hover, select_option. 310 | * Implement element-specific screenshots using `ref`. 311 | * Add more standard MCP tools (tabs, navigation, etc.). 312 | * Add tests. 
313 | -------------------------------------------------------------------------------- /browserbase/cli.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import './dist/program.js'; -------------------------------------------------------------------------------- /browserbase/config.d.ts: -------------------------------------------------------------------------------- 1 | import type { Cookie } from "playwright-core"; 2 | 3 | export type Config = { 4 | /** 5 | * The Browserbase API Key to use 6 | */ 7 | browserbaseApiKey?: string; 8 | /** 9 | * The Browserbase Project ID to use 10 | */ 11 | browserbaseProjectId?: string; 12 | /** 13 | * Whether or not to use Browserbase proxies 14 | * https://docs.browserbase.com/features/proxies 15 | * 16 | * @default false 17 | */ 18 | proxies?: boolean; 19 | /** 20 | * Use advanced stealth mode. Only available to Browserbase Scale Plan users. 21 | * 22 | * @default false 23 | */ 24 | advancedStealth?: boolean; 25 | /** 26 | * Potential Browserbase Context to use 27 | * Would be a context ID 28 | */ 29 | context?: { 30 | /** 31 | * The ID of the context to use 32 | */ 33 | contextId?: string; 34 | /** 35 | * Whether or not to persist the context 36 | * 37 | * @default true 38 | */ 39 | persist?: boolean; 40 | }; 41 | /** 42 | * 43 | */ 44 | viewPort?: { 45 | /** 46 | * The width of the browser 47 | */ 48 | browserWidth?: number; 49 | /** 50 | * The height of the browser 51 | */ 52 | browserHeight?: number; 53 | }; 54 | /** 55 | * Cookies to inject into the Browserbase context 56 | * Format: Array of cookie objects with name, value, domain, and optional path, expires, httpOnly, secure, sameSite 57 | */ 58 | cookies?: Cookie[]; 59 | /** 60 | * Whether or not to port to a server 61 | * 62 | */ 63 | server?: { 64 | /** 65 | * The port to listen on for SSE or MCP transport. 66 | */ 67 | port?: number; 68 | /** 69 | * The host to bind the server to. Default is localhost. 
Use 0.0.0.0 to bind to all interfaces. 70 | */ 71 | host?: string; 72 | }; 73 | tools?: { 74 | /** 75 | * Configuration for the browser_take_screenshot tool. 76 | */ 77 | browserbase_take_screenshot?: { 78 | /** 79 | * Whether to disable base64-encoded image responses to the clients that 80 | * don't support binary data or prefer to save on tokens. 81 | */ 82 | omitBase64?: boolean; 83 | } 84 | } 85 | }; -------------------------------------------------------------------------------- /browserbase/index.d.ts: -------------------------------------------------------------------------------- 1 | import type { Server } from '@modelcontextprotocol/sdk/server/index.js'; 2 | 3 | import type { Config } from './config'; 4 | 5 | export declare function createServer(config?: Config): Promise; 6 | export {}; -------------------------------------------------------------------------------- /browserbase/index.js: -------------------------------------------------------------------------------- 1 | import { createServer } from './dist/index.js'; 2 | export default { createServer }; -------------------------------------------------------------------------------- /browserbase/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@browserbasehq/mcp", 3 | "version": "1.0.5", 4 | "description": "MCP server for browser automation using browserbase", 5 | "author": "Browserbase, Inc. 
(https://browserbase.com)", 6 | "homepage": "https://browserbase.com", 7 | "type": "module", 8 | "main": "./cli.js", 9 | "engines": { 10 | "node": ">=18" 11 | }, 12 | "files": [ 13 | "../assets/browserbase-mcp.png", 14 | "dist", 15 | "cli.js", 16 | "index.d.ts", 17 | "index.js", 18 | "config.d.ts", 19 | "config.js" 20 | ], 21 | "scripts": { 22 | "build": "tsc && shx chmod +x dist/*.js && shx chmod +x cli.js", 23 | "prepare": "npm run build", 24 | "watch": "tsc --watch", 25 | "inspector": "npx @modelcontextprotocol/inspector build/index.js", 26 | "test-local": "npm pack && npm install -g $(pwd)/$(ls -t *.tgz | head -1) && mcp-server-browserbase" 27 | }, 28 | "exports": { 29 | "./package.json": "./package.json", 30 | ".": { 31 | "types": "./index.d.ts", 32 | "default": "./cli.js" 33 | } 34 | }, 35 | "dependencies": { 36 | "@browserbasehq/sdk": "^2.5.0", 37 | "@modelcontextprotocol/sdk": "^1.10.2", 38 | "@types/yaml": "^1.9.6", 39 | "commander": "^13.1.0", 40 | "dotenv": "^16.5.0", 41 | "playwright": "^1.53.0-alpha-2025-05-05", 42 | "puppeteer-core": "^23.9.0", 43 | "yaml": "^2.7.1", 44 | "zod": "^3.24.3", 45 | "zod-to-json-schema": "^3.24.5" 46 | }, 47 | "devDependencies": { 48 | "shx": "^0.3.4", 49 | "typescript": "^5.6.2" 50 | }, 51 | "bin": { 52 | "mcp-server-browserbase": "cli.js" 53 | }, 54 | "publishConfig": { 55 | "access": "public" 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /browserbase/playwright.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig, devices } from '@playwright/test'; 2 | 3 | /** 4 | * Basic Playwright config - primarily useful if adding actual 5 | * tests later, might not be strictly needed for the MCP server itself. 6 | */ 7 | export default defineConfig({ 8 | testDir: './tests', 9 | fullyParallel: true, 10 | forbidOnly: !!process.env.CI, 11 | retries: process.env.CI ? 2 : 0, 12 | workers: process.env.CI ? 
1 : undefined, 13 | reporter: 'html', 14 | use: { 15 | trace: 'on-first-retry', 16 | // Base URL to use in actions like `await page.goto('/')` 17 | // baseURL: 'http://127.0.0.1:3000', 18 | }, 19 | 20 | /* Configure projects for major browsers */ 21 | // projects: [ 22 | // { 23 | // name: 'chromium', 24 | // use: { ...devices['Desktop Chrome'] }, 25 | // }, 26 | // ], 27 | 28 | /* Run your local dev server before starting the tests */ 29 | // webServer: { 30 | // command: 'npm run start', 31 | // url: 'http://127.0.0.1:3000', 32 | // reuseExistingServer: !process.env.CI, 33 | // }, 34 | }); -------------------------------------------------------------------------------- /browserbase/smithery.yaml: -------------------------------------------------------------------------------- 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml 2 | 3 | startCommand: 4 | # Using stdio type which is the most common for MCPs 5 | type: stdio 6 | configSchema: 7 | # JSON Schema defining the configuration options for the MCP. 8 | type: object 9 | required: 10 | - browserbaseApiKey 11 | - browserbaseProjectId 12 | properties: 13 | browserbaseApiKey: 14 | type: string 15 | description: The API key for Browserbase. 16 | browserbaseProjectId: 17 | type: string 18 | description: The project ID for Browserbase. 19 | port: 20 | type: number 21 | description: The port to listen on. 22 | host: 23 | type: string 24 | description: The host to listen on. 25 | contextId: 26 | type: string 27 | description: The context ID to use for the session. 28 | persist: 29 | type: boolean 30 | description: Whether to persist the context. 31 | proxies: 32 | type: boolean 33 | description: Whether to use proxies. 34 | cookies: 35 | type: object 36 | description: Cookies to use for the session. 37 | browserWidth: 38 | type: number 39 | description: Width of the browser window. 40 | browserHeight: 41 | type: number 42 | description: Height of the browser window. 
/**
 * Builds the effective server configuration for a CLI invocation.
 *
 * CLI-provided values are layered on top of `defaultConfig`. The Browserbase
 * credentials are then re-read from the environment if still missing, because
 * `defaultConfig` may have been evaluated before dotenv populated
 * `process.env`. A warning is printed for each credential that is still
 * absent; the function never throws for missing credentials.
 *
 * @param cliOptions Options parsed from the command line.
 * @returns The merged configuration.
 */
export async function resolveConfig(cliOptions: CLIOptions): Promise<Config> {
  const fromCli = await configFromCLIOptions(cliOptions);
  // Precedence: defaults < CLI overrides.
  const resolved = mergeConfig(defaultConfig, fromCli);

  // Late environment fallback (dotenv may have run after module load).
  resolved.browserbaseApiKey ||= process.env.BROWSERBASE_API_KEY;
  resolved.browserbaseProjectId ||= process.env.BROWSERBASE_PROJECT_ID;

  // Warn, but do not fail, when credentials are still unresolved.
  const missingApiKey = !resolved.browserbaseApiKey;
  const missingProjectId = !resolved.browserbaseProjectId;
  if (missingApiKey) {
    console.warn("Warning: BROWSERBASE_API_KEY environment variable not set.");
  }
  if (missingProjectId) {
    console.warn("Warning: BROWSERBASE_PROJECT_ID environment variable not set.");
  }

  return resolved;
}
/**
 * Returns a shallow copy of `obj` without the keys whose value is `undefined`.
 *
 * @param obj Object to filter; `undefined` yields an empty object.
 * @returns A new object containing only the defined entries of `obj`.
 */
function pickDefined<T extends object>(obj: T | undefined): Partial<T> {
  if (!obj) return {};
  return Object.fromEntries(
    Object.entries(obj).filter(([, v]) => v !== undefined)
  ) as Partial<T>;
}

/**
 * Merges two configuration objects; values from `overrides` take precedence.
 *
 * - Top-level keys whose override value is `undefined` are ignored.
 * - When both sides hold a plain object for the same key (e.g. `server`,
 *   `viewPort`, `context`), the objects are merged one level deep, and
 *   `undefined` fields of the override are ignored there as well. Without
 *   that filtering, an unset CLI flag such as `viewPort.browserWidth` would
 *   overwrite the default with `undefined`.
 * - Everything else (primitives, arrays, objects with no counterpart in
 *   `base`) replaces the base value as-is.
 *
 * Neither input is mutated.
 *
 * @param base Configuration providing the fallback values.
 * @param overrides Configuration whose defined values win.
 * @returns A new merged configuration object.
 */
function mergeConfig(base: Config, overrides: Config): Config {
  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null && !Array.isArray(value);

  const result: Record<string, unknown> = { ...pickDefined(base) };

  for (const [key, value] of Object.entries(pickDefined(overrides))) {
    const current = result[key];
    result[key] =
      isPlainObject(value) && isPlainObject(current)
        ? // Nested merge: only defined override fields may replace defaults.
          { ...current, ...pickDefined(value) }
        : value;
  }

  return result as Config;
}
50 | */ 51 | snapshotOrDie(): PageSnapshot { 52 | const snapshot = this.latestSnapshots.get(this.currentSessionId); 53 | if (!snapshot) { 54 | throw new Error( 55 | `No snapshot available for the current session (${this.currentSessionId}). Capture a snapshot first.` 56 | ); 57 | } 58 | return snapshot; 59 | } 60 | 61 | /** 62 | * Clears the snapshot for the currently active session. 63 | */ 64 | clearLatestSnapshot(): void { 65 | this.latestSnapshots.delete(this.currentSessionId); 66 | } 67 | 68 | /** 69 | * Captures a new PageSnapshot for the currently active session and stores it. 70 | * Returns the captured snapshot or undefined if capture failed. 71 | */ 72 | async captureSnapshot(): Promise { 73 | const logPrefix = `[Context.captureSnapshot] ${new Date().toISOString()} Session ${ 74 | this.currentSessionId 75 | }:`; 76 | let page; 77 | try { 78 | page = await this.getActivePage(); 79 | } catch (error) { 80 | this.clearLatestSnapshot(); 81 | return undefined; 82 | } 83 | 84 | if (!page) { 85 | this.clearLatestSnapshot(); 86 | return undefined; 87 | } 88 | 89 | try { 90 | await this.waitForTimeout(100); // Small delay for UI settlement 91 | const snapshot = await PageSnapshot.create(page); 92 | this.latestSnapshots.set(this.currentSessionId, snapshot); 93 | return snapshot; 94 | } catch (error) { 95 | process.stderr.write( 96 | `${logPrefix} Failed to capture snapshot: ${ 97 | error instanceof Error ? 
error.message : String(error) 98 | }\\n` 99 | ); // Enhanced logging 100 | this.clearLatestSnapshot(); 101 | return undefined; 102 | } 103 | } 104 | 105 | // --- Resource Handling Methods --- 106 | 107 | listResources(): Resource[] { 108 | const resources: Resource[] = []; 109 | for (const [name, data] of this.screenshotResources.entries()) { 110 | resources.push({ 111 | uri: data.uri, 112 | mimeType: `image/${data.format}`, // Ensure correct mime type 113 | name: `Screenshot: ${name}`, 114 | }); 115 | } 116 | return resources; 117 | } 118 | 119 | readResource(uri: string): { uri: string; mimeType: string; blob: string } { 120 | const prefix = "mcp://screenshots/"; 121 | if (uri.startsWith(prefix)) { 122 | const name = uri.split("/").pop() || ""; 123 | const data = this.screenshotResources.get(name); 124 | if (data) { 125 | return { 126 | uri, 127 | mimeType: `image/${data.format}`, // Ensure correct mime type 128 | blob: data.bytes, 129 | }; 130 | } else { 131 | throw new Error(`Screenshot resource not found: ${name}`); 132 | } 133 | } else { 134 | throw new Error(`Resource URI format not recognized: ${uri}`); 135 | } 136 | } 137 | 138 | addScreenshot(name: string, format: "png" | "jpeg", bytes: string): void { 139 | const uri = `mcp://screenshots/${name}`; 140 | this.screenshotResources.set(name, { format, bytes, uri }); 141 | this.server.notification({ 142 | method: "resources/list_changed", 143 | params: {}, 144 | }); 145 | } 146 | 147 | // --- Session and Tool Execution --- 148 | 149 | public async getActivePage(): Promise { 150 | const session = await getSession(this.currentSessionId, this.config); 151 | if (!session || !session.page || session.page.isClosed()) { 152 | try { 153 | // getSession does not support a refresh flag currently. 154 | // If a session is invalid, it needs to be recreated or re-established upstream. 155 | // For now, just return null if the fetched session is invalid. 
156 | const currentSession = await getSession( 157 | this.currentSessionId, 158 | this.config 159 | ); 160 | if ( 161 | !currentSession || 162 | !currentSession.page || 163 | currentSession.page.isClosed() 164 | ) { 165 | return null; 166 | } 167 | return currentSession.page; 168 | } catch (refreshError) { 169 | return null; 170 | } 171 | } 172 | return session.page; 173 | } 174 | 175 | public async getActiveBrowser(): Promise { 176 | const session = await getSession(this.currentSessionId, this.config); 177 | if (!session || !session.browser || !session.browser.isConnected()) { 178 | try { 179 | // getSession does not support a refresh flag currently. 180 | const currentSession = await getSession( 181 | this.currentSessionId, 182 | this.config 183 | ); 184 | if ( 185 | !currentSession || 186 | !currentSession.browser || 187 | !currentSession.browser.isConnected() 188 | ) { 189 | return null; 190 | } 191 | return currentSession.browser; 192 | } catch (refreshError) { 193 | return null; 194 | } 195 | } 196 | return session.browser; 197 | } 198 | 199 | /** 200 | * Get the active browser without triggering session creation. 201 | * This is a read-only operation used when we need to check for an existing browser 202 | * without side effects (e.g., during close operations). 203 | * @returns The browser if it exists and is connected, null otherwise 204 | */ 205 | public getActiveBrowserReadOnly(): BrowserSession["browser"] | null { 206 | const session = getSessionReadOnly(this.currentSessionId); 207 | if (!session || !session.browser || !session.browser.isConnected()) { 208 | return null; 209 | } 210 | return session.browser; 211 | } 212 | 213 | /** 214 | * Get the active page without triggering session creation. 215 | * This is a read-only operation used when we need to check for an existing page 216 | * without side effects. 
217 | * @returns The page if it exists and is not closed, null otherwise 218 | */ 219 | public getActivePageReadOnly(): BrowserSession["page"] | null { 220 | const session = getSessionReadOnly(this.currentSessionId); 221 | if (!session || !session.page || session.page.isClosed()) { 222 | return null; 223 | } 224 | return session.page; 225 | } 226 | 227 | public async waitForTimeout(timeoutMillis: number): Promise { 228 | return new Promise((resolve) => setTimeout(resolve, timeoutMillis)); 229 | } 230 | 231 | private createErrorResult(message: string, toolName: string): CallToolResult { 232 | return { 233 | content: [{ type: "text", text: `Error: ${message}` }], 234 | isError: true, 235 | }; 236 | } 237 | 238 | // --- Refactored Action Execution with Retries --- 239 | private async executeRefAction( 240 | toolName: string, 241 | validatedArgs: any, 242 | actionFn: ( 243 | page: Page, 244 | identifier: string | undefined, 245 | args: any, 246 | locator: Locator | undefined, 247 | identifierType: "ref" | "selector" | "none" 248 | ) => Promise, 249 | requiresIdentifier: boolean = true 250 | ): Promise<{ resultText: string; actionResult?: ToolActionResult | void }> { 251 | let lastError: Error | null = null; 252 | let page: Page | null = null; 253 | let actionResult: ToolActionResult | void | undefined; 254 | let resultText = ""; 255 | let identifier: string | undefined = undefined; 256 | let identifierType: "ref" | "selector" | "none" = "none"; 257 | 258 | // --- Get page and snapshot BEFORE the loop --- 259 | page = await this.getActivePage(); 260 | if (!page) { 261 | throw new Error("Failed to get active page before action attempts."); 262 | } 263 | 264 | // Get the CURRENT latest snapshot - DO NOT capture a new one here. 265 | const snapshot = this.latestSnapshots.get(this.currentSessionId); 266 | const initialSnapshotIdentifier = 267 | snapshot?.text().substring(0, 60).replace(/\\n/g, "\\\\n") ?? 
268 | "[No Snapshot]"; 269 | 270 | let locator: Locator | undefined; 271 | 272 | // --- Resolve locator: Prioritize selector, then ref --- 273 | if (validatedArgs?.selector) { 274 | identifier = validatedArgs.selector; 275 | identifierType = "selector"; 276 | if (!identifier) { 277 | throw new Error( 278 | `Missing required 'selector' argument for tool ${toolName}.` 279 | ); 280 | } 281 | try { 282 | locator = page.locator(identifier); 283 | } catch (locatorError) { 284 | throw new Error( 285 | `Failed to create locator for selector '${identifier}': ${ 286 | locatorError instanceof Error 287 | ? locatorError.message 288 | : String(locatorError) 289 | }` 290 | ); 291 | } 292 | } else if (validatedArgs?.ref) { 293 | identifier = validatedArgs.ref; 294 | identifierType = "ref"; 295 | if (!identifier) { 296 | throw new Error( 297 | `Missing required 'ref' argument for tool ${toolName}.` 298 | ); 299 | } 300 | if (!snapshot) { 301 | throw new Error( 302 | `Cannot resolve ref '${identifier}' because no snapshot is available for session ${this.currentSessionId}. Capture a snapshot or ensure one exists.` 303 | ); 304 | } 305 | try { 306 | // Resolve using the snapshot we just retrieved 307 | locator = snapshot.refLocator(identifier); 308 | } catch (locatorError) { 309 | // Use the existing snapshot identifier in the error 310 | throw new Error( 311 | `Failed to resolve ref ${identifier} using existing snapshot ${initialSnapshotIdentifier} before action attempt: ${ 312 | locatorError instanceof Error 313 | ? 
locatorError.message 314 | : String(locatorError) 315 | }` 316 | ); 317 | } 318 | } else if (requiresIdentifier) { 319 | // If neither ref nor selector is provided, but one is required 320 | throw new Error( 321 | `Missing required 'ref' or 'selector' argument for tool ${toolName}.` 322 | ); 323 | } else { 324 | // No identifier needed or provided 325 | identifierType = "none"; // Explicitly set to none 326 | } 327 | 328 | // --- Single Attempt --- 329 | try { 330 | // Pass page, the used identifier (selector or ref), args, the resolved locator, and identifierType 331 | const actionFnResult = await actionFn( 332 | page, 333 | identifier, 334 | validatedArgs, 335 | locator, 336 | identifierType 337 | ); 338 | 339 | if (typeof actionFnResult === "string") { 340 | resultText = actionFnResult; 341 | actionResult = undefined; 342 | } else { 343 | actionResult = actionFnResult; 344 | const content = actionResult?.content; 345 | if (Array.isArray(content) && content.length > 0) { 346 | resultText = 347 | content 348 | .map((c: { type: string; text?: string }) => 349 | c.type === "text" ? c.text : `[${c.type}]` 350 | ) 351 | .filter(Boolean) 352 | .join(" ") || `${toolName} action completed.`; 353 | } else { 354 | resultText = `${toolName} action completed successfully.`; 355 | } 356 | } 357 | lastError = null; 358 | return { resultText, actionResult }; 359 | } catch (error: any) { 360 | throw new Error( 361 | `Action ${toolName} failed: ${ 362 | error instanceof Error ? 
error.message : String(error) 363 | }` 364 | ); 365 | } 366 | } 367 | 368 | async run(tool: Tool, args: any): Promise { 369 | const toolName = tool.schema.name; 370 | let initialPage: Page | null = null; 371 | let initialBrowser: BrowserSession["browser"] | null = null; 372 | let toolResultFromHandle: ToolResult | null = null; // Legacy handle result 373 | let finalResult: CallToolResult = { 374 | // Initialize finalResult here 375 | content: [{ type: "text", text: `Initialization error for ${toolName}` }], 376 | isError: true, 377 | }; 378 | 379 | const logPrefix = `[Context.run ${toolName}] ${new Date().toISOString()}:`; 380 | 381 | let validatedArgs: any; 382 | try { 383 | validatedArgs = tool.schema.inputSchema.parse(args); 384 | } catch (error) { 385 | if (error instanceof z.ZodError) { 386 | const errorMsg = error.issues.map((issue) => issue.message).join(", "); 387 | return this.createErrorResult( 388 | `Input validation failed: ${errorMsg}`, 389 | toolName 390 | ); 391 | } 392 | return this.createErrorResult( 393 | `Input validation failed: ${ 394 | error instanceof Error ? 
error.message : String(error) 395 | }`, 396 | toolName 397 | ); 398 | } 399 | 400 | const previousSessionId = this.currentSessionId; 401 | if ( 402 | validatedArgs?.sessionId && 403 | validatedArgs.sessionId !== this.currentSessionId 404 | ) { 405 | this.currentSessionId = validatedArgs.sessionId; 406 | this.clearLatestSnapshot(); 407 | } 408 | 409 | if (toolName !== "browserbase_session_create") { 410 | try { 411 | const session = await getSession(this.currentSessionId, this.config); 412 | if ( 413 | !session || 414 | !session.page || 415 | session.page.isClosed() || 416 | !session.browser || 417 | !session.browser.isConnected() 418 | ) { 419 | if (this.currentSessionId !== previousSessionId) { 420 | this.currentSessionId = previousSessionId; 421 | } 422 | throw new Error( 423 | `Session ${this.currentSessionId} is invalid or browser/page is not available.` 424 | ); 425 | } 426 | initialPage = session.page; 427 | initialBrowser = session.browser; 428 | } catch (sessionError) { 429 | return this.createErrorResult( 430 | `Error retrieving or validating session ${this.currentSessionId}: ${ 431 | sessionError instanceof Error 432 | ? sessionError.message 433 | : String(sessionError) 434 | }`, 435 | toolName 436 | ); 437 | } 438 | } 439 | 440 | let toolActionOutput: ToolActionResult | undefined = undefined; // New variable to store direct tool action output 441 | let actionSucceeded = false; 442 | let shouldCaptureSnapshotAfterAction = false; 443 | let postActionSnapshot: PageSnapshot | undefined = undefined; 444 | 445 | try { 446 | let actionToRun: (() => Promise) | undefined = 447 | undefined; 448 | let shouldCaptureSnapshot = false; 449 | 450 | try { 451 | if ("handle" in tool && typeof tool.handle === "function") { 452 | toolResultFromHandle = await tool.handle(this as any, validatedArgs); 453 | actionToRun = toolResultFromHandle?.action; 454 | shouldCaptureSnapshot = 455 | toolResultFromHandle?.captureSnapshot ?? 
false; 456 | shouldCaptureSnapshotAfterAction = shouldCaptureSnapshot; 457 | } else { 458 | throw new Error( 459 | `Tool ${toolName} could not be handled (no handle method).` 460 | ); 461 | } 462 | 463 | if (actionToRun) { 464 | toolActionOutput = await actionToRun(); 465 | actionSucceeded = true; 466 | } else { 467 | throw new Error(`Tool ${toolName} handled without action.`); 468 | } 469 | } catch (error) { 470 | process.stderr.write( 471 | `${logPrefix} Error executing tool ${toolName}: ${ 472 | error instanceof Error ? error.message : String(error) 473 | }\\n` 474 | ); 475 | if (error instanceof Error && error.stack) { 476 | process.stderr.write(`${logPrefix} Stack Trace: ${error.stack}\\n`); 477 | } 478 | // ----------------------- 479 | finalResult = this.createErrorResult( 480 | `Execution failed: ${ 481 | error instanceof Error ? error.message : String(error) 482 | }`, 483 | toolName 484 | ); 485 | actionSucceeded = false; 486 | shouldCaptureSnapshotAfterAction = false; 487 | if ( 488 | this.currentSessionId !== previousSessionId && 489 | toolName !== "browserbase_session_create" 490 | ) { 491 | this.currentSessionId = previousSessionId; 492 | } 493 | } finally { 494 | if (actionSucceeded && shouldCaptureSnapshotAfterAction) { 495 | const preSnapshotDelay = 500; 496 | await this.waitForTimeout(preSnapshotDelay); 497 | try { 498 | postActionSnapshot = await this.captureSnapshot(); 499 | if (postActionSnapshot) { 500 | process.stderr.write( 501 | `[Context.run ${toolName}] Added snapshot to final result text.\n` 502 | ); 503 | } else { 504 | process.stderr.write( 505 | `[Context.run ${toolName}] WARN: Snapshot was expected after action but failed to capture.\n` 506 | ); // Keep warning 507 | } 508 | } catch (postSnapError) { 509 | process.stderr.write( 510 | `[Context.run ${toolName}] WARN: Error capturing post-action snapshot: ${ 511 | postSnapError instanceof Error 512 | ? 
postSnapError.message 513 | : String(postSnapError) 514 | }\n` 515 | ); // Keep warning 516 | } 517 | } else if ( 518 | actionSucceeded && 519 | toolName === "browserbase_snapshot" && 520 | !postActionSnapshot 521 | ) { 522 | postActionSnapshot = this.latestSnapshots.get(this.currentSessionId); 523 | } 524 | 525 | if (actionSucceeded) { 526 | const finalContentItems: (TextContent | ImageContent)[] = []; 527 | 528 | // 1. Add content from the tool action itself 529 | if (toolActionOutput?.content && toolActionOutput.content.length > 0) { 530 | finalContentItems.push(...toolActionOutput.content); 531 | } else { 532 | // If toolActionOutput.content is empty/undefined but action succeeded, 533 | // provide a generic success message. 534 | finalContentItems.push({ type: "text", text: `${toolName} action completed successfully.` }); 535 | } 536 | 537 | // 2. Prepare and add additional textual information (URL, Title, Snapshot) 538 | const additionalInfoParts: string[] = []; 539 | // Use read-only version to avoid creating sessions after close 540 | const currentPage = this.getActivePageReadOnly(); 541 | 542 | if (currentPage) { 543 | try { 544 | const url = currentPage.url(); 545 | const title = await currentPage 546 | .title() 547 | .catch(() => "[Error retrieving title]"); 548 | additionalInfoParts.push(`- Page URL: ${url}`); 549 | additionalInfoParts.push(`- Page Title: ${title}`); 550 | } catch (pageStateError) { 551 | additionalInfoParts.push( 552 | "- [Error retrieving page state after action]" 553 | ); 554 | } 555 | } else { 556 | additionalInfoParts.push("- [Page unavailable after action]"); 557 | } 558 | 559 | const snapshotToAdd = postActionSnapshot; 560 | if (snapshotToAdd) { 561 | additionalInfoParts.push( 562 | `- Page Snapshot\n\`\`\`yaml\n${snapshotToAdd.text()}\n\`\`\`\n` 563 | ); 564 | } else { 565 | additionalInfoParts.push( 566 | `- [No relevant snapshot available after action]` 567 | ); 568 | } 569 | 570 | // 3. 
Add the additional information as a new TextContent item if it's not empty 571 | if (additionalInfoParts.length > 0) { 572 | // Add leading newlines if there's preceding content, to maintain separation 573 | const additionalInfoText = (finalContentItems.length > 0 ? "\\n\\n" : "") + additionalInfoParts.join("\\n"); 574 | finalContentItems.push({ type: "text", text: additionalInfoText }); 575 | } 576 | 577 | finalResult = { 578 | content: finalContentItems, 579 | isError: false, 580 | }; 581 | } else { 582 | // Error result is already set in catch block, but ensure it IS set. 583 | if (!finalResult || !finalResult.isError) { 584 | finalResult = this.createErrorResult( 585 | `Unknown error occurred during ${toolName}`, 586 | toolName 587 | ); 588 | } 589 | } 590 | return finalResult; 591 | } 592 | } catch (error) { 593 | process.stderr.write( 594 | `${logPrefix} Error running tool ${toolName}: ${ 595 | error instanceof Error ? error.message : String(error) 596 | }\n` 597 | ); 598 | throw error; 599 | } 600 | } 601 | } 602 | -------------------------------------------------------------------------------- /browserbase/src/index.ts: -------------------------------------------------------------------------------- 1 | import dotenv from "dotenv"; 2 | dotenv.config(); 3 | 4 | import { Config } from "../config.js"; 5 | import type { Tool } from "./tools/tool.js"; 6 | 7 | import navigate from "./tools/navigate.js"; 8 | import snapshot from "./tools/snapshot.js"; 9 | import keyboard from "./tools/keyboard.js"; 10 | import getText from "./tools/getText.js"; 11 | import session from "./tools/session.js"; 12 | import common from "./tools/common.js"; 13 | import contextTools from "./tools/context.js"; 14 | 15 | import { Server } from "@modelcontextprotocol/sdk/server/index.js"; 16 | import { CallToolRequestSchema, ListResourcesRequestSchema, ListToolsRequestSchema, ReadResourceRequestSchema } from "@modelcontextprotocol/sdk/types.js"; 17 | import { z } from "zod"; 18 | import { 
/**
 * Builds an MCP server wired to a Browserbase {@link Context}.
 *
 * Registers handlers for listing/reading resources and listing/calling tools.
 * Tool execution is delegated to `context.run`; any exception it throws is
 * logged to stderr and converted into an `isError` tool result so the client
 * always receives a well-formed response.
 *
 * @param config Resolved server configuration passed through to the Context.
 * @returns The configured server instance (not yet connected to a transport).
 */
export async function createServer(config: Config): Promise<Server> {
  const server = new Server(
    { name: "mcp-server-browserbase", version: "0.5.1" },
    {
      capabilities: {
        resources: { list: true, read: true },
        tools: { list: true, call: true },
        prompts: { list: true, get: true },
        notifications: { resources: { list_changed: true } },
      },
    }
  );

  // The context owns session state and executes tools on behalf of the server.
  const context = new Context(server, config);

  const tools: Tool[] = [
    ...common,
    ...snapshot,
    ...keyboard,
    ...getText,
    ...navigate,
    ...session,
    ...contextTools,
  ];

  // Name -> tool lookup used by the CallTool handler.
  const toolsMap = new Map(tools.map(tool => [tool.schema.name, tool] as const));

  // Error logs go to stderr (stdout may carry the stdio transport). Each entry
  // ends with a real newline so consecutive entries do not run together.
  const logError = (message: string): void => {
    process.stderr.write(`[server.ts Error] ${new Date().toISOString()} ${message}\n`);
  };

  // Converts a tool's declared input schema into the JSON schema advertised to clients.
  const resolveInputSchema = (tool: Tool) => {
    const { inputSchema } = tool.schema;
    if (inputSchema instanceof z.Schema) {
      // Type assertion helps the compiler with zod's generic schema type.
      return zodToJsonSchema(inputSchema as any);
    }
    if (typeof inputSchema === 'object' && inputSchema !== null) {
      // Assume it is already a valid JSON schema object.
      return inputSchema;
    }
    console.error(`Warning: Tool '${tool.schema.name}' has an unexpected inputSchema type.`);
    return { type: "object" }; // Default to an empty object schema
  };

  // --- Request handlers ---

  server.setRequestHandler(ListResourcesRequestSchema, async () => {
    return { resources: context.listResources() };
  });

  server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
    try {
      const resourceContent = context.readResource(request.params.uri.toString());
      return { contents: [resourceContent] };
    } catch (error) {
      console.error(`Error reading resource via context: ${error}`);
      throw error;
    }
  });

  server.setRequestHandler(ListToolsRequestSchema, async () => {
    return {
      tools: tools.map(tool => ({
        name: tool.schema.name,
        description: tool.schema.description,
        inputSchema: resolveInputSchema(tool),
      })),
    };
  });

  server.setRequestHandler(CallToolRequestSchema, async (request) => {
    const tool = toolsMap.get(request.params.name);

    if (!tool) {
      logError(`Tool "${request.params.name}" not found.`);
      return { content: [{ type: 'text', text: `Tool "${request.params.name}" not found` }], isError: true };
    }

    try {
      // Delegate execution to the context.
      return await context.run(tool, request.params.arguments ?? {});
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      logError(`Error running tool ${request.params.name} via context: ${errorMessage}`);
      logError(`Original error stack (if available): ${error instanceof Error ? error.stack : 'N/A'}`);
      return { content: [{ type: 'text', text: `Failed to run tool '${request.params.name}': ${errorMessage}` }], isError: true };
    }
  });

  // NOTE(review): this wrapper only forwards to the original close(); no
  // context/session cleanup is performed here. It is kept as the hook point
  // for such cleanup.
  const originalClose = server.close.bind(server);
  server.close = async () => {
    await originalClose();
  };

  return server;
}
emitGeneric: true }); 39 | } catch (e) { 40 | snapshotString = `error: Could not take snapshot. Error: ${e instanceof Error ? e.message : String(e)}`; 41 | } 42 | 43 | const snapshot = yaml.parseDocument(snapshotString); 44 | 45 | const visit = async (node: any): Promise => { 46 | if (yaml.isPair(node)) { 47 | await Promise.all([ 48 | visit(node.key).then(k => node.key = k), 49 | visit(node.value).then(v => node.value = v) 50 | ]); 51 | } else if (yaml.isSeq(node) || yaml.isMap(node)) { 52 | const items = [...node.items]; 53 | node.items = await Promise.all(items.map(visit)); 54 | } else if (yaml.isScalar(node)) { 55 | if (typeof node.value === 'string') { 56 | const value = node.value; 57 | if (frameIndex > 0) 58 | node.value = value.replace('[ref=', `[ref=f${frameIndex}`); 59 | 60 | if (value.startsWith('iframe ')) { 61 | const ref = value.match(/\[ref=(.*)\]/)?.[1]; 62 | if (ref) { 63 | try { 64 | const childFrameLocator = frame.frameLocator(`aria-ref=${ref}`); 65 | const childSnapshot = await this._snapshotFrame(childFrameLocator); 66 | return snapshot.createPair(node.value, childSnapshot); 67 | } catch (error) { 68 | return snapshot.createPair(node.value, ''); 69 | } 70 | } 71 | } 72 | } 73 | } 74 | return node; 75 | }; 76 | 77 | 78 | if (snapshot.contents) { 79 | await visit(snapshot.contents); 80 | } else { 81 | const emptyMapDoc = yaml.parseDocument('{}'); 82 | snapshot.contents = emptyMapDoc.contents; 83 | } 84 | return snapshot; 85 | } 86 | 87 | refLocator(ref: string): Locator { 88 | let frameIndex = 0; 89 | let frame: PageOrFrameLocator; 90 | let targetRef = ref; 91 | 92 | const match = ref.match(/^f(\d+)(.*)/); 93 | if (match) { 94 | frameIndex = parseInt(match[1], 10); 95 | targetRef = match[2]; 96 | } 97 | 98 | if (this._frameLocators.length === 0) { 99 | throw new Error(`Frame locators not initialized. 
Cannot find frame for ref '${ref}'.`); 100 | } 101 | 102 | if (frameIndex < 0 || frameIndex >= this._frameLocators.length) { 103 | throw new Error(`Validation Error: Frame index ${frameIndex} derived from ref '${ref}' is out of bounds (found ${this._frameLocators.length} frames).`); 104 | } 105 | frame = this._frameLocators[frameIndex]; 106 | 107 | if (!frame) 108 | throw new Error(`Frame (index ${frameIndex}) could not be determined. Provide ref from the most current snapshot.`); 109 | 110 | return frame.locator(`aria-ref=${targetRef}`); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /browserbase/src/program.ts: -------------------------------------------------------------------------------- 1 | import { program } from 'commander'; 2 | import * as fs from 'fs'; 3 | import * as path from 'path'; 4 | import { fileURLToPath } from 'url'; 5 | 6 | import { createServer } from './index.js'; 7 | import { ServerList } from './server.js'; 8 | 9 | import { startHttpTransport, startStdioTransport } from './transport.js'; 10 | 11 | import { resolveConfig } from './config.js'; 12 | 13 | // Determine the directory of the current module 14 | const __filename = fileURLToPath(import.meta.url); 15 | const __dirname = path.dirname(__filename); 16 | 17 | // Load package.json using fs 18 | const packageJSONPath = path.resolve(__dirname, '../package.json'); 19 | const packageJSONBuffer = fs.readFileSync(packageJSONPath); 20 | const packageJSON = JSON.parse(packageJSONBuffer.toString()); 21 | 22 | program 23 | .version('Version ' + packageJSON.version) 24 | .name(packageJSON.name) 25 | .option('--browserbaseApiKey ', 'The Browserbase API Key to use') 26 | .option('--browserbaseProjectId ', 'The Browserbase Project ID to use') 27 | .option('--proxies', 'Use Browserbase proxies.') 28 | .option('--advancedStealth', 'Use advanced stealth mode. 
/**
 * Tracks every server produced by a factory so that they can be closed
 * individually or all together (for example on process shutdown).
 */
export class ServerList {
  private _servers: Server[] = [];
  private _serverFactory: () => Promise<Server>;

  /**
   * @param serverFactory Async factory invoked by {@link create} for each new server.
   */
  constructor(serverFactory: () => Promise<Server>) {
    this._serverFactory = serverFactory;
  }

  /**
   * Creates a server through the factory and starts tracking it.
   * @returns The newly created server.
   */
  async create() {
    const server = await this._serverFactory();
    this._servers.push(server);
    return server;
  }

  /**
   * Stops tracking `server` (if tracked) and closes it.
   * The server is closed even when it was not in the list.
   */
  async close(server: Server) {
    const index = this._servers.indexOf(server);
    if (index !== -1)
      this._servers.splice(index, 1);
    await server.close();
  }

  /**
   * Closes every tracked server in parallel and empties the list.
   *
   * The list is detached before closing so that a repeated call (several exit
   * signals can trigger shutdown) does not close the same servers twice.
   * Rejects if any individual close() rejects.
   */
  async closeAll() {
    const servers = this._servers.splice(0);
    await Promise.all(servers.map(server => server.close()));
  }
}
/**
 * Creates a Browserbase session, connects Playwright to it over CDP and
 * registers the result under `newSessionId`, making it the active session.
 *
 * @param newSessionId Local identifier under which the session is tracked.
 * @param config Server configuration (API key, project ID, viewport, context,
 *   proxies, stealth and optional cookies).
 * @returns The tracked session object (browser, page and Browserbase session ID).
 * @throws Error when the API key or project ID is missing, or when creating or
 *   connecting the session fails.
 */
export async function createNewBrowserSession(
  newSessionId: string,
  config: Config,
): Promise<BrowserSession> {
  if (!config.browserbaseApiKey) {
    throw new Error("Browserbase API Key is missing in the configuration.");
  }
  if (!config.browserbaseProjectId) {
    throw new Error("Browserbase Project ID is missing in the configuration.");
  }

  const bb = new Browserbase({
    apiKey: config.browserbaseApiKey,
  });

  const sessionOptions: SessionCreateParams = {
    projectId: config.browserbaseProjectId,
    proxies: config.proxies,
    browserSettings: {
      viewport: {
        width: config.viewPort?.browserWidth ?? 1024,
        height: config.viewPort?.browserHeight ?? 768,
      },
      // Only attach a Browserbase context when one was configured.
      context: config.context?.contextId ? {
        id: config.context?.contextId,
        persist: config.context?.persist ?? true,
      } : undefined,
      advancedStealth: config.advancedStealth ?? undefined,
    },
  };

  // Kept outside the try block so a half-initialised browser can be closed on failure.
  let connectedBrowser: Browser | undefined;

  try {
    process.stderr.write(
      `[SessionManager] Creating session ${newSessionId}...\n`
    );
    const bbSession = await bb.sessions.create(sessionOptions);
    process.stderr.write(
      `[SessionManager] Browserbase session created: ${bbSession.id}\n`
    );

    const browser = await chromium.connectOverCDP(bbSession.connectUrl);
    connectedBrowser = browser;
    process.stderr.write(
      `[SessionManager] Browserbase Live Debugger URL: https://www.browserbase.com/sessions/${bbSession.id}\n`
    );

    browser.on("disconnected", () => {
      process.stderr.write(`[SessionManager] Disconnected: ${newSessionId}\n`);

      // If this ID has since been re-registered with a different browser, the
      // newer session is still alive: leave the map and active ID untouched.
      const tracked = browsers.get(newSessionId);
      if (tracked && tracked.browser !== browser) {
        return;
      }

      browsers.delete(newSessionId);
      if (defaultBrowserSession && defaultBrowserSession.browser === browser) {
        process.stderr.write(
          `[SessionManager] Disconnected (default): ${newSessionId}\n`
        );
        defaultBrowserSession = null;
      }
      if (
        activeSessionId === newSessionId &&
        newSessionId !== defaultSessionId
      ) {
        process.stderr.write(
          `[SessionManager] WARN - Active session disconnected, resetting to default: ${newSessionId}\n`
        );
        setActiveSessionId(defaultSessionId);
      }
    });

    // Reuse the context/page the remote browser already has, if any.
    let context = browser.contexts()[0];
    if (!context) {
      context = await browser.newContext();
    }

    if (config.cookies && Array.isArray(config.cookies) && config.cookies.length > 0) {
      await addCookiesToContext(context, config.cookies);
    }

    let page = context.pages()[0];
    if (!page) {
      page = await context.newPage();
    }

    const sessionObj: BrowserSession = {
      browser,
      page,
      sessionId: bbSession.id,
    };

    browsers.set(newSessionId, sessionObj);

    if (newSessionId === defaultSessionId) {
      defaultBrowserSession = sessionObj;
    }

    setActiveSessionId(newSessionId);
    process.stderr.write(
      `[SessionManager] Session created and active: ${newSessionId}\n`
    );

    return sessionObj;
  } catch (creationError) {
    const errorMessage =
      creationError instanceof Error
        ? creationError.message
        : String(creationError);
    process.stderr.write(
      `[SessionManager] Creating session ${newSessionId} failed: ${errorMessage}\n`
    );

    // Do not leak a connected browser when setup failed after the CDP connect.
    if (connectedBrowser?.isConnected()) {
      try {
        await connectedBrowser.close();
      } catch {
        // Best effort: the original creation error is the one worth reporting.
      }
    }

    throw new Error(
      `Failed to create/connect session ${newSessionId}: ${errorMessage}`
    );
  }
}
/**
 * Returns a usable default session, creating or recreating it when needed.
 *
 * A default session whose browser is connected and whose page is open is
 * marked active and returned as-is. Otherwise a new one is created; if that
 * attempt fails, exactly one retry is made.
 *
 * @param config Server configuration forwarded to session creation.
 * @returns The default browser session.
 * @throws Error when both the first attempt and the retry fail.
 */
export async function ensureDefaultSessionInternal(
  config: Config
): Promise<BrowserSession> {
  const id = defaultSessionId;
  const current = defaultBrowserSession;

  // Fast path: the existing default session is still healthy.
  if (current && current.browser.isConnected() && !current.page.isClosed()) {
    setActiveSessionId(id); // Ensure default is marked active
    return current;
  }

  if (current) {
    process.stderr.write(
      `[SessionManager] Default session ${id} is stale, recreating.\n`
    );
    await closeBrowserGracefully(current, id);
    defaultBrowserSession = null;
    browsers.delete(id);
  } else {
    process.stderr.write(
      `[SessionManager] Default session ${id} not found, creating.\n`
    );
  }

  // First attempt.
  try {
    const created = await createNewBrowserSession(id, config);
    defaultBrowserSession = created;
    return created;
  } catch (firstError) {
    const firstMessage =
      firstError instanceof Error ? firstError.message : String(firstError);
    process.stderr.write(
      `[SessionManager] Initial/Recreation attempt for default session ${id} failed. Error: ${firstMessage}\n`
    );
  }

  // Single retry after a failed first attempt.
  process.stderr.write(
    `[SessionManager] Retrying creation of default session ${id} after error...\n`
  );
  try {
    const recreated = await createNewBrowserSession(id, config);
    defaultBrowserSession = recreated;
    return recreated;
  } catch (retryError) {
    const finalErrorMessage =
      retryError instanceof Error ? retryError.message : String(retryError);
    process.stderr.write(
      `[SessionManager] Failed to recreate default session ${id} after retry: ${finalErrorMessage}\n`
    );
    throw new Error(
      `Failed to ensure default session ${id} after initial error and retry: ${finalErrorMessage}`
    );
  }
}
See previous messages for details.\n` 289 | ); 290 | return null; // Or rethrow if getSession failing for default is critical 291 | } 292 | } 293 | 294 | // For non-default sessions 295 | process.stderr.write(`[SessionManager] Getting session: ${sessionId}\n`); 296 | let sessionObj = browsers.get(sessionId); 297 | 298 | if (!sessionObj) { 299 | process.stderr.write( 300 | `[SessionManager] WARN - Session not found in map: ${sessionId}\n` 301 | ); 302 | return null; 303 | } 304 | 305 | // Validate the found session 306 | if (!sessionObj.browser.isConnected() || sessionObj.page.isClosed()) { 307 | process.stderr.write( 308 | `[SessionManager] WARN - Found session ${sessionId} is stale, removing.\n` 309 | ); 310 | await closeBrowserGracefully(sessionObj, sessionId); 311 | browsers.delete(sessionId); 312 | if (activeSessionId === sessionId) { 313 | process.stderr.write( 314 | `[SessionManager] WARN - Invalidated active session ${sessionId}, resetting to default.\n` 315 | ); 316 | setActiveSessionId(defaultSessionId); 317 | } 318 | return null; 319 | } 320 | 321 | // Session appears valid, make it active 322 | setActiveSessionId(sessionId); 323 | process.stderr.write(`[SessionManager] Using valid session: ${sessionId}\n`); 324 | return sessionObj; 325 | } 326 | 327 | /** 328 | * Get a session by ID without creating new sessions. 329 | * This is a read-only operation that never triggers session creation. 330 | * Used for operations like closing sessions where we don't want side effects. 
/**
 * Looks up a session without ever creating one.
 *
 * Intended for operations such as closing a session, where triggering session
 * creation would be an unwanted side effect.
 *
 * @param sessionId The session ID to retrieve
 * @returns The session when it is tracked, its browser is connected and its
 *   page is open; null otherwise
 */
export function getSessionReadOnly(sessionId: string): BrowserSession | null {
  // A session is usable only while its browser is connected and its page is open.
  const isUsable = (candidate: BrowserSession): boolean =>
    candidate.browser.isConnected() && !candidate.page.isClosed();

  // The default session is answered from its dedicated reference when present.
  if (sessionId === defaultSessionId && defaultBrowserSession) {
    return isUsable(defaultBrowserSession) ? defaultBrowserSession : null;
  }

  // Everything else is answered from the browsers map.
  const tracked = browsers.get(sessionId);
  return tracked && isUsable(tracked) ? tracked : null;
}
/**
 * Forgets a session after its browser has been closed.
 *
 * Removes the entry from the browsers map, clears the default-session
 * reference when the default session is the one being cleaned up, and falls
 * back to the default session ID if the cleaned-up session was active.
 *
 * @param sessionId The session ID to clean up
 */
export function cleanupSession(sessionId: string): void {
  process.stderr.write(`[SessionManager] Cleaning up session: ${sessionId}\n`);

  browsers.delete(sessionId);

  if (sessionId === defaultSessionId) {
    defaultBrowserSession = null;
  }

  // Nothing more to do unless the cleaned-up session was the active one.
  if (activeSessionId !== sessionId) {
    return;
  }

  process.stderr.write(
    `[SessionManager] Cleaned up active session ${sessionId}, resetting to default.\n`
  );
  setActiveSessionId(defaultSessionId);
}
/**
 * Handle function for the `browserbase_wait` tool.
 *
 * Returns a deferred action that sleeps for `params.time` seconds and then
 * reports how long it waited. No snapshot is captured afterwards.
 *
 * The action rejects when the duration is negative, not finite, or larger than
 * the longest delay Node's timers honour. Such values would make `setTimeout`
 * fire almost immediately while the tool still reported a full wait.
 *
 * @param context Execution context (unused by this tool).
 * @param params Validated tool input; `time` is expressed in seconds.
 * @returns The tool result descriptor containing the deferred action.
 */
async function handleWait(context: Context, params: WaitInput): Promise<ToolResult> {
  // Largest delay in milliseconds accepted by setTimeout (2^31 - 1).
  const maxTimerDelayMs = 2147483647;

  const action = async (): Promise<ToolActionResult> => {
    const delayMs = params.time * 1000;
    if (!Number.isFinite(delayMs) || delayMs < 0 || delayMs > maxTimerDelayMs) {
      throw new Error(
        `Invalid wait time: ${params.time}. Expected a number of seconds between 0 and ${maxTimerDelayMs / 1000}.`
      );
    }
    await new Promise<void>(resolve => setTimeout(resolve, delayMs));
    return { content: [{ type: 'text', text: `Waited for ${params.time} seconds.` }] };
  };

  return { action, code: [], captureSnapshot: false, waitForNetwork: false };
}
/**
 * Handle function for the `browserbase_resize` tool.
 *
 * Returns a deferred action that sets the active page's viewport to the
 * requested size. A snapshot is requested after the action.
 *
 * @param context Execution context used to obtain the active page.
 * @param params Validated tool input with the target `width` and `height`.
 * @returns The tool result descriptor containing the deferred action.
 */
async function handleResize(context: Context, params: ResizeInput): Promise<ToolResult> {
  async function action(): Promise<ToolActionResult> {
    const activePage = await context.getActivePage();

    // Guard clause: no page, or a page that has already been closed.
    if (!activePage || activePage.isClosed()) {
      return { content: [{ type: 'text', text: `No active page to resize.` }] };
    }

    const { width, height } = params;
    await activePage.setViewportSize({ width, height });
    return { content: [{ type: 'text', text: `Resized page to ${width}x${height}.` }] };
  }

  return { action, code: [], captureSnapshot: true, waitForNetwork: false };
}
closeTool, 110 | resizeTool, 111 | ]; -------------------------------------------------------------------------------- /browserbase/src/tools/context.ts: -------------------------------------------------------------------------------- 1 | import { z } from "zod"; 2 | import type { Tool, ToolSchema, ToolResult } from "./tool.js"; 3 | import type { Context } from "../context.js"; 4 | import type { ToolActionResult } from "../context.js"; 5 | import { Browserbase } from "@browserbasehq/sdk"; 6 | 7 | // Store contexts in memory 8 | const contexts = new Map(); 9 | 10 | // --- Tool: Create Context --- 11 | const CreateContextInputSchema = z.object({ 12 | name: z 13 | .string() 14 | .optional() 15 | .describe("Optional friendly name to reference this context later (otherwise, you'll need to use the returned ID)"), 16 | }); 17 | type CreateContextInput = z.infer; 18 | 19 | const createContextSchema: ToolSchema = { 20 | name: "browserbase_context_create", 21 | description: "Create a new Browserbase context for reusing cookies, authentication, and cached data across browser sessions", 22 | inputSchema: CreateContextInputSchema, 23 | }; 24 | 25 | async function handleCreateContext( 26 | context: Context, 27 | params: CreateContextInput 28 | ): Promise { 29 | try { 30 | const config = context.config; 31 | 32 | if (!config.browserbaseApiKey || !config.browserbaseProjectId) { 33 | throw new Error("Browserbase API Key or Project ID is missing in the configuration"); 34 | } 35 | 36 | const bb = new Browserbase({ 37 | apiKey: config.browserbaseApiKey, 38 | }); 39 | 40 | console.error("Creating new Browserbase context"); 41 | const bbContext = await bb.contexts.create({ 42 | projectId: config.browserbaseProjectId, 43 | }); 44 | 45 | console.error(`Successfully created context: ${bbContext.id}`); 46 | 47 | // Store context ID with optional name if provided 48 | const contextName = params.name || bbContext.id; 49 | contexts.set(contextName, bbContext.id); 50 | 51 | const result: 
/**
 * Handle function for the `browserbase_context_delete` tool.
 *
 * Resolves the target context from `params.contextId` or, failing that, from
 * the friendly name recorded in the in-memory `contexts` store, deletes it via
 * the Browserbase REST API, and then forgets every local key that pointed at
 * the deleted context.
 *
 * @param context - Tool execution context; only `context.config` is read.
 * @param params - Either `contextId` or `name` must be provided.
 * @returns A tool result whose `resultOverride` and `action` both yield the
 *   confirmation message.
 * @throws Error wrapped as "Failed to delete Browserbase context: ..." when the
 *   API key is missing, no identifier is given, the name is unknown, or the
 *   API answers with anything other than HTTP 204.
 */
async function handleDeleteContext(
  context: Context,
  params: DeleteContextInput
): Promise<ToolResult> {
  try {
    const config = context.config;

    if (!config.browserbaseApiKey) {
      throw new Error("Browserbase API Key is missing in the configuration");
    }

    if (!params.contextId && !params.name) {
      throw new Error("Missing required argument: either contextId or name must be provided");
    }

    // An explicit ID wins; the name is only consulted when no ID was given.
    const contextId =
      params.contextId || (params.name ? contexts.get(params.name) : undefined);
    if (!contextId) {
      throw new Error(`Context with name "${params.name}" not found`);
    }

    console.error(`Deleting Browserbase context: ${contextId}`);

    // Encode the ID so characters such as "/" or "?" cannot alter the request path.
    const response = await fetch(
      `https://api.browserbase.com/v1/contexts/${encodeURIComponent(contextId)}`,
      {
        method: 'DELETE',
        headers: {
          'X-BB-API-Key': config.browserbaseApiKey,
        },
      }
    );

    if (response.status !== 204) {
      const errorText = await response.text();
      throw new Error(`Failed to delete context with status ${response.status}: ${errorText}`);
    }

    // Drop every local key (friendly name or raw ID) that resolved to the
    // deleted context. A supplied name is removed only if it actually pointed
    // at this context, so an unrelated mapping is never discarded.
    for (const [key, id] of contexts) {
      if (id === contextId) {
        contexts.delete(key);
      }
    }

    console.error(`Successfully deleted context: ${contextId}`);

    const result: ToolActionResult = {
      content: [
        {
          type: "text",
          text: `Deleted Browserbase context with ID: ${contextId}`,
        },
      ],
    };

    return {
      resultOverride: result,
      action: async () => {
        console.error("Delete Context action");
        return result;
      },
      code: [],
      captureSnapshot: false,
      waitForNetwork: false,
    };
  } catch (error: unknown) {
    const reason = (error instanceof Error && error.message) || String(error);
    console.error(`DeleteContext handle failed: ${reason}`);
    throw new Error(`Failed to delete Browserbase context: ${reason}`);
  }
}

/**
 * Resolve a context ID from a friendly name or a raw ID (exported for use by
 * session.ts).
 *
 * Keys recorded in the local `contexts` store are checked first, so a friendly
 * name that happens to be 32 characters long still resolves to its context.
 * Anything not in the store is treated as a raw context ID only when it is
 * exactly 32 characters long, which is the length this module assumes for IDs.
 *
 * @param nameOrId - Friendly name or context ID.
 * @returns The resolved context ID, or `undefined` when nothing matches.
 */
export function getContextId(nameOrId: string): string | undefined {
  const registered = contexts.get(nameOrId);
  if (registered !== undefined) {
    return registered;
  }

  return nameOrId.length === 32 ? nameOrId : undefined;
}
createContextTool: Tool = { 186 | capability: "core", 187 | schema: createContextSchema, 188 | handle: handleCreateContext, 189 | }; 190 | 191 | const deleteContextTool: Tool = { 192 | capability: "core", 193 | schema: deleteContextSchema, 194 | handle: handleDeleteContext, 195 | }; 196 | 197 | // Export as an array of tools 198 | export default [createContextTool, deleteContextTool]; -------------------------------------------------------------------------------- /browserbase/src/tools/getText.ts: -------------------------------------------------------------------------------- 1 | import { z } from 'zod'; 2 | import type { Tool, ToolSchema, ToolResult } from "./tool.js"; 3 | import type { Context } from '../context.js'; 4 | import type { ToolActionResult } from '../context.js'; 5 | 6 | // --- Tool: Get Text --- 7 | const GetTextInputSchema = z.object({ 8 | selector: z.string().optional().describe("Optional CSS selector to get text from. If omitted, gets text from the whole body."), 9 | }); 10 | type GetTextInput = z.infer; 11 | 12 | const getTextSchema: ToolSchema = { 13 | name: "browserbase_get_text", 14 | description: "Extract text content from the page or a specific element.", 15 | inputSchema: GetTextInputSchema, 16 | }; 17 | 18 | // Handle function for GetText 19 | async function handleGetText(context: Context, params: GetTextInput): Promise { 20 | const action = async (): Promise => { 21 | const page = await context.getActivePage(); 22 | if (!page) { 23 | throw new Error('No active page found for getText'); 24 | } 25 | try { 26 | let textContent: string | null; 27 | if (params.selector) { 28 | textContent = await page.textContent(params.selector, { timeout: 10000 }); 29 | } else { 30 | textContent = await page.textContent('body', { timeout: 10000 }); 31 | } 32 | return { content: [{ type: 'text', text: textContent ?? 
"" }] }; 33 | } catch (error) { 34 | console.error(`GetText action failed: ${error}`); 35 | throw error; // Rethrow to be caught by Context.run's try/catch around handle/action 36 | } 37 | }; 38 | 39 | return { 40 | action, 41 | code: [], 42 | captureSnapshot: false, 43 | waitForNetwork: false, 44 | }; 45 | } 46 | 47 | // Define tool using handle 48 | const getTextTool: Tool = { 49 | capability: 'core', // Add capability 50 | schema: getTextSchema, 51 | handle: handleGetText, 52 | }; 53 | 54 | export default [getTextTool]; -------------------------------------------------------------------------------- /browserbase/src/tools/hover.ts: -------------------------------------------------------------------------------- 1 | import type { Tool } from "./tool.js"; 2 | 3 | // Placeholder function for hover tool, accepting the flag 4 | export function hover(captureSnapshot: boolean): Tool[] { 5 | // TODO: Implement hoverTool and potentially use flag 6 | return []; 7 | } 8 | export default hover; -------------------------------------------------------------------------------- /browserbase/src/tools/keyboard.ts: -------------------------------------------------------------------------------- 1 | import { z } from 'zod'; 2 | import { defineTool, type ToolFactory } from './tool.js'; 3 | 4 | const pressKey: ToolFactory = captureSnapshot => defineTool({ 5 | capability: 'core', 6 | 7 | schema: { 8 | name: 'browserbase_press_key', 9 | description: 'Press a key on the keyboard', 10 | inputSchema: z.object({ 11 | key: z.string().describe('Name of the key to press or a character to generate, such as `ArrowLeft` or `a`'), 12 | }), 13 | }, 14 | 15 | handle: async (context, params) => { 16 | const page = await context.getActivePage(); 17 | if (!page) { 18 | throw new Error('No active page found for pressKey'); 19 | } 20 | 21 | const code = [ 22 | `// Press ${params.key}`, 23 | `await page.keyboard.press('${params.key.replace(/'/g, "\\'")}');`, 24 | ]; 25 | 26 | const action = () => 
/**
 * Tool factory for `browserbase_navigate`.
 *
 * The produced tool resolves the active page up front (throwing when there is
 * none) and returns a deferred action that navigates it to `params.url`,
 * together with an equivalent code snippet.
 *
 * @param captureSnapshot - Forwarded unchanged into the tool result.
 */
const navigate: ToolFactory = captureSnapshot => defineTool({
  capability: 'core',

  schema: {
    name: 'browserbase_navigate',
    description: 'Navigate to a URL',
    inputSchema: z.object({
      url: z.string().describe('The URL to navigate to'),
    }),
  },

  handle: async (context, params) => {
    const page = await context.getActivePage();
    if (!page) {
      throw new Error('No active page found for navigate');
    }
    const action = async (): Promise<ToolActionResult> => {
      await page.goto(params.url);
      return { content: [{ type: 'text', text: `Navigated to ${params.url}` }] };
    };

    // Escape backslashes and single quotes so the emitted snippet remains a
    // valid single-quoted string literal, matching how the press-key tool
    // escapes its argument.
    const urlLiteral = params.url.replace(/\\/g, '\\\\').replace(/'/g, "\\'");
    const code = [
      `// Navigate to ${params.url}`,
      `await page.goto('${urlLiteral}');`,
    ];

    return {
      action,
      code,
      captureSnapshot,
      waitForNetwork: false,
    };
  },
});
return { content: [{ type: 'text', text: 'Navigated back' }] }; 56 | }; 57 | const code = [ 58 | `// Navigate back`, 59 | `await page.goBack();`, 60 | ]; 61 | 62 | return { 63 | action, 64 | code, 65 | captureSnapshot, 66 | waitForNetwork: true, 67 | }; 68 | }, 69 | }); 70 | 71 | const goForward: ToolFactory = captureSnapshot => defineTool({ 72 | capability: 'history', 73 | schema: { 74 | name: 'browserbase_navigate_forward', 75 | description: 'Go forward to the next page', 76 | inputSchema: z.object({}), 77 | }, 78 | handle: async context => { 79 | const page = await context.getActivePage(); 80 | if (!page) { 81 | throw new Error('No active page found for goForward'); 82 | } 83 | const action = async (): Promise => { 84 | await page.goForward(); 85 | return { content: [{ type: 'text', text: 'Navigated forward' }] }; 86 | }; 87 | const code = [ 88 | `// Navigate forward`, 89 | `await page.goForward();`, 90 | ]; 91 | return { 92 | action, 93 | code, 94 | captureSnapshot, 95 | waitForNetwork: true, 96 | }; 97 | }, 98 | }); 99 | 100 | const captureSnapshotValue = true; 101 | 102 | export default [ 103 | navigate(captureSnapshotValue), 104 | goBack(captureSnapshotValue), 105 | goForward(captureSnapshotValue), 106 | ]; -------------------------------------------------------------------------------- /browserbase/src/tools/selectOption.ts: -------------------------------------------------------------------------------- 1 | import type { Tool } from "./tool.js"; 2 | 3 | // Placeholder function for select option tool, accepting the flag 4 | export function selectOption(captureSnapshot: boolean): Tool[] { 5 | // TODO: Implement selectOptionTool and potentially use flag 6 | return []; 7 | } 8 | export default selectOption; -------------------------------------------------------------------------------- /browserbase/src/tools/session.ts: -------------------------------------------------------------------------------- 1 | import { z } from "zod"; 2 | import type { Tool, 
/**
 * Handle function for the `browserbase_session_create` tool.
 *
 * Returns a deferred action that obtains a browser session from the session
 * manager, records its internal ID on the context as the current session, and
 * yields the session's Browserbase URL as text.
 *
 * @param context - Tool execution context; `config` is read and
 *   `currentSessionId` is written.
 * @param params - Optional `sessionId`. When given it is suffixed with the
 *   configured project ID to form the internal session key; otherwise the
 *   default session is used.
 * @returns A tool result whose `action` performs the work; no snapshot is
 *   captured and no network wait is requested.
 */
async function handleCreateSession(
  context: Context,
  params: CreateSessionInput
): Promise<ToolResult> {
  const action = async (): Promise<ToolActionResult> => {
    try {
      const config = context.config;

      let targetSessionId: string;
      if (params.sessionId) {
        const projectId = config.browserbaseProjectId || '';
        targetSessionId = `${params.sessionId}_${projectId}`;
        // Diagnostics are newline-terminated so consecutive stderr messages
        // do not run together.
        process.stderr.write(
          `[tool.createSession] Attempting to create/assign session with specified ID: ${targetSessionId}\n`
        );
      } else {
        targetSessionId = defaultSessionId;
      }

      const session: BrowserSession =
        targetSessionId === defaultSessionId
          ? await ensureDefaultSessionInternal(config)
          : await createNewBrowserSession(targetSessionId, config);

      if (!session || !session.browser || !session.page || !session.sessionId) {
        throw new Error(
          `SessionManager failed to return a valid session object with actualSessionId for ID: ${targetSessionId}`
        );
      }

      context.currentSessionId = targetSessionId;
      process.stderr.write(
        `[tool.connected] Successfully connected to Browserbase session. Internal ID: ${targetSessionId}, Actual ID: ${session.sessionId}\n`
      );

      const sessionUrl = `https://www.browserbase.com/sessions/${session.sessionId}`;
      process.stderr.write(`[SessionManager] Browserbase Live Debugger URL: ${sessionUrl}\n`);

      return {
        content: [
          {
            type: "text",
            text: sessionUrl,
          },
        ],
      };
    } catch (error: unknown) {
      const reason = (error instanceof Error && error.message) || String(error);
      process.stderr.write(`[tool.createSession] Action failed: ${reason}\n`);
      // Re-throw so the caller's error handling for actions reports the failure.
      throw new Error(`Failed to create Browserbase session: ${reason}`);
    }
  };

  return {
    action: action,
    captureSnapshot: false,
    code: [],
    waitForNetwork: false,
  };
}
/**
 * Handle function for the `browserbase_session_close` tool.
 *
 * Returns a deferred action that:
 *  1. looks up the active browser without creating a new session,
 *  2. closes it and removes the session from tracking when one exists,
 *  3. always resets the context's current session ID to the default (clearing
 *     the latest snapshot when a non-default session was active), and
 *  4. reports the outcome as text, or throws when a browser was found but
 *     could not be closed.
 *
 * @param context - Tool execution context; `currentSessionId` is read and reset.
 * @param _params - Unused placeholder input.
 * @returns A tool result whose `action` performs the close; no snapshot is
 *   captured and no network wait is requested.
 */
async function handleCloseSession(
  context: Context,
  _params: CloseSessionInput
): Promise<ToolResult> {
  const code = [`// Attempting to close the current Browserbase session.`];

  // Every diagnostic is newline-terminated so consecutive stderr messages do
  // not run together.
  const log = (message: string): void => {
    process.stderr.write(`[tool.closeSession] ${message}\n`);
  };
  const describeError = (error: unknown): string =>
    (error instanceof Error && error.message) || String(error);

  const action = async (): Promise<ToolActionResult> => {
    // Captured before anything below can change the context.
    const previousSessionId = context.currentSessionId;
    let browser: BrowserSession["browser"] | null = null;
    let browserClosedSuccessfully = false;
    let browserCloseErrorMessage = "";

    // Step 1: fetch the active browser through the read-only accessor so that
    // closing never creates a new session as a side effect.
    try {
      browser = context.getActiveBrowserReadOnly();
    } catch (error: unknown) {
      // Without a browser there is nothing to close; the context is still reset below.
      log(
        `Error retrieving active browser (session ID was ${previousSessionId || 'default/unknown'}): ${describeError(error)}`
      );
    }

    // Step 2: close the browser if one was found.
    if (browser) {
      try {
        log(
          `Attempting to close browser for session: ${previousSessionId || 'default (actual might differ)'}`
        );
        await browser.close();
        browserClosedSuccessfully = true;
        log(`Browser connection for session (was ${previousSessionId}) closed.`);

        // Clean up the session from tracking
        cleanupSession(previousSessionId);

        // NOTE(review): this URL is built from the context's session ID, which
        // may be the internal key rather than the Browserbase session ID; confirm.
        log(
          `View session replay at https://www.browserbase.com/sessions/${previousSessionId}`
        );
      } catch (error: unknown) {
        browserCloseErrorMessage = describeError(error);
        log(
          `Error during browser.close() for session (was ${previousSessionId}): ${browserCloseErrorMessage}`
        );
      }
    } else {
      log(
        `No active browser instance found to close. (Session ID in context was: ${previousSessionId || 'default/unknown'}).`
      );
    }

    // Step 3: always reset the context to the default session, and clear the
    // snapshot when the previous session was a specific (non-default) one.
    const oldContextSessionId = context.currentSessionId;
    context.currentSessionId = defaultSessionId;
    if (oldContextSessionId && oldContextSessionId !== defaultSessionId) {
      context.clearLatestSnapshot();
      log(`Snapshot cleared for previous session: ${oldContextSessionId}.`);
    }
    log(
      `Session context reset to default. Previous context session ID was ${oldContextSessionId || 'default/unknown'}.`
    );

    // Step 4: report the outcome.
    if (browser && !browserClosedSuccessfully) {
      // A close was attempted and failed.
      throw new Error(
        `Failed to close the Browserbase browser (session ID in context was ${previousSessionId || 'default/unknown'}). Error: ${browserCloseErrorMessage}. Session context has been reset to default.`
      );
    }

    const hadNamedSession =
      Boolean(previousSessionId) && previousSessionId !== defaultSessionId;

    if (browserClosedSuccessfully) {
      let successMessage = `Browserbase session (associated with context ID ${previousSessionId || 'default'}) closed successfully. Context reset to default.`;
      if (hadNamedSession) {
        successMessage += ` If this was a uniquely named session (${previousSessionId}), view replay (if available) at https://browserbase.com/sessions`;
      }
      return { content: [{ type: "text", text: successMessage }] };
    }

    // No browser was available to close.
    const infoMessage = hadNamedSession
      ? `No active browser found for session ID '${previousSessionId}' in context. The context has been reset to default.`
      : "No active browser instance was found to close. Session context has been reset to default.";
    return { content: [{ type: "text", text: infoMessage }] };
  };

  return {
    action: action,
    code: code,
    captureSnapshot: false,
    waitForNetwork: false,
  };
}
}, 33 | ]; 34 | return { content }; 35 | }; 36 | 37 | return { 38 | action, 39 | code: [`// Request accessibility snapshot`], 40 | captureSnapshot: true, 41 | waitForNetwork: false, 42 | resultOverride: { 43 | content: [{ type: "text", text: "Accessibility snapshot initiated." }], 44 | }, 45 | }; 46 | }, 47 | }); 48 | 49 | // --- Element Schema & Types --- 50 | const elementSchema = z.object({ 51 | element: z.string().describe("Human-readable element description"), 52 | ref: z 53 | .string() 54 | .describe("Exact target element reference from the page snapshot"), 55 | }); 56 | type ElementInput = z.infer; 57 | 58 | // --- Tool: Click (Adapted Handle, Example Action) --- 59 | const click = defineTool({ 60 | capability: "core", 61 | schema: { 62 | name: "browserbase_click", 63 | description: "Perform click on a web page using ref", 64 | inputSchema: elementSchema, 65 | }, 66 | handle: async ( 67 | context: Context, 68 | params: ElementInput 69 | ): Promise => { 70 | // Get locator directly from snapshot 71 | const snapshot = context.snapshotOrDie(); 72 | const locator = snapshot.refLocator(params.ref); 73 | 74 | const code = [ 75 | `// Click ${params.element}`, 76 | // Use generateLocator for code string 77 | `// await page.${await generateLocator(locator)}.click();`, 78 | ]; 79 | 80 | const action = async (): Promise => { 81 | try { 82 | // Use the locator directly for the action 83 | await locator.click({ force: true, timeout: 30000 }); // Increased timeout like logs 84 | } catch (actionError) { 85 | const errorMessage = 86 | actionError instanceof Error 87 | ? actionError.message 88 | : String(actionError); 89 | throw new Error( 90 | `Failed to click element '${params.element}'. 
Error: ${errorMessage}` 91 | ); 92 | } 93 | return { 94 | content: [{ type: "text", text: `Clicked ${params.element}` }], 95 | }; 96 | }; 97 | 98 | return { 99 | code, 100 | action, 101 | captureSnapshot: true, 102 | waitForNetwork: true, 103 | }; 104 | }, 105 | }); 106 | 107 | // --- Tool: Drag (Adapted Handle, Example Action) --- 108 | const dragInputSchema = z.object({ 109 | startElement: z.string().describe("Source element description"), 110 | startRef: z 111 | .string() 112 | .describe("Exact source element reference from the page snapshot"), 113 | endElement: z.string().describe("Target element description"), 114 | endRef: z 115 | .string() 116 | .describe("Exact target element reference from the page snapshot"), 117 | }); 118 | type DragInput = z.infer; 119 | 120 | const drag = defineTool({ 121 | capability: "core", 122 | schema: { 123 | name: "browserbase_drag", 124 | description: "Perform drag and drop between two elements using ref.", 125 | inputSchema: dragInputSchema, 126 | }, 127 | handle: async (context: Context, params: DragInput): Promise => { 128 | // Get locators directly from snapshot 129 | const snapshot = context.snapshotOrDie(); 130 | const startLocator = snapshot.refLocator(params.startRef); 131 | const endLocator = snapshot.refLocator(params.endRef); 132 | 133 | const code = [ 134 | `// Drag ${params.startElement} to ${params.endElement}`, 135 | // Use generateLocator for code string 136 | `// await page.${await generateLocator( 137 | startLocator 138 | )}.dragTo(page.${await generateLocator(endLocator)});`, 139 | ]; 140 | 141 | const action = async (): Promise => { 142 | try { 143 | // Use locators directly for the action 144 | await startLocator.dragTo(endLocator, { timeout: 5000 }); 145 | } catch (dragError) { 146 | const errorMsg = 147 | dragError instanceof Error ? dragError.message : String(dragError); 148 | throw new Error( 149 | `Failed to drag '${params.startElement}' to '${params.endElement}'. 
Error: ${errorMsg}` 150 | ); 151 | } 152 | return { 153 | content: [ 154 | { 155 | type: "text", 156 | text: `Dragged ${params.startElement} to ${params.endElement}`, 157 | }, 158 | ], 159 | }; 160 | }; 161 | 162 | return { action, code, captureSnapshot: true, waitForNetwork: true }; 163 | }, 164 | }); 165 | 166 | // --- Tool: Hover (Adapted Handle, Example Action) --- 167 | const hover = defineTool({ 168 | capability: "core", 169 | schema: { 170 | name: "browserbase_hover", 171 | description: "Hover over element on page using ref.", 172 | inputSchema: elementSchema, 173 | }, 174 | handle: async ( 175 | context: Context, 176 | params: ElementInput 177 | ): Promise => { 178 | // Get locator directly from snapshot 179 | const snapshot = context.snapshotOrDie(); 180 | const locator = snapshot.refLocator(params.ref); 181 | 182 | const code = [ 183 | `// Hover over ${params.element}`, 184 | // Use generateLocator for code string 185 | `// await page.${await generateLocator(locator)}.hover();`, 186 | ]; 187 | 188 | const action = async (): Promise => { 189 | try { 190 | // Use locator directly for the action 191 | await locator.hover({ timeout: 5000 }); 192 | } catch (hoverError) { 193 | const errorMsg = 194 | hoverError instanceof Error ? hoverError.message : String(hoverError); 195 | throw new Error( 196 | `Failed to hover over element '${params.element}'. 
Error: ${errorMsg}` 197 | ); 198 | } 199 | return { 200 | content: [{ type: "text", text: `Hovered over: ${params.element}` }], 201 | }; 202 | }; 203 | 204 | return { action, code, captureSnapshot: true, waitForNetwork: true }; 205 | }, 206 | }); 207 | 208 | // --- Tool: Type (Adapted Handle, Example Action) --- 209 | const typeSchema = elementSchema.extend({ 210 | text: z.string().describe("Text to type into the element"), 211 | submit: z 212 | .boolean() 213 | .optional() 214 | .describe("Whether to submit entered text (press Enter after)"), 215 | slowly: z 216 | .boolean() 217 | .optional() 218 | .default(true) 219 | .describe("Whether to type one character at a time."), 220 | }); 221 | type TypeInput = z.infer; 222 | 223 | const type = defineTool({ 224 | capability: "core", 225 | schema: { 226 | name: "browserbase_type", 227 | description: "Type text into editable element using ref.", 228 | inputSchema: typeSchema, 229 | }, 230 | handle: async (context: Context, params: TypeInput): Promise => { 231 | // Get locator directly from snapshot 232 | const snapshot = context.snapshotOrDie(); 233 | const locator = snapshot.refLocator(params.ref); 234 | 235 | const code: string[] = []; 236 | const steps: (() => Promise)[] = []; 237 | 238 | if (params.slowly) { 239 | code.push( 240 | `// Press "${params.text}" sequentially into "${params.element}"` 241 | ); 242 | code.push( 243 | `// await page.${await generateLocator( 244 | locator 245 | )}.pressSequentially('${params.text.replace(/'/g, "\\'")}');` 246 | ); 247 | steps.push(() => 248 | locator.pressSequentially(params.text, { delay: 50 }) 249 | ); 250 | } else { 251 | code.push(`// Fill "${params.text}" into "${params.element}"`); 252 | code.push( 253 | `// await page.${await generateLocator( 254 | locator 255 | )}.fill('${params.text.replace(/'/g, "\\'")}');` 256 | ); 257 | steps.push(async () => { 258 | await locator.waitFor({ state: "visible"}); 259 | if (!(await locator.isEditable())) { 260 | throw new Error( 261 | 
`Element '${params.element}' was visible but not editable.` 262 | ); 263 | } 264 | await locator.fill("", { force: true, timeout: 5000 }); // Force empty fill first 265 | await locator.fill(params.text, { force: true, timeout: 5000 }); // Force fill with text 266 | }); 267 | } 268 | 269 | if (params.submit) { 270 | code.push(`// Submit text`); 271 | code.push( 272 | `// await page.${await generateLocator(locator)}.press('Enter');` 273 | ); 274 | steps.push(() => locator.press("Enter", { timeout: 5000 })); 275 | } 276 | 277 | const action = async (): Promise => { 278 | try { 279 | // Execute the steps sequentially 280 | await steps.reduce((acc, step) => acc.then(step), Promise.resolve()); 281 | } catch (typeError) { 282 | const errorMsg = 283 | typeError instanceof Error ? typeError.message : String(typeError); 284 | throw new Error( 285 | `Failed to type into or submit element '${params.element}'. Error: ${errorMsg}` 286 | ); 287 | } 288 | return { 289 | content: [ 290 | { 291 | type: "text", 292 | text: `Typed "${params.text}" into: ${params.element}${ 293 | params.submit ? 
" and submitted" : "" 294 | }`, 295 | }, 296 | ], 297 | }; 298 | }; 299 | 300 | return { action, code, captureSnapshot: true, waitForNetwork: true }; 301 | }, 302 | }); 303 | 304 | // --- Tool: Select Option (Adapted Handle, Example Action) --- 305 | const selectOptionSchema = elementSchema.extend({ 306 | values: z 307 | .array(z.string()) 308 | .describe("Array of values to select in the dropdown."), 309 | }); 310 | type SelectOptionInput = z.infer; 311 | 312 | const selectOption = defineTool({ 313 | capability: "core", 314 | schema: { 315 | name: "browserbase_select_option", 316 | description: "Select an option in a dropdown using ref.", 317 | inputSchema: selectOptionSchema, 318 | }, 319 | handle: async ( 320 | context: Context, 321 | params: SelectOptionInput 322 | ): Promise => { 323 | // Get locator directly from snapshot 324 | const snapshot = context.snapshotOrDie(); 325 | const locator = snapshot.refLocator(params.ref); 326 | 327 | const code = [ 328 | `// Select options [${params.values.join(", ")}] in ${params.element}`, 329 | // Remove javascript.formatObject, use simple JSON.stringify for code comment 330 | `// await page.${await generateLocator( 331 | locator 332 | )}.selectOption(${JSON.stringify(params.values)});`, 333 | ]; 334 | 335 | const action = async (): Promise => { 336 | try { 337 | // Use locator directly for the action 338 | await locator.waitFor({ state: "visible", timeout: 5000 }); 339 | await locator.selectOption(params.values, { timeout: 5000 }); 340 | } catch (selectError) { 341 | const errorMsg = 342 | selectError instanceof Error 343 | ? selectError.message 344 | : String(selectError); 345 | throw new Error( 346 | `Failed to select option(s) in element '${params.element}'. 
Error: ${errorMsg}` 347 | ); 348 | } 349 | return { 350 | content: [ 351 | { type: "text", text: `Selected options in: ${params.element}` }, 352 | ], 353 | }; 354 | }; 355 | 356 | return { action, code, captureSnapshot: true, waitForNetwork: true }; 357 | }, 358 | }); 359 | 360 | // --- Tool: Screenshot (Adapted Handle, Example Action) --- 361 | const screenshotSchema = z.object({ 362 | raw: z 363 | .boolean() 364 | .optional() 365 | .describe( 366 | "Whether to return without compression (PNG). Default is false (JPEG)." 367 | ), 368 | element: z 369 | .string() 370 | .optional() 371 | .describe("Human-readable element description."), 372 | ref: z 373 | .string() 374 | .optional() 375 | .describe("Exact target element reference from the page snapshot.") 376 | }); 377 | 378 | type ScreenshotInput = z.infer; 379 | 380 | const screenshot = defineTool({ 381 | capability: "core", 382 | schema: { 383 | name: "browserbase_take_screenshot", 384 | description: `Take a screenshot of the current page or element using ref.`, 385 | inputSchema: screenshotSchema, 386 | }, 387 | handle: async ( 388 | context: Context, 389 | params: ScreenshotInput 390 | ): Promise => { 391 | if (!!params.element !== !!params.ref) { 392 | throw new Error("Both element and ref must be provided or neither."); 393 | } 394 | 395 | const page = await context.getActivePage(); 396 | if (!page) { 397 | throw new Error("No active page found for screenshot"); 398 | } 399 | // Conditionally get snapshot only if ref is provided 400 | let pageSnapshot: PageSnapshot | null = null; 401 | if (params.ref) { 402 | pageSnapshot = context.snapshotOrDie(); 403 | } 404 | const fileType = params.raw ? 
"png" : "jpeg"; 405 | const fileName = await outputFile( 406 | context.config, 407 | `screenshot-${Date.now()}.${fileType}` 408 | ); 409 | 410 | const baseOptions: PageScreenshotOptions = { 411 | scale: "css", 412 | timeout: 15000, // Kept existing timeout 413 | }; 414 | 415 | let options: PageScreenshotOptions; 416 | 417 | if (fileType === "jpeg") { 418 | options = { 419 | ...baseOptions, 420 | type: "jpeg", 421 | quality: 50, // Quality is only for jpeg 422 | path: fileName, 423 | }; 424 | } else { 425 | options = { 426 | ...baseOptions, 427 | type: "png", 428 | path: fileName, 429 | }; 430 | } 431 | 432 | const isElementScreenshot = params.element && params.ref; 433 | const code: string[] = []; 434 | code.push( 435 | `// Screenshot ${ 436 | isElementScreenshot ? params.element : "viewport" 437 | } and save it as ${fileName}` 438 | ); 439 | 440 | // Conditionally get locator only if ref and snapshot are available 441 | const locator = 442 | params.ref && pageSnapshot ? pageSnapshot.refLocator(params.ref) : null; 443 | 444 | // Use JSON.stringify for code generation as javascript.formatObject is not available 445 | const optionsForCode = { ...options }; 446 | // delete optionsForCode.path; // Path is an internal detail for saving, not usually part of the "command" log 447 | 448 | if (locator) { 449 | code.push( 450 | `// await page.${await generateLocator( 451 | locator 452 | )}.screenshot(${JSON.stringify(optionsForCode)});` 453 | ); 454 | } else { 455 | code.push(`// await page.screenshot(${JSON.stringify(optionsForCode)});`); 456 | } 457 | 458 | const action = async (): Promise => { 459 | // Access config via context.config 460 | const includeBase64 = 461 | !context.config.tools?.browserbase_take_screenshot?.omitBase64; 462 | 463 | // Use the page directly for full page screenshots if locator is null 464 | const screenshotBuffer = locator 465 | ? 
await locator.screenshot(options) 466 | : await page.screenshot(options); 467 | 468 | if (includeBase64) { 469 | const rawBase64 = screenshotBuffer.toString("base64"); 470 | return { 471 | content: [ 472 | { 473 | type: "image", 474 | format: fileType, // format might be redundant if mimeType is present, but kept for now 475 | mimeType: fileType === "png" ? `image/png` : `image/jpeg`, 476 | data: rawBase64, 477 | }, 478 | ], 479 | }; 480 | } else { 481 | // If base64 is not included, return an empty content array 482 | return { content: [] }; 483 | } 484 | }; 485 | 486 | return { 487 | code, 488 | action, 489 | captureSnapshot: true, 490 | waitForNetwork: false, 491 | }; 492 | }, 493 | }); 494 | 495 | export async function generateLocator(locator: Locator): Promise { 496 | return (locator as any)._generateLocatorString(); 497 | } 498 | 499 | export default [snapshot, click, drag, hover, type, selectOption, screenshot]; -------------------------------------------------------------------------------- /browserbase/src/tools/tool.ts: -------------------------------------------------------------------------------- 1 | import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types.js'; 2 | import type { z } from 'zod'; 3 | import type { Context } from '../context.js'; 4 | import type * as playwright from 'playwright'; 5 | import type { ToolCapability } from '../config.js'; 6 | import type { BrowserSession } from '../sessionManager.js'; 7 | import type { Server } from '@modelcontextprotocol/sdk/server/index.js'; 8 | import type { Config } from '../config.js'; 9 | 10 | export type ToolSchema = { 11 | name: string; 12 | description: string; 13 | inputSchema: Input; 14 | }; 15 | 16 | // Export InputType 17 | export type InputType = z.Schema; 18 | 19 | export type FileUploadModalState = { 20 | type: 'fileChooser'; 21 | description: string; 22 | fileChooser: playwright.FileChooser; 23 | }; 24 | 25 | export type DialogModalState = { 26 | type: 'dialog'; 27 | 
description: string; 28 | dialog: playwright.Dialog; 29 | }; 30 | 31 | export type ModalState = FileUploadModalState | DialogModalState; 32 | 33 | export type ToolActionResult = { content?: (ImageContent | TextContent)[] } | undefined | void; 34 | 35 | export type ToolResult = { 36 | code: string[]; 37 | action?: () => Promise; 38 | captureSnapshot: boolean; 39 | waitForNetwork: boolean; 40 | resultOverride?: ToolActionResult; 41 | }; 42 | 43 | export type Tool = { 44 | capability: ToolCapability; 45 | schema: ToolSchema; 46 | clearsModalState?: ModalState['type']; 47 | handle: (context: Context, params: z.output) => Promise; 48 | }; 49 | 50 | export type ToolFactory = (snapshot: boolean) => Tool; 51 | 52 | export function defineTool(tool: Tool): Tool { 53 | return tool; 54 | } 55 | 56 | export {}; // Ensure this is treated as a module 57 | 58 | // Represents the execution context for a tool 59 | // Might include the page, server instance for notifications, etc. 60 | export interface ToolContext { 61 | page: BrowserSession['page']; 62 | browser: BrowserSession['browser']; 63 | server: Server; 64 | sessionId: string; 65 | config: Config; 66 | context: Context; // The main context instance 67 | } -------------------------------------------------------------------------------- /browserbase/src/tools/toolUtils.ts: -------------------------------------------------------------------------------- 1 | import { CallToolResult, TextContent } from "@modelcontextprotocol/sdk/types.js"; 2 | 3 | /** 4 | * Creates a standardized error result for tool calls. 5 | * @param message The error message text. 6 | * @param toolName Optional tool name for logging/context. 7 | * @returns CallToolResult object indicating an error. 8 | */ 9 | export function createErrorResult(message: string, toolName?: string): CallToolResult { 10 | const prefix = toolName ? 
`[${toolName}] Error: ` : "Error: "; 11 | // console.error(prefix + message); 12 | return { 13 | content: [{ type: "text", text: prefix + message } as TextContent], 14 | isError: true, 15 | }; 16 | } 17 | 18 | /** 19 | * Creates a standardized success result with text content. 20 | * @param message The success message text. 21 | * @param toolName Optional tool name for logging/context. 22 | * @returns CallToolResult object indicating success. 23 | */ 24 | export function createSuccessResult(message: string, toolName?: string): CallToolResult { 25 | const prefix = toolName ? `[${toolName}] Success: ` : "Success: "; 26 | // console.log(prefix + message); // Log success 27 | return { 28 | content: [{ type: "text", text: message } as TextContent], 29 | isError: false, 30 | }; 31 | } -------------------------------------------------------------------------------- /browserbase/src/tools/utils.ts: -------------------------------------------------------------------------------- 1 | import type * as playwright from 'playwright'; 2 | import type { Context } from '../context.js'; 3 | 4 | export async function waitForCompletion(context: Context, page: playwright.Page, callback: () => Promise): Promise { 5 | const requests = new Set(); 6 | let frameNavigated = false; 7 | let waitCallback: () => void = () => {}; 8 | const waitBarrier = new Promise(f => { waitCallback = f; }); 9 | 10 | const requestListener = (request: playwright.Request) => requests.add(request); 11 | const requestFinishedListener = (request: playwright.Request) => { 12 | requests.delete(request); 13 | if (!requests.size) 14 | waitCallback(); 15 | }; 16 | 17 | const frameNavigateListener = (frame: playwright.Frame) => { 18 | if (frame.parentFrame()) 19 | return; 20 | frameNavigated = true; 21 | dispose(); 22 | clearTimeout(timeout); 23 | void frame.waitForLoadState('load').then(() => { 24 | waitCallback(); 25 | }); 26 | }; 27 | 28 | const onTimeout = () => { 29 | dispose(); 30 | waitCallback(); 31 | }; 32 | 33 | 
page.on('request', requestListener); 34 | page.on('requestfinished', requestFinishedListener); 35 | page.on('framenavigated', frameNavigateListener); 36 | const timeout = setTimeout(onTimeout, 10000); 37 | 38 | const dispose = () => { 39 | page.off('request', requestListener); 40 | page.off('requestfinished', requestFinishedListener); 41 | page.off('framenavigated', frameNavigateListener); 42 | clearTimeout(timeout); 43 | }; 44 | 45 | try { 46 | const result = await callback(); 47 | if (!requests.size && !frameNavigated) 48 | waitCallback(); 49 | await waitBarrier; 50 | await context.waitForTimeout(1000); 51 | return result; 52 | } finally { 53 | dispose(); 54 | } 55 | } 56 | 57 | export function sanitizeForFilePath(s: string) { 58 | return s.replace(/[^a-zA-Z0-9_.-]/g, '_'); // More robust sanitization 59 | } -------------------------------------------------------------------------------- /browserbase/src/transport.ts: -------------------------------------------------------------------------------- 1 | import http from 'node:http'; 2 | import assert from 'node:assert'; 3 | import crypto from 'node:crypto'; 4 | 5 | import { ServerList } from './server.js'; 6 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; 7 | import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js'; 8 | import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js'; 9 | 10 | export async function startStdioTransport(serverList: ServerList) { 11 | const server = await serverList.create(); 12 | await server.connect(new StdioServerTransport()); 13 | } 14 | 15 | async function handleSSE(req: http.IncomingMessage, res: http.ServerResponse, url: URL, serverList: ServerList, sessions: Map) { 16 | if (req.method === 'POST') { 17 | const sessionId = url.searchParams.get('sessionId'); 18 | if (!sessionId) { 19 | res.statusCode = 400; 20 | return res.end('Missing sessionId'); 21 | } 22 | 23 | const transport = 
sessions.get(sessionId); 24 | if (!transport) { 25 | res.statusCode = 404; 26 | return res.end('Session not found'); 27 | } 28 | 29 | return await transport.handlePostMessage(req, res); 30 | } else if (req.method === 'GET') { 31 | const transport = new SSEServerTransport('/sse', res); 32 | sessions.set(transport.sessionId, transport); 33 | const server = await serverList.create(); 34 | res.on('close', () => { 35 | sessions.delete(transport.sessionId); 36 | serverList.close(server).catch(e => { 37 | // eslint-disable-next-line no-console 38 | // console.error(e); 39 | }); 40 | }); 41 | return await server.connect(transport); 42 | } 43 | 44 | res.statusCode = 405; 45 | res.end('Method not allowed'); 46 | } 47 | 48 | async function handleStreamable(req: http.IncomingMessage, res: http.ServerResponse, serverList: ServerList, sessions: Map) { 49 | const sessionId = req.headers['mcp-session-id'] as string | undefined; 50 | if (sessionId) { 51 | const transport = sessions.get(sessionId); 52 | if (!transport) { 53 | res.statusCode = 404; 54 | res.end('Session not found'); 55 | return; 56 | } 57 | return await transport.handleRequest(req, res); 58 | } 59 | 60 | if (req.method === 'POST') { 61 | const transport = new StreamableHTTPServerTransport({ 62 | sessionIdGenerator: () => crypto.randomUUID(), 63 | onsessioninitialized: sessionId => { 64 | sessions.set(sessionId, transport); 65 | } 66 | }); 67 | transport.onclose = () => { 68 | if (transport.sessionId) 69 | sessions.delete(transport.sessionId); 70 | }; 71 | const server = await serverList.create(); 72 | await server.connect(transport); 73 | return await transport.handleRequest(req, res); 74 | } 75 | 76 | res.statusCode = 400; 77 | res.end('Invalid request'); 78 | } 79 | 80 | export function startHttpTransport(port: number, hostname: string | undefined, serverList: ServerList) { 81 | const sseSessions = new Map(); 82 | const streamableSessions = new Map(); 83 | const httpServer = http.createServer(async (req, res) => { 
84 | const url = new URL(`http://localhost${req.url}`); 85 | if (url.pathname.startsWith('/mcp')) 86 | await handleStreamable(req, res, serverList, streamableSessions); 87 | else 88 | await handleSSE(req, res, url, serverList, sseSessions); 89 | }); 90 | httpServer.listen(port, hostname, () => { 91 | const address = httpServer.address(); 92 | assert(address, 'Could not bind server socket'); 93 | let url: string; 94 | if (typeof address === 'string') { 95 | url = address; 96 | } else { 97 | const resolvedPort = address.port; 98 | let resolvedHost = address.family === 'IPv4' ? address.address : `[${address.address}]`; 99 | if (resolvedHost === '0.0.0.0' || resolvedHost === '[::]') 100 | resolvedHost = 'localhost'; 101 | url = `http://${resolvedHost}:${resolvedPort}`; 102 | } 103 | const message = [ 104 | `Listening on ${url}`, 105 | 'Put this in your client config:', 106 | JSON.stringify({ 107 | 'mcpServers': { 108 | 'browserbase': { 109 | 'url': `${url}/sse` 110 | } 111 | } 112 | }, undefined, 2), 113 | 'If your client supports streamable HTTP, you can use the /mcp endpoint instead.', 114 | ].join('\n'); 115 | // eslint-disable-next-line no-console 116 | console.log(message); 117 | }); 118 | } -------------------------------------------------------------------------------- /browserbase/tests/.gitkeep: -------------------------------------------------------------------------------- 1 | # Placeholder for tests -------------------------------------------------------------------------------- /browserbase/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "NodeNext", 5 | "moduleResolution": "NodeNext", 6 | "strict": true, 7 | "esModuleInterop": true, 8 | "skipLibCheck": true, 9 | "forceConsistentCasingInFileNames": true, 10 | "resolveJsonModule": true, 11 | "outDir": "dist", 12 | "rootDir": "src" 13 | }, 14 | "include": ["src/**/*.ts"], 15 | "exclude": 
["node_modules"] 16 | } -------------------------------------------------------------------------------- /browserbase/utils/.gitkeep: -------------------------------------------------------------------------------- 1 | # Placeholder for utility scripts -------------------------------------------------------------------------------- /stagehand/README.md: -------------------------------------------------------------------------------- 1 | # Stagehand MCP Server 2 | 3 | ![cover](../assets/stagehand-mcp.png) 4 | 5 | A Model Context Protocol (MCP) server that provides AI-powered web automation capabilities using [Stagehand](https://github.com/browserbase/stagehand). This server enables LLMs to interact with web pages, perform actions, extract data, and observe possible actions in a real browser environment. 6 | 7 | ## Get Started 8 | 9 | 1. Run `npm install` to install the necessary dependencies, then run `npm run build` to get `dist/index.js`. 10 | 11 | 2. Set up your Claude Desktop configuration to use the server. 
12 | 13 | ```json 14 | { 15 | "mcpServers": { 16 | "stagehand": { 17 | "command": "node", 18 | "args": ["path/to/mcp-server-browserbase/stagehand/dist/index.js"], 19 | "env": { 20 | "BROWSERBASE_API_KEY": "", 21 | "BROWSERBASE_PROJECT_ID": "", 22 | "OPENAI_API_KEY": "", 23 | "CONTEXT_ID": "" 24 | } 25 | } 26 | } 27 | } 28 | ``` 29 | or, for running locally, first [**open Chrome in debug mode**](https://docs.stagehand.dev/examples/customize_browser#use-your-personal-browser) like so: 30 | 31 | `open -a "Google Chrome" --args --remote-debugging-port=9222` 32 | ```json 33 | { 34 | "mcpServers": { 35 | "stagehand": { 36 | "command": "node", 37 | "args": ["path/to/mcp-server-browserbase/stagehand/dist/index.js"], 38 | "env": { 39 | "OPENAI_API_KEY": "", 40 | "LOCAL_CDP_URL": "http://localhost:9222" 41 | } 42 | } 43 | } 44 | } 45 | ``` 46 | > 💡 Check out our [documentation](https://docs.stagehand.dev/examples/customize_browser#use-your-personal-browser) for getting your local CDP URL! 47 | 48 | 3. Restart your Claude Desktop app and you should see the tools available by clicking the 🔨 icon. 49 | 50 | 4. Start using the tools! Below is a demo video of Claude doing a Google search for OpenAI using the Stagehand MCP server and Browserbase for a remote headless browser. 
51 | 52 | 60 | 61 | ## Tools 62 | 63 | ### Stagehand commands 64 | 65 | - **stagehand_navigate** 66 | - Navigate to any URL in the browser 67 | - Input: 68 | - `url` (string): The URL to navigate to 69 | 70 | - **stagehand_act** 71 | - Perform an action on the web page 72 | - Inputs: 73 | - `action` (string): The action to perform (e.g., "click the login button") 74 | - `variables` (object, optional): Variables used in the action template 75 | 76 | - **stagehand_extract** 77 | - Extract data from the web page 78 | 79 | - **stagehand_observe** 80 | - Observe actions that can be performed on the web page 81 | - Input: 82 | - `instruction` (string, optional): Instruction for observation 83 | 84 | ### Resources 85 | 86 | The server provides access to two resources: 87 | 88 | 1. **Console Logs** (`console://logs`) 89 | 90 | - Browser console output in text format 91 | - Includes all console messages from the browser 92 | 93 | 2. **Screenshots** (`screenshot://`) 94 | - PNG images of captured screenshots 95 | - Accessible via the screenshot name specified during capture 96 | 97 | ## File Structure 98 | 99 | The codebase is organized into the following modules: 100 | 101 | - **index.ts**: Entry point that initializes and runs the server. 102 | - **server.ts**: Core server logic, including server creation, configuration, and request handling. 103 | - **tools.ts**: Definitions and implementations of tools that can be called by MCP clients. 104 | - **prompts.ts**: Prompt templates that can be used by MCP clients. 105 | - **resources.ts**: Resource definitions and handlers for resource-related requests. 106 | - **logging.ts**: Comprehensive logging system with rotation and formatting capabilities. 107 | - **utils.ts**: Utility functions including JSON Schema to Zod schema conversion and message sanitization. 108 | 109 | ## Module Descriptions 110 | 111 | ### index.ts 112 | 113 | The main entry point for the application. 
It: 114 | - Initializes the logging system 115 | - Creates the server instance 116 | - Connects to the stdio transport to receive and respond to requests 117 | 118 | ### server.ts 119 | 120 | Contains core server functionality: 121 | - Creates and configures the MCP server 122 | - Defines Stagehand configuration 123 | - Sets up request handlers for all MCP operations 124 | - Manages the Stagehand browser instance 125 | 126 | ### tools.ts 127 | 128 | Implements the tools that can be called by MCP clients: 129 | - `stagehand_navigate`: Navigate to URLs 130 | - `stagehand_act`: Perform actions on web elements 131 | - `stagehand_extract`: Extract structured data from web pages 132 | - `stagehand_observe`: Observe elements on the page 133 | - `screenshot`: Take screenshots of the current page 134 | 135 | ### prompts.ts 136 | 137 | Defines prompt templates for MCP clients: 138 | - `click_search_button`: Template for clicking search buttons 139 | 140 | ### resources.ts 141 | 142 | Manages resources in the MCP protocol: 143 | - Currently provides empty resource and resource template lists 144 | 145 | ### logging.ts 146 | 147 | Implements a comprehensive logging system: 148 | - File-based logging with rotation 149 | - In-memory operation logs 150 | - Log formatting and sanitization 151 | - Console logging for debugging 152 | 153 | ### utils.ts 154 | 155 | Provides utility functions: 156 | - `jsonSchemaToZod`: Converts JSON Schema to Zod schema for validation 157 | - `sanitizeMessage`: Ensures messages are properly formatted JSON 158 | 159 | ## Key Features 160 | 161 | - AI-powered web automation 162 | - Perform actions on web pages 163 | - Extract structured data from web pages 164 | - Observe possible actions on web pages 165 | - Simple and extensible API 166 | - Model-agnostic support for various LLM providers 167 | 168 | ## Environment Variables 169 | 170 | - `BROWSERBASE_API_KEY`: API key for BrowserBase authentication 171 | - `BROWSERBASE_PROJECT_ID`: Project ID for 
BrowserBase 172 | - `OPENAI_API_KEY`: API key for OpenAI (used by Stagehand) 173 | - `DEBUG`: Enable debug logging 174 | 175 | ## MCP Capabilities 176 | 177 | This server implements the following MCP capabilities: 178 | 179 | - **Tools**: Allows clients to call tools that control a browser instance 180 | - **Prompts**: Provides prompt templates for common operations 181 | - **Resources**: (Currently empty but structured for future expansion) 182 | - **Logging**: Provides detailed logging capabilities 183 | 184 | For more information about the Model Context Protocol, visit: 185 | - [MCP Documentation](https://modelcontextprotocol.io/docs) 186 | - [MCP Specification](https://spec.modelcontextprotocol.io/) 187 | 188 | ## License 189 | 190 | Licensed under the MIT License. 191 | 192 | Copyright 2024 Browserbase, Inc. 193 | -------------------------------------------------------------------------------- /stagehand/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@browserbasehq/mcp-stagehand", 3 | "version": "0.5.1", 4 | "description": "MCP server for AI web browser automation using Stagehand", 5 | "license": "MIT", 6 | "author": "Browserbase, Inc. 
(https://www.browserbase.com/)", 7 | "homepage": "https://modelcontextprotocol.io", 8 | "bugs": "https://github.com/modelcontextprotocol/servers/issues", 9 | "type": "module", 10 | "bin": { 11 | "mcp-server-stagehand": "dist/index.js" 12 | }, 13 | "files": [ 14 | "dist" 15 | ], 16 | "scripts": { 17 | "build": "tsc && shx chmod +x dist/*.js", 18 | "prepare": "npm run build", 19 | "watch": "tsc --watch" 20 | }, 21 | "dependencies": { 22 | "@browserbasehq/sdk": "^2.0.0", 23 | "@browserbasehq/stagehand": "^2.0.0", 24 | "@modelcontextprotocol/sdk": "^1.0.3", 25 | "@modelcontextprotocol/server-stagehand": "file:", 26 | "@playwright/test": "^1.49.0" 27 | }, 28 | "devDependencies": { 29 | "shx": "^0.3.4", 30 | "typescript": "^5.6.2" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /stagehand/src/index.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; 4 | import { createServer } from "./server.js"; 5 | import { ensureLogDirectory, registerExitHandlers, scheduleLogRotation, setupLogRotation } from "./logging.js"; 6 | 7 | // Run setup for logging 8 | ensureLogDirectory(); 9 | setupLogRotation(); 10 | scheduleLogRotation(); 11 | registerExitHandlers(); 12 | 13 | // Run the server 14 | async function runServer() { 15 | const server = createServer(); 16 | const transport = new StdioServerTransport(); 17 | await server.connect(transport); 18 | server.sendLoggingMessage({ 19 | level: "info", 20 | data: "Stagehand MCP server is ready to accept requests", 21 | }); 22 | } 23 | 24 | runServer().catch((error) => { 25 | const errorMsg = error instanceof Error ? 
error.message : String(error); 26 | console.error(errorMsg); 27 | }); 28 | -------------------------------------------------------------------------------- /stagehand/src/logging.ts: -------------------------------------------------------------------------------- 1 | import fs from 'fs'; 2 | import path from 'path'; 3 | import { fileURLToPath } from 'url'; 4 | import type { LogLine } from "@browserbasehq/stagehand"; 5 | import { Server } from "@modelcontextprotocol/sdk/server/index.js"; 6 | 7 | // Get the directory name for the current module 8 | const __dirname = path.dirname(fileURLToPath(import.meta.url)); 9 | 10 | // Configure logging 11 | const LOG_DIR = path.join(__dirname, '../logs'); 12 | const LOG_FILE = path.join(LOG_DIR, `stagehand-${new Date().toISOString().split('T')[0]}.log`); 13 | const MAX_LOG_FILES = 10; // Maximum number of log files to keep 14 | const MAX_LOG_SIZE = 10 * 1024 * 1024; // 10MB max log file size 15 | 16 | // Queue for batching log writes 17 | let logQueue: string[] = []; 18 | let logWriteTimeout: NodeJS.Timeout | null = null; 19 | const LOG_FLUSH_INTERVAL = 1000; // Flush logs every second 20 | const MAX_OPERATION_LOGS = 1000; // Prevent operation logs from growing too large 21 | 22 | // Operation logs stored in memory 23 | export const operationLogs: string[] = []; 24 | export const consoleLogs: string[] = []; 25 | 26 | // Reference to server instance for logging 27 | let serverInstance: Server | undefined; 28 | 29 | // Set server for logging 30 | export function setServerInstance(server: Server) { 31 | serverInstance = server; 32 | } 33 | 34 | // Get server instance for notifications and logging 35 | export function getServerInstance() { 36 | return serverInstance; 37 | } 38 | 39 | // Ensure log directory exists 40 | export function ensureLogDirectory() { 41 | if (!fs.existsSync(LOG_DIR)) { 42 | fs.mkdirSync(LOG_DIR, { recursive: true }); 43 | } 44 | } 45 | 46 | // Setup log rotation management 47 | export function 
setupLogRotation() { 48 | try { 49 | // Check if current log file exceeds max size 50 | if (fs.existsSync(LOG_FILE) && fs.statSync(LOG_FILE).size > MAX_LOG_SIZE) { 51 | const timestamp = new Date().toISOString().replace(/:/g, '-'); 52 | const rotatedLogFile = path.join(LOG_DIR, `stagehand-${timestamp}.log`); 53 | fs.renameSync(LOG_FILE, rotatedLogFile); 54 | } 55 | 56 | // Clean up old log files if we have too many 57 | const logFiles = fs.readdirSync(LOG_DIR) 58 | .filter(file => file.startsWith('stagehand-') && file.endsWith('.log')) 59 | .map(file => path.join(LOG_DIR, file)) 60 | .sort((a, b) => fs.statSync(b).mtime.getTime() - fs.statSync(a).mtime.getTime()); 61 | 62 | if (logFiles.length > MAX_LOG_FILES) { 63 | logFiles.slice(MAX_LOG_FILES).forEach(file => { 64 | try { 65 | fs.unlinkSync(file); 66 | } catch (err) { 67 | console.error(`Failed to delete old log file ${file}:`, err); 68 | } 69 | }); 70 | } 71 | } catch (err) { 72 | console.error('Error in log rotation:', err); 73 | } 74 | } 75 | 76 | // Flush logs to disk asynchronously 77 | export async function flushLogs() { 78 | if (logQueue.length === 0) return; 79 | 80 | const logsToWrite = logQueue.join('\n') + '\n'; 81 | logQueue = []; 82 | logWriteTimeout = null; 83 | 84 | try { 85 | await fs.promises.appendFile(LOG_FILE, logsToWrite); 86 | 87 | // Check if we need to rotate logs after write 88 | const stats = await fs.promises.stat(LOG_FILE); 89 | if (stats.size > MAX_LOG_SIZE) { 90 | setupLogRotation(); 91 | } 92 | } catch (err) { 93 | console.error('Failed to write logs to file:', err); 94 | // If write fails, try to use sync version as fallback 95 | try { 96 | fs.appendFileSync(LOG_FILE, logsToWrite); 97 | } catch (syncErr) { 98 | console.error('Failed to write logs synchronously:', syncErr); 99 | } 100 | } 101 | } 102 | 103 | // Helper function to convert LogLine to string 104 | export function logLineToString(logLine: LogLine): string { 105 | const timestamp = logLine.timestamp ? 
new Date(logLine.timestamp).toISOString() : new Date().toISOString(); 106 | const level = logLine.level !== undefined ? // Stagehand log levels: 0 = error, 1 = info, 2 = debug 107 | (logLine.level === 0 ? 'ERROR' : 108 | logLine.level === 1 ? 'INFO' : 109 | logLine.level === 2 ? 'DEBUG' : 'UNKNOWN') : 'UNKNOWN'; 110 | return `[${timestamp}] [${level}] ${logLine.message || ''}`; 111 | } 112 | 113 | // Main logging function: records the message in the in-memory operation log, queues it for the batched file write, echoes it to stderr when DEBUG is set or the level is 'error', and forwards info/error messages to the connected MCP client 114 | export function log(message: string, level: 'info' | 'error' | 'debug' = 'info') { 115 | const timestamp = new Date().toISOString(); 116 | const logMessage = `[${timestamp}] [${level.toUpperCase()}] ${message}`; 117 | 118 | // Manage operation logs with size limit 119 | operationLogs.push(logMessage); 120 | if (operationLogs.length > MAX_OPERATION_LOGS) { 121 | // Keep most recent logs but trim the middle to maintain context 122 | const truncatedCount = operationLogs.length - MAX_OPERATION_LOGS; // entries dropped by this trim; must be read before the buffer is cleared below 123 | // Keep first 100 and last (MAX_OPERATION_LOGS - 100) logs 124 | const firstLogs = operationLogs.slice(0, 100); 125 | const lastLogs = operationLogs.slice(operationLogs.length - (MAX_OPERATION_LOGS - 100)); 126 | operationLogs.length = 0; 127 | operationLogs.push(...firstLogs); 128 | operationLogs.push(`[...${truncatedCount} logs truncated...]`); 129 | operationLogs.push(...lastLogs); 130 | } 131 | 132 | // Queue log for async writing 133 | logQueue.push(logMessage); 134 | 135 | // Setup timer to flush logs if not already scheduled 136 | if (!logWriteTimeout) { 137 | logWriteTimeout = setTimeout(flushLogs, LOG_FLUSH_INTERVAL); 138 | } 139 | 140 | // Console output to stderr for debugging 141 | if (process.env.DEBUG || level === 'error') { 142 | console.error(logMessage); 143 | } 144 | 145 | // Send logging message to client for important events 146 | if (serverInstance && (level === 'info' || level === 'error')) { 147 | serverInstance.sendLoggingMessage({ 148 | level: level, 149 | data: message, 150 | }); 151 | } 152 | } 153 | 154 | // Format logs for response 155 | export function
formatLogResponse(logs: string[]): string { 156 | if (logs.length <= 100) { 157 | return logs.join("\n"); 158 | } 159 | 160 | // For very long logs, include first and last parts with truncation notice 161 | const first = logs.slice(0, 50); 162 | const last = logs.slice(-50); 163 | return [ 164 | ...first, 165 | `\n... ${logs.length - 100} more log entries (truncated) ...\n`, 166 | ...last 167 | ].join("\n"); 168 | } 169 | 170 | // Log request 171 | export function logRequest(type: string, params: any) { 172 | const requestLog = { 173 | timestamp: new Date().toISOString(), 174 | type, 175 | params, 176 | }; 177 | log(`REQUEST: ${JSON.stringify(requestLog, null, 2)}`, 'debug'); 178 | } 179 | 180 | // Log response 181 | export function logResponse(type: string, response: any) { 182 | const responseLog = { 183 | timestamp: new Date().toISOString(), 184 | type, 185 | response, 186 | }; 187 | log(`RESPONSE: ${JSON.stringify(responseLog, null, 2)}`, 'debug'); 188 | } 189 | 190 | // Register handlers for process exit 191 | export function registerExitHandlers() { 192 | // Make sure logs are flushed when the process exits 193 | process.on('exit', () => { 194 | if (logQueue.length > 0) { 195 | try { 196 | fs.appendFileSync(LOG_FILE, logQueue.join('\n') + '\n'); 197 | } catch (err) { 198 | console.error('Failed to flush logs on exit:', err); 199 | } 200 | } 201 | }); 202 | 203 | process.on('SIGINT', () => { 204 | // Flush logs and exit 205 | if (logQueue.length > 0) { 206 | try { 207 | fs.appendFileSync(LOG_FILE, logQueue.join('\n') + '\n'); 208 | } catch (err) { 209 | console.error('Failed to flush logs on SIGINT:', err); 210 | } 211 | } 212 | process.exit(0); 213 | }); 214 | } 215 | 216 | // Schedule periodic log rotation 217 | export function scheduleLogRotation() { 218 | // Add log rotation check periodically 219 | setInterval(() => { 220 | setupLogRotation(); 221 | }, 15 * 60 * 1000); // Check every 15 minutes 222 | } 
--------------------------------------------------------------------------------
/stagehand/src/prompts.ts:
--------------------------------------------------------------------------------
/**
 * Prompts module for the Stagehand MCP server
 * Contains prompts definitions and handlers for prompt-related requests
 */

// Define the prompts
export const PROMPTS = [
  {
    name: "click_search_button",
    description: "A prompt template for clicking on a search button",
    arguments: [], // No arguments required for this specific prompt
  },
];

/**
 * Get a prompt by name
 * @param name The name of the prompt to retrieve
 * @returns The prompt definition (description plus the messages to send)
 * @throws Error when `name` does not identify a known prompt
 */
export function getPrompt(name: string) {
  if (name !== "click_search_button") {
    throw new Error(`Invalid prompt name: ${name}`);
  }

  return {
    description:
      "This prompt provides instructions for clicking on a search button",
    messages: [
      {
        role: "user",
        content: {
          type: "text",
          text: "Please click on the search button",
        },
      },
    ],
  };
}
--------------------------------------------------------------------------------
/stagehand/src/resources.ts:
--------------------------------------------------------------------------------
/**
 * Resources module for the Stagehand MCP server
 * Contains resources definitions and handlers for resource-related requests
 */

// Define the resources
export const RESOURCES = [];

// Define the resource templates
export const RESOURCE_TEMPLATES = [];

// Store screenshots in a map: screenshot name -> base64-encoded PNG data
export const screenshots = new Map<string, string>();

// URI scheme prefix under which stored screenshots are exposed
const SCREENSHOT_URI_PREFIX = "screenshot://";

/**
 * Handle listing resources request
 * @returns A list of available resources, one entry per stored screenshot
 */
export function listResources() {
  const screenshotResources = Array.from(screenshots.keys(), (name) => ({
    uri: `${SCREENSHOT_URI_PREFIX}${name}`,
    mimeType: "image/png",
    name: `Screenshot: ${name}`,
  }));

  return { resources: screenshotResources };
}

/**
 * Handle listing resource templates request
 * @returns An empty resource templates list response
 */
export function listResourceTemplates() {
  return { resourceTemplates: [] };
}

/**
 * Read a resource by its URI
 * @param uri The URI of the resource to read
 * @returns The resource content
 * @throws Error when the URI is not a screenshot URI or no screenshot is
 *   stored under that name
 */
export function readResource(uri: string) {
  if (uri.startsWith(SCREENSHOT_URI_PREFIX)) {
    // Take everything after the prefix so that a name which itself contains
    // "://" is not cut short.
    const name = uri.slice(SCREENSHOT_URI_PREFIX.length);
    const screenshot = screenshots.get(name);
    if (screenshot) {
      return {
        contents: [
          {
            uri,
            mimeType: "image/png",
            blob: screenshot,
          },
        ],
      };
    }
  }

  throw new Error(`Resource not found: ${uri}`);
}
--------------------------------------------------------------------------------
/stagehand/src/server.ts:
--------------------------------------------------------------------------------
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import {
  CallToolRequestSchema,
  ListToolsRequestSchema,
  ListResourcesRequestSchema,
  ListResourceTemplatesRequestSchema,
  ListPromptsRequestSchema,
  GetPromptRequestSchema,
  ReadResourceRequestSchema,
} from "@modelcontextprotocol/sdk/types.js";
import { Stagehand } from "@browserbasehq/stagehand";
import type { ConstructorParams } from "@browserbasehq/stagehand";

import { sanitizeMessage } from "./utils.js";
import {
  log,
  logRequest,
  logResponse,
  operationLogs,
  setServerInstance,
} from "./logging.js";
import { TOOLS, handleToolCall } from "./tools.js";
import { PROMPTS, getPrompt } from "./prompts.js";
import {
  listResources,
  listResourceTemplates,
  readResource,
} from
"./resources.js";

// Define Stagehand configuration
export const stagehandConfig: ConstructorParams = {
  env:
    process.env.BROWSERBASE_API_KEY && process.env.BROWSERBASE_PROJECT_ID
      ? "BROWSERBASE"
      : "LOCAL",
  apiKey: process.env.BROWSERBASE_API_KEY /* API key for authentication */,
  projectId: process.env.BROWSERBASE_PROJECT_ID /* Project identifier */,
  logger: (message) =>
    console.error(
      logLineToString(message)
    ) /* Custom logging function to stderr */,
  domSettleTimeoutMs: 30_000 /* Timeout for DOM to settle in milliseconds */,
  browserbaseSessionCreateParams:
    process.env.BROWSERBASE_API_KEY && process.env.BROWSERBASE_PROJECT_ID
      ? {
          projectId: process.env.BROWSERBASE_PROJECT_ID!,
          browserSettings: process.env.CONTEXT_ID
            ? {
                context: {
                  id: process.env.CONTEXT_ID,
                  persist: true,
                },
              }
            : undefined,
        }
      : undefined,
  localBrowserLaunchOptions: process.env.LOCAL_CDP_URL
    ? {
        cdpUrl: process.env.LOCAL_CDP_URL,
      }
    : undefined,
  enableCaching: true /* Enable caching functionality */,
  browserbaseSessionID:
    undefined /* Session ID for resuming Browserbase sessions */,
  modelName: "gpt-4o" /* Name of the model to use */,
  modelClientOptions: {
    apiKey: process.env.OPENAI_API_KEY,
  } /* Configuration options for the model client */,
  useAPI: false,
};

// Global state
let stagehand: Stagehand | undefined;

// Error-message fragments that identify a dead browser session
const SESSION_CLOSED_MARKERS = [
  "Target page, context or browser has been closed",
  "Session expired",
  "context destroyed",
];

/**
 * Create and initialize a fresh Stagehand instance.
 * The module-level instance is replaced only after `init()` has succeeded, so
 * a failed initialization never leaves a half-initialized instance behind.
 */
async function startStagehand(): Promise<Stagehand> {
  const instance = new Stagehand(stagehandConfig);
  await instance.init();
  stagehand = instance;
  return instance;
}

/**
 * Ensure Stagehand is initialized.
 *
 * Creates the instance on first use. On later calls it probes the existing
 * page and, when the probe fails with a session-closed error, replaces the
 * instance with a new one.
 *
 * @returns The usable Stagehand instance
 * @throws Error when running in LOCAL mode without a CDP URL, when
 *   initialization fails, or when the probe fails for any other reason
 */
export async function ensureStagehand() {
  if (
    stagehandConfig.env === "LOCAL" &&
    !stagehandConfig.localBrowserLaunchOptions?.cdpUrl
  ) {
    throw new Error(
      'Using a local browser without providing a CDP URL is not supported. Please provide a CDP URL using the LOCAL_CDP_URL environment variable.\n\nTo launch your browser in "debug", see our documentation.\n\nhttps://docs.stagehand.dev/examples/customize_browser#use-your-personal-browser'
    );
  }

  try {
    if (!stagehand) {
      return await startStagehand();
    }

    // Try to perform a simple operation to check if the session is still valid
    try {
      await stagehand.page.evaluate(() => document.title);
      return stagehand;
    } catch (error) {
      // If we get an error indicating the session is invalid, reinitialize
      if (
        error instanceof Error &&
        SESSION_CLOSED_MARKERS.some((marker) => error.message.includes(marker))
      ) {
        log("Browser session expired, reinitializing Stagehand...", "info");
        // Forget the dead instance first so that a failed re-init is retried
        // from scratch on the next call.
        stagehand = undefined;
        return await startStagehand();
      }
      throw error; // Re-throw if it's a different type of error
    }
  } catch (error) {
    const errorMsg = error instanceof Error ? error.message : String(error);
    log(`Failed to initialize/reinitialize Stagehand: ${errorMsg}`, "error");
    throw error;
  }
}

// Config keys whose string values must never be echoed back to a client
const SECRET_KEY_PATTERN = /api[-_]?key|secret|token|password/i;

/**
 * Serialize the Stagehand configuration for an error message, with credential
 * values replaced by a placeholder.
 */
function describeConfig(config: ConstructorParams): string {
  return JSON.stringify(
    config,
    (key, value) =>
      typeof value === "string" && SECRET_KEY_PATTERN.test(key)
        ? "[REDACTED]"
        : value,
    2
  );
}

/**
 * Build the error payload that every handler returns on failure.
 * NOTE(review): this is returned as the handler's result rather than thrown;
 * confirm with the MCP SDK whether throwing would yield a proper JSON-RPC error.
 */
function internalErrorResponse(error: unknown) {
  const errorMsg = error instanceof Error ? error.message : String(error);
  return {
    error: {
      code: -32603,
      message: `Internal error: ${errorMsg}`,
    },
  };
}

/**
 * Shared request pipeline: log the request, produce the response, pass it
 * through `sanitizeMessage`, log it, and return it. Any failure is converted
 * into the internal-error payload.
 *
 * @param label Name used in the request/response log lines
 * @param params The request parameters (logged only)
 * @param produce Callback that builds the raw response
 */
async function respond(
  label: string,
  params: unknown,
  produce: () => unknown
) {
  try {
    logRequest(label, params);
    const payload = JSON.parse(sanitizeMessage(await produce()));
    logResponse(label, payload);
    return payload;
  } catch (error) {
    return internalErrorResponse(error);
  }
}

/**
 * Create the MCP server and register all request handlers.
 *
 * @returns The configured server
 */
export function createServer() {
  const server = new Server(
    {
      name: "stagehand",
      version: "0.1.0",
    },
    {
      capabilities: {
        resources: {},
        tools: {},
        logging: {},
        prompts: {},
      },
    }
  );

  // Store server instance for logging
  setServerInstance(server);

  // Setup request handlers
  server.setRequestHandler(ListToolsRequestSchema, async (request) =>
    respond("ListTools", request.params, () => ({ tools: TOOLS }))
  );

  server.setRequestHandler(CallToolRequestSchema, async (request) => {
    try {
      logRequest("CallTool", request.params);
      operationLogs.length = 0; // Clear logs for new operation

      const toolName = request.params?.name;
      if (!toolName || !TOOLS.some((t) => t.name === toolName)) {
        throw new Error(`Invalid tool name: ${toolName}`);
      }

      // Ensure Stagehand is initialized
      let activeStagehand: Stagehand;
      try {
        activeStagehand = await ensureStagehand();
      } catch (error) {
        const errorMsg = error instanceof Error ? error.message : String(error);
        return {
          content: [
            {
              type: "text",
              // The config is included for diagnosis with credentials redacted.
              text: `Failed to initialize Stagehand: ${errorMsg}.\n\nConfig: ${describeConfig(
                stagehandConfig
              )}`,
            },
            {
              type: "text",
              text: `Operation logs:\n${operationLogs.join("\n")}`,
            },
          ],
          isError: true,
        };
      }

      const result = await handleToolCall(
        toolName,
        request.params.arguments ?? {},
        activeStagehand
      );

      const payload = JSON.parse(sanitizeMessage(result));
      logResponse("CallTool", payload);
      return payload;
    } catch (error) {
      return internalErrorResponse(error);
    }
  });

  server.setRequestHandler(ListResourcesRequestSchema, async (request) =>
    respond("ListResources", request.params, () => listResources())
  );

  server.setRequestHandler(
    ListResourceTemplatesRequestSchema,
    async (request) =>
      respond("ListResourceTemplates", request.params, () =>
        listResourceTemplates()
      )
  );

  server.setRequestHandler(ReadResourceRequestSchema, async (request) =>
    respond("ReadResource", request.params, () =>
      readResource(request.params.uri.toString())
    )
  );

  server.setRequestHandler(ListPromptsRequestSchema, async (request) =>
    respond("ListPrompts", request.params, () => ({ prompts: PROMPTS }))
  );

  server.setRequestHandler(GetPromptRequestSchema, async (request) =>
    respond("GetPrompt", request.params, () => {
      // Check if prompt name is valid and get the prompt
      try {
        return getPrompt(request.params?.name || "");
      } catch (error) {
        throw new Error(`Invalid prompt name: ${request.params?.name}`);
      }
    })
  );

  return server;
}

// Import missing function from logging
import { formatLogResponse, logLineToString } from "./logging.js";
--------------------------------------------------------------------------------
/stagehand/src/tools.ts:
--------------------------------------------------------------------------------
import { Stagehand } from "@browserbasehq/stagehand";
import { CallToolResult, Tool } from "@modelcontextprotocol/sdk/types.js";
import { getServerInstance, operationLogs } from "./logging.js";
import { screenshots } from "./resources.js";

// Define the Stagehand tools
export const TOOLS: Tool[] = [
  {
    name: "stagehand_navigate",
    description:
      "Navigate to a URL in the browser. Only use this tool with URLs you're confident will work and stay up to date. Otherwise use https://google.com as the starting point",
    inputSchema: {
      type: "object",
      properties: {
        url: { type: "string", description: "The URL to navigate to" },
      },
      required: ["url"],
    },
  },
  {
    name: "stagehand_act",
    description: [
      "Performs an action on a web page element. Act actions should be as atomic and",
      `specific as possible, i.e. "Click the sign in button" or "Type 'hello' into the search input".`,
      `AVOID actions that are more than one step, i.e. "Order me pizza" or "Send an email to Paul`,
      `asking him to call me".`,
    ].join(" "),
    inputSchema: {
      type: "object",
      properties: {
        action: {
          type: "string",
          description: [
            "The action to perform. Should be as atomic and specific as possible,",
            "i.e. 'Click the sign in button' or 'Type 'hello' into the search input'. AVOID actions that are more than one",
            "step, i.e. 'Order me pizza' or 'Send an email to Paul asking him to call me'. The instruction should be just as specific as possible,",
            "and have a strong correlation to the text on the page. If unsure, use observe before using act.",
          ].join(" "),
        },
        variables: {
          type: "object",
          additionalProperties: true,
          description: [
            "Variables used in the action template. ONLY use variables if you're dealing",
            "with sensitive data or dynamic content. For example, if you're logging in to a website,",
            "you can use a variable for the password. When using variables, you MUST have the variable",
            `key in the action template. For example: {"action": "Fill in the password", "variables": {"password": "123456"}}`,
          ].join(" "),
        },
      },
      required: ["action"],
    },
  },
  {
    name: "stagehand_extract",
    description: `Extracts all of the text from the current page.`,
    inputSchema: {
      type: "object",
      properties: {},
    },
  },
  {
    name: "stagehand_observe",
    description:
      "Observes elements on the web page. Use this tool to observe elements that you can later use in an action. Use observe instead of extract when dealing with actionable (interactable) elements rather than text. More often than not, you'll want to use extract instead of observe when dealing with scraping or extracting structured text.",
    inputSchema: {
      type: "object",
      properties: {
        instruction: {
          type: "string",
          description:
            "Instruction for observation (e.g., 'find the login button'). This instruction must be extremely specific.",
        },
      },
      required: ["instruction"],
    },
  },
  {
    name: "screenshot",
    description:
      "Takes a screenshot of the current page. Use this tool to learn where you are on the page when controlling the browser with Stagehand. Only use this tool when the other tools are not sufficient to get the information you need.",
    inputSchema: {
      type: "object",
      properties: {},
    },
  },
];

// Build a text content entry for a tool result
function textContent(text: string) {
  return { type: "text" as const, text };
}

// Turn any thrown value into a printable message
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

// Build a failed tool result: the summary line followed by the operation logs
function failureWithLogs(summary: string): CallToolResult {
  return {
    content: [
      textContent(summary),
      textContent(`Operation logs:\n${operationLogs.join("\n")}`),
    ],
    isError: true,
  };
}

/**
 * Clean the raw `innerText` of a page: trim every line, drop empty lines and
 * lines that look like CSS, and decode literal `\uXXXX` escape sequences.
 */
function cleanExtractedText(bodyText: string): string[] {
  return bodyText
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => {
      if (!line) return false;

      if (
        (line.includes("{") && line.includes("}")) ||
        line.includes("@keyframes") || // Remove CSS animations
        line.match(/^\.[a-zA-Z0-9_-]+\s*{/) || // Remove CSS lines starting with .className {
        line.match(/^[a-zA-Z-]+:[a-zA-Z0-9%\s\(\)\.,-]+;$/) // Remove lines like "color: blue;" or "margin: 10px;"
      ) {
        return false;
      }
      return true;
    })
    .map((line) =>
      line.replace(/\\u([0-9a-fA-F]{4})/g, (_, hex) =>
        String.fromCharCode(parseInt(hex, 16))
      )
    );
}

/**
 * Handle tool calls
 *
 * @param name Name of the tool to run (one of the entries in TOOLS)
 * @param args Tool arguments as supplied by the client
 * @param stagehand The initialized Stagehand instance to operate on
 * @returns The tool result. Failures are reported through `isError: true`
 *   rather than by throwing.
 */
export async function handleToolCall(
  name: string,
  args: any,
  stagehand: Stagehand
): Promise<CallToolResult> {
  switch (name) {
    case "stagehand_navigate":
      try {
        await stagehand.page.goto(args.url);
        const content = [textContent(`Navigated to: ${args.url}`)];
        // Only advertise the live-session link when a session ID is present;
        // otherwise the URL would end in "undefined".
        if (stagehand.browserbaseSessionID) {
          content.push(
            textContent(
              `View the live session here: https://browserbase.com/sessions/${stagehand.browserbaseSessionID}`
            )
          );
        }
        return { content, isError: false };
      } catch (error) {
        return failureWithLogs(`Failed to navigate: ${describeError(error)}`);
      }

    case "stagehand_act":
      try {
        await stagehand.page.act({
          action: args.action,
          variables: args.variables,
        });
        return {
          content: [textContent(`Action performed: ${args.action}`)],
          isError: false,
        };
      } catch (error) {
        return failureWithLogs(
          `Failed to perform action: ${describeError(error)}`
        );
      }

    case "stagehand_extract": {
      try {
        const bodyText = await stagehand.page.evaluate(
          () => document.body.innerText
        );
        const content = cleanExtractedText(bodyText);

        return {
          content: [textContent(`Extracted content:\n${content.join("\n")}`)],
          isError: false,
        };
      } catch (error) {
        return {
          content: [
            textContent(`Failed to extract content: ${describeError(error)}`),
          ],
          isError: true,
        };
      }
    }

    case "stagehand_observe":
      try {
        const observations = await stagehand.page.observe({
          instruction: args.instruction,
          returnAction: false,
        });
        return {
          content: [
            textContent(`Observations: ${JSON.stringify(observations)}`),
          ],
          isError: false,
        };
      } catch (error) {
        return failureWithLogs(`Failed to observe: ${describeError(error)}`);
      }

    case "screenshot":
      try {
        const screenshotBuffer = await stagehand.page.screenshot({
          fullPage: false,
        });

        // Convert buffer to base64 string and store in memory
        // NOTE(review): entries are never removed from `screenshots` here, so
        // the map grows with every screenshot taken.
        const screenshotBase64 = screenshotBuffer.toString("base64");
        const name = `screenshot-${new Date()
          .toISOString()
          .replace(/:/g, "-")}`;
        screenshots.set(name, screenshotBase64);

        // Notify the client that the resources changed. Best-effort: a failed
        // notification must not fail the screenshot itself.
        const serverInstance = getServerInstance();
        if (serverInstance) {
          void Promise.resolve(
            serverInstance.notification({
              method: "notifications/resources/list_changed",
            })
          ).catch((err: unknown) => {
            console.error("Failed to send resource list notification:", err);
          });
        }

        return {
          content: [
            textContent(`Screenshot taken with name: ${name}`),
            {
              type: "image",
              data: screenshotBase64,
              mimeType: "image/png",
            },
          ],
          isError: false,
        };
      } catch (error) {
        return failureWithLogs(
          `Failed to take screenshot: ${describeError(error)}`
        );
      }

    default:
      return failureWithLogs(`Unknown tool: ${name}`);
  }
}
--------------------------------------------------------------------------------
/stagehand/src/utils.ts:
--------------------------------------------------------------------------------
/**
 * Sanitizes a message to ensure it's properly formatted JSON
 * @param message The message to sanitize
 * @returns A sanitized JSON string. A string input is returned unchanged when
 *   it parses as JSON; any other value is serialized. Input that is not valid
 *   JSON or cannot be serialized yields a JSON-RPC "Parse error" payload.
 */
export function sanitizeMessage(message: unknown): string {
  try {
    // Ensure the message is properly stringified JSON
    if (typeof message === 'string') {
      JSON.parse(message); // Validate JSON structure
      return message;
    }

    // JSON.stringify returns undefined (not a string) for undefined, functions
    // and symbols; fall through to the error payload in that case.
    const serialized: string | undefined = JSON.stringify(message);
    if (typeof serialized === 'string') {
      return serialized;
    }
  } catch (error) {
    // Invalid JSON text or an unserializable value: use the error payload below.
  }

  return JSON.stringify({
    jsonrpc: '2.0',
    error: {
      code: -32700,
      message: 'Parse error',
    },
    id: null,
  });
}
--------------------------------------------------------------------------------
/stagehand/tsconfig.json:
--------------------------------------------------------------------------------
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "Node16",
    "moduleResolution": "Node16",
    "strict": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true,
    "resolveJsonModule": true,
    "outDir": "dist",
    "rootDir": "src"
  },
  "include": ["src/**/*.ts"],
  "exclude": ["node_modules"]
}

--------------------------------------------------------------------------------