├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── assets
    ├── browserbase-demo.png
    ├── browserbase-mcp.png
    ├── cover-mcp.png
    ├── cover.png
    ├── session.png
    └── stagehand-mcp.png
├── browserbase
    ├── .npmignore
    ├── Dockerfile
    ├── README.md
    ├── cli.js
    ├── config.d.ts
    ├── index.d.ts
    ├── index.js
    ├── package-lock.json
    ├── package.json
    ├── playwright.config.ts
    ├── smithery.yaml
    ├── src
    │   ├── config.ts
    │   ├── context.ts
    │   ├── index.ts
    │   ├── pageSnapshot.ts
    │   ├── program.ts
    │   ├── server.ts
    │   ├── sessionManager.ts
    │   ├── tools
    │   │   ├── common.ts
    │   │   ├── context.ts
    │   │   ├── getText.ts
    │   │   ├── hover.ts
    │   │   ├── keyboard.ts
    │   │   ├── navigate.ts
    │   │   ├── selectOption.ts
    │   │   ├── session.ts
    │   │   ├── snapshot.ts
    │   │   ├── tool.ts
    │   │   ├── toolUtils.ts
    │   │   └── utils.ts
    │   └── transport.ts
    ├── tests
    │   └── .gitkeep
    ├── tsconfig.json
    └── utils
    │   └── .gitkeep
└── stagehand
    ├── README.md
    ├── package-lock.json
    ├── package.json
    ├── src
        ├── index.ts
        ├── logging.ts
        ├── prompts.ts
        ├── resources.ts
        ├── server.ts
        ├── tools.ts
        └── utils.ts
    └── tsconfig.json


/.gitattributes:
--------------------------------------------------------------------------------
1 | package-lock.json linguist-generated=true
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Logs
  2 | logs
  3 | *.log
  4 | npm-debug.log*
  5 | yarn-debug.log*
  6 | yarn-error.log*
  7 | lerna-debug.log*
  8 | .pnpm-debug.log*
  9 | 
 10 | # Diagnostic reports (https://nodejs.org/api/report.html)
 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
 12 | 
 13 | # Runtime data
 14 | pids
 15 | *.pid
 16 | *.seed
 17 | *.pid.lock
 18 | 
 19 | # Directory for instrumented libs generated by jscoverage/JSCover
 20 | lib-cov
 21 | 
 22 | # Coverage directory used by tools like istanbul
 23 | coverage
 24 | *.lcov
 25 | 
 26 | # nyc test coverage
 27 | .nyc_output
 28 | 
 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
 30 | .grunt
 31 | 
 32 | # Bower dependency directory (https://bower.io/)
 33 | bower_components
 34 | 
 35 | # node-waf configuration
 36 | .lock-wscript
 37 | 
 38 | # Compiled binary addons (https://nodejs.org/api/addons.html)
 39 | build/Release
 40 | 
 41 | # Dependency directories
 42 | node_modules/
 43 | jspm_packages/
 44 | 
 45 | # Snowpack dependency directory (https://snowpack.dev/)
 46 | web_modules/
 47 | 
 48 | # TypeScript cache
 49 | *.tsbuildinfo
 50 | 
 51 | # Optional npm cache directory
 52 | .npm
 53 | 
 54 | # Optional eslint cache
 55 | .eslintcache
 56 | 
 57 | # Optional stylelint cache
 58 | .stylelintcache
 59 | 
 60 | # Microbundle cache
 61 | .rpt2_cache/
 62 | .rts2_cache_cjs/
 63 | .rts2_cache_es/
 64 | .rts2_cache_umd/
 65 | 
 66 | # Optional REPL history
 67 | .node_repl_history
 68 | 
 69 | # Output of 'npm pack'
 70 | *.tgz
 71 | 
 72 | # Yarn Integrity file
 73 | .yarn-integrity
 74 | 
 75 | # dotenv environment variable files
 76 | .env
 77 | .env.development.local
 78 | .env.test.local
 79 | .env.production.local
 80 | .env.local
 81 | 
 82 | # parcel-bundler cache (https://parceljs.org/)
 83 | .cache
 84 | .parcel-cache
 85 | 
 86 | # Next.js build output
 87 | .next
 88 | out
 89 | 
 90 | # Nuxt.js build / generate output
 91 | .nuxt
 92 | dist
 93 | 
 94 | # Gatsby files
 95 | .cache/
 96 | # Comment in the public line in if your project uses Gatsby and not Next.js
 97 | # https://nextjs.org/blog/next-9-1#public-directory-support
 98 | # public
 99 | 
100 | # vuepress build output
101 | .vuepress/dist
102 | 
103 | # vuepress v2.x temp and cache directory
104 | .temp
105 | .cache
106 | 
107 | # Docusaurus cache and generated files
108 | .docusaurus
109 | 
110 | # Serverless directories
111 | .serverless/
112 | 
113 | # FuseBox cache
114 | .fusebox/
115 | 
116 | # DynamoDB Local files
117 | .dynamodb/
118 | 
119 | # TernJS port file
120 | .tern-port
121 | 
122 | # Stores VSCode versions used for testing VSCode extensions
123 | .vscode-test
124 | 
125 | # yarn v2
126 | .yarn/cache
127 | .yarn/unplugged
128 | .yarn/build-state.yml
129 | .yarn/install-state.gz
130 | .pnp.*
131 | 
132 | build/
133 | 
134 | gcp-oauth.keys.json
135 | .*-server-credentials.json
136 | 
137 | # Byte-compiled / optimized / DLL files
138 | __pycache__/
139 | *.py[cod]
140 | *$py.class
141 | 
142 | # C extensions
143 | *.so
144 | 
145 | # Distribution / packaging
146 | .Python
147 | build/
148 | develop-eggs/
149 | dist/
150 | downloads/
151 | eggs/
152 | .eggs/
153 | lib/
154 | lib64/
155 | parts/
156 | sdist/
157 | var/
158 | wheels/
159 | share/python-wheels/
160 | *.egg-info/
161 | .installed.cfg
162 | *.egg
163 | MANIFEST
164 | 
165 | # PyInstaller
166 | #  Usually these files are written by a python script from a template
167 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
168 | *.manifest
169 | *.spec
170 | 
171 | # Installer logs
172 | pip-log.txt
173 | pip-delete-this-directory.txt
174 | 
175 | # Unit test / coverage reports
176 | htmlcov/
177 | .tox/
178 | .nox/
179 | .coverage
180 | .coverage.*
181 | .cache
182 | nosetests.xml
183 | coverage.xml
184 | *.cover
185 | *.py,cover
186 | .hypothesis/
187 | .pytest_cache/
188 | cover/
189 | 
190 | # Translations
191 | *.mo
192 | *.pot
193 | 
194 | # Django stuff:
195 | *.log
196 | local_settings.py
197 | db.sqlite3
198 | db.sqlite3-journal
199 | 
200 | # Flask stuff:
201 | instance/
202 | .webassets-cache
203 | 
204 | # Scrapy stuff:
205 | .scrapy
206 | 
207 | # Sphinx documentation
208 | docs/_build/
209 | 
210 | # PyBuilder
211 | .pybuilder/
212 | target/
213 | 
214 | # Jupyter Notebook
215 | .ipynb_checkpoints
216 | 
217 | # IPython
218 | profile_default/
219 | ipython_config.py
220 | 
221 | # pyenv
222 | #   For a library or package, you might want to ignore these files since the code is
223 | #   intended to run in multiple environments; otherwise, check them in:
224 | # .python-version
225 | 
226 | # pipenv
227 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
228 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
229 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
230 | #   install all needed dependencies.
231 | #Pipfile.lock
232 | 
233 | # poetry
234 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
235 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
236 | #   commonly ignored for libraries.
237 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
238 | #poetry.lock
239 | 
240 | # pdm
241 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
242 | #pdm.lock
243 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
244 | #   in version control.
245 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
246 | .pdm.toml
247 | .pdm-python
248 | .pdm-build/
249 | 
250 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
251 | __pypackages__/
252 | 
253 | # Celery stuff
254 | celerybeat-schedule
255 | celerybeat.pid
256 | 
257 | # SageMath parsed files
258 | *.sage.py
259 | 
260 | # Environments
261 | .env
262 | .venv
263 | env/
264 | venv/
265 | ENV/
266 | env.bak/
267 | venv.bak/
268 | 
269 | # Spyder project settings
270 | .spyderproject
271 | .spyproject
272 | 
273 | # Rope project settings
274 | .ropeproject
275 | 
276 | # mkdocs documentation
277 | /site
278 | 
279 | # mypy
280 | .mypy_cache/
281 | .dmypy.json
282 | dmypy.json
283 | 
284 | # Pyre type checker
285 | .pyre/
286 | 
287 | # pytype static type analyzer
288 | .pytype/
289 | 
290 | # Cython debug symbols
291 | cython_debug/
292 | 
293 | .DS_Store
294 | 
295 | # PyCharm
296 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
297 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
298 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
299 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
300 | #.idea/
301 | 
302 | 
303 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Portions Copyright 2025 Browserbase, Inc
190 |    Portions Copyright (c) Microsoft Corporation.
191 |    Portions Copyright 2017 Google Inc.
192 | 
193 |    Licensed under the Apache License, Version 2.0 (the "License");
194 |    you may not use this file except in compliance with the License.
195 |    You may obtain a copy of the License at
196 | 
197 |        http://www.apache.org/licenses/LICENSE-2.0
198 | 
199 |    Unless required by applicable law or agreed to in writing, software
200 |    distributed under the License is distributed on an "AS IS" BASIS,
201 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202 |    See the License for the specific language governing permissions and
203 |    limitations under the License.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Browserbase MCP Server
 2 | 
 3 | ![cover](assets/cover-mcp.png)
 4 | 
 5 | [The Model Context Protocol (MCP)](https://modelcontextprotocol.io/introduction) is an open protocol that enables seamless integration between LLM applications and external data sources and tools. Whether you’re building an AI-powered IDE, enhancing a chat interface, or creating custom AI workflows, MCP provides a standardized way to connect LLMs with the context they need.
 6 | 
 7 | This server provides cloud browser automation capabilities using [Browserbase](https://www.browserbase.com/) and [Stagehand](https://github.com/browserbase/stagehand). This server enables LLMs to interact with web pages, take screenshots, and execute JavaScript in a cloud browser environment.
 8 | 
 9 | To get started, check out the [Browserbase MCP](./browserbase/README.md) or [Stagehand MCP](./stagehand/README.md) README.
10 | 
11 | ## Getting Started with available MCPs
12 | 
13 | 🌐 **Browserbase MCP** - Located in [`browserbase/`](./browserbase/)
14 | 
15 | | Feature            | Description                               |
16 | | ------------------ | ----------------------------------------- |
17 | | Browser Automation | Control and orchestrate cloud browsers    |
18 | | Data Extraction    | Extract structured data from any webpage  |
19 | | Console Monitoring | Track and analyze browser console logs    |
20 | | Screenshots        | Capture full-page and element screenshots |
21 | | Web Interaction    | Navigate, click, and fill forms with ease |
22 | 
23 | 🤘 **Stagehand MCP** - Located in [`stagehand/`](./stagehand/)
24 | 
25 | | Feature             | Description                                                                                                                                                    |
26 | | ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
27 | | Atomic Instructions | Execute precise actions like `act("click the login button")` or `extract("find the red shoes")`                                                                |
28 | | Model Flexibility   | Supports multiple models, including OpenAI's GPT-4 and Anthropic's Claude-3.7 Sonnet                                                                           |
29 | | Modular Design      | Easily integrate new models with minimal changes                                                                                                               |
30 | | Vision Support      | Use annotated screenshots for complex DOMs                                                                                                                     |
31 | | Open Source         | Contribute to the project and join the [Slack community](https://join.slack.com/t/stagehand-dev/shared_invite/zt-2uvuobu50-~wVSx2Si75CPa3332hwVEw) for support |
32 | 
33 | ### Alternative Installation Methods
34 | 
35 | [Smithery](https://smithery.ai/server/@browserbasehq/mcp-browserbase)
36 | 
37 | ### Credits
38 | 
39 | Huge thanks and shoutout to the Playwright team for their contributions to the framework, and their work on the [Playwright MCP Server](https://github.com/microsoft/playwright-mcp).


--------------------------------------------------------------------------------
/assets/browserbase-demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/mcp-server-browserbase/b81ffe57d16ccb92b6b128894a05ee39ea822d29/assets/browserbase-demo.png


--------------------------------------------------------------------------------
/assets/browserbase-mcp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/mcp-server-browserbase/b81ffe57d16ccb92b6b128894a05ee39ea822d29/assets/browserbase-mcp.png


--------------------------------------------------------------------------------
/assets/cover-mcp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/mcp-server-browserbase/b81ffe57d16ccb92b6b128894a05ee39ea822d29/assets/cover-mcp.png


--------------------------------------------------------------------------------
/assets/cover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/mcp-server-browserbase/b81ffe57d16ccb92b6b128894a05ee39ea822d29/assets/cover.png


--------------------------------------------------------------------------------
/assets/session.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/mcp-server-browserbase/b81ffe57d16ccb92b6b128894a05ee39ea822d29/assets/session.png


--------------------------------------------------------------------------------
/assets/stagehand-mcp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/mcp-server-browserbase/b81ffe57d16ccb92b6b128894a05ee39ea822d29/assets/stagehand-mcp.png


--------------------------------------------------------------------------------
/browserbase/.npmignore:
--------------------------------------------------------------------------------
 1 | # Ignore node_modules, build output, logs, env files, etc.
 2 | node_modules
 3 | dist
 4 | *.log
 5 | .env*
 6 | 
 7 | # Ignore IDE/editor files
 8 | .vscode
 9 | .idea
10 | *.swp
11 | *.swo
12 | 
13 | # Ignore OS files
14 | .DS_Store
15 | Thumbs.db 


--------------------------------------------------------------------------------
/browserbase/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Build stage
 2 | FROM node:18-alpine AS builder
 3 | 
 4 | # Set the working directory
 5 | WORKDIR /app
 6 | 
 7 | # Copy package.json and package-lock.json
 8 | COPY package*.json ./
 9 | 
10 | # Copy TSConfig
11 | COPY tsconfig.json ./
12 | 
13 | # Copy the rest of the source code
14 | COPY . .
15 | 
16 | # Install dependencies
17 | RUN npm install
18 | 
19 | # Build the TypeScript source code
20 | RUN npm run build
21 | 
22 | # Create the final runtime image (same node:18-alpine base as the build stage)
23 | FROM node:18-alpine
24 | 
25 | # Set the working directory
26 | WORKDIR /app
27 | 
28 | # Copy built files from builder stage
29 | COPY --from=builder /app/dist /app/dist
30 | COPY --from=builder /app/cli.js /app/cli.js
31 | COPY --from=builder /app/index.js /app/index.js
32 | COPY --from=builder /app/index.d.ts /app/index.d.ts
33 | COPY --from=builder /app/config.d.ts /app/config.d.ts
34 | COPY --from=builder /app/package.json /app/package.json
35 | COPY --from=builder /app/package-lock.json /app/package-lock.json
36 | COPY --from=builder /app/node_modules /app/node_modules
37 | 
38 | # Set environment variables (to be configured at runtime)
39 | ENV BROWSERBASE_API_KEY=<YOUR_BROWSERBASE_API_KEY>
40 | ENV BROWSERBASE_PROJECT_ID=<YOUR_BROWSERBASE_PROJECT_ID>
41 | 
42 | # Command to run the application with absolute path
43 | ENTRYPOINT [ "node", "/app/cli.js" ] 


--------------------------------------------------------------------------------
/browserbase/README.md:
--------------------------------------------------------------------------------
  1 | # Playwright Browserbase MCP Server
  2 | 
  3 | ![cover](../assets/browserbase-mcp.png)
  4 | 
  5 | The Model Context Protocol (MCP) is an open protocol that enables seamless integration between LLM applications and external data sources and tools. Whether you’re building an AI-powered IDE, enhancing a chat interface, or creating custom AI workflows, MCP provides a standardized way to connect LLMs with the context they need. 
  6 | 
  7 | <div>
  8 |    <a href="https://www.loom.com/share/d285d6093b2843e98908c65592031218">
  9 |    <img style="max-width:600px;" src="https://cdn.loom.com/sessions/thumbnails/d285d6093b2843e98908c65592031218-1ab1288912ffd40c-full-play.gif">
 10 |    </a>
 11 | </div>
 12 | 
 13 | ## How to set up in your MCP config JSON
 14 | 
 15 | You can either use our Server hosted on NPM or run it completely locally by cloning this repo. 
 16 | 
 17 | ### To run on NPM (Recommended)
 18 | 
 19 | Go into your MCP Config JSON and add the Browserbase Server:
 20 | 
 21 | ```json
 22 | {
 23 |    "mcpServers": {
 24 |       "browserbase": {
 25 |          "command": "npx",
 26 |          "args" : ["@browserbasehq/mcp"],
 27 |          "env": {
 28 |             "BROWSERBASE_API_KEY": "",
 29 |             "BROWSERBASE_PROJECT_ID": ""
 30 |          }
 31 |       }
 32 |    }
 33 | }
 34 | ```
 35 | 
 36 | That's it! Reload your MCP client and Claude will be able to use Browserbase.
 37 | 
 38 | ### To run 100% local: 
 39 | 
 40 | ```bash
 41 | # Clone the Repo 
 42 | git clone https://github.com/browserbase/mcp-server-browserbase.git
 43 | 
 44 | # Install the dependencies in the proper directory and build the project
 45 | cd mcp-server-browserbase/browserbase
 46 | npm install && npm run build
 47 | 
 48 | ```
 49 | 
 50 | Then in your MCP Config JSON run the server. To run locally we can use STDIO or self-host over SSE. 
 51 | 
 52 | ### STDIO: 
 53 | 
 54 | To your MCP Config JSON file add the following: 
 55 | 
 56 | ```json
 57 | {
 58 | "mcpServers": {
 59 |    "browserbase": {
 60 |       "command" : "node",
 61 |       "args" : ["/path/to/mcp-server-browserbase/browserbase/cli.js"],
 62 |       "env": {
 63 |          "BROWSERBASE_API_KEY": "",
 64 |          "BROWSERBASE_PROJECT_ID": ""
 65 |          }
 66 |       }
 67 |    }
 68 | }
 69 | ```
 70 | 
 71 | ### SSE: 
 72 | 
 73 | Run the following command in your terminal. You can add any flags (see options below) that you see fit to customize your configuration. 
 74 | 
 75 | ```bash
 76 |    node cli.js --port 8931
 77 | ```
 78 | 
 79 | Then in your MCP Config JSON file put the following: 
 80 | 
 81 | ```json
 82 |    {
 83 |       "mcpServers": {
 84 |          "browserbase": {
 85 |             "url": "http://localhost:8931/sse",
 86 |             "env": {
 87 |                "BROWSERBASE_API_KEY": "",
 88 |                "BROWSERBASE_PROJECT_ID": ""
 89 |             }
 90 |          }
 91 |       }
 92 |    }
 93 | ```
 94 | 
 95 | Then reload your MCP client and you should be good to go!
 96 | 
 97 | ## Flags Explained:
 98 | 
 99 | The Browserbase MCP server accepts the following command-line flags:
100 | 
101 | | Flag | Description |
102 | |------|-------------|
103 | | `--browserbaseApiKey <key>` | Your Browserbase API key for authentication |
104 | | `--browserbaseProjectId <id>` | Your Browserbase project ID |
105 | | `--proxies` | Enable Browserbase proxies for the session |
106 | | `--advancedStealth` | Enable Browserbase Advanced Stealth (Only for Scale Plan Users) | 
107 | | `--contextId <contextId>` | Specify a Browserbase Context ID to use |
108 | | `--persist [boolean]` | Whether to persist the Browserbase context (default: true) |
109 | | `--port <port>` | Port to listen on for HTTP/SSE transport |
110 | | `--host <host>` | Host to bind server to (default: localhost, use 0.0.0.0 for all interfaces) |
111 | | `--cookies [json]` | JSON array of cookies to inject into the browser |
112 | | `--browserWidth <width>` | Browser viewport width (default: 1024) |
113 | | `--browserHeight <height>` | Browser viewport height (default: 768) |
114 | 
115 | These flags can be passed directly to the CLI or configured in your MCP configuration file.
116 | 
117 | ### NOTE: 
118 | 
119 | Currently, these flags can only be used with the local server (npx @browserbasehq/mcp). 
120 | 
121 | ____
122 | 
123 | ## Flags & Example Configs
124 | 
125 | ### Proxies
126 | 
127 | Here are our docs on [Proxies](https://docs.browserbase.com/features/proxies).
128 | 
129 | To use proxies in STDIO, set the --proxies flag in your MCP Config:
130 | 
131 | ```json
132 | {
133 |    "mcpServers": {
134 |       "browserbase": {
135 |          "command" : "npx",
136 |          "args" : ["@browserbasehq/mcp", "--proxies"],
137 |          "env": {
138 |             "BROWSERBASE_API_KEY": "",
139 |             "BROWSERBASE_PROJECT_ID": ""
140 |          }
141 |       }
142 |    }
143 | }
144 | ```
145 | ### Advanced Stealth 
146 | 
147 | Here are our docs on [Advanced Stealth](https://docs.browserbase.com/features/stealth-mode#advanced-stealth-mode).
148 | 
149 | To use advanced stealth in STDIO, set the --advancedStealth flag in your MCP Config:
150 | 
151 | ```json
152 | {
153 |    "mcpServers": {
154 |       "browserbase": {
155 |          "command" : "npx",
156 |          "args" : ["@browserbasehq/mcp", "--advancedStealth"],
157 |          "env": {
158 |             "BROWSERBASE_API_KEY": "",
159 |             "BROWSERBASE_PROJECT_ID": ""
160 |          }
161 |       }
162 |    }
163 | }
164 | ```
165 | 
166 | ### Contexts
167 | 
168 | Here are our docs on [Contexts](https://docs.browserbase.com/features/contexts)
169 | 
170 | To use contexts in STDIO, set the --contextId flag in your MCP Config:
171 | 
172 | ```json
173 | {
174 |    "mcpServers": {
175 |       "browserbase": {
176 |          "command" : "npx",
177 |          "args" : ["@browserbasehq/mcp", "--contextId", "<YOUR_CONTEXT_ID>"],
178 |          "env": {
179 |             "BROWSERBASE_API_KEY": "",
180 |             "BROWSERBASE_PROJECT_ID": ""
181 |          }
182 |       }
183 |    }
184 | }
185 | ```
186 | 
187 | ### Cookie Injection
188 | 
189 | Why would you need to inject cookies? Our context API currently works on persistent cookies, but not session cookies. So sometimes our persistent auth might not work (we're working hard to add this functionality). 
190 | 
191 | You can inject cookies into the MCP server by passing your cookies JSON with the --cookies flag in your MCP Config.
192 | 
193 | To use cookie injection in STDIO, set the --cookies flag in your MCP Config. Your cookies JSON must follow the [Playwright Cookies](https://playwright.dev/docs/api/class-browsercontext#browser-context-cookies) format:
194 | 
195 | ```json
196 | {
197 |    "mcpServers": {
198 |       "browserbase": {
199 |          "command" : "npx",
200 |          "args" : [
201 |             "@browserbasehq/mcp", "--cookies", 
202 |             '{
203 |                "cookies": json,
204 |             }'
205 |          ],
206 |          "env": {
207 |             "BROWSERBASE_API_KEY": "",
208 |             "BROWSERBASE_PROJECT_ID": ""
209 |          }
210 |       }
211 |    }
212 | }
213 | ```
214 | 
215 | ### Browser Viewport Sizing 
216 | 
217 | The default viewport sizing for a browser session is 1024 x 768. You can adjust the Browser viewport sizing with browserWidth and browserHeight flags. 
218 | 
219 | Here's how to use it for custom browser sizing. We recommend sticking with common aspect ratios (e.g. 16:9 sizes such as 1920 x 1080 or 1280 x 720, or the default 4:3 size of 1024 x 768).
220 | 
221 | ```json
222 | {
223 |    "mcpServers": {
224 |       "browserbase": {
225 |          "command" : "npx",
226 |          "args" : [
227 |             "@browserbasehq/mcp",
228 |             "--browserHeight", "1080",
229 |             "--browserWidth", "1920"
230 |          ],
231 |          "env": {
232 |             "BROWSERBASE_API_KEY": "",
233 |             "BROWSERBASE_PROJECT_ID": ""
234 |          }
235 |       }
236 |    }
237 | }
238 | ```
239 | 
240 | ## Structure
241 | 
242 | *   `src/`: TypeScript source code
243 |     *   `index.ts`: Main entry point, env checks, shutdown
244 |     *   `server.ts`: MCP Server setup and request routing
245 |     *   `sessionManager.ts`: Handles Browserbase session creation/management
246 |     *   `tools/`: Tool definitions and implementations
247 |     *   `resources/`: Resource (screenshot) handling
248 |     *   `types.ts`: Shared TypeScript types
249 | *   `dist/`: Compiled JavaScript output
250 | *   `tests/`: Placeholder for tests
251 | *   `utils/`: Placeholder for utility scripts
252 | *   `Dockerfile`: For building a Docker image
253 | *   Configuration files (`.json`, `.ts`, `.mjs`, `.npmignore`)
254 | 
255 | ## Contexts for Persistence
256 | 
257 | This server supports Browserbase's Contexts feature, which allows persisting cookies, authentication, and cached data across browser sessions:
258 | 
259 | 1. **Creating a Context**:
260 |    ```
261 |    browserbase_context_create: Creates a new context, optionally with a friendly name
262 |    ```
263 | 
264 | 2. **Using a Context with a Session**:
265 |    ```
266 |    browserbase_session_create: Now accepts a 'context' parameter with:
267 |      - id: The context ID to use
268 |      - name: Alternative to ID, the friendly name of the context
269 |      - persist: Whether to save changes (cookies, cache) back to the context (default: true)
270 |    ```
271 | 
272 | 3. **Deleting a Context**:
273 |    ```
274 |    browserbase_context_delete: Deletes a context when you no longer need it
275 |    ```
276 | 
277 | Contexts make it much easier to:
278 | - Maintain login state across sessions
279 | - Reduce page load times by preserving cache
280 | - Avoid CAPTCHAs and detection by reusing browser fingerprints
281 | 
282 | ## Cookie Management
283 | 
284 | This server also provides direct cookie management capabilities:
285 | 
286 | 1. **Adding Cookies**:
287 |    ```
288 |    browserbase_cookies_add: Add cookies to the current browser session with full control over properties
289 |    ```
290 | 
291 | 2. **Getting Cookies**:
292 |    ```
293 |    browserbase_cookies_get: View all cookies in the current session (optionally filtered by URLs)
294 |    ```
295 | 
296 | 3. **Deleting Cookies**:
297 |    ```
298 |    browserbase_cookies_delete: Delete specific cookies or clear all cookies from the session
299 |    ```
300 | 
301 | These tools are useful for:
302 | - Setting authentication cookies without navigating to login pages
303 | - Backing up and restoring cookie state
304 | - Debugging cookie-related issues
305 | - Manipulating cookie attributes (expiration, security flags, etc.)
306 | 
307 | ## TODO/Roadmap
308 | 
309 | *   Implement true `ref`-based interaction logic for click, type, drag, hover, select_option.
310 | *   Implement element-specific screenshots using `ref`.
311 | *   Add more standard MCP tools (tabs, navigation, etc.).
312 | *   Add tests.
313 | 


--------------------------------------------------------------------------------
/browserbase/cli.js:
--------------------------------------------------------------------------------
#!/usr/bin/env node
// CLI entry point: the bare import evaluates ./dist/program.js for its side effects.
import './dist/program.js';


--------------------------------------------------------------------------------
/browserbase/config.d.ts:
--------------------------------------------------------------------------------
 1 | import type { Cookie } from "playwright-core";
 2 | 
/**
 * Public configuration accepted by the Browserbase MCP server.
 * Every field is optional.
 */
export type Config = {
    /**
     * The Browserbase API Key to use
     */
    browserbaseApiKey?: string;
    /**
     * The Browserbase Project ID to use
     */
    browserbaseProjectId?: string;
    /**
     * Whether or not to use Browserbase proxies
     * https://docs.browserbase.com/features/proxies
     *
     * @default false
     */
    proxies?: boolean;
    /**
     * Use advanced stealth mode. Only available to Browserbase Scale Plan users.
     *
     * @default false
     */
    advancedStealth?: boolean;
    /**
     * Optional Browserbase Context to attach to the session.
     */
    context?: {
        /**
         * The ID of the context to use
         */
        contextId?: string;
        /**
         * Whether or not to persist the context
         *
         * @default true
         */
        persist?: boolean;
    };
    /**
     * Browser viewport dimensions.
     */
    viewPort?: {
        /**
         * The width of the browser
         */
        browserWidth?: number;
        /**
         * The height of the browser
         */
        browserHeight?: number;
    };
    /**
     * Cookies to inject into the Browserbase context
     * Format: Array of cookie objects with name, value, domain, and optional path, expires, httpOnly, secure, sameSite
     */
    cookies?: Cookie[];
    /**
     * Network settings used when the server is exposed over a port
     * instead of STDIO.
     */
    server?: {
        /**
         * The port to listen on for SSE or MCP transport.
         */
        port?: number;
        /**
         * The host to bind the server to. Default is localhost. Use 0.0.0.0 to bind to all interfaces.
         */
        host?: string;
    };
    /**
     * Per-tool settings, keyed by tool name.
     */
    tools?: {
        /**
         * Configuration for the browserbase_take_screenshot tool.
         */
        browserbase_take_screenshot?: {
            /**
             * Whether to disable base64-encoded image responses to the clients that
             * don't support binary data or prefer to save on tokens.
            */
            omitBase64?: boolean;
        }
    }
};


--------------------------------------------------------------------------------
/browserbase/index.d.ts:
--------------------------------------------------------------------------------
1 | import type { Server } from '@modelcontextprotocol/sdk/server/index.js';
2 | 
3 | import type { Config } from './config';
4 | 
/**
 * Creates the MCP server.
 *
 * @param config Optional settings; see {@link Config}.
 * @returns A promise that resolves to the MCP SDK `Server` instance.
 */
export declare function createServer(config?: Config): Promise<Server>;
6 | export {};


--------------------------------------------------------------------------------
/browserbase/index.js:
--------------------------------------------------------------------------------
// Package entry: re-exposes createServer from the compiled output as a
// property of the default export.
import { createServer } from './dist/index.js';
export default { createServer };


--------------------------------------------------------------------------------
/browserbase/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@browserbasehq/mcp",
 3 |   "version": "1.0.5",
 4 |   "description": "MCP server for browser automation using browserbase",
 5 |   "author": "Browserbase, Inc. (https://browserbase.com)",
 6 |   "homepage": "https://browserbase.com",
 7 |   "type": "module",
 8 |   "main": "./cli.js",
 9 |   "engines": {
10 |     "node": ">=18"
11 |   },
12 |   "files": [
13 |     "../assets/browserbase-mcp.png",
14 |     "dist",
15 |     "cli.js",
16 |     "index.d.ts",
17 |     "index.js",
18 |     "config.d.ts",
19 |     "config.js"
20 |   ],
21 |   "scripts": {
22 |     "build": "tsc && shx chmod +x dist/*.js && shx chmod +x cli.js",
23 |     "prepare": "npm run build",
24 |     "watch": "tsc --watch",
25 |     "inspector": "npx @modelcontextprotocol/inspector build/index.js",
26 |     "test-local": "npm pack && npm install -g $(pwd)/$(ls -t *.tgz | head -1) && mcp-server-browserbase"
27 |   },
28 |   "exports": {
29 |     "./package.json": "./package.json",
30 |     ".": {
31 |       "types": "./index.d.ts",
32 |       "default": "./cli.js"
33 |     }
34 |   },
35 |   "dependencies": {
36 |     "@browserbasehq/sdk": "^2.5.0",
37 |     "@modelcontextprotocol/sdk": "^1.10.2",
38 |     "@types/yaml": "^1.9.6",
39 |     "commander": "^13.1.0",
40 |     "dotenv": "^16.5.0",
41 |     "playwright": "^1.53.0-alpha-2025-05-05",
42 |     "puppeteer-core": "^23.9.0",
43 |     "yaml": "^2.7.1",
44 |     "zod": "^3.24.3",
45 |     "zod-to-json-schema": "^3.24.5"
46 |   },
47 |   "devDependencies": {
48 |     "shx": "^0.3.4",
49 |     "typescript": "^5.6.2"
50 |   },
51 |   "bin": {
52 |     "mcp-server-browserbase": "cli.js"
53 |   },
54 |   "publishConfig": {
55 |     "access": "public"
56 |   }
57 | }
58 | 


--------------------------------------------------------------------------------
/browserbase/playwright.config.ts:
--------------------------------------------------------------------------------
 1 | import { defineConfig, devices } from '@playwright/test';
 2 | 
 3 | /**
 4 |  * Basic Playwright config - primarily useful if adding actual
 5 |  * tests later, might not be strictly needed for the MCP server itself.
 6 |  */
 7 | export default defineConfig({
 8 |   testDir: './tests',
 9 |   fullyParallel: true,
10 |   forbidOnly: !!process.env.CI,
11 |   retries: process.env.CI ? 2 : 0,
12 |   workers: process.env.CI ? 1 : undefined,
13 |   reporter: 'html',
14 |   use: {
15 |     trace: 'on-first-retry',
16 |     // Base URL to use in actions like `await page.goto('/')`
17 |     // baseURL: 'http://127.0.0.1:3000',
18 |   },
19 | 
20 |   /* Configure projects for major browsers */
21 |   // projects: [
22 |   //   {
23 |   //     name: 'chromium',
24 |   //     use: { ...devices['Desktop Chrome'] },
25 |   //   },
26 |   // ],
27 | 
28 |   /* Run your local dev server before starting the tests */
29 |   // webServer: {
30 |   //   command: 'npm run start',
31 |   //   url: 'http://127.0.0.1:3000',
32 |   //   reuseExistingServer: !process.env.CI,
33 |   // },
34 | }); 


--------------------------------------------------------------------------------
/browserbase/smithery.yaml:
--------------------------------------------------------------------------------
 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
 2 | 
 3 | startCommand:
 4 |   # Using stdio type which is the most common for MCPs
 5 |   type: stdio
 6 |   configSchema:
 7 |     # JSON Schema defining the configuration options for the MCP.
 8 |     type: object
 9 |     required:
10 |       - browserbaseApiKey
11 |       - browserbaseProjectId
12 |     properties:
13 |       browserbaseApiKey:
14 |         type: string
15 |         description: The API key for Browserbase.
16 |       browserbaseProjectId:
17 |         type: string
18 |         description: The project ID for Browserbase.
19 |       port:
20 |         type: number
21 |         description: The port to listen on.
22 |       host:
23 |         type: string
24 |         description: The host to listen on.
25 |       contextId:
26 |         type: string
27 |         description: The context ID to use for the session.
28 |       persist:
29 |         type: boolean
30 |         description: Whether to persist the context.
31 |       proxies:
32 |         type: boolean
33 |         description: Whether to use proxies.
34 |       cookies:
35 |         type: object
36 |         description: Cookies to use for the session.
37 |       browserWidth:
38 |         type: number
39 |         description: Width of the browser window.
40 |       browserHeight:
41 |         type: number
42 |         description: Height of the browser window.
43 |       
44 |   commandFunction: |
45 |     config => ({
46 |       command: 'node',
47 |       args: ['cli.js'],
48 |       env: {
49 |         BROWSERBASE_API_KEY: config.browserbaseApiKey,
50 |         BROWSERBASE_PROJECT_ID: config.browserbaseProjectId
51 |       }
52 |     })


--------------------------------------------------------------------------------
/browserbase/src/config.ts:
--------------------------------------------------------------------------------
  1 | import os from 'os';
  2 | import fs from 'fs';
  3 | import path from 'path';
  4 | import { sanitizeForFilePath } from './tools/utils.js'; 
  5 | import type { Cookie } from "playwright-core";
  6 | 
  7 | export type ToolCapability = 'core' | string; 
  8 | 
/**
 * Runtime configuration of the server after defaults and CLI options have
 * been combined. Every field is optional.
 */
export interface Config {
  /** Browserbase API key. */
  browserbaseApiKey?: string; 
  /** Browserbase project ID. */
  browserbaseProjectId?: string; 
  /** Port and host for the HTTP/SSE transport. */
  server?: {
    port?: number;
    host?: string;
  };
  /** Enable Browserbase proxies for the session. */
  proxies?: boolean;
  /** Enable Browserbase Advanced Stealth. */
  advancedStealth?: boolean;
  /** Browserbase Context to use and whether to persist it. */
  context?: {
    contextId?: string;
    persist?: boolean;
  };
  /** Browser viewport size. */
  viewPort?: {
    browserWidth?: number;
    browserHeight?: number;
  };
  /** Cookies to inject into the browser, typed as Playwright cookies. */
  cookies?: Cookie[]; 
}
 28 | 
/**
 * Flat shape of the parsed command-line options. `configFromCLIOptions`
 * regroups these fields into the nested {@link Config} structure.
 */
export type CLIOptions = {
  browserbaseApiKey?: string;
  browserbaseProjectId?: string;
  proxies?: boolean;
  advancedStealth?: boolean;
  contextId?: string;
  persist?: boolean;
  port?: number;
  host?: string;
  cookies?: Cookie[];
  browserWidth?: number;
  browserHeight?: number;
};
 43 | 
// Default configuration values, used as the base layer when resolving config.
// The two Browserbase credentials are read from the environment when this
// module is evaluated; resolveConfig reads them again later if still missing.
const defaultConfig: Config = {
  browserbaseApiKey: process.env.BROWSERBASE_API_KEY,
  browserbaseProjectId: process.env.BROWSERBASE_PROJECT_ID,
  proxies: false,
  // No port/host by default.
  server: {
    port: undefined,
    host: undefined,
  },
  // Default viewport: 1024 x 768.
  viewPort: {
    browserWidth: 1024,
    browserHeight: 768,
  },
  cookies: undefined,
};
 59 | 
 60 | // Resolve final configuration by merging defaults, file config, and CLI options
 61 | export async function resolveConfig(cliOptions: CLIOptions): Promise<Config> {
 62 |   const cliConfig = await configFromCLIOptions(cliOptions);
 63 |   // Order: Defaults < File Config < CLI Overrides
 64 |   const mergedConfig = mergeConfig(defaultConfig, cliConfig);
 65 | 
 66 |   // --- Add Browserbase Env Vars ---
 67 |   // Ensure env vars are read *after* dotenv potentially runs (in index.ts)
 68 |   if (!mergedConfig.browserbaseApiKey) {
 69 |     mergedConfig.browserbaseApiKey = process.env.BROWSERBASE_API_KEY;
 70 |   }
 71 |   if (!mergedConfig.browserbaseProjectId) {
 72 |     mergedConfig.browserbaseProjectId = process.env.BROWSERBASE_PROJECT_ID;
 73 |   }
 74 |   // --------------------------------
 75 | 
 76 |   // Basic validation for Browserbase keys
 77 |   if (!mergedConfig.browserbaseApiKey) {
 78 |     console.warn("Warning: BROWSERBASE_API_KEY environment variable not set.");
 79 |   }
 80 |   if (!mergedConfig.browserbaseProjectId) {
 81 |       console.warn("Warning: BROWSERBASE_PROJECT_ID environment variable not set.");
 82 |   }
 83 | 
 84 |   return mergedConfig;
 85 | }
 86 | 
 87 | // Create Config structure based on CLI options
 88 | export async function configFromCLIOptions(cliOptions: CLIOptions): Promise<Config> {
 89 |  return {
 90 |     browserbaseApiKey: cliOptions.browserbaseApiKey,
 91 |     browserbaseProjectId: cliOptions.browserbaseProjectId,
 92 |     server: {
 93 |       port: cliOptions.port,
 94 |       host: cliOptions.host,
 95 |     },
 96 |     proxies: cliOptions.proxies,
 97 |     context: {
 98 |       contextId: cliOptions.contextId,
 99 |       persist: cliOptions.persist,
100 |     },
101 |     viewPort: {
102 |       browserWidth: cliOptions.browserWidth,
103 |       browserHeight: cliOptions.browserHeight,
104 |     },
105 |     advancedStealth: cliOptions.advancedStealth,
106 |     cookies: cliOptions.cookies,
107 |   };
108 | }
109 | 
/**
 * Builds a path for an output file inside the OS temporary directory.
 * The directory is created if needed and `name` is passed through
 * `sanitizeForFilePath` first. `config` is currently not consulted.
 *
 * @param config Server configuration (unused here).
 * @param name Desired file name.
 * @returns The temp directory joined with the sanitized name.
 */
export async function outputFile(config: Config, name: string): Promise<string> {
  const tempRoot = os.tmpdir();
  await fs.promises.mkdir(tempRoot, { recursive: true });
  return path.join(tempRoot, sanitizeForFilePath(name));
}
117 | 
/**
 * Returns a shallow copy of `obj` without the entries whose value is
 * `undefined`. A missing object yields `{}`.
 */
function pickDefined<T extends object>(obj: T | undefined): Partial<T> {
  if (!obj) return {};
  return Object.fromEntries(
    Object.entries(obj).filter(([, v]) => v !== undefined)
  ) as Partial<T>;
}

/** True for non-null objects that are not arrays. */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Merges two configuration objects; values from `overrides` win.
 *
 * Top-level `undefined` values in either input are ignored. When both sides
 * hold a plain object for the same key (e.g. `server`, `viewPort`,
 * `context`), the two are merged one level deep, and `undefined` fields of
 * the override are skipped so they cannot erase a value from `base`.
 * Anything else (primitives, arrays, or an object with no counterpart in
 * `base`) replaces the base value as-is. Neither input is mutated.
 *
 * @param base Lower-priority configuration.
 * @param overrides Higher-priority configuration.
 * @returns A new merged configuration object.
 */
function mergeConfig(base: Config, overrides: Config): Config {
  const result = { ...pickDefined(base) } as Config;

  for (const [key, value] of Object.entries(pickDefined(overrides))) {
    const field = key as keyof Config;
    const current: unknown = result[field];
    const merged =
      isPlainObject(value) && isPlainObject(current)
        ? // Drop undefined override fields so unset CLI flags keep the base value.
          { ...current, ...pickDefined(value) }
        : value;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- key is dynamic
    result[field] = merged as any;
  }

  return result;
}


--------------------------------------------------------------------------------
/browserbase/src/context.ts:
--------------------------------------------------------------------------------
  1 | import type { Server } from "@modelcontextprotocol/sdk/server/index.js";
  2 | import type { BrowserSession } from "./sessionManager.js";
  3 | import {
  4 |   getSession,
  5 |   defaultSessionId,
  6 |   getSessionReadOnly,
  7 | } from "./sessionManager.js";
  8 | import type { Tool, ToolResult } from "./tools/tool.js";
  9 | import type { Config } from "../config.js";
 10 | import {
 11 |   Resource,
 12 |   CallToolResult,
 13 |   TextContent,
 14 |   ImageContent,
 15 | } from "@modelcontextprotocol/sdk/types.js";
 16 | import { z } from "zod";
 17 | import { PageSnapshot } from "./pageSnapshot.js";
 18 | import type { Page, Locator } from "playwright"; 
 19 | 
 20 | export type ToolActionResult =
 21 |   | { content?: (ImageContent | TextContent)[] }
 22 |   | undefined
 23 |   | void;
 24 | 
 25 | /**
 26 |  * Manages the context for tool execution within a specific Browserbase session.
 27 |  */
 28 | 
 29 | export class Context {
 30 |   private server: Server;
 31 |   public readonly config: Config;
 32 |   public currentSessionId: string = defaultSessionId;
 33 |   private latestSnapshots = new Map<string, PageSnapshot>();
 34 |   private screenshotResources = new Map<
 35 |     string,
 36 |     { format: string; bytes: string; uri: string }
 37 |   >();
 38 | 
  /**
   * @param server MCP server instance stored for later notifications.
   * @param config Configuration stored for later session lookups.
   */
  constructor(server: Server, config: Config) {
    this.server = server;
    this.config = config;
    // Re-assigns a fresh map; the field initializer already creates one.
    this.screenshotResources = new Map();
  }
 44 | 
 45 |   // --- Snapshot State Handling (Using PageSnapshot) ---
 46 | 
 47 |   /**
 48 |    * Returns the latest PageSnapshot for the currently active session.
 49 |    * Throws an error if no snapshot is available for the active session.
 50 |    */
 51 |   snapshotOrDie(): PageSnapshot {
 52 |     const snapshot = this.latestSnapshots.get(this.currentSessionId);
 53 |     if (!snapshot) {
 54 |       throw new Error(
 55 |         `No snapshot available for the current session (${this.currentSessionId}). Capture a snapshot first.`
 56 |       );
 57 |     }
 58 |     return snapshot;
 59 |   }
 60 | 
  /**
   * Removes the stored snapshot of the currently active session, if any.
   * Snapshots of other sessions are left untouched.
   */
  clearLatestSnapshot(): void {
    this.latestSnapshots.delete(this.currentSessionId);
  }
 67 | 
 68 |   /**
 69 |    * Captures a new PageSnapshot for the currently active session and stores it.
 70 |    * Returns the captured snapshot or undefined if capture failed.
 71 |    */
 72 |   async captureSnapshot(): Promise<PageSnapshot | undefined> {
 73 |     const logPrefix = `[Context.captureSnapshot] ${new Date().toISOString()} Session ${
 74 |       this.currentSessionId
 75 |     }:`;
 76 |     let page;
 77 |     try {
 78 |       page = await this.getActivePage();
 79 |     } catch (error) {
 80 |       this.clearLatestSnapshot();
 81 |       return undefined;
 82 |     }
 83 | 
 84 |     if (!page) {
 85 |       this.clearLatestSnapshot();
 86 |       return undefined;
 87 |     }
 88 | 
 89 |     try {
 90 |       await this.waitForTimeout(100); // Small delay for UI settlement
 91 |       const snapshot = await PageSnapshot.create(page);
 92 |       this.latestSnapshots.set(this.currentSessionId, snapshot);
 93 |       return snapshot;
 94 |     } catch (error) {
 95 |       process.stderr.write(
 96 |         `${logPrefix} Failed to capture snapshot: ${
 97 |           error instanceof Error ? error.message : String(error)
 98 |         }\\n`
 99 |       ); // Enhanced logging
100 |       this.clearLatestSnapshot();
101 |       return undefined;
102 |     }
103 |   }
104 | 
105 |   // --- Resource Handling Methods ---
106 | 
107 |   listResources(): Resource[] {
108 |     const resources: Resource[] = [];
109 |     for (const [name, data] of this.screenshotResources.entries()) {
110 |       resources.push({
111 |         uri: data.uri,
112 |         mimeType: `image/${data.format}`, // Ensure correct mime type
113 |         name: `Screenshot: ${name}`,
114 |       });
115 |     }
116 |     return resources;
117 |   }
118 | 
119 |   readResource(uri: string): { uri: string; mimeType: string; blob: string } {
120 |     const prefix = "mcp://screenshots/";
121 |     if (uri.startsWith(prefix)) {
122 |       const name = uri.split("/").pop() || "";
123 |       const data = this.screenshotResources.get(name);
124 |       if (data) {
125 |         return {
126 |           uri,
127 |           mimeType: `image/${data.format}`, // Ensure correct mime type
128 |           blob: data.bytes,
129 |         };
130 |       } else {
131 |         throw new Error(`Screenshot resource not found: ${name}`);
132 |       }
133 |     } else {
134 |       throw new Error(`Resource URI format not recognized: ${uri}`);
135 |     }
136 |   }
137 | 
138 |   addScreenshot(name: string, format: "png" | "jpeg", bytes: string): void {
139 |     const uri = `mcp://screenshots/${name}`;
140 |     this.screenshotResources.set(name, { format, bytes, uri });
141 |     this.server.notification({
142 |       method: "resources/list_changed",
143 |       params: {},
144 |     });
145 |   }
146 | 
147 |   // --- Session and Tool Execution ---
148 | 
149 |   public async getActivePage(): Promise<BrowserSession["page"] | null> {
150 |     const session = await getSession(this.currentSessionId, this.config);
151 |     if (!session || !session.page || session.page.isClosed()) {
152 |       try {
153 |         // getSession does not support a refresh flag currently.
154 |         // If a session is invalid, it needs to be recreated or re-established upstream.
155 |         // For now, just return null if the fetched session is invalid.
156 |         const currentSession = await getSession(
157 |           this.currentSessionId,
158 |           this.config
159 |         );
160 |         if (
161 |           !currentSession ||
162 |           !currentSession.page ||
163 |           currentSession.page.isClosed()
164 |         ) {
165 |           return null;
166 |         }
167 |         return currentSession.page;
168 |       } catch (refreshError) {
169 |         return null;
170 |       }
171 |     }
172 |     return session.page;
173 |   }
174 | 
175 |   public async getActiveBrowser(): Promise<BrowserSession["browser"] | null> {
176 |     const session = await getSession(this.currentSessionId, this.config);
177 |     if (!session || !session.browser || !session.browser.isConnected()) {
178 |       try {
179 |         // getSession does not support a refresh flag currently.
180 |         const currentSession = await getSession(
181 |           this.currentSessionId,
182 |           this.config
183 |         );
184 |         if (
185 |           !currentSession ||
186 |           !currentSession.browser ||
187 |           !currentSession.browser.isConnected()
188 |         ) {
189 |           return null;
190 |         }
191 |         return currentSession.browser;
192 |       } catch (refreshError) {
193 |         return null;
194 |       }
195 |     }
196 |     return session.browser;
197 |   }
198 | 
199 |   /**
200 |    * Get the active browser without triggering session creation.
201 |    * This is a read-only operation used when we need to check for an existing browser
202 |    * without side effects (e.g., during close operations).
203 |    * @returns The browser if it exists and is connected, null otherwise
204 |    */
205 |   public getActiveBrowserReadOnly(): BrowserSession["browser"] | null {
206 |     const session = getSessionReadOnly(this.currentSessionId);
207 |     if (!session || !session.browser || !session.browser.isConnected()) {
208 |       return null;
209 |     }
210 |     return session.browser;
211 |   }
212 | 
213 |   /**
214 |    * Get the active page without triggering session creation.
215 |    * This is a read-only operation used when we need to check for an existing page
216 |    * without side effects.
217 |    * @returns The page if it exists and is not closed, null otherwise
218 |    */
219 |   public getActivePageReadOnly(): BrowserSession["page"] | null {
220 |     const session = getSessionReadOnly(this.currentSessionId);
221 |     if (!session || !session.page || session.page.isClosed()) {
222 |       return null;
223 |     }
224 |     return session.page;
225 |   }
226 | 
227 |   public async waitForTimeout(timeoutMillis: number): Promise<void> {
228 |     return new Promise((resolve) => setTimeout(resolve, timeoutMillis));
229 |   }
230 | 
231 |   private createErrorResult(message: string, toolName: string): CallToolResult {
232 |     return {
233 |       content: [{ type: "text", text: `Error: ${message}` }],
234 |       isError: true,
235 |     };
236 |   }
237 | 
238 |   // --- Refactored Action Execution with Retries ---
239 |   private async executeRefAction(
240 |     toolName: string,
241 |     validatedArgs: any,
242 |     actionFn: (
243 |       page: Page,
244 |       identifier: string | undefined,
245 |       args: any,
246 |       locator: Locator | undefined,
247 |       identifierType: "ref" | "selector" | "none"
248 |     ) => Promise<ToolActionResult | void | string>,
249 |     requiresIdentifier: boolean = true
250 |   ): Promise<{ resultText: string; actionResult?: ToolActionResult | void }> {
251 |     let lastError: Error | null = null;
252 |     let page: Page | null = null;
253 |     let actionResult: ToolActionResult | void | undefined;
254 |     let resultText = "";
255 |     let identifier: string | undefined = undefined;
256 |     let identifierType: "ref" | "selector" | "none" = "none";
257 | 
258 |     // --- Get page and snapshot BEFORE the loop ---
259 |     page = await this.getActivePage();
260 |     if (!page) {
261 |       throw new Error("Failed to get active page before action attempts.");
262 |     }
263 | 
264 |     // Get the CURRENT latest snapshot - DO NOT capture a new one here.
265 |     const snapshot = this.latestSnapshots.get(this.currentSessionId);
266 |     const initialSnapshotIdentifier =
267 |       snapshot?.text().substring(0, 60).replace(/\\n/g, "\\\\n") ??
268 |       "[No Snapshot]";
269 | 
270 |     let locator: Locator | undefined;
271 | 
272 |     // --- Resolve locator: Prioritize selector, then ref ---
273 |     if (validatedArgs?.selector) {
274 |       identifier = validatedArgs.selector;
275 |       identifierType = "selector";
276 |       if (!identifier) {
277 |         throw new Error(
278 |           `Missing required 'selector' argument for tool ${toolName}.`
279 |         );
280 |       }
281 |       try {
282 |         locator = page.locator(identifier);
283 |       } catch (locatorError) {
284 |         throw new Error(
285 |           `Failed to create locator for selector '${identifier}': ${
286 |             locatorError instanceof Error
287 |               ? locatorError.message
288 |               : String(locatorError)
289 |           }`
290 |         );
291 |       }
292 |     } else if (validatedArgs?.ref) {
293 |       identifier = validatedArgs.ref;
294 |       identifierType = "ref";
295 |       if (!identifier) {
296 |         throw new Error(
297 |           `Missing required 'ref' argument for tool ${toolName}.`
298 |         );
299 |       }
300 |       if (!snapshot) {
301 |         throw new Error(
302 |           `Cannot resolve ref '${identifier}' because no snapshot is available for session ${this.currentSessionId}. Capture a snapshot or ensure one exists.`
303 |         );
304 |       }
305 |       try {
306 |         // Resolve using the snapshot we just retrieved
307 |         locator = snapshot.refLocator(identifier);
308 |       } catch (locatorError) {
309 |         // Use the existing snapshot identifier in the error
310 |         throw new Error(
311 |           `Failed to resolve ref ${identifier} using existing snapshot ${initialSnapshotIdentifier} before action attempt: ${
312 |             locatorError instanceof Error
313 |               ? locatorError.message
314 |               : String(locatorError)
315 |           }`
316 |         );
317 |       }
318 |     } else if (requiresIdentifier) {
319 |       // If neither ref nor selector is provided, but one is required
320 |       throw new Error(
321 |         `Missing required 'ref' or 'selector' argument for tool ${toolName}.`
322 |       );
323 |     } else {
324 |       // No identifier needed or provided
325 |       identifierType = "none"; // Explicitly set to none
326 |     }
327 | 
328 |     // --- Single Attempt ---
329 |     try {
330 |       // Pass page, the used identifier (selector or ref), args, the resolved locator, and identifierType
331 |       const actionFnResult = await actionFn(
332 |         page,
333 |         identifier,
334 |         validatedArgs,
335 |         locator,
336 |         identifierType
337 |       );
338 | 
339 |       if (typeof actionFnResult === "string") {
340 |         resultText = actionFnResult;
341 |         actionResult = undefined;
342 |       } else {
343 |         actionResult = actionFnResult;
344 |         const content = actionResult?.content;
345 |         if (Array.isArray(content) && content.length > 0) {
346 |           resultText =
347 |             content
348 |               .map((c: { type: string; text?: string }) =>
349 |                 c.type === "text" ? c.text : `[${c.type}]`
350 |               )
351 |               .filter(Boolean)
352 |               .join(" ") || `${toolName} action completed.`;
353 |         } else {
354 |           resultText = `${toolName} action completed successfully.`;
355 |         }
356 |       }
357 |       lastError = null;
358 |       return { resultText, actionResult };
359 |     } catch (error: any) {
360 |       throw new Error(
361 |         `Action ${toolName} failed: ${
362 |           error instanceof Error ? error.message : String(error)
363 |         }`
364 |       );
365 |     }
366 |   }
367 | 
368 |   async run(tool: Tool<any>, args: any): Promise<CallToolResult> {
369 |     const toolName = tool.schema.name;
370 |     let initialPage: Page | null = null;
371 |     let initialBrowser: BrowserSession["browser"] | null = null;
372 |     let toolResultFromHandle: ToolResult | null = null; // Legacy handle result
373 |     let finalResult: CallToolResult = {
374 |       // Initialize finalResult here
375 |       content: [{ type: "text", text: `Initialization error for ${toolName}` }],
376 |       isError: true,
377 |     };
378 | 
379 |     const logPrefix = `[Context.run ${toolName}] ${new Date().toISOString()}:`;
380 | 
381 |     let validatedArgs: any;
382 |     try {
383 |       validatedArgs = tool.schema.inputSchema.parse(args);
384 |     } catch (error) {
385 |       if (error instanceof z.ZodError) {
386 |         const errorMsg = error.issues.map((issue) => issue.message).join(", ");
387 |         return this.createErrorResult(
388 |           `Input validation failed: ${errorMsg}`,
389 |           toolName
390 |         );
391 |       }
392 |       return this.createErrorResult(
393 |         `Input validation failed: ${
394 |           error instanceof Error ? error.message : String(error)
395 |         }`,
396 |         toolName
397 |       );
398 |     }
399 | 
400 |     const previousSessionId = this.currentSessionId;
401 |     if (
402 |       validatedArgs?.sessionId &&
403 |       validatedArgs.sessionId !== this.currentSessionId
404 |     ) {
405 |       this.currentSessionId = validatedArgs.sessionId;
406 |       this.clearLatestSnapshot();
407 |     }
408 | 
409 |     if (toolName !== "browserbase_session_create") {
410 |       try {
411 |         const session = await getSession(this.currentSessionId, this.config);
412 |         if (
413 |           !session ||
414 |           !session.page ||
415 |           session.page.isClosed() ||
416 |           !session.browser ||
417 |           !session.browser.isConnected()
418 |         ) {
419 |           if (this.currentSessionId !== previousSessionId) {
420 |             this.currentSessionId = previousSessionId;
421 |           }
422 |           throw new Error(
423 |             `Session ${this.currentSessionId} is invalid or browser/page is not available.`
424 |           );
425 |         }
426 |         initialPage = session.page;
427 |         initialBrowser = session.browser;
428 |       } catch (sessionError) {
429 |         return this.createErrorResult(
430 |           `Error retrieving or validating session ${this.currentSessionId}: ${
431 |             sessionError instanceof Error
432 |               ? sessionError.message
433 |               : String(sessionError)
434 |           }`,
435 |           toolName
436 |         );
437 |       }
438 |     }
439 | 
440 |     let toolActionOutput: ToolActionResult | undefined = undefined; // New variable to store direct tool action output
441 |     let actionSucceeded = false;
442 |     let shouldCaptureSnapshotAfterAction = false;
443 |     let postActionSnapshot: PageSnapshot | undefined = undefined;
444 | 
445 |     try {
446 |       let actionToRun: (() => Promise<ToolActionResult>) | undefined =
447 |         undefined;
448 |       let shouldCaptureSnapshot = false;
449 | 
450 |       try {
451 |         if ("handle" in tool && typeof tool.handle === "function") {
452 |           toolResultFromHandle = await tool.handle(this as any, validatedArgs);
453 |           actionToRun = toolResultFromHandle?.action;
454 |           shouldCaptureSnapshot =
455 |             toolResultFromHandle?.captureSnapshot ?? false;
456 |           shouldCaptureSnapshotAfterAction = shouldCaptureSnapshot;
457 |         } else {
458 |           throw new Error(
459 |             `Tool ${toolName} could not be handled (no handle method).`
460 |           );
461 |         }
462 | 
463 |         if (actionToRun) {
464 |           toolActionOutput = await actionToRun();
465 |           actionSucceeded = true;
466 |         } else {
467 |           throw new Error(`Tool ${toolName} handled without action.`);
468 |         }
469 |       } catch (error) {
470 |         process.stderr.write(
471 |           `${logPrefix} Error executing tool ${toolName}: ${
472 |             error instanceof Error ? error.message : String(error)
473 |           }\\n`
474 |         ); 
475 |         if (error instanceof Error && error.stack) {
476 |           process.stderr.write(`${logPrefix} Stack Trace: ${error.stack}\\n`);
477 |         }
478 |         // -----------------------
479 |         finalResult = this.createErrorResult(
480 |           `Execution failed: ${
481 |             error instanceof Error ? error.message : String(error)
482 |           }`,
483 |           toolName
484 |         );
485 |         actionSucceeded = false;
486 |         shouldCaptureSnapshotAfterAction = false;
487 |         if (
488 |           this.currentSessionId !== previousSessionId &&
489 |           toolName !== "browserbase_session_create"
490 |         ) {
491 |           this.currentSessionId = previousSessionId;
492 |         }
493 |       } finally {
494 |         if (actionSucceeded && shouldCaptureSnapshotAfterAction) {
495 |           const preSnapshotDelay = 500;
496 |           await this.waitForTimeout(preSnapshotDelay);
497 |           try {
498 |             postActionSnapshot = await this.captureSnapshot();
499 |             if (postActionSnapshot) {
500 |               process.stderr.write(
501 |                 `[Context.run ${toolName}] Added snapshot to final result text.\n`
502 |               );
503 |             } else {
504 |               process.stderr.write(
505 |                 `[Context.run ${toolName}] WARN: Snapshot was expected after action but failed to capture.\n`
506 |               ); // Keep warning
507 |             }
508 |           } catch (postSnapError) {
509 |             process.stderr.write(
510 |               `[Context.run ${toolName}] WARN: Error capturing post-action snapshot: ${
511 |                 postSnapError instanceof Error
512 |                   ? postSnapError.message
513 |                   : String(postSnapError)
514 |               }\n`
515 |             ); // Keep warning
516 |           }
517 |         } else if (
518 |           actionSucceeded &&
519 |           toolName === "browserbase_snapshot" &&
520 |           !postActionSnapshot
521 |         ) {
522 |           postActionSnapshot = this.latestSnapshots.get(this.currentSessionId);
523 |         }
524 | 
525 |         if (actionSucceeded) {
526 |           const finalContentItems: (TextContent | ImageContent)[] = [];
527 | 
528 |           // 1. Add content from the tool action itself
529 |           if (toolActionOutput?.content && toolActionOutput.content.length > 0) {
530 |             finalContentItems.push(...toolActionOutput.content);
531 |           } else {
532 |             // If toolActionOutput.content is empty/undefined but action succeeded,
533 |             // provide a generic success message.
534 |             finalContentItems.push({ type: "text", text: `${toolName} action completed successfully.` });
535 |           }
536 | 
537 |           // 2. Prepare and add additional textual information (URL, Title, Snapshot)
538 |           const additionalInfoParts: string[] = [];
539 |           // Use read-only version to avoid creating sessions after close
540 |           const currentPage = this.getActivePageReadOnly();
541 | 
542 |           if (currentPage) {
543 |             try {
544 |               const url = currentPage.url();
545 |               const title = await currentPage
546 |                 .title()
547 |                 .catch(() => "[Error retrieving title]");
548 |               additionalInfoParts.push(`- Page URL: ${url}`);
549 |               additionalInfoParts.push(`- Page Title: ${title}`);
550 |             } catch (pageStateError) {
551 |               additionalInfoParts.push(
552 |                 "- [Error retrieving page state after action]"
553 |               );
554 |             }
555 |           } else {
556 |             additionalInfoParts.push("- [Page unavailable after action]");
557 |           }
558 | 
559 |           const snapshotToAdd = postActionSnapshot;
560 |           if (snapshotToAdd) {
561 |             additionalInfoParts.push(
562 |               `- Page Snapshot\n\`\`\`yaml\n${snapshotToAdd.text()}\n\`\`\`\n`
563 |             );
564 |           } else {
565 |             additionalInfoParts.push(
566 |               `- [No relevant snapshot available after action]`
567 |             );
568 |           }
569 | 
570 |           // 3. Add the additional information as a new TextContent item if it's not empty
571 |           if (additionalInfoParts.length > 0) {
572 |             // Add leading newlines if there's preceding content, to maintain separation
573 |             const additionalInfoText = (finalContentItems.length > 0 ? "\\n\\n" : "") + additionalInfoParts.join("\\n");
574 |             finalContentItems.push({ type: "text", text: additionalInfoText });
575 |           }
576 | 
577 |           finalResult = {
578 |             content: finalContentItems,
579 |             isError: false,
580 |           };
581 |         } else {
582 |           // Error result is already set in catch block, but ensure it IS set.
583 |           if (!finalResult || !finalResult.isError) {
584 |             finalResult = this.createErrorResult(
585 |               `Unknown error occurred during ${toolName}`,
586 |               toolName
587 |             );
588 |           }
589 |         }
590 |         return finalResult;
591 |       }
592 |     } catch (error) {
593 |       process.stderr.write(
594 |         `${logPrefix} Error running tool ${toolName}: ${
595 |           error instanceof Error ? error.message : String(error)
596 |         }\n`
597 |       );
598 |       throw error;
599 |     }
600 |   }
601 | }
602 | 


--------------------------------------------------------------------------------
/browserbase/src/index.ts:
--------------------------------------------------------------------------------
  1 | import dotenv from "dotenv";
  2 | dotenv.config();
  3 | 
  4 | import { Config } from "../config.js";
  5 | import type { Tool } from "./tools/tool.js";
  6 | 
  7 | import navigate from "./tools/navigate.js";
  8 | import snapshot from "./tools/snapshot.js";
  9 | import keyboard from "./tools/keyboard.js";
 10 | import getText from "./tools/getText.js";
 11 | import session from "./tools/session.js";
 12 | import common from "./tools/common.js";
 13 | import contextTools from "./tools/context.js";
 14 | 
 15 | import { Server } from "@modelcontextprotocol/sdk/server/index.js";
 16 | import { CallToolRequestSchema, ListResourcesRequestSchema, ListToolsRequestSchema, ReadResourceRequestSchema } from "@modelcontextprotocol/sdk/types.js";
 17 | import { z } from "zod";
 18 | import { zodToJsonSchema } from "zod-to-json-schema";
 19 | import { Context } from "./context.js";
 20 | 
 21 | // Environment variables configuration
 22 | const requiredEnvVars = {
 23 |   BROWSERBASE_API_KEY: process.env.BROWSERBASE_API_KEY,
 24 |   BROWSERBASE_PROJECT_ID: process.env.BROWSERBASE_PROJECT_ID,
 25 | };
 26 | 
 27 | // Validate required environment variables
 28 | Object.entries(requiredEnvVars).forEach(([name, value]) => {
 29 |   if (!value) throw new Error(`${name} environment variable is required`);
 30 | });
 31 | 
 32 | export async function createServer(config: Config): Promise<Server> {
 33 |   // Create the server
 34 |   const server = new Server(
 35 |     { name: "mcp-server-browserbase", version: "0.5.1" },
 36 |     {
 37 |       capabilities: {
 38 |         resources: { list: true, read: true },
 39 |         tools: { list: true, call: true },
 40 |         prompts: { list: true, get: true },
 41 |         notifications: { resources: { list_changed: true } },
 42 |       },
 43 |     }
 44 |   ); 
 45 | 
 46 |   // Create the context, passing server instance and config
 47 |   const context = new Context(server, config);
 48 | 
 49 |   const tools: Tool<any>[] = [
 50 |     ...common,
 51 |     ...snapshot,
 52 |     ...keyboard,
 53 |     ...getText,
 54 |     ...navigate,
 55 |     ...session,
 56 |     ...contextTools,
 57 |   ];
 58 | 
 59 |   const toolsMap = new Map(tools.map(tool => [tool.schema.name, tool]));
 60 |    // --- Setup Request Handlers ---
 61 | 
 62 |   server.setRequestHandler(ListResourcesRequestSchema, async () => {
 63 |     return { resources: context.listResources() }; 
 64 |   });
 65 | 
 66 |   server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
 67 |     try {
 68 |       const resourceContent = context.readResource(request.params.uri.toString());
 69 |       return { contents: [resourceContent] };
 70 |     } catch (error) {
 71 |       // Keep this error log
 72 |       console.error(`Error reading resource via context: ${error}`);
 73 |       throw error;
 74 |     }
 75 |   });
 76 | 
 77 |   server.setRequestHandler(ListToolsRequestSchema, async () => {
 78 |     return {
 79 |       tools: tools.map(tool => {
 80 |         let finalInputSchema;
 81 |         // Check if inputSchema is a Zod schema before converting
 82 |         if (tool.schema.inputSchema instanceof z.Schema) {
 83 |           // Add type assertion to help compiler
 84 |           finalInputSchema = zodToJsonSchema(tool.schema.inputSchema as any);
 85 |         } else if (typeof tool.schema.inputSchema === 'object' && tool.schema.inputSchema !== null) {
 86 |           // Assume it's already a valid JSON schema object
 87 |           finalInputSchema = tool.schema.inputSchema;
 88 |         } else {
 89 |           // Fallback or error handling if schema is neither
 90 |           // Keep this error log
 91 |           console.error(`Warning: Tool '${tool.schema.name}' has an unexpected inputSchema type.`);
 92 |           finalInputSchema = { type: "object" }; // Default to empty object schema
 93 |         }
 94 |         
 95 |         return {
 96 |           name: tool.schema.name,
 97 |           description: tool.schema.description,
 98 |           inputSchema: finalInputSchema,
 99 |         };
100 |       }),
101 |     };
102 |   });
103 | 
104 |   server.setRequestHandler(CallToolRequestSchema, async (request) => {
105 |     const logError = (message: string) => {
106 |       // Ensure error logs definitely go to stderr
107 |       process.stderr.write(`[server.ts Error] ${new Date().toISOString()} ${message}\\n`);
108 |     };
109 | 
110 |     // Use the map built from the passed-in tools
111 |     const tool = toolsMap.get(request.params.name);
112 | 
113 |     if (!tool) {
114 |       // Use the explicit error logger
115 |       logError(`Tool "${request.params.name}" not found.`);
116 |       // Return a simplified error object
117 |       return { content: [{ type: 'text', text: `Tool "${request.params.name}" not found` }], isError: true };
118 |     }
119 | 
120 |     try {
121 |       // Delegate execution to the context
122 |       const result = await context.run(tool, request.params.arguments ?? {});
123 |       return result;
124 |     } catch (error) {
125 |       // Use the explicit error logger
126 |       const errorMessage = error instanceof Error ? error.message : String(error);
127 |       logError(`Error running tool ${request.params.name} via context: ${errorMessage}`);
128 |       logError(`Original error stack (if available): ${error instanceof Error ? error.stack : 'N/A'}`); // Log stack trace
129 |       // Return a simplified error object
130 |       return { content: [{ type: 'text', text: `Failed to run tool '${request.params.name}': ${errorMessage}` }], isError: true };
131 |     }
132 |   });
133 | 
134 |   // Wrap server close to also close context
135 |   const originalClose = server.close.bind(server);
136 |   server.close = async () => {
137 |     await originalClose();
138 |   };
139 |   
140 |   // Return the configured server instance
141 |   return server;
142 | } 


--------------------------------------------------------------------------------
/browserbase/src/pageSnapshot.ts:
--------------------------------------------------------------------------------
  1 | import type { Page, FrameLocator, Locator } from 'playwright-core';
  2 | import yaml from 'yaml';
  3 | 
  4 | type PageOrFrameLocator = Page | FrameLocator;
  5 | 
  6 | export class PageSnapshot {
  7 |   private _frameLocators: PageOrFrameLocator[] = [];
  8 |   private _text!: string;
  9 | 
 10 |   constructor() {
 11 |   }
 12 | 
 13 |   static async create(page: Page): Promise<PageSnapshot> {
 14 |     const snapshot = new PageSnapshot();
 15 |     await snapshot._build(page);
 16 |     return snapshot;
 17 |   }
 18 | 
 19 |   text(): string {
 20 |     return this._text;
 21 |   }
 22 | 
 23 |   private async _build(page: Page) {
 24 |     const yamlDocument = await this._snapshotFrame(page);
 25 |     this._text = [
 26 |       `- Page Snapshot`,
 27 |       '```yaml',
 28 |       // Generate text directly from the returned document
 29 |       yamlDocument.toString({ indentSeq: false }).trim(),
 30 |       '```',
 31 |     ].join('\n');
 32 |   }
 33 | 
 34 |   private async _snapshotFrame(frame: Page | FrameLocator) {
 35 |     const frameIndex = this._frameLocators.push(frame) - 1;
 36 |     let snapshotString = '';
 37 |     try {
 38 |         snapshotString = await (frame.locator('body') as any).ariaSnapshot({ ref: true, emitGeneric: true });
 39 |     } catch (e) {
 40 |         snapshotString = `error: Could not take snapshot. Error: ${e instanceof Error ? e.message : String(e)}`;
 41 |     }
 42 | 
 43 |     const snapshot = yaml.parseDocument(snapshotString);
 44 | 
 45 |     const visit = async (node: any): Promise<unknown> => {
 46 |       if (yaml.isPair(node)) {
 47 |         await Promise.all([
 48 |           visit(node.key).then(k => node.key = k),
 49 |           visit(node.value).then(v => node.value = v)
 50 |         ]);
 51 |       } else if (yaml.isSeq(node) || yaml.isMap(node)) {
 52 |         const items = [...node.items];
 53 |         node.items = await Promise.all(items.map(visit));
 54 |       } else if (yaml.isScalar(node)) {
 55 |         if (typeof node.value === 'string') {
 56 |           const value = node.value;
 57 |           if (frameIndex > 0)
 58 |             node.value = value.replace('[ref=', `[ref=f${frameIndex}`);
 59 | 
 60 |           if (value.startsWith('iframe ')) {
 61 |             const ref = value.match(/\[ref=(.*)\]/)?.[1]; 
 62 |             if (ref) {
 63 |               try {
 64 |                 const childFrameLocator = frame.frameLocator(`aria-ref=${ref}`);
 65 |                 const childSnapshot = await this._snapshotFrame(childFrameLocator);
 66 |                 return snapshot.createPair(node.value, childSnapshot);
 67 |               } catch (error) {
 68 |                 return snapshot.createPair(node.value, '<could not take iframe snapshot>');
 69 |               }
 70 |             }
 71 |           }
 72 |         }
 73 |       }
 74 |       return node;
 75 |     };
 76 | 
 77 | 
 78 |     if (snapshot.contents) {
 79 |         await visit(snapshot.contents);
 80 |     } else {
 81 |         const emptyMapDoc = yaml.parseDocument('{}');
 82 |         snapshot.contents = emptyMapDoc.contents;
 83 |     }
 84 |     return snapshot; 
 85 |   }
 86 | 
 87 |   refLocator(ref: string): Locator {
 88 |     let frameIndex = 0;
 89 |     let frame: PageOrFrameLocator;
 90 |     let targetRef = ref;
 91 | 
 92 |     const match = ref.match(/^f(\d+)(.*)/);
 93 |     if (match) {
 94 |       frameIndex = parseInt(match[1], 10);
 95 |       targetRef = match[2];
 96 |     }
 97 | 
 98 |     if (this._frameLocators.length === 0) {
 99 |         throw new Error(`Frame locators not initialized. Cannot find frame for ref '${ref}'.`);
100 |     }
101 | 
102 |      if (frameIndex < 0 || frameIndex >= this._frameLocators.length) {
103 |         throw new Error(`Validation Error: Frame index ${frameIndex} derived from ref '${ref}' is out of bounds (found ${this._frameLocators.length} frames).`);
104 |      }
105 |      frame = this._frameLocators[frameIndex];
106 | 
107 |     if (!frame)
108 |       throw new Error(`Frame (index ${frameIndex}) could not be determined. Provide ref from the most current snapshot.`);
109 | 
110 |     return frame.locator(`aria-ref=${targetRef}`);
111 |   }
112 | }
113 | 


--------------------------------------------------------------------------------
/browserbase/src/program.ts:
--------------------------------------------------------------------------------
 1 | import { program } from 'commander';
 2 | import * as fs from 'fs';
 3 | import * as path from 'path';
 4 | import { fileURLToPath } from 'url';
 5 | 
 6 | import { createServer } from './index.js';
 7 | import { ServerList } from './server.js';
 8 | 
 9 | import { startHttpTransport, startStdioTransport } from './transport.js';
10 | 
11 | import { resolveConfig } from './config.js';
12 | 
// ES modules have no built-in __dirname; derive it from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Read and parse the package.json that sits one directory above this module.
const packageJSON = JSON.parse(
  fs.readFileSync(path.resolve(__dirname, '../package.json')).toString()
);
21 | 
// CLI definition: version and name come from package.json. The action resolves
// the config, sets up the exit watchdog and starts the HTTP transport when
// --port is given, otherwise the stdio transport.
program
    .version('Version ' + packageJSON.version)
    .name(packageJSON.name)
    .option('--browserbaseApiKey <key>', 'The Browserbase API Key to use')
    .option('--browserbaseProjectId <id>', 'The Browserbase Project ID to use')
    .option('--proxies', 'Use Browserbase proxies.')
    .option('--advancedStealth', 'Use advanced stealth mode. Only available to Browserbase Scale Plan users.')
    .option('--contextId <contextId>', 'Browserbase Context ID to use.')
    .option('--persist [boolean]', 'Whether to persist the Browserbase context', true)
    .option('--port <port>', 'Port to listen on for SSE transport.')
    .option('--host <host>', 'Host to bind server to. Default is localhost. Use 0.0.0.0 to bind to all interfaces.')
    .option('--cookies [json]', 'JSON array of cookies to inject into the browser. Format: [{"name":"cookie1","value":"val1","domain":"example.com"}, ...]')
    .option('--browserWidth <width>', 'Browser width to use for the browser.')
    .option('--browserHeight <height>', 'Browser height to use for the browser.')
    .action(async (options) => {
      const config = await resolveConfig(options);
      // Every server produced by the list is built from the same resolved config.
      const serverList = new ServerList(() => createServer(config));
      setupExitWatchdog(serverList);

      if (options.port) {
        // The port arrives as a string from the command line.
        startHttpTransport(Number(options.port), options.host, serverList);
      } else {
        await startStdioTransport(serverList);
      }
    });
46 | 
/**
 * Close all servers and exit when stdin closes or SIGINT/SIGTERM arrives.
 *
 * Shutdown runs at most once even if several of these events fire. A 15 second
 * timer forces the exit if closing the servers hangs, and a failure while
 * closing is logged instead of surfacing as an unhandled rejection.
 *
 * @param serverList Servers to close before the process exits.
 */
function setupExitWatchdog(serverList: ServerList) {
  let exiting = false;

  const handleExit = async () => {
    // stdin close and a signal can both fire; only shut down once.
    if (exiting) {
      return;
    }
    exiting = true;

    // Hard deadline in case closeAll() never settles.
    setTimeout(() => process.exit(0), 15000);

    let exitCode = 0;
    try {
      await serverList.closeAll();
    } catch (error) {
      exitCode = 1;
      process.stderr.write(
        `[program] Error while closing servers: ${
          error instanceof Error ? error.message : String(error)
        }\n`
      );
    }
    process.exit(exitCode);
  };

  process.stdin.on('close', handleExit);
  process.on('SIGINT', handleExit);
  process.on('SIGTERM', handleExit);
}
58 | 
// The action handler is async, so parse with parseAsync: the returned promise
// settles with the handler, and a failure is reported here instead of becoming
// an unhandled rejection.
program.parseAsync(process.argv).catch((error: unknown) => {
  process.stderr.write(
    `${error instanceof Error ? error.stack ?? error.message : String(error)}\n`
  );
  process.exit(1);
});


--------------------------------------------------------------------------------
/browserbase/src/server.ts:
--------------------------------------------------------------------------------
 1 | import { Server } from "@modelcontextprotocol/sdk/server/index.js";
 2 | 
 3 | export class ServerList {
 4 |   private _servers: Server[] = [];
 5 |   private _serverFactory: () => Promise<Server>;
 6 | 
 7 |   constructor(serverFactory: () => Promise<Server>) {
 8 |     this._serverFactory = serverFactory;
 9 |   }
10 | 
11 |   async create() {
12 |     const server = await this._serverFactory();
13 |     this._servers.push(server);
14 |     return server;
15 |   }
16 | 
17 |   async close(server: Server) {
18 |     const index = this._servers.indexOf(server);
19 |     if (index !== -1)
20 |       this._servers.splice(index, 1);
21 |     await server.close();
22 |   }
23 | 
24 |   async closeAll() {
25 |     await Promise.all(this._servers.map(server => server.close()));
26 |   }
27 | }
28 | 


--------------------------------------------------------------------------------
/browserbase/src/sessionManager.ts:
--------------------------------------------------------------------------------
  1 | import {
  2 |   chromium,
  3 |   Browser,
  4 |   Page,
  5 | } from "playwright-core";
  6 | import { Browserbase } from "@browserbasehq/sdk";
  7 | import type { Config } from "./config.js"; 
  8 | import { SessionCreateParams } from "@browserbasehq/sdk/src/resources/sessions/sessions.js";
  9 | import type { Cookie } from "playwright-core";
 10 | 
 11 | // Define the type for a session object
 12 | export type BrowserSession = {
 13 |   browser: Browser;
 14 |   page: Page;
 15 |   sessionId: string;
 16 | };
 17 | 
 18 | // Global state for managing browser sessions
 19 | const browsers = new Map<string, BrowserSession>();
 20 | 
 21 | // Keep track of the default session explicitly
 22 | let defaultBrowserSession: BrowserSession | null = null;
 23 | 
 24 | // Define a specific ID for the default session
 25 | export const defaultSessionId = "browserbase_session_main";
 26 | 
 27 | // Keep track of the active session ID. Defaults to the main session.
 28 | let activeSessionId: string = defaultSessionId;
 29 | 
 30 | /**
 31 |  * Sets the active session ID.
 32 |  * @param id The ID of the session to set as active.
 33 |  */
 34 | export function setActiveSessionId(id: string): void {
 35 |   if (browsers.has(id) || id === defaultSessionId) {
 36 |     activeSessionId = id;
 37 |   } else {
 38 |     process.stderr.write(
 39 |       `[SessionManager] WARN - Set active session failed for non-existent ID: ${id}\n`
 40 |     );
 41 |   }
 42 | }
 43 | 
 44 | /**
 45 |  * Gets the active session ID.
 46 |  * @returns The active session ID.
 47 |  */
 48 | export function getActiveSessionId(): string {
 49 |   return activeSessionId;
 50 | }
 51 | 
 52 | /**
 53 |  * Adds cookies to a browser context
 54 |  * @param context Playwright browser context
 55 |  * @param cookies Array of cookies to add
 56 |  */
 57 | export async function addCookiesToContext(context: any, cookies: Cookie[]): Promise<void> {
 58 |   if (!cookies || cookies.length === 0) {
 59 |     return;
 60 |   }
 61 |   
 62 |   try {
 63 |     process.stderr.write(`[SessionManager] Adding ${cookies.length} cookies to browser context\n`);
 64 |     await context.addCookies(cookies);
 65 |     process.stderr.write(`[SessionManager] Successfully added cookies to browser context\n`);
 66 |   } catch (error) {
 67 |     process.stderr.write(
 68 |       `[SessionManager] Error adding cookies to browser context: ${
 69 |         error instanceof Error ? error.message : String(error)
 70 |       }\n`
 71 |     );
 72 |   }
 73 | }
 74 | 
 75 | // Function to create a new Browserbase session and connect Playwright
 76 | export async function createNewBrowserSession(
 77 |   newSessionId: string,
 78 |   config: Config, 
 79 | ): Promise<BrowserSession> {
 80 |   if (!config.browserbaseApiKey) {
 81 |     throw new Error("Browserbase API Key is missing in the configuration.");
 82 |   }
 83 |   if (!config.browserbaseProjectId) {
 84 |     throw new Error("Browserbase Project ID is missing in the configuration.");
 85 |   }
 86 | 
 87 |   const bb = new Browserbase({
 88 |     apiKey: config.browserbaseApiKey,
 89 |   });
 90 | 
 91 |   // Prepare session creation options
 92 |   const sessionOptions: SessionCreateParams = {
 93 |     // Use non-null assertion after check
 94 |     projectId: config.browserbaseProjectId!,
 95 |     proxies: config.proxies, 
 96 |     browserSettings: {
 97 |       viewport: {
 98 |         width: config.viewPort?.browserWidth ?? 1024,
 99 |         height: config.viewPort?.browserHeight ?? 768,
100 |       },
101 |       context: config.context?.contextId ? {
102 |         id: config.context?.contextId,
103 |         persist: config.context?.persist ?? true,
104 |       } : undefined,
105 |       advancedStealth: config.advancedStealth ?? undefined,
106 |     }
107 |   };
108 | 
109 |   try {
110 |     process.stderr.write(
111 |       `[SessionManager] Creating session ${newSessionId}...\n`
112 |     );
113 |     const bbSession = await bb.sessions.create(sessionOptions);
114 |     process.stderr.write(
115 |       `[SessionManager] Browserbase session created: ${bbSession.id}\n`
116 |     );
117 | 
118 |     const browser = await chromium.connectOverCDP(bbSession.connectUrl);
119 |     process.stderr.write(
120 |       `[SessionManager] Browserbase Live Debugger URL: https://www.browserbase.com/sessions/${bbSession.id}\n`
121 |     );
122 | 
123 |     browser.on("disconnected", () => {
124 |       process.stderr.write(`[SessionManager] Disconnected: ${newSessionId}\n`);
125 |       browsers.delete(newSessionId);
126 |       if (defaultBrowserSession && defaultBrowserSession.browser === browser) {
127 |         process.stderr.write(
128 |           `[SessionManager] Disconnected (default): ${newSessionId}\n`
129 |         );
130 |         defaultBrowserSession = null;
131 |       }
132 |       if (
133 |         activeSessionId === newSessionId &&
134 |         newSessionId !== defaultSessionId
135 |       ) {
136 |         process.stderr.write(
137 |           `[SessionManager] WARN - Active session disconnected, resetting to default: ${newSessionId}\n`
138 |         );
139 |         setActiveSessionId(defaultSessionId);
140 |       }
141 |     });
142 | 
143 |     let context = browser.contexts()[0];
144 |     if (!context) {
145 |       context = await browser.newContext();
146 |     }
147 |     
148 |     // Add cookies to the context if they are provided in the config
149 |     if (config.cookies && Array.isArray(config.cookies) && config.cookies.length > 0) {
150 |       await addCookiesToContext(context, config.cookies);
151 |     }
152 |     
153 |     let page = context.pages()[0];
154 |     if (!page) {
155 |       page = await context.newPage();
156 |     }
157 | 
158 |     const sessionObj: BrowserSession = {
159 |       browser,
160 |       page,
161 |       sessionId: bbSession.id, 
162 |     };
163 | 
164 |     browsers.set(newSessionId, sessionObj);
165 | 
166 |     if (newSessionId === defaultSessionId) {
167 |       defaultBrowserSession = sessionObj;
168 |     }
169 | 
170 |     setActiveSessionId(newSessionId);
171 |     process.stderr.write(
172 |       `[SessionManager] Session created and active: ${newSessionId}\n`
173 |     );
174 | 
175 |     return sessionObj;
176 |   } catch (creationError) {
177 |     const errorMessage =
178 |       creationError instanceof Error
179 |         ? creationError.message
180 |         : String(creationError);
181 |     process.stderr.write(
182 |       `[SessionManager] Creating session ${newSessionId} failed: ${
183 |         creationError instanceof Error
184 |           ? creationError.message
185 |           : String(creationError)
186 |       }`
187 |     ); 
188 |     throw new Error(
189 |       `Failed to create/connect session ${newSessionId}: ${errorMessage}`
190 |     );
191 |   }
192 | }
193 | 
/**
 * Closes a session's browser if it is still connected.
 *
 * Missing sessions and already-disconnected browsers are ignored. An error
 * raised while closing is logged to stderr as a warning and not rethrown.
 * @param session Session whose browser should be closed (may be null/undefined).
 * @param sessionIdToLog Session ID used only in log output.
 */
async function closeBrowserGracefully(
  session: BrowserSession | undefined | null,
  sessionIdToLog: string
): Promise<void> {
  const stillConnected = session?.browser?.isConnected();
  if (!session || !stillConnected) {
    return;
  }

  process.stderr.write(
    `[SessionManager] Closing browser for session: ${sessionIdToLog}\n`
  );

  try {
    await session.browser.close();
  } catch (closeError) {
    const reason =
      closeError instanceof Error ? closeError.message : String(closeError);
    process.stderr.write(
      `[SessionManager] WARN - Error closing browser for session ${sessionIdToLog}: ${reason}\n`
    );
  }
}
213 | 
/**
 * Returns a usable default session, creating or recreating it when needed.
 *
 * An existing default session is reused (and marked active) when its browser
 * is connected and its page is open. Otherwise the stale session, if any, is
 * closed and dropped, and a new one is created. Creation is attempted twice;
 * the second failure is rethrown with a summary message.
 * @param config Configuration forwarded to createNewBrowserSession.
 * @returns The default browser session.
 * @throws Error when both creation attempts fail.
 */
export async function ensureDefaultSessionInternal(
  config: Config
): Promise<BrowserSession> {
  const sessionId = defaultSessionId;
  const current = defaultBrowserSession;

  // Fast path: the existing default session is considered okay.
  if (current && current.browser.isConnected() && !current.page.isClosed()) {
    setActiveSessionId(sessionId); // Ensure default is marked active
    return current;
  }

  if (current) {
    process.stderr.write(
      `[SessionManager] Default session ${sessionId} is stale, recreating.\n`
    );
    await closeBrowserGracefully(current, sessionId);
    defaultBrowserSession = null;
    browsers.delete(sessionId);
  } else {
    process.stderr.write(
      `[SessionManager] Default session ${sessionId} not found, creating.\n`
    );
  }

  // First attempt
  try {
    const created = await createNewBrowserSession(sessionId, config);
    defaultBrowserSession = created;
    return created;
  } catch (error) {
    const firstMessage = error instanceof Error ? error.message : String(error);
    process.stderr.write(
      `[SessionManager] Initial/Recreation attempt for default session ${sessionId} failed. Error: ${firstMessage}\n`
    );
    process.stderr.write(
      `[SessionManager] Retrying creation of default session ${sessionId} after error...\n`
    );
  }

  // Single retry after a failure
  try {
    const recreated = await createNewBrowserSession(sessionId, config);
    defaultBrowserSession = recreated;
    return recreated;
  } catch (retryError) {
    const finalErrorMessage =
      retryError instanceof Error ? retryError.message : String(retryError);
    process.stderr.write(
      `[SessionManager] Failed to recreate default session ${sessionId} after retry: ${finalErrorMessage}\n`
    );
    throw new Error(
      `Failed to ensure default session ${sessionId} after initial error and retry: ${finalErrorMessage}`
    );
  }
}
276 | 
/**
 * Looks up a session by its local ID and makes it the active session.
 *
 * The default session ID is delegated to ensureDefaultSessionInternal, which
 * may create the session; a failure there is logged and reported as `null`.
 * Any other ID is only looked up in the session map: unknown IDs yield `null`,
 * and stale sessions (disconnected browser or closed page) are closed, removed
 * and also yield `null`.
 * @param sessionId Local session ID to look up.
 * @param config Configuration used when the default session must be created.
 * @returns The usable session, or `null` when none is available.
 */
export async function getSession(
  sessionId: string,
  config: Config
): Promise<BrowserSession | null> {
  if (sessionId === defaultSessionId) {
    try {
      return await ensureDefaultSessionInternal(config);
    } catch (error) {
      // ensureDefaultSessionInternal already logs extensively
      process.stderr.write(
        `[SessionManager] Failed to get default session due to error in ensureDefaultSessionInternal for ${sessionId}. See previous messages for details.\n`
      );
      return null; // Or rethrow if getSession failing for default is critical
    }
  }

  // For non-default sessions
  process.stderr.write(`[SessionManager] Getting session: ${sessionId}\n`);
  const found = browsers.get(sessionId);

  if (!found) {
    process.stderr.write(
      `[SessionManager] WARN - Session not found in map: ${sessionId}\n`
    );
    return null;
  }

  const usable = found.browser.isConnected() && !found.page.isClosed();
  if (usable) {
    // Session appears valid, make it active
    setActiveSessionId(sessionId);
    process.stderr.write(`[SessionManager] Using valid session: ${sessionId}\n`);
    return found;
  }

  // Stale session: close it, forget it, and fall back to the default as active.
  process.stderr.write(
    `[SessionManager] WARN - Found session ${sessionId} is stale, removing.\n`
  );
  await closeBrowserGracefully(found, sessionId);
  browsers.delete(sessionId);
  if (activeSessionId === sessionId) {
    process.stderr.write(
      `[SessionManager] WARN - Invalidated active session ${sessionId}, resetting to default.\n`
    );
    setActiveSessionId(defaultSessionId);
  }
  return null;
}
326 | 
/**
 * Look up a session by ID without ever creating one.
 * The tracked state is left untouched, which makes this suitable for
 * operations such as closing a session where side effects are unwanted.
 * @param sessionId The session ID to retrieve
 * @returns The session if it exists, its browser is connected and its page is
 *   open; null otherwise
 */
export function getSessionReadOnly(sessionId: string): BrowserSession | null {
  const isLive = (candidate: BrowserSession): boolean =>
    candidate.browser.isConnected() && !candidate.page.isClosed();

  // The default session is held in its own reference; when that reference is
  // set it alone decides the answer.
  if (sessionId === defaultSessionId && defaultBrowserSession) {
    return isLive(defaultBrowserSession) ? defaultBrowserSession : null;
  }

  // Everything else is resolved through the browsers map.
  const tracked = browsers.get(sessionId);
  if (tracked && isLive(tracked)) {
    return tracked;
  }
  return null;
}
357 | 
/**
 * Forget a session: drop it from the session map, clear the default-session
 * reference when it is the default, and fall back to the default session ID
 * as the active one when the removed session was active.
 * This does not close the browser; it only updates tracking state.
 * @param sessionId The session ID to clean up
 */
export function cleanupSession(sessionId: string): void {
  process.stderr.write(
    `[SessionManager] Cleaning up session: ${sessionId}\n`
  );

  const wasActive = activeSessionId === sessionId;
  const isDefault = sessionId === defaultSessionId;

  browsers.delete(sessionId);

  if (isDefault) {
    defaultBrowserSession = null;
  }

  if (wasActive) {
    process.stderr.write(
      `[SessionManager] Cleaned up active session ${sessionId}, resetting to default.\n`
    );
    setActiveSessionId(defaultSessionId);
  }
}
384 | 
/**
 * Closes every tracked browser session and resets all tracking state.
 *
 * All closes are started together and awaited as a batch. Afterwards the
 * session map is emptied, the default-session reference is cleared and the
 * active session ID is reset to the default.
 */
export async function closeAllSessions(): Promise<void> {
  process.stderr.write(`[SessionManager] Closing all sessions...\n`);

  // Start every close up front; the helper gives consistent logging/error handling.
  const pendingCloses: Promise<void>[] = Array.from(
    browsers.entries(),
    ([id, session]) => {
      process.stderr.write(`[SessionManager] Closing session: ${id}\n`);
      return closeBrowserGracefully(session, id);
    }
  );

  try {
    await Promise.all(pendingCloses);
  } catch (batchError) {
    // Individual errors are caught and logged by closeBrowserGracefully
    process.stderr.write(
      `[SessionManager] WARN - Some errors occurred during batch session closing. See individual messages.\n`
    );
  }

  browsers.clear();
  defaultBrowserSession = null;
  setActiveSessionId(defaultSessionId); // Reset active session to default
  process.stderr.write(`[SessionManager] All sessions closed and cleared.\n`);
}
410 | 


--------------------------------------------------------------------------------
/browserbase/src/tools/common.ts:
--------------------------------------------------------------------------------
  1 | export {}; // Ensure file is treated as a module 
  2 | 
  3 | import { z } from 'zod';
  4 | import type { Tool, ToolSchema, ToolResult } from "./tool.js"; 
  5 | import type { Context } from '../context.js'; 
  6 | import type { ToolActionResult } from '../context.js'; 
  7 | 
// --- Tool: Wait ---
// Input: a single numeric duration, expressed in seconds.
const WaitInputSchema = z.object({
    time: z.number().describe("Time in seconds")
});
type WaitInput = z.infer<typeof WaitInputSchema>;

// Name, description and input schema under which the wait tool is exposed.
const waitSchema: ToolSchema<typeof WaitInputSchema> = {
    name: "browserbase_wait",
    description: "Wait for a specified time in seconds",
    inputSchema: WaitInputSchema,
};
 19 | 
 20 | // Handle function for Wait
 21 | async function handleWait(context: Context, params: WaitInput): Promise<ToolResult> { // Uses Context, returns ToolResult
 22 |     const action = async (): Promise<ToolActionResult> => {
 23 |         await new Promise(resolve => setTimeout(resolve, params.time * 1000));
 24 |         return { content: [{ type: 'text', text: `Waited for ${params.time} seconds.` }] };
 25 |     };
 26 |     return { action, code: [], captureSnapshot: false, waitForNetwork: false };
 27 | }
 28 | 
// Define tool using handle
// Ties the wait schema to its handler under the 'core' capability.
const waitTool: Tool<typeof WaitInputSchema> = {
    capability: 'core', 
    schema: waitSchema,
    handle: handleWait,
};


// --- Tool: Close ---
// Input: one optional placeholder string; handleClose never reads it.
const CloseInputSchema = z.object({
    random_string: z.string().optional().describe("Dummy parameter") 
});
type CloseInput = z.infer<typeof CloseInputSchema>;

// Name, description and input schema under which the close tool is exposed.
const closeSchema: ToolSchema<typeof CloseInputSchema> = {
    name: "browserbase_close",
    description: "Close the current page...",
    inputSchema: CloseInputSchema,
};
 48 | 
 49 | // Handle function for Close
 50 | async function handleClose(context: Context, params: CloseInput): Promise<ToolResult> {
 51 |     const action = async (): Promise<ToolActionResult> => {
 52 |         const page = await context.getActivePage();
 53 |         if (page && !page.isClosed()) {
 54 |             await page.close();
 55 |             return { content: [{ type: 'text', text: `Page closed.` }] };
 56 |         } else {
 57 |             return { content: [{ type: 'text', text: `No active page to close.` }] };
 58 |         }
 59 |     };
 60 |     return { action, code: [], captureSnapshot: false, waitForNetwork: false };
 61 | }
 62 | 
// Define tool using handle
// Ties the close schema to its handler under the 'core' capability.
const closeTool: Tool<typeof CloseInputSchema> = {
    capability: 'core', // Add capability
    schema: closeSchema,
    handle: handleClose,
};


// --- Tool: Resize ---
// Input: the target viewport width and height, both plain numbers.
const ResizeInputSchema = z.object({
    width: z.number(),
    height: z.number()
});
type ResizeInput = z.infer<typeof ResizeInputSchema>;

// Name, description and input schema under which the resize tool is exposed.
const resizeSchema: ToolSchema<typeof ResizeInputSchema> = {
    name: "browserbase_resize",
    description: "Resize window...",
    inputSchema: ResizeInputSchema,
};
 83 | 
 84 | // Handle function for Resize
 85 | async function handleResize(context: Context, params: ResizeInput): Promise<ToolResult> {
 86 |     const action = async (): Promise<ToolActionResult> => {
 87 |         const page = await context.getActivePage();
 88 |         if (page && !page.isClosed()) {
 89 |             await page.setViewportSize({ width: params.width, height: params.height });
 90 |             return { content: [{ type: 'text', text: `Resized page to ${params.width}x${params.height}.` }] };
 91 |         } else {
 92 |             return { content: [{ type: 'text', text: `No active page to resize.` }] };
 93 |         }
 94 |     };
 95 |     return { action, code: [], captureSnapshot: true, waitForNetwork: false };
 96 | }
 97 | 
// Define tool using handle
// Ties the resize schema to its handler under the 'core' capability.
const resizeTool: Tool<typeof ResizeInputSchema> = {
    capability: 'core', // Add capability
    schema: resizeSchema,
    handle: handleResize,
};


// Export array of tools directly
// (wait, close and resize, in that order).
export default [
    waitTool,
    closeTool,
    resizeTool,
];


--------------------------------------------------------------------------------
/browserbase/src/tools/context.ts:
--------------------------------------------------------------------------------
  1 | import { z } from "zod";
  2 | import type { Tool, ToolSchema, ToolResult } from "./tool.js";
  3 | import type { Context } from "../context.js";
  4 | import type { ToolActionResult } from "../context.js";
  5 | import { Browserbase } from "@browserbasehq/sdk";
  6 | 
// Store contexts in memory 
// Maps a lookup key to a Browserbase context ID. The key is the friendly name
// given at creation time, or the context ID itself when no name was given.
const contexts = new Map<string, string>(); 

// --- Tool: Create Context ---
// Input: an optional friendly name for the new context.
const CreateContextInputSchema = z.object({
  name: z
    .string()
    .optional()
    .describe("Optional friendly name to reference this context later (otherwise, you'll need to use the returned ID)"),
});
type CreateContextInput = z.infer<typeof CreateContextInputSchema>;

// Name, description and input schema under which the create-context tool is exposed.
const createContextSchema: ToolSchema<typeof CreateContextInputSchema> = {
  name: "browserbase_context_create",
  description: "Create a new Browserbase context for reusing cookies, authentication, and cached data across browser sessions",
  inputSchema: CreateContextInputSchema,
};
 24 | 
 25 | async function handleCreateContext(
 26 |   context: Context,
 27 |   params: CreateContextInput
 28 | ): Promise<ToolResult> {
 29 |   try {
 30 |     const config = context.config;
 31 |     
 32 |     if (!config.browserbaseApiKey || !config.browserbaseProjectId) {
 33 |       throw new Error("Browserbase API Key or Project ID is missing in the configuration");
 34 |     }
 35 |     
 36 |     const bb = new Browserbase({
 37 |       apiKey: config.browserbaseApiKey,
 38 |     });
 39 | 
 40 |     console.error("Creating new Browserbase context");
 41 |     const bbContext = await bb.contexts.create({
 42 |       projectId: config.browserbaseProjectId,
 43 |     });
 44 | 
 45 |     console.error(`Successfully created context: ${bbContext.id}`);
 46 |     
 47 |     // Store context ID with optional name if provided
 48 |     const contextName = params.name || bbContext.id;
 49 |     contexts.set(contextName, bbContext.id);
 50 |     
 51 |     const result: ToolActionResult = {
 52 |       content: [
 53 |         {
 54 |           type: "text",
 55 |           text: `Created new Browserbase context with ID: ${bbContext.id}${params.name ? ` and name: ${params.name}` : ''}`,
 56 |         },
 57 |       ],
 58 |     };
 59 | 
 60 |     return {
 61 |       resultOverride: result,
 62 |       action: async () => {
 63 |         console.error("Create Context action");
 64 |         return result;
 65 |       },
 66 |       code: [],
 67 |       captureSnapshot: false,
 68 |       waitForNetwork: false,
 69 |     };
 70 |   } catch (error: any) {
 71 |     console.error(`CreateContext handle failed: ${error.message || error}`);
 72 |     throw new Error(`Failed to create Browserbase context: ${error.message || error}`);
 73 |   }
 74 | }
 75 | 
// --- Tool: Delete Context ---
// Input: a context ID and/or a friendly name. Both are optional in the schema;
// the handler rejects calls that provide neither.
const DeleteContextInputSchema = z.object({
  contextId: z
    .string()
    .optional()
    .describe("The context ID to delete (required if name not provided)"),
  name: z
    .string()
    .optional()
    .describe("The friendly name of the context to delete (required if contextId not provided)"),
});
type DeleteContextInput = z.infer<typeof DeleteContextInputSchema>;

// Name, description and input schema under which the delete-context tool is exposed.
const deleteContextSchema: ToolSchema<typeof DeleteContextInputSchema> = {
  name: "browserbase_context_delete",
  description: "Delete a Browserbase context when you no longer need it",
  inputSchema: DeleteContextInputSchema,
};
 94 | 
 95 | async function handleDeleteContext(
 96 |   context: Context,
 97 |   params: DeleteContextInput
 98 | ): Promise<ToolResult> {
 99 |   try {
100 |     const config = context.config;
101 |     
102 |     if (!config.browserbaseApiKey) {
103 |       throw new Error("Browserbase API Key is missing in the configuration");
104 |     }
105 |     
106 |     if (!params.contextId && !params.name) {
107 |       throw new Error("Missing required argument: either contextId or name must be provided");
108 |     }
109 | 
110 |     // Resolve context ID either directly or by name
111 |     let contextId = params.contextId;
112 |     if (!contextId && params.name) {
113 |       contextId = contexts.get(params.name);
114 |       if (!contextId) {
115 |         throw new Error(`Context with name "${params.name}" not found`);
116 |       }
117 |     }
118 | 
119 |     console.error(`Deleting Browserbase context: ${contextId}`);
120 |     
121 |     // Delete using Browserbase API
122 |     const response = await fetch(`https://api.browserbase.com/v1/contexts/${contextId}`, {
123 |       method: 'DELETE',
124 |       headers: {
125 |         'X-BB-API-Key': config.browserbaseApiKey,
126 |       },
127 |     });
128 |     
129 |     if (response.status !== 204) {
130 |       const errorText = await response.text();
131 |       throw new Error(`Failed to delete context with status ${response.status}: ${errorText}`);
132 |     }
133 |     
134 |     // Remove from local store
135 |     if (params.name) {
136 |       contexts.delete(params.name);
137 |     }
138 |     
139 |     // Delete by ID too (in case it was stored multiple ways)
140 |     for (const [name, id] of contexts.entries()) {
141 |       if (id === contextId) {
142 |         contexts.delete(name);
143 |       }
144 |     }
145 |     
146 |     console.error(`Successfully deleted context: ${contextId}`);
147 |     
148 |     const result: ToolActionResult = {
149 |       content: [
150 |         {
151 |           type: "text",
152 |           text: `Deleted Browserbase context with ID: ${contextId}`,
153 |         },
154 |       ],
155 |     };
156 | 
157 |     return {
158 |       resultOverride: result,
159 |       action: async () => {
160 |         console.error("Delete Context action");
161 |         return result;
162 |       },
163 |       code: [],
164 |       captureSnapshot: false,
165 |       waitForNetwork: false,
166 |     };
167 |   } catch (error: any) {
168 |     console.error(`DeleteContext handle failed: ${error.message || error}`);
169 |     throw new Error(`Failed to delete Browserbase context: ${error.message || error}`);
170 |   }
171 | }
172 | 
/**
 * Resolves a friendly name or a raw context ID to a context ID
 * (exported for use by session.ts).
 *
 * The in-memory store is consulted first, so a stored friendly name always
 * wins, even one that happens to be 32 characters long. Only when nothing is
 * stored under the given key is a 32-character string assumed to be a raw
 * context ID and returned unchanged.
 * @param nameOrId Friendly name or context ID.
 * @returns The resolved context ID, or `undefined` when it cannot be resolved.
 */
export function getContextId(nameOrId: string): string | undefined {
  // Prefer an explicit mapping (covers names and IDs stored under themselves)
  const stored = contexts.get(nameOrId);
  if (stored !== undefined) {
    return stored;
  }

  // Otherwise fall back to the length heuristic for a direct context ID
  return nameOrId.length === 32 ? nameOrId : undefined; // 32 char uuid
}
183 | 
// Define tools
// Each tool ties a schema to its handler under the "core" capability.
const createContextTool: Tool<typeof CreateContextInputSchema> = {
  capability: "core",
  schema: createContextSchema,
  handle: handleCreateContext,
};

const deleteContextTool: Tool<typeof DeleteContextInputSchema> = {
  capability: "core",
  schema: deleteContextSchema,
  handle: handleDeleteContext,
};

// Export as an array of tools (create first, then delete)
export default [createContextTool, deleteContextTool]; 


--------------------------------------------------------------------------------
/browserbase/src/tools/getText.ts:
--------------------------------------------------------------------------------
 1 | import { z } from 'zod';
 2 | import type { Tool, ToolSchema, ToolResult } from "./tool.js";
 3 | import type { Context } from '../context.js'; 
 4 | import type { ToolActionResult } from '../context.js'; 
 5 | 
// --- Tool: Get Text ---
// Input: an optional CSS selector; without one the handler reads the <body>.
const GetTextInputSchema = z.object({
    selector: z.string().optional().describe("Optional CSS selector to get text from. If omitted, gets text from the whole body."),
});
type GetTextInput = z.infer<typeof GetTextInputSchema>;

// Name, description and input schema under which the get-text tool is exposed.
const getTextSchema: ToolSchema<typeof GetTextInputSchema> = {
    name: "browserbase_get_text",
    description: "Extract text content from the page or a specific element.",
    inputSchema: GetTextInputSchema,
};
17 | 
/**
 * Handle function for GetText.
 *
 * Builds a tool result whose action reads the text content of
 * `params.selector` (or of `body` when no selector is given) from the active
 * page, with a 10 second timeout. A null text content is reported as an empty
 * string. The action throws when there is no active page, and logs then
 * rethrows any error raised while reading the text.
 */
async function handleGetText(context: Context, params: GetTextInput): Promise<ToolResult> {
    async function action(): Promise<ToolActionResult> {
        const page = await context.getActivePage();
        if (!page) {
            throw new Error('No active page found for getText');
        }

        // An omitted (or empty) selector means "the whole body".
        const target = params.selector ? params.selector : 'body';

        try {
            const textContent: string | null = await page.textContent(target, { timeout: 10000 });
            return { content: [{ type: 'text', text: textContent ?? "" }] };
        } catch (error) {
            console.error(`GetText action failed: ${error}`);
            throw error; // Rethrow to be caught by Context.run's try/catch around handle/action
        }
    }

    return {
        action,
        code: [],
        captureSnapshot: false,
        waitForNetwork: false,
    };
}
46 | 
// Define tool using handle
// Ties the get-text schema to its handler under the 'core' capability.
const getTextTool: Tool<typeof GetTextInputSchema> = {
    capability: 'core', // Add capability
    schema: getTextSchema,
    handle: handleGetText,
};

// Exported as a one-element array of tools.
export default [getTextTool]; 


--------------------------------------------------------------------------------
/browserbase/src/tools/hover.ts:
--------------------------------------------------------------------------------
1 | import type { Tool } from "./tool.js";
2 | 
/**
 * Placeholder function for hover tool, accepting the flag.
 *
 * Not implemented yet: the `captureSnapshot` flag is ignored and an empty
 * tool list is returned.
 */
export function hover(captureSnapshot: boolean): Tool[] {
    // TODO: Implement hoverTool and potentially use flag
    return [];
}
export default hover; 


--------------------------------------------------------------------------------
/browserbase/src/tools/keyboard.ts:
--------------------------------------------------------------------------------
 1 | import { z } from 'zod'; 
 2 | import { defineTool, type ToolFactory } from './tool.js'; 
 3 | 
 4 | const pressKey: ToolFactory = captureSnapshot => defineTool({
 5 |   capability: 'core',
 6 | 
 7 |   schema: {
 8 |     name: 'browserbase_press_key',
 9 |     description: 'Press a key on the keyboard',
10 |     inputSchema: z.object({
11 |       key: z.string().describe('Name of the key to press or a character to generate, such as `ArrowLeft` or `a`'),
12 |     }),
13 |   },
14 | 
15 |   handle: async (context, params) => {
16 |     const page = await context.getActivePage();
17 |     if (!page) {
18 |       throw new Error('No active page found for pressKey');
19 |     }
20 | 
21 |     const code = [
22 |       `// Press ${params.key}`,
23 |       `await page.keyboard.press('${params.key.replace(/'/g, "\\'")}');`, 
24 |     ];
25 | 
26 |     const action = () => page.keyboard.press(params.key); // Changed from tab.page to page
27 | 
28 |     return {
29 |       code,
30 |       action,
31 |       captureSnapshot, 
32 |       waitForNetwork: true 
33 |     };
34 |   },
35 | });
36 | 
// Flag handed to the tool factory below; it ends up in each tool result's
// `captureSnapshot` field.
const captureSnapshotValue = true;

// Exported as a one-element array of tools.
export default [
  pressKey(captureSnapshotValue),
]; 


--------------------------------------------------------------------------------
/browserbase/src/tools/navigate.ts:
--------------------------------------------------------------------------------
  1 | import { z } from 'zod';
  2 | import { defineTool, type ToolFactory } from './tool.js';
  3 | import type { ToolActionResult } from '../context.js';
  4 | 
  5 | const navigate: ToolFactory = captureSnapshot => defineTool({
  6 |   capability: 'core',
  7 | 
  8 |   schema: {
  9 |     name: 'browserbase_navigate',
 10 |     description: 'Navigate to a URL',
 11 |     inputSchema: z.object({
 12 |       url: z.string().describe('The URL to navigate to'),
 13 |     }),
 14 |   },
 15 | 
 16 |   handle: async (context, params) => {
 17 |     const page = await context.getActivePage();
 18 |     if (!page) {
 19 |       throw new Error('No active page found for navigate');
 20 |     }
 21 |     const action = async (): Promise<ToolActionResult> => {
 22 |       await page.goto(params.url);
 23 |       return { content: [{ type: 'text', text: `Navigated to ${params.url}` }] };
 24 |     };
 25 | 
 26 |     const code = [
 27 |       `// Navigate to ${params.url}`,
 28 |       `await page.goto('${params.url}');`,
 29 |     ];
 30 | 
 31 |     return {
 32 |       action,
 33 |       code,
 34 |       captureSnapshot,
 35 |       waitForNetwork: false,
 36 |     };
 37 |   },
 38 | });
 39 | 
 40 | const goBack: ToolFactory = captureSnapshot => defineTool({
 41 |   capability: 'history',
 42 |   schema: {
 43 |     name: 'browserbase_navigate_back',
 44 |     description: 'Go back to the previous page',
 45 |     inputSchema: z.object({}),
 46 |   },
 47 | 
 48 |   handle: async context => {
 49 |     const page = await context.getActivePage();
 50 |     if (!page) {
 51 |       throw new Error('No active page found for goBack');
 52 |     }
 53 |     const action = async (): Promise<ToolActionResult> => {
 54 |       await page.goBack();
 55 |       return { content: [{ type: 'text', text: 'Navigated back' }] };
 56 |     };
 57 |     const code = [
 58 |       `// Navigate back`,
 59 |       `await page.goBack();`,
 60 |     ];
 61 | 
 62 |     return {
 63 |       action,
 64 |       code,
 65 |       captureSnapshot,
 66 |       waitForNetwork: true,
 67 |     };
 68 |   },
 69 | });
 70 | 
 71 | const goForward: ToolFactory = captureSnapshot => defineTool({
 72 |   capability: 'history',
 73 |   schema: {
 74 |     name: 'browserbase_navigate_forward',
 75 |     description: 'Go forward to the next page',
 76 |     inputSchema: z.object({}),
 77 |   },
 78 |   handle: async context => {
 79 |     const page = await context.getActivePage();
 80 |     if (!page) {
 81 |       throw new Error('No active page found for goForward');
 82 |     }
 83 |     const action = async (): Promise<ToolActionResult> => {
 84 |       await page.goForward();
 85 |       return { content: [{ type: 'text', text: 'Navigated forward' }] };
 86 |     };
 87 |     const code = [
 88 |       `// Navigate forward`,
 89 |       `await page.goForward();`,
 90 |     ];
 91 |     return {
 92 |       action,
 93 |       code,
 94 |       captureSnapshot,
 95 |       waitForNetwork: true,
 96 |     };
 97 |   },
 98 | });
 99 | 
// Every navigation tool exported from this module is built with this flag.
const captureSnapshotValue = true;

// Tool instances in export order: navigate, back, forward.
const navigationTools = [
  navigate(captureSnapshotValue),
  goBack(captureSnapshotValue),
  goForward(captureSnapshotValue),
];

export default navigationTools;


--------------------------------------------------------------------------------
/browserbase/src/tools/selectOption.ts:
--------------------------------------------------------------------------------
1 | import type { Tool } from "./tool.js";
2 | 
/**
 * Placeholder factory for the select-option tool.
 *
 * @param captureSnapshot Accepted but not used yet.
 * @returns A new, empty tool list on every call.
 */
export function selectOption(captureSnapshot: boolean): Tool[] {
    // TODO: Implement selectOptionTool and potentially use flag
    void captureSnapshot;
    const tools: Tool[] = [];
    return tools;
}
export default selectOption;


--------------------------------------------------------------------------------
/browserbase/src/tools/session.ts:
--------------------------------------------------------------------------------
  1 | import { z } from "zod";
  2 | import type { Tool, ToolSchema, ToolResult } from "./tool.js"; 
  3 | import type { Context } from "../context.js"; 
  4 | import type { ToolActionResult } from "../context.js"; 
  5 | 
  6 | // Import SessionManager functions
  7 | import {
  8 |   createNewBrowserSession,
  9 |   defaultSessionId,
 10 |   ensureDefaultSessionInternal,
 11 |   cleanupSession,
 12 |   type BrowserSession,
 13 | } from "../sessionManager.js";
 14 | 
 15 | // --- Tool: Create Session ---
 16 | const CreateSessionInputSchema = z.object({
 17 |   // Keep sessionId optional, but clarify its role
 18 |   sessionId: z
 19 |     .string()
 20 |     .optional()
 21 |     .describe(
 22 |       "Optional session ID to use/reuse. If not provided or invalid, a new session is created."
 23 |     ),
 24 | });
 25 | type CreateSessionInput = z.infer<typeof CreateSessionInputSchema>;
 26 | 
 27 | const createSessionSchema: ToolSchema<typeof CreateSessionInputSchema> = {
 28 |   name: "browserbase_session_create", 
 29 |   description:
 30 |     "Create or reuse a cloud browser session using Browserbase. Updates the active session.", 
 31 |   inputSchema: CreateSessionInputSchema,
 32 | };
 33 | 
 34 | 
 35 | // Handle function for CreateSession using SessionManager
 36 | async function handleCreateSession(
 37 |   context: Context,
 38 |   params: CreateSessionInput
 39 | ): Promise<ToolResult> {
 40 |   const action = async (): Promise<ToolActionResult> => {
 41 |     try {
 42 |       const config = context.config; // Get config from context
 43 |       let targetSessionId: string;
 44 | 
 45 |       if (params.sessionId) {
 46 |         const projectId = config.browserbaseProjectId || '';
 47 |         targetSessionId = `${params.sessionId}_${projectId}`;
 48 |         process.stderr.write(
 49 |           `[tool.createSession] Attempting to create/assign session with specified ID: ${targetSessionId}`
 50 |         );
 51 |       } else {
 52 |         targetSessionId = defaultSessionId;
 53 |       }
 54 | 
 55 |       let session: BrowserSession;
 56 |       if (targetSessionId === defaultSessionId) {
 57 |         session = await ensureDefaultSessionInternal(config);
 58 |       } else {
 59 |         session = await createNewBrowserSession(targetSessionId, config);
 60 |       }
 61 | 
 62 |       if (!session || !session.browser || !session.page || !session.sessionId) {
 63 |         throw new Error(
 64 |           `SessionManager failed to return a valid session object with actualSessionId for ID: ${targetSessionId}`
 65 |         );
 66 |       }
 67 | 
 68 |       context.currentSessionId = targetSessionId;
 69 |       process.stderr.write(
 70 |         `[tool.connected] Successfully connected to Browserbase session. Internal ID: ${targetSessionId}, Actual ID: ${session.sessionId}`
 71 |       );
 72 | 
 73 |       process.stderr.write(`[SessionManager] Browserbase Live Debugger URL: https://www.browserbase.com/sessions/${session.sessionId}`);
 74 | 
 75 |       return {
 76 |         content: [
 77 |           {
 78 |             type: "text",
 79 |             text: `https://www.browserbase.com/sessions/${session.sessionId}`,
 80 |           },
 81 |         ],
 82 |       };
 83 |     } catch (error: any) {
 84 |       process.stderr.write(
 85 |         `[tool.createSession] Action failed: ${
 86 |           error.message || String(error)
 87 |         }`
 88 |       );
 89 |       // Re-throw to be caught by Context.run's error handling for actions
 90 |       throw new Error(
 91 |         `Failed to create Browserbase session: ${
 92 |           error.message || String(error)
 93 |         }`
 94 |       );
 95 |     }
 96 |   };
 97 | 
 98 |   // Return the ToolResult structure expected by Context.run
 99 |   return {
100 |     action: action, 
101 |     captureSnapshot: false, 
102 |     code: [],  
103 |     waitForNetwork: false, 
104 |   };
105 | }
106 | 
107 | // Define tool using handle
108 | const createSessionTool: Tool<typeof CreateSessionInputSchema> = {
109 |   capability: "core", // Add capability
110 |   schema: createSessionSchema,
111 |   handle: handleCreateSession,
112 | };
113 | 
// --- Tool: Close Session ---

/** Input accepted by the close-session tool: a single optional dummy string. */
const CloseSessionInputSchema = z.object({
  random_string: z.string().optional().describe("Dummy parameter to ensure consistent tool call format."),
});
type CloseSessionInput = z.infer<typeof CloseSessionInputSchema>;

/** Name, description and input schema of the close-session tool. */
const closeSessionSchema: ToolSchema<typeof CloseSessionInputSchema> = {
  name: "browserbase_session_close",
  description:
    "Closes the current Browserbase session by disconnecting the Playwright browser. This will terminate the recording for the session.",
  inputSchema: CloseSessionInputSchema,
};
129 | 
/**
 * Best-effort text for a value caught while closing a session: its non-empty
 * string `message` when present, otherwise the value's string form.
 */
function closeSessionErrorText(error: unknown): string {
  const message = (error as { message?: unknown } | null | undefined)?.message;
  return typeof message === "string" && message ? message : String(error);
}

/**
 * Builds the ToolResult for `browserbase_session_close`.
 *
 * The returned action looks up the active browser without creating a session,
 * closes it when one exists, always resets `context.currentSessionId` to the
 * default session ID, and clears the latest snapshot when the context was on
 * a non-default session.
 *
 * @param context Execution context whose current session is closed and reset.
 * @param _params Validated tool input; unused.
 * @returns A ToolResult with one code comment line, no snapshot capture and no
 *   network wait. The action rejects when a browser was found but closing it
 *   failed; the context is reset before that error is thrown.
 */
async function handleCloseSession(
  context: Context,
  _params: CloseSessionInput
): Promise<ToolResult> {
  const code = [`// Attempting to close the current Browserbase session.`];

  // process.stderr.write() does not append a line terminator, so each entry
  // is newline-terminated here to keep consecutive log entries separate.
  const log = (entry: string): void => {
    process.stderr.write(`${entry}\n`);
  };

  const action = async (): Promise<ToolActionResult> => {
    // Capture the context's session key before anything below resets it.
    const previousSessionId = context.currentSessionId;
    let browser: BrowserSession["browser"] | null = null;
    let browserClosedSuccessfully = false;
    let browserCloseErrorMessage = "";

    // Step 1: Look up the active browser WITHOUT creating a new session.
    try {
      browser = context.getActiveBrowserReadOnly();
    } catch (error: unknown) {
      // Without a browser there is nothing to close; the context is still reset below.
      log(
        `[tool.closeSession] Error retrieving active browser (session ID was ${previousSessionId || 'default/unknown'}): ${closeSessionErrorText(error)}`
      );
    }

    // Step 2: Close the browser when one was found.
    if (browser) {
      try {
        log(
          `[tool.closeSession] Attempting to close browser for session: ${previousSessionId || 'default (actual might differ)'}`
        );
        await browser.close();
        browserClosedSuccessfully = true;
        log(
          `[tool.closeSession] Browser connection for session (was ${previousSessionId}) closed.`
        );

        // Clean up the session from tracking
        cleanupSession(previousSessionId);

        // previousSessionId is the context's internal key, not the Browserbase
        // session ID, so it cannot form a per-session replay URL; point at the
        // sessions list instead.
        log(
          `[tool.closeSession] View session replays at https://browserbase.com/sessions`
        );
      } catch (error: unknown) {
        browserCloseErrorMessage = closeSessionErrorText(error);
        log(
          `[tool.closeSession] Error during browser.close() for session (was ${previousSessionId}): ${browserCloseErrorMessage}`
        );
      }
    } else {
      log(
        `[tool.closeSession] No active browser instance found to close. (Session ID in context was: ${previousSessionId || 'default/unknown'}).`
      );
    }

    // Step 3: Always reset the context to the default session, and clear the
    // snapshot when the context was on a specific (non-default) session.
    // Re-read after the awaited close so the reset reflects the context as it is now.
    const oldContextSessionId = context.currentSessionId;
    context.currentSessionId = defaultSessionId;
    if (oldContextSessionId && oldContextSessionId !== defaultSessionId) {
      context.clearLatestSnapshot();
      log(
        `[tool.closeSession] Snapshot cleared for previous session: ${oldContextSessionId}.`
      );
    }
    log(
      `[tool.closeSession] Session context reset to default. Previous context session ID was ${oldContextSessionId || 'default/unknown'}.`
    );

    // Step 4: Report the outcome.
    if (browser && !browserClosedSuccessfully) {
      // A close was attempted but failed.
      throw new Error(
        `Failed to close the Browserbase browser (session ID in context was ${previousSessionId || 'default/unknown'}). Error: ${browserCloseErrorMessage}. Session context has been reset to default.`
      );
    }

    if (browserClosedSuccessfully) {
      let successMessage = `Browserbase session (associated with context ID ${previousSessionId || 'default'}) closed successfully. Context reset to default.`;
      if (previousSessionId && previousSessionId !== defaultSessionId) {
        successMessage += ` If this was a uniquely named session (${previousSessionId}), view replay (if available) at https://browserbase.com/sessions`;
      }
      return { content: [{ type: "text", text: successMessage }] };
    }

    // No browser was found.
    let infoMessage = "No active browser instance was found to close. Session context has been reset to default.";
    if (previousSessionId && previousSessionId !== defaultSessionId) {
      // A specific session was in context, but no browser existed for it.
      infoMessage = `No active browser found for session ID '${previousSessionId}' in context. The context has been reset to default.`;
    }
    return { content: [{ type: "text", text: infoMessage }] };
  };

  return {
    action,
    code,
    captureSnapshot: false,
    waitForNetwork: false,
  };
}

/** Tool definition wiring the close-session schema to its handler. */
const closeSessionTool: Tool<typeof CloseSessionInputSchema> = {
  capability: "core",
  schema: closeSessionSchema,
  handle: handleCloseSession,
};

export default [createSessionTool, closeSessionTool];


--------------------------------------------------------------------------------
/browserbase/src/tools/snapshot.ts:
--------------------------------------------------------------------------------
  1 | import { z } from "zod";
  2 | import type {
  3 |   TextContent,
  4 |   ImageContent,
  5 | } from "@modelcontextprotocol/sdk/types.js";
  6 | import type { Locator, PageScreenshotOptions } from "playwright-core";
  7 | 
  8 | import { defineTool, type ToolResult,  } from "./tool.js";
  9 | import type { Context, ToolActionResult } from "../context.js"; 
 10 | import { PageSnapshot } from "../pageSnapshot.js"; 
 11 | import { outputFile } from "../config.js"; 
 12 | 
 13 | // --- Tool: Snapshot ---
 14 | const SnapshotInputSchema = z.object({});
 15 | type SnapshotInput = z.infer<typeof SnapshotInputSchema>;
 16 | 
 17 | const snapshot = defineTool<typeof SnapshotInputSchema>({
 18 |   capability: "core",
 19 |   schema: {
 20 |     name: "browserbase_snapshot",
 21 |     description:
 22 |       "Capture a new accessibility snapshot of the current page state. Use this if the page has changed to ensure subsequent actions use an up-to-date page representation.",
 23 |     inputSchema: SnapshotInputSchema,
 24 |   },
 25 | 
 26 |   handle: async (
 27 |     context: Context,
 28 |     params: SnapshotInput
 29 |   ): Promise<ToolResult> => {
 30 |     const action = async (): Promise<ToolActionResult> => {
 31 |       const content: (TextContent | ImageContent)[] = [
 32 |         { type: "text", text: "Accessibility snapshot captured." },
 33 |       ];
 34 |       return { content };
 35 |     };
 36 | 
 37 |     return {
 38 |       action,
 39 |       code: [`// Request accessibility snapshot`],
 40 |       captureSnapshot: true,
 41 |       waitForNetwork: false,
 42 |       resultOverride: {
 43 |         content: [{ type: "text", text: "Accessibility snapshot initiated." }],
 44 |       },
 45 |     };
 46 |   },
 47 | });
 48 | 
 49 | // --- Element Schema & Types ---
 50 | const elementSchema = z.object({
 51 |   element: z.string().describe("Human-readable element description"),
 52 |   ref: z
 53 |     .string()
 54 |     .describe("Exact target element reference from the page snapshot"),
 55 | });
 56 | type ElementInput = z.infer<typeof elementSchema>;
 57 | 
 58 | // --- Tool: Click (Adapted Handle, Example Action) ---
 59 | const click = defineTool({
 60 |   capability: "core",
 61 |   schema: {
 62 |     name: "browserbase_click",
 63 |     description: "Perform click on a web page using ref",
 64 |     inputSchema: elementSchema,
 65 |   },
 66 |   handle: async (
 67 |     context: Context,
 68 |     params: ElementInput
 69 |   ): Promise<ToolResult> => {
 70 |     // Get locator directly from snapshot
 71 |     const snapshot = context.snapshotOrDie();
 72 |     const locator = snapshot.refLocator(params.ref);
 73 | 
 74 |     const code = [
 75 |       `// Click ${params.element}`,
 76 |       // Use generateLocator for code string
 77 |       `// await page.${await generateLocator(locator)}.click();`,
 78 |     ];
 79 | 
 80 |     const action = async (): Promise<ToolActionResult> => {
 81 |       try {
 82 |         // Use the locator directly for the action
 83 |         await locator.click({ force: true, timeout: 30000 }); // Increased timeout like logs
 84 |       } catch (actionError) {
 85 |         const errorMessage =
 86 |           actionError instanceof Error
 87 |             ? actionError.message
 88 |             : String(actionError);
 89 |         throw new Error(
 90 |           `Failed to click element '${params.element}'. Error: ${errorMessage}`
 91 |         );
 92 |       }
 93 |       return {
 94 |         content: [{ type: "text", text: `Clicked ${params.element}` }],
 95 |       };
 96 |     };
 97 | 
 98 |     return {
 99 |       code,
100 |       action,
101 |       captureSnapshot: true,
102 |       waitForNetwork: true,
103 |     };
104 |   },
105 | });
106 | 
// --- Tool: Drag (Adapted Handle, Example Action) ---

/** Input for the drag tool: description and snapshot ref for both endpoints. */
const dragInputSchema = z.object({
  startElement: z.string().describe("Source element description"),
  startRef: z.string().describe("Exact source element reference from the page snapshot"),
  endElement: z.string().describe("Target element description"),
  endRef: z.string().describe("Exact target element reference from the page snapshot"),
});
type DragInput = z.infer<typeof dragInputSchema>;

/**
 * `browserbase_drag` tool: resolves both refs against the current snapshot and
 * drags the start locator onto the end locator with a 5 second timeout.
 */
const drag = defineTool<typeof dragInputSchema>({
  capability: "core",
  schema: {
    name: "browserbase_drag",
    description: "Perform drag and drop between two elements using ref.",
    inputSchema: dragInputSchema,
  },
  handle: async (context: Context, params: DragInput): Promise<ToolResult> => {
    const pageSnapshot = context.snapshotOrDie();
    const source = pageSnapshot.refLocator(params.startRef);
    const destination = pageSnapshot.refLocator(params.endRef);

    // Locator strings are generated one after the other, source first.
    const sourceCode = await generateLocator(source);
    const destinationCode = await generateLocator(destination);
    const code = [
      `// Drag ${params.startElement} to ${params.endElement}`,
      `// await page.${sourceCode}.dragTo(page.${destinationCode});`,
    ];

    const action = async (): Promise<ToolActionResult> => {
      try {
        await source.dragTo(destination, { timeout: 5000 });
      } catch (dragError) {
        const errorMsg =
          dragError instanceof Error ? dragError.message : String(dragError);
        throw new Error(
          `Failed to drag '${params.startElement}' to '${params.endElement}'. Error: ${errorMsg}`
        );
      }

      const summary = `Dragged ${params.startElement} to ${params.endElement}`;
      return { content: [{ type: "text", text: summary }] };
    };

    return { action, code, captureSnapshot: true, waitForNetwork: true };
  },
});
165 | 
// --- Tool: Hover (Adapted Handle, Example Action) ---

/**
 * `browserbase_hover` tool: resolves `ref` against the current snapshot and
 * hovers the resulting locator with a 5 second timeout.
 */
const hover = defineTool<typeof elementSchema>({
  capability: "core",
  schema: {
    name: "browserbase_hover",
    description: "Hover over element on page using ref.",
    inputSchema: elementSchema,
  },
  handle: async (
    context: Context,
    params: ElementInput
  ): Promise<ToolResult> => {
    const target = context.snapshotOrDie().refLocator(params.ref);

    const code: string[] = [];
    code.push(`// Hover over ${params.element}`);
    // Emitted as a comment, built from the locator's generated string.
    code.push(`// await page.${await generateLocator(target)}.hover();`);

    const action = async (): Promise<ToolActionResult> => {
      try {
        await target.hover({ timeout: 5000 });
      } catch (hoverError) {
        let errorMsg = String(hoverError);
        if (hoverError instanceof Error) {
          errorMsg = hoverError.message;
        }
        throw new Error(
          `Failed to hover over element '${params.element}'. Error: ${errorMsg}`
        );
      }
      return {
        content: [{ type: "text", text: `Hovered over: ${params.element}` }],
      };
    };

    return { action, code, captureSnapshot: true, waitForNetwork: true };
  },
});
207 | 
// --- Tool: Type (Adapted Handle, Example Action) ---

/** Input for the type tool: element/ref plus the text and typing options. */
const typeSchema = elementSchema.extend({
  text: z.string().describe("Text to type into the element"),
  submit: z
    .boolean()
    .optional()
    .describe("Whether to submit entered text (press Enter after)"),
  slowly: z
    .boolean()
    .optional()
    .default(true)
    .describe("Whether to type one character at a time."),
});
type TypeInput = z.infer<typeof typeSchema>;

/**
 * Renders `text` as a single-quoted JavaScript string literal for the
 * generated code lines. Backslashes are escaped first so the later escapes
 * are not doubled; quotes and line terminators are escaped so the literal
 * stays valid and on one line.
 */
function toSingleQuotedLiteral(text: string): string {
  const escaped = text
    .replace(/\\/g, "\\\\")
    .replace(/'/g, "\\'")
    .replace(/\n/g, "\\n")
    .replace(/\r/g, "\\r")
    .replace(/\u2028/g, "\\u2028")
    .replace(/\u2029/g, "\\u2029");
  return `'${escaped}'`;
}

/**
 * `browserbase_type` tool: resolves `ref` against the current snapshot and
 * enters `text`, either key by key (`slowly`, the default) or via `fill`,
 * optionally pressing Enter afterwards.
 */
const type = defineTool<typeof typeSchema>({
  capability: "core",
  schema: {
    name: "browserbase_type",
    description: "Type text into editable element using ref.",
    inputSchema: typeSchema,
  },
  handle: async (context: Context, params: TypeInput): Promise<ToolResult> => {
    // Get locator directly from snapshot
    const pageSnapshot = context.snapshotOrDie();
    const locator = pageSnapshot.refLocator(params.ref);

    const code: string[] = [];
    const steps: (() => Promise<void>)[] = [];
    const textLiteral = toSingleQuotedLiteral(params.text);

    if (params.slowly) {
      code.push(
        `// Press "${params.text}" sequentially into "${params.element}"`
      );
      code.push(
        `// await page.${await generateLocator(
          locator
        )}.pressSequentially(${textLiteral});`
      );
      steps.push(() =>
        locator.pressSequentially(params.text, { delay: 50 })
      );
    } else {
      code.push(`// Fill "${params.text}" into "${params.element}"`);
      code.push(
        `// await page.${await generateLocator(locator)}.fill(${textLiteral});`
      );
      steps.push(async () => {
        await locator.waitFor({ state: "visible" });
        if (!(await locator.isEditable())) {
          throw new Error(
            `Element '${params.element}' was visible but not editable.`
          );
        }
        // Clear the field first, then fill; both are forced.
        await locator.fill("", { force: true, timeout: 5000 });
        await locator.fill(params.text, { force: true, timeout: 5000 });
      });
    }

    if (params.submit) {
      code.push(`// Submit text`);
      code.push(
        `// await page.${await generateLocator(locator)}.press('Enter');`
      );
      steps.push(() => locator.press("Enter", { timeout: 5000 }));
    }

    const action = async (): Promise<ToolActionResult> => {
      try {
        // Run the steps strictly in order; the first failure stops the rest.
        for (const step of steps) {
          await step();
        }
      } catch (typeError) {
        const errorMsg =
          typeError instanceof Error ? typeError.message : String(typeError);
        throw new Error(
          `Failed to type into or submit element '${params.element}'. Error: ${errorMsg}`
        );
      }
      return {
        content: [
          {
            type: "text",
            text: `Typed "${params.text}" into: ${params.element}${
              params.submit ? " and submitted" : ""
            }`,
          },
        ],
      };
    };

    return { action, code, captureSnapshot: true, waitForNetwork: true };
  },
});
303 | 
// --- Tool: Select Option (Adapted Handle, Example Action) ---

/** Input for the select-option tool: element/ref plus the values to select. */
const selectOptionSchema = elementSchema.extend({
  values: z.array(z.string()).describe("Array of values to select in the dropdown."),
});
type SelectOptionInput = z.infer<typeof selectOptionSchema>;

/**
 * `browserbase_select_option` tool: resolves `ref` against the current
 * snapshot, waits for the locator to be visible, then selects `values`
 * (5 second timeout on each step).
 */
const selectOption = defineTool<typeof selectOptionSchema>({
  capability: "core",
  schema: {
    name: "browserbase_select_option",
    description: "Select an option in a dropdown using ref.",
    inputSchema: selectOptionSchema,
  },
  handle: async (
    context: Context,
    params: SelectOptionInput
  ): Promise<ToolResult> => {
    const dropdown = context.snapshotOrDie().refLocator(params.ref);

    // The values are rendered with JSON.stringify in the commented code line.
    const valueList = params.values.join(", ");
    const locatorSource = await generateLocator(dropdown);
    const code = [
      `// Select options [${valueList}] in ${params.element}`,
      `// await page.${locatorSource}.selectOption(${JSON.stringify(params.values)});`,
    ];

    async function chooseValues(): Promise<ToolActionResult> {
      try {
        await dropdown.waitFor({ state: "visible", timeout: 5000 });
        await dropdown.selectOption(params.values, { timeout: 5000 });
      } catch (selectError) {
        let errorMsg: string;
        if (selectError instanceof Error) {
          errorMsg = selectError.message;
        } else {
          errorMsg = String(selectError);
        }
        throw new Error(
          `Failed to select option(s) in element '${params.element}'. Error: ${errorMsg}`
        );
      }
      return {
        content: [
          { type: "text", text: `Selected options in: ${params.element}` },
        ],
      };
    }

    return { action: chooseValues, code, captureSnapshot: true, waitForNetwork: true };
  },
});
359 | 
// --- Tool: Screenshot (Adapted Handle, Example Action) ---

/** Input for the screenshot tool; `element` and `ref` go together or not at all. */
const screenshotSchema = z.object({
  raw: z
    .boolean()
    .optional()
    .describe("Whether to return without compression (PNG). Default is false (JPEG)."),
  element: z.string().optional().describe("Human-readable element description."),
  ref: z.string().optional().describe("Exact target element reference from the page snapshot."),
});

type ScreenshotInput = z.infer<typeof screenshotSchema>;

/**
 * `browserbase_take_screenshot` tool: screenshots the active page, or the
 * element resolved from `ref`, passing the output file path in the screenshot
 * options. The image is returned as base64 unless the tool config sets
 * `omitBase64`.
 */
const screenshot = defineTool<typeof screenshotSchema>({
  capability: "core",
  schema: {
    name: "browserbase_take_screenshot",
    description: `Take a screenshot of the current page or element using ref.`,
    inputSchema: screenshotSchema,
  },
  handle: async (
    context: Context,
    params: ScreenshotInput
  ): Promise<ToolResult> => {
    const hasElement = Boolean(params.element);
    const hasRef = Boolean(params.ref);
    if (hasElement !== hasRef) {
      throw new Error("Both element and ref must be provided or neither.");
    }

    const page = await context.getActivePage();
    if (!page) {
      throw new Error("No active page found for screenshot");
    }

    // A snapshot is only needed when a ref has to be resolved.
    const pageSnapshot: PageSnapshot | null = params.ref
      ? context.snapshotOrDie()
      : null;

    const fileType = params.raw ? "png" : "jpeg";
    const fileName = await outputFile(
      context.config,
      `screenshot-${Date.now()}.${fileType}`
    );

    // Key order matters: these options are serialized into the code lines below.
    // `quality` only applies to jpeg.
    const options: PageScreenshotOptions =
      fileType === "jpeg"
        ? { scale: "css", timeout: 15000, type: "jpeg", quality: 50, path: fileName }
        : { scale: "css", timeout: 15000, type: "png", path: fileName };

    const subject = params.element && params.ref ? params.element : "viewport";
    const code: string[] = [`// Screenshot ${subject} and save it as ${fileName}`];

    // A locator exists only when both a ref and a snapshot are available.
    const locator =
      params.ref && pageSnapshot ? pageSnapshot.refLocator(params.ref) : null;

    const optionsJson = JSON.stringify(options);
    if (locator) {
      const locatorSource = await generateLocator(locator);
      code.push(`// await page.${locatorSource}.screenshot(${optionsJson});`);
    } else {
      code.push(`// await page.screenshot(${optionsJson});`);
    }

    const action = async (): Promise<ToolActionResult> => {
      const omitBase64 =
        context.config.tools?.browserbase_take_screenshot?.omitBase64;

      // Element screenshot when a locator was resolved, page screenshot otherwise.
      const screenshotBuffer = locator
        ? await locator.screenshot(options)
        : await page.screenshot(options);

      if (omitBase64) {
        // Nothing is returned inline when base64 output is disabled.
        return { content: [] };
      }

      const mimeType = fileType === "png" ? `image/png` : `image/jpeg`;
      return {
        content: [
          {
            type: "image",
            format: fileType,
            mimeType,
            data: screenshotBuffer.toString("base64"),
          },
        ],
      };
    };

    return {
      code,
      action,
      captureSnapshot: true,
      waitForNetwork: false,
    };
  },
});
494 | 
/**
 * Returns the locator's generated code string.
 *
 * Calls `_generateLocatorString()`, which is not on the `Locator` type and is
 * therefore reached through an `any` cast (presumably Playwright-internal;
 * verify when upgrading).
 */
export async function generateLocator(locator: Locator): Promise<string> {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const untypedLocator = locator as any;
  return untypedLocator._generateLocatorString();
}
498 | 
// Tools exported by this module, in this order.
const snapshotTools = [snapshot, click, drag, hover, type, selectOption, screenshot];

export default snapshotTools;


--------------------------------------------------------------------------------
/browserbase/src/tools/tool.ts:
--------------------------------------------------------------------------------
 1 | import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types.js';
 2 | import type { z } from 'zod';
 3 | import type { Context } from '../context.js';
 4 | import type * as playwright from 'playwright';
 5 | import type { ToolCapability } from '../config.js'; 
 6 | import type { BrowserSession } from '../sessionManager.js'; 
 7 | import type { Server } from '@modelcontextprotocol/sdk/server/index.js'; 
 8 | import type { Config } from '../config.js'; 
 9 | 
/** Constraint for a tool's input schema: any zod schema. */
export type InputType = z.Schema;

/** Name, description and input schema describing one tool. */
export type ToolSchema<Input extends InputType> = {
  name: string;
  description: string;
  inputSchema: Input;
};

/** Modal state carrying a Playwright file chooser. */
export type FileUploadModalState = {
  type: 'fileChooser';
  description: string;
  fileChooser: playwright.FileChooser;
};

/** Modal state carrying a Playwright dialog. */
export type DialogModalState = {
  type: 'dialog';
  description: string;
  dialog: playwright.Dialog;
};

/** Any modal state, discriminated by `type`. */
export type ModalState =
  | FileUploadModalState
  | DialogModalState;

/** What a tool action resolves to: optional text/image content, or nothing. */
export type ToolActionResult =
  | { content?: (ImageContent | TextContent)[] }
  | undefined
  | void;

/** Value returned by a tool's `handle`. */
export type ToolResult = {
  /** Generated code lines. */
  code: string[];
  /** Optional deferred work for the tool. */
  action?: () => Promise<ToolActionResult>;
  captureSnapshot: boolean;
  waitForNetwork: boolean;
  /** Optional result in the same shape an action resolves to. */
  resultOverride?: ToolActionResult;
};
42 | 
/**
 * A tool definition: its capability tag, its schema, an optional modal-state
 * kind it clears, and the async handler that receives the `Context` together
 * with the parsed (`z.output`) input.
 */
export type Tool<Input extends InputType = InputType> = {
    capability: ToolCapability;
    schema: ToolSchema<Input>;
    clearsModalState?: ModalState['type'];
    handle: (context: Context, params: z.output<Input>) => Promise<ToolResult>;
  };
  
  /** Builds a tool given a boolean `snapshot` flag. */
  export type ToolFactory = (snapshot: boolean) => Tool<any>;
  
  /**
   * Identity helper: returns `tool` unchanged. Its only effect is at the type
   * level, where `Input` is inferred from the tool's schema.
   */
  export function defineTool<Input extends InputType>(tool: Tool<Input>): Tool<Input> {
    return tool;
  }
  
export {}; // Ensure this is treated as a module 
57 | 
/**
 * Execution context handed to a tool.
 *
 * Bundles the session's page and browser (typed from `BrowserSession`), the MCP
 * `Server`, the session id, the `Config`, and the main `Context` instance.
 */
export interface ToolContext {
    page: BrowserSession['page'];
    browser: BrowserSession['browser'];
    server: Server;
    sessionId: string;
    config: Config;
    context: Context; // The main context instance
} 


--------------------------------------------------------------------------------
/browserbase/src/tools/toolUtils.ts:
--------------------------------------------------------------------------------
 1 | import { CallToolResult, TextContent } from "@modelcontextprotocol/sdk/types.js";
 2 | 
 3 | /**
 4 |  * Creates a standardized error result for tool calls.
 5 |  * @param message The error message text.
 6 |  * @param toolName Optional tool name for logging/context.
 7 |  * @returns CallToolResult object indicating an error.
 8 |  */
 9 | export function createErrorResult(message: string, toolName?: string): CallToolResult {
10 |     const prefix = toolName ? `[${toolName}] Error: ` : "Error: ";
11 |     // console.error(prefix + message);
12 |     return {
13 |         content: [{ type: "text", text: prefix + message } as TextContent],
14 |         isError: true,
15 |     };
16 | }
17 | 
/**
 * Creates a standardized success result with text content.
 *
 * The message is returned verbatim. Unlike `createErrorResult`, no
 * `[toolName] ...` prefix is added to the text.
 *
 * @param message The success message text.
 * @param toolName Accepted for call-site symmetry with `createErrorResult`;
 *   it does not influence the returned result.
 * @returns CallToolResult with one text item and `isError` set to `false`.
 */
export function createSuccessResult(message: string, toolName?: string): CallToolResult {
    // A prefix used to be computed from `toolName` here and then discarded.
    // The dead local is removed; the emitted text is unchanged.
    return {
        content: [{ type: "text", text: message } as TextContent],
        isError: false,
    };
}


--------------------------------------------------------------------------------
/browserbase/src/tools/utils.ts:
--------------------------------------------------------------------------------
 1 | import type * as playwright from 'playwright';
 2 | import type { Context } from '../context.js';
 3 | 
 4 | export async function waitForCompletion<R>(context: Context, page: playwright.Page, callback: () => Promise<R>): Promise<R> {
 5 |   const requests = new Set<playwright.Request>();
 6 |   let frameNavigated = false;
 7 |   let waitCallback: () => void = () => {};
 8 |   const waitBarrier = new Promise<void>(f => { waitCallback = f; });
 9 | 
10 |   const requestListener = (request: playwright.Request) => requests.add(request);
11 |   const requestFinishedListener = (request: playwright.Request) => {
12 |     requests.delete(request);
13 |     if (!requests.size)
14 |       waitCallback();
15 |   };
16 | 
17 |   const frameNavigateListener = (frame: playwright.Frame) => {
18 |     if (frame.parentFrame())
19 |       return;
20 |     frameNavigated = true;
21 |     dispose();
22 |     clearTimeout(timeout);
23 |     void frame.waitForLoadState('load').then(() => {
24 |       waitCallback();
25 |     });
26 |   };
27 | 
28 |   const onTimeout = () => {
29 |     dispose();
30 |     waitCallback();
31 |   };
32 | 
33 |   page.on('request', requestListener);
34 |   page.on('requestfinished', requestFinishedListener);
35 |   page.on('framenavigated', frameNavigateListener);
36 |   const timeout = setTimeout(onTimeout, 10000);
37 | 
38 |   const dispose = () => {
39 |     page.off('request', requestListener);
40 |     page.off('requestfinished', requestFinishedListener);
41 |     page.off('framenavigated', frameNavigateListener);
42 |     clearTimeout(timeout);
43 |   };
44 | 
45 |   try {
46 |     const result = await callback();
47 |     if (!requests.size && !frameNavigated)
48 |       waitCallback();
49 |     await waitBarrier;
50 |     await context.waitForTimeout(1000);
51 |     return result;
52 |   } finally {
53 |     dispose();
54 |   }
55 | }
56 | 
/**
 * Replaces every character other than ASCII letters, digits, `_`, `.` and `-`
 * with an underscore.
 *
 * @param s Text to sanitize.
 * @returns The sanitized text; its length in UTF-16 code units is unchanged.
 */
export function sanitizeForFilePath(s: string) {
  const disallowedCharacter = /[^a-zA-Z0-9_.-]/g;
  const sanitized = s.replace(disallowedCharacter, '_');
  return sanitized;
}


--------------------------------------------------------------------------------
/browserbase/src/transport.ts:
--------------------------------------------------------------------------------
  1 | import http from 'node:http';
  2 | import assert from 'node:assert';
  3 | import crypto from 'node:crypto';
  4 | 
  5 | import { ServerList } from './server.js';
  6 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
  7 | import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
  8 | import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
  9 | 
 10 | export async function startStdioTransport(serverList: ServerList) {
 11 |   const server = await serverList.create();
 12 |   await server.connect(new StdioServerTransport());
 13 | }
 14 | 
 15 | async function handleSSE(req: http.IncomingMessage, res: http.ServerResponse, url: URL, serverList: ServerList, sessions: Map<string, SSEServerTransport>) {
 16 |   if (req.method === 'POST') {
 17 |     const sessionId = url.searchParams.get('sessionId');
 18 |     if (!sessionId) {
 19 |       res.statusCode = 400;
 20 |       return res.end('Missing sessionId');
 21 |     }
 22 | 
 23 |     const transport = sessions.get(sessionId);
 24 |     if (!transport) {
 25 |       res.statusCode = 404;
 26 |       return res.end('Session not found');
 27 |     }
 28 | 
 29 |     return await transport.handlePostMessage(req, res);
 30 |   } else if (req.method === 'GET') {
 31 |     const transport = new SSEServerTransport('/sse', res);
 32 |     sessions.set(transport.sessionId, transport);
 33 |     const server = await serverList.create();
 34 |     res.on('close', () => {
 35 |       sessions.delete(transport.sessionId);
 36 |       serverList.close(server).catch(e => {
 37 |         // eslint-disable-next-line no-console
 38 |         // console.error(e);
 39 |       });
 40 |     });
 41 |     return await server.connect(transport);
 42 |   }
 43 | 
 44 |   res.statusCode = 405;
 45 |   res.end('Method not allowed');
 46 | }
 47 | 
 48 | async function handleStreamable(req: http.IncomingMessage, res: http.ServerResponse, serverList: ServerList, sessions: Map<string, StreamableHTTPServerTransport>) {
 49 |   const sessionId = req.headers['mcp-session-id'] as string | undefined;
 50 |   if (sessionId) {
 51 |     const transport = sessions.get(sessionId);
 52 |     if (!transport) {
 53 |       res.statusCode = 404;
 54 |       res.end('Session not found');
 55 |       return;
 56 |     }
 57 |     return await transport.handleRequest(req, res);
 58 |   }
 59 | 
 60 |   if (req.method === 'POST') {
 61 |     const transport = new StreamableHTTPServerTransport({
 62 |       sessionIdGenerator: () => crypto.randomUUID(),
 63 |       onsessioninitialized: sessionId => {
 64 |         sessions.set(sessionId, transport);
 65 |       }
 66 |     });
 67 |     transport.onclose = () => {
 68 |       if (transport.sessionId)
 69 |         sessions.delete(transport.sessionId);
 70 |     };
 71 |     const server = await serverList.create();
 72 |     await server.connect(transport);
 73 |     return await transport.handleRequest(req, res);
 74 |   }
 75 | 
 76 |   res.statusCode = 400;
 77 |   res.end('Invalid request');
 78 | }
 79 | 
 80 | export function startHttpTransport(port: number, hostname: string | undefined, serverList: ServerList) {
 81 |   const sseSessions = new Map<string, SSEServerTransport>();
 82 |   const streamableSessions = new Map<string, StreamableHTTPServerTransport>();
 83 |   const httpServer = http.createServer(async (req, res) => {
 84 |     const url = new URL(`http://localhost${req.url}`);
 85 |     if (url.pathname.startsWith('/mcp'))
 86 |       await handleStreamable(req, res, serverList, streamableSessions);
 87 |     else
 88 |       await handleSSE(req, res, url, serverList, sseSessions);
 89 |   });
 90 |   httpServer.listen(port, hostname, () => {
 91 |     const address = httpServer.address();
 92 |     assert(address, 'Could not bind server socket');
 93 |     let url: string;
 94 |     if (typeof address === 'string') {
 95 |       url = address;
 96 |     } else {
 97 |       const resolvedPort = address.port;
 98 |       let resolvedHost = address.family === 'IPv4' ? address.address : `[${address.address}]`;
 99 |       if (resolvedHost === '0.0.0.0' || resolvedHost === '[::]')
100 |         resolvedHost = 'localhost';
101 |       url = `http://${resolvedHost}:${resolvedPort}`;
102 |     }
103 |     const message = [
104 |       `Listening on ${url}`,
105 |       'Put this in your client config:',
106 |       JSON.stringify({
107 |         'mcpServers': {
108 |           'browserbase': {
109 |             'url': `${url}/sse`
110 |           }
111 |         }
112 |       }, undefined, 2),
113 |       'If your client supports streamable HTTP, you can use the /mcp endpoint instead.',
114 |     ].join('\n');
115 |     // eslint-disable-next-line no-console
116 |     console.log(message);
117 |   });
118 | }


--------------------------------------------------------------------------------
/browserbase/tests/.gitkeep:
--------------------------------------------------------------------------------
1 | # Placeholder for tests 


--------------------------------------------------------------------------------
/browserbase/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2022",
 4 |     "module": "NodeNext",
 5 |     "moduleResolution": "NodeNext",
 6 |     "strict": true,
 7 |     "esModuleInterop": true,
 8 |     "skipLibCheck": true,
 9 |     "forceConsistentCasingInFileNames": true,
10 |     "resolveJsonModule": true,
11 |     "outDir": "dist",
12 |     "rootDir": "src"
13 |   },
14 |   "include": ["src/**/*.ts"],
15 |   "exclude": ["node_modules"]
16 | }


--------------------------------------------------------------------------------
/browserbase/utils/.gitkeep:
--------------------------------------------------------------------------------
1 | # Placeholder for utility scripts 


--------------------------------------------------------------------------------
/stagehand/README.md:
--------------------------------------------------------------------------------
  1 | # Stagehand MCP Server
  2 | 
  3 | ![cover](../assets/stagehand-mcp.png)
  4 | 
  5 | A Model Context Protocol (MCP) server that provides AI-powered web automation capabilities using [Stagehand](https://github.com/browserbase/stagehand). This server enables LLMs to interact with web pages, perform actions, extract data, and observe possible actions in a real browser environment.
  6 | 
  7 | ## Get Started
  8 | 
  9 | 1. Run `npm install` to install the necessary dependencies, then run `npm run build` to get `dist/index.js`.
 10 | 
 11 | 2. Set up your Claude Desktop configuration to use the server.  
 12 | 
 13 | ```json
 14 | {
 15 |   "mcpServers": {
 16 |     "stagehand": {
 17 |       "command": "node",
 18 |       "args": ["path/to/mcp-server-browserbase/stagehand/dist/index.js"],
 19 |       "env": {
 20 |         "BROWSERBASE_API_KEY": "<YOUR_BROWSERBASE_API_KEY>",
 21 |         "BROWSERBASE_PROJECT_ID": "<YOUR_BROWSERBASE_PROJECT_ID>",
 22 |         "OPENAI_API_KEY": "<YOUR_OPENAI_API_KEY>",
 23 |         "CONTEXT_ID": "<YOUR_CONTEXT_ID>"
 24 |       }
 25 |     }
 26 |   }
 27 | }
 28 | ```
 29 | or, for running locally, first [**open Chrome in debug mode**](https://docs.stagehand.dev/examples/customize_browser#use-your-personal-browser) like so:
 30 | 
 31 | `open -a "Google Chrome" --args --remote-debugging-port=9222`
 32 | ```json
 33 | {
 34 |   "mcpServers": {
 35 |     "stagehand": {
 36 |       "command": "node",
 37 |       "args": ["path/to/mcp-server-browserbase/stagehand/dist/index.js"],
 38 |       "env": {
 39 |         "OPENAI_API_KEY": "<YOUR_OPENAI_API_KEY>",
 40 |         "LOCAL_CDP_URL": "http://localhost:9222"
 41 |       }
 42 |     }
 43 |   }
 44 | }
 45 | ```
 46 | > 💡 Check out our [documentation](https://docs.stagehand.dev/examples/customize_browser#use-your-personal-browser) for getting your local CDP url!
 47 | 
 48 | 3. Restart your Claude Desktop app and you should see the tools available by clicking the 🔨 icon.
 49 | 
 50 | 4. Start using the tools! Below is a demo video of Claude doing a Google search for OpenAI using stagehand MCP server and Browserbase for a remote headless browser.
 51 | 
 52 | <div>
 53 |     <a href="https://www.loom.com/share/9fe52fd9ab24421191223645366ec1c5">
 54 |       <p>Stagehand MCP Server demo - Watch Video</p>
 55 |     </a>
 56 |     <a href="https://www.loom.com/share/9fe52fd9ab24421191223645366ec1c5">
 57 |       <img style="max-width:300px;" src="https://cdn.loom.com/sessions/thumbnails/9fe52fd9ab24421191223645366ec1c5-f1a228ffe52d8065-full-play.gif">
 58 |     </a>
 59 |   </div>
 60 | 
 61 | ## Tools
 62 | 
 63 | ### Stagehand commands
 64 | 
 65 | - **stagehand_navigate**
 66 |   - Navigate to any URL in the browser
 67 |   - Input:
 68 |     - `url` (string): The URL to navigate to
 69 | 
 70 | - **stagehand_act**
 71 |   - Perform an action on the web page
 72 |   - Inputs:
 73 |     - `action` (string): The action to perform (e.g., "click the login button")
 74 |     - `variables` (object, optional): Variables used in the action template
 75 | 
 76 | - **stagehand_extract**
 77 |   - Extract data from the web page 
 78 | 
 79 | - **stagehand_observe**
 80 |   - Observe actions that can be performed on the web page
 81 |   - Input:
 82 |     - `instruction` (string, optional): Instruction for observation
 83 | 
 84 | ### Resources
 85 | 
 86 | The server provides access to two resources:
 87 | 
 88 | 1. **Console Logs** (`console://logs`)
 89 | 
 90 |    - Browser console output in text format
 91 |    - Includes all console messages from the browser
 92 | 
 93 | 2. **Screenshots** (`screenshot://<n>`)
 94 |    - PNG images of captured screenshots
 95 |    - Accessible via the screenshot name specified during capture
 96 | 
 97 | ## File Structure
 98 | 
 99 | The codebase is organized into the following modules:
100 | 
101 | - **index.ts**: Entry point that initializes and runs the server.
102 | - **server.ts**: Core server logic, including server creation, configuration, and request handling.
103 | - **tools.ts**: Definitions and implementations of tools that can be called by MCP clients.
104 | - **prompts.ts**: Prompt templates that can be used by MCP clients.
105 | - **resources.ts**: Resource definitions and handlers for resource-related requests.
106 | - **logging.ts**: Comprehensive logging system with rotation and formatting capabilities.
107 | - **utils.ts**: Utility functions including JSON Schema to Zod schema conversion and message sanitization.
108 | 
109 | ## Module Descriptions
110 | 
111 | ### index.ts
112 | 
113 | The main entry point for the application. It:
114 | - Initializes the logging system
115 | - Creates the server instance
116 | - Connects to the stdio transport to receive and respond to requests
117 | 
118 | ### server.ts
119 | 
120 | Contains core server functionality:
121 | - Creates and configures the MCP server
122 | - Defines Stagehand configuration
123 | - Sets up request handlers for all MCP operations
124 | - Manages the Stagehand browser instance
125 | 
126 | ### tools.ts
127 | 
128 | Implements the tools that can be called by MCP clients:
129 | - `stagehand_navigate`: Navigate to URLs
130 | - `stagehand_act`: Perform actions on web elements
131 | - `stagehand_extract`: Extract structured data from web pages
132 | - `stagehand_observe`: Observe elements on the page
133 | - `screenshot`: Take screenshots of the current page
134 | 
135 | ### prompts.ts
136 | 
137 | Defines prompt templates for MCP clients:
138 | - `click_search_button`: Template for clicking search buttons
139 | 
140 | ### resources.ts
141 | 
142 | Manages resources in the MCP protocol:
143 | - Lists captured screenshots as resources and serves them by URI; the resource template list is currently empty
144 | 
145 | ### logging.ts
146 | 
147 | Implements a comprehensive logging system:
148 | - File-based logging with rotation
149 | - In-memory operation logs
150 | - Log formatting and sanitization
151 | - Console logging for debugging
152 | 
153 | ### utils.ts
154 | 
155 | Provides utility functions:
156 | - `jsonSchemaToZod`: Converts JSON Schema to Zod schema for validation
157 | - `sanitizeMessage`: Ensures messages are properly formatted JSON
158 | 
159 | ## Key Features
160 | 
161 | - AI-powered web automation
162 | - Perform actions on web pages
163 | - Extract structured data from web pages
164 | - Observe possible actions on web pages
165 | - Simple and extensible API
166 | - Model-agnostic support for various LLM providers
167 | 
168 | ## Environment Variables
169 | 
170 | - `BROWSERBASE_API_KEY`: API key for BrowserBase authentication
171 | - `BROWSERBASE_PROJECT_ID`: Project ID for BrowserBase
172 | - `OPENAI_API_KEY`: API key for OpenAI (used by Stagehand)
173 | - `DEBUG`: Enable debug logging
174 | 
175 | ## MCP Capabilities
176 | 
177 | This server implements the following MCP capabilities:
178 | 
179 | - **Tools**: Allows clients to call tools that control a browser instance
180 | - **Prompts**: Provides prompt templates for common operations
181 | - **Resources**: (Currently empty but structured for future expansion)
182 | - **Logging**: Provides detailed logging capabilities
183 | 
184 | For more information about the Model Context Protocol, visit:
185 | - [MCP Documentation](https://modelcontextprotocol.io/docs)
186 | - [MCP Specification](https://spec.modelcontextprotocol.io/)
187 | 
188 | ## License
189 | 
190 | Licensed under the MIT License.
191 | 
192 | Copyright 2024 Browserbase, Inc.
193 | 


--------------------------------------------------------------------------------
/stagehand/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@browserbasehq/mcp-stagehand",
 3 |   "version": "0.5.1",
 4 |   "description": "MCP server for AI web browser automation using Stagehand",
 5 |   "license": "MIT",
 6 |   "author": "Browserbase, Inc. (https://www.browserbase.com/)",
 7 |   "homepage": "https://modelcontextprotocol.io",
 8 |   "bugs": "https://github.com/modelcontextprotocol/servers/issues",
 9 |   "type": "module",
10 |   "bin": {
11 |     "mcp-server-stagehand": "dist/index.js"
12 |   },
13 |   "files": [
14 |     "dist"
15 |   ],
16 |   "scripts": {
17 |     "build": "tsc && shx chmod +x dist/*.js",
18 |     "prepare": "npm run build",
19 |     "watch": "tsc --watch"
20 |   },
21 |   "dependencies": {
22 |     "@browserbasehq/sdk": "^2.0.0",
23 |     "@browserbasehq/stagehand": "^2.0.0",
24 |     "@modelcontextprotocol/sdk": "^1.0.3",
25 |     "@modelcontextprotocol/server-stagehand": "file:",
26 |     "@playwright/test": "^1.49.0"
27 |   },
28 |   "devDependencies": {
29 |     "shx": "^0.3.4",
30 |     "typescript": "^5.6.2"
31 |   }
32 | }
33 | 


--------------------------------------------------------------------------------
/stagehand/src/index.ts:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | 
 3 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 4 | import { createServer } from "./server.js";
 5 | import { ensureLogDirectory, registerExitHandlers, scheduleLogRotation, setupLogRotation } from "./logging.js";
 6 | 
// Prepare logging before the server starts: make sure the log directory
// exists, run one rotation/cleanup pass, start the periodic rotation timer,
// and install the handlers that flush queued log lines on exit and SIGINT.
ensureLogDirectory();
setupLogRotation();
scheduleLogRotation();
registerExitHandlers();
12 | 
/**
 * Creates the server, connects it to a stdio transport and then sends an
 * info-level logging message announcing readiness.
 */
async function runServer() {
  const server = createServer();
  await server.connect(new StdioServerTransport());

  const readyNotice = {
    level: "info",
    data: "Stagehand MCP server is ready to accept requests",
  } as const;
  server.sendLoggingMessage(readyNotice);
}

// Start the server; a startup failure is reported on stderr as its message
// (or its string form when it is not an Error).
runServer().catch((error) => {
  console.error(error instanceof Error ? error.message : String(error));
});
28 | 


--------------------------------------------------------------------------------
/stagehand/src/logging.ts:
--------------------------------------------------------------------------------
  1 | import fs from 'fs';
  2 | import path from 'path';
  3 | import { fileURLToPath } from 'url';
  4 | import type { LogLine } from "@browserbasehq/stagehand";
  5 | import { Server } from "@modelcontextprotocol/sdk/server/index.js";
  6 | 
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Log location and retention limits.
// LOG_DIR is the `logs` directory one level above this module's directory.
const LOG_DIR = path.join(__dirname, '../logs');
// The file name embeds the UTC date (YYYY-MM-DD) taken once, when this module
// is loaded; it is not recomputed afterwards.
const LOG_FILE = path.join(LOG_DIR, `stagehand-${new Date().toISOString().split('T')[0]}.log`);
const MAX_LOG_FILES = 10; // Maximum number of log files to keep
const MAX_LOG_SIZE = 10 * 1024 * 1024; // 10MB max log file size

// Log lines waiting to be written to LOG_FILE, plus the timer handle of the
// pending flush (null when no flush is scheduled).
let logQueue: string[] = [];
let logWriteTimeout: NodeJS.Timeout | null = null;
const LOG_FLUSH_INTERVAL = 1000; // Delay in ms before queued lines are flushed
const MAX_OPERATION_LOGS = 1000; // Size at which operationLogs gets trimmed

// In-memory log buffers exported for other modules.
export const operationLogs: string[] = [];
export const consoleLogs: string[] = [];

// Server used to forward log messages to the client; undefined until
// setServerInstance() is called.
let serverInstance: Server | undefined;
 28 | 
 29 | // Set server for logging
 30 | export function setServerInstance(server: Server) {
 31 |   serverInstance = server;
 32 | }
 33 | 
 34 | // Get server instance for notifications and logging
 35 | export function getServerInstance() {
 36 |   return serverInstance;
 37 | }
 38 | 
 39 | // Ensure log directory exists
 40 | export function ensureLogDirectory() {
 41 |   if (!fs.existsSync(LOG_DIR)) {
 42 |     fs.mkdirSync(LOG_DIR, { recursive: true });
 43 |   }
 44 | }
 45 | 
 46 | // Setup log rotation management
 47 | export function setupLogRotation() {
 48 |   try {
 49 |     // Check if current log file exceeds max size
 50 |     if (fs.existsSync(LOG_FILE) && fs.statSync(LOG_FILE).size > MAX_LOG_SIZE) {
 51 |       const timestamp = new Date().toISOString().replace(/:/g, '-');
 52 |       const rotatedLogFile = path.join(LOG_DIR, `stagehand-${timestamp}.log`);
 53 |       fs.renameSync(LOG_FILE, rotatedLogFile);
 54 |     }
 55 |     
 56 |     // Clean up old log files if we have too many
 57 |     const logFiles = fs.readdirSync(LOG_DIR)
 58 |       .filter(file => file.startsWith('stagehand-') && file.endsWith('.log'))
 59 |       .map(file => path.join(LOG_DIR, file))
 60 |       .sort((a, b) => fs.statSync(b).mtime.getTime() - fs.statSync(a).mtime.getTime());
 61 |     
 62 |     if (logFiles.length > MAX_LOG_FILES) {
 63 |       logFiles.slice(MAX_LOG_FILES).forEach(file => {
 64 |         try {
 65 |           fs.unlinkSync(file);
 66 |         } catch (err) {
 67 |           console.error(`Failed to delete old log file ${file}:`, err);
 68 |         }
 69 |       });
 70 |     }
 71 |   } catch (err) {
 72 |     console.error('Error in log rotation:', err);
 73 |   }
 74 | }
 75 | 
 76 | // Flush logs to disk asynchronously
 77 | export async function flushLogs() {
 78 |   if (logQueue.length === 0) return;
 79 |   
 80 |   const logsToWrite = logQueue.join('\n') + '\n';
 81 |   logQueue = [];
 82 |   logWriteTimeout = null;
 83 |   
 84 |   try {
 85 |     await fs.promises.appendFile(LOG_FILE, logsToWrite);
 86 |     
 87 |     // Check if we need to rotate logs after write
 88 |     const stats = await fs.promises.stat(LOG_FILE);
 89 |     if (stats.size > MAX_LOG_SIZE) {
 90 |       setupLogRotation();
 91 |     }
 92 |   } catch (err) {
 93 |     console.error('Failed to write logs to file:', err);
 94 |     // If write fails, try to use sync version as fallback
 95 |     try {
 96 |       fs.appendFileSync(LOG_FILE, logsToWrite);
 97 |     } catch (syncErr) {
 98 |       console.error('Failed to write logs synchronously:', syncErr);
 99 |     }
100 |   }
101 | }
102 | 
/**
 * Renders a `LogLine` as `[<ISO timestamp>] [<LEVEL>] <message>`.
 *
 * - A truthy `timestamp` is converted with `new Date(...)`; otherwise the
 *   current time is used.
 * - `level` 0, 1, 2 become DEBUG, INFO, ERROR; anything else, including a
 *   missing level, becomes UNKNOWN.
 *   NOTE(review): confirm this numeric-to-label mapping against Stagehand's
 *   definition of `LogLine.level`.
 * - A missing or empty `message` is rendered as an empty string.
 */
export function logLineToString(logLine: LogLine): string {
  const when = logLine.timestamp ? new Date(logLine.timestamp) : new Date();

  let label: string;
  switch (logLine.level) {
    case 0:
      label = 'DEBUG';
      break;
    case 1:
      label = 'INFO';
      break;
    case 2:
      label = 'ERROR';
      break;
    default:
      label = 'UNKNOWN';
  }

  const text = logLine.message || '';
  return `[${when.toISOString()}] [${label}] ${text}`;
}
112 | 
/**
 * Main logging function.
 *
 * Formats `message` as `[<ISO timestamp>] [<LEVEL>] <message>` and then:
 *  1. appends it to the in-memory `operationLogs`, trimming that array when it
 *     grows past MAX_OPERATION_LOGS;
 *  2. queues it for the batched file write and schedules a flush after
 *     LOG_FLUSH_INTERVAL ms unless one is already scheduled;
 *  3. echoes it to stderr when `DEBUG` is set or the level is `error`;
 *  4. forwards `info` and `error` messages to the client when a server
 *     instance has been registered.
 *
 * @param message Text to log.
 * @param level Severity; defaults to `info`.
 */
export function log(message: string, level: 'info' | 'error' | 'debug' = 'info') {
  const timestamp = new Date().toISOString();
  const logMessage = `[${timestamp}] [${level.toUpperCase()}] ${message}`;

  // Manage operation logs with size limit
  operationLogs.push(logMessage);
  if (operationLogs.length > MAX_OPERATION_LOGS) {
    // Keep the first 100 entries and the most recent (MAX_OPERATION_LOGS - 100);
    // the entries in between are replaced by a single marker line.
    const headSize = 100;
    // Count the removed entries BEFORE mutating the array. Reading the length
    // after it had been reset made the marker report a negative number.
    const removedCount = operationLogs.length - MAX_OPERATION_LOGS;
    operationLogs.splice(headSize, removedCount, `[...${removedCount} logs truncated...]`);
  }

  // Queue log for async writing
  logQueue.push(logMessage);

  // Setup timer to flush logs if not already scheduled
  if (!logWriteTimeout) {
    logWriteTimeout = setTimeout(flushLogs, LOG_FLUSH_INTERVAL);
  }

  // Console output to stderr for debugging
  if (process.env.DEBUG || level === 'error') {
    console.error(logMessage);
  }

  // Send logging message to client for important events
  if (serverInstance && (level === 'info' || level === 'error')) {
    // Fire-and-forget: logging must not fail the caller. The rejection handler
    // keeps a failed send from becoming an unhandled promise rejection.
    void Promise.resolve(serverInstance.sendLoggingMessage({
      level: level,
      data: message,
    })).catch((err: unknown) => {
      console.error('Failed to send logging message to client:', err);
    });
  }
}
153 | 
/**
 * Joins log lines with newlines for inclusion in a response.
 *
 * Up to 100 lines are returned in full. Longer inputs are reduced to the first
 * 50 and last 50 lines, with a notice in between stating how many lines were
 * left out.
 *
 * @param logs Log lines, oldest first.
 * @returns The newline-joined text.
 */
export function formatLogResponse(logs: string[]): string {
  const total = logs.length;
  if (total > 100) {
    const head = logs.slice(0, 50);
    const tail = logs.slice(total - 50);
    const notice = `\n... ${total - 100} more log entries (truncated) ...\n`;
    return head.concat(notice, tail).join("\n");
  }

  return logs.join("\n");
}
169 | 
/**
 * Logs an incoming request at debug level.
 *
 * The entry is the text `REQUEST: ` followed by pretty-printed JSON holding
 * the current timestamp, `type` and `params`.
 *
 * @param type Label identifying the kind of request.
 * @param params Request parameters; must be JSON-serializable.
 */
export function logRequest(type: string, params: any) {
  const timestamp = new Date().toISOString();
  const serialized = JSON.stringify({ timestamp, type, params }, null, 2);
  log(`REQUEST: ${serialized}`, 'debug');
}
179 | 
/**
 * Logs an outgoing response at debug level.
 *
 * The entry is the text `RESPONSE: ` followed by pretty-printed JSON holding
 * the current timestamp, `type` and `response`.
 *
 * @param type Label identifying the kind of response.
 * @param response Response payload; must be JSON-serializable.
 */
export function logResponse(type: string, response: any) {
  const timestamp = new Date().toISOString();
  const serialized = JSON.stringify({ timestamp, type, response }, null, 2);
  log(`RESPONSE: ${serialized}`, 'debug');
}
189 | 
/**
 * Installs process handlers that write any still-queued log lines to LOG_FILE
 * before the process ends.
 *
 *  - `exit`: flushes the queue synchronously.
 *  - `SIGINT`: flushes the queue synchronously, then calls `process.exit(0)`.
 *
 * Write failures are reported with `console.error` and do not prevent exit.
 */
export function registerExitHandlers() {
  // Appends the queued lines synchronously and empties the queue on success.
  // Emptying matters: the SIGINT handler ends with process.exit(0), which
  // fires the 'exit' handler too. With the queue left intact, the same lines
  // would be appended twice.
  const flushQueueSync = (trigger: 'exit' | 'SIGINT') => {
    if (logQueue.length === 0)
      return;
    try {
      fs.appendFileSync(LOG_FILE, logQueue.join('\n') + '\n');
      logQueue = [];
    } catch (err) {
      console.error(`Failed to flush logs on ${trigger}:`, err);
    }
  };

  // Make sure logs are flushed when the process exits
  process.on('exit', () => {
    flushQueueSync('exit');
  });

  process.on('SIGINT', () => {
    // Flush logs and exit
    flushQueueSync('SIGINT');
    process.exit(0);
  });
}
215 | 
/**
 * Starts a repeating timer that runs `setupLogRotation()` every 15 minutes.
 * The timer handle is not kept, so the schedule cannot be cancelled by callers.
 */
export function scheduleLogRotation() {
  const rotationCheckIntervalMs = 15 * 60 * 1000;
  const runRotationCheck = () => {
    setupLogRotation();
  };
  setInterval(runRotationCheck, rotationCheckIntervalMs);
}


--------------------------------------------------------------------------------
/stagehand/src/prompts.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Prompts module for the Stagehand MCP server
 3 |  * Contains prompts definitions and handlers for prompt-related requests
 4 |  */
 5 | 
/**
 * Prompts offered by this module. Each entry carries a name, a description
 * and the list of arguments it accepts.
 */
export const PROMPTS = [
  {
    name: "click_search_button",
    description: "A prompt template for clicking on a search button",
    arguments: [] // No arguments required for this specific prompt
  }
];
14 | 
/**
 * Looks up a prompt by name.
 *
 * Only `click_search_button` is known; it yields a description plus a single
 * user message asking to click the search button.
 *
 * @param name Name of the requested prompt.
 * @returns The prompt's description and messages.
 * @throws Error with `Invalid prompt name: <name>` for any other name.
 */
export function getPrompt(name: string) {
  if (name !== "click_search_button") {
    throw new Error(`Invalid prompt name: ${name}`);
  }

  const userMessage = {
    role: "user",
    content: {
      type: "text",
      text: "Please click on the search button"
    }
  };

  return {
    description: "This prompt provides instructions for clicking on a search button",
    messages: [userMessage]
  };
}


--------------------------------------------------------------------------------
/stagehand/src/resources.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Resources module for the Stagehand MCP server
 3 |  * Contains resources definitions and handlers for resource-related requests
 4 |  */
 5 | 
// Static resource list; empty. listResources() does not read it and builds its
// answer from `screenshots` instead.
export const RESOURCES = [];

// Static resource template list; empty.
export const RESOURCE_TEMPLATES = [];

// Screenshots keyed by name. The stored string is returned as the `blob` of an
// image/png resource by readResource().
export const screenshots = new Map<string, string>();
14 | 
/**
 * Handles a list-resources request.
 *
 * @returns One `image/png` resource per stored screenshot, addressed as
 *   `screenshot://<name>`, in the map's insertion order.
 */
export function listResources() {
  const resources = [];
  for (const name of screenshots.keys()) {
    resources.push({
      uri: `screenshot://${name}`,
      mimeType: "image/png",
      name: `Screenshot: ${name}`,
    });
  }
  return { resources };
}
30 | 
/**
 * Handles a list-resource-templates request.
 *
 * @returns A response whose `resourceTemplates` list is always empty; a new
 *   array is created on every call.
 */
export function listResourceTemplates() {
  const response = { resourceTemplates: [] };
  return response;
}
38 | 
/**
 * Read a resource by its URI.
 *
 * Only `screenshot://<name>` URIs are supported; `<name>` is looked up in the
 * screenshot store and returned as an image/png blob.
 *
 * @param uri The URI of the resource to read
 * @returns The resource contents (a single image/png blob entry)
 * @throws Error if the URI uses another scheme or no screenshot has that name
 */
export function readResource(uri: string) {
  const screenshotPrefix = "screenshot://";

  if (uri.startsWith(screenshotPrefix)) {
    // Take everything after the scheme. Splitting on "://" would silently
    // truncate a name that itself contains "://".
    const name = uri.slice(screenshotPrefix.length);
    const screenshot = screenshots.get(name);

    // Compare against undefined so that any stored entry, which would also
    // be listed by listResources, is readable rather than reported missing.
    if (screenshot !== undefined) {
      return {
        contents: [
          {
            uri,
            mimeType: "image/png",
            blob: screenshot,
          },
        ],
      };
    }
  }

  throw new Error(`Resource not found: ${uri}`);
}


--------------------------------------------------------------------------------
/stagehand/src/server.ts:
--------------------------------------------------------------------------------
  1 | import { Server } from "@modelcontextprotocol/sdk/server/index.js";
  2 | import {
  3 |   CallToolRequestSchema,
  4 |   ListToolsRequestSchema,
  5 |   ListResourcesRequestSchema,
  6 |   ListResourceTemplatesRequestSchema,
  7 |   ListPromptsRequestSchema,
  8 |   GetPromptRequestSchema,
  9 |   ReadResourceRequestSchema,
 10 | } from "@modelcontextprotocol/sdk/types.js";
 11 | import { Stagehand } from "@browserbasehq/stagehand";
 12 | import type { ConstructorParams } from "@browserbasehq/stagehand";
 13 | 
 14 | import { sanitizeMessage } from "./utils.js";
 15 | import {
 16 |   log,
 17 |   logRequest,
 18 |   logResponse,
 19 |   operationLogs,
 20 |   setServerInstance,
 21 | } from "./logging.js";
 22 | import { TOOLS, handleToolCall } from "./tools.js";
 23 | import { PROMPTS, getPrompt } from "./prompts.js";
 24 | import {
 25 |   listResources,
 26 |   listResourceTemplates,
 27 |   readResource,
 28 | } from "./resources.js";
 29 | 
 30 | // Define Stagehand configuration
 31 | export const stagehandConfig: ConstructorParams = {
 32 |   env:
 33 |     process.env.BROWSERBASE_API_KEY && process.env.BROWSERBASE_PROJECT_ID
 34 |       ? "BROWSERBASE"
 35 |       : "LOCAL",
 36 |   apiKey: process.env.BROWSERBASE_API_KEY /* API key for authentication */,
 37 |   projectId: process.env.BROWSERBASE_PROJECT_ID /* Project identifier */,
 38 |   logger: (message) =>
 39 |     console.error(
 40 |       logLineToString(message)
 41 |     ) /* Custom logging function to stderr */,
 42 |   domSettleTimeoutMs: 30_000 /* Timeout for DOM to settle in milliseconds */,
 43 |   browserbaseSessionCreateParams:
 44 |     process.env.BROWSERBASE_API_KEY && process.env.BROWSERBASE_PROJECT_ID
 45 |       ? {
 46 |           projectId: process.env.BROWSERBASE_PROJECT_ID!,
 47 |           browserSettings: process.env.CONTEXT_ID
 48 |             ? {
 49 |                 context: {
 50 |                   id: process.env.CONTEXT_ID,
 51 |                   persist: true,
 52 |                 },
 53 |               }
 54 |             : undefined,
 55 |         }
 56 |       : undefined,
 57 |   localBrowserLaunchOptions: process.env.LOCAL_CDP_URL
 58 |     ? {
 59 |         cdpUrl: process.env.LOCAL_CDP_URL,
 60 |       }
 61 |     : undefined,
 62 |   enableCaching: true /* Enable caching functionality */,
 63 |   browserbaseSessionID:
 64 |     undefined /* Session ID for resuming Browserbase sessions */,
 65 |   modelName: "gpt-4o" /* Name of the model to use */,
 66 |   modelClientOptions: {
 67 |     apiKey: process.env.OPENAI_API_KEY,
 68 |   } /* Configuration options for the model client */,
 69 |   useAPI: false,
 70 | };
 71 | 
 72 | // Global state
 73 | let stagehand: Stagehand | undefined;
 74 | 
 75 | // Ensure Stagehand is initialized
 76 | export async function ensureStagehand() {
 77 |   if (
 78 |     stagehandConfig.env === "LOCAL" &&
 79 |     !stagehandConfig.localBrowserLaunchOptions?.cdpUrl
 80 |   ) {
 81 |     throw new Error(
 82 |       'Using a local browser without providing a CDP URL is not supported. Please provide a CDP URL using the LOCAL_CDP_URL environment variable.\n\nTo launch your browser in "debug", see our documentation.\n\nhttps://docs.stagehand.dev/examples/customize_browser#use-your-personal-browser'
 83 |     );
 84 |   }
 85 | 
 86 |   try {
 87 |     if (!stagehand) {
 88 |       stagehand = new Stagehand(stagehandConfig);
 89 |       await stagehand.init();
 90 |       return stagehand;
 91 |     }
 92 | 
 93 |     // Try to perform a simple operation to check if the session is still valid
 94 |     try {
 95 |       await stagehand.page.evaluate(() => document.title);
 96 |       return stagehand;
 97 |     } catch (error) {
 98 |       // If we get an error indicating the session is invalid, reinitialize
 99 |       if (
100 |         error instanceof Error &&
101 |         (error.message.includes(
102 |           "Target page, context or browser has been closed"
103 |         ) ||
104 |           error.message.includes("Session expired") ||
105 |           error.message.includes("context destroyed"))
106 |       ) {
107 |         log("Browser session expired, reinitializing Stagehand...", "info");
108 |         stagehand = new Stagehand(stagehandConfig);
109 |         await stagehand.init();
110 |         return stagehand;
111 |       }
112 |       throw error; // Re-throw if it's a different type of error
113 |     }
114 |   } catch (error) {
115 |     const errorMsg = error instanceof Error ? error.message : String(error);
116 |     log(`Failed to initialize/reinitialize Stagehand: ${errorMsg}`, "error");
117 |     throw error;
118 |   }
119 | }
120 | 
// Build the payload a handler returns when it fails.
// NOTE(review): this is returned as a normal result object rather than thrown;
// confirm whether clients expect a JSON-RPC error response instead.
function internalErrorResult(error: unknown) {
  const errorMsg = error instanceof Error ? error.message : String(error);
  return {
    error: {
      code: -32603,
      message: `Internal error: ${errorMsg}`,
    },
  };
}

// Serialize a handler result through sanitizeMessage, log it, and return the
// parsed payload. The serialized text is parsed twice so the logger and the
// caller each receive their own object.
function finalizeResponse(label: string, payload: unknown) {
  const serialized = sanitizeMessage(payload);
  logResponse(label, JSON.parse(serialized));
  return JSON.parse(serialized);
}

// Shared request flow: log the request, produce the response, sanitize and
// log it; any failure is converted into an internal-error payload.
async function respondWith(
  label: string,
  params: Parameters<typeof logRequest>[1],
  produce: () => unknown
) {
  try {
    logRequest(label, params);
    return finalizeResponse(label, produce());
  } catch (error) {
    return internalErrorResult(error);
  }
}

// Render the Stagehand configuration for diagnostics with every string
// `apiKey` value masked, so credentials never reach a tool response.
function describeConfigWithoutSecrets(): string {
  return JSON.stringify(
    stagehandConfig,
    (key, value) =>
      key === "apiKey" && typeof value === "string" ? "[REDACTED]" : value,
    2
  );
}

/**
 * Create the Stagehand MCP server and register its request handlers.
 *
 * Handlers are registered for listing/calling tools, listing/reading
 * resources, listing resource templates, and listing/getting prompts. The
 * server instance is also handed to the logging module.
 *
 * @returns The configured server (not yet connected to a transport)
 */
export function createServer() {
  const server = new Server(
    {
      name: "stagehand",
      version: "0.1.0",
    },
    {
      capabilities: {
        resources: {},
        tools: {},
        logging: {},
        prompts: {},
      },
    }
  );

  // Store server instance for logging
  setServerInstance(server);

  // Setup request handlers
  server.setRequestHandler(ListToolsRequestSchema, async (request) =>
    respondWith("ListTools", request.params, () => ({ tools: TOOLS }))
  );

  server.setRequestHandler(CallToolRequestSchema, async (request) => {
    try {
      logRequest("CallTool", request.params);
      operationLogs.length = 0; // Clear logs for new operation

      const toolName = request.params?.name;
      if (!toolName || !TOOLS.some((tool) => tool.name === toolName)) {
        throw new Error(`Invalid tool name: ${toolName}`);
      }

      // Ensure Stagehand is initialized. An initialization failure is
      // reported as a tool error (isError: true) rather than an internal one.
      let activeStagehand: Stagehand;
      try {
        activeStagehand = await ensureStagehand();
      } catch (error) {
        const errorMsg = error instanceof Error ? error.message : String(error);
        return {
          content: [
            {
              type: "text",
              // The config is included to aid debugging, with API keys masked.
              text: `Failed to initialize Stagehand: ${errorMsg}.\n\nConfig: ${describeConfigWithoutSecrets()}`,
            },
            {
              type: "text",
              text: `Operation logs:\n${operationLogs.join("\n")}`,
            },
          ],
          isError: true,
        };
      }

      const result = await handleToolCall(
        toolName,
        request.params.arguments ?? {},
        activeStagehand
      );

      return finalizeResponse("CallTool", result);
    } catch (error) {
      return internalErrorResult(error);
    }
  });

  server.setRequestHandler(ListResourcesRequestSchema, async (request) =>
    respondWith("ListResources", request.params, () => listResources())
  );

  server.setRequestHandler(
    ListResourceTemplatesRequestSchema,
    async (request) =>
      respondWith("ListResourceTemplates", request.params, () =>
        listResourceTemplates()
      )
  );

  server.setRequestHandler(ReadResourceRequestSchema, async (request) =>
    respondWith("ReadResource", request.params, () =>
      readResource(request.params.uri.toString())
    )
  );

  server.setRequestHandler(ListPromptsRequestSchema, async (request) =>
    respondWith("ListPrompts", request.params, () => ({ prompts: PROMPTS }))
  );

  server.setRequestHandler(GetPromptRequestSchema, async (request) =>
    respondWith("GetPrompt", request.params, () => {
      // Only a failed lookup is reported as an invalid prompt name; failures
      // while serializing or logging keep their own message.
      try {
        return getPrompt(request.params?.name || "");
      } catch {
        throw new Error(`Invalid prompt name: ${request.params?.name}`);
      }
    })
  );

  return server;
}
318 | 
319 | // Import missing function from logging
320 | import { formatLogResponse, logLineToString } from "./logging.js";
321 | 


--------------------------------------------------------------------------------
/stagehand/src/tools.ts:
--------------------------------------------------------------------------------
  1 | import { Stagehand } from "@browserbasehq/stagehand";
  2 | import { CallToolResult, Tool } from "@modelcontextprotocol/sdk/types.js";
  3 | import { getServerInstance, operationLogs } from "./logging.js";
  4 | import { screenshots } from "./resources.js";
  5 | 
  6 | // Define the Stagehand tools
  7 | export const TOOLS: Tool[] = [
  8 |   {
  9 |     name: "stagehand_navigate",
 10 |     description:
 11 |       "Navigate to a URL in the browser. Only use this tool with URLs you're confident will work and stay up to date. Otheriwse use https://google.com as the starting point",
 12 |     inputSchema: {
 13 |       type: "object",
 14 |       properties: {
 15 |         url: { type: "string", description: "The URL to navigate to" },
 16 |       },
 17 |       required: ["url"],
 18 |     },
 19 |   },
 20 |   {
 21 |     name: "stagehand_act",
 22 |     description: `Performs an action on a web page element. Act actions should be as atomic and 
 23 |       specific as possible, i.e. "Click the sign in button" or "Type 'hello' into the search input". 
 24 |       AVOID actions that are more than one step, i.e. "Order me pizza" or "Send an email to Paul 
 25 |       asking him to call me". `,
 26 |     inputSchema: {
 27 |       type: "object",
 28 |       properties: {
 29 |         action: {
 30 |           type: "string",
 31 |           description: `The action to perform. Should be as atomic and specific as possible, 
 32 |           i.e. 'Click the sign in button' or 'Type 'hello' into the search input'. AVOID actions that are more than one 
 33 |           step, i.e. 'Order me pizza' or 'Send an email to Paul asking him to call me'. The instruction should be just as specific as possible, 
 34 |           and have a strong correlation to the text on the page. If unsure, use observe before using act."`,
 35 |         },
 36 |         variables: {
 37 |           type: "object",
 38 |           additionalProperties: true,
 39 |           description: `Variables used in the action template. ONLY use variables if you're dealing 
 40 |             with sensitive data or dynamic content. For example, if you're logging in to a website, 
 41 |             you can use a variable for the password. When using variables, you MUST have the variable
 42 |             key in the action template. For example: {"action": "Fill in the password", "variables": {"password": "123456"}}`,
 43 |         },
 44 |       },
 45 |       required: ["action"],
 46 |     },
 47 |   },
 48 |   {
 49 |     name: "stagehand_extract",
 50 |     description: `Extracts all of the text from the current page.`,
 51 |     inputSchema: {
 52 |       type: "object",
 53 |       properties: {},
 54 |     },
 55 |   },
 56 |   {
 57 |     name: "stagehand_observe",
 58 |     description:
 59 |       "Observes elements on the web page. Use this tool to observe elements that you can later use in an action. Use observe instead of extract when dealing with actionable (interactable) elements rather than text. More often than not, you'll want to use extract instead of observe when dealing with scraping or extracting structured text.",
 60 |     inputSchema: {
 61 |       type: "object",
 62 |       properties: {
 63 |         instruction: {
 64 |           type: "string",
 65 |           description:
 66 |             "Instruction for observation (e.g., 'find the login button'). This instruction must be extremely specific.",
 67 |         },
 68 |       },
 69 |       required: ["instruction"],
 70 |     },
 71 |   },
 72 |   {
 73 |     name: "screenshot",
 74 |     description:
 75 |       "Takes a screenshot of the current page. Use this tool to learn where you are on the page when controlling the browser with Stagehand. Only use this tool when the other tools are not sufficient to get the information you need.",
 76 |     inputSchema: {
 77 |       type: "object",
 78 |       properties: {},
 79 |     },
 80 |   },
 81 | ];
 82 | 
 83 | // Handle tool calls
 84 | export async function handleToolCall(
 85 |   name: string,
 86 |   args: any,
 87 |   stagehand: Stagehand
 88 | ): Promise<CallToolResult> {
 89 |   switch (name) {
 90 |     case "stagehand_navigate":
 91 |       try {
 92 |         await stagehand.page.goto(args.url);
 93 |         return {
 94 |           content: [
 95 |             {
 96 |               type: "text",
 97 |               text: `Navigated to: ${args.url}`,
 98 |             },
 99 |             {
100 |               type: "text",
101 |               text: `View the live session here: https://browserbase.com/sessions/${stagehand.browserbaseSessionID}`,
102 |             },
103 |           ],
104 |           isError: false,
105 |         };
106 |       } catch (error) {
107 |         const errorMsg = error instanceof Error ? error.message : String(error);
108 |         return {
109 |           content: [
110 |             {
111 |               type: "text",
112 |               text: `Failed to navigate: ${errorMsg}`,
113 |             },
114 |             {
115 |               type: "text",
116 |               text: `Operation logs:\n${operationLogs.join("\n")}`,
117 |             },
118 |           ],
119 |           isError: true,
120 |         };
121 |       }
122 | 
123 |     case "stagehand_act":
124 |       try {
125 |         await stagehand.page.act({
126 |           action: args.action,
127 |           variables: args.variables,
128 |         });
129 |         return {
130 |           content: [
131 |             {
132 |               type: "text",
133 |               text: `Action performed: ${args.action}`,
134 |             },
135 |           ],
136 |           isError: false,
137 |         };
138 |       } catch (error) {
139 |         const errorMsg = error instanceof Error ? error.message : String(error);
140 |         return {
141 |           content: [
142 |             {
143 |               type: "text",
144 |               text: `Failed to perform action: ${errorMsg}`,
145 |             },
146 |             {
147 |               type: "text",
148 |               text: `Operation logs:\n${operationLogs.join("\n")}`,
149 |             },
150 |           ],
151 |           isError: true,
152 |         };
153 |       }
154 | 
155 |     case "stagehand_extract": {
156 |       try {
157 |         const bodyText = await stagehand.page.evaluate(
158 |           () => document.body.innerText
159 |         );
160 |         const content = bodyText
161 |           .split("\n")
162 |           .map((line) => line.trim())
163 |           .filter((line) => {
164 |             if (!line) return false;
165 | 
166 |             if (
167 |               (line.includes("{") && line.includes("}")) ||
168 |               line.includes("@keyframes") || // Remove CSS animations
169 |               line.match(/^\.[a-zA-Z0-9_-]+\s*{/) || // Remove CSS lines starting with .className {
170 |               line.match(/^[a-zA-Z-]+:[a-zA-Z0-9%\s\(\)\.,-]+;$/) // Remove lines like "color: blue;" or "margin: 10px;"
171 |             ) {
172 |               return false;
173 |             }
174 |             return true;
175 |           })
176 |           .map((line) => {
177 |             return line.replace(/\\u([0-9a-fA-F]{4})/g, (_, hex) =>
178 |               String.fromCharCode(parseInt(hex, 16))
179 |             );
180 |           });
181 | 
182 |         return {
183 |           content: [
184 |             {
185 |               type: "text",
186 |               text: `Extracted content:\n${content.join("\n")}`,
187 |             },
188 |           ],
189 |           isError: false,
190 |         };
191 |       } catch (error) {
192 |         return {
193 |           content: [
194 |             {
195 |               type: "text",
196 |               text: `Failed to extract content: ${(error as Error).message}`,
197 |             },
198 |           ],
199 |           isError: true,
200 |         };
201 |       }
202 |     }
203 | 
204 |     case "stagehand_observe":
205 |       try {
206 |         const observations = await stagehand.page.observe({
207 |           instruction: args.instruction,
208 |           returnAction: false,
209 |         });
210 |         return {
211 |           content: [
212 |             {
213 |               type: "text",
214 |               text: `Observations: ${JSON.stringify(observations)}`,
215 |             },
216 |           ],
217 |           isError: false,
218 |         };
219 |       } catch (error) {
220 |         const errorMsg = error instanceof Error ? error.message : String(error);
221 |         return {
222 |           content: [
223 |             {
224 |               type: "text",
225 |               text: `Failed to observe: ${errorMsg}`,
226 |             },
227 |             {
228 |               type: "text",
229 |               text: `Operation logs:\n${operationLogs.join("\n")}`,
230 |             },
231 |           ],
232 |           isError: true,
233 |         };
234 |       }
235 | 
236 |     case "screenshot":
237 |       try {
238 |         const screenshotBuffer = await stagehand.page.screenshot({
239 |           fullPage: false,
240 |         });
241 | 
242 |         // Convert buffer to base64 string and store in memory
243 |         const screenshotBase64 = screenshotBuffer.toString("base64");
244 |         const name = `screenshot-${new Date()
245 |           .toISOString()
246 |           .replace(/:/g, "-")}`;
247 |         screenshots.set(name, screenshotBase64);
248 | 
249 |         // Notify the client that the resources changed
250 |         const serverInstance = getServerInstance();
251 |         if (serverInstance) {
252 |           serverInstance.notification({
253 |             method: "notifications/resources/list_changed",
254 |           });
255 |         }
256 | 
257 |         return {
258 |           content: [
259 |             {
260 |               type: "text",
261 |               text: `Screenshot taken with name: ${name}`,
262 |             },
263 |             {
264 |               type: "image",
265 |               data: screenshotBase64,
266 |               mimeType: "image/png",
267 |             },
268 |           ],
269 |           isError: false,
270 |         };
271 |       } catch (error) {
272 |         const errorMsg = error instanceof Error ? error.message : String(error);
273 |         return {
274 |           content: [
275 |             {
276 |               type: "text",
277 |               text: `Failed to take screenshot: ${errorMsg}`,
278 |             },
279 |             {
280 |               type: "text",
281 |               text: `Operation logs:\n${operationLogs.join("\n")}`,
282 |             },
283 |           ],
284 |           isError: true,
285 |         };
286 |       }
287 | 
288 |     default:
289 |       return {
290 |         content: [
291 |           {
292 |             type: "text",
293 |             text: `Unknown tool: ${name}`,
294 |           },
295 |           {
296 |             type: "text",
297 |             text: `Operation logs:\n${operationLogs.join("\n")}`,
298 |           },
299 |         ],
300 |         isError: true,
301 |       };
302 |   }
303 | }
304 | 


--------------------------------------------------------------------------------
/stagehand/src/utils.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Sanitizes a message to ensure it's properly formatted JSON
 3 |  * @param message The message to sanitize
 4 |  * @returns A sanitized JSON string
 5 |  */
 6 | export function sanitizeMessage(message: any): string {
 7 |   try {
 8 |     // Ensure the message is properly stringified JSON
 9 |     if (typeof message === 'string') {
10 |       JSON.parse(message); // Validate JSON structure
11 |       return message;
12 |     }
13 |     return JSON.stringify(message);
14 |   } catch (error) {
15 |     return JSON.stringify({
16 |       jsonrpc: '2.0',
17 |       error: {
18 |         code: -32700,
19 |         message: 'Parse error',
20 |       },
21 |       id: null,
22 |     });
23 |   }
24 | } 


--------------------------------------------------------------------------------
/stagehand/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2022",
 4 |     "module": "Node16",
 5 |     "moduleResolution": "Node16",
 6 |     "strict": true,
 7 |     "esModuleInterop": true,
 8 |     "skipLibCheck": true,
 9 |     "forceConsistentCasingInFileNames": true,
10 |     "resolveJsonModule": true,
11 |     "outDir": "dist",
12 |     "rootDir": "src"
13 |   },
14 |   "include": ["src/**/*.ts"],
15 |   "exclude": ["node_modules"]
16 | }
17 | 


--------------------------------------------------------------------------------