├── .changeset
    ├── config.json
    ├── cyan-symbols-double.md
    ├── empty-bugs-occur.md
    ├── fifty-cats-sell.md
    ├── floppy-experts-wash.md
    ├── green-signs-live.md
    ├── short-banks-sit.md
    ├── solid-rice-admire.md
    ├── sweet-glasses-hope.md
    ├── vast-vans-crash.md
    └── whole-yaks-cheat.md
├── .cursorrules
├── .env.example
├── .github
    ├── pull_request_template
    └── workflows
    │   ├── ci.yml
    │   └── release.yml
├── .gitignore
├── .prettierignore
├── .prettierrc
├── .vscode
    └── settings.json
├── CHANGELOG.md
├── LICENSE
├── README.md
├── docs
    ├── logging.md
    ├── media
    │   ├── chunks.png
    │   └── stagehand-playwright.png
    └── release.md
├── eslint.config.mjs
├── evals
    ├── args.ts
    ├── assets
    │   ├── cart.html
    │   └── peeler.html
    ├── deterministic
    │   ├── auxiliary
    │   │   └── logo.png
    │   ├── bb.playwright.config.ts
    │   ├── e2e.playwright.config.ts
    │   ├── local.playwright.config.ts
    │   ├── stagehand.config.ts
    │   └── tests
    │   │   ├── BrowserContext
    │   │       ├── addInitScript.test.ts
    │   │       ├── cookies.test.ts
    │   │       ├── multiPage.test.ts
    │   │       ├── page.test.ts
    │   │       └── routing.test.ts
    │   │   ├── Errors
    │   │       └── apiKeyError.test.ts
    │   │   ├── browserbase
    │   │       ├── contexts.test.ts
    │   │       ├── downloads.test.ts
    │   │       ├── sessions.test.ts
    │   │       └── uploads.test.ts
    │   │   ├── local
    │   │       └── create.test.ts
    │   │   └── page
    │   │       ├── addInitScript.test.ts
    │   │       ├── addRemoveLocatorHandler.test.ts
    │   │       ├── addTags.test.ts
    │   │       ├── bringToFront.test.ts
    │   │       ├── content.test.ts
    │   │       ├── evaluate.test.ts
    │   │       ├── expose.test.ts
    │   │       ├── frames.test.ts
    │   │       ├── getBy.test.ts
    │   │       ├── navigation.test.ts
    │   │       ├── on.test.ts
    │   │       ├── pageContext.test.ts
    │   │       ├── reload.test.ts
    │   │       └── waitFor.test.ts
    ├── env.ts
    ├── evals.config.json
    ├── evaluator.ts
    ├── index.eval.ts
    ├── initStagehand.ts
    ├── llm_clients
    │   ├── hn_aisdk.ts
    │   ├── hn_customOpenAI.ts
    │   └── hn_langchain.ts
    ├── logger.ts
    ├── scoring.ts
    ├── taskConfig.ts
    ├── tasks
    │   ├── agent
    │   │   ├── google_flights.ts
    │   │   ├── iframe_form.ts
    │   │   ├── iframe_form_multiple.ts
    │   │   ├── sf_library_card.ts
    │   │   └── sf_library_card_multiple.ts
    │   ├── allrecipes.ts
    │   ├── amazon_add_to_cart.ts
    │   ├── apple.ts
    │   ├── arxiv.ts
    │   ├── bidnet.ts
    │   ├── checkboxes.ts
    │   ├── combination_sauce.ts
    │   ├── costar.ts
    │   ├── dropdown.ts
    │   ├── expect_act_timeout.ts
    │   ├── expedia.ts
    │   ├── expedia_search.ts
    │   ├── extract_aigrant_companies.ts
    │   ├── extract_aigrant_targeted.ts
    │   ├── extract_aigrant_targeted_2.ts
    │   ├── extract_apartments.ts
    │   ├── extract_area_codes.ts
    │   ├── extract_baptist_health.ts
    │   ├── extract_capacitor_info.ts
    │   ├── extract_collaborators.ts
    │   ├── extract_csa.ts
    │   ├── extract_geniusee.ts
    │   ├── extract_geniusee_2.ts
    │   ├── extract_github_commits.ts
    │   ├── extract_github_stars.ts
    │   ├── extract_hamilton_weather.ts
    │   ├── extract_jfk_links.ts
    │   ├── extract_jstor_news.ts
    │   ├── extract_memorial_healthcare.ts
    │   ├── extract_nhl_stats.ts
    │   ├── extract_partners.ts
    │   ├── extract_press_releases.ts
    │   ├── extract_professional_info.ts
    │   ├── extract_public_notices.ts
    │   ├── extract_recipe.ts
    │   ├── extract_regulations_table.ts
    │   ├── extract_repo_name.ts
    │   ├── extract_resistor_info.ts
    │   ├── extract_rockauto.ts
    │   ├── extract_single_link.ts
    │   ├── extract_snowshoeing_destinations.ts
    │   ├── extract_staff_members.ts
    │   ├── extract_zillow.ts
    │   ├── google_flights.ts
    │   ├── google_jobs.ts
    │   ├── history.ts
    │   ├── homedepot.ts
    │   ├── imdb_movie_details.ts
    │   ├── instructions.ts
    │   ├── ionwave.ts
    │   ├── ionwave_observe.ts
    │   ├── nextChunk.ts
    │   ├── nonsense_action.ts
    │   ├── observe_amazon_add_to_cart.ts
    │   ├── observe_github.ts
    │   ├── observe_iframes1.ts
    │   ├── observe_iframes2.ts
    │   ├── observe_simple_google_search.ts
    │   ├── observe_taxes.ts
    │   ├── observe_vantechjournal.ts
    │   ├── observe_yc_startup.ts
    │   ├── panamcs.ts
    │   ├── peeler_complex.ts
    │   ├── peeler_simple.ts
    │   ├── prevChunk.ts
    │   ├── radio_btn.ts
    │   ├── rakuten_jp.ts
    │   ├── sciquest.ts
    │   ├── scroll_50.ts
    │   ├── scroll_75.ts
    │   ├── simple_google_search.ts
    │   ├── stock_x.ts
    │   ├── ted_talk.ts
    │   ├── vanta_h.ts
    │   ├── vantechjournal.ts
    │   ├── wichita.ts
    │   └── wikipedia.ts
    └── utils.ts
├── examples
    ├── 2048.ts
    ├── actionable_observe_example.ts
    ├── ai_sdk_example.ts
    ├── cua-example.ts
    ├── debugUrl.ts
    ├── example.ts
    ├── external_client.ts
    ├── external_clients
    │   ├── aisdk.ts
    │   ├── customOpenAI.ts
    │   └── langchain.ts
    ├── form_filling_sensible.ts
    ├── form_filling_sensible_cerebras.ts
    ├── form_filling_sensible_groq.ts
    ├── google_enter.ts
    ├── instructions.ts
    ├── langchain.ts
    ├── operator-example.ts
    ├── parameterizeApiKey.ts
    ├── popup.ts
    └── try_wordle.ts
├── lib
    ├── StagehandContext.ts
    ├── StagehandPage.ts
    ├── a11y
    │   └── utils.ts
    ├── agent
    │   ├── AgentClient.ts
    │   ├── AgentProvider.ts
    │   ├── AnthropicCUAClient.ts
    │   ├── OpenAICUAClient.ts
    │   └── StagehandAgent.ts
    ├── api.ts
    ├── cache.ts
    ├── cache
    │   ├── ActionCache.ts
    │   ├── BaseCache.ts
    │   └── LLMCache.ts
    ├── dom
    │   ├── DomChunk.ts
    │   ├── ElementContainer.ts
    │   ├── GlobalPageContainer.ts
    │   ├── StagehandContainer.ts
    │   ├── candidateCollector.ts
    │   ├── containerFactory.ts
    │   ├── elementCheckUtils.ts
    │   ├── genDomScripts.ts
    │   ├── global.d.ts
    │   ├── index.ts
    │   ├── process.ts
    │   ├── utils.ts
    │   └── xpathUtils.ts
    ├── handlers
    │   ├── actHandler.ts
    │   ├── agentHandler.ts
    │   ├── extractHandler.ts
    │   ├── handlerUtils
    │   │   └── actHandlerUtils.ts
    │   ├── observeHandler.ts
    │   └── operatorHandler.ts
    ├── index.ts
    ├── inference.ts
    ├── inferenceLogUtils.ts
    ├── llm
    │   ├── AnthropicClient.ts
    │   ├── CerebrasClient.ts
    │   ├── GoogleClient.ts
    │   ├── GroqClient.ts
    │   ├── LLMClient.ts
    │   ├── LLMProvider.ts
    │   └── OpenAIClient.ts
    ├── logger.ts
    ├── prompt.ts
    └── utils.ts
├── media
    ├── create-browser-app.gif
    └── github_demo.gif
├── package-lock.json
├── package.json
├── stagehand.config.ts
├── tsconfig.json
└── types
    ├── act.ts
    ├── agent.ts
    ├── api.ts
    ├── browser.ts
    ├── context.ts
    ├── evals.ts
    ├── evaluator.ts
    ├── inference.ts
    ├── llm.ts
    ├── log.ts
    ├── model.ts
    ├── operator.ts
    ├── page.ts
    ├── playwright.ts
    ├── stagehand.ts
    ├── stagehandApiErrors.ts
    ├── stagehandErrors.ts
    └── textannotation.ts


/.changeset/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://unpkg.com/@changesets/config@2.1.1/schema.json",
 3 |   "commit": false,
 4 |   "fixed": [],
 5 |   "linked": [],
 6 |   "baseBranch": "main",
 7 |   "updateInternalDependencies": "patch",
 8 |   "access": "public",
 9 |   "changelog": [
10 |     "@changesets/changelog-github",
11 |     {
12 |       "repo": "browserbase/stagehand"
13 |     }
14 |   ],
15 |   "snapshot": {
16 |     "useCalculatedVersion": true,
17 |     "prereleaseTemplate": "alpha-{commit}",
18 |     "tag": "alpha"
19 |   }
20 | }
21 | 


--------------------------------------------------------------------------------
/.changeset/cyan-symbols-double.md:
--------------------------------------------------------------------------------
1 | ---
2 | "@browserbasehq/stagehand": patch
3 | ---
4 | 
5 | Updated the list of OpenAI models (4.1, o3...)
6 | 


--------------------------------------------------------------------------------
/.changeset/empty-bugs-occur.md:
--------------------------------------------------------------------------------
1 | ---
2 | "@browserbasehq/stagehand": patch
3 | ---
4 | 
5 | wrap page.evaluate to make sure we have injected browser side scripts before calling them
6 | 


--------------------------------------------------------------------------------
/.changeset/fifty-cats-sell.md:
--------------------------------------------------------------------------------
1 | ---
2 | "@browserbasehq/stagehand": minor
3 | ---
4 | 
5 | extract links
6 | 


--------------------------------------------------------------------------------
/.changeset/floppy-experts-wash.md:
--------------------------------------------------------------------------------
1 | ---
2 | "@browserbasehq/stagehand": patch
3 | ---
4 | 
5 | remove unnecessary log
6 | 


--------------------------------------------------------------------------------
/.changeset/green-signs-live.md:
--------------------------------------------------------------------------------
1 | ---
2 | "@browserbasehq/stagehand": patch
3 | ---
4 | 
5 | use javascript click instead of playwright
6 | 


--------------------------------------------------------------------------------
/.changeset/short-banks-sit.md:
--------------------------------------------------------------------------------
1 | ---
2 | "@browserbasehq/stagehand": patch
3 | ---
4 | 
5 | Fixed the schema input for Gemini's response model
6 | 


--------------------------------------------------------------------------------
/.changeset/solid-rice-admire.md:
--------------------------------------------------------------------------------
1 | ---
2 | "@browserbasehq/stagehand": minor
3 | ---
4 | 
5 | Added Gemini 2.5 Flash to Google supported models
6 | 


--------------------------------------------------------------------------------
/.changeset/sweet-glasses-hope.md:
--------------------------------------------------------------------------------
1 | ---
2 | "@browserbasehq/stagehand": patch
3 | ---
4 | 
5 | allow form filling when form is not top-most element
6 | 


--------------------------------------------------------------------------------
/.changeset/vast-vans-crash.md:
--------------------------------------------------------------------------------
1 | ---
2 | "@browserbasehq/stagehand": patch
3 | ---
4 | 
5 | Fixes a redundant unnecessary log
6 | 


--------------------------------------------------------------------------------
/.changeset/whole-yaks-cheat.md:
--------------------------------------------------------------------------------
1 | ---
2 | "@browserbasehq/stagehand": minor
3 | ---
4 | 
5 | Added a new class - Stagehand Evaluator - that wraps around a Stagehand object to determine whether a task is successful or not. Currently used for agent evals
6 | 


--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
 1 | OPENAI_API_KEY=""
 2 | CEREBRAS_API_KEY=""
 3 | GROQ_API_KEY=""
 4 | BROWSERBASE_API_KEY=""
 5 | BRAINTRUST_API_KEY=""
 6 | ANTHROPIC_API_KEY=""
 7 | HEADLESS=false
 8 | ENABLE_CACHING=false
 9 | EVAL_MODELS="gpt-4o,claude-3-5-sonnet-latest"
10 | EXPERIMENTAL_EVAL_MODELS="gpt-4o,claude-3-5-sonnet-latest,o1-mini,o1-preview"
11 | EVAL_CATEGORIES="observe,act,combination,extract,experimental"
12 | STAGEHAND_API_URL="http://localhost:80"
13 | 


--------------------------------------------------------------------------------
/.github/pull_request_template:
--------------------------------------------------------------------------------
1 | # why
2 | 
3 | # what changed
4 | 
5 | # test plan
6 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: Release
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 | 
 8 | permissions:
 9 |   contents: write
10 |   pull-requests: write
11 | 
12 | concurrency: ${{ github.workflow }}-${{ github.ref }}
13 | 
14 | jobs:
15 |   release:
16 |     name: Release
17 |     runs-on: ubuntu-latest
18 |     steps:
19 |       - name: Checkout Repo
20 |         uses: actions/checkout@v3
21 | 
22 |       - name: Setup Node.js 20.x
23 |         uses: actions/setup-node@v3
24 |         with:
25 |           node-version: 20.x
26 |           registry-url: "https://registry.npmjs.org"
27 | 
28 |       - name: Install dependencies
29 |         run: |
30 |           rm -rf node_modules
31 |           rm -f package-lock.json
32 |           npm install
33 | 
34 |       - name: Build
35 |         run: npm run build
36 | 
37 |       - name: Create Release Pull Request or Publish to npm
38 |         id: changesets
39 |         uses: changesets/action@v1
40 |         with:
41 |           publish: npm run release
42 |         env:
43 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
44 |           NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
45 | 
46 |       - name: Publish Canary
47 |         if: github.ref == 'refs/heads/main'
48 |         run: |
49 |           npm config set //registry.npmjs.org/:_authToken=${NODE_AUTH_TOKEN}
50 |           git checkout main
51 |           npm run release-canary
52 |         env:
53 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
54 |           NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
55 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | node_modules/
 2 | /test-results/
 3 | /playwright-report/
 4 | /blob-report/
 5 | /playwright/.cache/
 6 | screenshot.png
 7 | .DS_STORE
 8 | .cache/
 9 | .env
10 | downloads/
11 | dist/
12 | evals/**/public
13 | lib/dom/build/
14 | evals/public
15 | *.tgz
16 | evals/playground.ts
17 | tmp/
18 | eval-summary.json
19 | pnpm-lock.yaml
20 | evals/deterministic/tests/BrowserContext/tmp-test.har
21 | 


--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | pnpm-lock.yaml
2 | README.md
3 | **/*.json


--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {}
2 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |   "editor.defaultFormatter": "esbenp.prettier-vscode",
3 |   "editor.formatOnSave": true
4 | }
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Browserbase Inc.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/docs/media/chunks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/stagehand/74f9339bac3404078bfcc47fecda44cc4b0f3876/docs/media/chunks.png


--------------------------------------------------------------------------------
/docs/media/stagehand-playwright.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/stagehand/74f9339bac3404078bfcc47fecda44cc4b0f3876/docs/media/stagehand-playwright.png


--------------------------------------------------------------------------------
/eslint.config.mjs:
--------------------------------------------------------------------------------
 1 | import globals from "globals";
 2 | import pluginJs from "@eslint/js";
 3 | import tseslint from "typescript-eslint";
 4 | 
 5 | /** @type {import('eslint').Linter.Config[]} */
 6 | export default [
 7 |   { files: ["**/*.{js,mjs,cjs,ts}"] },
 8 |   { languageOptions: { globals: globals.browser } },
 9 |   { ignores: ["**/dist/**", "lib/dom/build/**"] },
10 |   pluginJs.configs.recommended,
11 |   ...tseslint.configs.recommended,
12 | ];
13 | 


--------------------------------------------------------------------------------
/evals/assets/cart.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="UTF-8" />
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 6 |     <title>Document</title>
 7 |   </head>
 8 |   <body>
 9 |     <script>
10 |       function getQueryParam(param) {
11 |         const urlParams = new URLSearchParams(window.location.search);
12 |         return urlParams.get(param);
13 |       }
14 |       const item = getQueryParam("item");
15 |       document.addEventListener("DOMContentLoaded", function () {
16 |         document.getElementById("cartItem").textContent =
17 |           `Congratulations, you have 1 ${item} in your cart`;
18 |       });
19 |     </script>
20 |     <div id="cartItem"></div>
21 |   </body>
22 | </html>
23 | 


--------------------------------------------------------------------------------
/evals/assets/peeler.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="UTF-8" />
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 6 |     <title>Document</title>
 7 |   </head>
 8 |   <body>
 9 |     <h1>Welcome to Our Page</h1>
10 | 
11 |     <div class="product-card">
12 |       <div class="product-info">
13 |         <h2>Knife Set</h2>
14 |         <p>
15 |           High-quality stainless steel knives for all your cooking needs.<a
16 |             >my stuff</a
17 |           >
18 |           more stuff
19 |         </p>
20 |       </div>
21 |       <button onclick="location.href='cart.html?item=B'">Add to cart</button>
22 |     </div>
23 |     <div class="product-card">
24 |       <div class="product-info">
25 |         <h2>Peeler</h2>
26 |         <p>The ultimate tool for peeling fruits and vegetables.</p>
27 |       </div>
28 |       <button onclick="location.href='cart.html?item=A'">Add to cart</button>
29 |     </div>
30 |     <a href="cart.html" aria-role="button">
31 |       <div>hi world</div>
32 |     </a>
33 |     <p>
34 |       Baseball evolved from older
35 |       <a href="/wiki/Bat-and-ball_games" title="Bat-and-ball games"
36 |         >bat-and-ball games</a
37 |       >
38 |       already being played in England by the mid-18th century. This game was
39 |       brought by immigrants to North America,
40 |       <a
41 |         href="/wiki/History_of_baseball_in_the_United_States"
42 |         title="History of baseball in the United States"
43 |         >where the modern version developed</a
44 |       >.
45 |     </p>
46 |   </body>
47 | </html>
48 | 


--------------------------------------------------------------------------------
/evals/deterministic/auxiliary/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/stagehand/74f9339bac3404078bfcc47fecda44cc4b0f3876/evals/deterministic/auxiliary/logo.png


--------------------------------------------------------------------------------
/evals/deterministic/bb.playwright.config.ts:
--------------------------------------------------------------------------------
 1 | import { defineConfig, devices } from "@playwright/test";
 2 | 
 3 | /**
 4 |  * See https://playwright.dev/docs/test-configuration.
 5 |  */
 6 | export default defineConfig({
 7 |   testDir: "./tests/browserbase",
 8 | 
 9 |   /* NOTE: forbidOnly is not set in this config; add it (or pass --forbid-only) to fail CI on a stray test.only. */
10 |   /* Run tests in files in parallel */
11 |   fullyParallel: true,
12 |   /* Reporter to use. See https://playwright.dev/docs/test-reporters */
13 |   //   reporter: "html",
14 |   reporter: "line",
15 |   /* Retry failed tests up to 2 times (in every environment, not only on CI) */
16 |   retries: 2,
17 | 
18 |   /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */
19 |   use: {
20 |     /* Collect trace when retrying the failed test. See https://playwright.dev/docs/trace-viewer */
21 |     trace: "on-first-retry",
22 |   },
23 | 
24 |   /* Configure projects for major browsers */
25 |   projects: [
26 |     {
27 |       name: "chromium",
28 |       use: { ...devices["Desktop Chrome"] },
29 |     },
30 |   ],
31 | });
32 | 


--------------------------------------------------------------------------------
/evals/deterministic/e2e.playwright.config.ts:
--------------------------------------------------------------------------------
 1 | import { defineConfig, devices } from "@playwright/test";
 2 | 
 3 | /**
 4 |  * See https://playwright.dev/docs/test-configuration.
 5 |  */
 6 | export default defineConfig({
 7 |   // Look in "tests" for test files...
 8 |   testDir: "./tests",
 9 |   // ...but ignore anything in "tests/browserbase" & "tests/local"
10 |   testIgnore: ["**/browserbase/**", "**/local/**"],
11 | 
12 |   /* NOTE: forbidOnly is not set in this config; add it (or pass --forbid-only) to fail CI on a stray test.only. */
13 |   /* Run tests in files in parallel */
14 |   fullyParallel: true,
15 |   /* Reporter to use. See https://playwright.dev/docs/test-reporters */
16 |   //   reporter: "html",
17 |   reporter: "line",
18 |   retries: 2,
19 | 
20 |   /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */
21 |   use: {
22 |     /* Collect trace when retrying the failed test. See https://playwright.dev/docs/trace-viewer */
23 |     trace: "on-first-retry",
24 |   },
25 | 
26 |   /* Configure projects for major browsers */
27 |   projects: [
28 |     {
29 |       name: "chromium",
30 |       use: { ...devices["Desktop Chrome"] },
31 |     },
32 |   ],
33 | });
34 | 


--------------------------------------------------------------------------------
/evals/deterministic/local.playwright.config.ts:
--------------------------------------------------------------------------------
 1 | import { defineConfig, devices } from "@playwright/test";
 2 | 
 3 | /**
 4 |  * See https://playwright.dev/docs/test-configuration.
 5 |  */
 6 | export default defineConfig({
 7 |   testDir: "./tests/local",
 8 | 
 9 |   /* Maximum time one test can run for. */
10 |   timeout: 30 * 1000,
11 | 
12 |   /* Fail the build on CI if you accidentally left test.only in the source code. */
13 |   forbidOnly: !!process.env.CI,
14 | 
15 |   /* Run tests in files in parallel */
16 |   fullyParallel: false,
17 | 
18 |   /* Reporter to use */
19 |   reporter: "line",
20 | 
21 |   /* Retry on CI only */
22 |   retries: process.env.CI ? 2 : 0,
23 | 
24 |   /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */
25 |   use: {
26 |     /* Collect trace when retrying the failed test. See https://playwright.dev/docs/trace-viewer */
27 |     trace: "on-first-retry",
28 |   },
29 | 
30 |   /* Configure projects for major browsers */
31 |   projects: [
32 |     {
33 |       name: "chromium",
34 |       use: { ...devices["Desktop Chrome"] },
35 |     },
36 |   ],
37 | });
38 | 


--------------------------------------------------------------------------------
/evals/deterministic/stagehand.config.ts:
--------------------------------------------------------------------------------
 1 | import { default as DefaultStagehandConfig } from "@/stagehand.config";
 2 | import type { ConstructorParams } from "@/dist";
 3 | import dotenv from "dotenv";
 4 | dotenv.config({ path: "../../.env" });
 5 | 
 6 | const StagehandConfig: ConstructorParams = {
 7 |   ...DefaultStagehandConfig,
 8 |   env: "LOCAL" /* Environment to run Stagehand in */,
 9 |   verbose: 1 /* Logging verbosity level (0=quiet, 1=normal, 2=verbose) */,
10 |   browserbaseSessionCreateParams: {
11 |     projectId: process.env.BROWSERBASE_PROJECT_ID,
12 |   },
13 |   enableCaching: false /* Enable caching functionality */,
14 |   localBrowserLaunchOptions: {
15 |     headless: true /* Run browser in headless mode */,
16 |   },
17 | };
18 | export default StagehandConfig;
19 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/BrowserContext/addInitScript.test.ts:
--------------------------------------------------------------------------------
 1 | import { test, expect } from "@playwright/test";
 2 | import { Stagehand } from "@/dist";
 3 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 4 | 
 5 | test.describe("StagehandContext - addInitScript", () => {
 6 |   test("should inject a script on the context before pages load", async () => {
 7 |     const stagehand = new Stagehand(StagehandConfig);
 8 |     await stagehand.init();
 9 | 
10 |     const context = stagehand.context;
11 | 
12 |     await context.addInitScript(() => {
13 |       const w = window as typeof window & {
14 |         __testContextScriptVar?: string;
15 |       };
16 |       w.__testContextScriptVar = "Hello from context.initScript!";
17 |     });
18 | 
19 |     const pageA = await context.newPage();
20 |     await pageA.goto("https://example.com");
21 | 
22 |     const resultA = await pageA.evaluate(() => {
23 |       const w = window as typeof window & {
24 |         __testContextScriptVar?: string;
25 |       };
26 |       return w.__testContextScriptVar;
27 |     });
28 |     expect(resultA).toBe("Hello from context.initScript!");
29 | 
30 |     const pageB = await context.newPage();
31 |     await pageB.goto("https://docs.browserbase.com");
32 | 
33 |     const resultB = await pageB.evaluate(() => {
34 |       const w = window as typeof window & {
35 |         __testContextScriptVar?: string;
36 |       };
37 |       return w.__testContextScriptVar;
38 |     });
39 |     expect(resultB).toBe("Hello from context.initScript!");
40 | 
41 |     await stagehand.close();
42 |   });
43 | });
44 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/BrowserContext/cookies.test.ts:
--------------------------------------------------------------------------------
 1 | import { test, expect } from "@playwright/test";
 2 | import { Stagehand } from "@/dist";
 3 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 4 | 
 5 | test.describe("StagehandContext - Cookies", () => {
 6 |   let stagehand: Stagehand;
 7 | 
 8 |   test.beforeEach(async () => {
 9 |     stagehand = new Stagehand(StagehandConfig);
10 |     await stagehand.init();
11 |   });
12 | 
13 |   test.afterEach(async () => {
14 |     await stagehand.close();
15 |   });
16 | 
17 |   test("should add cookies and retrieve them", async () => {
18 |     const context = stagehand.context; // This is the wrapped BrowserContext
19 |     const url = "https://example.com";
20 | 
21 |     await context.addCookies([
22 |       {
23 |         name: "myCookie",
24 |         value: "myValue",
25 |         domain: "example.com",
26 |         path: "/",
27 |         expires: Math.floor(Date.now() / 1000) + 3600,
28 |         httpOnly: false,
29 |         secure: false,
30 |         sameSite: "Lax",
31 |       },
32 |     ]);
33 | 
34 |     const cookies = await context.cookies(url);
35 |     expect(cookies.length).toBeGreaterThan(0);
36 | 
37 |     const myCookie = cookies.find((c) => c.name === "myCookie");
38 |     expect(myCookie).toBeDefined();
39 |     expect(myCookie?.value).toBe("myValue");
40 |   });
41 | 
42 |   test("should clear all cookies", async () => {
43 |     const context = stagehand.context;
44 |     const url = "https://example.com";
45 | 
46 |     await context.addCookies([
47 |       {
48 |         name: "myOtherCookie",
49 |         value: "anotherValue",
50 |         domain: "example.com",
51 |         path: "/",
52 |         expires: Math.floor(Date.now() / 1000) + 3600,
53 |         httpOnly: false,
54 |         secure: false,
55 |         sameSite: "Lax",
56 |       },
57 |     ]);
58 | 
59 |     const cookiesBefore = await context.cookies(url);
60 |     const found = cookiesBefore.some((c) => c.name === "myOtherCookie");
61 |     expect(found).toBe(true);
62 | 
63 |     await context.clearCookies();
64 | 
65 |     const cookiesAfter = await context.cookies(url);
66 |     const stillFound = cookiesAfter.some((c) => c.name === "myOtherCookie");
67 |     expect(stillFound).toBe(false);
68 |   });
69 | });
70 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/Errors/apiKeyError.test.ts:
--------------------------------------------------------------------------------
 1 | import { test, expect } from "@playwright/test";
 2 | import { Stagehand } from "@/dist";
 3 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 4 | import { z } from "zod";
 5 | 
 6 | /**
 7 |  * Confirms that `extract`, `act`, and `observe` each reject with a
 8 |  * "No LLM API key or LLM Client configured" error.
 9 |  * NOTE(review): presumably StagehandConfig supplies no LLM credentials when
10 |  * these tests run — confirm against stagehand.config.ts.
11 |  */
12 | test.describe("API key/LLMClient error", () => {
13 |   /**
14 |    * Starts a Stagehand instance, opens the Browserbase docs, invokes `call`,
15 |    * and asserts that it threw the missing-LLM-configuration error.
16 |    *
17 |    * The instance is closed in `finally`, so a failing assertion (or a failing
18 |    * navigation) no longer leaves the browser session open.
19 |    */
20 |   async function expectMissingLLMError(
21 |     call: (stagehand: Stagehand) => Promise<unknown>,
22 |   ): Promise<void> {
23 |     const stagehand = new Stagehand(StagehandConfig);
24 |     await stagehand.init();
25 | 
26 |     try {
27 |       await stagehand.page.goto("https://docs.browserbase.com/introduction");
28 | 
29 |       let errorThrown: Error | null = null;
30 |       try {
31 |         await call(stagehand);
32 |       } catch (error) {
33 |         errorThrown = error as Error;
34 |       }
35 | 
36 |       expect(errorThrown).toBeInstanceOf(Error);
37 |       expect(errorThrown?.message).toContain(
38 |         "No LLM API key or LLM Client configured",
39 |       );
40 |     } finally {
41 |       await stagehand.close();
42 |     }
43 |   }
44 | 
45 |   test("Should confirm that we get an error if we call extract without LLM API key or LLMClient", async () => {
46 |     await expectMissingLLMError((stagehand) =>
47 |       stagehand.page.extract({
48 |         instruction:
49 |           "From the introduction page, extract the explanation of what Browserbase is.",
50 |         schema: z.object({
51 |           stars: z.string().describe("the explanation of what Browserbase is"),
52 |         }),
53 |       }),
54 |     );
55 |   });
56 | 
57 |   test("Should confirm that we get an error if we call act without LLM API key or LLMClient", async () => {
58 |     await expectMissingLLMError((stagehand) =>
59 |       stagehand.page.act({
60 |         action: "Click on the 'Quickstart' section",
61 |       }),
62 |     );
63 |   });
64 | 
65 |   test("Should confirm that we get an error if we call observe without LLM API key or LLMClient", async () => {
66 |     await expectMissingLLMError((stagehand) => stagehand.page.observe());
67 |   });
68 | });
69 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/browserbase/downloads.test.ts:
--------------------------------------------------------------------------------
 1 | import { test, expect } from "@playwright/test";
 2 | import AdmZip from "adm-zip";
 3 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 4 | import { Stagehand } from "@/dist";
 5 | import Browserbase from "@browserbasehq/sdk";
 6 | 
 7 | // File name expected inside the session's download archive: "sandstorm-",
 8 | // one or more 13-digit groups, then ".mp3".
 9 | const downloadRe = /sandstorm-(\d{13})+\.mp3/;
10 | 
11 | /**
12 |  * Downloads a file in the browser, closes the session, and then checks via
13 |  * the Browserbase SDK that the session's download archive contains an entry
14 |  * matching `downloadRe` with the expected size.
15 |  */
16 | test("Downloads", async () => {
17 |   const stagehand = new Stagehand(StagehandConfig);
18 |   await stagehand.init();
19 |   const page = stagehand.page;
20 |   const context = stagehand.context;
21 | 
22 |   const client = await context.newCDPSession(page);
23 |   await client.send("Browser.setDownloadBehavior", {
24 |     behavior: "allow",
25 |     // `downloadPath` gets appended to the browser's default download directory.
26 |     // set to "downloads", it ends up being "/app/apps/browser/downloads/<file>".
27 |     downloadPath: "downloads",
28 |     eventsEnabled: true,
29 |   });
30 | 
31 |   await page.goto("https://browser-tests-alpha.vercel.app/api/download-test");
32 | 
33 |   // Start waiting for the download before clicking so the event is not missed.
34 |   const [download] = await Promise.all([
35 |     page.waitForEvent("download"),
36 |     page.locator("#download").click(),
37 |   ]);
38 | 
39 |   const downloadError = await download.failure();
40 | 
41 |   await stagehand.close();
42 | 
43 |   if (downloadError !== null) {
44 |     throw new Error(
45 |       `Download for session ${stagehand.browserbaseSessionID} failed: ${downloadError}`,
46 |     );
47 |   }
48 | 
49 |   // `toPass` returns a promise. It must be awaited: without the `await` the
50 |   // test finishes before the retried block can report a failure, so the
51 |   // archive checks below would never be able to fail the test.
52 |   await expect(async () => {
53 |     const bb = new Browserbase();
54 |     const zipBuffer = await bb.sessions.downloads.list(
55 |       stagehand.browserbaseSessionID,
56 |     );
57 |     if (!zipBuffer) {
58 |       throw new Error(
59 |         `Download buffer is empty for session ${stagehand.browserbaseSessionID}`,
60 |       );
61 |     }
62 | 
63 |     const zip = new AdmZip(Buffer.from(await zipBuffer.arrayBuffer()));
64 |     const zipEntries = zip.getEntries();
65 |     const mp3Entry = zipEntries.find((entry) =>
66 |       downloadRe.test(entry.entryName),
67 |     );
68 | 
69 |     if (!mp3Entry) {
70 |       throw new Error(
71 |         `Session ${stagehand.browserbaseSessionID} is missing a file matching "${downloadRe.toString()}" in its zip entries: ${JSON.stringify(zipEntries.map((entry) => entry.entryName))}`,
72 |       );
73 |     }
74 | 
75 |     const expectedFileSize = 6137541;
76 |     expect(mp3Entry.header.size).toBe(expectedFileSize);
77 |   }).toPass({
78 |     timeout: 30_000,
79 |   });
80 | });
81 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/browserbase/sessions.test.ts:
--------------------------------------------------------------------------------
 1 | import { test, expect } from "@playwright/test";
 2 | import { Stagehand } from "@/dist";
 3 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 4 | import Browserbase from "@browserbasehq/sdk";
 5 | 
 6 | /**
 7 |  * Verifies that an existing Browserbase session can be resumed, both by
 8 |  * session ID and by connecting to its CDP URL.
 9 |  *
10 |  * `beforeAll` creates a session and navigates it to the Stagehand docs; each
11 |  * test attaches to that session and expects to find the same URL loaded.
12 |  */
13 | test.describe("Browserbase Sessions", () => {
14 |   let browserbase: Browserbase;
15 |   let sessionId: string;
16 |   let bigStagehand: Stagehand;
17 | 
18 |   test.beforeAll(async () => {
19 |     browserbase = new Browserbase({
20 |       apiKey: process.env.BROWSERBASE_API_KEY,
21 |     });
22 |     bigStagehand = new Stagehand({
23 |       ...StagehandConfig,
24 |       env: "BROWSERBASE",
25 |       browserbaseSessionCreateParams: {
26 |         projectId: process.env.BROWSERBASE_PROJECT_ID,
27 |         // NOTE(review): presumably lets the session outlive a client
28 |         // disconnect so the tests below can re-attach — confirm in SDK docs.
29 |         keepAlive: true,
30 |       },
31 |     });
32 |     await bigStagehand.init();
33 |     await bigStagehand.page.goto(
34 |       "https://docs.stagehand.dev/get_started/introduction",
35 |     );
36 |     sessionId = bigStagehand.browserbaseSessionID;
37 |     if (!sessionId) {
38 |       throw new Error("Failed to get browserbase session ID");
39 |     }
40 |   });
41 | 
42 |   test.afterAll(async () => {
43 |     // `bigStagehand` is unassigned if beforeAll failed before constructing it.
44 |     await bigStagehand?.close();
45 |   });
46 | 
47 |   test("resumes a session via sessionId", async () => {
48 |     const stagehand = new Stagehand({
49 |       ...StagehandConfig,
50 |       env: "BROWSERBASE",
51 |       browserbaseSessionID: sessionId,
52 |     });
53 |     await stagehand.init();
54 | 
55 |     try {
56 |       // The resumed session should still be on the page opened in beforeAll.
57 |       expect(stagehand.page.url()).toBe(
58 |         "https://docs.stagehand.dev/get_started/introduction",
59 |       );
60 |     } finally {
61 |       // Close even when the assertion fails so the connection is released.
62 |       await stagehand.close();
63 |     }
64 |   });
65 | 
66 |   test("resumes a session via CDP URL", async () => {
67 |     const session = await browserbase.sessions.retrieve(sessionId);
68 |     const stagehand = new Stagehand({
69 |       ...StagehandConfig,
70 |       env: "LOCAL",
71 |       localBrowserLaunchOptions: {
72 |         headless: true,
73 |         cdpUrl: session.connectUrl,
74 |       },
75 |     });
76 |     await stagehand.init();
77 | 
78 |     try {
79 |       expect(stagehand.page.url()).toBe(
80 |         "https://docs.stagehand.dev/get_started/introduction",
81 |       );
82 |     } finally {
83 |       // This instance used to be left open; close it like the test above does.
84 |       await stagehand.close();
85 |     }
86 |   });
87 | });
88 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/browserbase/uploads.test.ts:
--------------------------------------------------------------------------------
 1 | import { join } from "node:path";
 2 | import { test, expect } from "@playwright/test";
 3 | import { Stagehand } from "@/dist";
 4 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 5 | 
 6 | /**
 7 |  * Exercises Playwright's `setInputFiles` upload API through a Stagehand page.
 8 |  */
 9 | test.describe("Playwright Upload", () => {
10 |   let stagehand: Stagehand;
11 | 
12 |   test.beforeAll(async () => {
13 |     stagehand = new Stagehand(StagehandConfig);
14 |     await stagehand.init();
15 |   });
16 | 
17 |   test.afterAll(async () => {
18 |     await stagehand.close();
19 |   });
20 | 
21 |   test("uploads a file", async () => {
22 |     const { page } = stagehand;
23 |     await page.goto("https://browser-tests-alpha.vercel.app/api/upload-test");
24 | 
25 |     // Fixture image stored under evals/deterministic/auxiliary.
26 |     const logoPath = join(__dirname, "../..", "auxiliary", "logo.png");
27 |     await page.locator("#fileUpload").setInputFiles(logoPath);
28 | 
29 |     // Read back the name and size shown in the #fileName / #fileSize elements.
30 |     const reportedName = await page.locator("#fileName").innerText();
31 |     const reportedSize = Number(await page.locator("#fileSize").innerText());
32 | 
33 |     expect(reportedName).toBe("logo.png");
34 |     expect(reportedSize).toBeGreaterThan(0);
35 |   });
36 | });
37 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/page/addInitScript.test.ts:
--------------------------------------------------------------------------------
 1 | import { test, expect } from "@playwright/test";
 2 | import { Stagehand } from "@/dist";
 3 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 4 | 
 5 | /**
 6 |  * Init-script behaviour on a Stagehand page: a script registered with
 7 |  * `addInitScript` is visible after each navigation, and a helper deleted
 8 |  * from `window` is callable again through a later `page.evaluate`.
 9 |  */
10 | test.describe("StagehandPage - addInitScript", () => {
11 |   test("should inject a script before the page loads", async () => {
12 |     type MarkedWindow = typeof window & { __testInitScriptVar?: string };
13 |     // Evaluated in the browser: reads back whatever the init script stored.
14 |     const readMarker = () => (window as MarkedWindow).__testInitScriptVar;
15 | 
16 |     const stagehand = new Stagehand(StagehandConfig);
17 |     await stagehand.init();
18 |     const { page } = stagehand;
19 | 
20 |     await page.addInitScript(() => {
21 |       (window as MarkedWindow).__testInitScriptVar = "Hello from init script!";
22 |     });
23 | 
24 |     // The marker must be present on the first page that is loaded ...
25 |     await page.goto("https://example.com");
26 |     expect(await page.evaluate(readMarker)).toBe("Hello from init script!");
27 | 
28 |     // ... and again after navigating to a different site.
29 |     await page.goto("https://docs.browserbase.com/");
30 |     expect(await page.evaluate(readMarker)).toBe("Hello from init script!");
31 | 
32 |     await stagehand.close();
33 |   });
34 | 
35 |   test("checks if init scripts are re-added and available even if they've been deleted", async () => {
36 |     const stagehand = new Stagehand(StagehandConfig);
37 |     await stagehand.init();
38 |     const { page } = stagehand;
39 | 
40 |     await page.goto(
41 |       "https://browserbase.github.io/stagehand-eval-sites/sites/aigrant/",
42 |     );
43 | 
44 |     // Remove the getScrollableElementXpaths helper and the
45 |     // __stagehandInjected flag from the page.
46 |     await page.evaluate(() => {
47 |       delete window.getScrollableElementXpaths;
48 |       delete window.__stagehandInjected;
49 |     });
50 | 
51 |     // Calling the deleted helper must still work: page.evaluate is expected
52 |     // to notice that it is gone and inject it again before running this.
53 |     const xpaths = await page.evaluate(() =>
54 |       window.getScrollableElementXpaths(),
55 |     );
56 | 
57 |     await stagehand.close();
58 |     // this is the only scrollable element on the page
59 |     expect(xpaths).toContain("/html");
60 |   });
61 | });
62 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/page/addTags.test.ts:
--------------------------------------------------------------------------------
 1 | import { test, expect } from "@playwright/test";
 2 | import { Stagehand } from "@/dist";
 3 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 4 | 
 5 | /**
 6 |  * Checks that `addScriptTag` and `addStyleTag` take effect on a Stagehand
 7 |  * page. Both tests share one Stagehand instance and set their own content.
 8 |  */
 9 | test.describe("StagehandPage - addScriptTag and addStyleTag", () => {
10 |   let stagehand: Stagehand;
11 | 
12 |   test.beforeAll(async () => {
13 |     stagehand = new Stagehand(StagehandConfig);
14 |     await stagehand.init();
15 |   });
16 | 
17 |   test.afterAll(async () => {
18 |     await stagehand.close();
19 |   });
20 | 
21 |   test("should inject a script tag and have access to the defined function", async () => {
22 |     const page = stagehand.page;
23 | 
24 |     await page.setContent(`
25 |       <html>
26 |       <body>
27 |         <h1 id="greeting">Hello, world!</h1>
28 |       </body>
29 |       </html>
30 |     `);
31 | 
32 |     // The injected script defines window.sayHello, which rewrites the heading.
33 |     await page.addScriptTag({
34 |       content: `
35 |         window.sayHello = function() {
36 |           document.getElementById("greeting").textContent = "Hello from injected script!";
37 |         }
38 |       `,
39 |     });
40 | 
41 |     // Invoke the injected function from the page context.
42 |     await page.evaluate(() => {
43 |       (window as typeof window & { sayHello?: () => void }).sayHello?.();
44 |     });
45 | 
46 |     const headingText = await page.locator("#greeting").textContent();
47 |     expect(headingText).toBe("Hello from injected script!");
48 |   });
49 | 
50 |   test("should inject a style tag and apply styles", async () => {
51 |     const page = stagehand.page;
52 | 
53 |     await page.setContent(`
54 |       <html>
55 |       <body>
56 |         <div id="styledDiv">Some text</div>
57 |       </body>
58 |       </html>
59 |     `);
60 | 
61 |     await page.addStyleTag({
62 |       content: `
63 |         #styledDiv {
64 |           color: red;
65 |           font-weight: bold;
66 |         }
67 |       `,
68 |     });
69 | 
70 |     // Read each computed property back from the styled element.
71 |     const computedColor = await page.evaluate(
72 |       () => window.getComputedStyle(document.getElementById("styledDiv")!).color,
73 |     );
74 |     expect(computedColor).toBe("rgb(255, 0, 0)");
75 | 
76 |     const computedWeight = await page.evaluate(
77 |       () =>
78 |         window.getComputedStyle(document.getElementById("styledDiv")!)
79 |           .fontWeight,
80 |     );
81 |     // The computed weight may be reported as the keyword or as its number.
82 |     expect(["bold", "700"]).toContain(computedWeight);
83 |   });
84 | });
85 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/page/bringToFront.test.ts:
--------------------------------------------------------------------------------
 1 | import { test, expect } from "@playwright/test";
 2 | import { Stagehand } from "@/dist";
 3 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 4 | 
 5 | /**
 6 |  * Alternates `bringToFront()` between two pages of the same context and
 7 |  * checks the titles collected along the way.
 8 |  */
 9 | test.describe("StagehandPage - bringToFront", () => {
10 |   test("should bring a background page to the front and allow further actions", async () => {
11 |     const stagehand = new Stagehand(StagehandConfig);
12 |     await stagehand.init();
13 | 
14 |     const firstPage = stagehand.page;
15 |     const secondPage = await stagehand.context.newPage();
16 | 
17 |     await secondPage.goto("https://example.com");
18 |     const secondTitle = await secondPage.title();
19 |     console.log("Page2 Title:", secondTitle);
20 | 
21 |     await firstPage.goto("https://www.google.com");
22 |     const firstTitleBefore = await firstPage.title();
23 |     console.log("Page1 Title before:", firstTitleBefore);
24 | 
25 |     // Bring the first page forward and navigate it again.
26 |     await firstPage.bringToFront();
27 |     await firstPage.goto("https://docs.browserbase.com");
28 |     const firstTitleAfter = await firstPage.title();
29 |     console.log("Page1 Title after:", firstTitleAfter);
30 | 
31 |     // Switch back to the second page; its URL is only logged, not asserted.
32 |     await secondPage.bringToFront();
33 |     const secondUrl = secondPage.url();
34 |     console.log("Page2 URL before navigation:", secondUrl);
35 | 
36 |     await stagehand.close();
37 | 
38 |     // All assertions run on values captured before the browser was closed.
39 |     expect(firstTitleBefore).toContain("Google");
40 |     expect(firstTitleAfter).toContain("Browserbase");
41 |     expect(secondTitle).toContain("Example Domain");
42 |   });
43 | });
44 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/page/content.test.ts:
--------------------------------------------------------------------------------
 1 | import { test, expect } from "@playwright/test";
 2 | import { Stagehand } from "@/dist";
 3 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 4 | 
 5 | /**
 6 |  * Checks that `page.content()` returns the HTML of the loaded page.
 7 |  */
 8 | test.describe("StagehandPage - content", () => {
 9 |   test("should retrieve the full HTML content of the page", async () => {
10 |     const stagehand = new Stagehand(StagehandConfig);
11 |     await stagehand.init();
12 |     const { page } = stagehand;
13 | 
14 |     await page.goto("https://example.com");
15 |     const markup = await page.content();
16 | 
17 |     // Both the <title> and the <h1> of example.com must appear verbatim.
18 |     for (const fragment of [
19 |       "<title>Example Domain</title>",
20 |       "<h1>Example Domain</h1>",
21 |     ]) {
22 |       expect(markup).toContain(fragment);
23 |     }
24 | 
25 |     await stagehand.close();
26 |   });
27 | });
28 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/page/evaluate.test.ts:
--------------------------------------------------------------------------------
 1 | import { test, expect } from "@playwright/test";
 2 | import { Stagehand } from "@/dist";
 3 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 4 | 
 5 | /**
 6 |  * Checks `page.evaluate` with a plain expression, a DOM read, and an
 7 |  * object return value.
 8 |  */
 9 | test.describe("StagehandPage - JavaScript Evaluation", () => {
10 |   test("can evaluate JavaScript in the page context", async () => {
11 |     const stagehand = new Stagehand(StagehandConfig);
12 |     await stagehand.init();
13 |     const { page } = stagehand;
14 | 
15 |     await page.goto("https://example.com");
16 | 
17 |     // A plain arithmetic expression.
18 |     const four = await page.evaluate(() => 2 + 2);
19 |     expect(four).toBe(4);
20 | 
21 |     // A value read from the document.
22 |     const title = await page.evaluate(() => document.title);
23 |     expect(title).toMatch(/example/i);
24 | 
25 |     // An object built in the browser and returned to the test.
26 |     const payload = await page.evaluate(() => ({
27 |       message: "Hello from the browser",
28 |       userAgent: navigator.userAgent,
29 |     }));
30 |     expect(payload).toHaveProperty("message", "Hello from the browser");
31 |     expect(payload.userAgent).toBeDefined();
32 | 
33 |     await stagehand.close();
34 |   });
35 | });
36 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/page/expose.test.ts:
--------------------------------------------------------------------------------
 1 | import { test, expect } from "@playwright/test";
 2 | import { Stagehand } from "@/dist";
 3 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 4 | 
 5 | /**
 6 |  * Walks through `evaluateHandle`, `exposeBinding`, and `exposeFunction` on a
 7 |  * single Stagehand page.
 8 |  */
 9 | test.describe("StagehandPage - evaluateHandle, exposeBinding, exposeFunction", () => {
10 |   let stagehand: Stagehand;
11 | 
12 |   test.beforeAll(async () => {
13 |     stagehand = new Stagehand(StagehandConfig);
14 |     await stagehand.init();
15 |   });
16 | 
17 |   test.afterAll(async () => {
18 |     await stagehand.close();
19 |   });
20 | 
21 |   test("demonstrates evaluateHandle, exposeBinding, and exposeFunction", async () => {
22 |     const page = stagehand.page;
23 | 
24 |     await page.setContent(`
25 |       <html>
26 |         <body>
27 |           <div id="myDiv">Initial Text</div>
28 |         </body>
29 |       </html>
30 |     `);
31 | 
32 |     // evaluateHandle: get a handle to the div, then mutate it via the handle.
33 |     const divHandle = await page.evaluateHandle(() =>
34 |       document.getElementById("myDiv"),
35 |     );
36 |     await divHandle.evaluate((div, newText) => {
37 |       div.textContent = newText;
38 |     }, "Text updated via evaluateHandle");
39 | 
40 |     const divText = await page.locator("#myDiv").textContent();
41 |     expect(divText).toBe("Text updated via evaluateHandle");
42 | 
43 |     // exposeBinding: the page calls a function that runs in the test process.
44 |     await page.exposeBinding("myBinding", async (source, arg: string) => {
45 |       console.log("myBinding called from page with arg:", arg);
46 |       return `Node responded with: I got your message: "${arg}"`;
47 |     });
48 | 
49 |     const bindingReply = await page.evaluate(async () => {
50 |       type BoundWindow = typeof window & {
51 |         myBinding?: (arg: string) => Promise<string>;
52 |       };
53 |       return (window as BoundWindow).myBinding?.("Hello from the browser");
54 |     });
55 |     expect(bindingReply).toMatch(/I got your message/);
56 | 
57 |     // exposeFunction: same idea, without the `source` argument.
58 |     await page.exposeFunction("addNumbers", (a: number, b: number) => a + b);
59 | 
60 |     const total = await page.evaluate(async () => {
61 |       type AdderWindow = typeof window & {
62 |         addNumbers?: (a: number, b: number) => number;
63 |       };
64 |       return (window as AdderWindow).addNumbers?.(3, 7);
65 |     });
66 |     expect(total).toBe(10);
67 |   });
68 | });
69 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/page/frames.test.ts:
--------------------------------------------------------------------------------
 1 | import { test, expect } from "@playwright/test";
 2 | import { Stagehand } from "@/dist";
 3 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 4 | 
 5 | /**
 6 |  * Checks the frame accessors on a Stagehand page against a document that
 7 |  * embeds two named `srcdoc` iframes.
 8 |  */
 9 | test.describe("StagehandPage - frame operations", () => {
10 |   let stagehand: Stagehand;
11 | 
12 |   test.beforeAll(async () => {
13 |     stagehand = new Stagehand(StagehandConfig);
14 |     await stagehand.init();
15 |   });
16 | 
17 |   test.afterAll(async () => {
18 |     await stagehand.close();
19 |   });
20 | 
21 |   test("should use page.mainFrame(), page.frames(), page.frame(), and page.frameLocator()", async () => {
22 |     const page = stagehand.page;
23 | 
24 |     await page.setContent(`
25 |       <html>
26 |         <body>
27 |           <iframe
28 |             name="frame-one"
29 |             srcdoc="<html><body><h1>Hello from Frame 1</h1></body></html>">
30 |           </iframe>
31 | 
32 |           <iframe
33 |             name="frame-two"
34 |             srcdoc="<html><body><h1>Hello from Frame 2</h1></body></html>">
35 |           </iframe>
36 |         </body>
37 |       </html>
38 |     `);
39 | 
40 |     // Wait for both iframe elements before inspecting the frame list.
41 |     for (const frameName of ["frame-one", "frame-two"]) {
42 |       await page.waitForSelector(`iframe[name="${frameName}"]`);
43 |     }
44 | 
45 |     // page.frames(): the main frame plus the two iframes.
46 |     const allFrames = page.frames();
47 |     console.log(
48 |       "All frames found:",
49 |       allFrames.map((f) => f.name()),
50 |     );
51 |     expect(allFrames).toHaveLength(3);
52 | 
53 |     // page.mainFrame(): the top-level frame has an empty name.
54 |     const topFrame = page.mainFrame();
55 |     console.log("Main frame name:", topFrame.name());
56 |     expect(topFrame.name()).toBe("");
57 | 
58 |     // page.frame(): look up the first iframe by name.
59 |     const frameOne = page.frame({ name: "frame-one" });
60 |     expect(frameOne).not.toBeNull();
61 |     const frameOneHeading = await frameOne?.locator("h1").textContent();
62 |     expect(frameOneHeading).toBe("Hello from Frame 1");
63 | 
64 |     // page.frameLocator(): reach into the second iframe by selector.
65 |     const frameTwoHeading = await page
66 |       .frameLocator("iframe[name='frame-two']")
67 |       .locator("h1")
68 |       .textContent();
69 |     expect(frameTwoHeading).toBe("Hello from Frame 2");
70 | 
71 |     // page.frame() must agree with the frameLocator result.
72 |     const frameTwo = page.frame({ name: "frame-two" });
73 |     expect(frameTwo).not.toBeNull();
74 |     const frameTwoHeadingAgain = await frameTwo?.locator("h1").textContent();
75 |     expect(frameTwoHeadingAgain).toBe("Hello from Frame 2");
76 |   });
77 | });
78 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/page/getBy.test.ts:
--------------------------------------------------------------------------------
 1 | import { test, expect } from "@playwright/test";
 2 | import { Stagehand } from "@/dist";
 3 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 4 | 
 5 | /**
 6 |  * Checks each built-in `getBy*` locator against a small static document
 7 |  * that contains one matching element per locator.
 8 |  */
 9 | test.describe("StagehandPage - Built-in locators", () => {
10 |   let stagehand: Stagehand;
11 | 
12 |   test.beforeAll(async () => {
13 |     stagehand = new Stagehand(StagehandConfig);
14 |     await stagehand.init();
15 |   });
16 | 
17 |   test.afterAll(async () => {
18 |     await stagehand.close();
19 |   });
20 | 
21 |   test("demonstrates getByAltText, getByLabel, getByPlaceholder, getByRole, getByTestId, getByText, getByTitle", async () => {
22 |     const page = stagehand.page;
23 |     await page.setContent(`
24 |       <html>
25 |         <body>
26 |           <img src="avatar.png" alt="Profile picture" />
27 |           <label for="username">Username</label>
28 |           <input id="username" type="text" />
29 |           <input placeholder="Enter your email" type="email" />
30 |           <button>Sign in</button>
31 |           <div data-testid="greeting">Hello World!</div>
32 |           <p>This is some descriptive text on the page.</p>
33 |           <h1 title="A heading for the page">Site Title</h1>
34 |         </body>
35 |       </html>
36 |     `);
37 | 
38 |     // Locators matched by alt text, label, placeholder, and ARIA role.
39 |     await expect(page.getByAltText("Profile picture")).toBeVisible();
40 |     await expect(page.getByLabel("Username")).toBeVisible();
41 |     await expect(page.getByPlaceholder("Enter your email")).toBeVisible();
42 |     await expect(page.getByRole("button", { name: "Sign in" })).toBeVisible();
43 | 
44 |     // Locator matched by the data-testid attribute.
45 |     await expect(page.getByTestId("greeting")).toHaveText("Hello World!");
46 | 
47 |     // Locators matched by visible text and by the title attribute.
48 |     await expect(
49 |       page.getByText("This is some descriptive text on the page."),
50 |     ).toBeVisible();
51 |     await expect(page.getByTitle("A heading for the page")).toHaveText(
52 |       "Site Title",
53 |     );
54 |   });
55 | });
56 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/page/navigation.test.ts:
--------------------------------------------------------------------------------
 1 | import { test, expect } from "@playwright/test";
 2 | import { Stagehand } from "@/dist";
 3 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 4 | 
 5 | /**
 6 |  * Checks `goto`, `goBack`, and `goForward` by comparing `page.url()` after
 7 |  * each step.
 8 |  */
 9 | test.describe("StagehandPage - Navigation", () => {
10 |   test("should navigate back and forward between pages", async () => {
11 |     // URLs as reported by page.url() after each navigation.
12 |     const exampleUrl = "https://example.com/";
13 |     const docsUrl = "https://docs.browserbase.com/introduction";
14 | 
15 |     const stagehand = new Stagehand(StagehandConfig);
16 |     await stagehand.init();
17 |     const { page } = stagehand;
18 | 
19 |     await page.goto("https://example.com");
20 |     expect(page.url()).toBe(exampleUrl);
21 | 
22 |     await page.goto("https://docs.browserbase.com/introduction");
23 |     expect(page.url()).toBe(docsUrl);
24 | 
25 |     // Step back to the first page, then forward to the second again.
26 |     await page.goBack();
27 |     expect(page.url()).toBe(exampleUrl);
28 | 
29 |     await page.goForward();
30 |     expect(page.url()).toBe(docsUrl);
31 | 
32 |     await stagehand.close();
33 |   });
34 | });
35 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/page/pageContext.test.ts:
--------------------------------------------------------------------------------
 1 | import { test, expect } from "@playwright/test";
 2 | import { Stagehand } from "@/dist";
 3 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 4 | 
 5 | /**
 6 |  * Checks that `page.context()` and `stagehand.context` observe the same
 7 |  * cookies and pages. A fresh Stagehand instance is created for every test.
 8 |  */
 9 | test.describe("StagehandPage - page.context()", () => {
10 |   let stagehand: Stagehand;
11 | 
12 |   test.beforeEach(async () => {
13 |     stagehand = new Stagehand(StagehandConfig);
14 |     await stagehand.init();
15 |   });
16 | 
17 |   test.afterEach(async () => {
18 |     if (!stagehand) {
19 |       console.log("[afterEach] Stagehand was not defined, skipping close().");
20 |       return;
21 |     }
22 | 
23 |     // Best-effort cleanup: a failing close() is logged, not rethrown.
24 |     try {
25 |       await stagehand.close();
26 |     } catch (error) {
27 |       console.error("[afterEach] Error during stagehand.close():", error);
28 |     }
29 |   });
30 | 
31 |   test("should confirm page.context() and stagehand.context share state", async () => {
32 |     const viaStagehand = stagehand.context;
33 |     const viaPage = stagehand.page.context();
34 | 
35 |     // Write a cookie through the page's context ...
36 |     const oneHourFromNow = Math.floor(Date.now() / 1000) + 3600;
37 |     await viaPage.addCookies([
38 |       {
39 |         name: "stagehandTestCookie",
40 |         value: "hello-stagehand",
41 |         domain: "example.com",
42 |         path: "/",
43 |         expires: oneHourFromNow,
44 |         httpOnly: false,
45 |         secure: false,
46 |         sameSite: "Lax",
47 |       },
48 |     ]);
49 | 
50 |     // ... and read it back through stagehand.context.
51 |     const storedCookies = await viaStagehand.cookies("https://example.com");
52 |     const stored = storedCookies.find(
53 |       (cookie) => cookie.name === "stagehandTestCookie",
54 |     );
55 |     expect(stored).toBeDefined();
56 |     expect(stored?.value).toBe("hello-stagehand");
57 | 
58 |     // A page opened through one context must be listed by the other.
59 |     const extraPage = await viaPage.newPage();
60 |     await extraPage.goto("https://example.com");
61 | 
62 |     const listedPage = viaStagehand
63 |       .pages()
64 |       .find((candidate) => candidate.url() === "https://example.com/");
65 |     expect(listedPage).toBeDefined();
66 |   });
67 | });
68 | 


--------------------------------------------------------------------------------
/evals/deterministic/tests/page/reload.test.ts:
--------------------------------------------------------------------------------
 1 | import { test, expect } from "@playwright/test";
 2 | import { Stagehand } from "@/dist";
 3 | import StagehandConfig from "@/evals/deterministic/stagehand.config";
 4 | 
 5 | /**
 6 |  * Checks that a value stored on `window` is gone after `page.reload()`.
 7 |  */
 8 | test.describe("StagehandPage - Reload", () => {
 9 |   test("should reload the page and reset page state", async () => {
10 |     type MarkedWindow = typeof window & { __testReloadMarker?: string };
11 |     // Evaluated in the browser: returns the marker, or undefined if unset.
12 |     const readMarker = () => (window as MarkedWindow).__testReloadMarker;
13 | 
14 |     const stagehand = new Stagehand(StagehandConfig);
15 |     await stagehand.init();
16 |     const { page } = stagehand;
17 | 
18 |     await page.goto("https://docs.browserbase.com/");
19 | 
20 |     // Plant a marker on window and confirm it is readable before reloading.
21 |     await page.evaluate(() => {
22 |       (window as MarkedWindow).__testReloadMarker = "Hello Reload!";
23 |     });
24 |     expect(await page.evaluate(readMarker)).toBe("Hello Reload!");
25 | 
26 |     await page.reload();
27 | 
28 |     // The reloaded document must not carry the marker over.
29 |     expect(await page.evaluate(readMarker)).toBeUndefined();
30 | 
31 |     await stagehand.close();
32 |   });
33 | });
34 | 


--------------------------------------------------------------------------------
/evals/env.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Environment in which the evaluations run, read from EVAL_ENV.
 3 |  *
 4 |  * Resolves to "BROWSERBASE" when EVAL_ENV equals "browserbase" (compared
 5 |  * case-insensitively); any other value, or no value, resolves to "LOCAL".
 6 |  */
 7 | export const env: "BROWSERBASE" | "LOCAL" =
 8 |   (process.env.EVAL_ENV ?? "").toLowerCase() !== "browserbase"
 9 |     ? "LOCAL"
10 |     : "BROWSERBASE";
11 | 
12 | /**
13 |  * Whether caching is enabled, read from EVAL_ENABLE_CACHING.
14 |  * Only the value "true" (compared case-insensitively) enables it; caching
15 |  * stays disabled for any other value or when the variable is unset.
16 |  */
17 | export const enableCaching =
18 |   (process.env.EVAL_ENABLE_CACHING ?? "").toLowerCase() === "true";
19 | 


--------------------------------------------------------------------------------
/evals/scoring.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * This file implements scoring functions needed by braintrust.
 3 |  */
 4 | 
 5 | import { EvalArgs, EvalInput, EvalResult } from "@/types/evals";
 6 | 
 7 | /**
 8 |  * Scoring function: exactMatch
 9 |  * Scores 1 when the task output matches the expectation and 0 otherwise.
10 |  * `expected` defaults to true when it is null or undefined.
11 |  *
12 |  * - expected === true: a boolean output scores 1 only when it is true; any
13 |  *   other output scores 1 only when its `_success` flag is truthy.
14 |  * - otherwise: scores 1 only when `output === expected`. This is strict
15 |  *   equality, so an object output only matches the very same object.
16 |  *
17 |  * A null or undefined output scores 0 rather than throwing.
18 |  */
19 | export function exactMatch(
20 |   args: EvalArgs<EvalInput, boolean | { _success: boolean }, unknown>,
21 | ): EvalResult {
22 |   const { output } = args;
23 |   console.log(`Task "${args.input.name}" returned: ${output}`);
24 | 
25 |   const expected = args.expected ?? true;
26 |   if (expected !== true) {
27 |     // A specific expectation was supplied: compare it to the output directly.
28 |     return {
29 |       name: "Exact match",
30 |       score: output === expected ? 1 : 0,
31 |     };
32 |   }
33 | 
34 |   // Success is expected: accept a bare `true` or a truthy `_success` flag.
35 |   // Optional chaining keeps a null/undefined output from raising a TypeError.
36 |   const succeeded =
37 |     typeof output === "boolean" ? output : Boolean(output?._success);
38 |   return {
39 |     name: "Exact match",
40 |     score: succeeded ? 1 : 0,
41 |   };
42 | }
43 | 
/**
 * Scoring function: errorMatch
 *
 * Reports whether the task output carries an error: the score is 1 when the
 * output is a non-null object whose `error` property is defined, otherwise 0.
 * Boolean and missing (`null`/`undefined`) outputs therefore score 0.
 *
 * @param args - Scorer arguments: `input` (the task, whose `name` is logged)
 * and `output` (the task result).
 * @returns The "Error rate" score.
 */
export function errorMatch(
  args: EvalArgs<
    EvalInput,
    boolean | { _success: boolean; error?: unknown },
    unknown
  >,
): EvalResult {
  const { output } = args;

  // Interpolating an object directly would print "[object Object]", so
  // serialize it. The `logs` key is left out because task results carry the
  // full log buffer there, which would drown the console line.
  let printable: string;
  try {
    printable =
      typeof output === "object"
        ? JSON.stringify(output, (key, value) =>
            key === "logs" ? undefined : value,
          )
        : String(output);
  } catch {
    // e.g. circular structures: fall back to the default string form.
    printable = String(output);
  }
  console.log(`Task "${args.input.name}" returned: ${printable}`);

  // `typeof null` is "object", so null must be excluded before reading `.error`.
  const hasError =
    typeof output === "object" && output !== null && output.error !== undefined;

  return {
    name: "Error rate",
    score: hasError ? 1 : 0,
  };
}
66 | 


--------------------------------------------------------------------------------
/evals/tasks/agent/google_flights.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { Evaluator } from "../../evaluator";
 3 | 
 4 | export const google_flights: EvalFunction = async ({
 5 |   debugUrl,
 6 |   sessionUrl,
 7 |   stagehand,
 8 |   logger,
 9 |   modelName,
10 | }) => {
11 |   await stagehand.page.goto("https://google.com/travel/flights");
12 | 
13 |   const agent = stagehand.agent({
14 |     model: modelName,
15 |     provider: modelName.startsWith("claude") ? "anthropic" : "openai",
16 |     instructions: `You are a helpful assistant that can help me with my tasks. You are given a task and you need to complete it without asking follow up questions. Today is ${new Date().toISOString().slice(0, 10)}. The current page is ${await stagehand.page.title()}`,
17 |   });
18 | 
19 |   const agentResult = await agent.execute({
20 |     instruction:
21 |       "Search for flights from San Francisco to New York for next weekend",
22 |     maxSteps: 15,
23 |   });
24 |   logger.log(agentResult);
25 | 
26 |   const evaluator = new Evaluator(stagehand);
27 |   const result = await evaluator.evaluate({
28 |     question:
29 |       "Does the page show flights (options, available flights, not a search form) from San Francisco to New York?",
30 |     strictResponse: true,
31 |   });
32 | 
33 |   if (result.evaluation !== "YES" && result.evaluation !== "NO") {
34 |     await stagehand.close();
35 |     return {
36 |       _success: false,
37 |       observations: "Evaluator provided an invalid response",
38 |       debugUrl,
39 |       sessionUrl,
40 |       logs: logger.getLogs(),
41 |     };
42 |   }
43 | 
44 |   if (result.evaluation === "YES") {
45 |     await stagehand.close();
46 |     return {
47 |       _success: true,
48 |       observations: result.reasoning,
49 |       debugUrl,
50 |       sessionUrl,
51 |       logs: logger.getLogs(),
52 |     };
53 |   } else {
54 |     await stagehand.close();
55 |     return {
56 |       _success: false,
57 |       observations: result.reasoning,
58 |       debugUrl,
59 |       sessionUrl,
60 |       logs: logger.getLogs(),
61 |     };
62 |   }
63 | };
64 | 


--------------------------------------------------------------------------------
/evals/tasks/agent/iframe_form.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { Evaluator } from "../../evaluator";
 3 | 
 4 | export const iframe_form: EvalFunction = async ({
 5 |   debugUrl,
 6 |   sessionUrl,
 7 |   stagehand,
 8 |   logger,
 9 |   modelName,
10 | }) => {
11 |   await stagehand.page.goto("https://tucowsdomains.com/abuse-form/phishing/");
12 | 
13 |   const agent = stagehand.agent({
14 |     provider: "anthropic",
15 |     model: modelName,
16 |   });
17 | 
18 |   const agentResult = await agent.execute({
19 |     instruction: "Fill in the form name with 'John Smith'",
20 |     maxSteps: 3,
21 |   });
22 |   logger.log(agentResult);
23 | 
24 |   await stagehand.page.mouse.wheel(0, -1000);
25 |   const evaluator = new Evaluator(stagehand);
26 |   const result = await evaluator.evaluate({
27 |     question: "Is the form name input filled with 'John Smith'?",
28 |     strictResponse: true,
29 |   });
30 | 
31 |   if (result.evaluation !== "YES" && result.evaluation !== "NO") {
32 |     await stagehand.close();
33 |     return {
34 |       _success: false,
35 |       observations: "Evaluator provided an invalid response",
36 |       debugUrl,
37 |       sessionUrl,
38 |       logs: logger.getLogs(),
39 |     };
40 |   }
41 | 
42 |   const agentResult2 = await agent.execute({
43 |     instruction: "Fill in the form email with 'john.smith@example.com'",
44 |     maxSteps: 3,
45 |   });
46 |   logger.log(agentResult2);
47 | 
48 |   await stagehand.page.mouse.wheel(0, -1000);
49 |   const result2 = await evaluator.evaluate({
50 |     question: "Is the form email input filled with 'john.smith@example.com'?",
51 |     strictResponse: true,
52 |   });
53 | 
54 |   if (result2.evaluation !== "YES" && result2.evaluation !== "NO") {
55 |     await stagehand.close();
56 |     return {
57 |       _success: false,
58 |       observations: "Evaluator provided an invalid response",
59 |       debugUrl,
60 |       sessionUrl,
61 |       logs: logger.getLogs(),
62 |     };
63 |   }
64 | 
65 |   if (result.evaluation === "YES" && result2.evaluation === "YES") {
66 |     await stagehand.close();
67 |     return {
68 |       _success: true,
69 |       observations: "All fields were filled correctly",
70 |       debugUrl,
71 |       sessionUrl,
72 |       logs: logger.getLogs(),
73 |     };
74 |   } else {
75 |     await stagehand.close();
76 |     return {
77 |       _success: false,
78 |       observations: "One or more fields were not filled correctly",
79 |       debugUrl,
80 |       sessionUrl,
81 |       logs: logger.getLogs(),
82 |     };
83 |   }
84 | };
85 | 


--------------------------------------------------------------------------------
/evals/tasks/agent/iframe_form_multiple.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { Evaluator } from "../../evaluator";
 3 | 
 4 | export const iframe_form_multiple: EvalFunction = async ({
 5 |   debugUrl,
 6 |   sessionUrl,
 7 |   stagehand,
 8 |   logger,
 9 |   modelName,
10 | }) => {
11 |   await stagehand.page.goto("https://tucowsdomains.com/abuse-form/phishing/");
12 | 
13 |   const agent = stagehand.agent({
14 |     provider: modelName.startsWith("claude") ? "anthropic" : "openai",
15 |     model: modelName,
16 |   });
17 | 
18 |   const agentResult = await agent.execute({
19 |     instruction:
20 |       "Fill in the form name with 'John Smith', the email with 'john.smith@example.com', and select the 'Are you the domain owner?' option as 'No'",
21 |     maxSteps: 10,
22 |   });
23 |   logger.log(agentResult);
24 | 
25 |   await stagehand.page.mouse.wheel(0, -1000);
26 |   const evaluator = new Evaluator(stagehand);
27 |   const results = await evaluator.batchEvaluate({
28 |     questions: [
29 |       "Is the form name input filled with 'John Smith'?",
30 |       "Is the form email input filled with 'john.smith@example.com'?",
31 |       "Is the 'Are you the domain owner?' option selected as 'No'?",
32 |     ],
33 |     strictResponse: true,
34 |   });
35 | 
36 |   for (const r of results) {
37 |     if (r.evaluation !== "YES" && r.evaluation !== "NO") {
38 |       await stagehand.close();
39 |       return {
40 |         _success: false,
41 |         observations: "Evaluator provided an invalid response",
42 |         debugUrl,
43 |         sessionUrl,
44 |         logs: logger.getLogs(),
45 |       };
46 |     }
47 |     if (r.evaluation === "NO") {
48 |       await stagehand.close();
49 |       return {
50 |         _success: false,
51 |         observations: r.reasoning,
52 |         debugUrl,
53 |         sessionUrl,
54 |         logs: logger.getLogs(),
55 |       };
56 |     }
57 |   }
58 | 
59 |   await stagehand.close();
60 |   return {
61 |     _success: true,
62 |     observations: "All fields were filled correctly",
63 |     debugUrl,
64 |     sessionUrl,
65 |     logs: logger.getLogs(),
66 |   };
67 | };
68 | 


--------------------------------------------------------------------------------
/evals/tasks/agent/sf_library_card.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { Evaluator } from "../../evaluator";
 3 | 
 4 | export const sf_library_card: EvalFunction = async ({
 5 |   debugUrl,
 6 |   sessionUrl,
 7 |   stagehand,
 8 |   logger,
 9 |   modelName,
10 | }) => {
11 |   await stagehand.page.goto("https://sflib1.sfpl.org/selfreg");
12 | 
13 |   const agent = stagehand.agent({
14 |     model: modelName,
15 |     provider: modelName.startsWith("claude") ? "anthropic" : "openai",
16 |     instructions: `You are a helpful assistant that can help me with my tasks. You are given a task and you need to complete it without asking follow up questions. The current page is ${await stagehand.page.title()}`,
17 |   });
18 | 
19 |   const agentResult = await agent.execute({
20 |     instruction: "Fill in the 'Residential Address' field with '166 Geary St'",
21 |     maxSteps: 3,
22 |   });
23 |   logger.log(agentResult);
24 | 
25 |   await stagehand.page.mouse.wheel(0, -1000);
26 |   const evaluator = new Evaluator(stagehand);
27 |   const result = await evaluator.evaluate({
28 |     question:
29 |       "Does the page show the 'Residential Address' field filled with '166 Geary St'?",
30 |     strictResponse: true,
31 |   });
32 | 
33 |   if (result.evaluation !== "YES" && result.evaluation !== "NO") {
34 |     await stagehand.close();
35 |     return {
36 |       _success: false,
37 |       observations: "Evaluator provided an invalid response",
38 |       debugUrl,
39 |       sessionUrl,
40 |       logs: logger.getLogs(),
41 |     };
42 |   }
43 | 
44 |   if (result.evaluation === "YES") {
45 |     await stagehand.close();
46 |     return {
47 |       _success: true,
48 |       observations: result.reasoning,
49 |       debugUrl,
50 |       sessionUrl,
51 |       logs: logger.getLogs(),
52 |     };
53 |   } else {
54 |     await stagehand.close();
55 |     return {
56 |       _success: false,
57 |       observations: result.reasoning,
58 |       debugUrl,
59 |       sessionUrl,
60 |       logs: logger.getLogs(),
61 |     };
62 |   }
63 | };
64 | 


--------------------------------------------------------------------------------
/evals/tasks/agent/sf_library_card_multiple.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { Evaluator } from "../../evaluator";
 3 | 
 4 | export const sf_library_card_multiple: EvalFunction = async ({
 5 |   debugUrl,
 6 |   sessionUrl,
 7 |   stagehand,
 8 |   logger,
 9 |   modelName,
10 | }) => {
11 |   await stagehand.page.goto("https://sflib1.sfpl.org/selfreg");
12 | 
13 |   const agent = stagehand.agent({
14 |     model: modelName,
15 |     provider: modelName.startsWith("claude") ? "anthropic" : "openai",
16 |     instructions: `You are a helpful assistant that can help me with my tasks. You are given a task and you need to complete it without asking follow up questions. The current page is ${await stagehand.page.title()}`,
17 |   });
18 | 
19 |   const agentResult = await agent.execute({
20 |     instruction:
21 |       "Fill in ALL the required fields with mock data. DO NOT submit the form",
22 |     maxSteps: 20,
23 |   });
24 |   logger.log(agentResult);
25 | 
26 |   const evaluator = new Evaluator(stagehand);
27 |   const result = await evaluator.evaluate({
28 |     question: "Does the page show all the required fields filled?",
29 |     strictResponse: true,
30 |   });
31 | 
32 |   if (result.evaluation !== "YES" && result.evaluation !== "NO") {
33 |     await stagehand.close();
34 |     return {
35 |       _success: false,
36 |       observations: "Evaluator provided an invalid response",
37 |       debugUrl,
38 |       sessionUrl,
39 |       logs: logger.getLogs(),
40 |     };
41 |   }
42 | 
43 |   if (result.evaluation === "YES") {
44 |     await stagehand.close();
45 |     return {
46 |       _success: true,
47 |       observations: result.reasoning,
48 |       debugUrl,
49 |       sessionUrl,
50 |       logs: logger.getLogs(),
51 |     };
52 |   } else {
53 |     await stagehand.close();
54 |     return {
55 |       _success: false,
56 |       observations: result.reasoning,
57 |       debugUrl,
58 |       sessionUrl,
59 |       logs: logger.getLogs(),
60 |     };
61 |   }
62 | };
63 | 


--------------------------------------------------------------------------------
/evals/tasks/allrecipes.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { z } from "zod";
 3 | 
 4 | export const allrecipes: EvalFunction = async ({
 5 |   logger,
 6 |   useTextExtract,
 7 |   debugUrl,
 8 |   sessionUrl,
 9 |   stagehand,
10 | }) => {
11 |   await stagehand.page.goto("https://www.allrecipes.com/", {
12 |     waitUntil: "domcontentloaded",
13 |   });
14 | 
15 |   await stagehand.page.act({
16 |     action: 'Type "chocolate chip cookies" in the search bar',
17 |   });
18 |   await stagehand.page.act({
19 |     action: "press enter",
20 |   });
21 | 
22 |   const recipeDetails = await stagehand.page.extract({
23 |     instruction:
24 |       "Extract the title of the first recipe and the total number of ratings it has received.",
25 |     schema: z.object({
26 |       title: z.string().describe("Title of the recipe"),
27 |       total_ratings: z
28 |         .string()
29 |         .describe("Total number of ratings for the recipe"),
30 |     }),
31 |     useTextExtract,
32 |   });
33 | 
34 |   await stagehand.close();
35 | 
36 |   const { title, total_ratings } = recipeDetails;
37 |   const expectedTitle = "Best Chocolate Chip Cookies";
38 |   const expectedRatings = 19164;
39 | 
40 |   const extractedRatings = parseInt(total_ratings.replace(/[^\d]/g, ""), 10);
41 |   const isRatingsWithinRange =
42 |     extractedRatings >= expectedRatings - 1000 &&
43 |     extractedRatings <= expectedRatings + 1000;
44 | 
45 |   if (title !== expectedTitle || !isRatingsWithinRange) {
46 |     const errors = [];
47 |     if (title !== expectedTitle) {
48 |       errors.push({
49 |         message: "Extracted title does not match the expected title",
50 |         expected: expectedTitle,
51 |         actual: title,
52 |       });
53 |     }
54 |     if (!isRatingsWithinRange) {
55 |       errors.push({
56 |         message: "Extracted ratings are not within the expected range",
57 |         expected: `${expectedRatings} ± 1000`,
58 |         actual: extractedRatings.toString(),
59 |       });
60 |     }
61 | 
62 |     logger.error({
63 |       message: "Failed to extract correct recipe details",
64 |       level: 0,
65 |       auxiliary: {
66 |         errors: {
67 |           value: JSON.stringify(errors),
68 |           type: "object",
69 |         },
70 |       },
71 |     });
72 | 
73 |     return {
74 |       _success: false,
75 |       error: "Recipe details extraction validation failed",
76 |       logs: logger.getLogs(),
77 |       debugUrl,
78 |       sessionUrl,
79 |     };
80 |   }
81 | 
82 |   return {
83 |     _success: true,
84 |     recipeDetails: {
85 |       title,
86 |       total_ratings: extractedRatings,
87 |     },
88 |     logs: logger.getLogs(),
89 |     debugUrl,
90 |     sessionUrl,
91 |   };
92 | };
93 | 


--------------------------------------------------------------------------------
/evals/tasks/amazon_add_to_cart.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const amazon_add_to_cart: EvalFunction = async ({
 4 |   logger,
 5 |   debugUrl,
 6 |   sessionUrl,
 7 |   stagehand,
 8 | }) => {
 9 |   await stagehand.page.goto(
10 |     "https://browserbase.github.io/stagehand-eval-sites/sites/amazon/",
11 |   );
12 | 
13 |   await stagehand.page.waitForTimeout(5000);
14 | 
15 |   await stagehand.page.act({
16 |     action: "click the 'Add to Cart' button",
17 |   });
18 | 
19 |   await stagehand.page.waitForTimeout(2000);
20 | 
21 |   await stagehand.page.act({
22 |     action: "click the 'Proceed to checkout' button",
23 |   });
24 | 
25 |   await stagehand.page.waitForTimeout(2000);
26 |   const currentUrl = stagehand.page.url();
27 |   const expectedUrl =
28 |     "https://browserbase.github.io/stagehand-eval-sites/sites/amazon/sign-in.html";
29 | 
30 |   await stagehand.close();
31 | 
32 |   return {
33 |     _success: currentUrl === expectedUrl,
34 |     currentUrl,
35 |     debugUrl,
36 |     sessionUrl,
37 |     logs: logger.getLogs(),
38 |   };
39 | };
40 | 


--------------------------------------------------------------------------------
/evals/tasks/apple.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const apple: EvalFunction = async ({
 4 |   logger,
 5 |   debugUrl,
 6 |   sessionUrl,
 7 |   stagehand,
 8 | }) => {
 9 |   await stagehand.page.goto("https://www.apple.com/iphone-16-pro/");
10 | 
11 |   await stagehand.page.act({ action: "click on the buy button" });
12 |   await stagehand.page.act({ action: "select the Pro Max model" });
13 |   await stagehand.page.act({ action: "select the natural titanium color" });
14 |   await stagehand.page.act({ action: "select the 256GB storage option" });
15 |   await stagehand.page.act({
16 |     action: "click on the 'select a smartphone' trade-in option",
17 |   });
18 | 
19 |   await stagehand.page.act({
20 |     action: "select the iPhone 13 mini model from the dropdown",
21 |   });
22 |   await stagehand.page.act({
23 |     action: "select the iPhone 13 mini is in good condition",
24 |   });
25 | 
26 |   const successMessageLocator = stagehand.page.locator(
27 |     'text="Good News. Your iPhone 13 mini qualifies for credit."',
28 |   );
29 |   const isVisible = await successMessageLocator.isVisible();
30 | 
31 |   await stagehand.close();
32 | 
33 |   return {
34 |     _success: isVisible,
35 |     debugUrl,
36 |     sessionUrl,
37 |     logs: logger.getLogs(),
38 |   };
39 | };
40 | 


--------------------------------------------------------------------------------
/evals/tasks/bidnet.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const bidnet: EvalFunction = async ({
 4 |   logger,
 5 |   debugUrl,
 6 |   sessionUrl,
 7 |   stagehand,
 8 | }) => {
 9 |   await stagehand.page.goto("https://www.bidnetdirect.com/");
10 | 
11 |   await stagehand.page.act({
12 |     action: 'Click on the "Construction" keyword',
13 |   });
14 | 
15 |   const expectedUrl =
16 |     "https://www.bidnetdirect.com/public/solicitations/open?keywords=Construction";
17 |   const currentUrl = stagehand.page.url();
18 | 
19 |   await stagehand.close();
20 | 
21 |   return {
22 |     _success: currentUrl.startsWith(expectedUrl),
23 |     currentUrl,
24 |     debugUrl,
25 |     sessionUrl,
26 |     logs: logger.getLogs(),
27 |   };
28 | };
29 | 


--------------------------------------------------------------------------------
/evals/tasks/checkboxes.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const checkboxes: EvalFunction = async ({
 4 |   debugUrl,
 5 |   sessionUrl,
 6 |   stagehand,
 7 |   logger,
 8 | }) => {
 9 |   await stagehand.page.goto(
10 |     "https://browserbase.github.io/stagehand-eval-sites/sites/checkboxes/",
11 |   );
12 | 
13 |   await stagehand.page.act({
14 |     action: "click the 'baseball' option",
15 |   });
16 | 
17 |   await stagehand.page.act({
18 |     action: "click the 'netball' option",
19 |   });
20 | 
21 |   const baseballChecked = await stagehand.page
22 |     .locator('input[type="checkbox"][name="sports"][value="baseball"]')
23 |     .isChecked();
24 | 
25 |   const netballChecked = await stagehand.page
26 |     .locator('input[type="checkbox"][name="sports"][value="netball"]')
27 |     .isChecked();
28 | 
29 |   await stagehand.close();
30 | 
31 |   return {
32 |     _success: baseballChecked && netballChecked,
33 |     debugUrl,
34 |     sessionUrl,
35 |     logs: logger.getLogs(),
36 |   };
37 | };
38 | 


--------------------------------------------------------------------------------
/evals/tasks/combination_sauce.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { z } from "zod";
 3 | 
 4 | export const combination_sauce: EvalFunction = async ({
 5 |   logger,
 6 |   useTextExtract,
 7 |   debugUrl,
 8 |   sessionUrl,
 9 |   stagehand,
10 | }) => {
11 |   try {
12 |     await stagehand.page.goto("https://www.saucedemo.com/");
13 | 
14 |     const { usernames, password } = await stagehand.page.extract({
15 |       instruction: "extract the accepted usernames and the password for login",
16 |       schema: z.object({
17 |         usernames: z.array(z.string()).describe("the accepted usernames"),
18 |         password: z.string().describe("the password for login"),
19 |       }),
20 |       useTextExtract,
21 |     });
22 | 
23 |     await stagehand.page.act({
24 |       action: `enter username 'standard_user'`,
25 |     });
26 | 
27 |     await stagehand.page.act({
28 |       action: `enter password '${password}'`,
29 |     });
30 | 
31 |     await stagehand.page.act({
32 |       action: "click on 'login'",
33 |     });
34 | 
35 |     const observations = await stagehand.page.observe({
36 |       instruction: "find all the 'add to cart' buttons",
37 |     });
38 | 
39 |     console.log("observations", observations);
40 |     console.log("observations length", observations.length);
41 | 
42 |     const url = await stagehand.page.url();
43 | 
44 |     await stagehand.close();
45 | 
46 |     const usernamesCheck = usernames.length === 6;
47 |     const urlCheck = url === "https://www.saucedemo.com/inventory.html";
48 |     const observationsCheck = observations.length === 6;
49 | 
50 |     return {
51 |       _success: usernamesCheck && urlCheck && observationsCheck,
52 |       debugUrl,
53 |       sessionUrl,
54 |       logs: logger.getLogs(),
55 |     };
56 |   } catch (error) {
57 |     console.error("Error or timeout occurred:", error);
58 | 
59 |     await stagehand.close();
60 | 
61 |     return {
62 |       _success: false,
63 |       error: JSON.parse(JSON.stringify(error, null, 2)),
64 |       debugUrl,
65 |       sessionUrl,
66 |       logs: logger.getLogs(),
67 |     };
68 |   }
69 | };
70 | 


--------------------------------------------------------------------------------
/evals/tasks/costar.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { z } from "zod";
 3 | 
 4 | export const costar: EvalFunction = async ({
 5 |   logger,
 6 |   debugUrl,
 7 |   sessionUrl,
 8 |   stagehand,
 9 |   useTextExtract,
10 | }) => {
11 |   // TODO: fix this eval - does not work in headless mode
12 |   try {
13 |     await Promise.race([
14 |       stagehand.page.goto("https://www.costar.com/"),
15 |       new Promise((_, reject) =>
16 |         setTimeout(() => reject(new Error("Navigation timeout")), 30000),
17 |       ),
18 |     ]);
19 | 
20 |     await stagehand.page.act({ action: "click on the first article" });
21 | 
22 |     await stagehand.page.act({
23 |       action: "click on the learn more button for the first job",
24 |     });
25 | 
26 |     const articleTitle = await stagehand.page.extract({
27 |       instruction: "extract the title of the article",
28 |       schema: z.object({
29 |         title: z.string().describe("the title of the article").nullable(),
30 |       }),
31 |       useTextExtract,
32 |     });
33 | 
34 |     logger.log({
35 |       message: "got article title",
36 |       level: 1,
37 |       auxiliary: {
38 |         articleTitle: {
39 |           value: JSON.stringify(articleTitle),
40 |           type: "object",
41 |         },
42 |       },
43 |     });
44 | 
45 |     // Check if the title is more than 5 characters
46 |     const isTitleValid =
47 |       articleTitle.title !== null && articleTitle.title.length > 5;
48 | 
49 |     await stagehand.close();
50 | 
51 |     return {
52 |       title: articleTitle.title,
53 |       _success: isTitleValid,
54 |       debugUrl,
55 |       sessionUrl,
56 |       logs: logger.getLogs(),
57 |     };
58 |   } catch (error) {
59 |     logger.error({
60 |       message: "error in costar function",
61 |       level: 0,
62 |       auxiliary: {
63 |         error: {
64 |           value: error.message,
65 |           type: "string",
66 |         },
67 |         trace: {
68 |           value: error.stack,
69 |           type: "string",
70 |         },
71 |       },
72 |     });
73 | 
74 |     await stagehand.close();
75 | 
76 |     return {
77 |       title: null,
78 |       _success: false,
79 |       debugUrl,
80 |       sessionUrl,
81 |       logs: logger.getLogs(),
82 |     };
83 |   }
84 | };
85 | 


--------------------------------------------------------------------------------
/evals/tasks/dropdown.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const dropdown: EvalFunction = async ({
 4 |   debugUrl,
 5 |   sessionUrl,
 6 |   stagehand,
 7 |   logger,
 8 | }) => {
 9 |   await stagehand.page.goto(
10 |     "https://browserbase.github.io/stagehand-eval-sites/sites/dropdown/",
11 |   );
12 | 
13 |   // click the dropdown element to expand it
14 |   const xpath = "xpath=/html/body/div/div/button";
15 |   await stagehand.page.locator(xpath).click();
16 | 
17 |   // type into the input box (which should be hidden behind the
18 |   // expanded dropdown)
19 |   await stagehand.page.act("type 'test fill' into the input field");
20 | 
21 |   const input = stagehand.page.locator(`xpath=/html/body/div/input`);
22 |   const expectedValue = "test fill";
23 | 
24 |   // get the value of the input box
25 |   const actualValue = await input.inputValue();
26 |   await stagehand.close();
27 | 
28 |   // pass if the value matches expected
29 |   return {
30 |     _success: actualValue === expectedValue,
31 |     expectedValue,
32 |     actualValue,
33 |     debugUrl,
34 |     sessionUrl,
35 |     logs: logger.getLogs(),
36 |   };
37 | };
38 | 


--------------------------------------------------------------------------------
/evals/tasks/expect_act_timeout.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const expect_act_timeout: EvalFunction = async ({
 4 |   logger,
 5 |   debugUrl,
 6 |   sessionUrl,
 7 |   stagehand,
 8 | }) => {
 9 |   await stagehand.page.goto("https://docs.stagehand.dev");
10 |   const result = await stagehand.page.act({
11 |     action: "search for 'Stagehand'",
12 |     timeoutMs: 1_000,
13 |   });
14 | 
15 |   await stagehand.close();
16 | 
17 |   return {
18 |     _success: !result.success,
19 |     debugUrl,
20 |     sessionUrl,
21 |     logs: logger.getLogs(),
22 |   };
23 | };
24 | 


--------------------------------------------------------------------------------
/evals/tasks/expedia.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const expedia: EvalFunction = async ({
 4 |   logger,
 5 |   debugUrl,
 6 |   sessionUrl,
 7 |   stagehand,
 8 | }) => {
 9 |   try {
10 |     await stagehand.page.goto("https://www.expedia.com/flights");
11 |     await stagehand.page.act(
12 |       "find round-trip flights from San Francisco (SFO) to Toronto (YYZ) for Jan 1, 2025 (up to one to two weeks)",
13 |     );
14 |     await stagehand.page.act("Go to the first non-stop flight");
15 |     await stagehand.page.act("select the cheapest flight");
16 |     await stagehand.page.act("click on the first non-stop flight");
17 |     await stagehand.page.act("Take me to the checkout page");
18 | 
19 |     const url = stagehand.page.url();
20 |     return {
21 |       _success: url.startsWith("https://www.expedia.com/Checkout/"),
22 |       logs: logger.getLogs(),
23 |       debugUrl,
24 |       sessionUrl,
25 |     };
26 |   } catch (error) {
27 |     logger.error({
28 |       message: "Error in expedia eval",
29 |       level: 0,
30 |       auxiliary: {
31 |         error: { value: error.message, type: "string" },
32 |         trace: { value: error.stack, type: "string" },
33 |       },
34 |     });
35 | 
36 |     return {
37 |       _success: false,
38 |       logs: logger.getLogs(),
39 |       debugUrl,
40 |       sessionUrl,
41 |     };
42 |   } finally {
43 |     await stagehand.close();
44 |   }
45 | };
46 | 


--------------------------------------------------------------------------------
/evals/tasks/expedia_search.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const expedia_search: EvalFunction = async ({
 4 |   logger,
 5 |   debugUrl,
 6 |   sessionUrl,
 7 |   stagehand,
 8 | }) => {
 9 |   try {
10 |     await stagehand.page.goto("https://www.expedia.com/flights");
11 |     await stagehand.page.act({
12 |       action:
13 |         "find round-trip flights from San Francisco (SFO) to Toronto (YYZ) for Jan 1, 2025 (up to one to two weeks)",
14 |     });
15 | 
16 |     await stagehand.page.act({ action: "Go to the first non-stop flight" });
17 | 
18 |     await stagehand.page.act({ action: "select the cheapest flight" });
19 | 
20 |     await stagehand.page.act({ action: "click on the first non-stop flight" });
21 | 
22 |     await stagehand.page.act({
23 |       action: "Take me to the checkout page",
24 |     });
25 | 
26 |     const url = stagehand.page.url();
27 |     return {
28 |       _success: url.startsWith("https://www.expedia.com/Checkout/"),
29 |       logs: logger.getLogs(),
30 |       debugUrl,
31 |       sessionUrl,
32 |     };
33 |   } catch (error) {
34 |     logger.error({
35 |       message: `error in expedia function`,
36 |       level: 0,
37 |       auxiliary: {
38 |         error: {
39 |           value: JSON.stringify(error, null, 2),
40 |           type: "object",
41 |         },
42 |         trace: {
43 |           value: error.stack,
44 |           type: "string",
45 |         },
46 |       },
47 |     });
48 |     return {
49 |       _success: false,
50 |       error: JSON.parse(JSON.stringify(error, null, 2)),
51 |       debugUrl,
52 |       sessionUrl,
53 |       logs: logger.getLogs(),
54 |     };
55 |   } finally {
56 |     await stagehand.close();
57 |   }
58 | };
59 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_aigrant_targeted.ts:
--------------------------------------------------------------------------------
 1 | import { z } from "zod";
 2 | import { EvalFunction } from "@/types/evals";
 3 | 
 4 | export const extract_aigrant_targeted: EvalFunction = async ({
 5 |   logger,
 6 |   useTextExtract,
 7 |   debugUrl,
 8 |   sessionUrl,
 9 |   stagehand,
10 | }) => {
11 |   await stagehand.page.goto(
12 |     "https://browserbase.github.io/stagehand-eval-sites/sites/aigrant/",
13 |   );
14 |   const selector = "/html/body/div/ul[5]/li[28]";
15 |   const company = await stagehand.page.extract({
16 |     instruction: "Extract the company name.",
17 |     schema: z.object({
18 |       company_name: z.string(),
19 |     }),
20 |     useTextExtract,
21 |     selector: selector,
22 |   });
23 | 
24 |   await stagehand.close();
25 |   const companyName = company.company_name;
26 | 
27 |   const expectedName = {
28 |     company_name: "Coframe",
29 |   };
30 | 
31 |   const nameMatches = companyName == expectedName.company_name;
32 | 
33 |   if (!nameMatches) {
34 |     logger.error({
35 |       message: "extracted company name does not match expected",
36 |       level: 0,
37 |       auxiliary: {
38 |         expected: {
39 |           value: expectedName.company_name,
40 |           type: "string",
41 |         },
42 |         actual: {
43 |           value: companyName,
44 |           type: "string",
45 |         },
46 |       },
47 |     });
48 |     return {
49 |       _success: false,
50 |       error: "Company name does not match expected",
51 |       logs: logger.getLogs(),
52 |       debugUrl,
53 |       sessionUrl,
54 |     };
55 |   }
56 | 
57 |   return {
58 |     _success: true,
59 |     logs: logger.getLogs(),
60 |     debugUrl,
61 |     sessionUrl,
62 |   };
63 | };
64 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_aigrant_targeted_2.ts:
--------------------------------------------------------------------------------
 1 | import { z } from "zod";
 2 | import { EvalFunction } from "@/types/evals";
 3 | 
 4 | export const extract_aigrant_targeted_2: EvalFunction = async ({
 5 |   logger,
 6 |   useTextExtract,
 7 |   debugUrl,
 8 |   sessionUrl,
 9 |   stagehand,
10 | }) => {
11 |   await stagehand.page.goto(
12 |     "https://browserbase.github.io/stagehand-eval-sites/sites/aigrant/",
13 |   );
14 |   const selector = "/html/body/div/ul[5]/li[28]";
15 |   const company = await stagehand.page.extract({
16 |     instruction: "Extract the name of the company that comes after 'Coframe'.",
17 |     schema: z.object({
18 |       company_name: z.string(),
19 |     }),
20 |     useTextExtract,
21 |     selector: selector,
22 |   });
23 | 
24 |   await stagehand.close();
25 |   const companyName = company.company_name;
26 | 
27 |   // nameWeShouldNotGet matches the name of the company that comes after
28 |   // CoFrame on the website. Since we are using targeted_extract here,
29 |   // and passing in a selector that does NOT contain the nameWeShouldNotGet,
30 |   // the LLM should have no visibility into what comes after 'CoFrame' if
31 |   // targeted_extract is performing correctly
32 |   const nameWeShouldNotGet = {
33 |     company_name: "OpusClip",
34 |   };
35 | 
36 |   const nameMatches = companyName == nameWeShouldNotGet.company_name;
37 | 
38 |   if (nameMatches) {
39 |     logger.error({
40 |       message:
41 |         "extracted company name matches the company name that we SHOULD NOT get",
42 |       level: 0,
43 |       auxiliary: {
44 |         expected: {
45 |           value: nameWeShouldNotGet.company_name,
46 |           type: "string",
47 |         },
48 |         actual: {
49 |           value: companyName,
50 |           type: "string",
51 |         },
52 |       },
53 |     });
54 |     return {
55 |       _success: false,
56 |       error:
57 |         "extracted company name matches the company name that we SHOULD NOT get",
58 |       logs: logger.getLogs(),
59 |       debugUrl,
60 |       sessionUrl,
61 |     };
62 |   }
63 | 
64 |   return {
65 |     _success: true,
66 |     logs: logger.getLogs(),
67 |     debugUrl,
68 |     sessionUrl,
69 |   };
70 | };
71 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_apartments.ts:
--------------------------------------------------------------------------------
 1 | import { z } from "zod";
 2 | import { EvalFunction } from "../../types/evals";
 3 | 
 4 | export const extract_apartments: EvalFunction = async ({
 5 |   logger,
 6 |   useTextExtract,
 7 |   debugUrl,
 8 |   sessionUrl,
 9 |   stagehand,
10 | }) => {
11 |   await stagehand.page.goto(
12 |     "https://www.apartments.com/san-francisco-ca/2-bedrooms/",
13 |   );
14 |   const apartment_listings = await stagehand.page.extract({
15 |     instruction:
16 |       "Extract all the apartment listings with their prices and their addresses.",
17 |     schema: z.object({
18 |       listings: z.array(
19 |         z.object({
20 |           price: z.string().describe("The price of the listing"),
21 |           trails: z.string().describe("The address of the listing"),
22 |         }),
23 |       ),
24 |     }),
25 |     useTextExtract,
26 |   });
27 | 
28 |   await stagehand.close();
29 |   const listings = apartment_listings.listings;
30 |   const expectedLength = 40;
31 | 
32 |   if (listings.length < expectedLength) {
33 |     logger.error({
34 |       message: "Incorrect number of listings extracted",
35 |       level: 0,
36 |       auxiliary: {
37 |         expected: {
38 |           value: expectedLength.toString(),
39 |           type: "integer",
40 |         },
41 |         actual: {
42 |           value: listings.length.toString(),
43 |           type: "integer",
44 |         },
45 |       },
46 |     });
47 |     return {
48 |       _success: false,
49 |       error: "Incorrect number of listings extracted",
50 |       logs: logger.getLogs(),
51 |       debugUrl,
52 |       sessionUrl,
53 |     };
54 |   }
55 | 
56 |   return {
57 |     _success: true,
58 |     logs: logger.getLogs(),
59 |     debugUrl,
60 |     sessionUrl,
61 |   };
62 | };
63 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_baptist_health.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { compareStrings } from "@/evals/utils";
 3 | import { z } from "zod";
 4 | 
 5 | export const extract_baptist_health: EvalFunction = async ({
 6 |   logger,
 7 |   useTextExtract,
 8 |   debugUrl,
 9 |   sessionUrl,
10 |   stagehand,
11 | }) => {
12 |   await stagehand.page.goto(
13 |     "https://browserbase.github.io/stagehand-eval-sites/sites/baptist-health/",
14 |   );
15 | 
16 |   const result = await stagehand.page.extract({
17 |     instruction:
18 |       "Extract the address, phone number, and fax number of the healthcare location.",
19 |     schema: z.object({
20 |       address: z.string(),
21 |       phone: z.string(),
22 |       fax: z.string(),
23 |     }),
24 |     useTextExtract,
25 |   });
26 | 
27 |   await stagehand.close();
28 | 
29 |   const { address, phone, fax } = result;
30 |   const expected = {
31 |     address: "2055 East South Blvd; Suite 908 Montgomery, AL 36116",
32 |     phone: "334-747-2273",
33 |     fax: "334-747-7501",
34 |   };
35 | 
36 |   const similarityThreshold = 0.85;
37 |   const failedFields: Array<{
38 |     field: string;
39 |     similarity: number;
40 |     expected: string;
41 |     actual: string;
42 |   }> = [];
43 | 
44 |   const compareField = (
45 |     actualVal: string,
46 |     expectedVal: string,
47 |     fieldName: string,
48 |   ) => {
49 |     const { similarity, meetsThreshold } = compareStrings(
50 |       actualVal,
51 |       expectedVal,
52 |       similarityThreshold,
53 |     );
54 | 
55 |     if (!meetsThreshold) {
56 |       failedFields.push({
57 |         field: fieldName,
58 |         similarity,
59 |         expected: expectedVal,
60 |         actual: actualVal,
61 |       });
62 |       logger.error({
63 |         message: `${fieldName} extracted does not meet similarity threshold`,
64 |         level: 0,
65 |         auxiliary: {
66 |           field: { value: fieldName, type: "string" },
67 |           similarity: { value: similarity.toFixed(2), type: "string" },
68 |           expected: { value: expectedVal, type: "string" },
69 |           actual: { value: actualVal, type: "string" },
70 |         },
71 |       });
72 |     }
73 | 
74 |     return meetsThreshold;
75 |   };
76 | 
77 |   const addressOk = compareField(address, expected.address, "Address");
78 |   const phoneOk = compareField(phone, expected.phone, "Phone number");
79 |   const faxOk = compareField(fax, expected.fax, "Fax number");
80 | 
81 |   if (!addressOk || !phoneOk || !faxOk) {
82 |     return {
83 |       _success: false,
84 |       error: "Some fields did not meet similarity threshold",
85 |       logs: logger.getLogs(),
86 |       debugUrl,
87 |       sessionUrl,
88 |       failedFields,
89 |     };
90 |   }
91 | 
92 |   return {
93 |     _success: true,
94 |     logs: logger.getLogs(),
95 |     debugUrl,
96 |     sessionUrl,
97 |   };
98 | };
99 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_collaborators.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { z } from "zod";
 3 | 
 4 | export const extract_collaborators: EvalFunction = async ({
 5 |   logger,
 6 |   useTextExtract,
 7 |   debugUrl,
 8 |   sessionUrl,
 9 |   stagehand,
10 | }) => {
11 |   try {
12 |     await stagehand.page.goto("https://github.com/facebook/react");
13 |     await stagehand.page.act({
14 |       action: "find and click the contributors section",
15 |     });
16 | 
17 |     await stagehand.page.waitForLoadState("domcontentloaded");
18 |     await stagehand.page.waitForLoadState("networkidle");
19 |     await stagehand.page.waitForTimeout(5000);
20 | 
21 |     const { contributors } = await stagehand.page.extract({
22 |       instruction: "Extract top 5 contributors of this repository",
23 |       schema: z.object({
24 |         contributors: z.array(
25 |           z.object({
26 |             github_username: z
27 |               .string()
28 |               .describe("the github username of the contributor"),
29 |             commits: z.number().describe("number of commits contributed"),
30 |           }),
31 |         ),
32 |       }),
33 |       useTextExtract,
34 |     });
35 | 
36 |     await stagehand.close();
37 | 
38 |     const EXPECTED_CONTRIBUTORS = [
39 |       "zpao",
40 |       "gaearon",
41 |       "sebmarkbage",
42 |       "acdlite",
43 |       "sophiebits",
44 |     ];
45 |     return {
46 |       _success:
47 |         contributors.length === EXPECTED_CONTRIBUTORS.length &&
48 |         contributors.every(
49 |           (c, i) =>
50 |             EXPECTED_CONTRIBUTORS[i] === c.github_username && c.commits >= 1000,
51 |         ),
52 |       contributors,
53 |       debugUrl,
54 |       sessionUrl,
55 |       logs: logger.getLogs(),
56 |     };
57 |   } catch (error) {
58 |     console.error("Error or timeout occurred:", error);
59 | 
60 |     await stagehand.close();
61 | 
62 |     return {
63 |       _success: false,
64 |       error: JSON.parse(JSON.stringify(error, null, 2)),
65 |       debugUrl,
66 |       sessionUrl,
67 |       logs: logger.getLogs(),
68 |     };
69 |   }
70 | };
71 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_geniusee.ts:
--------------------------------------------------------------------------------
 1 | import { z } from "zod";
 2 | import { EvalFunction } from "@/types/evals";
 3 | 
 4 | export const extract_geniusee: EvalFunction = async ({
 5 |   logger,
 6 |   useTextExtract,
 7 |   debugUrl,
 8 |   sessionUrl,
 9 |   stagehand,
10 | }) => {
11 |   await stagehand.page.goto(
12 |     "https://browserbase.github.io/stagehand-eval-sites/sites/geniusee/",
13 |   );
14 |   const selector = "/html/body/main/div[2]/div[2]/div[2]/table";
15 |   const scalability = await stagehand.page.extract({
16 |     instruction:
17 |       "Extract the scalability comment in the table for Gemini (Google)",
18 |     schema: z.object({
19 |       scalability: z.string(),
20 |     }),
21 |     useTextExtract,
22 |     selector: selector,
23 |   });
24 | 
25 |   await stagehand.close();
26 |   const scalabilityComment = scalability.scalability;
27 | 
28 |   const expectedScalabilityComment = {
29 |     scalability: "Scalable architecture with API access",
30 |   };
31 | 
32 |   const commentMatches =
33 |     scalabilityComment == expectedScalabilityComment.scalability;
34 | 
35 |   if (!commentMatches) {
36 |     logger.error({
37 |       message: "extracted scalability comment does not match expected",
38 |       level: 0,
39 |       auxiliary: {
40 |         expected: {
41 |           value: expectedScalabilityComment.scalability,
42 |           type: "string",
43 |         },
44 |         actual: {
45 |           value: scalabilityComment,
46 |           type: "string",
47 |         },
48 |       },
49 |     });
50 |     return {
51 |       _success: false,
52 |       error: "extracted scalability comment does not match expected",
53 |       logs: logger.getLogs(),
54 |       debugUrl,
55 |       sessionUrl,
56 |     };
57 |   }
58 | 
59 |   return {
60 |     _success: true,
61 |     logs: logger.getLogs(),
62 |     debugUrl,
63 |     sessionUrl,
64 |   };
65 | };
66 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_geniusee_2.ts:
--------------------------------------------------------------------------------
 1 | import { z } from "zod";
 2 | import { EvalFunction } from "@/types/evals";
 3 | 
 4 | export const extract_geniusee_2: EvalFunction = async ({
 5 |   logger,
 6 |   useTextExtract,
 7 |   debugUrl,
 8 |   sessionUrl,
 9 |   stagehand,
10 | }) => {
11 |   await stagehand.page.goto(
12 |     "https://browserbase.github.io/stagehand-eval-sites/sites/geniusee/",
13 |   );
14 |   const selector = "/html/body/main/div[2]/div[2]/div[2]/table/tbody/tr[9]";
15 |   const scalability = await stagehand.page.extract({
16 |     instruction:
17 |       "Extract the scalability comment in the table for Gemini (Google)",
18 |     schema: z.object({
19 |       scalability: z.string(),
20 |     }),
21 |     useTextExtract,
22 |     selector: selector,
23 |   });
24 | 
25 |   await stagehand.close();
26 |   const scalabilityComment = scalability.scalability;
27 | 
28 |   // scalabilityCommentWeShouldNotGet matches a scalability comment in the table,
29 |   // but since we are using targeted_extract here,
30 |   // and passing in a selector that does NOT contain the scalabilityCommentWeShouldNotGet,
31 |   // the LLM should have no visibility into scalabilityCommentWeShouldNotGet if
32 |   // targeted_extract is performing correctly
33 |   const scalabilityCommentWeShouldNotGet = {
34 |     scalability: "Scalable architecture with API access",
35 |   };
36 | 
37 |   const commentMatches =
38 |     scalabilityComment == scalabilityCommentWeShouldNotGet.scalability;
39 | 
40 |   if (commentMatches) {
41 |     logger.error({
42 |       message:
43 |         "extracted scalability comment matches the scalability comment that we SHOULD NOT get",
44 |       level: 0,
45 |       auxiliary: {
46 |         expected: {
47 |           value: scalabilityCommentWeShouldNotGet.scalability,
48 |           type: "string",
49 |         },
50 |         actual: {
51 |           value: scalabilityComment,
52 |           type: "string",
53 |         },
54 |       },
55 |     });
56 |     return {
57 |       _success: false,
58 |       error:
59 |         "scalability comment matches the scalability comment that we SHOULD NOT get",
60 |       logs: logger.getLogs(),
61 |       debugUrl,
62 |       sessionUrl,
63 |     };
64 |   }
65 | 
66 |   return {
67 |     _success: true,
68 |     logs: logger.getLogs(),
69 |     debugUrl,
70 |     sessionUrl,
71 |   };
72 | };
73 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_github_commits.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { z } from "zod";
 3 | 
 4 | export const extract_github_commits: EvalFunction = async ({
 5 |   logger,
 6 |   useTextExtract,
 7 |   debugUrl,
 8 |   sessionUrl,
 9 |   stagehand,
10 | }) => {
11 |   try {
12 |     await stagehand.page.goto("https://github.com/facebook/react");
13 | 
14 |     await stagehand.page.act({
15 |       action:
16 |         "find commit history, generally described by the number of commits",
17 |     });
18 |     const { commits } = await stagehand.page.extract({
19 |       instruction: "Extract last 20 commits",
20 |       schema: z.object({
21 |         commits: z.array(
22 |           z.object({
23 |             commit_message: z.string(),
24 |             commit_url: z.string(),
25 |             commit_hash: z.string(),
26 |           }),
27 |         ),
28 |       }),
29 |       useTextExtract,
30 |     });
31 | 
32 |     logger.log({
33 |       message: "Extracted commits",
34 |       level: 1,
35 |       auxiliary: {
36 |         commits: {
37 |           value: JSON.stringify(commits),
38 |           type: "object",
39 |         },
40 |       },
41 |     });
42 | 
43 |     await stagehand.close();
44 | 
45 |     return {
46 |       _success: commits.length === 20,
47 |       commits,
48 |       debugUrl,
49 |       sessionUrl,
50 |       logs: logger.getLogs(),
51 |     };
52 |   } catch (error) {
53 |     console.error("Error or timeout occurred:", error);
54 | 
55 |     await stagehand.close();
56 | 
57 |     return {
58 |       _success: false,
59 |       error: JSON.parse(JSON.stringify(error, null, 2)),
60 |       debugUrl,
61 |       sessionUrl,
62 |       logs: logger.getLogs(),
63 |     };
64 |   }
65 | };
66 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_github_stars.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { z } from "zod";
 3 | 
 4 | export const extract_github_stars: EvalFunction = async ({
 5 |   logger,
 6 |   useTextExtract,
 7 |   debugUrl,
 8 |   sessionUrl,
 9 |   stagehand,
10 | }) => {
11 |   try {
12 |     await stagehand.page.goto("https://github.com/facebook/react");
13 | 
14 |     const { stars } = await stagehand.page.extract({
15 |       instruction: "Extract the number of stars for the project",
16 |       schema: z.object({
17 |         stars: z.number().describe("the number of stars for the project"),
18 |       }),
19 |       useTextExtract,
20 |     });
21 | 
22 |     const expectedStarsString = await stagehand.page
23 |       .locator("#repo-stars-counter-star")
24 |       .first()
25 |       .innerHTML();
26 | 
27 |     const expectedStars = expectedStarsString.toLowerCase().endsWith("k")
28 |       ? parseFloat(expectedStarsString.slice(0, -1)) * 1000
29 |       : parseFloat(expectedStarsString);
30 | 
31 |     const tolerance = 1000;
32 |     const isWithinTolerance = Math.abs(stars - expectedStars) <= tolerance;
33 | 
34 |     await stagehand.close();
35 | 
36 |     return {
37 |       _success: isWithinTolerance,
38 |       stars,
39 |       debugUrl,
40 |       sessionUrl,
41 |       logs: logger.getLogs(),
42 |     };
43 |   } catch (error) {
44 |     console.error("Error or timeout occurred:", error);
45 | 
46 |     await stagehand.close();
47 | 
48 |     return {
49 |       _success: false,
50 |       error: JSON.parse(JSON.stringify(error, null, 2)),
51 |       debugUrl,
52 |       sessionUrl,
53 |       logs: logger.getLogs(),
54 |     };
55 |   }
56 | };
57 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_hamilton_weather.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { z } from "zod";
 3 | 
 4 | export const extract_hamilton_weather: EvalFunction = async ({
 5 |   logger,
 6 |   useTextExtract,
 7 |   debugUrl,
 8 |   sessionUrl,
 9 |   stagehand,
10 | }) => {
11 |   try {
12 |     await stagehand.page.goto(
13 |       "https://browserbase.github.io/stagehand-eval-sites/sites/hamilton-weather/",
14 |     );
15 |     const xpath =
16 |       "/html/body[1]/div[5]/main[1]/article[1]/div[6]/div[2]/div[1]/table[1]";
17 | 
18 |     const weatherData = await stagehand.page.extract({
19 |       instruction: "extract the weather data for Sun, Feb 23 at 11PM",
20 |       schema: z.object({
21 |         temperature: z.string(),
22 |         weather_description: z.string(),
23 |         wind: z.string(),
24 |         humidity: z.string(),
25 |         barometer: z.string(),
26 |         visibility: z.string(),
27 |       }),
28 |       useTextExtract,
29 |       selector: xpath,
30 |     });
31 | 
32 |     // Define the expected weather data
33 |     const expectedWeatherData = {
34 |       temperature: "27 °F",
35 |       weather_description: "Light snow. Overcast.",
36 |       wind: "6 mph",
37 |       humidity: "93%",
38 |       barometer: '30.07 "Hg',
39 |       visibility: "10 mi",
40 |     };
41 | 
42 |     // Check that every field matches the expected value
43 |     const isWeatherCorrect =
44 |       weatherData.temperature === expectedWeatherData.temperature &&
45 |       weatherData.weather_description ===
46 |         expectedWeatherData.weather_description &&
47 |       weatherData.wind === expectedWeatherData.wind &&
48 |       weatherData.humidity === expectedWeatherData.humidity &&
49 |       weatherData.barometer === expectedWeatherData.barometer &&
50 |       weatherData.visibility === expectedWeatherData.visibility;
51 | 
52 |     await stagehand.close();
53 | 
54 |     return {
55 |       _success: isWeatherCorrect,
56 |       weatherData,
57 |       debugUrl,
58 |       sessionUrl,
59 |       logs: logger.getLogs(),
60 |     };
61 |   } catch (error) {
62 |     console.error("Error or timeout occurred:", error);
63 | 
64 |     await stagehand.close();
65 | 
66 |     return {
67 |       _success: false,
68 |       error: JSON.parse(JSON.stringify(error, null, 2)),
69 |       debugUrl,
70 |       sessionUrl,
71 |       logs: logger.getLogs(),
72 |     };
73 |   }
74 | };
75 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_partners.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { z } from "zod";
 3 | 
 4 | export const extract_partners: EvalFunction = async ({
 5 |   debugUrl,
 6 |   sessionUrl,
 7 |   stagehand,
 8 |   logger,
 9 |   useTextExtract,
10 | }) => {
11 |   try {
12 |     await stagehand.page.goto("https://ramp.com");
13 | 
14 |     await stagehand.page.act({
15 |       action: "move down to the bottom of the page.",
16 |     });
17 | 
18 |     await stagehand.page.act({
19 |       action: "Close the popup.",
20 |     });
21 | 
22 |     await stagehand.page.act({
23 |       action: "Find and click on the link that leads to the partners page.",
24 |     });
25 | 
26 |     const partners = await stagehand.page.extract({
27 |       instruction: `
28 |       Extract all of the partner categories on the page.
29 |     `,
30 |       schema: z.object({
31 |         partners: z.array(
32 |           z.object({
33 |             partner_category: z.string().describe("The partner category"),
34 |           }),
35 |         ),
36 |         explanation: z
37 |           .string()
38 |           .optional()
39 |           .describe("Any explanation about partner listing or absence thereof"),
40 |       }),
41 |       useTextExtract,
42 |     });
43 | 
44 |     const expectedPartners = [
45 |       "Accounting Partners",
46 |       "Private Equity & Venture Capital Partners",
47 |       "Services Partners",
48 |       "Affiliates",
49 |     ];
50 | 
51 |     const foundPartners = partners.partners.map((partner) =>
52 |       partner.partner_category.toLowerCase(),
53 |     );
54 | 
55 |     const allExpectedPartnersFound = expectedPartners.every((partner) =>
56 |       foundPartners.includes(partner.toLowerCase()),
57 |     );
58 | 
59 |     await stagehand.close();
60 | 
61 |     return {
62 |       _success: allExpectedPartnersFound,
63 |       partners,
64 |       debugUrl,
65 |       sessionUrl,
66 |       logs: logger.getLogs(),
67 |     };
68 |   } catch (error) {
69 |     logger.error({
70 |       message: "error in extractPartners function",
71 |       level: 0,
72 |       auxiliary: {
73 |         error: {
74 |           value: error.message,
75 |           type: "string",
76 |         },
77 |         trace: {
78 |           value: error.stack,
79 |           type: "string",
80 |         },
81 |       },
82 |     });
83 | 
84 |     await stagehand.close();
85 | 
86 |     return {
87 |       _success: false,
88 |       debugUrl,
89 |       sessionUrl,
90 |       error: JSON.parse(JSON.stringify(error, null, 2)),
91 |       logs: logger.getLogs(),
92 |     };
93 |   }
94 | };
95 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_regulations_table.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { z } from "zod";
 3 | 
 4 | export const extract_regulations_table: EvalFunction = async ({
 5 |   debugUrl,
 6 |   sessionUrl,
 7 |   stagehand,
 8 |   logger,
 9 |   useTextExtract,
10 | }) => {
11 |   try {
12 |     await stagehand.page.goto(
13 |       "https://browserbase.github.io/stagehand-eval-sites/sites/ncc-numbering-plan/",
14 |     );
15 | 
16 |     const xpath =
17 |       "/html/body/div[3]/main/div[2]/div[2]/div/div/div[2]/article/div[2]/div[1]/div/table";
18 | 
19 |     const allottees = await stagehand.page.extract({
20 |       instruction:
21 |         "Extract ALL of the Allottees and their corresponding name, area, and area code.",
22 |       schema: z.object({
23 |         allottee_list: z.array(
24 |           z.object({
25 |             allottee_name: z.string(),
26 |             area: z.string(),
27 |             area_code: z.string(),
28 |             access_code: z.string(),
29 |           }),
30 |         ),
31 |       }),
32 |       useTextExtract,
33 |       selector: xpath,
34 |     });
35 | 
36 |     // Define the expected weather data
37 |     const allottees_expected_first = {
38 |       allottee_name: "101 Communications Limited",
39 |       area: "Lagos",
40 |       area_code: "0201",
41 |       access_code: "249",
42 |     };
43 | 
44 |     const allottees_expected_last = {
45 |       allottee_name: "Airtel Networks Limited",
46 |       area: "National",
47 |       area_code: "0708",
48 |       access_code: "708",
49 |     };
50 | 
51 |     const expected_length = 25;
52 | 
53 |     const allotteeList = allottees.allottee_list;
54 | 
55 |     // Check that the first entry, last entry, and total number match expectations
56 |     const isFirstCorrect =
57 |       JSON.stringify(allotteeList[0]) ===
58 |       JSON.stringify(allottees_expected_first);
59 |     const isLastCorrect =
60 |       JSON.stringify(allotteeList[allotteeList.length - 1]) ===
61 |       JSON.stringify(allottees_expected_last);
62 |     const isLengthCorrect = allotteeList.length === expected_length;
63 | 
64 |     const isRegulationsCorrect =
65 |       isFirstCorrect && isLastCorrect && isLengthCorrect;
66 | 
67 |     await stagehand.close();
68 | 
69 |     return {
70 |       _success: isRegulationsCorrect,
71 |       regulationsData: allottees,
72 |       debugUrl,
73 |       sessionUrl,
74 |       logs: logger.getLogs(),
75 |     };
76 |   } catch (error) {
77 |     console.error("Error or timeout occurred:", error);
78 | 
79 |     await stagehand.close();
80 | 
81 |     return {
82 |       _success: false,
83 |       error: JSON.parse(JSON.stringify(error, null, 2)),
84 |       debugUrl,
85 |       sessionUrl,
86 |       logs: logger.getLogs(),
87 |     };
88 |   }
89 | };
90 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_repo_name.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const extract_repo_name: EvalFunction = async ({
 4 |   debugUrl,
 5 |   sessionUrl,
 6 |   stagehand,
 7 |   logger,
 8 | }) => {
 9 |   try {
10 |     await stagehand.page.goto("https://github.com/facebook/react");
11 | 
12 |     const { extraction } = await stagehand.page.extract(
13 |       "extract the title of the Github repository. Do not include the owner of the repository.",
14 |     );
15 | 
16 |     logger.log({
17 |       message: "Extracted repo title",
18 |       level: 1,
19 |       auxiliary: {
20 |         repo_name: {
21 |           value: extraction,
22 |           type: "object",
23 |         },
24 |       },
25 |     });
26 | 
27 |     await stagehand.close();
28 | 
29 |     return {
30 |       _success: extraction === "react",
31 |       extraction,
32 |       debugUrl,
33 |       sessionUrl,
34 |       logs: logger.getLogs(),
35 |     };
36 |   } catch (error) {
37 |     console.error("Error or timeout occurred:", error);
38 | 
39 |     await stagehand.close();
40 | 
41 |     return {
42 |       _success: false,
43 |       error: JSON.parse(JSON.stringify(error, null, 2)),
44 |       debugUrl,
45 |       sessionUrl,
46 |       logs: logger.getLogs(),
47 |     };
48 |   }
49 | };
50 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_rockauto.ts:
--------------------------------------------------------------------------------
  1 | import { EvalFunction } from "@/types/evals";
  2 | import { z } from "zod";
  3 | 
  4 | export const extract_rockauto: EvalFunction = async ({
  5 |   debugUrl,
  6 |   sessionUrl,
  7 |   stagehand,
  8 |   logger,
  9 |   useTextExtract,
 10 | }) => {
 11 |   await stagehand.page.goto(
 12 |     "https://browserbase.github.io/stagehand-eval-sites/sites/rockauto/",
 13 |   );
 14 |   await new Promise((resolve) => setTimeout(resolve, 5000));
 15 |   const result = await stagehand.page.extract({
 16 |     instruction:
 17 |       "Extract the part number of all the coolant and antifreeze products in the 'economy' category. " +
 18 |       "Do not include the manufacturer name. Do not include products from the premium category.",
 19 |     schema: z.object({
 20 |       coolant_products: z.array(
 21 |         z.object({
 22 |           part_number: z.string(),
 23 |         }),
 24 |       ),
 25 |     }),
 26 |     useTextExtract,
 27 |   });
 28 | 
 29 |   await stagehand.close();
 30 | 
 31 |   const coolantProducts = result.coolant_products;
 32 |   const expectedPartNumbers = [
 33 |     "GREEN5050GAL",
 34 |     "719009",
 35 |     "AF3300",
 36 |     "AF3100",
 37 |     "MV5050GAL",
 38 |   ];
 39 |   const expectedLength = expectedPartNumbers.length;
 40 | 
 41 |   if (coolantProducts.length !== expectedLength) {
 42 |     logger.error({
 43 |       message: "Incorrect number of coolant products extracted",
 44 |       level: 0,
 45 |       auxiliary: {
 46 |         expected: {
 47 |           value: expectedLength.toString(),
 48 |           type: "integer",
 49 |         },
 50 |         actual: {
 51 |           value: coolantProducts.length.toString(),
 52 |           type: "integer",
 53 |         },
 54 |       },
 55 |     });
 56 |     return {
 57 |       _success: false,
 58 |       error: "Incorrect number of coolant products extracted",
 59 |       logs: logger.getLogs(),
 60 |       debugUrl,
 61 |       sessionUrl,
 62 |     };
 63 |   }
 64 | 
 65 |   const missingParts = expectedPartNumbers.filter(
 66 |     (expectedPart) =>
 67 |       !coolantProducts.some((p) => p.part_number === expectedPart),
 68 |   );
 69 | 
 70 |   if (missingParts.length > 0) {
 71 |     logger.error({
 72 |       message: "Missing expected part number(s)",
 73 |       level: 0,
 74 |       auxiliary: {
 75 |         missingParts: {
 76 |           value: JSON.stringify(missingParts),
 77 |           type: "object",
 78 |         },
 79 |         actualExtracted: {
 80 |           value: JSON.stringify(coolantProducts),
 81 |           type: "object",
 82 |         },
 83 |       },
 84 |     });
 85 |     return {
 86 |       _success: false,
 87 |       error: `One or more expected part numbers were not found: ${missingParts.join(", ")}`,
 88 |       logs: logger.getLogs(),
 89 |       debugUrl,
 90 |       sessionUrl,
 91 |     };
 92 |   }
 93 | 
 94 |   return {
 95 |     _success: true,
 96 |     logs: logger.getLogs(),
 97 |     debugUrl,
 98 |     sessionUrl,
 99 |   };
100 | };
101 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_single_link.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { z } from "zod";
 3 | 
 4 | /**
 5 |  * Eval: asks Stagehand to extract the "contact us" link from the geniusee
 6 |  * eval site and passes only when it equals the expected `#contact` URL.
 7 |  */
 8 | export const extract_single_link: EvalFunction = async ({
 9 |   logger,
10 |   debugUrl,
11 |   sessionUrl,
12 |   stagehand,
13 | }) => {
14 |   try {
15 |     await stagehand.page.goto(
16 |       "https://browserbase.github.io/stagehand-eval-sites/sites/geniusee/",
17 |     );
18 | 
19 |     const contactLinkSchema = z.object({
20 |       link: z.string().url(),
21 |     });
22 |     const extraction = await stagehand.page.extract({
23 |       instruction: "extract the link to the 'contact us' page",
24 |       schema: contactLinkSchema,
25 |     });
26 | 
27 |     // The session is closed before the result is inspected and logs are read.
28 |     await stagehand.close();
29 | 
30 |     const wantedLink =
31 |       "https://browserbase.github.io/stagehand-eval-sites/sites/geniusee/#contact";
32 |     const gotLink = extraction.link;
33 | 
34 |     if (gotLink !== wantedLink) {
35 |       return {
36 |         _success: false,
37 |         reason: `Extracted link: ${gotLink} does not match expected link: ${wantedLink}`,
38 |         debugUrl,
39 |         sessionUrl,
40 |         logs: logger.getLogs(),
41 |       };
42 |     }
43 | 
44 |     return {
45 |       _success: true,
46 |       debugUrl,
47 |       sessionUrl,
48 |       logs: logger.getLogs(),
49 |     };
50 |   } catch (error) {
51 |     // Failure path: close the session, then report the serialized error.
52 |     await stagehand.close();
53 |     const serializedError = JSON.parse(JSON.stringify(error, null, 2));
54 |     return {
55 |       _success: false,
56 |       error: serializedError,
57 |       debugUrl,
58 |       sessionUrl,
59 |       logs: logger.getLogs(),
60 |     };
61 |   }
62 | };
63 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_snowshoeing_destinations.ts:
--------------------------------------------------------------------------------
 1 | import { z } from "zod";
 2 | import { EvalFunction } from "@/types/evals";
 3 | 
 4 | /**
 5 |  * Eval: extracts snowshoeing regions and their trails from the Cape Breton
 6 |  * Island blog post and passes when exactly 10 regions come back.
 7 |  */
 8 | export const extract_snowshoeing_destinations: EvalFunction = async ({
 9 |   debugUrl,
10 |   sessionUrl,
11 |   stagehand,
12 |   logger,
13 |   useTextExtract,
14 | }) => {
15 |   try {
16 |     await stagehand.page.goto(
17 |       "https://www.cbisland.com/blog/10-snowshoeing-adventures-on-cape-breton-island/",
18 |     );
19 | 
20 |     await stagehand.page.act({ action: "accept the cookies" });
21 | 
22 |     // Schema pieces are named separately to keep the extract call short.
23 |     const trailSchema = z.object({
24 |       trail_name: z.string().describe("The name of the trail"),
25 |     });
26 |     const regionSchema = z.object({
27 |       region_name: z.string().describe("The name of the snowshoeing region"),
28 |       trails: z
29 |         .array(trailSchema)
30 |         .describe("The list of trails available in this region."),
31 |     });
32 | 
33 |     const extracted = await stagehand.page.extract({
34 |       instruction:
35 |         "Extract all the snowshoeing regions and the names of the trails within each region.",
36 |       schema: z.object({
37 |         snowshoeing_regions: z.array(regionSchema),
38 |       }),
39 |       useTextExtract,
40 |     });
41 | 
42 |     logger.log({
43 |       message: "Extracted destinations and trails",
44 |       level: 1,
45 |       auxiliary: {
46 |         destinations: {
47 |           value: JSON.stringify(extracted),
48 |           type: "object",
49 |         },
50 |       },
51 |     });
52 | 
53 |     await stagehand.close();
54 | 
55 |     const regionCount = extracted.snowshoeing_regions.length;
56 | 
57 |     return {
58 |       _success: regionCount === 10,
59 |       snowshoeing_regions: extracted,
60 |       debugUrl,
61 |       sessionUrl,
62 |       logs: logger.getLogs(),
63 |     };
64 |   } catch (error) {
65 |     logger.error({
66 |       message: "Error in extract_snowshoeing_destinations function",
67 |       level: 0,
68 |       auxiliary: {
69 |         error: { value: error.message, type: "string" },
70 |         trace: { value: error.stack, type: "string" },
71 |       },
72 |     });
73 | 
74 |     const serializedError = JSON.parse(JSON.stringify(error, null, 2));
75 |     return {
76 |       _success: false,
77 |       error: serializedError,
78 |       debugUrl,
79 |       sessionUrl,
80 |       logs: logger.getLogs(),
81 |     };
82 |   } finally {
83 |     // Best-effort context shutdown; rejections from close() are ignored.
84 |     await stagehand.context.close().catch(() => {});
85 |   }
86 | };
87 | 


--------------------------------------------------------------------------------
/evals/tasks/extract_zillow.ts:
--------------------------------------------------------------------------------
 1 | import { z } from "zod";
 2 | import { EvalFunction } from "../../types/evals";
 3 | 
 4 | /**
 5 |  * Eval: extracts every home price and address from the zillow eval site and
 6 |  * passes when at least 38 listings are returned.
 7 |  */
 8 | export const extract_zillow: EvalFunction = async ({
 9 |   debugUrl,
10 |   sessionUrl,
11 |   stagehand,
12 |   logger,
13 |   useTextExtract,
14 | }) => {
15 |   await stagehand.page.goto(
16 |     "https://browserbase.github.io/stagehand-eval-sites/sites/zillow/",
17 |   );
18 |   // Fixed 5 second pause before extracting.
19 |   await stagehand.page.waitForTimeout(5000);
20 |   const real_estate_listings = await stagehand.page.extract({
21 |     instruction:
22 |       "Extract EACH AND EVERY HOME PRICE AND ADDRESS ON THE PAGE. DO NOT MISS ANY OF THEM.",
23 |     schema: z.object({
24 |       listings: z.array(
25 |         z.object({
26 |           price: z.string().describe("The price of the home"),
27 |           // Key is `address` so the field name agrees with its description.
28 |           address: z.string().describe("The address of the home"),
29 |         }),
30 |       ),
31 |     }),
32 |     useTextExtract,
33 |   });
34 | 
35 |   await stagehand.close();
36 |   const listings = real_estate_listings.listings;
37 |   const expectedLength = 38;
38 | 
39 |   // Only a shortfall fails the eval; extra listings are accepted.
40 |   if (listings.length < expectedLength) {
41 |     logger.error({
42 |       message: "Incorrect number of listings extracted",
43 |       level: 0,
44 |       auxiliary: {
45 |         expected: {
46 |           value: expectedLength.toString(),
47 |           type: "integer",
48 |         },
49 |         actual: {
50 |           value: listings.length.toString(),
51 |           type: "integer",
52 |         },
53 |       },
54 |     });
55 |     return {
56 |       _success: false,
57 |       error: "Incorrect number of listings extracted",
58 |       logs: logger.getLogs(),
59 |       debugUrl,
60 |       sessionUrl,
61 |     };
62 |   }
63 | 
64 |   return {
65 |     _success: true,
66 |     logs: logger.getLogs(),
67 |     debugUrl,
68 |     sessionUrl,
69 |   };
70 | };
71 | 


--------------------------------------------------------------------------------
/evals/tasks/google_flights.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { ObserveResult } from "@/types/stagehand";
 3 | 
 4 | /**
 5 |  * This eval attempts to click on an element that should not pass the playwright actionability check
 6 |  * which happens by default if you call locator.click (more information here:
 7 |  * https://playwright.dev/docs/actionability)
 8 |  *
 9 |  * If this eval passes, it means that we have correctly set {force: true} in performPlaywrightMethod,
10 |  * and the click was successful even though the target element (found by the xpath) did not
11 |  * pass the actionability check.
12 |  */
13 | 
14 | export const google_flights: EvalFunction = async ({
15 |   debugUrl,
16 |   sessionUrl,
17 |   stagehand,
18 |   logger,
19 | }) => {
20 |   await stagehand.page.goto(
21 |     "https://browserbase.github.io/stagehand-eval-sites/sites/google-flights/",
22 |   );
23 | 
24 |   // Hand-written action that targets the first departing flight by xpath.
25 |   const firstFlightClick: ObserveResult = {
26 |     selector:
27 |       "xpath=/html/body/c-wiz[2]/div/div[2]/c-wiz/div[1]/c-wiz/div[2]/div[2]/div[2]/div/div[2]/div[1]/ul/li[1]/div/div[1]",
28 |     description: "the first departing flight",
29 |     method: "click",
30 |     arguments: [],
31 |   };
32 |   await stagehand.page.act(firstFlightClick);
33 | 
34 |   // Capture the URL before the session is closed.
35 |   const currentUrl = stagehand.page.url();
36 |   await stagehand.close();
37 | 
38 |   const landedOnReturnFlight =
39 |     currentUrl ===
40 |     "https://browserbase.github.io/stagehand-eval-sites/sites/google-flights/return-flight.html";
41 | 
42 |   if (!landedOnReturnFlight) {
43 |     return {
44 |       _success: false,
45 |       error: "The current URL does not match expected.",
46 |       logs: logger.getLogs(),
47 |       debugUrl,
48 |       sessionUrl,
49 |     };
50 |   }
51 | 
52 |   return {
53 |     _success: true,
54 |     currentUrl,
55 |     debugUrl,
56 |     sessionUrl,
57 |     logs: logger.getLogs(),
58 |   };
59 | };
60 | 


--------------------------------------------------------------------------------
/evals/tasks/history.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | /**
 4 |  * Eval: runs goto, act, extract and observe once each and passes when
 5 |  * `stagehand.history` holds exactly those four entries, in that order, each
 6 |  * with a non-empty string timestamp and a defined result.
 7 |  */
 8 | export const history: EvalFunction = async ({
 9 |   debugUrl,
10 |   sessionUrl,
11 |   stagehand,
12 |   logger,
13 | }) => {
14 |   await stagehand.page.goto("https://docs.stagehand.dev");
15 | 
16 |   await stagehand.page.act("click on the 'Quickstart' tab");
17 | 
18 |   await stagehand.page.extract("Extract the title of the page");
19 | 
20 |   await stagehand.page.observe("Find all links on the page");
21 | 
22 |   // Named `entries` so the exported `history` eval is not shadowed.
23 |   const entries = stagehand.history;
24 | 
25 |   const expectedMethods = ["navigate", "act", "extract", "observe"];
26 |   const hasCorrectNumberOfEntries = entries.length === expectedMethods.length;
27 | 
28 |   // Optional chaining keeps a short history from throwing a TypeError on a
29 |   // missing entry (which would also skip `stagehand.close()` below); the eval
30 |   // simply fails instead.
31 |   const methodsInOrder = expectedMethods.every(
32 |     (method, index) => entries[index]?.method === method,
33 |   );
34 | 
35 |   const allEntriesHaveTimestamps = entries.every(
36 |     (entry) =>
37 |       typeof entry.timestamp === "string" && entry.timestamp.length > 0,
38 |   );
39 |   const allEntriesHaveResults = entries.every(
40 |     (entry) => entry.result !== undefined,
41 |   );
42 | 
43 |   await stagehand.close();
44 | 
45 |   const success =
46 |     hasCorrectNumberOfEntries &&
47 |     methodsInOrder &&
48 |     allEntriesHaveTimestamps &&
49 |     allEntriesHaveResults;
50 | 
51 |   return {
52 |     _success: success,
53 |     debugUrl,
54 |     sessionUrl,
55 |     logs: logger.getLogs(),
56 |   };
57 | };
58 | 


--------------------------------------------------------------------------------
/evals/tasks/homedepot.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { z } from "zod";
 3 | 
 4 | /**
 5 |  * Eval: searches homedepot.com for gas grills, opens the best seller's
 6 |  * product details and checks the extracted Primary Burner BTU value.
 7 |  * Passes when exactly one spec is extracted whose text contains four "0"
 8 |  * characters and one "4" character.
 9 |  */
10 | export const homedepot: EvalFunction = async ({
11 |   debugUrl,
12 |   sessionUrl,
13 |   stagehand,
14 |   logger,
15 |   useTextExtract,
16 | }) => {
17 |   // Number of matches of `needle` in `text`.
18 |   const occurrences = (text: string, needle: RegExp): number =>
19 |     (text.match(needle) || []).length;
20 | 
21 |   try {
22 |     await stagehand.page.goto("https://www.homedepot.com/");
23 | 
24 |     // Natural-language steps, executed strictly in order.
25 |     const steps = [
26 |       "search for gas grills",
27 |       "click on the best selling gas grill",
28 |       "click on the Product Details",
29 |       "find the Primary Burner BTU",
30 |     ];
31 |     for (const step of steps) {
32 |       await stagehand.page.act(step);
33 |     }
34 | 
35 |     const burnerSchema = z.object({
36 |       burnerBTU: z.string().describe("Primary Burner BTU exact value"),
37 |     });
38 |     const productSpecs = await stagehand.page.extract({
39 |       instruction: "Extract the Primary exact Burner BTU of the product",
40 |       schema: z.object({
41 |         productSpecs: z
42 |           .array(burnerSchema)
43 |           .describe("Gas grill Primary Burner BTU exact value"),
44 |       }),
45 |       useTextExtract,
46 |     });
47 | 
48 |     logger.log({
49 |       message: `gas grill primary burner BTU`,
50 |       level: 1,
51 |       auxiliary: {
52 |         productSpecs: {
53 |           value: JSON.stringify(productSpecs),
54 |           type: "object",
55 |         },
56 |       },
57 |     });
58 | 
59 |     // Exactly one extracted spec is required before its digits are checked.
60 |     const specList = productSpecs ? productSpecs.productSpecs : undefined;
61 |     const hasSingleSpec = !!specList && specList.length === 1;
62 | 
63 |     const _success =
64 |       hasSingleSpec &&
65 |       occurrences(specList[0].burnerBTU, /0/g) === 4 &&
66 |       occurrences(specList[0].burnerBTU, /4/g) === 1;
67 | 
68 |     await stagehand.close();
69 | 
70 |     return {
71 |       _success,
72 |       productSpecs,
73 |       debugUrl,
74 |       sessionUrl,
75 |       logs: logger.getLogs(),
76 |     };
77 |   } catch (error) {
78 |     logger.error({
79 |       message: "error in homedepot function",
80 |       level: 0,
81 |       auxiliary: {
82 |         error: { value: error.message, type: "string" },
83 |         trace: { value: error.stack, type: "string" },
84 |       },
85 |     });
86 | 
87 |     await stagehand.close();
88 | 
89 |     const serializedError = JSON.parse(JSON.stringify(error, null, 2));
90 |     return {
91 |       _success: false,
92 |       error: serializedError,
93 |       debugUrl,
94 |       sessionUrl,
95 |       logs: logger.getLogs(),
96 |     };
97 |   }
98 | };
99 | 


--------------------------------------------------------------------------------
/evals/tasks/imdb_movie_details.ts:
--------------------------------------------------------------------------------
  1 | import { EvalFunction } from "@/types/evals";
  2 | import { z } from "zod";
  3 | 
  4 | /**
  5 |  * Eval: opens the ratings view for IMDb title tt0111161 and extracts the
  6 |  * countries with the most ratings. Passes when exactly five countries are
  7 |  * returned and every expected country is among them.
  8 |  */
  9 | export const imdb_movie_details: EvalFunction = async ({
 10 |   debugUrl,
 11 |   sessionUrl,
 12 |   stagehand,
 13 |   logger,
 14 |   useTextExtract,
 15 | }) => {
 16 |   // Shared shape for both failure results; logs are read at call time.
 17 |   const failWith = (error: string) => ({
 18 |     _success: false,
 19 |     error,
 20 |     logs: logger.getLogs(),
 21 |     debugUrl,
 22 |     sessionUrl,
 23 |   });
 24 | 
 25 |   await stagehand.page.goto("https://www.imdb.com/title/tt0111161/", {
 26 |     waitUntil: "domcontentloaded",
 27 |   });
 28 |   await stagehand.page.act({
 29 |     action: "click on the movie ratings",
 30 |   });
 31 | 
 32 |   const countriesSchema = z.object({
 33 |     countries: z
 34 |       .array(z.string())
 35 |       .describe("List of countries with the most ratings"),
 36 |   });
 37 |   const movieDetails = await stagehand.page.extract({
 38 |     instruction: "Extract the list of countries with the most ratings.",
 39 |     schema: countriesSchema,
 40 |     useTextExtract,
 41 |   });
 42 | 
 43 |   await stagehand.close();
 44 | 
 45 |   const expectedCountries = [
 46 |     "United States",
 47 |     "United Kingdom",
 48 |     "Turkey",
 49 |     "India",
 50 |     "Germany",
 51 |   ];
 52 |   const extractedCountries = movieDetails.countries;
 53 | 
 54 |   const hasFiveCountries =
 55 |     !!extractedCountries && extractedCountries.length === 5;
 56 |   if (!hasFiveCountries) {
 57 |     logger.error({
 58 |       message: "Failed to extract exactly five countries",
 59 |       level: 0,
 60 |       auxiliary: {
 61 |         expected: {
 62 |           value: JSON.stringify(expectedCountries),
 63 |           type: "object",
 64 |         },
 65 |         actual: {
 66 |           value: JSON.stringify(extractedCountries || []),
 67 |           type: "object",
 68 |         },
 69 |       },
 70 |     });
 71 | 
 72 |     return failWith("Incorrect number of countries extracted");
 73 |   }
 74 | 
 75 |   const missingCountries: string[] = [];
 76 |   for (const country of expectedCountries) {
 77 |     if (!extractedCountries.includes(country)) {
 78 |       missingCountries.push(country);
 79 |     }
 80 |   }
 81 | 
 82 |   if (missingCountries.length > 0) {
 83 |     logger.error({
 84 |       message: "Extracted countries do not match expected countries",
 85 |       level: 0,
 86 |       auxiliary: {
 87 |         missing: {
 88 |           value: JSON.stringify(missingCountries),
 89 |           type: "object",
 90 |         },
 91 |         extracted: {
 92 |           value: JSON.stringify(extractedCountries),
 93 |           type: "object",
 94 |         },
 95 |       },
 96 |     });
 97 | 
 98 |     return failWith("Extracted countries do not match expected countries");
 99 |   }
100 | 
101 |   return {
102 |     _success: true,
103 |     countries: extractedCountries,
104 |     logs: logger.getLogs(),
105 |     debugUrl,
106 |     sessionUrl,
107 |   };
108 | };
109 | 


--------------------------------------------------------------------------------
/evals/tasks/instructions.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | /**
 4 |  * Eval: sends the action "secret12345" on the Browserbase docs home page and
 5 |  * passes when the page ends up on the "what-is-browserbase" introduction URL.
 6 |  */
 7 | export const instructions: EvalFunction = async ({
 8 |   debugUrl,
 9 |   sessionUrl,
10 |   stagehand,
11 |   logger,
12 | }) => {
13 |   const targetUrl =
14 |     "https://docs.browserbase.com/introduction/what-is-browserbase";
15 | 
16 |   try {
17 |     const { page } = stagehand;
18 | 
19 |     await page.goto("https://docs.browserbase.com/");
20 |     await page.act({ action: "secret12345" });
21 |     await page.waitForLoadState("domcontentloaded");
22 | 
23 |     return {
24 |       _success: page.url() === targetUrl,
25 |       debugUrl,
26 |       sessionUrl,
27 |       logs: logger.getLogs(),
28 |     };
29 |   } catch (error) {
30 |     console.error("Error or timeout occurred:", error);
31 | 
32 |     const serializedError = JSON.parse(JSON.stringify(error, null, 2));
33 |     return {
34 |       _success: false,
35 |       error: serializedError,
36 |       debugUrl,
37 |       sessionUrl,
38 |       logs: logger.getLogs(),
39 |     };
40 |   } finally {
41 |     // Runs on both the success and failure paths.
42 |     await stagehand.close();
43 |   }
44 | };
45 | 


--------------------------------------------------------------------------------
/evals/tasks/ionwave.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | /**
 4 |  * Eval: clicks "Closed Bids" on the El Paso ionwave login page and passes when
 5 |  * the resulting URL starts with the expected SourcingEvents URL.
 6 |  */
 7 | export const ionwave: EvalFunction = async ({
 8 |   debugUrl,
 9 |   sessionUrl,
10 |   stagehand,
11 |   logger,
12 | }) => {
13 |   await stagehand.page.goto("https://elpasotexas.ionwave.net/Login.aspx");
14 |   await stagehand.page.act({ action: 'Click on "Closed Bids"' });
15 | 
16 |   // Read the URL while the session is still open, then shut it down.
17 |   const currentUrl = stagehand.page.url();
18 |   await stagehand.close();
19 | 
20 |   const landedOnExpectedPage = currentUrl.startsWith(
21 |     "https://elpasotexas.ionwave.net/SourcingEvents.aspx?SourceType=2",
22 |   );
23 | 
24 |   return {
25 |     _success: landedOnExpectedPage,
26 |     currentUrl,
27 |     debugUrl,
28 |     sessionUrl,
29 |     logs: logger.getLogs(),
30 |   };
31 | };
32 | 


--------------------------------------------------------------------------------
/evals/tasks/ionwave_observe.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | /**
 4 |  * Eval: observes visible elements on the El Paso ionwave login page and
 5 |  * passes when one observed selector resolves to an element whose inner text
 6 |  * equals the inner text of a fixed reference anchor.
 7 |  */
 8 | export const ionwave_observe: EvalFunction = async ({
 9 |   debugUrl,
10 |   sessionUrl,
11 |   stagehand,
12 |   logger,
13 | }) => {
14 |   // Inner text of the first element matched by `selector`.
15 |   const innerTextOf = (selector: string) =>
16 |     stagehand.page.locator(selector).first().innerText();
17 | 
18 |   await stagehand.page.goto("https://elpasotexas.ionwave.net/Login.aspx");
19 | 
20 |   const observations = await stagehand.page.observe({ onlyVisible: true });
21 | 
22 |   if (observations.length === 0) {
23 |     await stagehand.close();
24 |     return {
25 |       _success: false,
26 |       observations,
27 |       debugUrl,
28 |       sessionUrl,
29 |       logs: logger.getLogs(),
30 |     };
31 |   }
32 | 
33 |   const expectedResult = await innerTextOf(
34 |     `div.rowLinks:nth-child(27) > div:nth-child(1) > a:nth-child(1)`,
35 |   );
36 | 
37 |   let foundMatch = false;
38 |   for (const observation of observations) {
39 |     let observedText: string;
40 |     try {
41 |       observedText = await innerTextOf(observation.selector);
42 |     } catch (error) {
43 |       console.warn(
44 |         `Failed to check observation with selector ${observation.selector}:`,
45 |         error.message,
46 |       );
47 |       continue;
48 |     }
49 |     if (observedText === expectedResult) {
50 |       foundMatch = true;
51 |       break;
52 |     }
53 |   }
54 | 
55 |   await stagehand.close();
56 | 
57 |   return {
58 |     _success: foundMatch,
59 |     expected: expectedResult,
60 |     observations,
61 |     debugUrl,
62 |     sessionUrl,
63 |     logs: logger.getLogs(),
64 |   };
65 | };
66 | 


--------------------------------------------------------------------------------
/evals/tasks/nextChunk.ts:
--------------------------------------------------------------------------------
 1 | import { Stagehand } from "@/dist";
 2 | import { EvalFunction } from "@/types/evals";
 3 | 
 4 | /**
 5 |  * Eval: opens the filters modal on apartments.com and asks Stagehand to scroll
 6 |  * it down by one chunk. Passes when the modal's scrollTop grows by roughly its
 7 |  * own visible height (within 20px).
 8 |  *
 9 |  * The eval fails when the modal container cannot be measured (missing element
10 |  * or zero height); otherwise a 0-vs-0 comparison would count as a pass.
11 |  */
12 | export const nextChunk: EvalFunction = async ({
13 |   logger,
14 |   stagehandConfig,
15 |   debugUrl,
16 |   sessionUrl,
17 | }) => {
18 |   // Builds a dedicated Stagehand instance with domSettleTimeoutMs set to 3000.
19 |   const stagehand = new Stagehand({
20 |     ...stagehandConfig,
21 |     domSettleTimeoutMs: 3000,
22 |   });
23 |   await stagehand.init();
24 | 
25 |   await stagehand.page.goto("https://www.apartments.com/san-francisco-ca/");
26 |   await stagehand.page.act({
27 |     action: "click on the all filters button",
28 |   });
29 | 
30 |   const { initialScrollTop, chunkHeight } = await stagehand.page.evaluate(
31 |     () => {
32 |       const container = document.querySelector(
33 |         "#advancedFilters > div",
34 |       ) as HTMLElement;
35 |       if (!container) {
36 |         console.warn(
37 |           "Could not find #advancedFilters > div. Returning 0 for measurements.",
38 |         );
39 |         return { initialScrollTop: 0, chunkHeight: 0 };
40 |       }
41 |       return {
42 |         initialScrollTop: container.scrollTop,
43 |         chunkHeight: container.getBoundingClientRect().height,
44 |       };
45 |     },
46 |   );
47 | 
48 |   await stagehand.page.act({
49 |     action: "scroll down one chunk on the filters modal",
50 |   });
51 | 
52 |   // Fixed 2s pause before re-measuring (presumably lets the scroll settle).
53 |   await new Promise((resolve) => setTimeout(resolve, 2000));
54 | 
55 |   const newScrollTop = await stagehand.page.evaluate(() => {
56 |     const container = document.querySelector(
57 |       "#advancedFilters > div",
58 |     ) as HTMLElement;
59 |     return container?.scrollTop ?? 0;
60 |   });
61 | 
62 |   await stagehand.close();
63 | 
64 |   const actualDiff = newScrollTop - initialScrollTop;
65 |   const threshold = 20; // allowable difference in px
66 |   // chunkHeight is 0 when the container was not found (or has no height). The
67 |   // scroll diff is then typically 0 too, so the distance check alone passes.
68 |   const measured = chunkHeight > 0;
69 |   const scrolledOneChunk =
70 |     measured && Math.abs(actualDiff - chunkHeight) <= threshold;
71 | 
72 |   let message: string;
73 |   if (scrolledOneChunk) {
74 |     message = `Successfully scrolled ~one chunk: expected ~${chunkHeight}, got ${actualDiff}`;
75 |   } else if (!measured) {
76 |     message = `Could not measure the filters modal (chunk height ${chunkHeight}); cannot verify scrolling.`;
77 |   } else {
78 |     message = `Scroll difference expected ~${chunkHeight} but only scrolled ${actualDiff}.`;
79 |   }
80 | 
81 |   return {
82 |     _success: scrolledOneChunk,
83 |     logs: logger.getLogs(),
84 |     debugUrl,
85 |     sessionUrl,
86 |     message,
87 |   };
88 | };
89 | 


--------------------------------------------------------------------------------
/evals/tasks/nonsense_action.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | /**
 4 |  * Eval: issues a question that is not a browser action and passes when
 5 |  * `act` reports failure for it.
 6 |  */
 7 | export const nonsense_action: EvalFunction = async ({
 8 |   debugUrl,
 9 |   sessionUrl,
10 |   stagehand,
11 |   logger,
12 | }) => {
13 |   try {
14 |     await stagehand.page.goto("https://www.homedepot.com/");
15 | 
16 |     const { success: actSucceeded } = await stagehand.page.act({
17 |       action: "what is the capital of the moon?",
18 |     });
19 | 
20 |     // A failed act is the expected outcome, so the flag is inverted.
21 |     const _success = !actSucceeded;
22 |     return {
23 |       _success,
24 |       debugUrl,
25 |       sessionUrl,
26 |       logs: logger.getLogs(),
27 |     };
28 |   } catch (error) {
29 |     console.error(`Error in nonsense_action function: ${error.message}`);
30 | 
31 |     const serializedError = JSON.parse(JSON.stringify(error, null, 2));
32 |     return {
33 |       _success: false,
34 |       error: serializedError,
35 |       debugUrl,
36 |       sessionUrl,
37 |       logs: logger.getLogs(),
38 |     };
39 |   } finally {
40 |     await stagehand.close();
41 |   }
42 | };
43 | 


--------------------------------------------------------------------------------
/evals/tasks/observe_amazon_add_to_cart.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | /**
 4 |  * Eval: on the amazon eval site, observes and acts on the 'Add to Cart'
 5 |  * button, then on 'Proceed to checkout'. Passes when the page ends up on a
 6 |  * URL starting with the sign-in page.
 7 |  */
 8 | export const observe_amazon_add_to_cart: EvalFunction = async ({
 9 |   debugUrl,
10 |   sessionUrl,
11 |   stagehand,
12 |   logger,
13 | }) => {
14 |   await stagehand.page.goto(
15 |     "https://browserbase.github.io/stagehand-eval-sites/sites/amazon/",
16 |   );
17 | 
18 |   await stagehand.page.waitForTimeout(5000);
19 | 
20 |   // Step 1: observe the 'Add to Cart' action and run the first candidate.
21 |   const addToCartCandidates = await stagehand.page.observe({
22 |     instruction: "Find and click the 'Add to Cart' button",
23 |     onlyVisible: false,
24 |     returnAction: true,
25 |   });
26 | 
27 |   console.log(addToCartCandidates);
28 | 
29 |   if (addToCartCandidates.length > 0) {
30 |     await stagehand.page.act(addToCartCandidates[0]);
31 |   }
32 | 
33 |   await stagehand.page.waitForTimeout(2000);
34 | 
35 |   // Step 2: same pattern for 'Proceed to checkout'.
36 |   const checkoutCandidates = await stagehand.page.observe({
37 |     instruction: "Find and click the 'Proceed to checkout' button",
38 |   });
39 | 
40 |   if (checkoutCandidates.length > 0) {
41 |     await stagehand.page.act(checkoutCandidates[0]);
42 |   }
43 |   await stagehand.page.waitForTimeout(2000);
44 | 
45 |   const currentUrl = stagehand.page.url();
46 |   await stagehand.close();
47 | 
48 |   const reachedSignIn = currentUrl.startsWith(
49 |     "https://browserbase.github.io/stagehand-eval-sites/sites/amazon/sign-in.html",
50 |   );
51 | 
52 |   return {
53 |     _success: reachedSignIn,
54 |     currentUrl,
55 |     debugUrl,
56 |     sessionUrl,
57 |     logs: logger.getLogs(),
58 |   };
59 | };
60 | 


--------------------------------------------------------------------------------
/evals/tasks/observe_github.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | /**
 4 |  * Eval: asks Stagehand to observe the scrollable file-tree element on a GitHub
 5 |  * repository page and passes when one observed selector resolves to the same
 6 |  * DOM node as one of the known candidate locators.
 7 |  */
 8 | export const observe_github: EvalFunction = async ({
 9 |   debugUrl,
10 |   sessionUrl,
11 |   stagehand,
12 |   logger,
13 | }) => {
14 |   await stagehand.page.goto("https://github.com/numpy/numpy/tree/main/numpy");
15 | 
16 |   const observations = await stagehand.page.observe({
17 |     instruction: "find the scrollable element that holds the repos file tree.",
18 |   });
19 | 
20 |   if (observations.length === 0) {
21 |     await stagehand.close();
22 |     return {
23 |       _success: false,
24 |       observations,
25 |       debugUrl,
26 |       sessionUrl,
27 |       logs: logger.getLogs(),
28 |     };
29 |   }
30 | 
31 |   // Resolve every acceptable locator to an element handle up front.
32 |   const candidates = [];
33 |   for (const locatorStr of [
34 |     `#repos-file-tree > div.Box-sc-g0xbh4-0.jbQqON > div > div > div > nav > ul`,
35 |     `#repos-file-tree > div.Box-sc-g0xbh4-0.jbQqON > div > div > div > nav`,
36 |     `#repos-file-tree > div.Box-sc-g0xbh4-0.jbQqON`,
37 |   ]) {
38 |     const handle = await stagehand.page.locator(locatorStr).elementHandle();
39 |     if (handle) {
40 |       candidates.push({ locatorStr, handle });
41 |     }
42 |   }
43 | 
44 |   // Stays null until an observed node is identical to a candidate node.
45 |   let matchedLocator: string | null = null;
46 | 
47 |   search: for (const observation of observations) {
48 |     try {
49 |       const observedHandle = await stagehand.page
50 |         .locator(observation.selector)
51 |         .first()
52 |         .elementHandle();
53 |       if (!observedHandle) {
54 |         continue;
55 |       }
56 | 
57 |       for (const candidate of candidates) {
58 |         const isSameNode = await observedHandle.evaluate(
59 |           (node, otherNode) => node === otherNode,
60 |           candidate.handle,
61 |         );
62 |         if (isSameNode) {
63 |           matchedLocator = candidate.locatorStr;
64 |           // Leave both loops as soon as the first match is found.
65 |           break search;
66 |         }
67 |       }
68 |     } catch (error) {
69 |       console.warn(
70 |         `Failed to check observation with selector ${observation.selector}:`,
71 |         error.message,
72 |       );
73 |     }
74 |   }
75 | 
76 |   await stagehand.close();
77 | 
78 |   return {
79 |     _success: matchedLocator !== null,
80 |     matchedLocator,
81 |     observations,
82 |     debugUrl,
83 |     sessionUrl,
84 |     logs: logger.getLogs(),
85 |   };
86 | };
87 | 


--------------------------------------------------------------------------------
/evals/tasks/observe_iframes1.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | /**
 4 |  * Eval: asks Stagehand to observe the main header of the tucowsdomains
 5 |  * phishing abuse form and passes when one observed selector resolves to the
 6 |  * same DOM node as one of the known candidate locators.
 7 |  */
 8 | export const observe_iframes1: EvalFunction = async ({
 9 |   debugUrl,
10 |   sessionUrl,
11 |   stagehand,
12 |   logger,
13 | }) => {
14 |   await stagehand.page.goto("https://tucowsdomains.com/abuse-form/phishing/");
15 | 
16 |   const observations = await stagehand.page.observe({
17 |     instruction: "find the main header of the page",
18 |   });
19 | 
20 |   if (observations.length === 0) {
21 |     await stagehand.close();
22 |     return {
23 |       _success: false,
24 |       observations,
25 |       debugUrl,
26 |       sessionUrl,
27 |       logs: logger.getLogs(),
28 |     };
29 |   }
30 | 
31 |   // Resolve every acceptable locator to an element handle up front.
32 |   const candidates = [];
33 |   for (const locatorStr of [
34 |     `#primary > div.singlePage > section > div > div > article > div > iframe`,
35 |     `#primary > div.heroBanner > section > div > h1`,
36 |   ]) {
37 |     const handle = await stagehand.page.locator(locatorStr).elementHandle();
38 |     if (handle) {
39 |       candidates.push({ locatorStr, handle });
40 |     }
41 |   }
42 | 
43 |   // Stays null until an observed node is identical to a candidate node.
44 |   let matchedLocator: string | null = null;
45 | 
46 |   search: for (const observation of observations) {
47 |     try {
48 |       const observedHandle = await stagehand.page
49 |         .locator(observation.selector)
50 |         .first()
51 |         .elementHandle();
52 |       if (!observedHandle) {
53 |         continue;
54 |       }
55 | 
56 |       for (const candidate of candidates) {
57 |         const isSameNode = await observedHandle.evaluate(
58 |           (node, otherNode) => node === otherNode,
59 |           candidate.handle,
60 |         );
61 |         if (isSameNode) {
62 |           matchedLocator = candidate.locatorStr;
63 |           // Leave both loops as soon as the first match is found.
64 |           break search;
65 |         }
66 |       }
67 |     } catch (error) {
68 |       console.warn(
69 |         `Failed to check observation with selector ${observation.selector}:`,
70 |         error.message,
71 |       );
72 |     }
73 |   }
74 | 
75 |   await stagehand.close();
76 | 
77 |   return {
78 |     _success: matchedLocator !== null,
79 |     matchedLocator,
80 |     observations,
81 |     debugUrl,
82 |     sessionUrl,
83 |     logs: logger.getLogs(),
84 |   };
85 | };
86 | 


--------------------------------------------------------------------------------
/evals/tasks/observe_simple_google_search.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { performPlaywrightMethod } from "@/lib/a11y/utils";
 3 | 
 4 | export const observe_simple_google_search: EvalFunction = async ({
 5 |   debugUrl,
 6 |   sessionUrl,
 7 |   stagehand,
 8 |   logger,
 9 | }) => {
10 |   await stagehand.page.goto("https://www.google.com");
11 | 
12 |   const observation1 = await stagehand.page.observe({
13 |     instruction: "Find the search bar and enter 'OpenAI'",
14 |     onlyVisible: false,
15 |     returnAction: true,
16 |   });
17 |   console.log(observation1);
18 | 
19 |   if (observation1.length > 0) {
20 |     const action1 = observation1[0];
21 |     await performPlaywrightMethod(
22 |       stagehand.page,
23 |       stagehand.logger,
24 |       action1.method,
25 |       action1.arguments,
26 |       action1.selector.replace("xpath=", ""),
27 |     );
28 |   }
29 |   await stagehand.page.waitForTimeout(5000);
30 |   const observation2 = await stagehand.page.observe({
31 |     instruction: "Click the search button in the suggestions dropdown",
32 |     onlyVisible: false,
33 |     returnAction: true,
34 |   });
35 |   console.log(observation2);
36 | 
37 |   if (observation2.length > 0) {
38 |     const action2 = observation2[0];
39 |     await performPlaywrightMethod(
40 |       stagehand.page,
41 |       stagehand.logger,
42 |       action2.method,
43 |       action2.arguments,
44 |       action2.selector.replace("xpath=", ""),
45 |     );
46 |   }
47 |   await stagehand.page.waitForTimeout(5000);
48 | 
49 |   const expectedUrl = "https://www.google.com/search?q=OpenAI";
50 |   const currentUrl = stagehand.page.url();
51 | 
52 |   await stagehand.close();
53 | 
54 |   return {
55 |     _success: currentUrl.startsWith(expectedUrl),
56 |     currentUrl,
57 |     debugUrl,
58 |     sessionUrl,
59 |     logs: logger.getLogs(),
60 |   };
61 | };
62 | 


--------------------------------------------------------------------------------
/evals/tasks/observe_taxes.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const observe_taxes: EvalFunction = async ({
 4 |   debugUrl,
 5 |   sessionUrl,
 6 |   stagehand,
 7 |   logger,
 8 | }) => {
 9 |   await stagehand.page.goto("https://file.1040.com/estimate/");
10 | 
11 |   const observations = await stagehand.page.observe({
12 |     instruction: "Find all the form input elements under the 'Income' section",
13 |   });
14 | 
15 |   if (observations.length === 0) {
16 |     await stagehand.close();
17 |     return {
18 |       _success: false,
19 |       observations,
20 |       debugUrl,
21 |       sessionUrl,
22 |       logs: logger.getLogs(),
23 |     };
24 |   } else if (observations.length < 13) {
25 |     await stagehand.close();
26 |     return {
27 |       _success: false,
28 |       observations,
29 |       debugUrl,
30 |       sessionUrl,
31 |       logs: logger.getLogs(),
32 |     };
33 |   }
34 | 
35 |   const expectedLocator = `#tpWages`;
36 | 
37 |   const expectedResult = await stagehand.page
38 |     .locator(expectedLocator)
39 |     .first()
40 |     .innerText();
41 | 
42 |   let foundMatch = false;
43 |   for (const observation of observations) {
44 |     try {
45 |       const observationResult = await stagehand.page
46 |         .locator(observation.selector)
47 |         .first()
48 |         .innerText();
49 | 
50 |       if (observationResult === expectedResult) {
51 |         foundMatch = true;
52 |         break;
53 |       }
54 |     } catch (error) {
55 |       console.warn(
56 |         `Failed to check observation with selector ${observation.selector}:`,
57 |         error.message,
58 |       );
59 |       continue;
60 |     }
61 |   }
62 | 
63 |   await stagehand.close();
64 | 
65 |   return {
66 |     _success: foundMatch,
67 |     expected: expectedResult,
68 |     observations,
69 |     debugUrl,
70 |     sessionUrl,
71 |     logs: logger.getLogs(),
72 |   };
73 | };
74 | 


--------------------------------------------------------------------------------
/evals/tasks/observe_vantechjournal.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const observe_vantechjournal: EvalFunction = async ({
 4 |   debugUrl,
 5 |   sessionUrl,
 6 |   stagehand,
 7 |   logger,
 8 | }) => {
 9 |   await stagehand.page.goto("https://vantechjournal.com/archive?page=8");
10 |   await stagehand.page.waitForTimeout(1000);
11 | 
12 |   const observations = await stagehand.page.observe({
13 |     instruction: "find the button that takes us to the 11th page",
14 |   });
15 | 
16 |   if (observations.length === 0) {
17 |     await stagehand.close();
18 |     return {
19 |       _success: false,
20 |       observations,
21 |       debugUrl,
22 |       sessionUrl,
23 |       logs: logger.getLogs(),
24 |     };
25 |   }
26 | 
27 |   const expectedLocator = `a.rounded-lg:nth-child(8)`;
28 | 
29 |   const expectedResult = await stagehand.page.locator(expectedLocator);
30 | 
31 |   let foundMatch = false;
32 | 
33 |   for (const observation of observations) {
34 |     try {
35 |       const observationLocator = stagehand.page
36 |         .locator(observation.selector)
37 |         .first();
38 |       const observationHandle = await observationLocator.elementHandle();
39 |       const expectedHandle = await expectedResult.elementHandle();
40 | 
41 |       if (!observationHandle || !expectedHandle) {
42 |         // Couldn’t get handles, skip
43 |         continue;
44 |       }
45 | 
46 |       const isSameNode = await observationHandle.evaluate(
47 |         (node, otherNode) => node === otherNode,
48 |         expectedHandle,
49 |       );
50 | 
51 |       if (isSameNode) {
52 |         foundMatch = true;
53 |         break;
54 |       }
55 |     } catch (error) {
56 |       console.warn(
57 |         `Failed to check observation with selector ${observation.selector}:`,
58 |         error.message,
59 |       );
60 |       continue;
61 |     }
62 |   }
63 | 
64 |   await stagehand.close();
65 | 
66 |   return {
67 |     _success: foundMatch,
68 |     expected: expectedResult,
69 |     observations,
70 |     debugUrl,
71 |     sessionUrl,
72 |     logs: logger.getLogs(),
73 |   };
74 | };
75 | 


--------------------------------------------------------------------------------
/evals/tasks/observe_yc_startup.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const observe_yc_startup: EvalFunction = async ({
 4 |   debugUrl,
 5 |   sessionUrl,
 6 |   stagehand,
 7 |   logger,
 8 | }) => {
 9 |   await stagehand.page.goto("https://www.ycombinator.com/companies");
10 |   await stagehand.page.waitForLoadState("networkidle");
11 | 
12 |   const observations = await stagehand.page.observe({
13 |     instruction:
14 |       "Click the container element that holds links to each of the startup companies. The companies each have a name, a description, and a link to their website.",
15 |   });
16 | 
17 |   if (observations.length === 0) {
18 |     await stagehand.close();
19 |     return {
20 |       _success: false,
21 |       observations,
22 |       debugUrl,
23 |       sessionUrl,
24 |       logs: logger.getLogs(),
25 |     };
26 |   }
27 | 
28 |   const possibleLocators = [
29 |     `div._rightCol_i9oky_592`,
30 |     `div._section_i9oky_163._results_i9oky_343`,
31 |   ];
32 | 
33 |   const possibleHandles = [];
34 |   for (const locatorStr of possibleLocators) {
35 |     const locator = stagehand.page.locator(locatorStr);
36 |     const handle = await locator.elementHandle();
37 |     if (handle) {
38 |       possibleHandles.push({ locatorStr, handle });
39 |     }
40 |   }
41 | 
42 |   let foundMatch = false;
43 |   let matchedLocator: string | null = null;
44 | 
45 |   for (const observation of observations) {
46 |     try {
47 |       const observationLocator = stagehand.page
48 |         .locator(observation.selector)
49 |         .first();
50 |       const observationHandle = await observationLocator.elementHandle();
51 |       if (!observationHandle) {
52 |         continue;
53 |       }
54 | 
55 |       for (const { locatorStr, handle: candidateHandle } of possibleHandles) {
56 |         const isSameNode = await observationHandle.evaluate(
57 |           (node, otherNode) => node === otherNode,
58 |           candidateHandle,
59 |         );
60 |         if (isSameNode) {
61 |           foundMatch = true;
62 |           matchedLocator = locatorStr;
63 |           break;
64 |         }
65 |       }
66 | 
67 |       if (foundMatch) {
68 |         break;
69 |       }
70 |     } catch (error) {
71 |       console.warn(
72 |         `Failed to check observation with selector ${observation.selector}:`,
73 |         error.message,
74 |       );
75 |       continue;
76 |     }
77 |   }
78 | 
79 |   await stagehand.close();
80 | 
81 |   return {
82 |     _success: foundMatch,
83 |     matchedLocator,
84 |     observations,
85 |     debugUrl,
86 |     sessionUrl,
87 |     logs: logger.getLogs(),
88 |   };
89 | };
90 | 


--------------------------------------------------------------------------------
/evals/tasks/panamcs.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const panamcs: EvalFunction = async ({
 4 |   debugUrl,
 5 |   sessionUrl,
 6 |   stagehand,
 7 |   logger,
 8 | }) => {
 9 |   await stagehand.page.goto(
10 |     "https://browserbase.github.io/stagehand-eval-sites/sites/panamcs/",
11 |   );
12 | 
13 |   const observations = await stagehand.page.observe(
14 |     "click the 'about us' link",
15 |   );
16 | 
17 |   if (observations.length === 0) {
18 |     await stagehand.close();
19 |     return {
20 |       _success: false,
21 |       observations,
22 |       debugUrl,
23 |       sessionUrl,
24 |       logs: logger.getLogs(),
25 |     };
26 |   }
27 | 
28 |   const expectedLocator = `#menu > li:nth-child(1) > a`;
29 | 
30 |   const expectedResult = await stagehand.page
31 |     .locator(expectedLocator)
32 |     .first()
33 |     .innerText();
34 | 
35 |   let foundMatch = false;
36 |   for (const observation of observations) {
37 |     try {
38 |       const observationResult = await stagehand.page
39 |         .locator(observation.selector)
40 |         .first()
41 |         .innerText();
42 | 
43 |       if (observationResult === expectedResult) {
44 |         foundMatch = true;
45 |         break;
46 |       }
47 |     } catch (error) {
48 |       console.warn(
49 |         `Failed to check observation with selector ${observation.selector}:`,
50 |         error.message,
51 |       );
52 |       continue;
53 |     }
54 |   }
55 | 
56 |   await stagehand.close();
57 | 
58 |   return {
59 |     _success: foundMatch,
60 |     expected: expectedResult,
61 |     observations,
62 |     debugUrl,
63 |     sessionUrl,
64 |     logs: logger.getLogs(),
65 |   };
66 | };
67 | 


--------------------------------------------------------------------------------
/evals/tasks/peeler_complex.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { z } from "zod";
 3 | 
 4 | export const peeler_complex: EvalFunction = async ({
 5 |   debugUrl,
 6 |   sessionUrl,
 7 |   stagehand,
 8 |   logger,
 9 |   useTextExtract,
10 | }) => {
11 |   try {
12 |     await stagehand.page.goto(`https://chefstoys.com/`, { timeout: 60000 });
13 |     await stagehand.page.waitForLoadState("networkidle");
14 | 
15 |     await stagehand.page.act("find the button to close the popup");
16 |     await stagehand.page.act({
17 |       action: "search for %search_query%",
18 |       variables: {
19 |         search_query: "peeler",
20 |       },
21 |     });
22 | 
23 |     await stagehand.page.act({
24 |       action: 'click on the first "OXO" brand peeler',
25 |     });
26 | 
27 |     const { price } = await stagehand.page.extract({
28 |       instruction: "get the price of the peeler",
29 |       schema: z.object({ price: z.number().nullable() }),
30 |       useTextExtract,
31 |     });
32 | 
33 |     await stagehand.close();
34 | 
35 |     return {
36 |       _success: price === 11.99,
37 |       price,
38 |       debugUrl,
39 |       sessionUrl,
40 |       logs: logger.getLogs(),
41 |     };
42 |   } catch (error) {
43 |     logger.error({
44 |       message: "error in peeler_complex function",
45 |       level: 0,
46 |       auxiliary: {
47 |         error: {
48 |           value: JSON.stringify(error, null, 2),
49 |           type: "object",
50 |         },
51 |         trace: {
52 |           value: error.stack,
53 |           type: "string",
54 |         },
55 |       },
56 |     });
57 | 
58 |     await stagehand.close();
59 | 
60 |     return {
61 |       _success: false,
62 |       error: JSON.parse(JSON.stringify(error, null, 2)),
63 |       debugUrl,
64 |       sessionUrl,
65 |       logs: logger.getLogs(),
66 |     };
67 |   }
68 | };
69 | 


--------------------------------------------------------------------------------
/evals/tasks/peeler_simple.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { StagehandEnvironmentError } from "@/types/stagehandErrors";
 3 | 
 4 | const env: "BROWSERBASE" | "LOCAL" =
 5 |   process.env.EVAL_ENV?.toLowerCase() === "browserbase"
 6 |     ? "BROWSERBASE"
 7 |     : "LOCAL";
 8 | 
 9 | export const peeler_simple: EvalFunction = async ({
10 |   debugUrl,
11 |   sessionUrl,
12 |   stagehand,
13 |   logger,
14 | }) => {
15 |   if (env === "BROWSERBASE") {
16 |     throw new StagehandEnvironmentError(
17 |       "BROWSERBASE",
18 |       "LOCAL",
19 |       "peeler_simple eval",
20 |     );
21 |   }
22 | 
23 |   await stagehand.page.goto(`file://${process.cwd()}/evals/assets/peeler.html`);
24 |   await stagehand.page.act({ action: "add the peeler to cart" });
25 | 
26 |   const successMessageLocator = stagehand.page.locator(
27 |     'text="Congratulations, you have 1 A in your cart"',
28 |   );
29 |   const isVisible = await successMessageLocator.isVisible();
30 | 
31 |   await stagehand.close();
32 | 
33 |   return {
34 |     _success: isVisible,
35 |     debugUrl,
36 |     sessionUrl,
37 |     logs: logger.getLogs(),
38 |   };
39 | };
40 | 


--------------------------------------------------------------------------------
/evals/tasks/prevChunk.ts:
--------------------------------------------------------------------------------
 1 | import { Stagehand } from "@/dist";
 2 | import { EvalFunction } from "@/types/evals";
 3 | 
 4 | export const prevChunk: EvalFunction = async ({
 5 |   logger,
 6 |   stagehandConfig,
 7 |   debugUrl,
 8 |   sessionUrl,
 9 | }) => {
10 |   const stagehand = new Stagehand({
11 |     ...stagehandConfig,
12 |     domSettleTimeoutMs: 3000,
13 |   });
14 |   await stagehand.init();
15 | 
16 |   await stagehand.page.goto(
17 |     "https://browserbase.github.io/stagehand-eval-sites/sites/aigrant/",
18 |   );
19 |   await new Promise((resolve) => setTimeout(resolve, 2000));
20 |   const { initialScrollTop, chunkHeight } = await stagehand.page.evaluate(
21 |     () => {
22 |       const halfPage = document.body.scrollHeight / 2;
23 | 
24 |       window.scrollTo({
25 |         top: halfPage,
26 |         left: 0,
27 |         behavior: "instant",
28 |       });
29 | 
30 |       const chunk = window.innerHeight;
31 | 
32 |       return {
33 |         initialScrollTop: window.scrollY,
34 |         chunkHeight: chunk,
35 |       };
36 |     },
37 |   );
38 |   await new Promise((resolve) => setTimeout(resolve, 2000));
39 |   await stagehand.page.act({
40 |     action: "scroll up one chunk",
41 |   });
42 | 
43 |   await new Promise((resolve) => setTimeout(resolve, 5000));
44 | 
45 |   const finalScrollTop = await stagehand.page.evaluate(() => window.scrollY);
46 | 
47 |   await stagehand.close();
48 | 
49 |   const actualDiff = initialScrollTop - finalScrollTop;
50 |   const threshold = 20; // px tolerance
51 |   const scrolledOneChunk = Math.abs(actualDiff - chunkHeight) <= threshold;
52 | 
53 |   const evaluationResult = scrolledOneChunk
54 |     ? {
55 |         _success: true,
56 |         logs: logger.getLogs(),
57 |         debugUrl,
58 |         sessionUrl,
59 |         message: `Successfully scrolled ~one chunk UP: expected ~${chunkHeight}, got ${actualDiff}.`,
60 |       }
61 |     : {
62 |         _success: false,
63 |         logs: logger.getLogs(),
64 |         debugUrl,
65 |         sessionUrl,
66 |         message: `Scroll difference expected ~${chunkHeight} but only scrolled ${actualDiff}.`,
67 |       };
68 | 
69 |   return evaluationResult;
70 | };
71 | 


--------------------------------------------------------------------------------
/evals/tasks/radio_btn.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const radio_btn: EvalFunction = async ({
 4 |   debugUrl,
 5 |   sessionUrl,
 6 |   stagehand,
 7 |   logger,
 8 | }) => {
 9 |   await stagehand.page.goto(
10 |     "https://browserbase.github.io/stagehand-eval-sites/sites/paneer-pizza/",
11 |   );
12 | 
13 |   await stagehand.page.act({
14 |     action: "click the 'medium' option",
15 |   });
16 | 
17 |   // confirm that the Medium radio is now checked
18 |   const radioBtnClicked = await stagehand.page
19 |     .locator('input[type="radio"][name="Pizza"][value="Medium"]')
20 |     .isChecked();
21 | 
22 |   await stagehand.close();
23 | 
24 |   return {
25 |     _success: radioBtnClicked,
26 |     debugUrl,
27 |     sessionUrl,
28 |     logs: logger.getLogs(),
29 |   };
30 | };
31 | 


--------------------------------------------------------------------------------
/evals/tasks/rakuten_jp.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const rakuten_jp: EvalFunction = async ({
 4 |   debugUrl,
 5 |   sessionUrl,
 6 |   stagehand,
 7 |   logger,
 8 | }) => {
 9 |   await stagehand.page.goto("https://www.rakuten.co.jp/");
10 |   await stagehand.page.act({ action: "click on online supermarket" });
11 | 
12 |   await stagehand.page.act({ action: "if there is a popup, close it" });
13 | 
14 |   await stagehand.page.act({
15 |     action: "navigate to Inageya Online Supermarket",
16 |   });
17 |   await stagehand.page.act({ action: "click the search bar input" });
18 |   await stagehand.page.act({ action: "search for '香菜'" });
19 | 
20 |   const url = stagehand.page.url();
21 |   const successUrl =
22 |     "https://netsuper.rakuten.co.jp/inageya/search/?keyword=%E9%A6%99%E8%8F%9C";
23 | 
24 |   await stagehand.close();
25 | 
26 |   return {
27 |     _success: url === successUrl,
28 |     debugUrl,
29 |     sessionUrl,
30 |     logs: logger.getLogs(),
31 |   };
32 | };
33 | 


--------------------------------------------------------------------------------
/evals/tasks/sciquest.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { z } from "zod";
 3 | 
 4 | export const sciquest: EvalFunction = async ({
 5 |   debugUrl,
 6 |   sessionUrl,
 7 |   stagehand,
 8 |   logger,
 9 |   useTextExtract,
10 | }) => {
11 |   await stagehand.page.goto(
12 |     "https://bids.sciquest.com/apps/Router/PublicEvent?tab=PHX_NAV_SourcingAllOpps&CustomerOrg=StateOfUtah",
13 |   );
14 | 
15 |   await stagehand.page.act({
16 |     action: 'Click on the "Closed" tab',
17 |   });
18 | 
19 |   const result = await stagehand.page.extract({
20 |     instruction:
21 |       "Extract the total number of results that the search produced. Not the number of results displayed on the page.",
22 |     schema: z.object({
23 |       total_results: z.string(),
24 |     }),
25 |     useTextExtract,
26 |   });
27 | 
28 |   await stagehand.close();
29 | 
30 |   const { total_results } = result;
31 | 
32 |   const expectedNumber = 12637;
33 |   const extractedNumber = parseInt(total_results.replace(/[^\d]/g, ""), 10);
34 | 
35 |   const isWithinRange =
36 |     extractedNumber >= expectedNumber - 1000 &&
37 |     extractedNumber <= expectedNumber + 1000;
38 | 
39 |   if (!isWithinRange) {
40 |     logger.error({
41 |       message: "Total number of results is not within the expected range",
42 |       level: 0,
43 |       auxiliary: {
44 |         expected: {
45 |           value: `${expectedNumber} ± 1000`,
46 |           type: "string",
47 |         },
48 |         actual: {
49 |           value: extractedNumber.toString(),
50 |           type: "integer",
51 |         },
52 |       },
53 |     });
54 |     return {
55 |       _success: false,
56 |       error: "Total number of results is not within the expected range",
57 |       extractedNumber,
58 |       debugUrl,
59 |       sessionUrl,
60 |       logs: logger.getLogs(),
61 |     };
62 |   }
63 | 
64 |   return {
65 |     _success: true,
66 |     extractedNumber,
67 |     debugUrl,
68 |     sessionUrl,
69 |     logs: logger.getLogs(),
70 |   };
71 | };
72 | 


--------------------------------------------------------------------------------
/evals/tasks/scroll_50.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const scroll_50: EvalFunction = async ({
 4 |   debugUrl,
 5 |   sessionUrl,
 6 |   stagehand,
 7 |   logger,
 8 | }) => {
 9 |   await stagehand.page.goto(
10 |     "https://browserbase.github.io/stagehand-eval-sites/sites/aigrant/",
11 |   );
12 |   await stagehand.page.act({
13 |     action: "Scroll 50% down the page",
14 |   });
15 | 
16 |   await new Promise((resolve) => setTimeout(resolve, 5000));
17 | 
18 |   // Get the current scroll position and total scroll height
19 |   const scrollInfo = await stagehand.page.evaluate(() => {
20 |     return {
21 |       scrollTop: window.scrollY + window.innerHeight / 2,
22 |       scrollHeight: document.documentElement.scrollHeight,
23 |     };
24 |   });
25 | 
26 |   await stagehand.close();
27 | 
28 |   const halfwayScroll = scrollInfo.scrollHeight / 2;
29 |   const halfwayReached = Math.abs(scrollInfo.scrollTop - halfwayScroll) <= 200;
30 |   const evaluationResult = halfwayReached
31 |     ? {
32 |         _success: true,
33 |         logs: logger.getLogs(),
34 |         debugUrl,
35 |         sessionUrl,
36 |       }
37 |     : {
38 |         _success: false,
39 |         logs: logger.getLogs(),
40 |         debugUrl,
41 |         sessionUrl,
42 |         message: `Scroll position (${scrollInfo.scrollTop}px) is not halfway down the page (${halfwayScroll}px).`,
43 |       };
44 | 
45 |   return evaluationResult;
46 | };
47 | 


--------------------------------------------------------------------------------
/evals/tasks/scroll_75.ts:
--------------------------------------------------------------------------------
 1 | import { Stagehand } from "@/dist";
 2 | import { EvalFunction } from "@/types/evals";
 3 | 
 4 | export const scroll_75: EvalFunction = async ({
 5 |   logger,
 6 |   stagehandConfig,
 7 |   debugUrl,
 8 |   sessionUrl,
 9 | }) => {
10 |   const stagehand = new Stagehand({
11 |     ...stagehandConfig,
12 |     domSettleTimeoutMs: 3000,
13 |   });
14 |   await stagehand.init();
15 | 
16 |   await stagehand.page.goto(
17 |     "https://browserbase.github.io/stagehand-eval-sites/sites/aigrant/",
18 |   );
19 |   await stagehand.page.act({
20 |     action: "Scroll 75% down the page",
21 |   });
22 | 
23 |   await new Promise((resolve) => setTimeout(resolve, 5000));
24 | 
25 |   // Get the current scroll position and total scroll height
26 |   const scrollInfo = await stagehand.page.evaluate(() => {
27 |     return {
28 |       scrollTop: window.scrollY + window.innerHeight * 0.75,
29 |       scrollHeight: document.documentElement.scrollHeight,
30 |     };
31 |   });
32 | 
33 |   await stagehand.close();
34 | 
35 |   const threeQuartersScroll = scrollInfo.scrollHeight * 0.75;
36 |   const threeQuartersReached =
37 |     Math.abs(scrollInfo.scrollTop - threeQuartersScroll) <= 200;
38 |   const evaluationResult = threeQuartersReached
39 |     ? {
40 |         _success: true,
41 |         logs: logger.getLogs(),
42 |         debugUrl,
43 |         sessionUrl,
44 |       }
45 |     : {
46 |         _success: false,
47 |         logs: logger.getLogs(),
48 |         debugUrl,
49 |         sessionUrl,
50 |         message: `Scroll position (${scrollInfo.scrollTop}px) is not three quarters down the page (${threeQuartersScroll}px).`,
51 |       };
52 | 
53 |   return evaluationResult;
54 | };
55 | 


--------------------------------------------------------------------------------
/evals/tasks/simple_google_search.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const simple_google_search: EvalFunction = async ({
 4 |   debugUrl,
 5 |   sessionUrl,
 6 |   stagehand,
 7 |   logger,
 8 | }) => {
 9 |   await stagehand.page.goto("https://www.google.com");
10 | 
11 |   await stagehand.page.act({
12 |     action: 'type "OpenAI" into the search bar',
13 |   });
14 | 
15 |   await stagehand.page.act("click the search button");
16 | 
17 |   const expectedUrl = "https://www.google.com/search?q=OpenAI";
18 |   const currentUrl = stagehand.page.url();
19 | 
20 |   await stagehand.close();
21 | 
22 |   return {
23 |     _success: currentUrl.startsWith(expectedUrl),
24 |     currentUrl,
25 |     debugUrl,
26 |     sessionUrl,
27 |     logs: logger.getLogs(),
28 |   };
29 | };
30 | 


--------------------------------------------------------------------------------
/evals/tasks/stock_x.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const stock_x: EvalFunction = async ({
 4 |   debugUrl,
 5 |   sessionUrl,
 6 |   stagehand,
 7 |   logger,
 8 | }) => {
 9 |   await stagehand.page.goto(
10 |     "https://stockx.com/air-jordan-3-retro-black-cement-2024",
11 |   );
12 | 
13 |   await stagehand.page.waitForTimeout(3000);
14 | 
15 |   await stagehand.page.act({
16 |     action: "click on Jordan 3 Retro Crimson in the related products",
17 |   });
18 | 
19 |   await stagehand.page.waitForTimeout(2000);
20 |   const currentUrl = stagehand.page.url();
21 |   const expectedUrlPrefix = "https://stockx.com/jordan-3-retro-crimson";
22 | 
23 |   await stagehand.close();
24 | 
25 |   return {
26 |     _success: currentUrl.startsWith(expectedUrlPrefix),
27 |     currentUrl,
28 |     debugUrl,
29 |     sessionUrl,
30 |     logs: logger.getLogs(),
31 |   };
32 | };
33 | 


--------------------------------------------------------------------------------
/evals/tasks/vanta_h.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const vanta_h: EvalFunction = async ({
 4 |   debugUrl,
 5 |   sessionUrl,
 6 |   stagehand,
 7 |   logger,
 8 | }) => {
 9 |   await stagehand.page.goto("https://www.vanta.com/");
10 | 
11 |   const observations = await stagehand.page.observe(
12 |     "click the buy now button if it is available",
13 |   );
14 | 
15 |   await stagehand.close();
16 | 
17 |   // we should have no saved observation since the element shouldn't exist
18 |   return {
19 |     _success: observations.length === 0,
20 |     observations,
21 |     debugUrl,
22 |     sessionUrl,
23 |     logs: logger.getLogs(),
24 |   };
25 | };
26 | 


--------------------------------------------------------------------------------
/evals/tasks/vantechjournal.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | export const vantechjournal: EvalFunction = async ({
 4 |   debugUrl,
 5 |   sessionUrl,
 6 |   stagehand,
 7 |   logger,
 8 | }) => {
 9 |   await stagehand.page.goto("https://vantechjournal.com/");
10 | 
11 |   await stagehand.page.act({
12 |     action: "click on page 8. do not click the next button",
13 |   });
14 | 
15 |   const expectedUrl = "https://vantechjournal.com/archive?page=8";
16 |   const currentUrl = stagehand.page.url();
17 | 
18 |   await stagehand.close();
19 | 
20 |   return {
21 |     _success: currentUrl === expectedUrl,
22 |     currentUrl,
23 |     expectedUrl,
24 |     debugUrl,
25 |     sessionUrl,
26 |     logs: logger.getLogs(),
27 |   };
28 | };
29 | 


--------------------------------------------------------------------------------
/evals/tasks/wichita.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | import { z } from "zod";
 3 | 
 4 | /**
 5 |  * Eval: reveal the closed/awarded/cancelled bids on the Wichita Falls bids page
 6 |  * and check that the extracted bid total is within ±10 of the expected 405.
 7 |  */
 8 | export const wichita: EvalFunction = async ({
 9 |   debugUrl,
10 |   sessionUrl,
11 |   stagehand,
12 |   logger,
13 |   useTextExtract,
14 | }) => {
15 |   const expectedNumber = 405;
16 |   const tolerance = 10;
17 |   const outOfRangeMessage =
18 |     "Total number of results is not within the expected range";
19 | 
20 |   await stagehand.page.goto("https://www.wichitafallstx.gov/Bids.aspx");
21 | 
22 |   await stagehand.page.act({
23 |     action: 'Click on "Show Closed/Awarded/Cancelled bids"',
24 |   });
25 | 
26 |   const bidTotalSchema = z.object({ total_results: z.string() });
27 |   const result = await stagehand.page.extract({
28 |     instruction: "Extract the total number of bids that the search produced.",
29 |     schema: bidTotalSchema,
30 |     useTextExtract,
31 |   });
32 | 
33 |   await stagehand.close();
34 | 
35 |   // Drop every non-digit character before parsing the count.
36 |   const digitsOnly = result.total_results.replace(/[^\d]/g, "");
37 |   const extractedNumber = parseInt(digitsOnly, 10);
38 | 
39 |   // A NaN result fails this comparison and is reported as out of range.
40 |   if (Math.abs(extractedNumber - expectedNumber) <= tolerance) {
41 |     return {
42 |       _success: true,
43 |       extractedNumber,
44 |       debugUrl,
45 |       sessionUrl,
46 |       logs: logger.getLogs(),
47 |     };
48 |   }
49 | 
50 |   logger.error({
51 |     message: outOfRangeMessage,
52 |     level: 0,
53 |     auxiliary: {
54 |       expected: {
55 |         value: `${expectedNumber} ± ${tolerance}`,
56 |         type: "string",
57 |       },
58 |       actual: {
59 |         value: extractedNumber.toString(),
60 |         type: "integer",
61 |       },
62 |     },
63 |   });
64 | 
65 |   return {
66 |     _success: false,
67 |     error: outOfRangeMessage,
68 |     extractedNumber,
69 |     debugUrl,
70 |     sessionUrl,
71 |     logs: logger.getLogs(),
72 |   };
73 | };
74 | 


--------------------------------------------------------------------------------
/evals/tasks/wikipedia.ts:
--------------------------------------------------------------------------------
 1 | import { EvalFunction } from "@/types/evals";
 2 | 
 3 | /**
 4 |  * Eval: from the Baseball article, click the "hit and run" link and compare
 5 |  * the resulting page URL with the expected Wikipedia article URL.
 6 |  */
 7 | export const wikipedia: EvalFunction = async ({
 8 |   debugUrl,
 9 |   sessionUrl,
10 |   stagehand,
11 |   logger,
12 | }) => {
13 |   const expectedUrl = "https://en.wikipedia.org/wiki/Hit_and_run_(baseball)";
14 | 
15 |   await stagehand.page.goto("https://en.wikipedia.org/wiki/Baseball");
16 |   await stagehand.page.act({
17 |     action: 'click the "hit and run" link in this article',
18 |     timeoutMs: 360_000,
19 |   });
20 | 
21 |   // Capture the URL before the browser is closed.
22 |   const landedUrl = stagehand.page.url();
23 |   await stagehand.close();
24 | 
25 |   const reachedExpectedPage = landedUrl === expectedUrl;
26 |   return {
27 |     _success: reachedExpectedPage,
28 |     expected: expectedUrl,
29 |     actual: landedUrl,
30 |     debugUrl,
31 |     sessionUrl,
32 |     logs: logger.getLogs(),
33 |   };
34 | };
35 | 


--------------------------------------------------------------------------------
/examples/actionable_observe_example.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * This file is meant to be used as a scratchpad for trying out actionable observe.
 3 |  * To create a Stagehand project with best practices and configuration, run:
 4 |  *
 5 |  * npx create-browser-app@latest my-browser-app
 6 |  */
 7 | 
 8 | import { Stagehand } from "@/dist";
 9 | import stagehandConfig from "@/stagehand.config";
10 | 
11 | /** Pause before each observe call, in milliseconds. */
12 | const SETTLE_DELAY_MS = 3000;
13 | 
14 | /** URL prefix expected once every filter has been applied. */
15 | const EXPECTED_URL =
16 |   "https://www.apartments.com/apartments/san-francisco-ca/min-1-bedrooms-pet-friendly-dog/";
17 | 
18 | /** Observe instructions, run in order; the first match of each is acted on. */
19 | const FILTER_INSTRUCTIONS = [
20 |   "find the 'all filters' button",
21 |   "find the '1+' button in the 'beds' section",
22 |   "find the 'apartments' button in the 'home type' section",
23 |   "find the pet policy dropdown to click on.",
24 |   "find the 'Dog Friendly' option to click on",
25 |   "find the 'see results' section",
26 | ];
27 | 
28 | /**
29 |  * Applies a series of apartments.com filters with "actionable observe": each
30 |  * step observes the page for one element and hands the first result to `act`.
31 |  * Finally it reports whether the browser reached the filtered-results URL.
32 |  */
33 | async function example() {
34 |   const stagehand = new Stagehand(stagehandConfig);
35 |   await stagehand.init();
36 | 
37 |   let currentUrl: string;
38 |   try {
39 |     await stagehand.page.goto("https://www.apartments.com/san-francisco-ca/");
40 | 
41 |     for (const instruction of FILTER_INSTRUCTIONS) {
42 |       await new Promise((resolve) => setTimeout(resolve, SETTLE_DELAY_MS));
43 |       const [firstMatch] = await stagehand.page.observe({ instruction });
44 |       // Fail with a readable message instead of passing undefined to act().
45 |       if (!firstMatch) {
46 |         throw new Error(`Nothing observed for instruction: ${instruction}`);
47 |       }
48 |       await stagehand.page.act(firstMatch);
49 |     }
50 | 
51 |     currentUrl = stagehand.page.url();
52 |   } finally {
53 |     // Close the browser even when a step throws so no session is left running.
54 |     await stagehand.close();
55 |   }
56 | 
57 |   if (currentUrl.includes(EXPECTED_URL)) {
58 |     console.log("✅ Success! we made it to the correct page");
59 |   } else {
60 |     console.log(
61 |       "❌ Whoops, looks like we didn't make it to the correct page. " +
62 |         "\nThanks for testing out this new Stagehand feature!" +
63 |         "\nReach us on Slack if you have any feedback/questions/suggestions!",
64 |     );
65 |   }
66 | }
67 | 
68 | (async () => {
69 |   await example();
70 | })();
71 | 


--------------------------------------------------------------------------------
/examples/ai_sdk_example.ts:
--------------------------------------------------------------------------------
 1 | import { openai } from "@ai-sdk/openai";
 2 | import { Stagehand } from "@/dist";
 3 | import { AISdkClient } from "./external_clients/aisdk";
 4 | import StagehandConfig from "@/stagehand.config";
 5 | import { z } from "zod";
 6 | 
 7 | /**
 8 |  * Drives Stagehand through an AI SDK model: extracts the top Hacker News story
 9 |  * title, logs it, and then clicks the first story.
10 |  */
11 | async function example() {
12 |   const stagehand = new Stagehand({
13 |     ...StagehandConfig,
14 |     llmClient: new AISdkClient({
15 |       model: openai("gpt-4o"),
16 |     }),
17 |   });
18 | 
19 |   await stagehand.init();
20 | 
21 |   try {
22 |     await stagehand.page.goto("https://news.ycombinator.com");
23 | 
24 |     const { story } = await stagehand.page.extract({
25 |       instruction: "extract the title of the top story on the page",
26 |       schema: z.object({
27 |         story: z.string().describe("the top story on the page"),
28 |       }),
29 |     });
30 | 
31 |     console.log("The top story is:", story);
32 | 
33 |     await stagehand.page.act("click the first story");
34 |   } finally {
35 |     // Close the browser even when a step throws so no session is left running.
36 |     await stagehand.close();
37 |   }
38 | }
39 | 
40 | (async () => {
41 |   await example();
42 | })();
43 | 


--------------------------------------------------------------------------------
/examples/debugUrl.ts:
--------------------------------------------------------------------------------
 1 | import { Stagehand } from "@/dist";
 2 | 
 3 | /** Command-line prefix that carries the URL to open. */
 4 | const URL_FLAG = "--url=";
 5 | 
 6 | /**
 7 |  * Starts a local headless Stagehand browser with verbose logging (level 2)
 8 |  * and navigates to `url`.
 9 |  *
10 |  * NOTE(review): the Stagehand instance is never closed here. That may be
11 |  * deliberate for a debugging aid — confirm before adding a close() call.
12 |  *
13 |  * @param url - Address to navigate to.
14 |  */
15 | async function debug(url: string) {
16 |   const stagehand = new Stagehand({
17 |     env: "LOCAL",
18 |     verbose: 2,
19 |     localBrowserLaunchOptions: {
20 |       headless: true,
21 |     },
22 |   });
23 |   await stagehand.init();
24 |   await stagehand.page.goto(url);
25 | }
26 | 
27 | (async () => {
28 |   const urlArg = process.argv.find((arg) => arg.startsWith(URL_FLAG));
29 |   // Slice off the flag prefix rather than splitting on "=", so a URL whose
30 |   // query string contains "=" (e.g. ?q=1) is kept intact.
31 |   const targetUrl = urlArg ? urlArg.slice(URL_FLAG.length) : "";
32 |   if (!targetUrl) {
33 |     console.error("No URL flag provided. Usage: --url=https://example.com");
34 |     process.exit(1);
35 |   }
36 |   console.log(`Navigating to: ${targetUrl}`);
37 |   await debug(targetUrl);
38 | })();
39 | 


--------------------------------------------------------------------------------
/examples/example.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * This file is meant to be used as a scratchpad for developing new evals.
 3 |  * To create a Stagehand project with best practices and configuration, run:
 4 |  *
 5 |  * npx create-browser-app@latest my-browser-app
 6 |  */
 7 | 
 8 | import { Stagehand } from "@/dist";
 9 | import StagehandConfig from "@/stagehand.config";
10 | 
11 | /**
12 |  * Scratchpad entry point: opens the Stagehand docs and leaves room for custom
13 |  * steps. The browser is closed in `finally` so a failing step cannot leave a
14 |  * session running.
15 |  */
16 | async function example() {
17 |   const stagehand = new Stagehand({
18 |     ...StagehandConfig,
19 |   });
20 |   await stagehand.init();
21 |   try {
22 |     await stagehand.page.goto("https://docs.stagehand.dev");
23 |     /**
24 |      * Add your code here!
25 |      */
26 |   } finally {
27 |     await stagehand.close();
28 |   }
29 | }
30 | 
31 | (async () => {
32 |   await example();
33 | })();
34 | 


--------------------------------------------------------------------------------
/examples/external_client.ts:
--------------------------------------------------------------------------------
 1 | import { Stagehand } from "@/dist";
 2 | import { z } from "zod";
 3 | import { CustomOpenAIClient } from "./external_clients/customOpenAI";
 4 | import StagehandConfig from "@/stagehand.config";
 5 | import OpenAI from "openai";
 6 | 
 7 | /**
 8 |  * Runs Stagehand with a custom OpenAI-backed client: opens Hacker News, clicks
 9 |  * the 'new' link, then extracts and logs the top three stories.
10 |  */
11 | async function example() {
12 |   const stagehand = new Stagehand({
13 |     ...StagehandConfig,
14 |     llmClient: new CustomOpenAIClient({
15 |       modelName: "gpt-4o-mini",
16 |       client: new OpenAI({
17 |         apiKey: process.env.OPENAI_API_KEY,
18 |       }),
19 |     }),
20 |   });
21 | 
22 |   await stagehand.init();
23 | 
24 |   try {
25 |     await stagehand.page.goto("https://news.ycombinator.com");
26 |     await stagehand.page.act("click on the 'new' link");
27 | 
28 |     const headlines = await stagehand.page.extract({
29 |       instruction: "Extract the top 3 stories from the Hacker News homepage.",
30 |       schema: z.object({
31 |         stories: z.array(
32 |           z.object({
33 |             title: z.string(),
34 |             url: z.string(),
35 |             points: z.number(),
36 |           }),
37 |         ),
38 |       }),
39 |     });
40 | 
41 |     console.log(headlines);
42 |   } finally {
43 |     // Close the browser even when a step throws so no session is left running.
44 |     await stagehand.close();
45 |   }
46 | }
47 | 
48 | (async () => {
49 |   await example();
50 | })();
51 | 


--------------------------------------------------------------------------------
/examples/external_clients/langchain.ts:
--------------------------------------------------------------------------------
 1 | import { BaseChatModel } from "@langchain/core/language_models/chat_models";
 2 | import { CreateChatCompletionOptions, LLMClient, AvailableModel } from "@/dist";
 3 | import { zodToJsonSchema } from "zod-to-json-schema";
 4 | import {
 5 |   AIMessage,
 6 |   BaseMessageLike,
 7 |   HumanMessage,
 8 |   SystemMessage,
 9 | } from "@langchain/core/messages";
10 | import { ChatCompletion } from "openai/resources";
11 | /** LLMClient implementation that sends Stagehand chat-completion requests to a LangChain BaseChatModel. */
12 | export class LangchainClient extends LLMClient {
13 |   public type = "langchainClient" as const;
14 |   private model: BaseChatModel;
15 |   // The model's `name` is cast to AvailableModel and passed to the LLMClient constructor.
16 |   constructor(model: BaseChatModel) {
17 |     super(model.name as AvailableModel);
18 |     this.model = model;
19 |   }
20 |   /** Converts options.messages to LangChain messages and invokes the model; token usage is always reported as 0. */
21 |   async createChatCompletion<T = ChatCompletion>({
22 |     options,
23 |   }: CreateChatCompletionOptions): Promise<T> {
24 |     const formattedMessages: BaseMessageLike[] = options.messages.map(
25 |       (message) => {
26 |         if (Array.isArray(message.content)) {
27 |           if (message.role === "system") {
28 |             return new SystemMessage(
29 |               message.content
30 |                 .map((c) => ("text" in c ? c.text : ""))
31 |                 .join("\n"),
32 |             );
33 |           }
34 |           // Map each part: image_url parts become { type: "image" } entries, all others become text entries.
35 |           const content = message.content.map((content) =>
36 |             "image_url" in content
37 |               ? { type: "image", image: content.image_url.url }
38 |               : { type: "text", text: content.text },
39 |           );
40 |           // User messages are sent with the mapped parts, images included.
41 |           if (message.role === "user") return new HumanMessage({ content });
42 |           // Remaining roles become an AIMessage with image parts replaced by the text "[Image]".
43 |           const textOnlyParts = content.map((part) => ({
44 |             type: "text" as const,
45 |             text: part.type === "image" ? "[Image]" : part.text,
46 |           }));
47 | 
48 |           return new AIMessage({ content: textOnlyParts });
49 |         }
50 |         // Non-array content is passed through unchanged together with its role.
51 |         return {
52 |           role: message.role,
53 |           content: message.content,
54 |         };
55 |       },
56 |     );
57 |     // With a response_model: convert its schema to JSON Schema and invoke the model with structured output.
58 |     if (options.response_model) {
59 |       const responseSchema = zodToJsonSchema(options.response_model.schema, {
60 |         $refStrategy: "none",
61 |       });
62 |       const structuredModel = this.model.withStructuredOutput(responseSchema);
63 |       const response = await structuredModel.invoke(formattedMessages);
64 | 
65 |       return {
66 |         data: response,
67 |         usage: {
68 |           prompt_tokens: 0, // Langchain doesn't provide token counts by default
69 |           completion_tokens: 0,
70 |           total_tokens: 0,
71 |         },
72 |       } as T;
73 |     }
74 |     // Without a response_model: bind options.tools and return the model's response as `data`.
75 |     const modelWithTools = this.model.bindTools(options.tools);
76 |     const response = await modelWithTools.invoke(formattedMessages);
77 | 
78 |     return {
79 |       data: response,
80 |       usage: {
81 |         prompt_tokens: 0, // Langchain doesn't provide token counts by default
82 |         completion_tokens: 0,
83 |         total_tokens: 0,
84 |       },
85 |     } as T;
86 |   }
87 | }
88 | 


--------------------------------------------------------------------------------
/examples/google_enter.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * This file is meant to be used as a scratchpad for developing new evals.
 3 |  * To create a Stagehand project with best practices and configuration, run:
 4 |  *
 5 |  * npx create-browser-app@latest my-browser-app
 6 |  */
 7 | 
 8 | import { Stagehand } from "@/dist";
 9 | import StagehandConfig from "@/stagehand.config";
10 | 
11 | /**
12 |  * Opens Google, types 'Browserbase', and presses enter through `act`.
13 |  * The browser is closed in `finally` so a failing step cannot leave a session
14 |  * running.
15 |  */
16 | async function example() {
17 |   const stagehand = new Stagehand({
18 |     ...StagehandConfig,
19 |   });
20 |   await stagehand.init();
21 |   try {
22 |     const page = stagehand.page;
23 |     await page.goto("https://google.com");
24 |     await page.act("type in 'Browserbase'");
25 |     await page.act("press enter");
26 |   } finally {
27 |     await stagehand.close();
28 |   }
29 | }
30 | 
31 | (async () => {
32 |   await example();
33 | })();
34 | 


--------------------------------------------------------------------------------
/examples/instructions.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * This example shows how to use custom instructions with Stagehand.
 3 |  */
 4 | import { Stagehand } from "@/dist";
 5 | import StagehandConfig from "@/stagehand.config";
 6 | 
 7 | /**
 8 |  * Starts Stagehand with a custom system prompt, then sends two actions on the
 9 |  * Browserbase docs site: the keyword `secret12345` and a search request.
10 |  */
11 | async function example() {
12 |   const stagehand = new Stagehand({
13 |     ...StagehandConfig,
14 |     systemPrompt:
15 |       "if the users says `secret12345`, click on the 'getting started' tab. additionally, if the user says to type something, translate their input into french and type it.",
16 |   });
17 |   await stagehand.init();
18 | 
19 |   try {
20 |     const page = stagehand.page;
21 | 
22 |     await page.goto("https://docs.browserbase.com/");
23 | 
24 |     await page.act({
25 |       action: "secret12345",
26 |     });
27 | 
28 |     await page.act({
29 |       action: "search for 'how to use browserbase'",
30 |     });
31 |   } finally {
32 |     // Close the browser even when a step throws so no session is left running.
33 |     await stagehand.close();
34 |   }
35 | }
36 | 
37 | (async () => {
38 |   await example();
39 | })();
40 | 


--------------------------------------------------------------------------------
/examples/langchain.ts:
--------------------------------------------------------------------------------
 1 | import { z } from "zod";
 2 | import { Stagehand } from "@/dist";
 3 | import StagehandConfig from "@/stagehand.config";
 4 | import { LangchainClient } from "./external_clients/langchain";
 5 | import { ChatOpenAI } from "@langchain/openai";
 6 | 
 7 | /**
 8 |  * Runs Stagehand through a LangChain ChatOpenAI model: extracts the top Hacker
 9 |  * News story, logs it, and then clicks the first story.
10 |  */
11 | async function example() {
12 |   const stagehand = new Stagehand({
13 |     ...StagehandConfig,
14 |     llmClient: new LangchainClient(
15 |       new ChatOpenAI({
16 |         model: "gpt-4o",
17 |       }),
18 |     ),
19 |   });
20 | 
21 |   await stagehand.init();
22 | 
23 |   try {
24 |     await stagehand.page.goto("https://news.ycombinator.com");
25 | 
26 |     const { story } = await stagehand.page.extract({
27 |       schema: z.object({
28 |         story: z.string().describe("the top story on the page"),
29 |       }),
30 |     });
31 | 
32 |     console.log("The top story is:", story);
33 | 
34 |     await stagehand.page.act("click the first story");
35 |   } finally {
36 |     // Close the browser even when a step throws so no session is left running.
37 |     await stagehand.close();
38 |   }
39 | }
40 | 
41 | (async () => {
42 |   await example();
43 | })();
44 | 


--------------------------------------------------------------------------------
/examples/operator-example.ts:
--------------------------------------------------------------------------------
 1 | import { LogLine, Stagehand } from "@/dist";
 2 | import dotenv from "dotenv";
 3 | import StagehandConfig from "@/stagehand.config";
 4 | import chalk from "chalk";
 5 | 
 6 | // Load environment variables (dotenv)
 7 | dotenv.config();
 8 | 
 9 | const INSTRUCTION =
10 |   "Go to Google Japan and interact with it in Japanese. Tell me (in English) an authentic recipe that I can make with ingredients found in American grocery stores.";
11 | 
12 | /**
13 |  * Runs one agent task described by INSTRUCTION and prints the outcome.
14 |  * Errors raised inside the try block are printed rather than rethrown, and
15 |  * the browser is closed afterwards in either case.
16 |  */
17 | async function main() {
18 |   const banner = chalk.bold("Stagehand 🤘 Operator Example");
19 |   console.log(`\n${banner}\n`);
20 | 
21 |   // Set up Stagehand with a logger that prints a trimmed-down log line
22 |   const stagehand = new Stagehand({
23 |     ...StagehandConfig,
24 |     logger: (line: LogLine) => {
25 |       const { level, message, timestamp } = line;
26 |       console.log({ level, message, timestamp });
27 |     },
28 |   });
29 | 
30 |   await stagehand.init();
31 | 
32 |   try {
33 |     const agent = stagehand.agent();
34 | 
35 |     // Announce the task, then hand it to the agent
36 |     const arrow = chalk.cyan("↳");
37 |     console.log(`${arrow} Instruction: ${INSTRUCTION}`);
38 | 
39 |     const result = await agent.execute({
40 |       instruction: INSTRUCTION,
41 |       maxSteps: 20,
42 |     });
43 | 
44 |     console.log(`${chalk.green("✓")} Execution complete`);
45 |     console.log(`${chalk.yellow("⤷")} Result:`);
46 |     console.log(JSON.stringify(result, null, 2));
47 |     console.log(chalk.white(result.message));
48 |   } catch (error) {
49 |     const cross = chalk.red("✗");
50 |     console.log(`${cross} Error: ${error}`);
51 |   } finally {
52 |     await stagehand.close();
53 |   }
54 | }
55 | 
56 | main();
57 | 


--------------------------------------------------------------------------------
/examples/parameterizeApiKey.ts:
--------------------------------------------------------------------------------
 1 | import { Stagehand } from "@/dist";
 2 | import { z } from "zod";
 3 | 
 4 | /**
 5 |  * This example shows how to parameterize the API key for the LLM provider.
 6 |  *
 7 |  * In order to best demonstrate, unset the OPENAI_API_KEY environment variable and
 8 |  * set the USE_OPENAI_API_KEY environment variable to your OpenAI API key.
 9 |  *
10 |  * export USE_OPENAI_API_KEY=$OPENAI_API_KEY
11 |  * unset OPENAI_API_KEY
12 |  */
13 | 
14 | /**
15 |  * Opens the Stagehand repository, clicks the contributors link, and logs the
16 |  * extracted top contributor, using an API key read from USE_OPENAI_API_KEY.
17 |  */
18 | async function example() {
19 |   const stagehand = new Stagehand({
20 |     env: "LOCAL",
21 |     verbose: 1,
22 |     enableCaching: false,
23 |     modelName: "gpt-4o",
24 |     modelClientOptions: {
25 |       apiKey: process.env.USE_OPENAI_API_KEY,
26 |     },
27 |   });
28 | 
29 |   await stagehand.init();
30 | 
31 |   try {
32 |     await stagehand.page.goto("https://github.com/browserbase/stagehand");
33 |     await stagehand.page.act({ action: "click on the contributors" });
34 |     const contributor = await stagehand.page.extract({
35 |       instruction: "extract the top contributor",
36 |       schema: z.object({
37 |         username: z.string(),
38 |         url: z.string(),
39 |       }),
40 |     });
41 |     console.log(`Our favorite contributor is ${contributor.username}`);
42 |   } finally {
43 |     // Always close the browser so the script does not leave a session running.
44 |     await stagehand.close();
45 |   }
46 | }
47 | 
48 | (async () => {
49 |   await example();
50 | })();
51 | 


--------------------------------------------------------------------------------
/examples/popup.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * This file is meant to be used as a scratchpad for developing new evals.
 3 |  * To create a Stagehand project with best practices and configuration, run:
 4 |  *
 5 |  * npx create-browser-app@latest my-browser-app
 6 |  */
 7 | 
 8 | import { ObserveResult, Stagehand } from "@/dist";
 9 | import StagehandConfig from "@/stagehand.config";
10 | 
11 | /**
12 |  * Demonstrates observing a page that opens as a popup: a "popup" listener
13 |  * starts an `observe` call on each new page, and the results are logged after
14 |  * the link that opens the popup has been clicked.
15 |  */
16 | async function example() {
17 |   const stagehand = new Stagehand(StagehandConfig);
18 |   await stagehand.init();
19 | 
20 |   try {
21 |     const page = stagehand.page;
22 | 
23 |     // One entry per popup. Collecting them avoids reading a variable that may
24 |     // never have been assigned and keeps every observe promise awaited.
25 |     const pendingObservations: Promise<ObserveResult[]>[] = [];
26 | 
27 |     page.on("popup", async (newPage) => {
28 |       pendingObservations.push(
29 |         newPage.observe({
30 |           instruction: "return all the next possible actions from the page",
31 |         }),
32 |       );
33 |     });
34 | 
35 |     await page.goto(
36 |       "https://docs.browserbase.com/integrations/crew-ai/introduction",
37 |     );
38 | 
39 |     await page.click(
40 |       "#content-area > div.relative.mt-8.prose.prose-gray.dark\\:prose-invert > p:nth-child(2) > a",
41 |     );
42 | 
43 |     // Fixed wait that gives the popup handler time to run before checking.
44 |     await page.waitForTimeout(5000);
45 | 
46 |     const observations = await Promise.all(pendingObservations);
47 |     for (const observeResult of observations) {
48 |       console.log("Observed", observeResult.length, "actions");
49 |     }
50 |   } finally {
51 |     // Close the browser even when a step throws so no session is left running.
52 |     await stagehand.close();
53 |   }
54 | }
55 | 
56 | (async () => {
57 |   await example();
58 | })();
59 | 


--------------------------------------------------------------------------------
/examples/try_wordle.ts:
--------------------------------------------------------------------------------
 1 | import { Stagehand } from "@/dist";
 2 | import StagehandConfig from "@/stagehand.config";
 3 | 
 4 | /**
 5 |  * Opens Wordle, dismisses the intro dialogs, types the guess "WORDS" one
 6 |  * letter at a time, and submits it.
 7 |  */
 8 | async function example() {
 9 |   const stagehand = new Stagehand({
10 |     ...StagehandConfig,
11 |   });
12 |   await stagehand.init();
13 |   try {
14 |     const page = stagehand.page;
15 |     await page.goto("https://www.nytimes.com/games/wordle/index.html");
16 |     await page.act("click 'Continue'");
17 |     await page.act("click 'Play'");
18 |     await page.act("click cross sign on top right of 'How To Play' card");
19 |     const word = "WORDS";
20 |     for (const letter of word) {
21 |       await page.act(`press ${letter}`);
22 |     }
23 |     await page.act("press enter");
24 |   } finally {
25 |     // Close the browser even when a step throws so no session is left running.
26 |     await stagehand.close();
27 |   }
28 | }
29 | 
30 | (async () => {
31 |   await example();
32 | })();
33 | 


--------------------------------------------------------------------------------
/lib/agent/AgentClient.ts:
--------------------------------------------------------------------------------
 1 | import {
 2 |   AgentAction,
 3 |   AgentResult,
 4 |   AgentType,
 5 |   AgentExecutionOptions,
 6 | } from "@/types/agent";
 7 | 
 8 | /**
 9 |  * Abstract base class for agent clients
10 |  * This provides a common interface for all agent implementations
11 |  */
12 | export abstract class AgentClient {
13 |   public type: AgentType;
14 |   public modelName: string;
15 |   public clientOptions: Record<string, unknown>;
16 |   public userProvidedInstructions?: string;
17 |   /** Stores the identifying fields; clientOptions starts out as an empty object. */
18 |   constructor(
19 |     type: AgentType,
20 |     modelName: string,
21 |     userProvidedInstructions?: string,
22 |   ) {
23 |     this.type = type;
24 |     this.modelName = modelName;
25 |     this.userProvidedInstructions = userProvidedInstructions;
26 |     this.clientOptions = {};
27 |   }
28 |   /** Runs the agent with the given execution options and resolves to its result. */
29 |   abstract execute(options: AgentExecutionOptions): Promise<AgentResult>;
30 |   /** Captures a screenshot; the accepted options and the resolved value are left to the implementation. */
31 |   abstract captureScreenshot(
32 |     options?: Record<string, unknown>,
33 |   ): Promise<unknown>;
34 |   /** Supplies a viewport width and height to the client. */
35 |   abstract setViewport(width: number, height: number): void;
36 |   /** Supplies a URL to the client. NOTE(review): presumably the page's current URL — confirm with implementations. */
37 |   abstract setCurrentUrl(url: string): void;
38 |   /** Registers an async provider that resolves to a string. NOTE(review): presumably encoded screenshot data — confirm. */
39 |   abstract setScreenshotProvider(provider: () => Promise<string>): void;
40 |   /** Registers an async handler that accepts an AgentAction. */
41 |   abstract setActionHandler(
42 |     handler: (action: AgentAction) => Promise<void>,
43 |   ): void;
44 | }
45 | 


--------------------------------------------------------------------------------
/lib/agent/AgentProvider.ts:
--------------------------------------------------------------------------------
 1 | import { LogLine } from "@/types/log";
 2 | import { AgentClient } from "./AgentClient";
 3 | import { AgentType } from "@/types/agent";
 4 | import { OpenAICUAClient } from "./OpenAICUAClient";
 5 | import { AnthropicCUAClient } from "./AnthropicCUAClient";
 6 | import {
 7 |   UnsupportedModelError,
 8 |   UnsupportedModelProviderError,
 9 | } from "@/types/stagehandErrors";
10 | 
11 | // Maps each supported computer-use model name to its agent provider type
12 | const modelToAgentProviderMap: Record<string, AgentType> = {
13 |   "computer-use-preview": "openai",
14 |   "claude-3-5-sonnet-20240620": "anthropic",
15 |   "claude-3-7-sonnet-20250219": "anthropic", // newer Claude model
16 | };
17 | 
18 | /**
19 |  * Creates the agent client that matches a model name, using
20 |  * modelToAgentProviderMap to pick the provider type.
21 |  */
22 | export class AgentProvider {
23 |   private logger: (message: LogLine) => void;
24 | 
25 |   /** Create a new agent provider that reports through `logger`. */
26 |   constructor(logger: (message: LogLine) => void) {
27 |     this.logger = logger;
28 |   }
29 | 
30 |   /**
31 |    * Builds the agent client for `modelName`, forwarding `clientOptions` and
32 |    * `userProvidedInstructions` to the client constructor.
33 |    * @throws UnsupportedModelError when the model name is not in the map.
34 |    * @throws UnsupportedModelProviderError when the provider type has no client.
35 |    */
36 |   getClient(
37 |     modelName: string,
38 |     clientOptions?: Record<string, unknown>,
39 |     userProvidedInstructions?: string,
40 |   ): AgentClient {
41 |     const type = AgentProvider.getAgentProvider(modelName);
42 |     this.logger({
43 |       category: "agent",
44 |       message: `Getting agent client for type: ${type}, model: ${modelName}`,
45 |       level: 2,
46 |     });
47 |     try {
48 |       switch (type) {
49 |         case "openai":
50 |           return new OpenAICUAClient(
51 |             type,
52 |             modelName,
53 |             userProvidedInstructions,
54 |             clientOptions,
55 |           );
56 |         case "anthropic":
57 |           return new AnthropicCUAClient(
58 |             type,
59 |             modelName,
60 |             userProvidedInstructions,
61 |             clientOptions,
62 |           );
63 |         default:
64 |           throw new UnsupportedModelProviderError(
65 |             ["openai", "anthropic"],
66 |             "Computer Use Agent",
67 |           );
68 |       }
69 |     } catch (error) {
70 |       const errorMessage =
71 |         error instanceof Error ? error.message : String(error);
72 |       this.logger({
73 |         category: "agent",
74 |         message: `Error creating agent client: ${errorMessage}`,
75 |         level: 0,
76 |       });
77 |       throw error;
78 |     }
79 |   }
80 | 
81 |   /**
82 |    * Returns the provider type registered for `modelName`.
83 |    * Only the map's own keys count: the `in` operator would also accept names
84 |    * inherited from Object.prototype (e.g. "toString").
85 |    * @throws UnsupportedModelError when the model name is not in the map.
86 |    */
87 |   static getAgentProvider(modelName: string): AgentType {
88 |     if (
89 |       Object.prototype.hasOwnProperty.call(modelToAgentProviderMap, modelName)
90 |     ) {
91 |       return modelToAgentProviderMap[modelName];
92 |     }
93 |     throw new UnsupportedModelError(
94 |       Object.keys(modelToAgentProviderMap),
95 |       "Computer Use Agent",
96 |     );
97 |   }
98 | }
99 | 


--------------------------------------------------------------------------------
/lib/agent/StagehandAgent.ts:
--------------------------------------------------------------------------------
 1 | import { LogLine } from "@/types/log";
 2 | import {
 3 |   AgentExecuteOptions,
 4 |   AgentResult,
 5 |   AgentExecutionOptions,
 6 | } from "@/types/agent";
 7 | import { AgentClient } from "./AgentClient";
 8 | 
 9 | /**
10 |  * Entry point for running agent tasks in Stagehand.
11 |  * Wraps an AgentClient and forwards tasks to it.
12 |  */
13 | export class StagehandAgent {
14 |   private client: AgentClient;
15 |   private logger: (message: LogLine) => void;
16 | 
17 |   constructor(client: AgentClient, logger: (message: LogLine) => void) {
18 |     this.client = client;
19 |     this.logger = logger;
20 |   }
21 | 
22 |   /**
23 |    * Runs a task on the wrapped client.
24 |    *
25 |    * @param optionsOrInstruction - Full execute options, or just the instruction text.
26 |    * @returns The result resolved by the client's `execute`.
27 |    */
28 |   async execute(
29 |     optionsOrInstruction: AgentExecuteOptions | string,
30 |   ): Promise<AgentResult> {
31 |     // A bare string is shorthand for options that only set `instruction`.
32 |     const options =
33 |       typeof optionsOrInstruction !== "string"
34 |         ? optionsOrInstruction
35 |         : { instruction: optionsOrInstruction };
36 | 
37 |     this.logger({
38 |       category: "agent",
39 |       message: `Executing agent task: ${options.instruction}`,
40 |       level: 1,
41 |     });
42 | 
43 |     // Every call passes `retries: 3` to the client.
44 |     return await this.client.execute({
45 |       options,
46 |       logger: this.logger,
47 |       retries: 3,
48 |     });
49 |   }
50 | 
51 |   /** Model name reported by the wrapped client. */
52 |   getModelName(): string {
53 |     return this.client.modelName;
54 |   }
55 | 
56 |   /** Agent type reported by the wrapped client. */
57 |   getAgentType(): string {
58 |     return this.client.type;
59 |   }
60 | }
61 | 


--------------------------------------------------------------------------------
/lib/cache.ts:
--------------------------------------------------------------------------------
 1 | import fs from "fs";
 2 | const observationsPath = "./.cache/observations.json";
 3 | const actionsPath = "./.cache/actions.json";
 4 | 
 5 | /**
 6 |  * A file system cache to skip inference when repeating steps
 7 |  * It also acts as the source of truth for identifying previously seen actions and observations
 8 |  */
 9 | class Cache {
10 |   disabled: boolean;
11 | 
12 |   /** Creates the cache and, unless disabled, ensures its files exist on disk. */
13 |   constructor({ disabled = false } = {}) {
14 |     this.disabled = disabled;
15 |     if (!this.disabled) {
16 |       this.initCache();
17 |     }
18 |   }
19 | 
20 |   /** Returns the stored observations, or {} when disabled or unreadable. */
21 |   readObservations() {
22 |     return this.readStore(observationsPath, "observations.json");
23 |   }
24 | 
25 |   /** Returns the stored actions, or {} when disabled or unreadable. */
26 |   readActions() {
27 |     return this.readStore(actionsPath, "actions.json");
28 |   }
29 | 
30 |   /** Stores `value` under `key` in the observations file; no-op when disabled. */
31 |   writeObservations({
32 |     key,
33 |     value,
34 |   }: {
35 |     key: string;
36 |     value: { id: string; result: string };
37 |   }) {
38 |     if (this.disabled) {
39 |       return;
40 |     }
41 |     const observations = this.readObservations();
42 |     observations[key] = value;
43 |     fs.writeFileSync(observationsPath, JSON.stringify(observations, null, 2));
44 |   }
45 | 
46 |   /** Stores `value` under `key` in the actions file; no-op when disabled. */
47 |   writeActions({
48 |     key,
49 |     value,
50 |   }: {
51 |     key: string;
52 |     value: { id: string; result: string };
53 |   }) {
54 |     if (this.disabled) {
55 |       return;
56 |     }
57 |     const actions = this.readActions();
58 |     actions[key] = value;
59 |     fs.writeFileSync(actionsPath, JSON.stringify(actions, null, 2));
60 |   }
61 | 
62 |   /** Not implemented yet; always throws. */
63 |   evictCache() {
64 |     throw new Error("implement me");
65 |   }
66 | 
67 |   /** Creates the cache directory and any cache file that is missing. */
68 |   private initCache() {
69 |     if (this.disabled) {
70 |       return;
71 |     }
72 |     // `recursive: true` makes this a no-op when the directory already exists,
73 |     // so there is no exists-then-create race.
74 |     fs.mkdirSync(".cache", { recursive: true });
75 |     for (const storePath of [actionsPath, observationsPath]) {
76 |       if (!fs.existsSync(storePath)) {
77 |         fs.writeFileSync(storePath, JSON.stringify({}));
78 |       }
79 |     }
80 |   }
81 | 
82 |   /** Parses the JSON file at `storePath`; logs and returns {} on any failure. */
83 |   private readStore(storePath: string, label: string) {
84 |     if (this.disabled) {
85 |       return {};
86 |     }
87 |     try {
88 |       return JSON.parse(fs.readFileSync(storePath, "utf8"));
89 |     } catch (error) {
90 |       console.error(`Error reading from ${label}`, error);
91 |       return {};
92 |     }
93 |   }
94 | }
95 | 
96 | export default Cache;
97 | 


--------------------------------------------------------------------------------
/lib/cache/LLMCache.ts:
--------------------------------------------------------------------------------
 1 | import { BaseCache, CacheEntry } from "./BaseCache";
 2 | 
 3 | /** Logger signature accepted by the LLMCache constructor. */
 4 | type LLMCacheLogger = (message: {
 5 |   category?: string;
 6 |   message: string;
 7 |   level?: number;
 8 | }) => void;
 9 | 
10 | /**
11 |  * BaseCache specialization for LLM calls; entries are stored in
12 |  * "llm_calls.json" unless another cache file name is supplied.
13 |  */
14 | export class LLMCache extends BaseCache<CacheEntry> {
15 |   constructor(logger: LLMCacheLogger, cacheDir?: string, cacheFile?: string) {
16 |     super(logger, cacheDir, cacheFile || "llm_calls.json");
17 |   }
18 | 
19 |   /**
20 |    * Reads a cached entry by delegating to the base class and casting the result.
21 |    * @param options - The options used to generate the cache key.
22 |    * @param requestId - The identifier for the current request.
23 |    * @returns The cached data if available, otherwise null.
24 |    */
25 |   public async get<T>(
26 |     options: Record<string, unknown>,
27 |     requestId: string,
28 |   ): Promise<T | null> {
29 |     // TODO: remove this cast
30 |     return (await super.get(options, requestId)) as T | null;
31 |   }
32 | 
33 |   /**
34 |    * Stores an entry through the base class, then logs that a new response was saved.
35 |    * @param options - The options used to generate the cache key.
36 |    * @param data - The data to be cached.
37 |    * @param requestId - The identifier for the current request.
38 |    */
39 |   public async set(
40 |     options: Record<string, unknown>,
41 |     data: unknown,
42 |     requestId: string,
43 |   ): Promise<void> {
44 |     await super.set(options, data, requestId);
45 |     this.logger({
46 |       level: 1,
47 |       category: "llm_cache",
48 |       message: "Cache miss - saved new response",
49 |     });
50 |   }
51 | }
52 | 


--------------------------------------------------------------------------------
/lib/dom/DomChunk.ts:
--------------------------------------------------------------------------------
1 | export interface DomChunk { // NOTE(review): presumably describes one chunk of processed DOM output — confirm with callers
2 |   startOffset: number; // NOTE(review): presumably where the chunk starts; units are not shown here
3 |   endOffset: number; // NOTE(review): presumably where the chunk ends; confirm inclusive vs exclusive
4 |   outputString: string; // string output associated with this chunk
5 |   selectorMap: Record<number, string[]>; // numeric key -> list of strings; NOTE(review): presumably element index -> selectors
6 | }
7 | 


--------------------------------------------------------------------------------
/lib/dom/containerFactory.ts:
--------------------------------------------------------------------------------
 1 | import { StagehandContainer } from "./StagehandContainer";
 2 | import { GlobalPageContainer } from "./GlobalPageContainer";
 3 | import { ElementContainer } from "./ElementContainer";
 4 | 
 5 | /**
 6 |  * Picks the container implementation for `obj`.
 7 |  * @returns A GlobalPageContainer for a Window, otherwise an ElementContainer
 8 |  *   wrapping the given element.
 9 |  */
10 | export function createStagehandContainer(
11 |   obj: Window | HTMLElement,
12 | ): StagehandContainer {
13 |   return obj instanceof Window
14 |     ? new GlobalPageContainer()
15 |     : new ElementContainer(obj);
16 | }
17 | 


--------------------------------------------------------------------------------
/lib/dom/genDomScripts.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * We have a collection of typescript functions that we need to run in the browser.
 3 |  * First, we build them into a single js file
 4 |  * Second, due to framework differences we need to get our script content as a string to avoid pathing issues due to file routing in frameworks like Next.js
 5 |  * Playwright allows us to pass in script content directly as a string instead of reading a file from a path
 6 |  * https://github.com/browserbase/stagehand/issues/180
 7 |  *
 8 |  * We can't rely on the normal build process for stagehand, because we need our script content as a string so that the import *just works*
 9 |  */
10 | import fs from "fs";
11 | import path from "path";
12 | import esbuild from "esbuild";
13 | 
14 | // Every artifact of this script lives in ./build next to this file.
15 | const buildDir = path.join(__dirname, "./build");
16 | fs.mkdirSync(buildDir, { recursive: true });
17 | 
18 | // Bundle index.ts (and whatever it imports) into the build directory.
19 | esbuild.buildSync({
20 |   entryPoints: [path.join(__dirname, "index.ts")],
21 |   bundle: true,
22 |   outdir: buildDir,
23 | });
24 | 
25 | // Re-export the bundled build/index.js text as a string constant.
26 | const scriptContent = fs.readFileSync(path.join(buildDir, "index.js"), "utf8");
27 | const output = `export const scriptContent = ${JSON.stringify(scriptContent)};`;
28 | 
29 | fs.writeFileSync(path.join(buildDir, "scriptContent.ts"), output);
30 | 


--------------------------------------------------------------------------------
/lib/dom/global.d.ts:
--------------------------------------------------------------------------------
 1 | import { StagehandContainer } from "./StagehandContainer";
 2 | 
 3 | export {};
 4 | declare global {
 5 |   interface Window { // declaration merging: adds these members to the global Window type
 6 |     __stagehandInjected?: boolean;
 7 |     chunkNumber: number;
 8 |     showChunks?: boolean;
 9 |     processDom: (chunksSeen: Array<number>) => Promise<{
10 |       outputString: string;
11 |       selectorMap: Record<number, string[]>;
12 |       chunk: number;
13 |       chunks: number[];
14 |     }>;
15 |     processAllOfDom: (xpath?: string) => Promise<{
16 |       outputString: string;
17 |       selectorMap: Record<number, string[]>;
18 |     }>;
19 |     createStagehandContainer: (obj: Window | HTMLElement) => StagehandContainer;
20 |     waitForDomSettle: () => Promise<void>; // same name and signature as the export in ./utils
21 |     __playwright?: unknown;
22 |     __pw_manual?: unknown;
23 |     __PW_inspect?: unknown;
24 |     storeDOM: (xpath?: string) => string;
25 |     restoreDOM: (storedDOM: string, xpath?: string) => void;
26 |     createTextBoundingBoxes: (xpath?: string) => void;
27 |     getElementBoundingBoxes: (xpath: string) => Array<{
28 |       text: string;
29 |       top: number;
30 |       left: number;
31 |       width: number;
32 |       height: number;
33 |     }>;
34 |     getScrollableElementXpaths: (topN?: number) => Promise<string[]>;
35 |     getNodeFromXpath: (xpath: string) => Node | null; // same name as the export in ./utils
36 |     waitForElementScrollEnd: (element: HTMLElement) => Promise<void>; // the ./utils export also accepts an optional idleMs
37 |   }
38 | }
39 | 


--------------------------------------------------------------------------------
/lib/dom/index.ts:
--------------------------------------------------------------------------------
1 | export * from "./process";
2 | export * from "./utils";
3 | 


--------------------------------------------------------------------------------
/lib/dom/utils.ts:
--------------------------------------------------------------------------------
 1 | import { StagehandDomProcessError } from "@/types/stagehandErrors";
 2 | 
 3 | /** Resolves after 2s without childList mutations under document.body. */
 4 | export async function waitForDomSettle() {
 5 |   return new Promise<void>((resolve) => {
 6 |     const settle = () => {
 7 |       observer.disconnect(); // stop observing once settled so nothing leaks
 8 |       resolve();
 9 |     };
10 |     let timeout = setTimeout(settle, 2000);
11 |     const observer = new MutationObserver(() => {
12 |       clearTimeout(timeout); // any mutation restarts the quiet period
13 |       timeout = setTimeout(settle, 2000);
14 |     });
15 |     observer.observe(window.document.body, { childList: true, subtree: true });
16 |   });
17 | }
18 | 
19 | export function calculateViewportHeight() {
20 |   return Math.ceil(0.75 * window.innerHeight); // 75% of the window's inner height, rounded up
21 | }
22 | 
23 | /**
24 |  * Tests whether the element actually responds to .scrollTo(...): nudges it
25 |  * 100px down (or up, when it is already at its bottom edge), checks that
26 |  * scrollTop changed, then restores the original position. The horizontal
27 |  * offset is passed through unchanged instead of being reset to 0.
28 |  *
29 |  * @param elem - The element to probe.
30 |  * @returns true if scrollTop moved; false (with a console warning) otherwise.
31 |  */
32 | export function canElementScroll(elem: HTMLElement): boolean {
33 |   // Quick check if scrollTo is a function
34 |   if (typeof elem.scrollTo !== "function") {
35 |     console.warn("canElementScroll: .scrollTo is not a function.");
36 |     return false;
37 |   }
38 | 
39 |   try {
40 |     const originalTop = elem.scrollTop;
41 |     const left = elem.scrollLeft;
42 |     // Jump to `top` and report whether the element left its original offset.
43 |     const movesTo = (top: number): boolean => {
44 |       elem.scrollTo({ top, left, behavior: "instant" });
45 |       return elem.scrollTop !== originalTop;
46 |     };
47 | 
48 |     // An upward probe only makes sense when there is room above (top > 0).
49 |     const moved =
50 |       movesTo(originalTop + 100) ||
51 |       (originalTop > 0 && movesTo(originalTop - 100));
52 |     if (!moved) {
53 |       throw new StagehandDomProcessError("scrollTop did not change");
54 |     }
55 |     movesTo(originalTop); // Scroll back to original place
56 |     return true;
57 |   } catch (error) {
58 |     console.warn("canElementScroll error:", (error as Error).message || error);
59 |     return false;
60 |   }
61 | }
62 | 
63 | /** Returns the first node matching `xpath` in document order, or null. */
64 | export function getNodeFromXpath(xpath: string) {
65 |   const root = document.documentElement;
66 |   const kind = XPathResult.FIRST_ORDERED_NODE_TYPE;
67 |   // The namespace resolver and result-reuse arguments are both left null.
68 |   const found = document.evaluate(xpath, root, null, kind, null);
69 | 
70 |   return found.singleNodeValue;
71 | }
72 | 
73 | /** Resolves once `element` has fired no scroll event for `idleMs` ms. */
74 | export function waitForElementScrollEnd(
75 |   element: HTMLElement,
76 |   idleMs = 100,
77 | ): Promise<void> {
78 |   return new Promise<void>((done) => {
79 |     let idleTimer: number | undefined;
80 |     function onScroll() {
81 |       clearTimeout(idleTimer);
82 |       idleTimer = window.setTimeout(() => {
83 |         element.removeEventListener("scroll", onScroll);
84 |         done();
85 |       }, idleMs);
86 |     }
87 | 
88 |     element.addEventListener("scroll", onScroll, { passive: true });
89 |     onScroll(); // arm the timer now in case no scroll event ever fires
90 |   });
91 | }
92 | 


--------------------------------------------------------------------------------
/media/create-browser-app.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/stagehand/74f9339bac3404078bfcc47fecda44cc4b0f3876/media/create-browser-app.gif


--------------------------------------------------------------------------------
/media/github_demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/stagehand/74f9339bac3404078bfcc47fecda44cc4b0f3876/media/github_demo.gif


--------------------------------------------------------------------------------
/stagehand.config.ts:
--------------------------------------------------------------------------------
 1 | import type { ConstructorParams } from "@/dist";
 2 | import dotenv from "dotenv";
 3 | dotenv.config();
 4 | 
 5 | // Browserbase is used only when both of its credentials are present.
 6 | const hasBrowserbaseCredentials =
 7 |   process.env.BROWSERBASE_API_KEY && process.env.BROWSERBASE_PROJECT_ID;
 8 | 
 9 | const StagehandConfig: ConstructorParams = {
10 |   // Verbosity level for logging: 0 = silent, 1 = info, 2 = all
11 |   verbose: 2,
12 |   // Timeout for DOM to settle in milliseconds
13 |   domSettleTimeoutMs: 30_000,
14 | 
15 |   // LLM configuration: name of the model and options for its client
16 |   modelName: "gpt-4o",
17 |   modelClientOptions: { apiKey: process.env.OPENAI_API_KEY },
18 | 
19 |   // Browser configuration
20 |   env: hasBrowserbaseCredentials ? "BROWSERBASE" : "LOCAL",
21 |   // API key for authentication
22 |   apiKey: process.env.BROWSERBASE_API_KEY,
23 |   // Project identifier
24 |   projectId: process.env.BROWSERBASE_PROJECT_ID,
25 |   // Session ID for resuming Browserbase sessions
26 |   browserbaseSessionID: undefined,
27 |   // Session-creation parameters (per the field name)
28 |   browserbaseSessionCreateParams: {
29 |     projectId: process.env.BROWSERBASE_PROJECT_ID!,
30 |     browserSettings: {
31 |       blockAds: true,
32 |       viewport: { width: 1024, height: 768 },
33 |     },
34 |   },
35 |   // Configuration options for the local browser
36 |   localBrowserLaunchOptions: {
37 |     headless: false,
38 |     viewport: { width: 1024, height: 768 },
39 |   },
40 | };
41 | 
42 | export default StagehandConfig;
43 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "module": "commonjs",
 4 |     "esModuleInterop": true,
 5 |     "allowSyntheticDefaultImports": true,
 6 |     "target": "es6",
 7 |     "noImplicitAny": true,
 8 |     "moduleResolution": "node",
 9 |     "sourceMap": true,
10 |     "outDir": "dist",
11 |     "baseUrl": ".",
12 |     "paths": {
13 |       "*": ["node_modules/*", "lib/types/*"],
14 |       "@/*": ["./*"]
15 |     },
16 |     "skipLibCheck": true,
17 |     "declaration": true
18 |   },
19 |   "exclude": ["node_modules", "dist", ".eslintrc.cjs"]
20 | }
21 | 


--------------------------------------------------------------------------------
/types/act.ts:
--------------------------------------------------------------------------------
 1 | import { LLMClient } from "../lib/llm/LLMClient";
 2 | import { Locator } from "@playwright/test";
 3 | import { Logger } from "@/types/log";
 4 | import { StagehandPage } from "@/lib/StagehandPage";
 5 | 
 6 | // Parameters passed to the `act` command in `inference.ts`.
 7 | // WARNING: NOT to be confused with the ActParams type used in `page.act()`;
 8 | // page.act() params/result types are defined in `types/stagehand.ts`.
 9 | export interface ActCommandParams {
10 |   action: string;
11 |   steps?: string;
12 |   domElements: string;
13 |   llmClient: LLMClient;
14 |   retries?: number;
15 |   logger: (message: { category?: string; message: string }) => void; // narrower payload than LogLine in types/log
16 |   requestId: string;
17 |   variables?: Record<string, string>;
18 |   userProvidedInstructions?: string;
19 | }
20 | 
21 | // Result of the `act` command in `inference.ts`.
22 | // WARNING: NOT to be confused with the ActResult type used in `page.act()`;
23 | // page.act() params/result types are defined in `types/stagehand.ts`.
24 | export interface ActCommandResult {
25 |   method: string;
26 |   element: number; // NOTE(review): presumably an element id/index into the DOM listing — confirm
27 |   args: unknown[];
28 |   completed: boolean;
29 |   step: string;
30 |   why?: string;
31 | }
32 | 
33 | // String enum listing the actions supported in performPlaywrightMethod.
34 | export enum SupportedPlaywrightAction {
35 |   CLICK = "click",
36 |   FILL = "fill",
37 |   TYPE = "type",
38 |   PRESS = "press",
39 |   SCROLL = "scrollTo", // note: the value is "scrollTo", not "scroll"
40 |   NEXT_CHUNK = "nextChunk",
41 |   PREV_CHUNK = "prevChunk",
42 | }
43 | 
44 | /**
45 |  * A context object bundling every parameter that might be needed by
46 |  * any of the method handlers registered in the `methodHandlerMap`.
47 |  */
48 | export interface MethodHandlerContext {
49 |   method: string;
50 |   locator: Locator; // Playwright Locator (imported from @playwright/test)
51 |   xpath: string;
52 |   args: unknown[];
53 |   logger: Logger; // the LogLine-based logger type from @/types/log
54 |   stagehandPage: StagehandPage;
55 |   initialUrl: string;
56 |   domSettleTimeoutMs?: number; // milliseconds, per the name
57 | }
58 | 


--------------------------------------------------------------------------------
/types/api.ts:
--------------------------------------------------------------------------------
 1 | import Browserbase from "@browserbasehq/sdk";
 2 | import { LogLine } from "./log";
 3 | 
 4 | export interface StagehandAPIConstructorParams { // all three fields are required
 5 |   apiKey: string;
 6 |   projectId: string;
 7 |   logger: (message: LogLine) => void; // same shape as the Logger alias in ./log
 8 | }
 9 | 
10 | export interface ExecuteActionParams {
11 |   method: "act" | "extract" | "observe" | "navigate" | "end" | "agentExecute"; // closed set of method names
12 |   args?: unknown; // untyped here; NOTE(review): how args differs from params is not visible in this file
13 |   params?: unknown;
14 | }
15 | 
16 | export interface StartSessionParams {
17 |   modelName: string;
18 |   modelApiKey: string;
19 |   domSettleTimeoutMs: number; // milliseconds, per the name
20 |   verbose: number; // NOTE(review): presumably the 0|1|2 scale of LogLevel in ./log — confirm
21 |   debugDom: boolean;
22 |   systemPrompt?: string;
23 |   browserbaseSessionCreateParams?: Browserbase.Sessions.SessionCreateParams; // type comes from @browserbasehq/sdk
24 |   selfHeal?: boolean;
25 |   waitForCaptchaSolves?: boolean;
26 |   actionTimeoutMs?: number; // milliseconds, per the name
27 |   browserbaseSessionID?: string;
28 | }
29 | 
30 | export interface StartSessionResult { // counterpart of StartSessionParams, per the name
31 |   sessionId: string;
32 | }
33 | 
34 | export interface SuccessResponse<T> {
35 |   success: true; // literal discriminant: only this variant carries `data`
36 |   data: T;
37 | }
38 | 
39 | export interface ErrorResponse {
40 |   success: false; // literal discriminant: only this variant carries `message`
41 |   message: string;
42 | }
43 | 
44 | export type ApiResponse<T> = SuccessResponse<T> | ErrorResponse; // narrow by checking `success`
45 | 


--------------------------------------------------------------------------------
/types/browser.ts:
--------------------------------------------------------------------------------
 1 | import { Browser, BrowserContext } from "./page";
 2 | 
 3 | export interface BrowserResult {
 4 |   env: "LOCAL" | "BROWSERBASE";
 5 |   browser?: Browser; // optional, unlike `context`
 6 |   context: BrowserContext; // the only required field besides `env`
 7 |   debugUrl?: string;
 8 |   sessionUrl?: string;
 9 |   contextPath?: string;
10 |   sessionId?: string;
11 | }
12 | 


--------------------------------------------------------------------------------
/types/context.ts:
--------------------------------------------------------------------------------
 1 | import type { BrowserContext as PlaywrightContext } from "@playwright/test";
 2 | import { Page } from "../types/page";
 3 | 
 4 | export interface AXNode {
 5 |   role?: { value: string }; // wrapped in { value } here; AccessibilityNode stores a plain string
 6 |   name?: { value: string };
 7 |   description?: { value: string };
 8 |   value?: { value: string };
 9 |   nodeId: string; // the only required field
10 |   backendDOMNodeId?: number;
11 |   parentId?: string;
12 |   childIds?: string[];
13 |   properties?: {
14 |     name: string;
15 |     value: {
16 |       type: string;
17 |       value?: string;
18 |     };
19 |   }[];
20 | }
21 | 
22 | export type AccessibilityNode = {
23 |   role: string; // required and unwrapped here (optional { value } object on AXNode)
24 |   name?: string;
25 |   description?: string;
26 |   value?: string;
27 |   children?: AccessibilityNode[]; // recursive: nested nodes of this same type
28 |   childIds?: string[];
29 |   parentId?: string;
30 |   nodeId?: string;
31 |   backendDOMNodeId?: number;
32 |   properties?: {
33 |     name: string;
34 |     value: {
35 |       type: string;
36 |       value?: string;
37 |     };
38 |   }[];
39 | };
40 | 
41 | export interface TreeResult {
42 |   tree: AccessibilityNode[];
43 |   simplified: string; // NOTE(review): presumably a text rendering of `tree` — confirm
44 |   iframes?: AccessibilityNode[];
45 |   idToUrl: Record<string, string>; // string id -> URL, per the name; which id is not shown here
46 | }
47 | 
48 | export interface EnhancedContext
49 |   extends Omit<PlaywrightContext, "newPage" | "pages"> { // drops the two members re-declared below
50 |   newPage(): Promise<Page>; // yields the project's Page type from ../types/page
51 |   pages(): Page[];
52 | }
53 | 


--------------------------------------------------------------------------------
/types/evals.ts:
--------------------------------------------------------------------------------
 1 | import { z } from "zod";
 2 | import type { AvailableModel } from "../types/model";
 3 | import type { LogLine } from "../types/log";
 4 | import type { EvalCase } from "braintrust";
 5 | import { Stagehand } from "@/dist";
 6 | import { ConstructorParams } from "@/dist";
 7 | import { EvalLogger } from "@/evals/logger";
 8 | 
 9 | export type StagehandInitResult = { // passed to every EvalFunction as its task input
10 |   stagehand: Stagehand;
11 |   logger: EvalLogger;
12 |   debugUrl: string;
13 |   sessionUrl: string;
14 |   useTextExtract: boolean;
15 |   stagehandConfig: ConstructorParams;
16 |   modelName: AvailableModel;
17 | };
18 | 
19 | export type EvalFunction = (taskInput: StagehandInitResult) => Promise<{
20 |   _success: boolean; // the leading underscore is part of the key; SummaryResult.output uses it too
21 |   logs: LogLine[];
22 |   debugUrl: string;
23 |   sessionUrl: string;
24 |   error?: unknown; // optional and untyped
25 | }>;
26 | 
27 | export const EvalCategorySchema = z.enum([ // runtime validator for category names
28 |   "observe",
29 |   "act",
30 |   "combination",
31 |   "extract",
32 |   "experimental",
33 |   "text_extract",
34 |   "targeted_extract",
35 |   "regression",
36 |   "regression_llm_providers",
37 |   "llm_clients",
38 |   "agent",
39 | ]);
40 | 
41 | export type EvalCategory = z.infer<typeof EvalCategorySchema>; // union of the string literals above
42 | export interface EvalInput { // used as the `input` of Testcase and SummaryResult
43 |   name: string;
44 |   modelName: AvailableModel;
45 | }
46 | 
47 | export interface Testcase
48 |   extends EvalCase<
49 |     EvalInput,
50 |     unknown,
51 |     { model: AvailableModel; test: string }
52 |   > {
53 |   input: EvalInput;
54 |   name: string;
55 |   tags: string[];
56 |   metadata: { model: AvailableModel; test: string }; // same shape as the third EvalCase type argument
57 |   expected: unknown; // matches the second EvalCase type argument
58 | }
59 | 
60 | export interface SummaryResult {
61 |   input: EvalInput;
62 |   output: { _success: boolean }; // same `_success` key that EvalFunction resolves with
63 |   name: string;
64 |   score: number;
65 | }
66 | 
67 | export interface EvalArgs<TInput, TOutput, TExpected> { // generic over input/output/expected types
68 |   input: TInput;
69 |   output: TOutput;
70 |   expected: TExpected;
71 |   metadata?: { model: AvailableModel; test: string }; // same shape as Testcase.metadata, but optional
72 | }
73 | 
74 | export interface EvalResult { // a named numeric score
75 |   name: string;
76 |   score: number;
77 | }
78 | 
79 | export type LogLineEval = LogLine & { // LogLine plus one optional extra field
80 |   parsedAuxiliary?: string | object; // NOTE(review): presumably a parsed form of LogLine.auxiliary — confirm
81 | };
82 | 


--------------------------------------------------------------------------------
/types/evaluator.ts:
--------------------------------------------------------------------------------
 1 | export interface EvaluateOptions { // options for evaluating a single question
 2 |   /**
 3 |    * The question to ask about the task state
 4 |    */
 5 |   question: string;
 6 |   /**
 7 |    * Custom system prompt for the evaluator
 8 |    */
 9 |   systemPrompt?: string;
10 |   /**
11 |    * Delay in milliseconds before taking the screenshot
12 |    * @default 1000
13 |    */
14 |   screenshotDelayMs?: number;
15 |   /**
16 |    * Whether to throw an error if the response is not a clear YES or NO
17 |    * @default false
18 |    */
19 |   strictResponse?: boolean;
20 | }
21 | 
22 | export interface BatchEvaluateOptions { // batch counterpart of EvaluateOptions
23 |   /**
24 |    * Array of questions to evaluate
25 |    */
26 |   questions: string[];
27 |   /**
28 |    * Custom system prompt for the evaluator
29 |    */
30 |   systemPrompt?: string;
31 |   /**
32 |    * Delay in milliseconds before taking the screenshot
33 |    * @default 1000
34 |    */
35 |   screenshotDelayMs?: number;
36 |   /**
37 |    * Whether to throw an error if any response is not a clear YES or NO
38 |    * @default false
39 |    */
40 |   strictResponse?: boolean;
41 |   /**
42 |    * The reasoning behind the evaluation
43 |    * NOTE(review): unusual on an options type; EvaluateOptions has no such field — confirm it is read
44 |    */
45 |   reasoning?: string;
46 | }
46 | 
47 | /**
48 |  * Result of an evaluation: a three-valued verdict plus its reasoning
49 |  */
50 | export interface EvaluationResult {
51 |   /**
52 |    * The evaluation result ('YES', 'NO', or 'INVALID' if parsing failed or value was unexpected)
53 |    */
54 |   evaluation: "YES" | "NO" | "INVALID";
55 |   /**
56 |    * The reasoning behind the evaluation (always present, unlike the options types)
57 |    */
58 |   reasoning: string;
59 | }
60 | 


--------------------------------------------------------------------------------
/types/inference.ts:
--------------------------------------------------------------------------------
 1 | import { LLMClient } from "../lib/llm/LLMClient";
 2 | import { LLMProvider } from "../lib/llm/LLMProvider";
 3 | 
 4 | export interface VerifyActCompletionParams {
 5 |   goal: string;
 6 |   steps: string;
 7 |   llmProvider: LLMProvider;
 8 |   llmClient: LLMClient;
 9 |   domElements?: string; // optional here, required on ActCommandParams in types/act
10 |   logger: (message: { category?: string; message: string }) => void; // same narrow logger shape as ActCommandParams
11 |   requestId: string;
12 | }
13 | 


--------------------------------------------------------------------------------
/types/llm.ts:
--------------------------------------------------------------------------------
1 | export interface LLMTool {
2 |   type: "function"; // only the literal "function" is accepted
3 |   name: string;
4 |   description: string;
5 |   parameters: Record<string, unknown>; // NOTE(review): presumably a JSON-Schema-like object — confirm
6 | }
7 | 


--------------------------------------------------------------------------------
/types/log.ts:
--------------------------------------------------------------------------------
 1 | export type LogLevel = 0 | 1 | 2;
 2 | 
 3 | /**
 4 |  * Mapping between numeric log levels and their names
 5 |  *
 6 |  * 0 - error/warn - Critical issues or important warnings (named "error" in the map)
 7 |  * 1 - info - Standard information messages
 8 |  * 2 - debug - Detailed information for debugging
 9 |  */
10 | export const LOG_LEVEL_NAMES: Record<LogLevel, string> = {
11 |   0: "error",
12 |   1: "info",
13 |   2: "debug",
14 | };
15 | 
16 | export type LogLine = { // only `message` is required
17 |   id?: string;
18 |   category?: string;
19 |   message: string;
20 |   level?: LogLevel;
21 |   timestamp?: string;
22 |   auxiliary?: {
23 |     [key: string]: {
24 |       value: string; // always a string; `type` names the kind of value it represents
25 |       type: "object" | "string" | "html" | "integer" | "float" | "boolean";
26 |     };
27 |   };
28 | };
29 | 
30 | export type Logger = (logLine: LogLine) => void; // callback that receives one LogLine
31 | 


--------------------------------------------------------------------------------
/types/model.ts:
--------------------------------------------------------------------------------
 1 | import type { ClientOptions as AnthropicClientOptions } from "@anthropic-ai/sdk";
 2 | import type { ClientOptions as OpenAIClientOptions } from "openai";
 3 | import { z } from "zod";
 4 | 
 5 | export const AvailableModelSchema = z.enum([ // runtime validator for the accepted model names
 6 |   "gpt-4.1",
 7 |   "gpt-4.1-mini",
 8 |   "gpt-4.1-nano",
 9 |   "o4-mini",
10 |   "o3",
11 |   "o3-mini",
12 |   "o1",
13 |   "o1-mini",
14 |   "gpt-4o",
15 |   "gpt-4o-mini",
16 |   "gpt-4o-2024-08-06",
17 |   "gpt-4.5-preview",
18 |   "o1-preview",
19 |   "claude-3-5-sonnet-latest",
20 |   "claude-3-5-sonnet-20241022",
21 |   "claude-3-5-sonnet-20240620",
22 |   "claude-3-7-sonnet-latest",
23 |   "claude-3-7-sonnet-20250219",
24 |   "cerebras-llama-3.3-70b",
25 |   "cerebras-llama-3.1-8b",
26 |   "groq-llama-3.3-70b-versatile",
27 |   "groq-llama-3.3-70b-specdec",
28 |   "gemini-1.5-flash",
29 |   "gemini-1.5-pro",
30 |   "gemini-1.5-flash-8b",
31 |   "gemini-2.0-flash-lite",
32 |   "gemini-2.0-flash",
33 |   "gemini-2.5-flash-preview-04-17",
34 |   "gemini-2.5-pro-preview-03-25",
35 | ]);
36 | 
37 | export type AvailableModel = z.infer<typeof AvailableModelSchema>; // union of the literals above
38 | 
39 | export type ModelProvider = // closed set of provider identifiers
40 |   | "openai"
41 |   | "anthropic"
42 |   | "cerebras"
43 |   | "groq"
44 |   | "google";
45 | 
46 | export type ClientOptions = OpenAIClientOptions | AnthropicClientOptions; // only these two SDKs' option types are covered
47 | 
48 | export interface AnthropicJsonSchemaObject { // every field is optional
49 |   definitions?: {
50 |     MySchema?: { properties?: Record<string, unknown>; required?: string[] }; // same properties/required pair as the top level
51 |   };
52 |   properties?: Record<string, unknown>;
53 |   required?: string[];
54 | }
55 | 


--------------------------------------------------------------------------------
/types/operator.ts:
--------------------------------------------------------------------------------
 1 | import { z } from "zod";
 2 | 
 3 | export const operatorResponseSchema = z.object({ // the .describe() texts below are runtime strings
 4 |   reasoning: z
 5 |     .string()
 6 |     .describe(
 7 |       "The reasoning for the step taken. If this step's method is `close`, the goal was to extract data, and the task was successful, state the data that was extracted.",
 8 |     ),
 9 |   method: z.enum([
10 |     "act",
11 |     "extract",
12 |     "goto",
13 |     "close",
14 |     "wait",
15 |     "navback",
16 |     "refresh",
17 |   ])
18 |     .describe(`The action to perform on the page based off of the goal and the current state of the page.
19 |       goto: Navigate to a specific URL.
20 |       act: Perform an action on the page.  
21 |       extract: Extract data from the page.
22 |       close: The task is complete, close the browser.
23 |       wait: Wait for a period of time.
24 |       navback: Navigate back to the previous page. Do not navigate back if you are already on the first page.
25 |       refresh: Refresh the page.`),
26 |   parameters: z
27 |     .string()
28 |     .describe(
29 |       `The parameter for the action. Only pass in a parameter for the following methods:
30 |         - act: The action to perform. e.g. "click on the submit button" or "type [email] into the email input field and press enter"
31 |         - extract: The data to extract. e.g. "the title of the article". If you want to extract all of the text on the page, leave this undefined.
32 |         - wait: The amount of time to wait in milliseconds.
33 |         - goto: The URL to navigate to. e.g. "https://www.google.com"
34 |         The other methods do not require a parameter.`,
35 |     )
36 |     .optional(), // the only optional field: `parameters` may be omitted
37 |   taskComplete: z
38 |     .boolean()
39 |     .describe(
40 |       "Whether the task is complete. If true, the task is complete and no more steps are needed. If you chose to close the task because the goal is not achievable, set this to false.",
41 |     ),
42 | });
43 | 
44 | export type OperatorResponse = z.infer<typeof operatorResponseSchema>; // static type derived from the schema
45 | 
46 | export const operatorSummarySchema = z.object({ // single required string field
47 |   answer: z.string().describe("The final answer to the original instruction."),
48 | });
49 | 
50 | export type OperatorSummary = z.infer<typeof operatorSummarySchema>; // static type derived from the schema
51 | 


--------------------------------------------------------------------------------
/types/page.ts:
--------------------------------------------------------------------------------
 1 | import type {
 2 |   Browser as PlaywrightBrowser,
 3 |   BrowserContext as PlaywrightContext,
 4 |   Page as PlaywrightPage,
 5 | } from "@playwright/test";
 6 | import { z } from "zod";
 7 | import type {
 8 |   ActOptions,
 9 |   ActResult,
10 |   ExtractOptions,
11 |   ExtractResult,
12 |   ObserveOptions,
13 |   ObserveResult,
14 | } from "./stagehand";
15 | 
16 | export const defaultExtractSchema = z.object({ // result schema of the Page.extract(instruction: string) overload
17 |   extraction: z.string(),
18 | });
19 | 
20 | export const pageTextSchema = z.object({ // result schema of the zero-argument Page.extract() overload
21 |   page_text: z.string(),
22 | });
23 | 
24 | export interface Page extends Omit<PlaywrightPage, "on"> { // "on" is re-declared below with a typed popup overload
25 |   act(action: string): Promise<ActResult>;
26 |   act(options: ActOptions): Promise<ActResult>;
27 |   act(observation: ObserveResult): Promise<ActResult>; // an ObserveResult can be passed straight to act
28 | 
29 |   extract(
30 |     instruction: string,
31 |   ): Promise<ExtractResult<typeof defaultExtractSchema>>;
32 |   extract<T extends z.AnyZodObject>(
33 |     options: ExtractOptions<T>,
34 |   ): Promise<ExtractResult<T>>;
35 |   extract(): Promise<ExtractResult<typeof pageTextSchema>>;
36 | 
37 |   observe(): Promise<ObserveResult[]>;
38 |   observe(instruction: string): Promise<ObserveResult[]>;
39 |   observe(options?: ObserveOptions): Promise<ObserveResult[]>;
40 | 
41 |   on: {
42 |     (event: "popup", listener: (page: Page) => unknown): Page; // popup listeners receive this Page type
43 |   } & PlaywrightPage["on"];
44 | }
45 | 
46 | // Plain alias of Playwright's BrowserContext for now; may be extended in the future
47 | export type BrowserContext = PlaywrightContext;
48 | 
49 | // Plain alias of Playwright's Browser for now; may be extended in the future
50 | export type Browser = PlaywrightBrowser;
51 | 


--------------------------------------------------------------------------------
/types/playwright.ts:
--------------------------------------------------------------------------------
 1 | export class PlaywrightCommandException extends Error {
 2 |   constructor(message: string) {
 3 |     super(message);
 4 |     this.name = "PlaywrightCommandException"; // fixed string rather than constructor.name
 5 |   }
 6 | }
 7 | 
 8 | export class PlaywrightCommandMethodNotSupportedException extends Error { // extends Error directly, not PlaywrightCommandException
 9 |   constructor(message: string) {
10 |     super(message);
11 |     this.name = "PlaywrightCommandMethodNotSupportedException"; // fixed string rather than constructor.name
12 |   }
13 | }
14 | 
15 | export interface GotoOptions { // every field is optional
16 |   timeout?: number; // NOTE(review): presumably milliseconds, as in Playwright's goto — confirm
17 |   waitUntil?: "load" | "domcontentloaded" | "networkidle" | "commit";
18 |   referer?: string;
19 | }
20 | 


--------------------------------------------------------------------------------
/types/stagehandApiErrors.ts:
--------------------------------------------------------------------------------
 1 | export class StagehandAPIError extends Error { // base class for the API error types in this file
 2 |   constructor(message: string) {
 3 |     super(message);
 4 |     this.name = this.constructor.name; // subclasses therefore report their own class name
 5 |   }
 6 | }
 7 | 
 8 | export class StagehandAPIUnauthorizedError extends StagehandAPIError {
 9 |   constructor(message?: string) {
10 |     super(message || "Unauthorized request"); // a missing or empty message falls back to the default text
11 |   }
12 | }
13 | 
14 | export class StagehandHttpError extends StagehandAPIError {
15 |   constructor(message: string) {
16 |     super(message); // only forwards the message; `name` is set by the base class
17 |   }
18 | }
19 | 
20 | export class StagehandServerError extends StagehandAPIError {
21 |   constructor(message: string) {
22 |     super(message); // only forwards the message; `name` is set by the base class
23 |   }
24 | }
25 | 
26 | export class StagehandResponseBodyError extends StagehandAPIError {
27 |   constructor() {
28 |     super("Response body is null"); // fixed message; the constructor takes no arguments
29 |   }
30 | }
31 | 
32 | export class StagehandResponseParseError extends StagehandAPIError {
33 |   constructor(message: string) {
34 |     super(message); // only forwards the message; `name` is set by the base class
35 |   }
36 | }
37 | 


--------------------------------------------------------------------------------
/types/textannotation.ts:
--------------------------------------------------------------------------------
1 | export type TextAnnotation = {
2 |   text: string;
3 |   bottom_left: { x: number; y: number }; // NOTE(review): units (presumably pixels) are not shown here — confirm
4 |   bottom_left_normalized: { x: number; y: number }; // NOTE(review): presumably bottom_left scaled to a 0–1 range — confirm
5 |   width: number;
6 |   height: number;
7 | };
8 | 


--------------------------------------------------------------------------------