├── tests ├── __init__.py ├── assistantbench │ ├── __init__.py │ ├── test_env_general.py │ └── test_evaluation.py ├── core │ ├── data │ │ ├── screenshot.png │ │ ├── input_type │ │ │ ├── img_submit.gif │ │ │ ├── button_input.html │ │ │ ├── file_input.html │ │ │ ├── submit_nn_input.html │ │ │ ├── email_input.html │ │ │ ├── search_input.html │ │ │ ├── number_step_input.html │ │ │ ├── url_input.html │ │ │ ├── week_input.html │ │ │ ├── range_input.html │ │ │ ├── radio_input.html │ │ │ ├── date_input.html │ │ │ ├── hidden_field_input.html │ │ │ ├── number_input.html │ │ │ ├── telephone_input.html │ │ │ ├── color_picker_input.html │ │ │ ├── date_time_local_input.html │ │ │ ├── image_input.html │ │ │ ├── password_input.html │ │ │ ├── time_input.html │ │ │ ├── text_input.html │ │ │ ├── checkbox_input.html │ │ │ ├── submit_input.html │ │ │ ├── date_min_max_input.html │ │ │ └── reset_input.html │ │ ├── hover.html │ │ ├── textbox.html │ │ ├── basic_shadow_iframe_site │ │ │ ├── inner-iframe.html │ │ │ ├── basic_iframe_2.html │ │ │ ├── basic_iframe.html │ │ │ └── outer-iframe.html │ │ ├── basic_iframe_site │ │ │ ├── basic_iframe_2.html │ │ │ ├── inner-iframe.html │ │ │ ├── outer-iframe.html │ │ │ └── basic_iframe.html │ │ ├── dblclick.html │ │ ├── lots_of_iframes.html │ │ ├── basic_shadow_dom_site │ │ │ ├── simple_shadow_dom.html │ │ │ └── basic_shadow_dom.html │ │ ├── test_page.html │ │ ├── test_page_2.html │ │ ├── example.html │ │ ├── obstructed_checkbox_page.html │ │ └── long_page.html │ ├── __init__.py │ ├── test_actions_python.py │ ├── test_registration.py │ └── test_task.py ├── experiments │ ├── __init__.py │ ├── test_bgym.py │ └── test_exp_loop.py ├── miniwob │ ├── __init__.py │ ├── test_use-colorwheel-2.py │ ├── test_click-scroll-list.py │ ├── test_click-menu-2.py │ └── test_base.py ├── webarena │ ├── __init__.py │ ├── test_instance.py │ ├── test_env_general.py │ └── test_infeasible.py ├── visualwebarena │ ├── __init__.py │ ├── test_vwa_domains.py │ ├── 
test_vwa_tasks_with_reset.py │ └── test_vwa_tasks_without_reset.py └── utils.py ├── demo_agent ├── requirements.txt ├── README.md ├── environment.yml └── run_demo.py ├── browsergym ├── miniwob │ ├── requirements.txt │ ├── pyproject.toml │ ├── README.md │ └── src │ │ └── browsergym │ │ └── miniwob │ │ └── __init__.py ├── webarena │ ├── src │ │ └── browsergym │ │ │ └── webarena │ │ │ ├── config.py │ │ │ └── __init__.py │ ├── requirements.txt │ ├── pyproject.toml │ └── README.md ├── assistantbench │ ├── requirements.txt │ ├── src │ │ └── browsergym │ │ │ └── assistantbench │ │ │ ├── evaluation │ │ │ ├── evaluate_utils │ │ │ │ ├── evaluate_factory.py │ │ │ │ ├── evaluate_numbers.py │ │ │ │ ├── utils.py │ │ │ │ └── evaluate_dicts.py │ │ │ └── evaluator.py │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ └── task.py │ ├── README.md │ └── pyproject.toml ├── experiments │ ├── requirements.txt │ ├── src │ │ ├── browsergym │ │ │ └── experiments │ │ │ │ ├── benchmark │ │ │ │ ├── __init__.py │ │ │ │ └── metadata │ │ │ │ │ └── scripts.py │ │ │ │ ├── __init__.py │ │ │ │ ├── utils.py │ │ │ │ └── agent.py │ │ └── bgym │ │ │ └── __init__.py │ ├── README.md │ └── pyproject.toml ├── webarenalite │ ├── requirements.txt │ ├── src │ │ └── browsergym │ │ │ └── webarenalite │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ └── task.py │ └── pyproject.toml ├── visualwebarena │ ├── requirements.txt │ ├── src │ │ └── browsergym │ │ │ └── visualwebarena │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── utils.py │ │ │ └── instance.py │ ├── pyproject.toml │ └── README.md ├── core │ ├── src │ │ └── browsergym │ │ │ └── core │ │ │ ├── chat_files │ │ │ ├── assistant.png │ │ │ └── img │ │ │ │ └── send.svg │ │ │ ├── constants.py │ │ │ ├── action │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── parsers.py │ │ │ └── python.py │ │ │ ├── __init__.py │ │ │ ├── javascript │ │ │ └── frame_unmark_elements.js │ │ │ ├── registration.py │ │ │ ├── task.py │ │ │ ├── chat.py │ │ │ └── spaces.py │ ├── 
requirements.txt │ ├── README.md │ └── pyproject.toml └── pyproject.toml ├── docs ├── src │ ├── core │ │ ├── observation_space.rst │ │ └── core.rst │ ├── api.rst │ ├── tutorials.rst │ ├── index.rst │ ├── examples │ │ ├── walkthrough.rst │ │ └── create_custom_task.rst │ ├── environments │ │ ├── webarena.rst │ │ ├── miniwob.rst │ │ └── workarena.rst │ ├── usage.rst │ └── conf.py ├── requirements.txt └── Makefile ├── dev ├── environment.yaml └── requirements.txt ├── .github ├── actions │ └── setup-python-uv │ │ └── action.yml └── workflows │ └── pypi.yml ├── LICENSE ├── pyproject.toml ├── .readthedocs.yaml ├── .pre-commit-config.yaml ├── Makefile └── .gitignore /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/assistantbench/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo_agent/requirements.txt: -------------------------------------------------------------------------------- 1 | browsergym 2 | openai 3 | -------------------------------------------------------------------------------- /browsergym/miniwob/requirements.txt: -------------------------------------------------------------------------------- 1 | browsergym-core==0.14.3.dev1 2 | -------------------------------------------------------------------------------- /browsergym/webarena/src/browsergym/webarena/config.py: -------------------------------------------------------------------------------- 1 | TASK_IDS = range(812) 2 | -------------------------------------------------------------------------------- /browsergym/webarena/requirements.txt: -------------------------------------------------------------------------------- 1 | browsergym-core==0.14.3.dev1 2 | libwebarena==0.0.4 3 | 
-------------------------------------------------------------------------------- /browsergym/assistantbench/requirements.txt: -------------------------------------------------------------------------------- 1 | browsergym-core==0.14.3.dev1 2 | datasets 3 | scipy 4 | numpy 5 | -------------------------------------------------------------------------------- /browsergym/experiments/requirements.txt: -------------------------------------------------------------------------------- 1 | browsergym-core==0.14.3.dev1 2 | tiktoken>=0.4 3 | dataclasses-json 4 | -------------------------------------------------------------------------------- /tests/core/data/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/BrowserGym/HEAD/tests/core/data/screenshot.png -------------------------------------------------------------------------------- /browsergym/webarenalite/requirements.txt: -------------------------------------------------------------------------------- 1 | browsergym-core==0.14.3.dev1 2 | browsergym-webarena==0.14.3.dev1 3 | libwebarena==0.0.4 4 | -------------------------------------------------------------------------------- /tests/core/__init__.py: -------------------------------------------------------------------------------- 1 | # bugfix: use same playwright instance in browsergym and pytest 2 | from ..utils import setup_playwright 3 | -------------------------------------------------------------------------------- /tests/experiments/__init__.py: -------------------------------------------------------------------------------- 1 | # bugfix: use same playwright instance in browsergym and pytest 2 | from ..utils import setup_playwright 3 | -------------------------------------------------------------------------------- /tests/miniwob/__init__.py: -------------------------------------------------------------------------------- 1 | # bugfix: use same playwright instance in browsergym 
and pytest 2 | from ..utils import setup_playwright 3 | -------------------------------------------------------------------------------- /tests/webarena/__init__.py: -------------------------------------------------------------------------------- 1 | # bugfix: use same playwright instance in browsergym and pytest 2 | from ..utils import setup_playwright 3 | -------------------------------------------------------------------------------- /tests/core/data/input_type/img_submit.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/BrowserGym/HEAD/tests/core/data/input_type/img_submit.gif -------------------------------------------------------------------------------- /tests/visualwebarena/__init__.py: -------------------------------------------------------------------------------- 1 | # bugfix: use same playwright instance in browsergym and pytest 2 | from ..utils import setup_playwright 3 | -------------------------------------------------------------------------------- /browsergym/visualwebarena/requirements.txt: -------------------------------------------------------------------------------- 1 | browsergym-core==0.14.3.dev1 2 | browsergym-webarena 3 | libvisualwebarena==0.0.15 4 | requests 5 | torch 6 | -------------------------------------------------------------------------------- /demo_agent/README.md: -------------------------------------------------------------------------------- 1 | ## Install demo-agent 2 | 3 | conda env create -f environment.yml 4 | conda activate demo-agent 5 | playwright install chromium 6 | -------------------------------------------------------------------------------- /browsergym/experiments/src/browsergym/experiments/benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Benchmark, HighLevelActionSetArgs 2 | from .configs import DEFAULT_BENCHMARKS 3 | 
-------------------------------------------------------------------------------- /docs/src/core/observation_space.rst: -------------------------------------------------------------------------------- 1 | Observation space 2 | _________________ 3 | 4 | For more details refer to the `WorkArena paper `_. 5 | -------------------------------------------------------------------------------- /browsergym/core/src/browsergym/core/chat_files/assistant.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/BrowserGym/HEAD/browsergym/core/src/browsergym/core/chat_files/assistant.png -------------------------------------------------------------------------------- /browsergym/experiments/src/browsergym/experiments/__init__.py: -------------------------------------------------------------------------------- 1 | from .agent import Agent, AgentInfo 2 | from .loop import AbstractAgentArgs, EnvArgs, ExpArgs, get_exp_result 3 | -------------------------------------------------------------------------------- /browsergym/core/requirements.txt: -------------------------------------------------------------------------------- 1 | playwright==1.44 2 | gymnasium>=0.27 3 | numpy>=1.14 4 | pyparsing>=3 5 | Pillow>=10.1 6 | beautifulsoup4>=4.12 7 | lxml>=4.9,<6.0.0 8 | mcp[cli]>=1.6.0 9 | -------------------------------------------------------------------------------- /dev/environment.yaml: -------------------------------------------------------------------------------- 1 | name: browsergym-dev 2 | 3 | channels: 4 | - huggingface 5 | - conda-forge 6 | - defaults 7 | 8 | dependencies: 9 | - python>=3.10 10 | - pip 11 | 12 | - pip: 13 | - -r requirements.txt -------------------------------------------------------------------------------- /tests/core/data/input_type/button_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Input Button

6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /demo_agent/environment.yml: -------------------------------------------------------------------------------- 1 | name: demo-agent 2 | 3 | channels: 4 | - huggingface 5 | - conda-forge 6 | - defaults 7 | 8 | dependencies: 9 | - python>=3.10 10 | - pip 11 | 12 | - pip: 13 | - -r requirements.txt 14 | -------------------------------------------------------------------------------- /tests/core/data/hover.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /tests/experiments/test_bgym.py: -------------------------------------------------------------------------------- 1 | import bgym 2 | import pytest 3 | 4 | 5 | def test_classes(): 6 | bgym.EnvArgs(task_name="something") 7 | bgym.HighLevelActionSet() 8 | with pytest.raises(TypeError): 9 | bgym.Agent() 10 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==7.3.7 2 | # sphinx-rtd-theme==2.0.0 3 | pydata-sphinx-theme == 0.15.3 4 | sphinx_design==0.6.0 5 | -e browsergym/core/ 6 | -e browsergym/miniwob/ 7 | -e browsergym/webarena/ 8 | -e browsergym/visualwebarena/ 9 | -e browsergym/experiments/ 10 | -------------------------------------------------------------------------------- /browsergym/core/src/browsergym/core/constants.py: -------------------------------------------------------------------------------- 1 | BROWSERGYM_ID_ATTRIBUTE = "bid" # Playwright's default is "data-testid" 2 | BROWSERGYM_VISIBILITY_ATTRIBUTE = "browsergym_visibility_ratio" 3 | BROWSERGYM_SETOFMARKS_ATTRIBUTE = "browsergym_set_of_marks" 4 | 5 | EXTRACT_OBS_MAX_TRIES = 5 6 | 
-------------------------------------------------------------------------------- /browsergym/core/src/browsergym/core/action/__init__.py: -------------------------------------------------------------------------------- 1 | _DEMO_MODE = False 2 | 3 | 4 | def set_global_demo_mode(demo_mode: bool): 5 | global _DEMO_MODE 6 | _DEMO_MODE = demo_mode 7 | 8 | 9 | def get_global_demo_mode(): 10 | global _DEMO_MODE 11 | return _DEMO_MODE 12 | -------------------------------------------------------------------------------- /tests/core/data/textbox.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Simple HTML Page 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /browsergym/core/README.md: -------------------------------------------------------------------------------- 1 | # BrowserGym core 2 | 3 | This package provides `browsergym.core`, which provides the core functionalities of [BrowserGym](https://github.com/ServiceNow/BrowserGym). 4 | 5 | ## Setup 6 | 7 | 1. Install the package 8 | ```sh 9 | pip install browsergym-core 10 | ``` 11 | -------------------------------------------------------------------------------- /tests/core/data/basic_shadow_iframe_site/inner-iframe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Inner Iframe 5 | 6 | 7 |

Iframe Level 2

8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /browsergym/experiments/README.md: -------------------------------------------------------------------------------- 1 | # BrowserGym experiments 2 | 3 | This package provides `browsergym.experiments`, a suite of experimentation tools for [BrowserGym](https://github.com/ServiceNow/BrowserGym). 4 | 5 | As a convenience namespace, it also provides `bgym`. 6 | 7 | ## Setup 8 | 9 | 1. Install the package 10 | ```sh 11 | pip install browsergym-experiments 12 | ``` 13 | -------------------------------------------------------------------------------- /tests/core/data/basic_shadow_iframe_site/basic_iframe_2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Simple Website 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/src/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | Core 5 | ____ 6 | 7 | .. toctree:: 8 | core/core.rst 9 | 10 | 11 | Action and Observation Spaces 12 | _____________________________ 13 | 14 | .. toctree:: 15 | core/action_space.rst 16 | core/observation_space.rst 17 | 18 | Environments 19 | ____________ 20 | 21 | .. 
toctree:: 22 | environments/miniwob.rst 23 | environments/webarena.rst 24 | environments/workarena.rst 25 | -------------------------------------------------------------------------------- /tests/core/data/basic_iframe_site/basic_iframe_2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Simple Website 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/src/core/core.rst: -------------------------------------------------------------------------------- 1 | BrowserGym API 2 | ^^^^^^^^^^^^^^ 3 | 4 | BrowserEnv 5 | """""""""" 6 | 7 | .. autoclass:: browsergym.core.env.BrowserEnv 8 | :members: 9 | :show-inheritance: 10 | 11 | Chat 12 | """" 13 | 14 | .. autoclass:: browsergym.core.env.Chat 15 | :members: 16 | :show-inheritance: 17 | 18 | Task 19 | """" 20 | 21 | .. autoclass:: browsergym.core.task.AbstractBrowserTask 22 | :members: 23 | :show-inheritance: 24 | 25 | -------------------------------------------------------------------------------- /tests/core/data/input_type/file_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

File upload

6 | 7 |

Show a file-select field which allows a file to be chosen for upload:

8 |
9 | 10 |

11 | 12 |
13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /tests/core/data/input_type/submit_nn_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |
6 |
7 |
8 |
9 |

10 | 11 |
12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /tests/core/data/dblclick.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /tests/core/data/input_type/email_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Email Field

6 | 7 |

The input type="email" is used for input fields that should contain an e-mail address:

8 | 9 |
10 | 11 | 12 | 13 |
14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /tests/core/data/input_type/search_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Search Field

6 |

The input type="search" is used for search fields (behaves like a regular text field):

7 | 8 |
9 | 10 | 11 | 12 |
13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /tests/core/data/input_type/number_step_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Numeric Steps

6 | 7 |

Depending on browser support: Fixed steps will apply in the input field.

8 | 9 |
10 | 11 | 12 | 13 |
14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /tests/core/data/input_type/url_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Display a URL Input Field

6 | 7 |

The input type="url" is used for input fields that should contain a URL address:

8 | 9 |
10 | 11 | 12 | 13 |
14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /tests/core/data/input_type/week_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Display a URL Input Field

6 | 7 |

The input type="url" is used for input fields that should contain a URL address:

8 | 9 |
10 | 11 | 12 | 13 |
14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /tests/core/data/lots_of_iframes.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Lots of Iframes 6 | 7 | 8 | 9 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /tests/core/data/input_type/range_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Range Field

6 | 7 |

Depending on browser support: The input type "range" can be displayed as a slider control.

8 | 9 |
10 | 11 | 12 | 13 |
14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /tests/core/data/input_type/radio_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Radio Buttons

6 | 7 |

The input type="radio" defines a radio button:

8 | 9 |

Choose your favorite Web language:

10 |
11 | 12 |   13 |  
14 | 15 | 16 |
17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /dev/requirements.txt: -------------------------------------------------------------------------------- 1 | black[jupyter]==24.2.0 2 | blacken-docs 3 | pre-commit 4 | pytest==7.3.2 5 | pytest-xdist 6 | pytest-playwright 7 | tenacity 8 | -e ../browsergym/core # local package 9 | -e ../browsergym/miniwob # local package 10 | -e ../browsergym/webarena # local package 11 | -e ../browsergym/visualwebarena # local package 12 | -e ../browsergym/experiments # local package 13 | -e ../browsergym/assistantbench # local package 14 | -e ../browsergym/webarenalite # local package 15 | browsergym-workarena 16 | weblinx_browsergym 17 | -------------------------------------------------------------------------------- /browsergym/experiments/src/bgym/__init__.py: -------------------------------------------------------------------------------- 1 | from browsergym.core.action.base import AbstractActionSet 2 | from browsergym.core.action.highlevel import HighLevelActionSet 3 | from browsergym.core.action.python import PythonActionSet 4 | from browsergym.experiments.agent import Agent, AgentInfo 5 | from browsergym.experiments.benchmark import DEFAULT_BENCHMARKS, Benchmark, HighLevelActionSetArgs 6 | from browsergym.experiments.loop import ( 7 | AbstractAgentArgs, 8 | EnvArgs, 9 | ExpArgs, 10 | ExpResult, 11 | StepInfo, 12 | StepTimestamps, 13 | ) 14 | -------------------------------------------------------------------------------- /tests/core/data/input_type/date_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Date Field

6 | 7 |

The input type="date" is used for input fields that should contain a date.

8 | 9 |
10 | 11 | 12 | 13 |
14 | 15 |

Note: type="date" is not supported in Internet Explorer 11 or prior Safari 14.1.

16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | import browsergym.core 2 | import logging 3 | import playwright.sync_api 4 | import pytest 5 | 6 | 7 | # setup code, executed ahead of first test 8 | @pytest.fixture(scope="session", autouse=True) 9 | def setup_playwright(playwright: playwright.sync_api.Playwright): 10 | # bugfix: re-use pytest-playwright's playwright instance in browsergym 11 | # https://github.com/microsoft/playwright-python/issues/2053 12 | browsergym.core._set_global_playwright(playwright) 13 | logging.info("Browsergym is using the playwright instance provided by pytest-playwright.") 14 | -------------------------------------------------------------------------------- /tests/core/data/input_type/hidden_field_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

A Hidden Field (look in source code)

6 | 7 |
8 | 9 |

10 | 11 | 12 |
13 | 14 |

Note: The hidden field is not shown to the user, but the data is sent when the form is submitted.

15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /tests/core/data/input_type/number_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Number Field

6 | 7 |

The input type="number" defines a numeric input field.

8 | 9 |

You can use the min and max attributes to add numeric restrictions in the input field:

10 | 11 |
12 | 13 | 14 | 15 |
16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /.github/actions/setup-python-uv/action.yml: -------------------------------------------------------------------------------- 1 | name: 'Setup Python and uv' 2 | description: 'Setup Python 3.12 and install uv package manager' 3 | runs: 4 | using: 'composite' 5 | steps: 6 | - name: Set up Python 7 | uses: actions/setup-python@v5 8 | with: 9 | python-version: "3.12" 10 | 11 | - name: Install uv 12 | uses: astral-sh/setup-uv@v7 13 | with: 14 | # Install a specific version of uv. 15 | version: "0.9.17" 16 | enable-cache: true 17 | 18 | - name: Create virtual environment 19 | shell: bash 20 | run: uv venv 21 | -------------------------------------------------------------------------------- /docs/src/tutorials.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | This section provides tutorials to help build new environments and tasks. 5 | 6 | .. grid:: 2 7 | :gutter: 2 8 | 9 | .. grid-item-card:: Walkthrough 10 | :link: examples/walkthrough.html 11 | 12 | :bdg-primary:`Getting started` 13 | 14 | .. grid-item-card:: Create a custom task 15 | :link: examples/create_custom_task.html 16 | 17 | :bdg-primary:`Custom task` 18 | 19 | 20 | 21 | .. toctree:: 22 | :maxdepth: 1 23 | :hidden: 24 | 25 | examples/walkthrough.rst 26 | examples/create_custom_task.rst 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2024 ServiceNow 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /tests/core/data/input_type/telephone_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Telephone Field

6 | 7 |

The input type="tel" is used for input fields that should contain a telephone number:

8 | 9 |
10 |

11 |

12 | Format: 123-45-678

13 | 14 |
15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /tests/core/data/input_type/color_picker_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Show a Color Picker

6 | 7 |

The input type="color" is used for input fields that should contain a color.

8 | 9 |
10 | 11 | 12 | 13 |
14 | 15 |

Note: type="color" is not supported in Internet Explorer 11 or Safari 9.1 (or earlier).

16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /docs/src/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to BrowserGym's documentation! 2 | ====================================== 3 | 4 | **BrowserGym** is a Python library that provides a `gym environment `_ 5 | for web task automation in the Chromium browser. It includes the following benchmarks by default: 6 | `MiniWob++ `_, `WebArena `_, `WorkArena `_. 7 | 8 | .. note:: 9 | 10 | This project is under active development. 11 | 12 | Contents 13 | -------- 14 | 15 | .. toctree:: 16 | :maxdepth: 2 17 | 18 | usage 19 | api 20 | tutorials 21 | -------------------------------------------------------------------------------- /tests/core/data/input_type/date_time_local_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Local Date Field

6 | 7 |

The input type="datetime-local" specifies a date and time input field, with no time zone.

8 | 9 |
10 | 11 | 12 | 13 |
14 | 15 |

Note: type="datetime-local" is not supported in Internet Explorer 11 or prior Safari 14.1.

16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /tests/core/data/input_type/image_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Display an Image as the Submit button

6 | 7 |
8 | 9 |

10 | 11 |

12 | 13 |
14 | 15 |

Note: The input type="image" sends the X and Y coordinates of the click that activated the image button.

16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /tests/core/data/input_type/password_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Password field

6 | 7 |

The input type="password" defines a password field:

8 | 9 |
10 |
11 |
12 |
13 |

14 | 15 |
16 | 17 |

The characters in a password field are masked (shown as asterisks or circles).

18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /tests/core/data/basic_iframe_site/inner-iframe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Inner Iframe 6 | 12 | 13 | 14 | 15 |

Iframe Level 2

16 | 17 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/src/examples/walkthrough.rst: -------------------------------------------------------------------------------- 1 | Walkthrough 2 | ___________ 3 | 4 | 5 | Boilerplate code to run an agent on an interactive, open-ended task: 6 | 7 | .. code-block:: python 8 | 9 | import gymnasium as gym 10 | import browsergym.core # register the openended task as a gym environment 11 | 12 | env = gym.make( 13 | "browsergym/openended", task_kwargs={"start_url": "https://www.google.com/"}, wait_for_user_message=True 14 | ) 15 | 16 | obs, info = env.reset() 17 | done = False 18 | while not done: 19 | action = ... # implement your agent here 20 | obs, reward, terminated, truncated, info = env.step(action) 21 | done = terminated or truncated -------------------------------------------------------------------------------- /tests/core/data/input_type/time_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Show a Time Input Control

6 | 7 |

The input type="time" allows the user to select a time (no time zone):

8 | 9 |

If the browser supports it, a time picker pops up when entering the input field.

10 | 11 |
12 | 13 | 14 | 15 |
16 | 17 |

Note: type="time" is not supported in Internet Explorer 11 or prior Safari 14.1.

18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /browsergym/core/src/browsergym/core/chat_files/img/send.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /tests/core/data/input_type/text_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Text field

6 |

The input type="text" defines a one-line text input field:

7 | 8 |
9 |
10 |
11 |
12 |

13 | 14 |
15 | 16 |

Note that the form itself is not visible.

17 |

Also note that the default width of a text field is 20 characters.

18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = src 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /tests/core/data/input_type/checkbox_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Checkboxes

6 |

The input type="checkbox" defines a checkbox:

7 | 8 |
9 | 10 |
11 | 12 |
13 | 14 |

15 | 16 |
__version__ = "0.14.3.dev1"

import playwright.sync_api

# A single Playwright driver object is shared by the whole process.
_PLAYWRIGHT = None


def _set_global_playwright(pw: playwright.sync_api.Playwright):
    """Install `pw` as the process-wide shared Playwright instance."""
    global _PLAYWRIGHT
    _PLAYWRIGHT = pw


def _get_global_playwright():
    """Return the shared Playwright instance, starting one lazily on first use."""
    global _PLAYWRIGHT
    if not _PLAYWRIGHT:
        _set_global_playwright(playwright.sync_api.sync_playwright().start())
    return _PLAYWRIGHT


# register the open-ended task as a gym environment (import side effect)
from .registration import register_task
from .task import OpenEndedTask

register_task(OpenEndedTask.get_task_id(), OpenEndedTask)
8 | 9 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /tests/core/data/input_type/submit_input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |

Submit Button

6 | 7 |

The input type="submit" defines a button for submitting form data to a form-handler:

8 | 9 |
10 |
11 |
12 |
13 |

14 | 15 |
16 | 17 |

If you click "Submit", the form-data will be sent to a page called "https://www.w3schools.com/action_page.php".

import nltk

from browsergym.core.registration import register_task

from . import config, task

# download necessary tokenizer resources
# note: deprecated punkt -> punkt_tab https://github.com/nltk/nltk/issues/3293
try:
    nltk.data.find("tokenizers/punkt_tab")
except LookupError:
    # Fix: catch only LookupError (what nltk.data.find raises when the resource
    # is missing) instead of a bare `except:`, which also swallowed
    # KeyboardInterrupt/SystemExit and masked unrelated errors.
    nltk.download("punkt_tab", quiet=True, raise_on_error=True)

# gym ids of every registered WebArena task
ALL_WEBARENA_TASK_IDS = []

# register all WebArena benchmark tasks
for task_id in config.TASK_IDS:
    gym_id = f"webarena.{task_id}"
    register_task(
        gym_id,
        task.GenericWebArenaTask,
        task_kwargs={"task_id": task_id},
    )
    ALL_WEBARENA_TASK_IDS.append(gym_id)

Date Field Restrictions

6 | 7 |

Use the min and max attributes to add restrictions to dates:

8 | 9 |
10 | 11 |

12 | 13 | 14 |

15 | 16 | 17 |
18 | 19 |

Note: type="date" is not supported in Internet Explorer 11 or prior Safari 14.1.

import nltk

from browsergym.core.registration import register_task

from . import config, task

# download necessary tokenizer resources
# note: deprecated punkt -> punkt_tab https://github.com/nltk/nltk/issues/3293
try:
    nltk.data.find("tokenizers/punkt_tab")
except LookupError:
    # Fix: catch only LookupError (what nltk.data.find raises when the resource
    # is missing) instead of a bare `except:`, which also swallowed
    # KeyboardInterrupt/SystemExit and masked unrelated errors.
    nltk.download("punkt_tab", quiet=True, raise_on_error=True)

# gym ids of every registered WebArena-Lite task
ALL_WEBARENA_TASK_IDS = []

# register all WebArena-Lite benchmark tasks
for task_id in config.TASK_IDS:
    gym_id = f"webarenalite.{task_id}"
    register_task(
        gym_id,
        task.WebArenaLiteTask,
        task_kwargs={"task_id": task_id},
    )
    ALL_WEBARENA_TASK_IDS.append(gym_id)

Reset Button

6 | 7 |

The input type="reset" defines a reset button that resets all form values to their default values:

8 | 9 |
10 |
11 |
12 |
13 |

14 | 15 | 16 |
17 | 18 |

If you change the input values and then click the "Reset" button, the form-data will be reset to the default values.

19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /tests/webarena/test_instance.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import playwright.sync_api 3 | 4 | from browsergym.webarena.instance import WebArenaInstance 5 | 6 | 7 | def test_is_reachable(): 8 | # default URLs 9 | instance = WebArenaInstance() 10 | instance.check_status() 11 | 12 | # unreacheable URL 13 | with pytest.raises(RuntimeError): 14 | instance = WebArenaInstance() 15 | instance.urls["reddit"] = "https://invalid.url" 16 | instance.check_status() 17 | 18 | 19 | @pytest.mark.parametrize( 20 | "site", ["reddit", "shopping", "shopping_admin", "gitlab", "wikipedia", "map"] 21 | ) 22 | def test_credentials(page: playwright.sync_api.Page, site: str): 23 | # default URLs and credentials 24 | instance = WebArenaInstance() 25 | instance.ui_login(site=site, page=page) 26 | 27 | # TODO: test this more thoroughly 28 | -------------------------------------------------------------------------------- /tests/visualwebarena/test_vwa_domains.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import playwright.sync_api 3 | 4 | from browsergym.visualwebarena.instance import VisualWebArenaInstance 5 | 6 | 7 | def test_is_reachable(): 8 | # default URLs 9 | instance = VisualWebArenaInstance() 10 | instance.check_status() 11 | 12 | # unreacheable URL 13 | with pytest.raises(RuntimeError): 14 | instance = VisualWebArenaInstance() 15 | instance.urls["reddit"] = "https://invalid.url" 16 | instance.check_status() 17 | 18 | 19 | @pytest.mark.parametrize("site", ["reddit", "shopping", "wikipedia", "classifieds"]) 20 | def test_credentials(page: playwright.sync_api.Page, site: str): 21 | # default URLs and credentials 22 | instance = VisualWebArenaInstance() 23 | instance.ui_login(site=site, page=page) 24 | 25 | # TODO: test this more thoroughly 26 | 
from typing import Union

from .evaluate_dicts import evaluate_dicts
from .evaluate_numbers import evaluate_numbers
from .evaluate_strings import evaluate_strings

# Dispatch table: answer-type label (from task metadata) -> evaluator function.
EvaluatorFactory = {
    "string": evaluate_strings,
    "number": evaluate_numbers,
    "json": evaluate_dicts,
    "string list": evaluate_strings,
}

# Dispatch table: Python type of the gold answer -> evaluator function.
EvaluatorFactoryFromType = {
    str: evaluate_strings,
    int: evaluate_numbers,
    float: evaluate_numbers,
    bool: evaluate_strings,
    list: evaluate_strings,
}


def get_evaluator(evaluator: str):
    """Return the evaluator registered for the given answer-type label.

    Raises:
        KeyError: if `evaluator` is not one of the known labels.
    """
    return EvaluatorFactory[evaluator]


def get_evaluator_from_gold_answer(gold_answer: Union[str, int, float]):
    """Return the evaluator matching the gold answer's Python type.

    Fix/robustness: `EvaluatorFactoryFromType` is keyed by *types*, but the
    signature documents a *value* argument; looking up the raw argument raised
    KeyError (or TypeError for unhashable values such as lists). Accept both
    calling conventions — a type object is used as-is, any other value is
    mapped through ``type()`` — so existing callers keep working either way.

    Raises:
        KeyError: if the (derived) type has no registered evaluator.
    """
    key = gold_answer if isinstance(gold_answer, type) else type(gold_answer)
    return EvaluatorFactoryFromType[key]
29 | ] 30 | markers = [ 31 | "slow: marks tests as slow (deselect with '-m \"not slow\"')", 32 | "serial: mark test to be run sequantially (deselect with '-m \"not serial\"')" 33 | ] 34 | -------------------------------------------------------------------------------- /tests/core/data/test_page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Simple Form 5 | 6 | 7 |

Simple Form

8 | 9 |
10 | 11 |

12 | 13 | 14 |

15 | 16 | 17 |

18 | 19 |
20 |

21 | 22 | 23 |

24 | 25 | 26 | 27 |
28 | 29 | 30 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the OS, Python version and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.12" 13 | # You can also specify other tool versions: 14 | # nodejs: "19" 15 | # rust: "1.64" 16 | # golang: "1.19" 17 | 18 | # Build documentation in the "docs/" directory with Sphinx 19 | sphinx: 20 | configuration: docs/src/conf.py 21 | 22 | # Optionally build your docs in additional formats such as PDF and ePub 23 | # formats: 24 | # - pdf 25 | # - epub 26 | 27 | # Optional but recommended, declare the Python requirements required 28 | # to build your documentation 29 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 30 | python: 31 | install: 32 | - requirements: docs/requirements.txt 33 | -------------------------------------------------------------------------------- /tests/core/test_actions_python.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from browsergym.core.action.python import PythonActionSet 4 | 5 | 6 | ACTIONS_TO_TEST = [ 7 | ( 8 | """\ 9 | a = 0 10 | """, 11 | """\ 12 | a = 0 13 | """, 14 | ), 15 | ( 16 | """\ 17 | ``` 18 | a = 0 19 | ``` 20 | """, 21 | """\ 22 | a = 0 23 | """, 24 | ), 25 | ( 26 | """\ 27 | ```python 28 | a = 0 29 | ``` 30 | """, 31 | """\ 32 | a = 0 33 | """, 34 | ), 35 | ( 36 | """\ 37 | ```python 38 | a = 0 39 | ``` 40 | This is an explanation 41 | ```python 42 | b = 3 43 | ``` 44 | More explanations 45 | """, 46 | """\ 47 | a = 0 48 | 49 | b = 3 50 | """, 51 | ), 52 | ] 53 | 54 | 55 | @pytest.mark.parametrize("action,expected_code", 
from typing import Union

import numpy as np


# Renamed calc_z function to distance_function_log
def distance_function_log(pred: float, gold: float):
    """Symmetric log-ratio closeness score, clipped to be non-negative.

    Returns 1 when pred == gold, decays towards 0 as the ratio between the
    two numbers grows, and hits 0 once the larger is >= e times the smaller.
    Exact zeros are clamped to 1e-4 to avoid log(0)/division by zero.
    """
    if pred == gold == 0:
        return 1
    if pred == 0:
        pred = 1e-4
    if gold == 0:
        gold = 1e-4
    # always take log of (larger / smaller) so the score is symmetric
    if pred > gold:
        return max(0, 1 - np.log(pred / gold))
    else:
        return max(0, 1 - np.log(gold / pred))


def evaluate_numbers(pred: Union[float, str], gold: float):
    """Score a predicted number against the gold number.

    Non-numeric inputs are coerced with float(); predictions (or gold values)
    that cannot be parsed score 0. Fix: also catch TypeError, since
    float(None) — a common model output — raises TypeError, not ValueError,
    which previously crashed the evaluator instead of scoring 0.
    """
    res = None
    if type(pred) != float and type(pred) != int:
        try:
            pred = float(pred)
        except (ValueError, TypeError):
            res = 0
    if type(gold) != float and type(gold) != int:
        try:
            gold = float(gold)
        except (ValueError, TypeError):
            res = 0
    if res is None:
        res = distance_function_log(pred, gold)
    return res
8 | 9 | ## Setting up 10 | 11 | - Install the package (this is still a wip) 12 | ``` 13 | pip install browsergym-assistantbench 14 | ``` 15 | 16 | - Run inference, e.g., run the following commands for demo on a simple toy task 17 | ``` 18 | python demo_agent/run_demo.py --task_name assistantbench.validation.3 19 | ``` 20 | 21 | - Test set predictions will be saved to `./assistantbench-predictions-test.jsonl`. To evaluate on the official test set, upload these predictions to the official [leaderboard](https://huggingface.co/spaces/AssistantBench/leaderboard). 22 | -------------------------------------------------------------------------------- /browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, List, Set 2 | 3 | import numpy as np 4 | from scipy.optimize import linear_sum_assignment 5 | 6 | 7 | def _align_bags( 8 | predicted: List[Set[str]], 9 | gold: List[Set[str]], 10 | method: Callable[[object, object], float], 11 | ) -> List[float]: 12 | """ 13 | Takes gold and predicted answer sets and first finds the optimal 1-1 alignment 14 | between them and gets maximum metric values over all the answers. 
15 | """ 16 | scores = np.zeros([len(gold), len(predicted)]) 17 | for gold_index, gold_item in enumerate(gold): 18 | for pred_index, pred_item in enumerate(predicted): 19 | scores[gold_index, pred_index] = method(pred_item, gold_item) 20 | row_ind, col_ind = linear_sum_assignment(-scores) 21 | 22 | max_scores = np.zeros([max(len(gold), len(predicted))]) 23 | for row, column in zip(row_ind, col_ind): 24 | max_scores[row] = max(max_scores[row], scores[row, column]) 25 | return max_scores 26 | -------------------------------------------------------------------------------- /docs/src/environments/webarena.rst: -------------------------------------------------------------------------------- 1 | WebArena 2 | ^^^^^^^^ 3 | 4 | `BrowserGym` integrates `WebArena` enviroment. For more information about this enviroment, please refer to the `WebArena `_ official documentation. 5 | 6 | 7 | BrowserGym API 8 | """""""""""""" 9 | 10 | .. currentmodule:: browsergym 11 | 12 | .. autosummary:: 13 | :recursive: 14 | :toctree: generated 15 | :caption: WebArena 16 | 17 | webarena 18 | 19 | 20 | Usage 21 | """"" 22 | 23 | Before running the sample code, install `WebArena` by following the steps in the `docs `_. 24 | 25 | .. code-block:: python 26 | 27 | import gym 28 | import browsergym.webarena 29 | 30 | env = gym.make('browsergym/webarena.10') 31 | obs, info = env.reset() 32 | done = False 33 | 34 | while not done: 35 | action = "noop()" 36 | obs, reward, terminated, truncated, info = env.step(action) 37 | print(f"Reward: {reward}, Done: {done}, Info: {info}") 38 | 39 | env.close() 40 | 41 | -------------------------------------------------------------------------------- /browsergym/visualwebarena/src/browsergym/visualwebarena/__init__.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | 3 | from browsergym.core.registration import register_task 4 | 5 | from . 
import config, task 6 | 7 | # download necessary tokenizer resources 8 | # note: deprecated punkt -> punkt_tab https://github.com/nltk/nltk/issues/3293 9 | try: 10 | nltk.data.find("tokenizers/punkt_tab") 11 | except: 12 | nltk.download("punkt_tab", quiet=True, raise_on_error=True) 13 | 14 | ALL_VISUALWEBARENA_TASK_IDS = [] 15 | VISUALWEBARENA_TASK_IDS_WITH_RESET = [] 16 | VISUALWEBARENA_TASK_IDS_WITHOUT_RESET = [] 17 | 18 | # register all VisualWebArena tasks 19 | for task_id in config.TASK_IDS: 20 | gym_id = f"visualwebarena.{task_id}" 21 | register_task( 22 | gym_id, 23 | task.GenericVisualWebArenaTask, 24 | task_kwargs={"task_id": task_id}, 25 | ) 26 | ALL_VISUALWEBARENA_TASK_IDS.append(gym_id) 27 | if task_id in config.TASK_IDS_WITH_RESET: 28 | VISUALWEBARENA_TASK_IDS_WITH_RESET.append(gym_id) 29 | else: 30 | VISUALWEBARENA_TASK_IDS_WITHOUT_RESET.append(gym_id) 31 | -------------------------------------------------------------------------------- /docs/src/environments/miniwob.rst: -------------------------------------------------------------------------------- 1 | MiniWoB++ 2 | ^^^^^^^^^ 3 | 4 | `BrowserGym` integrates the `MiniWoB++` environment. For more information about this environment, please refer to the `MiniWoB++ `_ official documentation. 5 | 6 | 7 | BrowserGym API 8 | """""""""""""" 9 | 10 | .. currentmodule:: browsergym 11 | 12 | .. autosummary:: 13 | :recursive: 14 | :toctree: generated 15 | :caption: MiniWoB++ 16 | 17 | miniwob 18 | 19 | 20 | Usage 21 | """"" 22 | 23 | Before running the sample code, install `MiniWoB++` by following the steps in the `docs `_. 24 | 25 | ..
code-block:: python 26 | 27 | import gym 28 | import browsergym.miniwob 29 | 30 | env = gym.make('browsergym/miniwob.book-flight') 31 | obs, info = env.reset() 32 | done = False 33 | 34 | while not done: 35 | action = "noop()" 36 | obs, reward, terminated, truncated, info = env.step(action) 37 | print(f"Reward: {reward}, Done: {done}, Info: {info}") 38 | done = terminated or truncated 39 | env.close() 40 | 41 | -------------------------------------------------------------------------------- /browsergym/webarenalite/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-requirements-txt"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "browsergym-webarenalite" 7 | description = "WebArena Lite benchmark for BrowserGym" 8 | authors = [ 9 | {name = "Aman Jaiswal"}, 10 | {name = "Léo Boisvert"}, 11 | ] 12 | requires-python = ">3.7" 13 | license = {text = "Apache-2.0"} 14 | classifiers = [ 15 | "Development Status :: 3 - Alpha", 16 | "Programming Language :: Python :: 3", 17 | "Operating System :: OS Independent", 18 | "Intended Audience :: Science/Research", 19 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 20 | "License :: OSI Approved :: Apache Software License", 21 | ] 22 | dynamic = ["dependencies", "version"] 23 | 24 | [project.urls] 25 | homepage = "https://github.com/ServiceNow/BrowserGym" 26 | 27 | [tool.hatch.version] 28 | path = "../core/src/browsergym/core/__init__.py" 29 | 30 | [tool.hatch.metadata.hooks.requirements_txt] 31 | files = ["requirements.txt"] 32 | 33 | [tool.hatch.build.targets.wheel] 34 | packages = ["src/browsergym"] 35 | -------------------------------------------------------------------------------- /browsergym/webarena/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-requirements-txt"] 3 | build-backend = "hatchling.build" 4 | 5 |
[project] 6 | name = "browsergym-webarena" 7 | description = "WebArena benchmark for BrowserGym" 8 | authors = [ 9 | {name = "Maxime Gasse"}, 10 | {name = "Tom Marty"}, 11 | ] 12 | readme = "README.md" 13 | requires-python = ">3.7" 14 | license = {text = "Apache-2.0"} 15 | classifiers = [ 16 | "Development Status :: 3 - Alpha", 17 | "Programming Language :: Python :: 3", 18 | "Operating System :: OS Independent", 19 | "Intended Audience :: Science/Research", 20 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 21 | "License :: OSI Approved :: Apache Software License", 22 | ] 23 | dynamic = ["dependencies", "version"] 24 | 25 | [project.urls] 26 | homepage = "https://github.com/ServiceNow/BrowserGym" 27 | 28 | [tool.hatch.version] 29 | path = "../core/src/browsergym/core/__init__.py" 30 | 31 | [tool.hatch.metadata.hooks.requirements_txt] 32 | files = ["requirements.txt"] 33 | 34 | [tool.hatch.build.targets.wheel] 35 | packages = ["src/browsergym"] 36 | -------------------------------------------------------------------------------- /docs/src/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | .. _installation: 5 | 6 | Installation 7 | ------------ 8 | 9 | To use BrowserGym, first install it using pip: 10 | 11 | .. code-block:: console 12 | 13 | pip install browsergym 14 | 15 | Then, a required step is to setup playwright by running 16 | 17 | .. code-block:: console 18 | 19 | playwright install chromium 20 | 21 | Example code 22 | ------------ 23 | 24 | Boilerplate code to run an agent on an interactive, open-ended task: 25 | 26 | .. 
code-block:: python 27 | 28 | import gymnasium as gym 29 | import browsergym.core # register the openended task as a gym environment 30 | 31 | env = gym.make( 32 | "browsergym/openended", 33 | task_kwargs={"start_url": "https://www.google.com/"}, # starting URL 34 | wait_for_user_message=True, # wait for a user message after each agent message sent to the chat 35 | ) 36 | 37 | obs, info = env.reset() 38 | done = False 39 | while not done: 40 | action = ... # implement your agent here 41 | obs, reward, terminated, truncated, info = env.step(action) 42 | done = terminated or truncated 43 | -------------------------------------------------------------------------------- /tests/webarena/test_env_general.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import logging 3 | import os 4 | import playwright.sync_api 5 | import pytest 6 | import random 7 | 8 | from tenacity import retry, stop_after_attempt, retry_if_exception_type 9 | 10 | # register gym environments 11 | import browsergym.webarena 12 | 13 | 14 | __SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None 15 | __HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True 16 | 17 | 18 | from browsergym.webarena import ALL_WEBARENA_TASK_IDS 19 | 20 | rng = random.Random(1) 21 | task_ids = rng.sample(ALL_WEBARENA_TASK_IDS, 25) 22 | 23 | 24 | @retry( 25 | stop=stop_after_attempt(5), 26 | retry=retry_if_exception_type(playwright.sync_api.TimeoutError), 27 | reraise=True, 28 | before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."), 29 | ) 30 | @pytest.mark.parametrize("task_id", task_ids) 31 | @pytest.mark.slow 32 | def test_env_generic(task_id): 33 | env = gym.make( 34 | f"browsergym/{task_id}", 35 | headless=__HEADLESS, 36 | slow_mo=__SLOW_MO, 37 | ) 38 | obs, info = env.reset() 39 | 40 | env.close() 41 | -------------------------------------------------------------------------------- 
/browsergym/assistantbench/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-requirements-txt"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "browsergym-assistantbench" 7 | description = "AssistantBench benchmark for BrowserGym" 8 | authors = [ 9 | {name = "Ori Yoran"}, 10 | {name = "Maxime Gasse"}, 11 | ] 12 | readme = "README.md" 13 | requires-python = ">3.7" 14 | license = {text = "Apache-2.0"} 15 | classifiers = [ 16 | "Development Status :: 3 - Alpha", 17 | "Programming Language :: Python :: 3", 18 | "Operating System :: OS Independent", 19 | "Intended Audience :: Science/Research", 20 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 21 | "License :: OSI Approved :: Apache Software License", 22 | ] 23 | dynamic = ["dependencies", "version"] 24 | 25 | [project.urls] 26 | homepage = "https://github.com/ServiceNow/BrowserGym" 27 | 28 | [tool.hatch.version] 29 | path = "../core/src/browsergym/core/__init__.py" 30 | 31 | [tool.hatch.metadata.hooks.requirements_txt] 32 | files = ["requirements.txt"] 33 | 34 | [tool.hatch.build.targets.wheel] 35 | packages = ["src/browsergym"] 36 | -------------------------------------------------------------------------------- /browsergym/visualwebarena/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-requirements-txt"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "browsergym-visualwebarena" 7 | description = "VisualWebArena benchmark for BrowserGym" 8 | authors = [ 9 | {name = "Lawrence Jang"}, 10 | {name = "Maxime Gasse"}, 11 | ] 12 | readme = "README.md" 13 | requires-python = ">3.7" 14 | license = {text = "Apache-2.0"} 15 | classifiers = [ 16 | "Development Status :: 3 - Alpha", 17 | "Programming Language :: Python :: 3", 18 | "Operating System :: OS Independent", 19 | 
"Intended Audience :: Science/Research", 20 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 21 | "License :: OSI Approved :: Apache Software License", 22 | ] 23 | dynamic = ["dependencies", "version"] 24 | 25 | [project.urls] 26 | homepage = "https://github.com/ServiceNow/BrowserGym" 27 | 28 | [tool.hatch.version] 29 | path = "../core/src/browsergym/core/__init__.py" 30 | 31 | [tool.hatch.metadata.hooks.requirements_txt] 32 | files = ["requirements.txt"] 33 | 34 | [tool.hatch.build.targets.wheel] 35 | packages = ["src/browsergym"] 36 | -------------------------------------------------------------------------------- /browsergym/miniwob/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-requirements-txt"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "browsergym-miniwob" 7 | description = "MiniWoB++ benchmark for BrowserGym" 8 | authors = [ 9 | {name = "Rim Assouel"}, 10 | {name = "Maxime Gasse"}, 11 | {name = "Tom Marty"}, 12 | {name = "Alexandre Lacoste"}, 13 | ] 14 | readme = "README.md" 15 | requires-python = ">3.7" 16 | license = {text = "Apache-2.0"} 17 | classifiers = [ 18 | "Development Status :: 3 - Alpha", 19 | "Programming Language :: Python :: 3", 20 | "Operating System :: OS Independent", 21 | "Intended Audience :: Science/Research", 22 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 23 | "License :: OSI Approved :: Apache Software License", 24 | ] 25 | dynamic = ["dependencies", "version"] 26 | 27 | [project.urls] 28 | homepage = "https://github.com/ServiceNow/BrowserGym" 29 | 30 | [tool.hatch.version] 31 | path = "../core/src/browsergym/core/__init__.py" 32 | 33 | [tool.hatch.metadata.hooks.requirements_txt] 34 | files = ["requirements.txt"] 35 | 36 | [tool.hatch.build.targets.wheel] 37 | packages = ["src/browsergym"] 38 | 
-------------------------------------------------------------------------------- /tests/miniwob/test_use-colorwheel-2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gymnasium as gym 3 | import re 4 | import pytest 5 | 6 | # register gym environments 7 | import browsergym.miniwob 8 | 9 | __SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None 10 | __HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True 11 | 12 | 13 | @pytest.mark.parametrize("seed", range(5)) 14 | def test_cheat(seed): 15 | env = gym.make( 16 | "browsergym/miniwob.use-colorwheel-2", 17 | headless=__HEADLESS, 18 | slow_mo=__SLOW_MO, 19 | action_mapping=None, 20 | ) 21 | obs, info = env.reset(seed=42) 22 | 23 | assert obs["last_action_error"] == "" 24 | 25 | match = re.match( 26 | "Select the following color #(.+) with the color picker and hit Submit.", obs["goal"] 27 | ) 28 | 29 | assert match 30 | 31 | color = match.groups()[0].upper() 32 | 33 | obs, reward, term, trunc, info = env.step( 34 | f"""\ 35 | page.locator("#col").fill("{color}") 36 | page.get_by_role("button", name="Submit").click() 37 | """ 38 | ) 39 | 40 | assert obs["last_action_error"] == "" 41 | assert reward == 1 42 | assert term == True 43 | 44 | env.close() 45 | -------------------------------------------------------------------------------- /tests/miniwob/test_click-scroll-list.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gymnasium as gym 3 | import re 4 | import pytest 5 | 6 | # register gym environments 7 | import browsergym.miniwob 8 | 9 | __SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None 10 | __HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True 11 | 12 | 13 | @pytest.mark.parametrize("seed", range(5)) 14 | def test_cheat(seed): 15 | env = gym.make( 16 | "browsergym/miniwob.click-scroll-list", 17 | headless=__HEADLESS, 18 | slow_mo=__SLOW_MO, 19 | 
action_mapping=None, 20 | ) 21 | obs, info = env.reset(seed=seed) 22 | 23 | assert obs["last_action_error"] == "" 24 | 25 | match = re.match("Select (.+) from the scroll list and click Submit.", obs["goal"]) 26 | 27 | assert match 28 | 29 | options = match.groups()[0].split(", ") 30 | options = '", "'.join(options) 31 | action = f"""\ 32 | page.locator("#options").select_option(["{options}"]) 33 | page.get_by_role("button", name="Submit").click() 34 | """ 35 | 36 | obs, reward, term, trunc, info = env.step(action) 37 | 38 | assert obs["last_action_error"] == "" 39 | assert reward == 1 40 | assert term == True 41 | 42 | env.close() 43 | -------------------------------------------------------------------------------- /tests/core/data/basic_iframe_site/outer-iframe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Shadow DOM Example 5 | 22 | 23 | 24 |
25 |
26 |
27 | 28 |
29 | 30 | 31 | -------------------------------------------------------------------------------- /tests/visualwebarena/test_vwa_tasks_with_reset.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | 5 | import gymnasium as gym 6 | import playwright.sync_api 7 | import pytest 8 | from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed 9 | 10 | # register gym environments 11 | import browsergym.visualwebarena 12 | 13 | __SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None 14 | __HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True 15 | 16 | 17 | from browsergym.visualwebarena import VISUALWEBARENA_TASK_IDS_WITH_RESET 18 | 19 | rng = random.Random(1) 20 | task_ids = rng.sample(VISUALWEBARENA_TASK_IDS_WITH_RESET, 10) 21 | 22 | 23 | @retry( 24 | stop=stop_after_attempt(5), 25 | retry=retry_if_exception_type(playwright.sync_api.TimeoutError), 26 | wait=wait_fixed(2), 27 | reraise=True, 28 | before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."), 29 | ) 30 | @pytest.mark.parametrize("task_id", task_ids) 31 | @pytest.mark.slow 32 | @pytest.mark.serial 33 | def test_env_generic(task_id): 34 | env = gym.make( 35 | f"browsergym/{task_id}", 36 | headless=__HEADLESS, 37 | slow_mo=__SLOW_MO, 38 | ) 39 | obs, info = env.reset() 40 | env.close() 41 | -------------------------------------------------------------------------------- /browsergym/visualwebarena/src/browsergym/visualwebarena/config.py: -------------------------------------------------------------------------------- 1 | TASK_IDS = range(910) 2 | 3 | # import visualwebarena 4 | # import importlib.resources 5 | # import json 6 | # all_configs_str = importlib.resources.files(visualwebarena).joinpath("test_raw.json").read_text() 7 | # all_configs = json.loads(all_configs_str) 8 | # task_ids_with_reset = [task["task_id"] for task in all_configs if task["require_reset"] == True] 9 
| TASK_IDS_WITH_RESET = [ 10 | 4, 11 | 5, 12 | 8, 13 | 9, 14 | 28, 15 | 29, 16 | 30, 17 | 31, 18 | 57, 19 | 76, 20 | 77, 21 | 143, 22 | 144, 23 | 145, 24 | 159, 25 | 160, 26 | 203, 27 | 205, 28 | 208, 29 | 213, 30 | 217, 31 | 223, 32 | 392, 33 | 393, 34 | 394, 35 | 402, 36 | 404, 37 | 405, 38 | 406, 39 | 407, 40 | 408, 41 | 410, 42 | 411, 43 | 412, 44 | 416, 45 | 422, 46 | 423, 47 | 424, 48 | 425, 49 | 426, 50 | 441, 51 | 442, 52 | 443, 53 | 668, 54 | 669, 55 | 670, 56 | 671, 57 | 672, 58 | 673, 59 | 688, 60 | 689, 61 | 711, 62 | 712, 63 | 713, 64 | 714, 65 | 715, 66 | 716, 67 | 717, 68 | 733, 69 | 764, 70 | 765, 71 | 766, 72 | ] 73 | -------------------------------------------------------------------------------- /tests/core/data/basic_iframe_site/basic_iframe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Iframe Example 5 | 6 | 7 | 8 | 9 | 22 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /tests/core/data/basic_shadow_iframe_site/basic_iframe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Iframe Example 5 | 6 | 7 | 8 | 9 | 22 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: false 2 | 3 | default_language_version: 4 | python: python3 5 | 6 | repos: 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v4.2.0 9 | hooks: 10 | - id: trailing-whitespace 11 | exclude: ^(.*)\.md$ 12 | - id: end-of-file-fixer 13 | - id: check-yaml 14 | exclude: ^(.circleci/recipe|recipe) # conda build recipes are templated 15 | - id: check-added-large-files 16 | - repo: https://github.com/pocc/pre-commit-hooks 17 | rev: v1.1.1 18 | hooks: 19 | - id: clang-format 20 | args: [--style=file, -i] 21 | - id: clang-tidy 22 | args: [--fix, 
--fix-errors] 23 | - repo: https://github.com/psf/black 24 | rev: 24.2.0 25 | hooks: 26 | - id: black 27 | args: [--config=./pyproject.toml] 28 | - repo: https://github.com/asottile/blacken-docs 29 | rev: v1.12.1 30 | hooks: 31 | - id: blacken-docs 32 | args: [ '--line-length', '100' ] 33 | additional_dependencies: [black] 34 | - repo: https://github.com/Lucas-C/pre-commit-hooks 35 | rev: v1.5.5 36 | hooks: 37 | - id: forbid-crlf 38 | - id: remove-crlf 39 | # Black does not clear tabs in docstrings 40 | - id: forbid-tabs 41 | files: '.*\.py$' 42 | - id: remove-tabs 43 | files: '.*\.py$' 44 | args: [ '--whitespaces-count', '4' ] -------------------------------------------------------------------------------- /browsergym/core/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-requirements-txt"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "browsergym-core" 7 | description = "BrowserGym: a gym environment for web task automation in the Chromium browser" 8 | authors = [ 9 | {name = "Rim Assouel"}, 10 | {name = "Léo Boisvert"}, 11 | {name = "Massimo Caccia"}, 12 | {name = "Alex Drouin"}, 13 | {name = "Maxime Gasse"}, 14 | {name = "Imene Kerboua"}, 15 | {name = "Alex Lacoste"}, 16 | {name = "Thibault Le Sellier De Chezelles"}, 17 | {name = "Tom Marty"}, 18 | ] 19 | readme = "README.md" 20 | requires-python = ">3.9" 21 | license = {text = "Apache-2.0"} 22 | classifiers = [ 23 | "Development Status :: 3 - Alpha", 24 | "Programming Language :: Python :: 3", 25 | "Operating System :: OS Independent", 26 | "Intended Audience :: Science/Research", 27 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 28 | "License :: OSI Approved :: Apache Software License", 29 | ] 30 | dynamic = ["dependencies", "version"] 31 | 32 | [project.urls] 33 | homepage = "https://github.com/ServiceNow/BrowserGym" 34 | 35 | [tool.hatch.version] 36 | path = 
"src/browsergym/core/__init__.py" 37 | 38 | [tool.hatch.metadata.hooks.requirements_txt] 39 | files = ["requirements.txt"] 40 | 41 | [tool.hatch.build.targets.wheel] 42 | packages = ["src/browsergym"] 43 | -------------------------------------------------------------------------------- /browsergym/miniwob/README.md: -------------------------------------------------------------------------------- 1 | # Miniwob benchmark for BrowserGym 2 | 3 | This package provides `browsergym.miniwob`, which is an unofficial port of the [MiniWoB++](https://miniwob.farama.org/) benchmark for BrowserGym. 4 | 5 | ## Setup 6 | 7 | ### Option 1: Automated setup (Recommended) 8 | 9 | If you're working from the BrowserGym root directory, you can use the Makefile for automated setup: 10 | 11 | ```sh 12 | make setup-miniwob 13 | ``` 14 | 15 | This will: 16 | 17 | - Clone the MiniWoB++ repository 18 | - Reset to the specific commit for reproducibility 19 | - Add the `MINIWOB_URL` to your `.env` file 20 | 21 | Then load the environment variables: 22 | 23 | ```sh 24 | source .env 25 | ``` 26 | 27 | ### Option 2: Manual setup 28 | 29 | 1. Install the package 30 | 31 | ```sh 32 | pip install browsergym-miniwob 33 | ``` 34 | 35 | 1. Clone miniwob (use a specific frozen commit for reproducibility) 36 | 37 | ```sh 38 | git clone git@github.com:Farama-Foundation/miniwob-plusplus.git 39 | git -C "./miniwob-plusplus" reset --hard 7fd85d71a4b60325c6585396ec4f48377d049838 40 | ``` 41 | 42 | 1. Setup Miniwob URL (change `PATH_TO_MINIWOB_CLONED_REPO` here to the absolute path to your `miniwob-plusplus` folder) 43 | 44 | ```sh 45 | export MINIWOB_URL="file:///miniwob/html/miniwob/" 46 | ``` 47 | 48 | Alternatively, one can [setup a simple HTTP server](https://miniwob.farama.org/content/viewing/) and use a proper URL. 
49 | -------------------------------------------------------------------------------- /tests/core/data/basic_shadow_iframe_site/outer-iframe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Shadow DOM Example 5 | 6 | 7 |
8 | 9 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /tests/webarena/test_infeasible.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import logging 3 | import os 4 | import playwright.sync_api 5 | import pytest 6 | 7 | from tenacity import retry, stop_after_attempt, retry_if_exception_type 8 | 9 | # register gym environments 10 | import browsergym.webarena 11 | 12 | 13 | __SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None 14 | __HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True 15 | 16 | INFEAS_TASK_IDS = [101, 115, 166] 17 | FEAS_TASK_IDS = [165, 187, 199] 18 | 19 | 20 | @retry( 21 | stop=stop_after_attempt(5), 22 | retry=retry_if_exception_type(playwright.sync_api.TimeoutError), 23 | reraise=True, 24 | before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."), 25 | ) 26 | @pytest.mark.parametrize( 27 | "task_id,infeasible", 28 | [(task_id, True) for task_id in INFEAS_TASK_IDS] 29 | + [(task_id, False) for task_id in FEAS_TASK_IDS], 30 | ) 31 | @pytest.mark.slow 32 | def test_infeasible(task_id, infeasible): 33 | env = gym.make( 34 | f"browsergym/webarena.{task_id}", 35 | headless=__HEADLESS, 36 | slow_mo=__SLOW_MO, 37 | ) 38 | obs, info = env.reset() 39 | 40 | action = 'report_infeasible("Unachievable task.")' 41 | 42 | obs, reward, term, trunc, info = env.step(action) 43 | 44 | if infeasible: 45 | assert term == True and reward == 1.0 46 | 47 | else: 48 | assert term == True and reward == 0.0 49 | 50 | env.close() 51 | -------------------------------------------------------------------------------- /tests/core/data/test_page_2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Simple Form 6 | 7 | 8 | 9 |

Simple Form

10 | 11 |
12 | 13 |

14 | 15 | 16 |

17 | 18 | 19 |

20 | 21 |
22 |

23 | 24 | 25 |

26 | 27 | 28 | 29 |
30 | 31 | 32 | Text within a non-html tag 33 | 34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |

Text that should not be visible

61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /browsergym/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | build-backend = "setuptools.build_meta" 3 | requires = ["setuptools"] 4 | 5 | [project] 6 | name = "browsergym" 7 | description = "BrowserGym: a gym environment for web task automation in the Chromium browser" 8 | authors = [ 9 | {name = "Rim Assouel"}, 10 | {name = "Léo Boisvert"}, 11 | {name = "Massimo Caccia"}, 12 | {name = "Alex Drouin"}, 13 | {name = "Maxime Gasse"}, 14 | {name = "Imene Kerboua"}, 15 | {name = "Alex Lacoste"}, 16 | {name = "Thibault Le Sellier De Chezelles"}, 17 | {name = "Tom Marty"}, 18 | {name = "Aman Jaiswal"}, 19 | ] 20 | readme = "README.md" 21 | requires-python = ">3.10" 22 | license = {text = "Apache-2.0"} 23 | classifiers = [ 24 | "Development Status :: 3 - Alpha", 25 | "Programming Language :: Python :: 3", 26 | "Operating System :: OS Independent", 27 | "Intended Audience :: Science/Research", 28 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 29 | "License :: OSI Approved :: Apache Software License", 30 | ] 31 | version="0.14.3.dev1" 32 | dependencies = [ 33 | "browsergym-core==0.14.3.dev1", 34 | "browsergym-miniwob==0.14.3.dev1", 35 | "browsergym-webarena==0.14.3.dev1", 36 | "browsergym-visualwebarena==0.14.3.dev1", 37 | "browsergym-assistantbench==0.14.3.dev1", 38 | "browsergym-experiments==0.14.3.dev1", 39 | "browsergym-workarena>=0.4.1", 40 | "weblinx-browsergym>=0.0.2", 41 | "browsergym-webarenalite==0.14.3.dev1" 42 | ] 43 | 44 | [tool.setuptools] 45 | packages = [] # meta distribution, packages are included as dependencies 46 | -------------------------------------------------------------------------------- /docs/src/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | 3 | # -- Project information 4 | 5 | project = "BrowserGym" 6 | copyright = "2024, ServiceNow Research" 7 | author = "ServiceNow Research" 8 | 9 | version = "0.14.3.dev1" 10 | release = version 11 | 12 | # -- General configuration 13 | 14 | extensions = [ 15 | "sphinx.ext.duration", 16 | "sphinx.ext.doctest", 17 | "sphinx.ext.autodoc", 18 | "sphinx.ext.autosummary", 19 | "sphinx.ext.intersphinx", 20 | "sphinx_design", 21 | ] 22 | 23 | intersphinx_mapping = { 24 | "python": ("https://docs.python.org/3/", None), 25 | "sphinx": ("https://www.sphinx-doc.org/en/master/", None), 26 | } 27 | intersphinx_disabled_domains = ["std"] 28 | 29 | templates_path = ["_templates"] 30 | fixed_sidebar = True 31 | 32 | # -- Options for HTML output 33 | 34 | # Automatically extract typehints when specified and place them in 35 | # descriptions of the relevant function/method. 36 | # autodoc_typehints = "description" 37 | 38 | # Don't show class signature with the class' name. 39 | # autodoc_class_signature = "separated" 40 | 41 | html_theme = "pydata_sphinx_theme" 42 | 43 | html_theme_options = { 44 | "show_nav_level": 2, 45 | "navigation_depth": 2, 46 | "show_toc_level": 2, 47 | "icon_links": [ 48 | { 49 | "name": "GitHub", 50 | "url": "https://github.com/ServiceNow/BrowserGym", 51 | "icon": "fa-brands fa-square-github", 52 | "type": "fontawesome", 53 | } 54 | ], 55 | } 56 | 57 | # -- Options for EPUB output 58 | epub_show_urls = "footnote" 59 | -------------------------------------------------------------------------------- /tests/core/data/example.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Example Domain 6 | 7 | 8 | 9 | 10 | 41 | 42 | 43 | 44 |
45 |

Example Domain

46 |

This domain is for use in illustrative examples in documents. You may use this 47 | domain in literature without prior coordination or asking for permission.

48 |

More information...

49 |
50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /tests/assistantbench/test_env_general.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | 5 | import gymnasium as gym 6 | import playwright.sync_api 7 | import pytest 8 | from tenacity import retry, retry_if_exception_type, stop_after_attempt 9 | 10 | # register gym environments 11 | import browsergym.assistantbench 12 | 13 | __SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None 14 | __HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True 15 | 16 | 17 | from browsergym.assistantbench import TEST_AB_TASK_IDS, VALID_AB_TASK_IDS 18 | 19 | rng = random.Random(1) 20 | valid_task_ids = rng.sample(VALID_AB_TASK_IDS, 10) 21 | test_task_ids = rng.sample(TEST_AB_TASK_IDS, 10) 22 | 23 | 24 | @retry( 25 | stop=stop_after_attempt(5), 26 | retry=retry_if_exception_type(playwright.sync_api.TimeoutError), 27 | reraise=True, 28 | before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."), 29 | ) 30 | @pytest.mark.parametrize("task_id", valid_task_ids + test_task_ids) 31 | @pytest.mark.slow 32 | def test_valid_env(task_id): 33 | env = gym.make( 34 | f"browsergym/{task_id}", 35 | headless=__HEADLESS, 36 | slow_mo=__SLOW_MO, 37 | ) 38 | obs, info = env.reset() 39 | assert not obs["last_action_error"] 40 | 41 | obs, reward, terminated, truncated, info = env.step("noop(0)") 42 | assert not obs["last_action_error"] 43 | assert not (terminated or truncated) 44 | 45 | obs, reward, terminated, truncated, info = env.step('send_msg_to_user("something")') 46 | assert not obs["last_action_error"] 47 | assert terminated 48 | 49 | env.close() 50 | -------------------------------------------------------------------------------- /browsergym/assistantbench/src/browsergym/assistantbench/__init__.py: -------------------------------------------------------------------------------- 1 | 
from browsergym.core.registration import register_task 2 | 3 | from . import task 4 | 5 | TOY_AB_TASK_IDS = [] 6 | VALID_AB_TASK_IDS = [] 7 | TEST_AB_TASK_IDS = [] 8 | 9 | 10 | # register a toy easy task for testing implementation 11 | gym_id = f"assistantbench.imp.0" 12 | register_task( 13 | gym_id, 14 | task.AssistantBenchTask, 15 | task_kwargs={ 16 | "task_id": f"imp.0", 17 | }, 18 | default_task_kwargs={ 19 | "save_predictions": False, # can be overriden 20 | }, 21 | ) 22 | TOY_AB_TASK_IDS.append(gym_id) 23 | 24 | # register the AssistantBench dev set 25 | for task_id in range(33): 26 | gym_id = f"assistantbench.validation.{task_id}" 27 | register_task( 28 | gym_id, 29 | task.AssistantBenchTask, 30 | task_kwargs={ 31 | "task_id": f"validation.{task_id}", 32 | }, 33 | default_task_kwargs={ 34 | "save_predictions": False, # can be overriden 35 | }, 36 | ) 37 | VALID_AB_TASK_IDS.append(gym_id) 38 | 39 | # register the AssistantBench test set 40 | for task_id in range(181): 41 | gym_id = f"assistantbench.test.{task_id}" 42 | register_task( 43 | gym_id, 44 | task.AssistantBenchTask, 45 | task_kwargs={ 46 | "task_id": f"test.{task_id}", 47 | }, 48 | default_task_kwargs={ 49 | "save_predictions": True, # can be overriden 50 | }, 51 | ) 52 | TEST_AB_TASK_IDS.append(gym_id) 53 | 54 | ALL_AB_TASK_IDS = TOY_AB_TASK_IDS + VALID_AB_TASK_IDS + TEST_AB_TASK_IDS 55 | -------------------------------------------------------------------------------- /browsergym/visualwebarena/src/browsergym/visualwebarena/utils.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import io 3 | import PIL.Image 4 | import requests 5 | 6 | from typing import Literal 7 | 8 | 9 | def image_url_to_pil_image(image_url: str) -> PIL.Image: 10 | if not image_url.startswith("http"): 11 | raise ValueError(f"Unexpected image URL: {image_url}") 12 | response = requests.get(image_url, stream=True) 13 | if response.status_code != 200: 14 | raise ValueError( 
15 | f"Could not download image from url {image_url} (status code {response.status_code})" 16 | ) 17 | img = PIL.Image.open(io.BytesIO(response.content)) 18 | return img 19 | 20 | 21 | def data_uri_to_pil_image(data_uri: str) -> PIL.Image: 22 | if data_uri.startswith("data:image/png;base64,"): 23 | image_data = base64.b64decode(data_uri.removeprefix("data:image/png;base64,")) 24 | elif data_uri.startswith("data:image/jpeg;base64,"): 25 | image_data = base64.b64decode(data_uri.removeprefix("data:image/jpeg;base64,")) 26 | else: 27 | raise ValueError(f"Unexpected image encoding: {data_uri}") 28 | img = PIL.Image.open(io.BytesIO(image_data)) 29 | return img 30 | 31 | 32 | def pil_image_to_data_uri(image: PIL.Image, format: Literal["png", "jpeg"] = "png") -> str: 33 | assert format in ("png", "jpeg") 34 | with io.BytesIO() as image_buffer: 35 | image.save(image_buffer, format=format.upper()) 36 | byte_data = image_buffer.getvalue() 37 | image_b64 = base64.b64encode(byte_data).decode("utf-8") 38 | image_b64 = f"data:image/{format};base64," + image_b64 39 | return image_b64 40 | -------------------------------------------------------------------------------- /browsergym/experiments/src/browsergym/experiments/utils.py: -------------------------------------------------------------------------------- 1 | import tiktoken 2 | 3 | 4 | def count_tokens(text, model="gpt-4"): 5 | """Count the number of tokens in a text.""" 6 | 7 | return len(tiktoken.encoding_for_model(model).encode(text)) 8 | 9 | 10 | def count_messages_token(messages, model="gpt-4"): 11 | """Count the number of tokens in a list of messages. 12 | 13 | Args: 14 | messages (list): a list of messages, each message can be a string or a 15 | list of dicts or an object with a content attribute. 16 | model (str): the model to use for tokenization. 17 | 18 | Returns: 19 | int: the number of tokens. 
20 | """ 21 | token_count = 0 22 | for message in messages: 23 | if hasattr(message, "content"): 24 | message = message.content 25 | elif isinstance(message, dict) and "content" in message: 26 | message = message["content"] 27 | 28 | if isinstance(message, str): 29 | token_count += count_tokens(message, model) 30 | # handles messages with image content 31 | elif isinstance(message, (list, tuple)): 32 | for part in message: 33 | if not isinstance(part, dict): 34 | raise ValueError( 35 | f"The message is expected to be a list of dicts, but got list of {type(message)}" 36 | ) 37 | if part["type"] == "text": 38 | token_count += count_tokens(part["text"], model) 39 | else: 40 | raise ValueError( 41 | f"The message is expected to be a string or a list of dicts, but got {type(message)}" 42 | ) 43 | return token_count 44 | -------------------------------------------------------------------------------- /browsergym/core/src/browsergym/core/javascript/frame_unmark_elements.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Go through all DOM elements in the frame (including shadowDOMs), 3 | * and cleanup previously stored data in ARIA attributes. 4 | */ 5 | () => { 6 | // get all DOM elements in the current frame (does not include elements in shadowDOMs) 7 | let elements = Array.from(document.querySelectorAll('*')); 8 | let i = 0; 9 | while (i < elements.length) { 10 | const elem = elements[i]; 11 | // add shadowDOM elements to the elements array, in such a way that order is preserved 12 | // TODO: do we really need the order preserved? 
13 | if (elem.shadowRoot !== null) { 14 | elements = new Array( 15 | ...Array.prototype.slice.call(elements, 0, i + 1), 16 | ...Array.from(elem.shadowRoot.querySelectorAll("*")), 17 | ...Array.prototype.slice.call(elements, i + 1) 18 | ); 19 | } 20 | i++; 21 | // Hack: remove custom data stored in ARIA attributes 22 | // - elem_global_id: global browsergym identifier 23 | pop_bid_from_attribute(elem, "aria-description"); 24 | pop_bid_from_attribute(elem, "aria-roledescription"); // fallback for generic nodes 25 | } 26 | } 27 | 28 | function pop_bid_from_attribute(elem, attr) { 29 | let bid_regex = /^browsergym_id[^\s]*\s/; 30 | if (elem.hasAttribute(attr)) { 31 | let content = elem.getAttribute(attr); 32 | let original_content = content.replace(bid_regex, ''); 33 | if (original_content) { 34 | elem.setAttribute(attr, original_content); 35 | } 36 | else { 37 | elem.removeAttribute(attr); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /browsergym/experiments/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-requirements-txt"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "browsergym-experiments" 7 | description = "Experimentation tools for BrowserGym" 8 | authors = [ 9 | {name = "Massimo Caccia"}, 10 | {name = "Alex Lacoste"}, 11 | {name = "Thibault Le Sellier De Chezelles"}, 12 | {name = "Maxime Gasse"}, 13 | ] 14 | readme = "README.md" 15 | requires-python = ">3.7" 16 | license = {text = "Apache-2.0"} 17 | classifiers = [ 18 | "Development Status :: 3 - Alpha", 19 | "Programming Language :: Python :: 3", 20 | "Operating System :: OS Independent", 21 | "Intended Audience :: Science/Research", 22 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 23 | "License :: OSI Approved :: Apache Software License", 24 | ] 25 | dynamic = ["dependencies", "version"] 26 | 27 | 
[project.optional-dependencies] 28 | miniwob = [ 29 | "browsergym-miniwob", 30 | ] 31 | workarena = [ 32 | "browsergym-workarena", 33 | ] 34 | webarena = [ 35 | "browsergym-webarena", 36 | ] 37 | visualwebarena = [ 38 | "browsergym-visualwebarena", 39 | ] 40 | assistantbench = [ 41 | "browsergym-assistantbench", 42 | ] 43 | weblinx = [ 44 | "weblinx_browsergym", 45 | ] 46 | all = [ 47 | "browsergym-experiments[miniwob]", 48 | "browsergym-experiments[workarena]", 49 | "browsergym-experiments[webarena]", 50 | "browsergym-experiments[visualwebarena]", 51 | "browsergym-experiments[assistantbench]", 52 | "browsergym-experiments[weblinx]", 53 | ] 54 | 55 | [project.urls] 56 | homepage = "https://github.com/ServiceNow/BrowserGym" 57 | 58 | [tool.hatch.version] 59 | path = "../core/src/browsergym/core/__init__.py" 60 | 61 | [tool.hatch.metadata.hooks.requirements_txt] 62 | files = ["requirements.txt"] 63 | 64 | [tool.hatch.build.targets.wheel] 65 | packages = ["src/browsergym", "src/bgym"] 66 | -------------------------------------------------------------------------------- /browsergym/visualwebarena/README.md: -------------------------------------------------------------------------------- 1 | # VisualWebArena benchmark for BrowserGym 2 | 3 | This package provides `browsergym.visualwebarena`, which is an unofficial port of the [VisualWebArena](https://jykoh.com/vwa) benchmark for BrowserGym. 4 | 5 | Note: the original VisualWebArena codebase has been slightly adapted to ensure compatibility.
6 | 7 | 8 | ## Server installation 9 | 10 | You have two options to setup your webarena instance: 11 | - option 1: follow the official [visualwebarena README](https://github.com/web-arena-x/visualwebarena/blob/main/environment_docker/README.md) 12 | - option 2: use our [unofficial setup scripts](https://github.com/gasse/webarena-setup/tree/main/visualwebarena) 13 | 14 | We recommend **option 2** as it allows you to easily customize the ports of each webarena domain, and offers a reset functionality that allows browsergym to trigger a full instance reset remotely. 15 | 16 | ## Setup 17 | 18 | 1. Install the package 19 | ```sh 20 | pip install browsergym-visualwebarena 21 | ``` 22 | 23 | 2. Download tokenizer resources 24 | ```sh 25 | python -c "import nltk; nltk.download('punkt_tab')" 26 | ``` 27 | 28 | 3. Setup the URLs as environment variables. The ports for each domain here should correspond to those you used when setting up your webarena instance. Note also the `VWA_` prefix which is specific to browsergym. 29 | ```sh 30 | BASE_URL= # example: "http://myazuremachine.eastus.cloudapp.azure.com" 31 | 32 | # visualwebarena environment variables (change ports as needed) 33 | export VWA_CLASSIFIEDS="$BASE_URL:8083" 34 | export VWA_CLASSIFIEDS_RESET_TOKEN="4b61655535e7ed388f0d40a93600254c" 35 | export VWA_SHOPPING="$BASE_URL:8082" 36 | export VWA_REDDIT="$BASE_URL:8080" 37 | export VWA_WIKIPEDIA="$BASE_URL:8081" 38 | export VWA_HOMEPAGE="$BASE_URL:80" 39 | 40 | # if your webarena instance offers the FULL_RESET feature (optional) 41 | export VWA_FULL_RESET="$BASE_URL:7565" 42 | 43 | # otherwise, be sure to NOT set VWA_FULL_RESET, or set it to an empty string 44 | export VWA_FULL_RESET="" 45 | ``` 46 | 47 | 4. Setup an OpenAI API key 48 | 49 | ```sh 50 | export OPENAI_API_KEY=...
51 | ``` 52 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | install: 2 | @echo "--- 🚀 Installing project dependencies ---" 3 | uv pip install -e ./browsergym/core -e ./browsergym/miniwob -e ./browsergym/webarena -e ./browsergym/webarenalite -e ./browsergym/visualwebarena/ -e ./browsergym/experiments -e ./browsergym/assistantbench -e ./browsergym/ 4 | uv run playwright install chromium 5 | 6 | install-demo: 7 | @echo "--- 🚀 Installing demo dependencies ---" 8 | uv pip install -r demo_agent/requirements.txt 9 | uv run playwright install chromium 10 | 11 | demo: 12 | @echo "--- 🚀 Running demo agent ---" 13 | (set -x && cd demo_agent && python run_demo.py) 14 | 15 | setup-miniwob: 16 | @echo "--- 🤖 Setting up MiniWoB++ ---" 17 | @if [ ! -d "miniwob-plusplus" ]; then \ 18 | echo "Cloning MiniWoB++ repository..."; \ 19 | git clone https://github.com/Farama-Foundation/miniwob-plusplus.git; \ 20 | else \ 21 | echo "MiniWoB++ repository already exists, skipping clone..."; \ 22 | fi 23 | @echo "Resetting to specific commit for reproducibility..." 24 | git -C "./miniwob-plusplus" reset --hard 7fd85d71a4b60325c6585396ec4f48377d049838 25 | @echo "Adding MINIWOB_URL to .env file..." 26 | @echo "MINIWOB_URL=\"file://$(shell pwd)/miniwob-plusplus/miniwob/html/miniwob/\"" >> .env 27 | @echo "✅ MiniWoB++ setup complete!" 28 | @echo "💡 To use MiniWoB++, load the environment variables:" 29 | @echo " source .env" 30 | 31 | test-core: 32 | @echo "--- 🧪 Running tests ---" 33 | uv run pytest -n auto ./tests/core 34 | clean-miniwob: 35 | @echo "--- 🧹 Cleaning MiniWoB++ installation ---" 36 | rm -rf miniwob-plusplus 37 | @echo "✅ MiniWoB++ installation cleaned!" 
38 | 39 | help: 40 | @echo "Available targets:" 41 | @echo " install - Install project dependencies" 42 | @echo " setup-miniwob - Setup MiniWoB++ dependencies" 43 | @echo " install-demo - Install demo dependencies" 44 | @echo " demo - Run demo agent" 45 | @echo " test-core - Run core tests" 46 | @echo " clean-miniwob - Remove MiniWoB++ directory" 47 | @echo " help - Show this help message" 48 | 49 | .PHONY: install setup-miniwob install-demo demo test-core clean-miniwob help 50 | -------------------------------------------------------------------------------- /tests/core/data/basic_shadow_dom_site/basic_shadow_dom.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Unit Test with Complex Nested Shadow DOM 5 | 6 | 7 |
8 |
9 | 10 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /browsergym/webarena/README.md: -------------------------------------------------------------------------------- 1 | # WebArena benchmark for BrowserGym 2 | 3 | This package provides `browsergym.webarena`, which is an unofficial port of the [WebArena](https://webarena.dev/) benchmark for BrowserGym. 4 | 5 | Note: the original WebArena codebase has been slightly adapted to ensure compatibility. 6 | 7 | ## Server installation 8 | 9 | You have two options to setup your webarena instance: 10 | - option 1: follow the official [webarena README](https://github.com/web-arena-x/webarena/blob/main/environment_docker/README.md) 11 | - option 2: use our [unofficial setup scripts](https://github.com/gasse/webarena-setup/tree/main/webarena) 12 | 13 | We recommend **option 2** as it allows you to easily customize the ports of each webarena domain, and offers a reset functionality that allows browsergym to trigger a full instance reset remotely. 14 | 15 | ## Setup 16 | 17 | 1. Install the package 18 | ```sh 19 | pip install browsergym-webarena 20 | ``` 21 | 22 | 2. Download tokenizer resources 23 | ```sh 24 | python -c "import nltk; nltk.download('punkt_tab')" 25 | ``` 26 | 27 | 3. Setup the URLs as environment variables. The ports for each domain here should correspond to those you used when setting up your webarena instance. Note also the `WA_` prefix which is specific to browsergym. 
28 | ```sh 29 | BASE_URL= # example: "http://myazuremachine.eastus.cloudapp.azure.com" 30 | 31 | # webarena environment variables (change ports as needed) 32 | export WA_SHOPPING="$BASE_URL:8082/" 33 | export WA_SHOPPING_ADMIN="$BASE_URL:8083/admin" 34 | export WA_REDDIT="$BASE_URL:8080" 35 | export WA_GITLAB="$BASE_URL:9001" 36 | export WA_WIKIPEDIA="$BASE_URL:8081/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing" 37 | export WA_MAP="$BASE_URL:443" 38 | export WA_HOMEPAGE="$BASE_URL:80" 39 | 40 | # if your webarena instance offers the FULL_RESET feature (optional) 41 | export WA_FULL_RESET="$BASE_URL:7565" 42 | 43 | # otherwise, be sure to NOT set WA_FULL_RESET, or set it to an empty string 44 | export WA_FULL_RESET="" 45 | ``` 46 | 47 | 4. Setup an OpenAI API key 48 | 49 | ```sh 50 | export OPENAI_API_KEY=... 51 | ``` 52 | 53 | > **_NOTE:_** be mindful of costs, as WebArena will call GPT4 for certain evaluations ([llm_fuzzy_match](https://github.com/web-arena-x/webarena/blob/1469b7c9d8eaec3177855b3131569751f43a40d6/evaluation_harness/helper_functions.py#L146C5-L146C20)). 
54 | -------------------------------------------------------------------------------- /tests/miniwob/test_click-menu-2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gymnasium as gym 3 | import re 4 | import pytest 5 | 6 | # register gym environments 7 | import browsergym.miniwob 8 | 9 | __SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None 10 | __HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True 11 | 12 | 13 | @pytest.mark.parametrize("seed", range(5)) 14 | def test_cheat(seed): 15 | env = gym.make( 16 | "browsergym/miniwob.click-menu-2", 17 | headless=__HEADLESS, 18 | slow_mo=__SLOW_MO, 19 | action_mapping=None, 20 | ) 21 | obs, info = env.reset(seed=seed) 22 | 23 | assert obs["last_action_error"] == "" 24 | 25 | match1 = re.match( 26 | 'Click the "Menu" button, and then find and click on the item labeled "(.+)".', obs["goal"] 27 | ) 28 | match2 = re.match( 29 | 'Click the "Menu" button, and then find and click on the item with the "(.+)" icon.', 30 | obs["goal"], 31 | ) 32 | 33 | assert match1 or match2 34 | 35 | if match1: 36 | item_label = match1.groups()[0] 37 | item_classname = { 38 | "Save": "ui-icon-disk", 39 | "Prev": "ui-icon-seek-start", 40 | "Stop": "ui-icon-stop", 41 | "Play": "ui-icon-play", 42 | "Next": "ui-icon-seek-end", 43 | "Zoom In": "ui-icon-zoomin", 44 | "Zoom Out": "ui-icon-zoomout", 45 | }[item_label] 46 | else: 47 | item_classname = match2.groups()[0] 48 | 49 | action = f"""\ 50 | page.get_by_text("Menu").click() 51 | """ 52 | 53 | obs, reward, term, trunc, info = env.step(action) 54 | 55 | assert obs["last_action_error"] == "" 56 | assert reward == 0 57 | assert term == False 58 | 59 | if item_classname in ("ui-icon-seek-start", "ui-icon-stop", "ui-icon-play", "ui-icon-seek-end"): 60 | 61 | action = f"""\ 62 | page.get_by_text("Playback").click() 63 | """ 64 | 65 | obs, reward, term, trunc, info = env.step(action) 66 | 67 | assert obs["last_action_error"] 
== "" 68 | assert reward == 0 69 | assert term == False 70 | 71 | action = f"""\ 72 | page.locator(".{item_classname}").click() 73 | """ 74 | 75 | obs, reward, term, trunc, info = env.step(action) 76 | 77 | assert obs["last_action_error"] == "" 78 | assert reward == 1 79 | assert term == True 80 | 81 | env.close() 82 | -------------------------------------------------------------------------------- /browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_dicts.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | import numpy as np 4 | 5 | from .utils import _align_bags 6 | 7 | 8 | def calculate_f1_score(precision, recall): 9 | if precision + recall == 0: 10 | return 0 # Handle the case to avoid division by zero 11 | return 2 * (precision * recall) / (precision + recall) 12 | 13 | 14 | def calc_recall(pred: Dict, gold: Dict, use_gold_for_eval: bool): 15 | from .evaluate_factory import get_evaluator_from_gold_answer 16 | 17 | recall = [] 18 | for gold_key, gold_value in gold.items(): 19 | pred_value = pred.get(gold_key) 20 | gold_value = fix_number(gold_value) 21 | pred_value = fix_number(pred_value) 22 | if gold_key not in pred: 23 | recall.append(0) 24 | else: 25 | evaluator = ( 26 | get_evaluator_from_gold_answer(type(gold_value)) 27 | if use_gold_for_eval 28 | else get_evaluator_from_gold_answer(type(pred_value)) 29 | ) 30 | if type(pred_value) != type(gold_value): 31 | recall.append(0) 32 | continue 33 | recall.append(evaluator(pred_value, gold_value)) 34 | avg_recall = np.average(recall) 35 | return avg_recall 36 | 37 | 38 | def fix_number(number): 39 | 40 | if type(number) == str: 41 | copy_ans = number 42 | copy_ans = " ".join( 43 | " ".join(" ".join(copy_ans.split("$")).split("%")).split("sqft") 44 | ).strip() 45 | copy_ans = copy_ans.strip() 46 | copy_ans = copy_ans.replace(",", ".") 47 | try: 48 | return float(copy_ans) 49 | except: 50 | return 
number 51 | elif type(number) == int: 52 | return float(number) 53 | else: 54 | return number 55 | 56 | 57 | def evaluate_pair_of_dicts(pred: Dict, gold: Dict): 58 | recall = calc_recall(pred, gold, True) 59 | precision = calc_recall(gold, pred, False) 60 | f1 = calculate_f1_score(precision, recall) 61 | return f1 62 | 63 | 64 | def evaluate_dicts(pred: List[Dict], gold: List[Dict]): 65 | if not (type(pred) == dict or len(pred) == 0 or (type(pred) == list and type(pred[0]) == dict)): 66 | return 0 67 | max_alignment_scores = _align_bags(pred, gold, evaluate_pair_of_dicts) 68 | return np.average(max_alignment_scores) 69 | -------------------------------------------------------------------------------- /browsergym/webarenalite/src/browsergym/webarenalite/config.py: -------------------------------------------------------------------------------- 1 | TASK_IDS = [ 2 | 4, 3 | 7, 4 | 15, 5 | 20, 6 | 23, 7 | 27, 8 | 33, 9 | 37, 10 | 43, 11 | 44, 12 | 48, 13 | 56, 14 | 58, 15 | 65, 16 | 69, 17 | 71, 18 | 75, 19 | 77, 20 | 82, 21 | 88, 22 | 93, 23 | 95, 24 | 96, 25 | 97, 26 | 98, 27 | 103, 28 | 109, 29 | 115, 30 | 117, 31 | 118, 32 | 123, 33 | 125, 34 | 127, 35 | 131, 36 | 135, 37 | 139, 38 | 144, 39 | 149, 40 | 155, 41 | 156, 42 | 157, 43 | 162, 44 | 167, 45 | 169, 46 | 173, 47 | 182, 48 | 190, 49 | 196, 50 | 202, 51 | 205, 52 | 211, 53 | 215, 54 | 220, 55 | 221, 56 | 225, 57 | 227, 58 | 235, 59 | 236, 60 | 240, 61 | 247, 62 | 250, 63 | 254, 64 | 258, 65 | 259, 66 | 268, 67 | 270, 68 | 276, 69 | 283, 70 | 285, 71 | 287, 72 | 288, 73 | 296, 74 | 300, 75 | 311, 76 | 313, 77 | 318, 78 | 321, 79 | 324, 80 | 333, 81 | 335, 82 | 348, 83 | 349, 84 | 354, 85 | 357, 86 | 361, 87 | 367, 88 | 368, 89 | 369, 90 | 374, 91 | 376, 92 | 381, 93 | 382, 94 | 383, 95 | 384, 96 | 386, 97 | 387, 98 | 392, 99 | 401, 100 | 404, 101 | 415, 102 | 419, 103 | 423, 104 | 426, 105 | 431, 106 | 440, 107 | 448, 108 | 454, 109 | 458, 110 | 464, 111 | 466, 112 | 470, 113 | 476, 114 | 485, 115 | 488, 116 | 
491, 117 | 497, 118 | 505, 119 | 506, 120 | 509, 121 | 514, 122 | 516, 123 | 521, 124 | 524, 125 | 528, 126 | 534, 127 | 538, 128 | 548, 129 | 566, 130 | 567, 131 | 574, 132 | 577, 133 | 582, 134 | 599, 135 | 601, 136 | 605, 137 | 612, 138 | 619, 139 | 626, 140 | 631, 141 | 641, 142 | 645, 143 | 652, 144 | 657, 145 | 668, 146 | 673, 147 | 678, 148 | 682, 149 | 686, 150 | 693, 151 | 704, 152 | 710, 153 | 714, 154 | 720, 155 | 729, 156 | 733, 157 | 741, 158 | 745, 159 | 748, 160 | 760, 161 | 762, 162 | 768, 163 | 791, 164 | 798, 165 | 809, 166 | 811, 167 | ] 168 | -------------------------------------------------------------------------------- /tests/visualwebarena/test_vwa_tasks_without_reset.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | 5 | import gymnasium as gym 6 | import playwright.sync_api 7 | import pytest 8 | from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed 9 | 10 | # register gym environments 11 | import browsergym.visualwebarena 12 | 13 | __SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None 14 | __HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True 15 | 16 | 17 | from browsergym.visualwebarena import VISUALWEBARENA_TASK_IDS_WITHOUT_RESET 18 | 19 | rng = random.Random(1) 20 | task_ids = rng.sample(VISUALWEBARENA_TASK_IDS_WITHOUT_RESET, 25) 21 | 22 | 23 | @retry( 24 | stop=stop_after_attempt(5), 25 | retry=retry_if_exception_type(playwright.sync_api.TimeoutError), 26 | wait=wait_fixed(2), 27 | reraise=True, 28 | before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."), 29 | ) 30 | @pytest.mark.parametrize("task_id", task_ids) 31 | @pytest.mark.slow 32 | def test_env_generic(task_id): 33 | env = gym.make( 34 | f"browsergym/{task_id}", 35 | headless=__HEADLESS, 36 | slow_mo=__SLOW_MO, 37 | ) 38 | obs, info = env.reset() 39 | env.close() 40 | 41 | 42 | @retry( 43 | stop=stop_after_attempt(5), 44 | 
retry=retry_if_exception_type(playwright.sync_api.TimeoutError), 45 | wait=wait_fixed(2), 46 | reraise=True, 47 | before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."), 48 | ) 49 | def test_domain_safeguard(): 50 | env = gym.make( 51 | f"browsergym/visualwebarena.398", 52 | headless=__HEADLESS, 53 | slow_mo=__SLOW_MO, 54 | ) 55 | obs, info = env.reset() 56 | assert not obs["last_action_error"] 57 | 58 | obs, reward, terminated, truncated, info = env.step("new_tab()") 59 | assert not obs["last_action_error"] 60 | assert not (terminated or truncated) 61 | 62 | obs, reward, terminated, truncated, info = env.step("tab_close()") 63 | assert not obs["last_action_error"] 64 | assert not (terminated or truncated) 65 | 66 | obs, reward, terminated, truncated, info = env.step("tab_focus(0)") 67 | assert not obs["last_action_error"] 68 | assert not (terminated or truncated) 69 | 70 | obs, reward, terminated, truncated, info = env.step('goto("http://www.google.com")') 71 | assert not obs["last_action_error"] 72 | assert terminated 73 | 74 | env.close() 75 | -------------------------------------------------------------------------------- /browsergym/core/src/browsergym/core/action/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Any 3 | 4 | import playwright.sync_api 5 | 6 | from . import get_global_demo_mode 7 | 8 | 9 | class AbstractActionSet(ABC): 10 | def __init__(self, strict: bool = False): 11 | self.strict = strict 12 | 13 | @abstractmethod 14 | def describe(self, with_long_description: bool = True, with_examples: bool = True) -> str: 15 | """ 16 | Returns a textual description of this action space. 17 | """ 18 | 19 | @abstractmethod 20 | def example_action(self, abstract: bool) -> str: 21 | """ 22 | Returns an example action as a string. 
23 | """ 24 | 25 | @abstractmethod 26 | def to_python_code(self, action) -> str: 27 | """ 28 | Converts the given action to browsergym-compatible python code. 29 | 30 | Args: 31 | action: the action to convert. 32 | 33 | Returns: 34 | Executable python code that performs the action in a browsergym environment. 35 | """ 36 | 37 | def to_tool_descriptor(self) -> list[Any]: 38 | """ 39 | Converts the action set to a tool descriptor. 40 | 41 | Returns: 42 | A list of dictionaries describing the actions in the action set. 43 | """ 44 | pass 45 | 46 | 47 | def execute_python_code( 48 | code: str, 49 | page: playwright.sync_api.Page, 50 | send_message_to_user: callable, 51 | report_infeasible_instructions: callable, 52 | ): 53 | """ 54 | Executes Python code in a new context, except for a playwright `page` object and a `send_message_to_user` function. 55 | 56 | WARNING: this is not safe! 57 | https://stackoverflow.com/questions/77655440/can-you-protect-a-python-variable-with-exec 58 | 59 | Args: 60 | code: the Python code to execute, as a string. 61 | page: the playwright page that will be made accessible to the code. 62 | send_message_to_user: utility function that will be made accessible to the code. It should take one text argument. 63 | report_infeasible_instructions: utility function that will be made accessible to the code. It should take one text argument. 
64 | """ 65 | 66 | globals = { 67 | "page": page, 68 | "send_message_to_user": send_message_to_user, 69 | "report_infeasible_instructions": report_infeasible_instructions, 70 | "DEMO_MODE": get_global_demo_mode(), 71 | } 72 | 73 | exec(code, globals) 74 | -------------------------------------------------------------------------------- /tests/experiments/test_exp_loop.py: -------------------------------------------------------------------------------- 1 | import re 2 | import tempfile 3 | import logging 4 | import dataclasses 5 | 6 | from browsergym.core.action.highlevel import HighLevelActionSet 7 | from browsergym.experiments.agent import Agent 8 | from browsergym.experiments.loop import AbstractAgentArgs, EnvArgs, ExpArgs, get_exp_result 9 | from browsergym.utils.obs import flatten_axtree_to_str 10 | 11 | 12 | class MiniwobTestAgent(Agent): 13 | 14 | action_set = HighLevelActionSet(subsets="bid") 15 | 16 | def obs_preprocessor(self, obs: dict): 17 | return {"axtree_txt": flatten_axtree_to_str(obs["axtree_object"])} 18 | 19 | def get_action(self, obs: dict) -> tuple[str, dict]: 20 | match = re.search(r"^\s*\[(\d+)\].*button", obs["axtree_txt"], re.MULTILINE | re.IGNORECASE) 21 | 22 | if match: 23 | bid = match.group(1) 24 | action = f'click("{bid}")' 25 | else: 26 | raise Exception("Can't find the button's bid") 27 | 28 | return action, dict(think="I'm clicking the button as requested.") 29 | 30 | 31 | @dataclasses.dataclass 32 | class MiniwobTestAgentArgs(AbstractAgentArgs): 33 | def make_agent(self): 34 | return MiniwobTestAgent() 35 | 36 | 37 | def test_run_exp(): 38 | exp_args = ExpArgs( 39 | agent_args=MiniwobTestAgentArgs(), 40 | env_args=EnvArgs(task_name="miniwob.click-test", task_seed=42), 41 | ) 42 | 43 | with tempfile.TemporaryDirectory() as tmp_dir: 44 | exp_args.prepare(tmp_dir) 45 | exp_args.run() 46 | exp_result = get_exp_result(exp_args.exp_dir) 47 | exp_record = exp_result.get_exp_record() 48 | 49 | target = { 50 | "env_args.task_name": 
"miniwob.click-test", 51 | "env_args.task_seed": 42, 52 | "env_args.headless": True, 53 | "env_args.record_video": False, 54 | "n_steps": 1, 55 | "cum_reward": 1.0, 56 | "terminated": True, 57 | "truncated": False, 58 | } 59 | 60 | assert len(exp_result.steps_info) == 2 61 | 62 | for key, target_val in target.items(): 63 | assert key in exp_record 64 | assert exp_record[key] == target_val 65 | 66 | # TODO investigate why it's taking almost 5 seconds to solve 67 | assert exp_record["stats.cum_step_elapsed"] < 5 68 | if exp_record["stats.cum_step_elapsed"] > 3: 69 | t = exp_record["stats.cum_step_elapsed"] 70 | logging.warning( 71 | f"miniwob.click-test is taking {t:.2f}s (> 3s) to solve with an oracle." 72 | ) 73 | -------------------------------------------------------------------------------- /docs/src/environments/workarena.rst: -------------------------------------------------------------------------------- 1 | WorkArena 2 | ^^^^^^^^^ 3 | 4 | `BrowserGym` integrates `WebArena` enviroment. For more information about this enviroment, please refer to the `WorkArena `_ official documentation. 5 | 6 | 7 | BrowserGym API 8 | """""""""""""" 9 | 10 | .. currentmodule:: browsergym 11 | 12 | .. autosummary:: 13 | :recursive: 14 | :toctree: generated 15 | :caption: WorkArena 16 | 17 | workarena 18 | 19 | 20 | Usage 21 | """"" 22 | 23 | **Create a ServiceNow Developer Instance** 24 | 25 | * Go to https://developer.servicenow.com/ and create an account. 26 | 27 | * Click on Request an instance and select the Washington release (initializing the instance will take a few minutes) 28 | 29 | * Once the instance is ready, you should see your instance URL and credentials. If not, click Return to the Developer Portal, then navigate to Manage instance password and click Reset instance password. 30 | 31 | * You should now see your URL and credentials. 
Based on this information, set the following environment variables: 32 | 33 | * SNOW_INSTANCE_URL: The URL of your ServiceNow developer instance 34 | 35 | * SNOW_INSTANCE_UNAME: The username, should be "admin" 36 | 37 | * SNOW_INSTANCE_PWD: The password, make sure you place the value in quotes "" and be mindful of escaping special shell characters. Running echo $SNOW_INSTANCE_PWD should print the correct password. 38 | 39 | * Log into your instance via a browser using the admin credentials. Close any popup that appears on the main screen (e.g., agreeing to analytics). 40 | 41 | 42 | **Install WorkArena and Initialize your Instance** 43 | 44 | Run the following command to install WorkArena in the BrowserGym environment: 45 | 46 | .. code:: bash 47 | 48 | pip install browsergym-workarena 49 | 50 | 51 | Then, run this command in a terminal to upload the benchmark data to your ServiceNow instance: 52 | 53 | .. code:: bash 54 | 55 | workarena-install 56 | 57 | 58 | Finally, install Playwright: 59 | 60 | .. code:: bash 61 | 62 | playwright install chromium 63 | 64 | 65 | Your installation is now complete! 🎉 66 | 67 | **Run a task from the benchmark suite** 68 | 69 | ..
code-block:: python 70 | 71 | import gymnasium as gym 72 | import browsergym.workarena 73 | 74 | env = gym.make("browsergym/workarena.servicenow.filter-asset-list") 75 | obs, info = env.reset() 76 | done = False 77 | 78 | while not done: 79 | action = "noop()" 80 | obs, reward, terminated, truncated, info = env.step(action) 81 | done = terminated or truncated 82 | print(f"Reward: {reward}, Done: {done}, Info: {info}") 83 | 84 | env.close() 85 | -------------------------------------------------------------------------------- /tests/assistantbench/test_evaluation.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pathlib 3 | 4 | import gymnasium as gym 5 | import pytest 6 | 7 | from browsergym.assistantbench.evaluation.evaluator import question_scorer 8 | from browsergym.experiments.benchmark.metadata.utils import ( 9 | task_list_from_metadata, 10 | task_metadata, 11 | ) 12 | 13 | __DATA_DIR = pathlib.Path(__file__).resolve().parent / "data" 14 | 15 | metadata = task_metadata("assistantbench") 16 | file_path = pathlib.Path(__DATA_DIR) / "fallback_gpt4_seeplanact_predictions.jsonl" 17 | 18 | data_points = {} 19 | 20 | # Open the JSONL file and read each line as a JSON object 21 | with open(file_path, "r") as f: 22 | for line in f: 23 | data_point = json.loads(line) 24 | 25 | original_id = data_point["id"] 26 | answer = data_point["answer"] 27 | gold_answer = data_point["gold_answer"] 28 | score = data_point["score"] 29 | has_ans = data_point["has_ans"] 30 | 31 | data_points[original_id] = { 32 | "task_id": task_list_from_metadata(metadata, {"original_id": original_id})[0], 33 | "answer": answer, 34 | "gold_answer": gold_answer, 35 | "score": score, 36 | "has_ans": has_ans, 37 | } 38 | 39 | 40 | @pytest.mark.parametrize("original_id", list(data_points.keys())) 41 | def test_evaluate(original_id: str): 42 | 43 | answer = data_points[original_id]["answer"] 44 | gold_answer = data_points[original_id]["gold_answer"] 45 | expected_score =
data_points[original_id]["score"] 46 | expected_has_ans = data_points[original_id]["has_ans"] 47 | 48 | score, has_ans = question_scorer(answer, gold_answer) 49 | 50 | # Assert if the expected results doesn't match 51 | assert score == expected_score 52 | assert has_ans == expected_has_ans 53 | 54 | 55 | @pytest.mark.parametrize( 56 | "original_id", 57 | [id for id in data_points.keys() if isinstance(data_points[id]["answer"], (str, float, int))], 58 | ) 59 | @pytest.mark.slow 60 | def test_evaluate_within_env(original_id: str): 61 | 62 | task_id = data_points[original_id]["task_id"] 63 | answer = data_points[original_id]["answer"] 64 | expected_score = data_points[original_id]["score"] 65 | 66 | env = gym.make( 67 | f"browsergym/{task_id}", 68 | ) 69 | obs, info = env.reset() 70 | assert not obs["last_action_error"] 71 | 72 | obs, reward, terminated, truncated, info = env.step(f"send_msg_to_user({repr(str(answer))})") 73 | assert not obs["last_action_error"] 74 | assert terminated 75 | assert reward == expected_score 76 | 77 | env.close() 78 | -------------------------------------------------------------------------------- /browsergym/assistantbench/src/browsergym/assistantbench/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import pathlib 5 | import time 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | def add_prediction_to_jsonl( 11 | file_path: str, task_id: str, prediction: object, override_if_exists: bool 12 | ) -> None: 13 | """ 14 | Multiprocessing-safe file write. 
15 | """ 16 | lock_file_path = pathlib.Path(file_path).with_suffix(".lock") 17 | lock_max_wait = 10 # 10 seconds 18 | 19 | # Acquire lock (atomic file creation) 20 | start_time = time.time() 21 | while True: 22 | try: 23 | fd = os.open(lock_file_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY) 24 | with os.fdopen(fd, "w") as f: 25 | f.write("lock") 26 | break 27 | except FileExistsError: 28 | # give up if max wait time reached 29 | seconds_waited = time.time() - start_time 30 | if seconds_waited >= lock_max_wait: 31 | raise RuntimeError( 32 | f"Lock file could not be acquired after {seconds_waited} seconds ({lock_file_path})" 33 | ) 34 | # wait for lock release 35 | logger.info(f"Waiting for lock file to be released: {lock_file_path}") 36 | time.sleep(1) # 1 sec 37 | 38 | logger.info(f"Lock file acquired: {lock_file_path}") 39 | 40 | # Check if the file exists, if not, create it 41 | if not os.path.exists(file_path): 42 | with open(file_path, "w") as f: 43 | pass # Create an empty file 44 | 45 | # Load existing data, if any 46 | data = [] 47 | if os.path.exists(file_path): 48 | with open(file_path, "r") as f: 49 | data.extend([json.loads(line) for line in f if line.strip()]) # Skip empty lines 50 | 51 | # Check if task_id already exists 52 | existing_record = next((entry for entry in data if entry["id"] == task_id), None) 53 | 54 | # Add or update the record 55 | if not existing_record: 56 | # Add new record 57 | data.append({"id": task_id, "answer": prediction}) 58 | elif override_if_exists: 59 | # Update existing record 60 | existing_record["answer"] = prediction 61 | else: 62 | raise ValueError( 63 | f"Prediction for task ID {repr(task_id)} already exists in file {file_path}." 
64 | ) 65 | 66 | # Write data back to the file 67 | with open(file_path, "w") as f: 68 | for entry in data: 69 | f.write(json.dumps(entry) + "\n") 70 | 71 | # Release lock (remove file) 72 | os.remove(lock_file_path) 73 | logger.info(f"Lock file released: {lock_file_path}") 74 | -------------------------------------------------------------------------------- /tests/core/test_registration.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import gymnasium as gym 4 | import pytest 5 | 6 | from browsergym.core.registration import register_task 7 | from browsergym.core.task import AbstractBrowserTask 8 | 9 | 10 | class RegistrationTestTask(AbstractBrowserTask): 11 | @classmethod 12 | def get_task_id(cls): 13 | raise NotImplementedError 14 | 15 | def __init__(self, a: str = "", b: int = 0, c: bool = False, *args, **kwargs): 16 | super().__init__(*args, **kwargs) 17 | self.a = a 18 | self.b = b 19 | self.c = c 20 | 21 | def setup(self, page): 22 | return "", {} 23 | 24 | def teardown(self): 25 | pass 26 | 27 | def validate(self, page, chat_messages): 28 | return 0, True, "", {} 29 | 30 | 31 | register_task("test_task", RegistrationTestTask) 32 | register_task( 33 | "test_task_with_defaults", 34 | RegistrationTestTask, 35 | task_kwargs={"a": "new value"}, 36 | default_task_kwargs={"b": 1}, 37 | ) 38 | 39 | 40 | def test_registration(): 41 | 42 | with pytest.raises(ValueError): 43 | register_task( 44 | "test_task_forbidden", 45 | RegistrationTestTask, 46 | task_kwargs={"a": "new value"}, 47 | default_task_kwargs={"a": "other value"}, 48 | ) 49 | 50 | env = gym.make("browsergym/test_task") 51 | 52 | assert env.unwrapped.task_kwargs == {} 53 | 54 | env.reset() 55 | env.unwrapped.task.a == "" 56 | env.unwrapped.task.b == 0 57 | env.unwrapped.task.c == False 58 | env.close() 59 | 60 | env = gym.make("browsergym/test_task", task_kwargs={"a": "other", "b": 1}) 61 | 62 | assert env.unwrapped.task_kwargs == {"a": "other", 
"b": 1} 63 | 64 | env.reset() 65 | env.unwrapped.task.a == "other" 66 | env.unwrapped.task.b == 1 67 | env.unwrapped.task.c == False 68 | env.close() 69 | 70 | env = gym.make("browsergym/test_task_with_defaults") 71 | 72 | assert env.unwrapped.task_kwargs == {} 73 | 74 | env.reset() 75 | env.unwrapped.task.a == "new value" 76 | env.unwrapped.task.b == 1 77 | env.unwrapped.task.c == False 78 | env.close() 79 | 80 | env = gym.make("browsergym/test_task_with_defaults", task_kwargs={"b": 2}) 81 | 82 | assert env.unwrapped.task_kwargs == {"b": 2} 83 | 84 | env.reset() 85 | env.unwrapped.task.a == "new value" 86 | env.unwrapped.task.b == 2 87 | env.unwrapped.task.c == False 88 | env.close() 89 | 90 | env = gym.make("browsergym/test_task_with_defaults", task_kwargs={"a": "other"}) 91 | 92 | assert env.unwrapped.task_kwargs == {"a": "other"} 93 | 94 | with pytest.raises( 95 | expected_exception=ValueError, 96 | match=re.compile("Illegal attempt to override frozen parameters"), 97 | ): 98 | env.reset() 99 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_store 2 | .idea/ 3 | docs/src/generated/ 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | # error logs 136 | error_logs.txt 137 | 138 | # tests 139 | tests/results 140 | tmp.py 141 | .vscode/** 142 | 143 | # demo and results 144 | results/ 145 | 146 | .vscode/launch.json 147 | 148 | # assistantbench 149 | tests/assistantbench/assistantbench-predictions-test.jsonl 150 | 151 | # weblinx 152 | bg_wl_data/ 153 | 154 | # miniwob setup 155 | miniwob-plusplus/ 156 | 157 | uv.lock 158 | -------------------------------------------------------------------------------- /browsergym/core/src/browsergym/core/registration.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from typing import Type 3 | 4 | import gymnasium as gym 5 | 6 | from .env import BrowserEnv 7 | from .task import AbstractBrowserTask 8 | 9 | 10 | class frozen_partial: 11 | """ 12 | Freeze some keyword arguments of a function. 
13 | 14 | """ 15 | 16 | def __init__(self, func, **frozen_kwargs): 17 | self.func = func 18 | self.frozen_kwargs = frozen_kwargs 19 | 20 | def __call__(self, *args, **kwargs): 21 | # check overlap between kwargs and frozen_kwargs 22 | clashing_kwargs = set(self.frozen_kwargs) & set(kwargs) # key set intersection 23 | if clashing_kwargs: 24 | raise ValueError(f"Illegal attempt to override frozen parameters {clashing_kwargs}.") 25 | # merge the two dicts 26 | kwargs = kwargs | self.frozen_kwargs 27 | 28 | return self.func(*args, **kwargs) 29 | 30 | 31 | def register_task( 32 | id: str, 33 | task_class: Type[AbstractBrowserTask], 34 | task_kwargs: dict = {}, 35 | default_task_kwargs: dict = {}, 36 | nondeterministic: bool = True, 37 | *args, 38 | **kwargs, 39 | ): 40 | """ 41 | Registers a browser task as a gym environment with its unique id. 42 | 43 | Args: 44 | id: the id of the task to register (will be prepended by "browsergym/"). 45 | task_class: the task class to register. 46 | task_kwargs: frozen task arguments (can not be overloaded at environment creation time). 47 | task_kwargs_default: default task arguments (can be overloaded at environment creation time). 48 | nondeterministic: whether the task cannot be guaranteed deterministic transitions. 49 | *args: additional sequential arguments for either the gym or the browsergym environment. 50 | *kwargs: additional keyword arguments for either the gym or the browsergym environment. 51 | """ 52 | if task_kwargs and default_task_kwargs: 53 | # check overlap between frozen and default task_kwargs 54 | clashing_kwargs = set(task_kwargs) & set(default_task_kwargs) # key set intersection 55 | if clashing_kwargs: 56 | raise ValueError( 57 | f"Illegal attempt to register Browsergym environment {id} with both frozen and default values for task parameters {clashing_kwargs}." 
58 | ) 59 | 60 | task_entrypoint = task_class 61 | 62 | # freeze task_kwargs (cannot be overriden at environment creation) 63 | task_entrypoint = frozen_partial(task_class, **task_kwargs) 64 | 65 | # pre-set default_task_kwargs (can be overriden at environment creation) 66 | task_entrypoint = partial(task_entrypoint, **default_task_kwargs) 67 | 68 | gym.register( 69 | id=f"browsergym/{id}", 70 | entry_point=lambda *env_args, **env_kwargs: BrowserEnv( 71 | task_entrypoint, *env_args, **env_kwargs 72 | ), 73 | nondeterministic=nondeterministic, 74 | *args, 75 | **kwargs, 76 | ) 77 | -------------------------------------------------------------------------------- /tests/core/data/obstructed_checkbox_page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Checkbox with Label Interception 7 | 51 | 52 | 53 | 54 |
55 |
56 | 57 |
58 |
59 | 60 |
61 | 62 |
63 |
64 | 65 |
66 |
67 |
68 | 69 | 70 |
71 |
72 | 73 | 74 |
75 |
76 | 77 | 78 |
79 |
80 |
81 |
82 | 83 |
84 |
85 | 86 |
87 |
88 | 89 |
90 |
91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /tests/core/test_task.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import playwright 4 | import pytest 5 | 6 | from browsergym.core.env import BrowserEnv 7 | from browsergym.core.task import AbstractBrowserTask 8 | 9 | 10 | class MockImageGoalTask(AbstractBrowserTask): 11 | @classmethod 12 | def get_task_id(cls): 13 | return "mockimagegoal" 14 | 15 | def __init__(self, seed: int = 0, start_url: str = "https://www.google.com") -> None: 16 | """ 17 | Args: 18 | seed: random seed. 19 | start_url: str, the url for the starting page. 20 | goal: str, the initial goal. 21 | 22 | """ 23 | super().__init__(seed) 24 | self.start_url = start_url 25 | self.goal = [ 26 | {"type": "text", "text": "This is a mock task with an image goal."}, 27 | { 28 | "type": "image_url", 29 | "image_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6
pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII=", 30 | }, 31 | ] 32 | 33 | def setup(self, page: playwright.sync_api.Page) -> tuple[str, dict]: 34 | page.goto(self.start_url, timeout=10000) 35 | return self.goal, {} 36 | 37 | def teardown(self) -> None: 38 | pass 39 | 40 | def validate( 41 | self, page: playwright.sync_api.Page, chat_messages: list[str] 42 | ) -> Tuple[float, bool, str, dict]: 43 | reward, done, msg, info = 0, False, "", {} 44 | 45 | for message in chat_messages: 46 | if message["role"] == "user" and message["message"] == "exit": 47 | done = True 48 | break 49 | 50 | return reward, done, msg, info 51 | 52 | 53 | def test_mock_image_goal_task(): 54 | env = BrowserEnv(MockImageGoalTask) 55 | obs, _ = env.reset() 56 | 57 | assert "goal_object" in obs 58 | assert len(obs["goal_object"]) == 2 59 | assert obs["goal_object"][0]["type"] == "text" 60 | assert obs["goal_object"][0]["text"] == "This is a mock task with an image goal." 61 | assert obs["goal_object"][1]["type"] == "image_url" 62 | 63 | env.chat.add_message("user", "exit") 64 | obs, reward, terminated, _, _ = env.step("send_msg_to_user('bye')") 65 | 66 | assert reward == 0 67 | assert terminated is True 68 | 69 | env.close() 70 | 71 | 72 | if __name__ == "__main__": 73 | test_mock_image_goal_task() 74 | -------------------------------------------------------------------------------- /tests/core/data/long_page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 |

This is the top

7 | 8 | 9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |

This is the bottom

206 | 207 | 208 | 209 | 210 | 211 | 212 | -------------------------------------------------------------------------------- /browsergym/core/src/browsergym/core/task.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Tuple 3 | 4 | import numpy as np 5 | import playwright.sync_api 6 | 7 | 8 | class AbstractBrowserTask(ABC): 9 | """ 10 | Abstract class for browsergym tasks. 11 | 12 | """ 13 | 14 | @classmethod 15 | def get_task_id(cls): 16 | raise NotImplementedError 17 | 18 | def __init__(self, seed: int) -> None: 19 | # initiate a random number generator 20 | self.random = np.random.RandomState(seed) 21 | 22 | # task properties, will be used to set up the browsergym environment 23 | # default values, can be overriden in children classes 24 | self.viewport = {"width": 1280, "height": 720} 25 | self.slow_mo = 1000 # ms 26 | self.timeout = 5000 # ms 27 | self.locale = None # see https://playwright.dev/python/docs/api/class-browser#browser-new-context-option-locale 28 | self.timezone_id = None # see https://playwright.dev/python/docs/api/class-browser#browser-new-context-option-timezone-id 29 | 30 | @abstractmethod 31 | def setup(self, page: playwright.sync_api.Page) -> tuple[str, dict]: 32 | """ 33 | Set up everything needed to execute the task. 34 | 35 | Args: 36 | page: the active playwright page. 37 | 38 | Returns: 39 | goal: str, goal of the task. 40 | info: dict, custom information from the task. 41 | """ 42 | 43 | @abstractmethod 44 | def validate( 45 | self, page: playwright.sync_api.Page, chat_messages: list[str] 46 | ) -> Tuple[float, bool, str, dict]: 47 | """ 48 | Validate the task was completed successfully 49 | 50 | Args: 51 | page: the active playwright page. 52 | chat_messages: the chat messages. 53 | 54 | Returns: 55 | reward: float, the reward obtained since last call to validate(). 
56 | done: boolean flag, indicates if the task has finished or not (be it success or fail). 57 | message: string, a new user message for the chat. 58 | info: dictionnary, custom information from the task. 59 | 60 | """ 61 | 62 | def cheat(self, page: playwright.sync_api.Page, chat_messages: list[str]) -> None: 63 | """ 64 | Solve the task using a pre-defined solution (optional). 65 | 66 | """ 67 | raise NotImplementedError 68 | 69 | def teardown(self) -> None: 70 | """ 71 | Tear down the task and clean up any resource / data created by the task (optional). 72 | 73 | """ 74 | pass 75 | 76 | 77 | class OpenEndedTask(AbstractBrowserTask): 78 | @classmethod 79 | def get_task_id(cls): 80 | return "openended" 81 | 82 | def __init__(self, seed: int, start_url: str, goal: str = None) -> None: 83 | """ 84 | Args: 85 | seed: random seed. 86 | start_url: str, the url for the starting page. 87 | goal: str, the initial goal. 88 | 89 | """ 90 | super().__init__(seed) 91 | self.start_url = start_url 92 | self.goal = goal 93 | 94 | def setup(self, page: playwright.sync_api.Page) -> tuple[str, dict]: 95 | page.goto(self.start_url, timeout=10000) 96 | return self.goal, {} 97 | 98 | def teardown(self) -> None: 99 | pass 100 | 101 | def validate( 102 | self, page: playwright.sync_api.Page, chat_messages: list[str] 103 | ) -> Tuple[float, bool, str, dict]: 104 | reward, done, msg, info = 0, False, "", {} 105 | 106 | for message in chat_messages: 107 | if message["role"] == "user" and message["message"] == "exit": 108 | done = True 109 | break 110 | 111 | return reward, done, msg, info 112 | -------------------------------------------------------------------------------- /browsergym/core/src/browsergym/core/chat.py: -------------------------------------------------------------------------------- 1 | import base64 2 | from pathlib import Path 3 | from typing import Literal 4 | import logging 5 | import playwright.sync_api 6 | import re 7 | import time 8 | 9 | from importlib import 
resources 10 | 11 | from . import _get_global_playwright, chat_files 12 | 13 | 14 | CHATBOX_DIR = resources.files(chat_files) 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class Chat: 20 | def __init__( 21 | self, headless: bool, chat_size=(500, 800), record_video_dir=None, modern=True 22 | ) -> None: 23 | self.messages = [] 24 | 25 | # create a new browser, browser context and page for the chat 26 | pw: playwright.sync_api.Playwright = _get_global_playwright() 27 | self.browser = pw.chromium.launch( 28 | headless=headless, args=[f"--window-size={chat_size[0]},{chat_size[1]}"] 29 | ) 30 | self.context = self.browser.new_context( 31 | no_viewport=True, 32 | record_video_dir=Path(record_video_dir) / "chat_video" if record_video_dir else None, 33 | record_video_size=dict(width=chat_size[0], height=chat_size[1]), 34 | ) 35 | self.page = self.context.new_page() 36 | self.recording_start_time = time.time() if record_video_dir else None 37 | 38 | # setup the chat page 39 | self.page.expose_function( 40 | "send_user_message", lambda msg: self._js_user_message_received_callback(msg=msg) 41 | ) 42 | 43 | if modern: 44 | self.page.set_content(get_chatbox_modern(CHATBOX_DIR)) 45 | else: 46 | self.page.set_content(get_chatbox_classic(CHATBOX_DIR)) 47 | 48 | def _js_user_message_received_callback(self, msg: str): 49 | """Callback function for when a user message is received in the chatbox""" 50 | utc_time = time.time() 51 | self.messages.append({"role": "user", "timestamp": utc_time, "message": msg}) 52 | # returning a list as JS doesnt like tuples 53 | return ["user", time.strftime("%H:%M", time.localtime(utc_time)), msg] 54 | 55 | def add_message( 56 | self, role: Literal["user", "user_image", "assistant", "info", "infeasible"], msg: str 57 | ): 58 | """Add a message to the chatbox and update the page accordingly.""" 59 | utc_time = time.time() 60 | if role not in ("user", "user_image", "assistant", "info", "infeasible"): 61 | raise ValueError(f"Invalid role: 
{role}") 62 | if role in ("user", "user_image", "assistant", "infeasible"): 63 | self.messages.append({"role": role, "timestamp": utc_time, "message": msg}) 64 | timestamp = time.strftime("%H:%M:%S", time.localtime(utc_time)) 65 | self.page.evaluate(f"addChatMessage({repr(role)}, {repr(timestamp)}, {repr(msg)});") 66 | 67 | def wait_for_user_message(self): 68 | logger.info("Waiting for message from user...") 69 | # reset flag 70 | self.page.evaluate("USER_MESSAGE_RECEIVED = false;") 71 | # wait for flag to be raised 72 | self.page.wait_for_function("USER_MESSAGE_RECEIVED", polling=100, timeout=0) 73 | logger.info("Message received.") 74 | 75 | def close(self): 76 | self.context.close() 77 | self.browser.close() 78 | 79 | 80 | def get_chatbox_modern(chatbox_dir) -> str: 81 | with open(chatbox_dir / "chatbox_modern.html", "r") as file: 82 | chatbox_html = file.read() 83 | 84 | return chatbox_html 85 | 86 | 87 | def get_chatbox_classic(chatbox_dir) -> str: 88 | with open(chatbox_dir / "chatbox.html", "r") as file: 89 | chatbox_html = file.read() 90 | with open(chatbox_dir / "assistant.png", "rb") as f: 91 | image_base64 = base64.b64encode(f.read()).decode("utf-8") 92 | 93 | assistant_image_url = f"data:image/png;base64,{image_base64}" 94 | chatbox_html = re.sub("", assistant_image_url, chatbox_html) 95 | return chatbox_html 96 | -------------------------------------------------------------------------------- /demo_agent/run_demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | # locally defined agent 4 | from agent import DemoAgentArgs 5 | 6 | # browsergym experiments utils 7 | from browsergym.experiments import EnvArgs, ExpArgs, get_exp_result 8 | 9 | 10 | def str2bool(v): 11 | if isinstance(v, bool): 12 | return v 13 | if v.lower() in ("yes", "true", "t", "y", "1"): 14 | return True 15 | elif v.lower() in ("no", "false", "f", "n", "0"): 16 | return False 17 | else: 18 | raise 
argparse.ArgumentTypeError("Boolean value expected.") 19 | 20 | 21 | def parse_args(): 22 | parser = argparse.ArgumentParser(description="Run experiment with hyperparameters.") 23 | parser.add_argument( 24 | "--model_name", 25 | type=str, 26 | default="gpt-4o-mini", 27 | help="OpenAI model name.", 28 | ) 29 | parser.add_argument( 30 | "--task_name", 31 | type=str, 32 | default="openended", 33 | help="Name of the Browsergym task to run. If 'openended', you need to specify a 'start_url'", 34 | ) 35 | parser.add_argument( 36 | "--start_url", 37 | type=str, 38 | default="https://www.google.com", 39 | help="Starting URL (only for the openended task).", 40 | ) 41 | parser.add_argument( 42 | "--visual_effects", 43 | type=str2bool, 44 | default=True, 45 | help="Add visual effects when the agents performs actions.", 46 | ) 47 | parser.add_argument( 48 | "--use_html", 49 | type=str2bool, 50 | default=False, 51 | help="Use HTML in the agent's observation space.", 52 | ) 53 | parser.add_argument( 54 | "--use_axtree", 55 | type=str2bool, 56 | default=True, 57 | help="Use AXTree in the agent's observation space.", 58 | ) 59 | parser.add_argument( 60 | "--use_screenshot", 61 | type=str2bool, 62 | default=False, 63 | help="Use screenshot in the agent's observation space.", 64 | ) 65 | 66 | return parser.parse_args() 67 | 68 | 69 | def main(): 70 | print( 71 | """\ 72 | --- WARNING --- 73 | This is a basic agent for demo purposes. 74 | Visit AgentLab for more capable agents with advanced features. 
75 | https://github.com/ServiceNow/AgentLab""" 76 | ) 77 | 78 | args = parse_args() 79 | 80 | # setting up agent config 81 | agent_args = DemoAgentArgs( 82 | model_name=args.model_name, 83 | chat_mode=False, 84 | demo_mode="default" if args.visual_effects else "off", 85 | use_html=args.use_html, 86 | use_axtree=args.use_axtree, 87 | use_screenshot=args.use_screenshot, 88 | ) 89 | 90 | # setting up environment config 91 | env_args = EnvArgs( 92 | task_name=args.task_name, 93 | task_seed=None, 94 | max_steps=100, 95 | headless=False, # keep the browser open 96 | # viewport={"width": 1500, "height": 1280}, # can be played with if needed 97 | ) 98 | 99 | # for openended task, set environment and agent to interactive chat mode on a start url 100 | if args.task_name == "openended": 101 | agent_args.chat_mode = True 102 | env_args.wait_for_user_message = True 103 | env_args.task_kwargs = {"start_url": args.start_url} 104 | 105 | # setting up the experiment 106 | exp_args = ExpArgs( 107 | env_args=env_args, 108 | agent_args=agent_args, 109 | ) 110 | 111 | # running and logging results 112 | exp_args.prepare("./results") 113 | exp_args.run() 114 | 115 | # loading and printing results 116 | exp_result = get_exp_result(exp_args.exp_dir) 117 | exp_record = exp_result.get_exp_record() 118 | 119 | for key, val in exp_record.items(): 120 | print(f"{key}: {val}") 121 | 122 | 123 | if __name__ == "__main__": 124 | main() 125 | -------------------------------------------------------------------------------- /browsergym/core/src/browsergym/core/action/parsers.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import pyparsing as pp 3 | 4 | from dataclasses import dataclass 5 | from typing import Any 6 | 7 | 8 | @dataclass 9 | class NamedArgument: 10 | name: str 11 | value: Any 12 | 13 | def __repr__(self): 14 | return f"{self.name}={repr(self.value)}" 15 | 16 | 17 | def _build_highlevel_action_parser() -> pp.ParserElement: 18 | """ 
19 | Returns: 20 | An action parser that accepts Python-like function calls with string, number, list or dict literals as arguments. 21 | Example: 22 | func("a", 42, None, True, [2, 4, "s"], {"a_key": "a_value"}, ) 23 | The parser is loose and accepts multi-line or single-line combinations af calls. 24 | Example: 25 | func() func() 26 | \tfunc() 27 | Python comments are ignored. 28 | Example: 29 | # this is a comment 30 | func() # this function call will be parsed 31 | # func() # this one will not 32 | The parser will return a list of (function_name, function_args) tuples, one for each function call in the input. 33 | The parser will raise exceptions 34 | 35 | """ 36 | 37 | def make_keyword(kwd_str, kwd_value): 38 | return pp.Keyword(kwd_str).set_parse_action(pp.replace_with(kwd_value)) 39 | 40 | TRUE = make_keyword("True", True) 41 | FALSE = make_keyword("False", False) 42 | NONE = make_keyword("None", None) 43 | 44 | LBRACK, RBRACK, LBRACE, RBRACE, LPAREN, RPAREN, COLON = map(pp.Suppress, "[]{}():") 45 | 46 | def literal_eval(toks): 47 | return ast.literal_eval(toks[0]) 48 | 49 | string = pp.python_quoted_string().set_parse_action(literal_eval) 50 | number = pp.pyparsing_common.number() 51 | dict = pp.Forward().set_name("dict") # will be defined later 52 | list = pp.Forward().set_name("list") # will be defined later 53 | _tuple = pp.Forward().set_name("tuple") # will be defined later 54 | element = (string | number | dict | list | _tuple | TRUE | FALSE | NONE).set_name("element") 55 | 56 | list_items = pp.DelimitedList(element, allow_trailing_delim=True).set_name(None) 57 | list << pp.Group(LBRACK + pp.Optional(list_items) + RBRACK, aslist=True) 58 | _tuple << pp.Group(LPAREN + pp.Optional(list_items) + RPAREN, aslist=True).set_parse_action( 59 | lambda tokens: tuple(tokens[0]) 60 | ) 61 | 62 | dict_item = pp.Group(string + COLON + element, aslist=True).set_name("dict item") 63 | dict_items = pp.DelimitedList(dict_item, allow_trailing_delim=True).set_name(None) 
64 | dict << pp.Dict(LBRACE + pp.Optional(dict_items) + RBRACE, asdict=True) 65 | 66 | arg = element 67 | list_args = pp.DelimitedList(arg, allow_trailing_delim=True).set_name(None) 68 | named_arg = (pp.pyparsing_common.identifier() + pp.Literal("=") + element).set_parse_action( 69 | lambda tokens: NamedArgument(name=tokens[0], value=tokens[2]) 70 | ) 71 | list_named_args = pp.DelimitedList(named_arg, allow_trailing_delim=True).set_name(None) 72 | function_call = pp.pyparsing_common.identifier() + pp.Group( 73 | LPAREN + pp.Optional(list_args) + pp.Optional(list_named_args) + RPAREN, aslist=True 74 | ) 75 | 76 | multiple_function_calls = pp.DelimitedList(pp.Group(function_call), delim="") 77 | multiple_function_calls.ignore(pp.python_style_comment()) 78 | 79 | parser = multiple_function_calls 80 | 81 | return parser 82 | 83 | 84 | # this one will be used to extract python-like function calls 85 | highlevel_action_parser: pp.ParserElement = _build_highlevel_action_parser() 86 | 87 | # this one will be used to process the docstring in high-level actions, in order to describe the action space 88 | action_docstring_parser: pp.ParserElement = ( 89 | pp.Group(pp.OneOrMore(pp.Word(pp.printables), stop_on=pp.Literal("Examples:"))) 90 | + pp.Literal("Examples:").suppress() 91 | + pp.Group(highlevel_action_parser) 92 | ) 93 | -------------------------------------------------------------------------------- /browsergym/visualwebarena/src/browsergym/visualwebarena/instance.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import playwright.sync_api 5 | 6 | # we inherit some code base from webarena to avoid too much duplication 7 | from browsergym.webarena.instance import WebArenaInstance 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | ENV_VARS = ("SHOPPING", "REDDIT", "WIKIPEDIA", "HOMEPAGE", "CLASSIFIEDS", "CLASSIFIEDS_RESET_TOKEN") 13 | 14 | 15 | class 
VisualWebArenaInstance(WebArenaInstance): 16 | """ 17 | Utility class to access a VisualWebArena instance. 18 | 19 | """ 20 | 21 | RESET_URL_VAR = "VWA_FULL_RESET" # used by full_reset() 22 | 23 | def __init__( 24 | self, 25 | ) -> None: 26 | 27 | # setup visualwebarena environment variables (visualwebarena will read those on import) 28 | os.environ["DATASET"] = "visualwebarena" 29 | append_vwa = lambda x: f"VWA_{x}" 30 | for key in ENV_VARS: 31 | assert append_vwa(key) in os.environ, ( 32 | f"Environment variable {append_vwa(key)} missing.\n" 33 | + "Please set the following environment variables to use VisualWebArena through BrowserGym:\n" 34 | + "\n".join([append_vwa(x) for x in ENV_VARS]) 35 | ) 36 | os.environ[key] = os.environ[append_vwa(key)] 37 | 38 | # import visualwebarena on instantiation (it reads the env vars set above at import time) 39 | from visualwebarena.browser_env.env_config import ( 40 | ACCOUNTS, 41 | CLASSIFIEDS, 42 | CLASSIFIEDS_RESET_TOKEN, 43 | HOMEPAGE, 44 | REDDIT, 45 | SHOPPING, 46 | WIKIPEDIA, 47 | ) 48 | 49 | self.urls = { 50 | "reddit": REDDIT, 51 | "shopping": SHOPPING, 52 | "wikipedia": WIKIPEDIA, 53 | "classifieds": CLASSIFIEDS, 54 | } 55 | self.home_url = HOMEPAGE 56 | self.classifieds_reset_token = CLASSIFIEDS_RESET_TOKEN 57 | 58 | self.credentials = ACCOUNTS 59 | 60 | def ui_login(self, site: str, page: playwright.sync_api.Page): 61 | """ 62 | Should only be called once per site (expects user to be logged out).
63 | """ 64 | 65 | url = self.urls[site] 66 | 67 | # open a new page (tab) to perform the login 68 | page = page.context.new_page() 69 | 70 | match site: 71 | case "reddit": 72 | username = self.credentials[site]["username"] 73 | password = self.credentials[site]["password"] 74 | page.goto(f"{url}") 75 | page.get_by_role("link", name="Log in").click() 76 | page.get_by_label("Username").fill(username) 77 | page.get_by_label("Password").fill(password) 78 | page.get_by_role("button", name="Log in").click() 79 | case "shopping": 80 | username = self.credentials[site]["username"] 81 | password = self.credentials[site]["password"] 82 | 83 | page.goto(f"{url}/customer/account/login/") 84 | page.get_by_label("Email", exact=True).fill(username) 85 | page.get_by_label("Password", exact=True).fill(password) 86 | page.get_by_role("button", name="Sign In").click() 87 | 88 | case "wikipedia": 89 | page.goto(url) 90 | 91 | case "classifieds": 92 | username = self.credentials[site]["username"] 93 | password = self.credentials[site]["password"] 94 | page.goto(f"{url}/index.php?page=login") 95 | page.locator("#email").fill(username) 96 | page.locator("#password").fill(password) 97 | page.get_by_role("button", name="Log in").click() 98 | 99 | case _: 100 | raise ValueError 101 | 102 | # release login page 103 | page.close() 104 | -------------------------------------------------------------------------------- /browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluator.py: -------------------------------------------------------------------------------- 1 | # todo export evaluation to a python package 2 | 3 | import json 4 | 5 | import numpy as np 6 | 7 | from .evaluate_utils.evaluate_factory import get_evaluator 8 | 9 | 10 | def find_isnan(samp): 11 | try: 12 | if np.isnan(samp): 13 | return True 14 | else: 15 | return False 16 | except: 17 | return False 18 | 19 | 20 | def fix_ans(answer): 21 | try: 22 | answer = ( 23 | answer.replace("{'", '{"') 24 | .replace("', 
'", '", "') 25 | .replace("': '", '": "') 26 | .replace("'}", '"}') 27 | ) 28 | answer = answer.replace("': ", '": ') 29 | return answer 30 | except: 31 | return answer 32 | 33 | 34 | def parse_answer(answer): 35 | if len(answer) == 1: 36 | ans, is_num = fix_number(answer[0]) 37 | if is_num: 38 | return ans, "number" 39 | try: 40 | ans = json.loads(fix_ans(answer[0])) 41 | return [ans], "json" 42 | except: 43 | ans, is_num = fix_number(answer[0]) 44 | if is_num: 45 | return ans, "number" 46 | else: 47 | return answer[0], "string" 48 | else: 49 | try: 50 | ans = [json.loads(fix_ans(ex)) for ex in answer] 51 | return ans, "json" 52 | except: 53 | return answer, "string list" 54 | 55 | 56 | def fix_number(number): 57 | if type(number) == str: 58 | copy_ans = number 59 | copy_ans = " ".join( 60 | " ".join(" ".join(copy_ans.split("$")).split("%")).split("sqft") 61 | ).strip() 62 | copy_ans = copy_ans.strip() 63 | copy_ans = copy_ans.replace(",", ".").replace(" square kilometers", "") 64 | try: 65 | return float(copy_ans), True 66 | except: 67 | return number, False 68 | elif type(number) == int: 69 | return float(number), True 70 | else: 71 | return number, True 72 | 73 | 74 | def fix_prediction(prediction, gold_answer, evaluator): 75 | if ( 76 | type(prediction) == list 77 | and len(prediction) == 1 78 | and ( 79 | type(prediction[0]) == int 80 | or ((type(prediction[0]) == str) and prediction[0].isnumeric()) 81 | ) 82 | ): 83 | prediction = fix_number(prediction[0]) 84 | 85 | if type(prediction) != list: 86 | prediction, is_num = fix_number(prediction) 87 | if evaluator == "json": 88 | try: 89 | prediction = [json.loads(pred) for pred in prediction.split("\n")] 90 | except: 91 | prediction = [prediction] 92 | 93 | if (hasattr(type(prediction), "__len__")) and (len(prediction) == 0): 94 | return prediction, False 95 | 96 | if (type(prediction) == list and len(prediction) > 1) and type(gold_answer) == float: 97 | return prediction, False 98 | 99 | return prediction, 
True 100 | 101 | 102 | def question_scorer(prediction, gold_answer): 103 | try: 104 | prediction = json.loads(prediction) 105 | except: 106 | prediction = prediction 107 | 108 | answer_list = ( 109 | [x for x in gold_answer.split("\n") if len(x.strip()) > 0] 110 | if type(gold_answer) != list 111 | else gold_answer 112 | ) 113 | gold_answer, evaluator = parse_answer(answer_list) 114 | prediction, run_eval = fix_prediction(prediction, gold_answer, evaluator) 115 | 116 | has_ans = 1.0 117 | if (type(prediction) != float and len(prediction) == 0) or find_isnan(prediction): 118 | has_ans = 0.0 119 | 120 | if type(prediction) == list: 121 | if all( 122 | (type(pred) not in {float, int} and len(pred) == 0) or find_isnan(pred) 123 | for pred in prediction 124 | ): 125 | has_ans = 0 126 | 127 | if not run_eval: 128 | return 0.0, has_ans 129 | 130 | metric_eval = get_evaluator(evaluator) 131 | accuracy = metric_eval(prediction, gold_answer) 132 | return accuracy, has_ans 133 | -------------------------------------------------------------------------------- /browsergym/core/src/browsergym/core/action/python.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from .base import AbstractActionSet 4 | 5 | 6 | class PythonActionSet(AbstractActionSet): 7 | def describe(self, with_long_description: bool = True, with_examples: bool = True): 8 | """ 9 | Returns a textual description of this action space. 10 | """ 11 | description = f""" 12 | Each action consists of executable Python code (python>=3.10) that uses the Playwright library (playwright==1.32) 13 | to interact with the current webpage and the browser context. The currently active webpage is accessible via the 14 | global variable `page`. 
A function `send_message_to_user(text)` is also accessible and can be used to send a 15 | message to the user, as well as a function `report_infeasible_instructions(reason)` to notify the user when their 16 | instructions are infeasible.""" 17 | if with_long_description: 18 | description += f""" 19 | The browser context is in `page.context`, and all open webpages (tabs and popups) 20 | are in `page.context.pages`. Here is is an example of a valid action: 21 | ``` 22 | frame = page.frame_locator(".result-frame") 23 | button = frame.get_by_text("Submit") 24 | button.click() 25 | ``` 26 | Here is another example: 27 | ``` 28 | frame = page.get_by_test_id("a").frame_locator(":scope") 29 | frame.get_by_test_id("a776").click() 30 | ``` 31 | Note that Playwright's `get_by_test_id()` method is configured to use the `bid` attribute to locate HTML elements, 32 | instead of the default `data-testid`. Also, Playwright's locators can not traverse iframes, so you have to locate 33 | parent iframes first in order to locate an element in an iframe. The `bid` attribute contains all the information 34 | required to recursively locate an element. 
For example, an element with `bid="ac2"` can be retrieved as follows: 35 | ``` 36 | frame = page.get_by_test_id("a").frame_locator(":scope") 37 | frame = frame.get_by_test_id("ac").frame_locator(":scope") 38 | elem = frame.get_by_test_id("ac2") 39 | ``` 40 | """ 41 | else: 42 | description += f"""\ 43 | 44 | """ 45 | if with_examples: 46 | description += f"""\ 47 | Here are other examples of valid actions: 48 | ``` 49 | page = page.context.new_page() 50 | page.goto("https://www.wikipedia.org/") 51 | ``` 52 | ``` 53 | page.get_by_label("Birth date").fill("2020-02-02") 54 | page.get_by_role("link", name="Get started").click() 55 | ``` 56 | ``` 57 | page.get_by_label('I agree to the terms above').check() 58 | ``` 59 | ``` 60 | page.locator('#area').fill('Hello World!') 61 | ``` 62 | ``` 63 | page.get_by_role("textbox").press("Control+ArrowRight") 64 | ``` 65 | ``` 66 | send_message_to_user("There are 7 items to choose from.") 67 | ``` 68 | ``` 69 | report_infeasible_instructions("I cannot follow these instructions because there is no email field in this form.") 70 | ``` 71 | """ 72 | 73 | return description 74 | 75 | def example_action(self, abstract: bool) -> str: 76 | """ 77 | Returns an example action as a string. 78 | """ 79 | if abstract: 80 | return """\ 81 | One single bloc of Python code. Do not include any explanation, only valid Python code.""" 82 | else: 83 | return """\ 84 | frame = page.get_by_test_id("b").frame_locator(":scope") 85 | frame = page.get_by_test_id("ba").frame_locator(":scope") 86 | frame.get_by_test_id("ba2").fill("Hello world!") 87 | frame.get_by_test_id("ba3").click() 88 | """ 89 | 90 | def to_python_code(self, action): 91 | """ 92 | Converts the given code action string to browsergym-compatible playwright code. 93 | 94 | Args: 95 | action: the code action to parse. 96 | 97 | Returns: 98 | Executable playwright code that performs the action in a browsergym environment. 
99 | """ 100 | 101 | python_code = "" 102 | 103 | # extract markdown-style code snippets if detected 104 | pattern = re.compile(r"```(?:python)?\n(?P[\s\S]*?)```") 105 | if pattern.match(action): 106 | python_code += "\n".join([match.group("code") for match in pattern.finditer(action)]) 107 | # otherwise just use the code action as is 108 | else: 109 | python_code += action 110 | 111 | # return the produced playwright code 112 | return python_code 113 | -------------------------------------------------------------------------------- /browsergym/miniwob/src/browsergym/miniwob/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from browsergym.core.registration import register_task 4 | 5 | from . import all 6 | 7 | 8 | def environment_variables_precheck(): 9 | assert os.environ.get( 10 | "MINIWOB_URL", None 11 | ), "Environment variable MINIWOB_URL has not been setup." 12 | 13 | 14 | ALL_MINIWOB_TASKS = [ 15 | all.AscendingNumbersTask, 16 | all.BisectAngleTask, 17 | all.BookFlightTask, 18 | all.BookFlightNodelayTask, 19 | all.BuyTicketTask, 20 | all.ChooseDateTask, 21 | all.ChooseDateEasyTask, 22 | all.ChooseDateMediumTask, 23 | all.ChooseDateNodelayTask, 24 | all.ChooseListTask, 25 | all.CircleCenterTask, 26 | all.ClickButtonTask, 27 | all.ClickButtonSequenceTask, 28 | all.ClickCheckboxesTask, 29 | all.ClickCheckboxesLargeTask, 30 | all.ClickCheckboxesSoftTask, 31 | all.ClickCheckboxesTransferTask, 32 | all.ClickCollapsibleTask, 33 | all.ClickCollapsible2Task, 34 | all.ClickCollapsible2NodelayTask, 35 | all.ClickCollapsibleNodelayTask, 36 | all.ClickColorTask, 37 | all.ClickDialogTask, 38 | all.ClickDialog2Task, 39 | all.ClickLinkTask, 40 | all.ClickMenuTask, 41 | all.ClickMenu2Task, 42 | all.ClickOptionTask, 43 | all.ClickPieTask, 44 | all.ClickPieNodelayTask, 45 | all.ClickScrollListTask, 46 | all.ClickShadesTask, 47 | all.ClickShapeTask, 48 | all.ClickTabTask, 49 | all.ClickTab2Task, 50 | 
all.ClickTab2EasyTask, 51 | all.ClickTab2HardTask, 52 | all.ClickTab2MediumTask, 53 | all.ClickTestTask, 54 | all.ClickTest2Task, 55 | all.ClickTestTransferTask, 56 | all.ClickWidgetTask, 57 | all.CopyPasteTask, 58 | all.CopyPaste2Task, 59 | all.CountShapeTask, 60 | all.CountSidesTask, 61 | all.DailyCalendarTask, 62 | all.DragBoxTask, 63 | all.DragCircleTask, 64 | all.DragCubeTask, 65 | all.DragItemsTask, 66 | all.DragItemsGridTask, 67 | all.DragShapesTask, 68 | all.DragShapes2Task, 69 | all.DragSingleShapeTask, 70 | all.DragSortNumbersTask, 71 | all.DrawCircleTask, 72 | all.DrawLineTask, 73 | all.EmailInboxTask, 74 | all.EmailInboxDeleteTask, 75 | all.EmailInboxForwardTask, 76 | all.EmailInboxForwardNlTask, 77 | all.EmailInboxForwardNlTurkTask, 78 | all.EmailInboxImportantTask, 79 | all.EmailInboxNlTurkTask, 80 | all.EmailInboxNoscrollTask, 81 | all.EmailInboxReplyTask, 82 | all.EmailInboxStarReplyTask, 83 | all.EnterDateTask, 84 | all.EnterPasswordTask, 85 | all.EnterTextTask, 86 | all.EnterText2Task, 87 | all.EnterTextDynamicTask, 88 | all.EnterTimeTask, 89 | all.FindGreatestTask, 90 | all.FindMidpointTask, 91 | all.FindWordTask, 92 | all.FocusTextTask, 93 | all.FocusText2Task, 94 | all.FormSequenceTask, 95 | all.FormSequence2Task, 96 | all.FormSequence3Task, 97 | all.GenerateNumberTask, 98 | all.GridCoordinateTask, 99 | all.GuessNumberTask, 100 | all.HighlightTextTask, 101 | all.HighlightText2Task, 102 | all.HotColdTask, 103 | all.IdentifyShapeTask, 104 | all.LoginUserTask, 105 | all.LoginUserPopupTask, 106 | all.MultiLayoutsTask, 107 | all.MultiOrderingsTask, 108 | all.NavigateTreeTask, 109 | all.NumberCheckboxesTask, 110 | all.OddOrEvenTask, 111 | all.OrderFoodTask, 112 | all.PhoneBookTask, 113 | all.ReadTableTask, 114 | all.ReadTable2Task, 115 | all.ResizeTextareaTask, 116 | all.RightAngleTask, 117 | all.ScrollTextTask, 118 | all.ScrollText2Task, 119 | all.SearchEngineTask, 120 | all.SignAgreementTask, 121 | all.SimpleAlgebraTask, 122 | 
all.SimpleArithmeticTask, 123 | all.SocialMediaTask, 124 | all.SocialMediaAllTask, 125 | all.SocialMediaSomeTask, 126 | all.StockMarketTask, 127 | all.TerminalTask, 128 | all.TextEditorTask, 129 | all.TextTransformTask, 130 | all.TicTacToeTask, 131 | all.UnicodeTestTask, 132 | all.UseAutocompleteTask, 133 | all.UseAutocompleteNodelayTask, 134 | all.UseColorwheelTask, 135 | all.UseColorwheel2Task, 136 | all.UseSliderTask, 137 | all.UseSlider2Task, 138 | all.UseSpinnerTask, 139 | all.VisualAdditionTask, 140 | ] 141 | 142 | # register the Miniwob benchmark 143 | for task in ALL_MINIWOB_TASKS: 144 | register_task( 145 | task.get_task_id(), 146 | task, 147 | nondeterministic=task.nondeterministic, 148 | ) 149 | -------------------------------------------------------------------------------- /.github/workflows/pypi.yml: -------------------------------------------------------------------------------- 1 | name: Build and Publish 2 | # based on official doc 3 | # https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ 4 | 5 | on: [push, workflow_dispatch] 6 | 7 | jobs: 8 | build: 9 | name: Build 10 | runs-on: ubuntu-22.04 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | 15 | - name: Set up Python 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: "3.x" 19 | 20 | - name: Install pypa/build 21 | run: python3 -m pip install build --user 22 | 23 | - name: Build a binary wheel and a source tarball (browsergym-core) 24 | run: python3 -m build browsergym/core/ --outdir dist/ 25 | 26 | - name: Build a binary wheel and a source tarball (browsergym-miniwob) 27 | run: python3 -m build browsergym/miniwob/ --outdir dist/ 28 | 29 | - name: Build a binary wheel and a source tarball (browsergym-webarena) 30 | run: python3 -m build browsergym/webarena/ --outdir dist/ 31 | - name: Build a binary wheel and a source tarball (browsergym-webarenalite) 32 | run: python3 -m build browsergym/webarenalite/ 
--outdir dist/ 33 | - name: Build a binary wheel and a source tarball (browsergym-visualwebarena) 34 | run: python3 -m build browsergym/visualwebarena/ --outdir dist/ 35 | 36 | - name: Build a binary wheel and a source tarball (browsergym-assistantbench) 37 | run: python3 -m build browsergym/assistantbench/ --outdir dist/ 38 | 39 | - name: Build a binary wheel and a source tarball (browsergym-experiments) 40 | run: python3 -m build browsergym/experiments/ --outdir dist/ 41 | 42 | - name: Build a binary wheel and a source tarball (browsergym) 43 | run: python3 -m build browsergym/ --outdir dist/ 44 | 45 | - name: Store the distribution packages 46 | uses: actions/upload-artifact@v4 47 | with: 48 | name: python-package-distributions 49 | path: dist/ 50 | 51 | publish-to-pypi: 52 | name: Publish to PyPI 53 | if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes 54 | needs: 55 | - build 56 | runs-on: ubuntu-22.04 57 | environment: pypi 58 | permissions: 59 | id-token: write # IMPORTANT: mandatory for trusted publishing 60 | 61 | steps: 62 | - name: Download all the distribution packages 63 | uses: actions/download-artifact@v4 64 | with: 65 | name: python-package-distributions 66 | path: dist/ 67 | 68 | - name: Publish all distribution packages to PyPI 69 | uses: pypa/gh-action-pypi-publish@release/v1 70 | 71 | github-release: 72 | name: Create GitHub Release 73 | if: startsWith(github.ref, 'refs/tags/') 74 | needs: 75 | - publish-to-pypi 76 | runs-on: ubuntu-22.04 77 | permissions: 78 | contents: write 79 | 80 | steps: 81 | - name: Checkout code 82 | uses: actions/checkout@v4 83 | 84 | - name: Download all the dists 85 | uses: actions/download-artifact@v4 86 | with: 87 | name: python-package-distributions 88 | path: dist/ 89 | 90 | - name: Create GitHub Release 91 | env: 92 | GITHUB_TOKEN: ${{ github.token }} 93 | run: | 94 | gh release create '${{ github.ref_name }}' \ 95 | dist/* \ 96 | --repo '${{ github.repository }}' \ 97 | --title "Release 
${{ github.ref_name }}" \ 98 | --generate-notes 99 | 100 | - name: Set as pre-release if dev version 101 | if: contains(github.ref, '.dev') 102 | env: 103 | GITHUB_TOKEN: ${{ github.token }} 104 | run: | 105 | gh release edit '${{ github.ref_name }}' \ 106 | --repo '${{ github.repository }}' \ 107 | --prerelease 108 | 109 | # publish-to-testpypi: 110 | # name: Publish to TestPyPI 111 | # needs: 112 | # - build 113 | # runs-on: ubuntu-latest 114 | # environment: testpypi 115 | # permissions: 116 | # id-token: write # IMPORTANT: mandatory for trusted publishing 117 | 118 | # steps: 119 | # - name: Download all the distribution packages 120 | # uses: actions/download-artifact@v4 121 | # with: 122 | # name: python-package-distributions 123 | # path: dist/ 124 | 125 | # - name: Publish distribution packages to TestPyPI 126 | # uses: pypa/gh-action-pypi-publish@release/v1 127 | # with: 128 | # repository-url: https://test.pypi.org/legacy/ 129 | -------------------------------------------------------------------------------- /docs/src/examples/create_custom_task.rst: -------------------------------------------------------------------------------- 1 | Creating a custom task 2 | ______________________ 3 | 4 | Creating a custom task in `BrowserGym` can be done easily by inheriting from the `AbstractBrowserTask` class. 5 | 6 | 7 | Let's start with an example, we will build a task that starts from the Google Search page and asks for the Eiffel Tower Wikipedia page. 8 | 9 | * **Goal**: Search for 'Eiffel Tower' Wikipedia page. 10 | 11 | * **Reward**: Gets reward = 1 if reaches the expected Wikipedia page on Eiffel Tower, else gets 0. 12 | 13 | .. 
code-block:: python 14 | 15 | from typing import Tuple 16 | 17 | import playwright.sync_api 18 | from browsergym.core.task import AbstractBrowserTask 19 | 20 | 21 | class SampleTask(AbstractBrowserTask): 22 | def __init__(self, seed: int) -> None: 23 | super().__init__(seed) 24 | 25 | @classmethod 26 | def get_task_id(cls): 27 | return "sample_task" 28 | 29 | 30 | First, let's setup the task. To do this, we need to implement the `setup()` function. The starting point is *https://www.google.com* page and the goal is *"Search for 'Eiffel Tower' Wikipedia page"*. 31 | 32 | .. code-block:: python 33 | 34 | class SampleTask(AbstractBrowserTask): 35 | # ... 36 | # Code above 37 | # ... 38 | 39 | def setup(self, page: playwright.sync_api.Page) -> Tuple[str, dict]: 40 | """Set up everything needed to execute the task.""" 41 | page.goto("https://www.google.com", timeout=10000) 42 | goal = "Search for 'Eiffel Tower' Wikipedia page." 43 | info = {} 44 | return goal, info 45 | 46 | 47 | Next, we need to compute a reward. For this, we'll implement our validation criteria in the `validate()` function. In our case, we consider the task being completed when the Eiffel Tower Wikipedia page is reached. Else it's a fail. 48 | 49 | .. code-block:: python 50 | 51 | class SampleTask(AbstractBrowserTask): 52 | # ... 53 | # Code above 54 | # ... 55 | 56 | def validate( 57 | self, page: playwright.sync_api.Page, chat_messages: list[str] 58 | ) -> Tuple[float, bool, str, dict]: 59 | """Compute reward based on reaching final URL.""" 60 | if page.url == "https://en.wikipedia.org/wiki/Eiffel_Tower": 61 | return 1.0, True, "Task completed", {} 62 | else: 63 | return 0.0, False, "", {} 64 | 65 | 66 | We can also implement the code for completing the task, it's an oracle (a.k.a. cheat) version. For this, we'll fill out the `cheat()` function. 67 | 68 | .. code-block:: python 69 | 70 | class SampleTask(AbstractBrowserTask): 71 | # ... 72 | # Code above 73 | # ... 
74 | 75 | def cheat(self, page: playwright.sync_api.Page, chat_messages: list[str]) -> None: 76 | """Solve the task in a single step using a hard-coded Playwright solution.""" 77 | page.get_by_text("Search").fill("Eiffel Tower") 78 | page.get_by_text("Google Search").click() 79 | page.get_by_text("Eiffel Tower - Wikipedia").click() 80 | 81 | 82 | Finally, the `teardown()` function. This function allows cleaning up resources before closing the environment. In our case, nothing needs to be done, so we will leave it empty. 83 | 84 | .. code-block:: python 85 | 86 | class SampleTask(AbstractBrowserTask): 87 | # ... 88 | # Code above 89 | # ... 90 | 91 | def teardown(self) -> None: 92 | # Nothing to do for this task. 93 | pass 94 | 95 | 96 | Our folder structure should look like the following: 97 | 98 | .. code-block:: bash 99 | 100 | . 101 | |── tasks 102 | | ├── __init__.py 103 | | └── sample_task.py 104 | ├── run_task.py 105 | 106 | 107 | Now we should register the task in the gym environment using the following code in the `__init__.py` of your package: 108 | 109 | .. code-block:: python 110 | 111 | from browsergym.core.registration import register_task 112 | 113 | from .sample_task import SampleTask 114 | 115 | register_task(id=SampleTask.get_task_id(), task_class=SampleTask) 116 | 117 | 118 | Now that the task is registered it can be called via this code that you can put in `run_task.py` file: 119 | 120 | ..
code-block:: python 121 | 122 | import gymnasium as gym 123 | import tasks # will register the gym environment 124 | 125 | env = gym.make("browsergym/sample_task") 126 | obs, info = env.reset() 127 | done = False 128 | 129 | while not done: 130 | action = "noop()" 131 | obs, reward, terminated, truncated, info = env.step(action) 132 | done = terminated or truncated 133 | print(f"Reward: {reward}, Done: {done}, Info: {info}") 134 | 135 | -------------------------------------------------------------------------------- /browsergym/core/src/browsergym/core/spaces.py: -------------------------------------------------------------------------------- 1 | """Borrowed from https://github.com/Farama-Foundation/miniwob-plusplus/blob/553daee55ea0b2cc32b181a474083ab4cad782a1/miniwob/spaces.py""" 2 | 3 | from typing import Any 4 | 5 | import numpy as np 6 | from gymnasium.spaces import Space 7 | from numpy.typing import NDArray 8 | 9 | 10 | class Unicode(Space): 11 | """ 12 | A space representing a unicode string. 13 | """ 14 | 15 | def __init__(self): 16 | super().__init__() 17 | 18 | def contains(self, x: Any) -> bool: 19 | """Return boolean specifying if x is a valid member of this space.""" 20 | # Do not check the character set. 21 | return isinstance(x, str) 22 | 23 | def __repr__(self) -> str: 24 | """Gives a string representation of this space.""" 25 | return f"Unicode()" 26 | 27 | def __eq__(self, other: Any) -> bool: 28 | """Check whether ``other`` is equivalent to this instance.""" 29 | return isinstance(other, Unicode) 30 | 31 | 32 | class Float(Space): 33 | """ 34 | A space representing a float.
35 | """ 36 | 37 | def __init__(self): 38 | super().__init__() 39 | 40 | def contains(self, x: Any) -> bool: 41 | """Return boolean specifying if x is a valid member of this space.""" 42 | return isinstance(x, float) 43 | 44 | def __repr__(self) -> str: 45 | """Gives a string representation of this space.""" 46 | return f"Float()" 47 | 48 | def __eq__(self, other: Any) -> bool: 49 | """Check whether ``other`` is equivalent to this instance.""" 50 | return isinstance(other, Float) 51 | 52 | 53 | class Integer(Space): 54 | """ 55 | A space representing an integer. 56 | """ 57 | 58 | def __init__(self): 59 | super().__init__() 60 | 61 | def contains(self, x: Any) -> bool: 62 | """Return boolean specifying if x is a valid member of this space.""" 63 | return isinstance(x, int) 64 | 65 | def __repr__(self) -> str: 66 | """Gives a string representation of this space.""" 67 | return f"Integer()" 68 | 69 | def __eq__(self, other: Any) -> bool: 70 | """Check whether ``other`` is equivalent to this instance.""" 71 | return isinstance(other, Integer) 72 | 73 | 74 | class AnyDict(Space): 75 | """A space representing an arbitrary dictionary object.""" 76 | 77 | def contains(self, x: Any) -> bool: 78 | """Return boolean specifying if x is a valid member of this space.""" 79 | # Do not check anything specific. 
80 | return isinstance(x, dict) 81 | 82 | def __repr__(self) -> str: 83 | """Gives a string representation of this space.""" 84 | return f"AnyDict()" 85 | 86 | def __eq__(self, other: Any) -> bool: 87 | """Check whether ``other`` is equivalent to this instance.""" 88 | return isinstance(other, AnyDict) 89 | 90 | 91 | class Anything(Space): 92 | """A space representing an arbitrary dictionary object.""" 93 | 94 | def contains(self, x: Any) -> bool: 95 | return True 96 | 97 | def __repr__(self) -> str: 98 | return f"Anything()" 99 | 100 | def __eq__(self, other: Any) -> bool: 101 | return isinstance(other, Anything) 102 | 103 | 104 | class AnyBox(Space[NDArray[Any]]): 105 | """A space representing an arbitrary dictionary object.""" 106 | 107 | def __init__(self, low, high, shape, dtype): 108 | super().__init__(shape, dtype) 109 | self.low = low 110 | self.high = high 111 | 112 | def contains(self, x: Any) -> bool: 113 | """Return boolean specifying if x is a valid member of this space.""" 114 | if not isinstance(x, np.ndarray): 115 | try: 116 | x = np.asarray(x, dtype=self.dtype) 117 | except (ValueError, TypeError): 118 | return False 119 | 120 | return bool( 121 | np.can_cast(x.dtype, self.dtype) 122 | and len(x.shape) == len(self.shape) 123 | and all([dim in (xdim, -1) for xdim, dim in zip(x.shape, self.shape)]) 124 | and np.all(x >= self.low) 125 | and np.all(x <= self.high) 126 | ) 127 | 128 | def __repr__(self) -> str: 129 | """Gives a string representation of this space.""" 130 | return f"AnyBox(low={repr(self.low)}, high={repr(self.high)}, shape={repr(self.shape)}, dtype={repr(self.dtype)})" 131 | 132 | def __eq__(self, other: Any) -> bool: 133 | """Check whether ``other`` is equivalent to this instance.""" 134 | return ( 135 | isinstance(other, AnyBox) 136 | and self.low == other.low 137 | and self.high == other.high 138 | and self.shape == other.shape 139 | and self.dtype == other.dtype 140 | ) 141 | 
-------------------------------------------------------------------------------- /browsergym/webarenalite/src/browsergym/webarenalite/task.py: -------------------------------------------------------------------------------- 1 | import importlib.resources 2 | import json 3 | import logging 4 | import tempfile 5 | from typing import Optional 6 | 7 | import playwright.sync_api 8 | 9 | from browsergym.webarena.task import GenericWebArenaTask 10 | 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | class WebArenaLiteTask(GenericWebArenaTask): 16 | """ 17 | Base class for all WebArena tasks. 18 | 19 | """ 20 | 21 | def __init__( 22 | self, 23 | seed: int, 24 | task_id: Optional[int] = None, 25 | intent_template_id: Optional[int] = None, 26 | with_na_hint: bool = False, 27 | with_homepage_hint: bool = False, 28 | ): 29 | super().__init__( 30 | seed=seed, 31 | task_id=task_id, 32 | intent_template_id=intent_template_id, 33 | with_na_hint=with_na_hint, 34 | with_homepage_hint=with_homepage_hint, 35 | ) 36 | 37 | all_configs_str = ( 38 | importlib.resources.files("browsergym.webarenalite") 39 | .joinpath("test_webarena_lite.raw.json") 40 | .read_text() 41 | ) 42 | # substitute URLs 43 | for pattern, url_key in { 44 | "__GITLAB__": "gitlab", 45 | "__REDDIT__": "reddit", 46 | "__SHOPPING__": "shopping", 47 | "__SHOPPING_ADMIN__": "shopping_admin", 48 | "__WIKIPEDIA__": "wikipedia", 49 | "__MAP__": "map", 50 | }.items(): 51 | all_configs_str = all_configs_str.replace(pattern, self.webarena_instance.urls[url_key]) 52 | 53 | # load all task configs to JSON 54 | all_configs = json.loads(all_configs_str) 55 | 56 | # keep only the desired task configs 57 | if intent_template_id is not None: 58 | task_configs = [ 59 | conf for conf in all_configs if conf["intent_template_id"] == intent_template_id 60 | ] 61 | if not task_configs: 62 | raise ValueError( 63 | f"Could not find any task config with intent_template_id={intent_template_id}." 
64 | ) 65 | 66 | elif task_id is not None: 67 | # use old_task_id to filter configs 68 | task_configs = [conf for conf in all_configs if conf["old_task_id"] == task_id] 69 | if not task_configs: 70 | raise ValueError(f"Could not find any task config with old_task_id={task_id}.") 71 | 72 | self.task_configs = task_configs 73 | 74 | def setup(self, page: playwright.sync_api.Page) -> tuple[str, dict]: 75 | # Using the custom evaluator for WebArena Lite 76 | from .evaluators import evaluator_router 77 | 78 | # pick a task at random 79 | self.config = self.random.choice(self.task_configs) 80 | 81 | # hack: dynamically build a config file to read from 82 | with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f: 83 | json.dump(self.config, f) 84 | f.flush() 85 | self.config_file = f.name 86 | 87 | # build the evaluator 88 | self.evaluator = evaluator_router(self.config_file) 89 | 90 | # authenticate 91 | for site in self.config["sites"]: 92 | self.webarena_instance.ui_login(site=site, page=page) 93 | 94 | # set geolocation 95 | page.context.set_geolocation(self.config["geolocation"]) 96 | 97 | # navigate to the starting url(s) (might need several pages) 98 | # https://github.com/web-arena-x/webarena/blob/c6475f0e9affe5252a2966e26b8cb4c834a4ae40/browser_env/envs.py#L150 99 | if self.config["start_url"]: 100 | start_urls = self.config["start_url"].split(" |AND| ") 101 | for i, url in enumerate(start_urls): 102 | page.goto(url) 103 | if i < len(start_urls) - 1: 104 | page = page.context.new_page() 105 | 106 | # recover goal 107 | goal = self.config["intent"] 108 | 109 | # This note is present in all webarena's agent prompts 110 | # https://github.com/web-arena-x/webarena/blob/c6475f0e9affe5252a2966e26b8cb4c834a4ae40/agent/prompts/raw/p_cot_id_actree_2s.py#L34 111 | if self.with_homepage_hint: 112 | goal += f""" 113 | 114 | (Note: if you want to visit other websites, check out the homepage at {self.webarena_instance.home_url}. It has a list of websites you can visit. 
{self.webarena_instance.home_url}/password.html lists all the account name and password for the websites. You can use them to log in to the websites.) 115 | """ 116 | 117 | # This note is present in some of webarena's agent prompts 118 | if self.with_na_hint: 119 | goal += """\ 120 | 121 | If you believe the task is impossible to complete, provide the answer "N/A". 122 | """ 123 | 124 | return goal, {} 125 | -------------------------------------------------------------------------------- /browsergym/experiments/src/browsergym/experiments/agent.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from dataclasses import dataclass, field 3 | from typing import Any 4 | 5 | from browsergym.core.action.base import AbstractActionSet 6 | from browsergym.core.action.highlevel import HighLevelActionSet 7 | from browsergym.utils.obs import flatten_axtree_to_str, flatten_dom_to_str, prune_html 8 | 9 | 10 | def default_obs_preprocessor(obs: dict) -> dict: 11 | obs = obs.copy() # shallow copy to avoid modifying the original dict 12 | # augment the observation with text versions of the DOM and AXTree 13 | obs["dom_txt"] = flatten_dom_to_str(obs["dom_object"]) 14 | obs["axtree_txt"] = flatten_axtree_to_str(obs["axtree_object"]) 15 | obs["pruned_html"] = prune_html(obs["dom_txt"]) 16 | # remove raw entries that the agent won't use, and we don't want to record 17 | del obs["dom_object"] 18 | del obs["axtree_object"] 19 | return obs 20 | 21 | 22 | DEFAULT_ACTION_SET: AbstractActionSet = HighLevelActionSet() 23 | DEFAULT_OBS_PREPROCESSOR: callable = default_obs_preprocessor 24 | 25 | 26 | @dataclass 27 | class AgentInfo: 28 | think: str = None 29 | chat_messages: list = None 30 | stats: dict = field(default_factory=dict) 31 | markdown_page: str = "" 32 | html_page: str = "" 33 | extra_info: dict = None 34 | 35 | def __getitem__(self, key): 36 | return getattr(self, key) 37 | 38 | def __contains__(self, key): 
39 | return hasattr(self, key) 40 | 41 | def pop(self, key, default=None): 42 | return getattr(self, key, default) 43 | 44 | def get(self, key, default=None): 45 | return getattr(self, key, default) 46 | 47 | 48 | class Agent(ABC): 49 | """ 50 | A template class that defines the required signature of an agent interacting 51 | with a browsergym environment 52 | 53 | Attributes: 54 | action_set: AbstractActionSet 55 | Defines the set of actions that the agent can take in the environment. 56 | This property is meant to be overloaded by your agent (optional). 57 | By default, uses BrowserGym's high-level action set. 58 | """ 59 | 60 | action_set: AbstractActionSet = DEFAULT_ACTION_SET 61 | 62 | def obs_preprocessor(self, obs: dict) -> Any: 63 | """ 64 | Function that pre-processes observations before feeding them to `get_action()`. 65 | This property is meant to be overloaded by your agent (optional). 66 | By default, the base observation is augmented with text versions of the DOM and AXTREE. 67 | 68 | Why this mapping? This mapping will happen within the experiment loop, so that the 69 | resulting observation gets recorded in the execution traces, and statistics can be computed from it. 70 | """ 71 | return DEFAULT_OBS_PREPROCESSOR(obs) 72 | 73 | @abstractmethod 74 | def get_action(self, obs: Any) -> tuple[str, AgentInfo]: 75 | """ 76 | Updates the agent with the current observation, and returns its next action (plus an info dict, optional). 77 | 78 | Parameters: 79 | ----------- 80 | obs: 81 | The current observation of the environment, after it has been processed by `obs_preprocessor()`. 82 | By default, a BrowserGym observation is a dict with the following entries: 83 | - "chat_messages": list[str], messages between the agent and the user. 84 | - "goal": str, the current goal. 85 | - "open_pages_urls": list[str], open pages. 86 | - "active_page_index": int, the index of the active page. 87 | - "url": str, the current URL. 
88 | - "screenshot": 3D np.array, the current screenshot. 89 | - "dom_object": dict, the current DOM object. See DOMSnapshot from chrome devtools. 90 | - "axtree_object": dict, the current AXTREE object. See Accessibility Tree from chrome devtools. 91 | - "extra_element_properties": dict[bid, dict[name, value]] extra 92 | properties of elements in the DOM. 93 | - "focused_element_bid": str, the bid of the focused element. 94 | - "last_action": str, the last action executed. 95 | - "last_action_error": str, the error of the last action. 96 | - "elapsed_time": float, the time elapsed since the start of the episode. 97 | 98 | Returns: 99 | -------- 100 | action: str 101 | The action to be processed by `action_mapping()` (if any), and executed in the environment. 102 | info: AgentInfo 103 | Additional information about the action. with the following entries 104 | being handled by BrowserGym: 105 | - "think": optional chain of thought 106 | - "messages": list of messages with the LLM 107 | - "stats": dict of extra statistics that will be saved and 108 | aggregated. 109 | - "markdown_page": str, string that will be displayed by agentlab's xray tool. 110 | - "extra_info": dict, additional information that will be saved 111 | and aggregated. 
112 | """ 113 | -------------------------------------------------------------------------------- /browsergym/experiments/src/browsergym/experiments/benchmark/metadata/scripts.py: -------------------------------------------------------------------------------- 1 | import importlib.resources 2 | import json 3 | 4 | import numpy as np 5 | 6 | from browsergym.experiments.benchmark import task_metadata 7 | 8 | # for posterity 9 | 10 | 11 | def print_metadata_workarena(): 12 | from browsergym.workarena import ( 13 | AGENT_CURRICULUM_L2, 14 | AGENT_CURRICULUM_L3, 15 | TASK_CATEGORY_MAP, 16 | ) 17 | 18 | metadata = [("task_name", "level", "category")] 19 | 20 | for task_name, category in TASK_CATEGORY_MAP.items(): 21 | metadata.append((task_name, "l1", category)) 22 | 23 | for category, items in AGENT_CURRICULUM_L2.items(): 24 | for task_set in items["buckets"]: 25 | for task in task_set: 26 | metadata.append((task.get_task_id(), "l2", category)) 27 | 28 | for category, items in AGENT_CURRICULUM_L3.items(): 29 | for task_set in items["buckets"]: 30 | for task in task_set: 31 | metadata.append((task.get_task_id(), "l3", category)) 32 | 33 | print("\n".join([",".join(x) for x in metadata])) 34 | 35 | 36 | def print_metadata_webarena(): 37 | 38 | import webarena 39 | 40 | metadata = [ 41 | ( 42 | "task_name", 43 | "requires_reset", 44 | "sites", 45 | "eval_types", 46 | ) 47 | ] 48 | all_configs_str = importlib.resources.files(webarena).joinpath("test.raw.json").read_text() 49 | all_configs = json.loads(all_configs_str) 50 | for task in all_configs: 51 | metadata.append( 52 | ( 53 | f"webarena.{task['task_id']}", 54 | str(task["require_reset"] == True), 55 | " ".join(task["sites"]), 56 | " ".join(task["eval"]["eval_types"]), 57 | ) 58 | ) 59 | 60 | print("\n".join([",".join(x) for x in metadata])) 61 | 62 | 63 | def print_metadata_visualwebarena(): 64 | import visualwebarena 65 | 66 | metadata = [ 67 | ( 68 | "task_name", 69 | "requires_reset", 70 | "sites", 71 | 
"reasoning_difficulty", 72 | "visual_difficulty", 73 | "overall_difficulty", 74 | "eval_types", 75 | ) 76 | ] 77 | 78 | all_configs_str = ( 79 | importlib.resources.files(visualwebarena).joinpath("test_raw.json").read_text() 80 | ) 81 | all_configs = json.loads(all_configs_str) 82 | for task in all_configs: 83 | metadata.append( 84 | ( 85 | f"visualwebarena.{task['task_id']}", 86 | str(task["require_reset"] == True), 87 | " ".join(task["sites"]), 88 | task["reasoning_difficulty"], 89 | task["visual_difficulty"], 90 | task["overall_difficulty"], 91 | " ".join(task["eval"]["eval_types"]), 92 | ) 93 | ) 94 | 95 | print("\n".join([",".join(x) for x in metadata])) 96 | 97 | 98 | def print_miniwob_train_test_splits(): 99 | metadata = task_metadata("miniwob") 100 | 101 | groups = metadata["similarity_group"] 102 | group_counts = groups.value_counts(sort=False) 103 | 104 | group_counts = dict({group: count for group, count in zip(group_counts.index, group_counts)}) 105 | 106 | free_groups = set(group_counts.keys()) 107 | train_groups = set() 108 | test_groups = set() 109 | rng = np.random.RandomState(1337) 110 | 111 | # slack for train / test size equality 112 | slack = sum(group_counts.values()) % 2 113 | 114 | def move_random_group(from_groups: set, to_groups: set): 115 | # pick uniformly among tasks (weighted sampling among groups) 116 | probs = np.asarray([float(group_counts[group]) for group in from_groups]) 117 | probs = probs / probs.sum() 118 | # sample a group 119 | group = rng.choice(list(from_groups), size=1, p=probs)[0] 120 | # move between sets 121 | to_groups.add(group) 122 | from_groups.remove(group) 123 | # return group for information 124 | return group 125 | 126 | done = False 127 | while not done: 128 | n_train = sum([group_counts[group] for group in train_groups]) 129 | n_test = sum([group_counts[group] for group in test_groups]) 130 | 131 | print(f"train/test split: {n_train} <> {n_test}") 132 | 133 | # growing phase 134 | if free_groups: 135 | if 
n_train < n_test: 136 | group = move_random_group(from_groups=free_groups, to_groups=train_groups) 137 | print(f"adding {group} to train") 138 | else: 139 | group = move_random_group(from_groups=free_groups, to_groups=test_groups) 140 | print(f"adding {group} to test") 141 | 142 | # group switching phase 143 | elif n_train < n_test - slack: 144 | group = move_random_group(from_groups=test_groups, to_groups=train_groups) 145 | print(f"switching {group} from test to train") 146 | elif n_test < n_train - slack: 147 | group = move_random_group(from_groups=train_groups, to_groups=test_groups) 148 | print(f"switching {group} from train to test") 149 | 150 | # done (equilibrium) 151 | else: 152 | print("equilibrium") 153 | done = True 154 | 155 | print() 156 | 157 | metadata["browsergym_split"] = metadata["similarity_group"].apply( 158 | lambda group: "train" if group in train_groups else "test" if group in test_groups else "" 159 | ) 160 | 161 | print(metadata.to_csv(index=False)) 162 | -------------------------------------------------------------------------------- /browsergym/assistantbench/src/browsergym/assistantbench/task.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from typing import Dict, Tuple 4 | 5 | from datasets import load_dataset 6 | from playwright.sync_api import Page 7 | 8 | from browsergym.core.task import AbstractBrowserTask 9 | 10 | from .evaluation.evaluator import question_scorer 11 | from .utils import add_prediction_to_jsonl 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | _DEFAULT_OUTPUT_FILE = None 16 | 17 | 18 | def set_default_output_file(output_file: str): 19 | global _DEFAULT_OUTPUT_FILE 20 | _DEFAULT_OUTPUT_FILE = output_file 21 | 22 | 23 | def get_default_output_file(): 24 | return _DEFAULT_OUTPUT_FILE 25 | 26 | 27 | # Load dataset 28 | 29 | DATA_DATASET = "AssistantBench/AssistantBench" 30 | all_tasks = load_dataset(DATA_DATASET, trust_remote_code=True) 31 
# Extract answers and tasks for validation and test splits
def extract_data(split_name: str) -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str]]:
    """Return (gold answers, task goals, dataset ids) for one split, each
    keyed by "<split_name>.<row index>"."""
    return (
        {
            f"{split_name}.{i}": row["answer"] if row["answer"] is not None else ""
            for i, row in enumerate(all_tasks[split_name])
        },
        {f"{split_name}.{i}": row["task"] for i, row in enumerate(all_tasks[split_name])},
        {f"{split_name}.{i}": row["id"] for i, row in enumerate(all_tasks[split_name])},
    )


# Implementation data for testing
def get_implementation_testing_data() -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str]]:
    """Return a tiny hard-coded (answers, tasks, ids) triple used for testing."""
    return (
        {"imp.0": "20"},
        {
            "imp.0": "What is the weather in Paris yesterday in Celsius? Answer with the number only."
        },
        {"imp.0": "test_imp_id_0"},
    )


# Combine dev, test, and implementation-specific testing splits
gold_answers_dev, tasks_dev, ids_dev = extract_data("validation")
gold_answers_test, tasks_test, ids_test = extract_data("test")
gold_answers_impl_testing, tasks_test_impl_testing, ids_imp_testing = (
    get_implementation_testing_data()
)
gold_answers = {**gold_answers_dev, **gold_answers_test, **gold_answers_impl_testing}
tasks = {**tasks_dev, **tasks_test, **tasks_test_impl_testing}
ids = {**ids_dev, **ids_test, **ids_imp_testing}


class AssistantBenchTask(AbstractBrowserTask):

    @classmethod
    def get_task_id(cls) -> str:
        """
        Generic class for several task ids, this way of obtaining the task id is not compatible for now.
        """
        raise NotImplementedError

    def __init__(
        self, seed: int, task_id: str, output_file: str = None, save_predictions: bool = False
    ) -> None:
        """
        Args:
            seed (int): Random seed for task initialization.
            task_id (str): Unique identifier for the task (for the BrowserGym environment).
            output_file (str, optional): Path to the output file for saving results, needed for test set.
            save_predictions (bool, optional): Save predictions to the output file (yes/no).
        """
        super().__init__(seed)
        self.locale = "en-US"
        self.timezone_id = "America/New_York"

        self.task_id = task_id
        self.start_url = "https://google.com"
        # fix: key conversion was inconsistent (str() for two dicts, raw for
        # the third); task_id is already a str, so use it directly everywhere
        self.goal = tasks[self.task_id]
        self.gold = gold_answers[self.task_id]
        self.ab_task_id = ids[self.task_id]
        self.save_predictions = save_predictions

        self.output_file = output_file

        # set output_file using the global default value, if not provided in constructor
        if not self.output_file:
            self.output_file = get_default_output_file()
            # use env variable in last resort
            if not self.output_file:
                self.output_file = os.getenv("ASSISTANTBENCH_OUTPUT_FILE", None)

        if self.save_predictions and self.output_file:
            logger.info(f"Task prediction will be written to output file {self.output_file}")

    def setup(self, page: Page) -> Tuple[str, dict]:
        """Navigate to the start URL and, if enabled, register an empty
        prediction entry in the output file. Returns (goal, info)."""
        logger.info(f"Navigating to start url: {self.start_url}")
        page.goto(self.start_url, timeout=10000)
        if self.save_predictions and self.output_file:
            # create an empty task entry in the output file (will raise an Exception if the entry is already there)
            add_prediction_to_jsonl(
                file_path=self.output_file,
                task_id=self.ab_task_id,
                prediction="",
                override_if_exists=False,
            )
        return self.goal, {}

    def teardown(self) -> None:
        pass

    def validate(self, page: Page, chat_messages: list[dict]) -> Tuple[float, bool, str, dict]:
        """Score the agent's final chat answer against the gold answer.

        Returns (accuracy, done, message, info); done becomes True once the
        agent has produced an assistant message.
        """
        accuracy, done, msg, info = 0.0, False, "", {}

        # eval when the agent returns a response
        if chat_messages and chat_messages[-1]["role"] == "assistant":
            done = True
            prediction = chat_messages[-1]["message"]
            if self.save_predictions and self.output_file:
                # update the task entry in the output file
                add_prediction_to_jsonl(
                    file_path=self.output_file,
                    task_id=self.ab_task_id,
                    prediction=prediction,
                    override_if_exists=True,
                )
            # fix: the second element (has_ans) was bound but never used
            accuracy, _has_ans = question_scorer(prediction, self.gold)

        return accuracy, done, msg, info


# ------------------------------------------------------------------------------
# tests/miniwob/test_base.py (head; continues on the next chunk)
# ------------------------------------------------------------------------------
import os
import pytest
import time
import gymnasium as gym

# register gym environments
import browsergym.miniwob

from browsergym.miniwob.all import (
    ClickButtonTask,
    ClickOptionTask,
    DrawLineTask,
    LoginUserTask,
)

__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True

TASKS = [ClickButtonTask, ClickOptionTask, DrawLineTask, LoginUserTask]


@pytest.mark.parametrize("task_cls", TASKS)
def test_validate_teardown(task_cls):
    pw = browsergym.core._get_global_playwright()

    browser = pw.chromium.launch(headless=__HEADLESS, slow_mo=__SLOW_MO)
    context = browser.new_context()
    page = context.new_page()

    task = task_cls(seed=42)
    task.setup(page=page)

    reward, done, msg, info = task.validate(page, [])

    assert done is False

    task.teardown()

    context.close()
    browser.close()


@pytest.mark.parametrize("task_cls", TASKS)
def test_episode_max_time(task_cls):
    pw = browsergym.core._get_global_playwright()

    browser = pw.chromium.launch(headless=__HEADLESS, slow_mo=__SLOW_MO)
    context = browser.new_context()
    page = context.new_page()

    task = task_cls(seed=42, episode_max_time=0.2)
    task.setup(page=page)

    time.sleep(0.5)
msg, info = task.validate(page, []) 57 | 58 | assert done is True 59 | assert reward == 0 60 | 61 | task.teardown() 62 | 63 | context.close() 64 | browser.close() 65 | 66 | 67 | @pytest.mark.parametrize("task_cls", TASKS) 68 | def test_remove_human_display(task_cls): 69 | pw = browsergym.core._get_global_playwright() 70 | 71 | browser = pw.chromium.launch(headless=__HEADLESS, slow_mo=__SLOW_MO) 72 | 73 | # remove display 74 | 75 | context = browser.new_context() 76 | page = context.new_page() 77 | 78 | task = task_cls(seed=42, remove_human_display=True) 79 | task.setup(page=page) 80 | 81 | for element_id in ["reward-display", "click-canvas", "sync-task-cover"]: 82 | element_in_dom = page.evaluate(f"!!document.getElementById('{element_id}')") 83 | assert not element_in_dom 84 | 85 | assert page.evaluate(f"document.getElementById('query').innerHTML") == "" 86 | 87 | for element_id in ["wrap", "area"]: 88 | element_in_dom = page.evaluate(f"!!document.getElementById('{element_id}')") 89 | assert element_in_dom 90 | 91 | task.teardown() 92 | 93 | context.close() 94 | 95 | # keep display 96 | 97 | context = browser.new_context() 98 | page = context.new_page() 99 | 100 | task = task_cls(seed=42, remove_human_display=False) 101 | task.setup(page=page) 102 | 103 | for element_id in ["reward-display", "click-canvas", "sync-task-cover"]: 104 | element_in_dom = page.evaluate(f"!!document.getElementById('{element_id}')") 105 | assert element_in_dom 106 | 107 | assert page.evaluate(f"document.getElementById('query').innerHTML") != "" 108 | 109 | for element_id in ["wrap", "area"]: 110 | element_in_dom = page.evaluate(f"!!document.getElementById('{element_id}')") 111 | assert element_in_dom 112 | 113 | task.teardown() 114 | 115 | context.close() 116 | browser.close() 117 | 118 | 119 | @pytest.mark.skip(reason="TODO: how to get the final viewport size right?") 120 | @pytest.mark.parametrize("task_cls", TASKS) 121 | def test_viewport(task_cls): 122 | env = gym.make( 123 | 
f"browsergym/{task_cls.get_task_id()}", 124 | headless=__HEADLESS, 125 | slow_mo=__SLOW_MO, 126 | ) 127 | obs, info = env.reset(seed=42) 128 | 129 | screenshot = obs["screenshot"] 130 | 131 | # 3D array (height, width, rgb) of unsigned bytes (between 0 and 255) 132 | # Miniwob viewport should be (320x500) 133 | assert screenshot.shape[0] == 320 134 | assert screenshot.shape[1] == 500 135 | assert screenshot.shape[2] == 3 # RGB 136 | 137 | env.close() 138 | 139 | 140 | @pytest.mark.parametrize("task_cls", TASKS) 141 | def test_forbidden_navigation(task_cls): 142 | pw = browsergym.core._get_global_playwright() 143 | 144 | browser = pw.chromium.launch(headless=__HEADLESS, slow_mo=__SLOW_MO) 145 | context = browser.new_context() 146 | page = context.new_page() 147 | 148 | task = task_cls(seed=42) 149 | task.setup(page=page) 150 | 151 | reward, done, msg, info = task.validate(page, []) 152 | 153 | assert reward == 0.0 and done == False 154 | 155 | page.goto("http://www.google.com") 156 | 157 | reward, done, msg, info = task.validate(page, []) 158 | 159 | assert reward == 0.0 and done == True 160 | 161 | task.teardown() 162 | 163 | context.close() 164 | browser.close() 165 | 166 | 167 | @pytest.mark.parametrize("task_cls", TASKS) 168 | def test_forbidden_navigation_2(task_cls): 169 | pw = browsergym.core._get_global_playwright() 170 | 171 | browser = pw.chromium.launch(headless=__HEADLESS, slow_mo=__SLOW_MO) 172 | context = browser.new_context() 173 | page = context.new_page() 174 | 175 | task = task_cls(seed=42) 176 | task.setup(page=page) 177 | 178 | reward, done, msg, info = task.validate(page, []) 179 | 180 | assert reward == 0.0 and done == False 181 | 182 | page2 = context.new_page() 183 | page2.goto("http://www.google.com") 184 | 185 | reward, done, msg, info = task.validate(page, []) 186 | 187 | assert reward == 0.0 and done == False 188 | 189 | reward, done, msg, info = task.validate(page2, []) 190 | 191 | assert reward == 0.0 and done == True 192 | 193 | 
task.teardown() 194 | 195 | context.close() 196 | browser.close() 197 | --------------------------------------------------------------------------------