├── .github └── workflows │ ├── pre-commit.yml │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CITATION.cff ├── LICENSE ├── README.md ├── agent ├── __init__.py ├── agent.py └── prompts │ ├── README.md │ ├── __init__.py │ ├── prompt_constructor.py │ ├── raw │ ├── p_cot_id_actree_2s.py │ ├── p_cot_id_actree_2s_no_na.py │ ├── p_direct_id_actree_2s.py │ ├── p_direct_id_actree_2s_no_na.py │ └── p_direct_id_actree_3s_llama.py │ └── to_json.py ├── browser_env ├── __init__.py ├── actions.py ├── async_envs.py ├── auto_login.py ├── constants.py ├── env_config.py ├── envs.py ├── helper_functions.py ├── processors.py ├── py.typed ├── trajectory.py └── utils.py ├── check_errors.sh ├── config_files ├── examples │ ├── 1.json │ ├── 2.json │ ├── 3.json │ └── 4.json └── test.raw.json ├── environment_docker ├── README.md └── webarena-homepage │ ├── app.py │ ├── static │ └── figures │ │ ├── calculator.png │ │ ├── cms.png │ │ ├── gitlab.png │ │ ├── manual1.png │ │ ├── manual2.png │ │ ├── map.png │ │ ├── onestopshop.png │ │ ├── password.png │ │ ├── reddit.png │ │ ├── scratchpad.png │ │ └── wikipedia.png │ └── templates │ ├── calculator.html │ ├── index.html │ └── scratchpad.html ├── evaluation_harness ├── __init__.py ├── evaluators.py └── helper_functions.py ├── llms ├── __init__.py ├── lm_config.py ├── providers │ ├── hf_utils.py │ └── openai_utils.py ├── tokenizers.py └── utils.py ├── media ├── example_trace_viewer.png ├── homepage_demo.png ├── logo.png ├── overview.png ├── v1_result.png └── v2_result.png ├── minimal_example.py ├── parallel_run.sh ├── prepare.sh ├── requirements.txt ├── resources └── README.md ├── run.py ├── scripts ├── check_error_runs.py ├── collect_obs.py ├── generate_test_data.py ├── html2json.py └── webarena-zeno.ipynb ├── setup.cfg ├── setup.py ├── setup_env.sh ├── tests ├── conftest.py └── test_browser_env │ ├── test_action_functionalities.py │ ├── test_actions.py │ ├── test_auth_cookie.py │ ├── test_playwright_actions.py │ └── test_script_browser_env.py └── webarena-map-backend-boot-init.yaml /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/.github/workflows/pre-commit.yml -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/.github/workflows/tests.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/CITATION.cff -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/README.md -------------------------------------------------------------------------------- /agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/agent/__init__.py -------------------------------------------------------------------------------- /agent/agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/agent/agent.py -------------------------------------------------------------------------------- /agent/prompts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/agent/prompts/README.md -------------------------------------------------------------------------------- /agent/prompts/__init__.py: -------------------------------------------------------------------------------- 1 | from .prompt_constructor import * 2 | -------------------------------------------------------------------------------- /agent/prompts/prompt_constructor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/agent/prompts/prompt_constructor.py -------------------------------------------------------------------------------- /agent/prompts/raw/p_cot_id_actree_2s.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/agent/prompts/raw/p_cot_id_actree_2s.py -------------------------------------------------------------------------------- /agent/prompts/raw/p_cot_id_actree_2s_no_na.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/agent/prompts/raw/p_cot_id_actree_2s_no_na.py -------------------------------------------------------------------------------- /agent/prompts/raw/p_direct_id_actree_2s.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/agent/prompts/raw/p_direct_id_actree_2s.py -------------------------------------------------------------------------------- /agent/prompts/raw/p_direct_id_actree_2s_no_na.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/agent/prompts/raw/p_direct_id_actree_2s_no_na.py -------------------------------------------------------------------------------- /agent/prompts/raw/p_direct_id_actree_3s_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/agent/prompts/raw/p_direct_id_actree_3s_llama.py -------------------------------------------------------------------------------- /agent/prompts/to_json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/agent/prompts/to_json.py -------------------------------------------------------------------------------- /browser_env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/browser_env/__init__.py -------------------------------------------------------------------------------- /browser_env/actions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/browser_env/actions.py -------------------------------------------------------------------------------- /browser_env/async_envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/browser_env/async_envs.py -------------------------------------------------------------------------------- /browser_env/auto_login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/browser_env/auto_login.py -------------------------------------------------------------------------------- /browser_env/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/browser_env/constants.py -------------------------------------------------------------------------------- /browser_env/env_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/browser_env/env_config.py -------------------------------------------------------------------------------- /browser_env/envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/browser_env/envs.py -------------------------------------------------------------------------------- /browser_env/helper_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/browser_env/helper_functions.py -------------------------------------------------------------------------------- /browser_env/processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/browser_env/processors.py -------------------------------------------------------------------------------- /browser_env/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /browser_env/trajectory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/browser_env/trajectory.py -------------------------------------------------------------------------------- /browser_env/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/browser_env/utils.py -------------------------------------------------------------------------------- /check_errors.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/check_errors.sh -------------------------------------------------------------------------------- /config_files/examples/1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/config_files/examples/1.json -------------------------------------------------------------------------------- /config_files/examples/2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/config_files/examples/2.json -------------------------------------------------------------------------------- /config_files/examples/3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/config_files/examples/3.json -------------------------------------------------------------------------------- /config_files/examples/4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/config_files/examples/4.json -------------------------------------------------------------------------------- /config_files/test.raw.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/config_files/test.raw.json -------------------------------------------------------------------------------- /environment_docker/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/environment_docker/README.md -------------------------------------------------------------------------------- /environment_docker/webarena-homepage/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/environment_docker/webarena-homepage/app.py -------------------------------------------------------------------------------- /environment_docker/webarena-homepage/static/figures/calculator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/environment_docker/webarena-homepage/static/figures/calculator.png -------------------------------------------------------------------------------- /environment_docker/webarena-homepage/static/figures/cms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/environment_docker/webarena-homepage/static/figures/cms.png -------------------------------------------------------------------------------- /environment_docker/webarena-homepage/static/figures/gitlab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/environment_docker/webarena-homepage/static/figures/gitlab.png -------------------------------------------------------------------------------- /environment_docker/webarena-homepage/static/figures/manual1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/environment_docker/webarena-homepage/static/figures/manual1.png -------------------------------------------------------------------------------- /environment_docker/webarena-homepage/static/figures/manual2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/environment_docker/webarena-homepage/static/figures/manual2.png -------------------------------------------------------------------------------- /environment_docker/webarena-homepage/static/figures/map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/environment_docker/webarena-homepage/static/figures/map.png -------------------------------------------------------------------------------- /environment_docker/webarena-homepage/static/figures/onestopshop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/environment_docker/webarena-homepage/static/figures/onestopshop.png -------------------------------------------------------------------------------- /environment_docker/webarena-homepage/static/figures/password.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/environment_docker/webarena-homepage/static/figures/password.png -------------------------------------------------------------------------------- /environment_docker/webarena-homepage/static/figures/reddit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/environment_docker/webarena-homepage/static/figures/reddit.png -------------------------------------------------------------------------------- /environment_docker/webarena-homepage/static/figures/scratchpad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/environment_docker/webarena-homepage/static/figures/scratchpad.png -------------------------------------------------------------------------------- /environment_docker/webarena-homepage/static/figures/wikipedia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/environment_docker/webarena-homepage/static/figures/wikipedia.png -------------------------------------------------------------------------------- /environment_docker/webarena-homepage/templates/calculator.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/environment_docker/webarena-homepage/templates/calculator.html -------------------------------------------------------------------------------- /environment_docker/webarena-homepage/templates/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/environment_docker/webarena-homepage/templates/index.html -------------------------------------------------------------------------------- /environment_docker/webarena-homepage/templates/scratchpad.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/environment_docker/webarena-homepage/templates/scratchpad.html -------------------------------------------------------------------------------- /evaluation_harness/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/evaluation_harness/__init__.py -------------------------------------------------------------------------------- /evaluation_harness/evaluators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/evaluation_harness/evaluators.py -------------------------------------------------------------------------------- /evaluation_harness/helper_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/evaluation_harness/helper_functions.py -------------------------------------------------------------------------------- /llms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/llms/__init__.py -------------------------------------------------------------------------------- /llms/lm_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/llms/lm_config.py -------------------------------------------------------------------------------- /llms/providers/hf_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/llms/providers/hf_utils.py -------------------------------------------------------------------------------- /llms/providers/openai_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/llms/providers/openai_utils.py -------------------------------------------------------------------------------- /llms/tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/llms/tokenizers.py -------------------------------------------------------------------------------- /llms/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/llms/utils.py -------------------------------------------------------------------------------- /media/example_trace_viewer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/media/example_trace_viewer.png -------------------------------------------------------------------------------- /media/homepage_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/media/homepage_demo.png -------------------------------------------------------------------------------- /media/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/media/logo.png -------------------------------------------------------------------------------- /media/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/media/overview.png -------------------------------------------------------------------------------- /media/v1_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/media/v1_result.png -------------------------------------------------------------------------------- /media/v2_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/media/v2_result.png -------------------------------------------------------------------------------- /minimal_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/minimal_example.py -------------------------------------------------------------------------------- /parallel_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/parallel_run.sh -------------------------------------------------------------------------------- /prepare.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/prepare.sh -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/requirements.txt -------------------------------------------------------------------------------- /resources/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/resources/README.md -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/run.py -------------------------------------------------------------------------------- /scripts/check_error_runs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/scripts/check_error_runs.py -------------------------------------------------------------------------------- /scripts/collect_obs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/scripts/collect_obs.py -------------------------------------------------------------------------------- /scripts/generate_test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/scripts/generate_test_data.py -------------------------------------------------------------------------------- /scripts/html2json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/scripts/html2json.py -------------------------------------------------------------------------------- /scripts/webarena-zeno.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/scripts/webarena-zeno.ipynb -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/setup.py -------------------------------------------------------------------------------- /setup_env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/setup_env.sh -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/test_browser_env/test_action_functionalities.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/tests/test_browser_env/test_action_functionalities.py -------------------------------------------------------------------------------- /tests/test_browser_env/test_actions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/tests/test_browser_env/test_actions.py -------------------------------------------------------------------------------- /tests/test_browser_env/test_auth_cookie.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/tests/test_browser_env/test_auth_cookie.py -------------------------------------------------------------------------------- /tests/test_browser_env/test_playwright_actions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/tests/test_browser_env/test_playwright_actions.py -------------------------------------------------------------------------------- /tests/test_browser_env/test_script_browser_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/tests/test_browser_env/test_script_browser_env.py -------------------------------------------------------------------------------- /webarena-map-backend-boot-init.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/web-arena-x/webarena/HEAD/webarena-map-backend-boot-init.yaml --------------------------------------------------------------------------------