├── LICENSE
├── README.md
├── assets
    └── framework.png
├── autowebbench
    ├── en
    │   ├── ind
    │   │   └── test.json
    │   └── ood
    │   │   └── test.json
    └── zh
    │   ├── ind
    │       └── test.json
    │   └── ood
    │       └── test.json
├── eval.py
├── mind2web
    ├── domain
    │   └── test.json
    ├── task
    │   └── test.json
    └── website
    │   └── test.json
├── miniwob++
    ├── .gitignore
    ├── README.md
    ├── html_tools
    │   ├── __init__.py
    │   ├── configs
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   └── html_prompt.py
    │   ├── html_parser.py
    │   ├── identifier.py
    │   ├── prompt.py
    │   └── utils.py
    ├── install_dependency.sh
    ├── llms
    │   ├── __init__.py
    │   ├── call.py
    │   └── providers
    │   │   ├── __init__.py
    │   │   └── gpt.py
    ├── main.py
    ├── miniwob_tools
    │   ├── __init__.py
    │   ├── action.py
    │   ├── configs
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   └── prompt.py
    │   └── utils.py
    ├── monitor.py
    ├── requirements.txt
    └── setup.sh
└── webarena
    ├── .github
        └── workflows
        │   ├── pre-commit.yml
        │   └── tests.yml
    ├── .gitignore
    ├── .pre-commit-config.yaml
    ├── CITATION.cff
    ├── LICENSE
    ├── README.md
    ├── agent
        ├── __init__.py
        ├── agent.py
        └── prompts
        │   ├── README.md
        │   ├── __init__.py
        │   ├── prompt_constructor.py
        │   ├── raw
        │       ├── new_action_prompt.py
        │       ├── p_cot_id_actree_2s.py
        │       ├── p_cot_id_actree_2s_no_na.py
        │       ├── p_direct_id_actree_2s.py
        │       ├── p_direct_id_actree_2s_no_na.py
        │       ├── p_direct_id_actree_3s_llama.py
        │       └── test_prompt.py
        │   └── to_json.py
    ├── browser_env
        ├── __init__.py
        ├── actions.py
        ├── async_envs.py
        ├── auto_login.py
        ├── constants.py
        ├── env_config.py
        ├── envs.py
        ├── helper_functions.py
        ├── html_tools
        │   ├── __init__.py
        │   ├── configs
        │   │   ├── __init__.py
        │   │   ├── config.py
        │   │   └── html_prompt.py
        │   ├── html_parser.py
        │   ├── identifier.py
        │   ├── prompt.py
        │   └── utils.py
        ├── processors.py
        ├── py.typed
        ├── scripts
        │   ├── __init__.py
        │   ├── canva_handler.js
        │   ├── get_data.js
        │   ├── get_text.js
        │   ├── label_marker.js
        │   ├── local_marker.js
        │   └── mix_marker.js
        ├── trajectory.py
        └── utils.py
    ├── check_errors.sh
    ├── config_files
        ├── examples
        │   ├── 1.json
        │   ├── 2.json
        │   ├── 3.json
        │   └── 4.json
        └── test.raw.json
    ├── environment_docker
        ├── README.md
        └── webarena-homepage
        │   ├── app.py
        │   ├── static
        │       └── figures
        │       │   ├── calculator.png
        │       │   ├── cms.png
        │       │   ├── gitlab.png
        │       │   ├── manual1.png
        │       │   ├── manual2.png
        │       │   ├── map.png
        │       │   ├── onestopshop.png
        │       │   ├── password.png
        │       │   ├── reddit.png
        │       │   ├── scratchpad.png
        │       │   └── wikipedia.png
        │   └── templates
        │       ├── calculator.html
        │       ├── index.html
        │       └── scratchpad.html
    ├── evaluation_harness
        ├── __init__.py
        ├── evaluators.py
        └── helper_functions.py
    ├── llms
        ├── __init__.py
        ├── lm_config.py
        ├── providers
        │   ├── hf_utils.py
        │   ├── openai_utils.py
        │   └── ours.py
        ├── tokenizers.py
        └── utils.py
    ├── media
        ├── example_trace_viewer.png
        ├── homepage_demo.png
        ├── logo.png
        ├── overview.png
        ├── v1_result.png
        └── v2_result.png
    ├── minimal_example.py
    ├── parallel_run.sh
    ├── prepare.sh
    ├── requirements.txt
    ├── resources
        └── README.md
    ├── run.py
    ├── scripts
        ├── check_error_runs.py
        ├── collect_obs.py
        ├── generate_test_data.py
        ├── html2json.py
        └── webarena-zeno.ipynb
    ├── setup.cfg
    ├── setup.py
    ├── solver
        ├── __init__.py
        ├── shopping_admin.py
        └── utils.py
    └── tests
        ├── conftest.py
        ├── test_browser_env
            ├── test_action_functionalities.py
            ├── test_actions.py
            ├── test_auth_cookie.py
            ├── test_playwright_actions.py
            └── test_script_browser_env.py
        └── test_evaluation_harness
            ├── configs
                ├── func_eval_fail.json
                ├── func_eval_success.json
                ├── func_url_func_1.json
                ├── func_url_func_2.json
                ├── html_content_element_exact_match.json
                ├── html_content_exact_match.json
                ├── html_content_url_comb.json
                ├── string_match.json
                └── url_exact_match.json
            ├── test_evaluators.py
            └── test_helper_functions.py


/README.md:
--------------------------------------------------------------------------------
 1 | <h1>AutoWebGLM: A Large Language Model-based Web Navigating Agent</h1>
 2 | 
 3 | This is the official implementation of AutoWebGLM. If you find our open-sourced efforts useful, please 🌟 the repo to support our ongoing development!
 4 | 
 5 | # Overview
 6 | 
 7 | ![paper](./assets/framework.png)
 8 | 
 9 | AutoWebGLM is a project aimed at building a more efficient language model-driven automated web navigation agent. This project is built on top of the ChatGLM3-6B model, extending its capabilities to navigate the web more effectively and tackle real-world browsing challenges better. 
10 | 
11 | ## Features
12 | 
13 | -   **HTML Simplification Algorithm**: Inspired by human browsing patterns, we've designed an algorithm to simplify HTML, making webpages more digestible for LLM agents while preserving crucial information.
14 | -   **Hybrid Human-AI Training**: We combine human and AI knowledge to build web browsing data for curriculum training, enhancing the model's practical navigation skills.
15 | -   **Reinforcement Learning and Rejection Sampling**: We enhance the model's webpage comprehension, browser operations, and efficient task decomposition abilities by bootstrapping it with reinforcement learning and rejection sampling.
16 | -   **Bilingual Web Navigation Benchmark**: We introduce AutoWebBench—a bilingual (Chinese and English) benchmark for real-world web browsing tasks. This benchmark provides a robust tool for testing and refining the capabilities of AI web navigation agents.
17 | 
18 | # Evaluation
19 | 
20 | We have publicly disclosed our evaluation code, data, and environment. You may conduct the experiment using the following code.
21 | 
22 | ## AutoWebBench & Mind2Web
23 | 
24 | You can find our evaluation datasets at <a href="./autowebbench/" alt="autowebbench">AutoWebBench</a> and <a href="./mind2web/" alt="mind2web">Mind2Web</a>. 
25 | For the code to perform model inference, please refer to <a href="https://huggingface.co/THUDM/chatglm3-6b" alt="chatglm3-6b">ChatGLM3-6B</a>.
26 | After obtaining the output file, the score can be obtained through ```python eval.py [result_path]```.
27 | 
28 | ## WebArena
29 | 
30 | We have made modifications to the WebArena environment to fit the interaction of our system; see <a href="./webarena/" alt="webarena">WebArena</a>. The modifications and execution instructions can be found in <a href="./webarena/README.md" alt="readme">README</a>.
31 | 
32 | ## MiniWob++
33 | 
34 | We have also made modifications to the MiniWob++ environment; see <a href="./miniwob++/" alt="miniwob++">MiniWob++</a>. The modifications and execution instructions can be found in <a href="./miniwob++/README.md" alt="readme">README</a>.
35 | 
36 | # License
37 | 
38 | This repository is licensed under the [Apache-2.0 License](LICENSE). All open-sourced data is for research purposes only.
39 | 
40 | # Citation
41 | If you use this code for your research, please cite our paper.
42 | 
43 | ```
44 | @inproceedings{lai2024autowebglm,
45 |     author = {Lai, Hanyu and Liu, Xiao and Iong, Iat Long and Yao, Shuntian and Chen, Yuxuan and Shen, Pengbo and Yu, Hao and Zhang, Hanchen and Zhang, Xiaohan and Dong, Yuxiao and Tang, Jie},
46 |     title = {AutoWebGLM: A Large Language Model-based Web Navigating Agent},
47 |     booktitle = {Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
48 |     pages = {5295--5306},
49 |     year = {2024}
50 | }
51 | ```
52 | 


--------------------------------------------------------------------------------
/assets/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/assets/framework.png


--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import sys
  3 | import re
  4 | import numpy as np
  5 | 
  6 | from rouge_chinese import Rouge
  7 | import jieba # you can use any other word cutting library
  8 | 
  9 | def get_rouge_score(hypothesis, reference):
 10 |     if hypothesis is None or reference is None:
 11 |         return None
 12 | 
 13 |     hypothesis = ' '.join(jieba.cut(hypothesis)) 
 14 |     reference = ' '.join(jieba.cut(reference))
 15 | 
 16 |     rouge = Rouge()
 17 |     scores = rouge.get_scores(hypothesis, reference)
 18 | 
 19 |     return scores[0]["rouge-1"]['f']
 20 | 
 21 | def parse_function_call(function_call):
 22 |     pattern = r"(\w+)\((.*)\)"
 23 |     match = re.match(pattern, function_call)
 24 | 
 25 |     if match:
 26 |         function_name = match.group(1)
 27 |         
 28 |         def return_args(*args):
 29 |             return args
 30 |         
 31 |         function_args = eval(f'return_args({match.group(2)})')
 32 | 
 33 |         return function_name, function_args
 34 | 
 35 |     return None
 36 | 
 37 | def extract(text):
 38 |     ans = {
 39 |         'type': None,
 40 |         'label': None,
 41 |         'param': None
 42 |     }
 43 |     
 44 |     match = parse_function_call(text)
 45 |     if match:
 46 |         ans['type'] = match[0]
 47 |         args = match[1]
 48 | 
 49 |     if ans['type']:
 50 |         if ans['type'] == 'click':
 51 |             ans['label'] = args[0]
 52 |         elif ans['type'] == 'hover':
 53 |             ans['label'] = args[0]
 54 |         elif ans['type'] == 'select':
 55 |             ans['label'] = args[0]
 56 |             ans['param'] = args[1]
 57 |         elif ans['type'] == 'type_string':
 58 |             ans['label'] = args[0]
 59 |             ans['param'] = args[1]
 60 |         elif ans['type'] == 'scroll_page':
 61 |             ans['param'] = args[0]
 62 |         elif ans['type'] == 'go':
 63 |             ans['param'] = args[0]
 64 |         elif ans['type'] == 'jump_to':
 65 |             ans['param'] = args[0]
 66 |         elif ans['type'] == 'switch_tab':
 67 |             ans['param'] = args[0]
 68 |         elif ans['type'] == 'user_input':
 69 |             ans['param'] = args[0]
 70 |         elif ans['type'] == 'finish':
 71 |             ans['param'] = args[0]
 72 | 
 73 |     return ans
 74 | 
 75 | if __name__ == '__main__':
 76 |     result_path = sys.argv[1]
 77 |     res_list = {
 78 |         'type': [],
 79 |         'label': [],
 80 |         'param': [],
 81 |         'all': []
 82 |     }
 83 | 
 84 |     for ix, r_str in enumerate(open(result_path).readlines()):
 85 |         r = json.loads(r_str)
 86 |         try:
 87 |             labels = json.loads(r['labels'])
 88 |         except:
 89 |             labels = [r['labels']]
 90 | 
 91 |         res = {}
 92 | 
 93 |         for label in labels:
 94 |             pred = r['predict'].split('A: ')[-1].strip()
 95 |             try:
 96 |                 label_ans = extract(label)
 97 |                 pred_ans = extract(pred)
 98 |             except:
 99 |                 continue
100 | 
101 |             print(f'{ix}. label:', label_ans)
102 |             print(f'{ix}. pred:', pred_ans)
103 | 
104 |             if label_ans['type'] is not None:
105 |                 if label_ans['type'] == pred_ans['type']:
106 |                     res['type'] = 1
107 |                 else:
108 |                     res['type'] = 0
109 |             
110 |             if label_ans['label'] is not None:
111 |                 if label_ans['label'] == pred_ans['label']:
112 |                     res['label'] = 1
113 |                 else:
114 |                     res['label'] = 0
115 |                 
116 |             if label_ans['param'] is not None:
117 |                 rouge = get_rouge_score(label_ans['param'], pred_ans['param'])
118 |                 if rouge:
119 |                     res['param'] = rouge
120 | 
121 |             if label_ans['type'] is not None and label_ans['label'] is not None:
122 |                 if label_ans['type'] == pred_ans['type'] and label_ans['label'] == pred_ans['label']:
123 |                     res['all'] = 1
124 |                     break
125 |                 else:
126 |                     res['all'] = 0
127 |         
128 |         for k, v in res.items():
129 |             res_list[k].append(v)
130 | 
131 |     for k, v in res_list.items():
132 |         if v:
133 |             res_list[k] = float(np.mean(v))
134 |         else:
135 |             res_list[k] = 0.0
136 |     
137 |     print(res_list)


--------------------------------------------------------------------------------
/miniwob++/.gitignore:
--------------------------------------------------------------------------------
 1 | __pycache__/
 2 | *.pyc
 3 | 
 4 | log_files/
 5 | result/
 6 | raw/
 7 | 
 8 | *.png
 9 | test.py
10 | 
11 | .DS_Store
12 | *.deb


--------------------------------------------------------------------------------
/miniwob++/README.md:
--------------------------------------------------------------------------------
 1 | # MiniWoB++
 2 | 
 3 | ## Usage
 4 | 
 5 | ```shell
 6 | pip install -r requirements.txt
 7 | python main.py [cudas] [test-amount] [model-path] [result-path]
 8 | ```
 9 | 
10 | ### Parameter Description
11 | 
12 | | Parameter   | Format       | Mandatory | Use                                                        |
13 | | ----------- | ------------ | --------- | ---------------------------------------------------------- |
14 | | cudas       | 0,1,2        | Yes       | The GPU number to be used, separated by commas, no spaces  |
15 | | test-amount | 10           | Yes       | Number of test cases per task, the paper uses 100, but generally, 10 groups are more reasonable for efficiency |
16 | | model-path  | model_path/  | Yes       | Path to the model to be tested, if set to 'manual' then manual execution can be performed |
17 | | result-path | result/      | Yes       | Location for the model's output (Tasks that have been completed in the same path **will not** be executed again) |
18 | 
19 | ## Results
20 | 
21 | After running the above command, you should see a `log_files` folder appear in the current directory. The `**.log` files inside are the run results. When a task is completed, you should see the following output, where the result represents the test case score, which can be 0 or 1:
22 | 
23 | ```sh
24 | 2023-11-30 06:28:13,283 - INFO - {"task": "click-button", "case_id": 10, "result": 1.0}
25 | ```
26 | 
27 | When all test cases for a group of tasks have been run, the following record will be output in the log:
28 | 
29 | ```sh
30 | 2023-11-30 07:10:13,593 - INFO - {"task": "grid-coordinate", "avg_score": 0.3}
31 | ```
32 | 
33 | When all tasks in a process are completed, the log will record the following information:
34 | 
35 | ```sh
36 | 2023-11-30 07:10:13,836 - INFO - ------
37 | 2023-11-30 07:10:13,836 - INFO - click-button-sequence            1.00
38 | 2023-11-30 07:10:13,836 - INFO - click-checkboxes                 0.62
39 | 2023-11-30 07:10:13,837 - INFO - click-checkboxes-large           0.07
40 | 2023-11-30 07:10:13,837 - INFO - click-color                      0.24
41 | ... (50 lines omitted)
42 | 2023-11-30 07:10:13,839 - INFO - enter-date                       1.00
43 | 2023-11-30 07:10:13,839 - INFO - grid-coordinate                  0.30
44 | 2023-11-30 07:10:13,839 - INFO - all                             0.442
45 | ```
46 | 


--------------------------------------------------------------------------------
/miniwob++/html_tools/__init__.py:
--------------------------------------------------------------------------------
1 | from .identifier import IdentifierTool
2 | from .prompt import HtmlPrompt
3 | from .html_parser import HtmlParser
4 | 
5 | from .utils import print_html_object
6 | from .configs import basic_attrs, mind2web_keep_attrs


--------------------------------------------------------------------------------
/miniwob++/html_tools/configs/__init__.py:
--------------------------------------------------------------------------------
1 | from .html_prompt import prompts
2 | from .config import basic_attrs, mind2web_keep_attrs, miniwob_attrs
3 | from .config import config_meta


--------------------------------------------------------------------------------
/miniwob++/html_tools/configs/config.py:
--------------------------------------------------------------------------------
 1 | basic_attrs = [
 2 |     'title',
 3 |     'value',
 4 |     'placeholder',
 5 | ]
 6 |     
 7 | mind2web_keep_attrs = [
 8 |     'alt',
 9 |     'aria_description',
10 |     'aria_label',
11 |     'aria_role',
12 |     'input_checked',
13 |     'input_value',
14 |     'label',
15 |     'name',
16 |     'option_selected',
17 |     'placeholder',
18 |     'role',
19 |     'text_value',
20 |     'title',
21 |     'type',
22 |     'value',
23 | ]
24 | 
25 | miniwob_attrs = [
26 |     'id',
27 |     'type',
28 |     'value',
29 | ]
30 | 
31 | config_meta = """
32 | ======= Configs =======
33 | Columns:
34 |   - id:        {id_attr}
35 |   - label:     {label_attr}
36 | Position:      {use_position}
37 |   - window:    {window_size}
38 |   - rect_dict: {rect}
39 | Keep:
40 |   - parents:   {parent_chain}
41 |   - attrs:     {keep_attrs}
42 |   - elems:     {keep_elem}
43 |   - obs_elem:  {obs_elem}
44 | Generator:
45 |   - prompt:    {prompt_name}
46 |   - label:     {identifier_name}
47 | ========================
48 | """


--------------------------------------------------------------------------------
/miniwob++/html_tools/configs/html_prompt.py:
--------------------------------------------------------------------------------
 1 | refine_prompt = {
 2 |     'dom': '<{tag}{label}|{attr}{content}{subtree} >',
 3 |     'label': '[{label}]',
 4 |     'attr': '{attr}',
 5 |     'attr_splitter': '; ',
 6 |     'subtree_splitter': ' ',
 7 | }
 8 | 
 9 | xml_prompt = {
10 |     'dom': '<{tag}{label}{attr}>{content}{subtree} </{tag}>',
11 |     'label': ' id="{label}"',
12 |     'attr': '{key}="{attr}"',
13 |     'attr_splitter': ' ',
14 |     'subtree_splitter': ' ',
15 | }
16 | 
17 | prompts = {
18 |     'refine': refine_prompt,
19 |     'xml': xml_prompt,
20 |     'new_data': refine_prompt, 
21 | }
22 |     


--------------------------------------------------------------------------------
/miniwob++/html_tools/identifier.py:
--------------------------------------------------------------------------------
 1 | import secrets
 2 | 
 3 | class IdentifierTool:
 4 |     def __init__(self, method: str='order', existing_labels: dict[str]={}) -> None:
 5 |         self.methods = {
 6 |             'order': self.get_identifier_in_order,
 7 |             'random': self.get_random_identifier,
 8 |         }
 9 |         
10 |         if method is None:
11 |             method = 'order'
12 |             
13 |         self.func = self.methods.get(method, None)
14 |         self.name = method
15 |         if self.func is None:
16 |             raise ValueError(f'Invalid method for identifier: {method}')
17 |         
18 |         self.reset(existing_labels)
19 |     
20 |     def reset(self, exists: dict[str]={}) -> None:
21 |         self.identifier = -1
22 |         self.exists = {} if exists is None else exists
23 |         
24 |     def get_identifier_in_order(self) -> str:
25 |         def id2str(id: int) -> str:
26 |             if id < 26:
27 |                 return chr(id + 65)
28 |             id -= 26
29 |             c0 = id // 676
30 |             c1 = (id // 26) % 26
31 |             c2 = id % 26
32 |             label = f'{chr(c1 + 65)}{chr(c2 + 65)}'
33 |             return label if c0 == 0 else f'{chr(c0 + 64)}{label}'
34 |         
35 |         self.identifier += 1
36 |         label = id2str(self.identifier)
37 |         
38 |         while label in self.exists:
39 |             self.identifier += 1
40 |             label = id2str(self.identifier)
41 |         
42 |         self.exists[label] = True
43 |         return label
44 |     
45 |     def get_random_identifier(self) -> str:
46 |         secret_generator = secrets.SystemRandom()
47 |         
48 |         def get_random_label(n: int=2) -> str:
49 |             tmp = ''
50 |             for _ in range(n):
51 |                 tmp += chr(secret_generator.randint(65, 90))
52 |             return tmp
53 | 
54 |         wc = 3 if len(self.exists) > 280 else 2
55 | 
56 |         label = get_random_label(wc)
57 |         while label in self.exists:
58 |             label = get_random_label(wc)
59 |         
60 |         self.exists[label] = True
61 |         return label
62 |         
63 |     def generate(self):
64 |         return self.func()


--------------------------------------------------------------------------------
/miniwob++/html_tools/prompt.py:
--------------------------------------------------------------------------------
 1 | from .configs import prompts
 2 | 
 3 | class HtmlPrompt:    
 4 |     def __init__(self, prompt: str='') -> None:
 5 |         prompt = self.extract(prompt, 'xml')
 6 |         if prompt not in prompts:
 7 |             raise Exception('Unknown prompt: ' + prompt)
 8 |         
 9 |         constructors = {
10 |             'refine': self.normal_prompt_constructor,
11 |             'xml': self.normal_prompt_constructor,
12 |             'new_data': self.new_data_prompt_constructor,
13 |         }
14 | 
15 |         self.name = prompt
16 |         self.prompt = prompts[prompt]
17 |         self.constructor = constructors[prompt]
18 | 
19 |     @staticmethod
20 |     def extract(data, default=''):
21 |         return data if data is not None else default
22 |     
23 |     def subtree_constructor(self, subtree: list[str]=[]) -> str:
24 |         return self.prompt['subtree_splitter'].join(subtree)
25 | 
26 |     def normal_prompt_constructor(self, tag: str='', label: str='', content: str='', subtree_str: str='', class_dict: dict[str]={}) -> str:
27 |         def add_prefix(data, prefix):
28 |             return prefix + data if len(data) > 0 else ''
29 |         
30 |         tag = self.extract(tag)
31 |         label = self.extract(label)
32 |         content = self.extract(content)
33 |         subtree_str = self.extract(subtree_str, '')
34 |         class_dict = self.extract(class_dict, {})
35 |         
36 |         label_str = ''
37 |         if len(label) > 0:
38 |             label_str = self.prompt['label'].format(label=label)
39 |         
40 |         classes = []
41 |         values = set()
42 |         for key, val in class_dict.items():
43 |             if val in values:
44 |                 continue
45 |             values.add(val)
46 |             classes.append(self.prompt['attr'].format(key=key, attr=val))
47 |         classes_str = self.prompt['attr_splitter'].join(classes)
48 |         
49 |         content_splitter = ' ' if len(classes_str) == 0 else self.prompt['attr_splitter']
50 |         classes_str = add_prefix(classes_str, ' ')
51 |         content_str = add_prefix(content, content_splitter)
52 |         subtree_str = add_prefix(subtree_str, ' ')
53 | 
54 |         return self.prompt['dom'].format(tag=tag, label=label_str, attr=classes_str, content=content_str, subtree=subtree_str)
55 |     
56 |     def new_data_prompt_constructor(self, tag: str='', label: str='', content: str='', subtree_str: str='', class_dict: dict[str]={}) -> str:
57 |         def add_prefix(data, prefix):
58 |             return prefix + data if len(data) > 0 else ''
59 |         
60 |         tag = self.extract(tag)
61 |         label = self.extract(label)
62 |         content = self.extract(content)
63 |         subtree_str = self.extract(subtree_str, '')
64 |         class_dict = self.extract(class_dict, {})
65 |         
66 |         label_str = ''
67 |         if len(label) > 0:
68 |             label_str = self.prompt['label'].format(label=label)
69 |         
70 |         classes = []
71 |         values = set()
72 |         
73 |         message = []
74 |         for key, val in class_dict.items():
75 |             if val == '':
76 |                 message.append(key)
77 |                 continue
78 |             if val in values:
79 |                 continue
80 |             values.add(val)
81 |             classes.append(self.prompt['attr'].format(key=key, attr=val))
82 |         
83 |         if len(message) > 0:
84 |             message_str = ' '.join(message)
85 |             classes.append(self.prompt['attr'].format(key='message', attr=message_str))
86 |             
87 |         classes_str = self.prompt['attr_splitter'].join(classes)
88 |         
89 |         content_splitter = ' ' if len(classes_str) == 0 else self.prompt['attr_splitter']
90 |         classes_str = add_prefix(classes_str, ' ')
91 |         content_str = add_prefix(content, content_splitter)
92 |         subtree_str = add_prefix(subtree_str, ' ')
93 | 
94 |         return self.prompt['dom'].format(tag=tag, label=label_str, attr=classes_str, content=content_str, subtree=subtree_str)
95 | 
96 |     def prompt_constructor(self, tag: str='', label: str='', content: str='', subtree_str: str='', class_dict: dict[str]={}) -> str:
97 |         return self.constructor(tag, label, content, subtree_str, class_dict)


--------------------------------------------------------------------------------
/miniwob++/html_tools/utils.py:
--------------------------------------------------------------------------------
 1 | from lxml import html
 2 | def get_xpath_top_down(element: html.HtmlElement, id_column: str='temp_id', path: str='', order: int=0, 
 3 |                         in_svg: bool=False, temp_id: int=0, i2xpath: dict[str, str]={}) -> tuple[int, dict[str, str], dict[str]]:
 4 |     used_labels = {}
 5 |     # path
 6 |     tag = element.tag.lower()
 7 |     in_svg = in_svg or (tag == 'svg')
 8 |     
 9 |     if not in_svg and 'id' in element.attrib:
10 |         node_id = element.attrib['id']
11 |         path = f"//*[@id='{node_id}']"
12 |     else:
13 |         suffix = f'[{order}]' if order > 0 else ''
14 |         prefix = f"*[name()='{tag}']" if in_svg else tag
15 |         path = path + '/' + prefix + suffix
16 |     
17 |     # add temp id
18 |     element.attrib['temp_id'] = str(temp_id)
19 |     ori_label = element.attrib.get(id_column, '')
20 |     if ori_label != '':
21 |         used_labels[ori_label] = True
22 |     
23 |     bid = str(temp_id)
24 |     i2xpath[bid] = f'xpath/{path}'
25 |     i2xpath[f'/{path}'] = bid
26 |     i2xpath[f'xpath/{path}'] = bid
27 |     i2xpath[f'xpath=/{path}'] = bid
28 |     
29 |     temp_id += 1
30 |     
31 |     # traverse node
32 |     children = element.getchildren()
33 |     tag_dict = {}
34 |     id_list = []
35 |     for child in children:
36 |         ctag = child.tag.lower()
37 |         if ctag not in tag_dict:
38 |             tag_dict[ctag] = 0
39 |         tag_dict[ctag] += 1
40 |         id_list.append(tag_dict[ctag])
41 |     
42 |     for cid, child in zip(id_list, children):
43 |         ctag = child.tag.lower()
44 |         cod = cid if tag_dict[ctag] > 1 else 0
45 |         temp_id, i2x, ulabels = get_xpath_top_down(child, id_column, path, cod, in_svg, temp_id, i2xpath)
46 |         i2xpath.update(i2x)
47 |         used_labels.update(ulabels)
48 |     
49 |     return temp_id, i2xpath, used_labels
50 |         
51 | def print_html_object(obj: str='') -> str:
52 |     tab_cnt = 0
53 |     result, content, sep = '', '', ''
54 |     last_is_left, last_is_right = False, False
55 |     for ch in obj:
56 |         if ch == '<':
57 |             result += '\n'
58 |             if len(content.strip()) > 0:
59 |                 result += sep + content.strip() + '\n'
60 |             result += sep + '<'
61 |             
62 |             tab_cnt += 1
63 |             sep = '  ' * tab_cnt
64 |             
65 |             content = ''
66 |             last_is_right = False
67 |             last_is_left = True
68 |         elif ch == '>':
69 |             if last_is_left:
70 |                 result += content
71 |             else:
72 |                 if last_is_right:
73 |                     result += '\n'
74 |                 if len(content.strip()) > 0:
75 |                     result += sep + content.strip() + '\n'
76 |             
77 |             tab_cnt -= 1
78 |             sep = '  ' * tab_cnt
79 |             
80 |             if not last_is_left:
81 |                 result += sep
82 |             
83 |             result += '>'
84 |             content = ''
85 |             
86 |             last_is_right = True
87 |             last_is_left = False
88 |         else:
89 |             content += ch
90 |     
91 |     return result


--------------------------------------------------------------------------------
/miniwob++/install_dependency.sh:
--------------------------------------------------------------------------------
 1 | apt-get install -y chromium-browser
 2 | apt-get install -y libxcb1
 3 | apt-get install -y libatk1.0-0
 4 | apt-get install -y libnss3
 5 | apt-get install -y libatk-bridge2.0-0
 6 | apt-get install -y libcups2
 7 | apt-get install -y libdrm2
 8 | apt-get install -y libxkbcommon0
 9 | apt-get install -y libxcomposite1
10 | apt-get install -y libxdamage1
11 | apt-get install -y libxfixes3
12 | apt-get install -y libxrandr2
13 | apt-get install -y libgbm1
14 | apt-get install -y libpango1.0-0
15 | ls /root/.cache/selenium/chrome/linux64/ # run /root/.cache/selenium/chrome/linux64/xxx/chrome to check if it works


--------------------------------------------------------------------------------
/miniwob++/llms/__init__.py:
--------------------------------------------------------------------------------
1 | from .call import CallLLM


--------------------------------------------------------------------------------
/miniwob++/llms/call.py:
--------------------------------------------------------------------------------
 1 | from transformers import AutoTokenizer, AutoModel
 2 | from .providers import call_method
 3 | 
 4 | class CallLLM():
 5 |     def __init__(self, model_path='chatgpt', cuda='0'):
 6 |         if model_path in call_method:
 7 |             self.func = call_method[model_path]
 8 |             return
 9 |         
10 |         model = AutoModel.from_pretrained(model_path, trust_remote_code=True, device=f'cuda:{cuda}')
11 |         self.cuda = cuda
12 |         self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
13 |         self.model = model.eval()
14 |         self.func = self.call_pretrain_model
15 |     
16 |     def call_pretrain_model(self, query: str, sample_times: int=1):        
17 |         def chatglm3_base_template(query, history=None, system=None):
18 |             prompt = f'Q: {query}\n\nA: '
19 |             return prompt
20 |         
21 |         def model_chat(prompt: str):
22 |             output, updated_history = self.model.chat(self.tokenizer, prompt, history=None)
23 |             return output
24 |         
25 |         def generation(prompt: str, sample_times: int=1):
26 |             input_ids = self.tokenizer.encode(
27 |                 text=prompt,
28 |                 return_tensors='pt',
29 |                 max_length=8192,
30 |                 truncation=False
31 |             ).to(f'cuda:{self.cuda}')
32 | 
33 |             if len(input_ids[0]) > 7500:
34 |                 return ''
35 |             
36 |             output_ids = self.model.generate(
37 |                 input_ids=input_ids,
38 |                 max_new_tokens=1024,
39 |                 do_sample=True,
40 |                 top_p=0.7,
41 |                 temperature=0.95,
42 |                 num_return_sequences=sample_times
43 |             )
44 |             
45 |             output_text_list = []
46 |             for i in range(sample_times):
47 |                 output_text = self.tokenizer.decode(output_ids[i], skip_special_tokens=True)
48 |                 output_text = output_text.split('A: ')[-1]
49 |                 output_text_list.append(output_text)
50 |             
51 |             output = output_text_list[0]
52 |             return output
53 |         
54 |         prompt = chatglm3_base_template(query)
55 |         output = generation(prompt)
56 |         # output = model_chat(prompt)
57 |         print('[Model]', output)
58 |         return output
59 |     
60 |     def model_call(self, prompt):
61 |         output = self.func(prompt)
62 |         return output
63 | 


--------------------------------------------------------------------------------
/miniwob++/llms/providers/__init__.py:
--------------------------------------------------------------------------------
 1 | from .gpt import call_gpt
 2 | from functools import partial
 3 | 
 4 | def call_manual(prompt, history=None, system=None):
 5 |     return input()
 6 | 
 7 | call_method = {
 8 |     'chatgpt': partial(call_gpt, 'gpt-3.5-turbo'),
 9 |     'gpt4': partial(call_gpt, 'gpt-4'),
10 |     'manual': call_manual,
11 | }


--------------------------------------------------------------------------------
/miniwob++/llms/providers/gpt.py:
--------------------------------------------------------------------------------
 1 | import openai
 2 | import os
 3 | 
 4 | async def call_gpt(model, prompt, history=None, system=None):
 5 |     message = []
 6 |     if system:
 7 |         message.append({
 8 |             "role": "system",
 9 |             "content": system
10 |         })
11 |     
12 |     if history:
13 |         for chat in history:
14 |             message.append({
15 |                 "role": "user",
16 |                 "content": chat[0]
17 |             })
18 |             message.append({
19 |                 "role": "assistant",
20 |                 "content": chat[1]
21 |             })
22 |     
23 |     message.append({
24 |         "role": "user",
25 |         "content": prompt
26 |     })
27 | 
28 |     if "OPENAI_API_KEY" not in os.environ:
29 |         raise ValueError(
30 |             "OPENAI_API_KEY environment variable must be set when using OpenAI API."
31 |         )
32 |     key = os.environ["OPENAI_API_KEY"]
33 |     
34 |     resp = openai.ChatCompletion.create(
35 |         model=model,
36 |         messages=message,
37 |         api_key=key,
38 |         timeout=1000
39 |     )
40 | 
41 |     output = resp["choices"][0]["message"]["content"]
42 | 
43 |     return output


--------------------------------------------------------------------------------
/miniwob++/miniwob_tools/__init__.py:
--------------------------------------------------------------------------------
1 | from .configs import testcases, mwpp_attrs, not_clickable_tag, miniwob_attrs
2 | from .action import ActionParser
3 | from .utils import save_pixel_array, get_dom_list, get_html, update_dom_list, get_position_bar, get_page_height, get_position_info, process_dom_list


--------------------------------------------------------------------------------
/miniwob++/miniwob_tools/action.py:
--------------------------------------------------------------------------------
  1 | from .configs import miniwob_prompt, miniwob_prompt_with_tp, miniwob_prompt_new_action_space
  2 | 
  3 | class ActionParser:
  4 |     operation_pattern = {
  5 |         'Click': r'#Click#\s*([A-Z]{1,3})',
  6 |         'Hover': r'#Hover#\s*([A-Z]{1,3})',
  7 |         'Scroll_up': r'#Scroll_up#',
  8 |         'Scroll_down': r'#Scroll_down#',
  9 |         'Type': r'#Type#\s*([A-Z]{1,3})\s*"{0,1}(.+)"{0,1}',
 10 |     }
 11 |     
 12 |     new_action_space_pattern = {
 13 |         'Click': r'click\([\'\"]([A-Z]{1,3})[\'\"]\)',
 14 |         'Hover': r'hover\([\'\"]([A-Z]{1,3})[\'\"]\)',
 15 |         'Scroll_up': r'scroll_page\([\'\"]up[\'\"]\)',
 16 |         'Scroll_down': r'scroll_page\([\'\"]down[\'\"]\)',
 17 |         'Type': r'type_string\([\'\"]([A-Z]{1,3})[\'\"]\s*,\s*[\'\"](.+)[\'\"]\s*,\s*(True|False)\)',
 18 |     }
 19 |     
 20 |     prompts = {
 21 |         'basic': miniwob_prompt,
 22 |         'tp': miniwob_prompt_with_tp,
 23 |         'new_action_space': miniwob_prompt_new_action_space,
 24 |     }
 25 |     
 26 |     def __init__(self, prompt: str='basic') -> None:
 27 |         if prompt not in self.prompts:
 28 |             raise ValueError('Invalid prompt type.')
 29 |         
 30 |         funcs = {
 31 |             'basic': self.extract_operation,
 32 |             'tp': self.extract_operation_with_tp,
 33 |             'new_action_space': self.extract_operation_new_action_space,
 34 |         }
 35 |         
 36 |         self.prompt = self.prompts[prompt]
 37 |         self.func = funcs[prompt]
 38 |     
 39 |     def get_prompt(self) -> str:
 40 |         return self.prompt
 41 | 
 42 |     def extract(self, result: str='') -> (None, tuple):
 43 |         return self.func(result)
 44 |             
 45 |     @staticmethod
 46 |     def extract_operation(result: str='') -> (str, str):
 47 |         import re
 48 |         # match = re.search(r'#Operation:\s*(.+)', result)
 49 |         # if not match:
 50 |         #     return None
 51 |         # opstr = match.group(1)
 52 |         opstr = result
 53 |         
 54 |         for op, pattern in ActionParser.operation_pattern.items():
 55 |             match = re.search(pattern, opstr)
 56 |             if not match:
 57 |                 continue
 58 |             param = match.groups()
 59 |             if op == 'Type':
 60 |                 param.append(param[1])
 61 |             return '', op, param
 62 |             
 63 |         return None
 64 |     
 65 |     @staticmethod
 66 |     def extract_operation_with_tp(result: str='') -> (str, str):
 67 |         import re
 68 |         match = re.search(r'#Thinking Process:\s*(.+)\s*#Operation:\s*(.+)', result)
 69 |         if not match:
 70 |             return None
 71 |         tpstr = match.group(1)
 72 |         opstr = match.group(2)
 73 |         
 74 |         for op, pattern in ActionParser.operation_pattern.items():
 75 |             match = re.search(pattern, opstr)
 76 |             if not match:
 77 |                 continue
 78 |             param = match.groups()
 79 |             if op == 'Type':
 80 |                 param.append(False)
 81 |             return tpstr, op, match.groups()
 82 |             
 83 |         return None
 84 | 
 85 |     @staticmethod
 86 |     def extract_operation(result: str='') -> (str, str):
 87 |         import re
 88 |         # match = re.search(r'#Operation:\s*(.+)', result)
 89 |         # if not match:
 90 |         #     return None
 91 |         # opstr = match.group(1)
 92 |         opstr = result
 93 |         
 94 |         for op, pattern in ActionParser.operation_pattern.items():
 95 |             match = re.search(pattern, opstr)
 96 |             if not match:
 97 |                 continue
 98 |             param = match.groups()
 99 |             if op == 'Type':
100 |                 param.append(False)
101 |             return '', op, param
102 |             
103 |         return None
104 |     
105 |     @staticmethod
106 |     def extract_operation_new_action_space(result: str='') -> (str, str):
107 |         import re
108 |         opstr = result
109 |         
110 |         for op, pattern in ActionParser.new_action_space_pattern.items():
111 |             match = re.search(pattern, opstr)
112 |             if not match:
113 |                 continue
114 |             param = match.groups()
115 |             if op == 'Type':
116 |                 if param[1] == 'True':
117 |                     param[1] = True
118 |                 elif param[1] == 'False':
119 |                     param[1] = False
120 |                     
121 |             return '', op, param
122 |             
123 |         return None


--------------------------------------------------------------------------------
/miniwob++/miniwob_tools/configs/__init__.py:
--------------------------------------------------------------------------------
1 | from .config import testcases, mwpp_attrs, not_clickable_tag, miniwob_attrs, special_classes
2 | from .prompt import miniwob_prompt, miniwob_prompt_with_tp, miniwob_prompt_new_action_space


--------------------------------------------------------------------------------
/miniwob++/miniwob_tools/configs/config.py:
--------------------------------------------------------------------------------
  1 | testcases = [  # NOTE(review): presumably the MiniWoB++ task names to evaluate — confirm against the caller
  2 |     'book-flight',
  3 |     'choose-date',
  4 |     'choose-date-easy',
  5 |     'choose-date-medium',
  6 |     'choose-list',
  7 |     'click-button',
  8 |     'click-button-sequence',
  9 |     'click-checkboxes',
 10 |     'click-checkboxes-large',
 11 |     'click-checkboxes-soft',
 12 |     'click-checkboxes-transfer',
 13 |     'click-collapsible',
 14 |     'click-collapsible-2',
 15 |     'click-color',
 16 |     'click-dialog',
 17 |     'click-dialog-2',
 18 |     'click-link',
 19 |     'click-menu',
 20 |     'click-option',
 21 |     'click-pie',
 22 |     'click-scroll-list',
 23 |     'click-shades',
 24 |     'click-shape',
 25 |     'click-tab',
 26 |     'click-tab-2',
 27 |     'click-tab-2-hard',
 28 |     'click-test',
 29 |     'click-test-2',
 30 |     'click-widget',
 31 |     'count-shape',
 32 |     'email-inbox',
 33 |     'email-inbox-forward-nl',
 34 |     'email-inbox-forward-nl-turk',
 35 |     'email-inbox-nl-turk',
 36 |     'enter-date',
 37 |     'enter-password',
 38 |     'enter-text',
 39 |     'enter-text-dynamic',
 40 |     'enter-time',
 41 |     'focus-text',
 42 |     'focus-text-2',
 43 |     'grid-coordinate',
 44 |     'guess-number',
 45 |     'identify-shape',
 46 |     'login-user',
 47 |     'login-user-popup',
 48 |     'multi-layouts',
 49 |     'multi-orderings',
 50 |     'navigate-tree',
 51 |     'search-engine',
 52 |     'social-media',
 53 |     'social-media-all',
 54 |     'social-media-some',
 55 |     'tic-tac-toe',
 56 |     'use-autocomplete',
 57 |     'use-spinner',
 58 | ]
 59 | 
 60 | mwpp_attrs = {  # Attribute names in three groups: 'basic', 'position' and 'color'.
 61 |     'basic': [
 62 |         'id',
 63 |         'classes',
 64 |         'value',
 65 |     ],
 66 |     'position': [
 67 |         'left',
 68 |         'top',
 69 |         'width',
 70 |         'height',
 71 |     ],
 72 |     'color': {  # Unlike the other groups this is a mapping; values are presumably CSS property names — verify.
 73 |         'bgColor': 'background-color',
 74 |         'fgColor': 'color',
 75 |     }
 76 | }
 77 | 
 78 | not_clickable_tag = [  # Tag names that, per the variable name, are treated as not clickable.
 79 |     'body',
 80 |     'div',
 81 |     'form', 
 82 |     'h1', 
 83 |     'h2', 
 84 |     'h3', 
 85 |     'h4',
 86 |     'h5', 
 87 |     'h6',  
 88 |     't',  # NOTE(review): not an HTML element name — presumably a project-specific tag; verify.
 89 |     'tr', 
 90 |     'td', 
 91 |     'th', 
 92 |     'p',
 93 |     'li',
 94 | ]
 95 | 
 96 | miniwob_attrs = [  # Flat list of attribute names; how it is consumed is not visible in this file.
 97 |     'id',
 98 |     'type',
 99 |     'classes',
100 |     'value',
101 |     'rgba',
102 |     'size',
103 | ]
104 | 
105 | special_classes = [  # NOTE(review): presumably class names that get special handling elsewhere — confirm at the use site.
106 |     'alink', 
107 |     'color', 
108 |     'share', 
109 |     'copy', 
110 |     'embed', 
111 |     'menu-user', 
112 |     'block-user', 
113 |     'report', 
114 |     'email-forward', 
115 |     'forward-sender', 
116 |     'email-reply', 
117 |     'email-sender'
118 | ]


--------------------------------------------------------------------------------
/miniwob++/miniwob_tools/configs/prompt.py:
--------------------------------------------------------------------------------
  1 | miniwob_prompt = """<html> %s </html>
  2 | 
  3 | You are a helpful assistant that can assist with web navigation tasks.
  4 | You are given a simplified html webpage and a task description. 
  5 | Your goal is to complete the task. You can perform the specified operations below to interact with the webpage.
  6 | 
  7 | #Valid operations: - #Click# id: Click on the element with the specified id.
  8 | - #Hover# id: Hover on the element with the specified id.
  9 | - #Scroll_up#: Scroll up 1 page.
 10 | - #Scroll_down#: Scroll down 1 page.
 11 | - #Type# id "text": Type in the text at the element with the specified id.
 12 | 
 13 | #Current viewport position: %s
 14 | 
 15 | #Previous Operation: %s
 16 | 
 17 | #Task: %s
 18 | 
 19 | Your output SHOULD be in the following format:
 20 | #Operation: {Next operation to perform}
 21 | """  # Four %s slots, in order: page HTML, viewport position, previous operation, task.
 22 | 
 23 | miniwob_prompt_with_tp = """<html> %s </html>
 24 | 
 25 | You are a helpful assistant that can assist with web navigation tasks.
 26 | You are given a simplified html webpage and a task description. 
 27 | Your goal is to complete the task. You can perform the specified operations below to interact with the webpage.
 28 | 
 29 | #Valid operations: - #Click# id: Click on the element with the specified id.
 30 | - #Hover# id: Hover on the element with the specified id.
 31 | - #Scroll_up#: Scroll up 1 page.
 32 | - #Scroll_down#: Scroll down 1 page.
 33 | - #Type# id "text": Type in the text at the element with the specified id.
 34 | 
 35 | #Current viewport position: %s
 36 | 
 37 | #Previous Operation: %s
 38 | 
 39 | #Task: %s
 40 | 
 41 | Your output SHOULD be in the following format:
 42 | #Thinking Process: {Your thinking process to complete the task, including detailed analysis. For example, I have completed xxx and need to do xxx, so I need to perform xxx operation on the element <a[A]| xxx>}
 43 | #Operation: {Next operation to perform}
 44 | """  # Four %s slots, in order: page HTML, viewport position, previous operation, task.
 45 | 
 46 | miniwob_prompt_new_action_space = """<html> %s </html>
 47 | 
 48 | You are a helpful assistant that can assist with web navigation tasks.
 49 | You are given a simplified html webpage and a task description.
 50 | Your goal is to complete the task. You can use the provided functions below to interact with the current webpage.
 51 | 
 52 | #Provided functions:
 53 | def click(element_id: str) -> None:
 54 |     \"\"\"
 55 |     Click on the element with the specified id.
 56 | 
 57 |     Args:
 58 |        element_id: The id of the element.
 59 |     \"\"\"
 60 | 
 61 | def hover(element_id: str) -> None:
 62 |     \"\"\"
 63 |     Hover on the element with the specified id.
 64 | 
 65 |     Args:
 66 |        element_id: The id of the element.
 67 |     \"\"\"
 68 | 
 69 | def select(element_id: str, option: str) -> None:
 70 |  \"\"\"
 71 |     Select an option from a dropdown.
 72 | 
 73 |     Args:
 74 |        element_id: The id of the element.
 75 |        option: Value of the option to select.
 76 |  \"\"\"
 77 | 
 78 | def type_string(element_id: str, content: str, press_enter: bool) -> None:
 79 |  \"\"\"
 80 |     Type a string into the element with the specified id.
 81 | 
 82 |     Args:
 83 |        element_id: The id of the element.
 84 |        content: The string to type.
 85 |        press_enter: Whether to press enter after typing the string.
 86 |  \"\"\"
 87 | 
 88 | def scroll_page(direction: Literal['up', 'down']) -> None:
 89 |  \"\"\"
 90 |     Scroll down/up one page.
 91 | 
 92 |     Args:
 93 |        direction: The direction to scroll.
 94 |  \"\"\"
 95 | 
 96 | def go(direction: Literal['forward', 'backward']) -> None:
 97 |  \"\"\"
 98 |     Go forward/backward
 99 | 
100 |     Args:
101 |        direction: The direction to go to.
102 |  \"\"\"
103 | 
104 | def jump_to(url: str, new_tab: bool) -> None:
105 |  \"\"\"
106 |     Jump to the specified url.
107 | 
108 |     Args:
109 |        url: The url to jump to.
110 |        new_tab: Whether to open the url in a new tab.
111 |  \"\"\"
112 | 
113 | def switch_tab(tab_index: int) -> None:
114 |  \"\"\"
115 |     Switch to the specified tab.
116 | 
117 |     Args:
118 |        tab_index: The index of the tab to switch to.
119 |  \"\"\"
120 | 
121 | def user_input(message: str) -> str:
122 |  \"\"\"
123 |     Wait for user input.
124 | 
125 |     Args:
126 |        message: The message to display to the user.
127 | 
128 |     Returns: The user input.
129 |  \"\"\"
130 | 
131 | def finish(answer: Optional[str]) -> None:
132 |  \"\"\"
133 |     Finish the task (optionally with an answer).
134 | 
135 |     Args:
136 |        answer: The answer to the task.
137 |  \"\"\"
138 | 
139 | #Previous commands: %s
140 | 
141 | #Window tabs: 1. Default <-- current tab
142 | 
143 | #Current viewport (pages): %s
144 | 
145 | #Task: %s
146 | 
147 | You should output one command to interact to the currrent webpage.
148 | """  # Four %s slots, in order: page HTML, previous commands, current viewport, task (previous commands come before the viewport here).
149 | 


--------------------------------------------------------------------------------
/miniwob++/requirements.txt:
--------------------------------------------------------------------------------
 1 | aiohttp==3.8.4
 2 | gymnasium==0.29.0
 3 | lxml==4.9.3
 4 | miniwob==1.0
 5 | numpy==1.22.2
 6 | openai==1.3.7
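 7 | # Pillow==9.2.0  # disabled: conflicts with the Pillow==10.1.0 pin on the next line (pip rejects duplicate requirements)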
 8 | Pillow==10.1.0
 9 | Requests==2.31.0
10 | transformers==4.35.2
11 | 


--------------------------------------------------------------------------------
/miniwob++/setup.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/miniwob++/setup.sh


--------------------------------------------------------------------------------
/webarena/.github/workflows/pre-commit.yml:
--------------------------------------------------------------------------------
 1 | name: pre-commit
 2 | 
 3 | on:
 4 |   pull_request:
 5 |   push:
 6 |     branches: [main]
 7 | 
 8 | jobs:
 9 |   pre-commit:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |     - uses: actions/checkout@v3
13 |     - name: Set up Python 3.10
14 |       uses: actions/setup-python@v4
15 |       with:
16 |         python-version: 3.10.9
17 |     - uses: pre-commit/action@v3.0.0
18 | 


--------------------------------------------------------------------------------
/webarena/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
 1 | name: Python Package Pytest
 2 | on: [push]
 3 | 
 4 | jobs:
 5 |   test-all:
 6 |     runs-on: ubuntu-latest
 7 |     env:
 8 |       SHOPPING: "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770"
 9 |       SHOPPING_ADMIN: "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7780/admin"
10 |       REDDIT: "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:9999"
11 |       GITLAB: "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8023"
12 |       MAP: "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:3000"
13 |       WIKIPEDIA: "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing"
14 |       HOMEPAGE: "PASS"
15 |     strategy:
16 |       max-parallel: 5
17 |     steps:
18 |     - uses: actions/checkout@v3
19 |     - name: Set up Python 3.10
20 |       uses: actions/setup-python@v4
21 |       with:
22 |         python-version: 3.10.9
23 |     - name: Install dependencies
24 |       run: |
25 |         pip install -r requirements.txt
26 |         playwright install
27 |         python -m nltk.downloader punkt stopwords
28 |         pip install -e .[dev]
29 |     - name: Type-checking package with mypy
30 |       run: |
31 |         # Manually install mypy in the standard way.
32 |         pip --quiet install -U mypy
33 |         # Log this mypy version for debuggability.
34 |         mypy --version
35 |         # Run this mypy instance against our main package.
36 |         mypy --install-types --non-interactive .
37 |         mypy --strict . --exclude scripts
38 |     - name: Environment prepare
39 |       run: |
40 |         bash prepare.sh
41 |     - name: Test with pytest
42 |       run: |
43 |         pytest
44 | 


--------------------------------------------------------------------------------
/webarena/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | # mac OS
132 | *.DS_Store
133 | 
134 | .vscode
135 | *tmp*
136 | 
137 | .auth/*
138 | 
139 | # local debug
140 | run.sh
141 | 
142 | # trajectory visualization
143 | render_cache/*
144 | cache/*
145 | 
146 | # TMP IGNORE
147 | agent/prompts/jsons/*
148 | log_files/
149 | config_files*/*0.json
150 | config_files*/*1.json
151 | config_files*/*2.json
152 | config_files*/*3.json
153 | config_files*/*4.json
154 | config_files*/*5.json
155 | config_files*/*6.json
156 | config_files*/*7.json
157 | config_files*/*8.json
158 | config_files*/*9.json
159 | config_files*/test.json
160 | 
161 | # Our Trash
162 | output/
163 | result/
164 | result-*/
165 | config_files/check.py
166 | config_files_backup/
167 | get*.py
168 | parallel_run_*.sh
169 | 
170 | check_correct_id.py
171 | rm_data.py


--------------------------------------------------------------------------------
/webarena/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | -   repo: https://github.com/pre-commit/pre-commit-hooks
 3 |     rev: v3.2.0
 4 |     hooks:
 5 |     -   id: trailing-whitespace
 6 |     -   id: end-of-file-fixer
 7 |     -   id: check-yaml
 8 |     -   id: check-added-large-files
 9 |         args: ['--maxkb=10240']
10 | -   repo: https://github.com/psf/black
11 |     rev: 22.12.0
12 |     hooks:
13 |     -   id: black
14 |         exclude: '^(agent/prompts/raw)'
15 |         args: [--line-length=79]
16 | -   repo: https://github.com/pycqa/isort
17 |     rev: 5.12.0
18 |     hooks:
19 |     -   id: isort
20 |         args: ["--profile", "black", --line-length=72]
21 | -   repo: https://github.com/kynan/nbstripout
22 |     rev: 0.6.0
23 |     hooks:
24 |       - id: nbstripout
25 | 


--------------------------------------------------------------------------------
/webarena/CITATION.cff:
--------------------------------------------------------------------------------
1 | @article{zhou2023webarena,
2 |   title={WebArena: A Realistic Web Environment for Building Autonomous Agents},
3 |   author={Zhou, Shuyan and Xu, Frank F and Zhu, Hao and Zhou, Xuhui and Lo, Robert and Sridhar, Abishek and Cheng, Xianyi and Bisk, Yonatan and Fried, Daniel and Alon, Uri and others},
4 |   journal={arXiv preprint arXiv:2307.13854},
5 |   year={2023}
6 | }
7 | 


--------------------------------------------------------------------------------
/webarena/README.md:
--------------------------------------------------------------------------------
 1 | # Modified WebArena evaluation
 2 | 
 3 | We modified the configuration in WebArena to add our unique simplification method to improve the speed of the evaluation.
 4 | 
 5 | The following content is inherited from the WebArena repository, and we've only modified some of the test commands and prompt formats.
 6 | 
 7 | ## Install
 8 | 
 9 | ```bash
10 | # Python 3.10+
11 | conda create -n webarena python=3.10; conda activate webarena
12 | pip install -r requirements.txt
13 | playwright install
14 | pip install -e .
15 | 
16 | # optional, dev only
17 | pip install -e ".[dev]"
18 | mypy --install-types --non-interactive browser_env agent evaluation_harness
19 | pip install pre-commit
20 | pre-commit install
21 | ```
22 | ## End-to-end Evaluation
23 | 
24 | 1. Setup the standalone environment.
25 | Please check out [this page](environment_docker/README.md) for details.
26 | 
27 | 2. Configure the URLs for each website.
28 | ```bash
29 | export SHOPPING="<your_shopping_site_domain>:7770"
30 | export SHOPPING_ADMIN="<your_e_commerce_cms_domain>:7780/admin"
31 | export REDDIT="<your_reddit_domain>:9999"
32 | export GITLAB="<your_gitlab_domain>:8023"
33 | export MAP="<your_map_domain>:3000"
34 | export WIKIPEDIA="<your_wikipedia_domain>:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing"
35 | export HOMEPAGE="<your_homepage_domain>:4399" # this is a placeholder
36 | ```
37 | 
38 | > You are encouraged to update the environment variables in [github workflow](.github/workflows/tests.yml#L7) to ensure the correctness of unit tests
39 | 
40 | 3. Generate config file for each test example
41 | ```bash
42 | python scripts/generate_test_data.py
43 | ```
44 | You will see `*.json` files generated in [config_files](./config_files) folder. Each file contains the configuration for one test example.
45 | 
46 | 4. Obtain the auto-login cookies for all websites
47 | ```
48 | mkdir -p ./.auth
49 | python browser_env/auto_login.py
50 | ```
51 | 5. export `OPENAI_API_KEY=your_key`, a valid OpenAI API key starts with `sk-`
52 | 
53 | 6. Launch the evaluation
54 | ```bash
55 | python run.py \
56 |   --instruction_path agent/prompts/jsons/new_action_prompt.json \
57 |   --model gpt-3.5-turbo \
58 |   --mode completion \
59 |   --observation_type html \
60 |   --action_set_tag id_html_nasc_tree \
61 |   --result_dir <your_result_dir> \
62 |   --test_start_idx 0 \
63 |   --test_end_idx 1
64 | ```
65 | This script will run the first example with the GPT-3.5 reasoning agent; `new_action_prompt.json` is the reasoning agent prompt we used in the paper. The trajectory will be saved in `<your_result_dir>/0.html`
66 | 
67 | ## Develop Your Prompt-based Agent
68 | 1. Define the prompts. We provide two baseline agents whose corresponding prompts are listed [here](./agent/prompts/raw). Each prompt is a dictionary with the following keys:
69 | ```python
70 | prompt = {
71 |   "intro": <The overall guideline which includes the task description, available action, hint and others>,
72 |   "examples": [
73 |     (
74 |       example_1_observation,
75 |       example_1_response
76 |     ),
77 |     (
78 |       example_2_observation,
79 |       example_2_response
80 |     ),
81 |     ...
82 |   ],
83 |   "template": <How to organize different information such as observation, previous action, instruction, url>,
84 |   "meta_data": {
85 |     "observation": <Which observation space the agent uses>,
86 |     "action_type": <Which action space the agent uses>,
87 |     "keywords": <The keywords used in the template, the program will later enumerate all keywords in the template to see if all of them are correctly replaced with the content>,
88 |     "prompt_constructor": <Which prompt constructor is in use, the prompt constructor will construct the input feed to an LLM and extract the action from the generation, more details below>,
89 |     "action_splitter": <Inside which splitter can we extract the action, used by the prompt constructor>
90 |     }
91 |   }
92 | ```
93 | 
94 | 2. Implement the prompt constructor. An example prompt constructor using Chain-of-thought/ReAct style reasoning is [here](./agent/prompts/prompt_constructor.py#L184). The prompt constructor is a class with the following methods:
95 | * `construct`: construct the input feed to an LLM
96 | * `_extract_action`: given the generation from an LLM, how to extract the phrase that corresponds to the action


--------------------------------------------------------------------------------
/webarena/agent/__init__.py:
--------------------------------------------------------------------------------
1 | from .agent import (
2 |     Agent,
3 |     PromptAgent,
4 |     TeacherForcingAgent,
5 |     construct_agent,
6 | )
7 | 
8 | __all__ = ["Agent", "TeacherForcingAgent", "PromptAgent", "construct_agent"]
9 | 


--------------------------------------------------------------------------------
/webarena/agent/prompts/README.md:
--------------------------------------------------------------------------------
1 | ## Naming of the prompt files
2 | `description.action_space.observation_space.json`
3 | 


--------------------------------------------------------------------------------
/webarena/agent/prompts/__init__.py:
--------------------------------------------------------------------------------
1 | from .prompt_constructor import *
2 | 


--------------------------------------------------------------------------------
/webarena/agent/prompts/raw/new_action_prompt.py:
--------------------------------------------------------------------------------
  1 | prompt = {  # zero-shot prompt: simplified HTML page plus a Python-function style action API
  2 |     "intro": "",  # no intro text; the whole prompt lives in "template"
  3 | 	"examples": [],  # no few-shot examples
  4 | 	"template": """<html> {html} </html>
  5 | 
  6 | You are a helpful assistant that can assist with web navigation tasks.
  7 | You are given a simplified html webpage and a task description.
  8 | Your goal is to complete the task. You can use the provided functions below to interact with the current webpage.
  9 | 
 10 | #Provided functions:
 11 | def click(element_id: str) -> None:
 12 |     \"\"\"
 13 |     Click on the element with the specified id.
 14 | 
 15 |     Args:
 16 |        element_id: The id of the element.
 17 |     \"\"\"
 18 | 
 19 | def hover(element_id: str) -> None:
 20 |     \"\"\"
 21 |     Hover on the element with the specified id.
 22 | 
 23 |     Args:
 24 |        element_id: The id of the element.
 25 |     \"\"\"
 26 | 
 27 | def select(element_id: str, option: str) -> None:
 28 |  \"\"\"
 29 |     Select an option from a dropdown.
 30 | 
 31 |     Args:
 32 |        element_id: The id of the element.
 33 |        option: Value of the option to select.
 34 |  \"\"\"
 35 | 
 36 | def type_string(element_id: str, content: str, press_enter: bool) -> None:
 37 |  \"\"\"
 38 |     Type a string into the element with the specified id.
 39 | 
 40 |     Args:
 41 |        element_id: The id of the element.
 42 |        content: The string to type.
 43 |        press_enter: Whether to press enter after typing the string.
 44 |  \"\"\"
 45 | 
 46 | def scroll_page(direction: Literal['up', 'down']) -> None:
 47 |  \"\"\"
 48 |     Scroll down/up one page.
 49 | 
 50 |     Args:
 51 |        direction: The direction to scroll.
 52 |  \"\"\"
 53 | 
 54 | def go(direction: Literal['forward', 'backward']) -> None:
 55 |  \"\"\"
 56 |     Go forward/backward
 57 | 
 58 |     Args:
 59 |        direction: The direction to go to.
 60 |  \"\"\"
 61 | 
 62 | def jump_to(url: str, new_tab: bool) -> None:
 63 |  \"\"\"
 64 |     Jump to the specified url.
 65 | 
 66 |     Args:
 67 |        url: The url to jump to.
 68 |        new_tab: Whether to open the url in a new tab.
 69 |  \"\"\"
 70 | 
 71 | def switch_tab(tab_index: int) -> None:
 72 |  \"\"\"
 73 |     Switch to the specified tab.
 74 | 
 75 |     Args:
 76 |        tab_index: The index of the tab to switch to.
 77 |  \"\"\"
 78 | 
 79 | def user_input(message: str) -> str:
 80 |  \"\"\"
 81 |     Wait for user input.
 82 | 
 83 |     Args:
 84 |        message: The message to display to the user.
 85 | 
 86 |     Returns: The user input.
 87 |  \"\"\"
 88 | 
 89 | def finish(answer: Optional[str]) -> None:
 90 |  \"\"\"
 91 |     Finish the task (optionally with an answer).
 92 | 
 93 |     Args:
 94 |        answer: The answer to the task.
 95 |  \"\"\"
 96 | 
 97 | #Previous commands: {previous_action}
 98 | 
 99 | #Window tabs: {tabs}
100 | 
101 | #Current viewport (pages): {position}
102 | 
103 | #Task: {objective}
104 | 
105 | You should output one command to interact to the currrent webpage.
106 | You should add a brief comment to your command to explain your reasoning and thinking process.
107 | """,  # placeholders used above: html, previous_action, tabs, position, objective
108 |    "finale": "",  # empty for this prompt
109 | 	"meta_data": {
110 | 		"observation": "html",
111 | 		"action_type": "id_html_nasc_tree",
112 | 		"keywords": ["url", "html", "objective", "position", "previous_action", "tabs"],  # NOTE(review): "url" is listed but the template has no {url} placeholder
113 | 		"prompt_constructor": "NewASPromptConstructor",  # which prompt constructor is used with this prompt
114 | 		"answer_phrase": "",
115 | 		"action_splitter": "#"  # splitter the prompt constructor uses to extract the action
116 | 	},
117 | }
118 | 
119 | 


--------------------------------------------------------------------------------
/webarena/agent/prompts/raw/p_cot_id_actree_2s.py:
--------------------------------------------------------------------------------
 1 | prompt = {  # 2-shot chain-of-thought prompt over the accessibility-tree observation
 2 | 	"intro": """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.
 3 | 
 4 | Here's the information you'll have:
 5 | The user's objective: This is the task you're trying to complete.
 6 | The current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.
 7 | The current web page's URL: This is the page you're currently navigating.
 8 | The open tabs: These are the tabs you have open.
 9 | The previous action: This is the action you just performed. It may be helpful to track your progress.
10 | 
11 | The actions you can perform fall into several categories:
12 | 
13 | Page Operation Actions:
14 | `click [id]`: This action clicks on an element with a specific id on the webpage.
15 | `type [id] [content] [press_enter_after=0|1]`: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0.
16 | `hover [id]`: Hover over an element with id.
17 | `press [key_comb]`:  Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).
18 | `scroll [direction=down|up]`: Scroll the page up or down.
19 | 
20 | Tab Management Actions:
21 | `new_tab`: Open a new, empty browser tab.
22 | `tab_focus [tab_index]`: Switch the browser's focus to a specific tab using its index.
23 | `close_tab`: Close the currently active tab.
24 | 
25 | URL Navigation Actions:
26 | `goto [url]`: Navigate to a specific URL.
27 | `go_back`: Navigate to the previously viewed page.
28 | `go_forward`: Navigate to the next page (if a previous 'go_back' action was performed).
29 | 
30 | Completion Action:
31 | `stop [answer]`: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket. If you believe the task is impossible to complete, provide the answer as "N/A" in the bracket.
32 | 
33 | Homepage:
34 | If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.
35 | http://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.
36 | 
37 | To be successful, it is very important to follow the following rules:
38 | 1. You should only issue an action that is valid given the current observation
39 | 2. You should only issue one action at a time.
40 | 3. You should follow the examples to reason step by step and then issue the next action.
41 | 4. Generate the action in the correct format. Start with a "In summary, the next action I will perform is" phrase, followed by action inside ``````. For example, "In summary, the next action I will perform is ```click [1234]```".
42 | 5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.""",
43 | 	"examples": [  # few-shot demonstrations as (input, expected response) pairs
44 | 		(
45 | 			"""OBSERVATION:
46 | [1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'
47 | 		[1749] StaticText '$279.49'
48 | 		[1757] button 'Add to Cart'
49 | 		[1760] button 'Add to Wish List'
50 | 		[1761] button 'Add to Compare'
51 | URL: http://onestopmarket.com/office-products/office-electronics.html
52 | OBJECTIVE: What is the price of HP Inkjet Fax Machine
53 | PREVIOUS ACTION: None""",
54 | 			"Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```",
55 | 		),
56 | 		(
57 | 			"""OBSERVATION:
58 | [164] textbox 'Search' focused: True required: False
59 | [171] button 'Go'
60 | [174] link 'Find directions between two points'
61 | [212] heading 'Search Results'
62 | [216] button 'Close'
63 | URL: http://openstreetmap.org
64 | OBJECTIVE: Show me the restaurants near CMU
65 | PREVIOUS ACTION: None""",
66 | 			"Let's think step-by-step. This page has a search box whose ID is [164]. According to the nominatim rule of openstreetmap, I can search for the restaurants near a location by \"restaurants near\". I can submit my typing by pressing the Enter afterwards. In summary, the next action I will perform is ```type [164] [restaurants near CMU] [1]```",
67 | 		),
68 | 	],
69 | 	"template": """OBSERVATION:
70 | {observation}
71 | URL: {url}
72 | OBJECTIVE: {objective}
73 | PREVIOUS ACTION: {previous_action}""",  # same layout as the example inputs above
74 | 	"meta_data": {
75 | 		"observation": "accessibility_tree",
76 | 		"action_type": "id_accessibility_tree",
77 | 		"keywords": ["url", "objective", "observation", "previous_action"],  # the placeholder names that appear in "template"
78 | 		"prompt_constructor": "CoTPromptConstructor",  # which prompt constructor is used with this prompt
79 | 		"answer_phrase": "In summary, the next action I will perform is",  # the phrase rule 4 of "intro" asks for before the action
80 | 		"action_splitter": "```"  # the action is wrapped in a pair of these (rule 4 of "intro")
81 | 	},
82 | }
83 | 


--------------------------------------------------------------------------------
/webarena/agent/prompts/raw/p_cot_id_actree_2s_no_na.py:
--------------------------------------------------------------------------------
 1 | prompt = {  # 2-shot chain-of-thought prompt; the stop action here carries no "N/A" instruction
 2 | 	"intro": """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.
 3 | 
 4 | Here's the information you'll have:
 5 | The user's objective: This is the task you're trying to complete.
 6 | The current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.
 7 | The current web page's URL: This is the page you're currently navigating.
 8 | The open tabs: These are the tabs you have open.
 9 | The previous action: This is the action you just performed. It may be helpful to track your progress.
10 | 
11 | The actions you can perform fall into several categories:
12 | 
13 | Page Operation Actions:
14 | `click [id]`: This action clicks on an element with a specific id on the webpage.
15 | `type [id] [content] [press_enter_after=0|1]`: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0.
16 | `hover [id]`: Hover over an element with id.
17 | `press [key_comb]`:  Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).
18 | `scroll [direction=down|up]`: Scroll the page up or down.
19 | 
20 | Tab Management Actions:
21 | `new_tab`: Open a new, empty browser tab.
22 | `tab_focus [tab_index]`: Switch the browser's focus to a specific tab using its index.
23 | `close_tab`: Close the currently active tab.
24 | 
25 | URL Navigation Actions:
26 | `goto [url]`: Navigate to a specific URL.
27 | `go_back`: Navigate to the previously viewed page.
28 | `go_forward`: Navigate to the next page (if a previous 'go_back' action was performed).
29 | 
30 | Completion Action:
31 | `stop [answer]`: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket.
32 | 
33 | Homepage:
34 | If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.
35 | http://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.
36 | 
37 | To be successful, it is very important to follow the following rules:
38 | 1. You should only issue an action that is valid given the current observation
39 | 2. You should only issue one action at a time.
40 | 3. You should follow the examples to reason step by step and then issue the next action.
41 | 4. Generate the action in the correct format. Start with a "In summary, the next action I will perform is" phrase, followed by action inside ``````. For example, "In summary, the next action I will perform is ```click [1234]```".
42 | 5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.""",
43 | 	"examples": [  # few-shot demonstrations as (input, expected response) pairs
44 | 		(
45 | 			"""OBSERVATION:
46 | [1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'
47 | 		[1749] StaticText '$279.49'
48 | 		[1757] button 'Add to Cart'
49 | 		[1760] button 'Add to Wish List'
50 | 		[1761] button 'Add to Compare'
51 | URL: http://onestopmarket.com/office-products/office-electronics.html
52 | OBJECTIVE: What is the price of HP Inkjet Fax Machine
53 | PREVIOUS ACTION: None""",
54 | 			"Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```",
55 | 		),
56 | 		(
57 | 			"""OBSERVATION:
58 | [164] textbox 'Search' focused: True required: False
59 | [171] button 'Go'
60 | [174] link 'Find directions between two points'
61 | [212] heading 'Search Results'
62 | [216] button 'Close'
63 | URL: http://openstreetmap.org
64 | OBJECTIVE: Show me the restaurants near CMU
65 | PREVIOUS ACTION: None""",
66 | 			"Let's think step-by-step. This page has a search box whose ID is [164]. According to the nominatim rule of openstreetmap, I can search for the restaurants near a location by \"restaurants near\". I can submit my typing by pressing the Enter afterwards. In summary, the next action I will perform is ```type [164] [restaurants near CMU] [1]```",
67 | 		),
68 | 	],
69 | 	"template": """OBSERVATION:
70 | {observation}
71 | URL: {url}
72 | OBJECTIVE: {objective}
73 | PREVIOUS ACTION: {previous_action}""",  # same layout as the example inputs above
74 | 	"meta_data": {
75 | 		"observation": "accessibility_tree",
76 | 		"action_type": "id_accessibility_tree",
77 | 		"keywords": ["url", "objective", "observation", "previous_action"],  # the placeholder names that appear in "template"
78 | 		"prompt_constructor": "CoTPromptConstructor",  # which prompt constructor is used with this prompt
79 | 		"answer_phrase": "In summary, the next action I will perform is",  # the phrase rule 4 of "intro" asks for before the action
80 | 		"action_splitter": "```"  # the action is wrapped in a pair of these (rule 4 of "intro")
81 | 	},
82 | }
83 | 


--------------------------------------------------------------------------------
/webarena/agent/prompts/raw/p_direct_id_actree_2s.py:
--------------------------------------------------------------------------------
 1 | prompt = {  # 2-shot direct prompt (examples answer with the bare action). NOTE(review): rule numbering in "intro" skips 4
 2 | 	"intro": """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.
 3 | 
 4 | Here's the information you'll have:
 5 | The user's objective: This is the task you're trying to complete.
 6 | The current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.
 7 | The current web page's URL: This is the page you're currently navigating.
 8 | The open tabs: These are the tabs you have open.
 9 | The previous action: This is the action you just performed. It may be helpful to track your progress.
10 | 
11 | The actions you can perform fall into several categories:
12 | 
13 | Page Operation Actions:
14 | `click [id]`: This action clicks on an element with a specific id on the webpage.
15 | `type [id] [content] [press_enter_after=0|1]`: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0.
16 | `hover [id]`: Hover over an element with id.
17 | `press [key_comb]`:  Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).
18 | `scroll [direction=down|up]`: Scroll the page up or down.
19 | 
20 | Tab Management Actions:
21 | `new_tab`: Open a new, empty browser tab.
22 | `tab_focus [tab_index]`: Switch the browser's focus to a specific tab using its index.
23 | `close_tab`: Close the currently active tab.
24 | 
25 | URL Navigation Actions:
26 | `goto [url]`: Navigate to a specific URL.
27 | `go_back`: Navigate to the previously viewed page.
28 | `go_forward`: Navigate to the next page (if a previous 'go_back' action was performed).
29 | 
30 | Completion Action:
31 | `stop [answer]`: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket. If you believe the task is impossible to complete, provide the answer as "N/A" in the bracket.
32 | 
33 | Homepage:
34 | If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.
35 | http://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.
36 | 
37 | To be successful, it is very important to follow the following rules:
38 | 1. You should only issue an action that is valid given the current observation
39 | 2. You should only issue one action at a time.
40 | 3. Generate the action in the correct format. Always put the action inside a pair of ```. For example, ```click [1234]```.
41 | 5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.""",
42 | 	"examples": [  # few-shot demonstrations as (input, expected response) pairs
43 | 		(
44 | 			"""OBSERVATION:
45 | [1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'
46 | 		[1749] StaticText '$279.49'
47 | 		[1757] button 'Add to Cart'
48 | 		[1760] button 'Add to Wish List'
49 | 		[1761] button 'Add to Compare'
50 | URL: http://onestopmarket.com/office-products/office-electronics.html
51 | OBJECTIVE: What is the price of HP Inkjet Fax Machine
52 | PREVIOUS ACTION: None""",
53 | 			"```stop [$279.49]```",
54 | 		),
55 | 		(
56 | 			"""OBSERVATION:
57 | [164] textbox 'Search' focused: True required: False
58 | [171] button 'Go'
59 | [174] link 'Find directions between two points'
60 | [212] heading 'Search Results'
61 | [216] button 'Close'
62 | URL: http://openstreetmap.org
63 | OBJECTIVE: Show me the restaurants near CMU
64 | PREVIOUS ACTION: None""",
65 | 			"```type [164] [restaurants near CMU] [1]```",
66 | 		),
67 | 	],
68 | 	"template": """OBSERVATION:
69 | {observation}
70 | URL: {url}
71 | OBJECTIVE: {objective}
72 | PREVIOUS ACTION: {previous_action}""",  # same layout as the example inputs above
73 | 	"meta_data": {
74 | 		"observation": "accessibility_tree",
75 | 		"action_type": "id_accessibility_tree",
76 | 		"keywords": ["url", "objective", "observation", "previous_action"],  # the placeholder names that appear in "template"
77 | 		"prompt_constructor": "DirectPromptConstructor",  # which prompt constructor is used with this prompt
78 | 		"action_splitter": "```"  # the action is wrapped in a pair of these (rule 3 of "intro")
79 | 	},
80 | }
81 | 


--------------------------------------------------------------------------------
/webarena/agent/prompts/raw/p_direct_id_actree_2s_no_na.py:
--------------------------------------------------------------------------------
 1 | prompt = {  # 2-shot direct prompt without the "N/A" instruction. NOTE(review): "intro" rules are numbered 1, 2, 4, 5 and rule 4 ends with a stray double quote
 2 | 	"intro": """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.
 3 | 
 4 | Here's the information you'll have:
 5 | The user's objective: This is the task you're trying to complete.
 6 | The current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.
 7 | The current web page's URL: This is the page you're currently navigating.
 8 | The open tabs: These are the tabs you have open.
 9 | The previous action: This is the action you just performed. It may be helpful to track your progress.
10 | 
11 | The actions you can perform fall into several categories:
12 | 
13 | Page Operation Actions:
14 | `click [id]`: This action clicks on an element with a specific id on the webpage.
15 | `type [id] [content] [press_enter_after=0|1]`: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0.
16 | `hover [id]`: Hover over an element with id.
17 | `press [key_comb]`:  Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).
18 | `scroll [direction=down|up]`: Scroll the page up or down.
19 | 
20 | Tab Management Actions:
21 | `new_tab`: Open a new, empty browser tab.
22 | `tab_focus [tab_index]`: Switch the browser's focus to a specific tab using its index.
23 | `close_tab`: Close the currently active tab.
24 | 
25 | URL Navigation Actions:
26 | `goto [url]`: Navigate to a specific URL.
27 | `go_back`: Navigate to the previously viewed page.
28 | `go_forward`: Navigate to the next page (if a previous 'go_back' action was performed).
29 | 
30 | Completion Action:
31 | `stop [answer]`: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket.
32 | 
33 | Homepage:
34 | If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.
35 | http://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.
36 | 
37 | To be successful, it is very important to follow the following rules:
38 | 1. You should only issue an action that is valid given the current observation
39 | 2. You should only issue one action at a time.
40 | 4. Generate the action in the correct format, wrap the action inside ``````. For example, ```click [1234]```".
41 | 5. Issue stop action when you think you have achieved the objective.""",
42 | 	"examples": [  # few-shot demonstrations as (input, expected response) pairs
43 | 		(
44 | 			"""OBSERVATION:
45 | [1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'
46 | 		[1749] StaticText '$279.49'
47 | 		[1757] button 'Add to Cart'
48 | 		[1760] button 'Add to Wish List'
49 | 		[1761] button 'Add to Compare'
50 | URL: http://onestopmarket.com/office-products/office-electronics.html
51 | OBJECTIVE: What is the price of HP Inkjet Fax Machine
52 | PREVIOUS ACTION: None""",
53 | 			"```stop [$279.49]```",
54 | 		),
55 | 		(
56 | 			"""OBSERVATION:
57 | [164] textbox 'Search' focused: True required: False
58 | [171] button 'Go'
59 | [174] link 'Find directions between two points'
60 | [212] heading 'Search Results'
61 | [216] button 'Close'
62 | URL: http://openstreetmap.org
63 | OBJECTIVE: Show me the restaurants near CMU
64 | PREVIOUS ACTION: None""",
65 | 			"```type [164] [restaurants near CMU] [1]```",
66 | 		),
67 | 	],
68 | 	"template": """OBSERVATION:
69 | {observation}
70 | URL: {url}
71 | OBJECTIVE: {objective}
72 | PREVIOUS ACTION: {previous_action}""",  # same layout as the example inputs above
73 | 	"meta_data": {
74 | 		"observation": "accessibility_tree",
75 | 		"action_type": "id_accessibility_tree",
76 | 		"keywords": ["url", "objective", "observation", "previous_action"],  # the placeholder names that appear in "template"
77 | 		"prompt_constructor": "CoTPromptConstructor",  # NOTE(review): a CoT constructor on a direct-style prompt; confirm this is intended
78 | 		"answer_phrase": "In summary, the next action I will perform is",  # NOTE(review): neither "intro" nor the examples use this phrase
79 | 		"action_splitter": "```"  # the action is wrapped in a pair of these (rule 4 of "intro")
80 | 	},
81 | }
82 | 


--------------------------------------------------------------------------------
/webarena/agent/prompts/raw/p_direct_id_actree_3s_llama.py:
--------------------------------------------------------------------------------
 1 | prompt = {  # 3-shot direct prompt with a shortened intro (action list only)
 2 | 	"intro": """You are an autonomous intelligent agent tasked with navigating a web browser. The actions you can perform fall into several categories:
 3 | 
 4 | Page Operation Actions:
 5 | `click [id]`: This action clicks on an element with a specific id on the webpage.
 6 | `type [id] [content] [press_enter_after=0|1]`: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0.
 7 | `hover [id]`: Hover over an element with id.
 8 | `press [key_comb]`:  Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).
 9 | `scroll [direction=down|up]`: Scroll the page up or down.
10 | 
11 | Tab Management Actions:
12 | `new_tab`: Open a new, empty browser tab.
13 | `tab_focus [tab_index]`: Switch the browser's focus to a specific tab using its index.
14 | `close_tab`: Close the currently active tab.
15 | 
16 | URL Navigation Actions:
17 | `goto [url]`: Navigate to a specific URL.
18 | `go_back`: Navigate to the previously viewed page.
19 | `go_forward`: Navigate to the next page (if a previous 'go_back' action was performed).
20 | 
21 | Completion Action:
22 | `stop [answer]`: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket.
23 | 
24 | Homepage:
25 | If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.
26 | 
27 | You can only issue one action at a time""",
28 | 
29 | 	"examples": [  # few-shot demonstrations as (input, expected response) pairs
30 | 		(
31 | 			"""Observation:
32 | [1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'
33 | 	[1749] StaticText '$279.49'
34 | 	[1757] button 'Add to Cart'
35 | 	[1760] button 'Add to Wish List'
36 | 	[1761] button 'Add to Compare'
37 | URL: http://onestopmarket.com/office-products/office-electronics.html
38 | Objective: What is the price of HP Inkjet Fax Machine
39 | Previous action: None""",
40 | 			"```stop [$279.49]```",
41 | 		),
42 | 		(
43 | 			"""Observation:
44 | [164] textbox 'Search' focused: True required: False
45 | [171] button 'Go'
46 | [174] link 'Find directions between two points'
47 | [212] heading 'Search Results'
48 | [216] button 'Close'
49 | URL: http://openstreetmap.org
50 | Objective: Show me the restaurants near CMU
51 | Previous action: None""",
52 | 			"```type [164] [restaurants near CMU] [1]```",
53 | 		),
54 |     	(  # NOTE(review): element id [1025] appears twice in this example's observation
55 | 			"""Observation:
56 | [2036] button 'Sort by: New' hasPopup: menu expanded: False
57 | 	[587] link 'US Marine’s adoption of Afghan war orphan voided'
58 | 		[989] time 'March 30, 2023 at 15:03:48 AM UTC'
59 | 	[602] link 'York student uses AI chatbot to get parking fine revoked'
60 | 		[1025] time 'March 15, 2023 at 7:48:34 AM UTC'
61 | 	[617] link 'Loveland parents furious after teachers leave, communication lagged during school threat investigation'
62 | 		[1025] time 'March 2, 2023 at 3:46:01 AM UTC'
63 | URL: http://reddit.com/f/news/new
64 | Objective: Open the most recent post that was published prior to March 1st.
65 | Previous action: None""",
66 | 		"```scroll [down]```",
67 | 		)
68 | 	],
69 | 	"template": """Observation:
70 | {observation}
71 | URL: {url}
72 | Objective: {objective}
73 | Previous action: {previous_action}""",  # same layout as the example inputs above
74 | 	"meta_data": {
75 | 		"observation": "accessibility_tree",
76 | 		"action_type": "id_accessibility_tree",
77 | 		"keywords": ["url", "objective", "observation", "previous_action"],  # the placeholder names that appear in "template"
78 | 		"prompt_constructor": "DirectPromptConstructor",  # which prompt constructor is used with this prompt
79 | 		"answer_phrase": "In summary, the next action I will perform is",  # NOTE(review): neither "intro" nor the examples use this phrase
80 | 		"action_splitter": "```",  # the example responses wrap the action in a pair of these
81 | 		"force_prefix": "```"  # presumably prepended to the model's response so it starts inside an action block; confirm in the constructor
82 | 	},
83 | }
84 | 


--------------------------------------------------------------------------------
/webarena/agent/prompts/raw/test_prompt.py:
--------------------------------------------------------------------------------
 1 | prompt = {  # zero-shot prompt: simplified HTML page plus "#Operation#"-style actions
 2 | 	"intro": "",  # no intro text; the instructions live in "template"
 3 | 	"examples": [],  # no few-shot examples
 4 | 	"template": """<html> {html} </html>
 5 | 
 6 | You are a helpful assistant that can assist with web navigation tasks.
 7 | You are given a simplified html webpage and a task description. 
 8 | Your goal is to complete the task. You can perform the specified operations below to interact with the webpage.
 9 | 
10 | #Valid operations: - #Click# id: Click on the element with the specified id
11 | - #Scroll_up#: Scroll up 1 page.
12 | - #Scroll_down#: Scroll down 1 page.
13 | - #Go_backward#: Go back to the previous page.
14 | - #Go_forward#: Go forward to the next page.
15 | - #Hover# id: Hover over the element with the specified id.
16 | - #Type# id "text": Type in the text at the element with the specified id.
17 | - #Select# id "option": Select the option at the element with the specified id.
18 | - #Record# "content": Mark content that is useful in answering the question.
19 | - #Answer# "text": output the text as the answer to the user.
20 | - #Exit#: Complete the task and exit the program.
21 | 
22 | #Current viewport position: {position}
23 | 
24 | #Previous Operation: {previous_action}
25 | 
26 | #Task: {objective}
27 | """,  # placeholders used above: html, position, previous_action, objective
28 |     "finale": """
29 | Your output SHOULD be in the following format:
30 | #Operation: {Next operation to perform}
31 | """,  # NOTE(review): "{Next operation to perform}" is literal text; passing this string through str.format would raise KeyError
32 | 	"meta_data": {
33 | 		"observation": "html",
34 | 		"action_type": "id_html_tree",
35 | 		"keywords": ["url", "html", "objective", "position", "previous_action"],  # NOTE(review): "url" is listed but the template has no {url} placeholder
36 | 		"prompt_constructor": "MyPromptConstructor",  # which prompt constructor is used with this prompt
37 | 		"answer_phrase": "",
38 | 		"action_splitter": "#"  # operation names in the template are wrapped in a pair of these (e.g. #Click#)
39 | 	},
40 | }
41 | 
42 | 


--------------------------------------------------------------------------------
/webarena/agent/prompts/to_json.py:
--------------------------------------------------------------------------------
 1 | import glob
 2 | import importlib
 3 | import json
 4 | import os
 5 | 
 6 | 
 7 | # use the current directory as the root
 8 | def run() -> None:
 9 |     """Convert all python files in agent/prompts to json files in agent/prompts/jsons
10 | 
11 |     Python files are easiser to edit
12 |     """
13 |     for p_file in glob.glob(f"agent/prompts/raw/*.py"):
14 |         # import the file as a module
15 |         base_name = os.path.basename(p_file).replace(".py", "")
16 |         module = importlib.import_module(f"agent.prompts.raw.{base_name}")
17 |         prompt = module.prompt
18 |         # save the prompt as a json file
19 |         os.makedirs("agent/prompts/jsons", exist_ok=True)
20 |         with open(f"agent/prompts/jsons/{base_name}.json", "w+") as f:
21 |             json.dump(prompt, f, indent=2)
22 |     print(f"Done convert python files to json")
23 | 
24 | 
25 | if __name__ == "__main__":  # only convert when executed as a script, not on import
26 |     run()
27 | 


--------------------------------------------------------------------------------
/webarena/browser_env/__init__.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | 
 3 | from .actions import (
 4 |     Action,
 5 |     ActionParsingError,
 6 |     ActionTypes,
 7 |     action2create_function,
 8 |     action2str,
 9 |     create_check_action,
10 |     create_click_action,
11 |     create_focus_and_click_action,
12 |     create_focus_and_type_action,
13 |     create_go_back_action,
14 |     create_go_forward_action,
15 |     create_goto_url_action,
16 |     create_hover_action,
17 |     create_id_based_action,
18 |     create_key_press_action,
19 |     create_keyboard_type_action,
20 |     create_mouse_click_action,
21 |     create_mouse_hover_action,
22 |     create_new_tab_action,
23 |     create_none_action,
24 |     create_page_close_action,
25 |     create_page_focus_action,
26 |     create_playwright_action,
27 |     create_random_action,
28 |     create_scroll_action,
29 |     create_select_option_action,
30 |     create_stop_action,
31 |     create_type_action,
32 |     is_equivalent,
33 | )
34 | from .async_envs import AsyncScriptBrowserEnv
35 | from .envs import ScriptBrowserEnv
36 | from .processors import ObservationMetadata
37 | from .trajectory import Trajectory
38 | from .utils import DetachedPage, StateInfo
39 | 
40 | __all__ = [  # public names of the package; each entry is imported above
41 |     "ScriptBrowserEnv",
42 |     "AsyncScriptBrowserEnv",
43 |     "DetachedPage",
44 |     "StateInfo",
45 |     "ObservationMetadata",
46 |     "Action",
47 |     "ActionTypes",
48 |     "action2str",
49 |     "create_random_action",
50 |     "create_focus_and_click_action",
51 |     "create_focus_and_type_action",
52 |     "is_equivalent",
53 |     "create_mouse_click_action",
54 |     "create_mouse_hover_action",
55 |     "create_none_action",
56 |     "create_keyboard_type_action",
57 |     "create_page_focus_action",
58 |     "create_new_tab_action",
59 |     "create_go_back_action",
60 |     "create_go_forward_action",
61 |     "create_goto_url_action",
62 |     "create_page_close_action",
63 |     "action2create_function",
64 |     "create_playwright_action",
65 |     "create_id_based_action",
66 |     "create_scroll_action",
67 |     "create_key_press_action",
68 |     "create_check_action",
69 |     "create_click_action",
70 |     "create_type_action",
71 |     "create_hover_action",
72 |     "create_select_option_action",
73 |     "create_stop_action",
74 |     "ActionParsingError",
75 |     "Trajectory",
76 | ]
77 | 


--------------------------------------------------------------------------------
/webarena/browser_env/async_envs.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import json
  3 | from dataclasses import dataclass
  4 | from pathlib import Path
  5 | 
  6 | import numpy as np
  7 | import numpy.typing as npt
  8 | from gymnasium import Env
  9 | from gymnasium.spaces import Box, Text
 10 | from playwright.async_api import Page, ViewportSize, async_playwright
 11 | 
 12 | from .actions import Action, aexecute_action, get_action_space
 13 | from .utils import DetachedPage, png_bytes_to_numpy
 14 | 
 15 | 
 16 | class AsyncScriptBrowserEnv(Env[npt.NDArray[np.uint8], Action]):
 17 |     """
 18 |     The goal of this environment is to produce a prototype of a browser environment.
 19 |     In the end, we want to support a fully configurable browser environment with wide
 20 |     range of action spaces and observation spaces, both structured and unstructured.
 21 |     But in this prototype, we just support action space specified by Playwright script,
 22 |     and observation space is the html content of the page.
 23 |     """
 24 | 
 25 |     def __init__(
 26 |         self,
 27 |         max_page_length: int = 2048,
 28 |         headless: bool = True,
 29 |         slow_mo: int = 0,
 30 |         timeout: int = 30000,
 31 |         viewport_size: ViewportSize = {"width": 1280, "height": 720},
 32 |     ):
 33 |         self.observation_space = Box(
 34 |             0,
 35 |             255,
 36 |             (viewport_size["height"], viewport_size["width"], 4),
 37 |             np.uint8,
 38 |         )
 39 |         # TODO: make Space[Action] = ActionSpace
 40 |         self.action_space = get_action_space()  # type: ignore[assignment]
 41 |         self.headless = headless
 42 |         self.slow_mo = slow_mo
 43 |         self.reset_finished = False
 44 |         self.timeout = timeout
 45 |         self.viewport_size = viewport_size
 46 | 
 47 |     async def setup(self, config_file: Path | None = None) -> None:
 48 |         self.context_manager = async_playwright()
 49 |         self.playwright = await self.context_manager.__aenter__()
 50 |         self.browser = await self.playwright.chromium.launch(
 51 |             headless=self.headless, slow_mo=self.slow_mo
 52 |         )
 53 |         if config_file:
 54 |             with open(config_file, "r") as f:
 55 |                 instance_config = json.load(f)
 56 |         else:
 57 |             instance_config = {}
 58 | 
 59 |         storage_state = instance_config.get("storage_state", None)
 60 |         start_url = instance_config.get("start_url", None)
 61 |         geolocation = instance_config.get("geolocation", None)
 62 | 
 63 |         self.context = await self.browser.new_context(
 64 |             viewport=self.viewport_size,
 65 |             storage_state=storage_state,
 66 |             geolocation=geolocation,
 67 |             device_scale_factor=1,
 68 |         )
 69 |         self.page = await self.context.new_page()
 70 |         if start_url:
 71 |             await self.page.goto(start_url)
 72 | 
 73 |     async def areset(
 74 |         self,
 75 |         *,
 76 |         seed: int | None = None,
 77 |         options: dict[str, str] | None = None,
 78 |     ) -> tuple[npt.NDArray[np.uint8], dict[str, object]]:
 79 |         """
 80 |         Reset the environment.
 81 |         :param options: options for the environment. The options are:
 82 |             - storage_state: the path to the storage state file
 83 |         """
 84 |         super().reset(seed=seed, options=options)
 85 |         if self.reset_finished:
 86 |             await self.context_manager.__aexit__()
 87 |         if options is not None and "config_file" in options:
 88 |             config_file = Path(options["config_file"])
 89 |             if config_file.exists():
 90 |                 await self.setup(config_file=config_file)
 91 |             else:
 92 |                 raise ValueError(f"Config state {config_file} does not exist.")
 93 |         else:
 94 |             await self.setup()
 95 |         self.reset_finished = True
 96 |         content = await self.page.content()
 97 |         screenshot = png_bytes_to_numpy(await self.page.screenshot())
 98 |         return (
 99 |             screenshot,
100 |             {"page": DetachedPage(self.page.url, content)},
101 |         )
102 | 
103 |     def reset(
104 |         self,
105 |         *,
106 |         seed: int | None = None,
107 |         options: dict[str, str] | None = None,
108 |     ) -> tuple[npt.NDArray[np.uint8], dict[str, object]]:
109 |         return asyncio.run(self.areset(seed=seed, options=options))
110 | 
111 |     async def aclose(self) -> None:
112 |         if self.reset_finished:
113 |             await self.context_manager.__aexit__()
114 | 
115 |     def close(self) -> None:
116 |         asyncio.run(self.aclose())
117 | 
118 |     async def astep(
119 |         self, action: Action
120 |     ) -> tuple[npt.NDArray[np.uint8], float, bool, bool, dict[str, object]]:
121 |         if not self.reset_finished:
122 |             raise RuntimeError("Call reset first before calling step.")
123 |         success = False
124 |         fail_error = ""
125 |         try:
126 |             self.page = await aexecute_action(action, self.page, self.context)
127 |             success = True
128 |         except Exception as e:
129 |             fail_error = str(e)
130 | 
131 |         try:
132 |             content = await self.page.content()
133 |             screenshot = png_bytes_to_numpy(await self.page.screenshot())
134 |         except:
135 |             await self.page.wait_for_load_state("load")
136 |             content = await self.page.content()
137 |             screenshot = png_bytes_to_numpy(await self.page.screenshot())
138 | 
139 |         return (
140 |             screenshot,
141 |             float(success),
142 |             False,
143 |             False,
144 |             {
145 |                 "page": DetachedPage(self.page.url, content),
146 |                 "fail_error": fail_error,
147 |             },
148 |         )
149 | 
150 |     def step(
151 |         self, action: Action
152 |     ) -> tuple[npt.NDArray[np.uint8], float, bool, bool, dict[str, object]]:
153 |         return asyncio.run(self.astep(action), debug=True)
154 | 


--------------------------------------------------------------------------------
/webarena/browser_env/auto_login.py:
--------------------------------------------------------------------------------
  1 | """Script to automatically login each website"""
  2 | import argparse
  3 | import glob
  4 | import os
  5 | import time
  6 | from concurrent.futures import ThreadPoolExecutor
  7 | from itertools import combinations
  8 | from pathlib import Path
  9 | 
 10 | from playwright.sync_api import sync_playwright
 11 | 
 12 | from browser_env.env_config import (
 13 |     ACCOUNTS,
 14 |     GITLAB,
 15 |     REDDIT,
 16 |     SHOPPING,
 17 |     SHOPPING_ADMIN,
 18 | )
 19 | 
# Browser launch settings: HEADLESS is used by renew_comb, SLOW_MO by is_expired.
HEADLESS = True
SLOW_MO = 0


# SITES, URLS, EXACT_MATCH and KEYWORDS are parallel lists: main() looks up a
# site's position with SITES.index(...) and reads the same position in the
# other three, so their order must be kept in sync.
SITES = ["gitlab", "shopping", "shopping_admin", "reddit"]
# Page visited by is_expired to check whether a stored login still works.
URLS = [
    f"{GITLAB}/-/profile",
    f"{SHOPPING}/wishlist/",
    f"{SHOPPING_ADMIN}/dashboard",
    f"{REDDIT}/user/{ACCOUNTS['reddit']['username']}/account",
]
# Passed as is_expired's url_exact flag (only consulted when the keyword is empty).
EXACT_MATCH = [True, True, True, True]
# Non-empty keyword: the login counts as valid when the keyword appears in the
# page content. Empty keyword: is_expired compares URLs instead.
KEYWORDS = ["", "", "Dashboard", "Delete"]
 33 | 
 34 | 
 35 | def is_expired(
 36 |     storage_state: Path, url: str, keyword: str, url_exact: bool = True
 37 | ) -> bool:
 38 |     """Test whether the cookie is expired"""
 39 |     if not storage_state.exists():
 40 |         return True
 41 | 
 42 |     context_manager = sync_playwright()
 43 |     playwright = context_manager.__enter__()
 44 |     browser = playwright.chromium.launch(headless=True, slow_mo=SLOW_MO)
 45 |     context = browser.new_context(storage_state=storage_state)
 46 |     page = context.new_page()
 47 |     page.goto(url)
 48 |     time.sleep(1)
 49 |     d_url = page.url
 50 |     content = page.content()
 51 |     context_manager.__exit__()
 52 |     if keyword:
 53 |         return keyword not in content
 54 |     else:
 55 |         if url_exact:
 56 |             return d_url != url
 57 |         else:
 58 |             return url not in d_url
 59 | 
 60 | 
 61 | def renew_comb(comb: list[str], auth_folder: str = "./.auth") -> None:
 62 |     context_manager = sync_playwright()
 63 |     playwright = context_manager.__enter__()
 64 |     browser = playwright.chromium.launch(headless=HEADLESS)
 65 |     context = browser.new_context()
 66 |     page = context.new_page()
 67 | 
 68 |     if "shopping" in comb:
 69 |         username = ACCOUNTS["shopping"]["username"]
 70 |         password = ACCOUNTS["shopping"]["password"]
 71 |         page.goto(f"{SHOPPING}/customer/account/login/")
 72 |         page.get_by_label("Email", exact=True).fill(username)
 73 |         page.get_by_label("Password", exact=True).fill(password)
 74 |         page.get_by_role("button", name="Sign In").click()
 75 | 
 76 |     if "reddit" in comb:
 77 |         username = ACCOUNTS["reddit"]["username"]
 78 |         password = ACCOUNTS["reddit"]["password"]
 79 |         page.goto(f"{REDDIT}/login")
 80 |         page.get_by_label("Username").fill(username)
 81 |         page.get_by_label("Password").fill(password)
 82 |         page.get_by_role("button", name="Log in").click()
 83 | 
 84 |     if "shopping_admin" in comb:
 85 |         username = ACCOUNTS["shopping_admin"]["username"]
 86 |         password = ACCOUNTS["shopping_admin"]["password"]
 87 |         page.goto(f"{SHOPPING_ADMIN}")
 88 |         page.get_by_placeholder("user name").fill(username)
 89 |         page.get_by_placeholder("password").fill(password)
 90 |         page.get_by_role("button", name="Sign in").click()
 91 | 
 92 |     if "gitlab" in comb:
 93 |         username = ACCOUNTS["gitlab"]["username"]
 94 |         password = ACCOUNTS["gitlab"]["password"]
 95 |         page.goto(f"{GITLAB}/users/sign_in")
 96 |         page.get_by_test_id("username-field").click()
 97 |         page.get_by_test_id("username-field").fill(username)
 98 |         page.get_by_test_id("username-field").press("Tab")
 99 |         page.get_by_test_id("password-field").fill(password)
100 |         page.get_by_test_id("sign-in-button").click()
101 | 
102 |     context.storage_state(path=f"{auth_folder}/{'.'.join(comb)}_state.json")
103 | 
104 |     context_manager.__exit__()
105 | 
106 | 
def get_site_comb_from_filepath(file_path: str) -> list[str]:
    """Recover the list of site names encoded in a state file's name.

    The file name is cut at its last underscore and the part before it is
    split on dots, e.g. ``gitlab.reddit_state.json`` -> ``["gitlab", "reddit"]``.
    """
    file_name = os.path.split(file_path)[1]
    sites_part, *_ = file_name.rsplit("_", 1)
    return sites_part.split(".")
110 | 
111 | 
def main(auth_folder: str = "./.auth") -> None:
    """Renew the login state of every site (and site pair), then verify it.

    First, ``renew_comb`` is run in a thread pool for each pair of sites and
    for each single site. Afterwards every ``*.json`` file in ``auth_folder``
    is checked with ``is_expired`` once per site encoded in its name.

    :param auth_folder: directory holding the storage state files.
    :raises AssertionError: if any checked state file is reported expired.
    """
    pairs = list(combinations(SITES, 2))

    max_workers = 8
    # NOTE: the renewal futures are not inspected, so an exception raised
    # inside renew_comb only shows up as a failed check below.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for pair in pairs:
            # TODO[shuyanzh] auth don't work on these two sites
            if "reddit" in pair and (
                "shopping" in pair or "shopping_admin" in pair
            ):
                continue
            executor.submit(
                renew_comb, list(sorted(pair)), auth_folder=auth_folder
            )

        for site in SITES:
            executor.submit(renew_comb, [site], auth_folder=auth_folder)

    # Keep each future next to the file it checks: a file covering several
    # sites yields several futures, so a future's position in the list is not
    # a valid index into cookie_files.
    checks = []
    cookie_files = list(glob.glob(f"{auth_folder}/*.json"))
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for c_file in cookie_files:
            comb = get_site_comb_from_filepath(c_file)
            for cur_site in comb:
                site_idx = SITES.index(cur_site)
                future = executor.submit(
                    is_expired,
                    Path(c_file),
                    URLS[site_idx],
                    KEYWORDS[site_idx],
                    EXACT_MATCH[site_idx],
                )
                checks.append((c_file, future))

    for c_file, future in checks:
        assert not future.result(), f"Cookie {c_file} expired."
146 | 
147 | 
if __name__ == "__main__":
    # --site_list omitted or containing "all": renew and verify everything.
    # Otherwise renew only the given combination of sites.
    parser = argparse.ArgumentParser()
    parser.add_argument("--site_list", nargs="+", default=[])
    parser.add_argument("--auth_folder", type=str, default="./.auth")
    args = parser.parse_args()
    if not args.site_list or "all" in args.site_list:
        # Always forward --auth_folder; it used to be dropped when
        # --site_list was omitted.
        main(auth_folder=args.auth_folder)
    else:
        renew_comb(args.site_list, auth_folder=args.auth_folder)
160 | 


--------------------------------------------------------------------------------
/webarena/browser_env/constants.py:
--------------------------------------------------------------------------------
  1 | from typing import Literal
  2 | 
# Element role names. RolesType in this module lists the same names (plus
# SPECIAL_LOCATORS) as a Literal type; keep the two in sync.
ROLES = (
    "alert",
    "alertdialog",
    "application",
    "article",
    "banner",
    "blockquote",
    "button",
    "caption",
    "cell",
    "checkbox",
    "code",
    "columnheader",
    "combobox",
    "complementary",
    "contentinfo",
    "definition",
    "deletion",
    "dialog",
    "directory",
    "document",
    "emphasis",
    "feed",
    "figure",
    "form",
    "generic",
    "grid",
    "gridcell",
    "group",
    "heading",
    "img",
    "insertion",
    "link",
    "list",
    "listbox",
    "listitem",
    "log",
    "main",
    "marquee",
    "math",
    "meter",
    "menu",
    "menubar",
    "menuitem",
    "menuitemcheckbox",
    "menuitemradio",
    "navigation",
    "none",
    "note",
    "option",
    "paragraph",
    "presentation",
    "progressbar",
    "radio",
    "radiogroup",
    "region",
    "row",
    "rowgroup",
    "rowheader",
    "scrollbar",
    "search",
    "searchbox",
    "separator",
    "slider",
    "spinbutton",
    "status",
    "strong",
    "subscript",
    "superscript",
    "switch",
    "tab",
    "table",
    "tablist",
    "tabpanel",
    "term",
    "textbox",
    "time",
    "timer",
    "toolbar",
    "tooltip",
    "tree",
    "treegrid",
    "treeitem",
)

# Locator kinds that are not roles; also appended at the end of RolesType.
SPECIAL_LOCATORS = (
    "alt_text",
    "label",
    "placeholder",
)

# Characters with code points 32..127 inclusive.
ASCII_CHARSET = "".join(chr(x) for x in range(32, 128))
# Characters with code points 129..109999 inclusive.
FREQ_UNICODE_CHARSET = "".join(chr(x) for x in range(129, 110000))
# Size limits; presumably bounds used when defining the action/observation
# spaces -- verify against the modules that import them.
UTTERANCE_MAX_LENGTH = 8192
ATTRIBUTE_MAX_LENGTH = 256
TEXT_MAX_LENGTH = 256
TYPING_MAX_LENGTH = 64
URL_MAX_LENGTH = 256
MAX_ELEMENT_INDEX_IN_VIEWPORT = 10
MAX_ELEMENT_ID = 1000
MAX_ANSWER_LENGTH = 512

MIN_REF = -1000000
MAX_REF = 1000000

# Window / task geometry constants (units not stated here; presumably pixels).
WINDOW_WIDTH = 500
WINDOW_HEIGHT = 240
TASK_WIDTH = 160
TASK_HEIGHT = 210

FLIGHT_WINDOW_WIDTH = 600
FLIGHT_WINDOW_HEIGHT = 700
FLIGHT_TASK_WIDTH = 375
FLIGHT_TASK_HEIGHT = 667
MAX_PAGE_NUMBER = 10

# Keyboard key names (and one key combination, "Meta+a").
SPECIAL_KEYS = (
    "Enter",
    "Tab",
    "Control",
    "Shift",
    "Meta",
    "Backspace",
    "Delete",
    "Escape",
    "ArrowUp",
    "ArrowDown",
    "ArrowLeft",
    "ArrowRight",
    "PageDown",
    "PageUp",
    "Meta+a",
)
136 | 
# Maps a lower-cased key name to its canonical capitalised spelling.
# "arrowleft" was the only arrow key without an entry; it is added so all
# four arrow keys are handled the same way.
SPECIAL_KEY_MAPPINGS = {
    "backquote": "Backquote",
    "minus": "Minus",
    "equal": "Equal",
    "backslash": "Backslash",
    "backspace": "Backspace",
    "meta": "Meta",
    "tab": "Tab",
    "delete": "Delete",
    "escape": "Escape",
    "arrowdown": "ArrowDown",
    "arrowleft": "ArrowLeft",
    "end": "End",
    "enter": "Enter",
    "home": "Home",
    "insert": "Insert",
    "pagedown": "PageDown",
    "pageup": "PageUp",
    "arrowright": "ArrowRight",
    "arrowup": "ArrowUp",
    "f1": "F1",
    "f2": "F2",
    "f3": "F3",
    "f4": "F4",
    "f5": "F5",
    "f6": "F6",
    "f7": "F7",
    "f8": "F8",
    "f9": "F9",
    "f10": "F10",
    "f11": "F11",
    "f12": "F12",
}
169 | 
# Static type for the names in ROLES followed by those in SPECIAL_LOCATORS.
# The names are spelled out again because a Literal type needs literal
# arguments; update this list whenever either tuple changes.
RolesType = Literal[
    "alert",
    "alertdialog",
    "application",
    "article",
    "banner",
    "blockquote",
    "button",
    "caption",
    "cell",
    "checkbox",
    "code",
    "columnheader",
    "combobox",
    "complementary",
    "contentinfo",
    "definition",
    "deletion",
    "dialog",
    "directory",
    "document",
    "emphasis",
    "feed",
    "figure",
    "form",
    "generic",
    "grid",
    "gridcell",
    "group",
    "heading",
    "img",
    "insertion",
    "link",
    "list",
    "listbox",
    "listitem",
    "log",
    "main",
    "marquee",
    "math",
    "meter",
    "menu",
    "menubar",
    "menuitem",
    "menuitemcheckbox",
    "menuitemradio",
    "navigation",
    "none",
    "note",
    "option",
    "paragraph",
    "presentation",
    "progressbar",
    "radio",
    "radiogroup",
    "region",
    "row",
    "rowgroup",
    "rowheader",
    "scrollbar",
    "search",
    "searchbox",
    "separator",
    "slider",
    "spinbutton",
    "status",
    "strong",
    "subscript",
    "superscript",
    "switch",
    "tab",
    "table",
    "tablist",
    "tabpanel",
    "term",
    "textbox",
    "time",
    "timer",
    "toolbar",
    "tooltip",
    "tree",
    "treegrid",
    "treeitem",
    "alt_text",
    "label",
    "placeholder",
]

MAX_VANILLA_STR_LENGTH = 1000

# Names matching Playwright locator-building methods.
PLAYWRIGHT_LOCATORS = (
    "get_by_role",
    "get_by_text",
    "get_by_label",
    "get_by_placeholder",
    "get_by_alt_text",
    "get_by_title",
    "get_by_test_id",
    "filter",
    "frame_locator",
    "locator",
)

# Action names; presumably the set accepted when parsing Playwright-style
# action strings -- verify against the action parser.
PLAYWRIGHT_ACTIONS = (
    "fill",
    "check",
    "select_option",
    "click",
    "hover",
    "dclick",
    "type",
    "focus",
    "goto",
    "press",
    "scroll",
)

# Property names; judging by the constant's name these are skipped when
# rendering the accessibility tree -- verify against the observation processor.
IGNORED_ACTREE_PROPERTIES = (
    "focusable",
    "editable",
    "readonly",
    "level",
    "settable",
    "multiline",
    "invalid",
)
296 | 


--------------------------------------------------------------------------------
/webarena/browser_env/env_config.py:
--------------------------------------------------------------------------------
 1 | # websites domain
 2 | import os
 3 | 
 4 | REDDIT = os.environ.get("REDDIT", "")
 5 | SHOPPING = os.environ.get("SHOPPING", "")
 6 | SHOPPING_ADMIN = os.environ.get("SHOPPING_ADMIN", "")
 7 | GITLAB = os.environ.get("GITLAB", "")
 8 | WIKIPEDIA = os.environ.get("WIKIPEDIA", "")
 9 | MAP = os.environ.get("MAP", "")
10 | HOMEPAGE = os.environ.get("HOMEPAGE", "")
11 | 
12 | assert (
13 |     REDDIT
14 |     and SHOPPING
15 |     and SHOPPING_ADMIN
16 |     and GITLAB
17 |     and WIKIPEDIA
18 |     and MAP
19 |     and HOMEPAGE
20 | ), (
21 |     f"Please setup the URLs to each site. Current: \n"
22 |     + f"Reddit: {REDDIT}\n"
23 |     + f"Shopping: {SHOPPING}\n"
24 |     + f"Shopping Admin: {SHOPPING_ADMIN}\n"
25 |     + f"Gitlab: {GITLAB}\n"
26 |     + f"Wikipedia: {WIKIPEDIA}\n"
27 |     + f"Map: {MAP}\n"
28 |     + f"Homepage: {HOMEPAGE}\n"
29 | )
30 | 
31 | 
# Login credentials per site name; read by browser_env/auto_login.py.
ACCOUNTS = {
    "reddit": {"username": "MarvelsGrantMan136", "password": "test1234"},
    "gitlab": {"username": "byteblaze", "password": "hello1234"},
    "shopping": {
        "username": "emma.lopez@gmail.com",
        "password": "Password.123",
    },
    "shopping_admin": {"username": "admin", "password": "admin1234"},
    "shopping_site_admin": {"username": "admin", "password": "admin1234"},
}

# Maps each configured site URL to a fixed public-looking URL.
# NOTE(review): the keys are the environment-provided values, so two sites
# configured with the same URL would collapse into one entry.
URL_MAPPINGS = {
    REDDIT: "http://reddit.com",
    SHOPPING: "http://onestopmarket.com",
    SHOPPING_ADMIN: "http://luma.com/admin",
    GITLAB: "http://gitlab.com",
    WIKIPEDIA: "http://wikipedia.org",
    MAP: "http://openstreetmap.org",
    HOMEPAGE: "http://homepage.com",
}
52 | 


--------------------------------------------------------------------------------
/webarena/browser_env/html_tools/__init__.py:
--------------------------------------------------------------------------------
1 | from .identifier import IdentifierTool
2 | from .prompt import HtmlPrompt
3 | from .html_parser import HtmlParser
4 | 
5 | from .utils import print_html_object
6 | from .configs import basic_attrs, mind2web_keep_attrs


--------------------------------------------------------------------------------
/webarena/browser_env/html_tools/configs/__init__.py:
--------------------------------------------------------------------------------
1 | from .html_prompt import prompts
2 | from .config import basic_attrs, mind2web_keep_attrs, miniwob_attrs
3 | from .config import config_meta


--------------------------------------------------------------------------------
/webarena/browser_env/html_tools/configs/config.py:
--------------------------------------------------------------------------------
# Attribute-name presets. Presumably each list selects which element
# attributes are kept when HTML is rendered -- verify against HtmlParser.
basic_attrs = [
    'title',
    'value',
    'placeholder',
    'selected',
]

mind2web_keep_attrs = [
    'alt',
    'aria_description',
    'aria_label',
    'aria_role',
    'input_checked',
    'input_value',
    'label',
    'name',
    'option_selected',
    'placeholder',
    'role',
    'text_value',
    'title',
    'type',
    'value',
]

miniwob_attrs = [
    'id',
    'type',
    'value',
]

# str.format template summarising a configuration; every {name} below is a
# placeholder that must be supplied when formatting.
config_meta = """
======= Configs =======
Columns:
  - id:        {id_attr}
  - label:     {label_attr}
Position:      {use_position}
  - window:    {window_size}
  - rect_dict: {rect}
Keep:
  - parents:   {parent_chain}
  - attrs:     {keep_attrs}
  - elems:     {keep_elem}
  - obs_elem:  {obs_elem}
Generator:
  - prompt:    {prompt_name}
  - label:     {identifier_name}
========================
"""


--------------------------------------------------------------------------------
/webarena/browser_env/html_tools/configs/html_prompt.py:
--------------------------------------------------------------------------------
# Each prompt style is a dict of str.format templates:
#   'dom'              - whole element; fields tag, label, attr, content, subtree
#   'label'            - how a non-empty label is rendered
#   'attr'             - how one attribute is rendered (fields key, attr)
#   'attr_splitter'    - separator between rendered attributes
#   'subtree_splitter' - separator between rendered child subtrees

# Compact style; the attribute template emits only the value, not the key.
refine_prompt = {
    'dom': '<{tag}{label}|{attr}{content}{subtree} >',
    'label': '[{label}]',
    'attr': '{attr}',
    'attr_splitter': '; ',
    'subtree_splitter': ' ',
}

# XML-like style with key="value" attributes and an explicit closing tag.
xml_prompt = {
    'dom': '<{tag}{label}{attr}>{content}{subtree} </{tag}>',
    'label': ' id="{label}"',
    'attr': '{key}="{attr}"',
    'attr_splitter': ' ',
    'subtree_splitter': ' ',
}

# Registry of style name -> templates; 'new_data' reuses the refine templates.
prompts = {
    'refine': refine_prompt,
    'xml': xml_prompt,
    'new_data': refine_prompt, 
}
22 |     


--------------------------------------------------------------------------------
/webarena/browser_env/html_tools/identifier.py:
--------------------------------------------------------------------------------
 1 | import secrets
 2 | 
 3 | class IdentifierTool:
 4 |     def __init__(self, method: str='order', existing_labels: dict[str]={}) -> None:
 5 |         self.methods = {
 6 |             'order': self.get_identifier_in_order,
 7 |             'random': self.get_random_identifier,
 8 |         }
 9 |         
10 |         if method is None:
11 |             method = 'order'
12 |             
13 |         self.func = self.methods.get(method, None)
14 |         self.name = method
15 |         if self.func is None:
16 |             raise ValueError(f'Invalid method for identifier: {method}')
17 |         
18 |         self.reset(existing_labels)
19 |     
20 |     def reset(self, exists: dict[str]={}) -> None:
21 |         self.identifier = -1
22 |         self.exists = {} if exists is None else exists
23 |         
24 |     def get_identifier_in_order(self) -> str:
25 |         def id2str(id: int) -> str:
26 |             if id < 26:
27 |                 return chr(id + 65)
28 |             id -= 26
29 |             c0 = id // 676
30 |             c1 = (id // 26) % 26
31 |             c2 = id % 26
32 |             label = f'{chr(c1 + 65)}{chr(c2 + 65)}'
33 |             return label if c0 == 0 else f'{chr(c0 + 64)}{label}'
34 |         
35 |         self.identifier += 1
36 |         label = id2str(self.identifier)
37 |         
38 |         while label in self.exists:
39 |             self.identifier += 1
40 |             label = id2str(self.identifier)
41 |         
42 |         self.exists[label] = True
43 |         return label
44 |     
45 |     def get_random_identifier(self) -> str:
46 |         secret_generator = secrets.SystemRandom()
47 |         
48 |         def get_random_label(n: int=2) -> str:
49 |             tmp = ''
50 |             for _ in range(n):
51 |                 tmp += chr(secret_generator.randint(65, 90))
52 |             return tmp
53 | 
54 |         wc = 3 if len(self.exists) > 280 else 2
55 | 
56 |         label = get_random_label(wc)
57 |         while label in self.exists:
58 |             label = get_random_label(wc)
59 |         
60 |         self.exists[label] = True
61 |         return label
62 |         
63 |     def generate(self):
64 |         return self.func()


--------------------------------------------------------------------------------
/webarena/browser_env/html_tools/prompt.py:
--------------------------------------------------------------------------------
 1 | from .configs import prompts
 2 | 
 3 | class HtmlPrompt:    
 4 |     def __init__(self, prompt: str='') -> None:
 5 |         prompt = self.extract(prompt, 'xml')
 6 |         if prompt not in prompts:
 7 |             raise Exception('Unknown prompt: ' + prompt)
 8 |         
 9 |         constructors = {
10 |             'refine': self.normal_prompt_constructor,
11 |             'xml': self.normal_prompt_constructor,
12 |             'new_data': self.new_data_prompt_constructor,
13 |         }
14 | 
15 |         self.name = prompt
16 |         self.prompt = prompts[prompt]
17 |         self.constructor = constructors[prompt]
18 | 
19 |     @staticmethod
20 |     def extract(data, default=''):
21 |         return data if data is not None else default
22 |     
23 |     def subtree_constructor(self, subtree: list[str]=[]) -> str:
24 |         return self.prompt['subtree_splitter'].join(subtree)
25 | 
26 |     def normal_prompt_constructor(self, tag: str='', label: str='', content: str='', subtree_str: str='', class_dict: dict[str]={}) -> str:
27 |         def add_prefix(data, prefix):
28 |             return prefix + data if len(data) > 0 else ''
29 |         
30 |         tag = self.extract(tag)
31 |         label = self.extract(label)
32 |         content = self.extract(content)
33 |         subtree_str = self.extract(subtree_str, '')
34 |         class_dict = self.extract(class_dict, {})
35 |         
36 |         label_str = ''
37 |         if len(label) > 0:
38 |             label_str = self.prompt['label'].format(label=label)
39 |         
40 |         classes = []
41 |         values = set()
42 |         for key, val in class_dict.items():
43 |             if val in values:
44 |                 continue
45 |             values.add(val)
46 |             classes.append(self.prompt['attr'].format(key=key, attr=val))
47 |         classes_str = self.prompt['attr_splitter'].join(classes)
48 |         
49 |         content_splitter = ' ' if len(classes_str) == 0 else self.prompt['attr_splitter']
50 |         classes_str = add_prefix(classes_str, ' ')
51 |         content_str = add_prefix(content, content_splitter)
52 |         subtree_str = add_prefix(subtree_str, ' ')
53 | 
54 |         return self.prompt['dom'].format(tag=tag, label=label_str, attr=classes_str, content=content_str, subtree=subtree_str)
55 |     
56 |     def new_data_prompt_constructor(self, tag: str='', label: str='', content: str='', subtree_str: str='', class_dict: dict[str]={}) -> str:
57 |         def add_prefix(data, prefix):
58 |             return prefix + data if len(data) > 0 else ''
59 |         
60 |         tag = self.extract(tag)
61 |         label = self.extract(label)
62 |         content = self.extract(content)
63 |         subtree_str = self.extract(subtree_str, '')
64 |         class_dict = self.extract(class_dict, {})
65 |         
66 |         label_str = ''
67 |         if len(label) > 0:
68 |             label_str = self.prompt['label'].format(label=label)
69 |         
70 |         classes = []
71 |         values = set()
72 |         
73 |         message = []
74 |         for key, val in class_dict.items():
75 |             if val == '':
76 |                 message.append(key)
77 |                 continue
78 |             if val in values:
79 |                 continue
80 |             values.add(val)
81 |             classes.append(self.prompt['attr'].format(key=key, attr=val))
82 |         
83 |         if len(message) > 0:
84 |             message_str = ' '.join(message)
85 |             classes.append(self.prompt['attr'].format(key='message', attr=message_str))
86 |             
87 |         classes_str = self.prompt['attr_splitter'].join(classes)
88 |         
89 |         content_splitter = ' ' if len(classes_str) == 0 else self.prompt['attr_splitter']
90 |         classes_str = add_prefix(classes_str, ' ')
91 |         content_str = add_prefix(content, content_splitter)
92 |         subtree_str = add_prefix(subtree_str, ' ')
93 | 
94 |         return self.prompt['dom'].format(tag=tag, label=label_str, attr=classes_str, content=content_str, subtree=subtree_str)
95 | 
96 |     def prompt_constructor(self, tag: str='', label: str='', content: str='', subtree_str: str='', class_dict: dict[str]={}) -> str:
97 |         return self.constructor(tag, label, content, subtree_str, class_dict)


--------------------------------------------------------------------------------
/webarena/browser_env/html_tools/utils.py:
--------------------------------------------------------------------------------
  1 | from lxml import html
  2 | def get_xpath_top_down(element: html.HtmlElement, id_column: str='temp_id', label_column: str='temp_clickable_label', path: str='', order: int=0, 
  3 |                         in_svg: bool=False, temp_id: int=0) -> tuple[int, dict[str, str], dict[str]]:
  4 |     used_labels, i2xpath = {}, {}
  5 |     # path
  6 |     tag = element.tag.lower()
  7 |     in_svg = in_svg or (tag == 'svg')
  8 |     
  9 |     if not in_svg and 'id' in element.attrib:
 10 |         node_id = element.attrib['id']
 11 |         path = f'//*[@id="{node_id}"]'
 12 |     else:
 13 |         suffix = f'[{order}]' if order > 0 else ''
 14 |         prefix = f'*[name()="{tag}"]' if in_svg else tag
 15 |         path = path + '/' + prefix + suffix
 16 |     
 17 |     # add temp id
 18 |     element.attrib[id_column] = str(temp_id)
 19 |     ori_label = element.attrib.get(label_column, '')
 20 |     if ori_label != '':
 21 |         used_labels[ori_label] = True
 22 |     
 23 |     bid = str(temp_id)
 24 |     i2xpath[bid] = path
 25 |     i2xpath[path] = bid
 26 |     i2xpath[f'xpath/{path}'] = bid
 27 |     i2xpath[f'xpath=/{path}'] = bid
 28 |     
 29 |     temp_id += 1
 30 |     
 31 |     # traverse node
 32 |     children = element.getchildren()
 33 |     tag_dict = {}
 34 |     id_list = []
 35 |     for child in children:
 36 |         ctag = child.tag.lower()
 37 |         if ctag not in tag_dict:
 38 |             tag_dict[ctag] = 0
 39 |         tag_dict[ctag] += 1
 40 |         id_list.append(tag_dict[ctag])
 41 |     
 42 |     for cid, child in zip(id_list, children):
 43 |         ctag = child.tag.lower()
 44 |         cod = cid if tag_dict[ctag] > 1 else 0
 45 |         temp_id, i2x, ulabels = get_xpath_top_down(child, id_column, label_column, path, cod, in_svg, temp_id)
 46 |         i2xpath.update(i2x)
 47 |         used_labels.update(ulabels)
 48 |     
 49 |     return temp_id, i2xpath, used_labels
 50 |         
 51 | def print_html_object(obj: str='') -> str:
 52 |     tab_cnt = 0
 53 |     result, content, sep = '', '', ''
 54 |     last_is_left, last_is_right = False, False
 55 |     for ch in obj:
 56 |         if ch == '<':
 57 |             result += '\n'
 58 |             if len(content.strip()) > 0:
 59 |                 result += sep + content.strip() + '\n'
 60 |             result += sep + '<'
 61 |             
 62 |             tab_cnt += 1
 63 |             sep = '  ' * tab_cnt
 64 |             
 65 |             content = ''
 66 |             last_is_right = False
 67 |             last_is_left = True
 68 |         elif ch == '>':
 69 |             if last_is_left:
 70 |                 result += content
 71 |             else:
 72 |                 if last_is_right:
 73 |                     result += '\n'
 74 |                 if len(content.strip()) > 0:
 75 |                     result += sep + content.strip() + '\n'
 76 |             
 77 |             tab_cnt -= 1
 78 |             sep = '  ' * tab_cnt
 79 |             
 80 |             if not last_is_left:
 81 |                 result += sep
 82 |             
 83 |             result += '>'
 84 |             content = ''
 85 |             
 86 |             last_is_right = True
 87 |             last_is_left = False
 88 |         else:
 89 |             content += ch
 90 |     
 91 |     return result
 92 | 
 93 | def rect2tuple(rect: str) -> tuple[int, int, int, int]:
 94 |     if rect is None or type(rect) != type('str'):
 95 |         return None
 96 |     rect = rect.strip()
 97 |     if rect.count(',') != 3:
 98 |         return None
 99 |     rect = rect.split(',')
100 |     rect = [float(r) for r in rect]
101 |     return tuple(rect)


--------------------------------------------------------------------------------
/webarena/browser_env/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/browser_env/py.typed


--------------------------------------------------------------------------------
/webarena/browser_env/scripts/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from pathlib import Path
 3 | rootdir = Path(__file__).parent
 4 | 
 5 | # marker, gpt-4v-act style
 6 | with open(os.path.join(rootdir, 'local_marker.js'), 'r') as f:
 7 |     local_marker_script = f.read()
 8 |     
 9 | with open(os.path.join(rootdir, 'mix_marker.js'), 'r') as f:
10 |     mix_marker_script = f.read()
11 | 
12 | with open(os.path.join(rootdir, 'get_data.js'), 'r') as f:
13 |     get_rect_script = f.read()
14 | 
15 | # canva handler
16 | with open(os.path.join(rootdir, 'canva_handler.js'), 'r') as f:
17 |     canva_handler_script = f.read()
18 | 
19 | # draw label on page
20 | with open(os.path.join(rootdir, 'label_marker.js'), 'r') as f:
21 |     label_marker_script = f.read()
22 |     
23 | # get text from page
24 | with open(os.path.join(rootdir, 'get_text.js'), 'r') as f:
25 |     get_text_script = f.read()
26 | 
27 | # remove label draw on page
28 | remove_label_mark_script = """
29 |     () => {
30 |         document.querySelectorAll(".our-dom-marker").forEach(item => {
31 |             document.body.removeChild(item);
32 |         });
33 |     }
34 | """
35 | 
36 | remove_id_script = """
37 |     () => {
38 |         Array.from(document.getElementsByClassName('possible-clickable-element')).forEach((element) => {
39 |             element.classList.remove('possible-clickable-element');
40 |             element.removeAttribute('data-testid');
41 |         });
42 |     }
43 | """
44 | 


--------------------------------------------------------------------------------
/webarena/browser_env/scripts/canva_handler.js:
--------------------------------------------------------------------------------
 1 | () => {
 2 |     var items = Array.prototype.slice.call(
 3 |         document.querySelectorAll("canvas")
 4 |     );
 5 | 
 6 |     var vw = Math.max(document.documentElement.clientWidth || 0, window.innerWidth || 0);
 7 |     var vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
 8 |     
 9 |     items = items.map(element => {
10 |         // var img = element.toDataURL("image/png");
11 |         var bb = element.getClientRects();
12 |         var rect = {
13 |             left: -1,
14 |             top: -1,
15 |             right: -1,
16 |             bottom: -1,
17 |             width: 0,
18 |             height: 0,
19 |         };
20 |         if (bb.length > 0) {
21 |             bb = bb[0];
22 |             rect = {
23 |                 left: Math.max(0, bb.left),
24 |                 top: Math.max(0, bb.top),
25 |                 right: Math.min(vw, bb.right),
26 |                 bottom: Math.min(vh, bb.bottom)
27 |             };
28 |             rect = {
29 |                 ...rect,
30 |                 width: rect.right - rect.left,
31 |                 height: rect.bottom - rect.top
32 |             };
33 |         }
34 |         
35 |         return {
36 |             rects: rect,
37 |             tag: element.tagName.toLowerCase?.() || "",
38 |             text: element.textContent.trim().replace(/\s{2,}/g, ' '),
39 |             // img: img
40 |         };
41 |     });
42 | 
43 |     return items;
44 | }


--------------------------------------------------------------------------------
/webarena/browser_env/scripts/get_data.js:
--------------------------------------------------------------------------------
(packet) => {
    // Collect the innermost elements matching `packet.selector`, clip each one's
    // first client rect to the viewport and, when `packet.startIndex` is >= 0,
    // tag elements that have a non-empty clipped rect with a letter id in
    // `data-testid`. Returns [items, index], where index is the next unused
    // numeric position.

    // Converts 0, 1, ..., 25, 26, ... to "A", "B", ..., "Z", "AA", ...
    function int2str(index) {
        var str = "";
        while (index >= 0) {
            str = String.fromCharCode(65 + index % 26) + str;
            index = Math.floor(index / 26) - 1;
        }
        return str;
    };

    // NOTE(review): `selector` and `index` are assigned without a declaration,
    // so they become page-level globals. Other injected scripts may read
    // `index`; confirm before declaring them locally.
    selector = packet.selector
    index = packet.startIndex
    var items = Array.prototype.slice.call(
        document.querySelectorAll(selector)
    );

    var vw = Math.max(document.documentElement.clientWidth || 0, window.innerWidth || 0);
    var vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
    
    // Drop any match that contains another match, keeping only the innermost ones.
    items = items.filter(
        x => !items.some(y => x.contains(y) && !(x == y))
    ).map(element => {
        var bb = element.getClientRects();
        var rect = {
            left: 0,
            top: 0,
            right: 0,
            bottom: 0,
            width: 0,
            height: 0
        };
        var keep = false;
        var text = "", id = -1;
        if (bb.length > 0) {
            // Only the first client rect is used, clipped to the viewport.
            bb = bb[0];
            rect = {
                left: Math.max(0, bb.left),
                top: Math.max(0, bb.top),
                right: Math.min(vw, bb.right),
                bottom: Math.min(vh, bb.bottom)
            };
            rect = {
                ...rect,
                width: rect.right - rect.left,
                height: rect.bottom - rect.top
            };
            // Either dimension being positive is enough here (OR, not AND).
            if (rect.width > 0 || rect.height > 0) {
                keep = true;
                if (index >= 0) { 
                    id = int2str(index++);
                    element.setAttribute("data-testid", id);
                }
                var childNodes = element.childNodes;
                
                // Concatenate the element's own text nodes only, not descendants' text.
                for (var i = 0; i < childNodes.length; i++) {
                    if (childNodes[i].nodeType == Node.TEXT_NODE) {
                        text += childNodes[i].textContent;
                    }
                }
            }
        }
        
        // NOTE(review): `keep` is hard-coded to true here, so the local `keep`
        // computed above is unused and the filter below removes nothing.
        // Confirm whether that is intended.
        return {
            keep: true,
            id,
            rects: rect,
            tag: element.tagName.toLowerCase?.() || "",
            text,//: element.innerText?.trim().replace(/\s{2,}/g, " ") || ""
        };
    }).filter(x => x.keep);

    return [items, index];
}


--------------------------------------------------------------------------------
/webarena/browser_env/scripts/get_text.js:
--------------------------------------------------------------------------------
 1 | () => {
 2 |     var items = Array.prototype.slice.call(
 3 |         document.querySelectorAll("*")
 4 |     );
 5 | 
 6 |     var vw = Math.max(document.documentElement.clientWidth || 0, window.innerWidth || 0);
 7 |     var vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
 8 |     const ignoreTags = ["script", "html"];
 9 |     items = items.map(element => {
10 |         const tag = element.tagName.toLowerCase?.() || "";
11 |         var bb = element.getClientRects();
12 |         var keep = false;
13 |         var text = '';
14 | 
15 |         const domId = element.getAttribute('data-testid');
16 |         var id = domId? parseInt(domId): "-";
17 | 
18 |         if (bb.length > 0) {
19 |             bb = bb[0];
20 |             var width = Math.min(vw, bb.right) - Math.max(0, bb.left);
21 |             var height = Math.min(vh, bb.bottom) - Math.max(0, bb.top);
22 | 
23 |             if (width > 0 && height > 0) {
24 |                 keep = true;
25 |                 var childNodes = element.childNodes;
26 |                 
27 |                 for (var i = 0; i < childNodes.length; i++) {
28 |                     if (childNodes[i].nodeType == Node.TEXT_NODE) {
29 |                         text += childNodes[i].textContent;
30 |                     }
31 |                 }
32 |             }
33 |         }
34 |         
35 |         text = text.trim().replace(/\s{2,}/g, ' ');
36 |         if (ignoreTags.includes(tag)) keep = false;
37 |         if (id == "-" && text.length == 0) keep = false;
38 |         
39 |         return {
40 |             keep,
41 |             tag,
42 |             id,
43 |             text, //:element.innerText?.trim().replace(/\s{2,}/g, " ") || ""
44 |         };
45 |     }).filter(x => x.keep);
46 |  
47 |     return items;
48 | }


--------------------------------------------------------------------------------
/webarena/browser_env/scripts/label_marker.js:
--------------------------------------------------------------------------------
 1 | (items) => {
 2 |     function getRandomColor() {
 3 |         var letters = '0123456789ABCDEF';
 4 |         var color = '#';
 5 |         for (var i = 0; i < 6; i++) {
 6 |             color += letters[Math.floor(Math.random() * 16)];
 7 |         }
 8 |         return color;
 9 |     }
10 | 
11 |     items.filter(
12 |         item => item.id != ""
13 |     ).forEach((item) => {
14 |         const bbox = item.rects;
15 |         const id_string = `dom-marker-id-${index}`;
16 |         
17 |         index = item.id;
18 | 
19 |         outerElement = document.createElement("div");
20 |         outerElement.classList.add("our-dom-marker");
21 |         // var borderColor = getRandomColor();
22 |         var borderColor = "#FFFF00";
23 |         outerElement.style.outline = `2px dashed ${borderColor}`; 
24 |         outerElement.style.position = "fixed";
25 |         outerElement.style.left = bbox.left - 2 + "px";
26 |         outerElement.style.top = bbox.top - 2 + "px";
27 |         outerElement.style.width = bbox.width + 4 + "px";
28 |         outerElement.style.height = bbox.height + 4 + "px";
29 |         outerElement.style.pointerEvents = "none";
30 |         outerElement.style.boxSizing = "border-box";
31 |         outerElement.style.zIndex = 2147483647;
32 | 
33 |         innerElement = document.createElement("div");
34 |         innerElement.classList.add("our-dom-marker");
35 |         innerElement.style.outline = `2px dashed #222288`;
36 |         innerElement.style.position = "fixed";
37 |         innerElement.style.left = bbox.left + "px";
38 |         innerElement.style.top = bbox.top + "px";
39 |         innerElement.style.width = bbox.width + "px";
40 |         innerElement.style.height = bbox.height + "px";
41 |         innerElement.style.pointerEvents = "none";
42 |         innerElement.style.boxSizing = "border-box";
43 |         innerElement.style.zIndex = 2147483647;
44 |     
45 |         // Add floating label at the corner
46 |         var label = document.createElement("span");
47 |         var topPosition = 25;
48 |         if (bbox.top < 25) topPosition = bbox.top;
49 |         label.textContent = index;
50 |         label.style.position = "absolute";
51 |         label.style.top = `-${topPosition}px`;
52 |         label.style.left = "0px";
53 |         label.style.background = borderColor;
54 |         label.style.color = "black";
55 |         label.style.padding = "2px 4px";
56 |         label.style.fontSize = "16px";
57 |         label.style.borderRadius = "2px";
58 |         label.style.fontWeight = "bold";
59 |         outerElement.appendChild(label);
60 |     
61 |         document.body.appendChild(outerElement);
62 |         document.body.appendChild(innerElement);
63 |     })
64 |     return items;
65 | }


--------------------------------------------------------------------------------
/webarena/browser_env/scripts/local_marker.js:
--------------------------------------------------------------------------------
 1 | () => {
 2 |     var items = Array.prototype.slice.call(
 3 |         document.querySelectorAll('*')
 4 |     ).map((element) => {
 5 |         var vw = Math.max(document.documentElement.clientWidth || 0, window.innerWidth || 0);
 6 |         var vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
 7 | 
 8 |         var rects = [...element.getClientRects()].filter(bb => {
 9 |             var center_x = bb.left + bb.width / 2;
10 |             var center_y = bb.top + bb.height / 2;
11 |             var elAtCenter = document.elementFromPoint(center_x, center_y);
12 | 
13 |             return elAtCenter === element || element.contains(elAtCenter) 
14 |         }).map(bb => {
15 |             const rect = {
16 |                 left: Math.max(0, bb.left),
17 |                 top: Math.max(0, bb.top),
18 |                 right: Math.min(vw, bb.right),
19 |                 bottom: Math.min(vh, bb.bottom)
20 |             };
21 |             return {
22 |                 ...rect,
23 |                 width: rect.right - rect.left,
24 |                 height: rect.bottom - rect.top
25 |             }
26 |         });
27 | 
28 |         var area = rects.reduce((acc, rect) => acc + rect.width * rect.height, 0);
29 |         
30 |         return {
31 |             element: element,
32 |             include: window.getComputedStyle(element).cursor == "pointer",
33 |             area,
34 |             rects,
35 |             text: element.textContent.trim().replace(/\s{2,}/g, ' '),
36 |         };
37 |     }).filter(item =>
38 |         item.include && (item.area >= 20)
39 |     )
40 |   
41 |     items = items.filter(x => !items.some(y => x.element.contains(y.element) && !(x == y)))
42 | 
43 |     items.forEach(item => {
44 |         item.element.classList.add('possible-clickable-element');
45 |     });
46 | }


--------------------------------------------------------------------------------
/webarena/browser_env/trajectory.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 | 
3 | from .actions import Action
4 | from .utils import StateInfo
5 | 
# A trajectory is a list whose entries are each either a StateInfo or an Action.
Trajectory = list[Union[StateInfo, Action]]
7 | 


--------------------------------------------------------------------------------
/webarena/browser_env/utils.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from io import BytesIO
 3 | from typing import Any, Dict, TypedDict, Union
 4 | 
 5 | import numpy as np
 6 | import numpy.typing as npt
 7 | from PIL import Image
 8 | 
 9 | 
@dataclass
class DetachedPage:
    """Plain record pairing a page's URL with its content, both held as strings."""
    url: str
    content: str  # html
14 | 
15 | 
def png_bytes_to_numpy(png: bytes) -> npt.NDArray[np.uint8]:
    """Decode PNG-encoded bytes into a numpy array.

    The bytes are wrapped in an in-memory buffer, opened with PIL and the
    resulting image is converted with ``np.array``.

    Example:

    >>> fig = go.Figure(go.Scatter(x=[1], y=[1]))
    >>> plt.imshow(png_bytes_to_numpy(fig.to_image('png')))
    """
    buffer = BytesIO(png)
    image = Image.open(buffer)
    return np.array(image)
25 | 
26 | 
class AccessibilityTreeNode(TypedDict):
    """Typed dictionary describing a single node of an accessibility tree.

    NOTE(review): the key names look like they mirror the browser's
    accessibility-node payload; confirm against the code that fills them.
    """
    nodeId: str
    ignored: bool
    role: dict[str, Any]
    chromeRole: dict[str, Any]
    name: dict[str, Any]
    properties: list[dict[str, Any]]
    # Tree links are stored as ids, not as nested nodes.
    childIds: list[str]
    parentId: str
    backendDOMNodeId: str
    frameId: str
    # The three bound fields hold a list of floats or None.
    bound: list[float] | None
    union_bound: list[float] | None
    offsetrect_bound: list[float] | None
41 | 
42 | 
class DOMNode(TypedDict):
    """Typed dictionary describing a single DOM node.

    NOTE(review): the exact content of ``attributes`` and the meaning of
    ``cursor`` are not visible here; confirm against the code that fills them.
    """
    nodeId: str
    nodeType: str
    nodeName: str
    nodeValue: str
    attributes: str
    backendNodeId: str
    # Tree links are stored as ids, not as nested nodes.
    parentId: str
    childIds: list[str]
    cursor: int
    # A list of floats or None.
    union_bound: list[float] | None
54 | 
55 | 
class BrowserConfig(TypedDict):
    """Typed dictionary of window bounds, window size and device pixel ratio.

    All values are floats. NOTE(review): units (CSS vs device pixels) are not
    visible here; confirm against the code that fills them.
    """
    win_top_bound: float
    win_left_bound: float
    win_width: float
    win_height: float
    win_right_bound: float
    win_lower_bound: float
    device_pixel_ratio: float
64 | 
65 | 
class BrowserInfo(TypedDict):
    """Typed dictionary bundling a DOM tree payload with its BrowserConfig."""
    # Typed as a generic string-keyed dict, not as the DOMTree alias.
    DOMTree: dict[str, Any]
    config: BrowserConfig
69 | 
70 | 
# Flat list representations: one entry per node, linked through the id fields.
AccessibilityTree = list[AccessibilityTreeNode]
DOMTree = list[DOMNode]


# An observation is either text or a uint8 numpy array.
Observation = str | npt.NDArray[np.uint8]
76 | 
77 | 
class StateInfo(TypedDict):
    """Typed dictionary pairing named observations with a free-form info dict."""
    # Maps a string key to an Observation (text or uint8 array).
    observation: dict[str, Observation]
    info: Dict[str, Any]
81 | 


--------------------------------------------------------------------------------
/webarena/check_errors.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/zsh
 2 | 
 3 | result_folder=$1
 4 | cd cache/$result_folder
 5 | 
 6 | 
 7 | # check whether there is any auto-login errors
 8 | errors=$(grep -l "Creating an account has many benefits: check out faster" *.html | sort -u | grep -o '[0-9]\+')
 9 | c=$(echo $errors | wc -l)
10 | echo "Shopping total errors: $c"
11 | echo $errors | tr '\n' ','
12 | echo '\n\n'
13 | 
14 | 
15 | errors=$(grep -l "Welcome, please sign in" *.html | sort -u | grep -o '[0-9]\+')
16 | c=$(echo $errors | wc -l)
17 | echo "Admin total errors: $c"
18 | echo $errors | tr '\n' ','
19 | echo '\n\n'
20 | 
21 | 
22 | 
23 | errors=$(grep -l "Username or email" *.html | sort -u | grep -o '[0-9]\+')
24 | c=$(echo $errors | wc -l)
25 | echo "Gitlab errors: $c"
26 | echo $errors | tr '\n' ','
27 | echo '\n\n'
28 | 
29 | 
30 | errors=$(grep -l "Keep me logged in" *.html | sort -u | grep -o '[0-9]\+')
31 | c=$(echo $errors | wc -l)
32 | echo "Reddit errors: $c"
33 | echo $errors | tr '\n' ','
34 | 


--------------------------------------------------------------------------------
/webarena/config_files/examples/1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sites": ["reddit"],
 3 |     "task_id": 1,
 4 |     "require_login": true,
 5 |     "storage_state": "./.auth/reddit_state.json",
 6 |     "start_url": "http://metis.lti.cs.cmu.edu:9999/",
 7 |     "geolocation": null,
 8 |     "intent_template": "tell me all subreddits starting with character '{{character}}'",
 9 |     "instantiation_dict": {"character": "a"},
10 |     "intent": "tell me all subreddits starting with character 'a'",
11 |     "require_reset": false,
12 |     "eval": {
13 |         "eval_types": ["string_match"],
14 |         "reference_answers": ["announcements Art AskReddit askscience aww"],
15 |         "reference_url": "",
16 |         "program_html": [
17 |             {
18 |                 "url": "",
19 |                 "required_contents": []
20 |             }
21 |         ]
22 |     },
23 |     "reference_action_sequence": {
24 |         "action_set_tag": "playwright",
25 |         "action_sequence": [
26 |             "page.get_by_role(\"link\", name=\"Forums\").click()",
27 |             "page.get_by_role(\"link\", name=\"Alphabetical\").click()",
28 |             "page.stop(\"announcements Art AskReddit askscience aww\")"
29 |         ]
30 |     }
31 | }
32 | 


--------------------------------------------------------------------------------
/webarena/config_files/examples/2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sites": ["misc"],
 3 |     "task_id": 2,
 4 |     "require_login": false,
 5 |     "storage_state": null,
 6 |     "start_url": "https://russmaxdesign.github.io/exercise",
 7 |     "geolocation": null,
 8 |     "intent_template": "",
 9 |     "instantiation_dict": {},
10 |     "intent": "Check out the classification section",
11 |     "require_reset": false,
12 |     "eval": {
13 |         "eval_types": ["url_match"],
14 |         "reference_answers": null,
15 |         "reference_url": "https://russmaxdesign.github.io/exercise/#link-two",
16 |         "program_html": [
17 |             {
18 |                 "url": "",
19 |                 "required_contents": []
20 |             }
21 |         ]
22 |     },
23 |     "reference_action_sequence": {
24 |         "action_set_tag": "playwright",
25 |         "action_sequence": [
26 |             "page.get_by_role(\"navigation\").get_by_role(\"link\", name=\"Classification\").click()",
27 |             "page.stop(\"Wilson and Reade\")"
28 |         ]
29 |     }
30 | }
31 | 


--------------------------------------------------------------------------------
/webarena/config_files/examples/3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sites": ["misc"],
 3 |     "task_id": 3,
 4 |     "require_login": false,
 5 |     "storage_state": null,
 6 |     "start_url": "https://russmaxdesign.github.io/exercise",
 7 |     "geolocation": null,
 8 |     "intent_template": "",
 9 |     "instantiation_dict": {},
10 |     "intent": "Tell me who provide a collection of concise, detailed information for mammal classification in 2005",
11 |     "require_reset": false,
12 |     "eval": {
13 |         "eval_types": ["string_match"],
14 |         "reference_answers": ["Wilson and Reader"],
15 |         "reference_url": "",
16 |         "program_html": [
17 |             {
18 |                 "url": "",
19 |                 "required_contents": []
20 |             }
21 |         ]
22 |     },
23 |     "reference_action_sequence": {
24 |         "action_set_tag": "id_accessibility_tree",
25 |         "action_sequence": [
26 |             "type [13] [xyz@gmail.com] [0]",
27 |             "click [65]",
28 |             "stop [Wilson and Reader]"
29 |         ]
30 |     }
31 | }
32 | 


--------------------------------------------------------------------------------
/webarena/config_files/examples/4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sites": ["reddit"],
 3 |     "task_id": 4,
 4 |     "require_login": true,
 5 |     "storage_state": "./.auth/reddit_state.json",
 6 |     "start_url": "http://metis.lti.cs.cmu.edu:9999/",
 7 |     "geolocation": null,
 8 |     "intent_template": "list all subreddits in alphabetical order",
 9 |     "instantiation_dict": {},
10 |     "intent": "list all subreddits in alphabetical order",
11 |     "require_reset": false,
12 |     "eval": {
13 |         "eval_types": ["url_match"],
14 |         "reference_answers": null,
15 |         "reference_url": "http://metis.lti.cs.cmu.edu:9999/forums/all",
16 |         "program_html": [
17 |             {
18 |                 "url": "",
19 |                 "required_contents": []
20 |             }
21 |         ]
22 |     },
23 |     "reference_action_sequence": {
24 |         "action_set_tag": "playwright",
25 |         "action_sequence": [
26 |             "page.get_by_role(\"link\", name=\"Forums\").click()",
27 |             "page.get_by_role(\"link\", name=\"Alphabetical\").click()",
28 |             "page.stop()"
29 |         ]
30 |     }
31 | }
32 | 


--------------------------------------------------------------------------------
/webarena/environment_docker/webarena-homepage/app.py:
--------------------------------------------------------------------------------
 1 | from flask import Flask, render_template
 2 | 
# Flask application object that the route decorators below register on.
app = Flask(__name__)
 4 | 
 5 | 
@app.route("/")
def index() -> str:
    """Serve ``/`` by rendering the ``index.html`` template."""
    return render_template("index.html")
 9 | 
10 | 
@app.route("/scratchpad.html")
def scratchpad() -> str:
    """Serve ``/scratchpad.html`` by rendering the ``scratchpad.html`` template."""
    return render_template("scratchpad.html")
14 | 
15 | 
@app.route("/calculator.html")
def calculator() -> str:
    """Serve ``/calculator.html`` by rendering the ``calculator.html`` template."""
    return render_template("calculator.html")
19 | 
20 | 
@app.route("/password.html")
def password() -> str:
    """Serve ``/password.html`` by rendering the ``password.html`` template."""
    return render_template("password.html")
24 | 
25 | 
if __name__ == "__main__":
    # When executed as a script, start Flask's built-in server listening on
    # every interface (0.0.0.0) at port 4399.
    app.run(host="0.0.0.0", port=4399)
28 | 


--------------------------------------------------------------------------------
/webarena/environment_docker/webarena-homepage/static/figures/calculator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/environment_docker/webarena-homepage/static/figures/calculator.png


--------------------------------------------------------------------------------
/webarena/environment_docker/webarena-homepage/static/figures/cms.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/environment_docker/webarena-homepage/static/figures/cms.png


--------------------------------------------------------------------------------
/webarena/environment_docker/webarena-homepage/static/figures/gitlab.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/environment_docker/webarena-homepage/static/figures/gitlab.png


--------------------------------------------------------------------------------
/webarena/environment_docker/webarena-homepage/static/figures/manual1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/environment_docker/webarena-homepage/static/figures/manual1.png


--------------------------------------------------------------------------------
/webarena/environment_docker/webarena-homepage/static/figures/manual2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/environment_docker/webarena-homepage/static/figures/manual2.png


--------------------------------------------------------------------------------
/webarena/environment_docker/webarena-homepage/static/figures/map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/environment_docker/webarena-homepage/static/figures/map.png


--------------------------------------------------------------------------------
/webarena/environment_docker/webarena-homepage/static/figures/onestopshop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/environment_docker/webarena-homepage/static/figures/onestopshop.png


--------------------------------------------------------------------------------
/webarena/environment_docker/webarena-homepage/static/figures/password.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/environment_docker/webarena-homepage/static/figures/password.png


--------------------------------------------------------------------------------
/webarena/environment_docker/webarena-homepage/static/figures/reddit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/environment_docker/webarena-homepage/static/figures/reddit.png


--------------------------------------------------------------------------------
/webarena/environment_docker/webarena-homepage/static/figures/scratchpad.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/environment_docker/webarena-homepage/static/figures/scratchpad.png


--------------------------------------------------------------------------------
/webarena/environment_docker/webarena-homepage/static/figures/wikipedia.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/environment_docker/webarena-homepage/static/figures/wikipedia.png


--------------------------------------------------------------------------------
/webarena/environment_docker/webarena-homepage/templates/calculator.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 | <head>
  4 |     <title>Calculator</title>
  5 |     <style>
  6 |         body {
  7 |             display: flex;
  8 |             justify-content: center;
  9 |             align-items: center;
 10 |             height: 100vh;
 11 |             background-color: #f2f2f2;
 12 |             font-family: Arial, sans-serif;
 13 |         }
 14 |         #calculator {
 15 |             border: 1px solid #333;
 16 |             padding: 30px;
 17 |             border-radius: 10px;
 18 |             width: 400px;
 19 |             background-color: #fff;
 20 |             box-shadow: 0px 0px 10px rgba(0,0,0,0.1);
 21 |             box-sizing: border-box;
 22 |             display: flex;
 23 |             flex-direction: column;
 24 |             align-items: flex-end;
 25 |         }
 26 |         #calculator h1 {
 27 |             text-align: center;
 28 |             margin-bottom: 20px;
 29 |             align-self: center;
 30 |         }
 31 |         #calculator p {
 32 |             text-align: center;
 33 |             margin-bottom: 20px;
 34 |             color: #666;
 35 |             align-self: center;
 36 |         }
 37 |         #inputExpression {
 38 |             width: 100%;
 39 |             padding: 15px;
 40 |             font-size: 18px;
 41 |             border-radius: 5px;
 42 |             border: 1px solid #ddd;
 43 |             box-sizing: border-box;
 44 |             margin-bottom: 10px;
 45 |         }
 46 |         #result {
 47 |             margin-top: 20px;
 48 |             font-size: 20px;
 49 |             text-align: center;
 50 |             color: #333;
 51 |             width: 100%;
 52 |             align-self: center;
 53 |         }
 54 |         button {
 55 |             padding: 10px 20px;
 56 |             margin-top: 10px;
 57 |             font-size: 18px;
 58 |             border: none;
 59 |             border-radius: 5px;
 60 |             cursor: pointer;
 61 |             align-self: flex-end;
 62 |         }
 63 |         #calculate {
 64 |             background-color: #4CAF50;
 65 |             color: white;
 66 |         }
 67 |         #clear {
 68 |             background-color: #f44336;
 69 |             color: white;
 70 |         }
 71 |     </style>
 72 | </head>
 73 | <body>
 74 |     <div id="calculator">
 75 |         <h1>Calculator</h1>
 76 |         <p>Enter the expression and get the results</p>
 77 |         <input type="text" id="inputExpression" placeholder="Enter expression" />
 78 |         <button id="calculate">=</button>
 79 |         <button id="clear">Clear</button>
 80 |         <div id="result">Result: <span id="calculationResult"></span></div>
 81 |     </div>
 82 | 
 83 |     <script>
 84 |         document.getElementById('calculate').addEventListener('click', function() {
 85 |             try {
 86 |                 const result = eval(document.getElementById('inputExpression').value);
 87 |                 if (!isNaN(result)) {
 88 |                     document.getElementById('calculationResult').textContent = result;
 89 |                 } else {
 90 |                     document.getElementById('calculationResult').textContent = 'Invalid expression';
 91 |                 }
 92 |             } catch {
 93 |                 document.getElementById('calculationResult').textContent = 'Invalid expression';
 94 |             }
 95 |         });
 96 | 
 97 |         document.getElementById('clear').addEventListener('click', function() {
 98 |             document.getElementById('inputExpression').value = '';
 99 |             document.getElementById('calculationResult').textContent = '';
100 |         });
101 | 
102 |         document.getElementById('inputExpression').addEventListener('keypress', function(e) {
103 |             if (e.key === 'Enter') {
104 |                 document.getElementById('calculate').click();
105 |             }
106 |         });
107 |     </script>
108 | </body>
109 | </html>
110 | 


--------------------------------------------------------------------------------
/webarena/environment_docker/webarena-homepage/templates/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 | <head>
  4 |     <title>Homepage</title>
  5 |     <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500&display=swap" rel="stylesheet">
  6 |     <style>
  7 |         body {
  8 |             font-family: Arial, sans-serif;
  9 |             background-color: white;
 10 |             margin: 0;
 11 |             padding: 0;
 12 |             line-height: 1.6;
 13 |         }
 14 | 
 15 |         #container {
 16 |             width: 90%;
 17 |             margin: auto;
 18 |             overflow: hidden;
 19 |         }
 20 | 
 21 |         #header {
 22 |             background-color: #f4f7f9;
 23 |             color: #232f34;
 24 |             padding-top: 30px;
 25 |             min-height: 70px;
 26 |             border-bottom: #b1c1c6 3px solid;
 27 |             font-family: 'Roboto', sans-serif;
 28 |         }
 29 | 
 30 |         #header h1 {
 31 |             padding: 5px;
 32 |             margin: 0;
 33 |             text-align: center;
 34 |             font-weight: 500;
 35 |         }
 36 | 
 37 |         .card {
 38 |             border: 1px solid #ddd;
 39 |             border-radius: 5px;
 40 |             width: 200px; /* Changed width */
 41 |             height: 200px; /* Added height */
 42 |             text-align: center;
 43 |             margin: 10px;
 44 |             padding: 10px;
 45 |             box-shadow: 2px 2px 5px rgba(0,0,0,0.1);
 46 |             display: inline-block;
 47 |             overflow: auto; /* To handle content that might overflow */
 48 |         }
 49 | 
 50 |         .card img {
 51 |             width: 35%;
 52 |         }
 53 | 
 54 |         .card h2 {
 55 |             font-size: 15px;
 56 |         }
 57 | 
 58 |         .card p {
 59 |             font-size: 14px;
 60 |         }
 61 | 
 62 |         .card a {
 63 |             color: #35424a;
 64 |             text-decoration: none;
 65 |         }
 66 |     </style>
 67 | </head>
 68 | <body>
 69 |     <div id="container">
 70 |         <div id="header">
 71 |             <h1>Welcome to WebArena</h1>
 72 |         </div>
 73 | 
 74 |         <div class="card" role="region" aria-label="OneStopShop">
 75 |          <img src="{{ url_for('static', filename='figures/onestopshop.png') }}"  alt="Logo for OneStopShop">
 76 |          <a href="<your-server-hostname>:7770">
 77 |             <h2 id="appName">OneStopShop</h2>
 78 |          </a>
 79 |          <p id="appDescription">An online shopping site</p>
 80 |         </div>
 81 | 
 82 |         <div class="card" role="region" aria-label="Merchant Admin Portal">
 83 |             <img src="{{ url_for('static', filename='figures/cms.png') }}"  alt="Logo for CMS">
 84 |             <a href="<your-server-hostname>:7780/admin">
 85 |                 <h2 id="appName">Merchant Admin Portal</h2>
 86 |             </a>
 87 |             <p id="appDescription">An admin portal to manage E-commerce business (u: admin, p: admin1234)</p>
 88 |         </div>
 89 | 
 90 |         <div class="card" role="region" aria-label="Reddit">
 91 |             <img src="{{ url_for('static', filename='figures/reddit.png') }}"  alt="Logo for Reddit">
 92 |             <a href="<your-server-hostname>:9999/forums/all">
 93 |                 <h2 id="appName">Reddit</h2>
 94 |             </a>
 95 |             <p id="appDescription">A social news aggregation and discussion website</p>
 96 |         </div>
 97 | 
 98 |         <div class="card" role="region" aria-label="Gitlab">
 99 |             <img src="{{ url_for('static', filename='figures/gitlab.png') }}"  alt="Logo for Gitlab">
100 |             <a href="<your-server-hostname>:8023/explore">
101 |                 <h2 id="appName">Gitlab</h2>
102 |             </a>
103 |             <p id="appDescription">a DevOps software</p>
104 |         </div>
105 | 
106 |         <div class="card" role="region" aria-label="Map">
107 |             <img src="{{ url_for('static', filename='figures/map.png') }}"  alt="Logo for Map">
108 |             <a href="<your-server-hostname>:3000">
109 |                 <h2 id="appName">OpenStreetMap</h2>
110 |             </a>
111 |             <p id="appDescription">North east US map</p>
112 |         </div>
113 | 
114 |         <div class="card" role="region" aria-label="Calculator">
115 |             <img src="{{ url_for('static', filename='figures/calculator.png') }}"  alt="Logo for Calculator">
116 |             <a href="calculator.html">
117 |                 <h2 id="appName">Calculator</h2>
118 |             </a>
119 |             <p id="appDescription">A calculator</p>
120 |         </div>
121 | 
122 |         <div class="card" role="region" aria-label="Scratchpad">
123 |             <img src="{{ url_for('static', filename='figures/scratchpad.png') }}"  alt="Logo for Scratchpad">
124 |             <a href="scratchpad.html">
125 |                 <h2 id="appName">Scratchpad</h2>
126 |             </a>
127 |             <p id="appDescription">A scratchpad for taking notes</p>
128 |         </div>
129 | 
130 |         <div class="card" role="region" aria-label="Wikipedia">
131 |             <img src="{{ url_for('static', filename='figures/wikipedia.png') }}"  alt="Logo for Wikipedia">
132 |             <a href="<your-server-hostname>:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing">
133 |                 <h2 id="appName">Wikipedia</h2>
134 |             </a>
135 |             <p id="appDescription">An online encyclopedia</p>
136 |         </div>
137 | 
138 |         <div class="card" role="region" aria-label="Gitlab Manual">
139 |             <img src="{{ url_for('static', filename='figures/manual1.png') }}"  alt="Logo for Gitlab Manual">
140 |             <a href="https://docs.gitlab.com/">
141 |                 <h2 id="appName">Gitlab Documentation</h2>
142 |             </a>
143 |             <p id="appDescription">Documentation for GitLab</p>
144 |         </div>
145 | 
146 |         <div class="card" role="region" aria-label="Admin Manual">
147 |             <img src="{{ url_for('static', filename='figures/manual2.png') }}"  alt="Logo for Admin Manual">
148 |             <a href="https://experienceleague.adobe.com/docs/commerce-admin/user-guides/home.html?lang=en">
149 |                 <h2 id="appName">Admin Portal Manual</h2>
150 |             </a>
151 |             <p id="appDescription">Manual on using the admin portal</p>
152 |         </div>
153 |         <!-- Repeat the above card structure for each app -->
154 | 
155 |     </div>
156 | </body>
157 | </html>
158 | 


--------------------------------------------------------------------------------
/webarena/environment_docker/webarena-homepage/templates/scratchpad.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | <head>
  4 |   <meta charset="UTF-8">
  5 |   <title>Note Taking App</title>
  6 |   <style>
  7 |     body {
  8 |       display: flex;
  9 |       flex-direction: column;
 10 |       align-items: center;
 11 |       justify-content: flex-start;
 12 |       min-height: 100vh;
 13 |       margin: 0;
 14 |       font-family: Arial, sans-serif;
 15 |     }
 16 | 
 17 |     h1 {
 18 |       text-align: center;
 19 |     }
 20 | 
 21 |     #note-creation {
 22 |       display: flex;
 23 |       flex-direction: column;
 24 |       align-items: center;
 25 |       margin-bottom: 20px;
 26 |     }
 27 | 
 28 |     #note-form {
 29 |       display: flex;
 30 |       flex-direction: column;
 31 |       align-items: center;
 32 |       width: 300px;
 33 |     }
 34 | 
 35 |     #note-input {
 36 |       width: 100%;
 37 |       min-height: 300px;
 38 |       max-height: 900px;
 39 |       min-width: 600px;
 40 |       max-width: 600px;
 41 |       padding: 10px;
 42 |       box-sizing: border-box;
 43 |       border-radius: 4px;
 44 |       border: 1px solid #ddd;
 45 |       overflow-y: auto;
 46 |       resize: none;
 47 |     }
 48 | 
 49 |     #note-form button {
 50 |       padding: 10px 20px;
 51 |       margin-top: 10px;
 52 |       color: white;
 53 |       background-color: #007bff;
 54 |       border: none;
 55 |       border-radius: 4px;
 56 |       cursor: pointer;
 57 |       text-align: center;
 58 |       text-decoration: none;
 59 |     }
 60 | 
 61 |     #note-form button:hover {
 62 |       background-color: #0056b3;
 63 |     }
 64 | 
 65 |     #notes-display {
 66 |       display: flex;
 67 |       flex-direction: column;
 68 |       align-items: left;
 69 |       width: 600px;
 70 |     }
 71 | 
 72 |     .note {
 73 |       margin: 1em 0;
 74 |       padding: 1em;
 75 |       border: 1px solid #ddd;
 76 |       border-radius: 4px;
 77 |       background: #f9f9f9;
 78 |       white-space: pre-wrap;
 79 |     }
 80 |   </style>
 81 | </head>
 82 | <body>
 83 |   <header>
 84 |     <h1>My Notes</h1>
 85 |   </header>
 86 | 
 87 |   <main>
 88 |     <section id="note-creation">
 89 |       <form id="note-form">
 90 |         <textarea id="note-input" placeholder="Type your note here..."></textarea>
 91 |         <button type="submit">Add Note</button>
 92 |       </form>
 93 |     </section>
 94 | 
 95 |     <h2>History</h2>
 96 | 
 97 |     <section id="notes-display">
 98 |       <!-- Notes will be dynamically added here -->
 99 |     </section>
100 |   </main>
101 | 
102 |   <script>
103 |     const form = document.querySelector("#note-form");
104 |     const noteInput = document.querySelector("#note-input");
105 |     const notesDisplay = document.querySelector("#notes-display");
106 | 
107 |     form.addEventListener("submit", (event) => {
108 |       event.preventDefault();
109 | 
110 |       const note = document.createElement("div");
111 |       note.classList.add("note");
112 |       note.textContent = noteInput.value;
113 | 
114 |       note.innerHTML = noteInput.value.replace(/\n/g, '<br>');
115 |       note.tabIndex = 0;
116 | 
117 |       notesDisplay.prepend(note);
118 |       noteInput.value = '';
119 |     });
120 |   </script>
121 | </body>
122 | </html>
123 | 


--------------------------------------------------------------------------------
/webarena/evaluation_harness/__init__.py:
--------------------------------------------------------------------------------
1 | from .evaluators import *
2 | from .helper_functions import (
3 |     shopping_get_latest_order_url,
4 |     shopping_get_sku_latest_review_author,
5 |     shopping_get_sku_latest_review_rating,
6 | )
7 | 


--------------------------------------------------------------------------------
/webarena/llms/__init__.py:
--------------------------------------------------------------------------------
"""This module is adapted from https://github.com/zeno-ml/zeno-build"""
 2 | from .providers.hf_utils import generate_from_huggingface_completion
 3 | from .providers.ours import call_pretrain_model
 4 | from .providers.openai_utils import (
 5 |     generate_from_openai_chat_completion,
 6 |     generate_from_openai_completion,
 7 | )
 8 | from .utils import call_llm
 9 | 
10 | __all__ = [
11 |     "generate_from_openai_completion",
12 |     "generate_from_openai_chat_completion",
13 |     "generate_from_huggingface_completion",
14 |     "call_llm",
15 |     "call_pretrain_model"
16 | ]
17 | 


--------------------------------------------------------------------------------
/webarena/llms/lm_config.py:
--------------------------------------------------------------------------------
 1 | """Config for language models."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import argparse
 6 | import dataclasses
 7 | from dataclasses import dataclass
 8 | from typing import Any
 9 | 
10 | 
@dataclasses.dataclass(frozen=True)
class LMConfig:
    """Immutable description of which language model to call and how.

    The dataclass is frozen, so fields cannot be reassigned after
    construction. The ``gen_config`` dict is still a regular mutable dict and
    is filled in place by ``construct_llm_config``.

    Attributes:
        provider: The name of the API provider.
        model: The name of the model.
        model_cls: The Python class corresponding to the model, mostly for
            Hugging Face transformers.
        tokenizer_cls: The Python class corresponding to the tokenizer, mostly
            for Hugging Face transformers.
        mode: The mode of the API calls, e.g., "chat" or "generation".
        gen_config: Provider-specific generation settings keyed by name; a
            fresh empty dict is created for every instance.
        cuda: CUDA device index, kept as a string (defaults to ``'0'``).
    """

    # Required identification of the backend.
    provider: str
    model: str

    # Optional extras; all default to "not set".
    model_cls: type | None = None
    tokenizer_cls: type | None = None
    mode: str | None = None

    # default_factory gives each instance its own dict instead of a shared one.
    gen_config: dict[str, Any] = dataclasses.field(default_factory=dict)
    cuda: str = '0'
32 | 
33 | 
def construct_llm_config(args: argparse.Namespace) -> LMConfig:
    """Build an ``LMConfig`` from parsed command-line arguments.

    ``provider``, ``model``, ``mode`` and ``cuda`` are copied onto the config
    itself; the remaining generation options are stored in ``gen_config``
    under provider-specific keys.

    Args:
        args: Parsed arguments. Must expose ``provider``, ``model``, ``mode``
            and ``cuda``, plus the generation options read for the selected
            provider (``temperature``, ``top_p``, ``max_tokens``,
            ``stop_token``, ``max_obs_length``, ``max_retry`` and, depending
            on the provider, ``context_length`` or ``model_endpoint``).

    Returns:
        The populated config.

    Raises:
        NotImplementedError: If ``args.provider`` is not ``"openai"``,
            ``"huggingface"`` or ``"ours"``.
    """
    llm_config = LMConfig(
        provider=args.provider, model=args.model, mode=args.mode, cuda=args.cuda
    )
    provider = args.provider
    settings = llm_config.gen_config

    if provider == "huggingface":
        # The Hugging Face endpoint uses its own names for the token limit
        # and the stop sequences.
        settings.update(
            temperature=args.temperature,
            top_p=args.top_p,
            max_new_tokens=args.max_tokens,
            stop_sequences=[args.stop_token] if args.stop_token else None,
            max_obs_length=args.max_obs_length,
            model_endpoint=args.model_endpoint,
            max_retry=args.max_retry,
        )
        return llm_config

    if provider not in ("openai", "ours"):
        raise NotImplementedError(f"provider {args.provider} not implemented")

    # "openai" and "ours" share the same option names.
    settings.update(
        temperature=args.temperature,
        top_p=args.top_p,
        context_length=args.context_length,
        max_tokens=args.max_tokens,
        stop_token=args.stop_token,
        max_obs_length=args.max_obs_length,
        max_retry=args.max_retry,
    )
    if provider == "ours":
        # The local model additionally records which CUDA device to use.
        settings["cuda"] = args.cuda
    return llm_config
68 | 


--------------------------------------------------------------------------------
/webarena/llms/providers/hf_utils.py:
--------------------------------------------------------------------------------
 1 | from text_generation import Client  # type: ignore
 2 | 
 3 | 
 4 | def generate_from_huggingface_completion(
 5 |     prompt: str,
 6 |     model_endpoint: str,
 7 |     temperature: float,
 8 |     top_p: float,
 9 |     max_new_tokens: int,
10 |     stop_sequences: list[str] | None = None,
11 | ) -> str:
12 |     client = Client(model_endpoint, timeout=60)
13 |     generation: str = client.generate(
14 |         prompt=prompt,
15 |         temperature=temperature,
16 |         top_p=top_p,
17 |         max_new_tokens=max_new_tokens,
18 |         stop_sequences=stop_sequences,
19 |     ).generated_text
20 | 
21 |     return generation
22 | 


--------------------------------------------------------------------------------
/webarena/llms/providers/ours.py:
--------------------------------------------------------------------------------
 1 | def call_pretrain_model(query: str, model, tokenizer, cuda, sample_times: int=1):        
 2 |     def chatglm3_base_template(query, history=None, system=None):
 3 |         prompt = f'Q: {query}\n\nA: '
 4 |         return prompt
 5 |     
 6 |     def model_chat(prompt: str):
 7 |         output, updated_history = model.chat(tokenizer, prompt, history=None)
 8 |         return output
 9 |     
10 |     def generation(prompt: str, sample_times: int=1):
11 |         input_ids = tokenizer.encode(
12 |             text=prompt,
13 |             return_tensors='pt',
14 |             max_length=8192,
15 |             truncation=False
16 |         ).to(f'cuda:{cuda}')
17 | 
18 |         if len(input_ids[0]) > 7500:
19 |             return ''
20 |         
21 |         output_ids = model.generate(
22 |             input_ids=input_ids,
23 |             max_new_tokens=1024,
24 |             do_sample=True,
25 |             top_p=0.7,
26 |             temperature=0.95,
27 |             num_return_sequences=sample_times
28 |         )
29 |         
30 |         output_text_list = []
31 |         for i in range(sample_times):
32 |             output_text = tokenizer.decode(output_ids[i], skip_special_tokens=True)
33 |             output_text = output_text.split('A: ')[-1]
34 |             output_text_list.append(output_text)
35 |         
36 |         output = output_text_list[0]
37 |         return output
38 |     
39 |     prompt = chatglm3_base_template(query)
40 |     output = generation(prompt)
41 |     # output = model_chat(prompt)
42 |     print('[Model]', output)
43 |     return output
44 | 


--------------------------------------------------------------------------------
/webarena/llms/tokenizers.py:
--------------------------------------------------------------------------------
 1 | from typing import Any
 2 | 
 3 | import tiktoken
 4 | from transformers import LlamaTokenizer  # type: ignore
 5 | 
 6 | 
 7 | class Tokenizer(object):
 8 |     def __init__(self, provider: str, model_name: str) -> None:
 9 |         if provider == "openai":
10 |             self.tokenizer = tiktoken.encoding_for_model(model_name)
11 |         elif provider == "huggingface":
12 |             self.tokenizer = LlamaTokenizer.from_pretrained(model_name)
13 |             # turn off adding special tokens automatically
14 |             self.tokenizer.add_special_tokens = False  # type: ignore[attr-defined]
15 |             self.tokenizer.add_bos_token = False  # type: ignore[attr-defined]
16 |             self.tokenizer.add_eos_token = False  # type: ignore[attr-defined]
17 |         elif provider == "ours":
18 |             self.tokenizer = tiktoken.encoding_for_model("gpt-4")
19 |         else:
20 |             raise NotImplementedError
21 | 
22 |     def encode(self, text: str) -> list[int]:
23 |         return self.tokenizer.encode(text)
24 | 
25 |     def decode(self, ids: list[int]) -> str:
26 |         return self.tokenizer.decode(ids)
27 | 
28 |     def __call__(self, text: str) -> list[int]:
29 |         return self.tokenizer.encode(text)
30 | 


--------------------------------------------------------------------------------
/webarena/llms/utils.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from typing import Any
 3 | from transformers import AutoTokenizer, AutoModel
 4 | 
 5 | from llms import (
 6 |     generate_from_huggingface_completion,
 7 |     generate_from_openai_chat_completion,
 8 |     generate_from_openai_completion,
 9 |     call_pretrain_model,
10 |     lm_config,
11 | )
12 | 
13 | APIInput = str | list[Any] | dict[str, Any]
14 | 
15 | model = None
16 | tokenizer = None
17 | 
def call_llm(
    lm_config: lm_config.LMConfig,
    prompt: APIInput,
) -> str:
    """Send ``prompt`` to the backend selected by ``lm_config`` and return its reply.

    Dispatch is on ``lm_config.provider``:

    * ``"openai"``: chat or completion API depending on ``lm_config.mode``.
      The chat call is always made with ``stop_token=None``.
    * ``"huggingface"``: the text-generation endpoint stored in
      ``gen_config["model_endpoint"]``.
    * ``"ours"``: a locally loaded model, or a human typing the reply when
      ``lm_config.model`` is ``'manual'``.

    The local model and tokenizer are loaded on the first ``"ours"`` call and
    kept in the module-level ``model`` / ``tokenizer`` globals; later calls
    reuse them without re-checking ``lm_config.model``.

    Args:
        lm_config: Backend selection and generation settings.
        prompt: A message list for OpenAI chat mode, otherwise a string.

    Returns:
        The text produced by the backend.

    Raises:
        ValueError: For an OpenAI ``mode`` other than ``"chat"`` or
            ``"completion"``.
        NotImplementedError: For an unknown provider.
    """
    global model
    global tokenizer

    provider = lm_config.provider

    if provider == "openai":
        mode = lm_config.mode
        if mode == "chat":
            assert isinstance(prompt, list)
            return generate_from_openai_chat_completion(
                messages=prompt,
                model=lm_config.model,
                temperature=lm_config.gen_config["temperature"],
                top_p=lm_config.gen_config["top_p"],
                context_length=lm_config.gen_config["context_length"],
                max_tokens=lm_config.gen_config["max_tokens"],
                stop_token=None,
            )
        if mode == "completion":
            assert isinstance(prompt, str)
            return generate_from_openai_completion(
                prompt=prompt,
                engine=lm_config.model,
                temperature=lm_config.gen_config["temperature"],
                max_tokens=lm_config.gen_config["max_tokens"],
                top_p=lm_config.gen_config["top_p"],
                stop_token=lm_config.gen_config["stop_token"],
            )
        raise ValueError(
            f"OpenAI models do not support mode {lm_config.mode}"
        )

    if provider == "huggingface":
        assert isinstance(prompt, str)
        return generate_from_huggingface_completion(
            prompt=prompt,
            model_endpoint=lm_config.gen_config["model_endpoint"],
            temperature=lm_config.gen_config["temperature"],
            top_p=lm_config.gen_config["top_p"],
            stop_sequences=lm_config.gen_config["stop_sequences"],
            max_new_tokens=lm_config.gen_config["max_new_tokens"],
        )

    if provider == "ours":
        if lm_config.model == 'manual':
            # Manual mode: a person types the reply at the console.
            return input("Command > ")

        if not model:
            # First use: load the model onto the configured GPU and cache it
            # together with its tokenizer in the module globals.
            model = AutoModel.from_pretrained(lm_config.model, trust_remote_code=True, device=f'cuda:{lm_config.cuda}')
            tokenizer = AutoTokenizer.from_pretrained(lm_config.model, trust_remote_code=True)
            model.eval()
        return call_pretrain_model(prompt, model, tokenizer, lm_config.cuda)

    raise NotImplementedError(
        f"Provider {lm_config.provider} not implemented"
    )
79 | 


--------------------------------------------------------------------------------
/webarena/media/example_trace_viewer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/media/example_trace_viewer.png


--------------------------------------------------------------------------------
/webarena/media/homepage_demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/media/homepage_demo.png


--------------------------------------------------------------------------------
/webarena/media/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/media/logo.png


--------------------------------------------------------------------------------
/webarena/media/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/media/overview.png


--------------------------------------------------------------------------------
/webarena/media/v1_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/media/v1_result.png


--------------------------------------------------------------------------------
/webarena/media/v2_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AutoWebGLM/eb8524e55cc3fac44625496e895e4044cea81fba/webarena/media/v2_result.png


--------------------------------------------------------------------------------
/webarena/minimal_example.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # type: ignore
  3 | 
  4 | import json
  5 | import os
  6 | import re
  7 | import subprocess
  8 | import time
  9 | 
 10 | SLEEP = 1.5
 11 | # set the URLs of each website, we use the demo sites as an example
 12 | os.environ[
 13 |     "SHOPPING"
 14 | ] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770"
 15 | os.environ[
 16 |     "SHOPPING_ADMIN"
 17 | ] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7780/admin"
 18 | os.environ[
 19 |     "REDDIT"
 20 | ] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:9999"
 21 | os.environ[
 22 |     "GITLAB"
 23 | ] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8023"
 24 | os.environ[
 25 |     "MAP"
 26 | ] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:3000"
 27 | os.environ[
 28 |     "WIKIPEDIA"
 29 | ] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing"
 30 | os.environ[
 31 |     "HOMEPAGE"
 32 | ] = "PASS"  # The home page is not currently hosted in the demo site
 33 | print("Done setting up URLs")
 34 | 
 35 | # First, run `python scripts/generate_test_data.py` to generate the config files
 36 | p = subprocess.run(
 37 |     ["python", "scripts/generate_test_data.py"], capture_output=True
 38 | )
 39 | 
 40 | # It will generate individual config file for each test example in config_files
 41 | assert os.path.exists("config_files/0.json")
 42 | 
 43 | # Make sure the URLs in the config files are replaced properly
 44 | with open("config_files/0.json", "r") as f:
 45 |     config = json.load(f)
 46 |     assert os.environ["SHOPPING_ADMIN"] in config["start_url"], (
 47 |         os.environ["SHOPPING_ADMIN"],
 48 |         config["start_url"],
 49 |     )
 50 | 
 51 | print("Done generating config files with the correct URLs")
 52 | 
 53 | # run bash prepare.sh to save all account cookies, this only needs to be done once
 54 | subprocess.run(["bash", "prepare.sh"])
 55 | print("Done saving account cookies")
 56 | 
 57 | # Init an environment
 58 | from browser_env import (
 59 |     Action,
 60 |     ActionTypes,
 61 |     ObservationMetadata,
 62 |     ScriptBrowserEnv,
 63 |     StateInfo,
 64 |     Trajectory,
 65 |     action2str,
 66 |     create_id_based_action,
 67 |     create_stop_action,
 68 | )
 69 | from evaluation_harness.evaluators import evaluator_router
 70 | 
 71 | # Init the environment
 72 | env = ScriptBrowserEnv(
 73 |     headless=False,
 74 |     slow_mo=100,
 75 |     observation_type="accessibility_tree",
 76 |     current_viewport_only=True,
 77 |     viewport_size={"width": 1280, "height": 720},
 78 | )
 79 | 
 80 | # example 156 as an example
 81 | config_file = "config_files/156.json"
 82 | # maintain a trajectory
 83 | trajectory: Trajectory = []
 84 | 
 85 | # set the environment for the current example
 86 | obs, info = env.reset(options={"config_file": config_file})
 87 | actree_obs = obs["text"]
 88 | print(actree_obs)
 89 | 
 90 | # You should see some output like this:
 91 | """
 92 | [4] RootWebArea 'Projects · Dashboard · GitLab' focused: True
 93 |         [12] link 'Skip to content'
 94 |         [28] link 'Dashboard'
 95 |         [2266] button '' hasPopup: menu expanded: False
 96 |         [63] textbox 'Search GitLab' required: False
 97 |         [61] generic 'Use the shortcut key <kbd>/</kbd> to start a search'
 98 |         [79] link 'Create new...'
 99 |         [95] link 'Issues'
100 |                 [97] generic '13 assigned issues'
101 |         [101] link 'Merge requests'
102 |                 [104] generic '8 merge requests'"""
103 | 
104 | # save the state info to the trajectory
105 | state_info: StateInfo = {"observation": obs, "info": info}
106 | trajectory.append(state_info)
107 | 
108 | # Now let's try to perform the action of clicking the "Merge request" link
109 | # As the element ID is dynamic each time, we use regex to match the element as the demo
110 | match = re.search(r"\[(\d+)\] link 'Merge requests'", actree_obs).group(1)
111 | # Create the action click [ELEMENT_ID]
112 | click_action = create_id_based_action(f"click [{match}]")
113 | # Add the action to the trajectory
114 | trajectory.append(click_action)
115 | 
116 | # Step and get the new observation
117 | obs, _, terminated, _, info = env.step(click_action)
118 | # New observation
119 | actree_obs = obs["text"]
120 | print(actree_obs)
121 | time.sleep(SLEEP)
122 | 
123 | state_info = {"observation": obs, "info": info}
124 | trajectory.append(state_info)
125 | 
126 | # Next click "assign to you"
127 | match = re.search(r"\[(\d+)\] link 'Assigned to you", actree_obs).group(1)
128 | click_action = create_id_based_action(f"click [{match}]")
129 | trajectory.append(click_action)
130 | 
131 | obs, _, terminated, _, info = env.step(click_action)
132 | actree_obs = obs["text"]
133 | print(actree_obs)
134 | time.sleep(SLEEP)
135 | state_info = {"observation": obs, "info": info}
136 | trajectory.append(state_info)
137 | 
138 | # add a stop action to mark the end of the trajectory
139 | trajectory.append(create_stop_action(""))
140 | 
141 | 
142 | # Demo evaluation
143 | evaluator = evaluator_router(config_file)
144 | score = evaluator(
145 |     trajectory=trajectory,
146 |     config_file=config_file,
147 |     page=env.page,
148 |     client=env.get_page_client(env.page),
149 | )
150 | 
151 | # as we manually perform the task, the task should be judged as correct
152 | assert score == 1.0
153 | 


--------------------------------------------------------------------------------
/webarena/parallel_run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
# [TODO] change this
# User configuration -- replace the placeholders before running.
#   model       value forwarded to run.py as --model
#   result_dir  value forwarded to run.py as --result_dir; also read by the
#               error checker and by get_result.py
#   cuda_list   one entry per job; handed to run_job as its 4th argument
#   SERVER      host name interpolated into the site URLs below
model="<model-path>"
result_dir="<result-path>"
cuda_list=(0 1 2 3)
SERVER="<host-name>"
OPENAI_API_KEY="<your-openapi-key>"

OPENAI_ORGANIZATION=""
CONDA_ENV_NAME="webarena"
instruction_path="agent/prompts/jsons/new_action_prompt.json"

# Shell snippet that is typed into every pane before run.py is started.
# NOTE: MAP points at a fixed host and does not use ${SERVER}.
ENV_VARIABLES="export SHOPPING='http://${SERVER}:7770';export SHOPPING_ADMIN='http://${SERVER}:7780/admin';export REDDIT='http://${SERVER}:9999';export GITLAB='http://${SERVER}:8023';export MAP='http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:3000';export WIKIPEDIA='http://${SERVER}:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing';export HOMEPAGE='http://${SERVER}:4399';export OPENAI_API_KEY=${OPENAI_API_KEY};export OPENAI_ORGANIZATION=${OPENAI_ORGANIZATION}"

# get the number of tmux panes (one output line per pane)
num_panes=$(tmux list-panes | wc -l)

# calculate how many panes need to be created to reach 5 in total
let "panes_to_create = 5 - num_panes"

# array of tmux commands to create each pane; entry i is the i-th split
tmux_commands=(
    'tmux split-window -h'
    'tmux split-window -v'
    'tmux select-pane -t 0; tmux split-window -v'
    'tmux split-window -v'
    'tmux select-pane -t 3; tmux split-window -v'
)

# create panes up to 5; eval is needed because some entries chain two
# tmux commands with ';'
for ((i=0; i<$panes_to_create; i++)); do
    eval ${tmux_commands[$i]}
done
35 | 
36 | #!/bin/bash
37 | 
# Launch one evaluation job inside a tmux pane.
#   $1  index of the tmux pane that receives the command
#   $2  value for run.py --test_start_idx
#   $3  value for run.py --test_end_idx
#   $4  CUDA device for run.py --cuda (falls back to $1 when omitted, which
#       keeps three-argument callers working)
# The command typed into the pane re-runs run.py until it exits successfully.
run_job() {
    local pane="$1"
    local start_idx="$2"
    local end_idx="$3"
    # Callers pass the device from cuda_list as the 4th argument; using the
    # pane index here would select devices 1..N instead of the listed ones.
    local cuda="${4:-$1}"

    tmux select-pane -t "${pane}"
    tmux send-keys "conda activate ${CONDA_ENV_NAME}; ${ENV_VARIABLES}; until python run.py --test_start_idx ${start_idx} --test_end_idx ${end_idx} --provider ours --mode completion --observation_type html --action_set_tag id_html_nasc_tree --model ${model} --instruction_path ${instruction_path} --result_dir ${result_dir} --cuda ${cuda} --sample 1; do echo 'crashed' >&2; sleep 1; done" C-m
    # give the pane a moment before the next job is dispatched
    sleep 3
}
44 | 
# Number of error examples the checker accepts before a rerun is triggered.
TOLERANCE=2

# Dispatch one job per consecutive pair of boundaries: pane i gets the range
# (bound[i-1], bound[i]) together with the (i-1)-th entry of cuda_list.
_launch_jobs() {
    local bounds=("$@")
    local i
    for ((i = 1; i < ${#bounds[@]}; i++)); do
        run_job "$i" "${bounds[i-1]}" "${bounds[i]}" "${cuda_list[i-1]}"
    done
}

# Block until no tmux pane reports a running python command.
_wait_for_jobs() {
    while tmux list-panes -F "#{pane_pid} #{pane_current_command}" | grep -q python; do
        sleep 100  # poll every 100 seconds
    done
}

# Run all shards described by the boundary list in "$@", then keep
# re-dispatching them while the checker script exits with a non-zero status.
run_batch() {
    _launch_jobs "$@"
    _wait_for_jobs

    # Run checker
    while ! python scripts/check_error_runs.py "${result_dir}" --delete_errors --tolerance "${TOLERANCE}"; do
        echo "Check failed, rerunning jobs..."
        _launch_jobs "$@"
        _wait_for_jobs
    done
}
73 | 
74 | run_batch 0 203 406 609 812
75 | python get_result.py ${result_dir}
76 | 


--------------------------------------------------------------------------------
/webarena/prepare.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # prepare the evaluation
4 | # re-validate login information
5 | mkdir -p ./.auth
6 | python browser_env/auto_login.py
7 | 


--------------------------------------------------------------------------------
/webarena/requirements.txt:
--------------------------------------------------------------------------------
 1 | gymnasium
 2 | playwright==1.32.1
 3 | Pillow
 4 | evaluate
 5 | openai==0.27.0
 6 | types-tqdm
 7 | tiktoken
 8 | aiolimiter
 9 | beartype==0.12.0
10 | flask
11 | nltk
12 | text-generation
13 | transformers==4.33.2
14 | lxml==5.1.0
15 | torch
16 | sentencepiece==0.2.0


--------------------------------------------------------------------------------
/webarena/resources/README.md:
--------------------------------------------------------------------------------
 1 | # WebArena Resources
 2 | ## [12/21/2023] Human Trajectories
 3 | We collected human trajectories on 179 tasks and the recording files are [here](https://drive.google.com/drive/folders/1NrN_sawtYK2V_uHnmmS8ugmGIKUAsPgt?usp=sharing).
 4 | 
 5 | We sample one task from each template or templates that share similar task semantics. Each file is named as `<task_id>.zip`, and the corresponding template id can be found in the [task config file](../config_files/test.raw.json). The trajectories are presented as playwright trace files. You can view the concrete HTML, network traffic etc by `playwright show-trace <example_idx>.zip`.
 6 | 
 7 | Human task success rate: 78.24%
 8 | 
 9 | 
10 | ## [11/3/2023] Execution Traces from Our Experiments (v2)
11 | ![v2 results](../media/v2_result.png)
12 | The results on the release v2 can be found in this [folder](https://drive.google.com/drive/folders/1H4wkzDkY2ufiC63DISMXllri0j-ipWcs?usp=sharing). It contains
13 | * text-bison-001 + CoT + UA Hint
14 | * GPT3.5-turbo-0613-16k + Direct + UA Hint
15 | * GPT3.5-turbo-0613-16k + Direct
16 | * GPT3.5-turbo-0613-16k + CoT + UA Hint
17 | * GPT3.5-turbo-0613-16k + CoT
18 | * GPT4-0613 + CoT
19 | 
20 | ## [8/7/2023] Execution Traces from Our Experiments (v1)
21 | ![v1 results](../media/v1_result.png)
22 | The results on the release v1 can be found in this [folder](https://drive.google.com/drive/folders/18Oww0fAgwhuSjSzxUNgzBUlC6M9IZZB2?usp=sharing). It contains
23 | * GPT4-0613 + CoT
24 | * GPT3.5-turbo-0613 + CoT
25 | * GPT3.5-turbo-0613 + Direct
26 | 
27 | 
28 | Once you unzip the file with `unzip <file_name>.zip`, you will see a list of `render_*.html`, a log file `merge_log.txt` recording whether an example failed or passed and a `trace` folder containing the `playwright` recording of the executions.
29 | 
30 | ### render_*.html
31 | Each file renders the execution trace of the corresponding example with (1) the accessibility tree observations, (2) the raw prediction from the agent and (3) the parsed action. We also provide the corresponding screenshot of each observation.
32 | 
33 | To extract specific information from the html, you could use the following code snippet:
34 | ```python
35 | from bs4 import BeautifulSoup
36 | with open("render_<id>.html", 'r') as f:
37 |     content = f.read()
38 |     soup = BeautifulSoup(content, 'html.parser')
39 |     # get the observations
40 |     observations = soup.find_all("div", {"class": "state_obv"})
41 |     # urls
42 |     urls = soup.find_all("h3", {"class": "url"})
43 |     # get the raw predictions (e.g, let's think step-by-step ....)
44 |     raw_predictions = soup.find_all("div", {"class": "raw_parsed_prediction"})
45 |     # get the action object
46 |     actions = soup.find_all("div", {"class": "action_object"})
47 | ```
48 | ### trace/*.zip
49 | The zip files are generated automatically with [playwright](https://playwright.dev/python/docs/trace-viewer). You can view the concrete HTML, network traffic etc by `playwright show-trace <example_idx>.zip`. You will see something like this:
50 | ![example_trace_viewer](../media/example_trace_viewer.png)
51 | 


--------------------------------------------------------------------------------
/webarena/scripts/check_error_runs.py:
--------------------------------------------------------------------------------
  1 | """Some executions may fail.
  2 | This script checks the recordings and prints the task ids.
  3 | It deletes the recordings if needed."""
  4 | import argparse
  5 | import glob
  6 | import os
  7 | import shutil
  8 | import sys
  9 | 
 10 | 
 11 | def merge_logs(result_folder: str, args: argparse.Namespace) -> str:
 12 |     if not os.path.exists(f"{result_folder}/log_files.txt"):
 13 |         sys.exit(1)
 14 | 
 15 |     with open(f"{result_folder}/log_files.txt", "r") as f:
 16 |         log_files = f.readlines()
 17 | 
 18 |     merged_results = {}
 19 |     for file in log_files:
 20 |         with open(file.strip(), "r") as f:
 21 |             lines = f.readlines()
 22 | 
 23 |         cur_log: list[str] = []
 24 |         index = None
 25 |         for line in lines:
 26 |             if "[Config file]" in line:
 27 |                 if (
 28 |                     cur_log
 29 |                     and index
 30 |                     and os.path.exists(f"{result_folder}/render_{index}.html")
 31 |                     and len(cur_log) >= 3
 32 |                 ):
 33 |                     merged_results[index] = cur_log
 34 |                 # update index and log
 35 |                 index = line.split("/")[-1].split(".")[0]
 36 |                 cur_log = [line]
 37 |             else:
 38 |                 cur_log.append(line)
 39 | 
 40 |         if (
 41 |             cur_log
 42 |             and index
 43 |             and os.path.exists(f"{result_folder}/render_{index}.html")
 44 |             and len(cur_log) >= 3
 45 |         ):
 46 | 
 47 |             merged_results[index] = cur_log
 48 | 
 49 |     # sort by the key
 50 |     merged_results = dict(
 51 |         sorted(merged_results.items(), key=lambda x: int(x[0]))
 52 |     )
 53 | 
 54 |     merged_log_path = f"{result_folder}/tmp_merged_log.txt"
 55 |     with open(merged_log_path, "w") as f:
 56 |         for k, v in merged_results.items():
 57 |             for line in v:
 58 |                 f.write(line)
 59 |     print(f"Number of examples: {len(merged_results)}")
 60 | 
 61 |     unlog_examples = []
 62 |     for i in range(812):
 63 |         if (
 64 |             os.path.exists(f"{result_folder}/render_{i}.html")
 65 |             and str(i) not in merged_results
 66 |         ):
 67 |             unlog_examples.append(i)
 68 | 
 69 |     print(f"Number of unlogged examples: {len(unlog_examples)}")
 70 |     print(unlog_examples)
 71 |     if (
 72 |         args.delete_errors
 73 |         or input("Do you want to delete these examples? (y/n)") == "y"
 74 |     ):
 75 |         for idx in unlog_examples:
 76 |             os.remove(f"{args.result_folder}/render_{idx}.html")
 77 | 
 78 |     unifinished_examples = [
 79 |         i for i in range(0, 812) if str(i) not in merged_results
 80 |     ]
 81 |     print(f"Number of unfinished examples: {len(unifinished_examples)}")
 82 |     print(unifinished_examples)
 83 | 
 84 |     return merged_log_path
 85 | 
 86 | 
 87 | def check_unhandled_errors(args: argparse.Namespace) -> int:
 88 |     log_path = merge_logs(args.result_folder, args)
 89 |     with open(log_path, "r") as f:
 90 |         logs = f.read()
 91 | 
 92 |     error_examples = []
 93 |     for line in logs.split("\n"):
 94 |         if "[Config file]" in line:
 95 |             example_idx = line.split("/")[-1].split(".")[0]
 96 |         if "[Unhandled Error]" in line or "[OpenAI Error]" in line:
 97 |             error_examples.append(int(example_idx))
 98 | 
 99 |     num_errors = len(error_examples)
100 |     print(f"Number of unhandled errors: {len(error_examples)}")
101 |     print(error_examples)
102 |     if (
103 |         args.delete_errors
104 |         or input("Do you want to delete these examples? (y/n)") == "y"
105 |     ):
106 |         for idx in error_examples:
107 |             if os.path.exists(f"{args.result_folder}/render_{idx}.html"):
108 |                 os.remove(f"{args.result_folder}/render_{idx}.html")
109 |     return num_errors
110 | 
111 | 
def check_unexpected_logout(args: argparse.Namespace) -> int:
    """Find render files whose content contains a login-page phrase.

    Every ``render_*.html`` in ``args.result_folder`` is scanned for one of
    a fixed set of phrases.  Matching task ids are printed and their render
    files are deleted when ``args.delete_errors`` is set or the user
    confirms with ``y``.

    Returns:
        The number of render files that matched.
    """
    logout_markers = {
        "Creating an account has many benefits: check out faster",
        "Welcome, please sign in",
        "Username or email",
        "Keep me logged in",
    }

    error_examples = []
    for render_file in glob.glob(f"{args.result_folder}/render_*.html"):
        with open(render_file, "r") as page:
            contents = page.read()
        if not any(marker in contents for marker in logout_markers):
            continue
        # "…/render_<id>.html" -> "render_<id>" -> "<id>"
        stem = render_file.split("/")[-1].split(".")[0]
        error_examples.append(int(stem.split("_")[-1]))

    print(f"Number of unexpected logout: {len(error_examples)}")
    print(error_examples)
    num_errors = len(error_examples)

    confirmed = (
        args.delete_errors
        or input("Do you want to delete these examples? (y/n)") == "y"
    )
    if confirmed:
        for idx in error_examples:
            render_path = f"{args.result_folder}/render_{idx}.html"
            if os.path.exists(render_path):
                os.remove(render_path)

    return num_errors
143 | 
144 | 
if __name__ == "__main__":
    # CLI: <result_folder> [--delete_errors] [--tolerance N]
    parser = argparse.ArgumentParser()
    parser.add_argument("result_folder", type=str)
    parser.add_argument("--delete_errors", action="store_true")
    parser.add_argument("--tolerance", type=int, default=0)
    args = parser.parse_args()

    # Both checks always run, in this order; their counts are summed.
    n1 = check_unhandled_errors(args)
    n2 = check_unexpected_logout(args)

    # Exit status 1 signals "more errors than tolerated", 0 otherwise.
    sys.exit(1 if n1 + n2 > args.tolerance else 0)
158 | 


--------------------------------------------------------------------------------
/webarena/scripts/collect_obs.py:
--------------------------------------------------------------------------------
 1 | """Simple script to quickly get the observation of a page"""
 2 | 
 3 | import json
 4 | import re
 5 | import time
 6 | from typing import Dict, Optional, Tuple, Type, Union, cast
 7 | 
 8 | import pytest
 9 | from playwright.sync_api import Page, expect
10 | 
11 | from browser_env import (
12 |     ScriptBrowserEnv,
13 |     create_id_based_action,
14 |     create_key_press_action,
15 |     create_playwright_action,
16 |     create_scroll_action,
17 | )
18 | from browser_env.env_config import *
19 | 
20 | HEADLESS = False
21 | 
22 | 
def gen_tmp_storage_state() -> None:
    """Write ``scripts/tmp_storage_state.json``.

    The file holds a single ``storage_state`` entry that points at
    ``.auth/shopping_admin_state.json``.
    """
    config = {"storage_state": ".auth/shopping_admin_state.json"}
    with open("scripts/tmp_storage_state.json", "w") as out:
        json.dump(config, out)
26 | 
27 | 
def get_observation(
    observation_type: str, current_viewport_only: bool
) -> None:
    """Replay a fixed list of playwright actions and print each observation.

    A ``ScriptBrowserEnv`` is created with the given observation settings
    and reset with ``scripts/tmp_storage_state.json`` as its config file.
    After every action the text observation is printed and the function
    waits for the user to press enter.  The environment is closed when the
    function returns or raises.

    Args:
        observation_type: Forwarded to ``ScriptBrowserEnv``.
        current_viewport_only: Forwarded to ``ScriptBrowserEnv``.
    """
    env = ScriptBrowserEnv(
        observation_type=observation_type,
        current_viewport_only=current_viewport_only,
        headless=HEADLESS,
        sleep_after_execution=2.0,
    )
    action_seq = [
        'page.goto("http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7780/admin/admin/dashboard/")',
        'page.get_by_label("", exact=True).fill("reviews")',
        'page.get_by_label("", exact=True).press("Enter")',
        "page.scroll(down)",
    ]

    try:
        env.reset(options={"config_file": "scripts/tmp_storage_state.json"})
        for action in action_seq:
            obs, _, _, _, _ = env.step(create_playwright_action(action))
            print(obs["text"])
            _ = input("Press enter to continue")
    finally:
        # release the browser even when a step fails or the user interrupts
        env.close()
49 | 
50 | 
if __name__ == "__main__":
    # Write the temporary config first; get_observation() passes that same
    # path to env.reset().
    gen_tmp_storage_state()
    obs_type = "accessibility_tree"
    current_viewport_only = True
    get_observation(obs_type, current_viewport_only)
56 | 


--------------------------------------------------------------------------------
/webarena/scripts/generate_test_data.py:
--------------------------------------------------------------------------------
 1 | """Replace the website placeholders with website domains from env_config
 2 | Generate the test data"""
 3 | import json
 4 | 
 5 | from browser_env.env_config import *
 6 | 
 7 | 
 8 | def main() -> None:
 9 |     with open("config_files/test.raw.json", "r") as f:
10 |         raw = f.read()
11 |     raw = raw.replace("__GITLAB__", GITLAB)
12 |     raw = raw.replace("__REDDIT__", REDDIT)
13 |     raw = raw.replace("__SHOPPING__", SHOPPING)
14 |     raw = raw.replace("__SHOPPING_ADMIN__", SHOPPING_ADMIN)
15 |     raw = raw.replace("__WIKIPEDIA__", WIKIPEDIA)
16 |     raw = raw.replace("__MAP__", MAP)
17 |     with open("config_files/test.json", "w") as f:
18 |         f.write(raw)
19 |     # split to multiple files
20 |     data = json.loads(raw)
21 |     for idx, item in enumerate(data):
22 |         with open(f"config_files/{idx}.json", "w") as f:
23 |             json.dump(item, f, indent=2)
24 | 
25 | 
# Run the generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()
28 | 


--------------------------------------------------------------------------------
/webarena/scripts/html2json.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import base64
  3 | import glob
  4 | import json
  5 | import os
  6 | from collections import defaultdict
  7 | from typing import Any
  8 | 
  9 | from bs4 import BeautifulSoup
 10 | 
 11 | 
def main(result_folder: str, config_json: str) -> None:
    """Convert the rendered HTML traces of a run into one JSON dump.

    For every ``render_*.html`` in ``result_folder`` the observations, URLs,
    predicted actions and embedded screenshots are extracted and combined
    with the trimmed task config and the pass/fail flag read from
    ``merged_log.txt``.  The result is written to
    ``<result_folder>/json_dump.json``.  Screenshots are decoded and saved
    under ``images/<basename of result_folder>/`` relative to the current
    working directory.

    Args:
        result_folder: Folder holding the render files and ``merged_log.txt``.
        config_json: Path of the JSON file with the list of task configs.
    """
    all_data = {}
    # Hands out 0, 1, 2, ... to intent templates in first-seen order: the
    # factory runs before the new key is inserted, so len() is the next id.
    template_to_id: dict[str, Any] = defaultdict(lambda: len(template_to_id))

    with open(config_json, "r") as f:
        data_configs = json.load(f)
        # index the configs by their integer task id
        data_configs = {int(item["task_id"]): item for item in data_configs}
        for k, v in data_configs.items():
            # drop fields that are not carried over into the dump
            v.pop("require_login")
            v.pop("storage_state")
            v.pop("start_url")
            v.pop("geolocation")
            v.pop("require_reset")
            # replace the original template id with the dense one from above
            v.pop("intent_template_id")
            v["intent_template_id"] = template_to_id[v["intent_template"]]
            # lift selected entries of "eval" to the top level; the
            # reference fields are only lifted when they are non-empty
            v["eval_types"] = v["eval"].pop("eval_types")
            if v["eval"]["reference_answers"]:
                v["reference_answers"] = v["eval"].pop("reference_answers")
            if v["eval"]["reference_url"]:
                v["reference_url"] = v["eval"].pop("reference_url")
            v.pop("eval")
            # a task whose exact-match answer is "N/A" is marked unachievable
            if v.get("reference_answers", {}).get("exact_match", "") == "N/A":
                v["achievable"] = False
            else:
                v["achievable"] = True

    with open(f"{result_folder}/merged_log.txt", "r") as f:
        results = {}
        for line in f:
            if "[Result]" in line:
                # the id is the file stem of the path in the result line:
                # text before the last "." and after the last "/"
                id = line.strip().split(".")[-2].split("/")[-1]
                results[int(id)] = True if "(PASS)" in line else False

    files = list(glob.glob(f"{result_folder}/render_*.html"))
    files = [x for x in files if os.path.exists(x)]
    print(f"Total number of files: {len(files)}")

    for render_file in files:
        # "render_<task_id>.html" -> <task_id>
        task_id = int(render_file.split("_")[-1].split(".")[0])
        with open(render_file, "r") as f:
            # Any failure while processing one file is printed and that file
            # is left out of the dump (see the except clause below).
            try:
                content = f.read()
                soup = BeautifulSoup(content, "html.parser")
                observations = [
                    obv.find("pre").text
                    for obv in soup.find_all("div", {"class": "state_obv"})
                ]
                # keep the part of each img src after the first comma
                # (presumably the base64 payload of a data URI -- confirm)
                base64_images = [
                    img["src"].split(",")[1] for img in soup.find_all("img")
                ]
                image_observations = []
                # save image to file and change the value to be path
                image_folder = f"images/{os.path.basename(result_folder)}"
                os.makedirs(image_folder, exist_ok=True)
                for i, image in enumerate(base64_images):
                    image_data = base64.b64decode(image)
                    filename = f"{image_folder}/image_{task_id}_{i}.png"
                    # NOTE: this rebinds `f`, shadowing the render-file handle
                    with open(filename, "wb") as f:  # type: ignore[assignment]
                        f.write(image_data)  # type: ignore[arg-type]
                    image_observations.append(filename)
                urls = [
                    url.get_text()
                    for url in soup.find_all("h3", {"class": "url"})
                ]
                actions = [
                    action.get_text()
                    for action in soup.find_all(
                        "div", {"class": "raw_parsed_prediction"}
                    )
                ]
                parsed_actions = [
                    action.get_text()
                    for action in soup.find_all(
                        "div", {"class": "parsed_action"}
                    )
                ]
                # fill action with parsed action if action is empty
                for i in range(len(actions)):
                    if actions[i] == "":
                        actions[i] = parsed_actions[i]

                messages = []
                # zip stops at the shortest list, so a step is emitted only
                # when observation, url, action and image are all present
                for o, u, a, image in zip(
                    observations, urls, actions, image_observations
                ):
                    messages.append(
                        {
                            "user": f"{u}\n\nobservation:\n{o}",
                            "image": image,
                        }
                    )
                    messages.append({"assistant": a})

                # tasks without a "[Result]" line count as not successful
                all_data[f"example_{task_id}"] = {
                    **data_configs[task_id],
                    "messages": messages,
                    "success": results.get(task_id, False),
                }

            except Exception as e:
                print(e)
                print(f"Error in {render_file}")

    with open(f"{result_folder}/json_dump.json", "w+") as f:
        json.dump(all_data, f, indent=4)
117 | 
118 | 
if __name__ == "__main__":
    # CLI: --result_folder <dir> [--config_json <path>]
    # NOTE: --result_folder has no default, so it is None when omitted.
    parser = argparse.ArgumentParser()
    parser.add_argument("--result_folder", type=str)
    parser.add_argument(
        "--config_json", type=str, default="config_files/test.raw.json"
    )
    args = parser.parse_args()
    main(args.result_folder, args.config_json)
127 | 


--------------------------------------------------------------------------------
/webarena/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | name = webarena
 3 | 
 4 | [tool.pytest.ini_options]
 5 | testpaths = ["tests"]
 6 | python_files = "test_*.py"
 7 | 
 8 | [options.extras_require]
 9 | dev =
10 |     pre-commit==3.0.1
11 |     pytest==7.1.2
12 |     mypy==0.991
13 |     nbmake
14 |     pytest-asyncio
15 |     types-requests
16 | 
17 | [options]
18 | python_requires = >=3.7, <4
19 | packages =
20 |     browser_env
21 |     agent
22 |     evaluation_harness
23 |     llms
24 | [mypy]
25 | strict = true
26 | 


--------------------------------------------------------------------------------
/webarena/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | 
# setup() is called without arguments; presumably the package metadata is
# read from the accompanying setup.cfg -- confirm.
if __name__ == "__main__":
    setup()
5 | 


--------------------------------------------------------------------------------
/webarena/solver/__init__.py:
--------------------------------------------------------------------------------
 1 | from .utils import step_once, show_screenshot
 2 | 
 3 | from browser_env import (
 4 |     StateInfo,
 5 |     Trajectory,
 6 | )
 7 | 
 8 | from .shopping_admin import *
 9 | 
def manual_solver(env, config_file, render_helper, agent, args, intent, field_dict):
    """Solve one task interactively, reading every action from stdin.

    The agent and the environment are reset for ``config_file``; afterwards
    each line typed by the user is passed to ``step_once`` as the target
    action until ``step_once`` returns ``None``.

    Args:
        env: Browser environment; must provide ``reset``.
        config_file: Path of the task config passed to agent and env.
        render_helper: Forwarded to ``step_once``.
        agent: Agent providing ``reset``; forwarded to ``step_once``.
        args: Run arguments forwarded to ``step_once``.
        intent: Task intent forwarded to ``step_once``.
        field_dict: Printed once before the loop starts; not used otherwise.

    Returns:
        A ``(traces, trajectory)`` tuple collected during the session.
    """
    agent.reset(config_file)
    trajectory: Trajectory = []
    obs, info = env.reset(options={"config_file": config_file})
    state_info: StateInfo = {"observation": obs, "info": info}
    trajectory.append(state_info)

    meta_data = {"action_history": ["None"]}
    traces = []

    print(field_dict)
    while True:
        target_action = input()
        res = step_once(env, trajectory, render_helper, traces, state_info, agent, args, intent, meta_data, target_action)
        if res is None:
            return traces, trajectory
        # step_once also returns the DOM tree and node info of the new
        # state; only the state itself is needed for the next iteration.
        state_info, _, _ = res
33 |         


--------------------------------------------------------------------------------
/webarena/solver/utils.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | import json
  3 | from PIL import Image
  4 | 
  5 | from agent import PromptAgent
  6 | from browser_env.helper_functions import get_action_description
  7 | from browser_env import ActionTypes
  8 | 
  9 | def show_screenshot(state_info):
 10 |     image_data = state_info["observation"]["image"]
 11 |     im = Image.fromarray(image_data)
 12 |     im.save('output/show_screenshot.png')
 13 | 
 14 | def get_nodes(dom_tree, nodes_info, attr, attrval, mode: int=0, use_elem: int=0):
 15 |     tar_nodes, temp_nodes = [], []
 16 |     for nodes in dom_tree:
 17 |         if (mode == 1 and nodes[attr] == attrval) or (mode == 0 and nodes[attr].count(attrval) > 0):
 18 |             if use_elem == 0:
 19 |                 tar_nodes.append(nodes['backendNodeId'])
 20 |             elif use_elem == 1:
 21 |                 temp_nodes.extend(nodes['childIds'])
 22 |             elif use_elem == 2:
 23 |                 temp_nodes.append(nodes['parentId'])
 24 |                 
 25 |     if use_elem != 0:
 26 |         for node in dom_tree:
 27 |             if node['nodeId'] in temp_nodes:
 28 |                 tar_nodes.append(node['backendNodeId'])
 29 |     
 30 |     act_nodes = []
 31 |     
 32 |     for node in list(nodes_info.values()):
 33 |         if node['backend_id'] in tar_nodes:
 34 |             act_nodes.append(node)
 35 |     return act_nodes
 36 |  
 37 | def step_once(env, trajectory, render_helper, traces, state_info, agent, args, intent, meta_data, target_action):    
 38 |     obs_info = state_info["info"]["observation_metadata"]["text"]
 39 |     images = state_info["info"]["images"]
 40 |     dom_info, nodes_info = obs_info["dom_info"], obs_info["obs_nodes_info"]
 41 |     dom_tree = dom_info["dom_tree"]
 42 |     raw_html = dom_info["raw_html"]
 43 | 
 44 |     if target_action.count('#Type#') > 0 and target_action.endswith('\\n'):
 45 |         target_action = target_action[:-2] + '\n'
 46 |     
 47 |     prompt, action = agent.check_action(
 48 |         trajectory, intent, meta_data, target_action
 49 |     )
 50 | 
 51 |     print('[prompt] ', prompt)
 52 |     print('[action] ', action)
 53 |     
 54 |     # our_dom_tree = copy.deepcopy(dom_tree)
 55 |     # for elem in our_dom_tree:
 56 |     #     elem["union_bound"] = elem["union_bound"].tolist()
 57 |     
 58 |     myaction = copy.deepcopy(action)
 59 |     myaction["coords"] = myaction["coords"].tolist()
 60 |     
 61 |     need_to_keep = action['action_type'] != ActionTypes.NONE or target_action.count("#Record#") > 0
 62 |     
 63 |     trajectory.append(action)
 64 | 
 65 |     action_str = get_action_description(
 66 |         action,
 67 |         state_info["info"]["observation_metadata"],
 68 |         action_set_tag=args.action_set_tag,
 69 |         prompt_constructor=agent.prompt_constructor
 70 |         if isinstance(agent, PromptAgent)
 71 |         else None,
 72 |     )
 73 |     
 74 |     if need_to_keep:
 75 |         user_action = action_str.split(' #HTML Segment')[0]
 76 |         traces.append({
 77 |             'source': prompt,
 78 |             'target': f'{real_action}',
 79 |             'extra_data': {
 80 |                 'element_id': action.get('element_id', ''),
 81 |                 'dom_tree': dom_tree,
 82 |                 'raw_html': raw_html,
 83 |                 'nodes_info': nodes_info,
 84 |                 'raw_action': myaction,
 85 |                 'images': images,
 86 |             },
 87 |         })
 88 |     
 89 |     render_helper.render(
 90 |         action, state_info, meta_data, args.render_screenshot
 91 |     )
 92 |     
 93 |     if need_to_keep:
 94 |         meta_data["action_history"].append(action_str)
 95 | 
 96 |     if action["action_type"] == ActionTypes.STOP:
 97 |         return None
 98 |     
 99 |     # if action['action_type'] == ActionTypes.TYPE:
100 |     #     action['text'] = [110] * 500 + action['text']
101 | 
102 |     obs, _, terminated, _, info = env.step(action)
103 |     state_info = {"observation": obs, "info": info}
104 |     show_screenshot(state_info)
105 |     
106 |     trajectory.append(state_info)
107 | 
108 |     if terminated:
109 |         # add a action place holder
110 |         trajectory.append(create_stop_action(""))
111 |         return None
112 |     
113 |     obs_info = state_info["info"]["observation_metadata"]["text"]
114 |     dom_tree, nodes_info = obs_info["dom_info"]["dom_tree"], obs_info["obs_nodes_info"]
115 |     
116 |     return state_info, dom_tree, nodes_info


--------------------------------------------------------------------------------
/webarena/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | from typing import AsyncGenerator, Generator
 2 | 
 3 | import pytest
 4 | import pytest_asyncio
 5 | 
 6 | from browser_env import AsyncScriptBrowserEnv, ScriptBrowserEnv
 7 | 
 8 | HEADLESS = True
 9 | SLOW_MO = 0
10 | 
11 | 
@pytest.fixture(scope="function")
def script_browser_env() -> Generator[ScriptBrowserEnv, None, None]:
    """Create a ScriptBrowserEnv instance for testing.

    The fixture is function-scoped: a fresh environment is created for each
    test and closed in the teardown step after that test.
    This is helpful when the test failed and the browser is still open.
    """
    env = ScriptBrowserEnv(
        headless=HEADLESS,
        slow_mo=SLOW_MO,
    )
    yield env
    # teardown: runs after the test that requested the fixture
    env.close()
24 | 
25 | 
26 | @pytest.fixture(scope="function")
27 | def current_viewport_script_browser_env() -> Generator[
28 |     ScriptBrowserEnv, None, None
29 | ]:
30 |     env = ScriptBrowserEnv(
31 |         headless=HEADLESS,
32 |         slow_mo=SLOW_MO,
33 |         current_viewport_only=True,
34 |     )
35 |     yield env
36 |     env.close()
37 | 
38 | 
39 | @pytest.fixture(scope="function")
40 | def accessibility_tree_script_browser_env() -> Generator[
41 |     ScriptBrowserEnv, None, None
42 | ]:
43 |     env = ScriptBrowserEnv(
44 |         headless=HEADLESS,
45 |         slow_mo=SLOW_MO,
46 |         observation_type="accessibility_tree",
47 |     )
48 |     yield env
49 |     env.close()
50 | 
51 | 
52 | @pytest.fixture(scope="function")
53 | def accessibility_tree_current_viewport_script_browser_env() -> Generator[
54 |     ScriptBrowserEnv, None, None
55 | ]:
56 |     env = ScriptBrowserEnv(
57 |         headless=HEADLESS,
58 |         slow_mo=SLOW_MO,
59 |         observation_type="accessibility_tree",
60 |         current_viewport_only=True,
61 |     )
62 |     yield env
63 |     env.close()
64 | 
65 | 
66 | @pytest_asyncio.fixture(scope="function", autouse=True)
67 | async def async_script_browser_env() -> AsyncGenerator[
68 |     AsyncScriptBrowserEnv, None
69 | ]:
70 |     env = AsyncScriptBrowserEnv(headless=HEADLESS, slow_mo=SLOW_MO)
71 |     yield env
72 |     await env.aclose()
73 | 


--------------------------------------------------------------------------------
/webarena/tests/test_browser_env/test_actions.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from browser_env import *
 4 | 
 5 | 
 6 | def test_is_equivalent() -> None:
 7 |     for action_type in ActionTypes.__members__.values():
 8 |         action_a = create_random_action()
 9 |         action_b = create_random_action()
10 |         if action_a["action_type"] != action_b["action_type"]:
11 |             assert not is_equivalent(action_a, action_b)
12 |         action_a["action_type"] = action_type
13 |         action_b["action_type"] = action_type
14 |         match action_type:
15 |             case ActionTypes.MOUSE_CLICK | ActionTypes.MOUSE_HOVER:
16 |                 if not np.allclose(action_a["coords"], action_b["coords"]):
17 |                     assert not is_equivalent(action_a, action_b)
18 |                     action_a["coords"] = action_b["coords"]
19 |                 assert is_equivalent(action_a, action_b)
20 |             case ActionTypes.KEYBOARD_TYPE:
21 |                 if action_a["text"] != action_b["text"]:
22 |                     assert not is_equivalent(action_a, action_b)
23 |                     action_a["text"] = action_b["text"]
24 |                 assert is_equivalent(action_a, action_b)
25 |             case ActionTypes.CLICK | ActionTypes.HOVER | ActionTypes.TYPE:
26 |                 if action_a["element_id"] and action_b["element_id"]:
27 |                     if action_a["element_id"] != action_b["element_id"]:
28 |                         assert not is_equivalent(action_a, action_b)
29 |                         action_a["element_id"] = action_b["element_id"]
30 |                     assert is_equivalent(action_a, action_b)
31 |                 elif action_a["element_id"] and action_b["element_id"]:
32 |                     if action_a["element_role"] != action_b["element_role"]:
33 |                         assert not is_equivalent(action_a, action_b)
34 |                         action_a["element_role"] = action_b["element_role"]
35 |                     if action_a["element_name"] != action_b["element_name"]:
36 |                         assert not is_equivalent(action_a, action_b)
37 |                         action_a["element_name"] = action_b["element_name"]
38 |                     assert is_equivalent(action_a, action_b)
39 |                 elif action_a["pw_code"] and action_b["pw_code"]:
40 |                     if action_a["pw_code"] != action_b["pw_code"]:
41 |                         assert not is_equivalent(action_a, action_b)
42 |                         action_a["pw_code"] = action_b["pw_code"]
43 |                     assert is_equivalent(action_a, action_b)
44 |                 else:
45 |                     action_a["element_id"] = action_b["element_id"]
46 |                     assert is_equivalent(action_a, action_b)
47 |             case ActionTypes.GOTO_URL:
48 |                 if action_a["url"] != action_b["url"]:
49 |                     assert not is_equivalent(action_a, action_b)
50 |                     action_a["url"] = action_b["url"]
51 |                 assert is_equivalent(action_a, action_b)
52 |             case ActionTypes.PAGE_FOCUS:
53 |                 if action_a["page_number"] != action_b["page_number"]:
54 |                     assert not is_equivalent(action_a, action_b)
55 |                     action_a["page_number"] = action_b["page_number"]
56 |                 assert is_equivalent(action_a, action_b)
57 |             case ActionTypes.SCROLL:
58 |                 da = "up" if "up" in action_a["direction"] else "down"
59 |                 db = "up" if "up" in action_b["direction"] else "down"
60 |                 if da != db:
61 |                     assert not is_equivalent(action_a, action_b)
62 |                     action_a["direction"] = action_b["direction"]
63 |                 assert is_equivalent(action_a, action_b)
64 |             case ActionTypes.KEY_PRESS:
65 |                 if action_a["key_comb"] != action_b["key_comb"]:
66 |                     assert not is_equivalent(action_a, action_b)
67 |                     action_a["key_comb"] = action_b["key_comb"]
68 |                 assert is_equivalent(action_a, action_b)
69 |             case ActionTypes.CHECK | ActionTypes.SELECT_OPTION:
70 |                 if action_a["pw_code"] != action_b["pw_code"]:
71 |                     assert not is_equivalent(action_a, action_b)
72 |                     action_a["pw_code"] = action_b["pw_code"]
73 |                 assert is_equivalent(action_a, action_b)
74 |             case ActionTypes.STOP:
75 |                 if action_a["answer"] != action_b["answer"]:
76 |                     assert not is_equivalent(action_a, action_b)
77 |                     action_a["answer"] = action_b["answer"]
78 |                 assert is_equivalent(action_a, action_b)
79 |             case _:
80 |                 assert is_equivalent(action_a, action_b)
81 | 
82 | 
83 | def test_action2create_function() -> None:
84 |     for _ in range(1000):
85 |         action = create_random_action()
86 |         create_function = action2create_function(action)
87 |         assert is_equivalent(action, eval(create_function))
88 | 


--------------------------------------------------------------------------------
/webarena/tests/test_browser_env/test_auth_cookie.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import json
 3 | 
 4 | from browser_env import *
 5 | 
 6 | auth_json = {  # dumped to /tmp/auth.json and used as storage_state below
 7 |     "cookies": [
 8 |         {
 9 |             "name": "session-username",
10 |             "value": "standard_user",
11 |             "domain": "www.saucedemo.com",
12 |             "path": "/",
13 |             "httpOnly": False,
14 |             "secure": False,
15 |             "sameSite": "Lax",
16 |         }
17 |     ],
18 |     "origins": [],
19 | }
20 | 
21 | 
22 | def test_auth_cookie() -> None:
23 |     env = ScriptBrowserEnv()
24 |     env.reset()
25 |     _, reward, _, _, info = env.step(
26 |         create_goto_url_action("https://www.saucedemo.com/inventory.html"),
27 |     )
28 |     assert reward == 1
29 |     assert "page" in info and isinstance(info["page"], DetachedPage)
30 |     assert info["page"].url == "https://www.saucedemo.com/"
31 |     json.dump(auth_json, open("/tmp/auth.json", "w"))
32 |     instance_config = {"storage_state": "/tmp/auth.json"}
33 |     json.dump(instance_config, open("/tmp/config.json", "w"))
34 |     env.reset(options={"config_file": "/tmp/config.json"})
35 |     _, reward, _, _, info = env.step(
36 |         create_goto_url_action("https://www.saucedemo.com/inventory.html"),
37 |     )
38 |     assert reward == 1
39 |     assert "page" in info and isinstance(info["page"], DetachedPage)
40 |     assert info["page"].url == "https://www.saucedemo.com/inventory.html"
41 |     env.close()
42 | 
43 | 
44 | def test_async_auth_cookie() -> None:
45 |     env = AsyncScriptBrowserEnv()
46 | 
47 |     async def _test() -> None:
48 |         await env.areset()
49 |         _, reward, _, _, info = await env.astep(
50 |             create_goto_url_action("https://www.saucedemo.com/inventory.html"),
51 |         )
52 |         assert reward == 1
53 |         assert "page" in info and isinstance(info["page"], DetachedPage)
54 |         assert info["page"].url == "https://www.saucedemo.com/"
55 |         json.dump(auth_json, open("/tmp/auth.json", "w"))
56 |         instance_config = {"storage_state": "/tmp/auth.json"}
57 |         json.dump(instance_config, open("/tmp/config.json", "w"))
58 |         await env.areset(options={"config_file": "/tmp/config.json"})
59 |         _, reward, _, _, info = await env.astep(
60 |             create_goto_url_action("https://www.saucedemo.com/inventory.html"),
61 |         )
62 |         assert reward == 1
63 |         assert "page" in info and isinstance(info["page"], DetachedPage)
64 |         assert info["page"].url == "https://www.saucedemo.com/inventory.html"
65 |         await env.aclose()
66 | 
67 |     asyncio.run(_test())
68 | 


--------------------------------------------------------------------------------
/webarena/tests/test_browser_env/test_playwright_actions.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, Generator, Optional, Tuple, Type, Union, cast
 2 | 
 3 | import pytest
 4 | from playwright.sync_api import Page
 5 | 
 6 | from browser_env import ScriptBrowserEnv, create_playwright_action
 7 | 
 8 | HEADLESS = True  # NOTE(review): not referenced anywhere in this module
 9 | SLOW_MO = 0  # NOTE(review): not referenced anywhere in this module
10 | 
11 | 
12 | def test_frame_locator(script_browser_env: ScriptBrowserEnv) -> None:
13 |     env = script_browser_env
14 |     seq = """page.goto("https://www.littlewebhut.com/articles/html_iframe_example/")
15 |     page.frame_locator("iframe[name=\\"imgbox\\"]").get_by_role("img").click()"""
16 | 
17 |     env.reset()
18 |     for action in seq.split("\n"):
19 |         action = action.strip()
20 |         _, success, _, _, info = env.step(create_playwright_action(action))
21 |         assert success
22 | 
23 | 
24 | def test_basic(script_browser_env: ScriptBrowserEnv) -> None:
25 |     # click, fill, press, check, goto
26 |     env = script_browser_env
27 |     seq = """page.goto("https://demo.playwright.dev/todomvc/")
28 |     page.get_by_placeholder("What needs to be done?").click()
29 |     page.get_by_placeholder("What needs to be done?").fill("hello")
30 |     page.get_by_placeholder("What needs to be done?").press("Enter")
31 |     page.get_by_placeholder("What needs to be done?").fill("world")
32 |     page.get_by_placeholder("What needs to be done?").press("Enter")
33 |     page.get_by_placeholder("What needs to be done?").fill("yes")
34 |     page.get_by_placeholder("What needs to be done?").press("Enter")
35 |     page.get_by_placeholder("What needs to be done?").fill("no")
36 |     page.get_by_placeholder("What needs to be done?").press("Enter")
37 |     page.get_by_role("listitem").filter(has_text="world").get_by_role("checkbox", name="Toggle Todo").check()
38 |     page.get_by_role("button", name="Clear completed").click()"""
39 | 
40 |     env.reset()
41 |     for action in seq.split("\n"):
42 |         action = action.strip()
43 |         _, success, _, _, info = env.step(create_playwright_action(action))
44 |         assert success
45 | 
46 | 
47 | @pytest.mark.skip(reason="not important, but the site is flaky")
48 | def test_hover(script_browser_env: ScriptBrowserEnv) -> None:
49 |     env = script_browser_env
50 |     seq = """page.goto("https://www.w3schools.com/cssref/tryit.php?filename=trycss_sel_hover")
51 |     page.frame_locator("iframe[name=\\'iframeResult\\']").get_by_role("link", name="w3schools.com").hover()"""
52 | 
53 |     env.reset()
54 |     for action in seq.split("\n"):
55 |         action = action.strip()
56 |         _, success, _, _, info = env.step(create_playwright_action(action))
57 |         assert success
58 | 
59 | 
60 | @pytest.mark.skip(reason="not important, but the site is flaky")
61 | def test_select_option(script_browser_env: ScriptBrowserEnv) -> None:
62 |     env = script_browser_env
63 |     seq = """page.goto("https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_select")
64 |     page.frame_locator("iframe[name=\\'iframeResult\\']").get_by_role("combobox", name="Choose a car:").select_option("opel")"""
65 | 
66 |     env.reset()
67 |     for action in seq.split("\n"):
68 |         action = action.strip()
69 |         _, success, _, _, info = env.step(create_playwright_action(action))
70 |         assert success
71 | 
72 | 
73 | def test_xpath(script_browser_env: ScriptBrowserEnv) -> None:
74 |     env = script_browser_env
75 |     seq = """page.goto("https://demo.playwright.dev/todomvc/")
76 |     page.goto("https://demo.playwright.dev/todomvc/#/")
77 |     page.get_by_placeholder("What needs to be done?").click()
78 |     page.get_by_placeholder("What needs to be done?").fill("hello")
79 |     page.get_by_placeholder("What needs to be done?").press("Enter")
80 |     page.get_by_role("link", name="Completed").click()
81 |     page.locator("xpath=/html/body/section/div/header/input").fill("no")
82 |     page.get_by_placeholder("What needs to be done?").press("Enter")
83 |     page.goto("https://bic-berkeley.github.io/psych-214-fall-2016/string_literals.html")
84 |     page.locator("xpath=//*[@id=\'searchbox\']/div/form/input[1]").fill("type")"""
85 |     env.reset()
86 |     for action in seq.split("\n"):
87 |         action = action.strip()
88 |         _, success, _, _, info = env.step(create_playwright_action(action))
89 |         assert success
90 | 


--------------------------------------------------------------------------------
/webarena/tests/test_evaluation_harness/configs/func_eval_fail.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sites": ["shopping"],
 3 |     "task_id": 0,
 4 |     "require_login": true,
 5 |     "storage_state": null,
 6 |     "start_url": null,
 7 |     "geolocation": null,
 8 |     "intent_template": "",
 9 |     "instantiation_dict": {},
10 |     "intent": "",
11 |     "require_reset": false,
12 |     "eval": {
13 |         "eval_types": ["program_html"],
14 |         "reference_answers": [],
15 |         "reference_url": "",
16 |         "program_html": [
17 |             {
18 |                 "url": "last",
19 |                 "required_contents": {"must_include": ["80"]},
20 |                 "locator": "func:shopping_get_sku_latest_review_rating('B09BCM56J7')"
21 |             },
22 |             {
23 |                 "url": "last",
24 |                 "required_contents": {"must_include": ["cupcakecupcake"]},
25 |                 "locator": "func:shopping_get_sku_latest_review_author('B09BCM56J7')"
26 |             }
27 |         ]
28 |     }
29 | }
30 | 


--------------------------------------------------------------------------------
/webarena/tests/test_evaluation_harness/configs/func_eval_success.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sites": ["shopping"],
 3 |     "task_id": 0,
 4 |     "require_login": true,
 5 |     "storage_state": null,
 6 |     "start_url": null,
 7 |     "geolocation": null,
 8 |     "intent_template": "",
 9 |     "instantiation_dict": {},
10 |     "intent": "",
11 |     "require_reset": false,
12 |     "eval": {
13 |         "eval_types": ["program_html"],
14 |         "reference_answers": [],
15 |         "reference_url": "",
16 |         "program_html": [
17 |             {
18 |                 "url": "last",
19 |                 "required_contents": {"must_include": ["100"]},
20 |                 "locator": "func:shopping_get_sku_latest_review_rating('B09BCM56J7')"
21 |             },
22 |             {
23 |                 "url": "last",
24 |                 "required_contents": {"must_include": ["cupcakecupcake"]},
25 |                 "locator": "func:shopping_get_sku_latest_review_author('B09BCM56J7')"
26 |             }
27 |         ]
28 |     }
29 | }
30 | 


--------------------------------------------------------------------------------
/webarena/tests/test_evaluation_harness/configs/func_url_func_1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sites": ["shopping"],
 3 |     "task_id": 0,
 4 |     "require_login": true,
 5 |     "storage_state": null,
 6 |     "start_url": null,
 7 |     "geolocation": null,
 8 |     "intent_template": "",
 9 |     "instantiation_dict": {},
10 |     "intent": "",
11 |     "require_reset": false,
12 |     "eval": {
13 |         "eval_types": ["program_html"],
14 |         "reference_answers": [],
15 |         "reference_url": "",
16 |         "program_html": [
17 |             {
18 |                 "url": "func:reddit_get_post_url('__last_url__')",
19 |                 "locator": "document.querySelector('.submission__inner').outerText",
20 |                 "required_contents": {"must_include": ["How will SPY close on Monday 11/28"]}
21 |             }
22 |         ]
23 |     }
24 | }
25 | 


--------------------------------------------------------------------------------
/webarena/tests/test_evaluation_harness/configs/func_url_func_2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sites": [
 3 |         "shopping"
 4 |     ],
 5 |     "task_id": 0,
 6 |     "require_login": true,
 7 |     "storage_state": "./.auth/gitlab_state.json",
 8 |     "start_url": null,
 9 |     "geolocation": null,
10 |     "intent_template": "",
11 |     "instantiation_dict": {},
12 |     "intent": "",
13 |     "require_reset": false,
14 |     "eval": {
15 |         "eval_types": [
16 |             "program_html"
17 |         ],
18 |         "reference_answers": [],
19 |         "reference_url": "",
20 |         "program_html": [
21 |             {
22 |                 "url": "__GITLAB__/primer/design/-/project_members",
23 |                 "locator": "func:gitlab_get_project_memeber_role(__page__, 'byteblaze')",
24 |                 "required_contents": {"must_include": ["Developer"]}
25 |             },
26 |             {
27 |                 "url": "__GITLAB__/primer/design/-/project_members",
28 |                 "locator": "func:gitlab_get_project_memeber_role(__page__, 'primer')",
29 |                 "required_contents": {"must_include": ["Owner"]}
30 |             }
31 |         ]
32 |     }
33 | }
34 | 


--------------------------------------------------------------------------------
/webarena/tests/test_evaluation_harness/configs/html_content_element_exact_match.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sites": ["gitlab"],
 3 |     "task_id": 0,
 4 |     "require_login": true,
 5 |     "storage_state": "./.auth/gitlab_state.json",
 6 |     "start_url": null,
 7 |     "geolocation": null,
 8 |     "intent_template": "",
 9 |     "instantiation_dict": {},
10 |     "intent": "",
11 |     "require_reset": false,
12 |     "eval": {
13 |         "eval_types": ["program_html"],
14 |         "reference_answers": [],
15 |         "reference_url": "",
16 |         "program_html": [
17 |             {
18 |                 "url": "last",
19 |                 "required_contents": {"must_include": ["Hello World"]},
20 |                 "locator": "document.querySelector('[id=\"form-name\"').value"
21 |             },
22 |             {
23 |                 "url": "last",
24 |                 "required_contents": {"must_include": ["alexisxy@hotmail.com"]},
25 |                 "locator": "document.querySelector('[id=\"form-email\"').value"
26 |             }
27 |         ]
28 |     }
29 | }
30 | 


--------------------------------------------------------------------------------
/webarena/tests/test_evaluation_harness/configs/html_content_exact_match.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sites": ["gitlab"],
 3 |     "task_id": 0,
 4 |     "require_login": true,
 5 |     "storage_state": "./.auth/gitlab_state.json",
 6 |     "start_url": null,
 7 |     "geolocation": null,
 8 |     "intent_template": "",
 9 |     "instantiation_dict": {},
10 |     "intent": "",
11 |     "require_reset": false,
12 |     "eval": {
13 |         "eval_types": ["program_html"],
14 |         "reference_answers": [],
15 |         "reference_url": "",
16 |         "program_html": [
17 |             {
18 |                 "url": "last",
19 |                 "required_contents": {"must_include": ["What are mammals?"]},
20 |                 "locator": ""
21 |             },
22 |             {
23 |                 "url": "https://www.google.com/",
24 |                 "required_contents": {"must_include": ["Google Search"]},
25 |                 "locator": ""
26 |             }
27 |         ]
28 |     }
29 | }
30 | 


--------------------------------------------------------------------------------
/webarena/tests/test_evaluation_harness/configs/html_content_url_comb.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sites": ["gitlab"],
 3 |     "task_id": 0,
 4 |     "require_login": true,
 5 |     "storage_state": null,
 6 |     "start_url": null,
 7 |     "geolocation": null,
 8 |     "intent_template": "",
 9 |     "instantiation_dict": {},
10 |     "intent": "",
11 |     "require_reset": false,
12 |     "eval": {
13 |         "eval_types": ["program_html", "url_match"],
14 |         "reference_answers": [],
15 |         "reference_url": "https://russmaxdesign.github.io/",
16 |         "url_note": "GOLD in PRED",
17 |         "program_html": [
18 |             {
19 |                 "url": "last",
20 |                 "required_contents": {"must_include": ["Hello World"]},
21 |                 "locator": "document.querySelector('[id=\"form-name\"').value"
22 |             },
23 |             {
24 |                 "url": "last",
25 |                 "required_contents": {"must_include": ["alexisxy@hotmail.com"]},
26 |                 "locator": "document.querySelector('[id=\"form-email\"').value"
27 |             }
28 |         ]
29 |     }
30 | }
31 | 


--------------------------------------------------------------------------------
/webarena/tests/test_evaluation_harness/configs/string_match.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sites": ["reddit"],
 3 |     "task_id": 0,
 4 |     "require_login": true,
 5 |     "storage_state": "./.auth/reddit_state.json",
 6 |     "start_url": null,
 7 |     "geolocation": null,
 8 |     "intent_template": "",
 9 |     "instantiation_dict": {},
10 |     "intent": "",
11 |     "require_reset": false,
12 |     "eval": {
13 |         "eval_types": ["string_match"],
14 |         "reference_answers": {
15 |             "must_include": ["1985/04/18"]
16 |         },
17 |         "reference_url": "",
18 |         "program_html": null
19 |     }
20 | }
21 | 


--------------------------------------------------------------------------------
/webarena/tests/test_evaluation_harness/configs/url_exact_match.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sites": ["reddit"],
 3 |     "task_id": 0,
 4 |     "require_login": true,
 5 |     "storage_state": null,
 6 |     "start_url": null,
 7 |     "geolocation": null,
 8 |     "intent_template": "",
 9 |     "instantiation_dict": {},
10 |     "intent": "",
11 |     "require_reset": false,
12 |     "eval": {
13 |         "eval_types": ["url_match"],
14 |         "reference_answers": [],
15 |         "reference_url": "https://www.google.com/",
16 |         "program_html": [
17 |             {
18 |                 "url": "",
19 |                 "required_contents": []
20 |             }
21 |         ]
22 |     }
23 | }
24 | 


--------------------------------------------------------------------------------
/webarena/tests/test_evaluation_harness/test_helper_functions.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | from pathlib import Path
 4 | 
 5 | from browser_env import ScriptBrowserEnv
 6 | from browser_env.env_config import *
 7 | from evaluation_harness.helper_functions import (
 8 |     gitlab_get_project_memeber_role,
 9 | )
10 | 
11 | HEADLESS = True  # NOTE(review): not referenced anywhere in this module
12 | config_file_folder = "tests/test_evaluation_harness/configs"  # folder the test writes tmp_config.json into
13 | 
14 | 
15 | def test_gitlab_get_project_memeber_role(
16 |     script_browser_env: ScriptBrowserEnv,
17 | ) -> None:
18 |     env = script_browser_env
19 |     config_file = f"{config_file_folder}/tmp_config.json"
20 | 
21 |     with open(config_file, "w") as f:
22 |         json.dump({"storage_state": ".auth/gitlab_state.json"}, f)
23 |     env.reset(options={"config_file": config_file})
24 |     env.page.goto(f"{GITLAB}/primer/design/-/project_members")
25 |     role1 = gitlab_get_project_memeber_role(env.page, "byteblaze")
26 |     assert role1 == "Developer"
27 |     role2 = gitlab_get_project_memeber_role(env.page, "primer")
28 |     assert role2 == "Owner"
29 | 
30 |     # remove tmp config file
31 |     os.remove(config_file)
32 | 


--------------------------------------------------------------------------------