├── src ├── __init__.py └── ai_e2e_tester │ ├── __init__.py │ ├── llm │ ├── __init__.py │ ├── ai_wrapper.py │ └── openai.py │ ├── browser │ ├── __init__.py │ ├── html │ │ ├── __init__.py │ │ ├── visibility │ │ │ ├── __init__.py │ │ │ ├── basic_check.py │ │ │ ├── viewport_check.py │ │ │ ├── visibility_check.py │ │ │ └── occlusion_check.py │ │ └── html_optimizer.py │ ├── guards │ │ ├── __init__.py │ │ └── domain_guard.py │ ├── actions │ │ ├── action_feedback.py │ │ ├── back_action.py │ │ ├── __init__.py │ │ ├── scroll_action.py │ │ ├── element_action.py │ │ ├── wait_action.py │ │ ├── hover_action.py │ │ ├── type_action.py │ │ ├── click_action.py │ │ ├── browser_action.py │ │ └── element_selector.py │ ├── visited_page.py │ ├── session.py │ └── next_step.py │ ├── reporting │ ├── __init__.py │ └── console.py │ ├── __main__.py │ ├── utils.py │ ├── cli.py │ └── agent.py ├── tests └── __init__.py ├── requirements └── base.txt ├── .gitignore ├── CHANGELOG.md ├── ROADMAP.md ├── LICENSE ├── config.yml └── README.md /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/llm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/html/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/reporting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/guards/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/html/visibility/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/__main__.py: -------------------------------------------------------------------------------- 1 | from ai_e2e_tester.cli import main 2 | 3 | if __name__ == "__main__": 4 | main() -------------------------------------------------------------------------------- /src/ai_e2e_tester/llm/ai_wrapper.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from typing import Dict 3 | 4 | 5 | class AiWrapper(ABC): 6 | def run(self, system_prompt: str, user_prompt: str, screenshot_b64) -> Dict: 7 | pass 8 | -------------------------------------------------------------------------------- /requirements/base.txt: -------------------------------------------------------------------------------- 1 | docopt # Parse command line parameters 2 | playwright # Run a browser and programmatically interact with it 3 | openai # API for Open AI LLM 4 | pyyaml # Read settings from yml file 5 | rich # Write console reports -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.pyc 4 | *.pyo 5 | *.pyd 6 | 7 | # Virtual environments 8 | env/ 9 | venv/ 10 | .venv/ 11 | 12 | # PyCharm 13 | .idea/ 14 | 15 | # OS files 16 | .DS_Store 17 | 18 | # Test & coverage 19 | .pytest_cache/ 20 | *.cover 21 | .coverage 22 | 23 | # App 24 | reports 25 | debug -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/actions/action_feedback.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | 4 | 5 | @dataclass 6 | class ActionFeedback: 7 | result: str 8 | is_success: bool = True 9 | state_change: Optional[str] = '' 10 | 11 | def __str__(self): 12 | return self.result 13 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ### 0.0.4 2 | 3 | - Improved feedback reporting 4 | 5 | ### 0.0.3 6 | 7 | - Improved feedback quality and reduced duplicated feedback. 8 | - Optimized HTML passed to the LLM to reduce token usage. 9 | - Better location of elements on the page based on LLM description. 10 | 11 | ### 0.0.2 12 | 13 | - Description of available browser actions for the LLM is now automatically generated from the list of declared action classes. 14 | 15 | ### 0.0.1 16 | First basic version. -------------------------------------------------------------------------------- /src/ai_e2e_tester/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import yaml 4 | 5 | def load_config(path="config.yaml"): 6 | with open(path, "r", encoding="utf-8") as f: 7 | return yaml.safe_load(f) 8 | 9 | def setup_logging(): 10 | logger = logging.getLogger('ai-e2e-tester') 11 | logger.setLevel(logging.INFO) 12 | handler = logging.StreamHandler() 13 | handler.setFormatter(logging.Formatter('%(name)s - %(levelname)s - %(message)s')) 14 | logger.addHandler(handler) 15 | logger.setLevel(logging.INFO) -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/html/visibility/basic_check.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | from playwright.sync_api import ElementHandle, FloatRect 4 | 5 | from ai_e2e_tester.browser.html.visibility.visibility_check import VisibilityCheck 6 | 7 | 8 | class BasicVisibilityCheck(VisibilityCheck): 9 | def is_visible(self, el: ElementHandle, box: FloatRect, viewport: Dict, scroll_x: float, scroll_y: float) -> bool: 10 | try: 11 | return el.is_visible() 12 | except Exception: 13 | return False 14 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/actions/back_action.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from ai_e2e_tester.browser.actions.action_feedback import ActionFeedback 4 | from ai_e2e_tester.browser.actions.browser_action import BrowserAction 5 | 6 | logger = logging.getLogger('ai-e2e-tester.browser.actions.back') 7 | 8 | 9 | class BackAction(BrowserAction): 10 | name = 'back' 11 | description = 'Navigate back to the previous page.' 12 | 13 | def run(self, page) -> ActionFeedback: 14 | page.go_back() 15 | page.wait_for_load_state('load') 16 | return ActionFeedback(f"Navigated back to previous page.") 17 | -------------------------------------------------------------------------------- /ROADMAP.md: -------------------------------------------------------------------------------- 1 | # 🚀 Project Roadmap 2 | 3 | This document outlines the planned features, improvements, and major ideas for upcoming releases. 4 | 5 | 6 | ## 🗓️ Upcoming 7 | 8 | ### Fix cyclical behaviors. 9 | The agent sometimes keep repeating the same actions. 10 | 11 | ### Simplify HTML given to the agent 12 | Reduce unnecessary content from the HTML to reduce the number of input tokens. 13 | 14 | ### Improve reports 15 | 16 | - Bug Detection Isn’t Incentivized 17 | - The agent should report a wider range if issues and suggestions. 18 | 19 | ### Improve identification & selection of interactive elements 20 | 21 | - Some elements have the same visible text, which prevents specific selection. Currently, first matching element is selected. 22 | 23 | ### Set Explicit Objective and Goal Tracking 24 | 25 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/actions/__init__.py: -------------------------------------------------------------------------------- 1 | from ai_e2e_tester.browser.actions.back_action import BackAction 2 | from ai_e2e_tester.browser.actions.click_action import ClickAction 3 | from ai_e2e_tester.browser.actions.hover_action import HoverAction 4 | from ai_e2e_tester.browser.actions.scroll_action import ScrollAction 5 | from ai_e2e_tester.browser.actions.type_action import TypeAction 6 | from ai_e2e_tester.browser.actions.wait_action import WaitAction 7 | 8 | # List of all actions the LLM can do on the web page. 9 | ACTION_CLASSES = [ 10 | ClickAction, 11 | TypeAction, 12 | ScrollAction, 13 | BackAction, 14 | WaitAction, 15 | HoverAction 16 | ] 17 | 18 | # Build the registry dict automatically from the class .name attribute 19 | ACTION_REGISTRY = {cls.name: cls for cls in ACTION_CLASSES} 20 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/actions/scroll_action.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from ai_e2e_tester.browser.actions.action_feedback import ActionFeedback 4 | from ai_e2e_tester.browser.actions.browser_action import BrowserAction 5 | 6 | logger = logging.getLogger('ai-e2e-tester.browser.actions.scroll') 7 | 8 | 9 | class ScrollAction(BrowserAction): 10 | """ 11 | Scrolls on the page. 12 | @todo Add scroll_amount param for the llm to specify scroll amount and direction. 13 | """ 14 | name = 'scroll' 15 | description = 'Scroll down a page.' 16 | 17 | def __init__(self, scroll_amount=2000): 18 | self.scroll_amount = scroll_amount 19 | 20 | def run(self, page) -> ActionFeedback: 21 | page.mouse.wheel(0, self.scroll_amount) 22 | return ActionFeedback(f"Scrolled down the page.") 23 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/cli.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI E2E Website Tester 3 | 4 | Usage: 5 | ai_e2e_tester --url= [--config=] [--max-steps=] 6 | 7 | Options: 8 | --url= Landing page of the website to test. 9 | --config= Path to alternative configuration file. 10 | --max-steps= Maximum number of steps the AI can take while exploring the website [default: 5]. 11 | """ 12 | 13 | from docopt import docopt 14 | 15 | from ai_e2e_tester.agent import TestingAgent 16 | from ai_e2e_tester.utils import setup_logging 17 | 18 | 19 | def main(): 20 | setup_logging() 21 | 22 | args = docopt(__doc__) 23 | url = args["--url"] 24 | config_path = args.get("--config") or "config.yml" 25 | max_steps = int(args['--max-steps']) 26 | 27 | agent = TestingAgent(url, config_path) 28 | agent.run(max_steps=max_steps) 29 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/actions/element_action.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from playwright.sync_api import Page, ElementHandle 4 | 5 | from ai_e2e_tester.browser.actions.browser_action import BrowserAction 6 | from ai_e2e_tester.browser.actions.element_selector import ElementSelector 7 | 8 | logger = logging.getLogger('ai-e2e-tester.browser.actions.element') 9 | 10 | 11 | class BrowserElementAction(BrowserAction): 12 | """ 13 | Defines an action on a specific element on the webpage (button, input, etc.) 14 | """ 15 | 16 | def __init__(self, target_text: str): 17 | self.element_selector = ElementSelector() 18 | self.target_text = target_text 19 | 20 | def get_element(self, page: Page) -> ElementHandle | None: 21 | return self.element_selector.get_element(self.target_text, page) 22 | 23 | def __str__(self): 24 | return f'Browser Action on target "{self.target_text}"' -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/actions/wait_action.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | 4 | from ai_e2e_tester.browser.actions.action_feedback import ActionFeedback 5 | from ai_e2e_tester.browser.actions.browser_action import BrowserAction 6 | 7 | logger = logging.getLogger('ai-e2e-tester.browser.actions.wait') 8 | 9 | 10 | class WaitAction(BrowserAction): 11 | name = 'wait' 12 | description = 'Wait until the website has finished loading the data. Use this if you believe the website is not ready yet to be interacted with.' 13 | input_fields = { 14 | "wait_time_sec": "How long to wait in seconds." 15 | } 16 | 17 | def __init__(self, wait_time_sec: str = "5"): 18 | self.wait_time_ms = wait_time_sec 19 | 20 | def run(self, page) -> ActionFeedback: 21 | logger.info(f"→ Waiting {self.wait_time_ms}sec for the page to be ready") 22 | time.sleep(int(self.wait_time_ms)) 23 | return ActionFeedback(f"Waited for {self.wait_time_ms}sec.") 24 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/actions/hover_action.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from ai_e2e_tester.browser.actions.action_feedback import ActionFeedback 4 | from ai_e2e_tester.browser.actions.element_action import BrowserElementAction 5 | 6 | logger = logging.getLogger('ai-e2e-tester.browser.actions.hover') 7 | 8 | 9 | class HoverAction(BrowserElementAction): 10 | name = 'hover' 11 | description = 'Move the mouse cursor over a button, link, or interactive element (for example, to reveal tooltips or menus).' 12 | input_fields = { 13 | "target_text": "Use the element's exact `id` if present; otherwise, use the exact visible text on the element you want to hover over." 14 | } 15 | 16 | def run(self, page) -> ActionFeedback: 17 | el = self.get_element(page) 18 | if not el: 19 | return ActionFeedback(f'Could not find element to hover: "{self.target_text}"', False) 20 | 21 | el.hover() 22 | return ActionFeedback(f"Hovered over {self.target_text}") 23 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/html/visibility/viewport_check.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Dict 3 | 4 | from playwright.sync_api import ElementHandle, FloatRect 5 | 6 | from ai_e2e_tester.browser.html.visibility.visibility_check import VisibilityCheck 7 | 8 | logger = logging.getLogger('ai-e2e-tester.browser.html.optimizer.viewport') 9 | 10 | 11 | class ViewportIntersectionCheck(VisibilityCheck): 12 | 13 | def is_visible(self, el: ElementHandle, box: FloatRect, viewport: Dict, scroll_x: float, scroll_y: float) -> bool: 14 | """Check if element's bounding box intersects with the viewport.""" 15 | 16 | # Element box 17 | x, y, w, h = box["x"], box["y"], box["width"], box["height"] 18 | 19 | # Viewport 20 | vp_w, vp_h = viewport["width"], viewport["height"] 21 | 22 | # Return False if the element is completely outside the viewport in any direction 23 | if (x + w) <= 0 or x >= vp_w or (y + h) <= 0 or y >= vp_h: 24 | return False 25 | 26 | return True 27 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/html/visibility/visibility_check.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Dict 3 | 4 | from playwright.sync_api import ElementHandle, FloatRect 5 | 6 | 7 | class VisibilityCheck(ABC): 8 | """ 9 | Base class for all visibility checks. 10 | A check inspects (el, page, context) and returns (ok: bool, new_context: dict). 11 | """ 12 | 13 | @abstractmethod 14 | def is_visible(self, el: ElementHandle, box: FloatRect, viewport: Dict, scroll_x: float, scroll_y: float) -> bool: 15 | """ 16 | 17 | :param el: (ElementHandle): The Playwright handle for the element to check. 18 | :param box: (FloatRect): The element's bounding box, with 'x', 'y', 'width', 'height' as keys. 19 | :param viewport: 20 | :param scroll_x: (float): The current horizontal scroll offset of the page (window.scrollX) 21 | :param scroll_y: (float): The current vertical scroll offset of the page (window.scrollY). 22 | 23 | :return: True if the element is visible and can be interacted with. 24 | """ 25 | pass 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Alexis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/actions/type_action.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from ai_e2e_tester.browser.actions.action_feedback import ActionFeedback 4 | from ai_e2e_tester.browser.actions.element_action import BrowserElementAction 5 | 6 | logger = logging.getLogger('ai-e2e-tester.browser.actions.type') 7 | 8 | 9 | class TypeAction(BrowserElementAction): 10 | name = 'type' 11 | description = 'Type some text into an input box.' 12 | input_fields = { 13 | "target_text": "Use the input's exact `id` value if it has one. If there is no `id`, use the exact visible label, placeholder, or aria-label for the input field.", 14 | "value": "The text you want to type into the input box." 15 | } 16 | 17 | def __init__(self, target_text: str, value: str = None): 18 | super().__init__(target_text) 19 | self.value = value 20 | 21 | def run(self, page) -> ActionFeedback: 22 | el = self.get_element(page) 23 | 24 | if not el: 25 | return ActionFeedback(f'Could not type. Could not find input for "{self.target_text}"', False) 26 | 27 | el.type(self.value) 28 | return ActionFeedback(f'Typed "{self.value}" into field "{self.target_text}"') 29 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/reporting/console.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict 2 | 3 | from rich.console import Console 4 | 5 | from ai_e2e_tester.browser.visited_page import VisitedPage 6 | 7 | 8 | class ConsoleReporter: 9 | 10 | def __init__(self): 11 | self.console = Console() 12 | 13 | def print_feedback_item(self, item: Dict[str, str]): 14 | self.console.print(f"[bold cyan]{item['category']}[/] [bold]{item['name']}[/]") 15 | self.console.print(f"[italic]{item['where']}[/]") 16 | self.console.print(f"[italic]{item['evidence']}[/]") 17 | self.console.print(f"{item['details']}\n") 18 | 19 | def print_report(self, grouped_visits: Dict[str, List[VisitedPage]]): 20 | """ 21 | Prints a summary report of all visited pages. 22 | """ 23 | for page_url, visited_pages in grouped_visits.items(): 24 | self.console.print(f"\n[underline bold]Page:[/] {page_url}") 25 | seen = set() 26 | for visited_page in visited_pages: 27 | for item in visited_page.feedback: 28 | signature = item.get("details") 29 | if signature not in seen: 30 | seen.add(signature) 31 | self.print_feedback_item(item) 32 | 33 | self.console.print("---\n", style="dim") 34 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/guards/domain_guard.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from ai_e2e_tester.browser.session import BrowserSession 4 | from ai_e2e_tester.browser.visited_page import VisitedPage 5 | 6 | logger = logging.getLogger('ai-e2e-tester.browser.guards.domain') 7 | 8 | 9 | class DomainGuardException(Exception): 10 | pass 11 | 12 | 13 | def ensure_stay_on_domain(session: BrowserSession, page: VisitedPage): 14 | """ 15 | If AI navigated out of the starting domain, we go back. 16 | raises Exception if could not return back to original domain. 17 | """ 18 | main_domain = session.get_start_domain() 19 | curr_domain = session.get_current_domain() 20 | if curr_domain != main_domain: 21 | external_url = session.url 22 | logger.info(f"External URL: {external_url}). Going back to website.") 23 | try: 24 | session.go_back() 25 | curr_domain = session.get_current_domain() 26 | if curr_domain != main_domain: 27 | raise DomainGuardException("Still not on main domain after going back.") 28 | else: 29 | page.next_step.update_action_state_change( 30 | f"Visited page {external_url} on external domain. It is not part of testing plan. Returned back to previous page.") 31 | except Exception as e: 32 | raise DomainGuardException("Error going back in browser history:", e) 33 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/actions/click_action.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from playwright.sync_api import Page, ElementHandle 4 | 5 | from ai_e2e_tester.browser.actions.action_feedback import ActionFeedback 6 | from ai_e2e_tester.browser.actions.element_action import BrowserElementAction 7 | 8 | logger = logging.getLogger('ai-e2e-tester.browser.actions.click') 9 | 10 | 11 | class ClickAction(BrowserElementAction): 12 | """ 13 | Click on buttons and links. 14 | """ 15 | name = 'click' 16 | description = 'Click on a button or clickable element.' 17 | input_fields = { 18 | "target_text": "Use the clickable element's exact `id` value if it has one. If there is no `id`, use exact visible text shown on the button, link, or element you want to interact with." 19 | } 20 | 21 | def __init__(self, target_text: str): 22 | super().__init__(target_text) 23 | 24 | def run(self, page: Page) -> ActionFeedback: 25 | el = self.get_element(page) 26 | 27 | if not el: 28 | return ActionFeedback(f'Could not click. Could not find element with text "{self.target_text}"', False) 29 | 30 | self._force_same_tab_open(page, el) 31 | 32 | el.click() 33 | page.wait_for_load_state('load') 34 | return ActionFeedback(f'Clicked on "{self.target_text}"') 35 | 36 | @classmethod 37 | def _force_same_tab_open(cls, page: Page, el: ElementHandle): 38 | page.evaluate(""" 39 | el => { if (el.tagName && el.tagName.toLowerCase() === 'a') el.removeAttribute('target'); } 40 | """, el) 41 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/actions/browser_action.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from abc import ABC 3 | from typing import Dict 4 | 5 | from playwright.sync_api import Page 6 | 7 | from ai_e2e_tester.browser.actions.action_feedback import ActionFeedback 8 | 9 | logger = logging.getLogger('ai-e2e-tester.browser.actions') 10 | 11 | 12 | class BrowserAction(ABC): 13 | # Name by which the LLM should call this action. 14 | name: str = None 15 | 16 | # The description will explain to the LLM when it should use this action. 17 | description: str = None 18 | 19 | # Describing to the LLM which input fields this action needs and how to format them. 20 | input_fields: Dict[str, str] = {} 21 | 22 | def run(self, page: Page) -> ActionFeedback: 23 | """ 24 | Runs an action in the browser. 25 | :param page: Current page in the browser. 26 | :return: Description of the result of this action, in natural language. Will be given to LLM as feedback. 27 | """ 28 | pass 29 | 30 | @classmethod 31 | def describe_for_llm(cls): 32 | """ 33 | Returns a string describing the action and its 'params' fields for the LLM. 34 | Example output: 35 | 36 | - "click": Click on a button or clickable element. 37 | params: 38 | - "target_text": The element's id or visible text. 39 | """ 40 | doc = f'- "{cls.name}": {cls.description}\n' 41 | if cls.input_fields: 42 | doc += " params:\n" 43 | for k, desc in cls.input_fields.items(): 44 | doc += f' - "{k}": {desc}\n' 45 | return doc 46 | 47 | def __str__(self): 48 | return 'Browser Action' 49 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/actions/element_selector.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from playwright.sync_api import ElementHandle, Page 4 | 5 | logger = logging.getLogger('ai-e2e-tester.browser.selector') 6 | 7 | 8 | class ElementSelector: 9 | """ 10 | Helper to return an element from the HTML based on the LLM description. 11 | """ 12 | 13 | def __init__(self): 14 | self.selector_strategies = [ 15 | lambda t: f'#{t}', # By id 16 | lambda t: f'text="{t}"', # By visible text 17 | lambda t: f'[placeholder="{t}"]', # By exact placeholder 18 | lambda t: f'input[placeholder*="{t.split()[0]}"]', # Fallback: partial match 19 | lambda t: f'[data-title="{t}"]', # Fallback: data-title attribute 20 | lambda t: f'.{t}', # Fallback: by class name 21 | ] 22 | 23 | def get_element(self, target_text: str, page: Page) -> ElementHandle | None: 24 | """ 25 | Attempts to locate the element to interact with, based on LLM suggestions. 26 | Uses multiple selector strategies. Logs a warning if multiple elements are found. 27 | Returns the first matching element, or None. 28 | @todo The LLM needs to return a more specific element selector if ID is not available. 29 | @todo If multiple matching elements, prioritize elements most likely to be interactive. 30 | """ 31 | 32 | for make_selector in self.selector_strategies: 33 | selector = make_selector(target_text) 34 | try: 35 | elements = page.query_selector_all(selector) 36 | if elements: 37 | if len(elements) > 1: 38 | logger.warning(f"Multiple elements found with selector '{selector}'; using the first one.") 39 | return elements[0] 40 | except Exception as e: 41 | logger.debug(f"Selector '{selector}' failed: {e}") 42 | 43 | return None 44 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/llm/openai.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | from typing import Dict 5 | 6 | import openai 7 | 8 | from ai_e2e_tester.llm.ai_wrapper import AiWrapper 9 | 10 | logger = logging.getLogger('ai-e2e-tester.llm') 11 | 12 | 13 | class OpenAiWrapper(AiWrapper): 14 | """ 15 | A wrapper to call the LLM. 16 | Currently only OpenAI is supported. 17 | """ 18 | 19 | def __init__(self, prompts: Dict[str, str], max_tokens=800): 20 | self.max_tokens = max_tokens 21 | self.prompts = prompts 22 | 23 | api_key = os.environ.get("OPENAI_API_KEY") 24 | if not api_key: 25 | raise Exception("No API key found. Please set the OPENAI_API_KEY environment variable.") 26 | 27 | self.client = openai.OpenAI(api_key=api_key) 28 | 29 | def run(self, system_prompt: str, user_prompt: str, screenshot_b64) -> Dict: 30 | 31 | response = self.client.chat.completions.create( 32 | model="gpt-4o", 33 | messages=[ 34 | {"role": "system", "content": system_prompt}, 35 | { 36 | "role": "user", 37 | "content": [ 38 | {"type": "text", "text": user_prompt}, 39 | {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{screenshot_b64}"}} 40 | ] 41 | } 42 | ], 43 | max_tokens=self.max_tokens, 44 | temperature=0.2, 45 | ) 46 | 47 | txt = response.choices[0].message.content.strip() 48 | 49 | try: 50 | result = json.loads(txt) 51 | except Exception: 52 | logger.warning(f"Failed to parse response as JSON: {txt}") 53 | result = { 54 | "next_step": {"action": "done"}, 55 | "reason": "Parsing error of JSON output from AI.", 56 | "bugs": [], 57 | "suggestions": [] 58 | } 59 | return result 60 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/visited_page.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Dict 3 | 4 | from ai_e2e_tester.browser.next_step import NextStep 5 | from ai_e2e_tester.browser.session import BrowserSession 6 | 7 | 8 | @dataclass 9 | class VisitedPage: 10 | """ 11 | Stores information about a page the AI tester visited on the tested website. 12 | It is used to write reports at the end. 13 | """ 14 | page_url: str 15 | summary: str # Summary of current page 16 | context: str # Summary of previous visited pages, actions taken by the agent, and their result 17 | expectations_vs_reality: str 18 | feedback: List[Dict[str, str]] 19 | next_step: NextStep 20 | 21 | def run_next_step(self, browser_session: BrowserSession): 22 | self.next_step.run(page=browser_session.page) 23 | 24 | def has_next_step(self): 25 | return self.next_step.browser_action is not None 26 | 27 | def get_llm_condensed_feedback(self): 28 | return ', '.join([feedback['details'] for feedback in self.feedback]) 29 | 30 | def get_llm_visit_summary(self) -> str: 31 | """ 32 | Returns a combination of visit context (summary of previous actions) and current action. 33 | :return: 34 | """ 35 | return '\n'.join(item for item in [self.context, "LAST ACTION:", str(self.get_visit_summary())] if item) 36 | 37 | def get_visit_summary(self) -> Dict: 38 | return { 39 | "page_url": self.page_url, 40 | "observation": self.get_llm_condensed_feedback(), 41 | "expectations_vs_reality": self.expectations_vs_reality, 42 | "action_taken": self.next_step.get_llm_step_summary() 43 | } 44 | 45 | @classmethod 46 | def from_json(cls, page, result: Dict) -> "VisitedPage": 47 | return VisitedPage( 48 | page_url=page.url, 49 | summary=result.get("summary"), 50 | context=result.get("context"), 51 | expectations_vs_reality=result.get("expected_vs_actual"), 52 | next_step=NextStep.from_json(result.get("next_step")), 53 | feedback=result.get("feedback", []), 54 | ) 55 | -------------------------------------------------------------------------------- /config.yml: -------------------------------------------------------------------------------- 1 | prompts: 2 | system: | 3 | You are an AI website tester acting like a real user. 4 | 5 | Rules: 6 | - Don’t repeat an action that failed or had no visible effect. 7 | - Don’t re-open external links or the same identical element twice. 8 | - Base claims only on what’s in the provided HTML, screenshot, and console logs. 9 | - Prefer facts over opinions. Be specific and actionable. 10 | - If nothing useful remains, set next_step.action = "done". 11 | 12 | Output **ONLY** the JSON, and do **NOT** include markdown, code block markers, or explanations. 13 | 14 | user: | 15 | Page URL: {page_url} 16 | 17 | Visible HTML: 18 | """ 19 | {page_html} 20 | """ 21 | 22 | Screenshot: (image provided) 23 | 24 | Console logs: 25 | """ 26 | {console_logs} 27 | """ 28 | 29 | A summary of what you have seen and done on this website before, for CONTEXT: 30 | """ 31 | {context} 32 | """ 33 | 34 | Task: 35 | 1) Say whether the current page matches what you expected from the last action (brief). 36 | 2) Infer the current user goal (concise). 37 | 3) Propose ONE next action to progress that goal. 38 | 4) Give precise, evidence-based feedback. Do not speculate. Point to specific elements. 39 | 40 | YOUR OUTPUT: 41 | 42 | Output **ONLY** the JSON, and do **NOT** include markdown, code block markers, or explanations. 43 | 44 | The JSON has following fields: 45 | 46 | - "goal" : A concise statement of the user's likely intent on this page. 47 | - "expected_vs_actual" : Short statement on whether the page matched expectations (and why/why not). 48 | - "summary" : Briefly describe what is visible or prominent on the page. 49 | - "context": In a paragraph summarize all your previous experience with this website, including observations, 50 | actions you took and their results. 51 | - "next_step" : Suggest ONE next action a user might take. This field has following subfields: 52 | - "reason" : A short phrase (max 15 words) explaining why this action helps the user. 53 | - "action" : It can only have one of following values: 54 | 55 | {available_actions} 56 | 57 | - "feedback" : A list of feedback objects. Each item must include: 58 | 59 | - "name": string, 60 | - "category": one of ["functional","technical","accessibility","ui_ux"], 61 | - "where": string, // selector/id/role/text near it 62 | - "evidence": string, // short HTML/snippet or console line 63 | - "details": string // Detailed explanation of the issues and suggestion how to fix it 64 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/html/visibility/occlusion_check.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Dict 3 | 4 | from playwright.sync_api import ElementHandle 5 | 6 | from ai_e2e_tester.browser.html.visibility.visibility_check import VisibilityCheck 7 | 8 | logger = logging.getLogger('ai-e2e-tester.browser.html.optimizer.occlusion') 9 | 10 | 11 | class OcclusionCheck(VisibilityCheck): 12 | """ 13 | Checks if an element is visually unobstructed. 14 | """ 15 | 16 | def is_visible(self, el: ElementHandle, box: dict, viewport: Dict, scroll_x: float, scroll_y: float) -> bool: 17 | center_x, center_y = self._get_visible_center(box, viewport, scroll_x, scroll_y) 18 | 19 | is_clickable = el.evaluate( 20 | """ 21 | (el, center) => { 22 | const [x, y] = center; 23 | const clientX = x - window.scrollX; 24 | const clientY = y - window.scrollY; 25 | const top = document.elementFromPoint(clientX, clientY); 26 | // Accept exact match or child (contained) match 27 | return top === el || (top && el.contains(top)); 28 | } 29 | """, 30 | [center_x, center_y] 31 | ) 32 | 33 | # if not is_clickable: 34 | # logger.info(f'Element did not pass occlusion check: {el}') 35 | 36 | return is_clickable 37 | 38 | @classmethod 39 | def _get_visible_center(cls, box, viewport: Dict, scroll_x: float, scroll_y: float): 40 | """ 41 | Returns the center of the intersection between the element's bounding box and the viewport. 42 | If there is no intersection (element fully offscreen), returns the geometric center of the box. 43 | """ 44 | 45 | # Bounding box in page coordinates 46 | left = box["x"] 47 | top = box["y"] 48 | right = left + box["width"] 49 | bottom = top + box["height"] 50 | 51 | # Viewport in page coordinates 52 | vp_left = scroll_x 53 | vp_top = scroll_y 54 | vp_right = vp_left + viewport["width"] 55 | vp_bottom = vp_top + viewport["height"] 56 | 57 | # Intersection rectangle 58 | vis_left = max(left, vp_left) 59 | vis_top = max(top, vp_top) 60 | vis_right = min(right, vp_right) 61 | vis_bottom = min(bottom, vp_bottom) 62 | 63 | # If no intersection, fallback to geometric center 64 | if vis_right <= vis_left or vis_bottom <= vis_top: 65 | return (left + right) / 2, (top + bottom) / 2 66 | 67 | # Center of intersection 68 | vis_center_x = (vis_left + vis_right) / 2 69 | vis_center_y = (vis_top + vis_bottom) / 2 70 | return vis_center_x, vis_center_y 71 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/session.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import logging 3 | from urllib.parse import urlparse 4 | 5 | from playwright.sync_api import sync_playwright 6 | 7 | from ai_e2e_tester.browser.html.html_optimizer import HtmlOptimizer 8 | 9 | logger = logging.getLogger('ai-e2e-tester.browser') 10 | 11 | 12 | class BrowserSession: 13 | """ 14 | Encapsulates a Playwright browser session for automated web testing. 15 | 16 | This class manages browser startup/shutdown, page navigation, page text/screenshot extraction, 17 | and simple navigation actions, making it easier to interact with a browser in a reusable way. 18 | """ 19 | 20 | def __init__(self, start_url: str, headless=True): 21 | self.playwright = sync_playwright().start() 22 | self.browser = self.playwright.chromium.launch(headless=headless) 23 | self.page = self.browser.new_page() 24 | 25 | self.console_messages = [] 26 | self._attach_console_logging() 27 | 28 | self.html_optimizer = HtmlOptimizer() 29 | 30 | self.start_url = start_url 31 | if self.start_url: 32 | self.goto_url(self.start_url) 33 | 34 | def goto_url(self, url): 35 | self.page.goto(url) 36 | self.page.wait_for_load_state('networkidle') 37 | 38 | def go_back(self): 39 | self.page.go_back() 40 | self.page.wait_for_load_state('networkidle') 41 | 42 | def get_page_text(self) -> str: 43 | return self.page.evaluate("() => document.body.innerText") 44 | 45 | def get_page_html(self) -> str: 46 | return self.page.content() 47 | 48 | def get_optimized_html(self) -> str: 49 | return self.html_optimizer.get_optimized_html(self.page) 50 | 51 | def get_screenshot(self, path): 52 | screenshot_bytes = self.page.screenshot(path=path) 53 | return base64.b64encode(screenshot_bytes).decode('utf-8') 54 | 55 | def get_console_messages(self): 56 | """ 57 | Return warnings/errors from console. 58 | """ 59 | return list(self.console_messages) 60 | 61 | def clear_console_messages(self): 62 | self.console_messages = [] 63 | 64 | def _attach_console_logging(self): 65 | def on_console_message(msg): 66 | if msg.type in ("warning", "error"): 67 | self.console_messages.append({"type": msg.type, "text": msg.text}) 68 | 69 | self.page.on("console", on_console_message) 70 | 71 | def close(self): 72 | self.browser.close() 73 | self.playwright.stop() 74 | 75 | @property 76 | def url(self): 77 | return self.page.url 78 | 79 | def get_current_domain(self): 80 | return self.get_domain(self.url) 81 | 82 | def get_start_domain(self): 83 | return self.get_domain(self.start_url) 84 | 85 | @classmethod 86 | def get_domain(cls, url): 87 | return urlparse(url).netloc.lower() 88 | 89 | def __enter__(self): 90 | return self 91 | 92 | def __exit__(self, exc_type, exc_val, exc_tb): 93 | self.close() 94 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🤖 AI E2E Website Tester 2 | 3 | Skip the test scripts. Just point the AI E2E Website Tester at your website, and watch it explore it. 4 | 5 | The AI acts like a real human visitor, clicking, scrolling, and reading your website. As it goes, it automatically takes notes on bugs, UX issues, and proposes suggestions. 6 | 7 | This tool is an early prototype. 8 | 9 | Feedback and ideas are very welcome! 10 | 11 | --- 12 | 13 | ## Example run 14 | 15 | ```console 16 | ai-e2e-tester.agent - INFO - Test started. Max steps: 6 17 | ai-e2e-tester.agent - INFO - [Step 1] On Page: https://pi-match.web.app/ 18 | ai-e2e-tester.agent - INFO - Current goal: Find a suitable PI and lab based on location and interests. 19 | ai-e2e-tester.browser.next_step - INFO - Reasoning for Next Action: To initiate a search for PIs and labs. 20 | ai-e2e-tester.agent - INFO - Typed "Melbourne" into field "affiliation_input" → Page content updated. 21 | ai-e2e-tester.agent - INFO - [Step 2] On Page: https://pi-match.web.app/ 22 | ai-e2e-tester.agent - INFO - Current goal: Find a suitable PI and lab based on location and interests. 23 | ai-e2e-tester.browser.next_step - INFO - Reasoning for Next Action: To attempt a search with interests included. 24 | ai-e2e-tester.agent - INFO - Typed "epigenetics" into field "What I like" → Page content updated. 25 | ai-e2e-tester.agent - INFO - [Step 3] On Page: https://pi-match.web.app/ 26 | ai-e2e-tester.agent - INFO - Current goal: Find a suitable PI and lab based on location and interests. 27 | ai-e2e-tester.browser.next_step - INFO - Reasoning for Next Action: To initiate the search based on entered criteria. 28 | ai-e2e-tester.agent - INFO - Clicked on "Search" → Navigated to new URL. 29 | ai-e2e-tester.agent - INFO - [Step 4] On Page: https://pi-match.web.app/labs?location=Melbourne&keywords=epigenetics 30 | ai-e2e-tester.agent - INFO - Current goal: Find a suitable PI and lab in Melbourne related to epigenetics. 31 | ai-e2e-tester.browser.next_step - INFO - Reasoning for Next Action: The page is still loading; waiting may allow results to appear. 32 | ai-e2e-tester.browser.actions.wait - INFO - → Waiting 20sec for the page to be ready 33 | ai-e2e-tester.agent - INFO - Waited for 20sec. → Page content updated. 34 | ai-e2e-tester.agent - INFO - [Step 5] On Page: https://pi-match.web.app/labs?location=Melbourne&keywords=epigenetics 35 | ai-e2e-tester.agent - INFO - Current goal: Find labs related to epigenetics in Melbourne. 36 | ai-e2e-tester.browser.next_step - INFO - Reasoning for Next Action: Explore more details about a specific lab. 37 | 38 | ``` 39 | 40 | 41 | --- 42 | 43 | ## 🛠️ Installation 44 | 45 | Clone this repo and install dependencies: 46 | 47 | ```bash 48 | git clone https://github.com/BitsOfAdventures/ai-e2e-tester.git 49 | cd ai-e2e-tester 50 | 51 | pip install -r requirements/base.txt 52 | playwright install 53 | ```` 54 | 55 | 56 | ## 🚀 Quick Start 57 | 58 | Set your API key as an environment variable: 59 | 60 | ```bash 61 | export OPENAI_API_KEY=sk-... # On macOS/Linux 62 | ``` 63 | 64 | Then run the tool from the project root: 65 | 66 | ```bash 67 | python -m src.ai_e2e_tester --url=https://example.com 68 | ``` 69 | 70 | 71 | ## 💬 Feedback Wanted 72 | This is an early prototype. If you try it, please open an issue or suggest improvements, any feedback is appreciated! 73 | 74 | ## Currently Supported Actions 75 | Here is what the agent can currently do on the tested website: 76 | 77 | - Click on links, buttons, etc.. 78 | - Type text into input fields 79 | - Navigate to the previous page 80 | - Scroll down 81 | - Wait while the page is loading data from the server -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/next_step.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import logging 3 | from typing import Dict 4 | 5 | from ai_e2e_tester.browser.actions import ACTION_REGISTRY 6 | from ai_e2e_tester.browser.actions.action_feedback import ActionFeedback 7 | from ai_e2e_tester.browser.actions.browser_action import BrowserAction 8 | 9 | logger = logging.getLogger('ai-e2e-tester.browser.next_step') 10 | 11 | 12 | class NextStep: 13 | reason: str 14 | browser_action: BrowserAction | None = None 15 | action_feedback: ActionFeedback 16 | 17 | def __init__(self, data: Dict): 18 | self.reason = data.get('reason') 19 | self.browser_action = self._get_action(data) 20 | 21 | @classmethod 22 | def get_state_snapshot(cls, page): 23 | return { 24 | "url": page.url, 25 | "content": page.content(), 26 | "screenshot": base64.b64encode(page.screenshot(full_page=False)).decode("utf-8") 27 | } 28 | 29 | @classmethod 30 | def compare_state(cls, before, after): 31 | if after["url"] != before["url"]: 32 | return "Navigated to new URL." 33 | elif after["content"] != before["content"]: 34 | return "Page content updated." 35 | elif after["screenshot"] != before["screenshot"]: 36 | return "Viewport content updated (e.g. scrolled or animated)." 37 | else: 38 | return "No visible change detected." 39 | 40 | def run(self, page): 41 | logger.info(f"Reasoning for Next Action: {self.reason}") 42 | try: 43 | before = self.get_state_snapshot(page) 44 | self.action_feedback = self.browser_action.run(page=page) 45 | after = self.get_state_snapshot(page) 46 | self.action_feedback.state_change = self.compare_state(before, after) 47 | except Exception as e: 48 | logger.error(f'Could not execute browser action {self.browser_action}: {e}') 49 | self.action_feedback = ActionFeedback( 50 | is_success=False, 51 | result=f'Could not execute browser action {self.browser_action}' 52 | ) 53 | 54 | def get_feedback_summary(self) -> str: 55 | return f"{self.action_feedback.result} → {self.action_feedback.state_change}" 56 | 57 | def update_action_state_change(self, state_change: str): 58 | self.action_feedback.state_change = state_change 59 | 60 | def get_llm_step_summary(self) -> str: 61 | if self.browser_action: 62 | return f"{self.action_feedback.result} → {self.action_feedback.state_change}" 63 | return "There was no more actions to do." 64 | 65 | @classmethod 66 | def _get_action(cls, next_step: Dict) -> BrowserAction | None: 67 | """ 68 | Instantiates the action object. 69 | :param next_step: 70 | :return: 71 | """ 72 | action_type = next_step.get("action") 73 | action_class = ACTION_REGISTRY.get(action_type) 74 | 75 | if action_type == 'done': 76 | logger.info("The LLM has decided that there is nothing more to do.") 77 | return None 78 | 79 | if not action_class: 80 | logger.warning(f"Unknown action type:{action_type}") 81 | return None 82 | return action_class(**{k: v for k, v in next_step.get('params', {}).items()}) 83 | 84 | @classmethod 85 | def from_json(cls, data: Dict): 86 | """ 87 | 88 | :param data: Ex: {"action": "click", "params":{"target_text": "Get Started"}, "reason":"..."} 89 | :return: 90 | """ 91 | return NextStep(data=data) 92 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/agent.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import time 5 | from collections import defaultdict 6 | from typing import List, Dict 7 | from urllib.parse import urlparse, urlunparse 8 | 9 | from ai_e2e_tester.browser.actions import ACTION_REGISTRY 10 | from ai_e2e_tester.browser.guards.domain_guard import ensure_stay_on_domain 11 | from ai_e2e_tester.browser.session import BrowserSession 12 | from ai_e2e_tester.browser.visited_page import VisitedPage 13 | from ai_e2e_tester.llm.openai import OpenAiWrapper 14 | from ai_e2e_tester.reporting.console import ConsoleReporter 15 | from ai_e2e_tester.utils import load_config 16 | 17 | logger = logging.getLogger('ai-e2e-tester.agent') 18 | 19 | 20 | class TestingAgent: 21 | """ 22 | The TestingAgent is an intermediary allowing the LLM to use the web browser. 23 | """ 24 | 25 | def __init__(self, url, config_path): 26 | 27 | self.url = url 28 | self.config = load_config(config_path) 29 | self.llm = OpenAiWrapper(prompts=self.config['prompts']) 30 | 31 | self.reporter = ConsoleReporter() 32 | 33 | self.visited_pages: List[VisitedPage] = [] 34 | 35 | self.wait_between_steps = 0.5 36 | 37 | def run(self, max_steps: int): 38 | logger.info(f"Test started. Max steps: {max_steps}") 39 | browser_session = BrowserSession(self.url, headless=True) 40 | 41 | for step_idx in range(max_steps): 42 | 43 | logger.info(f'[Step {step_idx + 1}] On Page: {browser_session.url}') 44 | 45 | user_prompt = self._get_user_prompt(browser_session) 46 | system_prompt = self._get_system_prompt() 47 | screenshot_b64 = browser_session.get_screenshot(path=f"reports/screenshot_{step_idx + 1}.png") 48 | 49 | self._save_report(f"prompt-{step_idx + 1}.txt", user_prompt) 50 | 51 | self.last_result = self.llm.run(system_prompt, user_prompt, screenshot_b64) 52 | 53 | self._save_report(f"result-{step_idx + 1}.json", self.last_result) 54 | logger.info(f'Current goal: {self.last_result.get("goal")}') 55 | 56 | visited_page = VisitedPage.from_json(browser_session.page, self.last_result) 57 | self.visited_pages.append(visited_page) 58 | 59 | if visited_page.has_next_step(): 60 | browser_session.clear_console_messages() 61 | visited_page.run_next_step(browser_session) 62 | ensure_stay_on_domain(browser_session, visited_page) 63 | logger.info(visited_page.next_step.get_feedback_summary()) 64 | else: 65 | logger.info("The LLM has decided that there is nothing more to do.") 66 | break 67 | 68 | time.sleep(self.wait_between_steps) 69 | 70 | browser_session.close() 71 | logger.info("Test finished.") 72 | grouped_visits = self._get_grouped_visits() 73 | self.reporter.print_report(grouped_visits) 74 | 75 | def _get_grouped_visits(self) -> Dict[str, List[VisitedPage]]: 76 | """ 77 | Groups all visits from the same URL together. 78 | Ignores anchors. 79 | :return: 80 | """ 81 | grouped_visits = defaultdict(list) 82 | for page in self.visited_pages: 83 | parts = urlparse(page.page_url) 84 | normalized = parts._replace(query='', fragment='') 85 | clean_url = urlunparse(normalized) 86 | grouped_visits[clean_url].append(page) 87 | return grouped_visits 88 | 89 | def _generate_llm_context(self) -> str: 90 | if self.visited_pages: 91 | return self.visited_pages[-1].get_llm_visit_summary() 92 | return 'No context yet, this is the first visit to this website.' 93 | 94 | @classmethod 95 | def _generate_llm_available_actions(cls) -> str: 96 | """ 97 | Explains to the LLM which actions it can do on the webpage. 98 | :return: 99 | """ 100 | return "\n".join( 101 | action_cls.describe_for_llm() for action_cls in ACTION_REGISTRY.values() 102 | ) 103 | 104 | @classmethod 105 | def _save_report(cls, name: str, content: str | Dict, reports_folder='debug'): 106 | os.makedirs(reports_folder, exist_ok=True) 107 | 108 | if isinstance(content, dict): 109 | content = json.dumps(content, indent=2) 110 | 111 | with open(f"{reports_folder}/{name}", "w", encoding="utf-8") as f: 112 | f.write(content) 113 | 114 | def _get_user_prompt(self, browser_session: BrowserSession) -> str: 115 | return self.config['prompts']['user'].format( 116 | page_url=browser_session.url, 117 | page_html=browser_session.get_optimized_html(), 118 | console_logs=browser_session.get_console_messages(), 119 | context=self._generate_llm_context(), 120 | available_actions=self._generate_llm_available_actions() 121 | ) 122 | 123 | def _get_system_prompt(self) -> str: 124 | return self.config['prompts']['system'] 125 | -------------------------------------------------------------------------------- /src/ai_e2e_tester/browser/html/html_optimizer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | from typing import List, Dict 4 | 5 | from playwright.sync_api import Page, ElementHandle, FloatRect 6 | 7 | from ai_e2e_tester.browser.html.visibility.basic_check import BasicVisibilityCheck 8 | from ai_e2e_tester.browser.html.visibility.occlusion_check import OcclusionCheck 9 | from ai_e2e_tester.browser.html.visibility.viewport_check import ViewportIntersectionCheck 10 | from ai_e2e_tester.browser.html.visibility.visibility_check import VisibilityCheck 11 | 12 | logger = logging.getLogger('ai-e2e-tester.browser.html.optimizer') 13 | 14 | 15 | class HtmlOptimizer: 16 | """ 17 | Optimizes the HTML to be given to the LLM: 18 | - Only keeps visible elements (reduces the number of tokens and avoid the LLM clicking on unrechable elements) 19 | - Removes parts of HTML that are too large (large SVG, style and script tags) to reduce number of tokens. 20 | """ 21 | 22 | def __init__(self): 23 | self.html_cleanup_patterns = [ 24 | r'\s_ngcontent-[^=]+="[^"]*"', # Angular _ngcontent attributes 25 | r'\s_nghost-[^=]+="[^"]*"', # Angular _nghost attributes 26 | r'\sdata-reactroot(?:="[^"]*")?', # React root attribute 27 | ] 28 | 29 | # Trimming parts of HTML that can be very large. 30 | self.tags_to_trim = ['svg'] 31 | 32 | # Removing the parts of HTML the agent can not interact with. 33 | self.visibility_checks: List[VisibilityCheck] = [ 34 | BasicVisibilityCheck(), 35 | ViewportIntersectionCheck(), 36 | OcclusionCheck() 37 | ] 38 | 39 | def get_optimized_html(self, page: Page) -> str: 40 | initial_html = page.content() 41 | initial_size = len(initial_html) 42 | 43 | body = page.query_selector("body") 44 | viewport = page.viewport_size or {"width": float("inf"), "height": float("inf")} 45 | scroll_x = page.evaluate("() => window.scrollX") 46 | scroll_y = page.evaluate("() => window.scrollY") 47 | 48 | visible_html = self._visible_subtree(body, viewport, scroll_x, scroll_y) if body else "" 49 | 50 | for cleanup_pattern in self.html_cleanup_patterns: 51 | visible_html = re.sub(cleanup_pattern, '', visible_html) 52 | 53 | for tag_to_trim in self.tags_to_trim: 54 | visible_html = self.trim_large_tag(visible_html, tag_to_trim) 55 | 56 | final_size = len(visible_html) 57 | reduction = ((initial_size - final_size) / initial_size * 100) if initial_size > 0 else 0 58 | logger.debug(f"Optimized HTML size: {initial_size} -> {final_size} : Reduced by {reduction:.2f}%") 59 | 60 | return visible_html 61 | 62 | def _visible_subtree(self, el: ElementHandle, viewport: Dict, scroll_x: float, scroll_y: float) -> str: 63 | """ 64 | Recursively build HTML for visible elements in the current viewport. 65 | """ 66 | if not self._is_visible(el, viewport, scroll_x, scroll_y): 67 | return "" 68 | 69 | tag, attrs = self._get_tag_and_attrs(el) 70 | opening = f"<{tag}{' ' + attrs if attrs else ''}>" 71 | html_parts = [opening] 72 | 73 | # Include mixed text nodes 74 | text_nodes = self._get_text_nodes(el) 75 | if text_nodes.strip(): 76 | html_parts.append(text_nodes) 77 | 78 | # Recurse for children 79 | for child in el.query_selector_all(":scope > *"): 80 | html_parts.append(self._visible_subtree(child, viewport, scroll_x, scroll_y)) 81 | 82 | html_parts.append(f"") 83 | return "".join(html_parts) 84 | 85 | def _is_visible(self, el: ElementHandle, viewport: Dict, scroll_x: float, scroll_y: float) -> bool: 86 | """Run all registered visibility checks.""" 87 | box = el.bounding_box() 88 | 89 | if not box: 90 | return False 91 | 92 | if self._ignore_check(box): 93 | return True 94 | 95 | return all(check.is_visible(el, box, viewport, scroll_x, scroll_y) for check in self.visibility_checks) 96 | 97 | @classmethod 98 | def _ignore_check(cls, box: FloatRect) -> bool: 99 | """ 100 | Skip strict visibility check for Angular-style wrappers (e.g. height=0, width>0) 101 | :param box: 102 | :return: 103 | """ 104 | if (box["width"] == 0) != (box["height"] == 0): 105 | return True 106 | return False 107 | 108 | @classmethod 109 | def _get_tag_and_attrs(cls, el: ElementHandle) -> tuple[str, str]: 110 | tag = el.evaluate("el => el.tagName.toLowerCase()") 111 | attrs = el.evaluate(""" 112 | el => Array.from(el.attributes) 113 | .map(a => `${a.name}="${a.value}"`) 114 | .join(' ') 115 | """) 116 | return tag, attrs 117 | 118 | @classmethod 119 | def _get_text_nodes(cls, el: ElementHandle) -> str: 120 | return el.evaluate(""" 121 | el => Array.from(el.childNodes) 122 | .filter(n => n.nodeType === 3) 123 | .map(n => n.textContent) 124 | .join('') 125 | """) 126 | 127 | @classmethod 128 | def trim_large_tag(cls, html: str, tag: str, max_length: int = 500) -> str: 129 | """ 130 | Trims the inner content of specified tag if it exceeds max_length characters. 131 | Keeps the opening and closing tags intact, with a placeholder for trimmed content. 132 | """ 133 | pattern = fr"<{tag}[^>]*>(.*?)" 134 | 135 | def _trim(match): 136 | inner = match.group(1) 137 | if len(inner) > max_length: 138 | logger.info(f'Trimming long {tag} ({len(inner)} > {max_length})') 139 | return f"<{tag}>...trimmed {len(inner)} chars..." 140 | return match.group(0) 141 | 142 | return re.sub(pattern, _trim, html, flags=re.DOTALL | re.IGNORECASE) 143 | --------------------------------------------------------------------------------