├── src
    ├── __init__.py
    └── ai_e2e_tester
    │   ├── __init__.py
    │   ├── llm
    │       ├── __init__.py
    │       ├── ai_wrapper.py
    │       └── openai.py
    │   ├── browser
    │       ├── __init__.py
    │       ├── html
    │       │   ├── __init__.py
    │       │   ├── visibility
    │       │   │   ├── __init__.py
    │       │   │   ├── basic_check.py
    │       │   │   ├── viewport_check.py
    │       │   │   ├── visibility_check.py
    │       │   │   └── occlusion_check.py
    │       │   └── html_optimizer.py
    │       ├── guards
    │       │   ├── __init__.py
    │       │   └── domain_guard.py
    │       ├── actions
    │       │   ├── action_feedback.py
    │       │   ├── back_action.py
    │       │   ├── __init__.py
    │       │   ├── scroll_action.py
    │       │   ├── element_action.py
    │       │   ├── wait_action.py
    │       │   ├── hover_action.py
    │       │   ├── type_action.py
    │       │   ├── click_action.py
    │       │   ├── browser_action.py
    │       │   └── element_selector.py
    │       ├── visited_page.py
    │       ├── session.py
    │       └── next_step.py
    │   ├── reporting
    │       ├── __init__.py
    │       └── console.py
    │   ├── __main__.py
    │   ├── utils.py
    │   ├── cli.py
    │   └── agent.py
├── tests
    └── __init__.py
├── requirements
    └── base.txt
├── .gitignore
├── CHANGELOG.md
├── ROADMAP.md
├── LICENSE
├── config.yml
└── README.md


/src/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/llm/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/html/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/reporting/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/guards/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/html/visibility/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/__main__.py:
--------------------------------------------------------------------------------
1 | from ai_e2e_tester.cli import main
2 | 
3 | # Start the CLI when the package is executed directly (this is the package's __main__ module).
4 | if __name__ == "__main__":
5 |     main()


--------------------------------------------------------------------------------
/src/ai_e2e_tester/llm/ai_wrapper.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from typing import Dict
3 | 
4 | 
5 | class AiWrapper(ABC):
6 |     def run(self, system_prompt: str, user_prompt: str, screenshot_b64) -> Dict:
7 |         pass
8 | 


--------------------------------------------------------------------------------
/requirements/base.txt:
--------------------------------------------------------------------------------
1 | docopt # Parse command line parameters
2 | playwright # Run a browser and programmatically interact with it
3 | openai # API for Open AI LLM
4 | pyyaml # Read settings from yml file
5 | rich # Write console reports


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Python
 2 | __pycache__/
 3 | *.pyc
 4 | *.pyo
 5 | *.pyd
 6 | 
 7 | # Virtual environments
 8 | env/
 9 | venv/
10 | .venv/
11 | 
12 | # PyCharm
13 | .idea/
14 | 
15 | # OS files
16 | .DS_Store
17 | 
18 | # Test & coverage
19 | .pytest_cache/
20 | *.cover
21 | .coverage
22 | 
23 | # App
24 | reports
25 | debug


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/actions/action_feedback.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import Optional
 3 | 
 4 | 
 5 | @dataclass
 6 | class ActionFeedback:
 7 |     """
 8 |     Outcome of a browser action, expressed as text.
 9 |     """
10 |     # Text describing what happened when the action ran.
11 |     result: str
12 |     # False when the action could not be carried out; defaults to True.
13 |     is_success: bool = True
14 |     # Optional text about a state change; empty by default.
15 |     # NOTE(review): how this field is filled in is not visible in this file - confirm against callers.
16 |     state_change: Optional[str] = ''
17 | 
18 |     def __str__(self):
19 |         """Return the `result` text."""
20 |         return self.result
21 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | ### 0.0.4
 2 | 
 3 | - Improved feedback reporting
 4 | 
 5 | ### 0.0.3
 6 | 
 7 | - Improved feedback quality and reduced duplicated feedback.
 8 | - Optimized HTML passed to the LLM to reduce token usage.
 9 | - Better location of elements on the page based on LLM description.
10 | 
11 | ### 0.0.2
12 | 
13 | - Description of available browser actions for the LLM is now automatically generated from the list of declared action classes.
14 | 
15 | ### 0.0.1
16 | First basic version.


--------------------------------------------------------------------------------
/src/ai_e2e_tester/utils.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | import yaml
 4 | 
 5 | def load_config(path="config.yaml"):
 6 |     with open(path, "r", encoding="utf-8") as f:
 7 |         return yaml.safe_load(f)
 8 | 
 9 | def setup_logging():
10 |     logger = logging.getLogger('ai-e2e-tester')
11 |     logger.setLevel(logging.INFO)
12 |     handler = logging.StreamHandler()
13 |     handler.setFormatter(logging.Formatter('%(name)s - %(levelname)s - %(message)s'))
14 |     logger.addHandler(handler)
15 |     logger.setLevel(logging.INFO)


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/html/visibility/basic_check.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict
 2 | 
 3 | from playwright.sync_api import ElementHandle, FloatRect
 4 | 
 5 | from ai_e2e_tester.browser.html.visibility.visibility_check import VisibilityCheck
 6 | 
 7 | 
 8 | class BasicVisibilityCheck(VisibilityCheck):
 9 |     def is_visible(self, el: ElementHandle, box: FloatRect, viewport: Dict, scroll_x: float, scroll_y: float) -> bool:
10 |         try:
11 |             return el.is_visible()
12 |         except Exception:
13 |             return False
14 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/actions/back_action.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from ai_e2e_tester.browser.actions.action_feedback import ActionFeedback
 4 | from ai_e2e_tester.browser.actions.browser_action import BrowserAction
 5 | 
 6 | logger = logging.getLogger('ai-e2e-tester.browser.actions.back')
 7 | 
 8 | 
 9 | class BackAction(BrowserAction):
10 |     name = 'back'
11 |     description = 'Navigate back to the previous page.'
12 | 
13 |     def run(self, page) -> ActionFeedback:
14 |         page.go_back()
15 |         page.wait_for_load_state('load')
16 |         return ActionFeedback(f"Navigated back to previous page.")
17 | 


--------------------------------------------------------------------------------
/ROADMAP.md:
--------------------------------------------------------------------------------
 1 | # 🚀 Project Roadmap
 2 | 
 3 | This document outlines the planned features, improvements, and major ideas for upcoming releases.
 4 | 
 5 | 
 6 | ## 🗓️ Upcoming
 7 | 
 8 | ### Fix cyclical behaviors.
 9 | The agent sometimes keeps repeating the same actions.
10 | 
11 | ### Simplify HTML given to the agent
12 | Reduce unnecessary content from the HTML to reduce the number of input tokens.
13 | 
14 | ### Improve reports
15 | 
16 | - Bug Detection Isn’t Incentivized
17 | - The agent should report a wider range of issues and suggestions.
18 | 
19 | ### Improve identification & selection of interactive elements
20 | 
21 | - Some elements have the same visible text, which prevents specific selection. Currently, the first matching element is selected.
22 | 
23 | ### Set Explicit Objective and Goal Tracking
24 | 
25 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/actions/__init__.py:
--------------------------------------------------------------------------------
 1 | from ai_e2e_tester.browser.actions.back_action import BackAction
 2 | from ai_e2e_tester.browser.actions.click_action import ClickAction
 3 | from ai_e2e_tester.browser.actions.hover_action import HoverAction
 4 | from ai_e2e_tester.browser.actions.scroll_action import ScrollAction
 5 | from ai_e2e_tester.browser.actions.type_action import TypeAction
 6 | from ai_e2e_tester.browser.actions.wait_action import WaitAction
 7 | 
 8 | # Every action class the LLM can use on the web page.
 9 | ACTION_CLASSES = [
10 |     ClickAction,
11 |     TypeAction,
12 |     ScrollAction,
13 |     BackAction,
14 |     WaitAction,
15 |     HoverAction
16 | ]
17 | 
18 | # Registry built automatically: maps each class's `name` attribute to the class itself.
19 | ACTION_REGISTRY = {cls.name: cls for cls in ACTION_CLASSES}
20 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/actions/scroll_action.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from ai_e2e_tester.browser.actions.action_feedback import ActionFeedback
 4 | from ai_e2e_tester.browser.actions.browser_action import BrowserAction
 5 | 
 6 | logger = logging.getLogger('ai-e2e-tester.browser.actions.scroll')
 7 | 
 8 | 
 9 | class ScrollAction(BrowserAction):
10 |     """
11 |     Scrolls on the page.
12 |     @todo Add scroll_amount param for the llm to specify scroll amount and direction.
13 |     """
14 |     name = 'scroll'
15 |     description = 'Scroll down a page.'
16 | 
17 |     def __init__(self, scroll_amount=2000):
18 |         self.scroll_amount = scroll_amount
19 | 
20 |     def run(self, page) -> ActionFeedback:
21 |         page.mouse.wheel(0, self.scroll_amount)
22 |         return ActionFeedback(f"Scrolled down the page.")
23 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/cli.py:
--------------------------------------------------------------------------------
 1 | """
 2 | AI E2E Website Tester
 3 | 
 4 | Usage:
 5 |   ai_e2e_tester --url=<url> [--config=<str>] [--max-steps=<int>]
 6 | 
 7 | Options:
 8 |   --url=<url>           Landing page of the website to test.
 9 |   --config=<str>        Path to alternative configuration file.
10 |   --max-steps=<int>     Maximum number of steps the AI can take while exploring the website [default: 5].
11 | """
12 | 
13 | from docopt import docopt
14 | 
15 | from ai_e2e_tester.agent import TestingAgent
16 | from ai_e2e_tester.utils import setup_logging
17 | 
18 | 
19 | def main():
20 |     setup_logging()
21 | 
22 |     args = docopt(__doc__)
23 |     url = args["--url"]
24 |     config_path = args.get("--config") or "config.yml"
25 |     max_steps = int(args['--max-steps'])
26 | 
27 |     agent = TestingAgent(url, config_path)
28 |     agent.run(max_steps=max_steps)
29 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/actions/element_action.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from playwright.sync_api import Page, ElementHandle
 4 | 
 5 | from ai_e2e_tester.browser.actions.browser_action import BrowserAction
 6 | from ai_e2e_tester.browser.actions.element_selector import ElementSelector
 7 | 
 8 | logger = logging.getLogger('ai-e2e-tester.browser.actions.element')
 9 | 
10 | 
11 | class BrowserElementAction(BrowserAction):
12 |     """
13 |     Defines an action on a specific element on the webpage (button, input, etc.)
14 |     """
15 | 
16 |     def __init__(self, target_text: str):
17 |         self.element_selector = ElementSelector()
18 |         self.target_text = target_text
19 | 
20 |     def get_element(self, page: Page) -> ElementHandle | None:
21 |         return self.element_selector.get_element(self.target_text, page)
22 | 
23 |     def __str__(self):
24 |         return f'Browser Action on target "{self.target_text}"'


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/actions/wait_action.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import time
 3 | 
 4 | from ai_e2e_tester.browser.actions.action_feedback import ActionFeedback
 5 | from ai_e2e_tester.browser.actions.browser_action import BrowserAction
 6 | 
 7 | logger = logging.getLogger('ai-e2e-tester.browser.actions.wait')
 8 | 
 9 | 
10 | class WaitAction(BrowserAction):
11 |     name = 'wait'
12 |     description = 'Wait until the website has finished loading the data. Use this if you believe the website is not ready yet to be interacted with.'
13 |     input_fields = {
14 |         "wait_time_sec": "How long to wait in seconds."
15 |     }
16 | 
17 |     def __init__(self, wait_time_sec: str = "5"):
18 |         self.wait_time_ms = wait_time_sec
19 | 
20 |     def run(self, page) -> ActionFeedback:
21 |         logger.info(f"→ Waiting {self.wait_time_ms}sec for the page to be ready")
22 |         time.sleep(int(self.wait_time_ms))
23 |         return ActionFeedback(f"Waited for {self.wait_time_ms}sec.")
24 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/actions/hover_action.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from ai_e2e_tester.browser.actions.action_feedback import ActionFeedback
 4 | from ai_e2e_tester.browser.actions.element_action import BrowserElementAction
 5 | 
 6 | logger = logging.getLogger('ai-e2e-tester.browser.actions.hover')
 7 | 
 8 | 
 9 | class HoverAction(BrowserElementAction):
10 |     name = 'hover'
11 |     description = 'Move the mouse cursor over a button, link, or interactive element (for example, to reveal tooltips or menus).'
12 |     input_fields = {
13 |         "target_text": "Use the element's exact `id` if present; otherwise, use the exact visible text on the element you want to hover over."
14 |     }
15 | 
16 |     def run(self, page) -> ActionFeedback:
17 |         el = self.get_element(page)
18 |         if not el:
19 |             return ActionFeedback(f'Could not find element to hover: "{self.target_text}"', False)
20 | 
21 |         el.hover()
22 |         return ActionFeedback(f"Hovered over {self.target_text}")
23 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/html/visibility/viewport_check.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import Dict
 3 | 
 4 | from playwright.sync_api import ElementHandle, FloatRect
 5 | 
 6 | from ai_e2e_tester.browser.html.visibility.visibility_check import VisibilityCheck
 7 | 
 8 | logger = logging.getLogger('ai-e2e-tester.browser.html.optimizer.viewport')
 9 | 
10 | 
11 | class ViewportIntersectionCheck(VisibilityCheck):
12 | 
13 |     def is_visible(self, el: ElementHandle, box: FloatRect, viewport: Dict, scroll_x: float, scroll_y: float) -> bool:
14 |         """Check if element's bounding box intersects with the viewport."""
15 | 
16 |         # Element box
17 |         x, y, w, h = box["x"], box["y"], box["width"], box["height"]
18 | 
19 |         # Viewport
20 |         vp_w, vp_h = viewport["width"], viewport["height"]
21 | 
22 |         # Return False if the element is completely outside the viewport in any direction
23 |         if (x + w) <= 0 or x >= vp_w or (y + h) <= 0 or y >= vp_h:
24 |             return False
25 | 
26 |         return True
27 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/html/visibility/visibility_check.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import Dict
 3 | 
 4 | from playwright.sync_api import ElementHandle, FloatRect
 5 | 
 6 | 
 7 | class VisibilityCheck(ABC):
 8 |     """
 9 |     Base class for all visibility checks.
10 |     A check receives an element together with its bounding box, the viewport and the scroll offsets,
11 |     and returns a boolean telling whether the element is visible.
12 |     """
13 | 
14 |     @abstractmethod
15 |     def is_visible(self, el: ElementHandle, box: FloatRect, viewport: Dict, scroll_x: float, scroll_y: float) -> bool:
16 |         """
17 |         Tells whether the element is visible. Must be implemented by subclasses.
18 | 
19 |         :param el: (ElementHandle): The Playwright handle for the element to check.
20 |         :param box: (FloatRect): The element's bounding box, with 'x', 'y', 'width', 'height' as keys.
21 |         :param viewport: (Dict): The viewport. NOTE(review): presumably its size, with 'width' and 'height' keys - confirm against callers.
22 |         :param scroll_x: (float): The current horizontal scroll offset of the page (window.scrollX)
23 |         :param scroll_y: (float): The current vertical scroll offset of the page (window.scrollY).
24 | 
25 |         :return: True if the element is visible and can be interacted with.
26 |         """
27 |         pass
28 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Alexis
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/actions/type_action.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from ai_e2e_tester.browser.actions.action_feedback import ActionFeedback
 4 | from ai_e2e_tester.browser.actions.element_action import BrowserElementAction
 5 | 
 6 | logger = logging.getLogger('ai-e2e-tester.browser.actions.type')
 7 | 
 8 | 
 9 | class TypeAction(BrowserElementAction):
10 |     name = 'type'
11 |     description = 'Type some text into an input box.'
12 |     input_fields = {
13 |         "target_text": "Use the input's exact `id` value if it has one. If there is no `id`, use the exact visible label, placeholder, or aria-label for the input field.",
14 |         "value": "The text you want to type into the input box."
15 |     }
16 | 
17 |     def __init__(self, target_text: str, value: str = None):
18 |         super().__init__(target_text)
19 |         self.value = value
20 | 
21 |     def run(self, page) -> ActionFeedback:
22 |         el = self.get_element(page)
23 | 
24 |         if not el:
25 |             return ActionFeedback(f'Could not type. Could not find input for "{self.target_text}"', False)
26 | 
27 |         el.type(self.value)
28 |         return ActionFeedback(f'Typed "{self.value}" into field "{self.target_text}"')
29 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/reporting/console.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Dict
 2 | 
 3 | from rich.console import Console
 4 | 
 5 | from ai_e2e_tester.browser.visited_page import VisitedPage
 6 | 
 7 | 
 8 | class ConsoleReporter:
 9 | 
10 |     def __init__(self):
11 |         self.console = Console()
12 | 
13 |     def print_feedback_item(self, item: Dict[str, str]):
14 |         self.console.print(f"[bold cyan]{item['category']}[/] [bold]{item['name']}[/]")
15 |         self.console.print(f"[italic]{item['where']}[/]")
16 |         self.console.print(f"[italic]{item['evidence']}[/]")
17 |         self.console.print(f"{item['details']}\n")
18 | 
19 |     def print_report(self, grouped_visits: Dict[str, List[VisitedPage]]):
20 |         """
21 |         Prints a summary report of all visited pages.
22 |         """
23 |         for page_url, visited_pages in grouped_visits.items():
24 |             self.console.print(f"\n[underline bold]Page:[/] {page_url}")
25 |             seen = set()
26 |             for visited_page in visited_pages:
27 |                 for item in visited_page.feedback:
28 |                     signature = item.get("details")
29 |                     if signature not in seen:
30 |                         seen.add(signature)
31 |                         self.print_feedback_item(item)
32 | 
33 |             self.console.print("---\n", style="dim")
34 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/guards/domain_guard.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from ai_e2e_tester.browser.session import BrowserSession
 4 | from ai_e2e_tester.browser.visited_page import VisitedPage
 5 | 
 6 | logger = logging.getLogger('ai-e2e-tester.browser.guards.domain')
 7 | 
 8 | 
 9 | class DomainGuardException(Exception):
10 |     pass
11 | 
12 | 
13 | def ensure_stay_on_domain(session: BrowserSession, page: VisitedPage):
14 |     """
15 |     If AI navigated out of the starting domain, we go back.
16 |     raises Exception if could not return back to original domain.
17 |     """
18 |     main_domain = session.get_start_domain()
19 |     curr_domain = session.get_current_domain()
20 |     if curr_domain != main_domain:
21 |         external_url = session.url
22 |         logger.info(f"External URL: {external_url}). Going back to website.")
23 |         try:
24 |             session.go_back()
25 |             curr_domain = session.get_current_domain()
26 |             if curr_domain != main_domain:
27 |                 raise DomainGuardException("Still not on main domain after going back.")
28 |             else:
29 |                 page.next_step.update_action_state_change(
30 |                     f"Visited page {external_url} on external domain. It is not part of testing plan. Returned back to previous page.")
31 |         except Exception as e:
32 |             raise DomainGuardException("Error going back in browser history:", e)
33 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/actions/click_action.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from playwright.sync_api import Page, ElementHandle
 4 | 
 5 | from ai_e2e_tester.browser.actions.action_feedback import ActionFeedback
 6 | from ai_e2e_tester.browser.actions.element_action import BrowserElementAction
 7 | 
 8 | logger = logging.getLogger('ai-e2e-tester.browser.actions.click')
 9 | 
10 | 
11 | class ClickAction(BrowserElementAction):
12 |     """
13 |     Click on buttons and links.
14 |     """
15 |     name = 'click'
16 |     description = 'Click on a button or clickable element.'
17 |     input_fields = {
18 |         "target_text": "Use the clickable element's exact `id` value if it has one. If there is no `id`, use exact visible text shown on the button, link, or element you want to interact with."
19 |     }
20 | 
21 |     def __init__(self, target_text: str):
22 |         super().__init__(target_text)
23 | 
24 |     def run(self, page: Page) -> ActionFeedback:
25 |         el = self.get_element(page)
26 | 
27 |         if not el:
28 |             return ActionFeedback(f'Could not click. Could not find element with text "{self.target_text}"', False)
29 | 
30 |         self._force_same_tab_open(page, el)
31 | 
32 |         el.click()
33 |         page.wait_for_load_state('load')
34 |         return ActionFeedback(f'Clicked on "{self.target_text}"')
35 | 
36 |     @classmethod
37 |     def _force_same_tab_open(cls, page: Page, el: ElementHandle):
38 |         page.evaluate("""
39 |             el => { if (el.tagName && el.tagName.toLowerCase() === 'a') el.removeAttribute('target'); }
40 |         """, el)
41 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/actions/browser_action.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from abc import ABC
 3 | from typing import Dict
 4 | 
 5 | from playwright.sync_api import Page
 6 | 
 7 | from ai_e2e_tester.browser.actions.action_feedback import ActionFeedback
 8 | 
 9 | logger = logging.getLogger('ai-e2e-tester.browser.actions')
10 | 
11 | 
12 | class BrowserAction(ABC):
13 |     # Name by which the LLM should call this action.
14 |     name: str = None
15 | 
16 |     # The description will explain to the LLM when it should use this action.
17 |     description: str = None
18 | 
19 |     # Describing to the LLM which input fields this action needs and how to format them.
20 |     input_fields: Dict[str, str] = {}
21 | 
22 |     def run(self, page: Page) -> ActionFeedback:
23 |         """
24 |         Runs an action in the browser.
25 |         :param page: Current page in the browser.
26 |         :return: Description of the result of this action, in natural language. Will be given to LLM as feedback.
27 |         """
28 |         pass
29 | 
30 |     @classmethod
31 |     def describe_for_llm(cls):
32 |         """
33 |         Returns a string describing the action and its 'params' fields for the LLM.
34 |             Example output:
35 | 
36 |             - "click": Click on a button or clickable element.
37 |               params:
38 |                 - "target_text": The element's id or visible text.
39 |         """
40 |         doc = f'- "{cls.name}": {cls.description}\n'
41 |         if cls.input_fields:
42 |             doc += "  params:\n"
43 |             for k, desc in cls.input_fields.items():
44 |                 doc += f'    - "{k}": {desc}\n'
45 |         return doc
46 | 
47 |     def __str__(self):
48 |         return 'Browser Action'
49 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/actions/element_selector.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from playwright.sync_api import ElementHandle, Page
 4 | 
 5 | logger = logging.getLogger('ai-e2e-tester.browser.selector')
 6 | 
 7 | 
 8 | class ElementSelector:
 9 |     """
10 |     Helper to return an element from the HTML based on the LLM description.
11 |     """
12 | 
13 |     def __init__(self):
14 |         self.selector_strategies = [
15 |             lambda t: f'#{t}',  # By id
16 |             lambda t: f'text="{t}"',  # By visible text
17 |             lambda t: f'[placeholder="{t}"]',  # By exact placeholder
18 |             lambda t: f'input[placeholder*="{t.split()[0]}"]',  # Fallback: partial match
19 |             lambda t: f'[data-title="{t}"]',  # Fallback: data-title attribute
20 |             lambda t: f'.{t}',  # Fallback: by class name
21 |         ]
22 | 
23 |     def get_element(self, target_text: str, page: Page) -> ElementHandle | None:
24 |         """
25 |         Attempts to locate the element to interact with, based on LLM suggestions.
26 |         Uses multiple selector strategies. Logs a warning if multiple elements are found.
27 |         Returns the first matching element, or None.
28 |         @todo The LLM needs to return a more specific element selector if ID is not available.
29 |         @todo If multiple matching elements, prioritize elements most likely to be interactive.
30 |         """
31 | 
32 |         for make_selector in self.selector_strategies:
33 |             selector = make_selector(target_text)
34 |             try:
35 |                 elements = page.query_selector_all(selector)
36 |                 if elements:
37 |                     if len(elements) > 1:
38 |                         logger.warning(f"Multiple elements found with selector '{selector}'; using the first one.")
39 |                     return elements[0]
40 |             except Exception as e:
41 |                 logger.debug(f"Selector '{selector}' failed: {e}")
42 | 
43 |         return None
44 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/llm/openai.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import logging
 3 | import os
 4 | from typing import Dict
 5 | 
 6 | import openai
 7 | 
 8 | from ai_e2e_tester.llm.ai_wrapper import AiWrapper
 9 | 
10 | logger = logging.getLogger('ai-e2e-tester.llm')
11 | 
12 | 
class OpenAiWrapper(AiWrapper):
    """
    A wrapper to call the LLM.
    Currently only OpenAI is supported.

    The model is expected to reply with a single JSON object. When the reply cannot be
    parsed into one, a fallback result carrying the "done" action is returned instead.
    """

    def __init__(self, prompts: Dict[str, str], max_tokens=800, model: str = "gpt-4o"):
        """
        :param prompts: Prompt templates; stored on the instance as-is.
        :param max_tokens: Upper bound for the number of tokens in the model's reply.
        :param model: Name of the OpenAI chat model to call.
        :raises Exception: If the OPENAI_API_KEY environment variable is not set.
        """
        self.max_tokens = max_tokens
        self.prompts = prompts
        self.model = model

        api_key = os.environ.get("OPENAI_API_KEY")
        if not api_key:
            raise Exception("No API key found. Please set the OPENAI_API_KEY environment variable.")

        self.client = openai.OpenAI(api_key=api_key)

    def run(self, system_prompt: str, user_prompt: str, screenshot_b64) -> Dict:
        """
        Sends the prompts and the screenshot to the model and returns its parsed JSON answer.

        :param system_prompt: Content of the "system" message.
        :param user_prompt: Text part of the "user" message.
        :param screenshot_b64: Base64-encoded PNG, embedded in the "user" message as a data URL.
        :return: The JSON object produced by the model, or the fallback result when the
                 reply is empty, is not valid JSON, or is not a JSON object.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": user_prompt},
                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{screenshot_b64}"}}
                    ]
                }
            ],
            max_tokens=self.max_tokens,
            temperature=0.2,
        )

        # The API types `content` as optional, so guard against None before stripping.
        txt = (response.choices[0].message.content or "").strip()

        try:
            result = json.loads(self._strip_code_fences(txt))
            # Callers read the result with dict accessors; a JSON list/string is unusable.
            if not isinstance(result, dict):
                raise ValueError("The JSON output is not an object.")
        except ValueError:  # json.JSONDecodeError is a subclass of ValueError
            logger.warning(f"Failed to parse response as JSON: {txt}")
            result = self._fallback_result()
        return result

    @staticmethod
    def _strip_code_fences(text: str) -> str:
        """
        Removes a surrounding markdown code fence (``` or ```json) from the reply, if present.
        Text without a leading fence is returned unchanged apart from whitespace stripping.
        """
        text = text.strip()
        if not text.startswith("```"):
            return text

        # Drop the whole opening fence line, which may carry a language tag.
        _, _, text = text.partition("\n")
        text = text.rstrip()
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    @staticmethod
    def _fallback_result() -> Dict:
        """Returns the result used when the model's reply is unusable; it ends the test run."""
        return {
            "next_step": {"action": "done"},
            "reason": "Parsing error of JSON output from AI.",
            "bugs": [],
            "suggestions": []
        }
60 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/visited_page.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import List, Dict
 3 | 
 4 | from ai_e2e_tester.browser.next_step import NextStep
 5 | from ai_e2e_tester.browser.session import BrowserSession
 6 | 
 7 | 
 8 | @dataclass
 9 | class VisitedPage:
10 |     """
11 |     Stores information about a page the AI tester visited on the tested website.
12 |     It is used to write reports at the end.
13 |     """
14 |     page_url: str
15 |     summary: str  # Summary of current page
16 |     context: str  # Summary of previous visited pages, actions taken by the agent, and their result
17 |     expectations_vs_reality: str
18 |     feedback: List[Dict[str, str]]
19 |     next_step: NextStep
20 | 
21 |     def run_next_step(self, browser_session: BrowserSession):
22 |         self.next_step.run(page=browser_session.page)
23 | 
24 |     def has_next_step(self):
25 |         return self.next_step.browser_action is not None
26 | 
27 |     def get_llm_condensed_feedback(self):
28 |         return ', '.join([feedback['details'] for feedback in self.feedback])
29 | 
30 |     def get_llm_visit_summary(self) -> str:
31 |         """
32 |         Returns a combination of visit context (summary of previous actions) and current action.
33 |         :return:
34 |         """
35 |         return '\n'.join(item for item in [self.context, "LAST ACTION:", str(self.get_visit_summary())] if item)
36 | 
37 |     def get_visit_summary(self) -> Dict:
38 |         return {
39 |             "page_url": self.page_url,
40 |             "observation": self.get_llm_condensed_feedback(),
41 |             "expectations_vs_reality": self.expectations_vs_reality,
42 |             "action_taken": self.next_step.get_llm_step_summary()
43 |         }
44 | 
45 |     @classmethod
46 |     def from_json(cls, page, result: Dict) -> "VisitedPage":
47 |         return VisitedPage(
48 |             page_url=page.url,
49 |             summary=result.get("summary"),
50 |             context=result.get("context"),
51 |             expectations_vs_reality=result.get("expected_vs_actual"),
52 |             next_step=NextStep.from_json(result.get("next_step")),
53 |             feedback=result.get("feedback", []),
54 |         )
55 | 


--------------------------------------------------------------------------------
/config.yml:
--------------------------------------------------------------------------------
 1 | prompts:
 2 |   system: |
 3 |     You are an AI website tester acting like a real user.
 4 | 
 5 |     Rules:
 6 |     - Don’t repeat an action that failed or had no visible effect.
 7 |     - Don’t re-open external links or the same identical element twice.
 8 |     - Base claims only on what’s in the provided HTML, screenshot, and console logs.
 9 |     - Prefer facts over opinions. Be specific and actionable.
10 |     - If nothing useful remains, set next_step.action = "done".
11 | 
12 |     Output **ONLY** the JSON, and do **NOT** include markdown, code block markers, or explanations.
13 | 
14 |   user: |
15 |     Page URL: {page_url}
16 | 
17 |     Visible HTML:
18 |     """
19 |     {page_html}
20 |     """
21 | 
22 |     Screenshot: (image provided)
23 |     
24 |     Console logs:
25 |     """
26 |     {console_logs}
27 |     """
28 | 
29 |     A summary of what you have seen and done on this website before, for CONTEXT:
30 |     """
31 |     {context}
32 |     """
33 | 
34 |     Task:
35 |     1) Say whether the current page matches what you expected from the last action (brief).
36 |     2) Infer the current user goal (concise).
37 |     3) Propose ONE next action to progress that goal.
38 |     4) Give precise, evidence-based feedback. Do not speculate. Point to specific elements.
39 | 
40 |     YOUR OUTPUT:
41 | 
42 |     Output **ONLY** the JSON, and do **NOT** include markdown, code block markers, or explanations.
43 |     
44 |     The JSON has the following fields:
45 |     
46 |     - "goal" : A concise statement of the user's likely intent on this page.
47 |     - "expected_vs_actual" : Short statement on whether the page matched expectations (and why/why not).
48 |     - "summary" : Briefly describe what is visible or prominent on the page.
49 |     - "context": In a paragraph summarize all your previous experience with this website, including observations, 
50 |                  actions you took and their results.
51 |     - "next_step" : Suggest ONE next action a user might take. This field has the following subfields:
52 |       - "reason" : A short phrase (max 15 words) explaining why this action helps the user.
53 |       - "action" : It can only have one of the following values:
54 |     
55 |         {available_actions}
56 |     
57 |     - "feedback" : A list of feedback objects. Each item must include:
58 | 
59 |       - "name": string,
60 |       - "category": one of ["functional","technical","accessibility","ui_ux"],
61 |       - "where": string,         // selector/id/role/text near it
62 |       - "evidence": string,      // short HTML/snippet or console line
63 |       - "details": string        // Detailed explanation of the issue and a suggestion for how to fix it
64 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/html/visibility/occlusion_check.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import Dict
 3 | 
 4 | from playwright.sync_api import ElementHandle
 5 | 
 6 | from ai_e2e_tester.browser.html.visibility.visibility_check import VisibilityCheck
 7 | 
 8 | logger = logging.getLogger('ai-e2e-tester.browser.html.optimizer.occlusion')
 9 | 
10 | 
class OcclusionCheck(VisibilityCheck):
    """
    Checks if an element is visually unobstructed.

    The check probes one point of the element with `document.elementFromPoint` and passes
    when the topmost element at that point is the element itself or one of its descendants.
    """

    def is_visible(self, el: ElementHandle, box: dict, viewport: Dict, scroll_x: float, scroll_y: float) -> bool:
        """
        :param el: Element to test.
        :param box: Bounding box of the element with "x", "y", "width" and "height" keys.
                    It is expected to come from Playwright's `bounding_box()`, which is
                    relative to the viewport (like `getBoundingClientRect`).
        :param viewport: Viewport size with "width" and "height" keys.
        :param scroll_x: Horizontal scroll offset; not needed for viewport-relative boxes.
        :param scroll_y: Vertical scroll offset; not needed for viewport-relative boxes.
        :return: True if the probed point of the element is not covered by another element.
        """
        center_x, center_y = self._get_visible_center(box, viewport, scroll_x, scroll_y)

        # `elementFromPoint` expects viewport (client) coordinates, which is what the box
        # already uses, so the point is passed through without any scroll correction.
        is_clickable = el.evaluate(
            """
            (el, center) => {
                const [clientX, clientY] = center;
                const top = document.elementFromPoint(clientX, clientY);
                // Accept exact match or child (contained) match
                return top !== null && (top === el || el.contains(top));
            }
            """,
            [center_x, center_y]
        )

        return bool(is_clickable)

    @classmethod
    def _get_visible_center(cls, box, viewport: Dict, scroll_x: float, scroll_y: float):
        """
        Returns the center of the intersection between the element's bounding box and the viewport.
        If there is no intersection (element fully offscreen), returns the geometric center of the box.

        All coordinates are viewport-relative: the viewport spans (0, 0) to (width, height).
        `scroll_x` and `scroll_y` are kept for interface compatibility and are not used,
        because scrolling is already reflected in the viewport-relative bounding box.
        """

        # Bounding box in viewport coordinates
        left = box["x"]
        top = box["y"]
        right = left + box["width"]
        bottom = top + box["height"]

        # Intersection rectangle with the viewport, whose origin is (0, 0)
        vis_left = max(left, 0)
        vis_top = max(top, 0)
        vis_right = min(right, viewport["width"])
        vis_bottom = min(bottom, viewport["height"])

        # If no intersection, fallback to geometric center
        if vis_right <= vis_left or vis_bottom <= vis_top:
            return (left + right) / 2, (top + bottom) / 2

        # Center of intersection
        return (vis_left + vis_right) / 2, (vis_top + vis_bottom) / 2
71 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/session.py:
--------------------------------------------------------------------------------
 1 | import base64
 2 | import logging
 3 | from urllib.parse import urlparse
 4 | 
 5 | from playwright.sync_api import sync_playwright
 6 | 
 7 | from ai_e2e_tester.browser.html.html_optimizer import HtmlOptimizer
 8 | 
 9 | logger = logging.getLogger('ai-e2e-tester.browser')
10 | 
11 | 
class BrowserSession:
    """
    Encapsulates a Playwright browser session for automated web testing.

    This class manages browser startup/shutdown, page navigation, page text/screenshot extraction,
    and simple navigation actions, making it easier to interact with a browser in a reusable way.

    It can be used as a context manager; the session is closed when the block is left.
    """

    def __init__(self, start_url: str, headless=True):
        """
        Starts Playwright, launches Chromium and opens a page.

        :param start_url: URL to open right away; nothing is loaded when it is empty.
        :param headless: Passed to the Chromium launcher.
        """
        self.playwright = sync_playwright().start()
        try:
            self.browser = self.playwright.chromium.launch(headless=headless)
        except Exception:
            # Without a browser there is nothing else to close; stop the driver and re-raise.
            self.playwright.stop()
            raise

        try:
            self.page = self.browser.new_page()

            self.console_messages = []
            self._attach_console_logging()

            self.html_optimizer = HtmlOptimizer()

            self.start_url = start_url
            if self.start_url:
                self.goto_url(self.start_url)
        except Exception:
            # Do not leave a browser process behind when the setup fails half-way.
            self.close()
            raise

    def goto_url(self, url):
        """Navigates to `url` and waits for the 'networkidle' load state."""
        self.page.goto(url)
        self.page.wait_for_load_state('networkidle')

    def go_back(self):
        """Navigates one entry back in the history and waits for the 'networkidle' load state."""
        self.page.go_back()
        self.page.wait_for_load_state('networkidle')

    def get_page_text(self) -> str:
        """Returns `document.body.innerText` of the current page."""
        return self.page.evaluate("() => document.body.innerText")

    def get_page_html(self) -> str:
        """Returns the full HTML content of the current page."""
        return self.page.content()

    def get_optimized_html(self) -> str:
        """Returns the HTML of the current page as reduced by the HtmlOptimizer."""
        return self.html_optimizer.get_optimized_html(self.page)

    def get_screenshot(self, path):
        """
        Takes a screenshot of the current page, passing `path` to Playwright as the file to save.

        :return: The screenshot bytes encoded as a base64 string.
        """
        screenshot_bytes = self.page.screenshot(path=path)
        return base64.b64encode(screenshot_bytes).decode('utf-8')

    def get_console_messages(self):
        """
        Return warnings/errors from console.
        A copy of the internal list is returned.
        """
        return list(self.console_messages)

    def clear_console_messages(self):
        """Forgets all console messages collected so far."""
        self.console_messages = []

    def _attach_console_logging(self):
        """Registers a page listener that collects console messages of type warning or error."""

        def on_console_message(msg):
            if msg.type in ("warning", "error"):
                self.console_messages.append({"type": msg.type, "text": msg.text})

        self.page.on("console", on_console_message)

    def close(self):
        """Closes the browser and stops Playwright; Playwright is stopped even if closing fails."""
        try:
            self.browser.close()
        finally:
            self.playwright.stop()

    @property
    def url(self):
        """URL of the current page."""
        return self.page.url

    def get_current_domain(self):
        """Returns the domain of the current page (see get_domain)."""
        return self.get_domain(self.url)

    def get_start_domain(self):
        """Returns the domain of the start URL (see get_domain)."""
        return self.get_domain(self.start_url)

    @classmethod
    def get_domain(cls, url):
        """Returns the lower-cased network location (host and optional port) of `url`."""
        return urlparse(url).netloc.lower()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
94 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 🤖 AI E2E Website Tester
 2 | 
 3 | Skip the test scripts. Just point the AI E2E Website Tester at your website and watch it explore.
 4 | 
 5 | The AI acts like a real human visitor, clicking, scrolling, and reading your website. As it goes, it automatically takes notes on bugs and UX issues, and proposes improvements.
 6 | 
 7 | This tool is an early prototype.
 8 | 
 9 | Feedback and ideas are very welcome!
10 | 
11 | ---
12 | 
13 | ## Example run
14 | 
15 | ```console
16 | ai-e2e-tester.agent - INFO - Test started. Max steps: 6
17 | ai-e2e-tester.agent - INFO - [Step 1] On Page: https://pi-match.web.app/
18 | ai-e2e-tester.agent - INFO - Current goal: Find a suitable PI and lab based on location and interests.
19 | ai-e2e-tester.browser.next_step - INFO - Reasoning for Next Action: To initiate a search for PIs and labs.
20 | ai-e2e-tester.agent - INFO - Typed "Melbourne" into field "affiliation_input" → Page content updated.
21 | ai-e2e-tester.agent - INFO - [Step 2] On Page: https://pi-match.web.app/
22 | ai-e2e-tester.agent - INFO - Current goal: Find a suitable PI and lab based on location and interests.
23 | ai-e2e-tester.browser.next_step - INFO - Reasoning for Next Action: To attempt a search with interests included.
24 | ai-e2e-tester.agent - INFO - Typed "epigenetics" into field "What I like" → Page content updated.
25 | ai-e2e-tester.agent - INFO - [Step 3] On Page: https://pi-match.web.app/
26 | ai-e2e-tester.agent - INFO - Current goal: Find a suitable PI and lab based on location and interests.
27 | ai-e2e-tester.browser.next_step - INFO - Reasoning for Next Action: To initiate the search based on entered criteria.
28 | ai-e2e-tester.agent - INFO - Clicked on "Search" → Navigated to new URL.
29 | ai-e2e-tester.agent - INFO - [Step 4] On Page: https://pi-match.web.app/labs?location=Melbourne&keywords=epigenetics
30 | ai-e2e-tester.agent - INFO - Current goal: Find a suitable PI and lab in Melbourne related to epigenetics.
31 | ai-e2e-tester.browser.next_step - INFO - Reasoning for Next Action: The page is still loading; waiting may allow results to appear.
32 | ai-e2e-tester.browser.actions.wait - INFO - → Waiting 20sec for the page to be ready
33 | ai-e2e-tester.agent - INFO - Waited for 20sec. → Page content updated.
34 | ai-e2e-tester.agent - INFO - [Step 5] On Page: https://pi-match.web.app/labs?location=Melbourne&keywords=epigenetics
35 | ai-e2e-tester.agent - INFO - Current goal: Find labs related to epigenetics in Melbourne.
36 | ai-e2e-tester.browser.next_step - INFO - Reasoning for Next Action: Explore more details about a specific lab.
37 | 
38 | ```
39 | 
40 | 
41 | ---
42 | 
43 | ## 🛠️ Installation
44 | 
45 | Clone this repo and install dependencies:
46 | 
47 | ```bash
48 | git clone https://github.com/BitsOfAdventures/ai-e2e-tester.git
49 | cd ai-e2e-tester
50 | 
51 | pip install -r requirements/base.txt
52 | playwright install
53 | ```
54 | 
55 | 
56 | ## 🚀 Quick Start
57 | 
58 | Set your API key as an environment variable:
59 | 
60 | ```bash
61 | export OPENAI_API_KEY=sk-...       # On macOS/Linux
62 | ```
63 | 
64 | Then run the tool from the project root:
65 | 
66 | ```bash
67 | python -m src.ai_e2e_tester --url=https://example.com
68 | ```
69 | 
70 | 
71 | ## 💬 Feedback Wanted
72 | This is an early prototype. If you try it, please open an issue or suggest improvements. Any feedback is appreciated!
73 | 
74 | ## Currently Supported Actions
75 | Here is what the agent can currently do on the tested website:
76 | 
77 | - Click on links, buttons, etc.
78 | - Type text into input fields
79 | - Navigate to the previous page
80 | - Scroll down
81 | - Wait while the page is loading data from the server


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/next_step.py:
--------------------------------------------------------------------------------
 1 | import base64
 2 | import logging
 3 | from typing import Dict
 4 | 
 5 | from ai_e2e_tester.browser.actions import ACTION_REGISTRY
 6 | from ai_e2e_tester.browser.actions.action_feedback import ActionFeedback
 7 | from ai_e2e_tester.browser.actions.browser_action import BrowserAction
 8 | 
 9 | logger = logging.getLogger('ai-e2e-tester.browser.next_step')
10 | 
11 | 
class NextStep:
    """
    The next action proposed by the LLM, together with the outcome of executing it.
    """
    reason: str
    browser_action: BrowserAction | None = None
    action_feedback: ActionFeedback

    def __init__(self, data: Dict):
        """
        :param data: The "next_step" object of the LLM result; None is treated as empty.
        """
        data = data or {}
        self.reason = data.get('reason')
        self.browser_action = self._get_action(data)

    @classmethod
    def get_state_snapshot(cls, page):
        """Captures URL, HTML content and a base64 viewport screenshot of the page."""
        return {
            "url": page.url,
            "content": page.content(),
            "screenshot": base64.b64encode(page.screenshot(full_page=False)).decode("utf-8")
        }

    @classmethod
    def compare_state(cls, before, after):
        """
        Describes the difference between two snapshots taken with get_state_snapshot.
        The most significant change wins: URL first, then content, then screenshot.
        """
        if after["url"] != before["url"]:
            return "Navigated to new URL."
        if after["content"] != before["content"]:
            return "Page content updated."
        if after["screenshot"] != before["screenshot"]:
            return "Viewport content updated (e.g. scrolled or animated)."
        return "No visible change detected."

    def run(self, page):
        """
        Executes the browser action on the page and stores the outcome in `action_feedback`.
        Any failure is logged and recorded as an unsuccessful feedback instead of being raised.
        """
        logger.info(f"Reasoning for Next Action: {self.reason}")
        try:
            before = self.get_state_snapshot(page)
            self.action_feedback = self.browser_action.run(page=page)
            after = self.get_state_snapshot(page)
            self.action_feedback.state_change = self.compare_state(before, after)
        except Exception as e:
            logger.error(f'Could not execute browser action {self.browser_action}: {e}')
            self.action_feedback = ActionFeedback(
                is_success=False,
                result=f'Could not execute browser action {self.browser_action}'
            )

    def get_feedback_summary(self) -> str:
        """Returns the action result and the observed state change as one line."""
        return f"{self.action_feedback.result} → {self.action_feedback.state_change}"

    def update_action_state_change(self, state_change: str):
        """Overwrites the state change recorded for the executed action."""
        self.action_feedback.state_change = state_change

    def get_llm_step_summary(self) -> str:
        """Returns the feedback summary, or a fixed sentence when there was no action."""
        if self.browser_action:
            return self.get_feedback_summary()
        return "There was no more actions to do."

    @classmethod
    def _get_action(cls, next_step: Dict) -> BrowserAction | None:
        """
        Instantiates the action object.
        :param next_step: Dict with the "action" name and optional "params" for its constructor.
        :return: The action, or None when the action is "done", unknown, or cannot be
                 built from the given parameters.
        """
        action_type = next_step.get("action")

        if action_type == 'done':
            logger.info("The LLM has decided that there is nothing more to do.")
            return None

        # The value comes from the LLM; a non-string could not be looked up reliably.
        action_class = ACTION_REGISTRY.get(action_type) if isinstance(action_type, str) else None
        if not action_class:
            logger.warning(f"Unknown action type:{action_type}")
            return None

        # "params" may be missing or null when the action takes no arguments.
        params = next_step.get('params') or {}
        try:
            return action_class(**params)
        except TypeError as e:
            # Wrong or missing parameter names from the LLM must not abort the whole run.
            logger.warning(f"Invalid parameters for action '{action_type}': {e}")
            return None

    @classmethod
    def from_json(cls, data: Dict):
        """

        :param data: Ex: {"action": "click", "params":{"target_text": "Get Started"}, "reason":"..."}
        :return:
        """
        return cls(data=data)
92 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/agent.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import logging
  3 | import os
  4 | import time
  5 | from collections import defaultdict
  6 | from typing import List, Dict
  7 | from urllib.parse import urlparse, urlunparse
  8 | 
  9 | from ai_e2e_tester.browser.actions import ACTION_REGISTRY
 10 | from ai_e2e_tester.browser.guards.domain_guard import ensure_stay_on_domain
 11 | from ai_e2e_tester.browser.session import BrowserSession
 12 | from ai_e2e_tester.browser.visited_page import VisitedPage
 13 | from ai_e2e_tester.llm.openai import OpenAiWrapper
 14 | from ai_e2e_tester.reporting.console import ConsoleReporter
 15 | from ai_e2e_tester.utils import load_config
 16 | 
 17 | logger = logging.getLogger('ai-e2e-tester.agent')
 18 | 
 19 | 
 20 | class TestingAgent:
 21 |     """
 22 |     The TestingAgent is an intermediary allowing the LLM to use the web browser.
 23 |     """
 24 | 
 25 |     def __init__(self, url, config_path):
 26 | 
 27 |         self.url = url
 28 |         self.config = load_config(config_path)
 29 |         self.llm = OpenAiWrapper(prompts=self.config['prompts'])
 30 | 
 31 |         self.reporter = ConsoleReporter()
 32 | 
 33 |         self.visited_pages: List[VisitedPage] = []
 34 | 
 35 |         self.wait_between_steps = 0.5
 36 | 
 37 |     def run(self, max_steps: int):
 38 |         logger.info(f"Test started. Max steps: {max_steps}")
 39 |         browser_session = BrowserSession(self.url, headless=True)
 40 | 
 41 |         for step_idx in range(max_steps):
 42 | 
 43 |             logger.info(f'[Step {step_idx + 1}] On Page: {browser_session.url}')
 44 | 
 45 |             user_prompt = self._get_user_prompt(browser_session)
 46 |             system_prompt = self._get_system_prompt()
 47 |             screenshot_b64 = browser_session.get_screenshot(path=f"reports/screenshot_{step_idx + 1}.png")
 48 | 
 49 |             self._save_report(f"prompt-{step_idx + 1}.txt", user_prompt)
 50 | 
 51 |             self.last_result = self.llm.run(system_prompt, user_prompt, screenshot_b64)
 52 | 
 53 |             self._save_report(f"result-{step_idx + 1}.json", self.last_result)
 54 |             logger.info(f'Current goal: {self.last_result.get("goal")}')
 55 | 
 56 |             visited_page = VisitedPage.from_json(browser_session.page, self.last_result)
 57 |             self.visited_pages.append(visited_page)
 58 | 
 59 |             if visited_page.has_next_step():
 60 |                 browser_session.clear_console_messages()
 61 |                 visited_page.run_next_step(browser_session)
 62 |                 ensure_stay_on_domain(browser_session, visited_page)
 63 |                 logger.info(visited_page.next_step.get_feedback_summary())
 64 |             else:
 65 |                 logger.info("The LLM has decided that there is nothing more to do.")
 66 |                 break
 67 | 
 68 |             time.sleep(self.wait_between_steps)
 69 | 
 70 |         browser_session.close()
 71 |         logger.info("Test finished.")
 72 |         grouped_visits = self._get_grouped_visits()
 73 |         self.reporter.print_report(grouped_visits)
 74 | 
 75 |     def _get_grouped_visits(self) -> Dict[str, List[VisitedPage]]:
 76 |         """
 77 |         Groups all visits from the same URL together.
 78 |         Ignores anchors.
 79 |         :return:
 80 |         """
 81 |         grouped_visits = defaultdict(list)
 82 |         for page in self.visited_pages:
 83 |             parts = urlparse(page.page_url)
 84 |             normalized = parts._replace(query='', fragment='')
 85 |             clean_url = urlunparse(normalized)
 86 |             grouped_visits[clean_url].append(page)
 87 |         return grouped_visits
 88 | 
 89 |     def _generate_llm_context(self) -> str:
 90 |         if self.visited_pages:
 91 |             return self.visited_pages[-1].get_llm_visit_summary()
 92 |         return 'No context yet, this is the first visit to this website.'
 93 | 
 94 |     @classmethod
 95 |     def _generate_llm_available_actions(cls) -> str:
 96 |         """
 97 |         Explains to the LLM which actions it can do on the webpage.
 98 |         :return:
 99 |         """
100 |         return "\n".join(
101 |             action_cls.describe_for_llm() for action_cls in ACTION_REGISTRY.values()
102 |         )
103 | 
104 |     @classmethod
105 |     def _save_report(cls, name: str, content: str | Dict, reports_folder='debug'):
106 |         os.makedirs(reports_folder, exist_ok=True)
107 | 
108 |         if isinstance(content, dict):
109 |             content = json.dumps(content, indent=2)
110 | 
111 |         with open(f"{reports_folder}/{name}", "w", encoding="utf-8") as f:
112 |             f.write(content)
113 | 
114 |     def _get_user_prompt(self, browser_session: BrowserSession) -> str:
115 |         return self.config['prompts']['user'].format(
116 |             page_url=browser_session.url,
117 |             page_html=browser_session.get_optimized_html(),
118 |             console_logs=browser_session.get_console_messages(),
119 |             context=self._generate_llm_context(),
120 |             available_actions=self._generate_llm_available_actions()
121 |         )
122 | 
123 |     def _get_system_prompt(self) -> str:
124 |         return self.config['prompts']['system']
125 | 


--------------------------------------------------------------------------------
/src/ai_e2e_tester/browser/html/html_optimizer.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import re
  3 | from typing import List, Dict
  4 | 
  5 | from playwright.sync_api import Page, ElementHandle, FloatRect
  6 | 
  7 | from ai_e2e_tester.browser.html.visibility.basic_check import BasicVisibilityCheck
  8 | from ai_e2e_tester.browser.html.visibility.occlusion_check import OcclusionCheck
  9 | from ai_e2e_tester.browser.html.visibility.viewport_check import ViewportIntersectionCheck
 10 | from ai_e2e_tester.browser.html.visibility.visibility_check import VisibilityCheck
 11 | 
 12 | logger = logging.getLogger('ai-e2e-tester.browser.html.optimizer')
 13 | 
 14 | 
 15 | class HtmlOptimizer:
 16 |     """
 17 |     Optimizes the HTML to be given to the LLM:
 18 |     - Only keeps visible elements (reduces the number of tokens and avoid the LLM clicking on unrechable elements)
 19 |     - Removes parts of HTML that are too large (large SVG, style and script tags) to reduce number of tokens.
 20 |     """
 21 | 
 22 |     def __init__(self):
 23 |         self.html_cleanup_patterns = [
 24 |             r'\s_ngcontent-[^=]+="[^"]*"',  # Angular _ngcontent attributes
 25 |             r'\s_nghost-[^=]+="[^"]*"',  # Angular _nghost attributes
 26 |             r'\sdata-reactroot(?:="[^"]*")?',  # React root attribute
 27 |         ]
 28 | 
 29 |         # Trimming parts of HTML that can be very large.
 30 |         self.tags_to_trim = ['svg']
 31 | 
 32 |         # Removing the parts of HTML the agent can not interact with.
 33 |         self.visibility_checks: List[VisibilityCheck] = [
 34 |             BasicVisibilityCheck(),
 35 |             ViewportIntersectionCheck(),
 36 |             OcclusionCheck()
 37 |         ]
 38 | 
 39 |     def get_optimized_html(self, page: Page) -> str:
 40 |         initial_html = page.content()
 41 |         initial_size = len(initial_html)
 42 | 
 43 |         body = page.query_selector("body")
 44 |         viewport = page.viewport_size or {"width": float("inf"), "height": float("inf")}
 45 |         scroll_x = page.evaluate("() => window.scrollX")
 46 |         scroll_y = page.evaluate("() => window.scrollY")
 47 | 
 48 |         visible_html = self._visible_subtree(body, viewport, scroll_x, scroll_y) if body else ""
 49 | 
 50 |         for cleanup_pattern in self.html_cleanup_patterns:
 51 |             visible_html = re.sub(cleanup_pattern, '', visible_html)
 52 | 
 53 |         for tag_to_trim in self.tags_to_trim:
 54 |             visible_html = self.trim_large_tag(visible_html, tag_to_trim)
 55 | 
 56 |         final_size = len(visible_html)
 57 |         reduction = ((initial_size - final_size) / initial_size * 100) if initial_size > 0 else 0
 58 |         logger.debug(f"Optimized HTML size: {initial_size} -> {final_size} : Reduced by {reduction:.2f}%")
 59 | 
 60 |         return visible_html
 61 | 
 62 |     def _visible_subtree(self, el: ElementHandle, viewport: Dict, scroll_x: float, scroll_y: float) -> str:
 63 |         """
 64 |         Recursively build HTML for visible elements in the current viewport.
 65 |         """
 66 |         if not self._is_visible(el, viewport, scroll_x, scroll_y):
 67 |             return ""
 68 | 
 69 |         tag, attrs = self._get_tag_and_attrs(el)
 70 |         opening = f"<{tag}{' ' + attrs if attrs else ''}>"
 71 |         html_parts = [opening]
 72 | 
 73 |         # Include mixed text nodes
 74 |         text_nodes = self._get_text_nodes(el)
 75 |         if text_nodes.strip():
 76 |             html_parts.append(text_nodes)
 77 | 
 78 |         # Recurse for children
 79 |         for child in el.query_selector_all(":scope > *"):
 80 |             html_parts.append(self._visible_subtree(child, viewport, scroll_x, scroll_y))
 81 | 
 82 |         html_parts.append(f"</{tag}>")
 83 |         return "".join(html_parts)
 84 | 
 85 |     def _is_visible(self, el: ElementHandle, viewport: Dict, scroll_x: float, scroll_y: float) -> bool:
 86 |         """Run all registered visibility checks."""
 87 |         box = el.bounding_box()
 88 | 
 89 |         if not box:
 90 |             return False
 91 | 
 92 |         if self._ignore_check(box):
 93 |             return True
 94 | 
 95 |         return all(check.is_visible(el, box, viewport, scroll_x, scroll_y) for check in self.visibility_checks)
 96 | 
 97 |     @classmethod
 98 |     def _ignore_check(cls, box: FloatRect) -> bool:
 99 |         """
100 |         Skip strict visibility check for Angular-style wrappers (e.g. height=0, width>0)
101 |         :param box:
102 |         :return:
103 |         """
104 |         if (box["width"] == 0) != (box["height"] == 0):
105 |             return True
106 |         return False
107 | 
108 |     @classmethod
109 |     def _get_tag_and_attrs(cls, el: ElementHandle) -> tuple[str, str]:
110 |         tag = el.evaluate("el => el.tagName.toLowerCase()")
111 |         attrs = el.evaluate("""
112 |         el => Array.from(el.attributes)
113 |           .map(a => `${a.name}="${a.value}"`)
114 |           .join(' ')
115 |         """)
116 |         return tag, attrs
117 | 
118 |     @classmethod
119 |     def _get_text_nodes(cls, el: ElementHandle) -> str:
120 |         return el.evaluate("""
121 |         el => Array.from(el.childNodes)
122 |           .filter(n => n.nodeType === 3)
123 |           .map(n => n.textContent)
124 |           .join('')
125 |         """)
126 | 
127 |     @classmethod
128 |     def trim_large_tag(cls, html: str, tag: str, max_length: int = 500) -> str:
129 |         """
130 |         Trims the inner content of specified tag if it exceeds max_length characters.
131 |         Keeps the opening and closing tags intact, with a placeholder for trimmed content.
132 |         """
133 |         pattern = fr"<{tag}[^>]*>(.*?)</{tag}>"
134 | 
135 |         def _trim(match):
136 |             inner = match.group(1)
137 |             if len(inner) > max_length:
138 |                 logger.info(f'Trimming long {tag} ({len(inner)} > {max_length})')
139 |                 return f"<{tag}>...trimmed {len(inner)} chars...</{tag}>"
140 |             return match.group(0)
141 | 
142 |         return re.sub(pattern, _trim, html, flags=re.DOTALL | re.IGNORECASE)
143 | 


--------------------------------------------------------------------------------