├── autowing ├── __init__.py ├── core │ ├── __init__.py │ ├── llm │ │ ├── __init__.py │ │ ├── client │ │ │ ├── __init__.py │ │ │ ├── qwen.py │ │ │ ├── deepseek.py │ │ │ ├── doubao.py │ │ │ └── openai.py │ │ ├── base.py │ │ └── factory.py │ ├── ai_context.py │ ├── cache │ │ └── cache_manager.py │ └── ai_fixture_base.py ├── utils │ └── transition.py ├── appium │ ├── actions.py │ └── fixture.py ├── selenium │ └── fixture.py └── playwright │ └── fixture.py ├── wechat.jpg ├── auto-wing.png ├── docs ├── image │ ├── working.png │ └── element_list.png └── how_to_work.md ├── examples ├── .env ├── test_selenium_iframes.py ├── test_playwright_iframes.py ├── test_selenium_pytest.py ├── test_selenium_unittest.py ├── test_playwright_pytest.py ├── test_appium_unittest.py ├── test_appium_pytest.py └── test_playwright_unittest.py ├── pyproject.toml ├── CHANGES.md ├── .gitignore ├── README.md └── LICENSE /autowing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autowing/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autowing/core/llm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autowing/core/llm/client/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /wechat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SeldomQA/auto-wing/HEAD/wechat.jpg -------------------------------------------------------------------------------- /auto-wing.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SeldomQA/auto-wing/HEAD/auto-wing.png -------------------------------------------------------------------------------- /docs/image/working.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SeldomQA/auto-wing/HEAD/docs/image/working.png -------------------------------------------------------------------------------- /docs/image/element_list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SeldomQA/auto-wing/HEAD/docs/image/element_list.png -------------------------------------------------------------------------------- /examples/.env: -------------------------------------------------------------------------------- 1 | AUTOWING_MODEL_PROVIDER=deepseek 2 | DEEPSEEK_API_KEY=sk-abdefghijklmnopqrstwvwxyz0123456789 -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["poetry-core>=1.0.0"] 3 | build-backend = "poetry.core.masonry.api" 4 | 5 | [project] 6 | name = "autowing" 7 | version = "0.6.1" 8 | description = "auto-wing is a tool that uses LLM to assist automated testing." 
9 | readme = "README.md" 10 | authors = [ 11 | { name = "defnngj", email = "defnngj@gmail.com" } 12 | ] 13 | requires-python = ">=3.9" 14 | 15 | dependencies = [ 16 | "openai>=1.60.1,<2.0.0", 17 | "dashscope>=1.22.1,<2.0.0", 18 | "pytest-playwright>=0.6.2,<0.7.0", 19 | "appium-python-client>=5.1.0,<6.0.0", 20 | "python-dotenv>=1.0.1,<2.0.0", 21 | "loguru>=0.7.3,<0.8.0", 22 | ] 23 | 24 | [project.urls] 25 | repository = "https://github.com/SeldomQA/auto-wing" 26 | homepage = "https://github.com/SeldomQA/auto-wing" 27 | -------------------------------------------------------------------------------- /autowing/utils/transition.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | def selector_to_locator(selector: str) -> str: 5 | """ 6 | selector to playwright locator 7 | :param selector: 8 | :return: 9 | """ 10 | if '[text()=' in selector: 11 | return re.sub( 12 | r'\[text\(\)\s*=\s*(?P[\'"])(?P.*?)(?P=quote)\]', 13 | lambda m: f':has-text({m.group("quote")}{m.group("content")}{m.group("quote")})', 14 | selector 15 | ) 16 | 17 | return selector 18 | 19 | 20 | def selector_to_selenium(selector: str) -> str: 21 | """ 22 | selector to selenium 23 | :param selector: 24 | :return: 25 | """ 26 | if '[text()=' in selector: 27 | pattern = re.compile(r'\[text\(\)\s*=\s*(?P[\'"])(?P.*?)(?P=quote)\]') 28 | return pattern.sub(r'[contains(text(),\g\g\g)]', selector) 29 | 30 | return selector 31 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | ### 0.6.1 2 | 3 | * 支持AI操作文本链接。 4 | * appium升级`>5.1`。 5 | * 更新qwen默认模型,使用最新`qwen3`。 6 | 7 | ### 0.6.0 8 | 9 | * 增加默认缓存功能,减少不必要的LLM调用,增加速度。 10 | * 移动端支持iOS❕。 11 | * 更新qwen默认模型,使用最新`qwen2.5`。 12 | 13 | ### 0.5.1 14 | 15 | * 识别更多的页面元素。 16 | * CSS选择器优化提示词,用于识别包含`$`符号的ID属性。 17 | * `playwright`/`selenium` 分别支持表单操作。 18 | * 移除`prompt`中无效信息,节省`tokens`使用。 19 | * 
LLM客户端代码优化。 20 | 21 | ### 0.5.0 22 | 23 | * 功能:增加 `ai_function_case()`, 识别页面元素生成功能用例。 24 | * 功能:增加appium依赖,支持App端的AI操作。 25 | 26 | ### 0.4.0 27 | 28 | * 功能:增加 `doubao`支持。 29 | 30 | ### 0.3.0 31 | 32 | * 增加日志功能,调用相关API显示日志。 33 | * 优化fixture相关代码。 34 | * python版本要求`>=3.9`(最新selenium版本要求)。 35 | 36 | ### 0.2.2 37 | 38 | * 优化:`ai_query()`、`ai_assert()`识别速度和格式兼容性。 39 | 40 | ### 0.2.1 41 | 42 | * 优化:python版本要求改为`>=3.8`。 43 | 44 | ### 0.2.0 45 | 46 | * 功能:增加 `openai`支持。 47 | 48 | ### 0.1.0 49 | 50 | * 功能: 51 | * 支持LLM: `qwen`、`deepseek`。 52 | * 提供操作:`ai_action()`、`ai_query()`、`ai_assert()`。 53 | * 支持测试库: `playwright`、`selenium`等。 54 | -------------------------------------------------------------------------------- /examples/test_selenium_iframes.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import pytest 4 | from dotenv import load_dotenv 5 | from selenium import webdriver 6 | from selenium.webdriver.common.by import By 7 | 8 | from autowing.selenium.fixture import create_fixture 9 | 10 | 11 | @pytest.fixture(scope="session") 12 | def driver(): 13 | """ 14 | Create and configure Edge WebDriver instance. 15 | """ 16 | 17 | load_dotenv() 18 | 19 | driver = webdriver.Edge() 20 | 21 | yield driver 22 | 23 | driver.quit() 24 | 25 | 26 | @pytest.fixture 27 | def ai(driver): 28 | """ 29 | Create an AI-powered Selenium fixture. 
30 | """ 31 | ai_fixture = create_fixture() 32 | return ai_fixture(driver) 33 | 34 | 35 | def test_iframes(ai, driver): 36 | driver.get("https://sahitest.com/demo/iframesTest.htm") 37 | 38 | iframe = driver.find_element(By.XPATH, "/html/body/iframe") 39 | driver.switch_to.frame(iframe) 40 | 41 | ai.ai_action('点击"Link Test"链接') 42 | 43 | time.sleep(2) 44 | 45 | ai.ai_query('检查页面是否包含"linkByContent"字符串') 46 | 47 | 48 | if __name__ == '__main__': 49 | pytest.main(["test_selenium_iframes.py", "-s"]) 50 | -------------------------------------------------------------------------------- /examples/test_playwright_iframes.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dotenv import load_dotenv 3 | from playwright.sync_api import Page, sync_playwright 4 | 5 | from autowing.playwright.fixture import create_fixture 6 | 7 | 8 | @pytest.fixture(scope="session") 9 | def page(): 10 | """playwright fixture""" 11 | 12 | load_dotenv() 13 | 14 | with sync_playwright() as p: 15 | browser = p.chromium.launch(headless=False) 16 | context = browser.new_context() 17 | page = context.new_page() 18 | 19 | yield page 20 | 21 | context.close() 22 | browser.close() 23 | 24 | 25 | @pytest.fixture 26 | def ai(page): 27 | """ai fixture""" 28 | ai_fixture = create_fixture() 29 | return ai_fixture(page) 30 | 31 | 32 | def test_baidu_search(page: Page, ai): 33 | page.goto("https://sahitest.com/demo/iframesTest.htm") 34 | 35 | iframe = page.frame_locator("body > iframe") 36 | 37 | ai.ai_action('点击"Link Test"链接', iframe) 38 | 39 | page.wait_for_timeout(2000) 40 | 41 | ai.ai_query('检查页面是否包含"linkByContent"字符串') 42 | 43 | 44 | if __name__ == '__main__': 45 | pytest.main(["test_playwright_iframes.py", "-s"]) 46 | -------------------------------------------------------------------------------- /autowing/core/ai_context.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional 
2 | import json 3 | 4 | 5 | class AiContext: 6 | """ 7 | A class for managing AI context data. 8 | Provides storage and retrieval of context information used in AI operations. 9 | """ 10 | 11 | def __init__(self): 12 | """ 13 | Initialize an empty context storage. 14 | """ 15 | self._context: Dict[str, Any] = {} 16 | 17 | def set_context(self, key: str, value: Any) -> None: 18 | """ 19 | Store a value in the context. 20 | 21 | Args: 22 | key (str): The key under which to store the value 23 | value (Any): The value to store 24 | """ 25 | self._context[key] = value 26 | 27 | def get_context(self, key: str) -> Optional[Any]: 28 | """ 29 | Retrieve a value from the context. 30 | 31 | Args: 32 | key (str): The key of the value to retrieve 33 | 34 | Returns: 35 | Optional[Any]: The stored value, or None if the key doesn't exist 36 | """ 37 | return self._context.get(key) 38 | 39 | def to_json(self) -> str: 40 | """ 41 | Convert the context to a JSON string. 42 | 43 | Returns: 44 | str: JSON string representation of the context 45 | """ 46 | return json.dumps(self._context) 47 | -------------------------------------------------------------------------------- /autowing/appium/actions.py: -------------------------------------------------------------------------------- 1 | from time import sleep as sys_sleep 2 | 3 | from loguru import logger 4 | from selenium.webdriver.common.action_chains import ActionChains 5 | from selenium.webdriver.common.actions import interaction 6 | from selenium.webdriver.common.actions.action_builder import ActionBuilder 7 | from selenium.webdriver.common.actions.pointer_input import PointerInput 8 | 9 | 10 | class Action: 11 | """ 12 | Encapsulate basic actions: tap, etc 13 | """ 14 | 15 | def __init__(self, driver=None): 16 | self.driver = driver 17 | 18 | def tap(self, x: int, y: int, pause: float = 0.1, sleep: float = 1) -> None: 19 | """ 20 | Tap on the coordinates 21 | :param x: x coordinates 22 | :param y: y coordinates 23 | :param pause: 
pause time 24 | :param sleep: sleep time 25 | :return: 26 | """ 27 | logger.info(f"👆 top x={x},y={y}.") 28 | actions = ActionChains(self.driver) 29 | actions.w3c_actions = ActionBuilder(self.driver, mouse=PointerInput(interaction.POINTER_TOUCH, "touch")) 30 | actions.w3c_actions.pointer_action.move_to_location(x, y) 31 | actions.w3c_actions.pointer_action.pointer_down() 32 | actions.w3c_actions.pointer_action.pause(pause) 33 | actions.w3c_actions.pointer_action.release() 34 | actions.perform() 35 | sys_sleep(sleep) 36 | -------------------------------------------------------------------------------- /examples/test_selenium_pytest.py: -------------------------------------------------------------------------------- 1 | """ 2 | pytest example for Selenium with AI automation. 3 | """ 4 | import time 5 | import pytest 6 | from selenium import webdriver 7 | from autowing.selenium.fixture import create_fixture 8 | 9 | from dotenv import load_dotenv 10 | 11 | 12 | @pytest.fixture(scope="session") 13 | def driver(): 14 | """ 15 | Create and configure Edge WebDriver instance. 16 | """ 17 | # loading .env file 18 | load_dotenv() 19 | 20 | driver = webdriver.Edge() 21 | 22 | yield driver 23 | 24 | driver.quit() 25 | 26 | 27 | @pytest.fixture 28 | def ai(driver): 29 | """ 30 | Create an AI-powered Selenium fixture. 31 | """ 32 | ai_fixture = create_fixture() 33 | return ai_fixture(driver) 34 | 35 | 36 | def test_bing_search(ai, driver): 37 | """ 38 | Test Bing search functionality using AI-driven automation. 39 | 40 | This test demonstrates: 41 | 1. Navigating to Bing 42 | 2. Performing a search 43 | 3. 
Verifying search results 44 | """ 45 | # Navigate to Bing 46 | driver.get("https://cn.bing.com") 47 | 48 | ai.ai_action('搜索输入框输入"playwright"关键字,并回车') 49 | time.sleep(3) 50 | 51 | items = ai.ai_query('string[], 搜索结果列表中包含"playwright"相关的标题') 52 | assert len(items) > 1 53 | 54 | # 使用AI断言 55 | assert ai.ai_assert('检查搜索结果列表第一条标题是否包含"playwright"字符串') 56 | 57 | 58 | if __name__ == '__main__': 59 | pytest.main(["test_selenium_pytest.py", "-s"]) 60 | -------------------------------------------------------------------------------- /examples/test_selenium_unittest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unittest example for Selenium with AI automation. 3 | """ 4 | import unittest 5 | import time 6 | from selenium import webdriver 7 | from autowing.selenium.fixture import create_fixture 8 | from dotenv import load_dotenv 9 | 10 | 11 | class TestBingSearch(unittest.TestCase): 12 | 13 | @classmethod 14 | def setUpClass(cls): 15 | # load .env file 16 | load_dotenv() 17 | # Initialize Edge WebDriver 18 | cls.driver = webdriver.Edge() 19 | # Create AI fixture 20 | ai_fixture = create_fixture() 21 | cls.ai = ai_fixture(cls.driver) 22 | 23 | @classmethod 24 | def tearDownClass(cls): 25 | cls.driver.quit() 26 | 27 | def test_01_bing_search(self): 28 | """ 29 | Test Bing search functionality using AI-driven automation. 30 | 31 | This test demonstrates: 32 | 1. Navigating to Bing 33 | 2. Performing a search 34 | 3. 
Verifying search results 35 | """ 36 | self.driver.get("https://cn.bing.com") 37 | 38 | self.ai.ai_action('搜索输入框输入"playwright"关键字,并回车') 39 | time.sleep(3) 40 | 41 | items = self.ai.ai_query('string[], 搜索结果列表中包含"playwright"相关的标题') 42 | 43 | self.assertGreater(len(items), 1) 44 | 45 | self.assertTrue( 46 | self.ai.ai_assert('检查搜索结果列表第一条标题是否包含"playwright"字符串') 47 | ) 48 | 49 | 50 | if __name__ == '__main__': 51 | unittest.main() 52 | -------------------------------------------------------------------------------- /examples/test_playwright_pytest.py: -------------------------------------------------------------------------------- 1 | """ 2 | pytest example for Playwright with AI automation. 3 | """ 4 | import pytest 5 | from playwright.sync_api import Page, sync_playwright 6 | from autowing.playwright.fixture import create_fixture 7 | 8 | from dotenv import load_dotenv 9 | 10 | 11 | @pytest.fixture(scope="session") 12 | def page(): 13 | """ 14 | playwright fixture 15 | """ 16 | # loading .env file 17 | load_dotenv() 18 | with sync_playwright() as p: 19 | browser = p.chromium.launch(headless=False) 20 | context = browser.new_context() 21 | page = context.new_page() 22 | yield page 23 | context.close() 24 | browser.close() 25 | 26 | 27 | @pytest.fixture 28 | def ai(page): 29 | """ 30 | ai fixture 31 | """ 32 | ai_fixture = create_fixture() 33 | return ai_fixture(page) 34 | 35 | 36 | def test_bing_search(page: Page, ai): 37 | """ 38 | Test Bing search functionality using AI-driven automation. 39 | This test demonstrates: 40 | 1. Navigating to Bing 41 | 2. Performing a search 42 | 3. 
Verifying search results 43 | """ 44 | page.goto("https://cn.bing.com") 45 | 46 | ai.ai_action('搜索输入框输入"playwright"关键字,并回车') 47 | page.wait_for_timeout(3000) 48 | 49 | items = ai.ai_query('string[], 搜索结果列表中包含"playwright"相关的标题') 50 | 51 | assert len(items) > 1 52 | 53 | print("assert") 54 | assert ai.ai_assert('检查搜索结果列表第一条标题是否包含"playwright"字符串') 55 | 56 | 57 | if __name__ == '__main__': 58 | pytest.main(["test_playwright_pytest.py", "-s"]) 59 | -------------------------------------------------------------------------------- /examples/test_appium_unittest.py: -------------------------------------------------------------------------------- 1 | import time 2 | import unittest 3 | 4 | from appium import webdriver 5 | from appium.options.android import UiAutomator2Options 6 | from dotenv import load_dotenv 7 | 8 | from autowing.appium.fixture import create_fixture 9 | 10 | 11 | class TestBingApp(unittest.TestCase): 12 | """ 13 | Test Bing APP 14 | """ 15 | 16 | @classmethod 17 | def setUpClass(cls): 18 | load_dotenv() 19 | 20 | def setUp(self): 21 | capabilities = { 22 | 'deviceName': 'MDX0220413011925', 23 | 'automationName': 'UiAutomator2', 24 | 'platformName': 'Android', 25 | 'appPackage': 'com.microsoft.bing', 26 | 'appActivity': 'com.microsoft.sapphire.app.main.MainSapphireActivity', 27 | 'noReset': True, 28 | } 29 | options = UiAutomator2Options().load_capabilities(capabilities) 30 | self.driver = webdriver.Remote(command_executor="http://127.0.0.1:4723", options=options) 31 | 32 | ai_fixture = create_fixture() 33 | self.ai = ai_fixture(self.driver) 34 | 35 | def tearDown(self): 36 | self.driver.quit() 37 | 38 | def test_bing_search(self): 39 | """ 40 | test bing App search 41 | """ 42 | self.ai.ai_action('点击搜索框,然后输入"auto-wing"关键字,然后回车搜索') 43 | time.sleep(3) 44 | 45 | items = self.ai.ai_query('string[], 搜索结果列表中包含"auto-wing"相关的标题') 46 | assert len(items) > 1 47 | 48 | self.ai.ai_assert('检查搜索结果列表第一条标题是否包含"auto-wing"字符串') 49 | 50 | 51 | if __name__ == '__main__': 52 | 
unittest.main() 53 | -------------------------------------------------------------------------------- /examples/test_appium_pytest.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import pytest 4 | from appium import webdriver 5 | from appium.options.android import UiAutomator2Options 6 | from dotenv import load_dotenv 7 | 8 | from autowing.appium.fixture import create_fixture 9 | 10 | 11 | @pytest.fixture(scope="function") 12 | def driver(): 13 | """ 14 | Create and configure Edge WebDriver instance. 15 | """ 16 | # loading .env file 17 | load_dotenv() 18 | 19 | capabilities = { 20 | 'deviceName': 'MDX0220413011925', 21 | 'automationName': 'UiAutomator2', 22 | 'platformName': 'Android', 23 | 'appPackage': 'com.microsoft.bing', 24 | 'appActivity': 'com.microsoft.sapphire.app.main.MainSapphireActivity', 25 | 'noReset': True, 26 | } 27 | options = UiAutomator2Options().load_capabilities(capabilities) 28 | driver = webdriver.Remote(command_executor="http://127.0.0.1:4723", options=options) 29 | 30 | yield driver 31 | 32 | driver.quit() 33 | 34 | 35 | @pytest.fixture 36 | def ai(driver): 37 | """ 38 | Create an AI-powered Selenium fixture. 39 | """ 40 | ai_fixture = create_fixture() 41 | return ai_fixture(driver, "Android") 42 | 43 | 44 | def test_bing_search(ai, driver): 45 | """ 46 | test bing App search 47 | """ 48 | ai.ai_action('点击搜索框,然后输入"auto-wing"关键字,然后回车搜索') 49 | time.sleep(3) 50 | 51 | items = ai.ai_query('string[], 搜索结果列表中包含"auto-wing"相关的标题') 52 | assert len(items) > 1 53 | 54 | ai.ai_assert('检查搜索结果列表第一条标题是否包含"auto-wing"字符串') 55 | 56 | 57 | if __name__ == '__main__': 58 | pytest.main(["test_appium_pytest.py", "-s"]) 59 | -------------------------------------------------------------------------------- /examples/test_playwright_unittest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unittest example for Playwright with AI automation. 
3 | """ 4 | import unittest 5 | from playwright.sync_api import sync_playwright 6 | from autowing.playwright.fixture import create_fixture 7 | from dotenv import load_dotenv 8 | 9 | 10 | class TestBingSearch(unittest.TestCase): 11 | 12 | @classmethod 13 | def setUpClass(cls): 14 | # loading .env file 15 | load_dotenv() 16 | # Initialize browser 17 | cls.playwright = sync_playwright().start() 18 | cls.browser = cls.playwright.chromium.launch(headless=False) 19 | cls.context = cls.browser.new_context() 20 | cls.page = cls.context.new_page() 21 | # Create AI fixture 22 | ai_fixture = create_fixture() 23 | cls.ai = ai_fixture(cls.page) 24 | 25 | @classmethod 26 | def tearDownClass(cls): 27 | cls.context.close() 28 | cls.browser.close() 29 | cls.playwright.stop() 30 | 31 | def test_01_bing_search(self): 32 | """ 33 | Test Bing search functionality using AI-driven automation. 34 | This test demonstrates: 35 | 1. Navigating to Bing 36 | 2. Performing a search 37 | 3. Verifying search results 38 | """ 39 | self.page.goto("https://cn.bing.com") 40 | 41 | self.ai.ai_action('搜索输入框输入"playwright"关键字,并回车') 42 | self.page.wait_for_timeout(3000) 43 | 44 | items = self.ai.ai_query('string[], 搜索结果列表中包含"playwright"相关的标题') 45 | 46 | self.assertGreater(len(items), 1) 47 | 48 | self.assertTrue( 49 | self.ai.ai_assert('检查搜索结果列表第一条标题是否包含"playwright"字符串') 50 | ) 51 | 52 | 53 | if __name__ == '__main__': 54 | unittest.main() 55 | -------------------------------------------------------------------------------- /autowing/core/llm/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Dict, Any, Optional 3 | 4 | 5 | class BaseLLMClient(ABC): 6 | """ 7 | Abstract base class for Language Model clients. 8 | Defines the interface that all LLM clients must implement. 
9 | """ 10 | 11 | @abstractmethod 12 | def complete(self, prompt: str, context: Optional[Dict[str, Any]] = None) -> str: 13 | """ 14 | Generate a completion for the given prompt with optional context. 15 | 16 | Args: 17 | prompt (str): The input text to generate completion for 18 | context (Optional[Dict[str, Any]]): Additional context information for the completion 19 | 20 | Returns: 21 | str: The generated completion text 22 | 23 | Raises: 24 | NotImplementedError: If the subclass doesn't implement this method 25 | """ 26 | pass 27 | 28 | @abstractmethod 29 | def complete_with_vision(self, prompt: Dict[str, Any]) -> str: 30 | """ 31 | Generate a completion for vision-based tasks. 32 | 33 | Args: 34 | prompt (Dict[str, Any]): A dictionary containing the prompt and image data 35 | in the format required by the specific model 36 | 37 | Returns: 38 | str: The generated completion text 39 | 40 | Raises: 41 | NotImplementedError: If the subclass doesn't implement this method 42 | """ 43 | pass 44 | 45 | @classmethod 46 | def get_model_name(cls) -> str: 47 | """ 48 | Get the standardized name of the model. 49 | 50 | Returns: 51 | str: The model name in lowercase, with 'client' suffix removed 52 | """ 53 | return cls.__name__.lower().replace('client', '') 54 | -------------------------------------------------------------------------------- /autowing/core/llm/factory.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Type 3 | 4 | from loguru import logger 5 | 6 | from autowing.core.llm.base import BaseLLMClient 7 | from autowing.core.llm.client.deepseek import DeepSeekClient 8 | from autowing.core.llm.client.doubao import DoubaoClient 9 | from autowing.core.llm.client.openai import OpenAIClient 10 | from autowing.core.llm.client.qwen import QwenClient 11 | 12 | 13 | class LLMFactory: 14 | """ 15 | Factory class for creating Language Model clients. 
16 | Provides centralized management of different LLM implementations. 17 | """ 18 | 19 | _models = { 20 | 'openai': OpenAIClient, 21 | 'qwen': QwenClient, 22 | 'deepseek': DeepSeekClient, 23 | 'doubao': DoubaoClient 24 | } 25 | 26 | @classmethod 27 | def create(cls) -> BaseLLMClient: 28 | """ 29 | Create an instance of the configured LLM client. 30 | 31 | Returns: 32 | BaseLLMClient: An instance of the specified LLM client 33 | 34 | Raises: 35 | ValueError: If the specified model provider is not supported 36 | """ 37 | model_name = os.getenv("AUTOWING_MODEL_PROVIDER", "deepseek").lower() 38 | if model_name not in cls._models: 39 | raise ValueError(f"Unsupported model provider: {model_name}") 40 | 41 | logger.info(f"🤖 AUTOWING_MODEL_PROVIDER={model_name}") 42 | 43 | model_class = cls._models[model_name] 44 | return model_class() 45 | 46 | @classmethod 47 | def register_model(cls, name: str, model_class: Type[BaseLLMClient]) -> None: 48 | """ 49 | Register a new LLM client implementation. 50 | 51 | Args: 52 | name (str): The name to register the model under 53 | model_class (Type[BaseLLMClient]): The class implementing the BaseLLMClient interface 54 | """ 55 | cls._models[name.lower()] = model_class 56 | -------------------------------------------------------------------------------- /autowing/core/cache/cache_manager.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import json 3 | import os 4 | from datetime import datetime, timedelta 5 | from typing import Any, Optional 6 | 7 | 8 | class CacheManager: 9 | """ 10 | Manages caching of AI responses to improve performance. 11 | """ 12 | 13 | def __init__(self, cache_dir: str = ".auto-wing/cache", ttl_days: int = 7): 14 | """ 15 | Initialize the cache manager. 
16 | 17 | Args: 18 | cache_dir: Directory to store cache files 19 | ttl_days: Number of days to keep cache entries 20 | """ 21 | self.cache_dir = cache_dir 22 | self.ttl_days = ttl_days 23 | os.makedirs(cache_dir, exist_ok=True) 24 | 25 | def _generate_cache_key(self, prompt: str, context: dict) -> str: 26 | """Generate a unique cache key based on prompt and context.""" 27 | # Create a string combining prompt and relevant context 28 | cache_str = f"{prompt}:{json.dumps(context, sort_keys=True)}" 29 | return hashlib.md5(cache_str.encode()).hexdigest() 30 | 31 | def _get_cache_path(self, cache_key: str) -> str: 32 | """Get the file path for a cache entry.""" 33 | return os.path.join(self.cache_dir, f"{cache_key}.json") 34 | 35 | def get(self, prompt: str, context: dict) -> Optional[Any]: 36 | """ 37 | Get a cached response if available and not expired. 38 | """ 39 | cache_key = self._generate_cache_key(prompt, context) 40 | cache_path = self._get_cache_path(cache_key) 41 | 42 | if not os.path.exists(cache_path): 43 | return None 44 | 45 | try: 46 | with open(cache_path, 'r', encoding='utf-8') as f: 47 | cache_data = json.load(f) 48 | 49 | # Check if cache has expired 50 | cached_time = datetime.fromisoformat(cache_data['timestamp']) 51 | if datetime.now() - cached_time > timedelta(days=self.ttl_days): 52 | os.remove(cache_path) 53 | return None 54 | 55 | return cache_data['response'] 56 | except (json.JSONDecodeError, KeyError, ValueError): 57 | return None 58 | 59 | def set(self, prompt: str, context: dict, response: Any) -> None: 60 | """ 61 | Cache a response. 
62 | """ 63 | cache_key = self._generate_cache_key(prompt, context) 64 | cache_path = self._get_cache_path(cache_key) 65 | 66 | cache_data = { 67 | 'timestamp': datetime.now().isoformat(), 68 | 'prompt': prompt, 69 | 'context': context, 70 | 'response': response 71 | } 72 | 73 | with open(cache_path, 'w', encoding='utf-8') as f: 74 | json.dump(cache_data, f, ensure_ascii=False, indent=2) 75 | 76 | def clear(self, days: Optional[int] = None) -> None: 77 | """ 78 | Clear expired cache entries. 79 | 80 | Args: 81 | days: Optional number of days, defaults to ttl_days 82 | """ 83 | if days is None: 84 | days = self.ttl_days 85 | 86 | for filename in os.listdir(self.cache_dir): 87 | if not filename.endswith('.json'): 88 | continue 89 | 90 | filepath = os.path.join(self.cache_dir, filename) 91 | try: 92 | with open(filepath, 'r', encoding='utf-8') as f: 93 | cache_data = json.load(f) 94 | 95 | cached_time = datetime.fromisoformat(cache_data['timestamp']) 96 | if datetime.now() - cached_time > timedelta(days=days): 97 | os.remove(filepath) 98 | except (json.JSONDecodeError, KeyError, ValueError): 99 | # Remove invalid cache files 100 | os.remove(filepath) 101 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
class AiFixtureBase:
    """
    Base class for AI fixtures.

    Contains the response-processing logic shared by the Playwright,
    Selenium, and Appium fixtures: shrinking element payloads, stripping
    markdown fences from LLM replies, validating result formats, and
    caching prompt/response pairs.
    """

    def __init__(self):
        """Initialize the base fixture with cache support."""
        # CacheManager persists prompt/context -> response pairs so that a
        # repeated identical AI task can skip the LLM round-trip entirely.
        self.cache_manager = CacheManager()

    def _remove_empty_keys(self, dict_list: list) -> list:
        """
        Drop empty-string and None values from each element dictionary.

        This shrinks the page-context payload sent to the LLM and therefore
        reduces token usage.

        Args:
            dict_list (list): List of element-attribute dictionaries.

        Returns:
            list: A new list of dictionaries without empty values. Returns
            an empty list for falsy input.
        """
        if not dict_list:
            return []
        # Only '' and None count as empty; falsy-but-meaningful values such
        # as 0 and False are kept.
        return [
            {k: v for k, v in d.items() if v != '' and v is not None}
            for d in dict_list
        ]

    def _clean_response(self, response: str) -> str:
        """
        Clean the response text by stripping markdown formatting.

        Handles ```json fenced blocks, plain ``` fences, and a leading
        "json"/"python" language tag left over after fence removal.

        Args:
            response (str): Raw response from LLM.

        Returns:
            str: Cleaned response text.
        """
        response = response.strip()
        if '```' in response:
            # Prioritize handling ```json fences so the language tag is not
            # mistaken for payload content.
            if '```json' in response:
                response = response.split('```json')[1].split('```')[0].strip()
            else:
                response = response.split('```')[1].split('```')[0].strip()
        # If the cleaned response still starts with "json" or "python",
        # drop that first descriptive line.
        if response.startswith(('json', 'python')):
            parts = response.split('\n', 1)
            if len(parts) > 1:
                response = parts[1].strip()
        return response

    def _validate_result_format(self, result: Any, format_hint: str) -> Any:
        """
        Validate and convert the result to match the requested format.

        Args:
            result: The parsed result from the AI response.
            format_hint: The requested format (e.g., 'string[]', 'number[]',
                'object[]'). An empty hint returns the result unchanged.

        Returns:
            The validated and possibly converted result.

        Raises:
            ValueError: If the result cannot be coerced to the requested format.
        """
        if not format_hint:
            return result

        if format_hint == 'string[]':
            # Scalars are wrapped, then every item is stringified.
            if not isinstance(result, list):
                result = [str(result)]
            return [str(item) for item in result]

        if format_hint == 'number[]':
            if not isinstance(result, list):
                result = [result]
            try:
                return [float(item) for item in result]
            except (ValueError, TypeError):
                raise ValueError(f"Cannot convert results to numbers: {result}")

        if format_hint == 'object[]':
            if not isinstance(result, list):
                result = [result]
            if not all(isinstance(item, dict) for item in result):
                raise ValueError(f"Not all items are objects: {result}")
            return result

        # Unknown hints are passed through untouched.
        return result

    def _get_cached_or_compute(self, prompt: str, context: dict, compute_func) -> Any:
        """
        Get a response from the cache or compute it with the given function.

        Args:
            prompt: The prompt used to generate the cache key.
            context: The context used to generate the cache key.
            compute_func: Zero-argument callable that produces the response
                when there is no cache hit.

        Returns:
            The cached or freshly computed response.
        """
        # Try the cache first so identical tasks avoid another LLM call.
        cached_response = self.cache_manager.get(prompt, context)
        if cached_response is not None:
            return cached_response

        # Compute response if not cached.
        response = compute_func()

        # Cache the computed response for subsequent runs.
        self.cache_manager.set(prompt, context, response)

        return response
6 | 7 | ## 工作原理 8 | 9 | * __流程图__ 10 | 11 | ![](./image/working.png) 12 | 13 | 14 | ## 实现过程 15 | 16 | ### 1. 抓取当前页面所有元素 17 | 18 | ```js 19 | const getVisibleElements = () => { 20 | const elements = []; 21 | const selectors = [ 22 | 'input', 'button', 'a', '[role="button"]', 23 | '[role="link"]', '[role="searchbox"]', 'textarea' 24 | ]; 25 | 26 | for (const selector of selectors) { 27 | document.querySelectorAll(selector).forEach(el => { 28 | if (el.offsetWidth > 0 && el.offsetHeight > 0) { 29 | elements.push({ 30 | tag: el.tagName.toLowerCase(), 31 | type: el.getAttribute('type'), 32 | placeholder: el.getAttribute('placeholder'), 33 | value: el.value, 34 | text: el.textContent?.trim(), 35 | aria: el.getAttribute('aria-label'), 36 | id: el.id, 37 | name: el.getAttribute('name'), 38 | class: el.className 39 | }); 40 | } 41 | }); 42 | } 43 | return elements; 44 | }; 45 | ``` 46 | 47 | 获取当前页面的元素信息: 48 | 49 | ![](./image/element_list.png) 50 | 51 | 52 | ### 2.根据描述分析元素定位和动作 53 | 54 | __2.1 大模型的调用__ 55 | 56 | * openai SDK(推荐) 57 | 58 | ```python 59 | import os 60 | from openai import OpenAI 61 | 62 | client = OpenAI( 63 | # 若没有配置环境变量,请用百炼API Key将下行替换为:api_key="sk-xxx", 64 | api_key=os.getenv("DASHSCOPE_API_KEY"), 65 | base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", 66 | ) 67 | completion = client.chat.completions.create( 68 | model="qwen-plus", # 此处以qwen-plus为例,可按需更换模型名称。模型列表:https://help.aliyun.com/zh/model-studio/getting-started/models 69 | messages=[ 70 | {'role': 'system', 'content': 'You are a helpful assistant.'}, 71 | {'role': 'user', 'content': '你是谁?'}], 72 | ) 73 | 74 | print(completion.model_dump_json()) 75 | ``` 76 | 77 | * Request调用 78 | 79 | ```python 80 | import requests 81 | import os 82 | 83 | # 设置 API 密钥 84 | DASHSCOPE_API_KEY = os.getenv("DASHSCOPE_API_KEY") # 从环境变量中获取 API 密钥 85 | 86 | # 请求的 URL 87 | url = "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions" 88 | 89 | # 请求头 90 | headers = { 91 | "Authorization": f"Bearer 
{DASHSCOPE_API_KEY}", 92 | "Content-Type": "application/json" 93 | } 94 | 95 | # 请求体 96 | data = { 97 | "model": "qwen-plus", 98 | "messages": [ 99 | { 100 | "role": "system", 101 | "content": "You are a helpful assistant." 102 | }, 103 | { 104 | "role": "user", 105 | "content": "你是谁?" 106 | } 107 | ] 108 | } 109 | 110 | # 发送 POST 请求 111 | response = requests.post(url, headers=headers, json=data) 112 | 113 | # 打印响应结果 114 | print(response.status_code) 115 | print(response.json()) 116 | ``` 117 | 118 | __2.2 提示词__ 119 | 120 | ```python 121 | 122 | action_prompt= """You are a web automation assistant. Based on the following page context, provide instructions for the requested action. 123 | 124 | Current page context: 125 | URL: {context['url']} 126 | Title: {context['title']} 127 | 128 | Available elements: 129 | {json.dumps(context['elements'], indent=2)} 130 | 131 | User request: {prompt} 132 | 133 | Return ONLY a JSON object with the following structure, no other text: 134 | {{ 135 | "selector": "CSS selector or XPath to locate the element", 136 | "action": "fill", 137 | "value": "text to input", 138 | "key": "key to press if needed" 139 | }} 140 | 141 | Example response: 142 | {{ 143 | "selector": "#search-input", 144 | "action": "fill", 145 | "value": "search text", 146 | "key": "Enter" 147 | }} 148 | """ 149 | 150 | ``` 151 | 152 | 识别并返回的数据: 153 | 154 | ```json 155 | { 156 | "selector": "#sb_form_q", 157 | "action": "fill", 158 | "value": "playwright", 159 | "key": "Enter" 160 | } 161 | ``` 162 | 163 | ### 3. 根据返回,转化动作。 164 | 165 | 转化为自动化工具的动作执行: 166 | 167 | ```python 168 | ... 
169 | element = self.page.locator(selector) 170 | if action == 'click': 171 | element.click() 172 | elif action == 'fill': 173 | element.fill(instruction.get('value', '')) 174 | if instruction.get('key'): 175 | element.press(instruction.get('key')) 176 | elif action == 'press': 177 | element.press(instruction.get('key', 'Enter')) 178 | else: 179 | raise ValueError(f"Unsupported action: {action}") 180 | ``` 181 | 182 | ## 代码设计 183 | 184 | 1. 如何支持更多的模型。 185 | 2. 如何支持多个测试库。 186 | 3. App测试有哪些不同。 187 | 4. 更多的基于AI的功能探索和尝试。 188 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # auto-wing 2 | 3 | ![](auto-wing.png) 4 | 5 | > auto-wing is a tool that uses LLM to assist automated testing, give your automated testing wings. 6 | 7 | auto-wing是一个利用LLM辅助自动化测试的工具, 为你的自动化测试插上翅膀。 8 | 9 | ### Features 10 | 11 | ⭐ 集成 `playwright`、`selenium`、`appium`,支持`Web UI`和`App UI`的`AI`操作。 12 | 13 | ⭐ 支持多模型:`openai`、`deepseek`、`qwen` 和 `doubao`。 14 | 15 | ⭐ 支持多种操作:`ai_action`、`ai_query`、`ai_assert`。 16 | 17 | ⭐ 默认支持缓存:首次执行AI任务会被缓存,后续执行相同的任务可以提升效率。 18 | 19 | ⭐ 无痛的集成到现有自动化项目(`pytest`、`unittest`)中。 20 | 21 | ## Install 22 | 23 | * 支持pip安装,`python >= 3.9`。 24 | 25 | ```shell 26 | pip install autowing 27 | ``` 28 | 29 | ## Setting Env 30 | 31 | __方法一__ 32 | 33 | 申请LLM需要的key,在项目的根目录下创建`.env`文件。推荐`qwen`和 `deepseek`,一是便宜,二是方便。 34 | 35 | * openai: https://platform.openai.com/ 36 | 37 | ```ini 38 | #.env 39 | AUTOWING_MODEL_PROVIDER = openai 40 | OPENAI_API_KEY = sk-proj-abdefghijklmnopqrstwvwxyz0123456789 41 | ``` 42 | 43 | * DeepSeek: https://platform.deepseek.com/ 44 | 45 | ```ini 46 | #.env 47 | AUTOWING_MODEL_PROVIDER = deepseek 48 | DEEPSEEK_API_KEY = sk-abdefghijklmnopqrstwvwxyz0123456789 49 | ``` 50 | 51 | * 阿里云百练(千问):https://bailian.console.aliyun.com/ 52 | 53 | ```ini 54 | #.env 55 | AUTOWING_MODEL_PROVIDER = qwen 56 | DASHSCOPE_API_KEY = 
sk-abdefghijklmnopqrstwvwxyz0123456789 57 | ``` 58 | 59 | * 火山方舟(豆包):https://console.volcengine.com/ 60 | 61 | ```ini 62 | #.env 63 | AUTOWING_MODEL_PROVIDER = doubao 64 | ARK_API_KEY = f61d2846-xxx-xxx-xxxx-xxxxxxxxxxxxx 65 | DOUBAO_MODEL_NAME = ep-20250207200649-xxx 66 | ``` 67 | 68 | __方法二__ 69 | 70 | > 如果不想使用python-dotenv配置环境变量,可以直接配置环境变量。 71 | 72 | ```shell 73 | export AUTOWING_MODEL_PROVIDER=deepseek 74 | export DEEPSEEK_API_KEY=sk-abdefghijklmnopqrstwvwxyz0123456789 75 | ``` 76 | 77 | > 其他LLM模型环境变量同样的方式配置。 78 | 79 | ## Examples 80 | 81 | 👉 [查看 examples](./examples) 82 | 83 | ```python 84 | import pytest 85 | from playwright.sync_api import Page, sync_playwright 86 | from autowing.playwright.fixture import create_fixture 87 | from dotenv import load_dotenv 88 | 89 | 90 | @pytest.fixture(scope="session") 91 | def page(): 92 | """playwright page fixture""" 93 | # load .env file config 94 | load_dotenv() 95 | with sync_playwright() as p: 96 | browser = p.chromium.launch(headless=False) 97 | context = browser.new_context() 98 | page = context.new_page() 99 | yield page 100 | context.close() 101 | browser.close() 102 | 103 | 104 | @pytest.fixture 105 | def ai(page): 106 | """ai fixture""" 107 | ai_fixture = create_fixture() 108 | return ai_fixture(page) 109 | 110 | 111 | def test_bing_search(page: Page, ai): 112 | # 访问必应 113 | page.goto("https://cn.bing.com") 114 | 115 | # 使用AI执行搜索 116 | ai.ai_action('搜索输入框输入"playwright"关键字,并回车') 117 | page.wait_for_timeout(3000) 118 | 119 | # 使用AI查询搜索结果 120 | items = ai.ai_query('string[], 搜索结果列表中包含"playwright"相关的标题') 121 | 122 | # 验证结果 123 | assert len(items) > 1 124 | 125 | # 使用AI断言 126 | assert ai.ai_assert('检查搜索结果列表第一条标题是否包含"playwright"字符串') 127 | ``` 128 | 129 | * 运行日志: 130 | 131 | ```shell 132 | > pytest test_playwright_pytest.py -s 133 | ================================================= test session starts ================================================= 134 | platform win32 -- Python 3.12.3, pytest-8.3.4, pluggy-1.5.0 
135 | rootdir: D:\github\seldomQA\auto-wing 136 | configfile: pyproject.toml 137 | plugins: base-url-2.1.0, playwright-0.6.2 138 | collected 1 item 139 | 140 | test_playwright_pytest.py 2025-02-04 10:00:30.961 | INFO | autowing.playwright.fixture:ai_action:88 - 🪽 AI Action: 搜索输入框输入"playwright"关键字,并回车 141 | 2025-02-04 10:00:40.070 | INFO | autowing.playwright.fixture:ai_query:162 - 🪽 AI Query: string[], 搜索结果列表中包 含"playwright"相关的标题 142 | 2025-02-04 10:00:48.954 | DEBUG | autowing.playwright.fixture:ai_query:218 - 📄 Query: ['Playwright 官方文档 | Playwright', 'Playwright - 快速、可靠的端到端测试框架', 'Playwright 中文文档 | Playwright', 'Playwright 入门指南 | Playwright', 'Playwright 测试框架 | Playwright', 'Playwright 教程 | Playwright', 'Playwright 使用指南 | Playwright', 'Playwright 自动化测试工具 | Playwright', 'Playwright 安装与配置 | Playwright', 'Playwright 示例代码 | Playwright'] 143 | 2025-02-04 10:00:48.954 | INFO | autowing.playwright.fixture:ai_assert:267 - 🪽 AI Assert: 检查搜索结果列表第一条标 题是否包含"playwright"字符串 144 | . 145 | 146 | ================================================= 1 passed in 27.99s ================================================== 147 | ``` 148 | 149 | ## Prompting Tips 150 | 151 | __1.提供更详细的描述以及样例__ 152 | 153 | 提供详细描述和示例一直是非常有用的提示词技巧。 154 | 155 | 错误示例 ❌: `"搜'耳机'"` 156 | 157 | 正确示例 ✅: `"找到搜索框(搜索框的上方应该有区域切换按钮,如 '国内', '国际'),输入'耳机',敲回车"` 158 | 159 | 错误示例 ❌: `"断言:外卖服务正在正常运行"` 160 | 161 | 正确示例 ✅: `"断言:界面上有个“外卖服务”的板块,并且标识着“正常”"` 162 | 163 | __2.一个 Prompt (指令)只做一件事__ 164 | 165 | 尽管 auto-wing 有自动重规划能力,但仍应保持指令简洁。否则,LLM 的输出可能会变得混乱。指令的长度对 token 消耗的影响几乎可以忽略不计。 166 | 167 | 错误示例 ❌:`"点击登录按钮,然后点击注册按钮,在表单中输入'test@test.com'作为邮箱,'test'作为密码,然后点击注册按钮"` 168 | 169 | 正确示例 170 | ✅: `将任务分解为三个步骤:"点击登录按钮" "点击注册按钮" "在表单中输入'test@test.com'作为邮箱,'test'作为密码,然后点击注册按钮"` 171 | 172 | __3.从界面做推断,而不是 DOM 属性或者浏览器状态__ 173 | 174 | 所有传递给 LLM 的数据都是截图和元素坐标。DOM和浏览器 对 LLM 来说几乎是不可见的。因此,务必确保你想提取的信息都在截图中有所体现且能被 175 | LLM “看到”。 176 | 177 | 正确示例 ✅:`标题是蓝色的` 178 | 179 | 错误实例 ❌:`标题有个 test-id-size 属性` 180 | 181 | 错误实例 ❌:`浏览器有两个 tab 开着` 182 | 183 | 错误实例 
❌:`异步请求已经结束了` 184 | 185 | __4.中、英文提示词无影响__ 186 | 187 | 由于大多数 AI 模型可以理解多种语言,所以请随意用你喜欢的语言撰写提示指令。即使提示语言与页面语言不同,通常也是可行的。 188 | 189 | ### 交流 190 | 191 | > 欢迎添加微信,交流和反馈问题。 192 | 193 |
194 |

微信

195 |
class QwenClient(BaseLLMClient):
    """
    Qwen (DashScope) API client implementation.

    Provides access to Alibaba Cloud's Qwen language models through the
    OpenAI-compatible interface exposed by DashScope.
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the Qwen client.

        Args:
            api_key (Optional[str]): DashScope API key. If not provided, will
                try to get from the DASHSCOPE_API_KEY env var

        Raises:
            ValueError: If no API key is provided or found in environment variables
        """
        self.api_key = api_key or os.getenv("DASHSCOPE_API_KEY")
        if not self.api_key:
            raise ValueError("Please set the env variable `DASHSCOPE_API_KEY`")

        # Prefer provider-specific variables. The previous names
        # (OPENAI_BASE_URL / MIDSCENE_MODEL_NAME) are kept as fallbacks for
        # backward compatibility; OPENAI_BASE_URL in particular collides with
        # the OpenAI client's configuration when both providers are in use.
        self.base_url = os.getenv(
            "DASHSCOPE_BASE_URL",
            os.getenv("OPENAI_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
        )
        self.model_name = os.getenv(
            "QWEN_MODEL_NAME",
            os.getenv("MIDSCENE_MODEL_NAME", "qwen3-max"),
        )

        self.client = OpenAI(
            api_key=self.api_key,
            base_url=self.base_url
        )

    def _truncate_text(self, text: str, max_length: int = 30000) -> str:
        """
        Truncate text to fit within model's length limits.

        Args:
            text (str): The input text to truncate
            max_length (int): Maximum allowed length for the text. Defaults to 30000

        Returns:
            str: Truncated text with ellipsis if needed
        """
        if len(text) > max_length:
            return text[:max_length] + "..."
        return text

    def _format_messages(self, prompt: str, context: Optional[Dict[str, Any]] = None) -> List[Dict[str, str]]:
        """
        Format messages for the Qwen API.

        Args:
            prompt (str): The main prompt text
            context (Optional[Dict[str, Any]]): Additional context information

        Returns:
            List[Dict[str, str]]: Formatted messages list ready for API submission
        """
        # Add system message
        messages = [{
            "role": "system",
            "content": (
                "You are a web automation assistant. "
                "Analyze the page structure and provide precise element locators. "
                "Return responses in the requested format."
            )
        }]

        # Add context (if any); serialized as JSON and truncated to the limit.
        if context:
            context_str = json.dumps(context, ensure_ascii=False)
            messages.append({
                "role": "user",
                "content": f"Page context: {self._truncate_text(context_str)}"
            })

        # Add main prompt
        messages.append({
            "role": "user",
            "content": self._truncate_text(prompt)
        })

        return messages

    def complete(self, prompt: str, context: Optional[Dict[str, Any]] = None) -> str:
        """
        Generate a completion using a Qwen model.

        Args:
            prompt (str): The text prompt to complete
            context (Optional[Dict[str, Any]]): Additional context for the completion

        Returns:
            str: The model's response text

        Raises:
            Exception: If there's an error communicating with the Qwen API
        """
        try:
            messages = self._format_messages(prompt, context)

            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                temperature=0.7,
                max_tokens=2000
            )

            return response.choices[0].message.content
        except Exception as e:
            raise Exception(f"Qwen API error: {str(e)}")

    def complete_with_vision(self, prompt: Dict[str, Any]) -> str:
        """
        Generate a completion for vision tasks using a Qwen-VL model.

        Args:
            prompt (Dict[str, Any]): A dictionary containing messages and image data
                in the format required by the Qwen-VL API

        Returns:
            str: The model's response text

        Raises:
            Exception: If there's an error communicating with the Qwen API
        """
        try:
            # Make sure every text field is within the length limit.
            messages = prompt["messages"]
            for msg in messages:
                if isinstance(msg.get("content"), str):
                    msg["content"] = self._truncate_text(msg["content"])
                elif isinstance(msg.get("content"), list):
                    for item in msg["content"]:
                        if isinstance(item.get("text"), str):
                            item["text"] = self._truncate_text(item["text"])

            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                temperature=0.7,
                max_tokens=2000
            )

            return response.choices[0].message.content
        except Exception as e:
            raise Exception(f"Qwen API error: {str(e)}")
26 | """ 27 | self.api_key = api_key or os.getenv("DEEPSEEK_API_KEY") 28 | if not self.api_key: 29 | raise ValueError("DeepSeek API key is required") 30 | 31 | self.base_url = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com") 32 | self.model_name = os.getenv("DEEPSEEK_MODEL_NAME", "deepseek-chat") 33 | 34 | self.client = OpenAI( 35 | api_key=self.api_key, 36 | base_url=self.base_url 37 | ) 38 | 39 | def _truncate_text(self, text: str, max_length: int = 30000) -> str: 40 | """ 41 | Truncate text to fit within model's length limits. 42 | 43 | Args: 44 | text (str): The input text to truncate 45 | max_length (int): Maximum allowed length for the text. Defaults to 30000. 46 | 47 | Returns: 48 | str: Truncated text with ellipsis if needed 49 | """ 50 | if len(text) > max_length: 51 | return text[:max_length] + "..." 52 | return text 53 | 54 | def _format_messages(self, prompt: str, context: Optional[Dict[str, Any]] = None) -> List[Dict[str, str]]: 55 | """ 56 | Format messages for the DeepSeek API. 57 | 58 | Args: 59 | prompt (str): The main prompt text 60 | context (Optional[Dict[str, Any]]): Additional context information 61 | 62 | Returns: 63 | List[Dict[str, str]]: Formatted messages list ready for API submission 64 | """ 65 | # Add system message 66 | messages = [{ 67 | "role": "system", 68 | "content": ( 69 | "You are a web automation assistant. " 70 | "Analyze the page structure and provide precise element locators. " 71 | "Return responses in the requested format." 
72 | ) 73 | }] 74 | 75 | # Add context (if any) 76 | if context: 77 | context_str = json.dumps(context, ensure_ascii=False) 78 | messages.append({ 79 | "role": "user", 80 | "content": f"Page context: {self._truncate_text(context_str)}" 81 | }) 82 | 83 | # Add main prompt 84 | messages.append({ 85 | "role": "user", 86 | "content": self._truncate_text(prompt) 87 | }) 88 | 89 | return messages 90 | 91 | def complete(self, prompt: str, context: Optional[Dict[str, Any]] = None) -> str: 92 | """ 93 | Send a completion request to the DeepSeek API. 94 | 95 | Args: 96 | prompt (str): The text prompt to complete 97 | context (Optional[Dict[str, Any]]): Additional context for the completion 98 | 99 | Returns: 100 | str: The model's response text 101 | 102 | Raises: 103 | Exception: If there's an error communicating with the DeepSeek API 104 | """ 105 | try: 106 | messages = self._format_messages(prompt, context) 107 | 108 | response = self.client.chat.completions.create( 109 | model=self.model_name, 110 | messages=messages, 111 | temperature=0.7, 112 | max_tokens=2000 113 | ) 114 | 115 | return response.choices[0].message.content 116 | except Exception as e: 117 | raise Exception(f"DeepSeek API error: {str(e)}") 118 | 119 | def complete_with_vision(self, prompt: Dict[str, Any]) -> str: 120 | """ 121 | Send a vision-based completion request to the DeepSeek API. 
122 | 123 | Args: 124 | prompt (Dict[str, Any]): A dictionary containing messages and image data 125 | in the format expected by the API 126 | 127 | Returns: 128 | str: The model's response text 129 | 130 | Raises: 131 | Exception: If there's an error communicating with the DeepSeek API 132 | """ 133 | try: 134 | # Make sure the message length is within the limit 135 | messages = prompt["messages"] 136 | for msg in messages: 137 | if isinstance(msg.get("content"), str): 138 | msg["content"] = self._truncate_text(msg["content"]) 139 | elif isinstance(msg.get("content"), list): 140 | for item in msg["content"]: 141 | if isinstance(item.get("text"), str): 142 | item["text"] = self._truncate_text(item["text"]) 143 | 144 | response = self.client.chat.completions.create( 145 | model=self.model_name, 146 | messages=messages, 147 | temperature=0.7, 148 | max_tokens=2000 149 | ) 150 | 151 | return response.choices[0].message.content 152 | except Exception as e: 153 | raise Exception(f"DeepSeek API error: {str(e)}") 154 | -------------------------------------------------------------------------------- /autowing/core/llm/client/doubao.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from typing import Optional, Dict, Any, List 4 | 5 | from openai import OpenAI 6 | 7 | from autowing.core.llm.base import BaseLLMClient 8 | 9 | 10 | class DoubaoClient(BaseLLMClient): 11 | """ 12 | Doubao API client implementation. 13 | Provides access to Doubao's LLM models. 14 | """ 15 | 16 | def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None): 17 | """ 18 | Initialize the Doubao client. 19 | 20 | Args: 21 | api_key (Optional[str]): Doubao API key. 
class DoubaoClient(BaseLLMClient):
    """
    Doubao API client implementation.

    Provides access to Doubao's LLM models via the Volcengine Ark
    OpenAI-compatible endpoint.
    """

    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
        """
        Initialize the Doubao client.

        Args:
            api_key (Optional[str]): Doubao API key. If not provided, will try
                to get from the ARK_API_KEY env var.
            base_url (Optional[str]): Custom base URL for API requests.

        Raises:
            ValueError: If no API key is provided or found in environment
                variables, or if no model name is configured.
        """
        self.api_key = api_key or os.getenv("ARK_API_KEY")
        if not self.api_key:
            raise ValueError("Doubao API key is required")

        self.base_url = base_url or os.getenv("DOUBAO_BASE_URL", "https://ark.cn-beijing.volces.com/api/v3")

        # Doubao has no usable default model id; the endpoint id must be
        # supplied explicitly via DOUBAO_MODEL_NAME.
        self.model_name = os.getenv("DOUBAO_MODEL_NAME")
        if not self.model_name:
            raise ValueError("Doubao model name is null, For example: ep-20250207200649-xxx")

        self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)

    def _truncate_text(self, text: str, max_length: int = 30000) -> str:
        """
        Clamp text to the model's input-length budget.

        Args:
            text (str): The input text.
            max_length (int): Maximum allowed length. Defaults to 30000.

        Returns:
            str: The original text, or a truncated copy ending in "...".
        """
        if len(text) <= max_length:
            return text
        return text[:max_length] + "..."

    def _format_messages(self, prompt: str, context: Optional[Dict[str, Any]] = None) -> List[Dict[str, str]]:
        """
        Build the chat-message list for the Doubao API.

        Args:
            prompt (str): The main prompt text.
            context (Optional[Dict[str, Any]]): Extra page context to attach.

        Returns:
            List[Dict[str, str]]: Messages ready for API submission.
        """
        conversation: List[Dict[str, str]] = []

        # System instruction shared by all requests.
        conversation.append({
            "role": "system",
            "content": (
                "You are a web automation assistant. "
                "Analyze the page structure and provide precise element locators. "
                "Return responses in the requested format."
            )
        })

        # Optional page context, serialized and clamped to the length limit.
        if context:
            rendered = json.dumps(context, ensure_ascii=False)
            conversation.append({
                "role": "user",
                "content": f"Page context: {self._truncate_text(rendered)}"
            })

        # The user's actual request goes last.
        conversation.append({
            "role": "user",
            "content": self._truncate_text(prompt)
        })
        return conversation

    def complete(self, prompt: str, context: Optional[Dict[str, Any]] = None) -> str:
        """
        Generate a completion using a Doubao LLM.

        Args:
            prompt (str): The text prompt to complete.
            context (Optional[Dict[str, Any]]): Additional context for the completion.

        Returns:
            str: The model's response text.

        Raises:
            Exception: If there's an error communicating with the Doubao API.
        """
        try:
            result = self.client.chat.completions.create(
                model=self.model_name,
                messages=self._format_messages(prompt, context),
                temperature=0.7,
                max_tokens=2000
            )
            return result.choices[0].message.content
        except Exception as e:
            raise Exception(f"Doubao API error: {str(e)}")

    def complete_with_vision(self, prompt: Dict[str, Any]) -> str:
        """
        Generate a completion for vision tasks using Doubao Vision.

        Args:
            prompt (Dict[str, Any]): A dictionary containing messages and image
                data in the format required by the Doubao Vision API.

        Returns:
            str: The model's response text.

        Raises:
            Exception: If there's an error communicating with the Doubao Vision API.
        """
        try:
            # Clamp every text field so the request stays within limits.
            conversation = prompt["messages"]
            for message in conversation:
                payload = message.get("content")
                if isinstance(payload, str):
                    message["content"] = self._truncate_text(payload)
                elif isinstance(payload, list):
                    for part in payload:
                        if isinstance(part.get("text"), str):
                            part["text"] = self._truncate_text(part["text"])

            result = self.client.chat.completions.create(
                model=self.model_name,
                messages=conversation,
                temperature=0.7,
                max_tokens=2000
            )
            return result.choices[0].message.content
        except Exception as e:
            raise Exception(f"Doubao Vision API error: {str(e)}")
class OpenAIClient(BaseLLMClient):
    """
    OpenAI API client implementation.

    Provides access to OpenAI's GPT and vision models.
    """

    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
        """
        Initialize the OpenAI client.

        Args:
            api_key (Optional[str]): OpenAI API key. If not provided, will try
                to get from the OPENAI_API_KEY env var
            base_url (Optional[str]): Custom base URL for API requests

        Raises:
            ValueError: If no API key is provided or found in environment variables
        """
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            raise ValueError("OpenAI API key is required")

        self.base_url = base_url or os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")

        # Prefer the provider-specific OPENAI_MODEL_NAME; MIDSCENE_MODEL_NAME
        # is kept as a fallback for backward compatibility.
        self.model_name = os.getenv(
            "OPENAI_MODEL_NAME",
            os.getenv("MIDSCENE_MODEL_NAME", "gpt-4o-2024-08-06"),
        )

        # base_url always has a value here (the env lookup above supplies a
        # default), so pass it directly instead of building kwargs conditionally.
        self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)

    def _truncate_text(self, text: str, max_length: int = 30000) -> str:
        """
        Truncate text to fit within model's length limits.

        Args:
            text (str): The input text to truncate
            max_length (int): Maximum allowed length for the text. Defaults to 30000

        Returns:
            str: Truncated text with ellipsis if needed
        """
        if len(text) > max_length:
            return text[:max_length] + "..."
        return text

    def _format_messages(self, prompt: str, context: Optional[Dict[str, Any]] = None) -> List[Dict[str, str]]:
        """
        Format messages for the OpenAI API.

        Args:
            prompt (str): The main prompt text
            context (Optional[Dict[str, Any]]): Additional context information

        Returns:
            List[Dict[str, str]]: Formatted messages list ready for API submission
        """
        # Add system message
        messages = [{
            "role": "system",
            "content": (
                "You are a web automation assistant. "
                "Analyze the page structure and provide precise element locators. "
                "Return responses in the requested format."
            )
        }]

        # Add context (if any); serialized as JSON and truncated to the limit.
        if context:
            context_str = json.dumps(context, ensure_ascii=False)
            messages.append({
                "role": "user",
                "content": f"Page context: {self._truncate_text(context_str)}"
            })

        # Add main prompt
        messages.append({
            "role": "user",
            "content": self._truncate_text(prompt)
        })

        return messages

    def complete(self, prompt: str, context: Optional[Dict[str, Any]] = None) -> str:
        """
        Generate a completion using the configured GPT model.

        Args:
            prompt (str): The text prompt to complete
            context (Optional[Dict[str, Any]]): Additional context for the completion

        Returns:
            str: The model's response text

        Raises:
            Exception: If there's an error communicating with the OpenAI API
        """
        try:
            messages = self._format_messages(prompt, context)

            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                temperature=0.7,
                max_tokens=2000
            )
            return response.choices[0].message.content
        except Exception as e:
            raise Exception(f"OpenAI API error: {str(e)}")

    def complete_with_vision(self, prompt: Dict[str, Any]) -> str:
        """
        Generate a completion for vision tasks using a GPT vision model.

        Args:
            prompt (Dict[str, Any]): A dictionary containing messages and image data
                in the format required by the vision API

        Returns:
            str: The model's response text

        Raises:
            Exception: If there's an error communicating with the OpenAI Vision API
        """
        try:
            # Make sure every text field is within the length limit.
            messages = prompt["messages"]
            for msg in messages:
                if isinstance(msg.get("content"), str):
                    msg["content"] = self._truncate_text(msg["content"])
                elif isinstance(msg.get("content"), list):
                    for item in msg["content"]:
                        if isinstance(item.get("text"), str):
                            item["text"] = self._truncate_text(item["text"])

            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                temperature=0.7,
                max_tokens=2000
            )
            return response.choices[0].message.content
        except Exception as e:
            raise Exception(f"OpenAI Vision API error: {str(e)}")
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
def bounds(x, y, width, height) -> list:
    """
    Compute an element's bounding box from its origin and size.

    Args:
        x: Left edge of the element (anything `int()` accepts, e.g. str/int).
        y: Top edge of the element.
        width: Element width.
        height: Element height.

    Returns:
        list: ``[[x_start, x_end], [y_start, y_end]]`` — the horizontal and
        vertical pixel ranges covered by the element.
    """
    # Appium attribute values arrive as strings, so coerce everything to int.
    x_start = int(x)
    y_start = int(y)
    x_end = x_start + int(width)
    y_end = y_start + int(height)
    return [[x_start, x_end], [y_start, y_end]]
34 | Provides AI-driven interaction with mobile apps using various LLM providers. 35 | """ 36 | 37 | def __init__(self, driver: WebDriver, platform: str = "Android"): 38 | """ 39 | Initialize the AI-powered Appium fixture. 40 | 41 | Args: 42 | driver (WebDriver): The Appium WebDriver instance to automate 43 | platform: Mobile operating system platform 44 | """ 45 | self.driver = driver 46 | self.platform = platform 47 | self.llm_client = LLMFactory.create() 48 | self.wait = WebDriverWait(self.driver, 10) # Default timeout of 10 seconds 49 | 50 | def _get_page_context(self) -> Dict[str, Any]: 51 | """ 52 | Extract context information from the current screen of the mobile app. 53 | Collects information about visible elements and screen metadata. 54 | 55 | Returns: 56 | Dict[str, Any]: A dictionary containing screen information and visible interactive elements 57 | """ 58 | # Get basic screen info 59 | basic_info = { 60 | "activity": self.driver.current_activity, 61 | "package": self.driver.current_package 62 | } 63 | 64 | # Get key elements info using Appium 65 | elements_info = [] 66 | if self.platform == "Android": 67 | elements = self.driver.find_elements(AppiumBy.XPATH, "//*") 68 | for el in elements: 69 | if el.is_displayed(): 70 | elements_info.append({ 71 | "tag": el.tag_name, 72 | "text": el.text, 73 | "resource_id": el.get_attribute("resource-id"), 74 | "content_desc": el.get_attribute("content-desc"), 75 | "class": el.get_attribute("class"), 76 | "bounds": el.get_attribute("bounds") 77 | }) 78 | elif self.platform == "iOS": 79 | elements = self.driver.find_elements(AppiumBy.IOS_PREDICATE, "type == '*'") 80 | for el in elements: 81 | if el.is_displayed(): 82 | elements_info.append({ 83 | "tag": el.tag_name, 84 | "text": el.text, 85 | "type": el.get_attribute("type"), 86 | "name": el.get_attribute("name"), 87 | "label": el.get_attribute("label"), 88 | "enabled": el.get_attribute("enabled"), 89 | "visible": el.get_attribute("visible"), 90 | "bounds": 
bounds(el.get_attribute("x"), 91 | el.get_attribute("y"), 92 | el.get_attribute("width"), 93 | el.get_attribute("height")), 94 | }) 95 | else: 96 | raise NameError(f"Unsupported {self.platform} platform.") 97 | 98 | return { 99 | **basic_info, 100 | "elements": elements_info 101 | } 102 | 103 | def ai_action(self, prompt: str) -> None: 104 | """ 105 | Execute an AI-driven action on the screen based on the given prompt. 106 | 107 | Args: 108 | prompt (str): Natural language description of the action to perform 109 | 110 | Raises: 111 | ValueError: If the AI response cannot be parsed or contains invalid instructions 112 | TimeoutException: If the element cannot be found or interacted with 113 | """ 114 | logger.info(f"🪽 AI Action: {prompt}") 115 | context = self._get_page_context() 116 | 117 | action_prompt = f""" 118 | Extract element locator and action from the request. Return ONLY a JSON object. 119 | 120 | Activity: {context['activity']} 121 | Package: {context['package']} 122 | Elements: {context['elements']} 123 | Request: {prompt} 124 | 125 | Return list format: 126 | [{{ 127 | "bounds": "coordinates of the element in the format [x1,y1][x2,y2] (notice, x1,y1 and x2,y2 are replaced by concrete coordinates.)", 128 | "action": "click/fill/press", 129 | "value": "text to input if needed", 130 | "key": "key to press if needed" 131 | }}] 132 | 133 | No other text or explanation. 
134 | """ 135 | 136 | response = self.llm_client.complete(action_prompt) 137 | cleaned_response = self._clean_response(response) 138 | instruction = json.loads(cleaned_response) 139 | 140 | if isinstance(instruction, list) is False: 141 | raise ValueError("Invalid instruction format") 142 | 143 | for step in instruction: 144 | bounds = step.get('bounds') 145 | action = step.get('action') 146 | 147 | if not bounds or not action: 148 | raise ValueError("Invalid instruction format") 149 | 150 | # Extract coordinates from bounds 151 | coord = re.findall(r'\d+', bounds) 152 | x1, y1, x2, y2 = map(int, coord) 153 | x_center = (x1 + x2) // 2 154 | y_center = (y1 + y2) // 2 155 | 156 | # Execute the action 157 | if action == 'click': 158 | action = Action(self.driver) 159 | action.tap(x=x_center, y=y_center) 160 | elif action == 'fill': 161 | fill_text = step.get('value', '') 162 | logger.info(f"⌨️ fill text: {fill_text}.") 163 | self.driver.execute_script('mobile: type', {'text': fill_text}) 164 | elif action == 'press': 165 | logger.info("🔍 keyboard search key.") 166 | self.driver.execute_script('mobile: performEditorAction', {'action': 'search'}) 167 | else: 168 | raise ValueError(f"Unsupported action: {action}") 169 | 170 | def ai_query(self, prompt: str) -> Any: 171 | """ 172 | Query information from the screen using AI analysis. 173 | 174 | Args: 175 | prompt (str): Natural language query about the screen content. 176 | Can include format hints like 'string[]' or 'number'. 
177 | 178 | Returns: 179 | Any: The query results in the requested format 180 | 181 | Raises: 182 | ValueError: If the AI response cannot be parsed into the requested format 183 | """ 184 | logger.info(f"🪽 AI Query: {prompt}") 185 | context = self._get_page_context() 186 | 187 | # Parse the requested data format 188 | format_hint = "" 189 | if prompt.startswith(('string[]', 'number[]', 'object[]')): 190 | format_hint = prompt.split(',')[0].strip() 191 | prompt = ','.join(prompt.split(',')[1:]).strip() 192 | 193 | # Provide different prompts based on the format 194 | if format_hint == 'string[]': 195 | query_prompt = f""" 196 | Extract text content matching the query. Return ONLY a JSON array of strings. 197 | 198 | Activity: {context['activity']} 199 | Package: {context['package']} 200 | Elements: {context['elements']} 201 | Query: {prompt} 202 | 203 | Return format example: ["result1", "result2"], (notice: Gets value data from labels and text keys) 204 | No other text or explanation. 205 | """ 206 | elif format_hint == 'number[]': 207 | query_prompt = f""" 208 | Extract numeric values matching the query. Return ONLY a JSON array of numbers. 209 | 210 | Activity: {context['activity']} 211 | Package: {context['package']} 212 | Elements: {context['elements']} 213 | Query: {prompt} 214 | 215 | Return format example: [1, 2, 3], (notice: Gets value data from labels and text keys) 216 | No other text or explanation. 217 | """ 218 | else: 219 | query_prompt = f""" 220 | Extract information matching the query. Return ONLY in valid JSON format. 221 | 222 | Activity: {context['activity']} 223 | Package: {context['package']} 224 | Elements: {context['elements']} 225 | Query: {prompt} 226 | 227 | Return format: 228 | - For arrays: ["item1", "item2"] 229 | - For objects: {{"key": "value"}} 230 | - For single value: "text" or number 231 | (notice: Gets value data from labels and text keys) 232 | 233 | No other text or explanation. 
234 | """ 235 | 236 | response = self.llm_client.complete(query_prompt) 237 | cleaned_response = self._clean_response(response) 238 | try: 239 | result = json.loads(cleaned_response) 240 | query_info = self._validate_result_format(result, format_hint) 241 | logger.debug(f"📄 Query: {query_info}") 242 | return query_info 243 | except json.JSONDecodeError: 244 | # If it's a string array format, try extracting from text 245 | if format_hint == 'string[]': 246 | lines = [line.strip() for line in cleaned_response.split('\n') 247 | if line.strip() and not line.startswith(('-', '*', '#'))] 248 | 249 | query_terms = [term.lower() for term in prompt.split() 250 | if len(term) > 2 and term.lower() not in ['the', 'and', 'for']] 251 | 252 | results = [] 253 | for line in lines: 254 | if any(term in line.lower() for term in query_terms): 255 | text = line.strip('`"\'- ,') 256 | if ':' in text: 257 | text = text.split(':', 1)[1].strip() 258 | if text: 259 | results.append(text) 260 | 261 | if results: 262 | seen = set() 263 | query_info = [x for x in results if not (x in seen or seen.add(x))] 264 | logger.debug(f"📄 Query: {query_info}") 265 | return query_info 266 | 267 | raise ValueError(f"Failed to parse response as JSON: {cleaned_response[:100]}...") 268 | 269 | def ai_assert(self, prompt: str) -> bool: 270 | """ 271 | Verify a condition on the screen using AI analysis. 272 | 273 | Args: 274 | prompt (str): Natural language description of the condition to verify 275 | 276 | Returns: 277 | bool: True if the condition is met, False otherwise 278 | 279 | Raises: 280 | ValueError: If the AI response cannot be parsed as a boolean value 281 | """ 282 | logger.info(f"🪽 AI Assert: {prompt}") 283 | context = self._get_page_context() 284 | 285 | assert_prompt = f""" 286 | You are a web automation assistant. Verify the following assertion and return ONLY a boolean value. 
287 | 288 | Activity: {context['activity']} 289 | Package: {context['package']} 290 | Elements: {context['elements']} 291 | Assertion: {prompt} 292 | 293 | (notice: Gets value data from labels and text keys) 294 | 295 | IMPORTANT: Return ONLY the word 'true' or 'false' (lowercase). No other text, no explanation. 296 | """ 297 | 298 | response = self.llm_client.complete(assert_prompt) 299 | cleaned_response = self._clean_response(response).lower() 300 | 301 | # Directly match true or false 302 | if cleaned_response == 'true': 303 | return True 304 | if cleaned_response == 'false': 305 | return False 306 | 307 | # If response contains other content, try extracting boolean 308 | if 'true' in cleaned_response.split(): 309 | return True 310 | if 'false' in cleaned_response.split(): 311 | return False 312 | 313 | raise ValueError("Response must be 'true' or 'false'") 314 | 315 | 316 | def create_fixture(): 317 | """ 318 | Create an AppiumAiFixture factory. 319 | """ 320 | return AppiumAiFixture 321 | -------------------------------------------------------------------------------- /autowing/selenium/fixture.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Any, Dict 3 | 4 | from loguru import logger 5 | from selenium.common.exceptions import TimeoutException 6 | from selenium.webdriver.common.by import By 7 | from selenium.webdriver.common.keys import Keys 8 | from selenium.webdriver.remote.webdriver import WebDriver 9 | from selenium.webdriver.support import expected_conditions as EC 10 | from selenium.webdriver.support.ui import WebDriverWait 11 | 12 | from autowing.core.ai_fixture_base import AiFixtureBase 13 | from autowing.core.llm.factory import LLMFactory 14 | from autowing.utils.transition import selector_to_selenium 15 | 16 | 17 | class SeleniumAiFixture(AiFixtureBase): 18 | """ 19 | A fixture class that combines Selenium with AI capabilities for web automation. 
20 | Provides AI-driven interaction with web pages using various LLM providers. 21 | Maintains API compatibility with PlaywrightAiFixture. 22 | """ 23 | 24 | def __init__(self, driver: WebDriver): 25 | """ 26 | Initialize the AI-powered Selenium fixture. 27 | 28 | Args: 29 | driver (WebDriver): The Selenium WebDriver instance to automate 30 | """ 31 | super().__init__() 32 | self.driver = driver 33 | self.llm_client = LLMFactory.create() 34 | self.wait = WebDriverWait(self.driver, 10) # Default timeout of 10 seconds 35 | 36 | def _get_page_context(self) -> Dict[str, Any]: 37 | """ 38 | Extract context information from the current page. 39 | Collects information about visible elements and page metadata. 40 | 41 | Returns: 42 | Dict[str, Any]: A dictionary containing page URL, title, and information about 43 | visible interactive elements 44 | """ 45 | # Get basic page info 46 | basic_info = { 47 | "url": self.driver.current_url, 48 | "title": self.driver.title 49 | } 50 | 51 | # Get key elements info using JavaScript 52 | elements_info = self.driver.execute_script(""" 53 | const getVisibleElements = () => { 54 | const elements = []; 55 | const selectors = [ 56 | 'input', // input 57 | 'textarea', // input 58 | 'select', // input/click 59 | 'button', // click 60 | 'a', // click 61 | '[role="button"]', // click 62 | '[role="link"]', // click 63 | '[role="checkbox"]', // click 64 | '[role="radio"]', // click 65 | '[role="searchbox"]', // input 66 | 'summary', // click(
) 67 | '[draggable="true"]' // draggable 68 | ]; 69 | 70 | for (const selector of selectors) { 71 | document.querySelectorAll(selector).forEach(el => { 72 | if (el.offsetWidth > 0 && el.offsetHeight > 0) { 73 | elements.push({ 74 | tag: el.tagName.toLowerCase(), 75 | type: el.getAttribute('type') || null, 76 | placeholder: el.getAttribute('placeholder') || null, 77 | value: el.value || null, 78 | text: el.textContent?.trim() || '', 79 | aria: el.getAttribute('aria-label') || null, 80 | id: el.id || '', 81 | name: el.getAttribute('name') || null, 82 | class: el.className || '', 83 | draggable: el.getAttribute('draggable') || null 84 | }); 85 | } 86 | }); 87 | } 88 | return elements; 89 | }; 90 | return getVisibleElements(); 91 | """) 92 | 93 | return { 94 | **basic_info, 95 | "elements": elements_info 96 | } 97 | 98 | def ai_action(self, prompt: str) -> None: 99 | """ 100 | Execute an AI-driven action on the page based on the given prompt. 101 | 102 | Args: 103 | prompt (str): Natural language description of the action to perform 104 | 105 | Raises: 106 | ValueError: If the AI response cannot be parsed or contains invalid instructions 107 | TimeoutException: If the element cannot be found or interacted with 108 | """ 109 | logger.info(f"🪽 AI Action: {prompt}") 110 | context = self._get_page_context() 111 | context["elements"] = self._remove_empty_keys(context.get("elements", [])) 112 | 113 | def compute_action(): 114 | action_prompt = f""" 115 | Extract element locator and action from the request. Return ONLY a JSON object. 
116 | 117 | Page: {context['url']} 118 | Title: {context['title']} 119 | Request: {prompt} 120 | 121 | Return format: 122 | {{ 123 | "selector": "XPATH selector to locate the element", 124 | "action": "click/fill/press", 125 | "value": "text to input if needed", 126 | "key": "key to press if needed" 127 | }} 128 | Note: selector is used for a selenium location, for example:find_element(By.XPATH, selector) 129 | 130 | Example response: 131 | {{ 132 | "selector": "//input[@id='search-input']", 133 | "action": "fill", 134 | "value": "search text", 135 | "key": "Enter" 136 | }} 137 | """ 138 | 139 | response = self.llm_client.complete(action_prompt) 140 | cleaned_response = self._clean_response(response) 141 | return json.loads(cleaned_response) 142 | 143 | # Use cache manager to get or compute the instruction 144 | instruction = self._get_cached_or_compute(prompt, context, compute_action) 145 | # Execute the action using the instruction 146 | selector = instruction.get('selector') 147 | action = instruction.get('action') 148 | 149 | if not selector or not action: 150 | raise ValueError("Invalid instruction format") 151 | 152 | # Execute the action 153 | selector = selector_to_selenium(selector) 154 | try: 155 | element = self.wait.until(EC.presence_of_element_located((By.XPATH, selector))) 156 | except TimeoutException: 157 | element = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, selector))) 158 | 159 | if action == 'click': 160 | element.click() 161 | elif action == 'fill': 162 | element.clear() 163 | element.send_keys(instruction.get('value', '')) 164 | if instruction.get('key'): 165 | key_attr = getattr(Keys, instruction['key'].upper(), None) 166 | if key_attr: 167 | element.send_keys(key_attr) 168 | elif action == 'press': 169 | key_attr = getattr(Keys, instruction.get('key', 'ENTER').upper()) 170 | element.send_keys(key_attr) 171 | else: 172 | raise ValueError(f"Unsupported action: {action}") 173 | 174 | def ai_query(self, prompt: str) -> Any: 
175 | """ 176 | Query information from the page using AI analysis. 177 | 178 | Args: 179 | prompt (str): Natural language query about the page content. 180 | Can include format hints like 'string[]' or 'number'. 181 | 182 | Returns: 183 | Any: The query results in the requested format 184 | 185 | Raises: 186 | ValueError: If the AI response cannot be parsed into the requested format 187 | """ 188 | logger.info(f"🪽 AI Query: {prompt}") 189 | context = self._get_page_context() 190 | context["elements"] = self._remove_empty_keys(context.get("elements", [])) 191 | 192 | # Parse the requested data format 193 | format_hint = "" 194 | if prompt.startswith(('string[]', 'number[]', 'object[]')): 195 | format_hint = prompt.split(',')[0].strip() 196 | prompt = ','.join(prompt.split(',')[1:]).strip() 197 | 198 | # Provide different prompts based on the format 199 | if format_hint == 'string[]': 200 | query_prompt = f""" 201 | Extract text content matching the query. Return ONLY a JSON array of strings. 202 | 203 | Page: {context['url']} 204 | Title: {context['title']} 205 | Query: {prompt} 206 | 207 | Return format example: ["result1", "result2"] 208 | No other text or explanation. 209 | """ 210 | elif format_hint == 'number[]': 211 | query_prompt = f""" 212 | Extract numeric values matching the query. Return ONLY a JSON array of numbers. 213 | 214 | Page: {context['url']} 215 | Title: {context['title']} 216 | Query: {prompt} 217 | 218 | Return format example: [1, 2, 3] 219 | No other text or explanation. 220 | """ 221 | else: 222 | query_prompt = f""" 223 | Extract information matching the query. Return ONLY in valid JSON format. 224 | 225 | Page: {context['url']} 226 | Title: {context['title']} 227 | Query: {prompt} 228 | 229 | Return format: 230 | - For arrays: ["item1", "item2"] 231 | - For objects: {{"key": "value"}} 232 | - For single value: "text" or number 233 | 234 | No other text or explanation. 
235 | """ 236 | 237 | response = self.llm_client.complete(query_prompt) 238 | cleaned_response = self._clean_response(response) 239 | try: 240 | result = json.loads(cleaned_response) 241 | query_info = self._validate_result_format(result, format_hint) 242 | logger.debug(f"📄 Query: {query_info}") 243 | return query_info 244 | except json.JSONDecodeError: 245 | # If it's a string array format, try extracting from text 246 | if format_hint == 'string[]': 247 | lines = [line.strip() for line in cleaned_response.split('\n') 248 | if line.strip() and not line.startswith(('-', '*', '#'))] 249 | 250 | query_terms = [term.lower() for term in prompt.split() 251 | if len(term) > 2 and term.lower() not in ['the', 'and', 'for']] 252 | 253 | results = [] 254 | for line in lines: 255 | if any(term in line.lower() for term in query_terms): 256 | text = line.strip('`"\'- ,') 257 | if ':' in text: 258 | text = text.split(':', 1)[1].strip() 259 | if text: 260 | results.append(text) 261 | 262 | if results: 263 | seen = set() 264 | query_info = [x for x in results if not (x in seen or seen.add(x))] 265 | logger.debug(f"📄 Query: {query_info}") 266 | return query_info 267 | 268 | raise ValueError(f"Failed to parse response as JSON: {cleaned_response[:100]}...") 269 | 270 | def ai_assert(self, prompt: str) -> bool: 271 | """ 272 | Verify a condition on the page using AI analysis. 273 | 274 | Args: 275 | prompt (str): Natural language description of the condition to verify 276 | 277 | Returns: 278 | bool: True if the condition is met, False otherwise 279 | 280 | Raises: 281 | ValueError: If the AI response cannot be parsed as a boolean value 282 | """ 283 | logger.info(f"🪽 AI Assert: {prompt}") 284 | context = self._get_page_context() 285 | context["elements"] = self._remove_empty_keys(context.get("elements", [])) 286 | 287 | assert_prompt = f""" 288 | You are a web automation assistant. Verify the following assertion and return ONLY a boolean value. 
289 | 290 | Page URL: {context['url']} 291 | Page Title: {context['title']} 292 | 293 | Assertion: {prompt} 294 | 295 | IMPORTANT: Return ONLY the word 'true' or 'false' (lowercase). No other text, no explanation. 296 | """ 297 | 298 | response = self.llm_client.complete(assert_prompt) 299 | cleaned_response = self._clean_response(response).lower() 300 | 301 | # Directly match true or false 302 | if cleaned_response == 'true': 303 | return True 304 | if cleaned_response == 'false': 305 | return False 306 | 307 | # If response contains other content, try extracting boolean 308 | if 'true' in cleaned_response.split(): 309 | return True 310 | if 'false' in cleaned_response.split(): 311 | return False 312 | 313 | raise ValueError("Response must be 'true' or 'false'") 314 | 315 | def ai_function_cases(self, prompt: str, language: str = "Chinese") -> str: 316 | """ 317 | Generate functional test cases based on the given prompt. 318 | 319 | Args: 320 | prompt (str): Natural language description of the functionality to test 321 | language (str): Language in which the test cases should be generated 322 | 323 | Returns: 324 | str: Generated test cases in a standard format 325 | 326 | Raises: 327 | ValueError: If the AI response cannot be parsed or contains invalid instructions 328 | """ 329 | logger.info(f"🪽 AI Function Case: {prompt}") 330 | context = self._get_page_context() 331 | 332 | format_hint = "" 333 | if prompt.startswith(('json[]', 'markdown[]')): 334 | format_hint = prompt.split(',')[0].strip() 335 | prompt = ','.join(prompt.split(',')[1:]).strip() 336 | 337 | # Provide different prompts based on the format 338 | if format_hint == 'json[]': 339 | # Construct the prompt for generating test cases 340 | case_prompt = f""" 341 | You are a web automation assistant. Based on the following page context, generate functional test cases. 
342 | 343 | Current page context: 344 | URL: {context['url']} 345 | Title: {context['title']} 346 | 347 | Available elements: 348 | {json.dumps(context['elements'], indent=2)} 349 | 350 | User request: {prompt} 351 | 352 | Return ONLY the test cases in the following format, no other text: 353 | [ 354 | {{ 355 | "Test Case ID": "001", 356 | "Steps": "Describe the steps to perform the test without mentioning element locators.", 357 | "Expected Result": "Describe the expected result." 358 | }}, 359 | {{ 360 | "Test Case ID": "002", 361 | "Steps": "Describe the steps to perform the test without mentioning element locators.", 362 | "Expected Result": "Describe the expected result." 363 | }} 364 | ] 365 | ... 366 | 367 | Finally, the output result is required to be in {language} 368 | """ 369 | elif format_hint == 'markdown[]': 370 | case_prompt = f""" 371 | You are a web automation assistant. Based on the following page context, generate functional test cases. 372 | 373 | Current page context: 374 | URL: {context['url']} 375 | Title: {context['title']} 376 | 377 | Available elements: 378 | {json.dumps(context['elements'], indent=2)} 379 | 380 | User request: {prompt} 381 | 382 | Return ONLY the test cases in the following format, no other text: 383 | | Test Case ID | Steps | Expected Result | 384 | |--------------|---------------------------------------------------|-------------------------------| 385 | | 001 | Describe the steps to perform the test without mentioning element locators. | Describe the expected result. | 386 | | 002 | Describe the steps to perform the test without mentioning element locators. | Describe the expected result. | 387 | ... 388 | 389 | Finally, the output result is required to be in {language} 390 | """ 391 | else: 392 | case_prompt = f""" 393 | You are a web automation assistant. Based on the following page context, generate functional test cases. 
394 | 395 | Current page context: 396 | URL: {context['url']} 397 | Title: {context['title']} 398 | 399 | Available elements: 400 | {json.dumps(context['elements'], indent=2)} 401 | 402 | User request: {prompt} 403 | 404 | Return ONLY the test cases in the following format, no other text: 405 | Test Case ID: 001 406 | Steps: Describe the steps to perform the test without mentioning element locators. 407 | Expected Result: Describe the expected result. 408 | 409 | Test Case ID: 002 410 | Steps: Describe the steps to perform the test without mentioning element locators. 411 | Expected Result: Describe the expected result. 412 | 413 | ... 414 | 415 | Finally, the output result is required to be in {language} 416 | """ 417 | 418 | try: 419 | response = self.llm_client.complete(case_prompt) 420 | cleaned_response = self._clean_response(response) 421 | 422 | logger.debug(f"""📄 Function Cases:\n {cleaned_response}""") 423 | return cleaned_response 424 | except Exception as e: 425 | raise ValueError(f"Failed to generate test cases. Error: {str(e)}\nResponse: {cleaned_response[:100]}...") 426 | 427 | 428 | def create_fixture(): 429 | """ 430 | Create a SeleniumAiFixture factory. 431 | """ 432 | return SeleniumAiFixture 433 | -------------------------------------------------------------------------------- /autowing/playwright/fixture.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Any, Dict 3 | 4 | from loguru import logger 5 | from playwright.sync_api import Page 6 | 7 | from autowing.core.ai_fixture_base import AiFixtureBase 8 | from autowing.core.llm.factory import LLMFactory 9 | from autowing.utils.transition import selector_to_locator 10 | 11 | 12 | class PlaywrightAiFixture(AiFixtureBase): 13 | """ 14 | A fixture class that combines Playwright with AI capabilities for web automation. 15 | Provides AI-driven interaction with web pages using various LLM providers. 
16 | """ 17 | 18 | def __init__(self, page: Page): 19 | """ 20 | Initialize the AI-powered Playwright fixture. 21 | 22 | Args: 23 | page (Page): The Playwright page object to automate 24 | """ 25 | super().__init__() 26 | self.page = page 27 | self.llm_client = LLMFactory.create() 28 | 29 | def _get_page_context(self) -> Dict[str, Any]: 30 | """ 31 | Extract context information from the current page. 32 | Collects information about visible elements and page metadata. 33 | 34 | Returns: 35 | Dict[str, Any]: A dictionary containing page URL, title, and information about 36 | visible interactive elements 37 | """ 38 | # Get basic page info 39 | basic_info = { 40 | "url": self.page.url, 41 | "title": self.page.title() 42 | } 43 | 44 | # Get key elements info 45 | elements_info = self.page.evaluate("""() => { 46 | const getVisibleElements = () => { 47 | const elements = []; 48 | const selectors = [ 49 | 'input', // input 50 | 'textarea', // input 51 | 'select', // input/click 52 | 'button', // click 53 | 'a', // click 54 | '[role="button"]', // click 55 | '[role="link"]', // click 56 | '[role="checkbox"]', // click 57 | '[role="radio"]', // click 58 | '[role="searchbox"]', // input 59 | 'summary', // click(
) 60 | '[draggable="true"]' // draggable 61 | ]; 62 | 63 | for (const selector of selectors) { 64 | document.querySelectorAll(selector).forEach(el => { 65 | if (el.offsetWidth > 0 && el.offsetHeight > 0) { 66 | elements.push({ 67 | tag: el.tagName.toLowerCase(), 68 | type: el.getAttribute('type') || null, 69 | placeholder: el.getAttribute('placeholder') || null, 70 | value: el.value || null, 71 | text: el.textContent?.trim() || '', 72 | aria: el.getAttribute('aria-label') || null, 73 | id: el.id || '', 74 | name: el.getAttribute('name') || null, 75 | class: el.className || '', 76 | draggable: el.getAttribute('draggable') || null 77 | }); 78 | } 79 | }); 80 | } 81 | return elements; 82 | }; 83 | return getVisibleElements(); 84 | }""") 85 | 86 | return { 87 | **basic_info, 88 | "elements": elements_info 89 | } 90 | 91 | def ai_action(self, prompt: str, iframe=None) -> None: 92 | """ 93 | Execute an AI-driven action on the page based on the given prompt. 94 | The AI will analyze the page context and perform the requested action. 95 | 96 | Args: 97 | prompt (str): Natural language description of the action to perform 98 | iframe: FrameLocator object 99 | 100 | Raises: 101 | ValueError: If the AI response cannot be parsed or contains invalid instructions 102 | Exception: If the requested action cannot be performed 103 | """ 104 | logger.info(f"🪽 AI Action: {prompt}") 105 | context = self._get_page_context() 106 | context["elements"] = self._remove_empty_keys(context.get("elements", [])) 107 | 108 | def compute_action(): 109 | action_prompt = f""" 110 | You are a web automation assistant. Based on the following page context, provide instructions for the requested action. 
111 | 112 | Current page context: 113 | URL: {context['url']} 114 | Title: {context['title']} 115 | 116 | Available elements: 117 | {json.dumps(context['elements'], indent=2)} 118 | 119 | User request: {prompt} 120 | 121 | Return ONLY a JSON object with the following structure, no other text: 122 | {{ 123 | "selector": "CSS selector or XPath to locate the element", 124 | "action": "fill", 125 | "value": "text to input", 126 | "key": "key to press if needed" 127 | }} 128 | Note: selector is used for a playwright location, for example:page.locator(selector) 129 | 130 | Example response: 131 | {{ 132 | "selector": "//input[id='search-input']", 133 | "action": "fill", 134 | "value": "search text", 135 | "key": "Enter" 136 | }} 137 | Note: The CSS selector the tag name (input/button/select...). 138 | """ 139 | response = self.llm_client.complete(action_prompt) 140 | cleaned_response = self._clean_response(response) 141 | return json.loads(cleaned_response) 142 | 143 | # Use cache manager to get or compute the instruction 144 | instruction = self._get_cached_or_compute(prompt, context, compute_action) 145 | # Execute the action using the instruction 146 | selector = instruction.get('selector') 147 | action = instruction.get('action') 148 | 149 | if not selector or not action: 150 | raise ValueError("Invalid instruction format") 151 | 152 | # Perform the action 153 | selector = selector_to_locator(selector) 154 | element = self.page.locator(selector) 155 | if iframe is not None: 156 | element = iframe.locator(selector) 157 | 158 | if action == 'click': 159 | element.click() 160 | elif action == 'fill': 161 | element.fill(instruction.get('value', '')) 162 | if instruction.get('key'): 163 | element.press(instruction.get('key')) 164 | elif action == 'press': 165 | element.press(instruction.get('key', 'Enter')) 166 | else: 167 | raise ValueError(f"Unsupported action: {action}") 168 | 169 | def ai_query(self, prompt: str) -> Any: 170 | """ 171 | Query information from the page 
using AI analysis. 172 | Supports various data formats including arrays, objects, and primitive types. 173 | 174 | Args: 175 | prompt (str): Natural language query about the page content. 176 | Can include format hints like 'string[]' or 'number'. 177 | 178 | Returns: 179 | Any: The query results in the requested format 180 | 181 | Raises: 182 | ValueError: If the AI response cannot be parsed into the requested format 183 | """ 184 | logger.info(f"🪽 AI Query: {prompt}") 185 | context = self._get_page_context() 186 | context["elements"] = self._remove_empty_keys(context.get("elements", [])) 187 | 188 | # Parse the requested data format 189 | format_hint = "" 190 | if prompt.startswith(('string[]', 'number[]', 'object[]')): 191 | format_hint = prompt.split(',')[0].strip() 192 | prompt = ','.join(prompt.split(',')[1:]).strip() 193 | 194 | # Provide different prompts based on the format 195 | if format_hint == 'string[]': 196 | query_prompt = f""" 197 | Extract text content matching the query. Return ONLY a JSON array of strings. 198 | 199 | Page: {context['url']} 200 | Title: {context['title']} 201 | Query: {prompt} 202 | 203 | Return format example: ["result1", "result2"] 204 | No other text or explanation. 205 | """ 206 | elif format_hint == 'number[]': 207 | query_prompt = f""" 208 | Extract numeric values matching the query. Return ONLY a JSON array of numbers. 209 | 210 | Page: {context['url']} 211 | Title: {context['title']} 212 | Query: {prompt} 213 | 214 | Return format example: [1, 2, 3] 215 | No other text or explanation. 216 | """ 217 | else: 218 | # Default prompt 219 | query_prompt = f""" 220 | Extract information matching the query. Return ONLY in valid JSON format. 221 | 222 | Page: {context['url']} 223 | Title: {context['title']} 224 | Query: {prompt} 225 | 226 | Return format: 227 | - For arrays: ["item1", "item2"] 228 | - For objects: {{"key": "value"}} 229 | - For single value: "text" or number 230 | 231 | No other text or explanation. 
232 | """ 233 | 234 | response = self.llm_client.complete(query_prompt) 235 | 236 | try: 237 | cleaned_response = self._clean_response(response) 238 | try: 239 | result = json.loads(cleaned_response) 240 | query_info = self._validate_result_format(result, format_hint) 241 | logger.debug(f"📄 Query: {query_info}") 242 | return query_info 243 | except json.JSONDecodeError: 244 | # If it's a string array format, try extracting from text 245 | if format_hint == 'string[]': 246 | # Split and clean text 247 | lines = [line.strip() for line in cleaned_response.split('\n') 248 | if line.strip() and not line.startswith(('-', '*', '#'))] 249 | 250 | # Extract lines containing query terms 251 | query_terms = [term.lower() for term in prompt.split() 252 | if len(term) > 2 and term.lower() not in ['the', 'and', 'for']] 253 | 254 | results = [] 255 | for line in lines: 256 | # Check if line contains query terms 257 | if any(term in line.lower() for term in query_terms): 258 | # Clean text 259 | text = line.strip('`"\'- ,') 260 | if ':' in text: 261 | text = text.split(':', 1)[1].strip() 262 | if text: 263 | results.append(text) 264 | 265 | if results: 266 | # Remove duplicates while preserving order 267 | seen = set() 268 | query_info = [x for x in results if not (x in seen or seen.add(x))] 269 | logger.debug(f"📄 Query: {query_info}") 270 | return query_info 271 | 272 | raise ValueError(f"Failed to parse response as JSON: {cleaned_response[:100]}...") 273 | 274 | except Exception as e: 275 | raise ValueError(f"Query failed. Error: {str(e)}\nResponse: {cleaned_response[:100]}...") 276 | 277 | def ai_assert(self, prompt: str) -> bool: 278 | """ 279 | Verify a condition on the page using AI analysis. 
280 | 281 | Args: 282 | prompt (str): Natural language description of the condition to verify 283 | 284 | Returns: 285 | bool: True if the condition is met, False otherwise 286 | 287 | Raises: 288 | ValueError: If the AI response cannot be parsed as a boolean value 289 | """ 290 | logger.info(f"🪽 AI Assert: {prompt}") 291 | context = self._get_page_context() 292 | context["elements"] = self._remove_empty_keys(context.get("elements", [])) 293 | 294 | # Optimize the prompt to be concise and explicitly require a boolean return 295 | assert_prompt = f""" 296 | You are a web automation assistant. Verify the following assertion and return ONLY a boolean value. 297 | 298 | Page URL: {context['url']} 299 | Page Title: {context['title']} 300 | 301 | Assertion: {prompt} 302 | 303 | IMPORTANT: Return ONLY the word 'true' or 'false' (lowercase). No other text, no explanation. 304 | """ 305 | 306 | response = self.llm_client.complete(assert_prompt) 307 | cleaned_response = self._clean_response(response).lower() 308 | 309 | try: 310 | # Directly match true or false 311 | if cleaned_response == 'true': 312 | return True 313 | if cleaned_response == 'false': 314 | return False 315 | 316 | # If response contains other content, try extracting boolean 317 | if 'true' in cleaned_response.split(): 318 | return True 319 | if 'false' in cleaned_response.split(): 320 | return False 321 | 322 | raise ValueError("Response must be 'true' or 'false'") 323 | 324 | except Exception as e: 325 | # Provide more useful error information 326 | raise ValueError( 327 | f"Failed to parse assertion result. Response: {cleaned_response[:100]}... " 328 | f"Error: {str(e)}" 329 | ) 330 | 331 | def ai_function_cases(self, prompt: str, language: str = "Chinese") -> str: 332 | """ 333 | Generate functional test cases based on the given prompt. 
334 | 335 | Args: 336 | prompt (str): Natural language description of the functionality to test 337 | language (str): Natural language description of the functionality to test 338 | 339 | Returns: 340 | str: Generated test cases in a standard format 341 | 342 | Raises: 343 | ValueError: If the AI response cannot be parsed or contains invalid instructions 344 | """ 345 | logger.info(f"🪽 AI Function Case: {prompt}") 346 | context = self._get_page_context() 347 | 348 | format_hint = "" 349 | if prompt.startswith(('json[]', 'markdown[]')): 350 | format_hint = prompt.split(',')[0].strip() 351 | prompt = ','.join(prompt.split(',')[1:]).strip() 352 | 353 | # Provide different prompts based on the format 354 | if format_hint == 'json[]': 355 | # Construct the prompt for generating test cases 356 | case_prompt = f""" 357 | You are a web automation assistant. Based on the following page context, generate functional test cases. 358 | 359 | Current page context: 360 | URL: {context['url']} 361 | Title: {context['title']} 362 | 363 | Available elements: 364 | {json.dumps(context['elements'], indent=2)} 365 | 366 | User request: {prompt} 367 | 368 | Return ONLY the test cases in the following format, no other text: 369 | [ 370 | {{ 371 | "Test Case ID": "001", 372 | "Steps": "Describe the steps to perform the test without mentioning element locators.", 373 | "Expected Result": "Describe the expected result." 374 | }}, 375 | {{ 376 | "Test Case ID": "002", 377 | "Steps": "Describe the steps to perform the test without mentioning element locators.", 378 | "Expected Result": "Describe the expected result." 379 | }} 380 | ] 381 | ... 382 | 383 | Finally, the output result is required to be in {language} 384 | """ 385 | elif format_hint == 'markdown[]': 386 | case_prompt = f""" 387 | You are a web automation assistant. Based on the following page context, generate functional test cases. 
388 | 389 | Current page context: 390 | URL: {context['url']} 391 | Title: {context['title']} 392 | 393 | Available elements: 394 | {json.dumps(context['elements'], indent=2)} 395 | 396 | User request: {prompt} 397 | 398 | Return ONLY the test cases in the following format, no other text: 399 | | Test Case ID | Steps | Expected Result | 400 | |--------------|---------------------------------------------------|-------------------------------| 401 | | 001 | Describe the steps to perform the test without mentioning element locators. | Describe the expected result. | 402 | | 002 | Describe the steps to perform the test without mentioning element locators. | Describe the expected result. | 403 | ... 404 | 405 | Finally, the output result is required to be in {language} 406 | """ 407 | else: 408 | case_prompt = f""" 409 | You are a web automation assistant. Based on the following page context, generate functional test cases. 410 | 411 | Current page context: 412 | URL: {context['url']} 413 | Title: {context['title']} 414 | 415 | Available elements: 416 | {json.dumps(context['elements'], indent=2)} 417 | 418 | User request: {prompt} 419 | 420 | Return ONLY the test cases in the following format, no other text: 421 | Test Case ID: 001 422 | Steps: Describe the steps to perform the test without mentioning element locators. 423 | Expected Result: Describe the expected result. 424 | 425 | Test Case ID: 002 426 | Steps: Describe the steps to perform the test without mentioning element locators. 427 | Expected Result: Describe the expected result. 428 | 429 | ... 430 | 431 | Finally, the output result is required to be in {language} 432 | """ 433 | 434 | try: 435 | response = self.llm_client.complete(case_prompt) 436 | cleaned_response = self._clean_response(response) 437 | 438 | logger.debug(f"""📄 Function Cases:\n {cleaned_response}""") 439 | return cleaned_response 440 | except Exception as e: 441 | raise ValueError(f"Failed to generate test cases. 
Error: {str(e)}\nResponse: {cleaned_response[:100]}...") 442 | 443 | 444 | def create_fixture(): 445 | """ 446 | Create a PlaywrightAiFixture factory. 447 | 448 | Returns: 449 | Callable[[Page], PlaywrightAiFixture]: A factory function that creates 450 | PlaywrightAiFixture instances 451 | """ 452 | return PlaywrightAiFixture 453 | --------------------------------------------------------------------------------