├── .github └── workflows │ ├── build.yaml │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── agentbrowser ├── __init__.py ├── browser.py ├── test.py └── test_async.py ├── requirements.txt ├── resources └── image.jpg ├── setup.py ├── test.py └── test_async.py /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Python 26 | uses: actions/setup-python@v3 27 | with: 28 | python-version: '3.x' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install build 33 | - name: Build package 34 | run: python -m build 35 | - name: Publish package 36 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 37 | with: 38 | user: ${{ secrets.pypi_username }} 39 | password: ${{ secrets.pypi_password }} 40 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Lint and Test 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.10"] 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Set up Python ${{ matrix.python-version }} 14 
| uses: actions/setup-python@v3 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install pytest pytest-asyncio 21 | pip install -r requirements.txt 22 | - name: Running tests 23 | run: | 24 | pytest test.py 25 | pytest test_async.py -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | .DS_Store 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 103 | #poetry.lock 104 | 105 | # pdm 106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
107 | #pdm.lock 108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 109 | # in version control. 110 | # https://pdm.fming.dev/#use-with-ide 111 | .pdm.toml 112 | 113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | 150 | # pytype static type analyzer 151 | .pytype/ 152 | 153 | # Cython debug symbols 154 | cython_debug/ 155 | 156 | # PyCharm 157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 159 | # and can be added to the global gitignore or merged into this file. For a more nuclear 160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
161 | #.idea/ 162 | 163 | .vscode/ 164 | .chroma -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 M̵̞̗̝̼̅̏̎͝Ȯ̴̝̻̊̃̋̀Õ̷̼͋N̸̩̿͜ ̶̜̠̹̼̩͒ 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # agentbrowser 2 | 3 | A browser for your agent, built on Playwright. 
4 | 5 | 6 | 7 | [![Lint and Test](https://github.com/AutonomousResearchGroup/agentbrowser/actions/workflows/test.yml/badge.svg)](https://github.com/AutonomousResearchGroup/agentbrowser/actions/workflows/test.yml) 8 | [![PyPI version](https://badge.fury.io/py/agentbrowser.svg)](https://badge.fury.io/py/agentbrowser) 9 | 10 | # Installation 11 | 12 | ```bash 13 | pip install agentbrowser 14 | ``` 15 | 16 | # Usage 17 | 18 | ## Importing into your project 19 | 20 | ```python 21 | from agentbrowser import ( 22 | get_browser, 23 | init_browser, 24 | navigate_to, 25 | get_body_html, 26 | get_body_text, 27 | get_document_html, 28 | create_page, 29 | close_page, 30 | evaluate_javascript, 31 | ) 32 | ``` 33 | 34 | ## Quickstart 35 | 36 | ```python 37 | from agentbrowser import ( 38 | create_page, 39 | get_body_text, 40 | ) 41 | 42 | # Open a new page at a URL 43 | page = create_page("https://google.com") 44 | 45 | # Get the text from the page 46 | text = get_body_text(page) 47 | 48 | print(text) 49 | ``` 50 | 51 | ## API Documentation 52 | 53 | ### `get_browser()` 54 | 55 | Get a Playwright browser. If the browser doesn't exist, initializes a new one. 56 | 57 | Example usage: 58 | 59 | ```python 60 | browser = get_browser() 61 | ``` 62 | 63 | ### `init_browser(headless=True, executable_path=None)` 64 | 65 | Initialize a new Playwright browser. 66 | 67 | Parameters: 68 | 69 | - `headless`: Whether the browser should be run in headless mode, defaults to True. 70 | - `executable_path`: Path to a Chromium or Chrome executable to run instead of the bundled Chromium. 71 | 72 | Example usage: 73 | 74 | ```python 75 | init_browser(headless=False, executable_path="/usr/bin/google-chrome") 76 | ``` 77 | 78 | ### `create_page(site=None)` 79 | 80 | Create a new page in the browser. If a site is provided, navigate to that site. 81 | 82 | Parameters: 83 | 84 | - `site`: URL to navigate to, defaults to None. 
85 | 86 | Example usage: 87 | 88 | ```python 89 | page = create_page("https://www.example.com") 90 | ``` 91 | 92 | ### `close_page(page)` 93 | 94 | Close a page. 95 | 96 | Parameters: 97 | 98 | - `page`: The page to close. 99 | 100 | Example usage: 101 | 102 | ```python 103 | page = create_page("https://www.example.com") 104 | close_page(page) 105 | ``` 106 | 107 | ### `navigate_to(url, page, wait_until="domcontentloaded")` 108 | 109 | Navigate to a URL in a page. 110 | 111 | Parameters: 112 | 113 | - `url`: The URL to navigate to. 114 | - `page`: The page to navigate in. 115 | 116 | Example usage: 117 | 118 | ```python 119 | page = create_page() 120 | navigate_to("https://www.example.com", page) 121 | ``` 122 | 123 | ### `get_document_html(page)` 124 | 125 | Get the HTML content of a page. 126 | 127 | Parameters: 128 | 129 | - `page`: The page to get the HTML from. 130 | 131 | Example usage: 132 | 133 | ```python 134 | page = create_page("https://www.example.com") 135 | html = get_document_html(page) 136 | print(html) 137 | ``` 138 | 139 | ### `get_page_title(page)` 140 | 141 | Get the title of a page. 142 | 143 | Parameters: 144 | 145 | - `page`: The page to get the title from. 146 | 147 | Example usage: 148 | 149 | ```python 150 | page = create_page("https://www.example.com") 151 | title = get_page_title(page) 152 | print(title) 153 | ``` 154 | 155 | ### `get_body_text(page)` 156 | 157 | Get the text content of a page's body. 158 | 159 | Parameters: 160 | 161 | - `page`: The page to get the text from. 162 | 163 | Example usage: 164 | 165 | ```python 166 | page = create_page("https://www.example.com") 167 | text = get_body_text(page) 168 | print(text) 169 | ``` 170 | 171 | ### `get_body_html(page)` 172 | 173 | Get the HTML content of a page's body. 174 | 175 | Parameters: 176 | 177 | - `page`: The page to get the HTML from. 
178 | 179 | Example usage: 180 | 181 | ```python 182 | page = create_page("https://www.example.com") 183 | body_html = get_body_html(page) 184 | print(body_html) 185 | ``` 186 | 187 | ### `screenshot_page(page)` 188 | 189 | Get a screenshot of a page. 190 | 191 | Parameters: 192 | 193 | - `page`: The page to screenshot. 194 | 195 | Example usage: 196 | 197 | ```python 198 | page = create_page("https://www.example.com") 199 | screenshot = screenshot_page(page) 200 | with open("screenshot.png", "wb") as f: 201 | f.write(screenshot) 202 | ``` 203 | 204 | ### `evaluate_javascript(code, page)` 205 | 206 | Evaluate JavaScript code in a page. 207 | 208 | Parameters: 209 | 210 | - `code`: The JavaScript code to evaluate. 211 | - `page`: The page to evaluate the code in. 212 | 213 | Example usage: 214 | 215 | ```python 216 | page = create_page("https://www.example.com") 217 | result = evaluate_javascript("document.title", page) 218 | print(result) 219 | ``` 220 | 221 | ### `find_chrome()` 222 | 223 | Find the Chrome executable. Returns the path to the Chrome executable, or None if it could not be found. 224 | 225 | Example usage: 226 | 227 | ```python 228 | chrome_path = find_chrome() 229 | print(chrome_path) 230 | ``` 231 | 232 | # Contributions Welcome 233 | 234 | If you like this library and want to contribute in any way, please feel free to submit a PR and I will review it. Please note that the goal here is simplicity and accessibility, using common language and few dependencies. 
235 | -------------------------------------------------------------------------------- /agentbrowser/__init__.py: -------------------------------------------------------------------------------- 1 | from .browser import * -------------------------------------------------------------------------------- /agentbrowser/browser.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from playwright.async_api import async_playwright 3 | 4 | import os 5 | import platform 6 | 7 | browser = None 8 | context = None 9 | 10 | 11 | # Synchronous functions 12 | 13 | 14 | def get_browser(): 15 | """ 16 | Get a Playwright browser. 17 | 18 | If the browser doesn't exist, initializes a new one. 19 | 20 | :return: A Playwright browser. 21 | :rtype: playwright.async_api.Browser 22 | """ 23 | asyncio.get_event_loop() 24 | global browser 25 | if browser is None: 26 | init_browser() 27 | return browser 28 | 29 | 30 | def init_browser(headless=True, executable_path=None): 31 | """ 32 | Initialize a new Playwright browser. 33 | 34 | :param headless: Whether the browser should be run in headless mode, defaults to True. 35 | :type headless: bool, optional 36 | :param executable_path: Path to a Chromium or Chrome executable to run instead of the bundled Chromium. 37 | :type executable_path: str, optional 38 | """ 39 | asyncio.get_event_loop().run_until_complete( 40 | async_init_browser(headless, executable_path) 41 | ) 42 | 43 | 44 | def create_page(site=None): 45 | """ 46 | Create a new page in the browser. 47 | 48 | If a site is provided, navigate to that site. 49 | 50 | :param site: URL to navigate to, defaults to None. 51 | :type site: str, optional 52 | :return: A new page. 53 | :rtype: playwright.async_api.Page 54 | """ 55 | return asyncio.get_event_loop().run_until_complete(async_create_page(site)) 56 | 57 | 58 | def close_page(page): 59 | """ 60 | Close a page. 61 | 62 | :param page: The page to close. 
63 | :type page: playwright.async_api.Page 64 | """ 65 | asyncio.get_event_loop().run_until_complete(async_close_page(page)) 66 | 67 | 68 | def navigate_to(url, page, wait_until="domcontentloaded", timeout=10000): 69 | """ 70 | Navigate to a URL in a page. 71 | 72 | :param url: The URL to navigate to. 73 | :type url: str 74 | :param page: The page to navigate in. 75 | :type page: playwright.async_api.Page 76 | :return: The page after navigation. 77 | :rtype: playwright.async_api.Page 78 | """ 79 | return asyncio.get_event_loop().run_until_complete(async_navigate_to(url, page, wait_until=wait_until, timeout=timeout)) 80 | 81 | 82 | def get_document_html(page): 83 | """ 84 | Get the HTML content of a page. 85 | 86 | :param page: The page to get the HTML from. 87 | :type page: playwright.async_api.Page 88 | :return: The HTML content of the page. 89 | :rtype: str 90 | """ 91 | return asyncio.get_event_loop().run_until_complete(async_get_document_html(page)) 92 | 93 | 94 | def get_page_title(page): 95 | """ 96 | Get the title of a page. 97 | 98 | :param page: The page to get the title from. 99 | :type page: playwright.async_api.Page 100 | :return: The title of the page. 101 | :rtype: str 102 | """ 103 | return asyncio.get_event_loop().run_until_complete(async_get_page_title(page)) 104 | 105 | 106 | def get_body_text(page): 107 | """ 108 | Get the text content of a page's body. 109 | 110 | :param page: The page to get the text from. 111 | :type page: playwright.async_api.Page 112 | :return: The text content of the page's body. 113 | :rtype: str 114 | """ 115 | return asyncio.get_event_loop().run_until_complete(async_get_body_text(page)) 116 | 117 | 118 | def get_body_html(page): 119 | """ 120 | Get the HTML content of a page's body. 121 | 122 | :param page: The page to get the HTML from. 123 | :type page: playwright.async_api.Page 124 | :return: The HTML content of the page's body. 
125 | :rtype: str 126 | """ 127 | return asyncio.get_event_loop().run_until_complete(async_get_body_html(page)) 128 | 129 | 130 | def screenshot_page(page): 131 | """ 132 | Get a screenshot of a page. 133 | 134 | :param page: The page to screenshot. 135 | :type page: playwright.async_api.Page 136 | :return: A bytes object representing the screenshot. 137 | :rtype: bytes 138 | """ 139 | return asyncio.get_event_loop().run_until_complete(async_screenshot_page(page)) 140 | 141 | 142 | def evaluate_javascript(code, page): 143 | """ 144 | Evaluate JavaScript code in a page. 145 | 146 | :param code: The JavaScript code to evaluate. 147 | :type code: str 148 | :param page: The page to evaluate the code in. 149 | :type page: playwright.async_api.Page 150 | :return: The result of the evaluated code. 151 | """ 152 | return asyncio.get_event_loop().run_until_complete( 153 | async_evaluate_javascript(code, page) 154 | ) 155 | 156 | 157 | # Asynchronous functions 158 | 159 | 160 | async def async_get_browser(): 161 | """ 162 | Get a Playwright browser asynchronously. 163 | 164 | If the browser doesn't exist, initializes a new one. 165 | 166 | :return: A Playwright browser. 167 | :rtype: playwright.async_api._generated.Browser 168 | """ 169 | global browser 170 | if browser is None: 171 | await async_init_browser() 172 | return browser 173 | 174 | 175 | async def async_init_browser(headless=True, executable_path=None): 176 | """ 177 | Initialize a new Playwright browser asynchronously. 178 | 179 | :param headless: Whether the browser should be run in headless mode, defaults to True. 180 | :type headless: bool, optional 181 | :param executable_path: Path to a Chromium or Chrome executable to run instead of the bundled Chromium. 182 | :type executable_path: str, optional 183 | :return: A new Playwright browser. 
184 | :rtype: playwright.async_api._generated.Browser 185 | """ 186 | global browser 187 | 188 | if executable_path is None: 189 | executable_path = find_chrome() 190 | 191 | if browser is None: 192 | playwright = await async_playwright().start() 193 | browser = await playwright.chromium.launch( 194 | headless=headless, 195 | executable_path=executable_path, 196 | ) 197 | return browser 198 | 199 | 200 | async def async_create_page(site=None): 201 | """ 202 | Create a new page in the browser asynchronously. 203 | 204 | If a site is provided, navigate to that site. 205 | 206 | :param site: URL to navigate to, defaults to None. 207 | :type site: str, optional 208 | :return: A new page. 209 | :rtype: playwright.async_api._generated.Page 210 | """ 211 | global browser 212 | if browser is None: 213 | await async_init_browser() 214 | context = await browser.new_context() 215 | page = await context.new_page() 216 | if site: 217 | await page.goto(site, wait_until="domcontentloaded") 218 | return page 219 | 220 | 221 | async def async_close_page(page): 222 | """ 223 | Close a page asynchronously. 224 | 225 | :param page: The page to close. 226 | :type page: playwright.async_api._generated.Page 227 | """ 228 | await page.close() 229 | 230 | 231 | async def async_navigate_to(url, page, wait_until="domcontentloaded", timeout=10000): 232 | """ 233 | Navigate to a URL in a page asynchronously. 234 | 235 | :param url: The URL to navigate to. 236 | :type url: str 237 | :param page: The page to navigate in. 238 | :type page: playwright.async_api._generated.Page 239 | :return: The page after navigation. 
240 | :rtype: playwright.async_api._generated.Page 241 | """ 242 | if not page: 243 | page = await async_create_page(None) 244 | try: 245 | await page.goto(url, wait_until=wait_until, timeout=timeout) 246 | except Exception as e: 247 | print("Error navigating to: " + url) 248 | print(e) 249 | return None 250 | return page 251 | 252 | 253 | async def async_get_document_html(page): 254 | """ 255 | Get the HTML content of a page asynchronously. 256 | 257 | :param page: The page to get the HTML from. 258 | :type page: playwright.async_api._generated.Page 259 | :return: The HTML content of the page. 260 | :rtype: str 261 | """ 262 | return await page.content() 263 | 264 | 265 | async def async_get_page_title(page): 266 | """ 267 | Get the title of a page asynchronously. 268 | 269 | :param page: The page to get the title from. 270 | :type page: playwright.async_api._generated.Page 271 | :return: The title of the page. 272 | :rtype: str 273 | """ 274 | return await page.title() 275 | 276 | 277 | async def async_get_body_text(page): 278 | """ 279 | Get the text content of a page's body asynchronously. 280 | 281 | :param page: The page to get the text from. 282 | :type page: playwright.async_api._generated.Page 283 | :return: The text content of the page's body. 284 | :rtype: str 285 | """ 286 | body_handle = await page.query_selector("body") 287 | return await page.evaluate("(body) => body.innerText", body_handle) 288 | 289 | 290 | async def async_get_body_html(page): 291 | """ 292 | Get the HTML content of a page's body asynchronously. 293 | 294 | :param page: The page to get the HTML from. 295 | :type page: playwright.async_api._generated.Page 296 | :return: The HTML content of the page's body. 297 | :rtype: str 298 | """ 299 | body_handle = await page.query_selector("body") 300 | return await page.evaluate("(body) => body.innerHTML", body_handle) 301 | 302 | 303 | async def async_screenshot_page(page): 304 | """ 305 | Get a screenshot of a page asynchronously. 
306 | 307 | :param page: The page to screenshot. 308 | :type page: playwright.async_api._generated.Page 309 | :return: A bytes object representing the screenshot. 310 | :rtype: bytes 311 | """ 312 | return await page.screenshot() 313 | 314 | 315 | async def async_evaluate_javascript(code, page): 316 | """ 317 | Evaluate JavaScript code in a page asynchronously. 318 | 319 | :param code: The JavaScript code to evaluate. 320 | :type code: str 321 | :param page: The page to evaluate the code in. 322 | :type page: playwright.sync_api.Page 323 | :return: The result of the evaluated code. 324 | """ 325 | return await page.evaluate(code) 326 | 327 | 328 | def find_chrome(): 329 | """ 330 | Find the Chrome executable. 331 | 332 | :return: The path to the Chrome executable, or None if it could not be found. 333 | :rtype: str 334 | """ 335 | if platform.system() == "Windows": 336 | paths = [ 337 | os.path.join( 338 | os.environ["ProgramFiles(x86)"], 339 | "Google", 340 | "Chrome", 341 | "Application", 342 | "chrome.exe", 343 | ), 344 | os.path.join( 345 | os.environ["ProgramFiles"], 346 | "Google", 347 | "Chrome", 348 | "Application", 349 | "chrome.exe", 350 | ), 351 | os.path.join( 352 | os.environ["LocalAppData"], 353 | "Google", 354 | "Chrome", 355 | "Application", 356 | "chrome.exe", 357 | ), 358 | ] 359 | elif platform.system() == "Darwin": 360 | paths = ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"] 361 | elif platform.system() == "Linux": 362 | paths = [ 363 | "/usr/bin/google-chrome", 364 | "/usr/bin/chromium", 365 | "/usr/bin/chromium-browser", 366 | ] 367 | else: 368 | print("Unsupported platform") 369 | return None 370 | 371 | for path in paths: 372 | if os.path.exists(path): 373 | return path 374 | 375 | return None 376 | -------------------------------------------------------------------------------- /agentbrowser/test.py: -------------------------------------------------------------------------------- 1 | from agentbrowser import ( 2 | 
create_page, 3 | evaluate_javascript, 4 | get_body_html, 5 | get_body_text, 6 | get_document_html, 7 | navigate_to 8 | ) 9 | from agentbrowser.browser import close_page, get_page_title, init_browser, get_browser, screenshot_page 10 | 11 | test_article = "https://test-page-to-crawl.vercel.app" 12 | 13 | 14 | def test_get_browser(): 15 | browser = get_browser() 16 | assert browser is not None, "Failed to get the browser" 17 | print("test_get_browser passed.") 18 | 19 | 20 | def test_init_browser(): 21 | init_browser() 22 | browser = get_browser() 23 | assert browser is not None, "Failed to initialize the browser" 24 | print("test_init_browser passed.") 25 | 26 | 27 | def test_navigation(): 28 | test_page = create_page("https://www.google.com") 29 | 30 | # navigate to google 31 | navigate_to( 32 | "https://www.yahoo.com", 33 | test_page, 34 | wait_until="domcontentloaded", 35 | ) 36 | 37 | assert test_page.url != "https://www.google.com", "Navigation failed." 38 | assert test_page is not None, "Page navigation failed." 39 | print("test_navigation passed.") 40 | 41 | 42 | def test_get_page_title(): 43 | test_page = create_page("https://www.google.com") 44 | title = get_page_title(test_page) 45 | assert title == "Google", "Failed to get the correct page title." 46 | print("test_get_page_title passed.") 47 | 48 | 49 | def test_screenshot_page(): 50 | test_page = create_page("https://www.google.com") 51 | screenshot = screenshot_page(test_page) 52 | assert screenshot is not None, "Failed to take a screenshot." 53 | print("test_screenshot_page passed.") 54 | 55 | 56 | def test_close_page(): 57 | test_page = create_page("https://www.google.com") 58 | close_page(test_page) 59 | assert test_page.is_closed(), "Failed to close the page." 60 | print("test_close_page passed.") 61 | 62 | 63 | def test_body_html(): 64 | test_page = create_page(test_article) 65 | body_html = get_body_html(test_page) 66 | assert body_html is not None, "Failed to get body html." 
67 | print("test_body_html passed.") 68 | 69 | 70 | def test_document_html(): 71 | test_page = create_page(test_article) 72 | html = get_document_html(test_page) 73 | assert html is not None, "Failed to get document html." 74 | print("test_document_html passed.") 75 | 76 | 77 | def test_body_text(): 78 | test_page = create_page(test_article) 79 | body = get_body_text(test_page) 80 | print(body) 81 | assert body is not None, "Failed to get body text." 82 | print("test_body_text passed.") 83 | 84 | 85 | def test_javascript_evaluation(): 86 | test_page = create_page(test_article) 87 | result = evaluate_javascript( 88 | """ 89 | var x = 1; 90 | var y = 2; 91 | var z = x + y; 92 | z; 93 | """, 94 | test_page, 95 | ) 96 | assert result == 3, "Javascript evaluation failed." 97 | print("test_javascript_evaluation passed.") 98 | -------------------------------------------------------------------------------- /agentbrowser/test_async.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import pytest 3 | from agentbrowser import ( 4 | async_get_browser, 5 | async_init_browser, 6 | async_navigate_to, 7 | async_get_body_html, 8 | async_get_body_text, 9 | async_get_document_html, 10 | async_create_page, 11 | async_close_page, 12 | async_evaluate_javascript, 13 | ) 14 | 15 | from agentbrowser.browser import browser, context 16 | 17 | test_article = "https://test-page-to-crawl.vercel.app" 18 | 19 | @pytest.mark.asyncio 20 | async def test_async_create_page(): 21 | global browser 22 | global context 23 | browser = None 24 | context = None 25 | await async_init_browser() 26 | test_page = await async_create_page("https://www.google.com/") 27 | assert test_page is not None, "Page navigation failed." 28 | assert test_page.url == "https://www.google.com/", "Navigation failed." 29 | print("test_create_page passed.") 30 | 31 | await async_close_page(test_page) 32 | assert test_page.is_closed(), "Page failed to close." 
33 | print("test_close_page passed.") 34 | 35 | test_page = await async_create_page("https://www.google.com") 36 | 37 | # navigate to google 38 | await async_navigate_to( 39 | "https://www.yahoo.com", 40 | test_page, 41 | ) 42 | 43 | assert test_page.url != "https://www.google.com", "Navigation failed." 44 | assert test_page is not None, "Page navigation failed." 45 | print("test_async_navigation passed.") 46 | 47 | body_html = await async_get_body_html(test_page) 48 | assert body_html is not None, "Failed to get body html." 49 | print("test_async_body_html passed.") 50 | 51 | html = await async_get_document_html(test_page) 52 | assert html is not None, "Failed to get document html." 53 | print("test_async_document_html passed.") 54 | 55 | body = await async_get_body_text(test_page) 56 | assert body is not None, "Failed to get body text." 57 | print("test_async_body_text passed.") 58 | 59 | result = await async_evaluate_javascript( 60 | """ 61 | var x = 1; 62 | var y = 2; 63 | var z = x + y; 64 | z; 65 | """, 66 | test_page, 67 | ) 68 | assert result == 3, "Javascript evaluation failed." 
69 | print("test_async_javascript_evaluation passed.") 70 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | playwright -------------------------------------------------------------------------------- /resources/image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elizaOS/agentbrowser/40444259e47fb6a1bcc7b3387cc6fdba84fe303d/resources/image.jpg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | long_description = "" 4 | with open("README.md", "r") as fh: 5 | long_description = fh.read() 6 | # search for any lines that contain