├── .github
    └── workflows
    │   ├── build.yaml
    │   └── test.yml
├── .gitignore
├── LICENSE
├── README.md
├── agentbrowser
    ├── __init__.py
    ├── browser.py
    ├── test.py
    └── test_async.py
├── requirements.txt
├── resources
    └── image.jpg
├── setup.py
├── test.py
└── test_async.py


/.github/workflows/build.yaml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
 3 | 
 4 | # This workflow uses actions that are not certified by GitHub.
 5 | # They are provided by a third-party and are governed by
 6 | # separate terms of service, privacy policy, and support
 7 | # documentation.
 8 | 
 9 | name: Upload Python Package
10 | 
11 | on:
12 |   release:
13 |     types: [published]
14 | 
15 | permissions:
16 |   contents: read
17 | 
18 | jobs:
19 |   deploy:
20 | 
21 |     runs-on: ubuntu-latest
22 | 
23 |     steps:
24 |     - uses: actions/checkout@v3
25 |     - name: Set up Python
26 |       uses: actions/setup-python@v3
27 |       with:
28 |         python-version: '3.x'
29 |     - name: Install dependencies
30 |       run: |
31 |         python -m pip install --upgrade pip
32 |         pip install build
33 |     - name: Build package
34 |       run: python -m build
35 |     - name: Publish package
36 |       uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
37 |       with:
38 |         user: ${{ secrets.pypi_username }}
39 |         password: ${{ secrets.pypi_password }}
40 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Lint and Test
 2 | 
 3 | on: [push]
 4 | 
 5 | jobs:
 6 |   build:
 7 |     runs-on: ubuntu-latest
 8 |     strategy:
 9 |       matrix:
10 |         python-version: ["3.10"]
11 |     steps:
12 |     - uses: actions/checkout@v3
13 |     - name: Set up Python ${{ matrix.python-version }}
14 |       uses: actions/setup-python@v3
15 |       with:
16 |         python-version: ${{ matrix.python-version }}
17 |     - name: Install dependencies
18 |       run: |
19 |         python -m pip install --upgrade pip
20 |         pip install pytest pytest-asyncio
21 |         pip install -r requirements.txt
22 |     - name: Running tests
23 |       run: |
24 |         pytest test.py
25 |         pytest test_async.py


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | .DS_Store
  6 | 
  7 | # C extensions
  8 | *.so
  9 | 
 10 | # Distribution / packaging
 11 | .Python
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | cover/
 54 | 
 55 | # Translations
 56 | *.mo
 57 | *.pot
 58 | 
 59 | # Django stuff:
 60 | *.log
 61 | local_settings.py
 62 | db.sqlite3
 63 | db.sqlite3-journal
 64 | 
 65 | # Flask stuff:
 66 | instance/
 67 | .webassets-cache
 68 | 
 69 | # Scrapy stuff:
 70 | .scrapy
 71 | 
 72 | # Sphinx documentation
 73 | docs/_build/
 74 | 
 75 | # PyBuilder
 76 | .pybuilder/
 77 | target/
 78 | 
 79 | # Jupyter Notebook
 80 | .ipynb_checkpoints
 81 | 
 82 | # IPython
 83 | profile_default/
 84 | ipython_config.py
 85 | 
 86 | # pyenv
 87 | #   For a library or package, you might want to ignore these files since the code is
 88 | #   intended to run in multiple environments; otherwise, check them in:
 89 | # .python-version
 90 | 
 91 | # pipenv
 92 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 93 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 94 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 95 | #   install all needed dependencies.
 96 | #Pipfile.lock
 97 | 
 98 | # poetry
 99 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
101 | #   commonly ignored for libraries.
102 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103 | #poetry.lock
104 | 
105 | # pdm
106 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107 | #pdm.lock
108 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109 | #   in version control.
110 | #   https://pdm.fming.dev/#use-with-ide
111 | .pdm.toml
112 | 
113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
114 | __pypackages__/
115 | 
116 | # Celery stuff
117 | celerybeat-schedule
118 | celerybeat.pid
119 | 
120 | # SageMath parsed files
121 | *.sage.py
122 | 
123 | # Environments
124 | .env
125 | .venv
126 | env/
127 | venv/
128 | ENV/
129 | env.bak/
130 | venv.bak/
131 | 
132 | # Spyder project settings
133 | .spyderproject
134 | .spyproject
135 | 
136 | # Rope project settings
137 | .ropeproject
138 | 
139 | # mkdocs documentation
140 | /site
141 | 
142 | # mypy
143 | .mypy_cache/
144 | .dmypy.json
145 | dmypy.json
146 | 
147 | # Pyre type checker
148 | .pyre/
149 | 
150 | # pytype static type analyzer
151 | .pytype/
152 | 
153 | # Cython debug symbols
154 | cython_debug/
155 | 
156 | # PyCharm
157 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
158 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
159 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
160 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
161 | #.idea/
162 | 
163 | .vscode/
164 | .chroma


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 M̵̞̗̝̼̅̏̎͝Ȯ̴̝̻̊̃̋̀Õ̷̼͋N̸̩̿͜ ̶̜̠̹̼̩͒
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # agentbrowser
  2 | 
  3 | A browser for your agent, built on Playwright.
  4 | 
  5 | <img src="resources/image.jpg">
  6 | 
  7 | [![Lint and Test](https://github.com/AutonomousResearchGroup/agentbrowser/actions/workflows/test.yml/badge.svg)](https://github.com/AutonomousResearchGroup/agentbrowser/actions/workflows/test.yml)
  8 | [![PyPI version](https://badge.fury.io/py/agentbrowser.svg)](https://badge.fury.io/py/agentbrowser)
  9 | 
 10 | # Installation
 11 | 
 12 | ```bash
 13 | pip install agentbrowser
 14 | ```
 15 | 
 16 | # Usage
 17 | 
 18 | ## Importing into your project
 19 | 
 20 | ```python
 21 | from agentbrowser import (
 22 |     get_browser,
 23 |     init_browser,
 24 |     navigate_to,
 25 |     get_body_html,
 26 |     get_body_text,
 27 |     get_document_html,
 28 |     create_page,
 29 |     close_page,
 30 |     evaluate_javascript,
 31 | )
 32 | ```
 33 | 
 34 | ## Quickstart
 35 | 
 36 | ```python
 37 | from agentbrowser import (
 38 |     create_page,
 39 |     get_body_text,
 40 | )
 41 | 
 42 | # Open a new page at a URL
 43 | page = create_page("https://google.com")
 44 | 
 45 | # Get the text from the page
 46 | text = get_body_text(page)
 47 | 
 48 | print(text)
 49 | ```
 50 | 
 51 | ## API Documentation
 52 | 
 53 | ### `get_browser()`
 54 | 
 55 | Get a Playwright browser. If the browser doesn't exist, initializes a new one.
 56 | 
 57 | Example usage:
 58 | 
 59 | ```python
 60 | browser = get_browser()
 61 | ```
 62 | 
 63 | ### `init_browser(headless=True, executable_path=None)`
 64 | 
 65 | Initialize a new Playwright browser.
 66 | 
 67 | Parameters:
 68 | 
 69 | - `headless`: Whether the browser should be run in headless mode, defaults to True.
 70 | - `executable_path`: Path to a Chromium or Chrome executable to run instead of the bundled Chromium.
 71 | 
 72 | Example usage:
 73 | 
 74 | ```python
 75 | init_browser(headless=False, executable_path="/usr/bin/google-chrome")
 76 | ```
 77 | 
 78 | ### `create_page(site=None)`
 79 | 
 80 | Create a new page in the browser. If a site is provided, navigate to that site.
 81 | 
 82 | Parameters:
 83 | 
 84 | - `site`: URL to navigate to, defaults to None.
 85 | 
 86 | Example usage:
 87 | 
 88 | ```python
 89 | page = create_page("https://www.example.com")
 90 | ```
 91 | 
 92 | ### `close_page(page)`
 93 | 
 94 | Close a page.
 95 | 
 96 | Parameters:
 97 | 
 98 | - `page`: The page to close.
 99 | 
100 | Example usage:
101 | 
102 | ```python
103 | page = create_page("https://www.example.com")
104 | close_page(page)
105 | ```
106 | 
107 | ### `navigate_to(url, page, wait_until="domcontentloaded", timeout=10000)`
108 | 
109 | Navigate to a URL in a page.
110 | 
111 | Parameters:
112 | 
113 | - `url`: The URL to navigate to.
114 | - `page`: The page to navigate in.
115 | 
116 | Example usage:
117 | 
118 | ```python
119 | page = create_page()
120 | navigate_to("https://www.example.com", page)
121 | ```
122 | 
123 | ### `get_document_html(page)`
124 | 
125 | Get the HTML content of a page.
126 | 
127 | Parameters:
128 | 
129 | - `page`: The page to get the HTML from.
130 | 
131 | Example usage:
132 | 
133 | ```python
134 | page = create_page("https://www.example.com")
135 | html = get_document_html(page)
136 | print(html)
137 | ```
138 | 
139 | ### `get_page_title(page)`
140 | 
141 | Get the title of a page.
142 | 
143 | Parameters:
144 | 
145 | - `page`: The page to get the title from.
146 | 
147 | Example usage:
148 | 
149 | ```python
150 | page = create_page("https://www.example.com")
151 | title = get_page_title(page)
152 | print(title)
153 | ```
154 | 
155 | ### `get_body_text(page)`
156 | 
157 | Get the text content of a page's body.
158 | 
159 | Parameters:
160 | 
161 | - `page`: The page to get the text from.
162 | 
163 | Example usage:
164 | 
165 | ```python
166 | page = create_page("https://www.example.com")
167 | text = get_body_text(page)
168 | print(text)
169 | ```
170 | 
171 | ### `get_body_html(page)`
172 | 
173 | Get the HTML content of a page's body.
174 | 
175 | Parameters:
176 | 
177 | - `page`: The page to get the HTML from.
178 | 
179 | Example usage:
180 | 
181 | ```python
182 | page = create_page("https://www.example.com")
183 | body_html = get_body_html(page)
184 | print(body_html)
185 | ```
186 | 
187 | ### `screenshot_page(page)`
188 | 
189 | Get a screenshot of a page.
190 | 
191 | Parameters:
192 | 
193 | - `page`: The page to screenshot.
194 | 
195 | Example usage:
196 | 
197 | ```python
198 | page = create_page("https://www.example.com")
199 | screenshot = screenshot_page(page)
200 | with open("screenshot.png", "wb") as f:
201 |     f.write(screenshot)
202 | ```
203 | 
204 | ### `evaluate_javascript(code, page)`
205 | 
206 | Evaluate JavaScript code in a page.
207 | 
208 | Parameters:
209 | 
210 | - `code`: The JavaScript code to evaluate.
211 | - `page`: The page to evaluate the code in.
212 | 
213 | Example usage:
214 | 
215 | ```python
216 | page = create_page("https://www.example.com")
217 | result = evaluate_javascript("document.title", page)
218 | print(result)
219 | ```
220 | 
221 | ### `find_chrome()`
222 | 
223 | Find the Chrome executable. Returns the path to the Chrome executable, or None if it could not be found.
224 | 
225 | Example usage:
226 | 
227 | ```python
228 | chrome_path = find_chrome()
229 | print(chrome_path)
230 | ```
231 | 
232 | # Contributions Welcome
233 | 
234 | If you like this library and want to contribute in any way, please feel free to submit a PR and I will review it. Please note that the goal here is simplicity and accessibility, using common language and few dependencies.
235 | 


--------------------------------------------------------------------------------
/agentbrowser/__init__.py:
--------------------------------------------------------------------------------
1 | from .browser import *


--------------------------------------------------------------------------------
/agentbrowser/browser.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | from playwright.async_api import async_playwright
  3 | 
  4 | import os
  5 | import platform
  6 | 
  7 | browser = None
  8 | context = None
  9 | 
 10 | 
 11 | # Synchronous functions
 12 | 
 13 | 
 14 | def get_browser():
 15 |     """
 16 |     Get a Playwright browser.
 17 | 
 18 |     If the browser doesn't exist, initializes a new one.
 19 | 
 20 |     :return: A Playwright browser.
 21 |     :rtype: playwright.async_api.Browser
 22 |     """
 23 |     asyncio.get_event_loop()
 24 |     global browser
 25 |     if browser is None:
 26 |         init_browser()
 27 |     return browser
 28 | 
 29 | 
 30 | def init_browser(headless=True, executable_path=None):
 31 |     """
 32 |     Initialize a new Playwright browser.
 33 | 
 34 |     :param headless: Whether the browser should be run in headless mode, defaults to True.
 35 |     :type headless: bool, optional
 36 |     :param executable_path: Path to a Chromium or Chrome executable to run instead of the bundled Chromium.
 37 |     :type executable_path: str, optional
 38 |     """
 39 |     asyncio.get_event_loop().run_until_complete(
 40 |         async_init_browser(headless, executable_path)
 41 |     )
 42 | 
 43 | 
 44 | def create_page(site=None):
 45 |     """
 46 |     Create a new page in the browser.
 47 | 
 48 |     If a site is provided, navigate to that site.
 49 | 
 50 |     :param site: URL to navigate to, defaults to None.
 51 |     :type site: str, optional
 52 |     :return: A new page.
 53 |     :rtype: playwright.async_api.Page
 54 |     """
 55 |     return asyncio.get_event_loop().run_until_complete(async_create_page(site))
 56 | 
 57 | 
 58 | def close_page(page):
 59 |     """
 60 |     Close a page.
 61 | 
 62 |     :param page: The page to close.
 63 |     :type page: playwright.async_api.Page
 64 |     """
 65 |     asyncio.get_event_loop().run_until_complete(async_close_page(page))
 66 | 
 67 | 
 68 | def navigate_to(url, page, wait_until="domcontentloaded", timeout=10000):
 69 |     """
 70 |     Navigate to a URL in a page.
 71 | 
 72 |     :param url: The URL to navigate to.
 73 |     :type url: str
 74 |     :param page: The page to navigate in.
 75 |     :type page: playwright.async_api.Page
 76 |     :return: The page after navigation.
 77 |     :rtype: playwright.async_api.Page
 78 |     """
 79 |     return asyncio.get_event_loop().run_until_complete(async_navigate_to(url, page, wait_until=wait_until, timeout=timeout))
 80 | 
 81 | 
 82 | def get_document_html(page):
 83 |     """
 84 |     Get the HTML content of a page.
 85 | 
 86 |     :param page: The page to get the HTML from.
 87 |     :type page: playwright.async_api.Page
 88 |     :return: The HTML content of the page.
 89 |     :rtype: str
 90 |     """
 91 |     return asyncio.get_event_loop().run_until_complete(async_get_document_html(page))
 92 | 
 93 | 
 94 | def get_page_title(page):
 95 |     """
 96 |     Get the title of a page.
 97 | 
 98 |     :param page: The page to get the title from.
 99 |     :type page: playwright.async_api.Page
100 |     :return: The title of the page.
101 |     :rtype: str
102 |     """
103 |     return asyncio.get_event_loop().run_until_complete(async_get_page_title(page))
104 | 
105 | 
def get_body_text(page):
    """
    Return the text of a page's ``<body>``, blocking until it is available.

    Synchronous wrapper around ``async_get_body_text``.

    :param page: The page to get the text from.
    :type page: playwright.async_api.Page
    :return: The text content of the page's body.
    :rtype: str
    """
    loop = asyncio.get_event_loop()
    pending = async_get_body_text(page)
    return loop.run_until_complete(pending)
116 | 
117 | 
def get_body_html(page):
    """
    Return the inner HTML of a page's ``<body>``, blocking until it is available.

    Synchronous wrapper around ``async_get_body_html``.

    :param page: The page to get the HTML from.
    :type page: playwright.async_api.Page
    :return: The HTML content of the page's body.
    :rtype: str
    """
    loop = asyncio.get_event_loop()
    pending = async_get_body_html(page)
    return loop.run_until_complete(pending)
128 | 
129 | 
def screenshot_page(page):
    """
    Take a screenshot of a page, blocking until it has been captured.

    Synchronous wrapper around ``async_screenshot_page``.

    :param page: The page to screenshot.
    :type page: playwright.async_api.Page
    :return: A bytes object representing the screenshot.
    :rtype: bytes
    """
    loop = asyncio.get_event_loop()
    pending = async_screenshot_page(page)
    return loop.run_until_complete(pending)
140 | 
141 | 
def evaluate_javascript(code, page):
    """
    Run JavaScript in a page and block until its result is available.

    Synchronous wrapper around ``async_evaluate_javascript``.

    :param code: The JavaScript code to evaluate.
    :type code: str
    :param page: The page to evaluate the code in.
    :type page: playwright.async_api.Page
    :return: The result of the evaluated code.
    """
    loop = asyncio.get_event_loop()
    pending = async_evaluate_javascript(code, page)
    return loop.run_until_complete(pending)
155 | 
156 | 
157 | # Asynchronous functions
158 | 
159 | 
async def async_get_browser():
    """
    Return the module-wide Playwright browser, launching it on first use.

    When the module-level ``browser`` is still ``None``,
    ``async_init_browser()`` is awaited with its default arguments first.

    :return: The shared Playwright browser.
    :rtype: playwright.async_api.Browser
    """
    global browser
    if browser is not None:
        return browser
    await async_init_browser()
    return browser
173 | 
174 | 
async def async_init_browser(headless=True, executable_path=None):
    """
    Launch the shared Chromium browser unless one has already been launched.

    If the module-level ``browser`` is already set it is returned unchanged and
    the arguments are not applied.

    :param headless: Whether the browser should be run in headless mode, defaults to True.
    :type headless: bool, optional
    :param executable_path: Path to a Chromium or Chrome executable to launch.
        When None, the result of ``find_chrome()`` is used instead.
    :type executable_path: str, optional
    :return: The shared Playwright browser.
    :rtype: playwright.async_api.Browser
    """
    global browser

    # Fall back to a locally installed Chrome/Chromium when no path is given.
    chrome_path = find_chrome() if executable_path is None else executable_path

    if browser is not None:
        return browser

    playwright = await async_playwright().start()
    browser = await playwright.chromium.launch(
        headless=headless,
        executable_path=chrome_path,
    )
    return browser
198 | 
199 | 
async def async_create_page(site=None):
    """
    Open a new page in the shared browser, optionally loading a site.

    The browser is launched first if it does not exist yet. Every call creates
    a new browser context and opens the page inside it.

    :param site: URL to navigate to, defaults to None (no navigation).
    :type site: str, optional
    :return: The newly created page.
    :rtype: playwright.async_api.Page
    """
    global browser
    if browser is None:
        await async_init_browser()

    # Local variable only: the module-level ``context`` is not updated.
    page_context = await browser.new_context()
    new_page = await page_context.new_page()
    if not site:
        return new_page
    await new_page.goto(site, wait_until="domcontentloaded")
    return new_page
219 | 
220 | 
async def async_close_page(page):
    """
    Close a page by awaiting its ``close()`` method.

    :param page: The page to close.
    :type page: playwright.async_api.Page
    """
    closing = page.close()
    await closing
229 | 
230 | 
async def async_navigate_to(url, page, wait_until="domcontentloaded", timeout=10000):
    """
    Load a URL in a page, creating a new page when none is supplied.

    Any exception raised by the navigation is caught: an error message and the
    exception are printed and None is returned instead of the page.

    :param url: The URL to navigate to.
    :type url: str
    :param page: The page to navigate in. A falsy value makes the function
        create a fresh page with ``async_create_page``.
    :type page: playwright.async_api.Page
    :param wait_until: Load state to wait for, forwarded to ``page.goto``.
    :type wait_until: str, optional
    :param timeout: Navigation timeout, forwarded to ``page.goto``.
    :type timeout: int, optional
    :return: The page after navigation, or None if the navigation raised.
    :rtype: playwright.async_api.Page
    """
    target = page if page else await async_create_page(None)
    try:
        await target.goto(url, wait_until=wait_until, timeout=timeout)
    except Exception as error:
        # Best effort: report the failure and signal it through the return value.
        print("Error navigating to: " + url)
        print(error)
        return None
    return target
251 | 
252 | 
async def async_get_document_html(page):
    """
    Return the HTML of the whole document via ``page.content()``.

    :param page: The page to get the HTML from.
    :type page: playwright.async_api.Page
    :return: The HTML content of the page.
    :rtype: str
    """
    markup = await page.content()
    return markup
263 | 
264 | 
async def async_get_page_title(page):
    """
    Return the title of a page via ``page.title()``.

    :param page: The page to get the title from.
    :type page: playwright.async_api.Page
    :return: The title of the page.
    :rtype: str
    """
    heading = await page.title()
    return heading
275 | 
276 | 
async def async_get_body_text(page):
    """
    Return the ``innerText`` of a page's ``<body>`` element.

    The body element is looked up with ``page.query_selector("body")`` and
    handed to a JavaScript function evaluated in the page.

    :param page: The page to get the text from.
    :type page: playwright.async_api.Page
    :return: The text content of the page's body.
    :rtype: str
    """
    body_element = await page.query_selector("body")
    text = await page.evaluate("(body) => body.innerText", body_element)
    return text
288 | 
289 | 
async def async_get_body_html(page):
    """
    Return the ``innerHTML`` of a page's ``<body>`` element.

    The body element is looked up with ``page.query_selector("body")`` and
    handed to a JavaScript function evaluated in the page.

    :param page: The page to get the HTML from.
    :type page: playwright.async_api.Page
    :return: The HTML content of the page's body.
    :rtype: str
    """
    body_element = await page.query_selector("body")
    markup = await page.evaluate("(body) => body.innerHTML", body_element)
    return markup
301 | 
302 | 
async def async_screenshot_page(page):
    """
    Take a screenshot of a page via ``page.screenshot()``.

    :param page: The page to screenshot.
    :type page: playwright.async_api.Page
    :return: A bytes object representing the screenshot.
    :rtype: bytes
    """
    image_bytes = await page.screenshot()
    return image_bytes
313 | 
314 | 
async def async_evaluate_javascript(code, page):
    """
    Run JavaScript in a page via ``page.evaluate()`` and return its result.

    :param code: The JavaScript code to evaluate.
    :type code: str
    :param page: The page to evaluate the code in.
    :type page: playwright.async_api.Page
    :return: The result of the evaluated code.
    """
    outcome = await page.evaluate(code)
    return outcome
326 | 
327 | 
def find_chrome():
    """
    Find the Chrome executable.

    Checks a fixed list of well-known install locations for the platform
    reported by ``platform.system()`` and returns the first one that exists.

    On Windows the candidates are built from the ``ProgramFiles(x86)``,
    ``ProgramFiles`` and ``LocalAppData`` environment variables. A variable
    that is not set is skipped instead of raising ``KeyError``.

    :return: The path to the Chrome executable, or None if it could not be
        found or the platform is not supported.
    :rtype: str or None
    """
    system = platform.system()

    if system == "Windows":
        candidates = []
        for variable in ("ProgramFiles(x86)", "ProgramFiles", "LocalAppData"):
            base_dir = os.environ.get(variable)
            if not base_dir:
                # Not every Windows setup defines all three variables.
                continue
            candidates.append(
                os.path.join(base_dir, "Google", "Chrome", "Application", "chrome.exe")
            )
    elif system == "Darwin":
        candidates = ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"]
    elif system == "Linux":
        candidates = [
            "/usr/bin/google-chrome",
            "/usr/bin/chromium",
            "/usr/bin/chromium-browser",
        ]
    else:
        print("Unsupported platform")
        return None

    # First existing candidate wins; None when nothing is installed.
    return next((path for path in candidates if os.path.exists(path)), None)
376 | 


--------------------------------------------------------------------------------
/agentbrowser/test.py:
--------------------------------------------------------------------------------
 1 | from agentbrowser import (
 2 |     create_page,
 3 |     evaluate_javascript,
 4 |     get_body_html,
 5 |     get_body_text,
 6 |     get_document_html,
 7 |     navigate_to
 8 | )
 9 | from agentbrowser.browser import close_page, get_page_title, init_browser, get_browser, screenshot_page
10 | 
11 | test_article = "https://test-page-to-crawl.vercel.app"
12 | 
13 | 
def test_get_browser():
    """get_browser() must hand back a browser object."""
    current = get_browser()
    assert current is not None, "Failed to get the browser"
    print("test_get_browser passed.")
18 | 
19 | 
def test_init_browser():
    """After an explicit init_browser(), get_browser() must return a browser."""
    init_browser()
    launched = get_browser()
    assert launched is not None, "Failed to initialize the browser"
    print("test_init_browser passed.")
25 | 
26 | 
def test_navigation():
    """
    Navigating an existing page to another site must succeed and change its URL.
    """
    test_page = create_page("https://www.google.com")
    # Remember the URL as the browser reports it; comparing against the literal
    # "https://www.google.com" would also pass without any navigation because
    # the reported URL carries a trailing slash.
    start_url = test_page.url

    # navigate from google to yahoo
    navigated = navigate_to(
        "https://www.yahoo.com",
        test_page,
        wait_until="domcontentloaded",
    )

    # navigate_to returns None when the navigation raised, so check that first.
    assert navigated is not None, "Page navigation failed."
    assert test_page.url != start_url, "Navigation failed."
    print("test_navigation passed.")
40 | 
41 | 
def test_get_page_title():
    """The title reported for the Google home page must be "Google"."""
    google_page = create_page("https://www.google.com")
    reported_title = get_page_title(google_page)
    assert reported_title == "Google", "Failed to get the correct page title."
    print("test_get_page_title passed.")
47 | 
48 | 
def test_screenshot_page():
    """screenshot_page() must return something for a loaded page."""
    google_page = create_page("https://www.google.com")
    image = screenshot_page(google_page)
    assert image is not None, "Failed to take a screenshot."
    print("test_screenshot_page passed.")
54 | 
55 | 
def test_close_page():
    """A page passed to close_page() must report itself as closed."""
    google_page = create_page("https://www.google.com")
    close_page(google_page)
    assert google_page.is_closed(), "Failed to close the page."
    print("test_close_page passed.")
61 | 
62 | 
def test_body_html():
    """get_body_html() must return a value for the test article."""
    article_page = create_page(test_article)
    markup = get_body_html(article_page)
    assert markup is not None, "Failed to get body html."
    print("test_body_html passed.")
68 | 
69 | 
def test_document_html():
    """get_document_html() must return a value for the test article."""
    article_page = create_page(test_article)
    markup = get_document_html(article_page)
    assert markup is not None, "Failed to get document html."
    print("test_document_html passed.")
75 | 
76 | 
def test_body_text():
    """get_body_text() must return a value for the test article."""
    article_page = create_page(test_article)
    text = get_body_text(article_page)
    # Echo the extracted text so it shows up in the pytest output.
    print(text)
    assert text is not None, "Failed to get body text."
    print("test_body_text passed.")
83 | 
84 | 
def test_javascript_evaluation():
    """A small script evaluated in the page must yield its last expression."""
    article_page = create_page(test_article)
    script = """
        var x = 1;
        var y = 2;
        var z = x + y;
        z;
        """
    outcome = evaluate_javascript(script, article_page)
    assert outcome == 3, "Javascript evaluation failed."
    print("test_javascript_evaluation passed.")
98 | 


--------------------------------------------------------------------------------
/agentbrowser/test_async.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import pytest
 3 | from agentbrowser import (
 4 |     async_get_browser,
 5 |     async_init_browser,
 6 |     async_navigate_to,
 7 |     async_get_body_html,
 8 |     async_get_body_text,
 9 |     async_get_document_html,
10 |     async_create_page,
11 |     async_close_page,
12 |     async_evaluate_javascript,
13 | )
14 | 
15 | from agentbrowser.browser import browser, context
16 | 
17 | test_article = "https://test-page-to-crawl.vercel.app"
18 | 
@pytest.mark.asyncio
async def test_async_create_page():
    """
    End-to-end check of the async API: launch the browser, create and close a
    page, navigate, read the page content and evaluate JavaScript.
    """
    # Reset the library's cached browser so this test launches its own.
    # Rebinding the names imported into this test module would not touch the
    # library's state, so the attributes are cleared on the library module.
    # import_module() is used because the package attribute
    # ``agentbrowser.browser`` is overwritten by the star import of the
    # ``browser`` variable in the package's __init__.
    import importlib

    browser_module = importlib.import_module("agentbrowser.browser")
    browser_module.browser = None
    browser_module.context = None

    await async_init_browser()
    test_page = await async_create_page("https://www.google.com/")
    assert test_page is not None, "Page navigation failed."
    assert test_page.url == "https://www.google.com/", "Navigation failed."
    print("test_create_page passed.")

    await async_close_page(test_page)
    assert test_page.is_closed(), "Page failed to close."
    print("test_close_page passed.")

    test_page = await async_create_page("https://www.google.com")
    # Compare against the URL as reported by the browser: the literal
    # "https://www.google.com" never matches it because of the trailing slash.
    start_url = test_page.url

    # navigate from google to yahoo
    navigated = await async_navigate_to(
        "https://www.yahoo.com",
        test_page,
    )

    # async_navigate_to returns None when the navigation raised.
    assert navigated is not None, "Page navigation failed."
    assert test_page.url != start_url, "Navigation failed."
    print("test_async_navigation passed.")

    body_html = await async_get_body_html(test_page)
    assert body_html is not None, "Failed to get body html."
    print("test_async_body_html passed.")

    html = await async_get_document_html(test_page)
    assert html is not None, "Failed to get document html."
    print("test_async_document_html passed.")

    body = await async_get_body_text(test_page)
    assert body is not None, "Failed to get body text."
    print("test_async_body_text passed.")

    result = await async_evaluate_javascript(
        """
        var x = 1;
        var y = 2;
        var z = x + y;
        z;
        """,
        test_page,
    )
    assert result == 3, "Javascript evaluation failed."
    print("test_async_javascript_evaluation passed.")
70 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | playwright


--------------------------------------------------------------------------------
/resources/image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/elizaOS/agentbrowser/40444259e47fb6a1bcc7b3387cc6fdba84fe303d/resources/image.jpg


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | long_description = ""
 4 | with open("README.md", "r") as fh:
 5 |     long_description = fh.read()
 6 |     # search for any lines that contain <img and remove them
 7 |     long_description = long_description.split("\n")
 8 |     long_description = [line for line in long_description if not "<img" in line]
 9 |     # now join all the lines back together
10 |     long_description = "\n".join(long_description)
11 | 
12 | 
13 | setup(
14 |     name="agentbrowser",
15 |     version="0.2.2",
16 |     description="A browser for your agent, built on Playwright.",
17 |     long_description=long_description,  # added this line
18 |     long_description_content_type="text/markdown",  # and this line
19 |     url="https://github.com/AutonomousResearchGroup/agentbrowser",
20 |     author="Moon",
21 |     author_email="shawmakesmagic@gmail.com",
22 |     license="MIT",
23 |     packages=["agentbrowser"],
24 |     install_requires=["playwright"],
25 |     readme="README.md",
26 |     classifiers=[
27 |         "Development Status :: 2 - Pre-Alpha",
28 |         "Intended Audience :: Science/Research",
29 |         "License :: OSI Approved :: MIT License",
30 |         "Operating System :: POSIX :: Linux",
31 |         "Programming Language :: Python :: 3",
32 |         "Operating System :: MacOS :: MacOS X",
33 |         "Operating System :: Microsoft :: Windows",
34 |     ],
35 | )
36 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | from agentbrowser.test import *


--------------------------------------------------------------------------------
/test_async.py:
--------------------------------------------------------------------------------
1 | from agentbrowser.test_async import *


--------------------------------------------------------------------------------