├── .github
└── workflows
│ ├── build.yaml
│ └── test.yml
├── .gitignore
├── LICENSE
├── README.md
├── agentbrowser
├── __init__.py
├── browser.py
├── test.py
└── test_async.py
├── requirements.txt
├── resources
└── image.jpg
├── setup.py
├── test.py
└── test_async.py
/.github/workflows/build.yaml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
3 |
4 | # This workflow uses actions that are not certified by GitHub.
5 | # They are provided by a third-party and are governed by
6 | # separate terms of service, privacy policy, and support
7 | # documentation.
8 |
name: Upload Python Package

# Runs only when a GitHub release is published.
on:
  release:
    types: [published]

# The job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
    # v3 of checkout/setup-python runs on the deprecated Node 16 runtime;
    # use the current major versions instead.
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.x'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install build
    - name: Build package
      run: python -m build
    # Third-party action pinned to a commit hash.
    - name: Publish package
      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
      with:
        user: ${{ secrets.pypi_username }}
        password: ${{ secrets.pypi_password }}
40 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
name: Lint and Test

# Runs on every push to any branch.
on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10"]
    steps:
      # v3 of checkout/setup-python runs on the deprecated Node 16 runtime;
      # use the current major versions instead.
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pytest pytest-asyncio
          pip install -r requirements.txt
      # The sync and async suites are run as separate pytest invocations.
      - name: Running tests
        run: |
          pytest test.py
          pytest test_async.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | .DS_Store
6 |
7 | # C extensions
8 | *.so
9 |
10 | # Distribution / packaging
11 | .Python
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 | cover/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | .pybuilder/
77 | target/
78 |
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 |
82 | # IPython
83 | profile_default/
84 | ipython_config.py
85 |
86 | # pyenv
87 | # For a library or package, you might want to ignore these files since the code is
88 | # intended to run in multiple environments; otherwise, check them in:
89 | # .python-version
90 |
91 | # pipenv
92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | # install all needed dependencies.
96 | #Pipfile.lock
97 |
98 | # poetry
99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100 | # This is especially recommended for binary packages to ensure reproducibility, and is more
101 | # commonly ignored for libraries.
102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103 | #poetry.lock
104 |
105 | # pdm
106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107 | #pdm.lock
108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109 | # in version control.
110 | # https://pdm.fming.dev/#use-with-ide
111 | .pdm.toml
112 |
113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
114 | __pypackages__/
115 |
116 | # Celery stuff
117 | celerybeat-schedule
118 | celerybeat.pid
119 |
120 | # SageMath parsed files
121 | *.sage.py
122 |
123 | # Environments
124 | .env
125 | .venv
126 | env/
127 | venv/
128 | ENV/
129 | env.bak/
130 | venv.bak/
131 |
132 | # Spyder project settings
133 | .spyderproject
134 | .spyproject
135 |
136 | # Rope project settings
137 | .ropeproject
138 |
139 | # mkdocs documentation
140 | /site
141 |
142 | # mypy
143 | .mypy_cache/
144 | .dmypy.json
145 | dmypy.json
146 |
147 | # Pyre type checker
148 | .pyre/
149 |
150 | # pytype static type analyzer
151 | .pytype/
152 |
153 | # Cython debug symbols
154 | cython_debug/
155 |
156 | # PyCharm
157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
159 | # and can be added to the global gitignore or merged into this file. For a more nuclear
160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
161 | #.idea/
162 |
163 | .vscode/
164 | .chroma
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 M̵̞̗̝̼̅̏̎͝Ȯ̴̝̻̊̃̋̀Õ̷̼͋N̸̩̿͜ ̶̜̠̹̼̩͒
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # agentbrowser
2 |
3 | A browser for your agent, built on Playwright.
4 |
5 |
6 |
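7 | [![Lint and Test](https://github.com/AutonomousResearchGroup/agentbrowser/actions/workflows/test.yml/badge.svg)](https://github.com/AutonomousResearchGroup/agentbrowser/actions/workflows/test.yml)
8 | [![PyPI version](https://badge.fury.io/py/agentbrowser.svg)](https://badge.fury.io/py/agentbrowser)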
9 |
10 | # Installation
11 |
12 | ```bash
13 | pip install agentbrowser
14 | ```
15 |
16 | # Usage
17 |
18 | ## Importing into your project
19 |
20 | ```python
21 | from agentbrowser import (
22 | get_browser,
23 | init_browser,
24 | navigate_to,
25 | get_body_html,
26 | get_body_text,
27 | get_document_html,
28 | create_page,
29 | close_page,
30 | evaluate_javascript,
31 | )
32 | ```
33 |
34 | ## Quickstart
35 |
36 | ```python
37 | from agentbrowser import (
38 | navigate_to,
39 | get_body_text,
40 | )
41 |
42 | # Navigate to a URL
43 | page = navigate_to("https://google.com")
44 |
45 | # Get the text from the page
46 | text = get_body_text(page)
47 |
48 | print(text)
49 | ```
50 |
51 | ## API Documentation
52 |
53 | ### `get_browser()`
54 |
55 | Get a Playwright browser. If the browser doesn't exist, initializes a new one.
56 |
57 | Example usage:
58 |
59 | ```python
60 | browser = get_browser()
61 | ```
62 |
63 | ### `init_browser(headless=True, executable_path=None)`
64 |
65 | Initialize a new Playwright browser.
66 |
67 | Parameters:
68 |
69 | - `headless`: Whether the browser should be run in headless mode, defaults to True.
70 | - `executable_path`: Path to a Chromium or Chrome executable to run instead of the bundled Chromium.
71 |
72 | Example usage:
73 |
74 | ```python
75 | init_browser(headless=False, executable_path="/usr/bin/google-chrome")
76 | ```
77 |
78 | ### `create_page(site=None)`
79 |
80 | Create a new page in the browser. If a site is provided, navigate to that site.
81 |
82 | Parameters:
83 |
84 | - `site`: URL to navigate to, defaults to None.
85 |
86 | Example usage:
87 |
88 | ```python
89 | page = create_page("https://www.example.com")
90 | ```
91 |
92 | ### `close_page(page)`
93 |
94 | Close a page.
95 |
96 | Parameters:
97 |
98 | - `page`: The page to close.
99 |
100 | Example usage:
101 |
102 | ```python
103 | page = create_page("https://www.example.com")
104 | close_page(page)
105 | ```
106 |
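107 | ### `navigate_to(url, page, wait_until="domcontentloaded", timeout=10000)`
108 |
109 | Navigate to a URL in a page and return the page, or `None` if the navigation failed. `wait_until` and `timeout` are passed through to Playwright's `page.goto`.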
110 |
111 | Parameters:
112 |
113 | - `url`: The URL to navigate to.
114 | - `page`: The page to navigate in.
115 |
116 | Example usage:
117 |
118 | ```python
119 | page = create_page()
120 | navigate_to("https://www.example.com", page)
121 | ```
122 |
123 | ### `get_document_html(page)`
124 |
125 | Get the HTML content of a page.
126 |
127 | Parameters:
128 |
129 | - `page`: The page to get the HTML from.
130 |
131 | Example usage:
132 |
133 | ```python
134 | page = create_page("https://www.example.com")
135 | html = get_document_html(page)
136 | print(html)
137 | ```
138 |
139 | ### `get_page_title(page)`
140 |
141 | Get the title of a page.
142 |
143 | Parameters:
144 |
145 | - `page`: The page to get the title from.
146 |
147 | Example usage:
148 |
149 | ```python
150 | page = create_page("https://www.example.com")
151 | title = get_page_title(page)
152 | print(title)
153 | ```
154 |
155 | ### `get_body_text(page)`
156 |
157 | Get the text content of a page's body.
158 |
159 | Parameters:
160 |
161 | - `page`: The page to get the text from.
162 |
163 | Example usage:
164 |
165 | ```python
166 | page = create_page("https://www.example.com")
167 | text = get_body_text(page)
168 | print(text)
169 | ```
170 |
171 | ### `get_body_html(page)`
172 |
173 | Get the HTML content of a page's body.
174 |
175 | Parameters:
176 |
177 | - `page`: The page to get the HTML from.
178 |
179 | Example usage:
180 |
181 | ```python
182 | page = create_page("https://www.example.com")
183 | body_html = get_body_html(page)
184 | print(body_html)
185 | ```
186 |
187 | ### `screenshot_page(page)`
188 |
189 | Get a screenshot of a page.
190 |
191 | Parameters:
192 |
193 | - `page`: The page to screenshot.
194 |
195 | Example usage:
196 |
197 | ```python
198 | page = create_page("https://www.example.com")
199 | screenshot = screenshot_page(page)
200 | with open("screenshot.png", "wb") as f:
201 | f.write(screenshot)
202 | ```
203 |
204 | ### `evaluate_javascript(code, page)`
205 |
206 | Evaluate JavaScript code in a page.
207 |
208 | Parameters:
209 |
210 | - `code`: The JavaScript code to evaluate.
211 | - `page`: The page to evaluate the code in.
212 |
213 | Example usage:
214 |
215 | ```python
216 | page = create_page("https://www.example.com")
217 | result = evaluate_javascript("document.title", page)
218 | print(result)
219 | ```
220 |
221 | ### `find_chrome()`
222 |
223 | Find the Chrome executable. Returns the path to the Chrome executable, or None if it could not be found.
224 |
225 | Example usage:
226 |
227 | ```python
228 | chrome_path = find_chrome()
229 | print(chrome_path)
230 | ```
231 |
232 | # Contributions Welcome
233 |
234 | If you like this library and want to contribute in any way, please feel free to submit a PR and I will review it. Please note that the goal here is simplicity and accessibility, using common language and few dependencies.
235 |
--------------------------------------------------------------------------------
/agentbrowser/__init__.py:
--------------------------------------------------------------------------------
1 | from .browser import *
--------------------------------------------------------------------------------
/agentbrowser/browser.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from playwright.async_api import async_playwright
3 |
4 | import os
5 | import platform
6 |
7 | browser = None
8 | context = None
9 |
10 |
11 | # Synchronous functions
12 |
13 |
def get_browser():
    """
    Return the module-wide Playwright browser, launching it on first use.

    The cached ``browser`` global is returned when it is already set;
    otherwise :func:`init_browser` is called with its defaults first.

    :return: A Playwright browser.
    :rtype: playwright.async_api.Browser
    """
    global browser
    # Result is discarded; the call is kept as in the original.
    asyncio.get_event_loop()
    if browser is not None:
        return browser
    init_browser()
    return browser
28 |
29 |
def init_browser(headless=True, executable_path=None):
    """
    Blocking wrapper around :func:`async_init_browser`.

    Runs the coroutine to completion on the current event loop; nothing is returned.

    :param headless: Whether the browser should be run in headless mode, defaults to True.
    :type headless: bool, optional
    :param executable_path: Path to a Chromium or Chrome executable to run instead of the bundled Chromium.
    :type executable_path: str, optional
    """
    loop = asyncio.get_event_loop()
    launch = async_init_browser(headless, executable_path)
    loop.run_until_complete(launch)
42 |
43 |
def create_page(site=None):
    """
    Blocking wrapper around :func:`async_create_page`.

    Opens a new page and, when ``site`` is given, navigates to it.

    :param site: URL to navigate to, defaults to None.
    :type site: str, optional
    :return: A new page.
    :rtype: playwright.async_api.Page
    """
    loop = asyncio.get_event_loop()
    pending = async_create_page(site)
    return loop.run_until_complete(pending)
56 |
57 |
def close_page(page):
    """
    Blocking wrapper around :func:`async_close_page`.

    :param page: The page to close.
    :type page: playwright.async_api.Page
    """
    loop = asyncio.get_event_loop()
    pending = async_close_page(page)
    loop.run_until_complete(pending)
66 |
67 |
def navigate_to(url, page=None, wait_until="domcontentloaded", timeout=10000):
    """
    Navigate to a URL in a page.

    Blocking wrapper around :func:`async_navigate_to`. ``page`` is optional:
    when it is omitted (or falsy) the async implementation opens a new page,
    which is what the README quickstart (``navigate_to(url)``) relies on.

    :param url: The URL to navigate to.
    :type url: str
    :param page: The page to navigate in, defaults to None (a new page is created).
    :type page: playwright.async_api.Page, optional
    :param wait_until: Load state passed through to ``page.goto``, defaults to "domcontentloaded".
    :type wait_until: str, optional
    :param timeout: Navigation timeout passed through to ``page.goto``, defaults to 10000.
    :type timeout: int, optional
    :return: The page after navigation, or None if the navigation raised.
    :rtype: playwright.async_api.Page or None
    """
    loop = asyncio.get_event_loop()
    pending = async_navigate_to(url, page, wait_until=wait_until, timeout=timeout)
    return loop.run_until_complete(pending)
80 |
81 |
def get_document_html(page):
    """
    Blocking wrapper around :func:`async_get_document_html`.

    :param page: The page to get the HTML from.
    :type page: playwright.async_api.Page
    :return: The HTML content of the page.
    :rtype: str
    """
    loop = asyncio.get_event_loop()
    pending = async_get_document_html(page)
    return loop.run_until_complete(pending)
92 |
93 |
def get_page_title(page):
    """
    Blocking wrapper around :func:`async_get_page_title`.

    :param page: The page to get the title from.
    :type page: playwright.async_api.Page
    :return: The title of the page.
    :rtype: str
    """
    loop = asyncio.get_event_loop()
    pending = async_get_page_title(page)
    return loop.run_until_complete(pending)
104 |
105 |
def get_body_text(page):
    """
    Blocking wrapper around :func:`async_get_body_text`.

    :param page: The page to get the text from.
    :type page: playwright.async_api.Page
    :return: The text content of the page's body.
    :rtype: str
    """
    loop = asyncio.get_event_loop()
    pending = async_get_body_text(page)
    return loop.run_until_complete(pending)
116 |
117 |
def get_body_html(page):
    """
    Blocking wrapper around :func:`async_get_body_html`.

    :param page: The page to get the HTML from.
    :type page: playwright.async_api.Page
    :return: The HTML content of the page's body.
    :rtype: str
    """
    loop = asyncio.get_event_loop()
    pending = async_get_body_html(page)
    return loop.run_until_complete(pending)
128 |
129 |
def screenshot_page(page):
    """
    Blocking wrapper around :func:`async_screenshot_page`.

    :param page: The page to screenshot.
    :type page: playwright.async_api.Page
    :return: A bytes object representing the screenshot.
    :rtype: bytes
    """
    loop = asyncio.get_event_loop()
    pending = async_screenshot_page(page)
    return loop.run_until_complete(pending)
140 |
141 |
def evaluate_javascript(code, page):
    """
    Blocking wrapper around :func:`async_evaluate_javascript`.

    :param code: The JavaScript code to evaluate.
    :type code: str
    :param page: The page to evaluate the code in.
    :type page: playwright.async_api.Page
    :return: The result of the evaluated code.
    """
    loop = asyncio.get_event_loop()
    pending = async_evaluate_javascript(code, page)
    return loop.run_until_complete(pending)
155 |
156 |
157 | # Asynchronous functions
158 |
159 |
async def async_get_browser():
    """
    Return the module-wide Playwright browser, launching it on first use.

    The cached ``browser`` global is returned when it is already set;
    otherwise :func:`async_init_browser` is awaited with its defaults first.

    :return: A Playwright browser.
    :rtype: playwright.async_api.Browser
    """
    global browser
    if browser is not None:
        return browser
    await async_init_browser()
    return browser
173 |
174 |
async def async_init_browser(headless=True, executable_path=None):
    """
    Initialize a new Playwright browser asynchronously.

    The browser is launched at most once and cached in the module-level
    ``browser`` global. If a browser is already cached it is returned as-is
    and ``headless`` / ``executable_path`` are ignored.

    :param headless: Whether the browser should be run in headless mode, defaults to True.
    :type headless: bool, optional
    :param executable_path: Path to a Chromium or Chrome executable to run instead of the bundled Chromium.
        When None, the result of :func:`find_chrome` is used (which may itself be None).
    :type executable_path: str, optional
    :return: The cached or newly launched Playwright browser.
    :rtype: playwright.async_api.Browser
    """
    global browser

    if browser is None:
        # Only probe the filesystem for Chrome when we are actually going to
        # launch; an already-running browser makes the lookup pointless.
        if executable_path is None:
            executable_path = find_chrome()

        # NOTE: the Playwright driver handle is local to this call and is not
        # retained anywhere, so it cannot be stopped later through this module.
        playwright = await async_playwright().start()
        browser = await playwright.chromium.launch(
            headless=headless,
            executable_path=executable_path,
        )
    return browser
198 |
199 |
async def async_create_page(site=None):
    """
    Create a new page in the browser asynchronously.

    Each call opens a fresh browser context and a single page inside it.
    If a site is provided, the page navigates to it and waits for
    ``domcontentloaded``. If creating the page or navigating raises, the
    freshly created context is closed before the exception propagates so it
    does not leak.

    :param site: URL to navigate to, defaults to None.
    :type site: str, optional
    :return: A new page.
    :rtype: playwright.async_api.Page
    """
    global browser
    if browser is None:
        await async_init_browser()

    # Named so it does not shadow the module-level ``context`` global.
    page_context = await browser.new_context()
    try:
        page = await page_context.new_page()
        if site:
            await page.goto(site, wait_until="domcontentloaded")
    except Exception:
        # The caller never receives the page, so nobody else could clean up.
        await page_context.close()
        raise
    return page
219 |
220 |
async def async_close_page(page):
    """
    Close a page asynchronously.

    Only ``page.close()`` is awaited; nothing is returned.

    :param page: The page to close.
    :type page: playwright.async_api.Page
    """
    closing = page.close()
    await closing
229 |
230 |
async def async_navigate_to(url, page=None, wait_until="domcontentloaded", timeout=10000):
    """
    Navigate to a URL in a page asynchronously.

    ``page`` is optional: when it is omitted or falsy, a new page is created
    with :func:`async_create_page` and used for the navigation. Navigation
    errors are not raised; they are printed and reported as a ``None`` result.

    :param url: The URL to navigate to.
    :type url: str
    :param page: The page to navigate in, defaults to None (a new page is created).
    :type page: playwright.async_api.Page, optional
    :param wait_until: Load state passed through to ``page.goto``, defaults to "domcontentloaded".
    :type wait_until: str, optional
    :param timeout: Navigation timeout passed through to ``page.goto``, defaults to 10000.
    :type timeout: int, optional
    :return: The page after navigation, or None if ``page.goto`` raised.
    :rtype: playwright.async_api.Page or None
    """
    if not page:
        page = await async_create_page(None)
    try:
        await page.goto(url, wait_until=wait_until, timeout=timeout)
    except Exception as e:
        # Best-effort by design: report the failure and signal it with None.
        print("Error navigating to: " + url)
        print(e)
        return None
    return page
251 |
252 |
async def async_get_document_html(page):
    """
    Get the HTML content of a page asynchronously.

    Returns whatever ``page.content()`` resolves to.

    :param page: The page to get the HTML from.
    :type page: playwright.async_api.Page
    :return: The HTML content of the page.
    :rtype: str
    """
    markup = await page.content()
    return markup
263 |
264 |
async def async_get_page_title(page):
    """
    Get the title of a page asynchronously.

    Returns whatever ``page.title()`` resolves to.

    :param page: The page to get the title from.
    :type page: playwright.async_api.Page
    :return: The title of the page.
    :rtype: str
    """
    heading = await page.title()
    return heading
275 |
276 |
async def async_get_body_text(page):
    """
    Get the text content of a page's body asynchronously.

    Looks up the ``body`` element and evaluates its ``innerText`` in the page.

    :param page: The page to get the text from.
    :type page: playwright.async_api.Page
    :return: The text content of the page's body.
    :rtype: str
    """
    body_element = await page.query_selector("body")
    inner_text = await page.evaluate("(body) => body.innerText", body_element)
    return inner_text
288 |
289 |
async def async_get_body_html(page):
    """
    Get the HTML content of a page's body asynchronously.

    Looks up the ``body`` element and evaluates its ``innerHTML`` in the page.

    :param page: The page to get the HTML from.
    :type page: playwright.async_api.Page
    :return: The HTML content of the page's body.
    :rtype: str
    """
    body_element = await page.query_selector("body")
    inner_html = await page.evaluate("(body) => body.innerHTML", body_element)
    return inner_html
301 |
302 |
async def async_screenshot_page(page):
    """
    Get a screenshot of a page asynchronously.

    Returns whatever ``page.screenshot()`` resolves to.

    :param page: The page to screenshot.
    :type page: playwright.async_api.Page
    :return: A bytes object representing the screenshot.
    :rtype: bytes
    """
    image_bytes = await page.screenshot()
    return image_bytes
313 |
314 |
async def async_evaluate_javascript(code, page):
    """
    Evaluate JavaScript code in a page asynchronously.

    The code string is handed unchanged to ``page.evaluate``.

    :param code: The JavaScript code to evaluate.
    :type code: str
    :param page: The page to evaluate the code in.
    :type page: playwright.async_api.Page
    :return: The result of the evaluated code.
    """
    outcome = await page.evaluate(code)
    return outcome
326 |
327 |
def find_chrome():
    """
    Find the Chrome executable.

    Checks a fixed list of well-known install locations for the current
    platform (Windows, macOS, Linux) and returns the first one that exists.
    On Windows the locations are built from the ``ProgramFiles(x86)``,
    ``ProgramFiles`` and ``LocalAppData`` environment variables; a variable
    that is not set is skipped instead of raising ``KeyError``.

    :return: The path to the Chrome executable, or None if it could not be found.
    :rtype: str or None
    """
    system = platform.system()

    if system == "Windows":
        # Not every variable exists on every Windows install (e.g. there is no
        # "ProgramFiles(x86)" on 32-bit systems), so look them up defensively.
        roots = (
            os.environ.get(name)
            for name in ("ProgramFiles(x86)", "ProgramFiles", "LocalAppData")
        )
        paths = [
            os.path.join(root, "Google", "Chrome", "Application", "chrome.exe")
            for root in roots
            if root
        ]
    elif system == "Darwin":
        paths = ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"]
    elif system == "Linux":
        paths = [
            "/usr/bin/google-chrome",
            "/usr/bin/chromium",
            "/usr/bin/chromium-browser",
        ]
    else:
        print("Unsupported platform")
        return None

    # First existing candidate wins; order above is the preference order.
    for path in paths:
        if os.path.exists(path):
            return path

    return None
376 |
--------------------------------------------------------------------------------
/agentbrowser/test.py:
--------------------------------------------------------------------------------
1 | from agentbrowser import (
2 | create_page,
3 | evaluate_javascript,
4 | get_body_html,
5 | get_body_text,
6 | get_document_html,
7 | navigate_to
8 | )
9 | from agentbrowser.browser import close_page, get_page_title, init_browser, get_browser, screenshot_page
10 |
11 | test_article = "https://test-page-to-crawl.vercel.app"
12 |
13 |
def test_get_browser():
    """get_browser() must hand back a browser object."""
    instance = get_browser()
    assert instance is not None, "Failed to get the browser"
    print("test_get_browser passed.")
18 |
19 |
def test_init_browser():
    """After init_browser(), get_browser() must return a browser object."""
    init_browser()
    launched = get_browser()
    assert launched is not None, "Failed to initialize the browser"
    print("test_init_browser passed.")
25 |
26 |
def test_navigation():
    """
    navigate_to() must move an existing page to a different URL.

    The URL is captured before navigating and compared afterwards. Comparing
    against the literal "https://www.google.com" would always pass, because
    the loaded URL carries a trailing slash ("https://www.google.com/").
    """
    test_page = create_page("https://www.google.com")
    assert test_page is not None, "Page navigation failed."
    start_url = test_page.url

    # navigate away from google
    result = navigate_to(
        "https://www.yahoo.com",
        test_page,
        wait_until="domcontentloaded",
    )

    # navigate_to reports a failed navigation by returning None.
    assert result is not None, "Navigation failed."
    assert test_page.url != start_url, "Navigation failed."
    print("test_navigation passed.")
40 |
41 |
def test_get_page_title():
    """The title reported for the Google home page must be "Google"."""
    google_page = create_page("https://www.google.com")
    observed = get_page_title(google_page)
    assert observed == "Google", "Failed to get the correct page title."
    print("test_get_page_title passed.")
47 |
48 |
def test_screenshot_page():
    """screenshot_page() must return something for a loaded page."""
    google_page = create_page("https://www.google.com")
    image = screenshot_page(google_page)
    assert image is not None, "Failed to take a screenshot."
    print("test_screenshot_page passed.")
54 |
55 |
def test_close_page():
    """A page passed to close_page() must report itself as closed."""
    doomed_page = create_page("https://www.google.com")
    close_page(doomed_page)
    assert doomed_page.is_closed(), "Failed to close the page."
    print("test_close_page passed.")
61 |
62 |
def test_body_html():
    """get_body_html() must return a value for the test article page."""
    article_page = create_page(test_article)
    markup = get_body_html(article_page)
    assert markup is not None, "Failed to get body html."
    print("test_body_html passed.")
68 |
69 |
def test_document_html():
    """get_document_html() must return a value for the test article page."""
    article_page = create_page(test_article)
    document = get_document_html(article_page)
    assert document is not None, "Failed to get document html."
    print("test_document_html passed.")
75 |
76 |
def test_body_text():
    """get_body_text() must return a value for the test article page; the text is printed."""
    article_page = create_page(test_article)
    text = get_body_text(article_page)
    print(text)
    assert text is not None, "Failed to get body text."
    print("test_body_text passed.")
83 |
84 |
def test_javascript_evaluation():
    """evaluate_javascript() must return the value of the script's last expression (1 + 2 == 3)."""
    article_page = create_page(test_article)
    script = """
        var x = 1;
        var y = 2;
        var z = x + y;
        z;
        """
    outcome = evaluate_javascript(script, article_page)
    assert outcome == 3, "Javascript evaluation failed."
    print("test_javascript_evaluation passed.")
98 |
--------------------------------------------------------------------------------
/agentbrowser/test_async.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import pytest
3 | from agentbrowser import (
4 | async_get_browser,
5 | async_init_browser,
6 | async_navigate_to,
7 | async_get_body_html,
8 | async_get_body_text,
9 | async_get_document_html,
10 | async_create_page,
11 | async_close_page,
12 | async_evaluate_javascript,
13 | )
14 |
15 | from agentbrowser.browser import browser, context
16 |
17 | test_article = "https://test-page-to-crawl.vercel.app"
18 |
@pytest.mark.asyncio
async def test_async_create_page():
    """
    End-to-end check of the async API in a single event loop.

    Covers, in order: browser init, page creation, page close, navigation,
    body HTML, document HTML, body text and JavaScript evaluation.
    """
    # Reset the library's cached state on the module that actually owns it.
    # Rebinding names imported with ``from agentbrowser.browser import ...``
    # only changes this test module's globals and leaves the library untouched.
    # import_module is used because ``agentbrowser/__init__.py`` star-imports
    # a variable named ``browser``, which shadows the submodule attribute.
    import importlib

    browser_module = importlib.import_module("agentbrowser.browser")
    browser_module.browser = None
    browser_module.context = None

    await async_init_browser()
    test_page = await async_create_page("https://www.google.com/")
    assert test_page is not None, "Page navigation failed."
    assert test_page.url == "https://www.google.com/", "Navigation failed."
    print("test_create_page passed.")

    await async_close_page(test_page)
    assert test_page.is_closed(), "Page failed to close."
    print("test_close_page passed.")

    test_page = await async_create_page("https://www.google.com")
    assert test_page is not None, "Page navigation failed."
    # Capture the real starting URL; it carries a trailing slash, so comparing
    # against the literal "https://www.google.com" would always pass.
    start_url = test_page.url

    # navigate away from google
    navigated = await async_navigate_to(
        "https://www.yahoo.com",
        test_page,
    )

    # async_navigate_to reports a failed navigation by returning None.
    assert navigated is not None, "Navigation failed."
    assert test_page.url != start_url, "Navigation failed."
    print("test_async_navigation passed.")

    body_html = await async_get_body_html(test_page)
    assert body_html is not None, "Failed to get body html."
    print("test_async_body_html passed.")

    html = await async_get_document_html(test_page)
    assert html is not None, "Failed to get document html."
    print("test_async_document_html passed.")

    body = await async_get_body_text(test_page)
    assert body is not None, "Failed to get body text."
    print("test_async_body_text passed.")

    result = await async_evaluate_javascript(
        """
        var x = 1;
        var y = 2;
        var z = x + y;
        z;
        """,
        test_page,
    )
    assert result == 3, "Javascript evaluation failed."
    print("test_async_javascript_evaluation passed.")
70 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | playwright
--------------------------------------------------------------------------------
/resources/image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/elizaOS/agentbrowser/40444259e47fb6a1bcc7b3387cc6fdba84fe303d/resources/image.jpg
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
3 | long_description = ""
4 | with open("README.md", "r") as fh:
5 | long_description = fh.read()
6 | # search for any lines that contain ![]()