├── .editorconfig ├── .github ├── CODEOWNERS ├── config │ └── .pre-commit-config-template.yaml └── workflows │ ├── js-precommit.yml │ ├── python-precommit.yml │ ├── secrets-scanner.yml │ └── vuln-scanner-pr.yml ├── .gitignore ├── .templates ├── js │ └── template.js └── python │ ├── README.md │ ├── template_async.py │ └── template_sync.py ├── .vscode ├── extensions.json ├── launch.json └── settings.json ├── .yamllint ├── Makefile ├── README.md └── examples ├── googlecolab ├── close_cookie_dialog │ └── main.ipynb ├── close_popup │ └── main.ipynb ├── collect_paginated_ecommerce_listing_data │ └── main.ipynb ├── collect_paginated_news_headlines │ └── main.ipynb ├── compare_product_prices │ └── main.ipynb ├── infinite_scroll │ └── main.ipynb ├── log_into_sites │ └── main.ipynb ├── perform_sentiment_analysis │ └── main.ipynb ├── run_script_in_headless_browser │ └── main.ipynb └── wait_for_entire_page_load │ └── main.ipynb ├── js ├── .eslintrc.js ├── .prettierignore ├── .prettierrc.js ├── close-cookie-dialog │ ├── README.md │ └── main.js ├── close-popup │ ├── README.md │ └── main.js ├── collect-paginated-ecommerce-data │ ├── README.md │ └── main.js ├── collect-paginated-news-headlines │ ├── README.md │ └── main.js ├── collect-pricing-data │ ├── README.md │ └── main.js ├── collect-youtube-comments │ ├── README.md │ └── main.js ├── compare-product-prices │ ├── README.md │ └── main.js ├── first-steps │ ├── README.md │ └── main.js ├── get-by-prompt │ ├── README.md │ └── main.js ├── humanlike-antibot │ ├── README.md │ └── main.js ├── infinite-scroll │ ├── README.md │ └── main.js ├── interact-with-external-or-existing-browser │ ├── README.md │ └── main.js ├── list-query-usage │ ├── README.md │ ├── main.js │ └── products_data.csv ├── log-into-sites │ ├── README.md │ └── main.js ├── maps_scraper │ ├── README.md │ ├── main.js │ └── map_data.csv ├── news-aggregator │ ├── README.md │ └── main.js ├── package-lock.json ├── package.json ├── perform-sentiment-analysis │ ├── README.md │ 
└── main.js ├── run-script-in-headless-browser │ ├── README.md │ └── main.js ├── save-and-load-authenticated-session │ ├── README.md │ └── main.js ├── stealth-mode │ ├── README.md │ └── main.js ├── submit-form │ ├── README.md │ └── main.js ├── wait-for-entire-page-load │ ├── README.md │ └── main.js └── xpath │ ├── README.md │ └── main.js └── python ├── .pylintrc ├── close_cookie_dialog ├── README.md └── main.py ├── close_popup ├── README.md └── main.py ├── collect_ecommerce_pricing_data ├── README.md └── main.py ├── collect_paginated_ecommerce_listing_data ├── README.md └── main.py ├── collect_paginated_news_headlines ├── README.md └── main.py ├── compare_product_prices ├── README.md ├── async_main.py └── main.py ├── first_steps ├── README.md └── main.py ├── get_by_prompt ├── README.md └── main.py ├── humanlike-antibot ├── README.md └── main.py ├── infinite_scroll ├── README.md └── main.py ├── interact_with_external_or_existing_browser ├── README.md └── main.py ├── list_query_usage ├── README.md └── main.py ├── log_into_sites ├── README.md └── main.py ├── maps_scraper ├── README.md ├── main.py └── map_data.csv ├── news-aggregator ├── README.md ├── main.py └── main_sync.py ├── perform_sentiment_analysis ├── README.md └── main.py ├── poetry.lock ├── pyproject.toml ├── run_script_in_headless_browser ├── README.md └── main.py ├── run_script_online_in_google_colab ├── README.md └── main.ipynb ├── save_and_load_authenticated_session ├── README.md └── main.py ├── stealth_mode ├── README.md └── main.py ├── submit_form ├── README.md └── main.py ├── wait_for_entire_page_load ├── README.md └── main.py └── xpath ├── README.md └── main.py /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig helps developers define and maintain consistent 2 | # coding styles between different editors and IDEs 3 | # editorconfig.org 4 | 5 | root = true 6 | 7 | [*] 8 | end_of_line = lf 9 | charset = utf-8 10 | trim_trailing_whitespace 
= true 11 | insert_final_newline = true 12 | indent_style = space 13 | indent_size = 2 14 | 15 | [*.hbs] 16 | insert_final_newline = false 17 | 18 | [*.{diff,md}] 19 | trim_trailing_whitespace = false 20 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # This file is managed by Terraform in github-control repository 2 | # Do not edit this file, all changes will be overwritten 3 | # If you need to change this file, create a pull request in 4 | # https://github.com/tinyfish-io/github-control 5 | 6 | .github/workflows/** @tinyfish-io/security_team 7 | osv-scanner.toml @tinyfish-io/security_team 8 | -------------------------------------------------------------------------------- /.github/config/.pre-commit-config-template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | repos: 3 | - repo: "local" 4 | hooks: 5 | - id: "trufflehog" 6 | name: "TruffleHog" 7 | description: Detect secrets in your data. 8 | entry: bash -c 'trufflehog git file://. 
--since-commit HEAD --no-verification --fail --no-update' 9 | language: system 10 | stages: ["pre-commit", "pre-push"] 11 | -------------------------------------------------------------------------------- /.github/workflows/js-precommit.yml: -------------------------------------------------------------------------------- 1 | name: JSPre-commit checks 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize, reopened] 6 | paths: 7 | - "examples/js/**" 8 | 9 | jobs: 10 | js-pre-commit: 11 | name: JS Pre-commit checks 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout Repository 15 | uses: actions/checkout@v4 16 | 17 | - name: Setup Node.js 18 | uses: actions/setup-node@v3 19 | with: 20 | node-version: "18" 21 | 22 | - name: Install dependencies 23 | working-directory: ./examples/js 24 | run: npm install 25 | 26 | - name: Run ESLint 27 | working-directory: ./examples/js 28 | run: npm run lint 29 | 30 | - name: Run Prettier 31 | working-directory: ./examples/js 32 | run: npx prettier --check . 
33 | -------------------------------------------------------------------------------- /.github/workflows/python-precommit.yml: -------------------------------------------------------------------------------- 1 | name: Python Pre-commit checks 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize, reopened] 6 | paths: 7 | - "examples/Python/**" 8 | 9 | jobs: 10 | python-pre-commit: 11 | name: Pre-commit checks 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout Repository 15 | uses: actions/checkout@v4 16 | 17 | - name: Install Python 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: "3.11" 21 | 22 | #---------------------------------------------- 23 | # ----- install & configure poetry ----- 24 | #---------------------------------------------- 25 | - name: Install Poetry 26 | uses: snok/install-poetry@v1 27 | with: 28 | version: 1.8.3 29 | virtualenvs-create: false 30 | virtualenvs-in-project: true 31 | installer-parallel: true 32 | 33 | #---------------------------------------------- 34 | # ----- install dependencies ----- 35 | #---------------------------------------------- 36 | - name: Install dependencies 37 | working-directory: ./examples/Python 38 | run: | 39 | poetry install --no-interaction --no-root --with dev 40 | 41 | - name: Lint check 42 | working-directory: ./examples/Python 43 | run: pylint --disable=R,C application_examples examples 44 | 45 | - name: Code style check 46 | working-directory: ./examples/Python 47 | run: black . 
--check 48 | 49 | - name: Imports sort check 50 | working-directory: ./examples/Python 51 | uses: isort/isort-action@master 52 | 53 | - name: Static check 54 | working-directory: ./examples/Python 55 | uses: jakebailey/pyright-action@v2 56 | continue-on-error: true 57 | with: 58 | pylance-version: latest-release 59 | -------------------------------------------------------------------------------- /.github/workflows/secrets-scanner.yml: -------------------------------------------------------------------------------- 1 | # This file is managed by Terraform in github-control repository 2 | # Do not edit this file, all changes will be overwritten 3 | # If you need to change this file, create a pull request in 4 | # https://github.com/tinyfish-io/github-control 5 | --- 6 | name: Leaked Secrets Scan 7 | on: # yamllint disable-line rule:truthy 8 | pull_request: 9 | merge_group: 10 | branches: [main] 11 | 12 | jobs: 13 | TruffleHog: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v4 18 | with: 19 | fetch-depth: 0 20 | - name: TruffleHog OSS 21 | uses: trufflesecurity/trufflehog@main 22 | with: 23 | path: ./ 24 | base: ${{ github.event.repository.default_branch }} 25 | head: HEAD 26 | extra_args: --only-verified 27 | -------------------------------------------------------------------------------- /.github/workflows/vuln-scanner-pr.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: OSV-Scanner PR Scan 3 | 4 | on: # yamllint disable-line rule:truthy 5 | pull_request: 6 | branches: [main] 7 | 8 | permissions: 9 | # Required to upload SARIF file to CodeQL. 
See: https://github.com/github/codeql-action/issues/2117 10 | actions: read 11 | # Require writing security events to upload SARIF file to security tab 12 | security-events: write 13 | # Only need to read contents 14 | contents: read 15 | 16 | jobs: 17 | vulnerability-check: 18 | uses: "google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@v1.9.2" 19 | with: 20 | upload-sarif: false 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | .env 6 | .venv 7 | 8 | # Distribution / packaging 9 | dist/ 10 | build/ 11 | *.egg-info/ 12 | 13 | # IDE files 14 | .idea/ 15 | .vscode/* 16 | !.vscode/settings.json 17 | !.vscode/tasks.json 18 | !.vscode/launch.json 19 | !.vscode/extensions.json 20 | 21 | # Compiled Python files 22 | *.pyc 23 | 24 | # Logs and databases 25 | logs/ 26 | *.log 27 | *.sqlite3 28 | 29 | # Miscellaneous 30 | *.swp 31 | .DS_Store 32 | 33 | # Poetry 34 | # poetry.lock 35 | 36 | tmp/ 37 | 38 | *env.local 39 | 40 | examples/js/node_modules/ 41 | examples/python/__pycache__/ 42 | examples/python/.venv/ 43 | -------------------------------------------------------------------------------- /.templates/js/template.js: -------------------------------------------------------------------------------- 1 | const { chromium } = require('playwright'); 2 | const { wrap, configure } = require('agentql'); 3 | 4 | // Set URLs to the desired websites 5 | const WEBSITE_URL_1 = ""; 6 | const WEBSITE_URL_2 = ""; 7 | const WEBSITE_URL_3 = ""; 8 | 9 | async function main() { 10 | // Configure the AgentQL API key 11 | configure({ apiKey: process.env.AGENTQL_API_KEY }); 12 | 13 | const browser = await chromium.launch({ headless: false }); 14 | const context = await browser.newContext(); 15 | 16 | // Open multiple tabs in the same browser context to 
fetch data concurrently 17 | await Promise.all([ 18 | fetchData(context, WEBSITE_URL_1), 19 | fetchData(context, WEBSITE_URL_2), 20 | fetchData(context, WEBSITE_URL_3), 21 | ]); 22 | 23 | await browser.close(); 24 | } 25 | 26 | async function fetchData(context, sessionUrl) { 27 | // Create a page in a new tab in the browser context and wrap it to get access to the AgentQL's querying API 28 | const page = await wrap(await context.newPage()); 29 | await page.goto(sessionUrl); 30 | 31 | // Update the query to locate the desired element on the page 32 | const elementsQuery = ` 33 | { 34 | search_input 35 | search_btn 36 | } 37 | `; 38 | 39 | // Locate desired web elements using AgentQL's queryElements() method 40 | const response = await page.queryElements(elementsQuery); 41 | // Update to use the actual query terms to interact with the elements 42 | await response.search_input.fill(""); 43 | await response.search_btn.click(); 44 | 45 | // Update the query to fetch the desired data from the page 46 | const dataQuery = ` 47 | { 48 | products[] { 49 | name 50 | price(integer) 51 | } 52 | } 53 | `; 54 | 55 | // Fetch the data from the page using AgentQL's queryData() method 56 | const data = await page.queryData(dataQuery); 57 | // Update to use the actual keys corresponding to query terms 58 | console.log(`Prices fetched from ${sessionUrl}:`); 59 | for (const product of data.products) { 60 | console.log(`Product: ${product.name}, Price: ${product.price}`); 61 | } 62 | } 63 | 64 | main(); 65 | -------------------------------------------------------------------------------- /.templates/python/README.md: -------------------------------------------------------------------------------- 1 | # Example script: [title of example] 2 | 3 | This example shows how to [do a thing with AgentQL].
4 | 5 | ## Run the script 6 | 7 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 8 | - Save this python file locally as **main.py** 9 | - Run the following command from the project's folder: 10 | 11 | ```bash 12 | python3 main.py 13 | ``` 14 | 15 | ## Play with the query 16 | 17 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. [Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 18 | 19 | ## Learn more 20 | 21 | - [Links to related guide in the docs, blog, or other examples](https://docs.agentql.com/avoiding-bot-detection/user-like-behavior) 22 | -------------------------------------------------------------------------------- /.templates/python/template_async.py: -------------------------------------------------------------------------------- 1 | """This script serves as a skeleton template for asynchronous AgentQL scripts.""" 2 | 3 | import asyncio 4 | import logging 5 | 6 | import agentql 7 | from playwright.async_api import BrowserContext, async_playwright 8 | 9 | # Set up logging 10 | logging.basicConfig(level=logging.INFO) 11 | log = logging.getLogger(__name__) 12 | 13 | # Set URLs to the desired websites 14 | WEBSITE_URL_1 = "" 15 | WEBSITE_URL_2 = "" 16 | WEBSITE_URL_3 = "" 17 | 18 | 19 | async def main(): 20 | """Fetch data concurrently in the same browser session from multiple websites.""" 21 | async with async_playwright() as p, await p.chromium.launch( 22 | headless=False 23 | ) as browser, await browser.new_context() as context: 24 | # Open multiple tabs in the same browser context to fetch data concurrently 25 | await asyncio.gather( 26 | fetch_data(context, WEBSITE_URL_1), 27 | fetch_data(context, WEBSITE_URL_2), 28 | fetch_data(context, WEBSITE_URL_3), 29 | ) 30 | 31 | 32 | async def fetch_data(context: BrowserContext, session_url): 33 | """Open the given URL in a new tab 
and fetch the data.""" 34 | # Create a page in a new tab in the broswer context and wrap it to get access to the AgentQL's querying API 35 | page = await agentql.wrap_async(context.new_page()) 36 | await page.goto(session_url) 37 | 38 | # Update the query to locate the desired element on the page 39 | elements_query = """ 40 | { 41 | search_input 42 | search_btn 43 | } 44 | """ 45 | 46 | # Locate desired web elements using AgentQL's query_elements() method 47 | response = await page.query_elements(elements_query) 48 | # Update to use the actual query terms to interact with the elements 49 | await response.search_input.type("") 50 | await response.search_button.click() 51 | 52 | # Update the query to fetch the desired data from the page 53 | data_query = """ 54 | { 55 | products[] { 56 | name 57 | price(integer) 58 | } 59 | } 60 | """ 61 | 62 | # Fetch the data from the page using AgentQL's query_data() method 63 | data = await page.query_data(data_query) 64 | # Update to use the actual keys corresponding to query terms 65 | log.info(f"Prices fetched from {session_url}:") 66 | for product in data["products"]: 67 | log.info(f"Product: {product['name']}, Price: {product['price']}") 68 | 69 | 70 | if __name__ == "__main__": 71 | # Run the main function in an event loop 72 | asyncio.run(main()) 73 | -------------------------------------------------------------------------------- /.templates/python/template_sync.py: -------------------------------------------------------------------------------- 1 | """This script serves as a skeleton template for synchronous AgentQL scripts.""" 2 | 3 | import logging 4 | 5 | import agentql 6 | from agentql.ext.playwright.sync_api import Page 7 | from playwright.sync_api import sync_playwright 8 | 9 | # Set up logging 10 | logging.basicConfig(level=logging.INFO) 11 | log = logging.getLogger(__name__) 12 | 13 | # Set the URL to the desired website 14 | URL = "" 15 | 16 | 17 | def main(): 18 | with sync_playwright() as p, 
p.chromium.launch(headless=False) as browser: 19 | # Create a new page in the browser and wrap it to get access to the AgentQL's querying API 20 | page = agentql.wrap(browser.new_page()) 21 | 22 | # Navigate to the desired URL 23 | page.goto(URL) 24 | 25 | fetch_data(page) 26 | 27 | 28 | def fetch_data(page: Page): 29 | """Fetch data from the page.""" 30 | # Update the query to locate the desired element on the page 31 | elements_query = """ 32 | { 33 | search_input 34 | search_btn 35 | } 36 | """ 37 | 38 | # Locate desired web elements using AgentQL's query_elements() method 39 | response = page.query_elements(elements_query) 40 | 41 | # Update to use the actual query terms 42 | response.search_input.type("") 43 | response.search_btn.click() 44 | 45 | # Update the query to fetch the desired data from the page 46 | data_query = """ 47 | { 48 | products[] { 49 | price(integer) 50 | } 51 | } 52 | """ 53 | 54 | # Fetch the data from the page using AgentQL's query_data() method 55 | data = page.query_data(data_query) 56 | # Update to use the actual keys corresponding to query terms 57 | for result in data["products"]: 58 | log.info(result["price"]) 59 | 60 | 61 | if __name__ == "__main__": 62 | main() 63 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "ms-python.black-formatter", 4 | "GitHub.copilot", 5 | "esbenp.prettier-vscode", 6 | "ms-python.isort", 7 | "ms-python.pylint", 8 | "dbaeumer.vscode-eslint", 9 | "yoavbls.pretty-ts-errors", 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python: Run/Debug Current File", 9 | "type": "debugpy", 10 | "request": "launch", 11 | "program": "${file}", 12 | "console": "integratedTerminal", 13 | "justMyCode": true, 14 | "env": { 15 | "PYTHONPATH": "${workspaceFolder}" 16 | } 17 | } 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[python]": { 3 | "editor.defaultFormatter": "ms-python.black-formatter", 4 | "editor.codeActionsOnSave": { 5 | "source.organizeImports": "explicit" 6 | } 7 | }, 8 | "[json]": { 9 | "editor.defaultFormatter": "vscode.json-language-features" 10 | }, 11 | "[javascriptreact]": { 12 | "editor.defaultFormatter": "esbenp.prettier-vscode" 13 | }, 14 | "[typescript]": { 15 | "editor.defaultFormatter": "esbenp.prettier-vscode" 16 | }, 17 | "eslint.workingDirectories": [ 18 | { 19 | "directory": "./examples/js", 20 | "changeProcessCWD": true 21 | } 22 | ], 23 | "editor.formatOnSave": true, 24 | "files.insertFinalNewline": true, 25 | "black-formatter.args": [ 26 | "--line-length=100" 27 | ], 28 | "isort.args": [ 29 | "--line-length=100", 30 | "--wrap-length=100", 31 | "--multi-line=3", 32 | "--trailing-comma", 33 | "--profile=black" 34 | ], 35 | "pylint.importStrategy": "fromEnvironment", 36 | "pylint.args": [ 37 | "--rcfile=./examples/python/.pylintrc" 38 | ], 39 | "python.analysis.typeCheckingMode": "standard", 40 | "editor.rulers": [100] 41 | } 42 | -------------------------------------------------------------------------------- /.yamllint: -------------------------------------------------------------------------------- 1 | # This file is managed by Terraform in github-control repository 2 | # Do not edit this file, all changes will be overwritten 3 | # If you need to change this 
file, create a pull request in 4 | # https://github.com/tinyfish-io/github-control 5 | --- 6 | extends: default 7 | 8 | rules: 9 | line-length: 10 | max: 120 11 | level: warning 12 | comments: 13 | min-spaces-from-content: 1 14 | require-starting-space: false 15 | truthy: disable 16 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SHELL := /usr/bin/env bash 2 | 3 | include $(wildcard makefiles/*) 4 | 5 | .PHONY: check-trufflehog 6 | check-trufflehog: 7 | @if ! which trufflehog > /dev/null 2>&1; then \ 8 | echo "TruffleHog is not installed."; \ 9 | echo "MacOS users can install it with:"; \ 10 | echo " brew install trufflehog"; \ 11 | echo ""; \ 12 | echo "Linux users can install it with:"; \ 13 | echo " curl -sSfL https://raw.githubusercontent.com/trufflesecurity/trufflehog/main/scripts/install.sh | sh -s -- -b /usr/local/bin"; \ 14 | echo ""; \ 15 | echo "For more details, go to https://github.com/trufflesecurity/trufflehog"; \ 16 | exit 1; \ 17 | fi 18 | 19 | .PHONY: setup-pre-commit 20 | setup-pre-commit: 21 | @if [ ! -f .pre-commit-config.yaml ]; then \ 22 | echo ".pre-commit-config.yaml not found. 
Copying template..."; \ 23 | cp .github/config/.pre-commit-config-template.yaml .pre-commit-config.yaml; \ 24 | echo ".pre-commit-config.yaml created from template."; \ 25 | else \ 26 | echo ".pre-commit-config.yaml already exists."; \ 27 | fi 28 | 29 | .PHONY: init 30 | init: setup-pre-commit check-trufflehog 31 | pip install pre-commit 32 | pre-commit install 33 | 34 | -------------------------------------------------------------------------------- /examples/googlecolab/close_cookie_dialog/main.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This example shows how to close a cookie dialog on a website using AgentQL with the [Google Colaboratory](https://colab.research.google.com/) environment." 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Install [AgentQL](https://pypi.org/project/agentql/) library" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "%pip install agentql" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Install the Playwright dependency required by AgentQL." 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "!playwright install chromium" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "You can [store](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) keys in Google Colab's secrets." 
47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "import os\n", 56 | "\n", 57 | "from google.colab import userdata\n", 58 | "\n", 59 | "os.environ[\"AGENTQL_API_KEY\"]=userdata.get('AGENTQL_API_KEY')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "Run AgentQL script. Unfortunately, you can't see the browser window directly in Google Colab like you would on your local machine. However, let's still interact with it and take a screen recording of the browser session to see what’s happening. \n", 67 | "\n", 68 | "Please note that an online environment like Google Colab supports **asynchronous version** of AgentQL." 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "import agentql\n", 78 | "from playwright.async_api import async_playwright\n", 79 | "from IPython.display import HTML\n", 80 | "from base64 import b64encode\n", 81 | "\n", 82 | "# Set the URL to the desired website\n", 83 | "URL = \"https://gov.uk/\"\n", 84 | "\n", 85 | "QUERY = \"\"\"\n", 86 | "{\n", 87 | " cookies_form {\n", 88 | " reject_btn\n", 89 | " }\n", 90 | "}\n", 91 | "\"\"\"\n", 92 | "\n", 93 | "async with async_playwright() as playwright, await playwright.chromium.launch() as browser:\n", 94 | " \n", 95 | " # Set up the video recording\n", 96 | " video_dir = os.path.abspath(\"videos\")\n", 97 | " context = await browser.new_context(\n", 98 | " record_video_dir=\"videos/\",\n", 99 | " record_video_size={\"width\": 1280, \"height\": 720} \n", 100 | " )\n", 101 | " \n", 102 | " # Create a new page in the context and wrap it to get access to the AgentQL's querying API\n", 103 | " page = await agentql.wrap_async(await context.new_page())\n", 104 | " \n", 105 | " await page.goto(URL)\n", 106 | "\n", 107 | " # Use query_elements() method to fetch the cookies dialog button 
from the page\n", 108 | " response = await page.query_elements(QUERY)\n", 109 | "\n", 110 | " # Check if there is a cookie-rejection button on the page\n", 111 | " if response.cookies_form.reject_btn != None:\n", 112 | " # If so, click the close button to reject cookies\n", 113 | " await response.cookies_form.reject_btn.click()\n", 114 | " \n", 115 | " # Wait for 10 seconds to see the browser in action\n", 116 | " await page.wait_for_timeout(10000)\n", 117 | "\n", 118 | " # Close context to ensure video is saved\n", 119 | " await context.close()\n", 120 | " \n", 121 | " # Display the video\n", 122 | " video_files = [f for f in os.listdir(video_dir) if f.endswith('.webm')]\n", 123 | " if video_files:\n", 124 | " video_path = os.path.join(video_dir, video_files[0])\n", 125 | " with open(video_path, 'rb') as f:\n", 126 | " video_bytes = f.read()\n", 127 | " \n", 128 | " video_b64 = b64encode(video_bytes).decode('utf-8')\n", 129 | " video_html = f\"\"\"\n", 130 | " \n", 134 | " \"\"\"\n", 135 | " display(HTML(video_html))\n", 136 | " else:\n", 137 | " print(\"No video file was created\")" 138 | ] 139 | } 140 | ], 141 | "metadata": { 142 | "language_info": { 143 | "name": "python" 144 | } 145 | }, 146 | "nbformat": 4, 147 | "nbformat_minor": 2 148 | } 149 | -------------------------------------------------------------------------------- /examples/googlecolab/close_popup/main.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This example shows how to close popup windows (like promotion form) using AgentQL with the [Google Colaboratory](https://colab.research.google.com/) environment." 
8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Install [AgentQL](https://pypi.org/project/agentql/) library" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "%pip install agentql" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Install the Playwright dependency required by AgentQL." 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "!playwright install chromium" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "You can [store](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) keys in Google Colab's secrets." 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "import os\n", 56 | "\n", 57 | "from google.colab import userdata\n", 58 | "\n", 59 | "os.environ[\"AGENTQL_API_KEY\"]=userdata.get('AGENTQL_API_KEY')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "Run AgentQL script. Unfortunately, you can't see the browser window directly in Google Colab like you would on your local machine. However, let's still interact with it and take a screen recording of the browser session to see what’s happening. \n", 67 | "\n", 68 | "Please note that an online environment like Google Colab supports **asynchronous version** of AgentQL." 
69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "import agentql\n", 78 | "from playwright.async_api import async_playwright\n", 79 | "from IPython.display import HTML\n", 80 | "from base64 import b64encode\n", 81 | "\n", 82 | "# Set the URL to the desired website\n", 83 | "URL = \"https://kinfield.com/\"\n", 84 | "\n", 85 | "QUERY = \"\"\"\n", 86 | "{\n", 87 | " popup_form {\n", 88 | " close_btn\n", 89 | " }\n", 90 | "}\n", 91 | "\"\"\"\n", 92 | "\n", 93 | "\n", 94 | "async with async_playwright() as playwright, await playwright.chromium.launch() as browser:\n", 95 | " \n", 96 | " # Set up the video recording\n", 97 | " video_dir = os.path.abspath(\"videos\")\n", 98 | " context = await browser.new_context(\n", 99 | " record_video_dir=\"videos/\",\n", 100 | " record_video_size={\"width\": 1280, \"height\": 720}\n", 101 | " )\n", 102 | " \n", 103 | " # Create a new page in the browser and wrap it to get access to the AgentQL's querying API\n", 104 | " page = await agentql.wrap_async(await context.new_page())\n", 105 | "\n", 106 | " await page.goto(URL)\n", 107 | "\n", 108 | " # Use query_elements() method to fetch the close popup button from the page\n", 109 | " response = await page.query_elements(QUERY)\n", 110 | "\n", 111 | " # Click the close button to close the popup\n", 112 | " await response.popup_form.close_btn.click()\n", 113 | " \n", 114 | " # Wait for 10 seconds to see the browser in action\n", 115 | " await page.wait_for_timeout(10000)\n", 116 | "\n", 117 | " # Close context to ensure video is saved\n", 118 | " await context.close()\n", 119 | "\n", 120 | " # Display the video\n", 121 | " video_files = [f for f in os.listdir(video_dir) if f.endswith('.webm')]\n", 122 | " if video_files:\n", 123 | " video_path = os.path.join(video_dir, video_files[0])\n", 124 | " with open(video_path, 'rb') as f:\n", 125 | " video_bytes = f.read()\n", 126 | " \n", 127 | " video_b64 
= b64encode(video_bytes).decode('utf-8')\n", 128 | " video_html = f\"\"\"\n", 129 | " \n", 133 | " \"\"\"\n", 134 | " display(HTML(video_html))\n", 135 | " else:\n", 136 | " print(\"No video file was created\")" 137 | ] 138 | } 139 | ], 140 | "metadata": { 141 | "language_info": { 142 | "name": "python" 143 | } 144 | }, 145 | "nbformat": 4, 146 | "nbformat_minor": 2 147 | } 148 | -------------------------------------------------------------------------------- /examples/googlecolab/collect_paginated_ecommerce_listing_data/main.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This example shows how to collect paginated data from an ecommerce website using AgentQL with the [Google Colaboratory](https://colab.research.google.com/) environment." 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Install [AgentQL](https://pypi.org/project/agentql/) library" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "%pip install agentql" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Install the Playwright dependency required by AgentQL." 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "!playwright install chromium" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "You can [store](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) keys in Google Colab's secrets."
47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "import os\n", 56 | "\n", 57 | "from google.colab import userdata\n", 58 | "\n", 59 | "os.environ[\"AGENTQL_API_KEY\"]=userdata.get('AGENTQL_API_KEY')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "Run AgentQL script. Unfortunately, you can't see the browser window directly in Google Colab like you would on your local machine. However, let's still interact with it and take a screen recording of the browser session to see what’s happening. \n", 67 | "\n", 68 | "Please note that an online environment like Google Colab supports **asynchronous version** of AgentQL." 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "import json\n", 78 | "import logging\n", 79 | "\n", 80 | "import agentql\n", 81 | "from playwright.async_api import async_playwright\n", 82 | "from IPython.display import HTML\n", 83 | "from base64 import b64encode\n", 84 | "\n", 85 | "logging.basicConfig(level=logging.DEBUG)\n", 86 | "log = logging.getLogger(__name__)\n", 87 | "\n", 88 | "async with async_playwright() as playwright, await playwright.chromium.launch() as browser:\n", 89 | "\n", 90 | " # Set up the video recording\n", 91 | " video_dir = os.path.abspath(\"videos\")\n", 92 | " context = await browser.new_context(\n", 93 | " record_video_dir=\"videos/\",\n", 94 | " record_video_size={\"width\": 1280, \"height\": 720}\n", 95 | " )\n", 96 | " page = await agentql.wrap_async(await context.new_page())\n", 97 | " await page.goto(\"https://books.toscrape.com/\")\n", 98 | "\n", 99 | " # define the query to extract product names, prices, and ratings\n", 100 | " QUERY = \"\"\"\n", 101 | " {\n", 102 | " books[] {\n", 103 | " name\n", 104 | " price\n", 105 | " rating\n", 106 | " }\n", 107 | " }\n", 108 | " \"\"\"\n", 109 | "\n", 
110 | " books = []\n", 111 | "\n", 112 | " # Aggregate the first 50 book names, prices and ratings\n", 113 | " while len(books) < 50:\n", 114 | " # collect data from the current page\n", 115 | " response = await page.query_data(QUERY)\n", 116 | "\n", 117 | " # limit the total number of books to 50\n", 118 | " if len(response[\"books\"]) + len(books) > 50:\n", 119 | " books.extend(response[\"books\"][:50 - len(books)])\n", 120 | " else:\n", 121 | " books.extend(response[\"books\"])\n", 122 | "\n", 123 | " # get the pagination info from the current page\n", 124 | " pagination_info = await page.get_pagination_info()\n", 125 | "\n", 126 | " # attempt to navigate to next page\n", 127 | " if pagination_info.has_next_page:\n", 128 | " await pagination_info.navigate_to_next_page()\n", 129 | "\n", 130 | " with open(f\"./books.json\", \"w\") as f:\n", 131 | " json.dump(books, f, indent=4)\n", 132 | "\n", 133 | " # Display the video\n", 134 | " video_files = [f for f in os.listdir(video_dir) if f.endswith('.webm')]\n", 135 | " if video_files:\n", 136 | " video_path = os.path.join(video_dir, video_files[0])\n", 137 | " with open(video_path, 'rb') as f:\n", 138 | " video_bytes = f.read()\n", 139 | " \n", 140 | " video_b64 = b64encode(video_bytes).decode('utf-8')\n", 141 | " video_html = f\"\"\"\n", 142 | " \n", 146 | " \"\"\"\n", 147 | " display(HTML(video_html))\n", 148 | " else:\n", 149 | " print(\"No video file was created\")" 150 | ] 151 | } 152 | ], 153 | "metadata": { 154 | "language_info": { 155 | "name": "python" 156 | } 157 | }, 158 | "nbformat": 4, 159 | "nbformat_minor": 2 160 | } 161 | -------------------------------------------------------------------------------- /examples/googlecolab/collect_paginated_news_headlines/main.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This example shows how to collect paginated news headlines 
using AgentQL with the [Google Colaboratory](https://colab.research.google.com/) environment." 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Install [AgentQL](https://pypi.org/project/agentql/) library" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "%pip install agentql" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Install the Playwright dependency required by AgentQL." 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "!playwright install chromium" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "You can [store](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) keys in Google Colab's secrets." 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "import os\n", 56 | "\n", 57 | "from google.colab import userdata\n", 58 | "\n", 59 | "os.environ[\"AGENTQL_API_KEY\"]=userdata.get('AGENTQL_API_KEY')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "Run AgentQL script. Unfortunately, you can't see the browser window directly in Google Colab like you would on your local machine. However, let's still interact with it and take a screen recording of the browser session to see what’s happening. \n", 67 | "\n", 68 | "Please note that an online environment like Google Colab supports **asynchronous version** of AgentQL." 
69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "import json\n", 78 | "import logging\n", 79 | "\n", 80 | "import agentql\n", 81 | "from playwright.async_api import async_playwright\n", 82 | "from IPython.display import HTML\n", 83 | "from base64 import b64encode\n", 84 | "\n", 85 | "# import paginate tool from agentql tools\n", 86 | "from agentql.tools.async_api import paginate\n", 87 | "\n", 88 | "logging.basicConfig(level=logging.DEBUG)\n", 89 | "log = logging.getLogger(__name__)\n", 90 | "\n", 91 | "\n", 92 | "async with async_playwright() as playwright, await playwright.chromium.launch() as browser:\n", 93 | " # Set up the video recording\n", 94 | " video_dir = os.path.abspath(\"videos\")\n", 95 | " context = await browser.new_context(\n", 96 | " record_video_dir=\"videos/\",\n", 97 | " record_video_size={\"width\": 1280, \"height\": 720}\n", 98 | " )\n", 99 | " page = await agentql.wrap_async(await context.new_page())\n", 100 | " await page.goto(\"https://news.ycombinator.com/\")\n", 101 | "\n", 102 | " # Define the query to extract post titles\n", 103 | " QUERY = \"\"\"\n", 104 | " {\n", 105 | " posts[] {\n", 106 | " title\n", 107 | " }\n", 108 | " }\n", 109 | " \"\"\"\n", 110 | " # Collect all data over the next 3 pages with the query defined above\n", 111 | " paginated_data = await paginate(page, QUERY, 3)\n", 112 | "\n", 113 | " # Save the aggregated data to a json file\n", 114 | " with open(\"./hackernews_paginated_data.json\", \"w\") as f:\n", 115 | " json.dump(paginated_data, f, indent=4)\n", 116 | "\n", 117 | " log.debug(\"Paginated data has been saved to hackernews_paginated_data.json\")\n", 118 | "\n", 119 | " # Display the video\n", 120 | " video_files = [f for f in os.listdir(video_dir) if f.endswith('.webm')]\n", 121 | " if video_files:\n", 122 | " video_path = os.path.join(video_dir, video_files[0])\n", 123 | " with open(video_path, 'rb') as f:\n", 124 | 
" video_bytes = f.read()\n", 125 | "\n", 126 | " video_b64 = b64encode(video_bytes).decode('utf-8')\n", 127 | " video_html = f\"\"\"\n", 128 | " \n", 132 | " \"\"\"\n", 133 | " display(HTML(video_html))\n", 134 | " else:\n", 135 | " print(\"No video file was created\")" 136 | ] 137 | } 138 | ], 139 | "metadata": { 140 | "language_info": { 141 | "name": "python" 142 | } 143 | }, 144 | "nbformat": 4, 145 | "nbformat_minor": 2 146 | } 147 | -------------------------------------------------------------------------------- /examples/googlecolab/compare_product_prices/main.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This example shows how to compare product prices using AgentQL with the [Google Colaboratory](https://colab.research.google.com/) environment." 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Install [AgentQL](https://pypi.org/project/agentql/) library" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "%pip install agentql" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Install the Playwright dependency required by AgentQL." 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "!playwright install chromium" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "You can [store](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) keys in Google Colab's secrets." 
47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "import os\n", 56 | "\n", 57 | "from google.colab import userdata\n", 58 | "\n", 59 | "os.environ[\"AGENTQL_API_KEY\"]=userdata.get('AGENTQL_API_KEY')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "Run AgentQL script. Please note that an online environment like Google Colab supports **asynchronous version** of AgentQL." 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "import agentql\n", 76 | "from playwright.async_api import async_playwright\n", 77 | "from IPython.display import HTML\n", 78 | "from base64 import b64encode\n", 79 | "\n", 80 | "# Set the URL to the desired website\n", 81 | "BESTBUY_URL = \"https://www.bestbuy.com/site/nintendo-switch-oled-model-w-joy-con-white/6470923.p?skuId=6470923\"\n", 82 | "TARGET_URL = \"https://www.target.com/p/nintendo-switch-oled-model-with-white-joy-con/-/A-83887639#lnk=sametab\"\n", 83 | "NINTENDO_URL = \"https://www.nintendo.com/us/store/products/nintendo-switch-oled-model-white-set/\"\n", 84 | "\n", 85 | "# Define the queries to get the product price\n", 86 | "PRODUCT_INFO_QUERY = \"\"\"\n", 87 | "{\n", 88 | " nintendo_switch_price(integer)\n", 89 | "}\n", 90 | "\"\"\"\n", 91 | "\n", 92 | "async with async_playwright() as playwright, await playwright.chromium.launch() as browser:\n", 93 | "\n", 94 | " # Set up the video recording\n", 95 | " video_dir = os.path.abspath(\"videos\")\n", 96 | " context = await browser.new_context(\n", 97 | " record_video_dir=\"videos/\",\n", 98 | " record_video_size={\"width\": 1280, \"height\": 720}\n", 99 | " )\n", 100 | " page = await agentql.wrap_async(await context.new_page())\n", 101 | " await page.goto(BESTBUY_URL, timeout= 60000)\n", 102 | "\n", 103 | " # Use query_data() method to fetch the price from 
the BestBuy page\n", 104 | " response = await page.query_data(PRODUCT_INFO_QUERY)\n", 105 | "\n", 106 | " print(\"Price at BestBuy: \", response[\"nintendo_switch_price\"])\n", 107 | "\n", 108 | " await page.goto(NINTENDO_URL, timeout= 60000)\n", 109 | "\n", 110 | " # Use query_data() method to fetch the price from the Nintendo page\n", 111 | " response = await page.query_data(PRODUCT_INFO_QUERY)\n", 112 | "\n", 113 | " print(\"Price at Nintendo: \", response[\"nintendo_switch_price\"])\n", 114 | "\n", 115 | " await page.goto(TARGET_URL, timeout= 60000)\n", 116 | "\n", 117 | " # Use query_data() method to fetch the price from the Target page\n", 118 | " response = await page.query_data(PRODUCT_INFO_QUERY)\n", 119 | "\n", 120 | " print(\"Price at Target: \", response[\"nintendo_switch_price\"])\n", 121 | "\n", 122 | " # Display the video\n", 123 | " video_files = [f for f in os.listdir(video_dir) if f.endswith('.webm')]\n", 124 | " if video_files:\n", 125 | " video_path = os.path.join(video_dir, video_files[0])\n", 126 | " with open(video_path, 'rb') as f:\n", 127 | " video_bytes = f.read()\n", 128 | " \n", 129 | " video_b64 = b64encode(video_bytes).decode('utf-8')\n", 130 | " video_html = f\"\"\"\n", 131 | " \n", 135 | " \"\"\"\n", 136 | " display(HTML(video_html))\n", 137 | " else:\n", 138 | " print(\"No video file was created\") " 139 | ] 140 | } 141 | ], 142 | "metadata": { 143 | "language_info": { 144 | "name": "python" 145 | } 146 | }, 147 | "nbformat": 4, 148 | "nbformat_minor": 2 149 | } 150 | -------------------------------------------------------------------------------- /examples/googlecolab/log_into_sites/main.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This example shows how to log into websites using AgentQL with the [Google Colaboratory](https://colab.research.google.com/) environment." 
8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Install [AgentQL](https://pypi.org/project/agentql/) library" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "%pip install agentql" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Install the Playwright dependency required by AgentQL." 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "!playwright install chromium" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "You can [store](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) keys in Google Colab's secrets." 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "import os\n", 56 | "\n", 57 | "from google.colab import userdata\n", 58 | "\n", 59 | "os.environ[\"AGENTQL_API_KEY\"]=userdata.get('AGENTQL_API_KEY')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "Run AgentQL script. Unfortunately, you can't see the browser window directly in Google Colab like you would on your local machine. However, let's still interact with it and take a screen recording of the browser session to see what’s happening. \n", 67 | "\n", 68 | "Please note that an online environment like Google Colab supports **asynchronous version** of AgentQL." 
69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "import agentql\n", 78 | "from playwright.async_api import async_playwright\n", 79 | "from IPython.display import HTML\n", 80 | "from base64 import b64encode\n", 81 | "\n", 82 | "# Set the URL to the desired website\n", 83 | "URL = \"https://practicetestautomation.com/practice-test-login/\"\n", 84 | "\n", 85 | "LOGIN_QUERY = \"\"\"\n", 86 | "{\n", 87 | " username_field\n", 88 | " password_field\n", 89 | " submit_btn\n", 90 | "}\n", 91 | "\"\"\"\n", 92 | "\n", 93 | "\n", 94 | "async with async_playwright() as playwright, await playwright.chromium.launch() as browser:\n", 95 | " # Set up the video recording\n", 96 | " video_dir = os.path.abspath(\"videos\")\n", 97 | " context = await browser.new_context(\n", 98 | " record_video_dir=\"videos/\",\n", 99 | " record_video_size={\"width\": 1280, \"height\": 720}\n", 100 | " )\n", 101 | "\n", 102 | " page = await agentql.wrap_async(await context.new_page())\n", 103 | "\n", 104 | " await page.goto(URL)\n", 105 | "\n", 106 | " # Get the username and password fields\n", 107 | " response = await page.query_elements(LOGIN_QUERY)\n", 108 | "\n", 109 | " # Fill the username and password fields\n", 110 | " await response.username_field.fill(\"student\")\n", 111 | " await response.password_field.fill(\"Password123\")\n", 112 | "\n", 113 | " # Click the submit button\n", 114 | " await response.submit_btn.click()\n", 115 | "\n", 116 | " # Used only for demo purposes. 
It allows you to see the effect of the script.\n", 117 | " await page.wait_for_timeout(5000)\n", 118 | "\n", 119 | " await context.close()\n", 120 | "\n", 121 | " # Display the video\n", 122 | " video_files = [f for f in os.listdir(video_dir) if f.endswith('.webm')]\n", 123 | " if video_files:\n", 124 | " video_path = os.path.join(video_dir, video_files[0])\n", 125 | " with open(video_path, 'rb') as f:\n", 126 | " video_bytes = f.read()\n", 127 | "\n", 128 | " video_b64 = b64encode(video_bytes).decode('utf-8')\n", 129 | " video_html = f\"\"\"\n", 130 | " \n", 134 | " \"\"\"\n", 135 | " display(HTML(video_html))\n", 136 | " else:\n", 137 | " print(\"No video file was created\")" 138 | ] 139 | } 140 | ], 141 | "metadata": { 142 | "language_info": { 143 | "name": "python" 144 | } 145 | }, 146 | "nbformat": 4, 147 | "nbformat_minor": 2 148 | } 149 | -------------------------------------------------------------------------------- /examples/googlecolab/run_script_in_headless_browser/main.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This example shows how to run a script in a headless browser using AgentQL with the [Google Colaboratory](https://colab.research.google.com/) environment." 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Install [AgentQL](https://pypi.org/project/agentql/) library" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "%pip install agentql" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Install the Playwright dependency required by AgentQL." 
31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "!playwright install chromium" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "You can [store](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) keys in Google Colab's secrets." 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "import os\n", 56 | "\n", 57 | "from google.colab import userdata\n", 58 | "\n", 59 | "os.environ[\"AGENTQL_API_KEY\"]=userdata.get('AGENTQL_API_KEY')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "Run AgentQL script. Please note that an online environment like Google Colab supports **asynchronous version** of AgentQL." 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "import agentql\n", 76 | "\n", 77 | "from playwright.async_api import async_playwright\n", 78 | "\n", 79 | "# Set the URL to the desired website\n", 80 | "URL = \"https://scrapeme.live/shop\"\n", 81 | "\n", 82 | "SEARCH_QUERY = \"\"\"\n", 83 | "{\n", 84 | " search_products_box\n", 85 | "}\n", 86 | "\"\"\"\n", 87 | "\n", 88 | "STOCK_NUMBER_QUERY = \"\"\"\n", 89 | "{\n", 90 | " number_in_stock\n", 91 | "}\n", 92 | "\"\"\"\n", 93 | "\n", 94 | "\n", 95 | "async with async_playwright() as playwright, await playwright.chromium.launch(headless=True) as browser:\n", 96 | "\n", 97 | " page = await agentql.wrap_async(await browser.new_page())\n", 98 | "\n", 99 | " await page.goto(URL)\n", 100 | "\n", 101 | " # Use query_elements() method to locate the search product box from the page\n", 102 | " response = await page.query_elements(SEARCH_QUERY)\n", 103 | "\n", 104 | " # Use Playwright's API to fill the search box and press Enter\n", 105 | " 
await response.search_products_box.type(\"Charmander\")\n", 106 | " await page.keyboard.press(\"Enter\")\n", 107 | "\n", 108 | " # Use query_data() method to fetch the stock number from the page\n", 109 | " response = await page.query_data(STOCK_NUMBER_QUERY)\n", 110 | "\n", 111 | " print(response)" 112 | ] 113 | } 114 | ], 115 | "metadata": { 116 | "language_info": { 117 | "name": "python" 118 | } 119 | }, 120 | "nbformat": 4, 121 | "nbformat_minor": 2 122 | } 123 | -------------------------------------------------------------------------------- /examples/googlecolab/wait_for_entire_page_load/main.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This example shows how to wait for the entire page to load using AgentQL with the [Google Colaboratory](https://colab.research.google.com/) environment." 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Install [AgentQL](https://pypi.org/project/agentql/) library" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "%pip install agentql" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Install the Playwright dependency required by AgentQL." 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "!playwright install chromium" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "You can [store](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) keys in Google Colab's secrets." 
47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "import os\n", 56 | "\n", 57 | "from google.colab import userdata\n", 58 | "\n", 59 | "os.environ[\"AGENTQL_API_KEY\"]=userdata.get('AGENTQL_API_KEY')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "Run AgentQL script. Unfortunately, you can't see the browser window directly in Google Colab like you would on your local machine. However, let's still interact with it and take a screen recording of the browser session to see what’s happening. \n", 67 | "\n", 68 | "Please note that an online environment like Google Colab supports **asynchronous version** of AgentQL." 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "import agentql\n", 78 | "from playwright.async_api import async_playwright\n", 79 | "from IPython.display import HTML\n", 80 | "from base64 import b64encode\n", 81 | "\n", 82 | "# Duckduckgo URL to demonstrate the example for loading more videos on the page\n", 83 | "URL = \"https://duckduckgo.com/?q=machine+learning+lectures+mit&t=h_&iar=videos&iax=videos&ia=videos\"\n", 84 | "\n", 85 | "QUERY = \"\"\"\n", 86 | "{\n", 87 | " videos(first 10 videos)[] {\n", 88 | " video_title\n", 89 | " length\n", 90 | " views\n", 91 | " }\n", 92 | "}\n", 93 | "\"\"\"\n", 94 | "\n", 95 | "async with async_playwright() as playwright, await playwright.chromium.launch() as browser:\n", 96 | " \n", 97 | " video_dir = os.path.abspath(\"videos\")\n", 98 | " context = await browser.new_context(\n", 99 | " record_video_dir=\"videos/\",\n", 100 | " record_video_size={\"width\": 1280, \"height\": 720}\n", 101 | " )\n", 102 | " \n", 103 | " # Create a new page in the browser and wrap it to get access to the AgentQL's querying API\n", 104 | " page = await agentql.wrap_async(await context.new_page())\n", 105 | 
"\n", 106 | " await page.goto(URL)\n", 107 | "\n", 108 | " for _ in range(2):\n", 109 | " # Wait for additional videos to load completely\n", 110 | " await page.wait_for_page_ready_state()\n", 111 | " # Scroll down the page to trigger loading of more videos\n", 112 | " await page.keyboard.press(\"End\")\n", 113 | "\n", 114 | " # Use query_data() method to fetch video lists data from the page\n", 115 | " response = await page.query_data(QUERY)\n", 116 | "\n", 117 | " # Print the details of the first video\n", 118 | " print(response[\"videos\"][0])\n", 119 | "\n", 120 | " # Display the video\n", 121 | " video_files = [f for f in os.listdir(video_dir) if f.endswith('.webm')]\n", 122 | " if video_files:\n", 123 | " video_path = os.path.join(video_dir, video_files[0])\n", 124 | " with open(video_path, 'rb') as f:\n", 125 | " video_bytes = f.read()\n", 126 | " \n", 127 | " video_b64 = b64encode(video_bytes).decode('utf-8')\n", 128 | " video_html = f\"\"\"\n", 129 | " \n", 133 | " \"\"\"\n", 134 | " display(HTML(video_html))\n", 135 | " else:\n", 136 | " print(\"No video file was created\") " 137 | ] 138 | } 139 | ], 140 | "metadata": { 141 | "language_info": { 142 | "name": "python" 143 | } 144 | }, 145 | "nbformat": 4, 146 | "nbformat_minor": 2 147 | } 148 | -------------------------------------------------------------------------------- /examples/js/.eslintrc.js: -------------------------------------------------------------------------------- 1 | /* eslint-env node */ 2 | module.exports = { 3 | root: true, 4 | env: { 5 | browser: true, 6 | es6: true, 7 | node: true, 8 | }, 9 | extends: ['eslint:recommended', 'prettier'], 10 | parserOptions: { 11 | ecmaVersion: 12, 12 | sourceType: 'module', 13 | }, 14 | }; 15 | -------------------------------------------------------------------------------- /examples/js/.prettierignore: -------------------------------------------------------------------------------- 1 | # unconventional js 2 | /blueprints/*/files/ 3 | /vendor/ 4 | 5 | 
# compiled output 6 | /dist/ 7 | /tmp/ 8 | 9 | # dependencies 10 | /bower_components/ 11 | /node_modules/ 12 | 13 | # misc 14 | /coverage/ 15 | !.* 16 | .eslintcache 17 | .lint-todo/ 18 | -------------------------------------------------------------------------------- /examples/js/.prettierrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | printWidth: 100, 3 | trailingComma: 'all', 4 | singleQuote: true, 5 | importOrder: ['', '^@/types/(.*)$', '^@/lib/(.*)$', '^[./]'], 6 | importOrderSeparation: true, 7 | importOrderSortSpecifiers: true, 8 | importOrderCaseInsensitive: false, 9 | }; 10 | -------------------------------------------------------------------------------- /examples/js/close-cookie-dialog/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Close a cookie dialog 3 | description: Close a site's dialog for rejecting/accepting cookies with AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: close a cookie dialog with AgentQL 8 | 9 | This example demonstrates how to use AgentQL to close a site's dialog for rejecting/accepting cookies. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. 
[Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/js/close-cookie-dialog/main.js: -------------------------------------------------------------------------------- 1 | const { wrap, configure } = require('agentql'); 2 | const { chromium } = require('playwright'); 3 | 4 | (async () => { 5 | // Configure the AgentQL API key 6 | configure({ 7 | apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted. 8 | }); 9 | 10 | const browser = await chromium.launch({ headless: false }); 11 | const page = await wrap(await browser.newPage()); // Wraps the Playwright Page to access AgentQL's features. 12 | 13 | // Set the URL to the desired website 14 | const URL = 'https://gov.uk/'; 15 | await page.goto(URL); 16 | 17 | // Define the query to find elements on the page 18 | const QUERY = ` 19 | { 20 | cookies_form { 21 | reject_btn 22 | } 23 | } 24 | `; 25 | 26 | // Use queryElements to fetch the cookies dialog button from the page 27 | const response = await page.queryElements(QUERY); 28 | 29 | // Check if there is a cookie-rejection button on the page and click it 30 | if (response.cookies_form && response.cookies_form.reject_btn) { 31 | await response.cookies_form.reject_btn.click(); 32 | } 33 | 34 | // Wait for 10 seconds to see the effect of the script 35 | await page.waitForTimeout(10000); 36 | 37 | await browser.close(); 38 | })(); 39 | -------------------------------------------------------------------------------- /examples/js/close-popup/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Close popup windows 3 | description: Close modals like promotional forms and banners with AgentQL. 
4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: closing popup windows with AgentQL 8 | 9 | This example demonstrates how to close popup windows (like promotion form) with AgentQL. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. [Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/js/close-popup/main.js: -------------------------------------------------------------------------------- 1 | const { wrap, configure } = require('agentql'); 2 | const { chromium } = require('playwright'); 3 | 4 | // Set the URL to the desired website 5 | const URL = 'https://kinfield.com/'; 6 | 7 | const CLOSE_POPUP_QUERY = ` 8 | { 9 | popup_form { 10 | close_btn 11 | } 12 | } 13 | `; 14 | 15 | async function main() { 16 | // Configure the AgentQL API key 17 | configure({ 18 | apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted. 
19 | }); 20 | 21 | const browser = await chromium.launch({ headless: false }); 22 | const context = await browser.newContext(); 23 | 24 | // Wrap the page to get access to the AgentQL's querying API 25 | const page = await wrap(await context.newPage()); 26 | 27 | await page.goto(URL); 28 | 29 | // Extract data using AgentQL API's queryElements() method 30 | const response = await page.queryElements(CLOSE_POPUP_QUERY); 31 | await response.popup_form.close_btn.click(); 32 | 33 | // Wait for 10 seconds to see the browser in action 34 | await page.waitForTimeout(10000); 35 | 36 | await browser.close(); 37 | } 38 | 39 | main(); 40 | -------------------------------------------------------------------------------- /examples/js/collect-paginated-ecommerce-data/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Collect paginated bookstore listing data 3 | description: Collect paginated bookstore listing data with AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: Collect ecommerce data across multiple paginated webpages 8 | 9 | This example demonstrates how to collect ecommerce data by stepping through multiple paginated webpages. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. 
[Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/js/collect-paginated-ecommerce-data/main.js: -------------------------------------------------------------------------------- 1 | const { wrap, configure } = require('agentql'); 2 | const { chromium } = require('playwright'); 3 | 4 | async function getPaginatedData(page, query, pages) { 5 | const paginatedData = []; 6 | for (let i = 0; i < pages; i++) { 7 | const data = await page.queryData(query); 8 | paginatedData.push(data); 9 | await page.goto(`https://books.toscrape.com/?page=${i + 2}`); 10 | } 11 | return paginatedData; 12 | } 13 | 14 | async function goToTheNextPage(page, URL) { 15 | const nextPageQuery = ` 16 | { 17 | pagination { 18 | prev_page_url 19 | next_page_url 20 | } 21 | }`; 22 | console.log('Navigating to the next page...'); 23 | const pagination = await page.queryData(nextPageQuery); 24 | let nextPageUrl = pagination.pagination?.next_page_url; 25 | if (!nextPageUrl) { 26 | return false; 27 | } 28 | try { 29 | if (!nextPageUrl.startsWith('http')) { 30 | nextPageUrl = URL + nextPageUrl; 31 | } 32 | await page.goto(nextPageUrl); 33 | return true; 34 | } catch (error) { 35 | console.error(error); 36 | return false; 37 | } 38 | } 39 | 40 | (async () => { 41 | configure({ 42 | apiKey: process.env.AGENTQL_API_KEY, 43 | }); 44 | 45 | const QUERY = ` 46 | { 47 | books[] { 48 | name 49 | price 50 | rating 51 | } 52 | } 53 | `; 54 | const books = []; 55 | const URL = 'https://books.toscrape.com/'; 56 | 57 | const browser = await chromium.launch({ headless: false }); 58 | const page = await wrap(await browser.newPage()); 59 | 60 | await page.goto(URL); 61 | 62 | while (books.length < 50) { 63 | const response = await page.queryData(QUERY); 64 | if (response.books.length + books.length > 50) { 65 | books.push(...response.books.slice(0, 50 - books.length)); 66 | } else 
{ 67 | books.push(...response.books); 68 | } 69 | const paginationInfo = await getPaginatedData(page, QUERY, 1); 70 | if (paginationInfo.hasNextPage) { 71 | await goToTheNextPage(page, URL); 72 | } 73 | } 74 | 75 | const fs = require('fs'); 76 | fs.writeFileSync('./books.json', JSON.stringify(books, null, 4)); 77 | 78 | await browser.close(); 79 | })(); 80 | -------------------------------------------------------------------------------- /examples/js/collect-paginated-news-headlines/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Collect paginated data from HackerNews 3 | description: Collect paginated data from HackerNews with AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: Collect news headlines across multiple paginated webpages 8 | 9 | This example demonstrates how to collect HackerNews headlines across multiple paginated webpages by specifying query and number of pages to collect. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. 
[Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/js/collect-paginated-news-headlines/main.js: -------------------------------------------------------------------------------- 1 | const { wrap, configure } = require('agentql'); 2 | const { chromium } = require('playwright'); 3 | const fs = require('fs'); 4 | 5 | async function paginate(page, query, pages) { 6 | const paginatedData = []; 7 | for (let i = 0; i < pages; i++) { 8 | const data = await page.queryData(query); 9 | paginatedData.push(data); 10 | await page.goto(`https://news.ycombinator.com/?p=${i + 2}`); 11 | } 12 | return paginatedData; 13 | } 14 | 15 | (async () => { 16 | configure({ 17 | apiKey: process.env.AGENTQL_API_KEY, 18 | }); 19 | 20 | const browser = await chromium.launch({ headless: false }); 21 | const page = await wrap(await browser.newPage()); 22 | 23 | const QUERY = ` 24 | { 25 | posts[] { 26 | title 27 | } 28 | }`; 29 | 30 | const paginatedData = await paginate(page, QUERY, 3); 31 | 32 | await fs.writeFileSync( 33 | './hackernews_paginated_data.json', 34 | JSON.stringify(paginatedData, null, 2), 35 | ); 36 | 37 | console.log('Paginated data has been saved to hackernews_paginated_data.json'); 38 | 39 | await browser.close(); 40 | })(); 41 | -------------------------------------------------------------------------------- /examples/js/collect-pricing-data/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Collect pricing data from e-commerce websites 3 | description: Collect pricing data from an e-commerce website using AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: collecting pricing data from e-commerce website using AgentQL 8 | 9 | This is an example of collecting pricing data from e-commerce website using AgentQL. 
10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. [Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/js/collect-pricing-data/main.js: -------------------------------------------------------------------------------- 1 | const { wrap, configure } = require('agentql'); 2 | const { chromium } = require('playwright'); 3 | 4 | const URL = 'https://www.bestbuy.com'; 5 | 6 | async function doExtractPricingData(page) { 7 | //Extract pricing data from the current page. 8 | //Args: page (Page): The Playwright page object to interact with the browser. 9 | //Returns: list: The pricing data extracted from the page. 
10 | 11 | const QUERY = ` 12 | { 13 | products[] { 14 | name 15 | model 16 | sku 17 | price(integer) 18 | } 19 | }`; 20 | const pricingData = await page.queryData(QUERY); 21 | return pricingData.products || []; 22 | } 23 | 24 | async function searchProduct(page, product, minPrice, maxPrice) { 25 | const searchInput = await page.getByPrompt('the search input field'); 26 | if (!searchInput) { 27 | console.log('Search input field not found.'); 28 | return false; 29 | } 30 | await searchInput.type(product, { delay: 200 }); 31 | await searchInput.press('Enter'); 32 | 33 | const minPriceInput = await page.getByPrompt('the min price input field'); 34 | if (!minPriceInput) { 35 | console.log('Min price input field not found.'); 36 | return false; 37 | } 38 | await minPriceInput.fill(String(minPrice)); 39 | 40 | const maxPriceInput = await page.getByPrompt('the max price input field'); 41 | if (!maxPriceInput) { 42 | console.log('Max price input field not found.'); 43 | return false; 44 | } 45 | await maxPriceInput.fill(String(maxPrice)); 46 | await maxPriceInput.press('Enter'); 47 | return true; 48 | } 49 | 50 | async function goToTheNextPage(page) { 51 | const nextPageQuery = ` 52 | { 53 | pagination { 54 | prev_page_url 55 | next_page_url 56 | } 57 | }`; 58 | console.log('Navigating to the next page...'); 59 | const pagination = await page.queryData(nextPageQuery); 60 | let nextPageUrl = pagination.pagination?.next_page_url; 61 | if (!nextPageUrl) { 62 | return false; 63 | } 64 | try { 65 | if (!nextPageUrl.startsWith('http')) { 66 | nextPageUrl = URL + nextPageUrl; 67 | } 68 | await page.goto(nextPageUrl); 69 | return true; 70 | } catch (error) { 71 | console.error(error); 72 | return false; 73 | } 74 | } 75 | 76 | async function extractPricingData(page, product, minPrice, maxPrice, maxPages = 3) { 77 | console.log(`Searching for product: ${product} with price range: $${minPrice} - $${maxPrice}`); 78 | if (!(await searchProduct(page, product, minPrice, maxPrice))) { 79 
| console.log('Failed to search for the product.'); 80 | return []; 81 | } 82 | 83 | let currentPage = 1; 84 | const pricingData = []; 85 | while (currentPage <= maxPages) { 86 | console.log(`Extracting pricing data on page ${currentPage}...`); 87 | const pricingDataOnPage = await doExtractPricingData(page); 88 | console.log(`${pricingDataOnPage.length} products found`); 89 | 90 | pricingData.push(...pricingDataOnPage); 91 | 92 | if (!(await goToTheNextPage(page))) { 93 | console.log('No more next page.'); 94 | break; 95 | } 96 | 97 | currentPage += 1; 98 | } 99 | 100 | return pricingData; 101 | } 102 | 103 | (async () => { 104 | // Configure the AgentQL API key 105 | configure({ 106 | apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted. 107 | }); 108 | 109 | const browser = await chromium.launch({ headless: false }); 110 | const page = await wrap(await browser.newPage()); 111 | await page.goto(URL); 112 | 113 | const pricingData = await extractPricingData(page, 'gpu', 500, 800); 114 | console.log('Pricing data:', pricingData); 115 | 116 | await browser.close(); 117 | })(); 118 | -------------------------------------------------------------------------------- /examples/js/collect-youtube-comments/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Interact with YouTube and extract video information 3 | description: Interact with YouTube website and extract video information using AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: interacting with YouTube website and extracting video information using AgentQL 8 | 9 | This is an example of interacting with YouTube website and extracting video information using AgentQL. 
10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. [Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/js/collect-youtube-comments/main.js: -------------------------------------------------------------------------------- 1 | const { chromium } = require('playwright'); 2 | const { configure, wrap } = require('agentql'); 3 | 4 | const URL = 'https://www.youtube.com/'; 5 | 6 | async function main() { 7 | // Configure the AgentQL API key 8 | configure({ 9 | apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted. 
10 | }); 11 | 12 | const browser = await chromium.launch({ headless: false }); 13 | const page = await wrap(await browser.newPage()); 14 | await page.goto(URL); 15 | 16 | const SEARCH_QUERY = ` 17 | { 18 | search_input 19 | search_btn 20 | } 21 | `; 22 | 23 | const VIDEO_QUERY = ` 24 | { 25 | videos[] { 26 | video_link 27 | video_title 28 | channel_name 29 | } 30 | } 31 | `; 32 | 33 | const VIDEO_CONTROL_QUERY = ` 34 | { 35 | play_or_pause_btn 36 | expand_description_btn 37 | } 38 | `; 39 | 40 | const DESCRIPTION_QUERY = ` 41 | { 42 | description_text 43 | } 44 | `; 45 | 46 | const COMMENT_QUERY = ` 47 | { 48 | comments[] { 49 | channel_name 50 | comment_text 51 | } 52 | } 53 | `; 54 | 55 | try { 56 | // search query 57 | const searchResponse = await page.queryElements(SEARCH_QUERY); 58 | await searchResponse.search_input.type('machine learning', { delay: 75 }); 59 | await searchResponse.search_btn.click(); 60 | 61 | // video query 62 | const videoResponse = await page.queryElements(VIDEO_QUERY); 63 | console.log( 64 | `Clicking Youtube Video: ${await videoResponse.videos[0].video_title.textContent()}`, 65 | ); 66 | await videoResponse.videos[0].video_link.click(); // click the first youtube video 67 | 68 | // video control query 69 | const controlResponse = await page.queryElements(VIDEO_CONTROL_QUERY); 70 | await controlResponse.expand_description_btn.click(); 71 | 72 | // description query 73 | const descriptionData = await page.queryData(DESCRIPTION_QUERY); 74 | console.log(`Captured the following description:\n${descriptionData.description_text}`); 75 | 76 | // Scroll down the page to load more comments 77 | for (let i = 0; i < 3; i++) { 78 | await page.keyboard.press('PageDown'); 79 | await page.waitForLoadState(); 80 | } 81 | 82 | // comment query 83 | const commentResponse = await page.queryData(COMMENT_QUERY); 84 | console.log(`Captured ${commentResponse.comments?.length || 0} comments!`); 85 | } catch (error) { 86 | console.error(`Found Error: ${error}`); 
87 | throw error; 88 | } 89 | 90 | // Used only for demo purposes. It allows you to see the effect of the script. 91 | await page.waitForTimeout(10000); 92 | 93 | await browser.close(); 94 | } 95 | 96 | main(); 97 | -------------------------------------------------------------------------------- /examples/js/compare-product-prices/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Compare product price across multiple websites 3 | description: Compare product prices across websites with query_data() method with AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: comparing product price across websites with AgentQL 8 | 9 | This example demonstrates how to compare product prices across websites with queryData() method. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. [Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/js/compare-product-prices/main.js: -------------------------------------------------------------------------------- 1 | const { wrap, configure } = require('agentql'); 2 | const { chromium } = require('playwright'); 3 | 4 | // Set the URL to the desired website 5 | const URL = 'https://scrapeme.live/shop'; 6 | 7 | // Define the queries to interact with the page 8 | const HOME_PAGE_QUERY = ` 9 | { 10 | search_products_input 11 | } 12 | `; 13 | 14 | /** 15 | * Open the given URL in a new tab and fetch the price of the product. 
16 | */ 17 | async function fetchPrice(context, sessionUrl, productName) { 18 | // Create a page in a new tab in the broswer context and wrap it to get access to the AgentQL's querying API 19 | const page = await wrap(await context.newPage()); 20 | await page.goto(sessionUrl); 21 | 22 | // Search for the product 23 | const homeResponse = await page.queryElements(HOME_PAGE_QUERY); 24 | await homeResponse.search_products_input.fill(productName); 25 | await homeResponse.search_products_input.press('Enter'); 26 | 27 | const PRODUCT_INFO_QUERY = ` 28 | { 29 | product_price (${productName}) 30 | } 31 | `; 32 | 33 | // Fetch the price data from the page 34 | const data = await page.queryData(PRODUCT_INFO_QUERY); 35 | return data.product_price; 36 | } 37 | 38 | /** 39 | * Fetch prices concurrently in the same browser session from multiple websites. 40 | */ 41 | async function getPriceAcrossWebsites() { 42 | // Configure the AgentQL API key 43 | configure({ 44 | apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted. 
45 | }); 46 | const browser = await chromium.launch({ headless: false }); 47 | const context = await browser.newContext(); 48 | 49 | // Open multiple tabs in the same browser context to fetch prices concurrently 50 | const [charmanderPrice, venusaurPrice, charizardPrice] = await Promise.all([ 51 | fetchPrice(context, URL, 'Charmander'), 52 | fetchPrice(context, URL, 'Venusaur'), 53 | fetchPrice(context, URL, 'Charizard'), 54 | ]); 55 | 56 | console.log( 57 | ` 58 | Charmander price: ${charmanderPrice} 59 | Venusaur price: ${venusaurPrice} 60 | Charizard price: ${charizardPrice} 61 | `, 62 | ); 63 | 64 | await browser.close(); 65 | } 66 | 67 | getPriceAcrossWebsites(); 68 | -------------------------------------------------------------------------------- /examples/js/first-steps/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Collect pricing data from e-commerce websites 3 | description: Collect pricing data from an e-commerce website using AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: collecting pricing data from e-commerce website using AgentQL 8 | 9 | This is an example of collecting pricing data from e-commerce website using AgentQL. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. 
[Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/js/first-steps/main.js: -------------------------------------------------------------------------------- 1 | const { chromium } = require('playwright'); 2 | const { wrap, configure } = require('agentql'); 3 | 4 | const URL = 'https://scrapeme.live/shop'; 5 | 6 | // The AgentQL query to locate the search box element 7 | const SEARCH_BOX_QUERY = ` 8 | { 9 | search_product_box 10 | } 11 | `; 12 | 13 | // The AgentQL query of the data to be extracted 14 | const PRODUCT_DATA_QUERY = ` 15 | { 16 | price_currency 17 | products[] { 18 | name 19 | price(integer) 20 | } 21 | } 22 | `; 23 | 24 | // Other than the AgentQL query, you can also use natural language prompt to locate the element 25 | const NATURAL_LANGUAGE_PROMPT = 'Button to display Qwilfish page'; 26 | 27 | async function main() { 28 | // Configure the AgentQL API key 29 | configure({ apiKey: process.env.AGENTQL_API_KEY }); 30 | 31 | const browser = await chromium.launch({ headless: false }); 32 | const context = await browser.newContext(); 33 | 34 | // Wrap the page to get access to the AgentQL's querying API 35 | const agentqlPage = await wrap(await context.newPage()); 36 | 37 | await agentqlPage.goto(URL); 38 | 39 | const productData = await extractProductData(agentqlPage, 'fish'); 40 | 41 | console.log(productData); 42 | 43 | await addQwilfishToCart(agentqlPage); 44 | 45 | await browser.close(); 46 | } 47 | 48 | async function extractProductData(page, searchKeyWord) { 49 | // Find DOM element using AgentQL API's queryElements() method 50 | const response = await page.queryElements(SEARCH_BOX_QUERY); 51 | 52 | // Interact with the element using Playwright API 53 | await response.search_product_box.type(searchKeyWord, { delay: 200 }); 54 | await page.keyboard.press('Enter'); 55 | 56 | // Extract data using AgentQL API's 
queryData() method 57 | const data = await page.queryData(PRODUCT_DATA_QUERY); 58 | 59 | return data; 60 | } 61 | 62 | async function addQwilfishToCart(page) { 63 | // Find DOM element using AgentQL API's getByPrompt() method 64 | const qwilfishPageBtn = await page.getByPrompt(NATURAL_LANGUAGE_PROMPT); 65 | 66 | // Interact with the element using Playwright API 67 | if (qwilfishPageBtn) { 68 | await qwilfishPageBtn.click(); 69 | } 70 | 71 | // Wait for 10 seconds to see the browser action 72 | await page.waitForTimeout(10000); 73 | } 74 | 75 | main(); 76 | -------------------------------------------------------------------------------- /examples/js/get-by-prompt/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Get single element by prompt 3 | description: Use AgentQL's getByPrompt() method to retrieve an element and interact with it. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: interact with elements leveraging getByPrompt method 8 | 9 | This is an example of leveraging the getByPrompt() method to retrieve an element and interact with it using AgentQL. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. 
[Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/js/get-by-prompt/main.js: -------------------------------------------------------------------------------- 1 | // This example demonstrates how to leverage get_by_prompt method to interact with element by prompt text. 2 | 3 | const { wrap, configure } = require('agentql'); 4 | const { chromium } = require('playwright'); 5 | 6 | // Set the URL to the desired website 7 | const URL = 'https://thinking-tester-contact-list.herokuapp.com/'; 8 | 9 | async function main() { 10 | // Configure the AgentQL API key 11 | configure({ 12 | apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted. 13 | }); 14 | 15 | const browser = await chromium.launch({ headless: false }); 16 | 17 | // Wrap the page to get access to the AgentQL's querying API 18 | const page = await wrap(await browser.newPage()); 19 | 20 | // Navigate to the URL 21 | await page.goto(URL); 22 | 23 | // Get the sign up button by the prompt text 24 | const signUpBtn = await page.getByPrompt('Sign up button'); 25 | 26 | // Click the sign up button if it exists 27 | if (signUpBtn) { 28 | await signUpBtn.click(); 29 | } 30 | 31 | // Used only for demo purposes. It allows you to see the effect of the script. 32 | await page.waitForTimeout(10000); 33 | 34 | await browser.close(); 35 | } 36 | 37 | main(); 38 | -------------------------------------------------------------------------------- /examples/js/humanlike-antibot/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Use humanlike mouse movement to avoid antibot 3 | description: Use humanlike mouse movement to avoid antibot when using AgentQL. 
4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: use humanlike mouse movement to avoid antibot 8 | 9 | This example shows how to use humanlike mouse movement to avoid antibot when using AgentQL. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. [Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | 25 | ## Learn more 26 | 27 | - [Read the full guide on avoiding bot detection with humanlike behavior](https://docs.agentql.com/avoiding-bot-detection/user-like-behavior) 28 | - [Check out more guides on avoiding bot detection](https://docs.agentql.com/avoiding-bot-detection) 29 | -------------------------------------------------------------------------------- /examples/js/humanlike-antibot/main.js: -------------------------------------------------------------------------------- 1 | const { wrap, configure } = require('agentql'); 2 | const { chromium } = require('playwright'); 3 | 4 | async function randomMouseMovement(page) { 5 | for (let i = 0; i < 10; i++) { 6 | await page.mouse.move(Math.floor(Math.random() * 1000), Math.floor(Math.random() * 1000)); 7 | await new Promise((r) => setTimeout(r, Math.random() * 400 + 100)); 8 | } 9 | } 10 | 11 | async function randomClick(page, element) { 12 | const box = await element.boundingBox(); 13 | await page.mouse.move(box.x + box.width / 2, box.y + box.height / 2); 14 | await page.mouse.click(box.x + box.width / 2, box.y + box.height / 2); 15 | } 16 | 17 | async function randomScroll(page) { 18 | await page.mouse.wheel(0, 1000); 19 | await new Promise((r) 
=> setTimeout(r, Math.random() * 400 + 100)); 20 | } 21 | 22 | async function main() { 23 | // Configure the AgentQL API key 24 | configure({ 25 | apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted. 26 | }); 27 | 28 | // Launch browser with proxy settings 29 | const browser = await chromium.launch({ headless: false }); 30 | 31 | // Wrap browser with AgentQL 32 | const page = await wrap(await browser.newPage()); 33 | await page.goto('https://duckduckgo.com/'); 34 | 35 | // Type "AgentQL" into the search box keystroke by keystroke 36 | const searchBar = await page.getByPrompt('the search bar'); 37 | searchBar.pressSequentially('AgentQL'); 38 | 39 | // Click the search button in a random manner 40 | await randomClick(page, await page.getByPrompt('the search button')); 41 | 42 | for (let i = 0; i < 5; i++) { 43 | await randomMouseMovement(page); 44 | await randomScroll(page); 45 | } 46 | 47 | await browser.close(); 48 | } 49 | 50 | main().catch(console.error); 51 | -------------------------------------------------------------------------------- /examples/js/infinite-scroll/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Scroll to load more content 3 | description: How to load additional content on pages that load content based on scroll position (aka 'infinite scroll'). 
4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: load additional content on page by scrolling 8 | 9 | This example demonstrates how to load additional content on pages that load content based on scroll position 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Adjust the scrolling method 22 | 23 | Dynamically loading content can be tricky to get right, as websites have a lot of ways to customize how this interaction looks on their sites. 24 | 25 | Scrolling to the end of a page by pressing the `End` key is not always a reliable mechanism, since pages could either have multiple scrollable areas, or have the `End` key mapped to a different function, such as for video playback. Try replacing `key_press_end_scroll(page)` in the example with `mouse_wheel_scroll(page)` and observe how the browser behaves differently, or try navigating to your own site to test in `page.goto`! 26 | -------------------------------------------------------------------------------- /examples/js/infinite-scroll/main.js: -------------------------------------------------------------------------------- 1 | const { wrap, configure } = require('agentql'); 2 | const { chromium } = require('playwright'); 3 | 4 | async function pressKeyAndScroll(page) { 5 | page.keyboard.press('End'); 6 | } 7 | 8 | (async () => { 9 | // Configure the AgentQL API key 10 | configure({ 11 | apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted. 
12 | }); 13 | 14 | const browser = await chromium.launch({ headless: false }); 15 | const page = await wrap(await browser.newPage()); 16 | 17 | console.log('Navigating to the page...'); 18 | 19 | await page.goto('https://infinite-scroll.com/demo/full-page/'); 20 | 21 | const QUERY = ` 22 | { 23 | page_title 24 | post_headers[] 25 | } 26 | `; 27 | 28 | await page.waitForLoadState(); 29 | 30 | const numExtraPagesToLoad = 3; 31 | 32 | for (let i = 0; i < numExtraPagesToLoad; i++) { 33 | console.log(`Scrolling to the bottom of the page... (num_times = ${i + 1})`); 34 | await pressKeyAndScroll(page); 35 | await page.waitForLoadState(); 36 | console.log('Content loaded!'); 37 | } 38 | 39 | console.log('Issuing AgentQL data query...'); 40 | const response = await page.queryData(QUERY); 41 | 42 | console.log('AgentQL response:', response); 43 | 44 | await browser.close(); 45 | })(); 46 | -------------------------------------------------------------------------------- /examples/js/interact-with-external-or-existing-browser/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Use AgentQL with an already open browser 3 | description: If you don't want to use Playwright's default browser, you can bring your own. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: interact with external or existing browser 8 | 9 | This is an example shows how to interact with external or existing browser by retrieving and interacting with web elements in AgentQL. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Close your Google Chrome application if it is open. 
16 | - If you're using **Mac**, open the terminal and run the following command: 17 | 18 | ```bash 19 | /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222 20 | ``` 21 | 22 | - If you're using **Windows**, open the Command Prompt and run the command: 23 | 24 | ```bash 25 | chrome.exe --remote-debugging-port=9222 26 | ``` 27 | 28 | **Make sure to replace `chrome.exe` with the path to your Chrome executable if it's not already in your system's PATH.** 29 | 30 | - In the browser window that's opened, select the Google profile you would like to use for this session. 31 | 32 | - In **main.js**, replace variable `WEBSOCKET_URL`'s placeholder value with the actual WebSocket URL returned in terminal or command prompt. The URL should be in the format of `ws://127.0.0.1:9222/devtools/browser/387adf4c-243f-4051-a181-46798f4a46f4`. 33 | 34 | - Run the following command from the project's folder: 35 | 36 | ```bash 37 | node main.js 38 | ``` 39 | 40 | - If you want to learn how to work with open pages, navigate to [Scrapeme website](https://scrapeme.live/shop/Charmander/) within the browser, and use `fetch_data_from_open_website_page()` method in the script to fetch data from the page. 41 | 42 | ## Play with the query 43 | 44 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. 
[Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 45 | -------------------------------------------------------------------------------- /examples/js/interact-with-external-or-existing-browser/main.js: -------------------------------------------------------------------------------- 1 | const { wrap, configure } = require('agentql'); 2 | const { chromium } = require('playwright'); 3 | 4 | const WEBSOCKET_URL = 'ws://127.0.0.1:9222/devtools/browser/494ca26b-74bf-42c0-932d-76d74a954019'; 5 | 6 | const URL = 'https://scrapeme.live/shop'; 7 | 8 | const SEARCH_QUERY = ` 9 | { 10 | search_products_box 11 | } 12 | `; 13 | 14 | const STOCK_QUERY = ` 15 | { 16 | number_in_stock 17 | } 18 | `; 19 | 20 | // This function demonstrates how to open and interact with a new page your local browser. 21 | async function interactWithNewPageInLocalBrowser() { 22 | // Connect to the browser via Chrome DevTools Protocol. 23 | const browser = await chromium.connectOverCDP(WEBSOCKET_URL); 24 | 25 | // Create a new tab in the browser window and wrap it to get access to the AgentQL's querying API 26 | const page = await wrap(await browser.newPage()); 27 | 28 | await page.goto(URL); 29 | 30 | // Use query_elements() method to locate the search product box from the page 31 | const response = await page.queryElements(SEARCH_QUERY); 32 | await response.search_products_box.type('Charmander'); 33 | await page.keyboard.press('Enter'); 34 | 35 | await page.waitForTimeout(10000); 36 | 37 | // Use query_data() method to fetch the stock number from the page 38 | const stockResponse = await page.queryData(STOCK_QUERY); 39 | console.log(stockResponse); 40 | await browser.close(); 41 | } 42 | 43 | async function main() { 44 | // Set the AgentQL API key via the `configure` method. 
45 | configure({ apiKey: process.env.AGENTQL_API_KEY }); 46 | await interactWithNewPageInLocalBrowser(); 47 | } 48 | 49 | main(); 50 | -------------------------------------------------------------------------------- /examples/js/list-query-usage/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Query a list of items 3 | description: How to query a list of items on the page with AgentQL. 4 | updated: 2025-03-05 5 | featured: true 6 | --- 7 | 8 | # Example script: querying a list of items with AgentQL 9 | 10 | This example demonstrates how to query a list of items on the page and save the results to a CSV file. 11 | 12 | ## Run the script 13 | 14 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 15 | - Save this JavaScript file locally as **main.js** 16 | - Run the following command from the project's folder: 17 | 18 | ```bash 19 | node main.js 20 | ``` 21 | 22 | ## Play with the query 23 | 24 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. [Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 25 | -------------------------------------------------------------------------------- /examples/js/list-query-usage/main.js: -------------------------------------------------------------------------------- 1 | const { wrap, configure } = require('agentql'); 2 | const { chromium } = require('playwright'); 3 | const path = require('path'); 4 | const fs = require('fs'); 5 | 6 | (async () => { 7 | // Configure the AgentQL API key 8 | configure({ 9 | apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted. 10 | }); 11 | 12 | const browser = await chromium.launch({ headless: false }); 13 | const page = await wrap(await browser.newPage()); // Wraps the Playwright Page to access AgentQL's features. 
14 | 15 | await page.goto('https://scrapeme.live/shop/'); 16 | 17 | const QUERY = ` 18 | { 19 | products[] 20 | { 21 | product_name 22 | price(integer) 23 | } 24 | } 25 | `; 26 | 27 | const response = await page.queryData(QUERY); 28 | 29 | const scriptDir = path.dirname(__filename); 30 | const csvFilePath = path.join(scriptDir, 'products_data.csv'); 31 | let csvContent = 'Products Name, Price\n'; 32 | 33 | response.products.forEach((product) => { 34 | csvContent += `${product.product_name},${product.price}\n`; 35 | }); 36 | 37 | fs.writeFileSync(csvFilePath, csvContent, 'utf-8'); 38 | await browser.close(); 39 | })(); 40 | -------------------------------------------------------------------------------- /examples/js/list-query-usage/products_data.csv: -------------------------------------------------------------------------------- 1 | Products Name, Price 2 | Bulbasaur,63 3 | Ivysaur,87 4 | Venusaur,105 5 | Charmander,48 6 | Charmeleon,165 7 | Charizard,156 8 | Squirtle,130 9 | Wartortle,123 10 | Blastoise,76 11 | Caterpie,73 12 | Metapod,148 13 | Butterfree,162 14 | Weedle,25 15 | Kakuna,148 16 | Beedrill,168 17 | Pidgey,159 18 | -------------------------------------------------------------------------------- /examples/js/log-into-sites/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Login to sites with AgentQL 3 | description: Use AgentQL to interact with login forms and log into websites. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: log in to websites 8 | 9 | This example shows how to log in to websites by retrieving and interacting with web elements with AgentQL. 
10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. [Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/js/log-into-sites/main.js: -------------------------------------------------------------------------------- 1 | const { chromium } = require('playwright'); 2 | const { wrap, configure } = require('agentql'); 3 | 4 | // Set the URL to the desired website 5 | const URL = 'https://practicetestautomation.com/practice-test-login/'; 6 | 7 | const LOGIN_QUERY = ` 8 | { 9 | username_field 10 | password_field 11 | submit_btn 12 | } 13 | `; 14 | 15 | async function main() { 16 | // Configure the AgentQL API key 17 | configure({ 18 | apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted. 
19 | }); 20 | 21 | const browser = await chromium.launch({ headless: false }); 22 | 23 | // Wrap the page to get access to the AgentQL's querying API 24 | const page = await wrap(await browser.newPage()); 25 | 26 | // Navigate to the URL 27 | await page.goto(URL); 28 | 29 | // Get the username and password fields 30 | const response = await page.queryElements(LOGIN_QUERY); 31 | 32 | // Fill the username and password fields 33 | await response.username_field.fill('student'); 34 | await response.password_field.fill('Password123'); 35 | 36 | // Click the submit button 37 | await response.submit_btn.click(); 38 | 39 | // Wait for 10 seconds to see the browser action 40 | await page.waitForTimeout(10000); 41 | 42 | await browser.close(); 43 | } 44 | 45 | main(); 46 | -------------------------------------------------------------------------------- /examples/js/maps_scraper/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Query a list of Google Maps listings 3 | description: How to query a list of business listings on a Google Maps search results page with AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: querying a list of Google Maps listings with AgentQL 8 | 9 | This example demonstrates how to query a list of business listings (name, rating, description, links, address, and hours) from a Google Maps search results page and save them to a CSV file. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. 
[Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/js/maps_scraper/main.js: -------------------------------------------------------------------------------- 1 | const { wrap, configure } = require('agentql'); 2 | const { chromium } = require('playwright'); 3 | const path = require('path'); 4 | const fs = require('fs'); 5 | 6 | (async () => { 7 | // Configure the AgentQL API key 8 | configure({ 9 | apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted. 10 | }); 11 | 12 | const browser = await chromium.launch({ headless: false }); 13 | const page = await wrap(await browser.newPage()); // Wraps the Playwright Page to access AgentQL's features. 14 | 15 | await page.goto( 16 | 'https://www.google.com/maps/search/boba+tea/@37.4400289,-122.1653309,14z/data=!3m1!4b1?entry=ttu&g_ep=EgoyMDI1MDIxMS4wIKXMDSoASAFQAw%3D%3D', 17 | ); 18 | 19 | const QUERY = ` 20 | { 21 | listings[] { 22 | name 23 | rating(in stars) 24 | description(if not available, use "n/a") 25 | order_link(if not available, use "n/a") 26 | take_out_link(if not available, use "n/a") 27 | address 28 | hours 29 | } 30 | } 31 | `; 32 | 33 | const response = await page.queryData(QUERY); 34 | 35 | const scriptDir = path.dirname(__filename); 36 | const csvFilePath = path.join(scriptDir, 'map_data.csv'); 37 | let csvContent = 'Name, Rating, Description, Order Link, Take Out Link, Address, Hours\n'; 38 | 39 | response.listings.forEach((listing) => { 40 | csvContent += `${listing.name},${listing.rating},${listing.description},${listing.order_link},${listing.take_out_link},${listing.address},${listing.hours}\n`; 41 | }); 42 | 43 | fs.writeFileSync(csvFilePath, csvContent, 'utf-8'); 44 | await browser.close(); 45 | })(); 46 | -------------------------------------------------------------------------------- /examples/js/maps_scraper/map_data.csv: 
-------------------------------------------------------------------------------- 1 | Name, Rating, Description, Order Link, Take Out Link, Address, Hours 2 | Bober Tea & Coffee,4.2,Bubble Tea,https://www.google.com/viewer/chooseprovider?mid=/g/11v07psthp&g2lbs=AAEPWCtsVkaYAN-1PgJQMeFPCTu551sAhgxUJQTd4dAGBcPUsXPOIP6cKWwKIwLrTlLBHuMBRAl2sqzZVQUy1gEeoVJlPNLQrg%3D%3D&hl=en-US&gl=us&fo_m=MfohQo559jFvMUOzJVpjPL1YMfZ3bInYwBDuMfaXTPp5KXh-&utm_source=tactile&gei=HO6uZ6fmEqWWkPIPtZLT8AY&ei=HO6uZ6fmEqWWkPIPtZLT8AY&fo_s=OA,SOE&opi=79508299&ebb=1&cs=0&foub=mcpp,n/a,121 Lytton Ave,Closed⋅ Opens 10 AM Fri 3 | Mr. Sun Tea Palo Alto,4,Bubble Tea,https://www.google.com/viewer/chooseprovider?mid=/g/11sy40sg1_&g2lbs=AAEPWCtB21WlzXZzcEBYLaXecuhD44IiK3JAFz-_BsmtrtDDhmJlrBP6TkrEQsvJNoAXQD0MCL3-SIW0Kviy1PXw8F8D28l3lA%3D%3D&hl=en-US&gl=us&fo_m=MfohQo559jFvMUOzJVpjPL1YMfZ3bInYwBDuMfaXTPp5KXh-&utm_source=tactile&gei=HO6uZ6fmEqWWkPIPtZLT8AY&ei=HO6uZ6fmEqWWkPIPtZLT8AY&fo_s=OA,SOE&opi=79508299&ebb=1&cs=0&foub=mcpp,n/a,436 University Ave,Closed⋅ Opens 12 PM Fri 4 | UMe Tea,4.1,Tea house,https://www.google.com/viewer/chooseprovider?mid=/g/11nxs3lr71&g2lbs=AAEPWCvtgwLVutg-PlDqQ-WJ25WRIeSMzNBk9x_yu761XQg4dlT_vmF-cTUsVNKzeYPsX92GY3yKSbwYcOQAev--d-cGGJb39w%3D%3D&hl=en-US&gl=us&fo_m=MfohQo559jFvMUOzJVpjPL1YMfZ3bInYwBDuMfaXTPp5KXh-&utm_source=tactile&gei=HO6uZ6fmEqWWkPIPtZLT8AY&ei=HO6uZ6fmEqWWkPIPtZLT8AY&fo_s=OA,SOE&opi=79508299&ebb=1&cs=0&foub=mcpp,n/a,421 California Ave,Closed⋅ Opens 12 PM Fri 5 | T4,4.1,Bubble TeaCute cafe for a variety of bubble teas,https://www.google.com/viewer/chooseprovider?mid=/g/11b6gjnl5s&g2lbs=AAEPWCsgOueGC_0jZW5q4MPXFUTHyGND2oRWnlpr3RAh8rWch6IMCq3hP2PwayU72DCpkkqxVRh6l2_ujWgleog6gAUec5zgig%3D%3D&hl=en-US&gl=us&fo_m=MfohQo559jFvMUOzJVpjPL1YMfZ3bInYwBDuMfaXTPp5KXh-&utm_source=tactile&gei=HO6uZ6fmEqWWkPIPtZLT8AY&ei=HO6uZ6fmEqWWkPIPtZLT8AY&fo_s=OA,SOE&opi=79508299&ebb=1&cs=0&foub=mcpp,n/a,165 University Ave #D,Closed⋅ Opens 11 AM Fri 6 | Wanpo Tea Shop,3.9,Bubble 
Tea,https://www.google.com/viewer/chooseprovider?mid=/g/11qpp_4svp&g2lbs=AAEPWCtym7ksRonbM3VhQf3K7aTsR_bI1rVITJEua4LXNfbw2qf1zwS2T7rqBZF_iYm0YCJfdsiSNRhE5xRx5uhc4rkqPMjo9Q%3D%3D&hl=en-US&gl=us&fo_m=MfohQo559jFvMUOzJVpjPL1YMfZ3bInYwBDuMfaXTPp5KXh-&utm_source=tactile&gei=HO6uZ6fmEqWWkPIPtZLT8AY&ei=HO6uZ6fmEqWWkPIPtZLT8AY&fo_s=OA,SOE&opi=79508299&ebb=1&cs=0&foub=mcpp,n/a,660 Stanford Shopping Center #721,Closed⋅ Opens 11 AM Fri 7 | Boba Guys Palo Alto,4.3,Bubble Tea,https://www.google.com/viewer/chooseprovider?mid=/g/11fj9cgszb&g2lbs=AAEPWCtK5bphPJ4GXtj53spuXL6EEu8RTtxeWFmsy-vf0FWuf-L3mRzW5PjSpRVecnJ_oZpzv426KuvKxgzBegNMjmU3gwEDIQ%3D%3D&hl=en-US&gl=us&fo_m=MfohQo559jFvMUOzJVpjPL1YMfZ3bInYwBDuMfaXTPp5KXh-&utm_source=tactile&gei=HO6uZ6fmEqWWkPIPtZLT8AY&ei=HO6uZ6fmEqWWkPIPtZLT8AY&fo_s=OA,SOE&opi=79508299&ebb=1&cs=0&foub=mcpp,n/a,855 El Camino Real #120,Closed⋅ Opens 10 AM Fri 8 | -------------------------------------------------------------------------------- /examples/js/news-aggregator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Aggregate news headlines 3 | description: Collect news headlines from a selection of URLs with AgentQL and save the results to a CSV file. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: aggregate news headlines with AgentQL 8 | 9 | This example shows how to collect news headlines from a selection of URLs and save the results to a CSV file. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. 
[Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/js/news-aggregator/main.js: -------------------------------------------------------------------------------- 1 | const { wrap, configure } = require('agentql'); 2 | const { chromium } = require('playwright'); 3 | const path = require('path'); 4 | const fs = require('fs'); 5 | 6 | // Define the query to interact with the page 7 | const query = ` 8 | { 9 | items(might be aritcles, posts, tweets)[] 10 | { 11 | published_date(convert to XX/XX/XXXX format) 12 | entry(title or post if no title is available) 13 | author(person's name; return "n/a" if not available) 14 | outlet(the original platform it is posted on; if no platform is listed, use the root domain of the url) 15 | url 16 | } 17 | } 18 | `; 19 | 20 | // Set URLs to the desired websites 21 | const websiteUrls = [ 22 | 'https://bsky.app/search?q=agents+for+the+web', 23 | 'https://dev.to/search?q=agents%20for%20the%20web&sort_by=published_at&sort_direction=desc', 24 | 'https://hn.algolia.com/?dateRange=last24h&page=0&prefix=false&query=agents%20for%20the%20web&sort=byDate&type=story', 25 | 'https://duckduckgo.com/?q=agents+for+the+web&t=h_&iar=news&ia=news', 26 | ]; 27 | 28 | // Get the directory of the current script and create path to the csv file 29 | const scriptDir = path.dirname(__filename); 30 | const csvFilePath = path.join(scriptDir, 'news_headlines.csv'); 31 | 32 | async function fetchData(context, sessionUrl) { 33 | const page = await wrap(await context.newPage()); 34 | await page.goto(sessionUrl); 35 | 36 | const data = await page.queryData(query); 37 | 38 | // Prepare new data 39 | const newLines = data.items.map((item) => { 40 | const cleanEntry = item.entry.replace(/\|/g, ''); 41 | return `${item.published_date} | ${cleanEntry} | ${item.url} | ${item.outlet} | ${item.author}\n`; 42 | }); 43 | 44 | // Handle 
file writing with proper header management 45 | if (!fs.existsSync(csvFilePath)) { 46 | // New file - write header and data 47 | fs.writeFileSync(csvFilePath, 'Posted | Entry | URL | Platform | Author\n', 'utf-8'); 48 | fs.appendFileSync(csvFilePath, newLines.join(''), 'utf-8'); 49 | } else { 50 | // File exists - append new data while preserving existing content 51 | fs.appendFileSync(csvFilePath, newLines.join(''), 'utf-8'); 52 | } 53 | 54 | console.log(`Fetched items from ${sessionUrl}...`); 55 | } 56 | 57 | (async () => { 58 | // Configure the AgentQL API key 59 | configure({ 60 | apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted 61 | }); 62 | 63 | const browser = await chromium.launch({ headless: true }); 64 | const context = await browser.newContext(); 65 | 66 | try { 67 | // Process all URLs concurrently 68 | await Promise.all(websiteUrls.map((url) => fetchData(context, url))); 69 | console.log(`All done! CSV is here: ${csvFilePath}...`); 70 | } finally { 71 | await browser.close(); 72 | } 73 | })(); 74 | -------------------------------------------------------------------------------- /examples/js/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "javascript-sdk", 3 | "version": "1.0.0", 4 | "description": "AgentQL Examples for JavaScript SDK", 5 | "directories": { 6 | "example": "examples" 7 | }, 8 | "scripts": { 9 | "lint": "eslint --config .eslintrc.js .", 10 | "format": "prettier --write ." 
11 | }, 12 | "author": "", 13 | "license": "MIT", 14 | "devDependencies": { 15 | "@trivago/prettier-plugin-sort-imports": "^4.3.0", 16 | "eslint": "^8.57.0", 17 | "eslint-config-prettier": "^9.1.0", 18 | "prettier": "^2.8.7" 19 | }, 20 | "dependencies": { 21 | "agentql": "latest", 22 | "openai": "^4.70.1", 23 | "playwright": "^1.48.2", 24 | "playwright-dompath": "^0.0.7" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /examples/js/perform-sentiment-analysis/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Perform sentiment analysis 3 | description: Perform sentiment analysis on YouTube comments with AgentQL and OpenAI's GPT-3.5 model. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: perform sentiment analysis with AgentQL 8 | 9 | This example demonstrates how to perform sentiment analysis on YouTube comments with AgentQL and OpenAI's GPT-3.5 model. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 14 | - [Install OpenAI SDK](https://www.npmjs.com/package/openai) with the following command: 15 | 16 | ```bash 17 | npm install openai 18 | ``` 19 | 20 | - Save this JavaScript file locally as **main.js** 21 | - Set your OpenAI API key as an environment variable with the following command: 22 | 23 | ```bash 24 | export OPENAI_API_KEY="My API Key" 25 | ``` 26 | 27 | - Run the following command from the project's folder: 28 | 29 | ```bash 30 | node main.js 31 | ``` 32 | 33 | ## Play with the query 34 | 35 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. 
[Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 36 | -------------------------------------------------------------------------------- /examples/js/perform-sentiment-analysis/main.js: -------------------------------------------------------------------------------- 1 | /* This example demonstrates how to perform sentiment analysis on YouTube comments with AgentQL and OpenAI's GPT-3.5 model. */ 2 | 3 | const { wrap, configure } = require('agentql'); 4 | const { chromium } = require('playwright'); 5 | 6 | // Import the OpenAI API client. 7 | const { OpenAI } = require('openai/index.mjs'); 8 | 9 | // Define the URL of the page to scrape. 10 | const URL = 'https://www.youtube.com/watch?v=JfM1mr2bCuk'; 11 | 12 | // Define a query to interact with the page. 13 | const QUERY = ` 14 | { 15 | video_title 16 | video_channel 17 | comments[] { 18 | comment_text 19 | author 20 | } 21 | } 22 | `; 23 | 24 | async function getComments() { 25 | // Configure the AgentQL API key 26 | configure({ 27 | apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted. 28 | }); 29 | 30 | // Launch a headless browser using Playwright. 31 | const browser = await chromium.launch({ headless: false }); 32 | 33 | // Create a new page in the browser and wrap it to get access to the AgentQL's querying API 34 | const page = await wrap(await browser.newPage()); 35 | await page.goto(URL); 36 | 37 | for (let i = 0; i < 5; i++) { 38 | // Scroll down the page to load more comments. 39 | await page.waitForPageReadyState(); 40 | 41 | // Scroll down the page to load more comments 42 | await page.keyboard.press('PageDown'); 43 | } 44 | 45 | // Use queryData() method to fetch the video information from the page. 
46 | const response = await page.queryData(QUERY); 47 | 48 | // Close the browser 49 | await browser.close(); 50 | 51 | return response; 52 | } 53 | 54 | async function performSentimentAnalysis(comments) { 55 | // User message construction 56 | let USER_MESSAGE = 57 | 'These are the comments on the video. I am trying to understand the sentiment of the comments.'; 58 | 59 | // Append each comment's text to USER_MESSAGE 60 | comments.comments.forEach((comment) => { 61 | USER_MESSAGE += comment.comment_text; 62 | }); 63 | 64 | // Define the system message 65 | const SYSTEM_MESSAGE = `You are an expert in understanding social media analytics and specialize in analyzing the sentiment of comments. 66 | Please find the comments on the video as follows: 67 | `; 68 | 69 | // Append request for a summary and takeaways 70 | USER_MESSAGE += 71 | ' Could you please provide a summary of the comments on the video. Additionally, just give only 3 takeaways which would be important for me as the creator of the video.'; 72 | 73 | // Initialize OpenAI client 74 | const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY }); 75 | 76 | try { 77 | const response = await client.chat.completions.create({ 78 | model: 'gpt-3.5-turbo', 79 | messages: [ 80 | { role: 'system', content: SYSTEM_MESSAGE }, 81 | { role: 'user', content: USER_MESSAGE }, 82 | ], 83 | }); 84 | 85 | // Return the content of the first completion choice 86 | return response.choices[0].message.content; 87 | } catch (error) { 88 | console.error('Error during API call:', error); 89 | throw error; 90 | } 91 | } 92 | 93 | async function main() { 94 | const comments = await getComments(); 95 | const summary = await performSentimentAnalysis(comments); 96 | console.log(summary); 97 | } 98 | 99 | main(); 100 | -------------------------------------------------------------------------------- /examples/js/run-script-in-headless-browser/README.md: -------------------------------------------------------------------------------- 1 
| --- 2 | title: Run a script in headless browser 3 | description: Run the script in headless browser with AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: run the script in headless browser with AgentQL 8 | 9 | This example demonstrates how to run the script in headless browser. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. [Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/js/run-script-in-headless-browser/main.js: -------------------------------------------------------------------------------- 1 | /* This example demonstrates how to run the script in a headless browser. */ 2 | 3 | const { wrap, configure } = require('agentql'); 4 | const { chromium } = require('playwright'); 5 | 6 | // Define the URL of the page to scrape. 7 | const URL = 'https://scrapeme.live/shop/'; 8 | 9 | // Define the queries to locate the search box and fetch the stock number. 10 | const SEARCH_QUERY = ` 11 | { 12 | search_products_box 13 | } 14 | `; 15 | 16 | const STOCK_NUMBER_QUERY = ` 17 | { 18 | number_in_stock 19 | } 20 | `; 21 | 22 | (async () => { 23 | // Configure the AgentQL API key 24 | configure({ 25 | apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted. 26 | }); 27 | 28 | // Launch a headless browser using Playwright. 
29 | const browser = await chromium.launch({ headless: false }); 30 | // Create a new page in the browser and wrap it to get access to the AgentQL's querying API 31 | const page = await wrap(await browser.newPage()); 32 | await page.goto(URL); 33 | 34 | // Use queryElements() method to locate the search box from the page. 35 | const searchResponse = await page.queryElements(SEARCH_QUERY); 36 | 37 | // Use Playwright's API to fill the search box and press Enter. 38 | await searchResponse.search_products_box.fill('Charmander'); 39 | await page.keyboard.press('Enter'); 40 | 41 | // Use queryData() method to fetch the stock number from the page. 42 | const stockResponse = await page.queryData(STOCK_NUMBER_QUERY); 43 | console.log(stockResponse); 44 | 45 | await browser.close(); 46 | })(); 47 | -------------------------------------------------------------------------------- /examples/js/save-and-load-authenticated-session/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Save and load authenticated state 3 | description: Save and load an authenticated state (i.e. signed-in state) with AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: save and load authenticated state with AgentQL 8 | 9 | This example demonstrates how to save and load a authenticated state (i.e. signed-in state) using AgentQL. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. 
[Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/js/save-and-load-authenticated-session/main.js: -------------------------------------------------------------------------------- 1 | const { wrap, configure } = require('agentql'); 2 | const { chromium } = require('playwright'); 3 | // This example signs in to a site, saves the signed-in browser state to disk, and then loads it in a new browser. 4 | (async () => { 5 | // Configure the AgentQL API key 6 | configure({ 7 | apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted. 8 | }); 9 | // Site to sign in to and the credentials to use; EMAIL and PASSWORD are empty here (presumably placeholders to fill in before running) 10 | const URL = 'https://www.yelp.com/'; 11 | const EMAIL = ''; 12 | const PASSWORD = ''; 13 | 14 | const LOG_IN_QUERY = ` 15 | { 16 | log_in_btn 17 | } 18 | `; 19 | 20 | const CREDENTIALS_QUERY = ` 21 | { 22 | sign_in_form { 23 | email_input 24 | password_input 25 | log_in_btn 26 | } 27 | } 28 | `; 29 | 30 | async function save_signed_in_state() { 31 | const browser = await chromium.launch({ headless: false }); 32 | // Create a new page in the browser and wrap it to get access to the AgentQL's querying API 33 | const page = await wrap(await browser.newPage()); 34 | await page.goto(URL); 35 | 36 | // Use the queryElements() method to locate the "Log In" button on the page 37 | const response = await page.queryElements(LOG_IN_QUERY); 38 | // Use Playwright's API to click the located button 39 | await response.log_in_btn.click(); 40 | 41 | // Use the queryElements() method to locate the email and password input fields and the "Log In" button in the sign-in form 42 | const response_credentials = await page.queryElements(CREDENTIALS_QUERY); 43 | 44 | // Fill the email and password input fields, then submit the form 45 | await response_credentials.sign_in_form.email_input.fill(EMAIL); 46 | await response_credentials.sign_in_form.password_input.fill(PASSWORD); 47 | await response_credentials.sign_in_form.log_in_btn.click(); 48 | 49 | await page.waitForPageReadyState(); 50 | 51 | // Save the signed-in state 52 | await 
page.context().storageState({ path: 'yelp_login.json' }); 53 | await browser.close(); 54 | } 55 | 56 | async function load_signed_in_state() { 57 | const browser = await chromium.launch({ headless: false }); 58 | // Load the saved signed-in session by creating a new page with the saved signed-in state 59 | const page = wrap(await browser.newPage({ storageState: 'yelp_login.json' })); 60 | 61 | await page.goto(URL); 62 | 63 | await page.waitForPageReadyState(); 64 | // Wait for 10 seconds to see the signed-in page 65 | await page.waitForTimeout(10000); 66 | 67 | await browser.close(); 68 | } 69 | 70 | await save_signed_in_state(); 71 | await load_signed_in_state(); 72 | })(); 73 | -------------------------------------------------------------------------------- /examples/js/stealth-mode/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Run script in a stealth mode 3 | description: How to apply different techniques to lower the risk of being detected by an anti-bot system. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Stealth mode: Running AgentQL in stealth mode and avoiding bot detection 8 | 9 | This example demonstrates how to lower the risk of being detected by a anti-bot system by running AgentQL in stealth mode. 10 | 11 | There are several techniques we use in this script to avoid detection: 12 | 13 | - randomize various HTTP headers browser sends to the server. This includes `User-Agent`, `Accept-Language`, `Referer`, etc. This helps with consecutive requests looking more like they are coming from different users. 14 | - randomize browser window size. This is important because some websites track the window size and if it's the same for all requests, it's a sign of a bot. 15 | - randomize timezone and geolocation. This is important because some websites track the timezone and geolocation and if it's the same for all requests, it's a sign of a bot. 16 | - (Optional) use a proxy server. 
You need to get a proxy configuration (host, username, password) separately from an external proxy provider (e.g. [NetNut](https://netnut.io), [BrightData](https://brightdata.com/), or similar). 17 | 18 | ## Run the script 19 | 20 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 21 | - If you already have the SDK installed, make sure to update to the latest version: `npm install agentql@latest` 22 | - Save this JavaScript file locally as **main.js** 23 | - Run the following command from the project's folder: 24 | 25 | ```bash 26 | node main.js 27 | ``` 28 | -------------------------------------------------------------------------------- /examples/js/stealth-mode/main.js: -------------------------------------------------------------------------------- 1 | const { randomInt } = require('crypto'); 2 | const { wrap, configure } = require('agentql'); 3 | const { chromium } = require('playwright'); 4 | 5 | const BROWSER_IGNORED_ARGS = ['--enable-automation', '--disable-extensions']; 6 | const BROWSER_ARGS = [ 7 | '--disable-xss-auditor', 8 | '--no-sandbox', 9 | '--disable-setuid-sandbox', 10 | '--disable-blink-features=AutomationControlled', 11 | '--disable-features=IsolateOrigins,site-per-process', 12 | '--disable-infobars', 13 | ]; 14 | 15 | const USER_AGENTS = [ 16 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36', 17 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15', 18 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:130.0) Gecko/20100101 Firefox/130.0', 19 | ]; 20 | 21 | const LOCATIONS = [ 22 | ['America/New_York', { longitude: -74.006, latitude: 40.7128 }], // New York, NY 23 | ['America/Chicago', { longitude: -87.6298, latitude: 41.8781 }], // Chicago, IL 24 | ['America/Los_Angeles', { longitude: -118.2437, latitude: 34.0522 }], // Los Angeles, CA 25 | ['America/Denver', { longitude: 
-104.9903, latitude: 39.7392 }], // Denver, CO 26 | ['America/Phoenix', { longitude: -112.074, latitude: 33.4484 }], // Phoenix, AZ 27 | ['America/Anchorage', { longitude: -149.9003, latitude: 61.2181 }], // Anchorage, AK 28 | ['America/Detroit', { longitude: -83.0458, latitude: 42.3314 }], // Detroit, MI 29 | ['America/Indianapolis', { longitude: -86.1581, latitude: 39.7684 }], // Indianapolis, IN 30 | ['America/Boise', { longitude: -116.2023, latitude: 43.615 }], // Boise, ID 31 | ['America/Juneau', { longitude: -134.4197, latitude: 58.3019 }], // Juneau, AK 32 | ]; 33 | 34 | const REFERERS = ['https://www.google.com', 'https://www.bing.com', 'https://duckduckgo.com']; 35 | 36 | const ACCEPT_LANGUAGES = ['en-US,en;q=0.9', 'en-GB,en;q=0.9', 'fr-FR,fr;q=0.9']; 37 | 38 | const PROXIES = [ 39 | // TODO: replace with your own proxies 40 | // { 41 | // server: 'http://ip_server:port', 42 | // username: 'proxy_username', 43 | // password: 'proxy_password', 44 | // }, 45 | ]; 46 | 47 | async function main() { 48 | const userAgent = USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)]; 49 | const headerDNT = Math.random() > 0.5 ? '0' : '1'; 50 | const location = LOCATIONS[Math.floor(Math.random() * LOCATIONS.length)]; 51 | const referer = REFERERS[Math.floor(Math.random() * REFERERS.length)]; 52 | const acceptLanguage = ACCEPT_LANGUAGES[Math.floor(Math.random() * ACCEPT_LANGUAGES.length)]; 53 | const proxy = PROXIES.length > 0 ? PROXIES[Math.floor(Math.random() * PROXIES.length)] : null; 54 | 55 | const browser = await chromium.launch({ 56 | headless: false, 57 | args: BROWSER_ARGS, 58 | ignoreDefaultArgs: BROWSER_IGNORED_ARGS, 59 | }); 60 | 61 | const context = await browser.newContext({ 62 | proxy: proxy ?? 
undefined, 63 | locale: 'en-US,en,ru', 64 | timezoneId: location[0], 65 | extraHTTPHeaders: { 66 | 'Accept-Language': acceptLanguage, 67 | Referer: referer, 68 | DNT: headerDNT, 69 | Connection: 'keep-alive', 70 | 'Accept-Encoding': 'gzip, deflate, br', 71 | }, 72 | geolocation: location[1], 73 | userAgent: userAgent, 74 | permissions: ['notifications'], 75 | viewport: { 76 | width: 1920 + randomInt(-50, 50), 77 | height: 1080 + randomInt(-50, 50), 78 | }, 79 | }); 80 | 81 | // Configure the AgentQL API key 82 | configure({ 83 | apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted. 84 | }); 85 | 86 | const page = await wrap(await context.newPage()); 87 | await page.goto('https://bot.sannysoft.com/', { referer }); 88 | await page.waitForTimeout(30000); 89 | 90 | await browser.close(); 91 | } 92 | 93 | main(); 94 | -------------------------------------------------------------------------------- /examples/js/submit-form/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Submit form 3 | description: Submit form with AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: Automating form submission using AgentQL 8 | 9 | This is an example of automating form submission on a website using AgentQL. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation) 14 | - Save this JavaScript file locally as **main.js** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | node main.js 19 | ``` 20 | 21 | ## Code explanation 22 | 23 | This script demonstrates how to: 24 | 25 | 1. Navigate to a form page 26 | 2. Fill out form fields using AgentQL queries 27 | 3. Submit the form and handle the confirmation 28 | 4. 
/* This example demonstrates how to fill out and submit a web form with AgentQL. */

const { wrap, configure } = require('agentql');
const { chromium } = require('playwright');

// Page that hosts the example form.
const FORM_URL = 'https://formsmarts.com/html-form-example';

// Query locating every form control the script interacts with.
const FORM_QUERY = `
{
  first_name
  last_name
  email
  subject_of_inquiry
  inquiry_text_box
  submit_btn
}
`;

// Query locating the button on the confirmation step shown after submitting.
const CONFIRM_QUERY = `
{
  confirmation_btn
}
`;

/**
 * Open the example form, fill in every field, submit it and confirm the submission.
 */
async function submitForm() {
  // Configure the AgentQL API key
  configure({
    apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted.
  });

  const browser = await chromium.launch({ headless: false });
  const page = await wrap(await browser.newPage());
  await page.goto(FORM_URL);

  // Locate the form controls and fill them in, in the order they appear.
  const form = await page.queryElements(FORM_QUERY);
  await form.first_name.type('John');
  await form.last_name.type('Doe');
  await form.email.type('john.doe@example.com');
  await form.subject_of_inquiry.selectOption({ label: 'Sales Inquiry' });
  await form.inquiry_text_box.fill('I want to learn more about AgentQL');
  await form.submit_btn.click();

  // The site asks for a confirmation before the form is actually sent.
  const confirmation = await page.queryElements(CONFIRM_QUERY);
  await confirmation.confirmation_btn.click();

  // Give the submission a moment to complete before reporting and closing.
  await page.waitForTimeout(3000);
  console.log('Form submitted successfully!');

  await browser.close();
}

submitForm();
/* This example demonstrates how to wait for the page to load completely before querying the page. */

const { wrap, configure } = require('agentql');
const { chromium } = require('playwright');

// DuckDuckGo video search used to demonstrate loading more videos on the page
const URL =
  'https://duckduckgo.com/?q=machine+learning+lectures+mit&t=h_&iar=videos&iax=videos&ia=videos';

// Define the query to extract the first videos listed on the page
const QUERY = `
{
  videos(first 10 videos)[] {
    video_title
    length
    views
  }
}
`;

/**
 * Open the video search page, scroll twice (waiting for the page to be ready
 * before each scroll), then query the video list and print its first entry.
 */
async function main() {
  // Configure the AgentQL API key
  configure({
    apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted.
  });

  // Launch a headed (visible) browser using Playwright.
  const browser = await chromium.launch({ headless: false });

  // Wrap a fresh page to get access to the AgentQL querying API.
  const rawPage = await browser.newPage();
  const page = await wrap(rawPage);
  await page.goto(URL);

  let scrollsLeft = 2;
  while (scrollsLeft > 0) {
    // Wait for the page to load completely.
    await page.waitForPageReadyState();

    // Jump to the bottom of the page to load more videos.
    await page.keyboard.press('End');
    scrollsLeft -= 1;
  }

  // Use the queryData() method to fetch the video list data from the page.
  const response = await page.queryData(QUERY);
  const firstVideo = response['videos'][0];

  // Print the first video details.
  console.log(firstVideo);

  // Close the browser.
  await browser.close();
}

main();
/* This example demonstrates how to get XPath of an element that was fetched with AgentQL */

const { wrap, configure } = require('agentql');
const { chromium } = require('playwright');

// Import the xPath function from the playwright-dompath package.
const { xPath } = require('playwright-dompath/dist/DOMPath');

// Define the URL of the page to scrape.
const URL = 'https://scrapeme.live/shop/';

// Define the query to locate the search box.
const QUERY = `
{
  search_products_box
}
`;

/**
 * Open the shop page, locate its search box with AgentQL and print the
 * XPath of that element.
 */
async function main() {
  // Configure the AgentQL API key
  configure({
    apiKey: process.env.AGENTQL_API_KEY, // This is the default and can be omitted.
  });

  // Launch a headed (visible) browser using Playwright.
  const browser = await chromium.launch({ headless: false });

  // Wrap a fresh page to get access to the AgentQL querying API.
  const rawPage = await browser.newPage();
  const page = await wrap(rawPage);
  await page.goto(URL);

  // Locate the search box on the page with queryElements().
  const { search_products_box: searchBox } = await page.queryElements(QUERY);

  // Resolve and print the XPath of the search box element.
  const searchBoxXPath = await xPath(searchBox);
  console.log('XPath:', searchBoxXPath);

  // Close the browser.
  await browser.close();
}

main();
"""This example demonstrates how to close a site's cookie dialog (rejecting cookies) with AgentQL."""

import agentql
from playwright.sync_api import sync_playwright

# Set the URL to the desired website
URL = "https://gov.uk/"

QUERY = """
{
    cookies_form {
        reject_btn
    }
}
"""


def main():
    """Open the site and reject cookies if a cookie dialog is present."""
    with sync_playwright() as playwright, playwright.chromium.launch(headless=False) as browser:
        # Create a new page in the browser and wrap it to get access to the AgentQL's querying API
        page = agentql.wrap(browser.new_page())

        page.goto(URL)

        # Use query_elements() method to fetch the cookies dialog button from the page
        response = page.query_elements(QUERY)

        # Check if there is a cookie-rejection button on the page.
        # Identity comparison is the correct way to test for None.
        reject_btn = response.cookies_form.reject_btn
        if reject_btn is not None:
            # If so, click the button to reject cookies
            reject_btn.click()

        # Wait for 10 seconds to see the browser in action
        page.wait_for_timeout(10000)


if __name__ == "__main__":
    main()
"""This example demonstrates how to close popup windows (like promotion form) with AgentQL."""

import agentql
from playwright.sync_api import sync_playwright

# Set the URL to the desired website
URL = "https://kinfield.com/"

QUERY = """
{
    popup_form {
        close_btn
    }
}
"""


def main():
    """Open the site, close its popup form and keep the browser open briefly."""
    with sync_playwright() as playwright:
        with playwright.chromium.launch(headless=False) as browser:
            # Wrap a fresh page to get access to the AgentQL's querying API
            raw_page = browser.new_page()
            page = agentql.wrap(raw_page)

            page.goto(URL)

            # Locate the popup form with query_elements() and click its close button
            popup_form = page.query_elements(QUERY).popup_form
            popup_form.close_btn.click()

            # Wait for 10 seconds to see the browser in action
            page.wait_for_timeout(10000)


if __name__ == "__main__":
    main()
#!/usr/bin/env python3

"""This is an example of how to collect pricing data from e-commerce website using AgentQL."""

import asyncio
from urllib.parse import urljoin

import agentql
from agentql.ext.playwright.async_api import Page
from playwright.async_api import Error as PlaywrightError
from playwright.async_api import async_playwright

# URL of the e-commerce website
# You can replace it with any other e-commerce website but the queries should be updated accordingly
URL = "https://www.bestbuy.com"


async def _do_extract_pricing_data(page: Page) -> list:
    """Extract pricing data from the current page.

    Args:
        page (Page): The Playwright page object to interact with the browser.

    Returns:
        list: The pricing data extracted from the page.
    """
    # The query of the data to be extracted
    query = """
    {
        products[] {
            name
            model
            sku
            price(integer)
        }
    }"""
    pricing_data = await page.query_data(query)

    return pricing_data.get("products", [])


async def _search_product(
    page: Page,
    product: str,
    min_price: int,
    max_price: int,
) -> bool:
    """Search for a product with a price range.

    Args:
        page (Page): The Playwright page object to interact with the browser.
        product (str): The product name to search for.
        min_price (int): The minimum price of the product.
        max_price (int): The maximum price of the product.

    Returns:
        bool: True if the search is successful, False otherwise.
    """

    # Search for a product
    search_input = await page.get_by_prompt("the search input field")
    if not search_input:
        print("Search input field not found.")
        return False
    await search_input.type(product, delay=200)
    await search_input.press("Enter")

    # Define price range
    min_price_input = await page.get_by_prompt("the min price input field")
    if not min_price_input:
        print("Min price input field not found.")
        return False
    await min_price_input.fill(str(min_price))

    max_price_input = await page.get_by_prompt("the max price input field")
    if not max_price_input:
        print("Max price input field not found.")
        return False
    await max_price_input.fill(str(max_price))
    await max_price_input.press("Enter")
    return True


async def _go_to_the_next_page(page: Page) -> bool:
    """Navigate to the next page of the search results.

    Args:
        page (Page): The Playwright page object to interact with the browser.

    Returns:
        bool: True if the next page is navigated successfully, False if there is
            no next page or the navigation failed.
    """
    # Ask AgentQL for the pagination links of the current page
    next_page_query = """
    {
        pagination {
            prev_page_url
            next_page_url
        }
    }"""
    print("Navigating to the next page...")
    pagination = await page.query_data(next_page_query)
    # "pagination" may be missing or null when the page has no pagination block
    next_page_url = (pagination.get("pagination") or {}).get("next_page_url")
    if not next_page_url:
        return False

    # Resolve relative links against the current page; absolute URLs pass through unchanged
    next_page_url = urljoin(page.url, next_page_url)
    try:
        await page.goto(next_page_url)
    except PlaywrightError as error:
        # Treat a failed navigation as "no more pages", but say why instead of hiding it
        print(f"Failed to navigate to {next_page_url}: {error}")
        return False
    return True


async def extract_pricing_data(
    page: Page,
    product: str,
    min_price: int,
    max_price: int,
    max_pages: int = 3,
) -> list:
    """Extract pricing data for a product within a price range.

    Args:
        page (Page): The Playwright page object to interact with the browser.
        product (str): The product name to search for.
        min_price (int): The minimum price of the product.
        max_price (int): The maximum price of the product.
        max_pages (int): The maximum number of result pages to collect from.

    Returns:
        list: The pricing data collected from all visited pages; empty if the
            search could not be performed.
    """
    # Search for the product with the specified price range
    print(f"Searching for product: {product} with price range: ${min_price} - ${max_price}")
    if not await _search_product(page, product, min_price, max_price):
        print("Failed to search for the product.")
        return []

    current_page = 1
    pricing_data = []
    while current_page <= max_pages:
        # Extract pricing data from the current page
        print(f"Extracting pricing data on page {current_page}...")
        pricing_data_on_page = await _do_extract_pricing_data(page)
        print(f"{len(pricing_data_on_page)} products found")

        pricing_data.extend(pricing_data_on_page)

        # Navigate to the next page
        if not await _go_to_the_next_page(page):
            print("No more next page.")
            break

        current_page += 1

    return pricing_data


async def main():
    """Main function."""
    async with async_playwright() as playwright, await playwright.chromium.launch(
        headless=False
    ) as browser:
        # Create a new page in the browser and wrap it to get access to the AgentQL's querying API
        page = await agentql.wrap_async(browser.new_page())
        await page.goto(URL)  # open the target URL

        pricing_data = await extract_pricing_data(
            page,
            product="gpu",
            min_price=500,
            max_price=800,
        )

        print(pricing_data)


if __name__ == "__main__":
    asyncio.run(main())
"""This example demonstrates how to collect ecommerce data by stepping through multiple paginated webpages."""

import json
import logging

from playwright.sync_api import sync_playwright

import agentql

logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger(__name__)

# Maximum number of books to collect across all pages
MAX_BOOKS = 50

if __name__ == "__main__":
    with sync_playwright() as playwright, playwright.chromium.launch(headless=False) as browser:
        page = agentql.wrap(browser.new_page())
        page.goto("https://books.toscrape.com/")

        # define the query to extract product names, prices, and ratings
        QUERY = """
        {
            books[] {
                name
                price
                rating
            }
        }
        """

        books = []

        # Aggregate the first MAX_BOOKS book names, prices and ratings
        while len(books) < MAX_BOOKS:
            # collect data from the current page
            response = page.query_data(QUERY)

            # take only as many books as are still needed to reach the limit
            books.extend(response["books"][: MAX_BOOKS - len(books)])
            if len(books) >= MAX_BOOKS:
                break

            # get the pagination info from the current page
            pagination_info = page.get_pagination_info()

            # stop when there is no next page; otherwise the loop would keep
            # re-querying the last page forever
            if not pagination_info.has_next_page:
                break
            pagination_info.navigate_to_next_page()

        with open("./books.json", "w") as f:
            json.dump(books, f, indent=4)
"""This example demonstrates how to collect HackerNews headlines across multiple paginated webpages."""

import json
import logging

from playwright.sync_api import sync_playwright

import agentql

# import paginate tool from agentql tools
from agentql.tools.sync_api import paginate

logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger(__name__)


if __name__ == "__main__":
    with sync_playwright() as playwright:
        with playwright.chromium.launch(headless=False) as browser:
            raw_page = browser.new_page()
            page = agentql.wrap(raw_page)
            page.goto("https://news.ycombinator.com/")

            # define the query to extract post titles
            QUERY = """
            {
                posts[] {
                    title
                }
            }
            """
            # collect all data over the next 3 pages with the query defined above
            paginated_data = paginate(page, QUERY, 3)

            # save the aggregated data to a json file
            with open("./hackernews_paginated_data.json", "w") as output_file:
                json.dump(paginated_data, output_file, indent=4)
            log.debug("Paginated data has been saved to hackernews_paginated_data.json")
"""This example demonstrates how to asynchronously fetch product prices across websites in parallel with query_data() method."""

import asyncio

import agentql
from playwright.async_api import BrowserContext, async_playwright

# Set the URL to the desired website
BESTBUY_URL = "https://www.bestbuy.com"
EBAY_URL = "https://www.ebay.com"
TELQUEST_URL = "https://www.telquestintl.com"

# Define the queries to interact with the page
HOME_PAGE_QUERY = """
{
    search_input
    search_button
}
"""

PRODUCT_INFO_QUERY = """
{
    product_price (for Nintendo Switch - OLed Model - w/ White Joy-Con)
}
"""


async def fetch_price(context: BrowserContext, session_url):
    """Open the given URL in a new tab and fetch the price of the product.

    Args:
        context (BrowserContext): The browser context in which the new tab is opened.
        session_url: The home page URL of the website to search on.

    Returns:
        The value of the "product_price" field returned by the product query.
    """
    # Create a page in a new tab in the browser context and wrap it to get access to the AgentQL's querying API
    page = await agentql.wrap_async(context.new_page())
    await page.goto(session_url)

    # Search for the product
    await page.wait_for_page_ready_state()
    home_response = await page.query_elements(HOME_PAGE_QUERY)
    await home_response.search_input.fill("Nintendo Switch - OLED Model White")
    await home_response.search_button.click()

    # Fetch the price data from the page
    data = await page.query_data(PRODUCT_INFO_QUERY)
    return data["product_price"]


async def get_price_across_websites():
    """Fetch prices concurrently in the same browser session from multiple websites."""
    async with async_playwright() as playwright, await playwright.chromium.launch(
        headless=False
    ) as browser, await browser.new_context() as context:
        # Open multiple tabs in the same browser context to fetch prices concurrently
        (bestbuy_price, ebay_price, telquest_price) = await asyncio.gather(
            fetch_price(context, BESTBUY_URL),
            fetch_price(context, EBAY_URL),
            fetch_price(context, TELQUEST_URL),
        )

        # Each label names the site the price was actually fetched from
        print(
            f"""
        Price at BestBuy: {bestbuy_price}
        Price at eBay: {ebay_price}
        Price at Telquest: {telquest_price}
        """
        )


if __name__ == "__main__":
    asyncio.run(get_price_across_websites())
print("Price at BestBuy: ", response["nintendo_switch_price"]) 30 | 31 | page.goto(NINTENDO_URL) 32 | 33 | # Use query_data() method to fetch the price from the Nintendo page 34 | response = page.query_data(PRODUCT_INFO_QUERY) 35 | 36 | print("Price at Nintendo: ", response["nintendo_switch_price"]) 37 | 38 | page.goto(TARGET_URL) 39 | 40 | # Use query_data() method to fetch the price from the Target page 41 | response = page.query_data(PRODUCT_INFO_QUERY) 42 | 43 | print("Price at Target: ", response["nintendo_switch_price"]) 44 | 45 | 46 | if __name__ == "__main__": 47 | main() 48 | -------------------------------------------------------------------------------- /examples/python/first_steps/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: First steps with AgentQL 3 | description: Collect pricing data from e-commerce website using AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: collecting pricing data from e-commerce website using AgentQL 8 | 9 | This is an example of collecting pricing data from e-commerce website using AgentQL. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 14 | - Save this python file locally as **main.py** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | python3 main.py 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. 
[Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/python/first_steps/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """This is an example of collecting pricing data from an e-commerce website using AgentQL.""" 4 | 5 | # Import the Page class from the AgentQL Playwright extension 6 | # This enables the use of the AgentQL Smart Locator and Data Query API 7 | import agentql 8 | from agentql.ext.playwright.sync_api import Page 9 | 10 | # Import the synchronous playwright library 11 | # This library is used to launch the browser and interact with the web page 12 | from playwright.sync_api import sync_playwright 13 | 14 | URL = "https://scrapeme.live/shop" 15 | 16 | # The AgentQL query to locate the search box element 17 | # More about AgentQL Query: https://docs.agentql.com/agentql-query/query-intro 18 | SEARCH_BOX_QUERY = """ 19 | { 20 | search_product_box 21 | } 22 | """ 23 | 24 | # The AgentQL query of the data to be extracted 25 | # More about AgentQL Query: https://docs.agentql.com/agentql-query/query-intro 26 | PRODUCT_DATA_QUERY = """ 27 | { 28 | price_currency 29 | products[] { 30 | name 31 | price(integer) 32 | } 33 | } 34 | """ 35 | 36 | # Other than the AgentQL query, you can also use a natural language prompt to locate the element 37 | NATURAL_LANGUAGE_PROMPT = "Button to display Qwilfish page" 38 | 39 | 40 | def main(): 41 | with sync_playwright() as playwright, playwright.chromium.launch(headless=False) as browser: 42 | # Create a new page in the browser and wrap it to get access to the AgentQL's querying API 43 | page = agentql.wrap(browser.new_page()) 44 | 45 | page.goto(URL) 46 | 47 | product_data = _extract_product_data( 48 | page, 49 | search_key_word="fish", 50 | ) 51 | 52 | print(product_data) 53 | 54 | _add_qwilfish_to_cart(page) 55 | 56
| 57 | def _extract_product_data(page: Page, search_key_word: str) -> dict: 58 | """Extract product data. 59 | 60 | Args: 61 | page (Page): The Playwright page object to interact with the browser. 62 | search_key_word (str): The product to search for. 63 | 64 | Returns: 65 | dict: The product data extracted from the page. 66 | """ 67 | # Find DOM element using AgentQL API's query_elements() method 68 | response = page.query_elements(SEARCH_BOX_QUERY) 69 | 70 | # Interact with the element using Playwright API 71 | # API Doc: https://playwright.dev/python/docs/input#text-input 72 | response.search_product_box.type(search_key_word, delay=200) 73 | page.keyboard.press("Enter") 74 | 75 | # Extract data using AgentQL API's query_data() method 76 | data = page.query_data(PRODUCT_DATA_QUERY) 77 | 78 | return data 79 | 80 | 81 | def _add_qwilfish_to_cart(page: Page): 82 | """Add Qwilfish to cart with AgentQL Smart Locator API. 83 | 84 | Args: 85 | page (Page): The Playwright page object to interact with the browser. 86 | """ 87 | # Find DOM element using AgentQL API's get_by_prompt() method 88 | qwilfish_page_btn = page.get_by_prompt(NATURAL_LANGUAGE_PROMPT) 89 | 90 | # Interact with the element using Playwright API 91 | # API Doc: https://playwright.dev/python/docs/api/class-locator#locator-click 92 | if qwilfish_page_btn: 93 | qwilfish_page_btn.click() 94 | 95 | # Wait for 10 seconds to see the browser action 96 | page.wait_for_timeout(10000) 97 | 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- /examples/python/get_by_prompt/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Get single element by prompt 3 | description: Use AgentQL's get_by_prompt method to retrieve an element and interact with it. 
4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: interact with elements leveraging get_by_prompt method 8 | 9 | This example shows how to use AgentQL's [`get_by_prompt`](https://docs.agentql.com/api-references/agentql-page#getbyprompt) method to retrieve an element and interact with it. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 14 | - Save this python file locally as **main.py** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | python3 main.py 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. [Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/python/get_by_prompt/main.py: -------------------------------------------------------------------------------- 1 | """This example demonstrates how to leverage get_by_prompt method to interact with element by prompt text.""" 2 | 3 | import agentql 4 | from playwright.sync_api import sync_playwright 5 | 6 | # Set the URL to the desired website 7 | URL = "https://duckduckgo.com/" 8 | 9 | 10 | def main(): 11 | with sync_playwright() as p, p.chromium.launch(headless=False) as browser: 12 | 13 | page = agentql.wrap(browser.new_page()) # Wrapped to access AgentQL's query API's 14 | 15 | # Navigate to the URL 16 | page.goto(URL) 17 | 18 | # Get the search bar with the prompt text 19 | search_bar = page.get_by_prompt("the search bar") 20 | 21 | # Fill out the search bar, if it exists 22 | if search_bar: 23 | search_bar.fill("AgentQL") 24 | 25 | # Click the search button 26 | page.get_by_prompt("the search button").click() 27 | 28 | # Used only for demo purposes. It allows you to see the effect of the script. 
29 | page.wait_for_timeout(10000) 30 | 31 | 32 | if __name__ == "__main__": 33 | main() 34 | -------------------------------------------------------------------------------- /examples/python/humanlike-antibot/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Humanlike mouse movement 3 | description: Use humanlike mouse movement to avoid antibot with AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: use humanlike mouse movement to avoid antibot 8 | 9 | This example shows how to use humanlike mouse movement to avoid antibot when using AgentQL. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 14 | - Save this python file locally as **main.py** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | python3 main.py 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query.
[Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | 25 | ## Learn more 26 | 27 | - [Read the full guide on avoiding bot detection with humanlike behavior](https://docs.agentql.com/avoiding-bot-detection/user-like-behavior) 28 | - [Check out more guides on avoiding bot detection](https://docs.agentql.com/avoiding-bot-detection) 29 | -------------------------------------------------------------------------------- /examples/python/humanlike-antibot/main.py: -------------------------------------------------------------------------------- 1 | import random 2 | import time 3 | 4 | from playwright.sync_api import ElementHandle, Page, sync_playwright 5 | 6 | import agentql 7 | 8 | 9 | def random_mouse_movement(page: Page): 10 | for _ in range(10): 11 | page.mouse.move(random.randint(0, 1000), random.randint(0, 1000)) 12 | time.sleep(random.uniform(0.1, 0.5)) 13 | 14 | 15 | def random_click(page: Page, element: ElementHandle): 16 | box = element.bounding_box() 17 | page.mouse.move(box["x"] + box["width"] / 2, box["y"] + box["height"] / 2) 18 | page.mouse.click(box["x"] + box["width"] / 2, box["y"] + box["height"] / 2) 19 | 20 | 21 | def random_scroll(page: Page): 22 | page.mouse.wheel(0, 1000) 23 | time.sleep(random.uniform(0.1, 0.5)) 24 | 25 | 26 | with sync_playwright() as playwright: 27 | # Launch a headed (non-headless) Chromium browser 28 | browser = playwright.chromium.launch(headless=False) 29 | 30 | # Open a new page and wrap it with AgentQL 31 | page = agentql.wrap(browser.new_page()) 32 | page.goto("https://duckduckgo.com/") 33 | 34 | # Type "AgentQL" into the search box keystroke by keystroke 35 | page.get_by_prompt("the search bar").press_sequentially("AgentQL") 36 | 37 | # Click the search button by first moving the mouse to the center of the element 38 | random_click(page, page.get_by_prompt("the search button")) 39 | 40 | for _ in range(5): 41 | random_mouse_movement(page) 42 | random_scroll(page) 43 |
-------------------------------------------------------------------------------- /examples/python/infinite_scroll/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Load additional content by scrolling 3 | description: Load additional content on pages that load content based on scroll position. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: load additional content on page by scrolling 8 | 9 | This example demonstrates how to load additional content on pages that load content based on scroll position. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 14 | - Save this python file locally as **main.py** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | python3 main.py 19 | ``` 20 | 21 | ## Adjust the scrolling method 22 | 23 | Dynamically loading content can be tricky to get right, as websites have a lot of ways to customize how this interaction looks on their sites. 24 | 25 | Scrolling to the end of a page by pressing the `End` key is not always a reliable mechanism, since pages could either have multiple scrollable areas, or have the `End` key mapped to a different function, such as for video playback. Try replacing `key_press_end_scroll(page)` in the example with `mouse_wheel_scroll(page)` and observe how the browser behaves differently, or try navigating to your own site to test in `page.goto`!
26 | -------------------------------------------------------------------------------- /examples/python/infinite_scroll/main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import random 3 | import time 4 | 5 | import agentql 6 | from agentql.ext.playwright.sync_api import Page 7 | from playwright.sync_api import sync_playwright 8 | 9 | logging.basicConfig(level=logging.DEBUG) 10 | log = logging.getLogger(__name__) 11 | 12 | 13 | def key_press_end_scroll(page: Page): 14 | page.keyboard.press("End") 15 | 16 | 17 | def mouse_wheel_scroll(page: Page): 18 | viewport_height, total_height, scroll_height = page.evaluate( 19 | "() => [window.innerHeight, document.body.scrollHeight, window.scrollY]" 20 | ) 21 | while scroll_height < total_height: 22 | scroll_height = scroll_height + viewport_height 23 | page.mouse.wheel(delta_x=0, delta_y=viewport_height) 24 | time.sleep(random.uniform(0.05, 0.1)) 25 | 26 | 27 | if __name__ == "__main__": 28 | QUERY = """ 29 | { 30 | page_title 31 | post_headers[] 32 | } 33 | """ 34 | with sync_playwright() as playwright, playwright.chromium.launch(headless=False) as browser: 35 | page = agentql.wrap(browser.new_page()) 36 | 37 | log.info("Navigating to the page...") 38 | 39 | page.goto("https://infinite-scroll.com/demo/full-page/") 40 | page.wait_for_page_ready_state() 41 | 42 | num_extra_pages_to_load = 3 43 | 44 | for times in range(num_extra_pages_to_load): 45 | log.info(f"Scrolling to the bottom of the page... 
(num_times = {times+1})") 46 | key_press_end_scroll(page) 47 | page.wait_for_page_ready_state() 48 | log.info("Content loaded!") 49 | 50 | log.info("Issuing AgentQL data query...") 51 | response = page.query_data(QUERY) 52 | 53 | log.info(f"AgentQL response: {response}") 54 | -------------------------------------------------------------------------------- /examples/python/interact_with_external_or_existing_browser/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Interact with an external browser 3 | description: Interact with an external or existing browser with AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: interact with an external or existing browser with AgentQL 8 | 9 | This example demonstrates how to interact with an external or existing browser with AgentQL. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 14 | - Save this Python file locally as **main.py** 15 | - Close your Google Chrome application if it is open. 16 | - If you're using **Mac**, open the terminal and run the following command: 17 | 18 | ```bash 19 | /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222 20 | ``` 21 | 22 | - If you're using **Windows**, open the Command Prompt and run the command: 23 | 24 | ```bash 25 | chrome.exe --remote-debugging-port=9222 26 | ``` 27 | 28 | **Make sure to replace `chrome.exe` with the path to your Chrome executable if it's not already in your system's PATH.** 29 | 30 | - In the browser window that's opened, select the Google profile you would like to use for this session. 31 | - In `main.py`, replace variable `WEBSOCKET_URL`'s placeholder value with the actual WebSocket URL returned in terminal or command prompt. The URL should be in the format of `ws://127.0.0.1:9222/devtools/browser/387adf4c-243f-4051-a181-46798f4a46f4`. 
32 | 33 | - Run the following command from the project's folder: 34 | 35 | ```bash 36 | python3 main.py 37 | ``` 38 | 39 | - If you want to learn how to work with open pages, navigate to [Scrapeme website](https://scrapeme.live/shop/Charmander/) within the browser, and use `fetch_data_from_open_website_page()` method in the script to fetch data from the page. 40 | 41 | ## Play with the query 42 | 43 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. [Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 44 | -------------------------------------------------------------------------------- /examples/python/interact_with_external_or_existing_browser/main.py: -------------------------------------------------------------------------------- 1 | """This example demonstrates how to interact with an external or existing browser with AgentQL.""" 2 | 3 | import agentql 4 | from playwright.sync_api import sync_playwright 5 | 6 | # The URL of the external or existing browser you wish to connect. 
7 | WEBSOCKET_URL = "http://localhost:9222" 8 | 9 | URL = "https://scrapeme.live/shop" 10 | 11 | SEARCH_QUERY = """ 12 | { 13 | search_products_box 14 | } 15 | """ 16 | 17 | STOCK_QUERY = """ 18 | { 19 | number_in_stock 20 | } 21 | """ 22 | 23 | 24 | def fetch_data_from_open_website_page(): 25 | """This function demonstrates how to fetch data from open pages in your local browser.""" 26 | with sync_playwright() as p: 27 | # Connect to the browser via Chrome DevTools Protocol 28 | browser = p.chromium.connect_over_cdp(WEBSOCKET_URL) 29 | 30 | # Get the first page from the opened browser and wrap it to get access to the AgentQL's querying API 31 | page = agentql.wrap(browser.contexts[0].pages[0]) 32 | 33 | # Use query_data() method to fetch the data from the page 34 | response = page.query_data(STOCK_QUERY) 35 | 36 | print(response) 37 | 38 | 39 | def interact_with_new_page_in_local_browser(): 40 | """This function demonstrates how to open and interact with a new page in your local browser.""" 41 | with sync_playwright() as p: 42 | # Connect to the browser via Chrome DevTools Protocol 43 | browser = p.chromium.connect_over_cdp(WEBSOCKET_URL) 44 | 45 | # Create a new tab in the browser window and wrap it to get access to the AgentQL's querying API 46 | page = agentql.wrap(browser.contexts[0].new_page()) 47 | 48 | page.goto(URL) 49 | 50 | # Use query_elements() method to locate the search product box from the page 51 | response = page.query_elements(SEARCH_QUERY) 52 | 53 | # Use Playwright's API to fill the search box and press Enter 54 | response.search_products_box.type("Charmander") 55 | page.keyboard.press("Enter") 56 | 57 | # Use query_data() method to fetch the stock number from the page 58 | response = page.query_data(STOCK_QUERY) 59 | 60 | print(response) 61 | 62 | 63 | if __name__ == "__main__": 64 | interact_with_new_page_in_local_browser() 65 | --------------------------------------------------------------------------------
/examples/python/list_query_usage/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Query a list of items 3 | description: Query a list of items on the page with AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: querying a list of items with AgentQL 8 | 9 | This example demonstrates how to query a list of items on the page. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 14 | - Save this python file locally as **main.py** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | python3 main.py 19 | ``` 20 | 21 | ## Play with it 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. [Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/python/list_query_usage/main.py: -------------------------------------------------------------------------------- 1 | """This example demonstrates how to query a list of items on the page.""" 2 | 3 | import os 4 | 5 | import agentql 6 | from playwright.sync_api import sync_playwright 7 | 8 | # Set the URL to the desired website 9 | URL = "https://scrapeme.live/shop" 10 | 11 | # Define the queries to interact with the page 12 | QUERY = """ 13 | { 14 | products[] 15 | { 16 | name 17 | price(integer) 18 | } 19 | } 20 | """ 21 | 22 | 23 | def main(): 24 | with sync_playwright() as playwright, playwright.chromium.launch(headless=False) as browser: 25 | # Create a new page in the browser and wrap it to get access to the AgentQL's querying API 26 | page = agentql.wrap(browser.new_page()) 27 | 28 | page.goto(URL) 29 | 30 | # Use query_data() method to fetch the data from the page 31 | response = page.query_data(QUERY) 32 | 33 | # 
Get the directory of the current script 34 | script_dir = os.path.dirname(os.path.abspath(__file__)) 35 | 36 | # Create path to the csv file 37 | csv_file_path = os.path.join(script_dir, "product_data.csv") 38 | 39 | # Write the data to a csv file 40 | with open(csv_file_path, "w", encoding="utf-8") as file: 41 | file.write("Name, Price\n") 42 | for product in response["products"]: 43 | file.write(f"{product['name']},{product['price']}\n") 44 | 45 | 46 | if __name__ == "__main__": 47 | main() 48 | -------------------------------------------------------------------------------- /examples/python/log_into_sites/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Login to sites with AgentQL 3 | description: Use AgentQL to interact with login forms and log into websites. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: logging into sites with AgentQL 8 | 9 | This example shows how to log into websites by retrieving and interacting with web elements in AgentQL. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 14 | - Save this python file locally as **main.py** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | python3 main.py 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query.
[Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/python/log_into_sites/main.py: -------------------------------------------------------------------------------- 1 | """This example demonstrates how to login into websites by retrieving and interacting with web elements in AgentQL.""" 2 | 3 | import agentql 4 | from playwright.sync_api import sync_playwright 5 | 6 | # Set the URL to the desired website 7 | URL = "https://practicetestautomation.com/practice-test-login/" 8 | 9 | LOGIN_QUERY = """ 10 | { 11 | username_field 12 | password_field 13 | submit_btn 14 | } 15 | """ 16 | 17 | 18 | def main(): 19 | with sync_playwright() as p, p.chromium.launch(headless=False) as browser: 20 | 21 | page = agentql.wrap(browser.new_page()) # Wrapped to access AgentQL's query API 22 | 23 | # Navigate to the URL 24 | page.goto(URL) 25 | 26 | # Get the username and password fields 27 | response = page.query_elements(LOGIN_QUERY) 28 | 29 | # Fill the username and password fields 30 | response.username_field.fill("student") 31 | response.password_field.fill("Password123") 32 | 33 | # Click the submit button 34 | response.submit_btn.click() 35 | 36 | # Used only for demo purposes. It allows you to see the effect of the script. 37 | page.wait_for_timeout(10000) 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /examples/python/maps_scraper/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Scrape maps data 3 | description: Scrape data from maps using AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: scraping Google Maps with AgentQL 8 | 9 | This example demonstrates how to return a CSV of boba tea shops in Palo Alto by scraping Google Maps. 
10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 14 | - Save this python file locally as **main.py** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | python3 main.py 19 | ``` 20 | 21 | ## Play with it 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. [Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/python/maps_scraper/main.py: -------------------------------------------------------------------------------- 1 | """This example demonstrates how to scrape information from Google Maps into a CSV file.""" 2 | 3 | import os 4 | 5 | from playwright.sync_api import sync_playwright 6 | 7 | import agentql 8 | 9 | # Set the URL to the Google Maps search for "boba tea" near Palo Alto 10 | URL = "https://www.google.com/maps/search/boba+tea/@37.4400289,-122.1653309,14z/data=!3m1!4b1?entry=ttu&g_ep=EgoyMDI1MDIxMS4wIKXMDSoASAFQAw%3D%3D" 11 | 12 | # Define the queries to interact with the page using prompts to narrow down the results 13 | QUERY = """ 14 | { 15 | listings[] { 16 | name 17 | rating(in stars) 18 | description(if not available, use "n/a") 19 | order_link(if not available, use "n/a") 20 | take_out_link(if not available, use "n/a") 21 | address 22 | hours 23 | } 24 | } 25 | """ 26 | 27 | 28 | def main(): 29 | with sync_playwright() as playwright, playwright.chromium.launch(headless=False) as browser: 30 | # Create a new page in the browser and wrap it to get access to the AgentQL's querying API 31 | page = agentql.wrap(browser.new_page()) 32 | 33 | page.goto(URL) 34 | 35 | # Use query_data() method to fetch the data from the page 36 | response = page.query_data(QUERY) 37 | 38 | # Get the directory of the current script 39
| script_dir = os.path.dirname(os.path.abspath(__file__)) 40 | 41 | # Create path to the csv file 42 | csv_file_path = os.path.join(script_dir, "map_data.csv") 43 | 44 | # Write the data to a csv file 45 | with open(csv_file_path, "w", encoding="utf-8") as file: 46 | file.write("Name, Rating, Description, Order Link, Take Out Link, Address, Hours\n") 47 | for listing in response["listings"]: 48 | file.write( 49 | f"{listing['name']},{listing['rating']},{listing['description']},{listing['order_link']},{listing['take_out_link']},{listing['address']},{listing['hours']} \n" 50 | ) 51 | 52 | 53 | if __name__ == "__main__": 54 | main() 55 | -------------------------------------------------------------------------------- /examples/python/maps_scraper/map_data.csv: -------------------------------------------------------------------------------- 1 | Name, Rating, Description, Order Link, Take Out Link, Address, Hours 2 | Bober Tea & Coffee,4.2,Bubble Tea,https://www.google.com/viewer/chooseprovider?mid=/g/11v07psthp&g2lbs=AAEPWCtLQJ0NhKekV7AKyx9cRm6ciwy8UXLmFtJYOHJorFYmBdS8lNQHcfnGAaO_xYZ2AeUSTWdplwYjGQtz4O3fG6dwZaLkTg%3D%3D&hl=en-US&gl=us&fo_m=MfohQo559jFvMUOzJVpjPL1YMfZ3bInYwBDuMfaXTPp5KXh-&utm_source=tactile&gei=Vu-uZ-34DvOhur8P5o76-AU&ei=Vu-uZ-34DvOhur8P5o76-AU&fo_s=OA,SOE&opi=79508299&ebb=1&cs=0&foub=mcpp,n/a,121 Lytton Ave,Closed⋅ Opens 10 AM Fri 3 | Mr. 
Sun Tea Palo Alto,4.0,Bubble Tea,https://www.google.com/viewer/chooseprovider?mid=/g/11sy40sg1_&g2lbs=AAEPWCtPx5itLEEiBTlqTNOOoYRUinqK2nz-oCtpgBFQnSuuOOxg1Sd8RDa0B-sPvUZCkEn0-vBD3THFJAOcTbU7T8K4-51jnw%3D%3D&hl=en-US&gl=us&fo_m=MfohQo559jFvMUOzJVpjPL1YMfZ3bInYwBDuMfaXTPp5KXh-&utm_source=tactile&gei=Vu-uZ-34DvOhur8P5o76-AU&ei=Vu-uZ-34DvOhur8P5o76-AU&fo_s=OA,SOE&opi=79508299&ebb=1&cs=0&foub=mcpp,n/a,436 University Ave,Closed⋅ Opens 12 PM Fri 4 | UMe Tea,4.1,Tea house,https://www.google.com/viewer/chooseprovider?mid=/g/11nxs3lr71&g2lbs=AAEPWCtnMb-AwY1NSqG2LMDNksENZ8BRKnAzQtRBPoPOVzrV-SSL425wHgYmOHO_GMQPE02cMgrRPwg7IDuVWqTyRENy8LRzYQ%3D%3D&hl=en-US&gl=us&fo_m=MfohQo559jFvMUOzJVpjPL1YMfZ3bInYwBDuMfaXTPp5KXh-&utm_source=tactile&gei=Vu-uZ-34DvOhur8P5o76-AU&ei=Vu-uZ-34DvOhur8P5o76-AU&fo_s=OA,SOE&opi=79508299&ebb=1&cs=0&foub=mcpp,n/a,421 California Ave,Closed⋅ Opens 12 PM Fri 5 | T4,4.1,Bubble TeaCute cafe for a variety of bubble teas,https://www.google.com/viewer/chooseprovider?mid=/g/11b6gjnl5s&g2lbs=AAEPWCuWyeNemYF6k3g-dyumJX3QVDy6uD1YMMxMOkeaniUiEBg7VxuiS6mq3JdBjsE5opb_lCfpJt48Iba6a2Jgs75SUGmF6A%3D%3D&hl=en-US&gl=us&fo_m=MfohQo559jFvMUOzJVpjPL1YMfZ3bInYwBDuMfaXTPp5KXh-&utm_source=tactile&gei=Vu-uZ-34DvOhur8P5o76-AU&ei=Vu-uZ-34DvOhur8P5o76-AU&fo_s=OA,SOE&opi=79508299&ebb=1&cs=0&foub=mcpp,n/a,165 University Ave #D,Closed⋅ Opens 11 AM Fri 6 | Wanpo Tea Shop,3.9,Bubble Tea,https://www.google.com/viewer/chooseprovider?mid=/g/11qpp_4svp&g2lbs=AAEPWCsvu2BTbD7J_dMHFjCW1PNLLYO63Kgf_HkngwyyuY4JWe7sPF5eqdTwkdWh918yLEyhkrwjBdN1MwxEvgO_5fdWEhJGHA%3D%3D&hl=en-US&gl=us&fo_m=MfohQo559jFvMUOzJVpjPL1YMfZ3bInYwBDuMfaXTPp5KXh-&utm_source=tactile&gei=Vu-uZ-34DvOhur8P5o76-AU&ei=Vu-uZ-34DvOhur8P5o76-AU&fo_s=OA,SOE&opi=79508299&ebb=1&cs=0&foub=mcpp,n/a,660 Stanford Shopping Center #721,Closed⋅ Opens 11 AM Fri 7 | Boba Guys Palo Alto,4.3,Bubble 
Tea,https://www.google.com/viewer/chooseprovider?mid=/g/11fj9cgszb&g2lbs=AAEPWCuIQSlnDWDRgN4QGwkmZL6ZEcVuMBE8WxJL2kKdDFIB4ODl-2-EsKWOIt0c2MCrFhWolzmMsvcdnJ9gL3gfTtO1V2P1CA%3D%3D&hl=en-US&gl=us&fo_m=MfohQo559jFvMUOzJVpjPL1YMfZ3bInYwBDuMfaXTPp5KXh-&utm_source=tactile&gei=Vu-uZ-34DvOhur8P5o76-AU&ei=Vu-uZ-34DvOhur8P5o76-AU&fo_s=OA,SOE&opi=79508299&ebb=1&cs=0&foub=mcpp,n/a,855 El Camino Real #120,Closed⋅ Opens 10 AM Fri 8 | -------------------------------------------------------------------------------- /examples/python/news-aggregator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: News Aggregator to CSV 3 | description: Return news headlines from a selection of URLs and save the results to a CSV file. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: News Aggregator to CSV 8 | 9 | This example shows how to return news headlines from a selection of URLs and save the results to a CSV file. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 14 | - Save this python file locally as **main.py** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | python3 main.py 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query.
[Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 24 | -------------------------------------------------------------------------------- /examples/python/news-aggregator/main.py: -------------------------------------------------------------------------------- 1 | """This script collects news headlines from a selection of URLs and saves the results to a CSV file.""" 2 | 3 | import asyncio 4 | import logging 5 | import os 6 | 7 | from playwright.async_api import BrowserContext, async_playwright 8 | 9 | import agentql 10 | 11 | # Set up logging 12 | logging.basicConfig(level=logging.INFO) 13 | log = logging.getLogger(__name__) 14 | 15 | # Define the queries to interact with the page 16 | QUERY = """ 17 | { 18 | items(might be aritcles, posts, tweets)[]{ 19 | published_date(convert to XX/XX/XXXX format) 20 | entry(title or post if no title is available) 21 | author(person's name; return "n/a" if not available) 22 | outlet(the original platform it is posted on; if no platform is listed, use the root domain of the url) 23 | url 24 | } 25 | } 26 | """ 27 | 28 | # Set URLs to the desired websites 29 | WEBSITE_URLS = [ 30 | "https://bsky.app/search?q=agents+for+the+web", 31 | "https://dev.to/search?q=agents%20for%20the%20web&sort_by=published_at&sort_direction=desc", 32 | "https://hn.algolia.com/?dateRange=last24h&page=0&prefix=false&query=agents%20for%20the%20web&sort=byDate&type=story", 33 | "https://duckduckgo.com/?q=agents+for+the+web&t=h_&iar=news&ia=news", 34 | ] 35 | # Make a CSV file to store the data 36 | # Get the directory of the current script 37 | SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) 38 | 39 | # Create path to the csv file 40 | CSV_FILE_PATH = os.path.join(SCRIPT_DIR, "news_headlines.csv") 41 | 42 | 43 | async def main(): 44 | """Fetch data concurrently in the same browser session from multiple websites.""" 45 | async with async_playwright() as p, await p.chromium.launch( 46 | headless=True 47 | 
) as browser, await browser.new_context() as context: 48 | # Open multiple tabs in the same browser context to fetch data concurrently 49 | await asyncio.gather( 50 | *(fetch_data(context, url) for url in WEBSITE_URLS) 51 | ) 52 | 53 | # Update progress 54 | log.info("All done! CSV is here: %s...", CSV_FILE_PATH) 55 | 56 | 57 | async def fetch_data(context: BrowserContext, session_url): 58 | """Open the given URL in a new tab and fetch the data.""" 59 | page = await agentql.wrap_async(context.new_page()) 60 | await page.goto(session_url) 61 | 62 | data = await page.query_data(QUERY) 63 | 64 | # Prepare new data 65 | new_lines = [] 66 | for item in data["items"]: 67 | # Strip '|' from entry to avoid CSV formatting issues 68 | clean_entry = item["entry"].replace("|", "") 69 | new_lines.append( 70 | f"{item['published_date']} | {clean_entry} | {item['url']} | {item['outlet']} | {item['author']}\n" 71 | ) 72 | 73 | # Handle file writing with proper header management 74 | if not os.path.exists(CSV_FILE_PATH): 75 | # New file - write header and data 76 | with open(CSV_FILE_PATH, "w", encoding="utf-8") as file: 77 | file.write("Posted | Entry | URL | Platform | Author\n") 78 | file.writelines(new_lines) 79 | else: 80 | # File exists - append new data while preserving existing content 81 | with open(CSV_FILE_PATH, "a", encoding="utf-8") as file: 82 | file.writelines(new_lines) 83 | 84 | # Update progress 85 | log.info("Fetched items from %s...", session_url) 86 | 87 | 88 | if __name__ == "__main__": 89 | # Run the main function in an event loop 90 | asyncio.run(main()) 91 | -------------------------------------------------------------------------------- /examples/python/news-aggregator/main_sync.py: -------------------------------------------------------------------------------- 1 | """This script collects news headlines from a selection of URLs and saves the results to a CSV file.""" 2 | 3 | import logging 4 | import os 5 | 6 | from playwright.sync_api import 
def main():
    """Fetch headlines from every URL in WEBSITE_URLS one after another.

    All pages are opened as tabs of a single headless Chromium browser context.
    A failure on one site is logged and the remaining sites are still processed.
    """
    with sync_playwright() as p, p.chromium.launch(
        headless=True
    ) as browser, browser.new_context() as context:
        # Process URLs sequentially in synchronous version
        for url in WEBSITE_URLS:
            try:
                fetch_data(context, url)
            except Exception:  # per-site boundary: log and continue with the next URL
                log.exception("Failed to fetch %s", url)
    log.info("All done! CSV is here: %s", CSV_FILE_PATH)


def _clean_field(value):
    """Return *value* as single-line text that cannot break a '|'-delimited row."""
    if value is None:
        return "n/a"
    # Drop the delimiter and collapse newlines/tabs/repeated spaces.
    return " ".join(str(value).replace("|", "").split())


def _format_row(item):
    """Format one queried item as a 'Posted | Entry | URL | Platform | Author' line."""
    columns = ("published_date", "entry", "url", "outlet", "author")
    return " | ".join(_clean_field(item.get(key)) for key in columns) + "\n"


def fetch_data(context: BrowserContext, session_url):
    """Open *session_url* in a new tab, query it with QUERY and append the rows to the CSV.

    Args:
        context: Browser context in which the new tab is opened.
        session_url: Address of the page to collect items from.

    The header row is written only when the CSV file does not exist yet.
    """
    page = agentql.wrap(context.new_page())
    try:
        page.goto(session_url)
        data = page.query_data(QUERY)
    finally:
        # Close the tab even when navigation or the query fails.
        page.close()

    # NOTE(review): assumes query_data() returns a dict and may yield None for
    # fields it could not find - confirm against the AgentQL SDK.
    items = (data or {}).get("items") or []
    new_lines = [_format_row(item) for item in items]

    write_header = not os.path.exists(CSV_FILE_PATH)
    with open(CSV_FILE_PATH, "a", encoding="utf-8") as file:
        if write_header:
            file.write("Posted | Entry | URL | Platform | Author\n")
        file.writelines(new_lines)

    log.info("Fetched items from %s", session_url)
def get_comments():
    """Open the video at URL, scroll to load comments and return the QUERY result.

    Returns:
        The value returned by ``page.query_data(QUERY)``.
    """
    with sync_playwright() as playwright, playwright.chromium.launch(headless=False) as browser:
        # Create a new page in the browser and wrap it to get access to the AgentQL's querying API
        page = agentql.wrap(browser.new_page())

        page.goto(URL)

        for _ in range(5):
            # Wait for the page to load (helps to load the comments on the video)
            page.wait_for_page_ready_state()

            # Scroll down the page to load more comments
            page.keyboard.press("PageDown")

        # Use query_data() method to fetch the comments from the page
        return page.query_data(QUERY)


def perform_sentiment_analysis(comments):
    """Ask the OpenAI chat model to summarise the sentiment of the given comments.

    Args:
        comments: Mapping with a ``"comments"`` list whose entries carry a
            ``"comment_text"`` value (the shape requested by QUERY).

    Returns:
        The model's answer as text, or a short notice when there is no
        comment text to analyse.
    """
    # Skip entries without text; a None value would otherwise break the prompt building.
    comment_texts = [
        str(comment["comment_text"]).strip()
        for comment in (comments.get("comments") or [])
        if comment and comment.get("comment_text")
    ]
    if not comment_texts:
        return "No comments were found to analyze."

    system_message = (
        "You are an expert in understanding the social media analytics and analysis "
        "and specialize in analyzing sentiment of the comments."
    )

    # One comment per line so the model can tell where each comment starts and ends.
    user_message = "\n".join(
        [
            "These are the comments on the video. "
            "I am trying to understand the sentiment of the comments.",
            "",
            "Please find the comments on the video as follows:",
            *(f"- {text}" for text in comment_texts),
            "",
            "Could you please provide a summary of the comments on the video. "
            "Additionally, just give only 3 takeaways which would be important "
            "for me as the creator of the video.",
        ]
    )

    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ],
    )

    return completion.choices[0].message.content


def main():
    """Collect the video's comments and print the sentiment summary."""
    comments = get_comments()
    summary = perform_sentiment_analysis(comments)
    print(summary)
15 | [tool.poetry.group.dev.dependencies] 16 | black = "*" 17 | isort = "^5.13.2" 18 | pylint = "^3.1.0" 19 | 20 | [tool.black] 21 | line-length = 100 22 | 23 | [tool.isort] 24 | profile = "black" 25 | line_length = 100 26 | wrap_length = 100 27 | multi_line_output = 3 28 | include_trailing_comma = true 29 | known_third_party = ["agentql"] 30 | 31 | [build-system] 32 | requires = ["poetry-core"] 33 | build-backend = "poetry.core.masonry.api" 34 | -------------------------------------------------------------------------------- /examples/python/run_script_in_headless_browser/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Run a script in headless browser 3 | description: Run the script in a headless browser with AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: running a script in headless browser with AgentQL 8 | 9 | This example demonstrates how to run the script in headless browser. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 14 | - Save this python file locally as **main.py** 15 | - Run the following command from the project's folder: 16 | 17 | ```bash 18 | python3 main.py 19 | ``` 20 | 21 | ## Play with the query 22 | 23 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. 
def main():
    """Search the page at URL for "Charmander" in a headless browser and print the stock data."""
    with sync_playwright() as pw:
        with pw.chromium.launch(headless=True) as headless_browser:
            # Wrapping the page exposes AgentQL's query_elements()/query_data() on it
            shop_page = agentql.wrap(headless_browser.new_page())
            shop_page.goto(URL)

            # Locate the search box, type the product name and submit with Enter
            search_elements = shop_page.query_elements(SEARCH_QUERY)
            search_elements.search_products_box.type("Charmander")
            shop_page.keyboard.press("Enter")

            # Read the stock number from the resulting page
            stock_data = shop_page.query_data(STOCK_NUMBER_QUERY)
            print(stock_data)
4 | updated: 2025-03-05 5 | featured: true 6 | --- 7 | 8 | # Example script: run AgentQL script in online development environment 9 | 10 | This example demonstrates how to run AgentQL script in online development environment like [Google Colaboratory](https://colab.research.google.com/). 11 | 12 | ## Run the script 13 | 14 | - Go to [Google Colaboratory](https://colab.research.google.com/) website 15 | - Upload the **main.ipynb** file to Google Colab 16 | - Run the script by clicking the run button of each cell sequentially. 17 | 18 | ## Play with the query 19 | 20 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. [Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 21 | -------------------------------------------------------------------------------- /examples/python/run_script_online_in_google_colab/main.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This example demonstrates how to run AgentQL script in online development environment like [Google Colaboratory](https://colab.research.google.com/)." 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": { 13 | "id": "cOG_eI8DY3co" 14 | }, 15 | "source": [ 16 | "Install [AgentQL](https://pypi.org/project/agentql/) library" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": { 23 | "id": "mGqCFOMozoqj" 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "%pip install agentql" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "Install the Playwright dependency required by AgentQL." 
35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "!playwright install chromium" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "You can [store](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) keys in Google Colab's Secrets." 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "id": "5a405Rl5lqT1" 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "import os\n", 62 | "\n", 63 | "from google.colab import userdata\n", 64 | "\n", 65 | "os.environ[\"AGENTQL_API_KEY\"]=userdata.get('AGENTQL_API_KEY')" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "Run AgentQL script. Please note that online environment like Google Colab only supports **asynchronous version** of AgentQL." 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "id": "W3IIXkPJ0iIX" 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "import agentql\n", 84 | "from playwright.async_api import async_playwright\n", 85 | "\n", 86 | "\n", 87 | "URL = \"https://scrapeme.live/shop\"\n", 88 | "\n", 89 | "SEARCH_QUERY = \"\"\"\n", 90 | "{\n", 91 | " search_products_box\n", 92 | "}\n", 93 | "\"\"\"\n", 94 | "\n", 95 | "STOCK_NUMBER_QUERY = \"\"\"\n", 96 | "{\n", 97 | " number_in_stock\n", 98 | "}\n", 99 | "\"\"\"\n", 100 | "\n", 101 | "async with async_playwright() as playwright, await playwright.chromium.launch() as browser:\n", 102 | " # Create a new page in the browser and wrap it to get access to the AgentQL's querying API\n", 103 | " page= await agentql.wrap_async(browser.new_page())\n", 104 | "\n", 105 | " await page.goto(URL)\n", 106 | "\n", 107 | " # Use query_elements() method to locate the search box from the page\n", 108 | " response = await
page.query_elements(SEARCH_QUERY)\n", 109 | "\n", 110 | " # Use Playwright's API to fill the search box and press Enter\n", 111 | " await response.search_products_box.type(\"Charmander\")\n", 112 | " await page.keyboard.press(\"Enter\")\n", 113 | "\n", 114 | " # Use query_data() method to fetch the stock number from the page\n", 115 | " response = await page.query_data(STOCK_NUMBER_QUERY)\n", 116 | "\n", 117 | " print(response)" 118 | ] 119 | } 120 | ], 121 | "metadata": { 122 | "colab": { 123 | "authorship_tag": "ABX9TyN+D8bbAfsSzsGIEJwCw5ln", 124 | "include_colab_link": true, 125 | "provenance": [] 126 | }, 127 | "kernelspec": { 128 | "display_name": "Python 3", 129 | "name": "python3" 130 | }, 131 | "language_info": { 132 | "codemirror_mode": { 133 | "name": "ipython", 134 | "version": 3 135 | }, 136 | "file_extension": ".py", 137 | "mimetype": "text/x-python", 138 | "name": "python", 139 | "nbconvert_exporter": "python", 140 | "pygments_lexer": "ipython3", 141 | "version": "3.11.4" 142 | } 143 | }, 144 | "nbformat": 4, 145 | "nbformat_minor": 0 146 | } 147 | -------------------------------------------------------------------------------- /examples/python/save_and_load_authenticated_session/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Save and load authenticated state 3 | description: Save and load an authenticated state (i.e., signed-in state) with AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: saving and loading authenticated state with AgentQL 8 | 9 | This example demonstrates how to save and load an authenticated state (i.e. signed-in state) using AgentQL.
def save_signed_in_state(state_path="yelp_login.json"):
    """Sign in at URL with EMAIL/PASSWORD and save the browser state to *state_path*.

    Args:
        state_path: File the signed-in storage state is written to.
    """
    with sync_playwright() as playwright, playwright.chromium.launch(headless=False) as browser:
        # Create the context explicitly so the state is saved from the very
        # context that performed the sign-in, not from whichever context
        # happens to be first in browser.contexts.
        context = browser.new_context()

        # Create a new page in the browser and wrap it to get access to the AgentQL's querying API
        page = agentql.wrap(context.new_page())

        page.goto(URL)

        # Use query_elements() method to locate "Log In" button on the page
        response = page.query_elements(LOG_IN_QUERY)
        # Use Playwright's API to click located button
        response.log_in_btn.click()

        # Use query_elements() method to locate email, password input fields, and "Log In" button in sign-in form
        response_credentials = page.query_elements(CREDENTIALS_QUERY)
        # Fill the email and password input fields
        response_credentials.sign_in_form.email_input.fill(EMAIL)
        response_credentials.sign_in_form.password_input.fill(PASSWORD)
        response_credentials.sign_in_form.log_in_btn.click()

        page.wait_for_page_ready_state()

        # wait for timeout in order to save state
        page.wait_for_timeout(5000)

        # Save the signed-in state
        context.storage_state(path=state_path)


def load_signed_in_state(state_path="yelp_login.json"):
    """Open URL in a browser context restored from the state saved at *state_path*.

    Args:
        state_path: File previously written by save_signed_in_state().
    """
    with sync_playwright() as playwright, playwright.chromium.launch(headless=False) as browser:
        # Load the saved signed-in session by creating a new browser context with the saved signed-in state
        with browser.new_context(storage_state=state_path) as context:
            # Create a new page in the browser and wrap it to get access to the AgentQL's querying API
            page = agentql.wrap(context.new_page())

            page.goto(URL)

            page.wait_for_page_ready_state()

            # Wait for 10 seconds to see the signed-in page
            page.wait_for_timeout(10000)
10 | 11 | There are several techniques we use in this script to avoid detection: 12 | 13 | - randomize various HTTP headers browser sends to the server. This includes `User-Agent`, `Accept-Language`, `Referer`, etc. This helps with consecutive requests looking more like they are coming from different users. 14 | - randomize browser window size. This is important because some websites track the window size and if it's the same for all requests, it's a sign of a bot. 15 | - randomize timezone and geolocation. This is important because some websites track the timezone and geolocation and if it's the same for all requests, it's a sign of a bot. 16 | - (Optional) use a proxy server. You would need to get a Proxy configuration (host, username, password) separately from an external proxy provider (e.g. [NetNut](https://netnut.io), [BrightData](https://brightdata.com/) or similar) 17 | 18 | ## Run the script 19 | 20 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 21 | - If you already have SDK installed, make sure to update to the latest version: `pip3 install agentql --upgrade` 22 | - Save this python file locally as **main.py** 23 | - Run the following command from the project's folder: 24 | 25 | ```bash 26 | python3 main.py 27 | ``` 28 | -------------------------------------------------------------------------------- /examples/python/stealth_mode/main.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import random 4 | 5 | import agentql 6 | from playwright.async_api import Geolocation, ProxySettings, async_playwright 7 | 8 | logging.basicConfig(level=logging.DEBUG) 9 | log = logging.getLogger(__name__) 10 | 11 | BROWSER_IGNORED_ARGS = [ 12 | "--enable-automation", 13 | "--disable-extensions", 14 | ] 15 | BROWSER_ARGS = [ 16 | "--disable-xss-auditor", 17 | "--no-sandbox", 18 | "--disable-setuid-sandbox", 19 | "--disable-blink-features=AutomationControlled", 20 | 
"--disable-features=IsolateOrigins,site-per-process", 21 | "--disable-infobars", 22 | ] 23 | 24 | USER_AGENTS = [ 25 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36", 26 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15", 27 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:130.0) Gecko/20100101 Firefox/130.0", 28 | ] 29 | 30 | 31 | LOCATIONS = [ 32 | ("America/New_York", Geolocation(longitude=-74.006, latitude=40.7128)), # New York, NY 33 | ("America/Chicago", Geolocation(longitude=-87.6298, latitude=41.8781)), # Chicago, IL 34 | ("America/Los_Angeles", Geolocation(longitude=-118.2437, latitude=34.0522)), # Los Angeles, CA 35 | ("America/Denver", Geolocation(longitude=-104.9903, latitude=39.7392)), # Denver, CO 36 | ("America/Phoenix", Geolocation(longitude=-112.0740, latitude=33.4484)), # Phoenix, AZ 37 | ("America/Anchorage", Geolocation(longitude=-149.9003, latitude=61.2181)), # Anchorage, AK 38 | ("America/Detroit", Geolocation(longitude=-83.0458, latitude=42.3314)), # Detroit, MI 39 | ("America/Indianapolis", Geolocation(longitude=-86.1581, latitude=39.7684)), # Indianapolis, IN 40 | ("America/Boise", Geolocation(longitude=-116.2023, latitude=43.6150)), # Boise, ID 41 | ("America/Juneau", Geolocation(longitude=-134.4197, latitude=58.3019)), # Juneau, AK 42 | ] 43 | 44 | REFERERS = ["https://www.google.com", "https://www.bing.com", "https://duckduckgo.com"] 45 | 46 | ACCEPT_LANGUAGES = ["en-US,en;q=0.9", "en-GB,en;q=0.9", "fr-FR,fr;q=0.9"] 47 | PROXIES: list[ProxySettings] = [ 48 | # TODO: replace with your own proxies 49 | # { 50 | # "server": "http://ip_server:port", 51 | # "username": "proxy_username", 52 | # "password": "proxy_password", 53 | # }, 54 | ] 55 | 56 | 57 | async def main(): 58 | user_agent = random.choice(USER_AGENTS) 59 | header_dnt = random.choice(["0", "1"]) 60 | location = 
random.choice(LOCATIONS) 61 | referer = random.choice(REFERERS) 62 | accept_language = random.choice(ACCEPT_LANGUAGES) 63 | proxy: ProxySettings | None = random.choice(PROXIES) if PROXIES else None 64 | 65 | async with async_playwright() as playwright, await playwright.chromium.launch( 66 | headless=False, 67 | args=BROWSER_ARGS, 68 | ignore_default_args=BROWSER_IGNORED_ARGS, 69 | ) as browser: 70 | context = await browser.new_context( 71 | proxy=proxy, 72 | locale="en-US,en,ru", 73 | timezone_id=location[0], 74 | extra_http_headers={ 75 | "Accept-Language": accept_language, 76 | "Referer": referer, 77 | "DNT": header_dnt, 78 | "Connection": "keep-alive", 79 | "Accept-Encoding": "gzip, deflate, br", 80 | }, 81 | geolocation=location[1], 82 | user_agent=user_agent, 83 | permissions=["notifications"], 84 | viewport={ 85 | "width": 1920 + random.randint(-50, 50), 86 | "height": 1080 + random.randint(-50, 50), 87 | }, 88 | ) 89 | 90 | page = await agentql.wrap_async(context.new_page()) 91 | 92 | await page.enable_stealth_mode(nav_user_agent=user_agent) 93 | 94 | await page.goto("https://bot.sannysoft.com/", referer=referer) 95 | await page.wait_for_timeout(30000) 96 | 97 | 98 | if __name__ == "__main__": 99 | asyncio.run(main()) 100 | -------------------------------------------------------------------------------- /examples/python/submit_form/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Submit form 3 | description: Submit form with AgentQL. 4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: Filling out and submitting a form using AgentQL 8 | 9 | This is an example of how to filling out and submitting a form using AgentQL. 
async def main():
    """Fill out the form at URL, submit it and confirm the submission."""
    form_query = """
        {
            first_name
            last_name
            email
            subject_of_inquiry
            inquiry_text_box
            submit_btn
        }
        """
    confirm_query = """
        {
            confirmation_btn
        }
        """

    async with async_playwright() as playwright:
        async with await playwright.chromium.launch(headless=False) as browser:
            # Wrapping the page exposes AgentQL's querying API on it
            page = await agentql.wrap_async(browser.new_page())
            await page.goto(URL)  # open the target URL

            # Locate the form controls and fill them in
            form = await page.query_elements(form_query)
            await form.first_name.fill("John")
            await form.last_name.fill("Doe")
            await form.email.fill("johndoe@agentql.com")
            await form.subject_of_inquiry.select_option(label="Sales Inquiry")
            await form.inquiry_text_box.fill("I want to learn more about AgentQL")

            # Submit the form
            await form.submit_btn.click()

            # Confirm the submission on the follow-up page
            confirmation = await page.query_elements(confirm_query)
            await confirmation.confirmation_btn.click()
            await page.wait_for_page_ready_state()
            await page.wait_for_timeout(3000)  # wait for 3 seconds
            print("Form submitted successfully!")
def main():
    """Load the video search results at URL, wait for them to load and print the first video.

    Prints a notice instead of failing when the query returns no videos.
    """
    with sync_playwright() as playwright, playwright.chromium.launch(headless=False) as browser:
        # Create a new page in the browser and wrap it to get access to the AgentQL's querying API
        page = agentql.wrap(browser.new_page())

        page.goto(URL)

        for _ in range(2):
            # Wait for additional videos to load completely
            page.wait_for_page_ready_state()
            # Scroll down the page to trigger loading of more videos
            page.keyboard.press("End")

        # Use query_data() method to fetch video lists data from the page
        response = page.query_data(QUERY)

        # Guard against an empty or missing list before indexing the first entry
        videos = (response or {}).get("videos") or []
        if videos:
            # Print the details of the first video
            print(videos[0])
        else:
            print("No videos were found on the page.")
4 | updated: 2025-03-05 5 | --- 6 | 7 | # Example script: getting an element's XPath with AgentQL 8 | 9 | This example demonstrates how to get XPath of an element that was fetched with AgentQL. 10 | 11 | ## Run the script 12 | 13 | - [Install AgentQL SDK](https://docs.agentql.com/installation/sdk-installation) 14 | - [Install Playwright Dompath](https://pypi.org/project/playwright-dompath/) with the following command: 15 | 16 | ```bash 17 | pip install playwright-dompath 18 | ``` 19 | 20 | - Save this python file locally as **main.py** 21 | - Run the following command from the project's folder: 22 | 23 | ```bash 24 | python3 main.py 25 | ``` 26 | 27 | ## Play with the query 28 | 29 | Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. [Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro) 30 | -------------------------------------------------------------------------------- /examples/python/xpath/main.py: -------------------------------------------------------------------------------- 1 | """This example demonstrates how to get XPath of an element that was fetched with AgentQL.""" 2 | 3 | import agentql 4 | from playwright.sync_api import sync_playwright 5 | 6 | # import https://pypi.org/project/playwright-dompath/ 7 | # Playwright Dompath is a Python library that helps you to generate XPath from Playwright selectors. 
def main():
    """Locate the search box on the page at URL and print its XPath."""
    with sync_playwright() as pw:
        with pw.chromium.launch(headless=False) as browser:
            # Wrapping the page exposes AgentQL's querying API on it
            page = agentql.wrap(browser.new_page())
            page.goto(URL)

            # Fetch the search box element with query_elements()
            search_box = page.query_elements(QUERY).search_products_box

            # Get the XPath
            print("XPath:", xpath_path(search_box))