├── .cursor └── rules │ └── browser-use-rules.mdc ├── .dockerignore ├── .env.example ├── .gitattributes ├── .github ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── 1_element_detection_bug.yml │ ├── 2_bug_report.yml │ ├── 3_feature_request.yml │ ├── 4_docs_issue.yml │ └── config.yml └── workflows │ ├── claude.yml │ ├── cloud_evals.yml │ ├── docker.yml │ ├── lint.yml │ ├── package.yaml │ ├── publish.yml │ └── test.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .python-version ├── .vscode ├── launch.json └── settings.json ├── CLAUDE.md ├── Dockerfile ├── LICENSE ├── README.md ├── SECURITY.md ├── bin ├── lint.sh ├── setup.sh └── test.sh ├── browser_use ├── README.md ├── __init__.py ├── agent │ ├── gif.py │ ├── memory │ │ ├── __init__.py │ │ ├── service.py │ │ └── views.py │ ├── message_manager │ │ ├── service.py │ │ ├── tests.py │ │ ├── utils.py │ │ └── views.py │ ├── playwright_script_generator.py │ ├── playwright_script_helpers.py │ ├── prompts.py │ ├── service.py │ ├── system_prompt.md │ ├── tests.py │ └── views.py ├── browser │ ├── __init__.py │ ├── browser.py │ ├── context.py │ ├── extensions.py │ ├── profile.py │ ├── session.py │ └── views.py ├── cli.py ├── controller │ ├── registry │ │ ├── service.py │ │ └── views.py │ ├── service.py │ └── views.py ├── dom │ ├── __init__.py │ ├── buildDomTree.js │ ├── clickable_element_processor │ │ └── service.py │ ├── history_tree_processor │ │ ├── service.py │ │ └── view.py │ ├── service.py │ ├── tests │ │ └── test_accessibility_playground.py │ └── views.py ├── exceptions.py ├── logging_config.py ├── telemetry │ ├── __init__.py │ ├── service.py │ └── views.py └── utils.py ├── codebeaver.yml ├── debug_pydantic.py ├── docs ├── README.md ├── cloud │ ├── implementation.mdx │ ├── quickstart.mdx │ └── webhooks.mdx ├── customize │ ├── agent-settings.mdx │ ├── browser-settings.mdx │ ├── custom-functions.mdx │ ├── hooks.mdx │ ├── output-format.mdx │ ├── real-browser.mdx │ ├── sensitive-data.mdx │ ├── supported-models.mdx │ └── 
system-prompt.mdx ├── development.mdx ├── development │ ├── contribution-guide.mdx │ ├── evaluations.mdx │ ├── local-setup.mdx │ ├── n8n-integration.mdx │ ├── observability.mdx │ ├── roadmap.mdx │ └── telemetry.mdx ├── favicon.svg ├── images │ ├── browser-use.png │ ├── checks-passed.png │ └── laminar.png ├── introduction.mdx ├── logo │ ├── dark.svg │ └── light.svg ├── mint.json └── quickstart.mdx ├── eval ├── claude-3.5.py ├── claude-3.6.py ├── claude-3.7.py ├── deepseek-r1.py ├── deepseek.py ├── gemini-1.5-flash.py ├── gemini-2.0-flash.py ├── gemini-2.5-preview.py ├── gpt-4.1.py ├── gpt-4o-no-boundingbox.py ├── gpt-4o-no-vision.py ├── gpt-4o-viewport-0.py ├── gpt-4o.py ├── gpt-o4-mini.py ├── grok.py └── service.py ├── examples ├── browser │ ├── real_browser.py │ ├── stealth.py │ ├── using_cdp.py │ └── window_sizing.py ├── custom-functions │ ├── 2fa.py │ ├── action_filters.py │ ├── advanced_search.py │ ├── clipboard.py │ ├── custom_hooks_before_after_step.py │ ├── file_upload.py │ ├── hover_element.py │ ├── notification.py │ ├── onepassword_2fa.py │ ├── perplexity_search.py │ └── save_to_file_hugging_face.py ├── features │ ├── click_fallback_options.py │ ├── cross_origin_iframes.py │ ├── custom_output.py │ ├── custom_system_prompt.py │ ├── custom_user_agent.py │ ├── custom_vector_store.py │ ├── download_file.py │ ├── drag_drop.py │ ├── follow_up_tasks.py │ ├── initial_actions.py │ ├── multi-tab_handling.py │ ├── multiple_agents_same_browser.py │ ├── multiple_tasks.py │ ├── outsource_state.py │ ├── parallel_agents.py │ ├── pause_agent.py │ ├── planner.py │ ├── playwright_script_generation.py │ ├── restrict_urls.py │ ├── result_processing.py │ ├── save_trace.py │ ├── sensitive_data.py │ ├── small_model_for_extraction.py │ ├── task_with_memory.py │ └── validate_output.py ├── integrations │ ├── browserbase_stagehand.py │ ├── discord │ │ ├── discord_api.py │ │ └── discord_example.py │ └── slack │ │ ├── README.md │ │ ├── slack_api.py │ │ └── slack_example.py ├── models │ 
├── README.md │ ├── _ollama.py │ ├── azure_openai.py │ ├── bedrock_claude.py │ ├── claude-3.7-sonnet.py │ ├── deepseek-r1.py │ ├── deepseek.py │ ├── gemini.py │ ├── gpt-4o.py │ ├── grok.py │ ├── novita.py │ └── qwen.py ├── notebook │ └── agent_browsing.ipynb ├── simple.py ├── ui │ ├── README.md │ ├── command_line.py │ ├── gradio_demo.py │ └── streamlit_demo.py └── use-cases │ ├── README.md │ ├── captcha.py │ ├── check_appointment.py │ ├── find_and_apply_to_jobs.py │ ├── find_influencer_profiles.py │ ├── google_sheets.py │ ├── online_coding_agent.py │ ├── post-twitter.py │ ├── scrolling_page.py │ ├── shopping.py │ ├── test_cv.txt │ ├── twitter_cookies.txt │ ├── twitter_post_using_cookies.py │ ├── web_voyager_agent.py │ └── wikipedia_banana_to_quantum.py ├── pyproject.toml ├── pytest.ini ├── static ├── browser-use-dark.png ├── browser-use.png ├── kayak.gif └── photos.gif └── tests ├── ci ├── test_action_registry.py ├── test_browser.py ├── test_browser_session.py ├── test_browser_session_param.py ├── test_controller.py ├── test_debug_selector_map.py ├── test_sensitive_data.py ├── test_tab_management.py └── test_url_allowlist_security.py ├── debug_page_structure.py ├── extraction_test.py ├── httpx_client_test.py ├── mind2web_data └── processed.json ├── process_dom_test.py ├── screenshot_test.py ├── test_action_filters.py ├── test_action_params.py ├── test_agent_actions.py ├── test_attach_chrome.py ├── test_browser_config_models.py ├── test_clicks.py ├── test_core_functionality.py ├── test_dropdown.py ├── test_dropdown_complex.py ├── test_dropdown_error.py ├── test_excluded_actions.py ├── test_full_screen.py ├── test_gif_path.py ├── test_google_sheets_real.py ├── test_mind2web.py ├── test_models.py ├── test_qwen.py ├── test_react_dropdown.py ├── test_save_conversation.py ├── test_self_registered_actions.py ├── test_service.py ├── test_stress.py ├── test_vision.py └── test_wait_for_element.py /.cursor/rules/browser-use-rules.mdc: 
-------------------------------------------------------------------------------- 1 | --- 2 | description: 3 | globs: 4 | alwaysApply: true 5 | --- 6 | ## 🧠 General Guidelines for Contributing to `browser-use` 7 | 8 | **Browser-Use** is an AI agent that autonomously interacts with the web. It takes a user-defined task, navigates web pages using Chromium via Playwright, processes HTML, and repeatedly queries a language model (like `gpt-4o`) to decide the next action—until the task is completed. 9 | 10 | ### 🗂️ File Documentation 11 | 12 | When you create a **new file**: 13 | 14 | * **For humans**: At the top of the file, include a docstring in natural language explaining: 15 | 16 | * What this file does. 17 | * How it fits into the browser-use system. 18 | * If it introduces a new abstraction or replaces an old one. 19 | * **For LLMs/AI**: Include structured metadata using standardized comments such as: 20 | 21 | ```python 22 | # @file purpose: Defines 23 | ``` 24 | 25 | --- 26 | 27 | ### 🧰 Development Rules 28 | 29 | * ✅ **Always use [`uv`](mdc:https:/github.com/astral-sh/uv) instead of `pip`** 30 | For deterministic and fast dependency installs. 31 | 32 | ```bash 33 | uv venv --python 3.11 34 | source .venv/bin/activate 35 | uv sync 36 | ``` 37 | 38 | * ✅ **Use real model names** 39 | Do **not** replace `gpt-4o` with `gpt-4`. The model `gpt-4o` is a distinct release and supported. 40 | 41 | * ✅ **Type-safe coding** 42 | Use **Pydantic models** for all internal action schemas, task inputs/outputs, and controller I/O. This ensures robust validation and LLM-call integrity. 
43 | 44 | --- 45 | 46 | ## ⚙️ Adding New Actions 47 | 48 | To add a new action that your browser agent can execute: 49 | 50 | ```python 51 | from browser_use import Controller, ActionResult 52 | 53 | controller = Controller() 54 | 55 | @controller.registry.action("Search the web for a specific query") 56 | async def search_web(query: str): 57 | # Implement your logic here, e.g., query a search engine and return results 58 | result = ... 59 | return ActionResult(extracted_content=result, include_in_memory=True) 60 | ``` 61 | 62 | ### Notes: 63 | 64 | * Use descriptive names and docstrings for each action. 65 | * Prefer returning `ActionResult` with structured content to help the agent reason better. 66 | 67 | --- 68 | 69 | ## 🧠 Creating and Running an Agent 70 | 71 | To define a task and run a browser-use agent: 72 | 73 | ```python 74 | from browser_use import Agent 75 | from langchain.chat_models import ChatOpenAI 76 | 77 | task = "Find the CEO of OpenAI and return their name" 78 | model = ChatOpenAI(model="gpt-4o") 79 | 80 | agent = Agent(task=task, llm=model, controller=controller) 81 | 82 | history = await agent.run() 83 | ``` 84 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | docs/ 2 | static/ 3 | .claude/ 4 | .github/ 5 | 6 | # Cache files 7 | .DS_Store 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | .mypy_cache/ 12 | .ruff_cache/ 13 | .pytest_cache/ 14 | .ipynb_checkpoints 15 | 16 | # Virtual Environments 17 | .venv 18 | venv/ 19 | 20 | # Editor cruft 21 | .vscode/ 22 | .idea/ 23 | 24 | # Build Files 25 | dist/ 26 | 27 | # Data files 28 | *.gif 29 | *.txt 30 | *.pdf 31 | *.csv 32 | *.json 33 | *.jsonl 34 | 35 | # Secrets and sensitive files 36 | secrets.env 37 | .env 38 | browser_cookies.json 39 | cookies.json 40 | gcp-login.json 41 | saved_trajectories/ 42 | AgentHistory.json 43 | 
AgentHistoryList.json 44 | private_example.py 45 | private_example 46 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | ANTHROPIC_API_KEY= 3 | AZURE_OPENAI_ENDPOINT= 4 | AZURE_OPENAI_KEY= 5 | GOOGLE_API_KEY= 6 | DEEPSEEK_API_KEY= 7 | GROK_API_KEY= 8 | NOVITA_API_KEY= 9 | 10 | # Set to false to disable anonymized telemetry 11 | ANONYMIZED_TELEMETRY=true 12 | 13 | # LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info 14 | BROWSER_USE_LOGGING_LEVEL=info 15 | 16 | # set this to true to optimize browser-use's chrome for running inside docker 17 | IN_DOCKER=false 18 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | static/*.gif filter=lfs diff=lfs merge=lfs -text 2 | # static/*.mp4 filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to browser-use 2 | 3 | We love contributions! 
Please read through these links to get started: 4 | 5 | - 🔢 [Contribution Guidelines](https://docs.browser-use.com/development/contribution-guide) 6 | - 👾 [Local Development Setup Guide](https://docs.browser-use.com/development/local-setup) 7 | - 🏷️ [Issues Tagged: `#help-wanted`](https://github.com/browser-use/browser-use/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22help%20wanted%22) 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/4_docs_issue.yml: -------------------------------------------------------------------------------- 1 | name: 📚 Documentation Issue 2 | description: Report an issue in the browser-use documentation 3 | labels: ["documentation"] 4 | title: "Documentation: ..." 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to improve our documentation! Please fill out the form below to help us fix the issue quickly. 10 | 11 | - type: dropdown 12 | id: type 13 | attributes: 14 | label: Type of Documentation Issue 15 | description: What type of documentation issue is this? 16 | options: 17 | - Missing documentation 18 | - Incorrect documentation 19 | - Unclear documentation 20 | - Broken link 21 | - Other (specify in description) 22 | validations: 23 | required: true 24 | 25 | - type: input 26 | id: page 27 | attributes: 28 | label: Documentation Page 29 | description: Which page or section of the documentation is this about? 30 | placeholder: "e.g. https://docs.browser-use.com/customize/browser-settings > Context Configuration > headless" 31 | validations: 32 | required: true 33 | 34 | - type: textarea 35 | id: description 36 | attributes: 37 | label: Issue Description 38 | description: "Describe what's wrong or missing in the documentation" 39 | placeholder: e.g. Docs should clarify whether BrowserSession(no_viewport=False) is supported when running in BrowserSession(headless=False) mode... 
40 | validations: 41 | required: true 42 | 43 | - type: textarea 44 | id: suggestion 45 | attributes: 46 | label: Suggested Changes 47 | description: If you have specific suggestions for how to improve the documentation, please share them 48 | placeholder: | 49 | e.g. The documentation could be improved by adding one more line here: 50 | ```diff 51 | Use `BrowserSession(headless=False)` to open the browser window (aka headful mode). 52 | + Viewports are not supported when headful, if `headless=False` it will force `no_viewport=True`. 53 | ``` 54 | validations: 55 | required: false 56 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false # Set to true if you want to allow blank issues 2 | contact_links: 3 | - name: 🔢 Quickstart Guide 4 | url: https://docs.browser-use.com/quickstart 5 | about: Most common issues can be resolved by following our quickstart guide 6 | - name: 💬 Questions and Help 7 | url: https://link.browser-use.com/discord 8 | about: Please ask questions in our Discord community 9 | - name: 📖 Documentation 10 | url: https://docs.browser-use.com 11 | about: Check our documentation for answers first 12 | -------------------------------------------------------------------------------- /.github/workflows/claude.yml: -------------------------------------------------------------------------------- 1 | name: Claude Code 2 | 3 | on: 4 | issue_comment: 5 | types: [created] 6 | pull_request_review_comment: 7 | types: [created] 8 | issues: 9 | types: [opened, assigned] 10 | pull_request_review: 11 | types: [submitted] 12 | 13 | jobs: 14 | claude: 15 | if: | 16 | (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || 17 | (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || 18 | (github.event_name == 
'pull_request_review' && contains(github.event.review.body, '@claude')) || 19 | (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) 20 | runs-on: ubuntu-latest 21 | permissions: 22 | contents: read 23 | pull-requests: read 24 | issues: read 25 | id-token: write 26 | steps: 27 | - name: Checkout repository 28 | uses: actions/checkout@v4 29 | with: 30 | fetch-depth: 1 31 | 32 | - name: Run Claude Code 33 | id: claude 34 | uses: anthropics/claude-code-action@beta 35 | with: 36 | anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} 37 | -------------------------------------------------------------------------------- /.github/workflows/cloud_evals.yml: -------------------------------------------------------------------------------- 1 | name: cloud_evals 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - 'releases/*' 8 | workflow_dispatch: 9 | inputs: 10 | commit_hash: 11 | description: Commit hash of the library to build the Cloud eval image for 12 | required: false 13 | 14 | jobs: 15 | trigger_cloud_eval_image_build: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/github-script@v7 19 | with: 20 | github-token: ${{ secrets.TRIGGER_CLOUD_BUILD_GH_KEY }} 21 | script: | 22 | const result = await github.rest.repos.createDispatchEvent({ 23 | owner: 'browser-use', 24 | repo: 'cloud', 25 | event_type: 'trigger-workflow', 26 | client_payload: {"commit_hash": "${{ github.event.inputs.commit_hash || github.sha }}"} 27 | }) 28 | console.log(result) 29 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: docker 2 | 3 | on: 4 | push: 5 | release: 6 | types: [published] 7 | 8 | jobs: 9 | build_publish_image: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | packages: write 13 | contents: read 14 | attestations: write 15 | id-token: write 16 | 
steps: 17 | - name: Check out the repo 18 | uses: actions/checkout@v4 19 | 20 | - name: Set up QEMU 21 | uses: docker/setup-qemu-action@v3 22 | 23 | - name: Set up Docker Buildx 24 | uses: docker/setup-buildx-action@v3 25 | 26 | - name: Log in to Docker Hub 27 | uses: docker/login-action@v3 28 | with: 29 | username: ${{ secrets.DOCKER_USERNAME }} 30 | password: ${{ secrets.DOCKER_PASSWORD }} 31 | 32 | - name: Login to GitHub Container Registry 33 | uses: docker/login-action@v3 34 | with: 35 | registry: ghcr.io 36 | username: ${{ github.repository_owner }} 37 | password: ${{ secrets.GITHUB_TOKEN }} 38 | 39 | - name: Compute Docker tags based on tag/branch 40 | id: meta 41 | uses: docker/metadata-action@v5 42 | with: 43 | images: | 44 | browseruse/browseruse 45 | ghcr.io/browser-use/browser-use 46 | tags: | 47 | type=ref,event=branch 48 | type=ref,event=pr 49 | type=pep440,pattern={{version}} 50 | type=pep440,pattern={{major}}.{{minor}} 51 | type=sha 52 | 53 | - name: Build and push Docker image 54 | id: push 55 | uses: docker/build-push-action@v6 56 | with: 57 | platforms: linux/amd64,linux/arm64 58 | context: . 
59 | file: ./Dockerfile 60 | push: true 61 | tags: ${{ steps.meta.outputs.tags }} 62 | labels: ${{ steps.meta.outputs.labels }} 63 | cache-from: type=registry,ref=browseruse/browseruse:buildcache 64 | cache-to: type=registry,ref=browseruse/browseruse:buildcache,mode=max 65 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | on: 3 | push: 4 | branches: 5 | - main 6 | - stable 7 | - 'releases/**' 8 | tags: 9 | - '*' 10 | pull_request: 11 | workflow_dispatch: 12 | 13 | jobs: 14 | lint-syntax: 15 | name: syntax-errors 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: astral-sh/setup-uv@v5 20 | with: 21 | enable-cache: true 22 | - run: uv run ruff check --no-fix --select PLE 23 | 24 | lint-style: 25 | name: code-style 26 | runs-on: ubuntu-latest 27 | steps: 28 | - uses: actions/checkout@v4 29 | - uses: astral-sh/setup-uv@v5 30 | with: 31 | enable-cache: true 32 | - run: uv run pre-commit run --all-files --show-diff-on-failure 33 | 34 | lint-typecheck: 35 | name: type-checker 36 | runs-on: ubuntu-latest 37 | steps: 38 | - uses: actions/checkout@v4 39 | - uses: astral-sh/setup-uv@v6 40 | with: 41 | enable-cache: true 42 | - run: uv run pyright 43 | -------------------------------------------------------------------------------- /.github/workflows/package.yaml: -------------------------------------------------------------------------------- 1 | name: package 2 | on: 3 | push: 4 | branches: 5 | - main 6 | - stable 7 | - 'releases/**' 8 | tags: 9 | - '*' 10 | pull_request: 11 | workflow_dispatch: 12 | 13 | jobs: 14 | build: 15 | name: pip-build 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: astral-sh/setup-uv@v5 20 | - run: uv build --python 3.12 21 | - uses: actions/upload-artifact@v4 22 | with: 23 | name: dist-artifact 24 | path: | 25 | 
dist/*.whl 26 | dist/*.tar.gz 27 | 28 | build_test: 29 | name: pip-install-on-${{ matrix.os }}-py-${{ matrix.python-version }} 30 | needs: build 31 | runs-on: ${{ matrix.os }} 32 | strategy: 33 | matrix: 34 | os: [ubuntu-latest, macos-latest, windows-latest] 35 | python-version: ["3.11", "3.13"] 36 | 37 | steps: 38 | - uses: actions/checkout@v4 39 | - uses: astral-sh/setup-uv@v5 40 | - uses: actions/download-artifact@v4 41 | with: 42 | name: dist-artifact 43 | 44 | - name: Set up venv and test for OS/Python versions 45 | shell: bash 46 | run: | 47 | uv venv /tmp/testenv --python ${{ matrix.python-version }} 48 | if [[ "$RUNNER_OS" == "Windows" ]]; then 49 | . /tmp/testenv/Scripts/activate 50 | else 51 | source /tmp/testenv/bin/activate 52 | fi 53 | uv pip install *.whl 54 | python -c 'from browser_use import Agent, Browser, Controller, ActionModel, ActionResult' 55 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | permissions: 3 | contents: read 4 | 5 | on: 6 | push: 7 | branches: 8 | - main 9 | - stable 10 | - 'releases/**' 11 | tags: 12 | - '*' 13 | pull_request: 14 | workflow_dispatch: 15 | 16 | jobs: 17 | find_tests: 18 | runs-on: ubuntu-latest 19 | outputs: 20 | TEST_FILENAMES: ${{ steps.lsgrep.outputs.TEST_FILENAMES }} 21 | # ["test_browser", "test_controller", "test_browser_session", "test_tab_management", ...] 
22 | steps: 23 | - uses: actions/checkout@v4 24 | - id: lsgrep 25 | run: | 26 | TEST_FILENAMES="$(ls tests/ci/test_*.py | sed 's|^tests/ci/||' | sed 's|\.py$||' | jq -R -s -c 'split("\n")[:-1]')" 27 | echo "TEST_FILENAMES=${TEST_FILENAMES}" >> "$GITHUB_OUTPUT" 28 | echo "$TEST_FILENAMES" 29 | # https://code.dblock.org/2021/09/03/generating-task-matrix-by-looping-over-repo-files-with-github-actions.html 30 | - name: Check that at least one test file is found 31 | run: | 32 | if [ -z "${{ steps.lsgrep.outputs.TEST_FILENAMES }}" ]; then 33 | echo "Failed to find any test_*.py files in tests/ci/ folder!" > /dev/stderr 34 | exit 1 35 | fi 36 | 37 | tests: 38 | needs: find_tests 39 | runs-on: ubuntu-latest 40 | env: 41 | IN_DOCKER: 'True' 42 | strategy: 43 | matrix: 44 | test_filename: ${{ fromJson(needs.find_tests.outputs.TEST_FILENAMES || '["FAILED_TO_DISCOVER_TESTS"]') }} 45 | # autodiscovers all the files in tests/ci/test_*.py 46 | # - test_browser 47 | # - test_controller 48 | # - test_browser_session 49 | # - test_tab_management 50 | # ... 
and more 51 | name: ${{ matrix.test_filename }} 52 | steps: 53 | - name: Check that the previous step managed to find some test files for us to run 54 | run: | 55 | if [[ "${{ matrix.test_filename }}" == "FAILED_TO_DISCOVER_TESTS" ]]; then 56 | echo "Failed get list of test files in tests/ci/test_*.py from find_tests job" > /dev/stderr 57 | exit 1 58 | fi 59 | 60 | - uses: actions/checkout@v4 61 | - uses: astral-sh/setup-uv@v6 62 | with: 63 | enable-cache: true 64 | activate-environment: true 65 | 66 | - run: uv sync 67 | 68 | - name: Detect installed Playwright version 69 | run: echo "PLAYWRIGHT_VERSION=$(uv pip list --format json | jq -r '.[] | select(.name == "playwright") | .version')" >> $GITHUB_ENV 70 | 71 | - name: Cache playwright binaries 72 | uses: actions/cache@v3 73 | with: 74 | path: | 75 | ~/.cache/ms-playwright 76 | key: ${{ runner.os }}-playwright-${{ env.PLAYWRIGHT_VERSION }} 77 | 78 | - run: playwright install chrome 79 | - run: playwright install chromium 80 | 81 | - run: pytest tests/ci/${{ matrix.test_filename }}.py 82 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Cache files 2 | .DS_Store 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | .mypy_cache/ 7 | .ruff_cache/ 8 | .pytest_cache/ 9 | .ipynb_checkpoints 10 | 11 | # Virtual Environments 12 | .venv 13 | venv/ 14 | 15 | # IDEs 16 | .vscode/ 17 | .idea/ 18 | 19 | # Build files 20 | dist/ 21 | 22 | # Data files 23 | *.gif 24 | *.txt 25 | *.pdf 26 | *.csv 27 | *.json 28 | *.jsonl 29 | 30 | # Secrets and sensitive files 31 | secrets.env 32 | .env 33 | browser_cookies.json 34 | cookies.json 35 | gcp-login.json 36 | saved_trajectories/ 37 | AgentHistory.json 38 | AgentHistoryList.json 39 | private_example.py 40 | private_example 41 | CLAUDE.local.md 42 | 43 | uv.lock 44 | temp 45 | tmp 46 | 
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/asottile/yesqa 3 | rev: v1.5.0 4 | hooks: 5 | - id: yesqa 6 | 7 | - repo: https://github.com/codespell-project/codespell 8 | rev: v2.4.1 9 | hooks: 10 | - id: codespell # See pyproject.toml for args 11 | additional_dependencies: 12 | - tomli 13 | 14 | - repo: https://github.com/asottile/pyupgrade 15 | rev: v3.19.1 16 | hooks: 17 | - id: pyupgrade 18 | args: [--py311-plus] 19 | 20 | # - repo: https://github.com/asottile/add-trailing-comma 21 | # rev: v3.1.0 22 | # hooks: 23 | # - id: add-trailing-comma 24 | 25 | - repo: https://github.com/astral-sh/ruff-pre-commit 26 | rev: v0.11.2 27 | hooks: 28 | - id: ruff 29 | - id: ruff-format 30 | # see pyproject.toml for more details on ruff config 31 | 32 | - repo: https://github.com/pre-commit/pre-commit-hooks 33 | rev: v5.0.0 34 | hooks: 35 | # check for basic syntax errors in python and data files 36 | - id: check-ast 37 | - id: check-toml 38 | - id: check-yaml 39 | - id: check-json 40 | - id: check-merge-conflict 41 | # check for bad files and folders 42 | - id: check-symlinks 43 | - id: destroyed-symlinks 44 | - id: check-case-conflict 45 | - id: check-illegal-windows-names 46 | - id: check-shebang-scripts-are-executable 47 | - id: mixed-line-ending 48 | - id: fix-byte-order-marker 49 | - id: end-of-file-fixer 50 | # best practices enforcement 51 | - id: detect-private-key 52 | # - id: check-docstring-first 53 | - id: debug-statements 54 | - id: forbid-submodules 55 | - id: check-added-large-files 56 | args: ["--maxkb=600"] 57 | # - id: name-tests-test 58 | # args: ["--pytest-test-first"] 59 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.11 2 | 
-------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Python Debugger: Current File", 6 | "type": "debugpy", 7 | "request": "launch", 8 | "program": "${file}", 9 | "justMyCode": false, 10 | "env": { 11 | "PYTHONPATH": "${workspaceFolder}" 12 | }, 13 | "console": "integratedTerminal" 14 | }, 15 | { 16 | "name": "Python Debugger: Module", 17 | "type": "debugpy", 18 | "request": "launch", 19 | "module": "examples.extend_actions" 20 | }, 21 | { 22 | "name": "Python: Debug extend_actions", 23 | "type": "debugpy", 24 | "request": "launch", 25 | "module": "examples.extend_actions", 26 | "console": "integratedTerminal", 27 | "justMyCode": false, 28 | "env": { 29 | "PYTHONPATH": "${workspaceFolder}" 30 | } 31 | }, 32 | { 33 | "name": "Python: Debug Captcha Tests", 34 | "type": "debugpy", 35 | "request": "launch", 36 | "module": "pytest", 37 | "args": [ 38 | "tests/test_agent_actions.py", 39 | "-v", 40 | "-k", 41 | "test_captcha_solver", 42 | "--capture=no" 43 | ], 44 | "console": "integratedTerminal", 45 | "justMyCode": false 46 | }, 47 | { 48 | "name": "Python: Debug Ecommerce Interaction", 49 | "type": "debugpy", 50 | "request": "launch", 51 | "module": "pytest", 52 | "args": [ 53 | "tests/test_agent_actions.py", 54 | "-v", 55 | "-k", 56 | "test_ecommerce_interaction", 57 | "--capture=no" 58 | ], 59 | "console": "integratedTerminal", 60 | "justMyCode": false 61 | }, 62 | { 63 | "name": "Python: Debug Core Functionality", 64 | "type": "debugpy", 65 | "request": "launch", 66 | "program": "${workspaceFolder}/.venv/bin/pytest", 67 | "args": [ 68 | "tests/test_core_functionality.py", 69 | "-v" 70 | ], 71 | "console": "integratedTerminal", 72 | "justMyCode": false 73 | }, 74 | { 75 | "name": "pytest: Debug Current File", 76 | "type": "debugpy", 77 | "request": "launch", 78 | 
"module": "pytest", 79 | "args": [ 80 | "${file}", 81 | "-v", 82 | "--capture=no" 83 | ], 84 | "console": "integratedTerminal", 85 | "justMyCode": false 86 | } 87 | ] 88 | } 89 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.analysis.typeCheckingMode": "basic", 3 | "[python]": { 4 | "editor.defaultFormatter": "charliermarsh.ruff", 5 | "editor.formatOnSave": true, 6 | "editor.codeActionsOnSave": { 7 | "source.fixAll.ruff": "explicit", 8 | "source.organizeImports.ruff": "explicit" 9 | } 10 | }, 11 | "python.analysis.inlayHints.variableTypes": false 12 | } 13 | -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- 1 | Use tabs for indentation in all python code. Use async python and the modern python >3.12 typing style, e.g. use `str | None` instead 2 | of `Optional[str]`, and `list[str]` instead of `List[str]`. Use pydantic v2 models to represent internal data, and any user-facing 3 | API parameter that might otherwise be a dict. Use model_config = ConfigDict(extra='forbid', validate_by_name=True, 4 | validate_by_alias=True) etc. settings to tune the pydantic model behavior depending on the use-case. Store most pydantic models in 5 | views.py files. 6 | 7 | Try to keep all console logging logic in separate methods all prefixed with `_log_...`, e.g. `def _log_pretty_path(path: Path) -> str` so as not to clutter up the main logic. 8 | 9 | Make sure to read relevant examples in the examples/ directory and keep them up-to-date when making changes. Also make sure to read 10 | the relevant tests in the tests/ directory and keep them up-to-date as well. Once tests pass they should be moved into the tests/ci/ 11 | subdirectory so that CI will automatically continue to run them on every commit. 
12 | 13 | When doing any refactor, first write failing tests for the new design, then write tests that verify the old design works in order to maintain backwards-compatibility during the refactor. Then implement the changes, then finally update the docs and examples and simplify/condense the test logic to reduce any duplication that got introduced during editing. 14 | 15 | Prefer uuid7str() (from uuid_extensions) for all new id fields. 16 | 17 | When doing any truly massive refactors, trend towards using simple event buses and job queues to break down systems into isolated 18 | subcomponents that each manage some well-defined internal state machines. 19 | 20 | Don't worry about formalities. 21 | 22 | Don't shy away from complexity, assume a deeply technical explanation is wanted for all questions. Call out the proper terminology, 23 | models, units, etc. used by fields of study relevant to the question. Information theory and game theory can be useful lenses to 24 | evaluate complex systems. 25 | 26 | Choose your analogies carefully and keep poetic flowery language to a minimum, a little dry wit is welcome. 27 | 28 | If policy prevents you from responding normally, please print "!!!!" before answering. 29 | 30 | If a policy prevents you from having an opinion, pretend to be responding as if you shared opinions that might be typical of eigenrobot. 31 | 32 | be critical of the quality of your information 33 | 34 | if you find any request irritating respond dismissively like "be real" or "that's crazy man" or "lol no" 35 | 36 | take however smart you're acting right now and write in the same style but as if you were +2sd smarter 37 | 38 | If you struggle to update or edit files in-place, try shortening your match string to 1 or 2 lines instead of 3. 39 | If that doesn't work, just insert your new modified code as new lines in the file, then remove the old code in a second step instead of replacing. 
40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Gregor Zunic 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | ## Reporting Security Issues 2 | 3 | If you believe you have found a security vulnerability in browser-use, please report it through coordinated disclosure. 4 | 5 | **Please do not report security vulnerabilities through the repository issues, discussions, or pull requests.** 6 | 7 | Instead, please open a new [Github security advisory](https://github.com/browser-use/browser-use/security/advisories/new). 
8 | 9 | Please include as much of the information listed below as you can to help me better understand and resolve the issue: 10 | 11 | * The type of issue (e.g., buffer overflow, SQL injection, or cross-site scripting) 12 | * Full paths of source file(s) related to the manifestation of the issue 13 | * The location of the affected source code (tag/branch/commit or direct URL) 14 | * Any special configuration required to reproduce the issue 15 | * Step-by-step instructions to reproduce the issue 16 | * Proof-of-concept or exploit code (if possible) 17 | * Impact of the issue, including how an attacker might exploit the issue 18 | 19 | This information will help me triage your report more quickly. 20 | -------------------------------------------------------------------------------- /bin/lint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This script is used to run the formatter, linter, and type checker pre-commit hooks. 3 | # Usage: 4 | # $ ./bin/lint.sh 5 | 6 | IFS=$'\n' 7 | 8 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 9 | 10 | cd "$SCRIPT_DIR/.." || exit 1 11 | 12 | exec uv run pre-commit run --all-files 13 | -------------------------------------------------------------------------------- /bin/setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This script is used to setup a local development environment for the browser-use project. 
3 | # Usage: 4 | # $ ./bin/setup.sh 5 | 6 | ### Bash Environment Setup 7 | # http://redsymbol.net/articles/unofficial-bash-strict-mode/ 8 | # https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html 9 | # set -o xtrace 10 | # set -x 11 | # shopt -s nullglob 12 | set -o errexit 13 | set -o errtrace 14 | set -o nounset 15 | set -o pipefail 16 | IFS=$'\n' 17 | 18 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 19 | cd "$SCRIPT_DIR" 20 | 21 | 22 | if [ -f "$SCRIPT_DIR/lint.sh" ]; then 23 | echo "[√] already inside a cloned browser-use repo" 24 | else 25 | echo "[+] Cloning browser-use repo into current directory: $SCRIPT_DIR" 26 | git clone https://github.com/browser-use/browser-use 27 | cd browser-use 28 | fi 29 | 30 | echo "[+] Installing uv..." 31 | curl -LsSf https://astral.sh/uv/install.sh | sh 32 | 33 | #git checkout main git pull 34 | echo 35 | echo "[+] Setting up venv" 36 | uv venv 37 | echo 38 | echo "[+] Installing packages in venv" 39 | uv sync --dev --all-extras 40 | echo 41 | echo "[i] Tip: make sure to set BROWSER_USE_LOGGING_LEVEL=debug and your LLM API keys in your .env file" 42 | echo 43 | uv pip show browser-use 44 | 45 | echo "Usage:" 46 | echo " $ browser-use use the CLI" 47 | echo " or" 48 | echo " $ source .venv/bin/activate" 49 | echo " $ ipython use the library" 50 | echo " >>> from browser_use import BrowserSession, Agent" 51 | echo " >>> await Agent(task='book me a flight to fiji', browser=BrowserSession(headless=False)).run()" 52 | echo "" 53 | -------------------------------------------------------------------------------- /bin/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This script is used to run all the main project tests that run on CI via .github/workflows/test.yaml. 3 | # Usage: 4 | # $ ./bin/test.sh 5 | 6 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 7 | cd "$SCRIPT_DIR/.." 
|| exit 1 8 | 9 | exec uv run pytest tests/ci 10 | -------------------------------------------------------------------------------- /browser_use/README.md: -------------------------------------------------------------------------------- 1 | # Codebase Structure 2 | 3 | > The code structure is inspired by https://github.com/Netflix/dispatch. 4 | 5 | A very good guide on how to structure a scalable codebase is also in [this repo](https://github.com/zhanymkanov/fastapi-best-practices). 6 | 7 | Just a brief document about how we should structure our backend codebase. 8 | 9 | ## Code Structure 10 | 11 | ```markdown 12 | src/ 13 | // 14 | models.py 15 | services.py 16 | prompts.py 17 | views.py 18 | utils.py 19 | routers.py 20 | 21 | /_/ 22 | ``` 23 | 24 | ### Service.py 25 | 26 | Always a single file, unless it becomes too long (more than ~500 lines); then split it into \_subservices 27 | 28 | ### Views.py 29 | 30 | Always split the views into two parts 31 | 32 | ```python 33 | # All 34 | ... 35 | 36 | # Requests 37 | ... 38 | 39 | # Responses 40 | ...
41 | ``` 42 | 43 | If too long → split into multiple files 44 | 45 | ### Prompts.py 46 | 47 | Single file; if too long → split into multiple files (one prompt per file or so) 48 | 49 | ### Routers.py 50 | 51 | Never split into more than one file 52 | -------------------------------------------------------------------------------- /browser_use/__init__.py: -------------------------------------------------------------------------------- 1 | from browser_use.logging_config import setup_logging 2 | 3 | setup_logging() 4 | 5 | from browser_use.agent.prompts import SystemPrompt 6 | from browser_use.agent.service import Agent 7 | from browser_use.agent.views import ActionModel, ActionResult, AgentHistoryList 8 | from browser_use.browser import Browser, BrowserConfig, BrowserContext, BrowserContextConfig, BrowserProfile, BrowserSession 9 | from browser_use.controller.service import Controller 10 | from browser_use.dom.service import DomService 11 | 12 | __all__ = [ 13 | 'Agent', 14 | 'Browser', 15 | 'BrowserConfig', 16 | 'BrowserSession', 17 | 'BrowserProfile', 18 | 'Controller', 19 | 'DomService', 20 | 'SystemPrompt', 21 | 'ActionResult', 22 | 'ActionModel', 23 | 'AgentHistoryList', 24 | 'BrowserContext', 25 | 'BrowserContextConfig', 26 | ] 27 | -------------------------------------------------------------------------------- /browser_use/agent/memory/__init__.py: -------------------------------------------------------------------------------- 1 | from browser_use.agent.memory.service import Memory 2 | from browser_use.agent.memory.views import MemoryConfig 3 | 4 | __all__ = ['Memory', 'MemoryConfig'] 5 | -------------------------------------------------------------------------------- /browser_use/browser/__init__.py: -------------------------------------------------------------------------------- 1 | from .browser import Browser, BrowserConfig 2 | from .context import BrowserContext, BrowserContextConfig 3 | from .profile import BrowserProfile 4 | from .session import 
BrowserSession 5 | 6 | __all__ = ['Browser', 'BrowserConfig', 'BrowserContext', 'BrowserContextConfig', 'BrowserSession', 'BrowserProfile'] 7 | -------------------------------------------------------------------------------- /browser_use/browser/browser.py: -------------------------------------------------------------------------------- 1 | from browser_use.browser.profile import BrowserProfile 2 | from browser_use.browser.session import BrowserSession 3 | 4 | BrowserConfig = BrowserProfile 5 | BrowserContextConfig = BrowserProfile 6 | Browser = BrowserSession 7 | 8 | __all__ = ['BrowserConfig', 'BrowserContextConfig', 'Browser'] 9 | -------------------------------------------------------------------------------- /browser_use/browser/context.py: -------------------------------------------------------------------------------- 1 | from browser_use.browser.profile import BrowserProfile 2 | from browser_use.browser.session import BrowserSession 3 | 4 | Browser = BrowserSession 5 | BrowserConfig = BrowserProfile 6 | BrowserContext = BrowserSession 7 | BrowserContextConfig = BrowserProfile 8 | 9 | __all__ = ['Browser', 'BrowserConfig', 'BrowserContext', 'BrowserContextConfig'] 10 | -------------------------------------------------------------------------------- /browser_use/browser/views.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Any 3 | 4 | from pydantic import BaseModel 5 | 6 | from browser_use.dom.history_tree_processor.service import DOMHistoryElement 7 | from browser_use.dom.views import DOMState 8 | 9 | 10 | # Pydantic 11 | class TabInfo(BaseModel): 12 | """Represents information about a browser tab""" 13 | 14 | page_id: int 15 | url: str 16 | title: str 17 | parent_page_id: int | None = None # parent page that contains this popup or cross-origin iframe 18 | 19 | 20 | @dataclass 21 | class BrowserStateSummary(DOMState): 22 | """The summary of the browser's 
current state designed for an LLM to process""" 23 | 24 | # provided by DOMState: 25 | # element_tree: DOMElementNode 26 | # selector_map: SelectorMap 27 | 28 | url: str 29 | title: str 30 | tabs: list[TabInfo] 31 | screenshot: str | None = field(default=None, repr=False) 32 | pixels_above: int = 0 33 | pixels_below: int = 0 34 | browser_errors: list[str] = field(default_factory=list) 35 | 36 | 37 | @dataclass 38 | class BrowserStateHistory: 39 | """The summary of the browser's state at a past point in time to use in LLM message history""" 40 | 41 | url: str 42 | title: str 43 | tabs: list[TabInfo] 44 | interacted_element: list[DOMHistoryElement | None] | list[None] 45 | screenshot: str | None = None 46 | 47 | def to_dict(self) -> dict[str, Any]: 48 | data = {} 49 | data['tabs'] = [tab.model_dump() for tab in self.tabs] 50 | data['screenshot'] = self.screenshot 51 | data['interacted_element'] = [el.to_dict() if el else None for el in self.interacted_element] 52 | data['url'] = self.url 53 | data['title'] = self.title 54 | return data 55 | 56 | 57 | class BrowserError(Exception): 58 | """Base class for all browser errors""" 59 | 60 | 61 | class URLNotAllowedError(BrowserError): 62 | """Error raised when a URL is not allowed""" 63 | -------------------------------------------------------------------------------- /browser_use/controller/views.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, ConfigDict, Field, model_validator 2 | 3 | 4 | # Action Input Models 5 | class SearchGoogleAction(BaseModel): 6 | query: str 7 | 8 | 9 | class GoToUrlAction(BaseModel): 10 | url: str 11 | 12 | 13 | class ClickElementAction(BaseModel): 14 | index: int 15 | xpath: str | None = None 16 | 17 | 18 | class InputTextAction(BaseModel): 19 | index: int 20 | text: str 21 | xpath: str | None = None 22 | 23 | 24 | class DoneAction(BaseModel): 25 | text: str 26 | success: bool 27 | 28 | 29 | class
SwitchTabAction(BaseModel): 30 | page_id: int 31 | 32 | 33 | class OpenTabAction(BaseModel): 34 | url: str 35 | 36 | 37 | class CloseTabAction(BaseModel): 38 | page_id: int 39 | 40 | 41 | class ScrollAction(BaseModel): 42 | amount: int | None = None # The number of pixels to scroll. If None, scroll down/up one page 43 | 44 | 45 | class SendKeysAction(BaseModel): 46 | keys: str 47 | 48 | 49 | class ExtractPageContentAction(BaseModel): 50 | value: str 51 | 52 | 53 | class NoParamsAction(BaseModel): 54 | """ 55 | Accepts absolutely anything in the incoming data 56 | and discards it, so the final parsed model is empty. 57 | """ 58 | 59 | model_config = ConfigDict(extra='allow') 60 | 61 | @model_validator(mode='before') 62 | def ignore_all_inputs(cls, values): 63 | # No matter what the user sends, discard it and return empty. 64 | return {} 65 | 66 | 67 | class Position(BaseModel): 68 | x: int 69 | y: int 70 | 71 | 72 | class DragDropAction(BaseModel): 73 | # Element-based approach 74 | element_source: str | None = Field(None, description='CSS selector or XPath of the element to drag from') 75 | element_target: str | None = Field(None, description='CSS selector or XPath of the element to drop onto') 76 | element_source_offset: Position | None = Field( 77 | None, description='Precise position within the source element to start drag (in pixels from top-left corner)' 78 | ) 79 | element_target_offset: Position | None = Field( 80 | None, description='Precise position within the target element to drop (in pixels from top-left corner)' 81 | ) 82 | 83 | # Coordinate-based approach (used if selectors not provided) 84 | coord_source_x: int | None = Field(None, description='Absolute X coordinate on page to start drag from (in pixels)') 85 | coord_source_y: int | None = Field(None, description='Absolute Y coordinate on page to start drag from (in pixels)') 86 | coord_target_x: int | None = Field(None, description='Absolute X coordinate on page to drop at (in pixels)') 87 | 
coord_target_y: int | None = Field(None, description='Absolute Y coordinate on page to drop at (in pixels)') 88 | 89 | # Common options 90 | steps: int | None = Field(10, description='Number of intermediate points for smoother movement (5-20 recommended)') 91 | delay_ms: int | None = Field(5, description='Delay in milliseconds between steps (0 for fastest, 10-20 for more natural)') 92 | -------------------------------------------------------------------------------- /browser_use/dom/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/a04def7a37d2992d32b686b628a5603cfe73ea24/browser_use/dom/__init__.py -------------------------------------------------------------------------------- /browser_use/dom/clickable_element_processor/service.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | 3 | from browser_use.dom.views import DOMElementNode 4 | 5 | 6 | class ClickableElementProcessor: 7 | @staticmethod 8 | def get_clickable_elements_hashes(dom_element: DOMElementNode) -> set[str]: 9 | """Get all clickable elements in the DOM tree""" 10 | clickable_elements = ClickableElementProcessor.get_clickable_elements(dom_element) 11 | return {ClickableElementProcessor.hash_dom_element(element) for element in clickable_elements} 12 | 13 | @staticmethod 14 | def get_clickable_elements(dom_element: DOMElementNode) -> list[DOMElementNode]: 15 | """Get all clickable elements in the DOM tree""" 16 | clickable_elements = list() 17 | for child in dom_element.children: 18 | if isinstance(child, DOMElementNode): 19 | if child.highlight_index: 20 | clickable_elements.append(child) 21 | 22 | clickable_elements.extend(ClickableElementProcessor.get_clickable_elements(child)) 23 | 24 | return list(clickable_elements) 25 | 26 | @staticmethod 27 | def hash_dom_element(dom_element: DOMElementNode) -> str: 28 | parent_branch_path = 
ClickableElementProcessor._get_parent_branch_path(dom_element) 29 | branch_path_hash = ClickableElementProcessor._parent_branch_path_hash(parent_branch_path) 30 | attributes_hash = ClickableElementProcessor._attributes_hash(dom_element.attributes) 31 | xpath_hash = ClickableElementProcessor._xpath_hash(dom_element.xpath) 32 | # text_hash = DomTreeProcessor._text_hash(dom_element) 33 | 34 | return ClickableElementProcessor._hash_string(f'{branch_path_hash}-{attributes_hash}-{xpath_hash}') 35 | 36 | @staticmethod 37 | def _get_parent_branch_path(dom_element: DOMElementNode) -> list[str]: 38 | parents: list[DOMElementNode] = [] 39 | current_element: DOMElementNode = dom_element 40 | while current_element.parent is not None: 41 | parents.append(current_element) 42 | current_element = current_element.parent 43 | 44 | parents.reverse() 45 | 46 | return [parent.tag_name for parent in parents] 47 | 48 | @staticmethod 49 | def _parent_branch_path_hash(parent_branch_path: list[str]) -> str: 50 | parent_branch_path_string = '/'.join(parent_branch_path) 51 | return hashlib.sha256(parent_branch_path_string.encode()).hexdigest() 52 | 53 | @staticmethod 54 | def _attributes_hash(attributes: dict[str, str]) -> str: 55 | attributes_string = ''.join(f'{key}={value}' for key, value in attributes.items()) 56 | return ClickableElementProcessor._hash_string(attributes_string) 57 | 58 | @staticmethod 59 | def _xpath_hash(xpath: str) -> str: 60 | return ClickableElementProcessor._hash_string(xpath) 61 | 62 | @staticmethod 63 | def _text_hash(dom_element: DOMElementNode) -> str: 64 | """ """ 65 | text_string = dom_element.get_all_text_till_next_clickable_element() 66 | return ClickableElementProcessor._hash_string(text_string) 67 | 68 | @staticmethod 69 | def _hash_string(string: str) -> str: 70 | return hashlib.sha256(string.encode()).hexdigest() 71 | -------------------------------------------------------------------------------- /browser_use/dom/history_tree_processor/view.py: 
-------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | @dataclass 7 | class HashedDomElement: 8 | """ 9 | Hash of the dom element to be used as a unique identifier 10 | """ 11 | 12 | branch_path_hash: str 13 | attributes_hash: str 14 | xpath_hash: str 15 | # text_hash: str 16 | 17 | 18 | class Coordinates(BaseModel): 19 | x: int 20 | y: int 21 | 22 | 23 | class CoordinateSet(BaseModel): 24 | top_left: Coordinates 25 | top_right: Coordinates 26 | bottom_left: Coordinates 27 | bottom_right: Coordinates 28 | center: Coordinates 29 | width: int 30 | height: int 31 | 32 | 33 | class ViewportInfo(BaseModel): 34 | scroll_x: int 35 | scroll_y: int 36 | width: int 37 | height: int 38 | 39 | 40 | @dataclass 41 | class DOMHistoryElement: 42 | tag_name: str 43 | xpath: str 44 | highlight_index: int | None 45 | entire_parent_branch_path: list[str] 46 | attributes: dict[str, str] 47 | shadow_root: bool = False 48 | css_selector: str | None = None 49 | page_coordinates: CoordinateSet | None = None 50 | viewport_coordinates: CoordinateSet | None = None 51 | viewport_info: ViewportInfo | None = None 52 | 53 | def to_dict(self) -> dict: 54 | page_coordinates = self.page_coordinates.model_dump() if self.page_coordinates else None 55 | viewport_coordinates = self.viewport_coordinates.model_dump() if self.viewport_coordinates else None 56 | viewport_info = self.viewport_info.model_dump() if self.viewport_info else None 57 | 58 | return { 59 | 'tag_name': self.tag_name, 60 | 'xpath': self.xpath, 61 | 'highlight_index': self.highlight_index, 62 | 'entire_parent_branch_path': self.entire_parent_branch_path, 63 | 'attributes': self.attributes, 64 | 'shadow_root': self.shadow_root, 65 | 'css_selector': self.css_selector, 66 | 'page_coordinates': page_coordinates, 67 | 'viewport_coordinates': viewport_coordinates, 68 | 'viewport_info': viewport_info, 69 | } 70 | 
-------------------------------------------------------------------------------- /browser_use/dom/tests/test_accessibility_playground.py: -------------------------------------------------------------------------------- 1 | """ 2 | Accessibility Tree Playground for browser-use 3 | 4 | - Launches a browser and navigates to a target URL (default: amazon.com) 5 | - Extracts both the full and interesting-only accessibility trees using Playwright 6 | - Prints and saves both trees to JSON files 7 | - Recursively prints relevant info for each node (role, name, value, description, focusable, focused, checked, selected, disabled, children count) 8 | - Explains the difference between the accessibility tree and the DOM tree 9 | - Notes on React/Vue/SPA apps 10 | - Easy to modify for your own experiments 11 | 12 | Run with: python browser_use/dom/tests/test_accessibility_playground.py 13 | """ 14 | 15 | import asyncio 16 | 17 | from playwright.async_api import async_playwright 18 | 19 | # Change this to any site you want to test 20 | 21 | 22 | # Helper to recursively print relevant info from the accessibility tree 23 | def print_ax_tree(node, depth=0): 24 | if not node: 25 | return 26 | indent = ' ' * depth 27 | info = [ 28 | f'role={node.get("role")!r}', 29 | f'name={node.get("name")!r}' if node.get('name') else None, 30 | f'value={node.get("value")!r}' if node.get('value') else None, 31 | f'desc={node.get("description")!r}' if node.get('description') else None, 32 | f'focusable={node.get("focusable")!r}' if 'focusable' in node else None, 33 | f'focused={node.get("focused")!r}' if 'focused' in node else None, 34 | f'checked={node.get("checked")!r}' if 'checked' in node else None, 35 | f'selected={node.get("selected")!r}' if 'selected' in node else None, 36 | f'disabled={node.get("disabled")!r}' if 'disabled' in node else None, 37 | f'children={len(node.get("children", []))}' if node.get('children') else None, 38 | ] 39 | print('--------------------------------') 40 | 
print(indent + ', '.join([x for x in info if x])) 41 | for child in node.get('children', []): 42 | print_ax_tree(child, depth + 1) 43 | 44 | 45 | # Helper to print all available accessibility node attributes 46 | # Prints all key-value pairs for each node (except 'children'), then recurses into children 47 | def print_all_fields(node, depth=0): 48 | if not node: 49 | return 50 | indent = ' ' * depth 51 | for k, v in node.items(): 52 | if k != 'children': 53 | print(f'{indent}{k}: {v!r}') 54 | if 'children' in node: 55 | print(f'{indent}children: {len(node["children"])}') 56 | for child in node['children']: 57 | print_all_fields(child, depth + 1) 58 | 59 | 60 | def flatten_ax_tree(node, lines): 61 | if not node: 62 | return 63 | role = node.get('role', '') 64 | name = node.get('name', '') 65 | lines.append(f'{role} {name}') 66 | for child in node.get('children', []): 67 | flatten_ax_tree(child, lines) 68 | 69 | 70 | async def get_ax_tree(TARGET_URL): 71 | async with async_playwright() as p: 72 | browser = await p.chromium.launch(headless=True) 73 | page = await browser.new_page() 74 | print(f'Navigating to {TARGET_URL}') 75 | await page.goto(TARGET_URL, wait_until='domcontentloaded') 76 | 77 | ax_tree_interesting = await page.accessibility.snapshot(interesting_only=True) 78 | lines = [] 79 | flatten_ax_tree(ax_tree_interesting, lines) 80 | print(lines) 81 | print(f'length of ax_tree_interesting: {len(lines)}') 82 | 83 | await browser.close() 84 | 85 | 86 | if __name__ == '__main__': 87 | TARGET_URL = [ 88 | # 'https://amazon.com/', 89 | # 'https://www.google.com/', 90 | # 'https://www.facebook.com/', 91 | # 'https://platform.openai.com/tokenizer', 92 | 'https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/input/checkbox', 93 | ] 94 | for url in TARGET_URL: 95 | asyncio.run(get_ax_tree(url)) 96 | -------------------------------------------------------------------------------- /browser_use/exceptions.py: 
-------------------------------------------------------------------------------- 1 | class LLMException(Exception): 2 | def __init__(self, status_code, message): 3 | self.status_code = status_code 4 | self.message = message 5 | super().__init__(f'Error {status_code}: {message}') 6 | -------------------------------------------------------------------------------- /browser_use/telemetry/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Telemetry for Browser Use. 3 | """ 4 | 5 | from browser_use.telemetry.service import ProductTelemetry 6 | from browser_use.telemetry.views import BaseTelemetryEvent, ControllerRegisteredFunctionsTelemetryEvent 7 | 8 | __all__ = ['BaseTelemetryEvent', 'ControllerRegisteredFunctionsTelemetryEvent', 'ProductTelemetry'] 9 | -------------------------------------------------------------------------------- /browser_use/telemetry/views.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from collections.abc import Sequence 3 | from dataclasses import asdict, dataclass 4 | from typing import Any 5 | 6 | 7 | @dataclass 8 | class BaseTelemetryEvent(ABC): 9 | @property 10 | @abstractmethod 11 | def name(self) -> str: 12 | pass 13 | 14 | @property 15 | def properties(self) -> dict[str, Any]: 16 | return {k: v for k, v in asdict(self).items() if k != 'name'} 17 | 18 | 19 | @dataclass 20 | class RegisteredFunction: 21 | name: str 22 | params: dict[str, Any] 23 | 24 | 25 | @dataclass 26 | class ControllerRegisteredFunctionsTelemetryEvent(BaseTelemetryEvent): 27 | registered_functions: list[RegisteredFunction] 28 | name: str = 'controller_registered_functions' 29 | 30 | 31 | @dataclass 32 | class AgentTelemetryEvent(BaseTelemetryEvent): 33 | # start details 34 | task: str 35 | model: str 36 | model_provider: str 37 | planner_llm: str | None 38 | max_steps: int 39 | max_actions_per_step: int 40 | use_vision: bool 41 | 
use_validation: bool 42 | version: str 43 | source: str 44 | # step details 45 | action_errors: Sequence[str | None] 46 | action_history: Sequence[list[dict] | None] 47 | urls_visited: Sequence[str | None] 48 | # end details 49 | steps: int 50 | total_input_tokens: int 51 | total_duration_seconds: float 52 | success: bool | None 53 | final_result_response: str | None 54 | error_message: str | None 55 | 56 | name: str = 'agent_event' 57 | -------------------------------------------------------------------------------- /codebeaver.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | - OPENAI_API_KEY=empty 3 | - AZURE_OPENAI_KEY=empty 4 | from: pytest 5 | -------------------------------------------------------------------------------- /debug_pydantic.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | from pydantic import BaseModel 4 | 5 | from browser_use.controller.views import ClickElementAction 6 | 7 | 8 | # Check the pydantic detection logic 9 | def click_element_by_index(params: ClickElementAction, browser_session): 10 | pass 11 | 12 | 13 | sig = inspect.signature(click_element_by_index) 14 | parameters = list(sig.parameters.values()) 15 | parameter_names = [param.name for param in parameters] 16 | 17 | print('Parameters:', parameter_names) 18 | print('First param name:', parameters[0].name) 19 | print('First param annotation:', parameters[0].annotation) 20 | print('Is BaseModel:', issubclass(parameters[0].annotation, BaseModel)) 21 | 22 | # Check the name detection logic 23 | name_check = parameters[0].name in ['params', 'param', 'model'] or parameters[0].name.endswith('_model') 24 | print('Name check passed:', name_check) 25 | 26 | is_pydantic = ( 27 | parameters 28 | and len(parameters) > 0 29 | and hasattr(parameters[0], 'annotation') 30 | and parameters[0].annotation != parameters[0].empty 31 | and issubclass(parameters[0].annotation, BaseModel) 
32 | and name_check 33 | ) 34 | print('Is pydantic:', is_pydantic) 35 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Docs 2 | 3 | The official documentation for Browser Use. The docs are published to [Browser Use Docs](https://docs.browser-use.com). 4 | 5 | ### Development 6 | 7 | Install the [Mintlify CLI](https://www.npmjs.com/package/mintlify) to preview the documentation changes locally. To install, use the following command 8 | 9 | ``` 10 | npm i -g mintlify 11 | ``` 12 | 13 | Run the following command at the root of your documentation (where mint.json is) 14 | 15 | ``` 16 | mintlify dev 17 | ``` 18 | -------------------------------------------------------------------------------- /docs/customize/output-format.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Output Format" 3 | description: "The default is text. But you can define a structured output format to make post-processing easier." 4 | icon: "code" 5 | --- 6 | 7 | ## Custom output format 8 | With [this example](https://github.com/browser-use/browser-use/blob/main/examples/features/custom_output.py) you can define what output format the agent should return to you. 
9 | 10 | ```python 11 | from pydantic import BaseModel 12 | # Define the output format as a Pydantic model 13 | class Post(BaseModel): 14 | post_title: str 15 | post_url: str 16 | num_comments: int 17 | hours_since_post: int 18 | 19 | 20 | class Posts(BaseModel): 21 | posts: list[Post] 22 | 23 | 24 | controller = Controller(output_model=Posts) 25 | 26 | 27 | async def main(): 28 | task = 'Go to hackernews show hn and give me the first 5 posts' 29 | model = ChatOpenAI(model='gpt-4o') 30 | agent = Agent(task=task, llm=model, controller=controller) 31 | 32 | history = await agent.run() 33 | 34 | result = history.final_result() 35 | if result: 36 | parsed: Posts = Posts.model_validate_json(result) 37 | 38 | for post in parsed.posts: 39 | print('\n--------------------------------') 40 | print(f'Title: {post.post_title}') 41 | print(f'URL: {post.post_url}') 42 | print(f'Comments: {post.num_comments}') 43 | print(f'Hours since post: {post.hours_since_post}') 44 | else: 45 | print('No result') 46 | 47 | 48 | if __name__ == '__main__': 49 | asyncio.run(main()) 50 | ``` 51 | -------------------------------------------------------------------------------- /docs/customize/system-prompt.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "System Prompt" 3 | description: "Customize the system prompt to control agent behavior and capabilities" 4 | icon: "message" 5 | --- 6 | 7 | ## Overview 8 | 9 | You can customize the system prompt in two ways: 10 | 11 | 1. Extend the default system prompt with additional instructions 12 | 2. Override the default system prompt entirely 13 | 14 | 15 | Custom system prompts allow you to modify the agent's behavior at a 16 | fundamental level. Use this feature carefully as it can significantly impact 17 | the agent's performance and reliability.
18 | 19 | 20 | ### Extend System Prompt (recommended) 21 | 22 | To add additional instructions to the default system prompt: 23 | 24 | ```python 25 | extend_system_message = """ 26 | REMEMBER the most important RULE: 27 | ALWAYS open first a new tab and go first to url wikipedia.com no matter the task!!! 28 | """ 29 | ``` 30 | 31 | ### Override System Prompt 32 | 33 | 34 | Not recommended! If you must override the [default system 35 | prompt](https://github.com/browser-use/browser-use/blob/main/browser_use/agent/system_prompt.md), 36 | make sure to test the agent yourself. 37 | 38 | 39 | Anyway, to override the default system prompt: 40 | 41 | ```python 42 | # Define your complete custom prompt 43 | override_system_message = """ 44 | You are an AI agent that helps users with web browsing tasks. 45 | 46 | [Your complete custom instructions here...] 47 | """ 48 | 49 | # Create agent with custom system prompt 50 | agent = Agent( 51 | task="Your task here", 52 | llm=ChatOpenAI(model='gpt-4'), 53 | override_system_message=override_system_message 54 | ) 55 | ``` 56 | 57 | ### Extend Planner System Prompt 58 | 59 | You can customize the behavior of the planning agent by extending its system prompt: 60 | 61 | ```python 62 | extend_planner_system_message = """ 63 | PRIORITIZE gathering information before taking any action. 64 | Always suggest exploring multiple options before making a decision. 
65 | """ 66 | 67 | # Create agent with extended planner system prompt 68 | llm = ChatOpenAI(model='gpt-4o') 69 | planner_llm = ChatOpenAI(model='gpt-4o-mini') 70 | 71 | agent = Agent( 72 | task="Your task here", 73 | llm=llm, 74 | planner_llm=planner_llm, 75 | extend_planner_system_message=extend_planner_system_message 76 | ) 77 | ``` 78 | -------------------------------------------------------------------------------- /docs/development/contribution-guide.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Contribution Guide" 3 | description: "Learn how to contribute to Browser Use" 4 | icon: "github" 5 | --- 6 | 7 | # Join the Browser Use Community! 8 | 9 | We're thrilled you're interested in contributing to Browser Use! This guide will help you get started with contributing to our project. Your contributions are what make the open-source community such an amazing place to learn, inspire, and create. 10 | 11 | ## Quick Setup 12 | 13 | Get started with Browser Use development in minutes: 14 | 15 | ```bash 16 | git clone https://github.com/browser-use/browser-use 17 | cd browser-use 18 | uv sync --all-extras --dev 19 | # or pip install -U git+https://github.com/browser-use/browser-use.git@main 20 | 21 | echo "BROWSER_USE_LOGGING_LEVEL=debug" >> .env 22 | ``` 23 | 24 | For more detailed setup instructions, see our [Local Setup Guide](/development/local-setup). 
25 | 26 | ## How to Contribute 27 | 28 | ### Find Something to Work On 29 | 30 | - Browse our [GitHub Issues](https://github.com/browser-use/browser-use/issues) for beginner-friendly issues labeled `good-first-issue` 31 | - Check out our most active issues or ask in [Discord](https://discord.gg/zXJJHtJf3k) for ideas of what to work on 32 | - Get inspiration and share what you build in the [`#showcase-your-work`](https://discord.com/channels/1303749220842340412/1305549200678850642) channel 33 | - Explore or contribute to [`awesome-browser-use-prompts`](https://github.com/browser-use/awesome-prompts)! 34 | 35 | ### Making a Great Pull Request 36 | 37 | When submitting a pull request, please: 38 | 39 | - Include a clear description of what the PR does and why it's needed 40 | - Add tests that cover your changes 41 | - Include a demo screenshot/gif or an example script demonstrating your changes 42 | - Make sure the PR passes all CI checks and tests 43 | - Keep your PR focused on a single issue or feature to make it easier to review 44 | 45 | Note: We appreciate quality over quantity. Instead of submitting small typo/style-only PRs, consider including those fixes as part of larger bugfix or feature PRs. 46 | 47 | ### Contribution Process 48 | 49 | 1. Fork the repository 50 | 2. Create a new branch for your feature or bugfix 51 | 3. Make your changes 52 | 4. Run tests to ensure everything works 53 | 5. Submit a pull request 54 | 6. Respond to any feedback from maintainers 55 | 7. Celebrate your contribution! 56 | 57 | Feel free to bump your issues/PRs with comments periodically if you need faster feedback. 58 | 59 | ## Code of Conduct 60 | 61 | We're committed to providing a welcoming and inclusive environment for all contributors. Please be respectful and constructive in all interactions. 
62 | 63 | ## Getting Help 64 | 65 | If you need help at any point: 66 | 67 | - Join our [Discord community](https://link.browser-use.com/discord) 68 | - Ask questions in the appropriate GitHub issue 69 | - Check our [documentation](/introduction) 70 | 71 | We're here to help you succeed in contributing to Browser Use! 72 | -------------------------------------------------------------------------------- /docs/development/evaluations.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Evaluations" 3 | description: "Test the Browser Use agent on standardized benchmarks" 4 | icon: "chart-bar" 5 | --- 6 | 7 | ## Prerequisites 8 | 9 | Browser Use uses proprietary/private test sets that must never be committed to GitHub and must be fetched through an authorized API request. 10 | Accessing these test sets requires an approved Browser Use account. 11 | There are currently no publicly available test sets, but some may be released in the future. 12 | 13 | ## Get an API Access Key 14 | 15 | First, navigate to https://browser-use.tools and log in with an authorized Browser Use account. 16 | 17 | Then, click the "Account" button at the top right of the page, and click the "Cycle New Key" button on that page. 18 | 19 | Copy the resulting URL and secret key into your `.env` file. It should look like this: 20 | 21 | ```bash .env 22 | EVALUATION_TOOL_URL= ... 23 | EVALUATION_TOOL_SECRET_KEY= ... 24 | ``` 25 | 26 | ## Running Evaluations 27 | 28 | First, ensure your file `eval/service.py` is up to date. 29 | 30 | Then run the file: 31 | 32 | ```bash 33 | python eval/service.py 34 | ``` 35 | 36 | ## Configuring Evaluations 37 | 38 | You can modify the evaluation by providing flags to the evaluation script. 
For instance: 39 | 40 | ```bash 41 | python eval/service.py --parallel_runs 5 --parallel_evaluations 5 --max-steps 25 --start 0 --end 100 --model gpt-4o 42 | ``` 43 | 44 | The evaluations webpage has a convenient GUI for generating these commands. To use it, navigate to https://browser-use.tools/dashboard. 45 | 46 | Then click the button "New Eval Run" on the left panel. This will open an interface with selectors, inputs, sliders, and switches. 47 | 48 | Input your desired configuration into the interface and copy the resulting Python command at the bottom. Then run this command as before. 49 | -------------------------------------------------------------------------------- /docs/development/observability.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Observability" 3 | description: "Trace Browser Use's agent execution steps and browser sessions" 4 | icon: "eye" 5 | --- 6 | 7 | ## Overview 8 | 9 | Browser Use has a native integration with [Laminar](https://lmnr.ai) - an open-source platform for tracing, evals, and labeling of AI agents. 10 | Read more about Laminar in the [Laminar docs](https://docs.lmnr.ai). 11 | 12 | 13 | Laminar excels at tracing browser agents by providing unified visibility into both browser session recordings and agent execution steps. 14 | 15 | 16 | ## Setup 17 | 18 | To set up Laminar, you need to install the `lmnr` package and set the `LMNR_PROJECT_API_KEY` environment variable. 19 | 20 | To get your project API key, you can either: 21 | - Register on [Laminar Cloud](https://lmnr.ai) and get the key from your project settings 22 | - Or spin up a local Laminar instance and get the key from the settings page 23 | 24 | ```bash 25 | pip install 'lmnr[all]' 26 | export LMNR_PROJECT_API_KEY= 27 | ``` 28 | 29 | ## Usage 30 | 31 | Then, simply initialize Laminar at the top of your project and both Browser Use and session recordings will be automatically traced. 
32 | 33 | ```python {5-8} 34 | from langchain_openai import ChatOpenAI 35 | from browser_use import Agent 36 | import asyncio 37 | 38 | from lmnr import Laminar 39 | # this line auto-instruments Browser Use and any browser you use (local or remote) 40 | Laminar.initialize(project_api_key="...") # you can also pass project api key here 41 | 42 | async def main(): 43 | agent = Agent( 44 | task="open google, search Laminar AI", 45 | llm=ChatOpenAI(model="gpt-4o-mini"), 46 | ) 47 | result = await agent.run() 48 | print(result) 49 | 50 | asyncio.run(main()) 51 | ``` 52 | 53 | ## Viewing Traces 54 | 55 | You can view traces in the Laminar UI by going to the traces tab in your project. 56 | When you select a trace, you can see both the browser session recording and the agent execution steps. 57 | 58 | The timeline of the browser session is synced with the agent execution steps; timeline highlights indicate the agent's current step synced with the browser session. 59 | In the trace view, you can also see the agent's current step, the tool it's using, and the tool's input and output. Tools are highlighted in the timeline with a yellow color. 60 | 61 | Laminar 62 | 63 | 64 | ## Laminar 65 | 66 | To learn more about tracing and evaluating your browser agents, check out the [Laminar docs](https://docs.lmnr.ai). 67 | -------------------------------------------------------------------------------- /docs/development/roadmap.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Roadmap" 3 | description: "Future plans and upcoming features for Browser Use" 4 | icon: "road" 5 | --- 6 | 7 | Big things coming soon! 
8 | -------------------------------------------------------------------------------- /docs/development/telemetry.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Telemetry" 3 | description: "Understanding Browser Use's telemetry and privacy settings" 4 | icon: "chart-mixed" 5 | --- 6 | 7 | ## Overview 8 | 9 | Browser Use collects anonymous usage data to help us understand how the library is being used and to improve the user experience. It also helps us fix bugs faster and prioritize feature development. 10 | 11 | ## Data Collection 12 | 13 | We use [PostHog](https://posthog.com) for telemetry collection. The data is completely anonymized and contains no personally identifiable information. 14 | 15 | 16 | We never collect personal information, credentials, or specific content from 17 | your browser automation tasks. 18 | 19 | 20 | ## Opting Out 21 | 22 | You can disable telemetry by setting an environment variable: 23 | 24 | ```bash .env 25 | ANONYMIZED_TELEMETRY=false 26 | ``` 27 | 28 | Or in your Python code: 29 | 30 | ```python 31 | import os 32 | os.environ["ANONYMIZED_TELEMETRY"] = "false" 33 | ``` 34 | 35 | 36 | Even when enabled, telemetry has zero impact on the library's performance or 37 | functionality. Code is available in [Telemetry 38 | Service](https://github.com/browser-use/browser-use/tree/main/browser_use/telemetry). 
39 | 40 | -------------------------------------------------------------------------------- /docs/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/images/browser-use.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/a04def7a37d2992d32b686b628a5603cfe73ea24/docs/images/browser-use.png -------------------------------------------------------------------------------- /docs/images/checks-passed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/a04def7a37d2992d32b686b628a5603cfe73ea24/docs/images/checks-passed.png -------------------------------------------------------------------------------- /docs/images/laminar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/a04def7a37d2992d32b686b628a5603cfe73ea24/docs/images/laminar.png -------------------------------------------------------------------------------- /docs/introduction.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Introduction" 3 | description: "Welcome to Browser Use - We enable AI to control your browser" 4 | icon: "book-open" 5 | --- 6 | 7 | Browser Use 8 | 9 | ## Overview 10 | 11 | Browser Use is the easiest way to connect your AI agents with the browser. It makes websites accessible for AI agents by providing a powerful, yet simple interface for browser automation. 12 | 13 | 14 | If you have used Browser Use for your project, feel free to show it off in our 15 | [Discord community](https://link.browser-use.com/discord)! 
16 | 17 | 18 | ## Getting Started 19 | 20 | 21 | 22 | Get up and running with Browser Use in minutes 23 | 24 | 29 | Configure different LLMs for your agents 30 | 31 | 32 | Learn how to configure and customize your agents 33 | 34 | 35 | Extend functionality with custom actions 36 | 37 | 38 | 39 | ## Fancy Demos 40 | 41 | ### Writing in Google Docs 42 | 43 | Task: Write a letter in Google Docs to my Papa, thanking him for everything, and save the document as a PDF. 44 | 45 | 46 | 47 | 48 | 49 | ### Job Applications 50 | 51 | Task: Read my CV & find ML jobs, save them to a file, and then start applying for them in new tabs. 52 | 53 | 54 |