├── .cursor └── rules │ └── browser-use-rules.mdc ├── .dockerignore ├── .env.example ├── .gitattributes ├── .github ├── .git-blame-ignore-revs ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── 1_element_detection_bug.yml │ ├── 2_bug_report.yml │ ├── 3_feature_request.yml │ ├── 4_docs_issue.yml │ └── config.yml ├── SECURITY.md └── workflows │ ├── build-base-image.yml.disabled │ ├── claude.yml │ ├── cloud_evals.yml │ ├── docker.yml │ ├── lint.yml │ ├── package.yaml │ ├── publish.yml │ ├── stale-bot.yml │ └── test.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .python-version ├── CLAUDE.md ├── Dockerfile ├── Dockerfile.fast ├── LICENSE ├── README.md ├── bin ├── lint.sh ├── setup.sh └── test.sh ├── browser_use ├── README.md ├── __init__.py ├── agent │ ├── cloud_events.py │ ├── gif.py │ ├── message_manager │ │ ├── service.py │ │ ├── utils.py │ │ └── views.py │ ├── prompts.py │ ├── service.py │ ├── system_prompt.md │ ├── system_prompt_flash.md │ ├── system_prompt_no_thinking.md │ └── views.py ├── browser │ ├── __init__.py │ ├── events.py │ ├── profile.py │ ├── python_highlights.py │ ├── session.py │ ├── views.py │ ├── watchdog_base.py │ └── watchdogs │ │ ├── __init__.py │ │ ├── aboutblank_watchdog.py │ │ ├── crash_watchdog.py │ │ ├── default_action_watchdog.py │ │ ├── dom_watchdog.py │ │ ├── downloads_watchdog.py │ │ ├── local_browser_watchdog.py │ │ ├── permissions_watchdog.py │ │ ├── popups_watchdog.py │ │ ├── screenshot_watchdog.py │ │ ├── security_watchdog.py │ │ └── storage_state_watchdog.py ├── cli.py ├── config.py ├── controller │ └── __init__.py ├── dom │ ├── enhanced_snapshot.py │ ├── playground │ │ ├── extraction.py │ │ ├── multi_act.py │ │ └── tree.py │ ├── serializer │ │ ├── clickable_elements.py │ │ └── serializer.py │ ├── service.py │ ├── utils.py │ └── views.py ├── exceptions.py ├── filesystem │ ├── __init__.py │ └── file_system.py ├── integrations │ └── gmail │ │ ├── __init__.py │ │ ├── actions.py │ │ └── service.py ├── llm │ ├── README.md │ ├── 
__init__.py │ ├── anthropic │ │ ├── chat.py │ │ └── serializer.py │ ├── aws │ │ ├── __init__.py │ │ ├── chat_anthropic.py │ │ ├── chat_bedrock.py │ │ └── serializer.py │ ├── azure │ │ └── chat.py │ ├── base.py │ ├── deepseek │ │ ├── chat.py │ │ └── serializer.py │ ├── exceptions.py │ ├── google │ │ ├── __init__.py │ │ ├── chat.py │ │ └── serializer.py │ ├── groq │ │ ├── chat.py │ │ ├── parser.py │ │ └── serializer.py │ ├── messages.py │ ├── models.py │ ├── ollama │ │ ├── chat.py │ │ └── serializer.py │ ├── openai │ │ ├── chat.py │ │ ├── like.py │ │ └── serializer.py │ ├── openrouter │ │ ├── chat.py │ │ └── serializer.py │ ├── schema.py │ ├── tests │ │ ├── test_anthropic_cache.py │ │ ├── test_chat_models.py │ │ ├── test_gemini_image.py │ │ ├── test_groq_loop.py │ │ └── test_single_step.py │ └── views.py ├── logging_config.py ├── mcp │ ├── .dxtignore │ ├── __init__.py │ ├── __main__.py │ ├── client.py │ ├── controller.py │ ├── manifest.json │ └── server.py ├── observability.py ├── py.typed ├── screenshots │ ├── __init__.py │ └── service.py ├── sync │ ├── __init__.py │ ├── auth.py │ └── service.py ├── telemetry │ ├── __init__.py │ ├── service.py │ └── views.py ├── tokens │ ├── __init__.py │ ├── service.py │ ├── tests │ │ └── test_cost.py │ └── views.py ├── tools │ ├── registry │ │ ├── service.py │ │ └── views.py │ ├── service.py │ └── views.py └── utils.py ├── docker ├── README.md ├── base-images │ ├── chromium │ │ └── Dockerfile │ ├── python-deps │ │ └── Dockerfile │ └── system │ │ └── Dockerfile └── build-base-images.sh ├── docs ├── README.md ├── cloud │ └── v1 │ │ ├── authentication.mdx │ │ ├── custom-sdk.mdx │ │ ├── implementation.mdx │ │ ├── n8n-browser-use-integration.mdx │ │ ├── pricing.mdx │ │ ├── quickstart.mdx │ │ ├── search.mdx │ │ └── webhooks.mdx ├── customize │ ├── agent │ │ ├── all-parameters.mdx │ │ ├── basics.mdx │ │ ├── output-format.mdx │ │ └── supported-models.mdx │ ├── browser │ │ ├── all-parameters.mdx │ │ ├── basics.mdx │ │ ├── real-browser.mdx 
│ │ └── remote.mdx │ ├── examples │ │ ├── chain-agents.mdx │ │ ├── fast-agent.mdx │ │ ├── more-examples.mdx │ │ ├── parallel-browser.mdx │ │ ├── prompting-guide.mdx │ │ ├── secure.mdx │ │ └── sensitive-data.mdx │ └── tools │ │ ├── add.mdx │ │ ├── available.mdx │ │ ├── basics.mdx │ │ ├── remove.mdx │ │ └── response.mdx ├── development.mdx ├── development │ ├── get-help.mdx │ ├── monitoring │ │ ├── observability.mdx │ │ └── telemetry.mdx │ ├── n8n-integration.mdx │ ├── roadmap.mdx │ └── setup │ │ ├── contribution-guide.mdx │ │ └── local-setup.mdx ├── docs.json ├── favicon.ico ├── favicon.svg ├── images │ ├── browser-use-banner-dark.png │ ├── browser-use-banner.png │ ├── checks-passed.png │ ├── cloud-banner-dark.png │ ├── cloud-banner-js.png │ ├── cloud-banner-python.png │ ├── cloud-banner.png │ └── laminar.png ├── introduction.mdx ├── logo │ ├── dark.svg │ └── light.svg ├── quickstart.mdx └── quickstart_llm.mdx ├── examples ├── __init__.py ├── api │ └── search │ │ ├── search_url.py │ │ └── simple_search.py ├── browser │ ├── parallel_browser.py │ ├── real_browser.py │ └── using_cdp.py ├── cloud │ ├── 01_basic_task.py │ ├── 02_fast_mode_gemini.py │ ├── 03_structured_output.py │ ├── 04_proxy_usage.py │ ├── 05_search_api.py │ ├── README.md │ └── env.example ├── custom-functions │ ├── 2fa.py │ ├── action_filters.py │ ├── advanced_search.py │ ├── cua.py │ ├── file_upload.py │ ├── notification.py │ ├── onepassword_2fa.py │ └── save_to_file_hugging_face.py ├── features │ ├── custom_output.py │ ├── custom_system_prompt.py │ ├── download_file.py │ ├── follow_up_tasks.py │ ├── initial_actions.py │ ├── multi_tab.py │ ├── parallel_agents.py │ ├── process_agent_output.py │ ├── restrict_urls.py │ ├── scrolling_page.py │ ├── secure.py │ ├── sensitive_data.py │ └── small_model_for_extraction.py ├── file_system │ ├── alphabet_earnings.py │ ├── excel_sheet.py │ └── file_system.py ├── getting_started │ ├── 01_basic_search.py │ ├── 02_form_filling.py │ ├── 03_data_extraction.py │ ├── 
04_multi_step_task.py │ └── 05_fast_agent.py ├── integrations │ ├── agentmail │ │ ├── 2fa.py │ │ └── email_tools.py │ ├── discord │ │ ├── discord_api.py │ │ └── discord_example.py │ ├── gmail_2fa_integration.py │ └── slack │ │ ├── README.md │ │ ├── slack_api.py │ │ └── slack_example.py ├── mcp │ ├── advanced_client.py │ ├── advanced_server.py │ ├── simple_client.py │ └── simple_server.py ├── models │ ├── aws.py │ ├── azure_openai.py │ ├── claude-4-sonnet.py │ ├── deepseek-chat.py │ ├── gemini.py │ ├── gpt-4.1.py │ ├── gpt-5-mini.py │ ├── langchain │ │ ├── README.md │ │ ├── __init__.py │ │ ├── chat.py │ │ ├── example.py │ │ └── serializer.py │ ├── lazy_import.py │ ├── llama4-groq.py │ ├── novita.py │ └── openrouter.py ├── observability │ └── openLLMetry.py ├── simple.py ├── ui │ ├── README.md │ ├── command_line.py │ ├── gradio_demo.py │ └── streamlit_demo.py └── use-cases │ ├── captcha.py │ ├── check_appointment.py │ ├── extract_pdf_content.py │ ├── find_and_apply_to_jobs.py │ ├── find_influencer_profiles.py │ └── shopping.py ├── pyproject.toml ├── static ├── NiceHack69.png ├── browser-use-dark.png └── browser-use.png └── tests ├── agent_tasks ├── README.md ├── amazon_laptop.yaml ├── browser_use_pip.yaml └── captcha_cloudflare.yaml ├── ci ├── conftest.py ├── evaluate_tasks.py ├── test_agent_sensitive_data.py ├── test_browser_event_ClickElementEvent.py ├── test_browser_event_GetDropdownOptionsEvent.py ├── test_browser_event_GetDropdownOptionsEvent_aria_menus.py ├── test_browser_event_NavigateToUrlEvent.py ├── test_browser_event_NavigateToUrlEvent2.py ├── test_browser_event_ScrollEvent.py ├── test_browser_session_element_cache.py ├── test_browser_session_output_paths.py ├── test_browser_session_proxy.py ├── test_browser_session_recent_events.py ├── test_browser_session_start.py ├── test_browser_session_tab_management.py ├── test_browser_session_via_cdp_tab_management.py ├── test_browser_session_viewport_and_proxy.py ├── test_browser_watchdog_crash.py ├── 
test_browser_watchdog_downloads.py ├── test_browser_watchdog_downloads_simple.py ├── test_browser_watchdog_downloads_upload_full_circle.py ├── test_browser_watchdog_screenshots.py ├── test_browser_watchdog_security2.py ├── test_config.py ├── test_filesystem.py ├── test_llm_anthropic_502_error.py ├── test_llm_custom_structured_ouput.py ├── test_llm_gemini_type_field_fix.py ├── test_llm_schema_optimizer.py ├── test_radio_buttons.html ├── test_radio_buttons.py ├── test_registry.py ├── test_registry_action_parameter_injection.py ├── test_sync_client.py ├── test_sync_client_auth.py ├── test_telemetry.py └── test_tools.py ├── mind2web_data └── processed.json └── scripts ├── debug_iframe_scrolling.py └── test_frame_hierarchy.py /.cursor/rules/browser-use-rules.mdc: -------------------------------------------------------------------------------- 1 | --- 2 | description: 3 | globs: 4 | alwaysApply: true 5 | --- 6 | 7 | ## 🧠 General Guidelines for Contributing to `browser-use` 8 | 9 | **Browser-Use** is an AI agent that autonomously interacts with the web. It takes a user-defined task, navigates web pages using Chromium via CDP, processes HTML, and repeatedly queries a language model (like `gpt-4.1-mini`) to decide the next action—until the task is completed. 10 | 11 | --- 12 | 13 | ### 🧰 Development Rules 14 | 15 | - ✅ **Always use [`uv`](mdc:https:/github.com/astral-sh/uv) instead of `pip`** 16 | For deterministic and fast dependency installs. 17 | 18 | ```bash 19 | uv venv --python 3.11 20 | source .venv/bin/activate 21 | uv sync 22 | ``` 23 | 24 | - ✅ **Use real model names** 25 | Do **not** replace `gpt-4o` with `gpt-4`. The model `gpt-4o` is a distinct release and supported. 26 | 27 | - ✅ **Type-safe coding** 28 | Use **Pydantic v2 models** for all internal action schemas, task inputs/outputs, and tools I/O. This ensures robust validation and LLM-call integrity. 29 | 30 | - ✅ **Pre-commit formatting** 31 | ALWAYS make sure to run pre-commit before making PRs. 
32 | 33 | --- 34 | 35 | 36 | 37 | ### Notes: 38 | 39 | - Use descriptive names and docstrings for each action. 40 | - Prefer returning `ActionResult` with structured content to help the agent reason better. 41 | 42 | --- 43 | 44 | ## 🧠 Creating and Running an Agent 45 | 46 | To define a task and run a browser-use agent: 47 | 48 | ```python 49 | from browser_use import Agent, ChatOpenAI 50 | 51 | task = "Find the CEO of OpenAI and return their name" 52 | model = ChatOpenAI(model="gpt-4.1-mini") 53 | 54 | agent = Agent(task=task, llm=model, tools=tools) 55 | 56 | history = await agent.run() 57 | ``` 58 | 59 | # Never create random examples 60 | 61 | When I ask you to implement a feature never create new files that show off that feature -> the code just gets messy. If you do anything to test it out, just do the inline code inside the terminal (if you want). 62 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | docs/ 2 | static/ 3 | .claude/ 4 | .github/ 5 | 6 | # Cache files 7 | .DS_Store 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | .mypy_cache/ 12 | .ruff_cache/ 13 | .pytest_cache/ 14 | .ipynb_checkpoints 15 | 16 | # Virtual Environments 17 | .venv 18 | venv/ 19 | 20 | # Editor cruft 21 | .vscode/ 22 | .idea/ 23 | 24 | # Build Files 25 | dist/ 26 | 27 | # Data files 28 | *.gif 29 | *.txt 30 | *.pdf 31 | *.csv 32 | *.json 33 | *.jsonl 34 | *.bak 35 | 36 | # Secrets and sensitive files 37 | secrets.env 38 | .env 39 | browser_cookies.json 40 | cookies.json 41 | gcp-login.json 42 | saved_trajectories/ 43 | AgentHistory.json 44 | AgentHistoryList.json 45 | private_example.py 46 | private_example 47 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # Browser Use Configuration 2 | # Copy this file to .env 
and fill in your values 3 | 4 | # Logging Configuration 5 | # Set the logging level (debug, info, warning, error) 6 | BROWSER_USE_LOGGING_LEVEL=info 7 | 8 | # Log file paths (optional) 9 | # Save debug level logs to this file 10 | BROWSER_USE_DEBUG_LOG_FILE=debug.log 11 | 12 | # Save info level logs to this file 13 | BROWSER_USE_INFO_LOG_FILE=info.log 14 | 15 | # CDP (Chrome DevTools Protocol) logging level 16 | CDP_LOGGING_LEVEL=WARNING 17 | 18 | # Telemetry and Analytics 19 | # Enable/disable anonymous telemetry 20 | ANONYMIZED_TELEMETRY=true 21 | 22 | # Browser Use Cloud Configuration (optional) 23 | # Your Browser Use Cloud API key - get it from: https://cloud.browser-use.com/billing 24 | # BROWSER_USE_API_KEY=your_api_key_here 25 | 26 | # Custom API base URL (for enterprise installations) 27 | # BROWSER_USE_CLOUD_API_URL=https://api.browser-use.com 28 | 29 | # Cloud sync settings 30 | # BROWSER_USE_CLOUD_SYNC=false 31 | 32 | # Model Configuration 33 | # Default LLM model to use 34 | # OPENAI_API_KEY=your_openai_api_key_here 35 | # ANTHROPIC_API_KEY=your_anthropic_api_key_here 36 | # AZURE_OPENAI_API_KEY= 37 | # AZURE_OPENAI_ENDPOINT= 38 | # GOOGLE_API_KEY= 39 | # DEEPSEEK_API_KEY= 40 | # GROK_API_KEY= 41 | # NOVITA_API_KEY= 42 | 43 | # Browser Configuration 44 | # Path to Chrome/Chromium executable (optional) 45 | # BROWSER_USE_EXECUTABLE_PATH=/path/to/chrome 46 | 47 | # Run browser in headless mode 48 | # BROWSER_USE_HEADLESS=false 49 | 50 | # User data directory for browser profile 51 | # BROWSER_USE_USER_DATA_DIR=./browser_data 52 | 53 | # Proxy Configuration (optional) 54 | # BROWSER_USE_PROXY_SERVER=http://proxy.example.com:8080 55 | # BROWSER_USE_NO_PROXY=localhost,127.0.0.1,*.internal 56 | # BROWSER_USE_PROXY_USERNAME=username 57 | # BROWSER_USE_PROXY_PASSWORD=password 58 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | 
static/*.gif filter=lfs diff=lfs merge=lfs -text 2 | # static/*.mp4 filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /.github/.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | 66b3c26df51adec32d42c3b2c0304e0662457298 2 | 2be4ba4f7078d47bbeed04baf6f8fb04017df028 3 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to browser-use 2 | 3 | We love contributions! Please read through these links to get started: 4 | 5 | - 🔢 [Contribution Guidelines](https://docs.browser-use.com/development/contribution-guide) 6 | - 👾 [Local Development Setup Guide](https://docs.browser-use.com/development/local-setup) 7 | - 🏷️ [Issues Tagged: `#help-wanted`](https://github.com/browser-use/browser-use/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22help%20wanted%22) 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/4_docs_issue.yml: -------------------------------------------------------------------------------- 1 | name: 📚 Documentation Issue 2 | description: Report an issue in the browser-use documentation 3 | labels: ["documentation"] 4 | title: "Documentation: ..." 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to improve our documentation! Please fill out the form below to help us fix the issue quickly. 10 | 11 | - type: dropdown 12 | id: type 13 | attributes: 14 | label: Type of Documentation Issue 15 | description: What type of documentation issue is this? 
16 | options: 17 | - Missing documentation 18 | - Incorrect documentation 19 | - Unclear documentation 20 | - Broken link 21 | - Other (specify in description) 22 | validations: 23 | required: true 24 | 25 | - type: input 26 | id: page 27 | attributes: 28 | label: Documentation Page 29 | description: Which page or section of the documentation is this about? 30 | placeholder: "e.g. https://docs.browser-use.com/customize/browser-settings > Context Configuration > headless" 31 | validations: 32 | required: true 33 | 34 | - type: textarea 35 | id: description 36 | attributes: 37 | label: Issue Description 38 | description: "Describe what's wrong or missing in the documentation" 39 | placeholder: e.g. Docs should clarify whether BrowserSession(no_viewport=False) is supported when running in BrowserSession(headless=False) mode... 40 | validations: 41 | required: true 42 | 43 | - type: textarea 44 | id: suggestion 45 | attributes: 46 | label: Suggested Changes 47 | description: If you have specific suggestions for how to improve the documentation, please share them 48 | placeholder: | 49 | e.g. The documentation could be improved by adding one more line here: 50 | ```diff 51 | Use `BrowserSession(headless=False)` to open the browser window (aka headful mode). 52 | + Viewports are not supported when headful, if `headless=False` it will force `no_viewport=True`. 
53 | ``` 54 | validations: 55 | required: false 56 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false # Set to true if you want to allow blank issues 2 | contact_links: 3 | - name: 🔢 Quickstart Guide 4 | url: https://docs.browser-use.com/quickstart 5 | about: Most common issues can be resolved by following our quickstart guide 6 | - name: 💬 Questions and Help 7 | url: https://link.browser-use.com/discord 8 | about: Please ask questions in our Discord community 9 | - name: 📖 Documentation 10 | url: https://docs.browser-use.com 11 | about: Check our documentation for answers first 12 | -------------------------------------------------------------------------------- /.github/SECURITY.md: -------------------------------------------------------------------------------- 1 | ## Reporting Security Issues 2 | 3 | If you believe you have found a security vulnerability in browser-use, please report it through coordinated disclosure. 4 | 5 | **Please do not report security vulnerabilities through the repository issues, discussions, or pull requests.** 6 | 7 | Instead, please open a new [Github security advisory](https://github.com/browser-use/browser-use/security/advisories/new). 
8 | 9 | Please include as much of the information listed below as you can to help me better understand and resolve the issue: 10 | 11 | * The type of issue (e.g., buffer overflow, SQL injection, or cross-site scripting) 12 | * Full paths of source file(s) related to the manifestation of the issue 13 | * The location of the affected source code (tag/branch/commit or direct URL) 14 | * Any special configuration required to reproduce the issue 15 | * Step-by-step instructions to reproduce the issue 16 | * Proof-of-concept or exploit code (if possible) 17 | * Impact of the issue, including how an attacker might exploit the issue 18 | 19 | This information will help me triage your report more quickly. 20 | -------------------------------------------------------------------------------- /.github/workflows/build-base-image.yml.disabled: -------------------------------------------------------------------------------- 1 | name: Build Base Image 2 | 3 | on: 4 | schedule: 5 | - cron: '0 2 * * 1' # Weekly on Monday 6 | workflow_dispatch: 7 | push: 8 | paths: 9 | - 'Dockerfile.base' 10 | 11 | jobs: 12 | build-base: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | platform: [linux/amd64, linux/arm64] 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Set up QEMU 21 | uses: docker/setup-qemu-action@v3 22 | 23 | - name: Set up Docker Buildx 24 | uses: docker/setup-buildx-action@v3 25 | 26 | - name: Login to Docker Hub 27 | uses: docker/login-action@v3 28 | with: 29 | username: ${{ secrets.DOCKER_USERNAME }} 30 | password: ${{ secrets.DOCKER_PASSWORD }} 31 | 32 | - name: Build and push base image 33 | uses: docker/build-push-action@v5 34 | with: 35 | context: . 
36 | file: ./Dockerfile.base 37 | platforms: ${{ matrix.platform }} 38 | push: true 39 | tags: | 40 | browseruse/browseruse-base:chromium-138-${{ matrix.platform == 'linux/amd64' && 'amd64' || 'arm64' }} 41 | browseruse/browseruse-base:latest-${{ matrix.platform == 'linux/amd64' && 'amd64' || 'arm64' }} 42 | cache-from: type=registry,ref=browseruse/browseruse-base:buildcache-${{ matrix.platform == 'linux/amd64' && 'amd64' || 'arm64' }} 43 | cache-to: type=registry,ref=browseruse/browseruse-base:buildcache-${{ matrix.platform == 'linux/amd64' && 'amd64' || 'arm64' }},mode=max 44 | -------------------------------------------------------------------------------- /.github/workflows/cloud_evals.yml: -------------------------------------------------------------------------------- 1 | name: cloud_evals 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - 'releases/*' 8 | workflow_dispatch: 9 | inputs: 10 | commit_hash: 11 | description: Commit hash of the library to build the Cloud eval image for 12 | required: false 13 | 14 | jobs: 15 | trigger_cloud_eval_image_build: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/github-script@v7 19 | with: 20 | github-token: ${{ secrets.TRIGGER_CLOUD_BUILD_GH_KEY }} 21 | script: | 22 | const result = await github.rest.repos.createDispatchEvent({ 23 | owner: 'browser-use', 24 | repo: 'cloud', 25 | event_type: 'trigger-workflow', 26 | client_payload: {"commit_hash": "${{ github.event.inputs.commit_hash || github.sha }}"} 27 | }) 28 | console.log(result) 29 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: docker 2 | 3 | on: 4 | push: 5 | release: 6 | types: [published] 7 | 8 | jobs: 9 | build_publish_image: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | packages: write 13 | contents: read 14 | attestations: write 15 | id-token: write 16 | steps: 17 | - name: Check out 
the repo 18 | uses: actions/checkout@v4 19 | 20 | - name: Set up QEMU 21 | uses: docker/setup-qemu-action@v3 22 | 23 | - name: Set up Docker Buildx 24 | uses: docker/setup-buildx-action@v3 25 | 26 | - name: Log in to Docker Hub 27 | uses: docker/login-action@v3 28 | with: 29 | username: ${{ secrets.DOCKER_USERNAME }} 30 | password: ${{ secrets.DOCKER_PASSWORD }} 31 | 32 | - name: Login to GitHub Container Registry 33 | uses: docker/login-action@v3 34 | with: 35 | registry: ghcr.io 36 | username: ${{ github.repository_owner }} 37 | password: ${{ secrets.GITHUB_TOKEN }} 38 | 39 | - name: Compute Docker tags based on tag/branch 40 | id: meta 41 | uses: docker/metadata-action@v5 42 | with: 43 | images: | 44 | browseruse/browseruse 45 | ghcr.io/browser-use/browser-use 46 | tags: | 47 | type=ref,event=branch 48 | type=ref,event=pr 49 | type=pep440,pattern={{version}} 50 | type=pep440,pattern={{major}}.{{minor}} 51 | type=sha 52 | 53 | - name: Build and push Docker image 54 | id: push 55 | uses: docker/build-push-action@v6 56 | with: 57 | platforms: linux/amd64,linux/arm64 58 | context: . 
59 | file: ./Dockerfile 60 | push: true 61 | tags: ${{ steps.meta.outputs.tags }} 62 | labels: ${{ steps.meta.outputs.labels }} 63 | cache-from: type=registry,ref=browseruse/browseruse:buildcache 64 | cache-to: type=registry,ref=browseruse/browseruse:buildcache,mode=max 65 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | on: 3 | push: 4 | branches: 5 | - main 6 | - stable 7 | - 'releases/**' 8 | tags: 9 | - '*' 10 | pull_request: 11 | workflow_dispatch: 12 | 13 | jobs: 14 | lint-syntax: 15 | name: syntax-errors 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: astral-sh/setup-uv@v5 20 | with: 21 | enable-cache: true 22 | - run: uv run ruff check --no-fix --select PLE 23 | 24 | lint-style: 25 | name: code-style 26 | runs-on: ubuntu-latest 27 | steps: 28 | - uses: actions/checkout@v4 29 | - uses: astral-sh/setup-uv@v5 30 | with: 31 | enable-cache: true 32 | - run: uv sync --dev --all-extras # install extras for examples to avoid pyright missing imports errors 33 | - run: uv run pre-commit run --all-files --show-diff-on-failure 34 | 35 | lint-typecheck: 36 | name: type-checker 37 | runs-on: ubuntu-latest 38 | steps: 39 | - uses: actions/checkout@v4 40 | - uses: astral-sh/setup-uv@v6 41 | with: 42 | enable-cache: true 43 | - run: uv sync --dev --all-extras # install extras for examples to avoid pyright missing imports errors- 44 | - run: uv run pyright 45 | -------------------------------------------------------------------------------- /.github/workflows/package.yaml: -------------------------------------------------------------------------------- 1 | name: package 2 | on: 3 | push: 4 | branches: 5 | - main 6 | - stable 7 | - 'releases/**' 8 | tags: 9 | - '*' 10 | pull_request: 11 | workflow_dispatch: 12 | 13 | jobs: 14 | build: 15 | name: pip-build 16 | runs-on: 
ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: astral-sh/setup-uv@v5 20 | - run: uv build --python 3.12 21 | - uses: actions/upload-artifact@v4 22 | with: 23 | name: dist-artifact 24 | path: | 25 | dist/*.whl 26 | dist/*.tar.gz 27 | 28 | build_test: 29 | name: pip-install-on-${{ matrix.os }}-py-${{ matrix.python-version }} 30 | needs: build 31 | runs-on: ${{ matrix.os }} 32 | strategy: 33 | matrix: 34 | os: [ubuntu-latest, macos-latest, windows-latest] 35 | python-version: ["3.11", "3.13"] 36 | env: 37 | ANONYMIZED_TELEMETRY: 'false' 38 | 39 | steps: 40 | - uses: actions/checkout@v4 41 | - uses: astral-sh/setup-uv@v5 42 | - uses: actions/download-artifact@v4 43 | with: 44 | name: dist-artifact 45 | 46 | - name: Set up venv and test for OS/Python versions 47 | shell: bash 48 | run: | 49 | uv venv /tmp/testenv --python ${{ matrix.python-version }} --clear 50 | if [[ "$RUNNER_OS" == "Windows" ]]; then 51 | . /tmp/testenv/Scripts/activate 52 | else 53 | source /tmp/testenv/bin/activate 54 | fi 55 | uv pip install *.whl 56 | python -c 'from browser_use import Agent, BrowserProfile, BrowserSession, Tools, ActionModel, ActionResult' 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Cache files 2 | .DS_Store 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | .mypy_cache/ 7 | .ruff_cache/ 8 | .pytest_cache/ 9 | .ipynb_checkpoints 10 | ~/ 11 | 12 | # Virtual Environments 13 | .venv* 14 | venv/ 15 | 16 | # IDEs 17 | .vscode/ 18 | .idea/ 19 | 20 | # Build files 21 | dist/ 22 | 23 | # Data files 24 | *.gif 25 | *.txt 26 | *.pdf 27 | *.csv 28 | *.json 29 | *.jsonl 30 | *.log 31 | *.bak 32 | 33 | # Secrets and sensitive files 34 | secrets.env 35 | .env 36 | browser_cookies.json 37 | cookies.json 38 | gcp-login.json 39 | saved_trajectories/ 40 | old_tests/ 41 | AgentHistory.json 42 | AgentHistoryList.json 43 | 
private_example.py 44 | private_example 45 | CLAUDE.local.md 46 | 47 | uv.lock 48 | temp 49 | tmp 50 | 51 | # Google API credentials 52 | credentials.json 53 | token.json 54 | 55 | !docs/docs.json 56 | 57 | 58 | temp-profile-* 59 | 60 | screenshot.png 61 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/asottile/yesqa 3 | rev: v1.5.0 4 | hooks: 5 | - id: yesqa 6 | 7 | - repo: https://github.com/codespell-project/codespell 8 | rev: v2.4.1 9 | hooks: 10 | - id: codespell # See pyproject.toml for args 11 | additional_dependencies: 12 | - tomli 13 | 14 | - repo: https://github.com/asottile/pyupgrade 15 | rev: v3.20.0 16 | hooks: 17 | - id: pyupgrade 18 | args: [--py311-plus] 19 | 20 | # - repo: https://github.com/asottile/add-trailing-comma 21 | # rev: v3.1.0 22 | # hooks: 23 | # - id: add-trailing-comma 24 | 25 | - repo: https://github.com/astral-sh/ruff-pre-commit 26 | rev: v0.12.10 27 | hooks: 28 | - id: ruff-check 29 | args: [ --fix ] 30 | - id: ruff-format 31 | # see pyproject.toml for more details on ruff config 32 | 33 | - repo: https://github.com/RobertCraigie/pyright-python 34 | rev: v1.1.404 35 | hooks: 36 | - id: pyright 37 | 38 | - repo: https://github.com/pre-commit/pre-commit-hooks 39 | rev: v6.0.0 40 | hooks: 41 | # check for basic syntax errors in python and data files 42 | - id: check-ast 43 | - id: check-toml 44 | - id: check-yaml 45 | - id: check-json 46 | - id: check-merge-conflict 47 | # check for bad files and folders 48 | - id: check-symlinks 49 | - id: destroyed-symlinks 50 | - id: check-case-conflict 51 | - id: check-illegal-windows-names 52 | - id: check-shebang-scripts-are-executable 53 | - id: mixed-line-ending 54 | - id: fix-byte-order-marker 55 | - id: end-of-file-fixer 56 | # best practices enforcement 57 | - id: detect-private-key 58 | # - id: 
check-docstring-first 59 | - id: debug-statements 60 | - id: forbid-submodules 61 | - id: check-added-large-files 62 | args: ["--maxkb=600"] 63 | # - id: name-tests-test 64 | # args: ["--pytest-test-first"] 65 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /Dockerfile.fast: -------------------------------------------------------------------------------- 1 | # Fast Dockerfile using pre-built base images 2 | ARG REGISTRY=browseruse 3 | ARG BASE_TAG=latest 4 | FROM ${REGISTRY}/base-python-deps:${BASE_TAG} 5 | 6 | LABEL name="browseruse" description="Browser automation for AI agents" 7 | 8 | ENV BROWSERUSE_USER="browseruse" DEFAULT_PUID=911 DEFAULT_PGID=911 DATA_DIR=/data 9 | 10 | # Create user and directories 11 | RUN groupadd --system $BROWSERUSE_USER && \ 12 | useradd --system --create-home --gid $BROWSERUSE_USER --groups audio,video $BROWSERUSE_USER && \ 13 | usermod -u "$DEFAULT_PUID" "$BROWSERUSE_USER" && \ 14 | groupmod -g "$DEFAULT_PGID" "$BROWSERUSE_USER" && \ 15 | mkdir -p /data /home/$BROWSERUSE_USER/.config && \ 16 | ln -s $DATA_DIR /home/$BROWSERUSE_USER/.config/browseruse && \ 17 | mkdir -p "/home/$BROWSERUSE_USER/.config/chromium/Crash Reports/pending/" && \ 18 | mkdir -p "$DATA_DIR/profiles/default" && \ 19 | chown -R "$BROWSERUSE_USER:$BROWSERUSE_USER" "/home/$BROWSERUSE_USER" "$DATA_DIR" 20 | 21 | WORKDIR /app 22 | COPY . 
/app 23 | 24 | # Install browser-use 25 | RUN --mount=type=cache,target=/root/.cache/uv,sharing=locked \ 26 | uv sync --all-extras --locked --no-dev --compile-bytecode 27 | 28 | USER "$BROWSERUSE_USER" 29 | VOLUME "$DATA_DIR" 30 | EXPOSE 9242 9222 31 | ENTRYPOINT ["browser-use"] 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Gregor Zunic 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /bin/lint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This script is used to run the formatter, linter, and type checker pre-commit hooks. 
3 | # Usage: 4 | # $ ./bin/lint.sh 5 | 6 | IFS=$'\n' 7 | 8 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 9 | 10 | cd "$SCRIPT_DIR/.." || exit 1 11 | 12 | echo "[*] Running ruff linter, formatter, pyright type checker, and other pre-commit checks..." 13 | exec uv run pre-commit run --all-files 14 | -------------------------------------------------------------------------------- /bin/setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This script is used to setup a local development environment for the browser-use project. 3 | # Usage: 4 | # $ ./bin/setup.sh 5 | 6 | ### Bash Environment Setup 7 | # http://redsymbol.net/articles/unofficial-bash-strict-mode/ 8 | # https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html 9 | # set -o xtrace 10 | # set -x 11 | # shopt -s nullglob 12 | set -o errexit 13 | set -o errtrace 14 | set -o nounset 15 | set -o pipefail 16 | IFS=$'\n' 17 | 18 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 19 | cd "$SCRIPT_DIR" 20 | 21 | 22 | if [ -f "$SCRIPT_DIR/lint.sh" ]; then 23 | echo "[√] already inside a cloned browser-use repo" 24 | else 25 | echo "[+] Cloning browser-use repo into current directory: $SCRIPT_DIR" 26 | git clone https://github.com/browser-use/browser-use 27 | cd browser-use 28 | fi 29 | 30 | echo "[+] Installing uv..." 
31 | curl -LsSf https://astral.sh/uv/install.sh | sh 32 | 33 | #git checkout main git pull 34 | echo 35 | echo "[+] Setting up venv" 36 | uv venv 37 | echo 38 | echo "[+] Installing packages in venv" 39 | uv sync --dev --all-extras 40 | echo 41 | echo "[i] Tip: make sure to set BROWSER_USE_LOGGING_LEVEL=debug and your LLM API keys in your .env file" 42 | echo 43 | uv pip show browser-use 44 | 45 | echo "Usage:" 46 | echo " $ browser-use use the CLI" 47 | echo " or" 48 | echo " $ source .venv/bin/activate" 49 | echo " $ ipython use the library" 50 | echo " >>> from browser_use import BrowserSession, Agent" 51 | echo " >>> await Agent(task='book me a flight to fiji', browser=BrowserSession(headless=False)).run()" 52 | echo "" 53 | -------------------------------------------------------------------------------- /bin/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This script is used to run all the main project tests that run on CI via .github/workflows/test.yaml. 3 | # Usage: 4 | # $ ./bin/test.sh 5 | 6 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 7 | cd "$SCRIPT_DIR/.." || exit 1 8 | 9 | exec uv run pytest --numprocesses auto tests/ci $1 $2 $3 10 | -------------------------------------------------------------------------------- /browser_use/README.md: -------------------------------------------------------------------------------- 1 | # Codebase Structure 2 | 3 | > The code structure inspired by https://github.com/Netflix/dispatch. 4 | 5 | Very good structure on how to make a scalable codebase is also in [this repo](https://github.com/zhanymkanov/fastapi-best-practices). 6 | 7 | Just a brief document about how we should structure our backend codebase. 
8 | 9 | ## Code Structure 10 | 11 | ```markdown 12 | src/ 13 | // 14 | models.py 15 | services.py 16 | prompts.py 17 | views.py 18 | utils.py 19 | routers.py 20 | 21 | /_/ 22 | ``` 23 | 24 | ### Service.py 25 | 26 | Always a single file, except if it becomes too long - more than ~500 lines, split it into \_subservices 27 | 28 | ### Views.py 29 | 30 | Always split the views into two parts 31 | 32 | ```python 33 | # All 34 | ... 35 | 36 | # Requests 37 | ... 38 | 39 | # Responses 40 | ... 41 | ``` 42 | 43 | If too long → split into multiple files 44 | 45 | ### Prompts.py 46 | 47 | Single file; if too long → split into multiple files (one prompt per file or so) 48 | 49 | ### Routers.py 50 | 51 | Never split into more than one file 52 | -------------------------------------------------------------------------------- /browser_use/agent/message_manager/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | import logging 5 | from pathlib import Path 6 | from typing import Any 7 | 8 | import anyio 9 | 10 | from browser_use.llm.messages import BaseMessage 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | async def save_conversation( 16 | input_messages: list[BaseMessage], 17 | response: Any, 18 | target: str | Path, 19 | encoding: str | None = None, 20 | ) -> None: 21 | """Save conversation history to file asynchronously.""" 22 | target_path = Path(target) 23 | # create folders if not exists 24 | if target_path.parent: 25 | await anyio.Path(target_path.parent).mkdir(parents=True, exist_ok=True) 26 | 27 | await anyio.Path(target_path).write_text( 28 | await _format_conversation(input_messages, response), 29 | encoding=encoding or 'utf-8', 30 | ) 31 | 32 | 33 | async def _format_conversation(messages: list[BaseMessage], response: Any) -> str: 34 | """Format the conversation including messages and response.""" 35 | lines = [] 36 | 37 | # Format messages 38 | for 
message in messages: 39 | lines.append(f' {message.role} ') 40 | 41 | lines.append(message.text) 42 | lines.append('') # Empty line after each message 43 | 44 | # Format response 45 | lines.append(' RESPONSE') 46 | lines.append(json.dumps(json.loads(response.model_dump_json(exclude_unset=True)), indent=2)) 47 | 48 | return '\n'.join(lines) 49 | 50 | 51 | # Note: _write_messages_to_file and _write_response_to_file have been merged into _format_conversation 52 | # This is more efficient for async operations and reduces file I/O 53 | -------------------------------------------------------------------------------- /browser_use/agent/message_manager/views.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from pydantic import BaseModel, ConfigDict, Field 6 | 7 | from browser_use.llm.messages import ( 8 | BaseMessage, 9 | ) 10 | 11 | if TYPE_CHECKING: 12 | pass 13 | 14 | 15 | class HistoryItem(BaseModel): 16 | """Represents a single agent history item with its data and string representation""" 17 | 18 | step_number: int | None = None 19 | evaluation_previous_goal: str | None = None 20 | memory: str | None = None 21 | next_goal: str | None = None 22 | action_results: str | None = None 23 | error: str | None = None 24 | system_message: str | None = None 25 | 26 | model_config = ConfigDict(arbitrary_types_allowed=True) 27 | 28 | def model_post_init(self, __context) -> None: 29 | """Validate that error and system_message are not both provided""" 30 | if self.error is not None and self.system_message is not None: 31 | raise ValueError('Cannot have both error and system_message at the same time') 32 | 33 | def to_string(self) -> str: 34 | """Get string representation of the history item""" 35 | step_str = 'step' if self.step_number is not None else 'step_unknown' 36 | 37 | if self.error: 38 | return f"""<{step_str}> 39 | {self.error} 40 | """ 41 | elif 
self.system_message: 42 | return '' # empty string 43 | else: 44 | content_parts = [] 45 | 46 | # Only include evaluation_previous_goal if it's not None/empty 47 | if self.evaluation_previous_goal: 48 | content_parts.append(f'{self.evaluation_previous_goal}') 49 | 50 | # Always include memory 51 | if self.memory: 52 | content_parts.append(f'{self.memory}') 53 | 54 | # Only include next_goal if it's not None/empty 55 | if self.next_goal: 56 | content_parts.append(f'{self.next_goal}') 57 | 58 | if self.action_results: 59 | content_parts.append(self.action_results) 60 | 61 | content = '\n'.join(content_parts) 62 | 63 | return f"""<{step_str}> 64 | {content} 65 | """ 66 | 67 | 68 | class MessageHistory(BaseModel): 69 | """History of messages""" 70 | 71 | system_message: BaseMessage | None = None 72 | state_message: BaseMessage | None = None 73 | context_messages: list[BaseMessage] = Field(default_factory=list) 74 | model_config = ConfigDict(arbitrary_types_allowed=True) 75 | 76 | def get_messages(self) -> list[BaseMessage]: 77 | """Get all messages in the correct order: system -> state -> contextual""" 78 | messages = [] 79 | if self.system_message: 80 | messages.append(self.system_message) 81 | if self.state_message: 82 | messages.append(self.state_message) 83 | messages.extend(self.context_messages) 84 | 85 | return messages 86 | 87 | 88 | class MessageManagerState(BaseModel): 89 | """Holds the state for MessageManager""" 90 | 91 | history: MessageHistory = Field(default_factory=MessageHistory) 92 | tool_id: int = 1 93 | agent_history_items: list[HistoryItem] = Field( 94 | default_factory=lambda: [HistoryItem(step_number=0, system_message='Agent initialized')] 95 | ) 96 | read_state_description: str = '' 97 | 98 | model_config = ConfigDict(arbitrary_types_allowed=True) 99 | -------------------------------------------------------------------------------- /browser_use/browser/__init__.py: -------------------------------------------------------------------------------- 
1 | from typing import TYPE_CHECKING 2 | 3 | # Type stubs for lazy imports 4 | if TYPE_CHECKING: 5 | from .profile import BrowserProfile, ProxySettings 6 | from .session import BrowserSession 7 | 8 | 9 | # Lazy imports mapping for heavy browser components 10 | _LAZY_IMPORTS = { 11 | 'ProxySettings': ('.profile', 'ProxySettings'), 12 | 'BrowserProfile': ('.profile', 'BrowserProfile'), 13 | 'BrowserSession': ('.session', 'BrowserSession'), 14 | } 15 | 16 | 17 | def __getattr__(name: str): 18 | """Lazy import mechanism for heavy browser components.""" 19 | if name in _LAZY_IMPORTS: 20 | module_path, attr_name = _LAZY_IMPORTS[name] 21 | try: 22 | from importlib import import_module 23 | 24 | # Use relative import for current package 25 | full_module_path = f'browser_use.browser{module_path}' 26 | module = import_module(full_module_path) 27 | attr = getattr(module, attr_name) 28 | # Cache the imported attribute in the module's globals 29 | globals()[name] = attr 30 | return attr 31 | except ImportError as e: 32 | raise ImportError(f'Failed to import {name} from {full_module_path}: {e}') from e 33 | 34 | raise AttributeError(f"module '{__name__}' has no attribute '{name}'") 35 | 36 | 37 | __all__ = [ 38 | 'BrowserSession', 39 | 'BrowserProfile', 40 | 'ProxySettings', 41 | ] 42 | -------------------------------------------------------------------------------- /browser_use/browser/watchdogs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/2d84ff6605f4d89988f7868633f288f351c6a349/browser_use/browser/watchdogs/__init__.py -------------------------------------------------------------------------------- /browser_use/browser/watchdogs/permissions_watchdog.py: -------------------------------------------------------------------------------- 1 | """Permissions watchdog for granting browser permissions on connection.""" 2 | 3 | from typing import TYPE_CHECKING, ClassVar 4 | 5 | 
from bubus import BaseEvent 6 | 7 | from browser_use.browser.events import BrowserConnectedEvent 8 | from browser_use.browser.watchdog_base import BaseWatchdog 9 | 10 | if TYPE_CHECKING: 11 | pass 12 | 13 | 14 | class PermissionsWatchdog(BaseWatchdog): 15 | """Grants browser permissions when browser connects.""" 16 | 17 | # Event contracts 18 | LISTENS_TO: ClassVar[list[type[BaseEvent]]] = [ 19 | BrowserConnectedEvent, 20 | ] 21 | EMITS: ClassVar[list[type[BaseEvent]]] = [] 22 | 23 | async def on_BrowserConnectedEvent(self, event: BrowserConnectedEvent) -> None: 24 | """Grant permissions when browser connects.""" 25 | permissions = self.browser_session.browser_profile.permissions 26 | 27 | if not permissions: 28 | self.logger.debug('No permissions to grant') 29 | return 30 | 31 | self.logger.debug(f'🔓 Granting browser permissions: {permissions}') 32 | 33 | try: 34 | # Grant permissions using CDP Browser.grantPermissions 35 | # origin=None means grant to all origins 36 | # Browser domain commands don't use session_id 37 | await self.browser_session.cdp_client.send.Browser.grantPermissions( 38 | params={'permissions': permissions} # type: ignore 39 | ) 40 | self.logger.debug(f'✅ Successfully granted permissions: {permissions}') 41 | except Exception as e: 42 | self.logger.error(f'❌ Failed to grant permissions: {str(e)}') 43 | # Don't raise - permissions are not critical to browser operation 44 | -------------------------------------------------------------------------------- /browser_use/browser/watchdogs/screenshot_watchdog.py: -------------------------------------------------------------------------------- 1 | """Screenshot watchdog for handling screenshot requests using CDP.""" 2 | 3 | from typing import TYPE_CHECKING, Any, ClassVar 4 | 5 | from bubus import BaseEvent 6 | from cdp_use.cdp.page import CaptureScreenshotParameters 7 | 8 | from browser_use.browser.events import ScreenshotEvent 9 | from browser_use.browser.views import BrowserError 10 | from 
browser_use.browser.watchdog_base import BaseWatchdog 11 | 12 | if TYPE_CHECKING: 13 | pass 14 | 15 | 16 | class ScreenshotWatchdog(BaseWatchdog): 17 | """Handles screenshot requests using CDP.""" 18 | 19 | # Events this watchdog listens to 20 | LISTENS_TO: ClassVar[list[type[BaseEvent[Any]]]] = [ScreenshotEvent] 21 | 22 | # Events this watchdog emits 23 | EMITS: ClassVar[list[type[BaseEvent[Any]]]] = [] 24 | 25 | async def on_ScreenshotEvent(self, event: ScreenshotEvent) -> str: 26 | """Handle screenshot request using CDP. 27 | 28 | Args: 29 | event: ScreenshotEvent with optional full_page and clip parameters 30 | 31 | Returns: 32 | Dict with 'screenshot' key containing base64-encoded screenshot or None 33 | """ 34 | self.logger.debug('[ScreenshotWatchdog] Handler START - on_ScreenshotEvent called') 35 | try: 36 | # Get CDP client and session for current target 37 | cdp_session = await self.browser_session.get_or_create_cdp_session() 38 | 39 | # Prepare screenshot parameters 40 | params = CaptureScreenshotParameters(format='png', captureBeyondViewport=False) 41 | 42 | # Take screenshot using CDP 43 | self.logger.debug(f'[ScreenshotWatchdog] Taking screenshot with params: {params}') 44 | result = await cdp_session.cdp_client.send.Page.captureScreenshot(params=params, session_id=cdp_session.session_id) 45 | 46 | # Return base64-encoded screenshot data 47 | if result and 'data' in result: 48 | self.logger.debug('[ScreenshotWatchdog] Screenshot captured successfully') 49 | return result['data'] 50 | 51 | raise BrowserError('[ScreenshotWatchdog] Screenshot result missing data') 52 | except Exception as e: 53 | self.logger.error(f'[ScreenshotWatchdog] Screenshot failed: {e}') 54 | raise 55 | finally: 56 | # Try to remove highlights even on failure 57 | try: 58 | await self.browser_session.remove_highlights() 59 | except Exception: 60 | pass 61 | -------------------------------------------------------------------------------- /browser_use/controller/__init__.py: 
-------------------------------------------------------------------------------- 1 | from browser_use.tools.service import Controller 2 | 3 | __all__ = ['Controller'] 4 | -------------------------------------------------------------------------------- /browser_use/dom/playground/multi_act.py: -------------------------------------------------------------------------------- 1 | from browser_use import Agent 2 | from browser_use.browser import BrowserProfile, BrowserSession 3 | from browser_use.browser.types import ViewportSize 4 | from browser_use.llm import ChatAzureOpenAI 5 | 6 | # Initialize the Azure OpenAI client 7 | llm = ChatAzureOpenAI( 8 | model='gpt-4.1-mini', 9 | ) 10 | 11 | 12 | TASK = """ 13 | Go to https://browser-use.github.io/stress-tests/challenges/react-native-web-form.html and complete the React Native Web form by filling in all required fields and submitting. 14 | """ 15 | 16 | 17 | async def main(): 18 | browser = BrowserSession( 19 | browser_profile=BrowserProfile( 20 | window_size=ViewportSize(width=1100, height=1000), 21 | ) 22 | ) 23 | 24 | agent = Agent(task=TASK, llm=llm) 25 | 26 | await agent.run() 27 | 28 | 29 | if __name__ == '__main__': 30 | import asyncio 31 | 32 | asyncio.run(main()) 33 | -------------------------------------------------------------------------------- /browser_use/dom/utils.py: -------------------------------------------------------------------------------- 1 | def cap_text_length(text: str, max_length: int) -> str: 2 | """Cap text length for display.""" 3 | if len(text) <= max_length: 4 | return text 5 | return text[:max_length] + '...' 
6 | -------------------------------------------------------------------------------- /browser_use/exceptions.py: -------------------------------------------------------------------------------- 1 | class LLMException(Exception): 2 | def __init__(self, status_code, message): 3 | self.status_code = status_code 4 | self.message = message 5 | super().__init__(f'Error {status_code}: {message}') 6 | -------------------------------------------------------------------------------- /browser_use/filesystem/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/2d84ff6605f4d89988f7868633f288f351c6a349/browser_use/filesystem/__init__.py -------------------------------------------------------------------------------- /browser_use/integrations/gmail/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Gmail Integration for Browser Use 3 | Provides Gmail API integration for email reading and verification code extraction. 4 | This integration enables agents to read email content and extract verification codes themselves. 
5 | Usage: 6 | from browser_use.integrations.gmail import GmailService, register_gmail_actions 7 | # Option 1: Register Gmail actions with file-based authentication 8 | tools = Tools() 9 | register_gmail_actions(tools) 10 | # Option 2: Register Gmail actions with direct access token (recommended for production) 11 | tools = Tools() 12 | register_gmail_actions(tools, access_token="your_access_token_here") 13 | # Option 3: Use the service directly 14 | gmail = GmailService(access_token="your_access_token_here") 15 | await gmail.authenticate() 16 | emails = await gmail.get_recent_emails() 17 | """ 18 | 19 | # @file purpose: Gmail integration for 2FA email authentication and email reading 20 | 21 | from .actions import register_gmail_actions 22 | from .service import GmailService 23 | 24 | __all__ = ['GmailService', 'register_gmail_actions'] 25 | -------------------------------------------------------------------------------- /browser_use/llm/README.md: -------------------------------------------------------------------------------- 1 | # Browser Use LLMs 2 | 3 | We officially support the following LLMs: 4 | 5 | - OpenAI 6 | - Anthropic 7 | - Google 8 | - Groq 9 | - Ollama 10 | - DeepSeek 11 | 12 | ## Migrating from LangChain 13 | 14 | Because of how we implemented the LLMs, we can technically support anything. If you want to use a LangChain model, you can use the `ChatLangchain` (NOT OFFICIALLY SUPPORTED) class. 15 | 16 | You can find all the details in the [LangChain example](examples/models/langchain/example.py). We suggest you grab that code and use it as a reference. 
17 | -------------------------------------------------------------------------------- /browser_use/llm/aws/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING 2 | 3 | # Type stubs for lazy imports 4 | if TYPE_CHECKING: 5 | from browser_use.llm.aws.chat_anthropic import ChatAnthropicBedrock 6 | from browser_use.llm.aws.chat_bedrock import ChatAWSBedrock 7 | 8 | # Lazy imports mapping for AWS chat models 9 | _LAZY_IMPORTS = { 10 | 'ChatAnthropicBedrock': ('browser_use.llm.aws.chat_anthropic', 'ChatAnthropicBedrock'), 11 | 'ChatAWSBedrock': ('browser_use.llm.aws.chat_bedrock', 'ChatAWSBedrock'), 12 | } 13 | 14 | 15 | def __getattr__(name: str): 16 | """Lazy import mechanism for AWS chat models.""" 17 | if name in _LAZY_IMPORTS: 18 | module_path, attr_name = _LAZY_IMPORTS[name] 19 | try: 20 | from importlib import import_module 21 | 22 | module = import_module(module_path) 23 | attr = getattr(module, attr_name) 24 | # Cache the imported attribute in the module's globals 25 | globals()[name] = attr 26 | return attr 27 | except ImportError as e: 28 | raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e 29 | 30 | raise AttributeError(f"module '{__name__}' has no attribute '{name}'") 31 | 32 | 33 | __all__ = [ 34 | 'ChatAWSBedrock', 35 | 'ChatAnthropicBedrock', 36 | ] 37 | -------------------------------------------------------------------------------- /browser_use/llm/azure/chat.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dataclasses import dataclass 3 | from typing import Any 4 | 5 | import httpx 6 | from openai import AsyncAzureOpenAI as AsyncAzureOpenAIClient 7 | from openai.types.shared import ChatModel 8 | 9 | from browser_use.llm.openai.like import ChatOpenAILike 10 | 11 | 12 | @dataclass 13 | class ChatAzureOpenAI(ChatOpenAILike): 14 | """ 15 | A class for to interact with any provider using the 
OpenAI API schema. 16 | 17 | Args: 18 | model (str): The name of the OpenAI model to use. Defaults to "not-provided". 19 | api_key (Optional[str]): The API key to use. Defaults to "not-provided". 20 | """ 21 | 22 | # Model configuration 23 | model: str | ChatModel 24 | 25 | # Client initialization parameters 26 | api_key: str | None = None 27 | api_version: str | None = '2024-12-01-preview' 28 | azure_endpoint: str | None = None 29 | azure_deployment: str | None = None 30 | base_url: str | None = None 31 | azure_ad_token: str | None = None 32 | azure_ad_token_provider: Any | None = None 33 | 34 | default_headers: dict[str, str] | None = None 35 | default_query: dict[str, Any] | None = None 36 | 37 | client: AsyncAzureOpenAIClient | None = None 38 | 39 | @property 40 | def provider(self) -> str: 41 | return 'azure' 42 | 43 | def _get_client_params(self) -> dict[str, Any]: 44 | _client_params: dict[str, Any] = {} 45 | 46 | self.api_key = self.api_key or os.getenv('AZURE_OPENAI_API_KEY') 47 | self.azure_endpoint = self.azure_endpoint or os.getenv('AZURE_OPENAI_ENDPOINT') 48 | self.azure_deployment = self.azure_deployment or os.getenv('AZURE_OPENAI_DEPLOYMENT') 49 | params_mapping = { 50 | 'api_key': self.api_key, 51 | 'api_version': self.api_version, 52 | 'organization': self.organization, 53 | 'azure_endpoint': self.azure_endpoint, 54 | 'azure_deployment': self.azure_deployment, 55 | 'base_url': self.base_url, 56 | 'azure_ad_token': self.azure_ad_token, 57 | 'azure_ad_token_provider': self.azure_ad_token_provider, 58 | 'http_client': self.http_client, 59 | } 60 | if self.default_headers is not None: 61 | _client_params['default_headers'] = self.default_headers 62 | if self.default_query is not None: 63 | _client_params['default_query'] = self.default_query 64 | 65 | _client_params.update({k: v for k, v in params_mapping.items() if v is not None}) 66 | 67 | return _client_params 68 | 69 | def get_client(self) -> AsyncAzureOpenAIClient: 70 | """ 71 | Returns an 
asynchronous OpenAI client. 72 | 73 | Returns: 74 | AsyncAzureOpenAIClient: An instance of the asynchronous OpenAI client. 75 | """ 76 | if self.client: 77 | return self.client 78 | 79 | _client_params: dict[str, Any] = self._get_client_params() 80 | 81 | if self.http_client: 82 | _client_params['http_client'] = self.http_client 83 | else: 84 | # Create a new async HTTP client with custom limits 85 | _client_params['http_client'] = httpx.AsyncClient( 86 | limits=httpx.Limits(max_connections=20, max_keepalive_connections=6) 87 | ) 88 | 89 | self.client = AsyncAzureOpenAIClient(**_client_params) 90 | 91 | return self.client 92 | -------------------------------------------------------------------------------- /browser_use/llm/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | We have switched all of our code from langchain to openai.types.chat.chat_completion_message_param. 3 | 4 | For easier transition we have 5 | """ 6 | 7 | from typing import Any, Protocol, TypeVar, overload, runtime_checkable 8 | 9 | from pydantic import BaseModel 10 | 11 | from browser_use.llm.messages import BaseMessage 12 | from browser_use.llm.views import ChatInvokeCompletion 13 | 14 | T = TypeVar('T', bound=BaseModel) 15 | 16 | 17 | @runtime_checkable 18 | class BaseChatModel(Protocol): 19 | _verified_api_keys: bool = False 20 | 21 | model: str 22 | 23 | @property 24 | def provider(self) -> str: ... 25 | 26 | @property 27 | def name(self) -> str: ... 28 | 29 | @property 30 | def model_name(self) -> str: 31 | # for legacy support 32 | return self.model 33 | 34 | @overload 35 | async def ainvoke(self, messages: list[BaseMessage], output_format: None = None) -> ChatInvokeCompletion[str]: ... 36 | 37 | @overload 38 | async def ainvoke(self, messages: list[BaseMessage], output_format: type[T]) -> ChatInvokeCompletion[T]: ... 
39 | 40 | async def ainvoke( 41 | self, messages: list[BaseMessage], output_format: type[T] | None = None 42 | ) -> ChatInvokeCompletion[T] | ChatInvokeCompletion[str]: ... 43 | 44 | @classmethod 45 | def __get_pydantic_core_schema__( 46 | cls, 47 | source_type: type, 48 | handler: Any, 49 | ) -> Any: 50 | """ 51 | Allow this Protocol to be used in Pydantic models -> very useful to typesafe the agent settings for example. 52 | Returns a schema that allows any object (since this is a Protocol). 53 | """ 54 | from pydantic_core import core_schema 55 | 56 | # Return a schema that accepts any object for Protocol types 57 | return core_schema.any_schema() 58 | -------------------------------------------------------------------------------- /browser_use/llm/exceptions.py: -------------------------------------------------------------------------------- 1 | class ModelError(Exception): 2 | pass 3 | 4 | 5 | class ModelProviderError(ModelError): 6 | """Exception raised when a model provider returns an error.""" 7 | 8 | def __init__( 9 | self, 10 | message: str, 11 | status_code: int = 502, 12 | model: str | None = None, 13 | ): 14 | super().__init__(message, status_code) 15 | self.model = model 16 | 17 | 18 | class ModelRateLimitError(ModelProviderError): 19 | """Exception raised when a model provider returns a rate limit error.""" 20 | 21 | def __init__( 22 | self, 23 | message: str, 24 | status_code: int = 429, 25 | model: str | None = None, 26 | ): 27 | super().__init__(message, status_code, model) 28 | -------------------------------------------------------------------------------- /browser_use/llm/google/__init__.py: -------------------------------------------------------------------------------- 1 | from browser_use.llm.google.chat import ChatGoogle 2 | 3 | __all__ = ['ChatGoogle'] 4 | -------------------------------------------------------------------------------- /browser_use/llm/ollama/chat.py: 
-------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Any, TypeVar, overload 3 | 4 | import httpx 5 | from ollama import AsyncClient as OllamaAsyncClient 6 | from pydantic import BaseModel 7 | 8 | from browser_use.llm.base import BaseChatModel 9 | from browser_use.llm.exceptions import ModelProviderError 10 | from browser_use.llm.messages import BaseMessage 11 | from browser_use.llm.ollama.serializer import OllamaMessageSerializer 12 | from browser_use.llm.views import ChatInvokeCompletion 13 | 14 | T = TypeVar('T', bound=BaseModel) 15 | 16 | 17 | @dataclass 18 | class ChatOllama(BaseChatModel): 19 | """ 20 | A wrapper around Ollama's chat model. 21 | """ 22 | 23 | model: str 24 | 25 | # # Model params 26 | # TODO (matic): Why is this commented out? 27 | # temperature: float | None = None 28 | 29 | # Client initialization parameters 30 | host: str | None = None 31 | timeout: float | httpx.Timeout | None = None 32 | client_params: dict[str, Any] | None = None 33 | 34 | # Static 35 | @property 36 | def provider(self) -> str: 37 | return 'ollama' 38 | 39 | def _get_client_params(self) -> dict[str, Any]: 40 | """Prepare client parameters dictionary.""" 41 | return { 42 | 'host': self.host, 43 | 'timeout': self.timeout, 44 | 'client_params': self.client_params, 45 | } 46 | 47 | def get_client(self) -> OllamaAsyncClient: 48 | """ 49 | Returns an OllamaAsyncClient client. 50 | """ 51 | return OllamaAsyncClient(host=self.host, timeout=self.timeout, **self.client_params or {}) 52 | 53 | @property 54 | def name(self) -> str: 55 | return self.model 56 | 57 | @overload 58 | async def ainvoke(self, messages: list[BaseMessage], output_format: None = None) -> ChatInvokeCompletion[str]: ... 59 | 60 | @overload 61 | async def ainvoke(self, messages: list[BaseMessage], output_format: type[T]) -> ChatInvokeCompletion[T]: ... 
62 | 63 | async def ainvoke( 64 | self, messages: list[BaseMessage], output_format: type[T] | None = None 65 | ) -> ChatInvokeCompletion[T] | ChatInvokeCompletion[str]: 66 | ollama_messages = OllamaMessageSerializer.serialize_messages(messages) 67 | 68 | try: 69 | if output_format is None: 70 | response = await self.get_client().chat( 71 | model=self.model, 72 | messages=ollama_messages, 73 | ) 74 | 75 | return ChatInvokeCompletion(completion=response.message.content or '', usage=None) 76 | else: 77 | schema = output_format.model_json_schema() 78 | 79 | response = await self.get_client().chat( 80 | model=self.model, 81 | messages=ollama_messages, 82 | format=schema, 83 | ) 84 | 85 | completion = response.message.content or '' 86 | if output_format is not None: 87 | completion = output_format.model_validate_json(completion) 88 | 89 | return ChatInvokeCompletion(completion=completion, usage=None) 90 | 91 | except Exception as e: 92 | raise ModelProviderError(message=str(e), model=self.name) from e 93 | -------------------------------------------------------------------------------- /browser_use/llm/openai/like.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from browser_use.llm.openai.chat import ChatOpenAI 4 | 5 | 6 | @dataclass 7 | class ChatOpenAILike(ChatOpenAI): 8 | """ 9 | A class for to interact with any provider using the OpenAI API schema. 10 | 11 | Args: 12 | model (str): The name of the OpenAI model to use. 
13 | """ 14 | 15 | model: str 16 | -------------------------------------------------------------------------------- /browser_use/llm/openrouter/serializer.py: -------------------------------------------------------------------------------- 1 | from openai.types.chat import ChatCompletionMessageParam 2 | 3 | from browser_use.llm.messages import BaseMessage 4 | from browser_use.llm.openai.serializer import OpenAIMessageSerializer 5 | 6 | 7 | class OpenRouterMessageSerializer: 8 | """ 9 | Serializer for converting between custom message types and OpenRouter message formats. 10 | 11 | OpenRouter uses the OpenAI-compatible API, so we can reuse the OpenAI serializer. 12 | """ 13 | 14 | @staticmethod 15 | def serialize_messages(messages: list[BaseMessage]) -> list[ChatCompletionMessageParam]: 16 | """ 17 | Serialize a list of browser_use messages to OpenRouter-compatible messages. 18 | 19 | Args: 20 | messages: List of browser_use messages 21 | 22 | Returns: 23 | List of OpenRouter-compatible messages (identical to OpenAI format) 24 | """ 25 | # OpenRouter uses the same message format as OpenAI 26 | return OpenAIMessageSerializer.serialize_messages(messages) 27 | -------------------------------------------------------------------------------- /browser_use/llm/tests/test_gemini_image.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import base64 3 | import io 4 | import random 5 | 6 | from lmnr import Laminar 7 | from PIL import Image, ImageDraw, ImageFont 8 | 9 | from browser_use.llm.google.chat import ChatGoogle 10 | from browser_use.llm.google.serializer import GoogleMessageSerializer 11 | from browser_use.llm.messages import ( 12 | BaseMessage, 13 | ContentPartImageParam, 14 | ContentPartTextParam, 15 | ImageURL, 16 | SystemMessage, 17 | UserMessage, 18 | ) 19 | 20 | Laminar.initialize() 21 | 22 | 23 | def create_random_text_image(text: str = 'hello world', width: int = 4000, height: int = 4000) -> str: 24 | # 
Create image with random background color 25 | bg_color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) 26 | image = Image.new('RGB', (width, height), bg_color) 27 | draw = ImageDraw.Draw(image) 28 | 29 | # Try to use a default font, fallback to default if not available 30 | try: 31 | font = ImageFont.truetype('arial.ttf', 24) 32 | except Exception: 33 | font = ImageFont.load_default() 34 | 35 | # Calculate text position to center it 36 | bbox = draw.textbbox((0, 0), text, font=font) 37 | text_width = bbox[2] - bbox[0] 38 | text_height = bbox[3] - bbox[1] 39 | x = (width - text_width) // 2 40 | y = (height - text_height) // 2 41 | 42 | # Draw text with contrasting color 43 | text_color = (255 - bg_color[0], 255 - bg_color[1], 255 - bg_color[2]) 44 | draw.text((x, y), text, fill=text_color, font=font) 45 | 46 | # Convert to base64 47 | buffer = io.BytesIO() 48 | image.save(buffer, format='PNG') 49 | img_data = base64.b64encode(buffer.getvalue()).decode() 50 | 51 | return f'data:image/png;base64,{img_data}' 52 | 53 | 54 | async def test_gemini_image_vision(): 55 | """Test Gemini's ability to see and describe images.""" 56 | 57 | # Create the LLM 58 | llm = ChatGoogle(model='gemini-2.0-flash-exp') 59 | 60 | # Create a random image with text 61 | image_data_url = create_random_text_image('Hello Gemini! Can you see this text?') 62 | 63 | # Create messages with image 64 | messages: list[BaseMessage] = [ 65 | SystemMessage(content='You are a helpful assistant that can see and describe images.'), 66 | UserMessage( 67 | content=[ 68 | ContentPartTextParam(text='What do you see in this image? 
Please describe the text and any visual elements.'), 69 | ContentPartImageParam(image_url=ImageURL(url=image_data_url)), 70 | ] 71 | ), 72 | ] 73 | 74 | # Serialize messages for Google format 75 | serializer = GoogleMessageSerializer() 76 | formatted_messages, system_message = serializer.serialize_messages(messages) 77 | 78 | print('Testing Gemini image vision...') 79 | print(f'System message: {system_message}') 80 | 81 | # Make the API call 82 | try: 83 | response = await llm.ainvoke(messages) 84 | print('\n=== Gemini Response ===') 85 | print(response.completion) 86 | print(response.usage) 87 | print('=======================') 88 | except Exception as e: 89 | print(f'Error calling Gemini: {e}') 90 | print(f'Error type: {type(e)}') 91 | 92 | 93 | if __name__ == '__main__': 94 | asyncio.run(test_gemini_image_vision()) 95 | -------------------------------------------------------------------------------- /browser_use/llm/tests/test_groq_loop.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from browser_use.llm import ContentText 4 | from browser_use.llm.groq.chat import ChatGroq 5 | from browser_use.llm.messages import SystemMessage, UserMessage 6 | 7 | llm = ChatGroq( 8 | model='meta-llama/llama-4-maverick-17b-128e-instruct', 9 | temperature=0.5, 10 | ) 11 | # llm = ChatOpenAI(model='gpt-4.1-mini') 12 | 13 | 14 | async def main(): 15 | from pydantic import BaseModel 16 | 17 | from browser_use.tokens.service import TokenCost 18 | 19 | tk = TokenCost().register_llm(llm) 20 | 21 | class Output(BaseModel): 22 | reasoning: str 23 | answer: str 24 | 25 | message = [ 26 | SystemMessage(content='You are a helpful assistant that can answer questions and help with tasks.'), 27 | UserMessage( 28 | content=[ 29 | ContentText( 30 | text=r"Why is the sky blue? 
write exactly this into reasoning make sure to output ' with exactly like in the input : " 31 | ), 32 | ContentText( 33 | text=""" 34 | The user's request is to find the lowest priced women's plus size one piece swimsuit in color black with a customer rating of at least 5 on Kohls.com. I am currently on the homepage of Kohls. The page has a search bar and various category links. To begin, I need to navigate to the women's section and search for swimsuits. I will start by clicking on the 'Women' category link.""" 35 | ), 36 | ] 37 | ), 38 | ] 39 | 40 | for i in range(10): 41 | print('-' * 50) 42 | print(f'start loop {i}') 43 | response = await llm.ainvoke(message, output_format=Output) 44 | completion = response.completion 45 | print(f'start reasoning: {completion.reasoning}') 46 | print(f'answer: {completion.answer}') 47 | print('-' * 50) 48 | 49 | 50 | if __name__ == '__main__': 51 | asyncio.run(main()) 52 | -------------------------------------------------------------------------------- /browser_use/llm/views.py: -------------------------------------------------------------------------------- 1 | from typing import Generic, TypeVar, Union 2 | 3 | from pydantic import BaseModel 4 | 5 | T = TypeVar('T', bound=Union[BaseModel, str]) 6 | 7 | 8 | class ChatInvokeUsage(BaseModel): 9 | """ 10 | Usage information for a chat model invocation. 11 | """ 12 | 13 | prompt_tokens: int 14 | """The number of tokens in the prompt (this includes the cached tokens as well. 
class ChatInvokeCompletion(BaseModel, Generic[T]):
	"""
	Response from a chat model invocation.

	Generic over ``T``: a pydantic model for structured output, or ``str`` for
	plain-text completions.
	"""

	completion: T
	"""The completion of the response."""

	# Optional reasoning ("thinking") output for providers that expose it
	# (presumably Anthropic extended thinking — confirm against the chat adapters).
	thinking: str | None = None
	# Provider-redacted thinking content, returned instead of plain thinking
	# when the provider withholds the raw reasoning. NOTE(review): assumed — verify.
	redacted_thinking: str | None = None

	usage: ChatInvokeUsage | None
	"""The usage of the response."""
4 | """ 5 | 6 | from browser_use.mcp.client import MCPClient 7 | from browser_use.mcp.controller import MCPToolWrapper 8 | 9 | __all__ = ['MCPClient', 'MCPToolWrapper', 'BrowserUseServer'] # type: ignore 10 | 11 | 12 | def __getattr__(name): 13 | """Lazy import to avoid importing server module when only client is needed.""" 14 | if name == 'BrowserUseServer': 15 | from browser_use.mcp.server import BrowserUseServer 16 | 17 | return BrowserUseServer 18 | raise AttributeError(f"module '{__name__}' has no attribute '{name}'") 19 | -------------------------------------------------------------------------------- /browser_use/mcp/__main__.py: -------------------------------------------------------------------------------- 1 | """Entry point for running MCP server as a module. 2 | 3 | Usage: 4 | python -m browser_use.mcp.server 5 | """ 6 | 7 | import asyncio 8 | 9 | from browser_use.mcp.server import main 10 | 11 | if __name__ == '__main__': 12 | asyncio.run(main()) 13 | -------------------------------------------------------------------------------- /browser_use/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/2d84ff6605f4d89988f7868633f288f351c6a349/browser_use/py.typed -------------------------------------------------------------------------------- /browser_use/screenshots/__init__.py: -------------------------------------------------------------------------------- 1 | # Screenshots package for browser-use 2 | -------------------------------------------------------------------------------- /browser_use/screenshots/service.py: -------------------------------------------------------------------------------- 1 | """ 2 | Screenshot storage service for browser-use agents. 
3 | """ 4 | 5 | import base64 6 | from pathlib import Path 7 | 8 | import anyio 9 | 10 | 11 | class ScreenshotService: 12 | """Simple screenshot storage service that saves screenshots to disk""" 13 | 14 | def __init__(self, agent_directory: str | Path): 15 | """Initialize with agent directory path""" 16 | self.agent_directory = Path(agent_directory) if isinstance(agent_directory, str) else agent_directory 17 | 18 | # Create screenshots subdirectory 19 | self.screenshots_dir = self.agent_directory / 'screenshots' 20 | self.screenshots_dir.mkdir(parents=True, exist_ok=True) 21 | 22 | async def store_screenshot(self, screenshot_b64: str, step_number: int) -> str: 23 | """Store screenshot to disk and return the full path as string""" 24 | screenshot_filename = f'step_{step_number}.png' 25 | screenshot_path = self.screenshots_dir / screenshot_filename 26 | 27 | # Decode base64 and save to disk 28 | screenshot_data = base64.b64decode(screenshot_b64) 29 | 30 | async with await anyio.open_file(screenshot_path, 'wb') as f: 31 | await f.write(screenshot_data) 32 | 33 | return str(screenshot_path) 34 | 35 | async def get_screenshot(self, screenshot_path: str) -> str | None: 36 | """Load screenshot from disk path and return as base64""" 37 | if not screenshot_path: 38 | return None 39 | 40 | path = Path(screenshot_path) 41 | if not path.exists(): 42 | return None 43 | 44 | # Load from disk and encode to base64 45 | async with await anyio.open_file(path, 'rb') as f: 46 | screenshot_data = await f.read() 47 | 48 | return base64.b64encode(screenshot_data).decode('utf-8') 49 | -------------------------------------------------------------------------------- /browser_use/sync/__init__.py: -------------------------------------------------------------------------------- 1 | """Cloud sync module for Browser Use.""" 2 | 3 | from browser_use.sync.auth import CloudAuthConfig, DeviceAuthClient 4 | from browser_use.sync.service import CloudSync 5 | 6 | __all__ = ['CloudAuthConfig', 
# Maps public attribute name -> (module path, attribute name) for deferred loading.
_LAZY_IMPORTS = {
	'ProductTelemetry': ('browser_use.telemetry.service', 'ProductTelemetry'),
	'BaseTelemetryEvent': ('browser_use.telemetry.views', 'BaseTelemetryEvent'),
	'CLITelemetryEvent': ('browser_use.telemetry.views', 'CLITelemetryEvent'),
	'MCPClientTelemetryEvent': ('browser_use.telemetry.views', 'MCPClientTelemetryEvent'),
	'MCPServerTelemetryEvent': ('browser_use.telemetry.views', 'MCPServerTelemetryEvent'),
}


def __getattr__(name: str):
	"""Resolve telemetry symbols on first access instead of at package import time."""
	target = _LAZY_IMPORTS.get(name)
	if target is None:
		raise AttributeError(f"module '{__name__}' has no attribute '{name}'")

	module_path, attr_name = target
	try:
		from importlib import import_module

		resolved = getattr(import_module(module_path), attr_name)
	except ImportError as e:
		raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e

	# Cache on the module so subsequent lookups bypass __getattr__ entirely.
	globals()[name] = resolved
	return resolved
@dataclass
class AgentTelemetryEvent(BaseTelemetryEvent):
	"""Telemetry payload describing one full agent run, from start parameters to final outcome."""

	# start details
	task: str
	model: str
	model_provider: str
	max_steps: int
	max_actions_per_step: int
	use_vision: bool
	version: str
	source: str
	cdp_url: str | None
	# step details (presumably one entry per step, None where a step produced nothing — confirm at call site)
	action_errors: Sequence[str | None]
	action_history: Sequence[list[dict] | None]
	urls_visited: Sequence[str | None]
	# end details
	steps: int
	total_input_tokens: int
	total_duration_seconds: float
	success: bool | None  # NOTE(review): None appears to mean "no definite verdict" — verify
	final_result_response: str | None
	error_message: str | None

	# Event name reported to the telemetry backend (satisfies BaseTelemetryEvent.name).
	name: str = 'agent_event'
class TokenCostCalculated(BaseModel):
	"""Computed cost breakdown for a single LLM invocation's token usage."""

	new_prompt_tokens: int
	new_prompt_cost: float

	prompt_read_cached_tokens: int | None
	prompt_read_cached_cost: float | None

	prompt_cached_creation_tokens: int | None
	prompt_cache_creation_cost: float | None
	"""Anthropic only: The cost of creating the cache."""

	completion_tokens: int
	completion_cost: float

	@property
	def prompt_cost(self) -> float:
		"""Total prompt-side cost: new tokens plus any cache read/creation cost."""
		return self.new_prompt_cost + (self.prompt_read_cached_cost or 0) + (self.prompt_cache_creation_cost or 0)

	@property
	def total_cost(self) -> float:
		"""Total cost of the invocation (prompt side plus completion side)."""
		# Reuse prompt_cost instead of duplicating its formula, so the two
		# properties cannot drift apart if the cost components ever change.
		return self.prompt_cost + self.completion_cost
# Parameters for the click action. Field descriptions below are part of the
# LLM-facing tool schema, so changing their text changes the prompt.
class ClickElementAction(BaseModel):
	# 1-based index of the target element (presumably matching the indices shown
	# to the LLM in the serialized DOM snapshot — confirm in the serializer).
	index: int = Field(ge=1, description='index of the element to click')
	while_holding_ctrl: bool | None = Field(
		default=None,
		description='Set to True to open the navigation in a new background tab (Ctrl+Click behavior). Optional.',
	)
	# expect_download: bool = Field(default=False, description='set True if expecting a download, False otherwise')  # moved to downloads_watchdog.py
	# click_count: int = 1  # TODO
down: bool # True to scroll down, False to scroll up 60 | num_pages: float # Number of pages to scroll (0.5 = half page, 1.0 = one page, etc.) 61 | frame_element_index: int | None = None # Optional element index to find scroll container for 62 | 63 | 64 | class SendKeysAction(BaseModel): 65 | keys: str 66 | 67 | 68 | class UploadFileAction(BaseModel): 69 | index: int 70 | path: str 71 | 72 | 73 | class ExtractPageContentAction(BaseModel): 74 | value: str 75 | 76 | 77 | class NoParamsAction(BaseModel): 78 | """ 79 | Accepts absolutely anything in the incoming data 80 | and discards it, so the final parsed model is empty. 81 | """ 82 | 83 | model_config = ConfigDict(extra='ignore') 84 | # No fields defined - all inputs are ignored automatically 85 | 86 | 87 | class GetDropdownOptionsAction(BaseModel): 88 | index: int = Field(ge=1, description='index of the dropdown element to get the option values for') 89 | 90 | 91 | class SelectDropdownOptionAction(BaseModel): 92 | index: int = Field(ge=1, description='index of the dropdown element to select an option for') 93 | text: str = Field(description='the text or exact value of the option to select') 94 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Docker Setup for Browser-Use 2 | 3 | This directory contains the optimized Docker build system for browser-use, achieving < 30 second builds. 4 | 5 | ## Quick Start 6 | 7 | ```bash 8 | # Build base images (only needed once or when dependencies change) 9 | ./docker/build-base-images.sh 10 | 11 | # Build browser-use 12 | docker build -f Dockerfile.fast -t browseruse . 13 | 14 | # Or use the standard Dockerfile (slower but self-contained) 15 | docker build -t browseruse . 
16 | ``` 17 | 18 | ## Files 19 | 20 | - `Dockerfile` - Standard self-contained build (~2 min) 21 | - `Dockerfile.fast` - Fast build using pre-built base images (~30 sec) 22 | - `docker/` - Base image definitions and build script 23 | - `base-images/system/` - Python + minimal system deps 24 | - `base-images/chromium/` - Adds Chromium browser 25 | - `base-images/python-deps/` - Adds Python dependencies 26 | - `build-base-images.sh` - Script to build all base images 27 | 28 | ## Performance 29 | 30 | | Build Type | Time | 31 | |------------|------| 32 | | Standard Dockerfile | ~2 minutes | 33 | | Fast build (with base images) | ~30 seconds | 34 | | Rebuild after code change | ~16 seconds | 35 | -------------------------------------------------------------------------------- /docker/base-images/chromium/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_TAG=latest 2 | FROM browseruse/base-system:${BASE_TAG} 3 | 4 | WORKDIR /tmp 5 | COPY pyproject.toml ./ 6 | 7 | # Install chromium browser using temporary playwright installation 8 | RUN --mount=type=cache,target=/root/.cache,sharing=locked \ 9 | echo "Installing chromium browser via temporary playwright..." 
# Build and tag one base image.
#   $1 = image name (without registry prefix)
#   $2 = path to its Dockerfile
#   $3 = optional extra build args (e.g. "--build-arg BASE_TAG=latest")
build_image() {
    local name=$1
    local dockerfile=$2
    local build_args="${3:-}"

    echo "[INFO] Building ${name}..."

    local build_cmd="docker build"
    local tag_args="-t ${REGISTRY}/${name}:latest -t ${REGISTRY}/${name}:$(date +%Y%m%d)"

    # Use buildx for multi-platform builds or when pushing to a registry.
    # (An explicit `if` replaces the previous `[ ... ] && x || no-op` chain,
    # which was redundant and masked the test's exit status.)
    if [[ "$PLATFORMS" == *","* ]] || [ "$PUSH" = "true" ]; then
        build_cmd="docker buildx build --platform=$PLATFORMS"
        if [ "$PUSH" = "true" ]; then
            build_cmd="$build_cmd --push"
        fi
    fi

    # $build_cmd/$tag_args/$build_args stay unquoted on purpose: they hold
    # multiple whitespace-separated arguments that must word-split.
    $build_cmd $tag_args $build_args -f "$dockerfile" ../../..
}
4 | 5 | ### Development 6 | 7 | Install the [Mintlify CLI](https://www.npmjs.com/package/mintlify) to preview the documentation changes locally. To install, use the following command 8 | 9 | ``` 10 | npm i -g mintlify 11 | ``` 12 | 13 | Run the following command at the root of your documentation (where mint.json is) 14 | 15 | ``` 16 | mintlify dev 17 | ``` 18 | -------------------------------------------------------------------------------- /docs/cloud/v1/authentication.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Authentication" 3 | description: "Learn how to authenticate with the Browser Use Cloud API" 4 | icon: "lock" 5 | mode: "wide" 6 | --- 7 | 8 | The Browser Use Cloud API uses API keys to authenticate requests. You can obtain an API key from your [Browser Use Cloud dashboard](https://cloud.browser-use.com/settings/api-keys). 9 | 10 | ## API Keys 11 | 12 | All API requests must include your API key in the `Authorization` header: 13 | 14 | ```bash 15 | Authorization: Bearer YOUR_API_KEY 16 | ``` 17 | 18 | Keep your API keys secure and do not share them in publicly accessible areas such as GitHub, client-side code, or in your browser's developer tools. API keys should be stored securely in environment variables or a secure key management system. 19 | 20 | ## Example Request 21 | 22 | Here's an example of how to include your API key in a request using Python: 23 | 24 | ```python 25 | import requests 26 | 27 | API_KEY = 'your_api_key_here' 28 | BASE_URL = 'https://api.browser-use.com/api/v1' 29 | HEADERS = {'Authorization': f'Bearer {API_KEY}'} 30 | 31 | response = requests.get(f'{BASE_URL}/me', headers=HEADERS) 32 | print(response.json()) 33 | ``` 34 | 35 | ## Verifying Authentication 36 | 37 | You can verify that your API key is valid by making a request to the `/api/v1/me` endpoint. See the [Me endpoint documentation](/api-reference/api-v1/me) for more details. 
38 | 39 | ## API Key Security 40 | 41 | To ensure the security of your API keys: 42 | 43 | 1. **Never share your API key** in publicly accessible areas 44 | 2. **Rotate your API keys** periodically 45 | 3. **Use environment variables** to store API keys in your applications 46 | 4. **Implement proper access controls** for your API keys 47 | 5. **Monitor API key usage** for suspicious activity 48 | 49 | If you believe your API key has been compromised, you should immediately revoke it and generate a new one from your Browser Use Cloud dashboard. 50 | -------------------------------------------------------------------------------- /docs/cloud/v1/custom-sdk.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Cloud SDK" 3 | description: "Learn how to set up your own Browser Use Cloud SDK" 4 | icon: "code" 5 | mode: "wide" 6 | --- 7 | 8 | This guide walks you through setting up your own Browser Use Cloud SDK. 9 | 10 | ## Building your own client (OpenAPI) 11 | 12 | 13 | This approach is recommended **only** if you need to run simple tasks and 14 | **don’t require fine-grained control**. 15 | 16 | 17 | The best way to build your own client is to use our [OpenAPI specification](http://api.browser-use.com/openapi.json) to generate a type-safe client library. 18 | 19 | ### Python 20 | 21 | Use [openapi-python-client](https://github.com/openapi-generators/openapi-python-client) to generate a modern Python client: 22 | 23 | ```bash 24 | # Install the generator 25 | pipx install openapi-python-client --include-deps 26 | 27 | # Generate the client 28 | openapi-python-client generate --url http://api.browser-use.com/openapi.json 29 | ``` 30 | 31 | This will create a Python package with full type hints, modern dataclasses, and async support. 32 | 33 | ### TypeScript/JavaScript 34 | 35 | Use [OpenAPI TS](https://openapi-ts.dev/) library to generate a type safe TypeScript client for the Browser Use API. 
```ts
// client.ts

'use client'

import createClient from 'openapi-fetch'
import { paths } from '@/lib/api/v1'

export type Client = ReturnType<typeof createClient<paths>>

export const client = createClient<paths>({
  baseUrl: 'https://api.browser-use.com/',

  // NOTE: You can get your API key from https://cloud.browser-use.com/billing!
  headers: { Authorization: `Bearer ${apiKey}` },
})
```
**Task Step Cost**: Additional cost based on the specific model used for each step 12 | 13 | ## LLM Model Step Pricing 14 | 15 | The following table shows the total cost per step for each available LLM model: 16 | 17 | | Model | Cost per Step | 18 | | -------------------------------- | ------------- | 19 | | GPT-4o | $0.03 | 20 | | GPT-4o mini | $0.01 | 21 | | GPT-4.1 | $0.03 | 22 | | GPT-4.1 mini | $0.01 | 23 | | O4 mini | $0.02 | 24 | | O3 | $0.03 | 25 | | Gemini 2.0 Flash | $0.01 | 26 | | Gemini 2.0 Flash Lite | $0.01 | 27 | | Gemini 2.5 Flash Preview (04/17) | $0.01 | 28 | | Gemini 2.5 Flash | $0.01 | 29 | | Gemini 2.5 Pro | $0.03 | 30 | | Claude 3.7 Sonnet (2025-02-19) | $0.03 | 31 | | Claude Sonnet 4 (2025-05-14) | $0.03 | 32 | | Llama 4 Maverick 17B Instruct | $0.01 | 33 | 34 | ## Example Cost Calculation 35 | 36 | For example, using GPT-4.1 for a 10 step task: 37 | 38 | - Task initialization: $0.01 39 | - 10 steps x \$0.03 per step = \$0.30 40 | - **Total cost: $0.31** 41 | -------------------------------------------------------------------------------- /docs/cloud/v1/quickstart.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Quickstart" 3 | description: "Learn how to get started with the Browser Use Cloud API" 4 | icon: "cloud" 5 | mode: "wide" 6 | --- 7 | 8 | Browser Use Cloud Banner 13 | Browser Use Cloud Banner 18 | 19 | 20 | You need an active subscription and an API key from 21 | [cloud.browser-use.com/billing](https://cloud.browser-use.com/billing). For 22 | detailed pricing information, see our [pricing page](/cloud/v1/pricing). 23 | 24 | 25 | ## Creating Your First Agent 26 | 27 | To understand how the API works visit the [Run Task](/api-reference/api-v1/run-task?playground=open) page. 
28 | 29 | ```bash 30 | curl -X POST https://api.browser-use.com/api/v1/run-task \ 31 | -H "Authorization: Bearer your_api_key_here" \ 32 | -H "Content-Type: application/json" \ 33 | -d '{ 34 | "task": "Go to google.com and search for Browser Use" 35 | }' 36 | ``` 37 | 38 | `run-task` API returns a task ID, which you can query to get the task status, live preview URL, and the result output. 39 | 40 | 41 | To play around with the API, you can use the [Browser Use Cloud 42 | Playground](https://cloud.browser-use.com/playground). 43 | 44 | 45 | For the full implementation guide see the [Implementation](/cloud/v1/implementation) page. 46 | -------------------------------------------------------------------------------- /docs/customize/agent/basics.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Basics" 3 | description: "" 4 | icon: "play" 5 | mode: "wide" 6 | --- 7 | 8 | 9 | ```python 10 | from browser_use import Agent, ChatOpenAI 11 | 12 | agent = Agent( 13 | task="Search for latest news about AI", 14 | llm=ChatOpenAI(model="gpt-4.1-mini"), 15 | ) 16 | 17 | async def main(): 18 | history = await agent.run(max_steps=100) 19 | ``` 20 | 21 | - `task`: The task you want to automate. 22 | - `llm`: Your favorite LLM. See Supported Models. 23 | 24 | 25 | The agent is executed using the async `run()` method: 26 | 27 | - `max_steps` (default: `100`): Maximum number of steps an agent can take. 
28 | -------------------------------------------------------------------------------- /docs/customize/agent/output-format.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Output Format" 3 | description: "" 4 | icon: "arrow-right-to-bracket" 5 | mode: "wide" 6 | --- 7 | 8 | ## Agent History 9 | 10 | The `run()` method returns an `AgentHistoryList` object with the complete execution history: 11 | 12 | ```python 13 | history = await agent.run() 14 | 15 | # Access useful information 16 | history.urls() # List of visited URLs 17 | history.screenshot_paths() # List of screenshot paths 18 | history.screenshots() # List of screenshots as base64 strings 19 | history.action_names() # Names of executed actions 20 | history.extracted_content() # List of extracted content from all actions 21 | history.errors() # List of errors (with None for steps without errors) 22 | history.model_actions() # All actions with their parameters 23 | history.model_outputs() # All model outputs from history 24 | history.last_action() # Last action in history 25 | 26 | # Analysis methods 27 | history.final_result() # Get the final extracted content (last step) 28 | history.is_done() # Check if agent completed successfully 29 | history.is_successful() # Check if agent completed successfully (returns None if not done) 30 | history.has_errors() # Check if any errors occurred 31 | history.model_thoughts() # Get the agent's reasoning process (AgentBrain objects) 32 | history.action_results() # Get all ActionResult objects from history 33 | history.action_history() # Get truncated action history with essential fields 34 | history.number_of_steps() # Get the number of steps in the history 35 | history.total_duration_seconds() # Get total duration of all steps in seconds 36 | 37 | # Structured output (when using output_model_schema) 38 | history.structured_output # Property that returns parsed structured output 39 | ``` 40 | 41 | See all helper 
methods in the [AgentHistoryList source code](https://github.com/browser-use/browser-use/blob/main/browser_use/agent/views.py#L301). 42 | 43 | ## Structured Output 44 | 45 | For structured output, use the `output_model_schema` parameter with a Pydantic model. [Example](https://github.com/browser-use/browser-use/blob/main/examples/features/custom_output.py). 46 | -------------------------------------------------------------------------------- /docs/customize/browser/basics.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Basics" 3 | description: "" 4 | icon: "play" 5 | --- 6 | 7 | 8 | --- 9 | 10 | ```python 11 | from browser_use import Agent, Browser, ChatOpenAI 12 | 13 | browser = Browser( 14 | headless=False, # Show browser window 15 | window_size={'width': 1000, 'height': 700}, # Set window size 16 | ) 17 | 18 | agent = Agent( 19 | task='Search for Browser Use', 20 | browser=browser, 21 | llm=ChatOpenAI(model='gpt-4.1-mini'), 22 | ) 23 | 24 | 25 | async def main(): 26 | await agent.run() 27 | ``` 28 | -------------------------------------------------------------------------------- /docs/customize/browser/real-browser.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Real Browser" 3 | description: "" 4 | icon: "arrow-right-to-bracket" 5 | --- 6 | 7 | Connect your existing Chrome browser to preserve authentication. 
8 | 9 | ## Basic Example 10 | 11 | ```python 12 | from browser_use import Agent, Browser, ChatOpenAI 13 | 14 | # Connect to your existing Chrome browser 15 | browser = Browser( 16 | executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', 17 | user_data_dir='~/Library/Application Support/Google/Chrome', 18 | profile_directory='Default', 19 | ) 20 | 21 | agent = Agent( 22 | task='Visit https://duckduckgo.com and search for "browser-use founders"', 23 | browser=browser, 24 | llm=ChatOpenAI(model='gpt-4.1-mini'), 25 | ) 26 | async def main(): 27 | await agent.run() 28 | ``` 29 | 30 | > **Note:** You need to fully close chrome before running this example. Also, Google blocks this approach currently so we use DuckDuckGo instead. 31 | 32 | 33 | 34 | 35 | ## How it Works 36 | 37 | 1. **`executable_path`** - Path to your Chrome installation 38 | 2. **`user_data_dir`** - Your Chrome profile folder (keeps cookies, extensions, bookmarks) 39 | 3. **`profile_directory`** - Specific profile name (Default, Profile 1, etc.) 
40 | 41 | 42 | ## Platform Paths 43 | 44 | ```python 45 | # macOS 46 | executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome' 47 | user_data_dir='~/Library/Application Support/Google/Chrome' 48 | 49 | # Windows 50 | executable_path='C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe' 51 | user_data_dir='%LOCALAPPDATA%\\Google\\Chrome\\User Data' 52 | 53 | # Linux 54 | executable_path='/usr/bin/google-chrome' 55 | user_data_dir='~/.config/google-chrome' 56 | ``` 57 | -------------------------------------------------------------------------------- /docs/customize/browser/remote.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Remote Browser" 3 | description: "" 4 | icon: "cloud" 5 | mode: "wide" 6 | --- 7 | 8 | ```python 9 | from browser_use import Agent, Browser, ChatOpenAI 10 | 11 | # Connect to remote browser 12 | browser = Browser( 13 | cdp_url='http://remote-server:9222' 14 | ) 15 | 16 | 17 | agent = Agent( 18 | task="Your task here", 19 | llm=ChatOpenAI(model='gpt-4.1-mini'), 20 | browser=browser, 21 | ) 22 | ``` 23 | 24 | 25 | ## Get a CDP URL 26 | ### Cloud Browser 27 | Get a CDP URL from your favorite browser provider like AnchorBrowser, HyperBrowser, BrowserBase, Steel.dev, etc. 
28 | 29 | 30 | 31 | 32 | ### Proxy Connection 33 | 34 | ```python 35 | 36 | from browser_use import Agent, Browser, ChatOpenAI 37 | from browser_use.browser import ProxySettings 38 | 39 | browser = Browser( 40 | headless=False, 41 | proxy=ProxySettings( 42 | server="http://proxy-server:8080", 43 | username="proxy-user", 44 | password="proxy-pass" 45 | ), 46 | cdp_url="http://remote-server:9222" 47 | ) 48 | 49 | 50 | agent = Agent( 51 | task="Your task here", 52 | llm=ChatOpenAI(model='gpt-4.1-mini'), 53 | browser=browser, 54 | ) 55 | ``` 56 | -------------------------------------------------------------------------------- /docs/customize/examples/chain-agents.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chain Agents" 3 | description: "Chain multiple tasks together with the same agent and browser session." 4 | icon: "link" 5 | mode: "wide" 6 | --- 7 | 8 | ## Chain Agent Tasks 9 | 10 | Keep your browser session alive and chain multiple tasks together. Perfect for conversational workflows or multi-step processes. 11 | 12 | ```python 13 | import asyncio 14 | from dotenv import load_dotenv 15 | load_dotenv() 16 | 17 | from browser_use import Agent, BrowserProfile 18 | 19 | profile = BrowserProfile(keep_alive=True) 20 | 21 | async def main(): 22 | agent = Agent(task="Go to reddit.com", browser_profile=profile) 23 | await agent.run(max_steps=1) 24 | 25 | while True: 26 | user_response = input('\n👤 New task or "q" to quit: ') 27 | if user_response.lower() == 'q': 28 | break 29 | agent.add_new_task(f'New task: {user_response}') 30 | await agent.run() 31 | 32 | if __name__ == '__main__': 33 | asyncio.run(main()) 34 | ``` 35 | 36 | ## How It Works 37 | 38 | 1. **Persistent Browser**: `BrowserProfile(keep_alive=True)` prevents browser from closing between tasks 39 | 2. **Task Chaining**: Use `agent.add_new_task()` to add follow-up tasks 40 | 3. 
**Context Preservation**: Agent maintains memory and browser state across tasks 41 | 4. **Interactive Flow**: Perfect for conversational interfaces or complex workflows 42 | 43 | 44 | The browser session remains active throughout the entire chain, preserving all cookies, local storage, and page state. 45 | 46 | -------------------------------------------------------------------------------- /docs/customize/examples/fast-agent.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Fast Agent" 3 | description: "Optimize agent performance for maximum speed and efficiency." 4 | icon: "bolt" 5 | mode: "wide" 6 | --- 7 | 8 | ```python 9 | import asyncio 10 | from dotenv import load_dotenv 11 | load_dotenv() 12 | 13 | from browser_use import Agent, BrowserProfile 14 | 15 | # Speed optimization instructions for the model 16 | SPEED_OPTIMIZATION_PROMPT = """ 17 | Speed optimization instructions: 18 | - Be extremely concise and direct in your responses 19 | - Get to the goal as quickly as possible 20 | - Use multi-action sequences whenever possible to reduce steps 21 | """ 22 | 23 | 24 | async def main(): 25 | # 1. Use fast LLM - Llama 4 on Groq for ultra-fast inference 26 | from browser_use import ChatGroq 27 | 28 | llm = ChatGroq( 29 | model='meta-llama/llama-4-maverick-17b-128e-instruct', 30 | temperature=0.0, 31 | ) 32 | # from browser_use import ChatGoogle 33 | 34 | # llm = ChatGoogle(model='gemini-2.5-flash') 35 | 36 | # 2. Create speed-optimized browser profile 37 | browser_profile = BrowserProfile( 38 | minimum_wait_page_load_time=0.1, 39 | wait_between_actions=0.1, 40 | headless=False, 41 | ) 42 | 43 | # 3. Define a speed-focused task 44 | task = """ 45 | 1. Go to reddit https://www.reddit.com/search/?q=browser+agent&type=communities 46 | 2. Click directly on the first 5 communities to open each in new tabs 47 | 3. Find out what the latest post is about, and switch directly to the next tab 48 | 4. 
Return the latest post summary for each page 49 | """ 50 | 51 | # 4. Create agent with all speed optimizations 52 | agent = Agent( 53 | task=task, 54 | llm=llm, 55 | flash_mode=True, # Disables thinking in the LLM output for maximum speed 56 | browser_profile=browser_profile, 57 | extend_system_message=SPEED_OPTIMIZATION_PROMPT, 58 | ) 59 | 60 | await agent.run() 61 | 62 | 63 | if __name__ == '__main__': 64 | asyncio.run(main()) 65 | ``` 66 | 67 | ## Speed Optimization Techniques 68 | 69 | ### 1. Fast LLM Models 70 | ```python 71 | # Groq - Ultra-fast inference 72 | from browser_use import ChatGroq 73 | llm = ChatGroq(model='meta-llama/llama-4-maverick-17b-128e-instruct') 74 | 75 | # Google Gemini Flash - Optimized for speed 76 | from browser_use import ChatGoogle 77 | llm = ChatGoogle(model='gemini-2.5-flash') 78 | ``` 79 | 80 | ### 2. Browser Optimizations 81 | ```python 82 | browser_profile = BrowserProfile( 83 | minimum_wait_page_load_time=0.1, # Reduce wait time 84 | wait_between_actions=0.1, # Faster action execution 85 | headless=True, # No GUI overhead 86 | ) 87 | ``` 88 | 89 | ### 3. Agent Optimizations 90 | ```python 91 | agent = Agent( 92 | task=task, 93 | llm=llm, 94 | flash_mode=True, # Skip LLM thinking process 95 | extend_system_message=SPEED_PROMPT, # Optimize LLM behavior 96 | ) 97 | ``` 98 | -------------------------------------------------------------------------------- /docs/customize/examples/more-examples.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "More Examples" 3 | description: "Explore additional examples and use cases on GitHub." 4 | icon: "arrow-up-right-from-square" 5 | mode: "wide" 6 | --- 7 | 8 | ### 🔗 Browse All Examples 9 | 10 | **[View Complete Examples Directory →](https://github.com/browser-use/browser-use/tree/main/examples)** 11 | 12 | ### 🤝 Contributing Examples 13 | 14 | Have a great use case? 
**[Submit a pull request](https://github.com/browser-use/browser-use/pulls)** with your example! 15 | -------------------------------------------------------------------------------- /docs/customize/examples/parallel-browser.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Parallel Agents" 3 | description: "Run multiple agents in parallel with separate browser instances" 4 | icon: "copy" 5 | --- 6 | 7 | ```python 8 | import asyncio 9 | from browser_use import Agent, Browser, ChatOpenAI 10 | 11 | async def main(): 12 | # Create 3 separate browser instances 13 | browsers = [ 14 | Browser( 15 | user_data_dir=f'./temp-profile-{i}', 16 | headless=False, 17 | ) 18 | for i in range(3) 19 | ] 20 | 21 | # Create 3 agents with different tasks 22 | agents = [ 23 | Agent( 24 | task='Search for "browser automation" on Google', 25 | browser=browsers[0], 26 | llm=ChatOpenAI(model='gpt-4.1-mini'), 27 | ), 28 | Agent( 29 | task='Search for "AI agents" on DuckDuckGo', 30 | browser=browsers[1], 31 | llm=ChatOpenAI(model='gpt-4.1-mini'), 32 | ), 33 | Agent( 34 | task='Visit Wikipedia and search for "web scraping"', 35 | browser=browsers[2], 36 | llm=ChatOpenAI(model='gpt-4.1-mini'), 37 | ), 38 | ] 39 | 40 | # Run all agents in parallel 41 | tasks = [agent.run() for agent in agents] 42 | results = await asyncio.gather(*tasks, return_exceptions=True) 43 | 44 | print('🎉 All agents completed!') 45 | ``` 46 | 47 | > **Note:** This is experimental, and agents might conflict with each other. 48 | -------------------------------------------------------------------------------- /docs/customize/examples/prompting-guide.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Prompting Guide" 3 | description: "Tips and tricks " 4 | icon: "lightbulb" 5 | --- 6 | 7 | Prompting can drastically improve performance and solve existing limitations of the library. 8 | 9 | ### 1. 
Be Specific vs Open-Ended 10 | 11 | **✅ Specific (Recommended)** 12 | ```python 13 | task = """ 14 | 1. Go to https://quotes.toscrape.com/ 15 | 2. Use extract_structured_data action with the query "first 3 quotes with their authors" 16 | 3. Save results to quotes.csv using write_file action 17 | 4. Do a google search for the first quote and find when it was written 18 | """ 19 | ``` 20 | 21 | **❌ Open-Ended** 22 | ```python 23 | task = "Go to web and make money" 24 | ``` 25 | 26 | ### 2. Name Actions Directly 27 | 28 | When you know exactly what the agent should do, reference actions by name: 29 | 30 | ```python 31 | task = """ 32 | 1. Use search_google action to find "Python tutorials" 33 | 2. Use click_element_by_index to open first result in a new tab 34 | 3. Use scroll action to scroll down 2 pages 35 | 4. Use extract_structured_data to extract the names of the first 5 items 36 | 5. Wait for 2 seconds if the page is not loaded, refresh it and wait 10 sec 37 | 6. Use send_keys action with "Tab Tab ArrowDown Enter" 38 | """ 39 | ``` 40 | 41 | See [Available Tools](/customize/tools/available) for the complete list of actions. 42 | 43 | 44 | ### 3. Handle interaction problems via keyboard navigation 45 | 46 | Sometimes buttons can't be clicked (you found a bug in the library - open an issue). 47 | Good news - often you can work around it with keyboard navigation! 48 | 49 | ```python 50 | task = """ 51 | If the submit button cannot be clicked: 52 | 1. Use send_keys action with "Tab Tab Enter" to navigate and activate 53 | 2. Or use send_keys with "ArrowDown ArrowDown Enter" for form submission 54 | """ 55 | ``` 56 | 57 | 58 | 59 | 60 | ### 4. Custom Actions Integration 61 | 62 | ```python 63 | # When you have custom actions 64 | @controller.action("Get 2FA code from authenticator app") 65 | async def get_2fa_code(): 66 | # Your implementation 67 | pass 68 | 69 | task = """ 70 | Login with 2FA: 71 | 1. Enter username/password 72 | 2. 
When prompted for 2FA, use get_2fa_code action 73 | 3. NEVER try to extract 2FA codes from the page manually 74 | 4. ALWAYS use the get_2fa_code action for authentication codes 75 | """ 76 | ``` 77 | 78 | ### 5. Error Recovery 79 | 80 | ```python 81 | task = """ 82 | Robust data extraction: 83 | 1. Go to openai.com to find their CEO 84 | 2. If navigation fails due to anti-bot protection: 85 | - Use google search to find the CEO 86 | 3. If page times out, use go_back and try alternative approach 87 | """ 88 | ``` 89 | 90 | 91 | 92 | The key to effective prompting is being specific about actions. 93 | -------------------------------------------------------------------------------- /docs/customize/examples/secure.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Secure Setup" 3 | description: "Azure OpenAI with data privacy and security configuration." 4 | icon: "shield-check" 5 | mode: "wide" 6 | --- 7 | 8 | ## Secure Setup with Azure OpenAI 9 | 10 | Enterprise-grade security with Azure OpenAI, data privacy protection, and restricted browser access. 
11 | 12 | ```python 13 | import asyncio 14 | import os 15 | from dotenv import load_dotenv 16 | load_dotenv() 17 | os.environ['ANONYMIZED_TELEMETRY'] = 'false' 18 | from browser_use import Agent, BrowserProfile, ChatAzureOpenAI 19 | 20 | # Azure OpenAI configuration 21 | api_key = os.getenv('AZURE_OPENAI_KEY') 22 | azure_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT') 23 | llm = ChatAzureOpenAI(model='gpt-4.1-mini', api_key=api_key, azure_endpoint=azure_endpoint) 24 | 25 | # Secure browser configuration 26 | browser_profile = BrowserProfile( 27 | allowed_domains=['*google.com', 'browser-use.com'], 28 | enable_default_extensions=False 29 | ) 30 | 31 | # Sensitive data filtering 32 | sensitive_data = {'company_name': 'browser-use'} 33 | 34 | # Create secure agent 35 | agent = Agent( 36 | task='Find the founders of the sensitive company_name', 37 | llm=llm, 38 | browser_profile=browser_profile, 39 | sensitive_data=sensitive_data 40 | ) 41 | 42 | async def main(): 43 | await agent.run(max_steps=10) 44 | 45 | asyncio.run(main()) 46 | ``` 47 | 48 | ## Security Features 49 | 50 | **Azure OpenAI:** 51 | - NOT used to train OpenAI models 52 | - NOT shared with other customers 53 | - Hosted entirely within Azure 54 | - 30-day retention (or zero with Limited Access Program) 55 | 56 | **Browser Security:** 57 | - `allowed_domains`: Restrict navigation to trusted sites 58 | - `enable_default_extensions=False`: Disable potentially dangerous extensions 59 | - `sensitive_data`: Filter sensitive information from LLM input 60 | 61 | 62 | 63 | 64 | For enterprise deployments contact support@browser-use.com. 65 | 66 | -------------------------------------------------------------------------------- /docs/customize/examples/sensitive-data.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Sensitive Data" 3 | description: "Handle secret information securely and avoid sending PII & passwords to the LLM." 
4 | icon: "shield" 5 | mode: "wide" 6 | --- 7 | 8 | 9 | ```python 10 | import os 11 | from browser_use import Agent, Browser, ChatOpenAI 12 | os.environ['ANONYMIZED_TELEMETRY'] = "false" 13 | 14 | 15 | company_credentials = {'x_user': 'your-real-username@email.com', 'x_pass': 'your-real-password123'} 16 | 17 | # Option 1: Secrets available for all websites 18 | sensitive_data = company_credentials 19 | 20 | # Option 2: Secrets per domain with regex 21 | # sensitive_data: dict[str, str | dict[str, str]] = { 22 | # 'https://*.example-staging.com': company_credentials, 23 | # 'http*://test.example.com': company_credentials, 24 | # 'https://example.com': company_credentials, 25 | # 'https://*.google.com': {'g_email': 'user@gmail.com', 'g_pass': 'google_password'}, 26 | # } 27 | 28 | 29 | agent = Agent( 30 | task='Log into example.com with username x_user and password x_pass', 31 | sensitive_data=sensitive_data, 32 | use_vision=False, # Disable vision to prevent LLM seeing sensitive data in screenshots 33 | llm=ChatOpenAI(model='gpt-4.1-mini'), 34 | ) 35 | async def main(): 36 | await agent.run() 37 | ``` 38 | 39 | ## How it Works 40 | 1. **Text Filtering**: The LLM only sees placeholders (`x_user`, `x_pass`), we filter your sensitive data from the input text. 41 | 2. 
**DOM Actions**: Real values are injected directly into form fields after the LLM call 42 | 43 | ## Best Practices 44 | - Use `Browser(allowed_domains=[...])` to restrict navigation 45 | - Set `use_vision=False` to prevent screenshot leaks 46 | - Use `storage_state='./auth.json'` for login cookies instead of passwords when possible 47 | -------------------------------------------------------------------------------- /docs/customize/tools/add.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Add Tools" 3 | description: "" 4 | icon: "plus" 5 | mode: "wide" 6 | --- 7 | 8 | 9 | Examples: 10 | - deterministic clicks 11 | - file handling 12 | - calling APIs 13 | - human-in-the-loop 14 | - browser interactions 15 | - calling LLMs 16 | - get 2fa codes 17 | - send emails 18 | - ... 19 | 20 | Simply add `@tools.action(...)` to your function. 21 | 22 | ```python 23 | from browser_use import Tools, Agent 24 | 25 | tools = Tools() 26 | 27 | @tools.action(description='Ask human for help with a question') 28 | def ask_human(question: str) -> ActionResult: 29 | answer = input(f'{question} > ') 30 | return f'The human responded with: {answer}' 31 | ``` 32 | 33 | ```python 34 | agent = Agent(task='...', llm=llm, tools=tools) 35 | ``` 36 | 37 | - **`description`** *(required)* - What the tool does, the LLM uses this to decide when to call it. 38 | - **`allowed_domains`** - List of domains where tool can run (e.g. `['*.example.com']`), defaults to all domains 39 | 40 | The Agent fills your function parameters based on their names, type hints, & defaults. 41 | 42 | 43 | ## Available Objects 44 | 45 | Your function has access to these objects: 46 | 47 | - **`browser_session: BrowserSession`** - Current browser session for CDP access 48 | - **`cdp_client`** - Direct Chrome DevTools Protocol client 49 | - **`page_extraction_llm: BaseChatModel`** - The LLM you pass into agent. This can be used to do a custom llm call here. 
50 | - **`file_system: FileSystem`** - File system access 51 | - **`available_file_paths: list[str]`** - Available files for upload/processing 52 | - **`has_sensitive_data: bool`** - Whether action contains sensitive data 53 | 54 | ## Pydantic Input 55 | 56 | You can use Pydantic for the tool parameters: 57 | 58 | ```python 59 | from pydantic import BaseModel 60 | 61 | class Cars(BaseModel): 62 | name: str = Field(description='The name of the car, e.g. "Toyota Camry"') 63 | price: int = Field(description='The price of the car as int in USD, e.g. 25000') 64 | 65 | @tools.action(description='Save cars to file') 66 | def save_cars(cars: list[Cars]) -> str: 67 | with open('cars.json', 'w') as f: 68 | json.dump(cars, f) 69 | return f'Saved {len(cars)} cars to file' 70 | 71 | task = "find cars and save them to file" 72 | ``` 73 | ## Domain Restrictions 74 | 75 | Limit tools to specific domains: 76 | 77 | ```python 78 | @tools.action( 79 | description='Fill out banking forms', 80 | allowed_domains=['https://mybank.com'] 81 | ) 82 | def fill_bank_form(account_number: str) -> str: 83 | # Only works on mybank.com 84 | return f'Filled form for account {account_number}' 85 | ``` 86 | -------------------------------------------------------------------------------- /docs/customize/tools/available.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Available Tools" 3 | description: "Here is the [source code](https://github.com/browser-use/browser-use/blob/main/browser_use/tools/service.py) for the default tools:" 4 | icon: "list" 5 | mode: "wide" 6 | --- 7 | 8 | 9 | 10 | 11 | ### Navigation & Browser Control 12 | - **`search_google`** - Search queries in Google 13 | - **`go_to_url`** - Navigate to URLs 14 | - **`go_back`** - Go back in browser history 15 | - **`wait`** - Wait for specified seconds 16 | 17 | ### Page Interaction 18 | - **`click_element_by_index`** - Click elements by their index 19 | - **`input_text`** - Input 
text into form fields 20 | - **`upload_file_to_element`** - Upload files to file inputs 21 | - **`scroll`** - Scroll the page up/down 22 | - **`scroll_to_text`** - Scroll to specific text on page 23 | - **`send_keys`** - Send special keys (Enter, Escape, etc.) 24 | 25 | ### Tab Management 26 | - **`switch_tab`** - Switch between browser tabs 27 | - **`close_tab`** - Close browser tabs 28 | 29 | ### Content Extraction 30 | - **`extract_structured_data`** - Extract data from webpages using LLM 31 | 32 | ### Form Controls 33 | - **`get_dropdown_options`** - Get dropdown option values 34 | - **`select_dropdown_option`** - Select dropdown options 35 | 36 | ### File Operations 37 | - **`write_file`** - Write content to files 38 | - **`read_file`** - Read file contents 39 | - **`replace_file_str`** - Replace text in files 40 | 41 | ### Task Completion 42 | - **`done`** - Complete the task (always available) 43 | -------------------------------------------------------------------------------- /docs/customize/tools/basics.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Basics" 3 | description: "Tools are the functions that the agent has to interact with the world." 
4 | icon: "play" 5 | mode: "wide" 6 | --- 7 | 8 | 9 | ## Quick Example 10 | 11 | 12 | ```python 13 | from browser_use import Tools, ActionResult 14 | 15 | tools = Tools() 16 | 17 | @tools.action('Ask human for help with a question') 18 | def ask_human(question: str) -> ActionResult: 19 | answer = input(f'{question} > ') 20 | return f'The human responded with: {answer}' 21 | 22 | agent = Agent( 23 | task='Ask human for help', 24 | llm=llm, 25 | tools=tools, 26 | ) 27 | ``` 28 | -------------------------------------------------------------------------------- /docs/customize/tools/remove.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Remove Tools" 3 | description: "You can exclude default tools:" 4 | icon: "minus" 5 | mode: "wide" 6 | --- 7 | 8 | 9 | ```python 10 | from browser_use import Tools 11 | 12 | tools = Tools(exclude_actions=['search_google', 'wait']) 13 | agent = Agent(task='...', llm=llm, tools=tools) 14 | ``` 15 | -------------------------------------------------------------------------------- /docs/customize/tools/response.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Tool Response" 3 | description: "" 4 | icon: "arrow-turn-down-left" 5 | mode: "wide" 6 | --- 7 | 8 | Tools return results using `ActionResult` or simple strings. 9 | 10 | ## Return Types 11 | 12 | ```python 13 | @tools.action('My tool') 14 | def my_tool() -> str: 15 | return "Task completed successfully" 16 | 17 | @tools.action('Advanced tool') 18 | def advanced_tool() -> ActionResult: 19 | return ActionResult( 20 | extracted_content="Main result", 21 | long_term_memory="Remember this info", 22 | error="Something went wrong", 23 | is_done=True, 24 | success=True, 25 | attachments=["file.pdf"], 26 | ) 27 | ``` 28 | 29 | ## ActionResult Properties 30 | 31 | - `extracted_content` (default: `None`) - Main result passed to LLM, this is equivalent to returning a string. 
32 | - `include_extracted_content_only_once` (default: `False`) - Set to `True` for large content to include it only once in the LLM input. 33 | - `long_term_memory` (default: `None`) - This is always included in the LLM input for all future steps. 34 | - `error` (default: `None`) - Error message, we catch exceptions and set this automatically. This is always included in the LLM input. 35 | - `is_done` (default: `False`) - Tool completes entire task 36 | - `success` (default: `None`) - Task success (only valid with `is_done=True`) 37 | - `attachments` (default: `None`) - Files to show user 38 | - `metadata` (default: `None`) - Debug/observability data 39 | 40 | ## Why `extracted_content` and `long_term_memory`? 41 | With this you control the context for the LLM. 42 | 43 | ### 1. Include short content always in context 44 | ```python 45 | def simple_tool() -> str: 46 | return "Hello, world!" # Keep in context for all future steps 47 | ``` 48 | 49 | ### 2. Show long content once, remember subset in context 50 | ```python 51 | return ActionResult( 52 | extracted_content="[500 lines of product data...]", # Shows to LLM once 53 | include_extracted_content_only_once=True, # Never show full output again 54 | long_term_memory="Found 50 products" # Only this in future steps 55 | ) 56 | ``` 57 | We save the full `extracted_content` to files which the LLM can read in future steps. 58 | 59 | ### 3. Dont show long content, remember subset in context 60 | ```python 61 | return ActionResult( 62 | extracted_content="[500 lines of product data...]", # The LLM never sees this because `long_term_memory` overrides it and `include_extracted_content_only_once` is not used 63 | long_term_memory="Saved user's favorite products", # This is shown to the LLM in future steps 64 | ) 65 | ``` 66 | 67 | ## Terminating the Agent 68 | 69 | Set `is_done=True` to stop the agent completely. 
Use when your tool finishes the entire task: 70 | 71 | ```python 72 | @tools.action(description='Complete the task') 73 | def finish_task() -> ActionResult: 74 | return ActionResult( 75 | extracted_content="Task completed!", 76 | is_done=True, # Stops the agent 77 | success=True # Task succeeded 78 | ) 79 | ``` 80 | -------------------------------------------------------------------------------- /docs/development/get-help.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Get Help" 3 | description: "More than 20k developers help each other" 4 | icon: "circle-question" 5 | mode: "wide" 6 | --- 7 | 8 | 9 | 1. Check our [GitHub Issues](https://github.com/browser-use/browser-use/issues) 10 | 2. Ask in our [Discord community](https://link.browser-use.com/discord) 11 | 3. Get support for your enterprise with support@browser-use.com 12 | -------------------------------------------------------------------------------- /docs/development/monitoring/observability.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Observability" 3 | description: "Trace Browser Use's agent execution steps and browser sessions" 4 | icon: "eye" 5 | mode: "wide" 6 | --- 7 | 8 | ## Overview 9 | 10 | Browser Use has a native integration with [Laminar](https://lmnr.ai) - open-source platform for tracing, evals and labeling of AI agents. 11 | Read more about Laminar in the [Laminar docs](https://docs.lmnr.ai). 12 | 13 | ## Setup 14 | 15 | 16 | 17 | Register on [Laminar Cloud](https://lmnr.ai) and get the key from your project settings. 18 | Set the `LMNR_PROJECT_API_KEY` environment variable. 19 | ```bash 20 | pip install 'lmnr[all]' 21 | export LMNR_PROJECT_API_KEY= 22 | ``` 23 | 24 | ## Usage 25 | 26 | Then, you simply initialize the Laminar at the top of your project and both Browser Use and session recordings will be automatically traced. 
27 | 28 | ```python {5-8} 29 | from browser_use import Agent, ChatOpenAI 30 | import asyncio 31 | 32 | from lmnr import Laminar, Instruments 33 | # this line auto-instruments Browser Use and any browser you use (local or remote) 34 | Laminar.initialize(project_api_key="...") 35 | 36 | async def main(): 37 | agent = Agent( 38 | task="open google, search Laminar AI", 39 | llm=ChatOpenAI(model="gpt-4.1-mini"), 40 | ) 41 | await agent.run() 42 | 43 | asyncio.run(main()) 44 | ``` 45 | 46 | ## Viewing Traces 47 | 48 | You can view traces in the Laminar UI by going to the traces tab in your project. 49 | When you select a trace, you can see both the browser session recording and the agent execution steps. 50 | 51 | Timeline of the browser session is synced with the agent execution steps, timeline highlights indicate the agent's current step synced with the browser session. 52 | In the trace view, you can also see the agent's current step, the tool it's using, and the tool's input and output. Tools are highlighted in the timeline with a yellow color. 53 | 54 | Laminar 55 | 56 | ## Laminar 57 | 58 | To learn more about tracing and evaluating your browser agents, check out the [Laminar docs](https://docs.lmnr.ai). 59 | -------------------------------------------------------------------------------- /docs/development/monitoring/telemetry.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Telemetry" 3 | description: "Understanding Browser Use's telemetry" 4 | icon: "chart-mixed" 5 | mode: "wide" 6 | --- 7 | 8 | ## Overview 9 | 10 | Browser Use is free under the MIT license. To help us continue improving the library, we collect anonymous usage data with [PostHog](https://posthog.com) . This information helps us understand how the library is used, fix bugs more quickly, and prioritize new features. 
11 | 12 | 13 | ## Opting Out 14 | 15 | You can disable telemetry by setting the environment variable: 16 | 17 | ```bash .env 18 | ANONYMIZED_TELEMETRY=false 19 | ``` 20 | 21 | Or in your Python code: 22 | 23 | ```python 24 | import os 25 | os.environ["ANONYMIZED_TELEMETRY"] = "false" 26 | ``` 27 | 28 | 29 | Even when enabled, telemetry has zero impact on the library's performance. Code is available in [Telemetry 30 | Service](https://github.com/browser-use/browser-use/tree/main/browser_use/telemetry). 31 | 32 | -------------------------------------------------------------------------------- /docs/development/roadmap.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Roadmap" 3 | description: "Future plans and upcoming features for Browser Use" 4 | icon: "road" 5 | mode: "wide" 6 | --- 7 | 8 | Big things coming soon! 9 | -------------------------------------------------------------------------------- /docs/development/setup/contribution-guide.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Contribution Guide" 3 | description: "" 4 | icon: "handshake" 5 | mode: "wide" 6 | --- 7 | 8 | ## Mission 9 | 10 | - Make developers happy 11 | - Do more clicks than human 12 | - Tell your computer what to do, and it gets it done. 13 | - Make agents faster and more reliable. 14 | 15 | 16 | ## What to work on? 17 | 18 | - This space is moving fast. We have 10 ideas daily. Let's exchange some. 19 | - Browse our [GitHub Issues](https://github.com/browser-use/browser-use/issues) 20 | - Check out our most active issues on [Discord](https://discord.gg/zXJJHtJf3k) 21 | - Get inspiration in [`#showcase-your-work`](https://discord.com/channels/1303749220842340412/1305549200678850642) channel 22 | 23 | 24 | ## What makes a great PR? 25 | 26 | 1. Why do we need this PR? 27 | 2. Include a demo screenshot/gif 28 | 3. Make sure the PR passes all CI tests 29 | 4. 
Keep your PR focused on a single feature 30 | 31 | 32 | ## How? 33 | 1. Fork the repository 34 | 2. Create a new branch for your feature 35 | 3. Submit a PR 36 | 37 | We are overwhelmed with Issues. Feel free to bump your issues/PRs with comments periodically if you need faster feedback. 38 | -------------------------------------------------------------------------------- /docs/development/setup/local-setup.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Local Setup" 3 | description: "We're excited to have you join our community of contributors. " 4 | icon: "laptop-code" 5 | mode: "wide" 6 | --- 7 | 8 | ## Welcome to Browser Use Development! 9 | 10 | ```bash 11 | git clone https://github.com/browser-use/browser-use 12 | cd browser-use 13 | uv sync --all-extras --dev 14 | # or pip install -U git+https://github.com/browser-use/browser-use.git@main 15 | ``` 16 | 17 | ## Configuration 18 | 19 | Set up your environment variables: 20 | 21 | ```bash 22 | # Copy the example environment file 23 | cp .env.example .env 24 | 25 | # set logging level 26 | # BROWSER_USE_LOGGING_LEVEL=debug 27 | ``` 28 | 29 | 30 | ## Helper Scripts 31 | For common development tasks 32 | ```bash 33 | # Complete setup script - installs uv, creates a venv, and installs dependencies 34 | ./bin/setup.sh 35 | 36 | # Run all pre-commit hooks (formatting, linting, type checking) 37 | ./bin/lint.sh 38 | 39 | # Run the core test suite that's executed in CI 40 | ./bin/test.sh 41 | ``` 42 | 43 | 44 | 45 | ## Run examples 46 | 47 | ```bash 48 | uv run examples/simple.py 49 | ``` 50 | -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/2d84ff6605f4d89988f7868633f288f351c6a349/docs/favicon.ico 
-------------------------------------------------------------------------------- /docs/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/images/browser-use-banner-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/2d84ff6605f4d89988f7868633f288f351c6a349/docs/images/browser-use-banner-dark.png -------------------------------------------------------------------------------- /docs/images/browser-use-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/2d84ff6605f4d89988f7868633f288f351c6a349/docs/images/browser-use-banner.png -------------------------------------------------------------------------------- /docs/images/checks-passed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/2d84ff6605f4d89988f7868633f288f351c6a349/docs/images/checks-passed.png -------------------------------------------------------------------------------- /docs/images/cloud-banner-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/2d84ff6605f4d89988f7868633f288f351c6a349/docs/images/cloud-banner-dark.png -------------------------------------------------------------------------------- /docs/images/cloud-banner-js.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/2d84ff6605f4d89988f7868633f288f351c6a349/docs/images/cloud-banner-js.png -------------------------------------------------------------------------------- 
/docs/images/cloud-banner-python.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/2d84ff6605f4d89988f7868633f288f351c6a349/docs/images/cloud-banner-python.png -------------------------------------------------------------------------------- /docs/images/cloud-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/2d84ff6605f4d89988f7868633f288f351c6a349/docs/images/cloud-banner.png -------------------------------------------------------------------------------- /docs/images/laminar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/2d84ff6605f4d89988f7868633f288f351c6a349/docs/images/laminar.png -------------------------------------------------------------------------------- /docs/introduction.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Introduction" 3 | description: "Automate browser tasks in plain text. " 4 | icon: "book-open" 5 | --- 6 | 7 | Browser Use Logo 12 | Browser Use Logo 17 | 18 | 19 | 20 | Open-source Python library. 21 | 22 | 28 | Scale up with our cloud. 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /docs/quickstart_llm.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "LLM Quickstart" 3 | description: "" 4 | icon: "brain" 5 | --- 6 | 7 | 8 | 9 | 1. Copy all content [🔗 from here](https://docs.browser-use.com/llms-full.txt) (~32k tokens) 10 | 2. Paste it into your favorite coding agent (Cursor, Claude, ChatGPT ...). 
11 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/2d84ff6605f4d89988f7868633f288f351c6a349/examples/__init__.py -------------------------------------------------------------------------------- /examples/api/search/search_url.py: -------------------------------------------------------------------------------- 1 | """ 2 | Search URL API Example 3 | 4 | This example shows how to use the Browser Use API to extract specific 5 | content from a given URL based on your query. 6 | 7 | Usage: 8 | # Copy this function and customize the parameters 9 | result = await search_url("https://example.com", "what to find", depth=2) 10 | """ 11 | 12 | import asyncio 13 | import os 14 | 15 | import aiohttp 16 | from dotenv import load_dotenv 17 | 18 | # Load environment variables 19 | load_dotenv() 20 | 21 | 22 | async def search_url(url: str, query: str, depth: int = 2): 23 | # Validate API key exists 24 | api_key = os.getenv('BROWSER_USE_API_KEY') 25 | if not api_key: 26 | print('❌ Error: BROWSER_USE_API_KEY environment variable is not set.') 27 | print('Please set your API key: export BROWSER_USE_API_KEY="your_api_key_here"') 28 | return None 29 | 30 | payload = {'url': url, 'query': query, 'depth': depth} 31 | 32 | headers = {'Authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'} 33 | 34 | print('Testing Search URL API...') 35 | print(f'URL: {url}') 36 | print(f'Query: {query}') 37 | print(f'Depth: {depth}') 38 | print('-' * 50) 39 | 40 | try: 41 | async with aiohttp.ClientSession() as session: 42 | async with session.post( 43 | 'https://api.browser-use.com/api/v1/search-url', 44 | json=payload, 45 | headers=headers, 46 | timeout=aiohttp.ClientTimeout(total=300), 47 | ) as response: 48 | if response.status == 200: 49 | result = await response.json() 50 | print('✅ 
Success!') 51 | print(f'URL processed: {result.get("url", "N/A")}') 52 | content = result.get('content', '') 53 | print(f'Content: {content}') 54 | return result 55 | else: 56 | error_text = await response.text() 57 | print(f'❌ Error {response.status}: {error_text}') 58 | return None 59 | except Exception as e: 60 | print(f'❌ Exception: {str(e)}') 61 | return None 62 | 63 | 64 | if __name__ == '__main__': 65 | # Example 1: Extract pricing info 66 | asyncio.run(search_url('https://browser-use.com/#pricing', 'Find pricing information for Browser Use')) 67 | 68 | # Example 2: News article analysis 69 | # asyncio.run(search_url("https://techcrunch.com", "latest startup funding news", depth=3)) 70 | 71 | # Example 3: Product research 72 | # asyncio.run(search_url("https://github.com/browser-use/browser-use", "installation instructions", depth=2)) 73 | -------------------------------------------------------------------------------- /examples/api/search/simple_search.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple Search API Example 3 | 4 | This example shows how to use the Browser Use API to search and extract 5 | content from multiple websites based on a query. 
6 | 7 | Usage: 8 | # Copy this function and customize the parameters 9 | result = await simple_search("your search query", max_websites=5, depth=2) 10 | """ 11 | 12 | import asyncio 13 | import os 14 | 15 | import aiohttp 16 | from dotenv import load_dotenv 17 | 18 | # Load environment variables 19 | load_dotenv() 20 | 21 | 22 | async def simple_search(query: str, max_websites: int = 5, depth: int = 2): 23 | # Validate API key exists 24 | api_key = os.getenv('BROWSER_USE_API_KEY') 25 | if not api_key: 26 | print('❌ Error: BROWSER_USE_API_KEY environment variable is not set.') 27 | print('Please set your API key: export BROWSER_USE_API_KEY="your_api_key_here"') 28 | return None 29 | 30 | payload = {'query': query, 'max_websites': max_websites, 'depth': depth} 31 | 32 | headers = {'Authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'} 33 | 34 | print('Testing Simple Search API...') 35 | print(f'Query: {query}') 36 | print(f'Max websites: {max_websites}') 37 | print(f'Depth: {depth}') 38 | print('-' * 50) 39 | 40 | try: 41 | async with aiohttp.ClientSession() as session: 42 | async with session.post( 43 | 'https://api.browser-use.com/api/v1/simple-search', 44 | json=payload, 45 | headers=headers, 46 | timeout=aiohttp.ClientTimeout(total=300), 47 | ) as response: 48 | if response.status == 200: 49 | result = await response.json() 50 | print('✅ Success!') 51 | print(f'Results: {len(result.get("results", []))} websites processed') 52 | for i, item in enumerate(result.get('results', [])[:2], 1): 53 | print(f'\n{i}. 
{item.get("url", "N/A")}') 54 | content = item.get('content', '') 55 | print(f' Content: {content}') 56 | return result 57 | else: 58 | error_text = await response.text() 59 | print(f'❌ Error {response.status}: {error_text}') 60 | return None 61 | except Exception as e: 62 | print(f'❌ Exception: {str(e)}') 63 | return None 64 | 65 | 66 | if __name__ == '__main__': 67 | # Example 1: Basic search 68 | asyncio.run(simple_search('latest AI news')) 69 | 70 | # Example 2: Custom parameters 71 | # asyncio.run(simple_search("python web scraping", max_websites=3, depth=3)) 72 | 73 | # Example 3: Research query 74 | # asyncio.run(simple_search("climate change solutions 2024", max_websites=7, depth=2)) 75 | -------------------------------------------------------------------------------- /examples/browser/parallel_browser.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from browser_use import Agent, Browser, ChatOpenAI 4 | 5 | # NOTE: This is still experimental, and agents might conflict each other. 
6 | 7 | 8 | async def main(): 9 | # Create 3 separate browser instances 10 | browsers = [ 11 | Browser( 12 | user_data_dir=f'./temp-profile-{i}', 13 | headless=False, 14 | ) 15 | for i in range(3) 16 | ] 17 | 18 | # Create 3 agents with different tasks 19 | agents = [ 20 | Agent( 21 | task='Search for "browser automation" on Google', 22 | browser=browsers[0], 23 | llm=ChatOpenAI(model='gpt-4.1-mini'), 24 | ), 25 | Agent( 26 | task='Search for "AI agents" on DuckDuckGo', 27 | browser=browsers[1], 28 | llm=ChatOpenAI(model='gpt-4.1-mini'), 29 | ), 30 | Agent( 31 | task='Visit Wikipedia and search for "web scraping"', 32 | browser=browsers[2], 33 | llm=ChatOpenAI(model='gpt-4.1-mini'), 34 | ), 35 | ] 36 | 37 | # Run all agents in parallel 38 | tasks = [agent.run() for agent in agents] 39 | results = await asyncio.gather(*tasks, return_exceptions=True) 40 | 41 | print('🎉 All agents completed!') 42 | 43 | 44 | if __name__ == '__main__': 45 | asyncio.run(main()) 46 | -------------------------------------------------------------------------------- /examples/browser/real_browser.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use import Agent, Browser, ChatOpenAI 12 | 13 | # Connect to your existing Chrome browser 14 | browser = Browser( 15 | executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', 16 | user_data_dir='~/Library/Application Support/Google/Chrome', 17 | profile_directory='Default', 18 | ) 19 | 20 | 21 | async def main(): 22 | agent = Agent( 23 | llm=ChatOpenAI(model='gpt-4.1-mini'), 24 | # Google blocks this approach, so we use a different search engine 25 | task='Visit https://duckduckgo.com and search for "browser-use founders"', 26 | browser=browser, 27 | ) 28 | 
await agent.run() 29 | 30 | 31 | if __name__ == '__main__': 32 | asyncio.run(main()) 33 | -------------------------------------------------------------------------------- /examples/browser/using_cdp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple demonstration of the CDP feature. 3 | 4 | To test this locally, follow these steps: 5 | 1. Find the chrome executable file. 6 | 2. On mac by default, the chrome is in `/Applications/Google Chrome.app/Contents/MacOS/Google Chrome` 7 | 3. Add the following argument to the shortcut: 8 | `--remote-debugging-port=9222` 9 | 4. Open a web browser and navigate to `http://localhost:9222/json/version` to verify that the Remote Debugging Protocol (CDP) is running. 10 | 5. Launch this example. 11 | 12 | Full command Mac: 13 | "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --remote-debugging-port=9222 14 | 15 | @dev You need to set the `OPENAI_API_KEY` environment variable before proceeding. 
16 | """ 17 | 18 | import asyncio 19 | import os 20 | import sys 21 | 22 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 23 | 24 | from dotenv import load_dotenv 25 | 26 | load_dotenv() 27 | 28 | from browser_use import Agent, Tools 29 | from browser_use.browser import BrowserProfile, BrowserSession 30 | from browser_use.llm import ChatOpenAI 31 | 32 | browser_session = BrowserSession(browser_profile=BrowserProfile(cdp_url='http://localhost:9222', is_local=True)) 33 | tools = Tools() 34 | 35 | 36 | async def main(): 37 | agent = Agent( 38 | task='Visit https://duckduckgo.com and search for "browser-use founders"', 39 | lllm=ChatOpenAI(model='gpt-4.1-mini'), 40 | tools=tools, 41 | browser_session=browser_session, 42 | ) 43 | 44 | await agent.run() 45 | await browser_session.kill() 46 | 47 | input('Press Enter to close...') 48 | 49 | 50 | if __name__ == '__main__': 51 | asyncio.run(main()) 52 | -------------------------------------------------------------------------------- /examples/cloud/env.example: -------------------------------------------------------------------------------- 1 | # Browser Use Cloud API Configuration 2 | # Copy this file to .env and fill in your values 3 | 4 | # Required: Your Browser Use Cloud API key 5 | # Get it from: https://cloud.browser-use.com/billing 6 | BROWSER_USE_API_KEY=your_api_key_here 7 | 8 | # Optional: Custom API base URL (for enterprise installations) 9 | # BROWSER_USE_BASE_URL=https://api.browser-use.com/api/v1 10 | 11 | # Optional: Default model preference 12 | # BROWSER_USE_DEFAULT_MODEL=gemini-2.5-flash 13 | 14 | # Optional: Cost limits 15 | # BROWSER_USE_MAX_COST_PER_TASK=5.0 16 | 17 | # Optional: Request timeout (seconds) 18 | # BROWSER_USE_TIMEOUT=30 19 | 20 | # Optional: Logging configuration 21 | # LOG_LEVEL=INFO 22 | -------------------------------------------------------------------------------- /examples/custom-functions/2fa.py: 
-------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import os 4 | import sys 5 | 6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 7 | 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | 12 | import pyotp # type: ignore 13 | 14 | from browser_use import ActionResult, Agent, ChatOpenAI, Tools 15 | 16 | # Set up logging 17 | logging.basicConfig(level=logging.INFO) 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | tools = Tools() 22 | 23 | 24 | @tools.registry.action('Get 2FA code from when OTP is required') 25 | async def get_otp_2fa() -> ActionResult: 26 | """ 27 | Custom action to retrieve 2FA/MFA code from OTP secret key using pyotp. 28 | The OTP secret key should be set in the environment variable OTP_SECRET_KEY. 29 | """ 30 | secret_key = os.environ.get('OTP_SECRET_KEY') 31 | if not secret_key: 32 | raise ValueError('OTP_SECRET_KEY environment variable is not set') 33 | 34 | totp = pyotp.TOTP(secret_key, digits=6) 35 | code = totp.now() 36 | return ActionResult(extracted_content=code) 37 | 38 | 39 | async def main(): 40 | # Example task using the 1Password 2FA action 41 | task = """ 42 | Steps: 43 | 1. Go to https://authenticationtest.com/totpChallenge/ and try to log in. 44 | 2. If prompted for 2FA code: 45 | 2.1. Use the get_2fa_code action to retrieve the 2FA code. 46 | 2.2. Submit the code provided by the get_2fa_code action. 47 | 48 | Considerations: 49 | - ALWAYS use the get_2fa_code action to retrieve the 2FA code if needed. 50 | - NEVER skip the 2FA step if the page requires it. 51 | - NEVER extract the code from the page. 52 | - NEVER use a code that is not generated by the get_2fa_code action. 53 | - NEVER hallucinate the 2FA code, always use the get_2fa_code action to get it. 54 | 55 | You are completely FORBIDDEN to use any other method to get the 2FA code. 
56 | """ 57 | 58 | model = ChatOpenAI(model='gpt-4.1-mini') 59 | agent = Agent(task=task, llm=model, tools=tools) 60 | 61 | result = await agent.run() 62 | print(f'Task completed with result: {result}') 63 | 64 | 65 | if __name__ == '__main__': 66 | asyncio.run(main()) 67 | -------------------------------------------------------------------------------- /examples/custom-functions/notification.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use import ActionResult, Agent, ChatOpenAI, Tools 12 | 13 | tools = Tools() 14 | 15 | 16 | @tools.registry.action('Done with task') 17 | async def done(text: str): 18 | import yagmail # type: ignore 19 | 20 | # To send emails use 21 | # STEP 1: go to https://support.google.com/accounts/answer/185833 22 | # STEP 2: Create an app password (you can't use here your normal gmail password) 23 | # STEP 3: Use the app password in the code below for the password 24 | yag = yagmail.SMTP('your_email@gmail.com', 'your_app_password') 25 | yag.send( 26 | to='recipient@example.com', 27 | subject='Test Email', 28 | contents=f'result\n: {text}', 29 | ) 30 | 31 | return ActionResult(is_done=True, extracted_content='Email sent!') 32 | 33 | 34 | async def main(): 35 | task = 'go to brower-use.com and then done' 36 | model = ChatOpenAI(model='gpt-4.1-mini') 37 | agent = Agent(task=task, llm=model, tools=tools) 38 | 39 | await agent.run() 40 | 41 | 42 | if __name__ == '__main__': 43 | asyncio.run(main()) 44 | -------------------------------------------------------------------------------- /examples/custom-functions/onepassword_2fa.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import os 4 | import sys 5 | 6 
| sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 7 | 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | 12 | from onepassword.client import Client # type: ignore # pip install onepassword-sdk 13 | 14 | from browser_use import ActionResult, Agent, ChatOpenAI, Tools 15 | 16 | # Set up logging 17 | logging.basicConfig(level=logging.INFO) 18 | logger = logging.getLogger(__name__) 19 | 20 | OP_SERVICE_ACCOUNT_TOKEN = os.getenv('OP_SERVICE_ACCOUNT_TOKEN') 21 | OP_ITEM_ID = os.getenv('OP_ITEM_ID') # Go to 1Password, right click on the item, click "Copy Secret Reference" 22 | 23 | 24 | tools = Tools() 25 | 26 | 27 | @tools.registry.action('Get 2FA code from 1Password for Google Account', domains=['*.google.com', 'google.com']) 28 | async def get_1password_2fa() -> ActionResult: 29 | """ 30 | Custom action to retrieve 2FA/MFA code from 1Password using onepassword.client SDK. 31 | """ 32 | client = await Client.authenticate( 33 | # setup instructions: https://github.com/1Password/onepassword-sdk-python/#-get-started 34 | auth=OP_SERVICE_ACCOUNT_TOKEN, 35 | integration_name='Browser-Use', 36 | integration_version='v1.0.0', 37 | ) 38 | 39 | mfa_code = await client.secrets.resolve(f'op://Private/{OP_ITEM_ID}/One-time passcode') 40 | 41 | return ActionResult(extracted_content=mfa_code) 42 | 43 | 44 | async def main(): 45 | # Example task using the 1Password 2FA action 46 | task = 'Go to account.google.com, enter username and password, then if prompted for 2FA code, get 2FA code from 1Password for and enter it' 47 | 48 | model = ChatOpenAI(model='gpt-4.1-mini') 49 | agent = Agent(task=task, llm=model, tools=tools) 50 | 51 | result = await agent.run() 52 | print(f'Task completed with result: {result}') 53 | 54 | 55 | if __name__ == '__main__': 56 | asyncio.run(main()) 57 | -------------------------------------------------------------------------------- /examples/custom-functions/save_to_file_hugging_face.py: 
-------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from pydantic import BaseModel 12 | 13 | from browser_use import ChatOpenAI 14 | from browser_use.agent.service import Agent 15 | from browser_use.tools.service import Tools 16 | 17 | # Initialize tools first 18 | tools = Tools() 19 | 20 | 21 | class Model(BaseModel): 22 | title: str 23 | url: str 24 | likes: int 25 | license: str 26 | 27 | 28 | class Models(BaseModel): 29 | models: list[Model] 30 | 31 | 32 | @tools.action('Save models', param_model=Models) 33 | def save_models(params: Models): 34 | with open('models.txt', 'a') as f: 35 | for model in params.models: 36 | f.write(f'{model.title} ({model.url}): {model.likes} likes, {model.license}\n') 37 | 38 | 39 | # video: https://preview.screen.studio/share/EtOhIk0P 40 | async def main(): 41 | task = 'Look up models with a license of cc-by-sa-4.0 and sort by most likes on Hugging face, save top 5 to file.' 42 | 43 | model = ChatOpenAI(model='gpt-4.1-mini') 44 | agent = Agent(task=task, llm=model, tools=tools) 45 | 46 | await agent.run() 47 | 48 | 49 | if __name__ == '__main__': 50 | asyncio.run(main()) 51 | -------------------------------------------------------------------------------- /examples/features/custom_output.py: -------------------------------------------------------------------------------- 1 | """ 2 | Show how to use custom outputs. 3 | 4 | @dev You need to add OPENAI_API_KEY to your environment variables. 
5 | """ 6 | 7 | import asyncio 8 | import os 9 | import sys 10 | 11 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 12 | 13 | from dotenv import load_dotenv 14 | 15 | load_dotenv() 16 | 17 | from pydantic import BaseModel 18 | 19 | from browser_use import Agent, ChatOpenAI 20 | 21 | 22 | class Post(BaseModel): 23 | post_title: str 24 | post_url: str 25 | num_comments: int 26 | hours_since_post: int 27 | 28 | 29 | class Posts(BaseModel): 30 | posts: list[Post] 31 | 32 | 33 | async def main(): 34 | task = 'Go to hackernews show hn and give me the first 5 posts' 35 | model = ChatOpenAI(model='gpt-4.1-mini') 36 | agent = Agent(task=task, llm=model, output_model_schema=Posts) 37 | 38 | history = await agent.run() 39 | 40 | result = history.final_result() 41 | if result: 42 | parsed: Posts = Posts.model_validate_json(result) 43 | 44 | for post in parsed.posts: 45 | print('\n--------------------------------') 46 | print(f'Title: {post.post_title}') 47 | print(f'URL: {post.post_url}') 48 | print(f'Comments: {post.num_comments}') 49 | print(f'Hours since post: {post.hours_since_post}') 50 | else: 51 | print('No result') 52 | 53 | 54 | if __name__ == '__main__': 55 | asyncio.run(main()) 56 | -------------------------------------------------------------------------------- /examples/features/custom_system_prompt.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import os 4 | import sys 5 | 6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 7 | 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | 12 | 13 | from browser_use import Agent, ChatOpenAI 14 | 15 | extend_system_message = ( 16 | 'REMEMBER the most important RULE: ALWAYS open first a new tab and go first to url wikipedia.com no matter the task!!!' 
17 | ) 18 | 19 | # or use override_system_message to completely override the system prompt 20 | 21 | 22 | async def main(): 23 | task = 'do google search to find images of Elon Musk' 24 | model = ChatOpenAI(model='gpt-4.1-mini') 25 | agent = Agent(task=task, llm=model, extend_system_message=extend_system_message) 26 | 27 | print( 28 | json.dumps( 29 | agent.message_manager.system_prompt.model_dump(exclude_unset=True), 30 | indent=4, 31 | ) 32 | ) 33 | 34 | await agent.run() 35 | 36 | 37 | if __name__ == '__main__': 38 | asyncio.run(main()) 39 | -------------------------------------------------------------------------------- /examples/features/download_file.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | 12 | from browser_use import Agent, BrowserProfile, BrowserSession, ChatGoogle 13 | 14 | api_key = os.getenv('GOOGLE_API_KEY') 15 | if not api_key: 16 | raise ValueError('GOOGLE_API_KEY is not set') 17 | 18 | llm = ChatGoogle(model='gemini-2.5-flash', api_key=api_key) 19 | 20 | 21 | browser_session = BrowserSession(browser_profile=BrowserProfile(downloads_path='~/Downloads')) 22 | 23 | 24 | async def run_download(): 25 | agent = Agent( 26 | task='Go to "https://file-examples.com/" and download the smallest doc file.', 27 | llm=llm, 28 | browser_session=browser_session, 29 | ) 30 | await agent.run(max_steps=25) 31 | 32 | 33 | if __name__ == '__main__': 34 | asyncio.run(run_download()) 35 | -------------------------------------------------------------------------------- /examples/features/follow_up_tasks.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | from browser_use.browser.profile import BrowserProfile 6 | 7 | 
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 8 | 9 | from dotenv import load_dotenv 10 | 11 | load_dotenv() 12 | 13 | from browser_use import Agent 14 | 15 | profile = BrowserProfile(keep_alive=True) 16 | 17 | 18 | task = """Go to reddit.com""" 19 | 20 | 21 | async def main(): 22 | agent = Agent(task=task, browser_profile=profile) 23 | await agent.run(max_steps=1) 24 | 25 | while True: 26 | user_response = input('\n👤 New task or "q" to quit: ') 27 | agent.add_new_task(f'New task: {user_response}') 28 | await agent.run() 29 | 30 | 31 | if __name__ == '__main__': 32 | asyncio.run(main()) 33 | -------------------------------------------------------------------------------- /examples/features/initial_actions.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use import Agent, ChatOpenAI 12 | 13 | llm = ChatOpenAI(model='gpt-4.1-mini') 14 | 15 | initial_actions = [ 16 | {'go_to_url': {'url': 'https://www.google.com', 'new_tab': True}}, 17 | {'go_to_url': {'url': 'https://en.wikipedia.org/wiki/Randomness', 'new_tab': True}}, 18 | ] 19 | agent = Agent( 20 | task='What theories are displayed on the page?', 21 | initial_actions=initial_actions, 22 | llm=llm, 23 | ) 24 | 25 | 26 | async def main(): 27 | await agent.run(max_steps=10) 28 | 29 | 30 | if __name__ == '__main__': 31 | asyncio.run(main()) 32 | -------------------------------------------------------------------------------- /examples/features/multi_tab.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple try of the agent. 3 | 4 | @dev You need to add OPENAI_API_KEY to your environment variables. 
5 | """ 6 | 7 | import asyncio 8 | import os 9 | import sys 10 | 11 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 12 | 13 | from dotenv import load_dotenv 14 | 15 | load_dotenv() 16 | 17 | from browser_use import Agent, ChatOpenAI 18 | 19 | # video: https://preview.screen.studio/share/clenCmS6 20 | llm = ChatOpenAI(model='gpt-4.1-mini') 21 | agent = Agent( 22 | task='open 3 tabs with elon musk, sam altman, and steve jobs, then go back to the first and stop', 23 | llm=llm, 24 | ) 25 | 26 | 27 | async def main(): 28 | await agent.run() 29 | 30 | 31 | asyncio.run(main()) 32 | -------------------------------------------------------------------------------- /examples/features/parallel_agents.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use import ChatOpenAI 12 | from browser_use.agent.service import Agent 13 | from browser_use.browser import BrowserProfile, BrowserSession 14 | 15 | browser_session = BrowserSession( 16 | browser_profile=BrowserProfile( 17 | keep_alive=True, 18 | headless=False, 19 | record_video_dir='./tmp/recordings', 20 | user_data_dir='~/.config/browseruse/profiles/default', 21 | ) 22 | ) 23 | llm = ChatOpenAI(model='gpt-4.1-mini') 24 | 25 | 26 | # NOTE: This is experimental - you will have multiple agents running in the same browser session 27 | async def main(): 28 | await browser_session.start() 29 | agents = [ 30 | Agent(task=task, llm=llm, browser_session=browser_session) 31 | for task in [ 32 | 'Search Google for weather in Tokyo', 33 | 'Check Reddit front page title', 34 | 'Look up Bitcoin price on Coinbase', 35 | # 'Find NASA image of the day', 36 | # 'Check top story on CNN', 37 | # 'Search latest SpaceX launch date', 38 | # 
'Look up population of Paris', 39 | # 'Find current time in Sydney', 40 | # 'Check who won last Super Bowl', 41 | # 'Search trending topics on Twitter', 42 | ] 43 | ] 44 | 45 | print(await asyncio.gather(*[agent.run() for agent in agents])) 46 | await browser_session.kill() 47 | 48 | 49 | if __name__ == '__main__': 50 | asyncio.run(main()) 51 | -------------------------------------------------------------------------------- /examples/features/process_agent_output.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | from pprint import pprint 5 | 6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 7 | 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | 12 | from browser_use import Agent, ChatOpenAI 13 | from browser_use.agent.views import AgentHistoryList 14 | from browser_use.browser import BrowserProfile, BrowserSession 15 | from browser_use.browser.profile import ViewportSize 16 | 17 | llm = ChatOpenAI(model='gpt-4.1-mini') 18 | 19 | 20 | async def main(): 21 | browser_session = BrowserSession( 22 | browser_profile=BrowserProfile( 23 | headless=False, 24 | traces_dir='./tmp/result_processing', 25 | window_size=ViewportSize(width=1280, height=1000), 26 | user_data_dir='~/.config/browseruse/profiles/default', 27 | ) 28 | ) 29 | await browser_session.start() 30 | try: 31 | agent = Agent( 32 | task="go to google.com and type 'OpenAI' click search and give me the first url", 33 | llm=llm, 34 | browser_session=browser_session, 35 | ) 36 | history: AgentHistoryList = await agent.run(max_steps=3) 37 | 38 | print('Final Result:') 39 | pprint(history.final_result(), indent=4) 40 | 41 | print('\nErrors:') 42 | pprint(history.errors(), indent=4) 43 | 44 | # e.g. 
xPaths the model clicked on 45 | print('\nModel Outputs:') 46 | pprint(history.model_actions(), indent=4) 47 | 48 | print('\nThoughts:') 49 | pprint(history.model_thoughts(), indent=4) 50 | finally: 51 | await browser_session.stop() 52 | 53 | 54 | if __name__ == '__main__': 55 | asyncio.run(main()) 56 | -------------------------------------------------------------------------------- /examples/features/restrict_urls.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use import Agent, ChatOpenAI 12 | from browser_use.browser import BrowserProfile, BrowserSession 13 | 14 | llm = ChatOpenAI(model='gpt-4.1-mini') 15 | task = ( 16 | "go to google.com and search for openai.com and click on the first link then extract content and scroll down - what's there?" 17 | ) 18 | 19 | allowed_domains = ['google.com'] 20 | 21 | browser_session = BrowserSession( 22 | browser_profile=BrowserProfile( 23 | executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', 24 | allowed_domains=allowed_domains, 25 | user_data_dir='~/.config/browseruse/profiles/default', 26 | ), 27 | ) 28 | 29 | agent = Agent( 30 | task=task, 31 | llm=llm, 32 | browser_session=browser_session, 33 | ) 34 | 35 | 36 | async def main(): 37 | await agent.run(max_steps=25) 38 | 39 | input('Press Enter to close the browser...') 40 | await browser_session.kill() 41 | 42 | 43 | asyncio.run(main()) 44 | -------------------------------------------------------------------------------- /examples/features/secure.py: -------------------------------------------------------------------------------- 1 | """ 2 | Azure OpenAI example with data privacy and high-scale configuration. 
3 | 4 | Environment Variables Required: 5 | - AZURE_OPENAI_KEY (or AZURE_OPENAI_API_KEY) 6 | - AZURE_OPENAI_ENDPOINT 7 | - AZURE_OPENAI_DEPLOYMENT (optional) 8 | 9 | DATA PRIVACY WITH AZURE OPENAI: 10 | ✅ Good News: No Training on Your Data by Default 11 | 12 | Azure OpenAI Service already protects your data: 13 | ✅ NOT used to train OpenAI models 14 | ✅ NOT shared with other customers 15 | ✅ NOT accessible to OpenAI directly 16 | ✅ NOT used to improve Microsoft/third-party products 17 | ✅ Hosted entirely within Azure (not OpenAI's servers) 18 | 19 | ⚠️ Default Data Retention (30 Days) 20 | - Prompts and completions stored for up to 30 days 21 | - Purpose: Abuse monitoring and compliance 22 | - Access: Microsoft authorized personnel (only if abuse detected) 23 | 24 | 🔒 How to Disable Data Logging Completely 25 | Apply for Microsoft's "Limited Access Program": 26 | 1. Contact Microsoft Azure support 27 | 2. Submit Limited Access Program request 28 | 3. Demonstrate legitimate business need 29 | 4. 
After approval: Zero data logging, immediate deletion, no human review 30 | 31 | For high-scale deployments (500+ agents), consider: 32 | - Multiple deployments across regions 33 | 34 | 35 | How to Verify This Yourself, that there is no data logging: 36 | - Network monitoring: Run with network monitoring tools 37 | - Firewall rules: Block all domains except Azure OpenAI and your target sites 38 | 39 | Contact us if you need help with this: support@browser-use.com 40 | """ 41 | 42 | import asyncio 43 | import os 44 | import sys 45 | 46 | from dotenv import load_dotenv 47 | 48 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 49 | 50 | load_dotenv() 51 | 52 | 53 | os.environ['ANONYMIZED_TELEMETRY'] = 'false' 54 | 55 | 56 | from browser_use import Agent, BrowserProfile, ChatAzureOpenAI 57 | 58 | # Configuration LLM 59 | api_key = os.getenv('AZURE_OPENAI_KEY') 60 | azure_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT') 61 | llm = ChatAzureOpenAI(model='gpt-4.1-mini', api_key=api_key, azure_endpoint=azure_endpoint) 62 | 63 | # Configuration Task 64 | task = 'Find the founders of the sensitive company_name' 65 | 66 | # Configuration Browser (optional) 67 | browser_profile = BrowserProfile(allowed_domains=['*google.com', 'browser-use.com'], enable_default_extensions=False) 68 | 69 | # Sensitive data (optional) - {key: sensitive_information} - we filter out the sensitive_information from any input to the LLM, it will only work with placeholder. 70 | # By default we pass screenshots to the LLM which can contain your information. Set use_vision=False to disable this. 71 | # If you trust your LLM endpoint, you don't need to worry about this. 
72 | sensitive_data: dict[str, str | dict[str, str]] = {'company_name': 'browser-use'} 73 | 74 | 75 | # Create Agent 76 | agent = Agent(task=task, llm=llm, browser_profile=browser_profile, sensitive_data=sensitive_data) 77 | 78 | 79 | async def main(): 80 | await agent.run(max_steps=10) 81 | 82 | 83 | asyncio.run(main()) 84 | -------------------------------------------------------------------------------- /examples/features/sensitive_data.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use import Agent, ChatOpenAI 12 | from browser_use.browser import BrowserProfile 13 | 14 | # Initialize the model 15 | llm = ChatOpenAI( 16 | model='gpt-4.1', 17 | temperature=0.0, 18 | ) 19 | # Simple case: the model will see x_name and x_password, but never the actual values. 
20 | # sensitive_data = {'x_name': 'my_x_name', 'x_password': 'my_x_password'} 21 | 22 | # Advanced case: domain-specific credentials with reusable data 23 | # Define a single credential set that can be reused 24 | company_credentials = {'company_username': 'user@example.com', 'company_password': 'securePassword123'} 25 | 26 | # Map the same credentials to multiple domains for secure access control 27 | # Type annotation to satisfy pyright 28 | sensitive_data: dict[str, str | dict[str, str]] = { 29 | 'https://example.com': company_credentials, 30 | 'https://admin.example.com': company_credentials, 31 | 'https://*.example-staging.com': company_credentials, 32 | 'http*://test.example.com': company_credentials, 33 | # You can also add domain-specific credentials 34 | 'https://*.google.com': {'g_email': 'user@gmail.com', 'g_pass': 'google_password'}, 35 | } 36 | # Update task to use one of the credentials above 37 | task = 'Go to google.com and put the login information in the search bar.' 38 | 39 | # Always set allowed_domains when using sensitive_data for security 40 | from browser_use.browser.session import BrowserSession 41 | 42 | browser_session = BrowserSession( 43 | browser_profile=BrowserProfile( 44 | allowed_domains=list(sensitive_data.keys()) 45 | + ['https://*.trusted-partner.com'] # Domain patterns from sensitive_data + additional allowed domains 46 | ) 47 | ) 48 | 49 | agent = Agent(task=task, llm=llm, sensitive_data=sensitive_data, browser_session=browser_session) 50 | 51 | 52 | async def main(): 53 | await agent.run() 54 | 55 | 56 | if __name__ == '__main__': 57 | asyncio.run(main()) 58 | -------------------------------------------------------------------------------- /examples/features/small_model_for_extraction.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | 
from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use import Agent, ChatOpenAI 12 | 13 | # This uses a bigger model for the planning 14 | # And a smaller model for the page content extraction 15 | # Think of it like a subagent whose only task is to extract content from the current page 16 | llm = ChatOpenAI(model='gpt-4.1') 17 | small_llm = ChatOpenAI(model='gpt-4.1-mini') 18 | task = 'Find the founders of browser-use in ycombinator, extract all links and open the links one by one' 19 | agent = Agent(task=task, llm=llm, page_extraction_llm=small_llm) 20 | 21 | 22 | async def main(): 23 | await agent.run() 24 | 25 | 26 | if __name__ == '__main__': 27 | asyncio.run(main()) 28 | -------------------------------------------------------------------------------- /examples/file_system/alphabet_earnings.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import pathlib 4 | import shutil 5 | 6 | from dotenv import load_dotenv 7 | 8 | from browser_use import Agent, ChatOpenAI 9 | 10 | load_dotenv() 11 | 12 | SCRIPT_DIR = pathlib.Path(os.path.dirname(os.path.abspath(__file__))) 13 | agent_dir = SCRIPT_DIR / 'alphabet_earnings' 14 | agent_dir.mkdir(exist_ok=True) 15 | 16 | task = """ 17 | Go to https://abc.xyz/assets/cc/27/3ada14014efbadd7a58472f1f3f4/2025q2-alphabet-earnings-release.pdf. 18 | Read the PDF and save 3 interesting data points in "alphabet_earnings.pdf" and share it with me!
19 | """.strip('\n') 20 | 21 | agent = Agent( 22 | task=task, 23 | llm=ChatOpenAI(model='o4-mini'), 24 | file_system_path=str(agent_dir / 'fs'), 25 | flash_mode=True, 26 | ) 27 | 28 | 29 | async def main(): 30 | await agent.run() 31 | input(f'Press Enter to clean the file system at {agent_dir}...') 32 | # clean the file system 33 | shutil.rmtree(str(agent_dir / 'fs')) 34 | 35 | 36 | if __name__ == '__main__': 37 | asyncio.run(main()) 38 | -------------------------------------------------------------------------------- /examples/file_system/excel_sheet.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | from browser_use.llm.openai.chat import ChatOpenAI 6 | 7 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 8 | 9 | from dotenv import load_dotenv 10 | 11 | load_dotenv() 12 | 13 | from browser_use import Agent 14 | 15 | llm = ChatOpenAI(model='o4-mini') 16 | 17 | 18 | task = ( 19 | 'Find current stock price of companies Meta and Amazon. Then, make me a CSV file with 2 columns: company name, stock price.' 
20 | ) 21 | 22 | agent = Agent(task=task, llm=llm) 23 | 24 | 25 | async def main(): 26 | import time 27 | 28 | start_time = time.time() 29 | history = await agent.run() 30 | # token usage 31 | print(history.usage) 32 | end_time = time.time() 33 | print(f'Time taken: {end_time - start_time} seconds') 34 | 35 | 36 | if __name__ == '__main__': 37 | asyncio.run(main()) 38 | -------------------------------------------------------------------------------- /examples/file_system/file_system.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import pathlib 4 | import shutil 5 | 6 | from dotenv import load_dotenv 7 | 8 | from browser_use import Agent, ChatOpenAI 9 | 10 | load_dotenv() 11 | 12 | 13 | SCRIPT_DIR = pathlib.Path(os.path.dirname(os.path.abspath(__file__))) 14 | agent_dir = SCRIPT_DIR / 'file_system' 15 | agent_dir.mkdir(exist_ok=True) 16 | conversation_dir = agent_dir / 'conversations' / 'conversation' 17 | print(f'Agent logs directory: {agent_dir}') 18 | 19 | 20 | task = """ 21 | Go to https://mertunsall.github.io/posts/post1.html 22 | Save the title of the article in "data.md" 23 | Then, use append_file to add the first sentence of the article to "data.md" 24 | Then, read the file to see its content and make sure it's correct. 25 | Finally, share the file with me. 26 | 27 | NOTE: DO NOT USE extract_structured_data action - everything is visible in browser state. 
28 | """.strip('\n') 29 | 30 | llm = ChatOpenAI(model='gpt-4.1-mini') 31 | 32 | agent = Agent( 33 | task=task, 34 | llm=llm, 35 | save_conversation_path=str(conversation_dir), 36 | file_system_path=str(agent_dir / 'fs'), 37 | ) 38 | 39 | 40 | async def main(): 41 | agent_history = await agent.run() 42 | print(f'Final result: {agent_history.final_result()}', flush=True) 43 | 44 | input('Press Enter to clean the file system...') 45 | # clean the file system 46 | shutil.rmtree(str(agent_dir / 'fs')) 47 | 48 | 49 | if __name__ == '__main__': 50 | asyncio.run(main()) 51 | -------------------------------------------------------------------------------- /examples/getting_started/01_basic_search.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | # Add the parent directory to the path so we can import browser_use 6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 7 | 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | 12 | from browser_use import Agent, ChatOpenAI 13 | 14 | 15 | async def main(): 16 | llm = ChatOpenAI(model='gpt-4.1-mini') 17 | task = "Search Google for 'what is browser automation' and tell me the top 3 results" 18 | agent = Agent(task=task, llm=llm) 19 | await agent.run() 20 | 21 | 22 | if __name__ == '__main__': 23 | asyncio.run(main()) 24 | -------------------------------------------------------------------------------- /examples/getting_started/02_form_filling.py: -------------------------------------------------------------------------------- 1 | """ 2 | Getting Started Example 2: Form Filling 3 | 4 | This example demonstrates how to: 5 | - Navigate to a website with forms 6 | - Fill out input fields 7 | - Submit forms 8 | - Handle basic form interactions 9 | 10 | This builds on the basic search example by showing more complex interactions. 
11 | """ 12 | 13 | import asyncio 14 | import os 15 | import sys 16 | 17 | # Add the parent directory to the path so we can import browser_use 18 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 19 | 20 | from dotenv import load_dotenv 21 | 22 | load_dotenv() 23 | 24 | from browser_use import Agent, ChatOpenAI 25 | 26 | 27 | async def main(): 28 | # Initialize the model 29 | llm = ChatOpenAI(model='gpt-4.1-mini') 30 | 31 | # Define a form filling task 32 | task = """ 33 | Go to https://httpbin.org/forms/post and fill out the contact form with: 34 | - Customer name: John Doe 35 | - Telephone: 555-123-4567 36 | - Email: john.doe@example.com 37 | - Size: Medium 38 | - Topping: cheese 39 | - Delivery time: now 40 | - Comments: This is a test form submission 41 | 42 | Then submit the form and tell me what response you get. 43 | """ 44 | 45 | # Create and run the agent 46 | agent = Agent(task=task, llm=llm) 47 | await agent.run() 48 | 49 | 50 | if __name__ == '__main__': 51 | asyncio.run(main()) 52 | -------------------------------------------------------------------------------- /examples/getting_started/03_data_extraction.py: -------------------------------------------------------------------------------- 1 | """ 2 | Getting Started Example 3: Data Extraction 3 | 4 | This example demonstrates how to: 5 | - Navigate to a website with structured data 6 | - Extract specific information from the page 7 | - Process and organize the extracted data 8 | - Return structured results 9 | 10 | This builds on previous examples by showing how to get valuable data from websites. 
11 | """ 12 | 13 | import asyncio 14 | import os 15 | import sys 16 | 17 | # Add the parent directory to the path so we can import browser_use 18 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 19 | 20 | from dotenv import load_dotenv 21 | 22 | load_dotenv() 23 | 24 | from browser_use import Agent, ChatOpenAI 25 | 26 | 27 | async def main(): 28 | # Initialize the model 29 | llm = ChatOpenAI(model='gpt-4.1-mini') 30 | 31 | # Define a data extraction task 32 | task = """ 33 | Go to https://quotes.toscrape.com/ and extract the following information: 34 | - The first 5 quotes on the page 35 | - The author of each quote 36 | - The tags associated with each quote 37 | 38 | Present the information in a clear, structured format like: 39 | Quote 1: "[quote text]" - Author: [author name] - Tags: [tag1, tag2, ...] 40 | Quote 2: "[quote text]" - Author: [author name] - Tags: [tag1, tag2, ...] 41 | etc. 42 | """ 43 | 44 | # Create and run the agent 45 | agent = Agent(task=task, llm=llm) 46 | await agent.run() 47 | 48 | 49 | if __name__ == '__main__': 50 | asyncio.run(main()) 51 | -------------------------------------------------------------------------------- /examples/getting_started/04_multi_step_task.py: -------------------------------------------------------------------------------- 1 | """ 2 | Getting Started Example 4: Multi-Step Task 3 | 4 | This example demonstrates how to: 5 | - Perform a complex workflow with multiple steps 6 | - Navigate between different pages 7 | - Combine search, form filling, and data extraction 8 | - Handle a realistic end-to-end scenario 9 | 10 | This is the most advanced getting started example, combining all previous concepts. 
11 | """ 12 | 13 | import asyncio 14 | import os 15 | import sys 16 | 17 | # Add the parent directory to the path so we can import browser_use 18 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 19 | 20 | from dotenv import load_dotenv 21 | 22 | load_dotenv() 23 | 24 | from browser_use import Agent, ChatOpenAI 25 | 26 | 27 | async def main(): 28 | # Initialize the model 29 | llm = ChatOpenAI(model='gpt-4.1-mini') 30 | 31 | # Define a multi-step task 32 | task = """ 33 | I want you to research Python web scraping libraries. Here's what I need: 34 | 35 | 1. First, search Google for "best Python web scraping libraries 2024" 36 | 2. Find a reputable article or blog post about this topic 37 | 3. From that article, extract the top 3 recommended libraries 38 | 4. For each library, visit its official website or GitHub page 39 | 5. Extract key information about each library: 40 | - Name 41 | - Brief description 42 | - Main features or advantages 43 | - GitHub stars (if available) 44 | 45 | Present your findings in a summary format comparing the three libraries. 
46 | """ 47 | 48 | # Create and run the agent 49 | agent = Agent(task=task, llm=llm) 50 | await agent.run() 51 | 52 | 53 | if __name__ == '__main__': 54 | asyncio.run(main()) 55 | -------------------------------------------------------------------------------- /examples/getting_started/05_fast_agent.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | # Add the parent directory to the path so we can import browser_use 6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 7 | 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | 12 | 13 | from browser_use import Agent, BrowserProfile 14 | 15 | # Speed optimization instructions for the model 16 | SPEED_OPTIMIZATION_PROMPT = """ 17 | Speed optimization instructions: 18 | - Be extremely concise and direct in your responses 19 | - Get to the goal as quickly as possible 20 | - Use multi-action sequences whenever possible to reduce steps 21 | """ 22 | 23 | 24 | async def main(): 25 | # 1. Use fast LLM - Llama 4 on Groq for ultra-fast inference 26 | from browser_use import ChatGroq 27 | 28 | llm = ChatGroq( 29 | model='meta-llama/llama-4-maverick-17b-128e-instruct', 30 | temperature=0.0, 31 | ) 32 | # from browser_use import ChatGoogle 33 | 34 | # llm = ChatGoogle(model='gemini-2.5-flash') 35 | 36 | # 2. Create speed-optimized browser profile 37 | browser_profile = BrowserProfile( 38 | minimum_wait_page_load_time=0.1, 39 | wait_between_actions=0.1, 40 | headless=False, 41 | ) 42 | 43 | # 3. Define a speed-focused task 44 | task = """ 45 | 1. Go to reddit https://www.reddit.com/search/?q=browser+agent&type=communities 46 | 2. Click directly on the first 5 communities to open each in new tabs 47 | 3. Find out what the latest post is about, and switch directly to the next tab 48 | 4. Return the latest post summary for each page 49 | """ 50 | 51 | # 4. 
Create agent with all speed optimizations 52 | agent = Agent( 53 | task=task, 54 | llm=llm, 55 | flash_mode=True, # Disables thinking in the LLM output for maximum speed 56 | browser_profile=browser_profile, 57 | extend_system_message=SPEED_OPTIMIZATION_PROMPT, 58 | ) 59 | 60 | await agent.run() 61 | 62 | 63 | if __name__ == '__main__': 64 | asyncio.run(main()) 65 | -------------------------------------------------------------------------------- /examples/integrations/agentmail/2fa.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | from agentmail import AsyncAgentMail # type: ignore 6 | 7 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | 12 | from browser_use import Agent, Browser, models 13 | from examples.integrations.agentmail.email_tools import EmailTools 14 | 15 | TASK = """ 16 | Go to reddit.com, create a new account (use the get_email_address), make up password and all other information, confirm the 2fa with get_latest_email, and like latest post on r/elon subreddit. 
17 | """ 18 | 19 | 20 | async def main(): 21 | # Create email inbox 22 | # Get an API key from https://agentmail.to/ 23 | email_client = AsyncAgentMail() 24 | inbox = await email_client.inboxes.create() 25 | print(f'Your email address is: {inbox.inbox_id}\n\n') 26 | 27 | # Initialize the tools for browser-use agent 28 | tools = EmailTools(email_client=email_client, inbox=inbox) 29 | 30 | # Initialize the LLM for browser-use agent 31 | llm = models.azure_gpt_4_1_mini 32 | 33 | # Set your local browser path 34 | browser = Browser(executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome') 35 | 36 | agent = Agent(task=TASK, tools=tools, llm=llm, browser=browser) 37 | 38 | await agent.run() 39 | 40 | 41 | if __name__ == '__main__': 42 | asyncio.run(main()) 43 | -------------------------------------------------------------------------------- /examples/integrations/slack/slack_example.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 5 | 6 | from dotenv import load_dotenv 7 | 8 | load_dotenv() 9 | 10 | 11 | from browser_use.browser import BrowserProfile 12 | from browser_use.llm import ChatGoogle 13 | from examples.integrations.slack.slack_api import SlackBot, app 14 | 15 | # load credentials from environment variables 16 | bot_token = os.getenv('SLACK_BOT_TOKEN') 17 | if not bot_token: 18 | raise ValueError('Slack bot token not found in .env file.') 19 | 20 | signing_secret = os.getenv('SLACK_SIGNING_SECRET') 21 | if not signing_secret: 22 | raise ValueError('Slack signing secret not found in .env file.') 23 | 24 | api_key = os.getenv('GOOGLE_API_KEY') 25 | if not api_key: 26 | raise ValueError('GOOGLE_API_KEY is not set') 27 | 28 | llm = ChatGoogle(model='gemini-2.0-flash-exp', api_key=api_key) 29 | 30 | slack_bot = SlackBot( 31 | llm=llm, # required; instance of BaseChatModel 32 | 
bot_token=bot_token, # required; Slack bot token 33 | signing_secret=signing_secret, # required; Slack signing secret 34 | ack=True, # optional; whether to acknowledge task receipt with a message, defaults to False 35 | browser_profile=BrowserProfile( 36 | headless=True 37 | ), # optional; useful for changing headless mode or other browser configs, defaults to headless mode 38 | ) 39 | 40 | app.dependency_overrides[SlackBot] = lambda: slack_bot 41 | 42 | if __name__ == '__main__': 43 | import uvicorn 44 | 45 | uvicorn.run(app, host='0.0.0.0', port=3000)  # pass the app object, not an import string: a string makes uvicorn re-import a fresh app without the SlackBot dependency override set above 46 | -------------------------------------------------------------------------------- /examples/mcp/simple_client.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple example of using MCP client with browser-use. 3 | 4 | This example shows how to connect to an MCP server and use its tools with an agent. 5 | """ 6 | 7 | import asyncio 8 | import os 9 | 10 | from browser_use import Agent, Tools 11 | from browser_use.llm.openai.chat import ChatOpenAI 12 | from browser_use.mcp.client import MCPClient 13 | 14 | 15 | async def main(): 16 | # Initialize tools 17 | tools = Tools() 18 | 19 | # Connect to a filesystem MCP server 20 | # This server provides tools to read/write files in a directory 21 | mcp_client = MCPClient( 22 | server_name='filesystem', command='npx', args=['@modelcontextprotocol/server-filesystem', os.path.expanduser('~/Desktop')] 23 | ) 24 | 25 | # Connect and register MCP tools 26 | await mcp_client.connect() 27 | await mcp_client.register_to_tools(tools) 28 | 29 | # Create agent with MCP-enabled tools 30 | agent = Agent( 31 | task='List all files on the Desktop and read the content of any .txt files you find', 32 | llm=ChatOpenAI(model='gpt-4.1-mini'), 33 | tools=tools, 34 | ) 35 | 36 | # Run the agent - it now has access to filesystem tools 37 | await agent.run() 38 | 39 | # Disconnect when done 40 | await
mcp_client.disconnect() 41 | 42 | 43 | if __name__ == '__main__': 44 | asyncio.run(main()) 45 | -------------------------------------------------------------------------------- /examples/models/aws.py: -------------------------------------------------------------------------------- 1 | """ 2 | AWS Bedrock Examples 3 | 4 | This file demonstrates how to use AWS Bedrock models with browser-use. 5 | We provide two classes: 6 | 1. ChatAnthropicBedrock - Convenience class for Anthropic Claude models 7 | 2. ChatAWSBedrock - General AWS Bedrock client supporting all providers 8 | 9 | Requirements: 10 | - AWS credentials configured via environment variables 11 | - boto3 installed: pip install boto3 12 | - Access to AWS Bedrock models in your region 13 | """ 14 | 15 | import asyncio 16 | 17 | from lmnr import Laminar 18 | 19 | from browser_use import Agent 20 | from browser_use.llm import ChatAnthropicBedrock, ChatAWSBedrock 21 | 22 | Laminar.initialize() 23 | 24 | 25 | async def example_anthropic_bedrock(): 26 | """Example using ChatAnthropicBedrock - convenience class for Claude models.""" 27 | print('🔹 ChatAnthropicBedrock Example') 28 | 29 | # Initialize with Anthropic Claude via AWS Bedrock 30 | llm = ChatAnthropicBedrock( 31 | model='us.anthropic.claude-sonnet-4-20250514-v1:0', 32 | aws_region='us-east-1', 33 | temperature=0.7, 34 | ) 35 | 36 | print(f'Model: {llm.name}') 37 | print(f'Provider: {llm.provider}') 38 | 39 | # Create agent 40 | agent = Agent( 41 | task="Navigate to google.com and search for 'AWS Bedrock pricing'", 42 | llm=llm, 43 | ) 44 | 45 | print("Task: Navigate to google.com and search for 'AWS Bedrock pricing'") 46 | 47 | # Run the agent 48 | result = await agent.run(max_steps=2) 49 | print(f'Result: {result}') 50 | 51 | 52 | async def example_aws_bedrock(): 53 | """Example using ChatAWSBedrock - general client for any Bedrock model.""" 54 | print('\n🔹 ChatAWSBedrock Example') 55 | 56 | # Initialize with any AWS Bedrock model (using Meta Llama as 
"""
Simple try of the agent.

@dev You need to add AZURE_OPENAI_KEY and AZURE_OPENAI_ENDPOINT to your environment variables.
"""

import asyncio
import os
import sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

from dotenv import load_dotenv

load_dotenv()


from browser_use import Agent
from browser_use.llm import ChatAzureOpenAI

# Make sure your deployment exists, double check the region and model name
api_key = os.getenv('AZURE_OPENAI_KEY')
azure_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT')

# Fail fast with a clear message instead of an opaque auth error at run time
# (consistent with the other model examples, e.g. gemini.py and novita.py).
if not api_key or not azure_endpoint:
	raise ValueError('AZURE_OPENAI_KEY and AZURE_OPENAI_ENDPOINT must be set')

llm = ChatAzureOpenAI(
	model='gpt-4.1-mini',
	api_key=api_key,
	azure_endpoint=azure_endpoint,
)

TASK = """
Go to google.com/travel/flights and find the cheapest flight from New York to Paris on 2025-10-15
"""

agent = Agent(
	task=TASK,
	llm=llm,
)


async def main():
	await agent.run(max_steps=10)


asyncio.run(main())
4 | """ 5 | 6 | import asyncio 7 | import os 8 | import sys 9 | 10 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 11 | 12 | from dotenv import load_dotenv 13 | from lmnr import Laminar 14 | 15 | load_dotenv() 16 | Laminar.initialize() 17 | 18 | from browser_use import Agent 19 | from browser_use.llm import ChatAnthropic 20 | 21 | llm = ChatAnthropic(model='claude-sonnet-4-0', temperature=0.0) 22 | 23 | agent = Agent( 24 | task='Go to amazon.com, search for laptop, sort by best rating, and give me the price of the first result', 25 | llm=llm, 26 | ) 27 | 28 | 29 | async def main(): 30 | await agent.run(max_steps=10) 31 | 32 | 33 | asyncio.run(main()) 34 | -------------------------------------------------------------------------------- /examples/models/deepseek-chat.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from browser_use import Agent 5 | from browser_use.llm import ChatDeepSeek 6 | 7 | # Add your custom instructions 8 | extend_system_message = """ 9 | Remember the most important rules: 10 | 1. When performing a search task, open https://www.google.com/ first for search. 11 | 2. Final output. 
12 | """ 13 | deepseek_api_key = os.getenv('DEEPSEEK_API_KEY') 14 | if deepseek_api_key is None: 15 | print('Make sure you have DEEPSEEK_API_KEY:') 16 | print('export DEEPSEEK_API_KEY=your_key') 17 | exit(0) 18 | 19 | 20 | async def main(): 21 | llm = ChatDeepSeek( 22 | base_url='https://api.deepseek.com/v1', 23 | model='deepseek-chat', 24 | api_key=deepseek_api_key, 25 | ) 26 | 27 | agent = Agent( 28 | task='What should we pay attention to in the recent new rules on tariffs in China-US trade?', 29 | llm=llm, 30 | use_vision=False, 31 | extend_system_message=extend_system_message, 32 | ) 33 | await agent.run() 34 | 35 | 36 | asyncio.run(main()) 37 | -------------------------------------------------------------------------------- /examples/models/gemini.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | from lmnr import Laminar 9 | 10 | load_dotenv() 11 | 12 | 13 | from browser_use import Agent, ChatGoogle 14 | 15 | Laminar.initialize() 16 | 17 | api_key = os.getenv('GOOGLE_API_KEY') 18 | if not api_key: 19 | raise ValueError('GOOGLE_API_KEY is not set') 20 | 21 | llm = ChatGoogle(model='gemini-2.5-flash', api_key=api_key) 22 | 23 | 24 | async def run_search(): 25 | agent = Agent( 26 | task='Go to google.com/travel/flights and find the cheapest flight from New York to Paris on 2025-07-15', 27 | llm=llm, 28 | ) 29 | 30 | await agent.run(max_steps=25) 31 | 32 | 33 | if __name__ == '__main__': 34 | asyncio.run(run_search()) 35 | -------------------------------------------------------------------------------- /examples/models/gpt-4.1.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple try of the agent. 3 | 4 | @dev You need to add OPENAI_API_KEY to your environment variables. 
5 | """ 6 | 7 | import asyncio 8 | 9 | from dotenv import load_dotenv 10 | from lmnr import Laminar 11 | 12 | from browser_use import Agent, ChatOpenAI 13 | 14 | load_dotenv() 15 | 16 | 17 | Laminar.initialize() 18 | 19 | # All the models are type safe from OpenAI in case you need a list of supported models 20 | llm = ChatOpenAI(model='gpt-4.1-mini') 21 | agent = Agent( 22 | task='Go to amazon.com, click on the first link, and give me the title of the page', 23 | llm=llm, 24 | ) 25 | 26 | 27 | async def main(): 28 | await agent.run(max_steps=10) 29 | input('Press Enter to continue...') 30 | 31 | 32 | asyncio.run(main()) 33 | -------------------------------------------------------------------------------- /examples/models/gpt-5-mini.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple try of the agent. 3 | 4 | @dev You need to add OPENAI_API_KEY to your environment variables. 5 | """ 6 | 7 | import asyncio 8 | 9 | from dotenv import load_dotenv 10 | from lmnr import Laminar 11 | 12 | from browser_use import Agent, ChatOpenAI 13 | 14 | load_dotenv() 15 | 16 | 17 | Laminar.initialize() 18 | 19 | # All the models are type safe from OpenAI in case you need a list of supported models 20 | llm = ChatOpenAI(model='gpt-5-mini') 21 | agent = Agent( 22 | llm=llm, 23 | task='Find out which one is cooler: the monkey park or a dolphin tour in Tenerife?', 24 | ) 25 | 26 | 27 | async def main(): 28 | await agent.run(max_steps=20) 29 | input('Press Enter to continue...') 30 | 31 | 32 | asyncio.run(main()) 33 | -------------------------------------------------------------------------------- /examples/models/langchain/README.md: -------------------------------------------------------------------------------- 1 | # Langchain Models (legacy) 2 | 3 | This directory contains example of how to still use Langchain models with the new Browser Use chat models. 
async def main():
	"""Basic example using ChatLangchain with OpenAI through LangChain."""
	# Build the underlying LangChain chat model first...
	langchain_model = ChatOpenAI(
		model='gpt-4.1-mini',
		temperature=0.1,
	)

	# ...then adapt it to the browser-use chat-model interface.
	llm = ChatLangchain(chat=langchain_model)

	task = "Go to google.com and search for 'browser automation with Python'"

	agent = Agent(
		task=task,
		llm=llm,
	)

	print(f'🚀 Starting task: {task}')
	print(f'🤖 Using model: {llm.name} (provider: {llm.provider})')

	history = await agent.run()

	print(f'✅ Task completed! Steps taken: {len(history.history)}')

	# Show the final result when the agent produced one.
	if history.final_result():
		print(f'📋 Final result: {history.final_result()}')

	return history
async def run_search():
	"""Run a Reddit search task against the Novita OpenAI-compatible endpoint."""
	agent = Agent(
		# NOTE: the numbered steps are adjacent string literals; each needs a
		# trailing space or the prompt runs together ("search bar3. Click...").
		task=(
			'1. Go to https://www.reddit.com/r/LocalLLaMA '
			"2. Search for 'browser use' in the search bar "
			'3. Click on first result '
			'4. Return the first comment'
		),
		llm=ChatOpenAI(
			base_url='https://api.novita.ai/v3/openai',
			model='deepseek/deepseek-v3-0324',
			api_key=api_key,
		),
		use_vision=False,
	)

	await agent.run()
5 | """ 6 | 7 | import asyncio 8 | import os 9 | 10 | from dotenv import load_dotenv 11 | from lmnr import Laminar 12 | 13 | from browser_use import Agent, ChatOpenAI 14 | 15 | load_dotenv() 16 | 17 | 18 | Laminar.initialize() 19 | 20 | # All the models are type safe from OpenAI in case you need a list of supported models 21 | llm = ChatOpenAI( 22 | model='x-ai/grok-4', 23 | base_url='https://openrouter.ai/api/v1', 24 | api_key=os.getenv('OPENROUTER_API_KEY'), 25 | ) 26 | agent = Agent( 27 | task='Go to example.com, click on the first link, and give me the title of the page', 28 | llm=llm, 29 | ) 30 | 31 | 32 | async def main(): 33 | await agent.run(max_steps=10) 34 | input('Press Enter to continue...') 35 | 36 | 37 | asyncio.run(main()) 38 | -------------------------------------------------------------------------------- /examples/observability/openLLMetry.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from dotenv import load_dotenv 5 | 6 | # test if traceloop is installed 7 | try: 8 | from traceloop.sdk import Traceloop # type: ignore 9 | except ImportError: 10 | print('Traceloop is not installed') 11 | exit(1) 12 | 13 | from browser_use import Agent 14 | 15 | load_dotenv() 16 | api_key = os.getenv('TRACELOOP_API_KEY') 17 | Traceloop.init(api_key=api_key, disable_batch=True) 18 | 19 | 20 | async def main(): 21 | await Agent('Find the founders of browser-use').run() 22 | 23 | 24 | if __name__ == '__main__': 25 | asyncio.run(main()) 26 | -------------------------------------------------------------------------------- /examples/simple.py: -------------------------------------------------------------------------------- 1 | from browser_use import Agent, ChatOpenAI 2 | 3 | agent = Agent( 4 | task='Find founders of browser-use', 5 | llm=ChatOpenAI(model='gpt-4.1-mini'), 6 | ) 7 | 8 | agent.run_sync() 9 | -------------------------------------------------------------------------------- 
/examples/ui/README.md: -------------------------------------------------------------------------------- 1 | # **User Interfaces of Browser-Use** 2 | 3 | | **File Name** | **User Interface** | **Description** | **Example Usage** | 4 | |------------------------|-------------------|-------------------------------------------|-------------------------------------------| 5 | | `command_line.py` | **Terminal** | Parses arguments for command-line execution. | `python command_line.py` | 6 | | `gradio_demo.py` | **Gradio** | Provides a Gradio-based interactive UI. | `python gradio_demo.py` | 7 | | `streamlit_demo.py` | **Streamlit** | Runs a Streamlit-based web interface. | `python -m streamlit run streamlit_demo.py` | 8 | -------------------------------------------------------------------------------- /examples/ui/command_line.py: -------------------------------------------------------------------------------- 1 | """ 2 | To Use It: 3 | 4 | Example 1: Using OpenAI (default), with default task: 'go to reddit and search for posts about browser-use' 5 | python command_line.py 6 | 7 | Example 2: Using OpenAI with a Custom Query 8 | python command_line.py --query "go to google and search for browser-use" 9 | 10 | Example 3: Using Anthropic's Claude Model with a Custom Query 11 | python command_line.py --query "find latest Python tutorials on Medium" --provider anthropic 12 | 13 | """ 14 | 15 | import argparse 16 | import asyncio 17 | import os 18 | import sys 19 | 20 | # Ensure local repository (browser_use) is accessible 21 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 22 | 23 | from dotenv import load_dotenv 24 | 25 | load_dotenv() 26 | 27 | from browser_use import Agent 28 | from browser_use.browser import BrowserSession 29 | from browser_use.tools.service import Tools 30 | 31 | 32 | def get_llm(provider: str): 33 | if provider == 'anthropic': 34 | from browser_use.llm import ChatAnthropic 35 | 36 | api_key = 
os.getenv('ANTHROPIC_API_KEY') 37 | if not api_key: 38 | raise ValueError('Error: ANTHROPIC_API_KEY is not set. Please provide a valid API key.') 39 | 40 | return ChatAnthropic(model='claude-3-5-sonnet-20240620', temperature=0.0) 41 | elif provider == 'openai': 42 | from browser_use import ChatOpenAI 43 | 44 | api_key = os.getenv('OPENAI_API_KEY') 45 | if not api_key: 46 | raise ValueError('Error: OPENAI_API_KEY is not set. Please provide a valid API key.') 47 | 48 | return ChatOpenAI(model='gpt-4.1', temperature=0.0) 49 | 50 | else: 51 | raise ValueError(f'Unsupported provider: {provider}') 52 | 53 | 54 | def parse_arguments(): 55 | """Parse command-line arguments.""" 56 | parser = argparse.ArgumentParser(description='Automate browser tasks using an LLM agent.') 57 | parser.add_argument( 58 | '--query', type=str, help='The query to process', default='go to reddit and search for posts about browser-use' 59 | ) 60 | parser.add_argument( 61 | '--provider', 62 | type=str, 63 | choices=['openai', 'anthropic'], 64 | default='openai', 65 | help='The model provider to use (default: openai)', 66 | ) 67 | return parser.parse_args() 68 | 69 | 70 | def initialize_agent(query: str, provider: str): 71 | """Initialize the browser agent with the given query and provider.""" 72 | llm = get_llm(provider) 73 | tools = Tools() 74 | browser_session = BrowserSession() 75 | 76 | return Agent( 77 | task=query, 78 | llm=llm, 79 | tools=tools, 80 | browser_session=browser_session, 81 | use_vision=True, 82 | max_actions_per_step=1, 83 | ), browser_session 84 | 85 | 86 | async def main(): 87 | """Main async function to run the agent.""" 88 | args = parse_arguments() 89 | agent, browser_session = initialize_agent(args.query, args.provider) 90 | 91 | await agent.run(max_steps=25) 92 | 93 | input('Press Enter to close the browser...') 94 | await browser_session.kill() 95 | 96 | 97 | if __name__ == '__main__': 98 | asyncio.run(main()) 99 | 
def parse_agent_history(history_str: str) -> None:
	"""Pretty-print the extracted content of each ActionResult found in *history_str*."""
	console = Console()

	# Every ActionResult entry opens a new section; text before the first entry is skipped.
	entries = history_str.split('ActionResult(')

	for step, entry in enumerate(entries[1:], 1):
		extracted = ''
		if 'extracted_content=' in entry:
			extracted = entry.split('extracted_content=')[1].split(',')[0].strip("'")

		if not extracted:
			continue

		title = Text(f'Step {step}', style='bold blue')
		console.print(Panel(extracted, title=title, border_style='blue'))
		console.print()

	return None
Agent( 71 | task=task, 72 | llm=ChatOpenAI(model='gpt-4.1-mini'), 73 | ) 74 | result = await agent.run() 75 | # TODO: The result could be parsed better 76 | return str(result) 77 | except Exception as e: 78 | return f'Error: {str(e)}' 79 | 80 | 81 | def create_ui(): 82 | with gr.Blocks(title='Browser Use GUI') as interface: 83 | gr.Markdown('# Browser Use Task Automation') 84 | 85 | with gr.Row(): 86 | with gr.Column(): 87 | api_key = gr.Textbox(label='OpenAI API Key', placeholder='sk-...', type='password') 88 | task = gr.Textbox( 89 | label='Task Description', 90 | placeholder='E.g., Find flights from New York to London for next week', 91 | lines=3, 92 | ) 93 | model = gr.Dropdown(choices=['gpt-4', 'gpt-3.5-turbo'], label='Model', value='gpt-4') 94 | headless = gr.Checkbox(label='Run Headless', value=True) 95 | submit_btn = gr.Button('Run Task') 96 | 97 | with gr.Column(): 98 | output = gr.Textbox(label='Output', lines=10, interactive=False) 99 | 100 | submit_btn.click( 101 | fn=lambda *args: asyncio.run(run_browser_task(*args)), 102 | inputs=[task, api_key, model, headless], 103 | outputs=output, 104 | ) 105 | 106 | return interface 107 | 108 | 109 | if __name__ == '__main__': 110 | demo = create_ui() 111 | demo.launch() 112 | -------------------------------------------------------------------------------- /examples/ui/streamlit_demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | To use it, you'll need to install streamlit, and run with: 3 | 4 | python -m streamlit run streamlit_demo.py 5 | 6 | """ 7 | 8 | import asyncio 9 | import os 10 | import sys 11 | 12 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 13 | 14 | from dotenv import load_dotenv 15 | 16 | load_dotenv() 17 | 18 | import streamlit as st # type: ignore 19 | 20 | from browser_use import Agent 21 | from browser_use.browser import BrowserSession 22 | from browser_use.tools.service import Tools 23 | 24 | if 
# Function to get the LLM based on provider
def get_llm(provider: str):
	"""Return a chat model for the given provider, or stop the Streamlit app.

	Reads the provider's API key from the environment; if it is missing,
	an error is shown in the UI and st.stop() halts script execution, so
	the function never returns None to a live caller in practice.
	"""
	if provider == 'anthropic':
		from browser_use.llm import ChatAnthropic

		api_key = os.getenv('ANTHROPIC_API_KEY')
		if not api_key:
			st.error('Error: ANTHROPIC_API_KEY is not set. Please provide a valid API key.')
			st.stop()

		return ChatAnthropic(model='claude-3-5-sonnet-20240620', temperature=0.0)
	elif provider == 'openai':
		from browser_use import ChatOpenAI

		api_key = os.getenv('OPENAI_API_KEY')
		if not api_key:
			st.error('Error: OPENAI_API_KEY is not set. Please provide a valid API key.')
			st.stop()

		return ChatOpenAI(model='gpt-4.1', temperature=0.0)
	else:
		st.error(f'Unsupported provider: {provider}')
		st.stop()
		return None  # Never reached, but helps with type checking
🎉') 84 | 85 | asyncio.run(run_agent()) 86 | 87 | st.button('Close Browser', on_click=lambda: asyncio.run(browser_session.kill())) 88 | -------------------------------------------------------------------------------- /examples/use-cases/captcha.py: -------------------------------------------------------------------------------- 1 | """ 2 | Goal: Automates CAPTCHA solving on a demo website. 3 | 4 | 5 | Simple try of the agent. 6 | @dev You need to add OPENAI_API_KEY to your environment variables. 7 | NOTE: captchas are hard. For this example it works. But e.g. for iframes it does not. 8 | for this example it helps to zoom in. 9 | """ 10 | 11 | import asyncio 12 | import os 13 | import sys 14 | 15 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 16 | 17 | from dotenv import load_dotenv 18 | 19 | load_dotenv() 20 | 21 | from browser_use import Agent, ChatOpenAI 22 | 23 | 24 | async def main(): 25 | llm = ChatOpenAI(model='gpt-4.1-mini') 26 | agent = Agent( 27 | task='go to https://captcha.com/demos/features/captcha-demo.aspx and solve the captcha', 28 | llm=llm, 29 | ) 30 | await agent.run() 31 | input('Press Enter to exit') 32 | 33 | 34 | if __name__ == '__main__': 35 | asyncio.run(main()) 36 | -------------------------------------------------------------------------------- /examples/use-cases/check_appointment.py: -------------------------------------------------------------------------------- 1 | # Goal: Checks for available visa appointment slots on the Greece MFA website. 
@tools.action('Go to the webpage', param_model=WebpageInfo)
def go_to_webpage(webpage_info: WebpageInfo):
	"""Return the appointment-page URL for the agent to navigate to.

	The link comes from the WebpageInfo param model, whose default points at
	the Greece MFA reservation page, so the agent receives it as a tool result
	instead of having to reproduce the URL itself.
	"""
	return webpage_info.link
async def main():
	"""Ask the agent to open a government PDF and report what is on page 3."""
	agent = Agent(
		# Typo fix: the prompt previously read "the following UR" instead of "URL".
		task="""
		Objective: Navigate to the following URL, what is on page 3?

		URL: https://docs.house.gov/meetings/GO/GO00/20220929/115171/HHRG-117-GO00-20220929-SD010.pdf
		""",
		llm=ChatOpenAI(model='gpt-4.1-mini'),
	)
	result = await agent.run()
	logger.info(result)
5 | """ 6 | 7 | import asyncio 8 | import json 9 | import os 10 | import sys 11 | 12 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 13 | 14 | from dotenv import load_dotenv 15 | 16 | load_dotenv() 17 | 18 | import httpx 19 | from pydantic import BaseModel 20 | 21 | from browser_use import Agent, ChatOpenAI, Tools 22 | from browser_use.agent.views import ActionResult 23 | 24 | 25 | class Profile(BaseModel): 26 | platform: str 27 | profile_url: str 28 | 29 | 30 | class Profiles(BaseModel): 31 | profiles: list[Profile] 32 | 33 | 34 | tools = Tools(exclude_actions=['search_google'], output_model=Profiles) 35 | BEARER_TOKEN = os.getenv('BEARER_TOKEN') 36 | 37 | if not BEARER_TOKEN: 38 | # use the api key for ask tessa 39 | # you can also use other apis like exa, xAI, perplexity, etc. 40 | raise ValueError('BEARER_TOKEN is not set - go to https://www.heytessa.ai/ and create an api key') 41 | 42 | 43 | @tools.registry.action('Search the web for a specific query') 44 | async def search_web(query: str): 45 | keys_to_use = ['url', 'title', 'content', 'author', 'score'] 46 | headers = {'Authorization': f'Bearer {BEARER_TOKEN}'} 47 | async with httpx.AsyncClient() as client: 48 | response = await client.post( 49 | 'https://asktessa.ai/api/search', 50 | headers=headers, 51 | json={'query': query}, 52 | ) 53 | 54 | final_results = [ 55 | {key: source[key] for key in keys_to_use if key in source} 56 | for source in await response.json()['sources'] 57 | if source['score'] >= 0.2 58 | ] 59 | # print(json.dumps(final_results, indent=4)) 60 | result_text = json.dumps(final_results, indent=4) 61 | print(result_text) 62 | return ActionResult(extracted_content=result_text, include_in_memory=True) 63 | 64 | 65 | async def main(): 66 | task = ( 67 | 'Go to this tiktok video url, open it and extract the @username from the resulting url. Then do a websearch for this username to find all his social media profiles. 
Return me the links to the social media profiles with the platform name.' 68 | ' https://www.tiktokv.com/share/video/7470981717659110678/ ' 69 | ) 70 | model = ChatOpenAI(model='gpt-4.1-mini') 71 | agent = Agent(task=task, llm=model, tools=tools) 72 | 73 | history = await agent.run() 74 | 75 | result = history.final_result() 76 | if result: 77 | parsed: Profiles = Profiles.model_validate_json(result) 78 | 79 | for profile in parsed.profiles: 80 | print('\n--------------------------------') 81 | print(f'Platform: {profile.platform}') 82 | print(f'Profile URL: {profile.profile_url}') 83 | 84 | else: 85 | print('No result') 86 | 87 | 88 | if __name__ == '__main__': 89 | asyncio.run(main()) 90 | -------------------------------------------------------------------------------- /static/NiceHack69.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/2d84ff6605f4d89988f7868633f288f351c6a349/static/NiceHack69.png -------------------------------------------------------------------------------- /static/browser-use-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/2d84ff6605f4d89988f7868633f288f351c6a349/static/browser-use-dark.png -------------------------------------------------------------------------------- /static/browser-use.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/2d84ff6605f4d89988f7868633f288f351c6a349/static/browser-use.png -------------------------------------------------------------------------------- /tests/agent_tasks/README.md: -------------------------------------------------------------------------------- 1 | # Contributing Agent Tasks 2 | 3 | Contribute your own agent tasks and we test if the agent solves them for CI testing! 4 | 5 | ## How to Add a Task 6 | 7 | 1. 
Create a new `.yaml` file in this directory (`tests/agent_tasks/`). 8 | 2. Use the following format: 9 | 10 | ```yaml 11 | name: My Task Name 12 | task: Describe the task for the agent to perform 13 | judge_context: 14 | - List criteria for success, one per line 15 | max_steps: 10 16 | ``` 17 | 18 | ## Guidelines 19 | - Be specific in your task and criteria. 20 | - The `judge_context` should list what counts as a successful result. 21 | - The agent's output will be judged by an LLM using these criteria. 22 | 23 | ## Running the Tests 24 | 25 | To run all agent tasks: 26 | 27 | ```bash 28 | pytest tests/ci/test_agent_real_tasks.py 29 | ``` 30 | 31 | --- 32 | 33 | Happy contributing! 34 | -------------------------------------------------------------------------------- /tests/agent_tasks/amazon_laptop.yaml: -------------------------------------------------------------------------------- 1 | name: Amazon Laptop Search 2 | task: Go to amazon.com, search for 'laptop', and return the first result 3 | judge_context: 4 | - The agent must navigate to amazon.com 5 | - The agent must search for 'laptop' 6 | - The agent must return name of the first laptop 7 | max_steps: 10 8 | -------------------------------------------------------------------------------- /tests/agent_tasks/browser_use_pip.yaml: -------------------------------------------------------------------------------- 1 | name: Find pip install command for browser-use 2 | task: Find the pip installation command for the browser-use repo 3 | judge_context: 4 | - The output must include the command ('pip install browser-use') 5 | max_steps: 10 6 | -------------------------------------------------------------------------------- /tests/agent_tasks/captcha_cloudflare.yaml: -------------------------------------------------------------------------------- 1 | name: Cloudflare captcha 2 | task: Go to https://2captcha.com/demo/cloudflare-turnstile and solve the captcha, wait a few seconds, then click on check, wait a few more 
seconds for it to complete, then extract the "hostname" value from the displayed dictionary under "Captcha is passed successfully!" 3 | judge_context: 4 | - The agent must solve the captcha 5 | - The hostname returned should be "example.com" 6 | max_steps: 6 7 | -------------------------------------------------------------------------------- /tests/ci/test_browser_session_viewport_and_proxy.py: -------------------------------------------------------------------------------- 1 | async def test_proxy_settings_pydantic_model(): 2 | """ 3 | Test that ProxySettings as a Pydantic model is correctly converted to a dictionary when used. 4 | """ 5 | # Create ProxySettings with Pydantic model 6 | proxy_settings = dict(server='http://example.proxy:8080', bypass='localhost', username='testuser', password='testpass') 7 | 8 | # Verify the model has correct dict-like access 9 | assert proxy_settings['server'] == 'http://example.proxy:8080' 10 | assert proxy_settings.get('bypass') == 'localhost' 11 | assert proxy_settings.get('nonexistent', 'default') == 'default' 12 | 13 | # Verify model_dump works correctly 14 | proxy_dict = dict(proxy_settings) 15 | assert isinstance(proxy_dict, dict) 16 | assert proxy_dict['server'] == 'http://example.proxy:8080' 17 | assert proxy_dict['bypass'] == 'localhost' 18 | assert proxy_dict['username'] == 'testuser' 19 | assert proxy_dict['password'] == 'testpass' 20 | 21 | # We don't launch the actual browser - we just verify the model itself works as expected 22 | -------------------------------------------------------------------------------- /tests/ci/test_browser_watchdog_downloads_simple.py: -------------------------------------------------------------------------------- 1 | """Test simple download functionality.""" 2 | 3 | import pytest 4 | 5 | # Skip Playwright imports - removed dependency 6 | from pytest_httpserver import HTTPServer 7 | 8 | 9 | async def test_simple_playwright_download(): 10 | """Test basic Playwright download functionality 
without browser-use - this just validates the browser setup""" 11 | # Skip Playwright usage - removed dependency 12 | pytest.skip('Playwright dependency removed') 13 | 14 | 15 | @pytest.fixture(scope='function') 16 | def http_server(): 17 | """Create a test HTTP server with a downloadable file.""" 18 | server = HTTPServer() 19 | server.start() 20 | 21 | # Serve a simple text file for download 22 | server.expect_request('/download/test.txt').respond_with_data( 23 | 'Hello World from HTTP Server', status=200, headers={'Content-Type': 'text/plain'} 24 | ) 25 | 26 | yield server 27 | server.stop() 28 | 29 | 30 | async def test_browser_use_download_with_http_server(http_server): 31 | """Test browser-use download with HTTP server and event coordination""" 32 | # Skip complex element selection for now - would need to implement selector-to-index conversion 33 | pytest.skip('Complex element selection needs refactoring for CDP events') 34 | -------------------------------------------------------------------------------- /tests/ci/test_llm_anthropic_502_error.py: -------------------------------------------------------------------------------- 1 | """Test for handling Anthropic 502 errors""" 2 | 3 | import pytest 4 | from anthropic import APIStatusError 5 | 6 | from browser_use.llm.anthropic.chat import ChatAnthropic 7 | from browser_use.llm.exceptions import ModelProviderError 8 | from browser_use.llm.messages import BaseMessage, UserMessage 9 | 10 | 11 | @pytest.mark.asyncio 12 | async def test_anthropic_502_error_handling(monkeypatch): 13 | """Test that ChatAnthropic properly handles 502 errors from the API""" 14 | # Create a ChatAnthropic instance 15 | chat = ChatAnthropic(model='claude-3-5-sonnet-20240620', api_key='test-key') 16 | 17 | # Create test messages 18 | messages: list[BaseMessage] = [UserMessage(content='Test message')] 19 | 20 | # Mock the client to raise a 502 error 21 | class MockClient: 22 | class Messages: 23 | async def create(self, **kwargs): 24 | # 
Simulate a 502 error from Anthropic API 25 | import httpx 26 | 27 | request = httpx.Request('POST', 'https://api.anthropic.com/v1/messages') 28 | response = httpx.Response(status_code=502, headers={}, content=b'Bad Gateway', request=request) 29 | raise APIStatusError( 30 | message='Bad Gateway', response=response, body={'error': {'message': 'Bad Gateway', 'type': 'server_error'}} 31 | ) 32 | 33 | messages = Messages() 34 | 35 | # Replace the client with our mock 36 | monkeypatch.setattr(chat, 'get_client', lambda: MockClient()) 37 | 38 | # Test that the error is properly caught and re-raised as ModelProviderError 39 | with pytest.raises(ModelProviderError) as exc_info: 40 | await chat.ainvoke(messages) 41 | 42 | # Verify the error details 43 | assert exc_info.value.args[0] == 'Bad Gateway' 44 | assert exc_info.value.args[1] == 502 45 | assert str(exc_info.value) == "('Bad Gateway', 502)" 46 | 47 | 48 | @pytest.mark.asyncio 49 | async def test_anthropic_error_does_not_access_usage(monkeypatch): 50 | """Test that error handling doesn't try to access usage attribute on error responses""" 51 | chat = ChatAnthropic(model='claude-3-5-sonnet-20240620', api_key='test-key') 52 | 53 | messages: list[BaseMessage] = [UserMessage(content='Test message')] 54 | 55 | # Mock the client to return a string instead of a proper response 56 | class MockClient: 57 | class Messages: 58 | async def create(self, **kwargs): 59 | # This simulates what might happen if the API returns an unexpected response 60 | # that gets parsed as a string 61 | return 'Error: Bad Gateway' 62 | 63 | messages = Messages() 64 | 65 | monkeypatch.setattr(chat, 'get_client', lambda: MockClient()) 66 | 67 | # This should raise a ModelProviderError with a clear message 68 | with pytest.raises(ModelProviderError) as exc_info: 69 | await chat.ainvoke(messages) 70 | 71 | # The error should be about unexpected response type, not missing 'usage' attribute 72 | assert "'str' object has no attribute 'usage'" not in 
str(exc_info.value) 73 | assert 'Unexpected response type from Anthropic API' in str(exc_info.value) 74 | assert exc_info.value.args[1] == 502 75 | -------------------------------------------------------------------------------- /tests/ci/test_llm_schema_optimizer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for the SchemaOptimizer to ensure it correctly processes and 3 | optimizes the schemas for agent actions without losing information. 4 | """ 5 | 6 | from pydantic import BaseModel 7 | 8 | from browser_use.agent.views import AgentOutput 9 | from browser_use.llm.schema import SchemaOptimizer 10 | from browser_use.tools.service import Tools 11 | 12 | 13 | class ProductInfo(BaseModel): 14 | """A sample structured output model with multiple fields.""" 15 | 16 | price: str 17 | title: str 18 | rating: float | None = None 19 | 20 | 21 | def test_optimizer_preserves_all_fields_in_structured_done_action(): 22 | """ 23 | Ensures the SchemaOptimizer does not drop fields from a custom structured 24 | output model when creating the schema for the 'done' action. 25 | 26 | This test specifically checks for a bug where fields were being lost 27 | during the optimization process. 28 | """ 29 | # 1. Setup a tools with a custom output model, simulating an Agent 30 | # being created with an `output_model_schema`. 31 | tools = Tools(output_model=ProductInfo) 32 | 33 | # 2. Get the dynamically created AgentOutput model, which includes all registered actions. 34 | ActionModel = tools.registry.create_action_model() 35 | agent_output_model = AgentOutput.type_with_custom_actions(ActionModel) 36 | 37 | # 3. Run the schema optimizer on the agent's output model. 38 | optimized_schema = SchemaOptimizer.create_optimized_json_schema(agent_output_model) 39 | 40 | # 4. Find the 'done' action schema within the optimized output. 41 | # The path is properties -> action -> items -> anyOf -> [schema with 'done']. 
42 | done_action_schema = None 43 | actions_schemas = optimized_schema.get('properties', {}).get('action', {}).get('items', {}).get('anyOf', []) 44 | for action_schema in actions_schemas: 45 | if 'done' in action_schema.get('properties', {}): 46 | done_action_schema = action_schema 47 | break 48 | 49 | # 5. Assert that the 'done' action schema was successfully found. 50 | assert done_action_schema is not None, "Could not find 'done' action in the optimized schema." 51 | 52 | # 6. Navigate to the schema for our custom data model within the 'done' action. 53 | # The path is properties -> done -> properties -> data -> properties. 54 | done_params_schema = done_action_schema.get('properties', {}).get('done', {}) 55 | structured_data_schema = done_params_schema.get('properties', {}).get('data', {}) 56 | final_properties = structured_data_schema.get('properties', {}) 57 | 58 | # 7. Assert that the set of fields in the optimized schema matches the original model's fields. 59 | original_fields = set(ProductInfo.model_fields.keys()) 60 | optimized_fields = set(final_properties.keys()) 61 | 62 | assert original_fields == optimized_fields, ( 63 | f"Field mismatch between original and optimized structured 'done' action schema.\n" 64 | f'Missing from optimized: {original_fields - optimized_fields}\n' 65 | f'Unexpected in optimized: {optimized_fields - original_fields}' 66 | ) 67 | --------------------------------------------------------------------------------