├── .cursor └── rules │ └── browser-use-rules.mdc ├── .dockerignore ├── .env.example ├── .gitattributes ├── .github ├── .git-blame-ignore-revs ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── 1_element_detection_bug.yml │ ├── 2_bug_report.yml │ ├── 3_feature_request.yml │ ├── 4_docs_issue.yml │ └── config.yml ├── SECURITY.md └── workflows │ ├── build-base-image.yml.disabled │ ├── claude.yml │ ├── cloud_evals.yml │ ├── docker.yml │ ├── lint.yml │ ├── package.yaml │ ├── publish.yml │ └── test.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .python-version ├── CLAUDE.md ├── Dockerfile ├── Dockerfile.fast ├── LICENSE ├── README.md ├── bin ├── lint.sh ├── setup.sh └── test.sh ├── browser_use ├── README.md ├── __init__.py ├── agent │ ├── cloud_events.py │ ├── gif.py │ ├── message_manager │ │ ├── service.py │ │ ├── utils.py │ │ └── views.py │ ├── prompts.py │ ├── service.py │ ├── system_prompt.md │ ├── system_prompt_flash.md │ ├── system_prompt_no_thinking.md │ └── views.py ├── browser │ ├── __init__.py │ ├── browser.py │ ├── context.py │ ├── extensions.py │ ├── profile.py │ ├── session.py │ ├── types.py │ ├── utils.py │ └── views.py ├── cli.py ├── config.py ├── controller │ ├── registry │ │ ├── service.py │ │ └── views.py │ ├── service.py │ └── views.py ├── dom │ ├── __init__.py │ ├── clickable_element_processor │ │ └── service.py │ ├── dom_tree │ │ └── index.js │ ├── history_tree_processor │ │ ├── service.py │ │ └── view.py │ ├── playground │ │ ├── extraction.py │ │ ├── process_dom.py │ │ └── test_accessibility.py │ ├── service.py │ ├── utils.py │ └── views.py ├── exceptions.py ├── filesystem │ ├── __init__.py │ └── file_system.py ├── integrations │ └── gmail │ │ ├── __init__.py │ │ ├── actions.py │ │ └── service.py ├── llm │ ├── README.md │ ├── __init__.py │ ├── anthropic │ │ ├── chat.py │ │ └── serializer.py │ ├── aws │ │ ├── __init__.py │ │ ├── chat_anthropic.py │ │ ├── chat_bedrock.py │ │ └── serializer.py │ ├── azure │ │ └── chat.py │ ├── base.py │ ├── deepseek │ │ 
├── chat.py │ │ └── serializer.py │ ├── exceptions.py │ ├── google │ │ ├── __init__.py │ │ ├── chat.py │ │ └── serializer.py │ ├── groq │ │ ├── chat.py │ │ ├── parser.py │ │ └── serializer.py │ ├── messages.py │ ├── ollama │ │ ├── chat.py │ │ └── serializer.py │ ├── openai │ │ ├── chat.py │ │ ├── like.py │ │ └── serializer.py │ ├── openrouter │ │ ├── chat.py │ │ └── serializer.py │ ├── schema.py │ ├── tests │ │ ├── test_anthropic_cache.py │ │ ├── test_chat_models.py │ │ ├── test_gemini_image.py │ │ ├── test_groq_loop.py │ │ └── test_single_step.py │ └── views.py ├── logging_config.py ├── mcp │ ├── .dxtignore │ ├── __init__.py │ ├── __main__.py │ ├── client.py │ ├── controller.py │ ├── manifest.json │ └── server.py ├── observability.py ├── py.typed ├── screenshots │ ├── __init__.py │ └── service.py ├── sync │ ├── __init__.py │ ├── auth.py │ └── service.py ├── telemetry │ ├── __init__.py │ ├── service.py │ └── views.py ├── tokens │ ├── __init__.py │ ├── service.py │ ├── tests │ │ └── test_cost.py │ └── views.py └── utils.py ├── docker ├── README.md ├── base-images │ ├── chromium │ │ └── Dockerfile │ ├── python-deps │ │ └── Dockerfile │ └── system │ │ └── Dockerfile └── build-base-images.sh ├── docs ├── README.md ├── api-reference │ ├── check-balance.mdx │ ├── create-browser-profile.mdx │ ├── create-scheduled-task.mdx │ ├── delete-browser-profile.mdx │ ├── delete-scheduled-task.mdx │ ├── get-browser-profile.mdx │ ├── get-scheduled-task.mdx │ ├── get-task-media.mdx │ ├── get-task-output-file.mdx │ ├── get-task-screenshots.mdx │ ├── get-task-status.mdx │ ├── get-task.mdx │ ├── index.mdx │ ├── list-browser-profiles.mdx │ ├── list-scheduled-tasks.mdx │ ├── list-tasks.mdx │ ├── pause-task.mdx │ ├── ping.mdx │ ├── resume-task.mdx │ ├── run-task.mdx │ ├── search-url.mdx │ ├── simple-search.mdx │ ├── stop-task.mdx │ ├── update-browser-profile.mdx │ ├── update-scheduled-task.mdx │ ├── upload-file-presigned-url.mdx │ └── user.mdx ├── cli.mdx ├── cloud │ ├── authentication.mdx │ 
├── custom-sdk.mdx │ ├── implementation.mdx │ ├── n8n-browser-use-integration.mdx │ ├── quickstart.mdx │ ├── search.mdx │ └── webhooks.mdx ├── customize │ ├── agent-settings.mdx │ ├── browser-settings.mdx │ ├── custom-functions.mdx │ ├── hooks.mdx │ ├── mcp-client.mdx │ ├── mcp-server.mdx │ ├── output-format.mdx │ ├── real-browser.mdx │ ├── sensitive-data.mdx │ ├── supported-models.mdx │ └── system-prompt.mdx ├── development.mdx ├── development │ ├── contribution-guide.mdx │ ├── evaluations.mdx │ ├── local-setup.mdx │ ├── n8n-integration.mdx │ ├── observability.mdx │ ├── roadmap.mdx │ └── telemetry.mdx ├── docs.json ├── favicon.ico ├── favicon.svg ├── images │ ├── browser-use.png │ ├── checks-passed.png │ └── laminar.png ├── introduction.mdx ├── logo │ ├── dark.svg │ └── light.svg └── quickstart.mdx ├── examples ├── __init__.py ├── browser │ ├── multiple_agents_same_browser.py │ ├── real_browser.py │ ├── stealth.py │ ├── using_cdp.py │ └── window_sizing.py ├── custom-functions │ ├── 2fa.py │ ├── action_filters.py │ ├── advanced_search.py │ ├── clipboard.py │ ├── cua.py │ ├── custom_hooks_before_after_step.py │ ├── drag_and_drop.py │ ├── extract_pdf_content.py │ ├── file_upload.py │ ├── hover_element.py │ ├── notification.py │ ├── onepassword_2fa.py │ ├── perplexity_search.py │ ├── save_pdf.py │ ├── save_to_file_hugging_face.py │ └── solve_amazon_captcha.py ├── features │ ├── click_fallback_options.py │ ├── cross_origin_iframes.py │ ├── custom_output.py │ ├── custom_system_prompt.py │ ├── custom_user_agent.py │ ├── download_file.py │ ├── drag_drop.py │ ├── follow_up_tasks.py │ ├── initial_actions.py │ ├── multi-tab_handling.py │ ├── multiple_tasks.py │ ├── outsource_state.py │ ├── parallel_agents.py │ ├── pause_agent.py │ ├── planner.py │ ├── restrict_urls.py │ ├── result_processing.py │ ├── save_trace.py │ ├── sensitive_data.py │ ├── small_model_for_extraction.py │ └── validate_output.py ├── file_system │ ├── alphabet_earnings.py │ ├── excel_sheet.py │ └── 
file_system.py ├── getting_started │ ├── 01_basic_search.py │ ├── 02_form_filling.py │ ├── 03_data_extraction.py │ └── 04_multi_step_task.py ├── integrations │ ├── browserbase_stagehand.py │ ├── discord │ │ ├── discord_api.py │ │ └── discord_example.py │ ├── gmail_2fa_integration.py │ └── slack │ │ ├── README.md │ │ ├── slack_api.py │ │ └── slack_example.py ├── mcp │ ├── advanced_client.py │ ├── advanced_server.py │ ├── simple_client.py │ └── simple_server.py ├── models │ ├── README.md │ ├── aws.py │ ├── azure_openai.py │ ├── claude-4-sonnet.py │ ├── deepseek-chat.py │ ├── gemini.py │ ├── gpt-4.1.py │ ├── langchain │ │ ├── README.md │ │ ├── __init__.py │ │ ├── chat.py │ │ ├── example.py │ │ └── serializer.py │ ├── llama4-groq.py │ ├── novita.py │ └── openrouter.py ├── search │ ├── search_url.py │ └── simple_search.py ├── simple.py ├── ui │ ├── README.md │ ├── command_line.py │ ├── gradio_demo.py │ └── streamlit_demo.py └── use-cases │ ├── README.md │ ├── captcha.py │ ├── check_appointment.py │ ├── find_and_apply_to_jobs.py │ ├── find_influencer_profiles.py │ ├── google_sheets.py │ ├── online_coding_agent.py │ ├── play_chess.py │ ├── post-twitter.py │ ├── scrolling_page.py │ ├── shopping.py │ ├── test_cv.txt │ ├── twitter_cookies.txt │ ├── twitter_post_using_cookies.py │ ├── web_voyager_agent.py │ └── wikipedia_banana_to_quantum.py ├── pyproject.toml ├── static ├── browser-use-dark.png └── browser-use.png └── tests ├── agent_tasks ├── README.md ├── amazon_laptop.yaml ├── browser_use_pip.yaml └── captcha_cloudflare.yaml ├── ci ├── conftest.py ├── evaluate_tasks.py ├── test_action_parameter_injection.py ├── test_agent_multiprocessing.py ├── test_agent_sensitive_data.py ├── test_anthropic_502_error.py ├── test_aria_menu_dropdown.py ├── test_browser_session_allowed_domains.py ├── test_browser_session_cookies.py ├── test_browser_session_crashed_page_recovery.py ├── test_browser_session_downloads.py ├── test_browser_session_element_cache.py ├── 
test_browser_session_file_uploads.py ├── test_browser_session_output_paths.py ├── test_browser_session_ownership.py ├── test_browser_session_reuse.py ├── test_browser_session_screenshots.py ├── test_browser_session_start.py ├── test_browser_session_tab_management.py ├── test_browser_session_via_cdp.py ├── test_browser_session_viewport_and_proxy.py ├── test_config.py ├── test_config_new.py ├── test_controller.py ├── test_custom_structured_ouput.py ├── test_dom_service_chrome_urls.py ├── test_filesystem.py ├── test_fill_fallback.py ├── test_gemini_type_field_fix.py ├── test_gif_filtering.py ├── test_gif_generation_with_navigation.py ├── test_mcp_client.py ├── test_mcp_server.py ├── test_registry.py ├── test_schema_optimizer.py ├── test_semaphores.py ├── test_sequential_agents_simple.py ├── test_sync_agent_events.py ├── test_sync_client.py ├── test_sync_client_auth.py └── test_telemetry.py ├── mind2web_data └── processed.json └── old ├── httpx_client_test.py ├── screenshot_test.py ├── sync_live.py ├── test_action_filters.py ├── test_agent_actions.py ├── test_clicks.py ├── test_core_functionality.py ├── test_cross_origin_iframe_unified_tree.py ├── test_dropdown.py ├── test_dropdown_complex.py ├── test_dropdown_error.py ├── test_full_screen.py ├── test_gif_path.py ├── test_google_sheets_real.py ├── test_mind2web.py ├── test_react_dropdown.py ├── test_self_registered_actions.py ├── test_tool_calling_methods.py ├── test_vision.py └── test_wait_for_element.py /.dockerignore: -------------------------------------------------------------------------------- 1 | docs/ 2 | static/ 3 | .claude/ 4 | .github/ 5 | 6 | # Cache files 7 | .DS_Store 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | .mypy_cache/ 12 | .ruff_cache/ 13 | .pytest_cache/ 14 | .ipynb_checkpoints 15 | 16 | # Virtual Environments 17 | .venv 18 | venv/ 19 | 20 | # Editor cruft 21 | .vscode/ 22 | .idea/ 23 | 24 | # Build Files 25 | dist/ 26 | 27 | # Data files 28 | *.gif 29 | *.txt 30 | *.pdf 31 | *.csv 32 | 
*.json 33 | *.jsonl 34 | *.bak 35 | 36 | # Secrets and sensitive files 37 | secrets.env 38 | .env 39 | browser_cookies.json 40 | cookies.json 41 | gcp-login.json 42 | saved_trajectories/ 43 | AgentHistory.json 44 | AgentHistoryList.json 45 | private_example.py 46 | private_example 47 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | ANTHROPIC_API_KEY= 3 | AZURE_OPENAI_ENDPOINT= 4 | AZURE_OPENAI_API_KEY= 5 | GOOGLE_API_KEY= 6 | DEEPSEEK_API_KEY= 7 | GROK_API_KEY= 8 | NOVITA_API_KEY= 9 | 10 | # Set to false to disable anonymized telemetry 11 | ANONYMIZED_TELEMETRY=true 12 | 13 | # LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info 14 | BROWSER_USE_LOGGING_LEVEL=info 15 | 16 | # Calculate costs: (beta) Add cost calculations to tokens. Available: true | false 17 | BROWSER_USE_CALCULATE_COST=false 18 | 19 | # set this to true to optimize browser-use's chrome for running inside docker 20 | IN_DOCKER=false 21 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | static/*.gif filter=lfs diff=lfs merge=lfs -text 2 | # static/*.mp4 filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /.github/.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | 66b3c26df51adec32d42c3b2c0304e0662457298 2 | 2be4ba4f7078d47bbeed04baf6f8fb04017df028 3 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to browser-use 2 | 3 | We love contributions! 
Please read through these links to get started: 4 | 5 | - 🔢 [Contribution Guidelines](https://docs.browser-use.com/development/contribution-guide) 6 | - 👾 [Local Development Setup Guide](https://docs.browser-use.com/development/local-setup) 7 | - 🏷️ [Issues Tagged: `#help-wanted`](https://github.com/browser-use/browser-use/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22help%20wanted%22) 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/4_docs_issue.yml: -------------------------------------------------------------------------------- 1 | name: 📚 Documentation Issue 2 | description: Report an issue in the browser-use documentation 3 | labels: ["documentation"] 4 | title: "Documentation: ..." 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to improve our documentation! Please fill out the form below to help us fix the issue quickly. 10 | 11 | - type: dropdown 12 | id: type 13 | attributes: 14 | label: Type of Documentation Issue 15 | description: What type of documentation issue is this? 16 | options: 17 | - Missing documentation 18 | - Incorrect documentation 19 | - Unclear documentation 20 | - Broken link 21 | - Other (specify in description) 22 | validations: 23 | required: true 24 | 25 | - type: input 26 | id: page 27 | attributes: 28 | label: Documentation Page 29 | description: Which page or section of the documentation is this about? 30 | placeholder: "e.g. https://docs.browser-use.com/customize/browser-settings > Context Configuration > headless" 31 | validations: 32 | required: true 33 | 34 | - type: textarea 35 | id: description 36 | attributes: 37 | label: Issue Description 38 | description: "Describe what's wrong or missing in the documentation" 39 | placeholder: e.g. Docs should clarify whether BrowserSession(no_viewport=False) is supported when running in BrowserSession(headless=False) mode... 
40 | validations: 41 | required: true 42 | 43 | - type: textarea 44 | id: suggestion 45 | attributes: 46 | label: Suggested Changes 47 | description: If you have specific suggestions for how to improve the documentation, please share them 48 | placeholder: | 49 | e.g. The documentation could be improved by adding one more line here: 50 | ```diff 51 | Use `BrowserSession(headless=False)` to open the browser window (aka headful mode). 52 | + Viewports are not supported when headful, if `headless=False` it will force `no_viewport=True`. 53 | ``` 54 | validations: 55 | required: false 56 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false # Set to true if you want to allow blank issues 2 | contact_links: 3 | - name: 🔢 Quickstart Guide 4 | url: https://docs.browser-use.com/quickstart 5 | about: Most common issues can be resolved by following our quickstart guide 6 | - name: 💬 Questions and Help 7 | url: https://link.browser-use.com/discord 8 | about: Please ask questions in our Discord community 9 | - name: 📖 Documentation 10 | url: https://docs.browser-use.com 11 | about: Check our documentation for answers first 12 | -------------------------------------------------------------------------------- /.github/SECURITY.md: -------------------------------------------------------------------------------- 1 | ## Reporting Security Issues 2 | 3 | If you believe you have found a security vulnerability in browser-use, please report it through coordinated disclosure. 4 | 5 | **Please do not report security vulnerabilities through the repository issues, discussions, or pull requests.** 6 | 7 | Instead, please open a new [GitHub security advisory](https://github.com/browser-use/browser-use/security/advisories/new). 
8 | 9 | Please include as much of the information listed below as you can to help me better understand and resolve the issue: 10 | 11 | * The type of issue (e.g., buffer overflow, SQL injection, or cross-site scripting) 12 | * Full paths of source file(s) related to the manifestation of the issue 13 | * The location of the affected source code (tag/branch/commit or direct URL) 14 | * Any special configuration required to reproduce the issue 15 | * Step-by-step instructions to reproduce the issue 16 | * Proof-of-concept or exploit code (if possible) 17 | * Impact of the issue, including how an attacker might exploit the issue 18 | 19 | This information will help me triage your report more quickly. 20 | -------------------------------------------------------------------------------- /.github/workflows/build-base-image.yml.disabled: -------------------------------------------------------------------------------- 1 | name: Build Base Image 2 | 3 | on: 4 | schedule: 5 | - cron: '0 2 * * 1' # Weekly on Monday 6 | workflow_dispatch: 7 | push: 8 | paths: 9 | - 'Dockerfile.base' 10 | 11 | jobs: 12 | build-base: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | platform: [linux/amd64, linux/arm64] 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Set up QEMU 21 | uses: docker/setup-qemu-action@v3 22 | 23 | - name: Set up Docker Buildx 24 | uses: docker/setup-buildx-action@v3 25 | 26 | - name: Login to Docker Hub 27 | uses: docker/login-action@v3 28 | with: 29 | username: ${{ secrets.DOCKER_USERNAME }} 30 | password: ${{ secrets.DOCKER_PASSWORD }} 31 | 32 | - name: Build and push base image 33 | uses: docker/build-push-action@v5 34 | with: 35 | context: . 
36 | file: ./Dockerfile.base 37 | platforms: ${{ matrix.platform }} 38 | push: true 39 | tags: | 40 | browseruse/browseruse-base:chromium-138-${{ matrix.platform == 'linux/amd64' && 'amd64' || 'arm64' }} 41 | browseruse/browseruse-base:latest-${{ matrix.platform == 'linux/amd64' && 'amd64' || 'arm64' }} 42 | cache-from: type=registry,ref=browseruse/browseruse-base:buildcache-${{ matrix.platform == 'linux/amd64' && 'amd64' || 'arm64' }} 43 | cache-to: type=registry,ref=browseruse/browseruse-base:buildcache-${{ matrix.platform == 'linux/amd64' && 'amd64' || 'arm64' }},mode=max 44 | -------------------------------------------------------------------------------- /.github/workflows/cloud_evals.yml: -------------------------------------------------------------------------------- 1 | name: cloud_evals 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - 'releases/*' 8 | workflow_dispatch: 9 | inputs: 10 | commit_hash: 11 | description: Commit hash of the library to build the Cloud eval image for 12 | required: false 13 | 14 | jobs: 15 | trigger_cloud_eval_image_build: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/github-script@v7 19 | with: 20 | github-token: ${{ secrets.TRIGGER_CLOUD_BUILD_GH_KEY }} 21 | script: | 22 | const result = await github.rest.repos.createDispatchEvent({ 23 | owner: 'browser-use', 24 | repo: 'cloud', 25 | event_type: 'trigger-workflow', 26 | client_payload: {"commit_hash": "${{ github.event.inputs.commit_hash || github.sha }}"} 27 | }) 28 | console.log(result) 29 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: docker 2 | 3 | on: 4 | push: 5 | release: 6 | types: [published] 7 | 8 | jobs: 9 | build_publish_image: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | packages: write 13 | contents: read 14 | attestations: write 15 | id-token: write 16 | steps: 17 | - name: Check out 
the repo 18 | uses: actions/checkout@v4 19 | 20 | - name: Set up QEMU 21 | uses: docker/setup-qemu-action@v3 22 | 23 | - name: Set up Docker Buildx 24 | uses: docker/setup-buildx-action@v3 25 | 26 | - name: Log in to Docker Hub 27 | uses: docker/login-action@v3 28 | with: 29 | username: ${{ secrets.DOCKER_USERNAME }} 30 | password: ${{ secrets.DOCKER_PASSWORD }} 31 | 32 | - name: Login to GitHub Container Registry 33 | uses: docker/login-action@v3 34 | with: 35 | registry: ghcr.io 36 | username: ${{ github.repository_owner }} 37 | password: ${{ secrets.GITHUB_TOKEN }} 38 | 39 | - name: Compute Docker tags based on tag/branch 40 | id: meta 41 | uses: docker/metadata-action@v5 42 | with: 43 | images: | 44 | browseruse/browseruse 45 | ghcr.io/browser-use/browser-use 46 | tags: | 47 | type=ref,event=branch 48 | type=ref,event=pr 49 | type=pep440,pattern={{version}} 50 | type=pep440,pattern={{major}}.{{minor}} 51 | type=sha 52 | 53 | - name: Build and push Docker image 54 | id: push 55 | uses: docker/build-push-action@v6 56 | with: 57 | platforms: linux/amd64,linux/arm64 58 | context: . 
59 | file: ./Dockerfile 60 | push: true 61 | tags: ${{ steps.meta.outputs.tags }} 62 | labels: ${{ steps.meta.outputs.labels }} 63 | cache-from: type=registry,ref=browseruse/browseruse:buildcache 64 | cache-to: type=registry,ref=browseruse/browseruse:buildcache,mode=max 65 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | on: 3 | push: 4 | branches: 5 | - main 6 | - stable 7 | - 'releases/**' 8 | tags: 9 | - '*' 10 | pull_request: 11 | workflow_dispatch: 12 | 13 | jobs: 14 | lint-syntax: 15 | name: syntax-errors 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: astral-sh/setup-uv@v5 20 | with: 21 | enable-cache: true 22 | - run: uv run ruff check --no-fix --select PLE 23 | 24 | lint-style: 25 | name: code-style 26 | runs-on: ubuntu-latest 27 | steps: 28 | - uses: actions/checkout@v4 29 | - uses: astral-sh/setup-uv@v5 30 | with: 31 | enable-cache: true 32 | - run: uv sync --dev --all-extras # install extras for examples to avoid pyright missing imports errors 33 | - run: uv run pre-commit run --all-files --show-diff-on-failure 34 | 35 | lint-typecheck: 36 | name: type-checker 37 | runs-on: ubuntu-latest 38 | steps: 39 | - uses: actions/checkout@v4 40 | - uses: astral-sh/setup-uv@v6 41 | with: 42 | enable-cache: true 43 | - run: uv sync --dev --all-extras # install extras for examples to avoid pyright missing imports errors 44 | - run: uv run pyright 45 | -------------------------------------------------------------------------------- /.github/workflows/package.yaml: -------------------------------------------------------------------------------- 1 | name: package 2 | on: 3 | push: 4 | branches: 5 | - main 6 | - stable 7 | - 'releases/**' 8 | tags: 9 | - '*' 10 | pull_request: 11 | workflow_dispatch: 12 | 13 | jobs: 14 | build: 15 | name: pip-build 16 | runs-on: 
ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: astral-sh/setup-uv@v5 20 | - run: uv build --python 3.12 21 | - uses: actions/upload-artifact@v4 22 | with: 23 | name: dist-artifact 24 | path: | 25 | dist/*.whl 26 | dist/*.tar.gz 27 | 28 | build_test: 29 | name: pip-install-on-${{ matrix.os }}-py-${{ matrix.python-version }} 30 | needs: build 31 | runs-on: ${{ matrix.os }} 32 | strategy: 33 | matrix: 34 | os: [ubuntu-latest, macos-latest, windows-latest] 35 | python-version: ["3.11", "3.13"] 36 | env: 37 | ANONYMIZED_TELEMETRY: 'false' 38 | 39 | steps: 40 | - uses: actions/checkout@v4 41 | - uses: astral-sh/setup-uv@v5 42 | - uses: actions/download-artifact@v4 43 | with: 44 | name: dist-artifact 45 | 46 | - name: Set up venv and test for OS/Python versions 47 | shell: bash 48 | run: | 49 | uv venv /tmp/testenv --python ${{ matrix.python-version }} 50 | if [[ "$RUNNER_OS" == "Windows" ]]; then 51 | . /tmp/testenv/Scripts/activate 52 | else 53 | source /tmp/testenv/bin/activate 54 | fi 55 | uv pip install *.whl 56 | python -c 'from browser_use import Agent, Browser, Controller, ActionModel, ActionResult' 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Cache files 2 | .DS_Store 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | .mypy_cache/ 7 | .ruff_cache/ 8 | .pytest_cache/ 9 | .ipynb_checkpoints 10 | ~/ 11 | 12 | # Virtual Environments 13 | .venv* 14 | venv/ 15 | 16 | # IDEs 17 | .vscode/ 18 | .idea/ 19 | 20 | # Build files 21 | dist/ 22 | 23 | # Data files 24 | *.gif 25 | *.txt 26 | *.pdf 27 | *.csv 28 | *.json 29 | *.jsonl 30 | *.log 31 | *.bak 32 | 33 | # Secrets and sensitive files 34 | secrets.env 35 | .env 36 | browser_cookies.json 37 | cookies.json 38 | gcp-login.json 39 | saved_trajectories/ 40 | old_tests/ 41 | AgentHistory.json 42 | AgentHistoryList.json 43 | private_example.py 44 | private_example 
45 | CLAUDE.local.md 46 | 47 | uv.lock 48 | temp 49 | tmp 50 | 51 | # Google API credentials 52 | credentials.json 53 | token.json 54 | 55 | !docs/docs.json 56 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/asottile/yesqa 3 | rev: v1.5.0 4 | hooks: 5 | - id: yesqa 6 | 7 | - repo: https://github.com/codespell-project/codespell 8 | rev: v2.4.1 9 | hooks: 10 | - id: codespell # See pyproject.toml for args 11 | additional_dependencies: 12 | - tomli 13 | 14 | - repo: https://github.com/asottile/pyupgrade 15 | rev: v3.19.1 16 | hooks: 17 | - id: pyupgrade 18 | args: [--py311-plus] 19 | 20 | # - repo: https://github.com/asottile/add-trailing-comma 21 | # rev: v3.1.0 22 | # hooks: 23 | # - id: add-trailing-comma 24 | 25 | - repo: https://github.com/astral-sh/ruff-pre-commit 26 | rev: v0.11.2 27 | hooks: 28 | - id: ruff 29 | - id: ruff-format 30 | # see pyproject.toml for more details on ruff config 31 | 32 | - repo: https://github.com/RobertCraigie/pyright-python 33 | rev: v1.1.403 34 | hooks: 35 | - id: pyright 36 | 37 | - repo: https://github.com/pre-commit/pre-commit-hooks 38 | rev: v5.0.0 39 | hooks: 40 | # check for basic syntax errors in python and data files 41 | - id: check-ast 42 | - id: check-toml 43 | - id: check-yaml 44 | - id: check-json 45 | - id: check-merge-conflict 46 | # check for bad files and folders 47 | - id: check-symlinks 48 | - id: destroyed-symlinks 49 | - id: check-case-conflict 50 | - id: check-illegal-windows-names 51 | - id: check-shebang-scripts-are-executable 52 | - id: mixed-line-ending 53 | - id: fix-byte-order-marker 54 | - id: end-of-file-fixer 55 | # best practices enforcement 56 | - id: detect-private-key 57 | # - id: check-docstring-first 58 | - id: debug-statements 59 | - id: forbid-submodules 60 | - id: check-added-large-files 61 | args: 
["--maxkb=600"] 62 | # - id: name-tests-test 63 | # args: ["--pytest-test-first"] 64 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /Dockerfile.fast: -------------------------------------------------------------------------------- 1 | # Fast Dockerfile using pre-built base images 2 | ARG REGISTRY=browseruse 3 | ARG BASE_TAG=latest 4 | FROM ${REGISTRY}/base-python-deps:${BASE_TAG} 5 | 6 | LABEL name="browseruse" description="Browser automation for AI agents" 7 | 8 | ENV BROWSERUSE_USER="browseruse" DEFAULT_PUID=911 DEFAULT_PGID=911 DATA_DIR=/data 9 | 10 | # Create user and directories 11 | RUN groupadd --system $BROWSERUSE_USER && \ 12 | useradd --system --create-home --gid $BROWSERUSE_USER --groups audio,video $BROWSERUSE_USER && \ 13 | usermod -u "$DEFAULT_PUID" "$BROWSERUSE_USER" && \ 14 | groupmod -g "$DEFAULT_PGID" "$BROWSERUSE_USER" && \ 15 | mkdir -p /data /home/$BROWSERUSE_USER/.config && \ 16 | ln -s $DATA_DIR /home/$BROWSERUSE_USER/.config/browseruse && \ 17 | mkdir -p "/home/$BROWSERUSE_USER/.config/chromium/Crash Reports/pending/" && \ 18 | mkdir -p "$DATA_DIR/profiles/default" && \ 19 | chown -R "$BROWSERUSE_USER:$BROWSERUSE_USER" "/home/$BROWSERUSE_USER" "$DATA_DIR" 20 | 21 | WORKDIR /app 22 | COPY . 
/app 23 | 24 | # Install browser-use 25 | RUN --mount=type=cache,target=/root/.cache/uv,sharing=locked \ 26 | uv sync --all-extras --locked --no-dev --compile-bytecode 27 | 28 | USER "$BROWSERUSE_USER" 29 | VOLUME "$DATA_DIR" 30 | EXPOSE 9242 9222 31 | ENTRYPOINT ["browser-use"] 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Gregor Zunic 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /bin/lint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This script is used to run the formatter, linter, and type checker pre-commit hooks. 
3 | # Usage: 4 | # $ ./bin/lint.sh 5 | 6 | IFS=$'\n' 7 | 8 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 9 | 10 | cd "$SCRIPT_DIR/.." || exit 1 11 | 12 | echo "[*] Running ruff linter, formatter, pyright type checker, and other pre-commit checks..." 13 | exec uv run pre-commit run --all-files 14 | -------------------------------------------------------------------------------- /bin/setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This script is used to setup a local development environment for the browser-use project. 3 | # Usage: 4 | # $ ./bin/setup.sh 5 | 6 | ### Bash Environment Setup 7 | # http://redsymbol.net/articles/unofficial-bash-strict-mode/ 8 | # https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html 9 | # set -o xtrace 10 | # set -x 11 | # shopt -s nullglob 12 | set -o errexit 13 | set -o errtrace 14 | set -o nounset 15 | set -o pipefail 16 | IFS=$'\n' 17 | 18 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 19 | cd "$SCRIPT_DIR" 20 | 21 | 22 | if [ -f "$SCRIPT_DIR/lint.sh" ]; then 23 | echo "[√] already inside a cloned browser-use repo" 24 | else 25 | echo "[+] Cloning browser-use repo into current directory: $SCRIPT_DIR" 26 | git clone https://github.com/browser-use/browser-use 27 | cd browser-use 28 | fi 29 | 30 | echo "[+] Installing uv..." 
31 | curl -LsSf https://astral.sh/uv/install.sh | sh 32 | 33 | #git checkout main git pull 34 | echo 35 | echo "[+] Setting up venv" 36 | uv venv 37 | echo 38 | echo "[+] Installing packages in venv" 39 | uv sync --dev --all-extras 40 | echo 41 | echo "[i] Tip: make sure to set BROWSER_USE_LOGGING_LEVEL=debug and your LLM API keys in your .env file" 42 | echo 43 | uv pip show browser-use 44 | 45 | echo "Usage:" 46 | echo " $ browser-use use the CLI" 47 | echo " or" 48 | echo " $ source .venv/bin/activate" 49 | echo " $ ipython use the library" 50 | echo " >>> from browser_use import BrowserSession, Agent" 51 | echo " >>> await Agent(task='book me a flight to fiji', browser=BrowserSession(headless=False)).run()" 52 | echo "" 53 | -------------------------------------------------------------------------------- /bin/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This script is used to run all the main project tests that run on CI via .github/workflows/test.yaml. 3 | # Usage: 4 | # $ ./bin/test.sh 5 | 6 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 7 | cd "$SCRIPT_DIR/.." || exit 1 8 | 9 | exec uv run pytest --numprocesses auto tests/ci $1 $2 $3 10 | -------------------------------------------------------------------------------- /browser_use/README.md: -------------------------------------------------------------------------------- 1 | # Codebase Structure 2 | 3 | > The code structure is inspired by https://github.com/Netflix/dispatch. 4 | 5 | Another very good guide on how to structure a scalable codebase is [this repo](https://github.com/zhanymkanov/fastapi-best-practices). 6 | 7 | Just a brief document about how we should structure our backend codebase. 
async def _format_conversation(messages: list[BaseMessage], response: Any) -> str:
    """Render the input messages and the model response as one text blob.

    Every message contributes three lines: a role header, the message text
    and an empty separator line. The response follows under a ``RESPONSE``
    header as JSON indented by two spaces; fields that were never set on the
    response model are left out (``exclude_unset=True``).
    """
    rendered: list[str] = []
    for msg in messages:
        rendered.extend((f' {msg.role} ', msg.text, ''))

    # Round-trip through json so the model's compact JSON is re-indented.
    payload = json.loads(response.model_dump_json(exclude_unset=True))
    rendered.extend((' RESPONSE', json.dumps(payload, indent=2)))

    return '\n'.join(rendered)
def normalize_url(url: str) -> str:
    """
    Normalize a URL by adding https:// protocol if needed, while preserving special URLs.

    This function safely adds https:// to URLs that lack a protocol, but preserves
    special URLs like "about:blank", "chrome://new-tab-page", "mailto:...", "tel:...", etc.
    that should not be prefixed with https://.

    A "://" only counts as the URL's own protocol when it appears before any
    "/", "?" or "#". A "://" that is part of the path, query or fragment
    (for example a redirect target passed as a query parameter) does not stop
    https:// from being added. Special protocols are matched case-insensitively.

    Args:
        url: The URL string to normalize

    Returns:
        str: The normalized URL with protocol if needed

    Examples:
        >>> normalize_url('example.com')
        'https://example.com'
        >>> normalize_url('about:blank')
        'about:blank'
        >>> normalize_url('mailto:test@example.com')
        'mailto:test@example.com'
        >>> normalize_url('https://example.com')
        'https://example.com'
        >>> normalize_url('example.com/login?next=https://other.com')
        'https://example.com/login?next=https://other.com'
    """
    normalized_url = url.strip()

    # If URL already has a protocol, return as-is. Only the text in front of the
    # first '://' is inspected: if it already contains a path, query or fragment
    # delimiter, the '://' belongs to an embedded URL, not to this one.
    separator_index = normalized_url.find('://')
    if separator_index != -1 and not any(char in normalized_url[:separator_index] for char in '/?#'):
        return normalized_url

    # Check for special protocols that should not be prefixed with https://.
    # Schemes are case-insensitive, so compare against the lowercased input.
    special_protocols = ('about:', 'mailto:', 'tel:', 'ftp:', 'file:', 'data:', 'javascript:')
    if normalized_url.lower().startswith(special_protocols):
        return normalized_url

    # For everything else, add https://
    return f'https://{normalized_url}'
CloseTabAction(BaseModel): 44 | page_id: int 45 | 46 | 47 | class ScrollAction(BaseModel): 48 | down: bool # True to scroll down, False to scroll up 49 | num_pages: float # Number of pages to scroll (0.5 = half page, 1.0 = one page, etc.) 50 | index: int | None = None # Optional element index to find scroll container for 51 | 52 | 53 | class SendKeysAction(BaseModel): 54 | keys: str 55 | 56 | 57 | class UploadFileAction(BaseModel): 58 | index: int 59 | path: str 60 | 61 | 62 | class ExtractPageContentAction(BaseModel): 63 | value: str 64 | 65 | 66 | class NoParamsAction(BaseModel): 67 | """ 68 | Accepts absolutely anything in the incoming data 69 | and discards it, so the final parsed model is empty. 70 | """ 71 | 72 | model_config = ConfigDict(extra='ignore') 73 | # No fields defined - all inputs are ignored automatically 74 | -------------------------------------------------------------------------------- /browser_use/dom/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/9871be3363081dd88402dd7244f1997132929c4f/browser_use/dom/__init__.py -------------------------------------------------------------------------------- /browser_use/dom/history_tree_processor/view.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | @dataclass 7 | class HashedDomElement: 8 | """ 9 | Hash of the dom element to be used as a unique identifier 10 | """ 11 | 12 | branch_path_hash: str 13 | attributes_hash: str 14 | xpath_hash: str 15 | # text_hash: str 16 | 17 | 18 | class Coordinates(BaseModel): 19 | x: int 20 | y: int 21 | 22 | 23 | class CoordinateSet(BaseModel): 24 | top_left: Coordinates 25 | top_right: Coordinates 26 | bottom_left: Coordinates 27 | bottom_right: Coordinates 28 | center: Coordinates 29 | width: int 30 | height: int 31 | 32 | 33 | class 
@dataclass
class DOMHistoryElement:
    """Record describing a DOM element: identifying data plus optional geometry."""

    tag_name: str
    xpath: str
    highlight_index: int | None
    entire_parent_branch_path: list[str]
    attributes: dict[str, str]
    shadow_root: bool = False
    css_selector: str | None = None
    page_coordinates: CoordinateSet | None = None
    viewport_coordinates: CoordinateSet | None = None
    viewport_info: ViewportInfo | None = None

    def to_dict(self) -> dict:
        """Return the element as a plain dict.

        The three geometry fields are converted with their ``model_dump()``
        method; a field that is unset (falsy) is emitted as ``None``.
        """

        def dump(model):
            return model.model_dump() if model else None

        as_dict = {
            'tag_name': self.tag_name,
            'xpath': self.xpath,
            'highlight_index': self.highlight_index,
            'entire_parent_branch_path': self.entire_parent_branch_path,
            'attributes': self.attributes,
            'shadow_root': self.shadow_root,
            'css_selector': self.css_selector,
        }
        # Appended last so the key order matches the field order above.
        for key in ('page_coordinates', 'viewport_coordinates', 'viewport_info'):
            as_dict[key] = dump(getattr(self, key))
        return as_dict
def cap_text_length(text: str, max_length: int) -> str:
    """Return *text* as-is when it has at most *max_length* characters.

    Longer text is cut to its first *max_length* characters and '...' is
    appended, so a truncated result is three characters longer than the cap.
    """
    if len(text) <= max_length:
        return text
    return f'{text[:max_length]}...'
5 | Usage: 6 | from browser_use.integrations.gmail import GmailService, register_gmail_actions 7 | # Option 1: Register Gmail actions with file-based authentication 8 | controller = Controller() 9 | register_gmail_actions(controller) 10 | # Option 2: Register Gmail actions with direct access token (recommended for production) 11 | controller = Controller() 12 | register_gmail_actions(controller, access_token="your_access_token_here") 13 | # Option 3: Use the service directly 14 | gmail = GmailService(access_token="your_access_token_here") 15 | await gmail.authenticate() 16 | emails = await gmail.get_recent_emails() 17 | """ 18 | 19 | # @file purpose: Gmail integration for 2FA email authentication and email reading 20 | 21 | from .actions import register_gmail_actions 22 | from .service import GmailService 23 | 24 | __all__ = ['GmailService', 'register_gmail_actions'] 25 | -------------------------------------------------------------------------------- /browser_use/llm/README.md: -------------------------------------------------------------------------------- 1 | # Browser Use LLMs 2 | 3 | We officially support the following LLMs: 4 | 5 | - OpenAI 6 | - Anthropic 7 | - Google 8 | - Groq 9 | - Ollama 10 | - DeepSeek 11 | 12 | ## Migrating from LangChain 13 | 14 | Because of how we implemented the LLMs, we can technically support anything. If you want to use a LangChain model, you can use the `ChatLangchain` (NOT OFFICIALLY SUPPORTED) class. 15 | 16 | You can find all the details in the [LangChain example](examples/models/langchain/example.py). We suggest you grab that code and use it as a reference. 
def __getattr__(name: str):
    """Resolve an AWS chat model class the first time it is accessed on this module.

    Names listed in ``_LAZY_IMPORTS`` are imported on demand and then stored in
    the module globals; any other name raises ``AttributeError``.
    """
    target = _LAZY_IMPORTS.get(name)
    if target is None:
        raise AttributeError(f"module '{__name__}' has no attribute '{name}'")

    module_path, attr_name = target
    try:
        from importlib import import_module

        resolved = getattr(import_module(module_path), attr_name)
        # Store the result on the module so later lookups bypass this hook.
        globals()[name] = resolved
    except ImportError as e:
        raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e
    return resolved
3 | 4 | For easier transition we have 5 | """ 6 | 7 | from typing import Any, Protocol, TypeVar, overload, runtime_checkable 8 | 9 | from pydantic import BaseModel 10 | 11 | from browser_use.llm.messages import BaseMessage 12 | from browser_use.llm.views import ChatInvokeCompletion 13 | 14 | T = TypeVar('T', bound=BaseModel) 15 | 16 | 17 | @runtime_checkable 18 | class BaseChatModel(Protocol): 19 | _verified_api_keys: bool = False 20 | 21 | model: str 22 | 23 | @property 24 | def provider(self) -> str: ... 25 | 26 | @property 27 | def name(self) -> str: ... 28 | 29 | @property 30 | def model_name(self) -> str: 31 | # for legacy support 32 | return self.model 33 | 34 | @overload 35 | async def ainvoke(self, messages: list[BaseMessage], output_format: None = None) -> ChatInvokeCompletion[str]: ... 36 | 37 | @overload 38 | async def ainvoke(self, messages: list[BaseMessage], output_format: type[T]) -> ChatInvokeCompletion[T]: ... 39 | 40 | async def ainvoke( 41 | self, messages: list[BaseMessage], output_format: type[T] | None = None 42 | ) -> ChatInvokeCompletion[T] | ChatInvokeCompletion[str]: ... 43 | 44 | @classmethod 45 | def __get_pydantic_core_schema__( 46 | cls, 47 | source_type: type, 48 | handler: Any, 49 | ) -> Any: 50 | """ 51 | Allow this Protocol to be used in Pydantic models -> very useful to typesafe the agent settings for example. 52 | Returns a schema that allows any object (since this is a Protocol). 
class ModelProviderError(ModelError):
    """Exception raised when a model provider returns an error.

    Attributes:
        message: The error message passed to the constructor.
        status_code: The status code passed to the constructor (defaults to 502).
        model: Name of the model involved, or ``None`` when not given.
    """

    def __init__(
        self,
        message: str,
        status_code: int = 502,
        model: str | None = None,
    ):
        # The exception args stay (message, status_code) so handlers that
        # inspect ``e.args`` or ``str(e)`` see exactly what they saw before.
        super().__init__(message, status_code)
        # Expose the values as attributes too, matching LLMException in
        # browser_use/exceptions.py, so callers need not index into ``args``.
        self.message = message
        self.status_code = status_code
        self.model = model
class OpenRouterMessageSerializer:
    """
    Message serializer for OpenRouter.

    OpenRouter uses the OpenAI-compatible API, so all of the work is handed to
    the OpenAI serializer.
    """

    @staticmethod
    def serialize_messages(messages: list[BaseMessage]) -> list[ChatCompletionMessageParam]:
        """
        Convert browser_use messages into OpenRouter-compatible messages.

        Args:
            messages: List of browser_use messages

        Returns:
            The messages exactly as produced by ``OpenAIMessageSerializer.serialize_messages``
        """
        # Same wire format as OpenAI, so no OpenRouter-specific conversion is done.
        openai_payload = OpenAIMessageSerializer.serialize_messages(messages)
        return openai_payload
SystemMessage(content='You are a helpful assistant that can answer questions and help with tasks.'), 27 | UserMessage( 28 | content=[ 29 | ContentText( 30 | text=r"Why is the sky blue? write exactly this into reasoning make sure to output ' with exactly like in the input : " 31 | ), 32 | ContentText( 33 | text=""" 34 | The user's request is to find the lowest priced women's plus size one piece swimsuit in color black with a customer rating of at least 5 on Kohls.com. I am currently on the homepage of Kohls. The page has a search bar and various category links. To begin, I need to navigate to the women's section and search for swimsuits. I will start by clicking on the 'Women' category link.""" 35 | ), 36 | ] 37 | ), 38 | ] 39 | 40 | for i in range(10): 41 | print('-' * 50) 42 | print(f'start loop {i}') 43 | response = await llm.ainvoke(message, output_format=Output) 44 | completion = response.completion 45 | print(f'start reasoning: {completion.reasoning}') 46 | print(f'answer: {completion.answer}') 47 | print('-' * 50) 48 | 49 | 50 | if __name__ == '__main__': 51 | asyncio.run(main()) 52 | -------------------------------------------------------------------------------- /browser_use/llm/views.py: -------------------------------------------------------------------------------- 1 | from typing import Generic, TypeVar, Union 2 | 3 | from pydantic import BaseModel 4 | 5 | T = TypeVar('T', bound=Union[BaseModel, str]) 6 | 7 | 8 | class ChatInvokeUsage(BaseModel): 9 | """ 10 | Usage information for a chat model invocation. 11 | """ 12 | 13 | prompt_tokens: int 14 | """The number of tokens in the prompt (this includes the cached tokens as well. 
When calculating the cost, subtract the cached tokens from the prompt tokens)""" 15 | 16 | prompt_cached_tokens: int | None 17 | """The number of cached tokens.""" 18 | 19 | prompt_cache_creation_tokens: int | None 20 | """Anthropic only: The number of tokens used to create the cache.""" 21 | 22 | prompt_image_tokens: int | None 23 | """Google only: The number of tokens in the image (prompt tokens is the text tokens + image tokens in that case)""" 24 | 25 | completion_tokens: int 26 | """The number of tokens in the completion.""" 27 | 28 | total_tokens: int 29 | """The total number of tokens in the response.""" 30 | 31 | 32 | class ChatInvokeCompletion(BaseModel, Generic[T]): 33 | """ 34 | Response from a chat model invocation. 35 | """ 36 | 37 | completion: T 38 | """The completion of the response.""" 39 | 40 | # Thinking stuff 41 | thinking: str | None = None 42 | redacted_thinking: str | None = None 43 | 44 | usage: ChatInvokeUsage | None 45 | """The usage of the response.""" 46 | -------------------------------------------------------------------------------- /browser_use/mcp/__init__.py: -------------------------------------------------------------------------------- 1 | """MCP (Model Context Protocol) support for browser-use. 2 | 3 | This module provides integration with MCP servers and clients for browser automation. 
def __getattr__(name: str):
    """Lazy import to avoid importing server module when only client is needed.

    ``BrowserUseServer`` is imported from ``browser_use.mcp.server`` on first
    access and then stored in this module's globals, so later accesses are
    plain attribute lookups that no longer go through this hook. Any other
    name raises ``AttributeError``.
    """
    if name == 'BrowserUseServer':
        from browser_use.mcp.server import BrowserUseServer

        # Cache the imported attribute in the module's globals (same approach
        # as the other lazy-import hooks in this code base).
        globals()[name] = BrowserUseServer
        return BrowserUseServer
    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
3 | """ 4 | 5 | import base64 6 | from pathlib import Path 7 | 8 | import anyio 9 | 10 | 11 | class ScreenshotService: 12 | """Simple screenshot storage service that saves screenshots to disk""" 13 | 14 | def __init__(self, agent_directory: str | Path): 15 | """Initialize with agent directory path""" 16 | self.agent_directory = Path(agent_directory) if isinstance(agent_directory, str) else agent_directory 17 | 18 | # Create screenshots subdirectory 19 | self.screenshots_dir = self.agent_directory / 'screenshots' 20 | self.screenshots_dir.mkdir(parents=True, exist_ok=True) 21 | 22 | async def store_screenshot(self, screenshot_b64: str, step_number: int) -> str: 23 | """Store screenshot to disk and return the full path as string""" 24 | screenshot_filename = f'step_{step_number}.png' 25 | screenshot_path = self.screenshots_dir / screenshot_filename 26 | 27 | # Decode base64 and save to disk 28 | screenshot_data = base64.b64decode(screenshot_b64) 29 | 30 | async with await anyio.open_file(screenshot_path, 'wb') as f: 31 | await f.write(screenshot_data) 32 | 33 | return str(screenshot_path) 34 | 35 | async def get_screenshot(self, screenshot_path: str) -> str | None: 36 | """Load screenshot from disk path and return as base64""" 37 | if not screenshot_path: 38 | return None 39 | 40 | path = Path(screenshot_path) 41 | if not path.exists(): 42 | return None 43 | 44 | # Load from disk and encode to base64 45 | async with await anyio.open_file(path, 'rb') as f: 46 | screenshot_data = await f.read() 47 | 48 | return base64.b64encode(screenshot_data).decode('utf-8') 49 | -------------------------------------------------------------------------------- /browser_use/sync/__init__.py: -------------------------------------------------------------------------------- 1 | """Cloud sync module for Browser Use.""" 2 | 3 | from browser_use.sync.auth import CloudAuthConfig, DeviceAuthClient 4 | from browser_use.sync.service import CloudSync 5 | 6 | __all__ = ['CloudAuthConfig', 
def __getattr__(name: str):
    """Import a telemetry component on first access and remember it on the module.

    Only names present in ``_LAZY_IMPORTS`` are resolved; everything else
    raises ``AttributeError``.
    """
    if name not in _LAZY_IMPORTS:
        raise AttributeError(f"module '{__name__}' has no attribute '{name}'")

    module_path, attr_name = _LAZY_IMPORTS[name]
    try:
        from importlib import import_module

        component = getattr(import_module(module_path), attr_name)
        # Keep the resolved object in globals so the import runs only once.
        globals()[name] = component
        return component
    except ImportError as e:
        raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e
@dataclass
class BaseTelemetryEvent(ABC):
    """Abstract base for telemetry events: a ``name`` plus a ``properties`` dict."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Event name; must be provided by each concrete event."""

    @property
    def properties(self) -> dict[str, Any]:
        """Return every dataclass field of the event as a dict, leaving out ``name``."""
        payload = asdict(self)
        payload.pop('name', None)
        return payload
None 73 | parent_process_cmdline: str | None = None 74 | 75 | name: str = 'mcp_server_event' 76 | 77 | 78 | @dataclass 79 | class CLITelemetryEvent(BaseTelemetryEvent): 80 | """Telemetry event for CLI usage""" 81 | 82 | version: str 83 | action: str # 'start', 'message_sent', 'task_completed', 'error' 84 | mode: str # 'interactive', 'oneshot', 'mcp_server' 85 | model: str | None = None 86 | model_provider: str | None = None 87 | duration_seconds: float | None = None 88 | error_message: str | None = None 89 | 90 | name: str = 'cli_event' 91 | -------------------------------------------------------------------------------- /browser_use/tokens/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/9871be3363081dd88402dd7244f1997132929c4f/browser_use/tokens/__init__.py -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Docker Setup for Browser-Use 2 | 3 | This directory contains the optimized Docker build system for browser-use, achieving < 30 second builds. 4 | 5 | ## Quick Start 6 | 7 | ```bash 8 | # Build base images (only needed once or when dependencies change) 9 | ./docker/build-base-images.sh 10 | 11 | # Build browser-use 12 | docker build -f Dockerfile.fast -t browseruse . 13 | 14 | # Or use the standard Dockerfile (slower but self-contained) 15 | docker build -t browseruse . 
# Base image layer that installs Playwright/Patchright and a Chromium build on
# top of the browser-use base-system image.

# Tag of browseruse/base-system to build on (override with --build-arg BASE_TAG=...).
ARG BASE_TAG=latest
FROM browseruse/base-system:${BASE_TAG}

WORKDIR /tmp
# pyproject.toml is copied only so the RUN step below can read the version
# constraints from it; the same RUN step deletes it again at the end.
COPY pyproject.toml ./

# Install both playwright and patchright with versions from pyproject.toml
#   1. Take the first X.Y.Z number found on the "playwright>=" / "patchright>="
#      lines and pin pip to exactly that version (the ">=" floor becomes "==").
#   2. Install Chromium (with its system dependencies) into /opt/playwright.
#   3. Symlink the binary to /usr/bin/chromium-browser and make the browser
#      directory readable/executable by every user (mode 755).
# NOTE(review): if either grep finds no match, the version variable is empty and
# pip receives "playwright==" -- confirm pyproject.toml always carries both pins.
# NOTE(review): the chromium-* glob passed to `ln -s` presumes exactly one
# installed Chromium directory -- confirm.
RUN --mount=type=cache,target=/root/.cache,sharing=locked \
    PLAYWRIGHT_VERSION=$(grep -E "playwright>=" pyproject.toml | grep -o "[0-9]\+\.[0-9]\+\.[0-9]\+" | head -1) && \
    PATCHRIGHT_VERSION=$(grep -E "patchright>=" pyproject.toml | grep -o "[0-9]\+\.[0-9]\+\.[0-9]\+" | head -1) && \
    echo "Installing playwright==$PLAYWRIGHT_VERSION patchright==$PATCHRIGHT_VERSION" && \
    pip install --no-cache-dir playwright==$PLAYWRIGHT_VERSION patchright==$PATCHRIGHT_VERSION && \
    PLAYWRIGHT_BROWSERS_PATH=/opt/playwright playwright install --with-deps --no-shell chromium && \
    ln -s /opt/playwright/chromium-*/chrome-linux/chrome /usr/bin/chromium-browser && \
    chmod -R 755 /opt/playwright && \
    rm -f pyproject.toml
#!/bin/bash
# Build script for browser-use base images.
#
# Builds, in dependency order: base-system -> base-chromium -> base-python-deps.
#
# Usage: build-base-images.sh [--push] [--registry REG] [--platforms P]
# Environment defaults: DOCKER_REGISTRY (browseruse), PLATFORMS (linux/amd64),
# PUSH (false). Command-line flags override the environment.
#
# NOTE: the Dockerfiles under base-images/ name browseruse/base-* in their FROM
# lines, so a custom registry only changes the tags applied by this script.
set -euo pipefail

# Configuration
REGISTRY="${DOCKER_REGISTRY:-browseruse}"
PLATFORMS="${PLATFORMS:-linux/amd64}"
PUSH="${PUSH:-false}"

# Resolve paths once, independent of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# The Dockerfiles COPY pyproject.toml from the context root, so the build
# context must be the repository root, i.e. the parent of docker/.
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
# Computed once so every image built in this run carries the same date tag.
DATE_TAG="$(date +%Y%m%d)"

# Succeeds when buildx is required: more than one platform, or pushing.
needs_buildx() {
    [[ "$PLATFORMS" == *","* || "$PUSH" == "true" ]]
}

# build_image NAME DOCKERFILE [EXTRA_DOCKER_ARGS...]
#   NAME        image name below ${REGISTRY}; tagged :latest and :YYYYMMDD
#   DOCKERFILE  Dockerfile path relative to this script's directory
#   EXTRA...    additional arguments passed through to docker (e.g. --build-arg X=Y)
build_image() {
    local name=$1
    local dockerfile=$2
    shift 2

    echo "[INFO] Building ${name}..."

    # Assemble the command as an array so arguments are never re-split.
    local -a cmd=(docker build)

    # Use buildx for multi-platform or push
    if needs_buildx; then
        cmd=(docker buildx build "--platform=${PLATFORMS}")
        if [[ "$PUSH" == "true" ]]; then
            cmd+=(--push)
        fi
    fi

    cmd+=(-t "${REGISTRY}/${name}:latest" -t "${REGISTRY}/${name}:${DATE_TAG}")
    cmd+=("$@")
    cmd+=(-f "${SCRIPT_DIR}/${dockerfile}" "${REPO_ROOT}")

    "${cmd[@]}"
}

# Main
cd "${SCRIPT_DIR}"

# Parse arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        --push) PUSH=true; shift ;;
        --registry) REGISTRY="${2:?--registry requires a value}"; shift 2 ;;
        --platforms) PLATFORMS="${2:?--platforms requires a value}"; shift 2 ;;
        --help)
            echo "Usage: $0 [--push] [--registry REG] [--platforms P]"
            exit 0 ;;
        *) echo "Unknown option: $1" >&2; exit 1 ;;
    esac
done

# Create buildx builder if needed
if needs_buildx; then
    docker buildx inspect browseruse-builder >/dev/null 2>&1 || \
        docker buildx create --name browseruse-builder --use
    docker buildx use browseruse-builder
fi

# Build images in order
build_image "base-system" "base-images/system/Dockerfile"
build_image "base-chromium" "base-images/chromium/Dockerfile" --build-arg BASE_TAG=latest
build_image "base-python-deps" "base-images/python-deps/Dockerfile" --build-arg BASE_TAG=latest

echo "[INFO] Build complete. Use: FROM ${REGISTRY}/base-python-deps:latest"
4 | 5 | ### Development 6 | 7 | Install the [Mintlify CLI](https://www.npmjs.com/package/mintlify) to preview the documentation changes locally. To install, use the following command 8 | 9 | ``` 10 | npm i -g mintlify 11 | ``` 12 | 13 | Run the following command at the root of your documentation (where mint.json is) 14 | 15 | ``` 16 | mintlify dev 17 | ``` 18 | -------------------------------------------------------------------------------- /docs/api-reference/check-balance.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Check Balance" 3 | api: "GET /api/v1/balance" 4 | description: "Returns the user's current API credit balance" 5 | --- 6 | 7 | Returns the user's current API credit balance, which includes both monthly subscription credits and any additional purchased credits. 8 | 9 | ## Response 10 | 11 | 12 | The current number of API credits available, with the value in cents (0.01 USD 13 | = 1 credit). 14 | 15 | 16 | 17 | 18 | ```python python 19 | import requests 20 | 21 | API_KEY = 'your_api_key_here' 22 | BASE_URL = 'https://api.browser-use.com/api/v1' 23 | HEADERS = {'Authorization': f'Bearer {API_KEY}'} 24 | 25 | response = requests.get(f'{BASE_URL}/balance', headers=HEADERS) 26 | balance = response.json()['balance'] 27 | print(f"Current API credit balance: {balance}") 28 | ``` 29 | 30 | ```bash curl 31 | curl --request GET \ 32 | --url https://api.browser-use.com/api/v1/balance \ 33 | --header 'Authorization: Bearer ' 34 | ``` 35 | 36 | 37 | 38 | 39 | 40 | ```json 200 41 | { 42 | "balance": "5000" 43 | } 44 | ``` 45 | 46 | 47 | 48 | ## API Credit Usage 49 | 50 | Each task execution consumes API credits based on the following factors: 51 | 52 | 1. **Task Duration**: Longer running tasks consume more credits 53 | 2. **LLM Model**: More powerful models consume more credits 54 | 3. **Browser Features**: Features like proxy usage and adblock may affect credit consumption 55 | 4. 
**Task Complexity**: More complex tasks with many steps consume more credits 56 | 57 | You can monitor your credit usage through the [Browser Use Cloud dashboard](https://cloud.browser-use.com/dashboard) or by using the Check Balance endpoint. 58 | 59 | 60 | If your balance reaches zero, new task executions will be rejected until you 61 | add more credits or your subscription renews. 62 | {" "} 63 | -------------------------------------------------------------------------------- /docs/api-reference/delete-browser-profile.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Delete Browser Profile" 3 | api: "DELETE /api/v1/browser-profiles/{profile_id}" 4 | description: "Deletes a browser profile. This will remove the profile and all associated browser data." 5 | --- 6 | 7 | Deletes a browser profile. This will remove the profile and all associated browser data. This action cannot be undone! 8 | 9 | ### Path Parameters 10 | 11 | 12 | ID of the browser profile to delete 13 | 14 | 15 | ### Response 16 | 17 | A successful deletion returns an empty object. 
18 | 19 | 20 | ```python python 21 | import requests 22 | 23 | API_KEY = 'your_api_key_here' 24 | BASE_URL = 'https://api.browser-use.com/api/v1' 25 | HEADERS = {'Authorization': f'Bearer {API_KEY}'} 26 | 27 | profile_id = 'profile_1234567890abcdef' 28 | response = requests.delete(f'{BASE_URL}/browser-profiles/{profile_id}', headers=HEADERS) 29 | print(response.json()) 30 | 31 | ``` 32 | 33 | ```bash curl 34 | curl --request DELETE \ 35 | --url https://api.browser-use.com/api/v1/browser-profiles/profile_1234567890abcdef \ 36 | --header 'Authorization: Bearer YOUR_API_KEY' 37 | ``` 38 | 39 | 40 | 41 | 42 | ```json 200 43 | {} 44 | ``` 45 | 46 | -------------------------------------------------------------------------------- /docs/api-reference/delete-scheduled-task.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Delete Scheduled Task" 3 | api: "DELETE /api/v1/scheduled-task/{task_id}" 4 | description: "Deletes a scheduled task" 5 | --- 6 | 7 | Deletes a scheduled task. This will prevent any future runs of this task. Any currently running instances of this task will be allowed to complete. 8 | 9 | ## Path Parameters 10 | 11 | 12 | ID of the scheduled task to delete 13 | 14 | 15 | ## Response 16 | 17 | The endpoint returns an empty response body with a 200 status code on success. 
18 | 19 | 20 | 21 | ```python python 22 | import requests 23 | 24 | API_KEY = 'your_api_key_here' 25 | BASE_URL = 'https://api.browser-use.com/api/v1' 26 | HEADERS = {'Authorization': f'Bearer {API_KEY}'} 27 | 28 | task_id = 'scheduled_task_1234567890abcdef' 29 | 30 | response = requests.delete(f'{BASE_URL}/scheduled-task/{task_id}', headers=HEADERS) 31 | 32 | if response.status_code == 200: 33 | print("Scheduled task deleted successfully") 34 | else: 35 | print(f"Error deleting scheduled task: {response.status_code}") 36 | ``` 37 | 38 | ```bash curl 39 | curl --request DELETE \ 40 | --url https://api.browser-use.com/api/v1/scheduled-task/{task_id} \ 41 | --header 'Authorization: Bearer ' 42 | ``` 43 | 44 | 45 | 46 | 47 | 48 | ```json 200 49 | {} 50 | ``` 51 | 52 | ```json 404 53 | { 54 | "detail": "Scheduled task not found" 55 | } 56 | ``` 57 | 58 | ```json 422 59 | { 60 | "detail": [ 61 | { 62 | "loc": ["path", "task_id"], 63 | "msg": "field required", 64 | "type": "value_error.missing" 65 | } 66 | ] 67 | } 68 | ``` 69 | 70 | 71 | 72 | ## Usage Notes 73 | 74 | - Deletion is permanent and cannot be undone 75 | - Any currently running instances of this task will be allowed to complete 76 | - Future scheduled runs will be prevented 77 | - The task will be removed from the scheduled tasks list immediately 78 | 79 | 80 | Deleting a scheduled task is irreversible. Make sure you want to permanently remove the task before proceeding. 81 | 82 | -------------------------------------------------------------------------------- /docs/api-reference/get-browser-profile.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Get Browser Profile" 3 | api: "GET /api/v1/browser-profiles/{profile_id}" 4 | description: "Returns information about a specific browser profile and its configuration settings." 5 | --- 6 | 7 | Returns information about a specific browser profile and its configuration settings. 
8 | 9 | ### Path Parameters 10 | 11 | 12 | ID of the browser profile to retrieve 13 | 14 | 15 | ### Response 16 | 17 | 18 | Unique identifier for the browser profile 19 | 20 | 21 | Name of the browser profile 22 | 23 | 24 | Description of the profile 25 | 26 | 27 | Save cookies, local storage, and session data between tasks 28 | 29 | 30 | Block ads and popups during automated tasks 31 | 32 | 33 | Route traffic through mobile proxies for better stealth 34 | 35 | 36 | Country code for the proxy 37 | 38 | 39 | Browser viewport width in pixels 40 | 41 | 42 | Browser viewport height in pixels 43 | 44 | 45 | 46 | ```python python 47 | import requests 48 | 49 | API_KEY = 'your_api_key_here' 50 | BASE_URL = 'https://api.browser-use.com/api/v1' 51 | HEADERS = {'Authorization': f'Bearer {API_KEY}'} 52 | 53 | profile_id = 'profile_1234567890abcdef' 54 | response = requests.get(f'{BASE_URL}/browser-profiles/{profile_id}', headers=HEADERS) 55 | profile = response.json() 56 | print(profile) 57 | 58 | ``` 59 | 60 | ```bash curl 61 | curl --request GET \ 62 | --url https://api.browser-use.com/api/v1/browser-profiles/profile_1234567890abcdef \ 63 | --header 'Authorization: Bearer YOUR_API_KEY' 64 | ``` 65 | 66 | 67 | 68 | 69 | ```json 200 70 | { 71 | "profile_id": "profile_1234567890abcdef", 72 | "profile_name": "Default Profile", 73 | "description": "Main automation profile", 74 | "persist": true, 75 | "ad_blocker": true, 76 | "proxy": true, 77 | "proxy_country_code": "US", 78 | "browser_viewport_width": 1280, 79 | "browser_viewport_height": 960 80 | } 81 | ``` 82 | 83 | -------------------------------------------------------------------------------- /docs/api-reference/get-task-media.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Get Task Media" 3 | api: "GET /api/v1/task/{task_id}/media" 4 | description: "Get media files generated during task execution" 5 | --- 6 | 7 | Returns links to any recordings or media generated during task 
execution, such as browser session recordings. Only available for completed tasks. 8 | 9 | 10 | ID of the task to retrieve media for 11 | 12 | 13 | 14 | List of recording URLs generated during task execution 15 | 16 | 17 | 18 | ```python 19 | import requests 20 | 21 | API_KEY = 'your_api_key_here' 22 | BASE_URL = 'https://api.browser-use.com/api/v1' 23 | HEADERS = {'Authorization': f'Bearer {API_KEY}'} 24 | 25 | task_id = 'task_1234567890abcdef' 26 | response = requests.get(f'{BASE_URL}/task/{task_id}/media', headers=HEADERS) 27 | media = response.json() 28 | print(f"Found {len(media['recordings'])} recordings") 29 | ``` 30 | 31 | 32 | 33 | ```json 34 | { 35 | "recordings": [ 36 | "https://media.browser-use.com/recordings/task_1234567890abcdef/session.mp4", 37 | "https://media.browser-use.com/recordings/task_1234567890abcdef/screen.webm" 38 | ] 39 | } 40 | ``` 41 | 42 | 43 | ## Media Types 44 | 45 | The following types of media files may be generated: 46 | 47 | - **Session recordings**: Full browser session recordings in MP4 format 48 | - **Screen recordings**: Screen capture videos in WebM format 49 | - **Audio recordings**: Audio tracks if microphone access was used 50 | 51 | ## Availability 52 | 53 | - Media files are only available for completed tasks 54 | - Recordings are generated automatically during task execution 55 | - Files are available for download for 30 days after task completion 56 | - Media generation can be disabled in task settings to save storage 57 | 58 | 59 | Media files are only generated for tasks that have been configured to record sessions. This feature may not be available for all task types. 
60 | 61 | -------------------------------------------------------------------------------- /docs/api-reference/get-task-output-file.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Get Task Output File" 3 | api: "GET /api/v1/task/{task_id}/output-file/{file_name}" 4 | description: "Returns a presigned URL for downloading a file from the task output files" 5 | --- 6 | 7 | Returns a presigned URL for downloading a file from the task output files. This endpoint is useful for retrieving files that were generated or modified during task execution. 8 | 9 | ## Path Parameters 10 | 11 | 12 | ID of the task 13 | 14 | 15 | Name of the output file 16 | 17 | 18 | ## Response 19 | 20 | 21 | A presigned URL for downloading the file. 22 | 23 | 24 | 25 | 26 | ```python python 27 | import requests 28 | 29 | API_KEY = 'your_api_key_here' 30 | BASE_URL = 'https://api.browser-use.com/api/v1' 31 | HEADERS = {'Authorization': f'Bearer {API_KEY}'} 32 | 33 | task_id = 'task_1234567890abcdef' 34 | file_name = 'results.csv' 35 | 36 | response = requests.get(f'{BASE_URL}/task/{task_id}/output-file/{file_name}', headers=HEADERS) 37 | download_url = response.json()['download_url'] 38 | 39 | # Download the file 40 | file_response = requests.get(download_url) 41 | with open('downloaded_results.csv', 'wb') as file: 42 | file.write(file_response.content) 43 | 44 | print("File downloaded successfully") 45 | ``` 46 | 47 | ```bash curl 48 | curl --request GET \ 49 | --url https://api.browser-use.com/api/v1/task/{task_id}/output-file/{file_name} \ 50 | --header 'Authorization: Bearer ' 51 | ``` 52 | 53 | 54 | 55 | 56 | 57 | ```json 200 58 | { 59 | "download_url": "https://storage.browser-use.com/output-files/task_1234567890abcdef/results.csv?signature=..." 
60 | } 61 | ``` 62 | 63 | 64 | -------------------------------------------------------------------------------- /docs/api-reference/get-task-screenshots.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Get Task Screenshots" 3 | api: "GET /api/v1/task/{task_id}/screenshots" 4 | description: "Get screenshots generated during task execution" 5 | --- 6 | 7 | Returns any screenshot URLs generated during task execution. Screenshots are automatically captured at key moments during the automation process. 8 | 9 | 10 | ID of the task to retrieve screenshots for 11 | 12 | 13 | 14 | List of screenshot URLs generated during task execution 15 | 16 | 17 | 18 | ```python 19 | import requests 20 | 21 | API_KEY = 'your_api_key_here' 22 | BASE_URL = 'https://api.browser-use.com/api/v1' 23 | HEADERS = {'Authorization': f'Bearer {API_KEY}'} 24 | 25 | task_id = 'task_1234567890abcdef' 26 | response = requests.get(f'{BASE_URL}/task/{task_id}/screenshots', headers=HEADERS) 27 | screenshots = response.json() 28 | print(f"Found {len(screenshots['screenshots'])} screenshots") 29 | 30 | # Download the first screenshot 31 | if screenshots['screenshots']: 32 | screenshot_url = screenshots['screenshots'][0] 33 | img_response = requests.get(screenshot_url) 34 | with open('screenshot.png', 'wb') as f: 35 | f.write(img_response.content) 36 | ``` 37 | 38 | 39 | 40 | ```json 41 | { 42 | "screenshots": [ 43 | "https://media.browser-use.com/screenshots/task_1234567890abcdef/step_1.png", 44 | "https://media.browser-use.com/screenshots/task_1234567890abcdef/step_2.png", 45 | "https://media.browser-use.com/screenshots/task_1234567890abcdef/step_3.png" 46 | ] 47 | } 48 | ``` 49 | 50 | 51 | ## Screenshot Details 52 | 53 | Screenshots are captured automatically during task execution: 54 | 55 | - **Step-by-step captures**: Screenshots taken at each major step 56 | - **Error captures**: Screenshots captured when errors occur 57 | - **Final result**: 
Screenshot of the final state when task completes 58 | - **High resolution**: Screenshots are captured at full browser resolution 59 | 60 | ## File Format 61 | 62 | - All screenshots are saved in PNG format 63 | - Screenshots maintain the original browser viewport dimensions 64 | - File names include the task ID and step number for easy identification 65 | 66 | ## Availability 67 | 68 | - Screenshots are available immediately after capture 69 | - Files are stored for 30 days after task completion 70 | - Screenshots can be disabled in task settings to reduce storage usage 71 | 72 | 73 | Screenshots are automatically generated for most tasks unless specifically disabled. The number of screenshots depends on the task complexity and duration. 74 | 75 | -------------------------------------------------------------------------------- /docs/api-reference/get-task-status.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Get Task Status" 3 | api: "GET /api/v1/task/{task_id}/status" 4 | description: "Get the current status of a task" 5 | --- 6 | 7 | Returns just the current status of a task (created, running, finished, stopped, paused, or failed). This is more lightweight than the full task details endpoint. 8 | 9 | ## Path Parameters 10 | 11 | 12 | ID of the task to check status for 13 | 14 | 15 | ## Response 16 | 17 | The endpoint returns the status as a simple string value (not wrapped in an object). 
18 | 19 | 20 | ```python 21 | import requests 22 | 23 | API_KEY = 'your_api_key_here' 24 | BASE_URL = 'https://api.browser-use.com/api/v1' 25 | HEADERS = {'Authorization': f'Bearer {API_KEY}'} 26 | 27 | task_id = 'task_1234567890abcdef' 28 | response = requests.get(f'{BASE_URL}/task/{task_id}/status', headers=HEADERS) 29 | status = response.json() 30 | print(f"Task status: {status}") 31 | ``` 32 | 33 | ```bash curl 34 | curl --request GET \ 35 | --url https://api.browser-use.com/api/v1/task/{task_id}/status \ 36 | --header 'Authorization: Bearer ' 37 | ``` 38 | 39 | 40 | 41 | ```json 200 42 | "finished" 43 | ``` 44 | 45 | ```json 404 46 | { 47 | "detail": "Task not found" 48 | } 49 | ``` 50 | 51 | ```json 422 52 | { 53 | "detail": [ 54 | { 55 | "loc": ["path", "task_id"], 56 | "msg": "field required", 57 | "type": "value_error.missing" 58 | } 59 | ] 60 | } 61 | ``` 62 | 63 | 64 | ## Status Values 65 | 66 | The status field can have one of the following values: 67 | 68 | - `created`: Task is initialized but not yet started 69 | - `running`: Task is currently executing 70 | - `finished`: Task has completed successfully 71 | - `stopped`: Task was manually stopped 72 | - `paused`: Task execution is temporarily paused 73 | - `failed`: Task encountered an error and could not complete 74 | 75 | ## Use Cases 76 | 77 | This endpoint is useful for: 78 | - Polling task status without retrieving full task details 79 | - Lightweight status checks in monitoring applications 80 | - Quick status verification before making other API calls 81 | - Building real-time dashboards with minimal data transfer 82 | 83 | 84 | Use this endpoint instead of the full task details endpoint when you only need to check the current status, as it's much faster and uses less bandwidth. 
85 | 86 | -------------------------------------------------------------------------------- /docs/api-reference/pause-task.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Pause Task" 3 | api: "PUT /api/v1/pause-task" 4 | description: "Pauses execution of a running task" 5 | --- 6 | 7 | Pauses execution of a running task. The task can be resumed later using the `/resume-task` endpoint. Useful for manual intervention or inspection. 8 | 9 | ## Parameters 10 | 11 | 12 | ID of the task to pause 13 | 14 | 15 | ## Response 16 | 17 | The endpoint returns an empty response body with a 200 status code on success. 18 | 19 | 20 | 21 | ```python python 22 | import requests 23 | 24 | url = "https://api.browser-use.com/api/v1/pause-task" 25 | params = {"task_id": "task_1234567890abcdef"} 26 | headers = {"Authorization": "Bearer YOUR_API_KEY"} 27 | 28 | response = requests.request("PUT", url, headers=headers, params=params) 29 | 30 | print(response.text) 31 | ``` 32 | 33 | ```bash cURL 34 | curl --request PUT \ 35 | --url 'https://api.browser-use.com/api/v1/pause-task?task_id=task_1234567890abcdef' \ 36 | --header 'Authorization: Bearer YOUR_API_KEY' 37 | ``` 38 | 39 | ```javascript javascript 40 | const options = {method: 'PUT', headers: {Authorization: 'Bearer YOUR_API_KEY'}}; 41 | 42 | fetch('https://api.browser-use.com/api/v1/pause-task?task_id=task_1234567890abcdef', options) 43 | .then(response => { 44 | if (response.ok) { 45 | console.log('Task paused successfully'); 46 | } else { 47 | return response.json().then(err => { throw err; }); 48 | } 49 | }) 50 | .catch(err => console.error(err)); 51 | ``` 52 | 53 | 54 | 55 | ```json 200 56 | {} 57 | ``` 58 | 59 | ```json 422 60 | { 61 | "detail": [ 62 | { 63 | "loc": [ 64 | "query", 65 | "task_id" 66 | ], 67 | "msg": "field required", 68 | "type": "value_error.missing" 69 | } 70 | ] 71 | } 72 | ``` 73 | 74 | 75 | ## Usage Notes 76 | 77 | - Paused tasks can be resumed using the `/resume-task` endpoint 78 | - 
The task status will change to "paused" 79 | - Browser automation will be temporarily halted 80 | - Useful for manual intervention or inspection during task execution 81 | 82 | 83 | Pausing is useful when you need to temporarily halt execution to inspect the current state or make manual adjustments before resuming. 84 | 85 | -------------------------------------------------------------------------------- /docs/api-reference/ping.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Ping" 3 | api: "GET /api/v1/ping" 4 | description: "Check if the server is running and responding" 5 | --- 6 | 7 | Use this endpoint to check if the server is running and responding. This is the only endpoint that doesn't require authentication. 8 | 9 | ## Response 10 | 11 | A successful response has a 200 status code with an empty JSON object. 12 | 13 | 14 | 15 | ```python python 16 | import requests 17 | 18 | BASE_URL = 'https://api.browser-use.com/api/v1' 19 | 20 | response = requests.get(f'{BASE_URL}/ping') 21 | if response.status_code == 200: 22 | print("Server is up and running") 23 | ``` 24 | 25 | ```bash curl 26 | curl --request GET \ 27 | --url https://api.browser-use.com/api/v1/ping 28 | ``` 29 | 30 | 31 | 32 | 33 | 34 | ```json 200 35 | {} 36 | ``` 37 | 38 | 39 | -------------------------------------------------------------------------------- /docs/api-reference/resume-task.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Resume Task" 3 | api: "PUT /api/v1/resume-task" 4 | description: "Resumes execution of a previously paused task" 5 | --- 6 | 7 | Resumes execution of a previously paused task. The task will continue from where it was paused. You can't resume a stopped task. 8 | 9 | ## Parameters 10 | 11 | 12 | ID of the task to resume 13 | 14 | 15 | ## Response 16 | 17 | The endpoint returns an empty response body with a 200 status code on success. 
18 | 19 | 20 | 21 | ```python python 22 | import requests 23 | 24 | url = "https://api.browser-use.com/api/v1/resume-task" 25 | params = {"task_id": "task_1234567890abcdef"} 26 | headers = {"Authorization": "Bearer YOUR_API_KEY"} 27 | 28 | response = requests.request("PUT", url, headers=headers, params=params) 29 | 30 | print(response.text) 31 | ``` 32 | 33 | ```bash cURL 34 | curl --request PUT \ 35 | --url 'https://api.browser-use.com/api/v1/resume-task?task_id=task_1234567890abcdef' \ 36 | --header 'Authorization: Bearer YOUR_API_KEY' 37 | ``` 38 | 39 | ```javascript javascript 40 | const options = {method: 'PUT', headers: {Authorization: 'Bearer YOUR_API_KEY'}}; 41 | 42 | fetch('https://api.browser-use.com/api/v1/resume-task?task_id=task_1234567890abcdef', options) 43 | .then(response => response.json()) 44 | .then(response => console.log(response)) 45 | .catch(err => console.error(err)); 46 | ``` 47 | 48 | 49 | 50 | ```json 200 51 | {} 52 | ``` 53 | 54 | ```json 422 55 | { 56 | "detail": [ 57 | { 58 | "loc": [ 59 | "query", 60 | "task_id" 61 | ], 62 | "msg": "field required", 63 | "type": "value_error.missing" 64 | } 65 | ] 66 | } 67 | ``` 68 | 69 | 70 | ## Usage Notes 71 | 72 | - Only paused tasks can be resumed 73 | - The task status will change from "paused" to "running" 74 | - Browser automation will continue from where it was paused 75 | - Stopped tasks cannot be resumed - you must create a new task instead 76 | 77 | 78 | You cannot resume a task that has been stopped. Only paused tasks can be resumed. 79 | 80 | -------------------------------------------------------------------------------- /docs/api-reference/stop-task.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Stop Task" 3 | api: "PUT /api/v1/stop-task" 4 | description: "Stops a running browser automation task immediately." 5 | --- 6 | 7 | Stops a running browser automation task immediately. The task cannot be resumed after being stopped. 
Use the `/pause-task` endpoint instead if you want to temporarily halt execution. 8 | 9 | ## Parameters 10 | 11 | 12 | ID of the task to stop 13 | 14 | 15 | ## Response 16 | 17 | The endpoint returns an empty response body with a 200 status code on success. 18 | 19 | 20 | 21 | ```python python 22 | import requests 23 | 24 | url = "https://api.browser-use.com/api/v1/stop-task" 25 | params = {"task_id": "task_1234567890abcdef"} 26 | headers = {"Authorization": "Bearer YOUR_API_KEY"} 27 | 28 | response = requests.request("PUT", url, headers=headers, params=params) 29 | 30 | print(response.text) 31 | ``` 32 | 33 | ```bash cURL 34 | curl --request PUT \ 35 | --url 'https://api.browser-use.com/api/v1/stop-task?task_id=task_1234567890abcdef' \ 36 | --header 'Authorization: Bearer YOUR_API_KEY' 37 | ``` 38 | 39 | ```javascript javascript 40 | const options = {method: 'PUT', headers: {Authorization: 'Bearer YOUR_API_KEY'}}; 41 | 42 | fetch('https://api.browser-use.com/api/v1/stop-task?task_id=task_1234567890abcdef', options) 43 | .then(response => response.json()) 44 | .then(response => console.log(response)) 45 | .catch(err => console.error(err)); 46 | ``` 47 | 48 | 49 | 50 | ```json 200 51 | {} 52 | ``` 53 | 54 | ```json 422 55 | { 56 | "detail": [ 57 | { 58 | "loc": [ 59 | "query", 60 | "task_id" 61 | ], 62 | "msg": "field required", 63 | "type": "value_error.missing" 64 | } 65 | ] 66 | } 67 | ``` 68 | 69 | 70 | ## Usage Notes 71 | 72 | - Once a task is stopped, it cannot be resumed 73 | - The task status will change to "stopped" 74 | - Any ongoing browser automation will be immediately terminated 75 | - Use the pause endpoint if you need to temporarily halt execution with the ability to resume later 76 | 77 | 78 | Stopping a task is irreversible. If you need to pause execution temporarily, use the `/pause-task` endpoint instead. 
79 | 80 | -------------------------------------------------------------------------------- /docs/api-reference/user.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Me" 3 | api: "GET /api/v1/me" 4 | description: "Returns a boolean value indicating if the API key is valid and the user is authenticated" 5 | --- 6 | 7 | Returns a boolean value indicating if the API key is valid and the user is authenticated. 8 | 9 | ## Response 10 | 11 | The endpoint returns a boolean value directly (not wrapped in an object): 12 | - `true` if the API key is valid and the user is authenticated 13 | - `false` if the API key is invalid or the user is not authenticated 14 | 15 | 16 | 17 | ```python python 18 | import requests 19 | 20 | API_KEY = 'your_api_key_here' 21 | BASE_URL = 'https://api.browser-use.com/api/v1' 22 | HEADERS = {'Authorization': f'Bearer {API_KEY}'} 23 | 24 | response = requests.get(f'{BASE_URL}/me', headers=HEADERS) 25 | is_authenticated = response.json() 26 | if is_authenticated: 27 | print("API key is valid") 28 | else: 29 | print("API key is invalid") 30 | ``` 31 | 32 | ```bash curl 33 | curl --request GET \ 34 | --url https://api.browser-use.com/api/v1/me \ 35 | --header 'Authorization: Bearer ' 36 | ``` 37 | 38 | 39 | 40 | 41 | 42 | ```json 200 43 | true 44 | ``` 45 | 46 | ```json 401 47 | false 48 | ``` 49 | 50 | 51 | 52 | ## Usage Notes 53 | 54 | - This endpoint is useful for validating API keys before making other API calls 55 | - Unlike other endpoints, this returns a simple boolean value rather than an object 56 | - A `true` response confirms both authentication and authorization 57 | - This endpoint can be used for health checks of your API integration 58 | 59 | 60 | Use this endpoint to verify your API key is working correctly before making other API calls, especially in automated systems. 
61 | 62 | -------------------------------------------------------------------------------- /docs/cloud/authentication.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Authentication" 3 | description: "Learn how to authenticate with the Browser Use Cloud API" 4 | icon: "lock" 5 | --- 6 | 7 | The Browser Use Cloud API uses API keys to authenticate requests. You can obtain an API key from your [Browser Use Cloud dashboard](https://cloud.browser-use.com/settings/api-keys). 8 | 9 | ## API Keys 10 | 11 | All API requests must include your API key in the `Authorization` header: 12 | 13 | ```bash 14 | Authorization: Bearer YOUR_API_KEY 15 | ``` 16 | 17 | Keep your API keys secure and do not share them in publicly accessible areas such as GitHub, client-side code, or in your browser's developer tools. API keys should be stored securely in environment variables or a secure key management system. 18 | 19 | ## Example Request 20 | 21 | Here's an example of how to include your API key in a request using Python: 22 | 23 | ```python 24 | import requests 25 | 26 | API_KEY = 'your_api_key_here' 27 | BASE_URL = 'https://api.browser-use.com/api/v1' 28 | HEADERS = {'Authorization': f'Bearer {API_KEY}'} 29 | 30 | response = requests.get(f'{BASE_URL}/me', headers=HEADERS) 31 | print(response.json()) 32 | ``` 33 | 34 | ## Verifying Authentication 35 | 36 | You can verify that your API key is valid by making a request to the `/api/v1/me` endpoint. See the [Me endpoint documentation](../api-reference/user) for more details. 37 | 38 | ## API Key Security 39 | 40 | To ensure the security of your API keys: 41 | 42 | 1. **Never share your API key** in publicly accessible areas 43 | 2. **Rotate your API keys** periodically 44 | 3. **Use environment variables** to store API keys in your applications 45 | 4. **Implement proper access controls** for your API keys 46 | 5. 
**Monitor API key usage** for suspicious activity 47 | 48 | If you believe your API key has been compromised, you should immediately revoke it and generate a new one from your Browser Use Cloud dashboard. 49 | -------------------------------------------------------------------------------- /docs/cloud/custom-sdk.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Cloud SDK" 3 | description: "Learn how to set up your own Browser Use Cloud SDK" 4 | icon: "code" 5 | --- 6 | 7 | This guide walks you through setting up your own Browser Use Cloud SDK. 8 | 9 | ## Building your own client (OpenAPI) 10 | 11 | 12 | This approach is recommended **only** if you need to run simple tasks and 13 | **don’t require fine-grained control**. 14 | 15 | 16 | The best way to build your own client is to use our [OpenAPI specification](http://api.browser-use.com/openapi.json) to generate a type-safe client library. 17 | 18 | ### Python 19 | 20 | Use [openapi-python-client](https://github.com/openapi-generators/openapi-python-client) to generate a modern Python client: 21 | 22 | ```bash 23 | # Install the generator 24 | pipx install openapi-python-client --include-deps 25 | 26 | # Generate the client 27 | openapi-python-client generate --url http://api.browser-use.com/openapi.json 28 | ``` 29 | 30 | This will create a Python package with full type hints, modern dataclasses, and async support. 31 | 32 | ### TypeScript/JavaScript 33 | 34 | Use [OpenAPI TS](https://openapi-ts.dev/) library to generate a type safe TypeScript client for the Browser Use API. 35 | 36 | The following guide shows how to create a simple type-safe `fetch` client, but you can also use other generators. 
37 | 38 | - React Query - https://openapi-ts.dev/openapi-react-query/ 39 | - SWR - https://openapi-ts.dev/swr-openapi/ 40 | 41 | 42 | 43 | ```bash npm 44 | npm install openapi-fetch 45 | npm install -D openapi-typescript typescript 46 | ``` 47 | 48 | ```bash yarn 49 | yarn add openapi-fetch 50 | yarn add -D openapi-typescript typescript 51 | ``` 52 | ```bash pnpm 53 | pnpm add openapi-fetch 54 | pnpm add -D openapi-typescript typescript 55 | ``` 56 | 57 | 58 | ```json title="package.json" 59 | { 60 | "scripts": { 61 | "openapi:gen": "openapi-typescript https://api.browser-use.com/openapi.json -o ./src/lib/api/v1.d.ts" 62 | } 63 | } 64 | ``` 65 | 66 | ```bash 67 | pnpm openapi:gen 68 | ``` 69 | 70 | ```ts 71 | // client.ts 72 | 73 | 'use client' 74 | 75 | import createClient from 'openapi-fetch' 76 | import { paths } from '@/lib/api/v1' 77 | 78 | export type Client = ReturnType<typeof createClient<paths>> 79 | 80 | export const client = createClient<paths>({ 81 | baseUrl: 'https://api.browser-use.com/', 82 | 83 | // NOTE: You can get your API key from https://cloud.browser-use.com/billing! 84 | headers: { Authorization: `Bearer ${apiKey}` }, 85 | }) 86 | 87 | ``` 88 | 89 | 90 | Need help? Contact our support team at support@browser-use.com or join our 91 | [Discord community](https://link.browser-use.com/discord) 92 | 93 | -------------------------------------------------------------------------------- /docs/customize/output-format.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Output Format" 3 | description: "The default is text. But you can define a structured output format to make post-processing easier." 4 | icon: "code" 5 | --- 6 | 7 | ## Custom output format 8 | With [this example](https://github.com/browser-use/browser-use/blob/main/examples/features/custom_output.py) you can define what output format the agent should return to you. 
9 | 10 | ```python 11 | from pydantic import BaseModel 12 | # Define the output format as a Pydantic model 13 | class Post(BaseModel): 14 | post_title: str 15 | post_url: str 16 | num_comments: int 17 | hours_since_post: int 18 | 19 | 20 | class Posts(BaseModel): 21 | posts: List[Post] 22 | 23 | 24 | controller = Controller(output_model=Posts) 25 | 26 | 27 | async def main(): 28 | task = 'Go to hackernews show hn and give me the first 5 posts' 29 | model = ChatOpenAI(model='gpt-4o') 30 | agent = Agent(task=task, llm=model, controller=controller) 31 | 32 | history = await agent.run() 33 | 34 | result = history.final_result() 35 | if result: 36 | parsed: Posts = Posts.model_validate_json(result) 37 | 38 | for post in parsed.posts: 39 | print('\n--------------------------------') 40 | print(f'Title: {post.post_title}') 41 | print(f'URL: {post.post_url}') 42 | print(f'Comments: {post.num_comments}') 43 | print(f'Hours since post: {post.hours_since_post}') 44 | else: 45 | print('No result') 46 | 47 | 48 | if __name__ == '__main__': 49 | asyncio.run(main()) 50 | ``` 51 | -------------------------------------------------------------------------------- /docs/customize/system-prompt.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "System Prompt" 3 | description: "Customize the system prompt to control agent behavior and capabilities" 4 | icon: "message" 5 | --- 6 | 7 | ## Overview 8 | 9 | You can customize the system prompt in two ways: 10 | 11 | 1. Extend the default system prompt with additional instructions 12 | 2. Override the default system prompt entirely 13 | 14 | 15 | Custom system prompts allow you to modify the agent's behavior at a 16 | fundamental level. Use this feature carefully as it can significantly impact 17 | the agent's performance and reliability. 
18 | 19 | 20 | ### Extend System Prompt (recommended) 21 | 22 | To add additional instructions to the default system prompt: 23 | 24 | ```python 25 | extend_system_message = """ 26 | REMEMBER the most important RULE: 27 | ALWAYS open first a new tab and go first to url wikipedia.com no matter the task!!! 28 | """ 29 | ``` 30 | 31 | ### Override System Prompt 32 | 33 | 34 | Not recommended! If you must override the [default system 35 | prompt](https://github.com/browser-use/browser-use/blob/main/browser_use/agent/system_prompt.md), 36 | make sure to test the agent yourself. 37 | 38 | 39 | Anyway, to override the default system prompt: 40 | 41 | ```python 42 | # Define your complete custom prompt 43 | override_system_message = """ 44 | You are an AI agent that helps users with web browsing tasks. 45 | 46 | [Your complete custom instructions here...] 47 | """ 48 | 49 | # Create agent with custom system prompt 50 | agent = Agent( 51 | task="Your task here", 52 | llm=ChatOpenAI(model='gpt-4'), 53 | override_system_message=override_system_message 54 | ) 55 | ``` 56 | 57 | ### Extend Planner System Prompt 58 | 59 | You can customize the behavior of the planning agent by extending its system prompt: 60 | 61 | ```python 62 | extend_planner_system_message = """ 63 | PRIORITIZE gathering information before taking any action. 64 | Always suggest exploring multiple options before making a decision. 
65 | """ 66 | 67 | # Create agent with extended planner system prompt 68 | llm = ChatOpenAI(model='gpt-4o') 69 | planner_llm = ChatOpenAI(model='gpt-4o-mini') 70 | 71 | agent = Agent( 72 | task="Your task here", 73 | llm=llm, 74 | planner_llm=planner_llm, 75 | extend_planner_system_message=extend_planner_system_message 76 | ) 77 | ``` 78 | -------------------------------------------------------------------------------- /docs/development/evaluations.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Evaluations" 3 | description: "Test the Browser Use agent on standardized benchmarks" 4 | icon: "chart-bar" 5 | --- 6 | 7 | ## Prerequisites 8 | 9 | Browser Use uses proprietary/private test sets that must never be committed to GitHub and must be fetched through an authorized API request. 10 | Accessing these test sets requires an approved Browser Use account. 11 | There are currently no publicly available test sets, but some may be released in the future. 12 | 13 | ## Get an API Access Key 14 | 15 | First, navigate to https://browser-use.tools and log in with an authorized Browser Use account. 16 | 17 | Then, click the "Account" button at the top right of the page, and click the "Cycle New Key" button on that page. 18 | 19 | Copy the resulting URL and secret key into your `.env` file. It should look like this: 20 | 21 | ```bash .env 22 | EVALUATION_TOOL_URL= ... 23 | EVALUATION_TOOL_SECRET_KEY= ... 24 | ``` 25 | 26 | ## Running Evaluations 27 | 28 | First, ensure your file `eval/service.py` is up to date. 29 | 30 | Then run the file: 31 | 32 | ```bash 33 | python eval/service.py 34 | ``` 35 | 36 | ## Configuring Evaluations 37 | 38 | You can modify the evaluation by providing flags to the evaluation script. 
For instance: 39 | 40 | ```bash 41 | python eval/service.py --parallel_runs 5 --parallel_evaluations 5 --max-steps 25 --start 0 --end 100 --model gpt-4o 42 | ``` 43 | 44 | The evaluations webpage has a convenient GUI for generating these commands. To use it, navigate to https://browser-use.tools/dashboard. 45 | 46 | Then click the button "New Eval Run" on the left panel. This will open an interface with selectors, inputs, sliders, and switches. 47 | 48 | Input your desired configuration into the interface and copy the resulting Python command at the bottom. Then run this command as before. 49 | -------------------------------------------------------------------------------- /docs/development/observability.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Observability" 3 | description: "Trace Browser Use's agent execution steps and browser sessions" 4 | icon: "eye" 5 | --- 6 | 7 | ## Overview 8 | 9 | Browser Use has a native integration with [Laminar](https://lmnr.ai) - an open-source platform for tracing, evals and labeling of AI agents. 10 | Read more about Laminar in the [Laminar docs](https://docs.lmnr.ai). 11 | 12 | 13 | Laminar excels at tracing browser agents by providing unified visibility into 14 | both browser session recordings and agent execution steps. 15 | 16 | 17 | ## Setup 18 | 19 | To set up Laminar, you need to install the `lmnr` package and set the `LMNR_PROJECT_API_KEY` environment variable. 20 | 21 | To get your project API key, you can either: 22 | 23 | - Register on [Laminar Cloud](https://lmnr.ai) and get the key from your project settings 24 | - Or spin up a local Laminar instance and get the key from the settings page 25 | 26 | ```bash 27 | pip install 'lmnr[all]' 28 | export LMNR_PROJECT_API_KEY= 29 | ``` 30 | 31 | ## Usage 32 | 33 | Then, you simply initialize Laminar at the top of your project and both Browser Use and session recordings will be automatically traced. 
34 | 35 | ```python {5-8} 36 | from browser_use.llm import ChatOpenAI 37 | from browser_use import Agent 38 | import asyncio 39 | 40 | from lmnr import Laminar, Instruments 41 | # this line auto-instruments Browser Use and any browser you use (local or remote) 42 | Laminar.initialize(project_api_key="...", disable_batch=True, disabled_instruments={Instruments.BROWSER_USE}) # you can also pass project api key here 43 | 44 | async def main(): 45 | agent = Agent( 46 | task="open google, search Laminar AI", 47 | llm=ChatOpenAI(model="gpt-4.1-mini"), 48 | ) 49 | result = await agent.run() 50 | print(result) 51 | 52 | asyncio.run(main()) 53 | ``` 54 | 55 | ## Viewing Traces 56 | 57 | You can view traces in the Laminar UI by going to the traces tab in your project. 58 | When you select a trace, you can see both the browser session recording and the agent execution steps. 59 | 60 | Timeline of the browser session is synced with the agent execution steps, timeline highlights indicate the agent's current step synced with the browser session. 61 | In the trace view, you can also see the agent's current step, the tool it's using, and the tool's input and output. Tools are highlighted in the timeline with a yellow color. 62 | 63 | Laminar 64 | 65 | ## Laminar 66 | 67 | To learn more about tracing and evaluating your browser agents, check out the [Laminar docs](https://docs.lmnr.ai). 68 | -------------------------------------------------------------------------------- /docs/development/roadmap.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Roadmap" 3 | description: "Future plans and upcoming features for Browser Use" 4 | icon: "road" 5 | --- 6 | 7 | Big things coming soon! 
8 | -------------------------------------------------------------------------------- /docs/development/telemetry.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Telemetry" 3 | description: "Understanding Browser Use's telemetry and privacy settings" 4 | icon: "chart-mixed" 5 | --- 6 | 7 | ## Overview 8 | 9 | Browser Use collects anonymous usage data to help us understand how the library is being used and to improve the user experience. It also helps us fix bugs faster and prioritize feature development. 10 | 11 | ## Data Collection 12 | 13 | We use [PostHog](https://posthog.com) for telemetry collection. The data is completely anonymized and contains no personally identifiable information. 14 | 15 | 16 | We never collect personal information, credentials, or specific content from 17 | your browser automation tasks. 18 | 19 | 20 | ## Opting Out 21 | 22 | You can disable telemetry by setting an environment variable: 23 | 24 | ```bash .env 25 | ANONYMIZED_TELEMETRY=false 26 | ``` 27 | 28 | Or in your Python code: 29 | 30 | ```python 31 | import os 32 | os.environ["ANONYMIZED_TELEMETRY"] = "false" 33 | ``` 34 | 35 | 36 | Even when enabled, telemetry has zero impact on the library's performance or 37 | functionality. Code is available in [Telemetry 38 | Service](https://github.com/browser-use/browser-use/tree/main/browser_use/telemetry). 
39 | 40 | -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/9871be3363081dd88402dd7244f1997132929c4f/docs/favicon.ico -------------------------------------------------------------------------------- /docs/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/images/browser-use.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/9871be3363081dd88402dd7244f1997132929c4f/docs/images/browser-use.png -------------------------------------------------------------------------------- /docs/images/checks-passed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/9871be3363081dd88402dd7244f1997132929c4f/docs/images/checks-passed.png -------------------------------------------------------------------------------- /docs/images/laminar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/9871be3363081dd88402dd7244f1997132929c4f/docs/images/laminar.png -------------------------------------------------------------------------------- /docs/quickstart.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Quickstart" 3 | description: "Start using Browser Use with this quickstart guide" 4 | icon: "rocket" 5 | --- 6 | 7 | {/* You can install Browser Use from PyPI or clone it from Github. */} 8 | 9 | ## Prepare the environment 10 | 11 | Browser Use requires Python 3.11 or higher. 
12 | 13 | First, we recommend using [uv](https://docs.astral.sh/uv/) to setup the Python environment. 14 | 15 | ```bash 16 | uv venv --python 3.11 17 | ``` 18 | 19 | and activate it with: 20 | 21 | ```bash 22 | # For Mac/Linux: 23 | source .venv/bin/activate 24 | 25 | # For Windows: 26 | .venv\Scripts\activate 27 | ``` 28 | 29 | Install the dependencies: 30 | 31 | ```bash 32 | uv pip install browser-use 33 | ``` 34 | 35 | Then install playwright: 36 | 37 | ```bash 38 | uv run playwright install 39 | ``` 40 | 41 | ## Create an agent 42 | 43 | Then you can use the agent as follows: 44 | 45 | ```python agent.py 46 | from browser_use.llm import ChatOpenAI 47 | from browser_use import Agent 48 | from dotenv import load_dotenv 49 | load_dotenv() 50 | 51 | import asyncio 52 | 53 | llm = ChatOpenAI(model="gpt-4.1") 54 | 55 | async def main(): 56 | agent = Agent( 57 | task="Compare the price of gpt-4o and DeepSeek-V3", 58 | llm=llm, 59 | ) 60 | result = await agent.run() 61 | print(result) 62 | 63 | asyncio.run(main()) 64 | ``` 65 | 66 | ## Set up your LLM API keys 67 | 68 | `ChatOpenAI` and other chat models require API keys. You should store these in your `.env` file. For example, for OpenAI and Anthropic, you can set the API keys in your `.env` file, such as: 69 | 70 | ```bash .env 71 | OPENAI_API_KEY= 72 | ANTHROPIC_API_KEY= 73 | ``` 74 | 75 | For other LLM models you can refer to the [Supported Models](/customize/supported-models) page to find how to set them up with their specific API keys. 
76 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/9871be3363081dd88402dd7244f1997132929c4f/examples/__init__.py -------------------------------------------------------------------------------- /examples/browser/multiple_agents_same_browser.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | 12 | from browser_use import Agent 13 | from browser_use.browser.profile import BrowserProfile 14 | from browser_use.browser.session import BrowserSession 15 | from browser_use.llm import ChatOpenAI 16 | 17 | 18 | async def main(): 19 | browser_session = BrowserSession( 20 | browser_profile=BrowserProfile( 21 | keep_alive=True, 22 | user_data_dir=None, 23 | headless=False, 24 | ) 25 | ) 26 | await browser_session.start() 27 | 28 | current_agent = None 29 | llm = ChatOpenAI(model='gpt-4.1') 30 | 31 | task1 = 'find todays weather on San Francisco and extract it as json' 32 | task2 = 'find todays weather in Zurich and extract it as json' 33 | 34 | agent1 = Agent( 35 | task=task1, 36 | browser_session=browser_session, 37 | llm=llm, 38 | ) 39 | agent2 = Agent( 40 | task=task2, 41 | browser_session=browser_session, 42 | llm=llm, 43 | ) 44 | 45 | await asyncio.gather(agent1.run(), agent2.run()) 46 | await browser_session.kill() 47 | 48 | 49 | asyncio.run(main()) 50 | -------------------------------------------------------------------------------- /examples/browser/real_browser.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | 
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use import Agent 12 | from browser_use.browser import BrowserProfile, BrowserSession 13 | from browser_use.llm import ChatOpenAI 14 | 15 | browser_profile = BrowserProfile( 16 | # NOTE: you need to close your chrome browser - so that this can open your browser in debug mode 17 | executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', 18 | user_data_dir='~/.config/browseruse/profiles/default', 19 | headless=False, 20 | ) 21 | browser_session = BrowserSession(browser_profile=browser_profile) 22 | 23 | 24 | async def main(): 25 | agent = Agent( 26 | task='Find todays DOW stock price', 27 | llm=ChatOpenAI(model='gpt-4.1'), 28 | browser_session=browser_session, 29 | ) 30 | 31 | await agent.run() 32 | await browser_session.close() 33 | 34 | input('Press Enter to close...') 35 | 36 | 37 | if __name__ == '__main__': 38 | asyncio.run(main()) 39 | -------------------------------------------------------------------------------- /examples/browser/using_cdp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple demonstration of the CDP feature. 3 | 4 | To test this locally, follow these steps: 5 | 1. Create a shortcut for the executable Chrome file. 6 | 2. Add the following argument to the shortcut: 7 | - On Windows: `--remote-debugging-port=9222` 8 | 3. Open a web browser and navigate to `http://localhost:9222/json/version` to verify that the Remote Debugging Protocol (CDP) is running. 9 | 4. Launch this example. 10 | 11 | @dev You need to set the `GOOGLE_API_KEY` environment variable before proceeding. 
12 | """ 13 | 14 | import asyncio 15 | import os 16 | import sys 17 | 18 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 19 | 20 | from dotenv import load_dotenv 21 | 22 | load_dotenv() 23 | 24 | 25 | from browser_use import Agent, Controller 26 | from browser_use.browser import BrowserProfile, BrowserSession 27 | from browser_use.llm import ChatGoogle 28 | 29 | api_key = os.getenv('GOOGLE_API_KEY') 30 | if not api_key: 31 | raise ValueError('GOOGLE_API_KEY is not set') 32 | 33 | browser_session = BrowserSession( 34 | browser_profile=BrowserProfile( 35 | headless=False, 36 | ), 37 | cdp_url='http://localhost:9222', 38 | ) 39 | controller = Controller() 40 | 41 | 42 | async def main(): 43 | task = 'In docs.google.com write my Papa a quick thank you for everything letter \n - Magnus' 44 | task += ' and save the document as pdf' 45 | # Assert api_key is not None to satisfy type checker 46 | assert api_key is not None, 'GOOGLE_API_KEY must be set' 47 | model = ChatGoogle(model='gemini-2.0-flash-exp', api_key=api_key) 48 | agent = Agent( 49 | task=task, 50 | llm=model, 51 | controller=controller, 52 | browser_session=browser_session, 53 | ) 54 | 55 | await agent.run() 56 | await browser_session.close() 57 | 58 | input('Press Enter to close...') 59 | 60 | 61 | if __name__ == '__main__': 62 | asyncio.run(main()) 63 | -------------------------------------------------------------------------------- /examples/custom-functions/2fa.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import os 4 | import sys 5 | 6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 7 | 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | 12 | import pyotp # type: ignore 13 | 14 | from browser_use import ActionResult, Agent, Controller 15 | from browser_use.llm import ChatOpenAI 16 | 17 | # Set up logging 18 | 
logging.basicConfig(level=logging.INFO) 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | controller = Controller() 23 | 24 | 25 | @controller.registry.action('Get 2FA code from when OTP is required') 26 | async def get_otp_2fa() -> ActionResult: 27 | """ 28 | Custom action to retrieve 2FA/MFA code from OTP secret key using pyotp. 29 | The OTP secret key should be set in the environment variable OTP_SECRET_KEY. 30 | """ 31 | secret_key = os.environ.get('OTP_SECRET_KEY') 32 | if not secret_key: 33 | raise ValueError('OTP_SECRET_KEY environment variable is not set') 34 | 35 | totp = pyotp.TOTP(secret_key, digits=6) 36 | code = totp.now() 37 | return ActionResult(extracted_content=code) 38 | 39 | 40 | async def main(): 41 | # Example task using the 1Password 2FA action 42 | task = """ 43 | Steps: 44 | 1. Go to https://authenticationtest.com/totpChallenge/ and try to log in. 45 | 2. If prompted for 2FA code: 46 | 2.1. Use the get_2fa_code action to retrieve the 2FA code. 47 | 2.2. Submit the code provided by the get_2fa_code action. 48 | 49 | Considerations: 50 | - ALWAYS use the get_2fa_code action to retrieve the 2FA code if needed. 51 | - NEVER skip the 2FA step if the page requires it. 52 | - NEVER extract the code from the page. 53 | - NEVER use a code that is not generated by the get_2fa_code action. 54 | - NEVER hallucinate the 2FA code, always use the get_2fa_code action to get it. 55 | 56 | You are completely FORBIDDEN to use any other method to get the 2FA code. 
57 | """ 58 | 59 | model = ChatOpenAI(model='gpt-4.1') 60 | agent = Agent(task=task, llm=model, controller=controller) 61 | 62 | result = await agent.run() 63 | print(f'Task completed with result: {result}') 64 | 65 | 66 | if __name__ == '__main__': 67 | asyncio.run(main()) 68 | -------------------------------------------------------------------------------- /examples/custom-functions/clipboard.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | import pyperclip 12 | 13 | from browser_use import Agent, Controller 14 | from browser_use.agent.views import ActionResult 15 | from browser_use.browser import BrowserProfile, BrowserSession 16 | from browser_use.browser.types import Page 17 | from browser_use.llm import ChatOpenAI 18 | 19 | browser_profile = BrowserProfile( 20 | headless=False, 21 | ) 22 | controller = Controller() 23 | 24 | 25 | @controller.registry.action('Copy text to clipboard') 26 | def copy_to_clipboard(text: str): 27 | pyperclip.copy(text) 28 | return ActionResult(extracted_content=text) 29 | 30 | 31 | @controller.registry.action('Paste text from clipboard') 32 | async def paste_from_clipboard(page: Page): 33 | text = pyperclip.paste() 34 | # send text to browser 35 | await page.keyboard.type(text) 36 | 37 | return ActionResult(extracted_content=text) 38 | 39 | 40 | async def main(): 41 | task = 'Copy the text "Hello, world!" 
to the clipboard, then go to google.com and paste the text' 42 | model = ChatOpenAI(model='gpt-4.1') 43 | browser_session = BrowserSession(browser_profile=browser_profile) 44 | await browser_session.start() 45 | agent = Agent( 46 | task=task, 47 | llm=model, 48 | controller=controller, 49 | browser_session=browser_session, 50 | ) 51 | 52 | await agent.run() 53 | await browser_session.stop() 54 | 55 | input('Press Enter to close...') 56 | 57 | 58 | if __name__ == '__main__': 59 | asyncio.run(main()) 60 | -------------------------------------------------------------------------------- /examples/custom-functions/notification.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use import ActionResult, Agent, Controller 12 | from browser_use.llm import ChatOpenAI 13 | 14 | controller = Controller() 15 | 16 | 17 | @controller.registry.action('Done with task ') 18 | async def done(text: str): 19 | import yagmail # type: ignore 20 | 21 | # To send emails use 22 | # STEP 1: go to https://support.google.com/accounts/answer/185833 23 | # STEP 2: Create an app password (you can't use here your normal gmail password) 24 | # STEP 3: Use the app password in the code below for the password 25 | yag = yagmail.SMTP('your_email@gmail.com', 'your_app_password') 26 | yag.send( 27 | to='recipient@example.com', 28 | subject='Test Email', 29 | contents=f'result\n: {text}', 30 | ) 31 | 32 | return ActionResult(is_done=True, extracted_content='Email sent!') 33 | 34 | 35 | async def main(): 36 | task = 'go to brower-use.com and then done' 37 | model = ChatOpenAI(model='gpt-4.1') 38 | agent = Agent(task=task, llm=model, controller=controller) 39 | 40 | await agent.run() 41 | 42 | 43 | if __name__ == '__main__': 44 | 
asyncio.run(main()) 45 | -------------------------------------------------------------------------------- /examples/custom-functions/onepassword_2fa.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import os 4 | import sys 5 | 6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 7 | 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | 12 | from onepassword.client import Client # type: ignore # pip install onepassword-sdk 13 | 14 | from browser_use import ActionResult, Agent, Controller 15 | from browser_use.llm import ChatOpenAI 16 | 17 | # Set up logging 18 | logging.basicConfig(level=logging.INFO) 19 | logger = logging.getLogger(__name__) 20 | 21 | OP_SERVICE_ACCOUNT_TOKEN = os.getenv('OP_SERVICE_ACCOUNT_TOKEN') 22 | OP_ITEM_ID = os.getenv('OP_ITEM_ID') # Go to 1Password, right click on the item, click "Copy Secret Reference" 23 | 24 | 25 | controller = Controller() 26 | 27 | 28 | @controller.registry.action('Get 2FA code from 1Password for Google Account', domains=['*.google.com', 'google.com']) 29 | async def get_1password_2fa() -> ActionResult: 30 | """ 31 | Custom action to retrieve 2FA/MFA code from 1Password using onepassword.client SDK. 
32 | """ 33 | client = await Client.authenticate( 34 | # setup instructions: https://github.com/1Password/onepassword-sdk-python/#-get-started 35 | auth=OP_SERVICE_ACCOUNT_TOKEN, 36 | integration_name='Browser-Use', 37 | integration_version='v1.0.0', 38 | ) 39 | 40 | mfa_code = await client.secrets.resolve(f'op://Private/{OP_ITEM_ID}/One-time passcode') 41 | 42 | return ActionResult(extracted_content=mfa_code) 43 | 44 | 45 | async def main(): 46 | # Example task using the 1Password 2FA action 47 | task = 'Go to account.google.com, enter username and password, then if prompted for 2FA code, get 2FA code from 1Password for and enter it' 48 | 49 | model = ChatOpenAI(model='gpt-4.1') 50 | agent = Agent(task=task, llm=model, controller=controller) 51 | 52 | result = await agent.run() 53 | print(f'Task completed with result: {result}') 54 | 55 | 56 | if __name__ == '__main__': 57 | asyncio.run(main()) 58 | -------------------------------------------------------------------------------- /examples/custom-functions/save_pdf.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import re 4 | import sys 5 | from pathlib import Path 6 | 7 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 8 | 9 | from dotenv import load_dotenv 10 | 11 | load_dotenv() 12 | 13 | from browser_use import ActionResult, Agent, Controller 14 | from browser_use.browser.types import Page 15 | from browser_use.llm import ChatOpenAI 16 | 17 | # Initialize controller 18 | controller = Controller() 19 | 20 | download_path = Path.cwd() / 'downloads' 21 | download_path.mkdir(parents=True, exist_ok=True) 22 | 23 | 24 | # Save PDF - exact copy from original controller function 25 | @controller.registry.action('Save the current page as a PDF file') 26 | async def save_pdf(page: Page): 27 | short_url = re.sub(r'^https?://(?:www\.)?|/$', '', page.url) 28 | slug = re.sub(r'[^a-zA-Z0-9]+', '-', 
short_url).strip('-').lower() 29 | sanitized_filename = f'{slug}.pdf' 30 | 31 | await page.emulate_media(media='screen') 32 | await page.pdf(path=download_path / sanitized_filename, format='A4', print_background=False) 33 | msg = f'Saving page with URL {page.url} as PDF to {download_path / sanitized_filename}' 34 | return ActionResult(extracted_content=msg, include_in_memory=True, long_term_memory=f'Saved PDF to {sanitized_filename}') 35 | 36 | 37 | async def main(): 38 | """ 39 | Example task: Navigate to browser-use.com and save the page as a PDF 40 | """ 41 | task = """ 42 | Go to https://browser-use.com/ and save the page as a PDF file. 43 | """ 44 | 45 | # Initialize the language model 46 | model = ChatOpenAI(model='gpt-4.1-mini') 47 | 48 | # Create and run the agent 49 | agent = Agent(task=task, llm=model, controller=controller) 50 | 51 | result = await agent.run() 52 | print(f'🎯 Task completed: {result}') 53 | 54 | 55 | if __name__ == '__main__': 56 | asyncio.run(main()) 57 | -------------------------------------------------------------------------------- /examples/custom-functions/save_to_file_hugging_face.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from pydantic import BaseModel 12 | 13 | from browser_use.agent.service import Agent 14 | from browser_use.controller.service import Controller 15 | from browser_use.llm import ChatOpenAI 16 | 17 | # Initialize controller first 18 | controller = Controller() 19 | 20 | 21 | class Model(BaseModel): 22 | title: str 23 | url: str 24 | likes: int 25 | license: str 26 | 27 | 28 | class Models(BaseModel): 29 | models: list[Model] 30 | 31 | 32 | @controller.action('Save models', param_model=Models) 33 | def save_models(params: Models): 34 | with 
open('models.txt', 'a') as f: 35 | for model in params.models: 36 | f.write(f'{model.title} ({model.url}): {model.likes} likes, {model.license}\n') 37 | 38 | 39 | # video: https://preview.screen.studio/share/EtOhIk0P 40 | async def main(): 41 | task = 'Look up models with a license of cc-by-sa-4.0 and sort by most likes on Hugging face, save top 5 to file.' 42 | 43 | model = ChatOpenAI(model='gpt-4.1') 44 | agent = Agent(task=task, llm=model, controller=controller) 45 | 46 | await agent.run() 47 | 48 | 49 | if __name__ == '__main__': 50 | asyncio.run(main()) 51 | -------------------------------------------------------------------------------- /examples/custom-functions/solve_amazon_captcha.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from amazoncaptcha import AmazonCaptcha # type: ignore 12 | 13 | from browser_use import ActionResult 14 | from browser_use.agent.service import Agent 15 | from browser_use.browser import BrowserConfig, BrowserSession 16 | from browser_use.controller.service import Controller 17 | from browser_use.llm import ChatOpenAI 18 | 19 | browser_profile = BrowserConfig(headless=False) 20 | 21 | # Initialize controller first 22 | controller = Controller() 23 | 24 | 25 | @controller.action( 26 | 'Solve Amazon text based captcha', 27 | domains=[ 28 | '*.amazon.com', 29 | '*.amazon.co.uk', 30 | '*.amazon.ca', 31 | '*.amazon.de', 32 | '*.amazon.es', 33 | '*.amazon.fr', 34 | '*.amazon.it', 35 | '*.amazon.co.jp', 36 | '*.amazon.in', 37 | '*.amazon.cn', 38 | '*.amazon.com.sg', 39 | '*.amazon.com.mx', 40 | '*.amazon.ae', 41 | '*.amazon.com.br', 42 | '*.amazon.nl', 43 | '*.amazon.com.au', 44 | '*.amazon.com.tr', 45 | '*.amazon.sa', 46 | '*.amazon.se', 47 | '*.amazon.pl', 48 | ], 49 | ) 50 | async 
def solve_amazon_captcha(browser_session: BrowserSession): 51 | page = await browser_session.get_current_page() 52 | 53 | # Find the captcha image and extract its src 54 | captcha_img = page.locator('img[src*="amazon.com/captcha"]') 55 | link = await captcha_img.get_attribute('src') 56 | 57 | if not link: 58 | raise ValueError('Could not find captcha image on the page') 59 | 60 | captcha = AmazonCaptcha.fromlink(link) 61 | solution = captcha.solve() 62 | if not solution or solution == 'Not solved': 63 | raise ValueError('Captcha could not be solved') 64 | 65 | await page.locator('#captchacharacters').fill(solution) 66 | await page.locator('button[type="submit"]').click() 67 | 68 | return ActionResult(extracted_content=solution) 69 | 70 | 71 | async def main(): 72 | task = 'Go to https://www.amazon.com/errors/validateCaptcha and solve the captcha using the solve_amazon_captcha tool' 73 | 74 | model = ChatOpenAI(model='gpt-4.1') 75 | browser_session = BrowserSession(browser_profile=browser_profile) 76 | await browser_session.start() 77 | agent = Agent(task=task, llm=model, controller=controller, browser_session=browser_session) 78 | 79 | await agent.run() 80 | await browser_session.stop() 81 | 82 | input('Press Enter to close...') 83 | 84 | 85 | if __name__ == '__main__': 86 | asyncio.run(main()) 87 | -------------------------------------------------------------------------------- /examples/features/cross_origin_iframes.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example of how browser-use supports cross-origin iframes. 3 | 4 | @dev You need to add OPENAI_API_KEY to your environment variables. 
5 | """ 6 | 7 | import asyncio 8 | import os 9 | import sys 10 | 11 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 12 | 13 | from dotenv import load_dotenv 14 | 15 | load_dotenv() 16 | 17 | from browser_use import Agent, Controller 18 | from browser_use.browser import BrowserProfile, BrowserSession 19 | from browser_use.llm import ChatOpenAI 20 | 21 | if not os.getenv('OPENAI_API_KEY'): 22 | raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.') 23 | 24 | 25 | browser_profile = BrowserProfile( 26 | executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', 27 | ) 28 | browser_session = BrowserSession(browser_profile=browser_profile) 29 | controller = Controller() 30 | 31 | 32 | async def main(): 33 | agent = Agent( 34 | task='Click "Go cross-site (simple page)" button on https://csreis.github.io/tests/cross-site-iframe.html then tell me the text within', 35 | llm=ChatOpenAI(model='gpt-4.1', temperature=0.0), 36 | controller=controller, 37 | browser_session=browser_session, 38 | ) 39 | 40 | await agent.run() 41 | await browser_session.close() 42 | 43 | input('Press Enter to close...') 44 | 45 | 46 | if __name__ == '__main__': 47 | try: 48 | asyncio.run(main()) 49 | except Exception as e: 50 | print(e) 51 | -------------------------------------------------------------------------------- /examples/features/custom_output.py: -------------------------------------------------------------------------------- 1 | """ 2 | Show how to use custom outputs. 3 | 4 | @dev You need to add OPENAI_API_KEY to your environment variables. 
5 | """ 6 | 7 | import asyncio 8 | import os 9 | import sys 10 | 11 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 12 | 13 | from dotenv import load_dotenv 14 | 15 | load_dotenv() 16 | 17 | from pydantic import BaseModel 18 | 19 | from browser_use import Agent, Controller 20 | from browser_use.llm import ChatOpenAI 21 | 22 | 23 | class Post(BaseModel): 24 | post_title: str 25 | post_url: str 26 | num_comments: int 27 | hours_since_post: int 28 | 29 | 30 | class Posts(BaseModel): 31 | posts: list[Post] 32 | 33 | 34 | controller = Controller(output_model=Posts) 35 | 36 | 37 | async def main(): 38 | task = 'Go to hackernews show hn and give me the first 5 posts' 39 | model = ChatOpenAI(model='gpt-4.1') 40 | agent = Agent(task=task, llm=model, controller=controller) 41 | 42 | history = await agent.run() 43 | 44 | result = history.final_result() 45 | if result: 46 | parsed: Posts = Posts.model_validate_json(result) 47 | 48 | for post in parsed.posts: 49 | print('\n--------------------------------') 50 | print(f'Title: {post.post_title}') 51 | print(f'URL: {post.post_url}') 52 | print(f'Comments: {post.num_comments}') 53 | print(f'Hours since post: {post.hours_since_post}') 54 | else: 55 | print('No result') 56 | 57 | 58 | if __name__ == '__main__': 59 | asyncio.run(main()) 60 | -------------------------------------------------------------------------------- /examples/features/custom_system_prompt.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import os 4 | import sys 5 | 6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 7 | 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | 12 | try: 13 | from lmnr import Laminar 14 | 15 | Laminar.initialize(project_api_key=os.getenv('LMNR_PROJECT_API_KEY')) 16 | except Exception as e: 17 | print(f'Error initializing Laminar: {e}') 18 | 19 | 20 | from 
browser_use import Agent 21 | from browser_use.llm import ChatOpenAI 22 | 23 | extend_system_message = ( 24 | 'REMEMBER the most important RULE: ALWAYS open first a new tab and go first to url wikipedia.com no matter the task!!!' 25 | ) 26 | 27 | # or use override_system_message to completely override the system prompt 28 | 29 | 30 | async def main(): 31 | task = 'do google search to find images of Elon Musk' 32 | model = ChatOpenAI(model='gpt-4.1') 33 | agent = Agent(task=task, llm=model, extend_system_message=extend_system_message) 34 | 35 | print( 36 | json.dumps( 37 | agent.message_manager.system_prompt.model_dump(exclude_unset=True), 38 | indent=4, 39 | ) 40 | ) 41 | 42 | await agent.run() 43 | 44 | 45 | if __name__ == '__main__': 46 | asyncio.run(main()) 47 | -------------------------------------------------------------------------------- /examples/features/custom_user_agent.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import asyncio 3 | import os 4 | import sys 5 | 6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 7 | 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | 12 | from browser_use import Agent 13 | from browser_use.browser import BrowserProfile, BrowserSession 14 | from browser_use.controller.service import Controller 15 | from browser_use.llm import ChatAnthropic, ChatOpenAI 16 | 17 | 18 | def get_llm(provider: str): 19 | if provider == 'anthropic': 20 | return ChatAnthropic(model='claude-3-5-sonnet-20240620', temperature=0.0) 21 | elif provider == 'openai': 22 | return ChatOpenAI(model='gpt-4.1', temperature=0.0) 23 | 24 | else: 25 | raise ValueError(f'Unsupported provider: {provider}') 26 | 27 | 28 | # NOTE: This example is to find your current user agent string to use it in the browser_context 29 | task = 'go to https://whatismyuseragent.com and find the current user agent string ' 30 | 31 | 32 | controller = Controller() 
33 | 34 | 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument('--query', type=str, help='The query to process', default=task) 37 | parser.add_argument( 38 | '--provider', 39 | type=str, 40 | choices=['openai', 'anthropic'], 41 | default='openai', 42 | help='The model provider to use (default: openai)', 43 | ) 44 | 45 | args = parser.parse_args() 46 | 47 | llm = get_llm(args.provider) 48 | 49 | browser_session = BrowserSession( 50 | browser_profile=BrowserProfile( 51 | user_agent='foobarfoo', 52 | user_data_dir='~/.config/browseruse/profiles/default', 53 | ) 54 | ) 55 | 56 | agent = Agent( 57 | task=args.query, 58 | llm=llm, 59 | controller=controller, 60 | browser_session=browser_session, 61 | use_vision=True, 62 | max_actions_per_step=1, 63 | ) 64 | 65 | 66 | async def main(): 67 | await agent.run(max_steps=25) 68 | 69 | input('Press Enter to close the browser...') 70 | await browser_session.close() 71 | 72 | 73 | asyncio.run(main()) 74 | -------------------------------------------------------------------------------- /examples/features/download_file.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | 12 | from browser_use import Agent 13 | from browser_use.browser import BrowserSession 14 | from browser_use.llm import ChatGoogle 15 | 16 | api_key = os.getenv('GOOGLE_API_KEY') 17 | if not api_key: 18 | raise ValueError('GOOGLE_API_KEY is not set') 19 | 20 | assert api_key is not None, 'GOOGLE_API_KEY must be set' 21 | llm = ChatGoogle(model='gemini-2.0-flash-exp', api_key=api_key) 22 | 23 | from browser_use.browser import BrowserProfile 24 | 25 | browser_session = BrowserSession( 26 | browser_profile=BrowserProfile( 27 | downloads_path='~/Downloads', 28 | 
user_data_dir='~/.config/browseruse/profiles/default', 29 | ) 30 | ) 31 | 32 | 33 | async def run_download(): 34 | agent = Agent( 35 | task='Go to "https://file-examples.com/" and download the smallest doc file.', 36 | llm=llm, 37 | max_actions_per_step=8, 38 | use_vision=True, 39 | browser_session=browser_session, 40 | ) 41 | await agent.run(max_steps=25) 42 | 43 | 44 | if __name__ == '__main__': 45 | asyncio.run(run_download()) 46 | -------------------------------------------------------------------------------- /examples/features/drag_drop.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | 12 | from browser_use import Agent 13 | from browser_use.llm import ChatGoogle 14 | 15 | api_key = os.getenv('GOOGLE_API_KEY') 16 | if not api_key: 17 | raise ValueError('GOOGLE_API_KEY is not set') 18 | 19 | # API key is automatically set from the environment variable GOOGLE_API_KEY 20 | llm = ChatGoogle(model='gemini-2.0-flash-exp') 21 | 22 | 23 | task_1 = """ 24 | Navigate to: https://sortablejs.github.io/Sortable/. 25 | Then scroll down to the first examplw with title "Simple list example". 26 | Drag the element with name "item 1" to below the element with name "item 3". 27 | """ 28 | 29 | 30 | task_2 = """ 31 | Navigate to: https://excalidraw.com/. 32 | Click on the pencil icon (with index 40). 33 | Then draw a triangle in the canvas. 34 | Draw the triangle starting from coordinate (400,400). 35 | You can use the drag and drop action to draw the triangle. 
36 | """ 37 | 38 | 39 | async def run_search(): 40 | agent = Agent( 41 | task=task_1, 42 | llm=llm, 43 | max_actions_per_step=1, 44 | use_vision=True, 45 | ) 46 | 47 | await agent.run(max_steps=25) 48 | 49 | 50 | if __name__ == '__main__': 51 | asyncio.run(run_search()) 52 | -------------------------------------------------------------------------------- /examples/features/follow_up_tasks.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use import Agent, Controller 12 | from browser_use.browser import BrowserProfile, BrowserSession 13 | from browser_use.llm import ChatOpenAI 14 | 15 | # Initialize the model 16 | llm = ChatOpenAI( 17 | model='gpt-4.1', 18 | temperature=0.0, 19 | ) 20 | # Get your chrome path 21 | browser_session = BrowserSession( 22 | browser_profile=BrowserProfile( 23 | executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', 24 | keep_alive=True, 25 | user_data_dir='~/.config/browseruse/profiles/default', 26 | ), 27 | ) 28 | 29 | controller = Controller() 30 | 31 | 32 | task = 'Find the founders of browser-use and draft them a short personalized message' 33 | 34 | agent = Agent(task=task, llm=llm, controller=controller, browser_session=browser_session) 35 | 36 | 37 | async def main(): 38 | await agent.run() 39 | 40 | # new_task = input('Type in a new task: ') 41 | new_task = 'Find an image of the founders' 42 | 43 | agent.add_new_task(new_task) 44 | 45 | await agent.run() 46 | 47 | 48 | if __name__ == '__main__': 49 | asyncio.run(main()) 50 | -------------------------------------------------------------------------------- /examples/features/initial_actions.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 
| import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use import Agent 12 | from browser_use.llm import ChatOpenAI 13 | 14 | llm = ChatOpenAI(model='gpt-4.1') 15 | 16 | initial_actions = [ 17 | {'go_to_url': {'url': 'https://www.google.com', 'new_tab': True}}, 18 | {'go_to_url': {'url': 'https://en.wikipedia.org/wiki/Randomness', 'new_tab': True}}, 19 | {'scroll_down': {'amount': 1000}}, 20 | ] 21 | agent = Agent( 22 | task='What theories are displayed on the page?', 23 | initial_actions=initial_actions, 24 | llm=llm, 25 | ) 26 | 27 | 28 | async def main(): 29 | await agent.run(max_steps=10) 30 | 31 | 32 | if __name__ == '__main__': 33 | asyncio.run(main()) 34 | -------------------------------------------------------------------------------- /examples/features/multi-tab_handling.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example of multi-tab handling: the agent opens several tabs and then switches back to the first one. 3 | 4 | @dev You need to add OPENAI_API_KEY to your environment variables. 
5 | """ 6 | 7 | import asyncio 8 | import os 9 | import sys 10 | 11 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 12 | 13 | from dotenv import load_dotenv 14 | 15 | load_dotenv() 16 | 17 | from browser_use import Agent 18 | from browser_use.llm import ChatOpenAI 19 | 20 | # video: https://preview.screen.studio/share/clenCmS6 21 | llm = ChatOpenAI(model='gpt-4.1') 22 | agent = Agent( 23 | task='open 3 tabs with elon musk, trump, and steve jobs, then go back to the first and stop', 24 | llm=llm, 25 | ) 26 | 27 | 28 | async def main(): 29 | await agent.run() 30 | 31 | 32 | asyncio.run(main()) 33 | -------------------------------------------------------------------------------- /examples/features/multiple_tasks.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | 12 | from browser_use import Agent 13 | from browser_use.browser import BrowserSession 14 | from browser_use.browser.types import async_playwright 15 | from browser_use.llm import ChatGoogle 16 | 17 | api_key = os.getenv('GOOGLE_API_KEY') 18 | 19 | if not api_key: 20 | raise ValueError('GOOGLE_API_KEY is not set') 21 | 22 | llm = ChatGoogle(model='gemini-2.0-flash', api_key=api_key) 23 | 24 | 25 | async def main(): 26 | async with async_playwright() as p: 27 | browser = await p.chromium.launch( 28 | headless=False, 29 | ) 30 | 31 | context = await browser.new_context( 32 | viewport={'width': 1502, 'height': 853}, 33 | ignore_https_errors=True, 34 | ) 35 | 36 | agent = Agent( 37 | browser_session=BrowserSession( 38 | browser_context=context, 39 | ), 40 | task='Go to https://browser-use.com/', 41 | llm=llm, 42 | ) 43 | 44 | try: 45 | result = await agent.run() 46 | print(f'First task was {"successful" if 
result.is_successful else "not successful"}') 47 | 48 | if not result.is_successful: 49 | raise RuntimeError('Failed to navigate to the initial page.') 50 | 51 | agent.add_new_task('Navigate to the documentation page') 52 | 53 | result = await agent.run() 54 | print(f'Second task was {"successful" if result.is_successful else "not successful"}') 55 | 56 | if not result.is_successful: 57 | raise RuntimeError('Failed to navigate to the documentation page.') 58 | 59 | while True: 60 | next_task = input('Write your next task or leave empty to exit\n> ') 61 | 62 | if not next_task.strip(): 63 | print('Exiting...') 64 | break 65 | 66 | agent.add_new_task(next_task) 67 | result = await agent.run() 68 | 69 | print(f"Task '{next_task}' was {'successful' if result.is_successful else 'not successful'}") 70 | 71 | if not result.is_successful: 72 | print('Failed to complete the task. Please try again.') 73 | continue 74 | 75 | finally: 76 | await context.close() 77 | await browser.close() 78 | 79 | 80 | if __name__ == '__main__': 81 | asyncio.run(main()) 82 | -------------------------------------------------------------------------------- /examples/features/outsource_state.py: -------------------------------------------------------------------------------- 1 | """ 2 | Show how to outsource the agent state: run the agent step by step, save its state to a JSON file, and load it back. 3 | 4 | @dev You need to add OPENAI_API_KEY to your environment variables. 
5 | """ 6 | 7 | import asyncio 8 | import os 9 | import sys 10 | 11 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 12 | 13 | from dotenv import load_dotenv 14 | 15 | load_dotenv() 16 | 17 | import anyio 18 | 19 | from browser_use import Agent 20 | from browser_use.agent.views import AgentState 21 | from browser_use.browser import BrowserProfile, BrowserSession 22 | from browser_use.llm import ChatOpenAI 23 | 24 | 25 | async def main(): 26 | task = 'Go to hackernews show hn and give me the first 5 posts' 27 | 28 | browser_profile = BrowserProfile( 29 | headless=True, 30 | ) 31 | browser_session = BrowserSession(browser_profile=browser_profile) 32 | 33 | agent_state = AgentState() 34 | 35 | for i in range(10): 36 | agent = Agent( 37 | task=task, 38 | llm=ChatOpenAI(model='gpt-4.1'), 39 | browser_session=browser_session, 40 | injected_agent_state=agent_state, 41 | page_extraction_llm=ChatOpenAI(model='gpt-4.1-mini'), 42 | ) 43 | 44 | done, valid = await agent.take_step() 45 | print(f'Step {i}: Done: {done}, Valid: {valid}') 46 | 47 | if done and valid: 48 | break 49 | 50 | # Save state to file 51 | async with await anyio.open_file('agent_state.json', 'w') as f: 52 | serialized = agent_state.model_dump_json(exclude={'history'}) 53 | await f.write(serialized) 54 | 55 | # Load state back from file 56 | async with await anyio.open_file('agent_state.json', 'r') as f: 57 | loaded_json = await f.read() 58 | agent_state = AgentState.model_validate_json(loaded_json) 59 | 60 | break 61 | 62 | 63 | if __name__ == '__main__': 64 | asyncio.run(main()) 65 | -------------------------------------------------------------------------------- /examples/features/parallel_agents.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import 
load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use.agent.service import Agent 12 | from browser_use.browser import BrowserProfile, BrowserSession 13 | from browser_use.llm import ChatOpenAI 14 | 15 | browser_session = BrowserSession( 16 | browser_profile=BrowserProfile( 17 | keep_alive=True, 18 | headless=False, 19 | record_video_dir='./tmp/recordings', 20 | user_data_dir='~/.config/browseruse/profiles/default', 21 | ) 22 | ) 23 | llm = ChatOpenAI(model='gpt-4.1') 24 | 25 | 26 | async def main(): 27 | await browser_session.start() 28 | agents = [ 29 | Agent(task=task, llm=llm, browser_session=browser_session) 30 | for task in [ 31 | 'Search Google for weather in Tokyo', 32 | 'Check Reddit front page title', 33 | 'Look up Bitcoin price on Coinbase', 34 | 'Find NASA image of the day', 35 | 'Check top story on CNN', 36 | # 'Search latest SpaceX launch date', 37 | # 'Look up population of Paris', 38 | # 'Find current time in Sydney', 39 | # 'Check who won last Super Bowl', 40 | # 'Search trending topics on Twitter', 41 | ] 42 | ] 43 | 44 | print(await asyncio.gather(*[agent.run() for agent in agents])) 45 | await browser_session.kill() 46 | 47 | 48 | if __name__ == '__main__': 49 | asyncio.run(main()) 50 | -------------------------------------------------------------------------------- /examples/features/pause_agent.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | import threading 5 | 6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 7 | 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | 12 | from browser_use import Agent 13 | from browser_use.llm import ChatOpenAI 14 | 15 | 16 | class AgentController: 17 | def __init__(self): 18 | llm = ChatOpenAI(model='gpt-4.1') 19 | self.agent = Agent( 20 | task='open in one action https://www.google.com, https://www.wikipedia.org, https://www.youtube.com, 
https://www.github.com, https://amazon.com', 21 | llm=llm, 22 | ) 23 | self.running = False 24 | 25 | async def run_agent(self): 26 | """Run the agent""" 27 | self.running = True 28 | await self.agent.run() 29 | 30 | def start(self): 31 | """Start the agent in a separate thread""" 32 | loop = asyncio.new_event_loop() 33 | asyncio.set_event_loop(loop) 34 | loop.run_until_complete(self.run_agent()) 35 | 36 | def pause(self): 37 | """Pause the agent""" 38 | self.agent.pause() 39 | 40 | def resume(self): 41 | """Resume the agent""" 42 | self.agent.resume() 43 | 44 | def stop(self): 45 | """Stop the agent""" 46 | self.agent.stop() 47 | self.running = False 48 | 49 | 50 | def print_menu(): 51 | print('\nAgent Control Menu:') 52 | print('1. Start') 53 | print('2. Pause') 54 | print('3. Resume') 55 | print('4. Stop') 56 | print('5. Exit') 57 | 58 | 59 | async def main(): 60 | controller = AgentController() 61 | agent_thread = None 62 | 63 | while True: 64 | print_menu() 65 | try: 66 | choice = input('Enter your choice (1-5): ') 67 | except KeyboardInterrupt: 68 | choice = '5' 69 | 70 | if choice == '1' and not agent_thread: 71 | print('Starting agent...') 72 | agent_thread = threading.Thread(target=controller.start) 73 | agent_thread.start() 74 | 75 | elif choice == '2': 76 | print('Pausing agent...') 77 | controller.pause() 78 | 79 | elif choice == '3': 80 | print('Resuming agent...') 81 | controller.resume() 82 | 83 | elif choice == '4': 84 | print('Stopping agent...') 85 | controller.stop() 86 | if agent_thread: 87 | agent_thread.join() 88 | agent_thread = None 89 | 90 | elif choice == '5': 91 | print('Exiting...') 92 | if controller.running: 93 | controller.stop() 94 | if agent_thread: 95 | agent_thread.join() 96 | break 97 | 98 | await asyncio.sleep(0.1) # Small delay to prevent CPU spinning 99 | 100 | 101 | if __name__ == '__main__': 102 | asyncio.run(main()) 103 | -------------------------------------------------------------------------------- 
/examples/features/planner.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use import Agent 12 | from browser_use.llm import ChatOpenAI 13 | 14 | llm = ChatOpenAI(model='gpt-4.1', temperature=0.0) 15 | planner_llm = ChatOpenAI( 16 | model='o3-mini', 17 | ) 18 | task = 'your task' 19 | 20 | 21 | agent = Agent(task=task, llm=llm, planner_llm=planner_llm, use_vision_for_planner=False, planner_interval=1) 22 | 23 | 24 | async def main(): 25 | await agent.run() 26 | 27 | 28 | if __name__ == '__main__': 29 | asyncio.run(main()) 30 | -------------------------------------------------------------------------------- /examples/features/restrict_urls.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use import Agent 12 | from browser_use.browser import BrowserProfile, BrowserSession 13 | from browser_use.llm import ChatOpenAI 14 | 15 | llm = ChatOpenAI(model='gpt-4.1', temperature=0.0) 16 | task = ( 17 | "go to google.com and search for openai.com and click on the first link then extract content and scroll down - what's there?" 
18 | ) 19 | 20 | allowed_domains = ['google.com'] 21 | 22 | browser_session = BrowserSession( 23 | browser_profile=BrowserProfile( 24 | executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', 25 | allowed_domains=allowed_domains, 26 | user_data_dir='~/.config/browseruse/profiles/default', 27 | ), 28 | ) 29 | 30 | agent = Agent( 31 | task=task, 32 | llm=llm, 33 | browser_session=browser_session, 34 | ) 35 | 36 | 37 | async def main(): 38 | await agent.run(max_steps=25) 39 | 40 | input('Press Enter to close the browser...') 41 | await browser_session.close() 42 | 43 | 44 | asyncio.run(main()) 45 | -------------------------------------------------------------------------------- /examples/features/result_processing.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | from pprint import pprint 5 | 6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 7 | 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | 12 | from browser_use import Agent 13 | from browser_use.agent.views import AgentHistoryList 14 | from browser_use.browser import BrowserProfile, BrowserSession 15 | from browser_use.llm import ChatOpenAI 16 | 17 | llm = ChatOpenAI(model='gpt-4.1') 18 | 19 | 20 | async def main(): 21 | async with BrowserSession( 22 | browser_profile=BrowserProfile( 23 | headless=False, 24 | traces_dir='./tmp/result_processing', 25 | window_size={'width': 1280, 'height': 1000}, 26 | user_data_dir='~/.config/browseruse/profiles/default', 27 | ) 28 | ) as browser_session: 29 | agent = Agent( 30 | task="go to google.com and type 'OpenAI' click search and give me the first url", 31 | llm=llm, 32 | browser_session=browser_session, 33 | ) 34 | history: AgentHistoryList = await agent.run(max_steps=3) 35 | 36 | print('Final Result:') 37 | pprint(history.final_result(), indent=4) 38 | 39 | print('\nErrors:') 40 | 
pprint(history.errors(), indent=4) 41 | 42 | # e.g. xPaths the model clicked on 43 | print('\nModel Outputs:') 44 | pprint(history.model_actions(), indent=4) 45 | 46 | print('\nThoughts:') 47 | pprint(history.model_thoughts(), indent=4) 48 | 49 | 50 | if __name__ == '__main__': 51 | asyncio.run(main()) 52 | -------------------------------------------------------------------------------- /examples/features/save_trace.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use.agent.service import Agent 12 | from browser_use.browser import BrowserProfile, BrowserSession 13 | from browser_use.llm import ChatOpenAI 14 | 15 | llm = ChatOpenAI(model='gpt-4.1', temperature=0.0) 16 | 17 | 18 | async def main(): 19 | browser_session = BrowserSession( 20 | browser_profile=BrowserProfile( 21 | traces_dir='./tmp/traces/', 22 | user_data_dir='~/.config/browseruse/profiles/default', 23 | ) 24 | ) 25 | 26 | async with browser_session: 27 | agent = Agent( 28 | task='Go to hackernews, then go to apple.com and return all titles of open tabs', 29 | llm=llm, 30 | browser_session=browser_session, 31 | ) 32 | await agent.run() 33 | 34 | 35 | asyncio.run(main()) 36 | -------------------------------------------------------------------------------- /examples/features/sensitive_data.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use import Agent 12 | from browser_use.browser import BrowserProfile 13 | from browser_use.llm import ChatOpenAI 14 | 15 | try: 16 | from 
# Simple case: the model will see x_name and x_password, but never the actual values.
# sensitive_data = {'x_name': 'my_x_name', 'x_password': 'my_x_password'}

# Advanced case: domain-specific credentials with reusable data
# Define a single credential set that can be reused.
# Keys are the placeholder names; values are the secrets they stand for.
company_credentials = {'company_username': 'user@example.com', 'company_password': 'securePassword123'}

# Map the same credentials to multiple domains for secure access control.
# Each key is a URL pattern (some contain `*`) and each value is a
# placeholder -> secret dict. The same keys are reused further down in this
# example as the browser profile's allowed_domains.
# NOTE(review): exact `*` matching rules are defined by browser-use, not here — confirm before relying on them.
# Type annotation to satisfy pyright
sensitive_data: dict[str, str | dict[str, str]] = {
    'https://example.com': company_credentials,
    'https://admin.example.com': company_credentials,
    'https://*.example-staging.com': company_credentials,
    'http*://test.example.com': company_credentials,
    # You can also add domain-specific credentials
    'https://*.google.com': {'g_email': 'user@gmail.com', 'g_pass': 'google_password'},
}
# Restrict navigation to the domain patterns that carry credentials in
# sensitive_data, plus one additional allowed pattern.
browser_session = BrowserSession(
    browser_profile=BrowserProfile(
        allowed_domains=[*sensitive_data, 'https://*.trusted-partner.com'],
    )
)
# we overwrite done() in this example to demonstrate the validator
@controller.registry.action('Done with task', param_model=DoneResult)
async def done(params: DoneResult):
    """Print a well-formed done result, then deliberately return a wrong value."""
    print(ActionResult(is_done=True, extracted_content=params.model_dump_json()))
    # NOTE: this is clearly wrong - to demonstrate the validator
    return 'blablabla'
async def main():
    """Run the agent, then delete its file-system directory once the user confirms."""
    # The run history is not used in this example, so it is not bound to a name.
    await agent.run()
    input('Press Enter to clean the file system...')
    # clean the file system
    shutil.rmtree(str(agent_dir / 'fs'))
async def main():
    """Run the agent, then print its token usage and the elapsed time."""
    import time

    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments the way a time.time() difference can.
    start_time = time.perf_counter()
    history = await agent.run()
    # token usage
    print(history.usage)
    end_time = time.perf_counter()
    print(f'Time taken: {end_time - start_time} seconds')
async def main():
    """Run the agent, print its final result, then remove the 'fs' directory on confirmation."""
    history = await agent.run()
    final = history.final_result()
    print(f'Final result: {final}', flush=True)

    input('Press Enter to clean the file system...')
    # clean the file system
    fs_dir = agent_dir / 'fs'
    shutil.rmtree(str(fs_dir))
async def main():
    """Create an agent for a single Google search task and run it."""
    # Model first, then the agent that uses it for a simple search task.
    model = ChatOpenAI(model='gpt-4.1-mini')
    search_agent = Agent(
        task="Search Google for 'what is browser automation' and tell me the top 3 results",
        llm=model,
    )
    await search_agent.run()
11 | """ 12 | 13 | import asyncio 14 | import os 15 | import sys 16 | 17 | # Add the parent directory to the path so we can import browser_use 18 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 19 | 20 | from dotenv import load_dotenv 21 | 22 | load_dotenv() 23 | 24 | from browser_use import Agent 25 | from browser_use.llm.openai.chat import ChatOpenAI 26 | 27 | 28 | async def main(): 29 | # Initialize the model 30 | llm = ChatOpenAI(model='gpt-4.1-mini') 31 | 32 | # Define a form filling task 33 | task = """ 34 | Go to https://httpbin.org/forms/post and fill out the contact form with: 35 | - Customer name: John Doe 36 | - Telephone: 555-123-4567 37 | - Email: john.doe@example.com 38 | - Size: Medium 39 | - Topping: cheese 40 | - Delivery time: now 41 | - Comments: This is a test form submission 42 | 43 | Then submit the form and tell me what response you get. 44 | """ 45 | 46 | # Create and run the agent 47 | agent = Agent(task=task, llm=llm) 48 | await agent.run() 49 | 50 | 51 | if __name__ == '__main__': 52 | asyncio.run(main()) 53 | -------------------------------------------------------------------------------- /examples/getting_started/03_data_extraction.py: -------------------------------------------------------------------------------- 1 | """ 2 | Getting Started Example 3: Data Extraction 3 | 4 | This example demonstrates how to: 5 | - Navigate to a website with structured data 6 | - Extract specific information from the page 7 | - Process and organize the extracted data 8 | - Return structured results 9 | 10 | This builds on previous examples by showing how to get valuable data from websites. 
11 | """ 12 | 13 | import asyncio 14 | import os 15 | import sys 16 | 17 | # Add the parent directory to the path so we can import browser_use 18 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 19 | 20 | from dotenv import load_dotenv 21 | 22 | load_dotenv() 23 | 24 | from browser_use import Agent 25 | from browser_use.llm.openai.chat import ChatOpenAI 26 | 27 | 28 | async def main(): 29 | # Initialize the model 30 | llm = ChatOpenAI(model='gpt-4.1-mini') 31 | 32 | # Define a data extraction task 33 | task = """ 34 | Go to https://quotes.toscrape.com/ and extract the following information: 35 | - The first 5 quotes on the page 36 | - The author of each quote 37 | - The tags associated with each quote 38 | 39 | Present the information in a clear, structured format like: 40 | Quote 1: "[quote text]" - Author: [author name] - Tags: [tag1, tag2, ...] 41 | Quote 2: "[quote text]" - Author: [author name] - Tags: [tag1, tag2, ...] 42 | etc. 43 | """ 44 | 45 | # Create and run the agent 46 | agent = Agent(task=task, llm=llm) 47 | await agent.run() 48 | 49 | 50 | if __name__ == '__main__': 51 | asyncio.run(main()) 52 | -------------------------------------------------------------------------------- /examples/getting_started/04_multi_step_task.py: -------------------------------------------------------------------------------- 1 | """ 2 | Getting Started Example 4: Multi-Step Task 3 | 4 | This example demonstrates how to: 5 | - Perform a complex workflow with multiple steps 6 | - Navigate between different pages 7 | - Combine search, form filling, and data extraction 8 | - Handle a realistic end-to-end scenario 9 | 10 | This is the most advanced getting started example, combining all previous concepts. 
11 | """ 12 | 13 | import asyncio 14 | import os 15 | import sys 16 | 17 | # Add the parent directory to the path so we can import browser_use 18 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 19 | 20 | from dotenv import load_dotenv 21 | 22 | load_dotenv() 23 | 24 | from browser_use import Agent 25 | from browser_use.llm.openai.chat import ChatOpenAI 26 | 27 | 28 | async def main(): 29 | # Initialize the model 30 | llm = ChatOpenAI(model='gpt-4.1-mini') 31 | 32 | # Define a multi-step task 33 | task = """ 34 | I want you to research Python web scraping libraries. Here's what I need: 35 | 36 | 1. First, search Google for "best Python web scraping libraries 2024" 37 | 2. Find a reputable article or blog post about this topic 38 | 3. From that article, extract the top 3 recommended libraries 39 | 4. For each library, visit its official website or GitHub page 40 | 5. Extract key information about each library: 41 | - Name 42 | - Brief description 43 | - Main features or advantages 44 | - GitHub stars (if available) 45 | 46 | Present your findings in a summary format comparing the three libraries. 47 | """ 48 | 49 | # Create and run the agent 50 | agent = Agent(task=task, llm=llm) 51 | await agent.run() 52 | 53 | 54 | if __name__ == '__main__': 55 | asyncio.run(main()) 56 | -------------------------------------------------------------------------------- /examples/integrations/browserbase_stagehand.py: -------------------------------------------------------------------------------- 1 | """ 2 | EXPERIMENTAL: Integration example with Stagehand (browserbase) 3 | 4 | This example shows how to combine browser-use with Stagehand for advanced browser automation. 5 | Note: This requires the stagehand-py library to be installed separately: 6 | pip install stagehand-py 7 | 8 | The exact API may vary depending on the stagehand-py version. 
async def main():
    """Drive one Browserbase session alternately with Stagehand and Browser Use.

    Creates a Stagehand client from environment variables, initializes it,
    and then alternates between Stagehand ``act`` calls and Browser Use agents
    on ``stagehand.page``. Returns early when no page is available.
    """
    # Configure Stagehand
    # https://pypi.org/project/stagehand-py/
    # https://github.com/browserbase/stagehand-python-examples/blob/main/agent_example.py
    # Note: This example requires the stagehand-py library to be installed
    # pip install stagehand-py

    # Create StagehandConfig with correct parameters
    # The exact parameters depend on the stagehand-py version
    config = StagehandConfig(  # type: ignore
        apiKey=os.getenv('BROWSERBASE_API_KEY'),
        projectId=os.getenv('BROWSERBASE_PROJECT_ID'),
    )

    # Create a Stagehand client using the configuration object.
    stagehand = Stagehand(
        config=config,
        model_api_key=os.getenv('OPENAI_API_KEY'),
        # server_url=os.getenv('STAGEHAND_SERVER_URL'),
    )

    # Initialize - this creates a new session automatically.
    await stagehand.init()
    print(f'\nCreated new session: {stagehand.session_id}')
    print(f'🌐 View your live browser: https://www.browserbase.com/sessions/{stagehand.session_id}')

    # Check if stagehand has a page attribute
    if not (hasattr(stagehand, 'page') and stagehand.page):
        # Every step below operates on stagehand.page, so there is nothing
        # left to do without one. Previously execution continued after the
        # warning and used the missing page anyway.
        print('Warning: Stagehand page not available')
        return

    await stagehand.page.goto('https://google.com/')
    await stagehand.page.act('search for openai')

    # Combine with Browser Use
    agent = Agent(task='click the first result', page=stagehand.page)  # type: ignore
    await agent.run()

    # go back and forth
    await stagehand.page.act('open the 3 first links on the page in new tabs')  # type: ignore

    await Agent(task='click the first result', page=stagehand.page).run()  # type: ignore
# Make the app resolve its SlackBot dependency to the instance configured in this script.
app.dependency_overrides[SlackBot] = lambda: slack_bot

if __name__ == '__main__':
    import uvicorn

    # Pass the app object itself rather than an import string. The string
    # 'integrations.slack.slack_api:app' names the module differently from the
    # `examples.integrations.slack.slack_api` import used by this script, so
    # uvicorn would import a second copy of the module whose `app` does not
    # carry the dependency override set above.
    uvicorn.run(app, host='0.0.0.0', port=3000)
async def main():
    """Register a filesystem MCP server's tools on a controller and run an agent with them.

    The MCP client is always disconnected, including when registration or the
    agent run raises.
    """
    # Initialize controller
    controller = Controller()

    # Connect to a filesystem MCP server
    # This server provides tools to read/write files in a directory
    mcp_client = MCPClient(
        server_name='filesystem', command='npx', args=['@modelcontextprotocol/server-filesystem', os.path.expanduser('~/Desktop')]
    )

    # Connect and register MCP tools
    await mcp_client.connect()
    try:
        await mcp_client.register_to_controller(controller)

        # Create agent with MCP-enabled controller
        agent = Agent(
            task='List all files on the Desktop and read the content of any .txt files you find',
            llm=ChatOpenAI(model='gpt-4.1-mini'),
            controller=controller,
        )

        # Run the agent - it now has access to filesystem tools
        await agent.run()
    finally:
        # Disconnect when done, even on failure; previously an exception
        # skipped the disconnect call.
        await mcp_client.disconnect()
async def main() -> None:
    """Run the module-level Azure OpenAI agent for at most 10 steps.

    Afterwards blocks on ``input()`` until the user presses Enter.
    """
    await agent.run(max_steps=10)
    # NOTE(review): presumably keeps the process (and browser) alive for inspection — confirm.
    input('Press Enter to continue...')
async def main() -> None:
    """Run the module-level Anthropic-backed agent for at most 10 steps."""
    await agent.run(max_steps=10)
# Read the key once at import time; the script cannot do anything without it.
deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')
if deepseek_api_key is None:
    print('Make sure you have DEEPSEEK_API_KEY:')
    print('export DEEPSEEK_API_KEY=your_key')
    # A missing key is a failure, so exit with a non-zero status. Raising
    # SystemExit also avoids the `exit` helper, which is only injected by the
    # `site` module and is not guaranteed to exist.
    raise SystemExit(1)
async def main() -> None:
    """Run the module-level OpenAI agent for at most 10 steps.

    Afterwards blocks on ``input()`` until the user presses Enter.
    """
    await agent.run(max_steps=10)
    # NOTE(review): presumably keeps the process (and browser) alive for inspection — confirm.
    input('Press Enter to continue...')
4 | 5 | ## How to use 6 | 7 | ```python 8 | from langchain_openai import ChatOpenAI 9 | 10 | from browser_use import Agent 11 | from .chat import ChatLangchain 12 | 13 | async def main(): 14 | """Basic example using ChatLangchain with OpenAI through LangChain.""" 15 | 16 | # Create a LangChain model (OpenAI) 17 | langchain_model = ChatOpenAI( 18 | model='gpt-4.1-mini', 19 | temperature=0.1, 20 | ) 21 | 22 | # Wrap it with ChatLangchain to make it compatible with browser-use 23 | llm = ChatLangchain(chat=langchain_model) 24 | 25 | agent = Agent( 26 | task="Go to google.com and search for 'browser automation with Python'", 27 | llm=llm, 28 | ) 29 | 30 | history = await agent.run() 31 | 32 | print(history.history) 33 | ``` 34 | -------------------------------------------------------------------------------- /examples/models/langchain/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/9871be3363081dd88402dd7244f1997132929c4f/examples/models/langchain/__init__.py -------------------------------------------------------------------------------- /examples/models/langchain/example.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example of using LangChain models with browser-use. 3 | 4 | This example demonstrates how to: 5 | 1. Wrap a LangChain model with ChatLangchain 6 | 2. Use it with a browser-use Agent 7 | 3. 
async def main():
    """Run one search task through a LangChain model wrapped for browser-use.

    Returns the history object produced by the agent run.
    """
    # Wrap a LangChain OpenAI model so the agent can use it.
    llm = ChatLangchain(
        chat=ChatOpenAI(
            model='gpt-4.1-mini',
            temperature=0.1,
        )
    )

    task = "Go to google.com and search for 'browser automation with Python'"
    agent = Agent(task=task, llm=llm)

    print(f'🚀 Starting task: {task}')
    print(f'🤖 Using model: {llm.name} (provider: {llm.provider})')

    history = await agent.run()
    step_count = len(history.history)
    print(f'✅ Task completed! Steps taken: {step_count}')

    # Only report a final result when the run produced one.
    if history.final_result():
        print(f'📋 Final result: {history.final_result()}')

    return history
async def run_search():
    """Run a four-step Reddit lookup with a Novita-hosted DeepSeek model.

    The agent is configured with ``use_vision=False`` and talks to Novita's
    OpenAI-compatible endpoint using the module-level ``api_key``.
    """
    # Adjacent string literals are concatenated verbatim, so every step must
    # carry its own trailing separator; otherwise the instructions run
    # together ("...search bar3. Click on first result4. Return ...").
    task = (
        '1. Go to https://www.reddit.com/r/LocalLLaMA '
        "2. Search for 'browser use' in the search bar "
        '3. Click on first result '
        '4. Return the first comment'
    )

    agent = Agent(
        task=task,
        llm=ChatOpenAI(
            base_url='https://api.novita.ai/v3/openai',
            model='deepseek/deepseek-v3-0324',
            api_key=api_key,
        ),
        use_vision=False,
    )

    await agent.run()
5 | """ 6 | 7 | import asyncio 8 | import os 9 | 10 | from dotenv import load_dotenv 11 | from lmnr import Laminar 12 | 13 | from browser_use import Agent 14 | from browser_use.llm import ChatOpenAI 15 | 16 | load_dotenv() 17 | 18 | 19 | Laminar.initialize() 20 | 21 | # All the models are type safe from OpenAI in case you need a list of supported models 22 | llm = ChatOpenAI( 23 | model='x-ai/grok-4', 24 | base_url='https://openrouter.ai/api/v1', 25 | api_key=os.getenv('OPENROUTER_API_KEY'), 26 | ) 27 | agent = Agent( 28 | task='Go to example.com, click on the first link, and give me the title of the page', 29 | llm=llm, 30 | ) 31 | 32 | 33 | async def main(): 34 | await agent.run(max_steps=10) 35 | input('Press Enter to continue...') 36 | 37 | 38 | asyncio.run(main()) 39 | -------------------------------------------------------------------------------- /examples/search/search_url.py: -------------------------------------------------------------------------------- 1 | """ 2 | Search URL API Example 3 | 4 | This example shows how to use the Browser Use API to extract specific 5 | content from a given URL based on your query. 
async def search_url(url: str, query: str, depth: int = 2):
    """POST a query to the Browser Use Search URL endpoint and return its JSON.

    Args:
        url: Page to extract content from; sent as the ``url`` payload field.
        query: What to look for; sent as the ``query`` payload field.
        depth: Sent as the ``depth`` payload field.

    Returns:
        The decoded JSON body on HTTP 200. ``None`` when the
        ``BROWSER_USE_API_KEY`` environment variable is missing or empty, when
        the server answers with any other status, or when an exception is
        raised while talking to the API (the error is printed in each case).
    """
    token = os.getenv('BROWSER_USE_API_KEY')
    if not token:
        print('❌ Error: BROWSER_USE_API_KEY environment variable is not set.')
        print('Please set your API key: export BROWSER_USE_API_KEY="your_api_key_here"')
        return None

    request_kwargs = {
        'json': {'url': url, 'query': query, 'depth': depth},
        'headers': {'Authorization': f'Bearer {token}', 'Content-Type': 'application/json'},
    }

    print('Testing Search URL API...')
    print(f'URL: {url}')
    print(f'Query: {query}')
    print(f'Depth: {depth}')
    print('-' * 50)

    try:
        async with aiohttp.ClientSession() as session, session.post(
            'https://api.browser-use.com/api/v1/search-url',
            timeout=aiohttp.ClientTimeout(total=300),
            **request_kwargs,
        ) as response:
            # Anything other than 200 is reported and treated as a failure.
            if response.status != 200:
                error_text = await response.text()
                print(f'❌ Error {response.status}: {error_text}')
                return None

            data = await response.json()
            print('✅ Success!')
            print(f'URL processed: {data.get("url", "N/A")}')
            print(f'Content: {data.get("content", "")}')
            return data
    except Exception as exc:
        # Example script: report the failure instead of crashing.
        print(f'❌ Exception: {exc!s}')
        return None
async def simple_search(query: str, max_websites: int = 5, depth: int = 2):
    """POST a query to the Browser Use Simple Search endpoint and return its JSON.

    Args:
        query: Search query; sent as the ``query`` payload field.
        max_websites: Sent as the ``max_websites`` payload field.
        depth: Sent as the ``depth`` payload field.

    Returns:
        The decoded JSON body on HTTP 200. ``None`` when the
        ``BROWSER_USE_API_KEY`` environment variable is missing or empty, when
        the server answers with any other status, or when an exception is
        raised while talking to the API (the error is printed in each case).
    """
    token = os.getenv('BROWSER_USE_API_KEY')
    if not token:
        print('❌ Error: BROWSER_USE_API_KEY environment variable is not set.')
        print('Please set your API key: export BROWSER_USE_API_KEY="your_api_key_here"')
        return None

    request_kwargs = {
        'json': {'query': query, 'max_websites': max_websites, 'depth': depth},
        'headers': {'Authorization': f'Bearer {token}', 'Content-Type': 'application/json'},
    }

    print('Testing Simple Search API...')
    print(f'Query: {query}')
    print(f'Max websites: {max_websites}')
    print(f'Depth: {depth}')
    print('-' * 50)

    try:
        async with aiohttp.ClientSession() as session, session.post(
            'https://api.browser-use.com/api/v1/simple-search',
            timeout=aiohttp.ClientTimeout(total=300),
            **request_kwargs,
        ) as response:
            # Anything other than 200 is reported and treated as a failure.
            if response.status != 200:
                error_text = await response.text()
                print(f'❌ Error {response.status}: {error_text}')
                return None

            data = await response.json()
            print('✅ Success!')
            print(f'Results: {len(data.get("results", []))} websites processed')
            # Only the first two hits are echoed; the full payload is returned.
            for position, hit in enumerate(data.get('results', [])[:2], 1):
                print(f'\n{position}. {hit.get("url", "N/A")}')
                print(f' Content: {hit.get("content", "")}')
            return data
    except Exception as exc:
        # Example script: report the failure instead of crashing.
        print(f'❌ Exception: {exc!s}')
        return None
def get_llm(provider: str):
    """Return the chat model for *provider* (``'anthropic'`` or ``'openai'``).

    For an unknown provider, or when the provider's API-key environment
    variable is unset, an error is shown through Streamlit and ``st.stop()``
    is called. The model classes are imported lazily, only for the provider
    actually requested.
    """
    if provider not in ('anthropic', 'openai'):
        st.error(f'Unsupported provider: {provider}')
        st.stop()
        return None  # Never reached, but helps with type checking

    if provider == 'openai':
        from browser_use.llm import ChatOpenAI

        if not os.getenv('OPENAI_API_KEY'):
            st.error('Error: OPENAI_API_KEY is not set. Please provide a valid API key.')
            st.stop()

        return ChatOpenAI(model='gpt-4.1', temperature=0.0)

    # Only 'anthropic' is left at this point.
    from browser_use.llm import ChatAnthropic

    if not os.getenv('ANTHROPIC_API_KEY'):
        st.error('Error: ANTHROPIC_API_KEY is not set. Please provide a valid API key.')
        st.stop()

    return ChatAnthropic(model='claude-3-5-sonnet-20240620', temperature=0.0)
| 8 | | `online_coding_agent.py` | Implements a multi-agent system for online code editors, with separate agents for coding and execution. | 9 | | `post-twitter.py` | Provides a template for automated posting on X (Twitter), including new tweets, tagging, and replies. | 10 | | `scrolling_page.py` | Automates webpage scrolling with various scrolling actions and text search functionality. | 11 | | `twitter_post_using_cookies.py` | Automates posting on X (Twitter) using stored authentication cookies. | 12 | | `web_voyager_agent.py` | A general-purpose web navigation agent for tasks like flight booking and course searching. | 13 | -------------------------------------------------------------------------------- /examples/use-cases/captcha.py: -------------------------------------------------------------------------------- 1 | """ 2 | Goal: Automates CAPTCHA solving on a demo website. 3 | 4 | 5 | Simple try of the agent. 6 | @dev You need to add OPENAI_API_KEY to your environment variables. 7 | NOTE: captchas are hard. For this example it works. But e.g. for iframes it does not. 8 | for this example it helps to zoom in. 9 | """ 10 | 11 | import asyncio 12 | import os 13 | import sys 14 | 15 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 16 | 17 | from dotenv import load_dotenv 18 | 19 | load_dotenv() 20 | 21 | from browser_use import Agent 22 | from browser_use.llm import ChatOpenAI 23 | 24 | if not os.getenv('OPENAI_API_KEY'): 25 | raise ValueError('OPENAI_API_KEY is not set. 
Please add it to your environment variables.') 26 | 27 | 28 | async def main(): 29 | llm = ChatOpenAI(model='gpt-4.1') 30 | agent = Agent( 31 | task='go to https://captcha.com/demos/features/captcha-demo.aspx and solve the captcha', 32 | llm=llm, 33 | ) 34 | await agent.run() 35 | input('Press Enter to exit') 36 | 37 | 38 | if __name__ == '__main__': 39 | asyncio.run(main()) 40 | -------------------------------------------------------------------------------- /examples/use-cases/check_appointment.py: -------------------------------------------------------------------------------- 1 | # Goal: Checks for available visa appointment slots on the Greece MFA website. 2 | 3 | import asyncio 4 | import os 5 | import sys 6 | 7 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 8 | 9 | from dotenv import load_dotenv 10 | 11 | load_dotenv() 12 | 13 | from pydantic import BaseModel 14 | 15 | from browser_use.agent.service import Agent 16 | from browser_use.controller.service import Controller 17 | from browser_use.llm import ChatOpenAI 18 | 19 | if not os.getenv('OPENAI_API_KEY'): 20 | raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.') 21 | 22 | controller = Controller() 23 | 24 | 25 | class WebpageInfo(BaseModel): 26 | """Model for webpage link.""" 27 | 28 | link: str = 'https://appointment.mfa.gr/en/reservations/aero/ireland-grcon-dub/' 29 | 30 | 31 | @controller.action('Go to the webpage', param_model=WebpageInfo) 32 | def go_to_webpage(webpage_info: WebpageInfo): 33 | """Returns the webpage link.""" 34 | return webpage_info.link 35 | 36 | 37 | async def main(): 38 | """Main function to execute the agent task.""" 39 | task = ( 40 | 'Go to the Greece MFA webpage via the link I provided you.' 41 | 'Check the visa appointment dates. If there is no available date in this month, check the next month.' 42 | 'If there is no available date in both months, tell me there is no available date.' 
async def main():
    """Run three agents in sequence on one shared browser session.

    ``agent1`` opens the online editor, ``coder`` writes a simple calculator
    in it, and ``executor`` then runs that code and suggests fixes. All three
    agents share the same ``BrowserSession`` and the same model.
    """
    browser_session = BrowserSession()
    model = ChatOpenAI(model='gpt-4.1')

    # Initialize browser agent
    agent1 = Agent(
        task='Open an online code editor programiz.',
        llm=model,
        browser_session=browser_session,
    )
    executor = Agent(
        task='Executor. Execute the code written by the coder and suggest some updates if there are errors.',
        llm=model,
        browser_session=browser_session,
    )

    coder = Agent(
        task='Coder. Your job is to write and complete code. You are an expert coder. Code a simple calculator. Write the code on the coding interface after agent1 has opened the link.',
        llm=model,
        browser_session=browser_session,
    )

    # Order matters: the executor's task is to run "the code written by the
    # coder", so the coder has to finish before the executor starts.
    await agent1.run()
    await coder.run()
    await executor.run()
2 | 3 | import asyncio 4 | import os 5 | import sys 6 | 7 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 8 | 9 | from dotenv import load_dotenv 10 | 11 | load_dotenv() 12 | 13 | 14 | from browser_use import Agent 15 | from browser_use.browser import BrowserProfile, BrowserSession 16 | from browser_use.llm import ChatGoogle 17 | 18 | api_key = os.getenv('GOOGLE_API_KEY') 19 | if not api_key: 20 | raise ValueError('GOOGLE_API_KEY is not set') 21 | 22 | llm = ChatGoogle(model='gemini-2.0-flash-exp', api_key=api_key) 23 | 24 | 25 | browser_session = BrowserSession( 26 | browser_profile=BrowserProfile( 27 | user_data_dir='~/.config/browseruse/profiles/default', 28 | # headless=False, # Uncomment to see the browser 29 | # executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', 30 | ) 31 | ) 32 | 33 | 34 | async def main(): 35 | agent = Agent( 36 | browser_session=browser_session, 37 | task=('go to https://x.com. write a new post with the text "browser-use ftw", and submit it'), 38 | llm=llm, 39 | max_actions_per_step=4, 40 | ) 41 | await agent.run(max_steps=25) 42 | input('Press Enter to close the browser...') 43 | 44 | 45 | if __name__ == '__main__': 46 | asyncio.run(main()) 47 | -------------------------------------------------------------------------------- /examples/use-cases/web_voyager_agent.py: -------------------------------------------------------------------------------- 1 | # Goal: A general-purpose web navigation agent for tasks like flight booking and course searching. 
2 | 3 | import asyncio 4 | import os 5 | import sys 6 | 7 | # Adjust Python path 8 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 9 | 10 | from dotenv import load_dotenv 11 | 12 | load_dotenv() 13 | 14 | 15 | from browser_use.agent.service import Agent 16 | from browser_use.browser import BrowserProfile, BrowserSession 17 | from browser_use.llm import ChatAzureOpenAI, ChatOpenAI 18 | 19 | # Set LLM based on defined environment variables 20 | if os.getenv('OPENAI_API_KEY'): 21 | llm = ChatOpenAI( 22 | model='gpt-4.1', 23 | ) 24 | elif os.getenv('AZURE_OPENAI_KEY') and os.getenv('AZURE_OPENAI_ENDPOINT'): 25 | llm = ChatAzureOpenAI( 26 | model='gpt-4.1', 27 | ) 28 | else: 29 | raise ValueError('No LLM found. Please set OPENAI_API_KEY or AZURE_OPENAI_KEY and AZURE_OPENAI_ENDPOINT.') 30 | 31 | 32 | browser_session = BrowserSession( 33 | browser_profile=BrowserProfile( 34 | headless=False, # This is True in production 35 | minimum_wait_page_load_time=1, # 3 on prod 36 | maximum_wait_page_load_time=10, # 20 on prod 37 | viewport={'width': 1280, 'height': 1100}, 38 | user_data_dir='~/.config/browseruse/profiles/default', 39 | # trace_path='./tmp/web_voyager_agent', 40 | ) 41 | ) 42 | 43 | # TASK = """ 44 | # Find the lowest-priced one-way flight from Cairo to Montreal on February 21, 2025, including the total travel time and number of stops. on https://www.google.com/travel/flights/ 45 | # """ 46 | # TASK = """ 47 | # Browse Coursera, which universities offer Master of Advanced Study in Engineering degrees? Tell me what is the latest application deadline for this degree? on https://www.coursera.org/""" 48 | TASK = """ 49 | Find and book a hotel in Paris with suitable accommodations for a family of four (two adults and two children) offering free cancellation for the dates of February 14-21, 2025. 
on https://www.booking.com/ 50 | """ 51 | 52 | 53 | async def main(): 54 | agent = Agent( 55 | task=TASK, 56 | llm=llm, 57 | browser_session=browser_session, 58 | validate_output=True, 59 | enable_memory=False, 60 | ) 61 | history = await agent.run(max_steps=50) 62 | history.save_to_file('./tmp/history.json') 63 | 64 | 65 | if __name__ == '__main__': 66 | asyncio.run(main()) 67 | -------------------------------------------------------------------------------- /examples/use-cases/wikipedia_banana_to_quantum.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | from browser_use import Agent 12 | from browser_use.browser import BrowserProfile, BrowserSession 13 | from browser_use.llm import ChatOpenAI 14 | 15 | # video https://preview.screen.studio/share/vuq91Ej8 16 | llm = ChatOpenAI( 17 | model='gpt-4.1', 18 | temperature=0.0, 19 | ) 20 | task = 'go to https://en.wikipedia.org/wiki/Banana and click on buttons on the wikipedia page to go as fast as possible from banna to Quantum mechanics' 21 | 22 | browser_session = BrowserSession( 23 | browser_profile=BrowserProfile( 24 | viewport_expansion=-1, 25 | highlight_elements=False, 26 | user_data_dir='~/.config/browseruse/profiles/default', 27 | ), 28 | ) 29 | agent = Agent(task=task, llm=llm, browser_session=browser_session, use_vision=False) 30 | 31 | 32 | async def main(): 33 | await agent.run() 34 | 35 | 36 | if __name__ == '__main__': 37 | asyncio.run(main()) 38 | -------------------------------------------------------------------------------- /static/browser-use-dark.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/browser-use/browser-use/9871be3363081dd88402dd7244f1997132929c4f/static/browser-use-dark.png -------------------------------------------------------------------------------- /static/browser-use.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/browser-use/9871be3363081dd88402dd7244f1997132929c4f/static/browser-use.png -------------------------------------------------------------------------------- /tests/agent_tasks/README.md: -------------------------------------------------------------------------------- 1 | # Contributing Agent Tasks 2 | 3 | Contribute your own agent tasks and we test if the agent solves them for CI testing! 4 | 5 | ## How to Add a Task 6 | 7 | 1. Create a new `.yaml` file in this directory (`tests/agent_tasks/`). 8 | 2. Use the following format: 9 | 10 | ```yaml 11 | name: My Task Name 12 | task: Describe the task for the agent to perform 13 | judge_context: 14 | - List criteria for success, one per line 15 | max_steps: 10 16 | ``` 17 | 18 | ## Guidelines 19 | - Be specific in your task and criteria. 20 | - The `judge_context` should list what counts as a successful result. 21 | - The agent's output will be judged by an LLM using these criteria. 22 | 23 | ## Running the Tests 24 | 25 | To run all agent tasks: 26 | 27 | ```bash 28 | pytest tests/ci/test_agent_real_tasks.py 29 | ``` 30 | 31 | --- 32 | 33 | Happy contributing! 
34 | -------------------------------------------------------------------------------- /tests/agent_tasks/amazon_laptop.yaml: -------------------------------------------------------------------------------- 1 | name: Amazon Laptop Search 2 | task: Go to amazon.com, search for 'laptop', and return the first result 3 | judge_context: 4 | - The agent must navigate to amazon.com 5 | - The agent must search for 'laptop' 6 | - The agent must return name of the first laptop 7 | max_steps: 10 8 | -------------------------------------------------------------------------------- /tests/agent_tasks/browser_use_pip.yaml: -------------------------------------------------------------------------------- 1 | name: Find pip install command for browser-use 2 | task: Find the pip installation command for the browser-use repo 3 | judge_context: 4 | - The output must include the command ('pip install browser-use') 5 | max_steps: 10 6 | -------------------------------------------------------------------------------- /tests/agent_tasks/captcha_cloudflare.yaml: -------------------------------------------------------------------------------- 1 | name: Cloudflare captcha 2 | task: Go to https://2captcha.com/demo/cloudflare-turnstile and solve the captcha, wait a few seconds, then click on check, wait a few more seconds for it to complete, then extract the "hostname" value from the displayed dictionary under "Captcha is passed successfully!" 
async def test_connection_via_cdp():
    """Check that a BrowserSession can attach to a browser over CDP.

    First verifies that starting the session fails with ``ECONNREFUSED``
    while nothing listens on port 9898, then launches Chromium with remote
    debugging on that port and verifies the session can connect and open a
    new tab.
    """
    browser_session = BrowserSession(
        cdp_url='http://localhost:9898',
        browser_profile=BrowserProfile(
            headless=True,
            keep_alive=True,
        ),
    )
    with pytest.raises(Exception) as e:
        await browser_session.start()

    # Assert on the exception value outside the context manager
    assert 'ECONNREFUSED' in str(e.value)

    playwright = await async_playwright().start()
    try:
        browser = await playwright.chromium.launch(args=['--remote-debugging-port=9898'])
        try:
            async with await browser_session.start():
                await browser_session.create_new_tab()

                assert (await browser_session.get_current_page()).url == 'about:blank'
        finally:
            # Clean up even when the connection or the assertion above fails,
            # so a failing run does not leave Chromium holding port 9898.
            await browser.close()
            # NOTE(review): kill() is assumed to be safe on a session whose
            # start() did not complete — confirm against BrowserSession.
            await browser_session.kill()
    finally:
        await playwright.stop()
async def test_take_full_page_screenshot():
    """Take a full-page screenshot of example.com and check it is valid base64.

    The session is always stopped in ``finally``, whether or not the
    assertions pass.
    """
    browser_session = BrowserSession(browser_profile=BrowserProfile(headless=True, disable_security=True))
    await browser_session.start()
    try:
        page = await browser_session.get_current_page()
        # Go to a test page
        await page.goto('https://example.com')

        # NOTE(review): the fixed sleeps presumably give the page time to
        # settle around the capture — confirm they are still needed.
        await asyncio.sleep(3)
        # Take full page screenshot
        screenshot_b64 = await browser_session.take_screenshot(full_page=True)
        await asyncio.sleep(3)
        # Verify screenshot is not empty and is valid base64
        assert screenshot_b64 is not None
        assert isinstance(screenshot_b64, str)
        assert len(screenshot_b64) > 0

        # validate=True is required for this to be a real check: by default
        # b64decode silently discards characters outside the base64 alphabet,
        # so malformed data could still "decode" without an error.
        try:
            base64.b64decode(screenshot_b64, validate=True)
        except ValueError as e:  # binascii.Error is a ValueError subclass
            pytest.fail(f'Failed to decode base64 screenshot: {str(e)}')
    finally:
        await browser_session.stop()
async def test_dropdown_complex(llm, browser_session):
    """Test selecting an option from a complex (custom-rendered) dropdown menu.

    The agent is sent to a CodePen page, asked to list the dropdown options
    and select the "json" one. The test then verifies the agent's final
    answer, the text shown by the ``.select-selected`` element, and that a
    ``pre code`` element is present on the page.

    Args:
        llm: Chat model handed to the agent (presumably a pytest fixture).
        browser_session: Session the agent drives; reused here to inspect
            the page after the run (presumably a pytest fixture).
    """
    agent = Agent(
        task=(
            'go to https://codepen.io/shyam-king/pen/pvzpByJ and first get all options for the dropdown and then select the json option'
        ),
        llm=llm,
        browser_session=browser_session,
    )

    try:
        history: AgentHistoryList = await agent.run(20)
        result = history.final_result()

        # Verify dropdown interaction
        assert result is not None
        assert 'json' in result.lower(), "Expected 'json' option to be selected"

        # Verify dropdown state
        page = await browser_session.get_current_page()
        element = await page.query_selector('.select-selected')
        assert element is not None, 'Custom dropdown element should exist'

        # text_content() may return None; assert explicitly so the failure
        # names the real problem instead of an AttributeError on `.lower()`.
        text = await element.text_content()
        assert text is not None, 'Custom dropdown element should have text content'
        assert 'json' in text.lower(), 'Dropdown should display json option'

        # Verify the selected option's effect
        code_element = await page.query_selector('pre code')
        assert code_element is not None, 'Code element should be visible when JSON is selected'

    except Exception as e:
        # Any error (including a failed assertion above) is reported as a test failure.
        pytest.fail(f'Complex dropdown test failed: {str(e)}')
async def test_full_screen(start_fullscreen: bool, maximize: bool):
    """Open a headed, maximized Chromium window on google.com for 10 seconds.

    Intended for visual inspection: the browser is launched non-headless with
    ``--start-maximized`` and a context without a fixed viewport, then kept
    open for a short while before being closed.

    Args:
        start_fullscreen: Currently unused by the body; kept so existing
            callers keep working.
        maximize: Currently unused by the body; the window is always launched
            with ``--start-maximized``.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=False,
            args=['--start-maximized'],
        )
        try:
            # no_viewport lets the page follow the real window size.
            context = await browser.new_context(no_viewport=True, viewport=None)
            page = await context.new_page()
            await page.goto('https://google.com')

            # Leave the window up long enough to look at it.
            await asyncio.sleep(10)
        finally:
            # Close explicitly even when navigation or context creation fails.
            await browser.close()
async def test_gif_path():
    """Run the module-level agent and check that it writes ``./google.gif``.

    Any GIF left over from a previous run is removed first so that the final
    existence check can only pass if this run produced the file. The
    module-level browser session is started before the run and always
    stopped afterwards.
    """
    gif_path = './google.gif'

    # Start from a clean slate so a stale file cannot satisfy the assertion.
    if os.path.exists(gif_path):
        os.unlink(gif_path)

    await browser_session.start()
    try:
        run_history: AgentHistoryList = await agent.run(20)

        final_answer = run_history.final_result()
        assert final_answer is not None

        gif_was_written = os.path.exists(gif_path)
        assert gif_was_written, 'google.gif was not created'
    finally:
        await browser_session.stop()
async def test_dropdown():
    """Run the module-level agent for up to 10 steps and print its final result.

    The module-level browser session is started before the run and always
    stopped afterwards. The only check is that the agent produced a final
    result at all.
    """
    await browser_session.start()
    try:
        run_history: AgentHistoryList = await agent.run(10)
        final_answer = run_history.final_result()

        assert final_answer is not None
        print('result: ', final_answer)
    finally:
        # Always release the browser, even when the run or the assertion fails.
        await browser_session.stop()
@controller.registry.action(description='explain what you see on the screen and ask user for input')
async def explain_screen(text: str) -> str:
    """Show the model's explanation and return the user's next question.

    Args:
        text: Explanation produced by the model; pretty-printed to stdout.

    Returns:
        The line the user typed at the prompt.
    """
    # Local import keeps this block self-contained; the module may not import asyncio.
    import asyncio

    pprint(text)

    # input() blocks the calling thread. Running it in the default executor
    # keeps the event loop free to run other tasks while waiting for the user.
    loop = asyncio.get_running_loop()
    answer = await loop.run_in_executor(None, input, '\nuser input next question: \n')
    return answer
@pytest.mark.skip(reason='this is for local testing only')
async def test_wait_for_element():
    """Exercise the 'wait_for_element' action against pypi.org.

    The agent first navigates to https://pypi.org/ in a new tab via its
    initial actions and is then run for up to three steps. The test checks
    that a ``wait_for_element`` action appears in the run history and that
    the ``#search`` element exists and is visible on the current page.
    """
    # Navigation only; enable the commented entry to run the wait as an initial action too.
    initial_actions = [
        {'go_to_url': {'url': 'https://pypi.org/', 'new_tab': True}},
        # {'wait_for_element': {'selector': '#search', 'timeout': 30}},
    ]

    session_profile = BrowserProfile(headless=True, disable_security=True)
    browser_session = BrowserSession(browser_profile=session_profile)
    await browser_session.start()

    try:
        agent = Agent(
            task="Wait for element '#search' to be visible with a timeout of 30 seconds.",
            llm=llm,
            browser_session=browser_session,
            initial_actions=initial_actions,
            controller=controller,
        )

        # A few steps are enough for the navigation plus the wait action.
        run_history = await agent.run(max_steps=3)
        executed_actions = run_history.action_names()
        assert 'wait_for_element' in executed_actions, 'Expected wait_for_element action to be executed.'

        # Confirm on the live page that the element is really there and shown.
        current_page = await browser_session.get_current_page()
        search_handle = await current_page.query_selector('#search')
        assert search_handle is not None, 'Expected to find a #search element on the page.'

        search_is_visible = await search_handle.is_visible()
        assert search_is_visible, 'Expected the #search element to be visible.'
    finally:
        await browser_session.stop()