├── .github └── workflows │ ├── build.yaml │ ├── reusable_build_step.yaml │ └── tests.yaml ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── anthropic-browserbase.png ├── computer-use-demo ├── .gitignore ├── Dockerfile ├── LICENSE ├── computer_use_demo │ ├── .env.template │ ├── __init__.py │ ├── loop.py │ ├── requirements.txt │ ├── streamlit.py │ └── tools │ │ ├── __init__.py │ │ ├── base.py │ │ ├── bash.py │ │ ├── browserbase.py │ │ ├── collection.py │ │ ├── computer.py │ │ ├── edit.py │ │ └── run.py ├── dev-requirements.txt ├── image │ ├── .config │ │ └── tint2 │ │ │ ├── applications │ │ │ ├── firefox-custom.desktop │ │ │ ├── gedit.desktop │ │ │ └── terminal.desktop │ │ │ └── tint2rc │ ├── .streamlit │ │ └── config.toml │ ├── entrypoint.sh │ ├── http_server.py │ ├── index.html │ ├── mutter_startup.sh │ ├── novnc_startup.sh │ ├── open_debugger.sh │ ├── start_all.sh │ ├── static_content │ │ └── index.html │ ├── tint2_startup.sh │ ├── x11vnc_startup.sh │ └── xvfb_startup.sh ├── pyproject.toml ├── ruff.toml ├── setup.sh └── tests │ ├── conftest.py │ ├── loop_test.py │ ├── streamlit_test.py │ └── tools │ ├── bash_test.py │ ├── computer_test.py │ └── edit_test.py └── pyproject.toml /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | env: 2 | REGISTRY: ghcr.io/anthropics/anthropic-quickstarts 3 | name: build 4 | on: 5 | pull_request: 6 | paths: 7 | - .github/** 8 | - computer-use-demo/** 9 | push: 10 | branches: 11 | - main 12 | paths: 13 | - .github/** 14 | - computer-use-demo/** 15 | jobs: 16 | build-amd64: 17 | uses: ./.github/workflows/reusable_build_step.yaml 18 | with: 19 | platform: amd64 20 | builder: ubuntu-latest-16core 21 | registry: ghcr.io/anthropics/anthropic-quickstarts 22 | tag_prefix: computer-use-demo- 23 | context: computer-use-demo 24 | permissions: 25 | contents: read 26 | packages: write 27 | build-arm64: 28 | uses: ./.github/workflows/reusable_build_step.yaml 29 | with: 30 | 
platform: arm64 31 | builder: ubuntu-22.04-arm64-16core 32 | registry: ghcr.io/anthropics/anthropic-quickstarts 33 | tag_prefix: computer-use-demo- 34 | context: computer-use-demo 35 | permissions: 36 | contents: read 37 | packages: write 38 | merge: 39 | runs-on: ubuntu-latest 40 | needs: 41 | - build-arm64 42 | - build-amd64 43 | permissions: 44 | contents: read 45 | packages: write 46 | steps: 47 | - uses: actions/checkout@v4 48 | - name: Login to ghcr.io 49 | uses: docker/login-action@v3 50 | with: 51 | registry: ghcr.io 52 | username: ${{github.actor}} 53 | password: ${{secrets.GITHUB_TOKEN}} 54 | - name: Set up Docker Buildx 55 | uses: docker/setup-buildx-action@v3 56 | - name: Set image tag 57 | run: | 58 | echo "SHORT_SHA=$(git rev-parse --short ${{ github.sha }})" >> "$GITHUB_ENV" 59 | - name: Create SHA manifest and push 60 | run: | 61 | docker buildx imagetools create -t \ 62 | ${REGISTRY}:computer-use-demo-${SHORT_SHA} \ 63 | ${REGISTRY}:computer-use-demo-${SHORT_SHA}-amd64 \ 64 | ${REGISTRY}:computer-use-demo-${SHORT_SHA}-arm64 65 | 66 | - name: Create latest manifest and push 67 | if: github.event_name == 'push' && github.ref == 'refs/heads/main' 68 | run: | 69 | docker buildx imagetools create -t \ 70 | ${REGISTRY}:computer-use-demo-latest \ 71 | ${REGISTRY}:computer-use-demo-latest-amd64 \ 72 | ${REGISTRY}:computer-use-demo-latest-arm64 73 | -------------------------------------------------------------------------------- /.github/workflows/reusable_build_step.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_call: 3 | inputs: 4 | platform: 5 | required: true 6 | type: string 7 | builder: 8 | required: true 9 | type: string 10 | registry: 11 | required: true 12 | type: string 13 | tag_prefix: 14 | required: false 15 | type: string 16 | context: 17 | required: false 18 | type: string 19 | jobs: 20 | build: 21 | runs-on: ${{ inputs.builder }} 22 | permissions: 23 | contents: read 24 | packages: 
write 25 | steps: 26 | - uses: actions/checkout@v4 27 | - name: Login to ghcr.io 28 | uses: docker/login-action@v3 29 | with: 30 | registry: ghcr.io 31 | username: ${{github.actor}} 32 | password: ${{secrets.GITHUB_TOKEN}} 33 | - name: Set up Docker Buildx 34 | uses: docker/setup-buildx-action@v3 35 | - name: Set image tag 36 | run: | 37 | short_sha=$(git rev-parse --short ${{ github.sha }}) 38 | echo "TAG=${{ inputs.registry }}:${{ inputs.tag_prefix }}${short_sha}" >> "$GITHUB_ENV" 39 | - name: Build Docker image 40 | uses: docker/build-push-action@v5 41 | with: 42 | platforms: linux/${{ inputs.platform }} 43 | context: ${{ inputs.context || '.' }} 44 | push: false 45 | tags: ${{ env.TAG }} 46 | cache-from: type=gha,scope=computer-use-${{ inputs.platform }} 47 | cache-to: type=gha,mode=max,scope=computer-use-${{ inputs.platform }} 48 | load: true 49 | - name: Run container 50 | run: docker run -d -p 8051:8051 ${{ env.TAG }} 51 | - name: Check streamlit 52 | run: | 53 | timeout=60 54 | start_time=$(date +%s) 55 | docker_id=$(docker ps --filter "ancestor=${{ env.TAG }}" --format "{{.ID}}") 56 | echo "docker_id=$docker_id" >> "$GITHUB_ENV" 57 | while true; do 58 | current_time=$(date +%s) 59 | elapsed=$((current_time - start_time)) 60 | if [ $elapsed -ge $timeout ]; then 61 | echo "Timeout reached. Container did not respond within $timeout seconds." 
62 | exit 1 63 | fi 64 | response=$(docker exec $docker_id curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:8501 || echo "000") 65 | if [ "$response" = "200" ]; then 66 | echo "Container responded with 200 OK" 67 | exit 0 68 | fi 69 | done 70 | - name: Check VNC 71 | run: docker exec $docker_id nc localhost 5900 -z 72 | - name: Check noVNC 73 | run: docker exec $docker_id curl -s -o /dev/null -w "%{http_code}" http://localhost:6080 | grep -q 200 || exit 1 74 | - name: Check landing page 75 | run: docker exec $docker_id curl -s -o /dev/null -w "%{http_code}" http://localhost:8080 | grep -q 200 || exit 1 76 | - name: Determine push tags 77 | run: | 78 | if [ "${{ github.event_name }}" == "pull_request" ]; then 79 | echo "PUSH_TAGS=${TAG}-${{ inputs.platform }}" >> "$GITHUB_ENV" 80 | else 81 | echo "PUSH_TAGS=${TAG}-${{ inputs.platform }},${{ inputs.registry }}:${{ inputs.tag_prefix }}latest-${{ inputs.platform }}" >> "$GITHUB_ENV" 82 | fi 83 | - name: Push Docker image 84 | uses: docker/build-push-action@v5 85 | with: 86 | platforms: linux/${{ inputs.platform }} 87 | context: ${{ inputs.context || '.' 
}} 88 | push: true 89 | tags: ${{ env.PUSH_TAGS }} 90 | cache-from: type=gha,scope=computer-use-${{ inputs.platform }} 91 | cache-to: type=gha,mode=max,scope=computer-use-${{ inputs.platform }} 92 | -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: tests 2 | on: 3 | pull_request: 4 | paths: 5 | - .github/** 6 | - computer-use-demo/** 7 | push: 8 | branches: 9 | - main 10 | paths: 11 | - .github/** 12 | - computer-use-demo/** 13 | jobs: 14 | ruff: 15 | runs-on: ubuntu-latest 16 | defaults: 17 | run: 18 | working-directory: computer-use-demo 19 | steps: 20 | - uses: actions/checkout@v4 21 | - uses: astral-sh/ruff-action@v1 22 | pyright: 23 | runs-on: ubuntu-latest 24 | defaults: 25 | run: 26 | working-directory: computer-use-demo 27 | steps: 28 | - uses: actions/checkout@v4 29 | - uses: actions/setup-python@v5 30 | with: 31 | cache: "pip" 32 | python-version: "3.11.6" 33 | - run: | 34 | python -m venv .venv 35 | source .venv/bin/activate 36 | pip install -r dev-requirements.txt 37 | - run: echo "$PWD/.venv/bin" >> $GITHUB_PATH 38 | - uses: jakebailey/pyright-action@v1 39 | pytest: 40 | runs-on: ubuntu-latest 41 | defaults: 42 | run: 43 | working-directory: computer-use-demo 44 | steps: 45 | - uses: actions/checkout@v4 46 | - uses: actions/setup-python@v5 47 | with: 48 | cache: "pip" 49 | python-version: "3.11.6" 50 | - run: | 51 | python -m venv .venv 52 | source .venv/bin/activate 53 | pip install -r dev-requirements.txt 54 | - run: echo "$PWD/.venv/bin" >> $GITHUB_PATH 55 | - run: pytest tests --junitxml=junit/test-results.xml 56 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | files: ^computer-use-demo/ 2 | default_stages: [pre-commit, pre-push] 3 | repos: 4 | - repo: 
https://github.com/pre-commit/pre-commit-hooks 5 | rev: v2.3.0 6 | hooks: 7 | - id: check-yaml 8 | - id: end-of-file-fixer 9 | - id: trailing-whitespace 10 | - repo: https://github.com/astral-sh/ruff-pre-commit 11 | rev: v0.6.7 12 | hooks: 13 | - id: ruff 14 | name: Run `ruff` to autofix lint errors 15 | args: [--fix-only] 16 | - id: ruff 17 | name: Run `ruff` to format code 18 | entry: ruff format --force-exclude 19 | - id: ruff 20 | name: Run `ruff` to lint code 21 | - repo: https://github.com/RobertCraigie/pyright-python 22 | rev: v1.1.384 23 | hooks: 24 | - id: pyright 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Anthropic 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Anthropic Computer Use <> Browserbase Demo 2 | 3 |
 4 | ![Anthropic Computer Use <> Browserbase Demo](anthropic-browserbase.png) 5 |
6 | 7 | > [!CAUTION] 8 | > Computer use is a beta feature. Please be aware that computer use poses unique risks that are distinct from standard API features or chat interfaces. These risks are heightened when using computer use to interact with the internet. To minimize risks, consider taking precautions such as: 9 | > 10 | > 1. Use a dedicated virtual machine or container with minimal privileges to prevent direct system attacks or accidents. 11 | > 2. Avoid giving the model access to sensitive data, such as account login information, to prevent information theft. 12 | > 3. Limit internet access to an allowlist of domains to reduce exposure to malicious content. 13 | > 4. Ask a human to confirm decisions that may result in meaningful real-world consequences as well as any tasks requiring affirmative consent, such as accepting cookies, executing financial transactions, or agreeing to terms of service. 14 | > 15 | > In some circumstances, Claude will follow commands found in content even if it conflicts with the user's instructions. For example, instructions on webpages or contained in images may override user instructions or cause Claude to make mistakes. We suggest taking precautions to isolate Claude from sensitive data and actions to avoid risks related to prompt injection. 16 | > 17 | > Finally, please inform end users of relevant risks and obtain their consent prior to enabling computer use in your own products. 18 | 19 | This repository helps you get started with computer use on Claude, with reference implementations of: 20 | 21 | * Build files to create a Docker container with all necessary dependencies 22 | * A computer use agent loop using the Anthropic API to access the updated Claude 3.5 Sonnet model 23 | * Anthropic-defined computer use tools 24 | * A streamlit app for interacting with the agent loop 25 | 26 | > [!IMPORTANT] 27 | > The Beta API used in this reference implementation is subject to change. 
Please refer to the [API release notes](https://docs.anthropic.com/en/release-notes/api) and the [Browserbase changelog](https://docs.browserbase.com/changelog) for the most up-to-date information. 28 | 29 | > [!IMPORTANT] 30 | > The components are weakly separated: the agent loop runs in the container being controlled by Claude, can only be used by one session at a time, and must be restarted or reset between sessions if necessary. 31 | 32 | ## Quickstart: running the Docker container 33 | 34 | ### Anthropic API 35 | 36 | > [!TIP] 37 | > You can find your API key in the [Anthropic Console](https://console.anthropic.com/). 38 | 39 | ### Browserbase API 40 | 41 | > [!TIP] 42 | > You can find your API key and project ID in the [Browserbase Settings](https://www.browserbase.com/settings). 43 | 44 | ### Instructions for building the Docker image 45 | 46 | Go to the `computer-use-demo` directory: 47 | 48 | ```bash 49 | cd computer-use-demo 50 | ``` 51 | 52 | Add your Browserbase API key and project ID to the `.env` file (see `computer_use_demo/.env.template` for the expected variables) or set them in `main()` in `browserbase.py`, then build the Docker image: 53 | 54 | ```bash 55 | docker build -t my-computer-use-demo . 56 | ``` 57 | 58 | Run the container with your Anthropic API key: 59 | 60 | ```bash 61 | export ANTHROPIC_API_KEY=%your_api_key% 62 | docker run \ 63 | -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \ 64 | -v $HOME/.anthropic:/home/computeruse/.anthropic \ 65 | -p 5900:5900 \ 66 | -p 8501:8501 \ 67 | -p 6080:6080 \ 68 | -p 8080:8080 \ 69 | -it my-computer-use-demo 70 | ``` 71 | 72 | ### Accessing the demo app 73 | 74 | Once the container is running, open your browser to [http://localhost:8080](http://localhost:8080) to access the combined interface that includes both the agent chat and desktop view. 75 | 76 | The container stores settings like the API key and custom system prompt in `~/.anthropic/`. Mount this directory to persist these settings between container runs. 
77 | 78 | Alternative access points: 79 | 80 | - Streamlit interface only: [http://localhost:8501](http://localhost:8501) 81 | - Desktop view only: [http://localhost:6080/vnc.html](http://localhost:6080/vnc.html) 82 | - Direct VNC connection: `vnc://localhost:5900` (for VNC clients) 83 | 84 | ## Screen size 85 | 86 | Environment variables `WIDTH` and `HEIGHT` can be used to set the screen size. For example: 87 | 88 | ```bash 89 | docker run \ 90 | -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \ 91 | -v $HOME/.anthropic:/home/computeruse/.anthropic \ 92 | -p 5900:5900 \ 93 | -p 8501:8501 \ 94 | -p 6080:6080 \ 95 | -p 8080:8080 \ 96 | -e WIDTH=1920 \ 97 | -e HEIGHT=1080 \ 98 | -it my-computer-use-demo 99 | ``` 100 | 101 | We do not recommend sending screenshots in resolutions above [XGA/WXGA](https://en.wikipedia.org/wiki/Display_resolution_standards#XGA) to avoid issues related to [image resizing](https://docs.anthropic.com/en/docs/build-with-claude/vision#evaluate-image-size). 102 | 103 | Relying on the image resizing behavior in the API will result in lower model accuracy and slower performance than implementing scaling in your tools directly. The `computer` tool implementation in this project demonstrates how to scale both images and coordinates from higher resolutions to the suggested resolutions. 104 | 105 | ## Contributing 106 | 107 | We welcome contributions to the Anthropic Computer Use <> Browserbase Demo repository! If you have ideas for new quickstart projects or improvements to existing ones, please open an issue or submit a pull request. 108 | 109 | ## Community and Support 110 | 111 | - Email us [Browserbase Support](mailto:support@browserbase.com) for discussions and support 112 | - Check out the [Browserbase documentation](https://docs.browserbase.com) for additional help 113 | 114 | ## License 115 | 116 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
117 | -------------------------------------------------------------------------------- /anthropic-browserbase.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browserbase/browserbase-computer-use/8fc3e6686ac8853c1d2fd1d2145d0ab7853a583f/anthropic-browserbase.png -------------------------------------------------------------------------------- /computer-use-demo/.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | .ruff_cache 3 | __pycache__ 4 | .pytest_cache 5 | .env 6 | env 7 | -------------------------------------------------------------------------------- /computer-use-demo/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM docker.io/ubuntu:22.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | ENV DEBIAN_PRIORITY=high 5 | 6 | RUN apt-get update && \ 7 | apt-get -y upgrade && \ 8 | apt-get -y install \ 9 | build-essential \ 10 | # UI Requirements 11 | xvfb \ 12 | xterm \ 13 | xdotool \ 14 | scrot \ 15 | imagemagick \ 16 | sudo \ 17 | mutter \ 18 | x11vnc \ 19 | # add w3m for debugging 20 | w3m \ 21 | # Python/pyenv reqs 22 | build-essential \ 23 | libssl-dev \ 24 | zlib1g-dev \ 25 | libbz2-dev \ 26 | libreadline-dev \ 27 | libsqlite3-dev \ 28 | curl \ 29 | git \ 30 | libncursesw5-dev \ 31 | xz-utils \ 32 | tk-dev \ 33 | libxml2-dev \ 34 | libxmlsec1-dev \ 35 | libffi-dev \ 36 | liblzma-dev \ 37 | # Network tools 38 | net-tools \ 39 | netcat \ 40 | # PPA req 41 | software-properties-common && \ 42 | # Userland apps 43 | sudo add-apt-repository ppa:mozillateam/ppa && \ 44 | sudo apt-get install -y --no-install-recommends \ 45 | libreoffice \ 46 | firefox-esr \ 47 | x11-apps \ 48 | xpdf \ 49 | gedit \ 50 | xpaint \ 51 | tint2 \ 52 | galculator \ 53 | pcmanfm \ 54 | unzip && \ 55 | apt-get clean 56 | 57 | # Install noVNC 58 | RUN git clone --branch v1.5.0 https://github.com/novnc/noVNC.git 
/opt/noVNC && \ 59 | git clone --branch v0.12.0 https://github.com/novnc/websockify /opt/noVNC/utils/websockify && \ 60 | ln -s /opt/noVNC/vnc.html /opt/noVNC/index.html 61 | 62 | # setup user 63 | ENV USERNAME=computeruse 64 | ENV HOME=/home/$USERNAME 65 | RUN useradd -m -s /bin/bash -d $HOME $USERNAME 66 | RUN echo "${USERNAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers 67 | USER computeruse 68 | WORKDIR $HOME 69 | 70 | # setup python 71 | RUN git clone https://github.com/pyenv/pyenv.git ~/.pyenv && \ 72 | cd ~/.pyenv && src/configure && make -C src && cd .. && \ 73 | echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc && \ 74 | echo 'command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc && \ 75 | echo 'eval "$(pyenv init -)"' >> ~/.bashrc 76 | ENV PYENV_ROOT="$HOME/.pyenv" 77 | ENV PATH="$PYENV_ROOT/bin:$PATH" 78 | ENV PYENV_VERSION_MAJOR=3 79 | ENV PYENV_VERSION_MINOR=11 80 | ENV PYENV_VERSION_PATCH=6 81 | ENV PYENV_VERSION=$PYENV_VERSION_MAJOR.$PYENV_VERSION_MINOR.$PYENV_VERSION_PATCH 82 | RUN eval "$(pyenv init -)" && \ 83 | pyenv install $PYENV_VERSION && \ 84 | pyenv global $PYENV_VERSION && \ 85 | pyenv rehash 86 | 87 | ENV PATH="$HOME/.pyenv/shims:$HOME/.pyenv/bin:$PATH" 88 | 89 | RUN python -m pip install --upgrade pip==23.1.2 setuptools==58.0.4 wheel==0.40.0 && \ 90 | python -m pip config set global.disable-pip-version-check true 91 | 92 | # only reinstall if requirements.txt changes 93 | COPY --chown=$USERNAME:$USERNAME computer_use_demo/requirements.txt $HOME/computer_use_demo/requirements.txt 94 | RUN python -m pip install -r $HOME/computer_use_demo/requirements.txt 95 | 96 | # setup desktop env & app 97 | COPY --chown=$USERNAME:$USERNAME image/ $HOME 98 | COPY --chown=$USERNAME:$USERNAME computer_use_demo/ $HOME/computer_use_demo/ 99 | 100 | ARG DISPLAY_NUM=1 101 | ARG HEIGHT=768 102 | ARG WIDTH=1024 103 | ENV DISPLAY_NUM=$DISPLAY_NUM 104 | ENV HEIGHT=$HEIGHT 105 | ENV WIDTH=$WIDTH 106 | 107 | # This is the entrypoint 
script that starts all the services 108 | COPY --chown=$USERNAME:$USERNAME image/entrypoint.sh $HOME/entrypoint.sh 109 | RUN chmod +x $HOME/entrypoint.sh 110 | 111 | # This is the Browserbase script that connects to Browserbase and opens the debugger URL 112 | COPY --chown=$USERNAME:$USERNAME computer_use_demo/tools/browserbase.py $HOME/computer_use_demo/tools/ 113 | 114 | # This is the script that opens the debugger URL 115 | COPY --chown=$USERNAME:$USERNAME image/open_debugger.sh $HOME/open_debugger.sh 116 | RUN chmod +x $HOME/open_debugger.sh 117 | 118 | ENTRYPOINT [ "./entrypoint.sh" ] 119 | -------------------------------------------------------------------------------- /computer-use-demo/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2024 Anthropic, PBC. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
 8 | -------------------------------------------------------------------------------- /computer-use-demo/computer_use_demo/.env.template: -------------------------------------------------------------------------------- 1 | BROWSERBASE_PROJECT_ID= 2 | BROWSERBASE_API_KEY= -------------------------------------------------------------------------------- /computer-use-demo/computer_use_demo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browserbase/browserbase-computer-use/8fc3e6686ac8853c1d2fd1d2145d0ab7853a583f/computer-use-demo/computer_use_demo/__init__.py -------------------------------------------------------------------------------- /computer-use-demo/computer_use_demo/loop.py: -------------------------------------------------------------------------------- 1 | """ 2 | Agentic sampling loop that calls the Anthropic API and a local implementation of Anthropic-defined computer use tools. 3 | """ 4 | 5 | import platform 6 | from collections.abc import Callable 7 | from datetime import datetime 8 | from enum import StrEnum 9 | from typing import Any, cast 10 | 11 | from anthropic import Anthropic, AnthropicBedrock, AnthropicVertex, APIResponse 12 | from anthropic.types import ( 13 | ToolResultBlockParam, 14 | ) 15 | from anthropic.types.beta import ( 16 | BetaContentBlock, 17 | BetaContentBlockParam, 18 | BetaImageBlockParam, 19 | BetaMessage, 20 | BetaMessageParam, 21 | BetaTextBlockParam, 22 | BetaToolResultBlockParam, 23 | ) 24 | 25 | from .tools import BashTool, ComputerTool, EditTool, ToolCollection, ToolResult 26 | 27 | BETA_FLAG = "computer-use-2024-10-22" 28 | 29 | 30 | class APIProvider(StrEnum): 31 | ANTHROPIC = "anthropic" 32 | BEDROCK = "bedrock" 33 | VERTEX = "vertex" 34 | 35 | 36 | PROVIDER_TO_DEFAULT_MODEL_NAME: dict[APIProvider, str] = { 37 | APIProvider.ANTHROPIC: "claude-3-5-sonnet-20241022", 38 | APIProvider.BEDROCK: "anthropic.claude-3-5-sonnet-20241022-v2:0", 39 | 
APIProvider.VERTEX: "claude-3-5-sonnet-v2@20241022", 40 | } 41 | 42 | 43 | # This system prompt is optimized for the Docker environment in this repository and 44 | # specific tool combinations enabled. 45 | # We encourage modifying this system prompt to ensure the model has context for the 46 | # environment it is running in, and to provide any additional information that may be 47 | # helpful for the task at hand. 48 | SYSTEM_PROMPT = f""" 49 | * You are utilizing an Ubuntu virtual machine using {platform.machine()} architecture with internet access. 50 | * You can feel free to install Ubuntu applications with your bash tool. Use curl instead of wget. 51 | * Using bash tool you can start GUI applications, but you need to set export DISPLAY=:1 and use a 52 | subshell. For example "(DISPLAY=:1 xterm &)". GUI apps run with bash tool will appear within your 53 | desktop environment, but they may take some time to appear. Take a screenshot to confirm it did. 54 | * A debug URL is automatically opened for you in Firefox. This is your primary interface for web interactions. Do not mention opening Firefox as it's already done for you. 55 | * When using your bash tool with commands that are expected to output very large quantities of text, redirect into a tmp file and use str_replace_editor or `grep -n -B -A ` to confirm output. 56 | * When viewing a page in the debug window, it can be helpful to zoom out so that you can see everything on the page. Make sure you scroll down to see everything before deciding something isn't available. 57 | * When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request. 58 | * The current date is {datetime.today().strftime('%A, %B %-d, %Y')}. 59 | 60 | 61 | 62 | * The debug URL is already open in Firefox. Do not mention opening Firefox or any other browser. 
63 | * If the item you are looking at is a pdf, if after taking a single screenshot of the pdf it seems that you want to read the entire document instead of trying to continue to read the pdf from your screenshots + navigation, determine the URL, use curl to download the pdf, install and use pdftotext to convert it to a text file, and then read that text file directly with your StrReplaceEditTool. 64 | * The debug URL is your primary interface for web browsing. Use this interface for all web-related tasks. 65 | * If you need to interact with web content, use the debug URL interface and describe the actions you want to take. 66 | """ 67 | 68 | 69 | async def sampling_loop( 70 | *, 71 | model: str, 72 | provider: APIProvider, 73 | system_prompt_suffix: str, 74 | messages: list[BetaMessageParam], 75 | output_callback: Callable[[BetaContentBlock], None], 76 | tool_output_callback: Callable[[ToolResult, str], None], 77 | api_response_callback: Callable[[APIResponse[BetaMessage]], None], 78 | api_key: str, 79 | only_n_most_recent_images: int | None = None, 80 | max_tokens: int = 4096, 81 | ): 82 | """ 83 | Agentic sampling loop for the assistant/tool interaction of computer use. 84 | """ 85 | tool_collection = ToolCollection( 86 | ComputerTool(), 87 | BashTool(), 88 | EditTool(), 89 | ) 90 | system = ( 91 | f"{SYSTEM_PROMPT}{' ' + system_prompt_suffix if system_prompt_suffix else ''}" 92 | ) 93 | 94 | while True: 95 | if only_n_most_recent_images: 96 | _maybe_filter_to_n_most_recent_images(messages, only_n_most_recent_images) 97 | 98 | if provider == APIProvider.ANTHROPIC: 99 | client = Anthropic(api_key=api_key) 100 | elif provider == APIProvider.VERTEX: 101 | client = AnthropicVertex() 102 | elif provider == APIProvider.BEDROCK: 103 | client = AnthropicBedrock() 104 | 105 | # Call the API 106 | # we use raw_response to provide debug information to streamlit. 
Your 107 | # implementation may be able call the SDK directly with: 108 | # `response = client.messages.create(...)` instead. 109 | raw_response = client.beta.messages.with_raw_response.create( 110 | max_tokens=max_tokens, 111 | messages=messages, 112 | model=model, 113 | system=system, 114 | tools=tool_collection.to_params(), 115 | betas=["computer-use-2024-10-22"], 116 | ) 117 | 118 | api_response_callback(cast(APIResponse[BetaMessage], raw_response)) 119 | 120 | response = raw_response.parse() 121 | 122 | messages.append( 123 | { 124 | "role": "assistant", 125 | "content": cast(list[BetaContentBlockParam], response.content), 126 | } 127 | ) 128 | 129 | tool_result_content: list[BetaToolResultBlockParam] = [] 130 | for content_block in cast(list[BetaContentBlock], response.content): 131 | output_callback(content_block) 132 | if content_block.type == "tool_use": 133 | result = await tool_collection.run( 134 | name=content_block.name, 135 | tool_input=cast(dict[str, Any], content_block.input), 136 | ) 137 | tool_result_content.append( 138 | _make_api_tool_result(result, content_block.id) 139 | ) 140 | tool_output_callback(result, content_block.id) 141 | 142 | if not tool_result_content: 143 | return messages 144 | 145 | messages.append({"content": tool_result_content, "role": "user"}) 146 | 147 | 148 | def _maybe_filter_to_n_most_recent_images( 149 | messages: list[BetaMessageParam], 150 | images_to_keep: int, 151 | min_removal_threshold: int = 10, 152 | ): 153 | """ 154 | With the assumption that images are screenshots that are of diminishing value as 155 | the conversation progresses, remove all but the final `images_to_keep` tool_result 156 | images in place, with a chunk of min_removal_threshold to reduce the amount we 157 | break the implicit prompt cache. 
158 | """ 159 | if images_to_keep is None: 160 | return messages 161 | 162 | tool_result_blocks = cast( 163 | list[ToolResultBlockParam], 164 | [ 165 | item 166 | for message in messages 167 | for item in ( 168 | message["content"] if isinstance(message["content"], list) else [] 169 | ) 170 | if isinstance(item, dict) and item.get("type") == "tool_result" 171 | ], 172 | ) 173 | 174 | total_images = sum( 175 | 1 176 | for tool_result in tool_result_blocks 177 | for content in tool_result.get("content", []) 178 | if isinstance(content, dict) and content.get("type") == "image" 179 | ) 180 | 181 | images_to_remove = total_images - images_to_keep 182 | # for better cache behavior, we want to remove in chunks 183 | images_to_remove -= images_to_remove % min_removal_threshold 184 | 185 | for tool_result in tool_result_blocks: 186 | if isinstance(tool_result.get("content"), list): 187 | new_content = [] 188 | for content in tool_result.get("content", []): 189 | if isinstance(content, dict) and content.get("type") == "image": 190 | if images_to_remove > 0: 191 | images_to_remove -= 1 192 | continue 193 | new_content.append(content) 194 | tool_result["content"] = new_content 195 | 196 | 197 | def _make_api_tool_result( 198 | result: ToolResult, tool_use_id: str 199 | ) -> BetaToolResultBlockParam: 200 | """Convert an agent ToolResult to an API ToolResultBlockParam.""" 201 | tool_result_content: list[BetaTextBlockParam | BetaImageBlockParam] | str = [] 202 | is_error = False 203 | if result.error: 204 | is_error = True 205 | tool_result_content = _maybe_prepend_system_tool_result(result, result.error) 206 | else: 207 | if result.output: 208 | tool_result_content.append( 209 | { 210 | "type": "text", 211 | "text": _maybe_prepend_system_tool_result(result, result.output), 212 | } 213 | ) 214 | if result.base64_image: 215 | tool_result_content.append( 216 | { 217 | "type": "image", 218 | "source": { 219 | "type": "base64", 220 | "media_type": "image/png", 221 | "data": 
result.base64_image, 222 | }, 223 | } 224 | ) 225 | return { 226 | "type": "tool_result", 227 | "content": tool_result_content, 228 | "tool_use_id": tool_use_id, 229 | "is_error": is_error, 230 | } 231 | 232 | 233 | def _maybe_prepend_system_tool_result(result: ToolResult, result_text: str): 234 | if result.system: 235 | result_text = f"{result.system}\n{result_text}" 236 | return result_text 237 | -------------------------------------------------------------------------------- /computer-use-demo/computer_use_demo/requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit>=1.38.0 2 | anthropic[bedrock,vertex]>=0.37.1 3 | jsonschema==4.22.0 4 | boto3>=1.28.57 5 | google-auth<3,>=2 6 | requests==2.31.0 7 | playwright==1.48.0 8 | python-dotenv==1.0.0 -------------------------------------------------------------------------------- /computer-use-demo/computer_use_demo/streamlit.py: -------------------------------------------------------------------------------- 1 | """ 2 | Entrypoint for streamlit, see https://docs.streamlit.io/ 3 | """ 4 | 5 | import asyncio 6 | import base64 7 | import os 8 | import subprocess 9 | from datetime import datetime 10 | from enum import StrEnum 11 | from functools import partial 12 | from pathlib import PosixPath 13 | from typing import cast 14 | 15 | import streamlit as st 16 | from anthropic import APIResponse 17 | from anthropic.types import ( 18 | TextBlock, 19 | ) 20 | from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock 21 | from anthropic.types.tool_use_block import ToolUseBlock 22 | from streamlit.delta_generator import DeltaGenerator 23 | 24 | from computer_use_demo.loop import ( 25 | PROVIDER_TO_DEFAULT_MODEL_NAME, 26 | APIProvider, 27 | sampling_loop, 28 | ) 29 | from computer_use_demo.tools import ToolResult 30 | 31 | CONFIG_DIR = PosixPath("~/.anthropic").expanduser() 32 | API_KEY_FILE = CONFIG_DIR / "api_key" 33 | STREAMLIT_STYLE = """ 34 | 45 | 
""" 46 | 47 | WARNING_TEXT = "⚠️ Security Alert: Never provide access to sensitive accounts or data, as malicious web content can hijack Claude's behavior" 48 | 49 | 50 | class Sender(StrEnum): 51 | USER = "user" 52 | BOT = "assistant" 53 | TOOL = "tool" 54 | 55 | 56 | def setup_state(): 57 | if "messages" not in st.session_state: 58 | st.session_state.messages = [] 59 | if "api_key" not in st.session_state: 60 | # Try to load API key from file first, then environment 61 | st.session_state.api_key = load_from_storage("api_key") or os.getenv( 62 | "ANTHROPIC_API_KEY", "" 63 | ) 64 | if "provider" not in st.session_state: 65 | st.session_state.provider = ( 66 | os.getenv("API_PROVIDER", "anthropic") or APIProvider.ANTHROPIC 67 | ) 68 | if "provider_radio" not in st.session_state: 69 | st.session_state.provider_radio = st.session_state.provider 70 | if "model" not in st.session_state: 71 | _reset_model() 72 | if "auth_validated" not in st.session_state: 73 | st.session_state.auth_validated = False 74 | if "responses" not in st.session_state: 75 | st.session_state.responses = {} 76 | if "tools" not in st.session_state: 77 | st.session_state.tools = {} 78 | if "only_n_most_recent_images" not in st.session_state: 79 | st.session_state.only_n_most_recent_images = 10 80 | if "custom_system_prompt" not in st.session_state: 81 | st.session_state.custom_system_prompt = load_from_storage("system_prompt") or "" 82 | if "hide_images" not in st.session_state: 83 | st.session_state.hide_images = False 84 | 85 | 86 | def _reset_model(): 87 | st.session_state.model = PROVIDER_TO_DEFAULT_MODEL_NAME[ 88 | cast(APIProvider, st.session_state.provider) 89 | ] 90 | 91 | 92 | async def main(): 93 | """Render loop for streamlit""" 94 | setup_state() 95 | 96 | st.markdown(STREAMLIT_STYLE, unsafe_allow_html=True) 97 | 98 | st.title("Claude Computer <> Browserbase Use Demo") 99 | 100 | if not os.getenv("HIDE_WARNING", False): 101 | st.warning(WARNING_TEXT) 102 | 103 | with st.sidebar: 104 | 
105 | def _reset_api_provider(): 106 | if st.session_state.provider_radio != st.session_state.provider: 107 | _reset_model() 108 | st.session_state.provider = st.session_state.provider_radio 109 | st.session_state.auth_validated = False 110 | 111 | provider_options = [option.value for option in APIProvider] 112 | st.radio( 113 | "API Provider", 114 | options=provider_options, 115 | key="provider_radio", 116 | format_func=lambda x: x.title(), 117 | on_change=_reset_api_provider, 118 | ) 119 | 120 | st.text_input("Model", key="model") 121 | 122 | if st.session_state.provider == APIProvider.ANTHROPIC: 123 | st.text_input( 124 | "Anthropic API Key", 125 | type="password", 126 | key="api_key", 127 | on_change=lambda: save_to_storage("api_key", st.session_state.api_key), 128 | ) 129 | 130 | st.number_input( 131 | "Only send N most recent images", 132 | min_value=0, 133 | key="only_n_most_recent_images", 134 | help="To decrease the total tokens sent, remove older screenshots from the conversation", 135 | ) 136 | st.text_area( 137 | "Custom System Prompt Suffix", 138 | key="custom_system_prompt", 139 | help="Additional instructions to append to the system prompt. 
see computer_use_demo/loop.py for the base system prompt.", 140 | on_change=lambda: save_to_storage( 141 | "system_prompt", st.session_state.custom_system_prompt 142 | ), 143 | ) 144 | st.checkbox("Hide screenshots", key="hide_images") 145 | 146 | if st.button("Reset", type="primary"): 147 | with st.spinner("Resetting..."): 148 | st.session_state.clear() 149 | setup_state() 150 | 151 | subprocess.run("pkill Xvfb; pkill tint2", shell=True) # noqa: ASYNC221 152 | await asyncio.sleep(1) 153 | subprocess.run("./start_all.sh", shell=True) # noqa: ASYNC221 154 | 155 | if not st.session_state.auth_validated: 156 | if auth_error := validate_auth( 157 | st.session_state.provider, st.session_state.api_key 158 | ): 159 | st.warning(f"Please resolve the following auth issue:\n\n{auth_error}") 160 | return 161 | else: 162 | st.session_state.auth_validated = True 163 | 164 | chat, http_logs = st.tabs(["Chat", "HTTP Exchange Logs"]) 165 | new_message = st.chat_input( 166 | "Type a message to send to Claude to control the computer..." 
167 | ) 168 | 169 | with chat: 170 | # render past chats 171 | for message in st.session_state.messages: 172 | if isinstance(message["content"], str): 173 | _render_message(message["role"], message["content"]) 174 | elif isinstance(message["content"], list): 175 | for block in message["content"]: 176 | # the tool result we send back to the Anthropic API isn't sufficient to render all details, 177 | # so we store the tool use responses 178 | if isinstance(block, dict) and block["type"] == "tool_result": 179 | _render_message( 180 | Sender.TOOL, st.session_state.tools[block["tool_use_id"]] 181 | ) 182 | else: 183 | _render_message( 184 | message["role"], 185 | cast(BetaTextBlock | BetaToolUseBlock, block), 186 | ) 187 | 188 | # render past http exchanges 189 | for identity, response in st.session_state.responses.items(): 190 | _render_api_response(response, identity, http_logs) 191 | 192 | # render past chats 193 | if new_message: 194 | st.session_state.messages.append( 195 | { 196 | "role": Sender.USER, 197 | "content": [TextBlock(type="text", text=new_message)], 198 | } 199 | ) 200 | _render_message(Sender.USER, new_message) 201 | 202 | try: 203 | most_recent_message = st.session_state["messages"][-1] 204 | except IndexError: 205 | return 206 | 207 | if most_recent_message["role"] is not Sender.USER: 208 | # we don't have a user message to respond to, exit early 209 | return 210 | 211 | with st.spinner("Running Agent..."): 212 | # run the agent sampling loop with the newest message 213 | st.session_state.messages = await sampling_loop( 214 | system_prompt_suffix=st.session_state.custom_system_prompt, 215 | model=st.session_state.model, 216 | provider=st.session_state.provider, 217 | messages=st.session_state.messages, 218 | output_callback=partial(_render_message, Sender.BOT), 219 | tool_output_callback=partial( 220 | _tool_output_callback, tool_state=st.session_state.tools 221 | ), 222 | api_response_callback=partial( 223 | _api_response_callback, 224 | 
tab=http_logs, 225 | response_state=st.session_state.responses, 226 | ), 227 | api_key=st.session_state.api_key, 228 | only_n_most_recent_images=st.session_state.only_n_most_recent_images, 229 | ) 230 | 231 | 232 | def validate_auth(provider: APIProvider, api_key: str | None): 233 | if provider == APIProvider.ANTHROPIC: 234 | if not api_key: 235 | return "Enter your Anthropic API key in the sidebar to continue." 236 | if provider == APIProvider.BEDROCK: 237 | import boto3 238 | 239 | if not boto3.Session().get_credentials(): 240 | return "You must have AWS credentials set up to use the Bedrock API." 241 | if provider == APIProvider.VERTEX: 242 | import google.auth 243 | from google.auth.exceptions import DefaultCredentialsError 244 | 245 | if not os.environ.get("CLOUD_ML_REGION"): 246 | return "Set the CLOUD_ML_REGION environment variable to use the Vertex API." 247 | try: 248 | google.auth.default( 249 | scopes=["https://www.googleapis.com/auth/cloud-platform"], 250 | ) 251 | except DefaultCredentialsError: 252 | return "Your google cloud credentials are not set up correctly." 
253 | 254 | 255 | def load_from_storage(filename: str) -> str | None: 256 | """Load data from a file in the storage directory.""" 257 | try: 258 | file_path = CONFIG_DIR / filename 259 | if file_path.exists(): 260 | data = file_path.read_text().strip() 261 | if data: 262 | return data 263 | except Exception as e: 264 | st.write(f"Debug: Error loading {filename}: {e}") 265 | return None 266 | 267 | 268 | def save_to_storage(filename: str, data: str) -> None: 269 | """Save data to a file in the storage directory.""" 270 | try: 271 | CONFIG_DIR.mkdir(parents=True, exist_ok=True) 272 | file_path = CONFIG_DIR / filename 273 | file_path.write_text(data) 274 | # Ensure only user can read/write the file 275 | file_path.chmod(0o600) 276 | except Exception as e: 277 | st.write(f"Debug: Error saving {filename}: {e}") 278 | 279 | 280 | def _api_response_callback( 281 | response: APIResponse[BetaMessage], 282 | tab: DeltaGenerator, 283 | response_state: dict[str, APIResponse[BetaMessage]], 284 | ): 285 | """ 286 | Handle an API response by storing it to state and rendering it. 
287 | """ 288 | response_id = datetime.now().isoformat() 289 | response_state[response_id] = response 290 | _render_api_response(response, response_id, tab) 291 | 292 | 293 | def _tool_output_callback( 294 | tool_output: ToolResult, tool_id: str, tool_state: dict[str, ToolResult] 295 | ): 296 | """Handle a tool output by storing it to state and rendering it.""" 297 | tool_state[tool_id] = tool_output 298 | _render_message(Sender.TOOL, tool_output) 299 | 300 | 301 | def _render_api_response( 302 | response: APIResponse[BetaMessage], response_id: str, tab: DeltaGenerator 303 | ): 304 | """Render an API response to a streamlit tab""" 305 | with tab: 306 | with st.expander(f"Request/Response ({response_id})"): 307 | newline = "\n\n" 308 | st.markdown( 309 | f"`{response.http_request.method} {response.http_request.url}`{newline}{newline.join(f'`{k}: {v}`' for k, v in response.http_request.headers.items())}" 310 | ) 311 | st.json(response.http_request.read().decode()) 312 | st.markdown( 313 | f"`{response.http_response.status_code}`{newline}{newline.join(f'`{k}: {v}`' for k, v in response.headers.items())}" 314 | ) 315 | st.json(response.http_response.text) 316 | 317 | 318 | def _render_message( 319 | sender: Sender, 320 | message: str | BetaTextBlock | BetaToolUseBlock | ToolResult, 321 | ): 322 | """Convert input from the user or output from the agent to a streamlit message.""" 323 | # streamlit's hotreloading breaks isinstance checks, so we need to check for class names 324 | is_tool_result = not isinstance(message, str) and ( 325 | isinstance(message, ToolResult) 326 | or message.__class__.__name__ == "ToolResult" 327 | or message.__class__.__name__ == "CLIResult" 328 | ) 329 | if not message or ( 330 | is_tool_result 331 | and st.session_state.hide_images 332 | and not hasattr(message, "error") 333 | and not hasattr(message, "output") 334 | ): 335 | return 336 | with st.chat_message(sender): 337 | if is_tool_result: 338 | message = cast(ToolResult, message) 339 | if 
message.output: 340 | if message.__class__.__name__ == "CLIResult": 341 | st.code(message.output) 342 | else: 343 | st.markdown(message.output) 344 | if message.error: 345 | st.error(message.error) 346 | if message.base64_image and not st.session_state.hide_images: 347 | st.image(base64.b64decode(message.base64_image)) 348 | elif isinstance(message, BetaTextBlock) or isinstance(message, TextBlock): 349 | st.write(message.text) 350 | elif isinstance(message, BetaToolUseBlock) or isinstance(message, ToolUseBlock): 351 | st.code(f"Tool Use: {message.name}\nInput: {message.input}") 352 | else: 353 | st.markdown(message) 354 | 355 | 356 | if __name__ == "__main__": 357 | asyncio.run(main()) 358 | -------------------------------------------------------------------------------- /computer-use-demo/computer_use_demo/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import CLIResult, ToolResult 2 | from .bash import BashTool 3 | from .collection import ToolCollection 4 | from .computer import ComputerTool 5 | from .edit import EditTool 6 | 7 | __ALL__ = [ 8 | BashTool, 9 | CLIResult, 10 | ComputerTool, 11 | EditTool, 12 | ToolCollection, 13 | ToolResult, 14 | ] 15 | -------------------------------------------------------------------------------- /computer-use-demo/computer_use_demo/tools/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from dataclasses import dataclass, fields, replace 3 | from typing import Any 4 | 5 | from anthropic.types.beta import BetaToolUnionParam 6 | 7 | 8 | class BaseAnthropicTool(metaclass=ABCMeta): 9 | """Abstract base class for Anthropic-defined tools.""" 10 | 11 | @abstractmethod 12 | def __call__(self, **kwargs) -> Any: 13 | """Executes the tool with the given arguments.""" 14 | ... 
15 | 16 | @abstractmethod 17 | def to_params( 18 | self, 19 | ) -> BetaToolUnionParam: 20 | raise NotImplementedError 21 | 22 | 23 | @dataclass(kw_only=True, frozen=True) 24 | class ToolResult: 25 | """Represents the result of a tool execution.""" 26 | 27 | output: str | None = None 28 | error: str | None = None 29 | base64_image: str | None = None 30 | system: str | None = None 31 | 32 | def __bool__(self): 33 | return any(getattr(self, field.name) for field in fields(self)) 34 | 35 | def __add__(self, other: "ToolResult"): 36 | def combine_fields( 37 | field: str | None, other_field: str | None, concatenate: bool = True 38 | ): 39 | if field and other_field: 40 | if concatenate: 41 | return field + other_field 42 | raise ValueError("Cannot combine tool results") 43 | return field or other_field 44 | 45 | return ToolResult( 46 | output=combine_fields(self.output, other.output), 47 | error=combine_fields(self.error, other.error), 48 | base64_image=combine_fields(self.base64_image, other.base64_image, False), 49 | system=combine_fields(self.system, other.system), 50 | ) 51 | 52 | def replace(self, **kwargs): 53 | """Returns a new ToolResult with the given fields replaced.""" 54 | return replace(self, **kwargs) 55 | 56 | 57 | class CLIResult(ToolResult): 58 | """A ToolResult that can be rendered as a CLI output.""" 59 | 60 | 61 | class ToolFailure(ToolResult): 62 | """A ToolResult that represents a failure.""" 63 | 64 | 65 | class ToolError(Exception): 66 | """Raised when a tool encounters an error.""" 67 | 68 | def __init__(self, message): 69 | self.message = message 70 | -------------------------------------------------------------------------------- /computer-use-demo/computer_use_demo/tools/bash.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | from typing import ClassVar, Literal 4 | 5 | from anthropic.types.beta import BetaToolBash20241022Param 6 | 7 | from .base import BaseAnthropicTool, 
CLIResult, ToolError, ToolResult 8 | 9 | 10 | class _BashSession: 11 | """A session of a bash shell.""" 12 | 13 | _started: bool 14 | _process: asyncio.subprocess.Process 15 | 16 | command: str = "/bin/bash" 17 | _output_delay: float = 0.2 # seconds 18 | _timeout: float = 120.0 # seconds 19 | _sentinel: str = "<>" 20 | 21 | def __init__(self): 22 | self._started = False 23 | self._timed_out = False 24 | 25 | async def start(self): 26 | if self._started: 27 | return 28 | 29 | self._process = await asyncio.create_subprocess_shell( 30 | self.command, 31 | preexec_fn=os.setsid, 32 | shell=True, 33 | bufsize=0, 34 | stdin=asyncio.subprocess.PIPE, 35 | stdout=asyncio.subprocess.PIPE, 36 | stderr=asyncio.subprocess.PIPE, 37 | ) 38 | 39 | self._started = True 40 | 41 | def stop(self): 42 | """Terminate the bash shell.""" 43 | if not self._started: 44 | raise ToolError("Session has not started.") 45 | if self._process.returncode is not None: 46 | return 47 | self._process.terminate() 48 | 49 | async def run(self, command: str): 50 | """Execute a command in the bash shell.""" 51 | if not self._started: 52 | raise ToolError("Session has not started.") 53 | if self._process.returncode is not None: 54 | return ToolResult( 55 | system="tool must be restarted", 56 | error=f"bash has exited with returncode {self._process.returncode}", 57 | ) 58 | if self._timed_out: 59 | raise ToolError( 60 | f"timed out: bash has not returned in {self._timeout} seconds and must be restarted", 61 | ) 62 | 63 | # we know these are not None because we created the process with PIPEs 64 | assert self._process.stdin 65 | assert self._process.stdout 66 | assert self._process.stderr 67 | 68 | # send command to the process 69 | self._process.stdin.write( 70 | command.encode() + f"; echo '{self._sentinel}'\n".encode() 71 | ) 72 | await self._process.stdin.drain() 73 | 74 | # read output from the process, until the sentinel is found 75 | try: 76 | async with asyncio.timeout(self._timeout): 77 | while True: 
78 | await asyncio.sleep(self._output_delay) 79 | # if we read directly from stdout/stderr, it will wait forever for 80 | # EOF. use the StreamReader buffer directly instead. 81 | output = self._process.stdout._buffer.decode() # pyright: ignore[reportAttributeAccessIssue] 82 | if self._sentinel in output: 83 | # strip the sentinel and break 84 | output = output[: output.index(self._sentinel)] 85 | break 86 | except asyncio.TimeoutError: 87 | self._timed_out = True 88 | raise ToolError( 89 | f"timed out: bash has not returned in {self._timeout} seconds and must be restarted", 90 | ) from None 91 | 92 | if output.endswith("\n"): 93 | output = output[:-1] 94 | 95 | error = self._process.stderr._buffer.decode() # pyright: ignore[reportAttributeAccessIssue] 96 | if error.endswith("\n"): 97 | error = error[:-1] 98 | 99 | # clear the buffers so that the next output can be read correctly 100 | self._process.stdout._buffer.clear() # pyright: ignore[reportAttributeAccessIssue] 101 | self._process.stderr._buffer.clear() # pyright: ignore[reportAttributeAccessIssue] 102 | 103 | return CLIResult(output=output, error=error) 104 | 105 | 106 | class BashTool(BaseAnthropicTool): 107 | """ 108 | A tool that allows the agent to run bash commands. 109 | The tool parameters are defined by Anthropic and are not editable. 
110 | """ 111 | 112 | _session: _BashSession | None 113 | name: ClassVar[Literal["bash"]] = "bash" 114 | api_type: ClassVar[Literal["bash_20241022"]] = "bash_20241022" 115 | 116 | def __init__(self): 117 | self._session = None 118 | super().__init__() 119 | 120 | async def __call__( 121 | self, command: str | None = None, restart: bool = False, **kwargs 122 | ): 123 | if restart: 124 | if self._session: 125 | self._session.stop() 126 | self._session = _BashSession() 127 | await self._session.start() 128 | 129 | return ToolResult(system="tool has been restarted.") 130 | 131 | if self._session is None: 132 | self._session = _BashSession() 133 | await self._session.start() 134 | 135 | if command is not None: 136 | return await self._session.run(command) 137 | 138 | raise ToolError("no command provided.") 139 | 140 | def to_params(self) -> BetaToolBash20241022Param: 141 | return { 142 | "type": self.api_type, 143 | "name": self.name, 144 | } 145 | -------------------------------------------------------------------------------- /computer-use-demo/computer_use_demo/tools/browserbase.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import sys 3 | import json 4 | from playwright.sync_api import sync_playwright, Playwright 5 | import dotenv 6 | import os 7 | 8 | dotenv.load_dotenv() 9 | 10 | 11 | def create_session(project_id, api_key): 12 | url = "https://www.browserbase.com/v1/sessions" 13 | payload = {"projectId": project_id} 14 | headers = { 15 | "X-BB-API-Key": api_key, 16 | "Content-Type": "application/json" 17 | } 18 | 19 | try: 20 | response = requests.post(url, json=payload, headers=headers) 21 | response.raise_for_status() 22 | return response.json()["id"] 23 | except requests.exceptions.RequestException as e: 24 | print(f"Error creating session: {e}", file=sys.stderr) 25 | return None 26 | 27 | def get_debug_url(session_id, api_key): 28 | url = 
f"https://www.browserbase.com/v1/sessions/{session_id}/debug" 29 | headers = {"X-BB-API-Key": api_key} 30 | 31 | try: 32 | response = requests.get(url, headers=headers) 33 | response.raise_for_status() 34 | return response.json() 35 | except requests.exceptions.RequestException as e: 36 | print(f"Error getting debug URL: {e}", file=sys.stderr) 37 | return None 38 | 39 | def connect_to_browserbase(playwright: Playwright, api_key, session_id): 40 | chromium = playwright.chromium 41 | browser = chromium.connect_over_cdp(f'wss://connect.browserbase.com?apiKey={api_key}&sessionId={session_id}') 42 | return browser 43 | 44 | def main(): 45 | print("Starting browserbase.py script", file=sys.stderr) 46 | 47 | project_id = os.environ["BROWSERBASE_PROJECT_ID"] 48 | api_key = os.environ["BROWSERBASE_API_KEY"] 49 | 50 | session_id = create_session(project_id, api_key) 51 | if session_id: 52 | print(f"Session ID: {session_id}") 53 | 54 | print("Connecting to Browserbase", file=sys.stderr) 55 | with sync_playwright() as playwright: 56 | print("Connected to Browserbase", file=sys.stderr) 57 | browser = connect_to_browserbase(playwright, api_key, session_id) 58 | context = browser.contexts[0] 59 | page = context.pages[0] 60 | 61 | print("Going to google", file=sys.stderr) 62 | page.goto('https://www.google.com') 63 | 64 | print("Getting debug URL", file=sys.stderr) 65 | debug_info = get_debug_url(session_id, api_key) 66 | if debug_info: 67 | print(json.dumps(debug_info, indent=2)) 68 | with open('/tmp/debugger_url.txt', 'w') as f: 69 | f.write(debug_info['debuggerFullscreenUrl']) 70 | print("Debug URL saved to /tmp/debugger_url.txt", file=sys.stderr) 71 | else: 72 | print("Failed to get debug URL", file=sys.stderr) 73 | 74 | print("Browser session is still active. 
Press Ctrl+C to exit.", file=sys.stderr) 75 | try: 76 | # Keep the script running 77 | while True: 78 | page.wait_for_timeout(1000) # Wait for 1 second 79 | except KeyboardInterrupt: 80 | print("Closing browser and exiting", file=sys.stderr) 81 | finally: 82 | browser.close() 83 | 84 | print("Finished browserbase.py script", file=sys.stderr) 85 | else: 86 | print("Failed to create session", file=sys.stderr) 87 | 88 | if __name__ == "__main__": 89 | main() 90 | 91 | -------------------------------------------------------------------------------- /computer-use-demo/computer_use_demo/tools/collection.py: -------------------------------------------------------------------------------- 1 | """Collection classes for managing multiple tools.""" 2 | 3 | from typing import Any 4 | 5 | from anthropic.types.beta import BetaToolUnionParam 6 | 7 | from .base import ( 8 | BaseAnthropicTool, 9 | ToolError, 10 | ToolFailure, 11 | ToolResult, 12 | ) 13 | 14 | 15 | class ToolCollection: 16 | """A collection of anthropic-defined tools.""" 17 | 18 | def __init__(self, *tools: BaseAnthropicTool): 19 | self.tools = tools 20 | self.tool_map = {tool.to_params()["name"]: tool for tool in tools} 21 | 22 | def to_params( 23 | self, 24 | ) -> list[BetaToolUnionParam]: 25 | return [tool.to_params() for tool in self.tools] 26 | 27 | async def run(self, *, name: str, tool_input: dict[str, Any]) -> ToolResult: 28 | tool = self.tool_map.get(name) 29 | if not tool: 30 | return ToolFailure(error=f"Tool {name} is invalid") 31 | try: 32 | return await tool(**tool_input) 33 | except ToolError as e: 34 | return ToolFailure(error=e.message) 35 | -------------------------------------------------------------------------------- /computer-use-demo/computer_use_demo/tools/computer.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import base64 3 | import os 4 | import shlex 5 | import shutil 6 | from enum import StrEnum 7 | from pathlib import Path 8 | 
from typing import Literal, TypedDict 9 | from uuid import uuid4 10 | 11 | from anthropic.types.beta import BetaToolComputerUse20241022Param 12 | 13 | from .base import BaseAnthropicTool, ToolError, ToolResult 14 | from .run import run 15 | 16 | OUTPUT_DIR = "/tmp/outputs" 17 | 18 | TYPING_DELAY_MS = 12 19 | TYPING_GROUP_SIZE = 50 20 | 21 | Action = Literal[ 22 | "key", 23 | "type", 24 | "mouse_move", 25 | "left_click", 26 | "left_click_drag", 27 | "right_click", 28 | "middle_click", 29 | "double_click", 30 | "screenshot", 31 | "cursor_position", 32 | ] 33 | 34 | 35 | class Resolution(TypedDict): 36 | width: int 37 | height: int 38 | 39 | 40 | # sizes above XGA/WXGA are not recommended (see README.md) 41 | # scale down to one of these targets if ComputerTool._scaling_enabled is set 42 | MAX_SCALING_TARGETS: dict[str, Resolution] = { 43 | "XGA": Resolution(width=1024, height=768), # 4:3 44 | "WXGA": Resolution(width=1280, height=800), # 16:10 45 | "FWXGA": Resolution(width=1366, height=768), # ~16:9 46 | } 47 | 48 | 49 | class ScalingSource(StrEnum): 50 | COMPUTER = "computer" 51 | API = "api" 52 | 53 | 54 | class ComputerToolOptions(TypedDict): 55 | display_height_px: int 56 | display_width_px: int 57 | display_number: int | None 58 | 59 | 60 | def chunks(s: str, chunk_size: int) -> list[str]: 61 | return [s[i : i + chunk_size] for i in range(0, len(s), chunk_size)] 62 | 63 | 64 | class ComputerTool(BaseAnthropicTool): 65 | """ 66 | A tool that allows the agent to interact with the screen, keyboard, and mouse of the current computer. 67 | The tool parameters are defined by Anthropic and are not editable. 
68 | """ 69 | 70 | name: Literal["computer"] = "computer" 71 | api_type: Literal["computer_20241022"] = "computer_20241022" 72 | width: int 73 | height: int 74 | display_num: int | None 75 | 76 | _screenshot_delay = 2.0 77 | _scaling_enabled = True 78 | 79 | @property 80 | def options(self) -> ComputerToolOptions: 81 | width, height = self.scale_coordinates( 82 | ScalingSource.COMPUTER, self.width, self.height 83 | ) 84 | return { 85 | "display_width_px": width, 86 | "display_height_px": height, 87 | "display_number": self.display_num, 88 | } 89 | 90 | def to_params(self) -> BetaToolComputerUse20241022Param: 91 | return {"name": self.name, "type": self.api_type, **self.options} 92 | 93 | def __init__(self): 94 | super().__init__() 95 | 96 | self.width = int(os.getenv("WIDTH") or 0) 97 | self.height = int(os.getenv("HEIGHT") or 0) 98 | assert self.width and self.height, "WIDTH, HEIGHT must be set" 99 | if (display_num := os.getenv("DISPLAY_NUM")) is not None: 100 | self.display_num = int(display_num) 101 | self._display_prefix = f"DISPLAY=:{self.display_num} " 102 | else: 103 | self.display_num = None 104 | self._display_prefix = "" 105 | 106 | self.xdotool = f"{self._display_prefix}xdotool" 107 | 108 | async def __call__( 109 | self, 110 | *, 111 | action: Action, 112 | text: str | None = None, 113 | coordinate: tuple[int, int] | None = None, 114 | **kwargs, 115 | ): 116 | if action in ("mouse_move", "left_click_drag"): 117 | if coordinate is None: 118 | raise ToolError(f"coordinate is required for {action}") 119 | if text is not None: 120 | raise ToolError(f"text is not accepted for {action}") 121 | if not isinstance(coordinate, list) or len(coordinate) != 2: 122 | raise ToolError(f"{coordinate} must be a tuple of length 2") 123 | if not all(isinstance(i, int) and i >= 0 for i in coordinate): 124 | raise ToolError(f"{coordinate} must be a tuple of non-negative ints") 125 | 126 | x, y = self.scale_coordinates( 127 | ScalingSource.API, coordinate[0], coordinate[1] 
128 | ) 129 | 130 | if action == "mouse_move": 131 | return await self.shell(f"{self.xdotool} mousemove --sync {x} {y}") 132 | elif action == "left_click_drag": 133 | return await self.shell( 134 | f"{self.xdotool} mousedown 1 mousemove --sync {x} {y} mouseup 1" 135 | ) 136 | 137 | if action in ("key", "type"): 138 | if text is None: 139 | raise ToolError(f"text is required for {action}") 140 | if coordinate is not None: 141 | raise ToolError(f"coordinate is not accepted for {action}") 142 | if not isinstance(text, str): 143 | raise ToolError(output=f"{text} must be a string") 144 | 145 | if action == "key": 146 | return await self.shell(f"{self.xdotool} key -- {text}") 147 | elif action == "type": 148 | results: list[ToolResult] = [] 149 | for chunk in chunks(text, TYPING_GROUP_SIZE): 150 | cmd = f"{self.xdotool} type --delay {TYPING_DELAY_MS} -- {shlex.quote(chunk)}" 151 | results.append(await self.shell(cmd, take_screenshot=False)) 152 | screenshot_base64 = (await self.screenshot()).base64_image 153 | return ToolResult( 154 | output="".join(result.output or "" for result in results), 155 | error="".join(result.error or "" for result in results), 156 | base64_image=screenshot_base64, 157 | ) 158 | 159 | if action in ( 160 | "left_click", 161 | "right_click", 162 | "double_click", 163 | "middle_click", 164 | "screenshot", 165 | "cursor_position", 166 | ): 167 | if text is not None: 168 | raise ToolError(f"text is not accepted for {action}") 169 | if coordinate is not None: 170 | raise ToolError(f"coordinate is not accepted for {action}") 171 | 172 | if action == "screenshot": 173 | return await self.screenshot() 174 | elif action == "cursor_position": 175 | result = await self.shell( 176 | f"{self.xdotool} getmouselocation --shell", 177 | take_screenshot=False, 178 | ) 179 | output = result.output or "" 180 | x, y = self.scale_coordinates( 181 | ScalingSource.COMPUTER, 182 | int(output.split("X=")[1].split("\n")[0]), 183 | 
int(output.split("Y=")[1].split("\n")[0]), 184 | ) 185 | return result.replace(output=f"X={x},Y={y}") 186 | else: 187 | click_arg = { 188 | "left_click": "1", 189 | "right_click": "3", 190 | "middle_click": "2", 191 | "double_click": "--repeat 2 --delay 500 1", 192 | }[action] 193 | return await self.shell(f"{self.xdotool} click {click_arg}") 194 | 195 | raise ToolError(f"Invalid action: {action}") 196 | 197 | async def screenshot(self): 198 | """Take a screenshot of the current screen and return the base64 encoded image.""" 199 | output_dir = Path(OUTPUT_DIR) 200 | output_dir.mkdir(parents=True, exist_ok=True) 201 | path = output_dir / f"screenshot_{uuid4().hex}.png" 202 | 203 | # Try gnome-screenshot first 204 | if shutil.which("gnome-screenshot"): 205 | screenshot_cmd = f"{self._display_prefix}gnome-screenshot -f {path} -p" 206 | else: 207 | # Fall back to scrot if gnome-screenshot isn't available 208 | screenshot_cmd = f"{self._display_prefix}scrot -p {path}" 209 | 210 | result = await self.shell(screenshot_cmd, take_screenshot=False) 211 | if self._scaling_enabled: 212 | x, y = self.scale_coordinates( 213 | ScalingSource.COMPUTER, self.width, self.height 214 | ) 215 | await self.shell( 216 | f"convert {path} -resize {x}x{y}! 
{path}", take_screenshot=False 217 | ) 218 | 219 | if path.exists(): 220 | return result.replace( 221 | base64_image=base64.b64encode(path.read_bytes()).decode() 222 | ) 223 | raise ToolError(f"Failed to take screenshot: {result.error}") 224 | 225 | async def shell(self, command: str, take_screenshot=True) -> ToolResult: 226 | """Run a shell command and return the output, error, and optionally a screenshot.""" 227 | _, stdout, stderr = await run(command) 228 | base64_image = None 229 | 230 | if take_screenshot: 231 | # delay to let things settle before taking a screenshot 232 | await asyncio.sleep(self._screenshot_delay) 233 | base64_image = (await self.screenshot()).base64_image 234 | 235 | return ToolResult(output=stdout, error=stderr, base64_image=base64_image) 236 | 237 | def scale_coordinates(self, source: ScalingSource, x: int, y: int): 238 | """Scale coordinates to a target maximum resolution.""" 239 | if not self._scaling_enabled: 240 | return x, y 241 | ratio = self.width / self.height 242 | target_dimension = None 243 | for dimension in MAX_SCALING_TARGETS.values(): 244 | # allow some error in the aspect ratio - not ratios are exactly 16:9 245 | if abs(dimension["width"] / dimension["height"] - ratio) < 0.02: 246 | if dimension["width"] < self.width: 247 | target_dimension = dimension 248 | break 249 | if target_dimension is None: 250 | return x, y 251 | # should be less than 1 252 | x_scaling_factor = target_dimension["width"] / self.width 253 | y_scaling_factor = target_dimension["height"] / self.height 254 | if source == ScalingSource.API: 255 | if x > self.width or y > self.height: 256 | raise ToolError(f"Coordinates {x}, {y} are out of bounds") 257 | # scale up 258 | return round(x / x_scaling_factor), round(y / y_scaling_factor) 259 | # scale down 260 | return round(x * x_scaling_factor), round(y * y_scaling_factor) 261 | -------------------------------------------------------------------------------- 
import shlex
from collections import defaultdict
from pathlib import Path
from typing import Literal, get_args

from anthropic.types.beta import BetaToolTextEditor20241022Param

from .base import BaseAnthropicTool, CLIResult, ToolError, ToolResult
from .run import maybe_truncate, run

Command = Literal[
    "view",
    "create",
    "str_replace",
    "insert",
    "undo_edit",
]
# Number of context lines shown on each side of an edit in result snippets.
SNIPPET_LINES: int = 4


class EditTool(BaseAnthropicTool):
    """
    A filesystem editor tool that allows the agent to view, create, and edit files.
    The tool parameters are defined by Anthropic and are not editable.

    Every successful `create`, `str_replace` and `insert` pushes a text snapshot
    onto a per-path history stack, which `undo_edit` pops and writes back.
    """

    api_type: Literal["text_editor_20241022"] = "text_editor_20241022"
    name: Literal["str_replace_editor"] = "str_replace_editor"

    # Maps a file path to the stack of snapshots recorded for it.
    _file_history: dict[Path, list[str]]

    def __init__(self):
        self._file_history = defaultdict(list)
        super().__init__()

    def to_params(self) -> BetaToolTextEditor20241022Param:
        """Return the tool description sent to the API (name and type only)."""
        return {
            "name": self.name,
            "type": self.api_type,
        }

    async def __call__(
        self,
        *,
        command: Command,
        path: str,
        file_text: str | None = None,
        view_range: list[int] | None = None,
        old_str: str | None = None,
        new_str: str | None = None,
        insert_line: int | None = None,
        **kwargs,
    ):
        """
        Validate `path` for `command` and dispatch to the matching handler.

        Raises:
            ToolError: if the path/command combination is invalid, a required
                parameter is missing, or the command is not recognized.
        """
        _path = Path(path)
        self.validate_path(command, _path)
        if command == "view":
            return await self.view(_path, view_range)
        elif command == "create":
            # Compare against None: an empty string is a legitimate request to
            # create an empty file.
            if file_text is None:
                raise ToolError("Parameter `file_text` is required for command: create")
            self.write_file(_path, file_text)
            self._file_history[_path].append(file_text)
            return ToolResult(output=f"File created successfully at: {_path}")
        elif command == "str_replace":
            # An empty `old_str` can never identify a unique location, so it is
            # rejected together with a missing one.
            if not old_str:
                raise ToolError(
                    "Parameter `old_str` is required for command: str_replace"
                )
            return self.str_replace(_path, old_str, new_str)
        elif command == "insert":
            if insert_line is None:
                raise ToolError(
                    "Parameter `insert_line` is required for command: insert"
                )
            # Compare against None: inserting "" (a blank line) is valid.
            if new_str is None:
                raise ToolError("Parameter `new_str` is required for command: insert")
            return self.insert(_path, insert_line, new_str)
        elif command == "undo_edit":
            return self.undo_edit(_path)
        raise ToolError(
            f'Unrecognized command {command}. The allowed commands for the {self.name} tool are: {", ".join(get_args(Command))}'
        )

    def validate_path(self, command: str, path: Path):
        """
        Check that the path/command combination is valid.

        Raises:
            ToolError: if the path is relative, does not exist (for anything
                but `create`), already exists (for `create`), or is a
                directory used with a command other than `view`.
        """
        if not path.is_absolute():
            # Anchor the hint at the filesystem root; joining with an empty
            # path would just echo the relative path back.
            suggested_path = Path("/") / path
            raise ToolError(
                f"The path {path} is not an absolute path, it should start with `/`. Maybe you meant {suggested_path}?"
            )
        exists = path.exists()
        if not exists and command != "create":
            raise ToolError(
                f"The path {path} does not exist. Please provide a valid path."
            )
        if exists and command == "create":
            raise ToolError(
                f"File already exists at: {path}. Cannot overwrite files using command `create`."
            )
        if path.is_dir() and command != "view":
            raise ToolError(
                f"The path {path} is a directory and only the `view` command can be used on directories"
            )

    async def view(self, path: Path, view_range: list[int] | None = None):
        """
        Implement the view command.

        For a directory, list its non-hidden entries up to two levels deep.
        For a file, return its numbered content, optionally restricted to the
        1-based inclusive `view_range` `[first, last]`, where `last == -1`
        means "through the end of the file".
        """
        if path.is_dir():
            if view_range:
                raise ToolError(
                    "The `view_range` parameter is not allowed when `path` points to a directory."
                )

            # The path is caller-supplied text: quote it so spaces or shell
            # metacharacters cannot alter the command.
            _, stdout, stderr = await run(
                rf"find {shlex.quote(str(path))} -maxdepth 2 -not -path '*/\.*'"
            )
            if not stderr:
                stdout = f"Here's the files and directories up to 2 levels deep in {path}, excluding hidden items:\n{stdout}\n"
            return CLIResult(output=stdout, error=stderr)

        file_content = self.read_file(path)
        init_line = 1
        if view_range:
            if len(view_range) != 2 or not all(isinstance(i, int) for i in view_range):
                raise ToolError(
                    "Invalid `view_range`. It should be a list of two integers."
                )
            file_lines = file_content.split("\n")
            n_lines_file = len(file_lines)
            init_line, final_line = view_range
            if init_line < 1 or init_line > n_lines_file:
                raise ToolError(
                    f"Invalid `view_range`: {view_range}. It's first element `{init_line}` should be within the range of lines of the file: {[1, n_lines_file]}"
                )
            if final_line > n_lines_file:
                raise ToolError(
                    f"Invalid `view_range`: {view_range}. It's second element `{final_line}` should be smaller than the number of lines in the file: `{n_lines_file}`"
                )
            if final_line != -1 and final_line < init_line:
                raise ToolError(
                    f"Invalid `view_range`: {view_range}. It's second element `{final_line}` should be larger or equal than its first `{init_line}`"
                )

            if final_line == -1:
                file_content = "\n".join(file_lines[init_line - 1 :])
            else:
                file_content = "\n".join(file_lines[init_line - 1 : final_line])

        return CLIResult(
            output=self._make_output(file_content, str(path), init_line=init_line)
        )

    @staticmethod
    def _occurrence_lines(file_content: str, old_str: str) -> list[int]:
        """Return the 1-based line numbers on which `old_str` is found.

        A single-line needle is reported once per line containing it. A needle
        that spans lines can never be contained in one line, so it is reported
        by the line on which each non-overlapping occurrence starts.
        """
        if "\n" not in old_str:
            return [
                idx + 1
                for idx, line in enumerate(file_content.split("\n"))
                if old_str in line
            ]
        found: list[int] = []
        start = file_content.find(old_str)
        while start != -1:
            found.append(file_content.count("\n", 0, start) + 1)
            start = file_content.find(old_str, start + len(old_str))
        return found

    def str_replace(self, path: Path, old_str: str, new_str: str | None):
        """
        Implement the str_replace command, which replaces old_str with new_str in the file content.

        `old_str` must occur exactly once. A `new_str` of None deletes the match.

        Raises:
            ToolError: if `old_str` is absent or occurs more than once.
        """
        # NOTE: tabs are expanded in the file content as well as in both
        # strings, so the rewritten file no longer contains tab characters.
        file_content = self.read_file(path).expandtabs()
        old_str = old_str.expandtabs()
        new_str = new_str.expandtabs() if new_str is not None else ""

        # The edit is only unambiguous when old_str occurs exactly once.
        occurrences = file_content.count(old_str)
        if occurrences == 0:
            raise ToolError(
                f"No replacement was performed, old_str `{old_str}` did not appear verbatim in {path}."
            )
        elif occurrences > 1:
            lines = self._occurrence_lines(file_content, old_str)
            raise ToolError(
                f"No replacement was performed. Multiple occurrences of old_str `{old_str}` in lines {lines}. Please ensure it is unique"
            )

        new_file_content = file_content.replace(old_str, new_str)
        self.write_file(path, new_file_content)

        # Record the pre-edit text only after the write succeeded, so a failed
        # write leaves no undo entry behind.
        self._file_history[path].append(file_content)

        # Build a snippet: SNIPPET_LINES of context around the replaced region.
        replacement_line = file_content.split(old_str)[0].count("\n")
        start_line = max(0, replacement_line - SNIPPET_LINES)
        end_line = replacement_line + SNIPPET_LINES + new_str.count("\n")
        snippet = "\n".join(new_file_content.split("\n")[start_line : end_line + 1])

        success_msg = f"The file {path} has been edited. "
        success_msg += self._make_output(
            snippet, f"a snippet of {path}", start_line + 1
        )
        success_msg += "Review the changes and make sure they are as expected. Edit the file again if necessary."

        return CLIResult(output=success_msg)

    def insert(self, path: Path, insert_line: int, new_str: str):
        """
        Implement the insert command, which inserts new_str at the specified line in the file content.

        `insert_line` counts existing lines: 0 inserts before the first line and
        `n` inserts after line `n`.

        Raises:
            ToolError: if `insert_line` is outside `[0, number_of_lines]`.
        """
        # NOTE: as in str_replace, tabs in the whole file are expanded.
        file_text = self.read_file(path).expandtabs()
        new_str = new_str.expandtabs()
        file_text_lines = file_text.split("\n")
        n_lines_file = len(file_text_lines)

        if insert_line < 0 or insert_line > n_lines_file:
            raise ToolError(
                f"Invalid `insert_line` parameter: {insert_line}. It should be within the range of lines of the file: {[0, n_lines_file]}"
            )

        new_str_lines = new_str.split("\n")
        new_file_text_lines = (
            file_text_lines[:insert_line]
            + new_str_lines
            + file_text_lines[insert_line:]
        )
        snippet_lines = (
            file_text_lines[max(0, insert_line - SNIPPET_LINES) : insert_line]
            + new_str_lines
            + file_text_lines[insert_line : insert_line + SNIPPET_LINES]
        )

        new_file_text = "\n".join(new_file_text_lines)
        snippet = "\n".join(snippet_lines)

        self.write_file(path, new_file_text)
        self._file_history[path].append(file_text)

        success_msg = f"The file {path} has been edited. "
        success_msg += self._make_output(
            snippet,
            "a snippet of the edited file",
            max(1, insert_line - SNIPPET_LINES + 1),
        )
        success_msg += "Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the file again if necessary."
        return CLIResult(output=success_msg)

    def undo_edit(self, path: Path):
        """
        Implement the undo_edit command.

        Pops the most recent snapshot recorded for `path` and writes it back.

        Raises:
            ToolError: if no snapshot has been recorded for `path`.
        """
        # Use .get() so that probing an unknown path does not add an empty
        # entry to the defaultdict.
        history = self._file_history.get(path)
        if not history:
            raise ToolError(f"No edit history found for {path}.")

        old_text = history.pop()
        self.write_file(path, old_text)

        return CLIResult(
            output=f"Last edit to {path} undone successfully. {self._make_output(old_text, str(path))}"
        )

    def read_file(self, path: Path):
        """Read the content of a file from a given path; raise a ToolError if an error occurs."""
        try:
            return path.read_text()
        except Exception as e:
            # Deliberately broad: any failure is reported as a ToolError.
            raise ToolError(f"Ran into {e} while trying to read {path}") from None

    def write_file(self, path: Path, file: str):
        """Write the content of a file to a given path; raise a ToolError if an error occurs."""
        try:
            path.write_text(file)
        except Exception as e:
            # Deliberately broad: any failure is reported as a ToolError.
            raise ToolError(f"Ran into {e} while trying to write to {path}") from None

    def _make_output(
        self,
        file_content: str,
        file_descriptor: str,
        init_line: int = 1,
        expand_tabs: bool = True,
    ):
        """
        Generate output for the CLI based on the content of a file.

        The content is truncated via `maybe_truncate`, optionally tab-expanded,
        and each line is prefixed with its line number (starting at
        `init_line`) in a `cat -n`-like layout.
        """
        file_content = maybe_truncate(file_content)
        if expand_tabs:
            file_content = file_content.expandtabs()
        file_content = "\n".join(
            f"{i + init_line:6}\t{line}"
            for i, line in enumerate(file_content.split("\n"))
        )
        return (
            f"Here's the result of running `cat -n` on {file_descriptor}:\n"
            + file_content
            + "\n"
        )
6 | MAX_RESPONSE_LEN: int = 16000 7 | 8 | 9 | def maybe_truncate(content: str, truncate_after: int | None = MAX_RESPONSE_LEN): 10 | """Truncate content and append a notice if content exceeds the specified length.""" 11 | return ( 12 | content 13 | if not truncate_after or len(content) <= truncate_after 14 | else content[:truncate_after] + TRUNCATED_MESSAGE 15 | ) 16 | 17 | 18 | async def run( 19 | cmd: str, 20 | timeout: float | None = 120.0, # seconds 21 | truncate_after: int | None = MAX_RESPONSE_LEN, 22 | ): 23 | """Run a shell command asynchronously with a timeout.""" 24 | process = await asyncio.create_subprocess_shell( 25 | cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE 26 | ) 27 | 28 | try: 29 | stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout) 30 | return ( 31 | process.returncode or 0, 32 | maybe_truncate(stdout.decode(), truncate_after=truncate_after), 33 | maybe_truncate(stderr.decode(), truncate_after=truncate_after), 34 | ) 35 | except asyncio.TimeoutError as exc: 36 | try: 37 | process.kill() 38 | except ProcessLookupError: 39 | pass 40 | raise TimeoutError( 41 | f"Command '{cmd}' timed out after {timeout} seconds" 42 | ) from exc 43 | -------------------------------------------------------------------------------- /computer-use-demo/dev-requirements.txt: -------------------------------------------------------------------------------- 1 | -r computer_use_demo/requirements.txt 2 | ruff==0.6.7 3 | pre-commit==3.8.0 4 | pytest==8.3.3 5 | pytest-asyncio==0.23.6 6 | # don't use this file, wrong version -------------------------------------------------------------------------------- /computer-use-demo/image/.config/tint2/applications/firefox-custom.desktop: -------------------------------------------------------------------------------- 1 | [Desktop Entry] 2 | Name=Firefox Custom 3 | Comment=Open Firefox with custom URL 4 | Exec=firefox-esr -new-window 5 | Icon=firefox-esr 6 | Terminal=false 7 | 
Type=Application 8 | Categories=Network;WebBrowser; 9 | -------------------------------------------------------------------------------- /computer-use-demo/image/.config/tint2/applications/gedit.desktop: -------------------------------------------------------------------------------- 1 | [Desktop Entry] 2 | Name=Gedit 3 | Comment=Open gedit 4 | Exec=gedit 5 | Icon=text-editor-symbolic 6 | Terminal=false 7 | Type=Application 8 | Categories=TextEditor; 9 | -------------------------------------------------------------------------------- /computer-use-demo/image/.config/tint2/applications/terminal.desktop: -------------------------------------------------------------------------------- 1 | [Desktop Entry] 2 | Name=Terminal 3 | Comment=Open Terminal 4 | Exec=xterm 5 | Icon=utilities-terminal 6 | Terminal=false 7 | Type=Application 8 | Categories=System;TerminalEmulator; 9 | -------------------------------------------------------------------------------- /computer-use-demo/image/.config/tint2/tint2rc: -------------------------------------------------------------------------------- 1 | #------------------------------------- 2 | # Panel 3 | panel_items = TL 4 | panel_size = 100% 60 5 | panel_margin = 0 0 6 | panel_padding = 2 0 2 7 | panel_background_id = 1 8 | wm_menu = 0 9 | panel_dock = 0 10 | panel_position = bottom center horizontal 11 | panel_layer = top 12 | panel_monitor = all 13 | panel_shrink = 0 14 | autohide = 0 15 | autohide_show_timeout = 0 16 | autohide_hide_timeout = 0.5 17 | autohide_height = 2 18 | strut_policy = follow_size 19 | panel_window_name = tint2 20 | disable_transparency = 1 21 | mouse_effects = 1 22 | font_shadow = 0 23 | mouse_hover_icon_asb = 100 0 10 24 | mouse_pressed_icon_asb = 100 0 0 25 | scale_relative_to_dpi = 0 26 | scale_relative_to_screen_height = 0 27 | 28 | #------------------------------------- 29 | # Taskbar 30 | taskbar_mode = single_desktop 31 | taskbar_hide_if_empty = 0 32 | taskbar_padding = 0 0 2 33 | taskbar_background_id 
= 0 34 | taskbar_active_background_id = 0 35 | taskbar_name = 1 36 | taskbar_hide_inactive_tasks = 0 37 | taskbar_hide_different_monitor = 0 38 | taskbar_hide_different_desktop = 0 39 | taskbar_always_show_all_desktop_tasks = 0 40 | taskbar_name_padding = 4 2 41 | taskbar_name_background_id = 0 42 | taskbar_name_active_background_id = 0 43 | taskbar_name_font_color = #e3e3e3 100 44 | taskbar_name_active_font_color = #ffffff 100 45 | taskbar_distribute_size = 0 46 | taskbar_sort_order = none 47 | task_align = left 48 | 49 | #------------------------------------- 50 | # Launcher 51 | launcher_padding = 4 8 4 52 | launcher_background_id = 0 53 | launcher_icon_background_id = 0 54 | launcher_icon_size = 48 55 | launcher_icon_asb = 100 0 0 56 | launcher_icon_theme_override = 0 57 | startup_notifications = 1 58 | launcher_tooltip = 1 59 | 60 | #------------------------------------- 61 | # Launcher icon 62 | launcher_item_app = /usr/share/applications/libreoffice-calc.desktop 63 | launcher_item_app = /home/computeruse/.config/tint2/applications/terminal.desktop 64 | launcher_item_app = /home/computeruse/.config/tint2/applications/firefox-custom.desktop 65 | launcher_item_app = /usr/share/applications/xpaint.desktop 66 | launcher_item_app = /usr/share/applications/xpdf.desktop 67 | launcher_item_app = /home/computeruse/.config/tint2/applications/gedit.desktop 68 | launcher_item_app = /usr/share/applications/galculator.desktop 69 | 70 | #------------------------------------- 71 | # Background definitions 72 | # ID 1 73 | rounded = 0 74 | border_width = 0 75 | background_color = #000000 60 76 | border_color = #000000 30 77 | 78 | # ID 2 79 | rounded = 4 80 | border_width = 1 81 | background_color = #777777 20 82 | border_color = #777777 30 83 | 84 | # ID 3 85 | rounded = 4 86 | border_width = 1 87 | background_color = #777777 20 88 | border_color = #ffffff 40 89 | 90 | # ID 4 91 | rounded = 4 92 | border_width = 1 93 | background_color = #aa4400 100 94 | border_color = 
#aa7733 100 95 | 96 | # ID 5 97 | rounded = 4 98 | border_width = 1 99 | background_color = #aaaa00 100 100 | border_color = #aaaa00 100 101 | -------------------------------------------------------------------------------- /computer-use-demo/image/.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [server] 2 | fileWatcherType = "auto" 3 | runOnSave = true 4 | 5 | [browser] 6 | gatherUsageStats = false 7 | 8 | [theme] 9 | primaryColor="#4361ee" 10 | backgroundColor="#f8f9fa" 11 | secondaryBackgroundColor="#e9ecef" 12 | textColor="#212529" 13 | font="sans serif" 14 | -------------------------------------------------------------------------------- /computer-use-demo/image/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | ./start_all.sh 5 | ./novnc_startup.sh 6 | 7 | python http_server.py > /tmp/server_logs.txt 2>&1 & 8 | 9 | # Run browserbase.py and redirect its output to both a file and stderr 10 | python /home/computeruse/computer_use_demo/tools/browserbase.py 2>&1 | tee /tmp/browserbase_logs.txt >&2 & 11 | 12 | # Run the new script to open the debugger URL 13 | ./open_debugger.sh & 14 | 15 | STREAMLIT_SERVER_PORT=8501 python -m streamlit run computer_use_demo/streamlit.py > /tmp/streamlit_stdout.log & 16 | 17 | echo "✨ Computer Use Demo is ready!" 
18 | echo "➡️ Open http://localhost:8080 in your browser to begin" 19 | 20 | # Keep the container running 21 | tail -f /dev/null -------------------------------------------------------------------------------- /computer-use-demo/image/http_server.py: -------------------------------------------------------------------------------- 1 | import os 2 | import socket 3 | from http.server import HTTPServer, SimpleHTTPRequestHandler 4 | 5 | 6 | class HTTPServerV6(HTTPServer): 7 | address_family = socket.AF_INET6 8 | 9 | 10 | def run_server(): 11 | os.chdir(os.path.dirname(__file__) + "/static_content") 12 | server_address = ("::", 8080) 13 | httpd = HTTPServerV6(server_address, SimpleHTTPRequestHandler) 14 | print("Starting HTTP server on port 8080...") # noqa: T201 15 | httpd.serve_forever() 16 | 17 | 18 | if __name__ == "__main__": 19 | run_server() 20 | -------------------------------------------------------------------------------- /computer-use-demo/image/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Computer Use Demo 5 | 6 | 28 | 29 | 30 |
31 | 36 | 41 |
42 | 43 | 44 | -------------------------------------------------------------------------------- /computer-use-demo/image/mutter_startup.sh: -------------------------------------------------------------------------------- 1 | echo "starting mutter" 2 | XDG_SESSION_TYPE=x11 mutter --replace --sm-disable 2>/tmp/mutter_stderr.log & 3 | 4 | # Wait for tint2 window properties to appear 5 | timeout=30 6 | while [ $timeout -gt 0 ]; do 7 | if xdotool search --class "mutter" >/dev/null 2>&1; then 8 | break 9 | fi 10 | sleep 1 11 | ((timeout--)) 12 | done 13 | 14 | if [ $timeout -eq 0 ]; then 15 | echo "mutter stderr output:" >&2 16 | cat /tmp/mutter_stderr.log >&2 17 | exit 1 18 | fi 19 | 20 | rm /tmp/mutter_stderr.log 21 | -------------------------------------------------------------------------------- /computer-use-demo/image/novnc_startup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "starting noVNC" 3 | 4 | # Start noVNC with explicit websocket settings 5 | /opt/noVNC/utils/novnc_proxy \ 6 | --vnc localhost:5900 \ 7 | --listen 6080 \ 8 | --web /opt/noVNC \ 9 | > /tmp/novnc.log 2>&1 & 10 | 11 | # Wait for noVNC to start 12 | timeout=10 13 | while [ $timeout -gt 0 ]; do 14 | if netstat -tuln | grep -q ":6080 "; then 15 | break 16 | fi 17 | sleep 1 18 | ((timeout--)) 19 | done 20 | 21 | echo "noVNC started successfully" 22 | -------------------------------------------------------------------------------- /computer-use-demo/image/open_debugger.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Wait for the URL file to be created 4 | while [ ! 
-f /tmp/debugger_url.txt ]; do 5 | sleep 1 6 | done 7 | 8 | # Read the URL from the file 9 | URL=$(cat /tmp/debugger_url.txt) 10 | 11 | # Open the URL in Firefox 12 | # DISPLAY=:1 firefox-esr "$URL" 13 | 14 | # Open the URL using w3m in the background 15 | # DISPLAY=:1 xterm -e "w3m '$URL'" & 16 | 17 | # Open the URL using curl and display it in less 18 | # DISPLAY=:1 xterm -e "curl -s '$URL' | less" & 19 | 20 | # Print the URL to the console 21 | echo "Debugger URL: $URL" 22 | 23 | # Open Firefox in kiosk mode 24 | DISPLAY=:1 firefox-esr --kiosk "$URL" & 25 | # DISPLAY=:1 firefox-esr --fullscreen "$URL" & -------------------------------------------------------------------------------- /computer-use-demo/image/start_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | export DISPLAY=:${DISPLAY_NUM} 6 | ./xvfb_startup.sh 7 | ./tint2_startup.sh 8 | ./mutter_startup.sh 9 | ./x11vnc_startup.sh 10 | -------------------------------------------------------------------------------- /computer-use-demo/image/static_content/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Computer Use Demo 5 | 6 | 28 | 29 | 30 |
31 | 36 | 42 | 48 | 70 |
71 | 72 | 73 | -------------------------------------------------------------------------------- /computer-use-demo/image/tint2_startup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "starting tint2 on display :$DISPLAY_NUM ..." 3 | 4 | # Start tint2 and capture its stderr 5 | tint2 -c $HOME/.config/tint2/tint2rc 2>/tmp/tint2_stderr.log & 6 | 7 | # Wait for tint2 window properties to appear 8 | timeout=30 9 | while [ $timeout -gt 0 ]; do 10 | if xdotool search --class "tint2" >/dev/null 2>&1; then 11 | break 12 | fi 13 | sleep 1 14 | ((timeout--)) 15 | done 16 | 17 | if [ $timeout -eq 0 ]; then 18 | echo "tint2 stderr output:" >&2 19 | cat /tmp/tint2_stderr.log >&2 20 | exit 1 21 | fi 22 | 23 | # Remove the temporary stderr log file 24 | rm /tmp/tint2_stderr.log 25 | -------------------------------------------------------------------------------- /computer-use-demo/image/x11vnc_startup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "starting vnc" 3 | 4 | (x11vnc -display $DISPLAY \ 5 | -forever \ 6 | -shared \ 7 | -wait 50 \ 8 | -timeout 60 \ 9 | -noxrecord \ 10 | -noxfixes \ 11 | -noxdamage \ 12 | -rfbport 5900 \ 13 | 2>/tmp/x11vnc_stderr.log) & 14 | 15 | x11vnc_pid=$! 16 | 17 | # Wait for x11vnc to start 18 | timeout=10 19 | while [ $timeout -gt 0 ]; do 20 | if netstat -tuln | grep -q ":5900 "; then 21 | break 22 | fi 23 | sleep 1 24 | ((timeout--)) 25 | done 26 | 27 | if [ $timeout -eq 0 ]; then 28 | echo "x11vnc failed to start, stderr output:" >&2 29 | cat /tmp/x11vnc_stderr.log >&2 30 | exit 1 31 | fi 32 | 33 | : > /tmp/x11vnc_stderr.log 34 | 35 | # Monitor x11vnc process in the background 36 | ( 37 | while true; do 38 | if ! kill -0 $x11vnc_pid 2>/dev/null; then 39 | echo "x11vnc process crashed, restarting..." 
>&2 40 | if [ -f /tmp/x11vnc_stderr.log ]; then 41 | echo "x11vnc stderr output:" >&2 42 | cat /tmp/x11vnc_stderr.log >&2 43 | rm /tmp/x11vnc_stderr.log 44 | fi 45 | exec "$0" 46 | fi 47 | sleep 5 48 | done 49 | ) & 50 | -------------------------------------------------------------------------------- /computer-use-demo/image/xvfb_startup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e # Exit on error 3 | 4 | DPI=96 5 | RES_AND_DEPTH=${WIDTH}x${HEIGHT}x24 6 | 7 | # Function to check if Xvfb is already running 8 | check_xvfb_running() { 9 | if [ -e /tmp/.X${DISPLAY_NUM}-lock ]; then 10 | return 0 # Xvfb is already running 11 | else 12 | return 1 # Xvfb is not running 13 | fi 14 | } 15 | 16 | # Function to check if Xvfb is ready 17 | wait_for_xvfb() { 18 | local timeout=10 19 | local start_time=$(date +%s) 20 | while ! xdpyinfo >/dev/null 2>&1; do 21 | if [ $(($(date +%s) - start_time)) -gt $timeout ]; then 22 | echo "Xvfb failed to start within $timeout seconds" >&2 23 | return 1 24 | fi 25 | sleep 0.1 26 | done 27 | return 0 28 | } 29 | 30 | # Check if Xvfb is already running 31 | if check_xvfb_running; then 32 | echo "Xvfb is already running on display ${DISPLAY}" 33 | exit 0 34 | fi 35 | 36 | # Start Xvfb 37 | Xvfb $DISPLAY -ac -screen 0 $RES_AND_DEPTH -retro -dpi $DPI -nolisten tcp -nolisten unix & 38 | XVFB_PID=$! 39 | 40 | # Wait for Xvfb to start 41 | if wait_for_xvfb; then 42 | echo "Xvfb started successfully on display ${DISPLAY}" 43 | echo "Xvfb PID: $XVFB_PID" 44 | else 45 | echo "Xvfb failed to start" 46 | kill $XVFB_PID 47 | exit 1 48 | fi 49 | -------------------------------------------------------------------------------- /computer-use-demo/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.pyright] 2 | venvPath = "." 3 | venv = ".venv" 4 | useLibraryCodeForTypes = false 5 | 6 | [tool.pytest.ini_options] 7 | pythonpath = "." 
8 | asyncio_mode = "auto" 9 | -------------------------------------------------------------------------------- /computer-use-demo/ruff.toml: -------------------------------------------------------------------------------- 1 | extend-exclude = [".venv"] 2 | 3 | [format] 4 | docstring-code-format = true 5 | 6 | [lint] 7 | select = [ 8 | "A", 9 | "ASYNC", 10 | "B", 11 | "E", 12 | "F", 13 | "I", 14 | "PIE", 15 | "RUF200", 16 | "T20", 17 | "UP", 18 | "W", 19 | ] 20 | 21 | ignore = ["E501", "ASYNC230"] 22 | 23 | [lint.isort] 24 | combine-as-imports = true 25 | -------------------------------------------------------------------------------- /computer-use-demo/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if ! command -v cargo &> /dev/null; then 3 | echo "Cargo (the package manager for Rust) is not present. This is required for one of this module's dependencies." 4 | echo "See https://www.rust-lang.org/tools/install for installation instructions." 
5 | exit 1 6 | fi 7 | 8 | python3 -m venv .venv 9 | source .venv/bin/activate 10 | pip install --upgrade pip 11 | pip install -r dev-requirements.txt 12 | pre-commit install 13 | -------------------------------------------------------------------------------- /computer-use-demo/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest import mock 3 | 4 | import pytest 5 | 6 | 7 | @pytest.fixture(autouse=True) 8 | def mock_screen_dimensions(): 9 | with mock.patch.dict( 10 | os.environ, {"HEIGHT": "768", "WIDTH": "1024", "DISPLAY_NUM": "1"} 11 | ): 12 | yield 13 | -------------------------------------------------------------------------------- /computer-use-demo/tests/loop_test.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | from anthropic.types import TextBlock, ToolUseBlock 4 | from anthropic.types.beta import BetaMessage, BetaMessageParam 5 | 6 | from computer_use_demo.loop import APIProvider, sampling_loop 7 | 8 | 9 | async def test_loop(): 10 | client = mock.Mock() 11 | client.beta.messages.with_raw_response.create.return_value = mock.Mock() 12 | client.beta.messages.with_raw_response.create.return_value.parse.side_effect = [ 13 | mock.Mock( 14 | spec=BetaMessage, 15 | content=[ 16 | TextBlock(type="text", text="Hello"), 17 | ToolUseBlock( 18 | type="tool_use", id="1", name="computer", input={"action": "test"} 19 | ), 20 | ], 21 | ), 22 | mock.Mock(spec=BetaMessage, content=[TextBlock(type="text", text="Done!")]), 23 | ] 24 | 25 | tool_collection = mock.AsyncMock() 26 | tool_collection.run.return_value = mock.Mock( 27 | output="Tool output", error=None, base64_image=None 28 | ) 29 | 30 | output_callback = mock.Mock() 31 | tool_output_callback = mock.Mock() 32 | api_response_callback = mock.Mock() 33 | 34 | with mock.patch( 35 | "computer_use_demo.loop.Anthropic", return_value=client 36 | ), mock.patch( 37 | 
"computer_use_demo.loop.ToolCollection", return_value=tool_collection 38 | ): 39 | messages: list[BetaMessageParam] = [{"role": "user", "content": "Test message"}] 40 | result = await sampling_loop( 41 | model="test-model", 42 | provider=APIProvider.ANTHROPIC, 43 | system_prompt_suffix="", 44 | messages=messages, 45 | output_callback=output_callback, 46 | tool_output_callback=tool_output_callback, 47 | api_response_callback=api_response_callback, 48 | api_key="test-key", 49 | ) 50 | 51 | assert len(result) == 4 52 | assert result[0] == {"role": "user", "content": "Test message"} 53 | assert result[1]["role"] == "assistant" 54 | assert result[2]["role"] == "user" 55 | assert result[3]["role"] == "assistant" 56 | 57 | assert client.beta.messages.with_raw_response.create.call_count == 2 58 | tool_collection.run.assert_called_once_with( 59 | name="computer", tool_input={"action": "test"} 60 | ) 61 | output_callback.assert_called_with(TextBlock(text="Done!", type="text")) 62 | assert output_callback.call_count == 3 63 | assert tool_output_callback.call_count == 1 64 | assert api_response_callback.call_count == 2 65 | -------------------------------------------------------------------------------- /computer-use-demo/tests/streamlit_test.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | import pytest 4 | from streamlit.testing.v1 import AppTest 5 | 6 | from computer_use_demo.streamlit import Sender, TextBlock 7 | 8 | 9 | @pytest.fixture 10 | def streamlit_app(): 11 | return AppTest.from_file("computer_use_demo/streamlit.py") 12 | 13 | 14 | def test_streamlit(streamlit_app: AppTest): 15 | streamlit_app.run() 16 | streamlit_app.text_input[1].set_value("sk-ant-0000000000000").run() 17 | with mock.patch("computer_use_demo.loop.sampling_loop") as patch: 18 | streamlit_app.chat_input[0].set_value("Hello").run() 19 | assert patch.called 20 | assert patch.call_args.kwargs["messages"] == [ 21 | {"role": 
@pytest.fixture
def bash_tool():
    """Provide a newly constructed ``BashTool`` to each test."""
    tool = BashTool()
    return tool


@pytest.mark.asyncio
async def test_bash_tool_restart(bash_tool):
    """A restart is acknowledged and the tool keeps working afterwards."""
    restart_outcome = await bash_tool(restart=True)
    assert restart_outcome.system == "tool has been restarted."

    # The tool must still execute commands once it has been restarted.
    echo_outcome = await bash_tool(command="echo 'Hello after restart'")
    assert "Hello after restart" in echo_outcome.output


@pytest.mark.asyncio
async def test_bash_tool_run_command(bash_tool):
    """A plain ``echo`` yields its text as output and an empty error string."""
    outcome = await bash_tool(command="echo 'Hello, World!'")
    printed = outcome.output.strip()
    assert printed == "Hello, World!"
    assert outcome.error == ""


@pytest.mark.asyncio
async def test_bash_tool_no_command(bash_tool):
    """Invoking the tool with no arguments raises ``ToolError``."""
    expected_message = "no command provided."
    with pytest.raises(ToolError, match=expected_message):
        await bash_tool()


@pytest.mark.asyncio
async def test_bash_tool_session_creation(bash_tool):
    """After the first command the tool holds a non-``None`` session."""
    outcome = await bash_tool(command="echo 'Session created'")
    session = bash_tool._session
    assert session is not None
    assert "Session created" in outcome.output


@pytest.mark.asyncio
async def test_bash_tool_session_reuse(bash_tool):
    """Two consecutive commands on one tool each return their own output."""
    first_outcome = await bash_tool(command="echo 'First command'")
    second_outcome = await bash_tool(command="echo 'Second command'")

    assert "First command" in first_outcome.output
    assert "Second command" in second_outcome.output


@pytest.mark.asyncio
async def test_bash_tool_session_error(bash_tool):
    """An unknown executable reports ``command not found`` in ``error``."""
    outcome = await bash_tool(command="invalid_command_that_does_not_exist")
    assert "command not found" in outcome.error


@pytest.mark.asyncio
async def test_bash_tool_non_zero_exit(bash_tool):
    """A silent non-zero exit leaves both ``error`` and ``output`` blank."""
    outcome = await bash_tool(command="bash -c 'exit 1'")
    stderr_text = outcome.error.strip()
    stdout_text = outcome.output.strip()
    assert stderr_text == ""
    assert stdout_text == ""


@pytest.mark.asyncio
async def test_bash_tool_timeout(bash_tool):
    """A command that outlasts the session timeout raises ``ToolError``."""
    # Run one command first; the session's private timeout is adjusted below.
    await bash_tool(command="echo 'Hello, World!'")
    # Use a very short timeout so that ``sleep 1`` exceeds it.
    bash_tool._session._timeout = 0.1
    expected_message = (
        "timed out: bash has not returned in 0.1 seconds and must be restarted"
    )
    with pytest.raises(ToolError, match=expected_message):
        await bash_tool(command="sleep 1")
@pytest.fixture
def computer_tool():
    """Provide a newly constructed ``ComputerTool`` to each test."""
    tool = ComputerTool()
    return tool


@pytest.mark.asyncio
async def test_computer_tool_mouse_move(computer_tool):
    """``mouse_move`` issues exactly one xdotool ``mousemove`` shell call."""
    with patch.object(computer_tool, "shell", new_callable=AsyncMock) as shell_stub:
        shell_stub.return_value = ToolResult(output="Mouse moved")
        outcome = await computer_tool(action="mouse_move", coordinate=[100, 200])

    # The mock keeps its call record after the patch has been undone.
    expected_command = f"{computer_tool.xdotool} mousemove --sync 100 200"
    shell_stub.assert_called_once_with(expected_command)
    assert outcome.output == "Mouse moved"


@pytest.mark.asyncio
async def test_computer_tool_type(computer_tool):
    """``type`` runs one shell command and returns the mocked screenshot."""
    shell_patcher = patch.object(computer_tool, "shell", new_callable=AsyncMock)
    screenshot_patcher = patch.object(
        computer_tool, "screenshot", new_callable=AsyncMock
    )
    with shell_patcher as shell_stub, screenshot_patcher as screenshot_stub:
        shell_stub.return_value = ToolResult(output="Text typed")
        screenshot_stub.return_value = ToolResult(base64_image="base64_screenshot")
        outcome = await computer_tool(action="type", text="Hello, World!")

    assert shell_stub.call_count == 1
    issued_command = shell_stub.call_args[0][0]
    assert "type --delay 12 -- 'Hello, World!'" in issued_command
    assert outcome.output == "Text typed"
    assert outcome.base64_image == "base64_screenshot"


@pytest.mark.asyncio
async def test_computer_tool_screenshot(computer_tool):
    """``screenshot`` delegates once to the tool's ``screenshot`` method."""
    screenshot_patcher = patch.object(
        computer_tool, "screenshot", new_callable=AsyncMock
    )
    with screenshot_patcher as screenshot_stub:
        screenshot_stub.return_value = ToolResult(base64_image="base64_screenshot")
        outcome = await computer_tool(action="screenshot")

    screenshot_stub.assert_called_once()
    assert outcome.base64_image == "base64_screenshot"


@pytest.mark.asyncio
async def test_computer_tool_scaling(computer_tool):
    """Coordinates scale both ways on a 1920x1080 screen, or not at all."""
    computer_tool._scaling_enabled = True
    computer_tool.width = 1920
    computer_tool.height = 1080

    # API coordinates are mapped up to the computer's resolution.
    to_x, to_y = computer_tool.scale_coordinates(ScalingSource.API, 1366, 768)
    assert (to_x, to_y) == (1920, 1080)

    # Computer coordinates are mapped down to the API's resolution.
    to_x, to_y = computer_tool.scale_coordinates(ScalingSource.COMPUTER, 1920, 1080)
    assert (to_x, to_y) == (1366, 768)

    # With scaling switched off the coordinates come back unchanged.
    computer_tool._scaling_enabled = False
    to_x, to_y = computer_tool.scale_coordinates(ScalingSource.API, 1366, 768)
    assert (to_x, to_y) == (1366, 768)


@pytest.mark.asyncio
async def test_computer_tool_scaling_with_different_aspect_ratio(computer_tool):
    """A 16:10 screen (1920x1200) scales to and from 1280x800."""
    computer_tool._scaling_enabled = True
    computer_tool.width = 1920
    computer_tool.height = 1200  # 16:10 aspect ratio

    # API coordinates are mapped up to the computer's resolution.
    to_x, to_y = computer_tool.scale_coordinates(ScalingSource.API, 1280, 800)
    assert (to_x, to_y) == (1920, 1200)

    # Computer coordinates are mapped down to the API's resolution.
    to_x, to_y = computer_tool.scale_coordinates(ScalingSource.COMPUTER, 1920, 1200)
    assert (to_x, to_y) == (1280, 800)


@pytest.mark.asyncio
async def test_computer_tool_no_scaling_for_unsupported_resolution(computer_tool):
    """A 4096x2160 screen is passed through unscaled in both directions."""
    computer_tool._scaling_enabled = True
    computer_tool.width = 4096
    computer_tool.height = 2160

    # Neither direction may alter the coordinates for this resolution.
    to_x, to_y = computer_tool.scale_coordinates(ScalingSource.API, 4096, 2160)
    assert (to_x, to_y) == (4096, 2160)

    to_x, to_y = computer_tool.scale_coordinates(ScalingSource.COMPUTER, 4096, 2160)
    assert (to_x, to_y) == (4096, 2160)


@pytest.mark.asyncio
async def test_computer_tool_scaling_out_of_bounds(computer_tool):
    """API coordinates beyond the screen raise ``ToolError``."""
    computer_tool._scaling_enabled = True
    computer_tool.width = 1920
    computer_tool.height = 1080

    # The call is expected to raise, so its return value is never bound.
    out_of_bounds_pattern = "Coordinates .*, .* are out of bounds"
    with pytest.raises(ToolError, match=out_of_bounds_pattern):
        computer_tool.scale_coordinates(ScalingSource.API, 2000, 1500)


@pytest.mark.asyncio
async def test_computer_tool_invalid_action(computer_tool):
    """An unrecognised action name raises ``ToolError``."""
    expected_message = "Invalid action: invalid_action"
    with pytest.raises(ToolError, match=expected_message):
        await computer_tool(action="invalid_action")


@pytest.mark.asyncio
async def test_computer_tool_missing_coordinate(computer_tool):
    """``mouse_move`` without a coordinate raises ``ToolError``."""
    expected_message = "coordinate is required for mouse_move"
    with pytest.raises(ToolError, match=expected_message):
        await computer_tool(action="mouse_move")


@pytest.mark.asyncio
async def test_computer_tool_missing_text(computer_tool):
    """``type`` without text raises ``ToolError``."""
    expected_message = "text is required for type"
    with pytest.raises(ToolError, match=expected_message):
        await computer_tool(action="type")
@pytest.mark.asyncio
async def test_create_command():
    """Exercise ``create``: success, missing ``file_text``, existing target."""
    editor = EditTool()

    # Happy path: the target does not exist and content is supplied.
    with (
        patch("pathlib.Path.exists", return_value=False),
        patch("pathlib.Path.write_text") as write_stub,
    ):
        outcome = await editor(
            command="create", path="/test/newfile.txt", file_text="New file content"
        )
    assert isinstance(outcome, ToolResult)
    assert outcome.output
    assert "File created successfully" in outcome.output
    write_stub.assert_called_once_with("New file content")

    # Omitting ``file_text`` is rejected.
    with (
        patch("pathlib.Path.exists", return_value=False),
        pytest.raises(ToolError, match="Parameter `file_text` is required"),
    ):
        await editor(command="create", path="/test/newfile.txt")

    # Creating over a path that already exists is rejected.
    with (
        patch("pathlib.Path.exists", return_value=True),
        pytest.raises(ToolError, match="File already exists"),
    ):
        await editor(
            command="create", path="/test/existingfile.txt", file_text="Content"
        )


@pytest.mark.asyncio
async def test_str_replace_command():
    """Exercise ``str_replace``: unique, absent and repeated matches, history."""
    editor = EditTool()
    # The same successful replacement is issued twice in this test.
    replace_original = dict(
        command="str_replace",
        path="/test/file.txt",
        old_str="Original",
        new_str="New",
    )

    # A string occurring exactly once is replaced and written back.
    with (
        patch("pathlib.Path.exists", return_value=True),
        patch("pathlib.Path.is_dir", return_value=False),
        patch("pathlib.Path.read_text") as read_stub,
        patch("pathlib.Path.write_text") as write_stub,
    ):
        read_stub.return_value = "Original content"
        outcome = await editor(**replace_original)
    assert isinstance(outcome, CLIResult)
    assert outcome.output
    assert "has been edited" in outcome.output
    write_stub.assert_called_once_with("New content")

    # A string that is not in the file is rejected.
    with (
        patch("pathlib.Path.exists", return_value=True),
        patch("pathlib.Path.is_dir", return_value=False),
        patch("pathlib.Path.read_text") as read_stub,
    ):
        read_stub.return_value = "Original content"
        with pytest.raises(ToolError, match="did not appear verbatim"):
            await editor(
                command="str_replace",
                path="/test/file.txt",
                old_str="Nonexistent",
                new_str="New",
            )

    # A string occurring more than once is rejected.
    with (
        patch("pathlib.Path.exists", return_value=True),
        patch("pathlib.Path.is_dir", return_value=False),
        patch("pathlib.Path.read_text") as read_stub,
    ):
        read_stub.return_value = "Test test test"
        with pytest.raises(ToolError, match="Multiple occurrences"):
            await editor(
                command="str_replace",
                path="/test/file.txt",
                old_str="test",
                new_str="example",
            )

    # Starting from an empty history, a replacement records the prior content.
    editor._file_history.clear()
    with (
        patch("pathlib.Path.exists", return_value=True),
        patch("pathlib.Path.is_dir", return_value=False),
        patch("pathlib.Path.read_text") as read_stub,
        patch("pathlib.Path.write_text"),
    ):
        read_stub.return_value = "Original content"
        await editor(**replace_original)
    recorded = editor._file_history[Path("/test/file.txt")]
    assert recorded == ["Original content"]


@pytest.mark.asyncio
async def test_insert_command():
    """Exercise ``insert``: valid positions, an invalid line number, history."""
    editor = EditTool()

    # Each row: (file content, insert_line, new_str, expected written text).
    successful_inserts = [
        # Insert at a valid line number inside the file.
        (
            "Line 1\nLine 2\nLine 3",
            2,
            "New Line",
            "Line 1\nLine 2\nNew Line\nLine 3",
        ),
        # Insert at the beginning of the file (line 0).
        (
            "Line 1\nLine 2",
            0,
            "New First Line",
            "New First Line\nLine 1\nLine 2",
        ),
        # Insert at the end of the file.
        (
            "Line 1\nLine 2",
            2,
            "New Last Line",
            "Line 1\nLine 2\nNew Last Line",
        ),
    ]
    for content, line_number, inserted_text, expected_text in successful_inserts:
        with (
            patch("pathlib.Path.exists", return_value=True),
            patch("pathlib.Path.is_dir", return_value=False),
            patch("pathlib.Path.read_text") as read_stub,
            patch("pathlib.Path.write_text") as write_stub,
        ):
            read_stub.return_value = content
            outcome = await editor(
                command="insert",
                path="/test/file.txt",
                insert_line=line_number,
                new_str=inserted_text,
            )
        assert isinstance(outcome, CLIResult)
        assert outcome.output
        assert "has been edited" in outcome.output
        write_stub.assert_called_once_with(expected_text)

    # A line number past the end of the file is rejected.
    with (
        patch("pathlib.Path.exists", return_value=True),
        patch("pathlib.Path.is_dir", return_value=False),
        patch("pathlib.Path.read_text") as read_stub,
    ):
        read_stub.return_value = "Line 1\nLine 2"
        with pytest.raises(ToolError, match="Invalid `insert_line` parameter"):
            await editor(
                command="insert",
                path="/test/file.txt",
                insert_line=5,
                new_str="Invalid Line",
            )

    # Starting from an empty history, an insertion records the prior content.
    editor._file_history.clear()
    with (
        patch("pathlib.Path.exists", return_value=True),
        patch("pathlib.Path.is_dir", return_value=False),
        patch("pathlib.Path.read_text") as read_stub,
        patch("pathlib.Path.write_text"),
    ):
        read_stub.return_value = "Original content"
        await editor(
            command="insert", path="/test/file.txt", insert_line=1, new_str="New Line"
        )
    recorded = editor._file_history[Path("/test/file.txt")]
    assert recorded == ["Original content"]


@pytest.mark.asyncio
async def test_undo_edit_command():
    """Exercise ``undo_edit`` after a replace, after an insert, and with no history."""
    editor = EditTool()
    undo_request = dict(command="undo_edit", path="/test/file.txt")
    undo_banner = "Last edit to /test/file.txt undone successfully"

    # Undo a ``str_replace``: the original text is written back.
    with (
        patch("pathlib.Path.exists", return_value=True),
        patch("pathlib.Path.is_dir", return_value=False),
        patch("pathlib.Path.read_text") as read_stub,
        patch("pathlib.Path.write_text") as write_stub,
    ):
        read_stub.return_value = "Original content"
        await editor(
            command="str_replace",
            path="/test/file.txt",
            old_str="Original",
            new_str="New",
        )
        # From here on the mocked file reads back as the edited text.
        read_stub.return_value = "New content"
        outcome = await editor(**undo_request)
    assert isinstance(outcome, CLIResult)
    assert outcome.output
    assert undo_banner in outcome.output
    write_stub.assert_called_with("Original content")

    # Undo an ``insert``: the pre-insert text is written back.
    editor._file_history.clear()
    with (
        patch("pathlib.Path.exists", return_value=True),
        patch("pathlib.Path.is_dir", return_value=False),
        patch("pathlib.Path.read_text") as read_stub,
        patch("pathlib.Path.write_text") as write_stub,
    ):
        read_stub.return_value = "Line 1\nLine 2"
        await editor(
            command="insert", path="/test/file.txt", insert_line=1, new_str="New Line"
        )
        read_stub.return_value = "Line 1\nNew Line\nLine 2"
        outcome = await editor(**undo_request)
    assert isinstance(outcome, CLIResult)
    assert outcome.output
    assert undo_banner in outcome.output
    write_stub.assert_called_with("Line 1\nLine 2")

    # With the history emptied there is nothing to undo.
    editor._file_history.clear()
    with (
        patch("pathlib.Path.exists", return_value=True),
        patch("pathlib.Path.is_dir", return_value=False),
        pytest.raises(ToolError, match="No edit history found"),
    ):
        await editor(**undo_request)


@pytest.mark.asyncio
async def test_validate_path():
    """Exercise ``validate_path`` with accepted and rejected command/path pairs."""
    editor = EditTool()

    # An existing absolute file path is accepted for ``view``.
    with (
        patch("pathlib.Path.exists", return_value=True),
        patch("pathlib.Path.is_dir", return_value=False),
    ):
        editor.validate_path("view", Path("/valid/path.txt"))

    # A relative path is rejected.
    with pytest.raises(ToolError, match="not an absolute path"):
        editor.validate_path("view", Path("relative/path.txt"))

    # A missing path is rejected for a non-create command.
    with (
        patch("pathlib.Path.exists", return_value=False),
        pytest.raises(ToolError, match="does not exist"),
    ):
        editor.validate_path("view", Path("/nonexistent/file.txt"))

    # An existing path is rejected for ``create``.
    with (
        patch("pathlib.Path.exists", return_value=True),
        pytest.raises(ToolError, match="File already exists"),
    ):
        editor.validate_path("create", Path("/existing/file.txt"))

    # A directory is rejected for a non-view command.
    with (
        patch("pathlib.Path.exists", return_value=True),
        patch("pathlib.Path.is_dir", return_value=True),
        pytest.raises(ToolError, match="is a directory"),
    ):
        editor.validate_path("str_replace", Path("/directory/path"))

    # A directory is accepted for ``view``.
    with (
        patch("pathlib.Path.exists", return_value=True),
        patch("pathlib.Path.is_dir", return_value=True),
    ):
        editor.validate_path("view", Path("/directory/path"))