├── .github
    └── workflows
    │   ├── build.yaml
    │   ├── reusable_build_step.yaml
    │   └── tests.yaml
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── anthropic-browserbase.png
├── computer-use-demo
    ├── .gitignore
    ├── Dockerfile
    ├── LICENSE
    ├── computer_use_demo
    │   ├── .env.template
    │   ├── __init__.py
    │   ├── loop.py
    │   ├── requirements.txt
    │   ├── streamlit.py
    │   └── tools
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── bash.py
    │   │   ├── browserbase.py
    │   │   ├── collection.py
    │   │   ├── computer.py
    │   │   ├── edit.py
    │   │   └── run.py
    ├── dev-requirements.txt
    ├── image
    │   ├── .config
    │   │   └── tint2
    │   │   │   ├── applications
    │   │   │       ├── firefox-custom.desktop
    │   │   │       ├── gedit.desktop
    │   │   │       └── terminal.desktop
    │   │   │   └── tint2rc
    │   ├── .streamlit
    │   │   └── config.toml
    │   ├── entrypoint.sh
    │   ├── http_server.py
    │   ├── index.html
    │   ├── mutter_startup.sh
    │   ├── novnc_startup.sh
    │   ├── open_debugger.sh
    │   ├── start_all.sh
    │   ├── static_content
    │   │   └── index.html
    │   ├── tint2_startup.sh
    │   ├── x11vnc_startup.sh
    │   └── xvfb_startup.sh
    ├── pyproject.toml
    ├── ruff.toml
    ├── setup.sh
    └── tests
    │   ├── conftest.py
    │   ├── loop_test.py
    │   ├── streamlit_test.py
    │   └── tools
    │       ├── bash_test.py
    │       ├── computer_test.py
    │       └── edit_test.py
└── pyproject.toml


/.github/workflows/build.yaml:
--------------------------------------------------------------------------------
 1 | env:
 2 |   REGISTRY: ghcr.io/anthropics/anthropic-quickstarts  # read as ${REGISTRY} by the merge job's run steps
 3 | name: build
 4 | on:
 5 |   pull_request:
 6 |     paths:
 7 |       - .github/**
 8 |       - computer-use-demo/**
 9 |   push:
10 |     branches:
11 |       - main
12 |     paths:
13 |       - .github/**
14 |       - computer-use-demo/**
15 | jobs:
16 |   build-amd64:  # amd64 image, built by the reusable workflow
17 |     uses: ./.github/workflows/reusable_build_step.yaml
18 |     with:
19 |       platform: amd64
20 |       builder: ubuntu-latest-16core
21 |       registry: ghcr.io/anthropics/anthropic-quickstarts  # same value as env.REGISTRY above
22 |       tag_prefix: computer-use-demo-
23 |       context: computer-use-demo
24 |     permissions:
25 |       contents: read
26 |       packages: write
27 |   build-arm64:  # arm64 image, built by the reusable workflow
28 |     uses: ./.github/workflows/reusable_build_step.yaml
29 |     with:
30 |       platform: arm64
31 |       builder: ubuntu-22.04-arm64-16core
32 |       registry: ghcr.io/anthropics/anthropic-quickstarts  # same value as env.REGISTRY above
33 |       tag_prefix: computer-use-demo-
34 |       context: computer-use-demo
35 |     permissions:
36 |       contents: read
37 |       packages: write
38 |   merge:  # creates the un-suffixed tags from the -amd64 and -arm64 tags
39 |     runs-on: ubuntu-latest
40 |     needs:  # both platform builds must finish first
41 |       - build-arm64
42 |       - build-amd64
43 |     permissions:
44 |       contents: read
45 |       packages: write
46 |     steps:
47 |       - uses: actions/checkout@v4
48 |       - name: Login to ghcr.io
49 |         uses: docker/login-action@v3
50 |         with:
51 |           registry: ghcr.io
52 |           username: ${{github.actor}}
53 |           password: ${{secrets.GITHUB_TOKEN}}
54 |       - name: Set up Docker Buildx
55 |         uses: docker/setup-buildx-action@v3
56 |       - name: Set image tag  # exports SHORT_SHA for the manifest steps below
57 |         run: |
58 |           echo "SHORT_SHA=$(git rev-parse --short ${{ github.sha }})" >> "$GITHUB_ENV"
59 |       - name: Create SHA manifest and push
60 |         run: |
61 |           docker buildx imagetools create -t \
62 |             ${REGISTRY}:computer-use-demo-${SHORT_SHA} \
63 |             ${REGISTRY}:computer-use-demo-${SHORT_SHA}-amd64 \
64 |             ${REGISTRY}:computer-use-demo-${SHORT_SHA}-arm64
65 | 
66 |       - name: Create latest manifest and push
67 |         if: github.event_name == 'push' && github.ref == 'refs/heads/main'  # only pushes to main move `latest`
68 |         run: |
69 |           docker buildx imagetools create -t \
70 |             ${REGISTRY}:computer-use-demo-latest \
71 |             ${REGISTRY}:computer-use-demo-latest-amd64 \
72 |             ${REGISTRY}:computer-use-demo-latest-arm64
73 | 


--------------------------------------------------------------------------------
/.github/workflows/reusable_build_step.yaml:
--------------------------------------------------------------------------------
 1 | # Reusable workflow: builds the image for one platform, smoke-tests the
 2 | # running container, then pushes the platform-specific tag(s).
 3 | on:
 4 |   workflow_call:
 5 |     inputs:
 6 |       platform:
 7 |         required: true
 8 |         type: string
 9 |       builder:
10 |         required: true
11 |         type: string
12 |       registry:
13 |         required: true
14 |         type: string
15 |       tag_prefix:
16 |         required: false
17 |         type: string
18 |       context:
19 |         required: false
20 |         type: string
21 | jobs:
22 |   build:
23 |     runs-on: ${{ inputs.builder }}
24 |     permissions:
25 |       contents: read
26 |       packages: write
27 |     steps:
28 |       - uses: actions/checkout@v4
29 |       - name: Login to ghcr.io
30 |         uses: docker/login-action@v3
31 |         with:
32 |           registry: ghcr.io
33 |           username: ${{github.actor}}
34 |           password: ${{secrets.GITHUB_TOKEN}}
35 |       - name: Set up Docker Buildx
36 |         uses: docker/setup-buildx-action@v3
37 |       - name: Set image tag
38 |         run: |
39 |           short_sha=$(git rev-parse --short ${{ github.sha }})
40 |           echo "TAG=${{ inputs.registry }}:${{ inputs.tag_prefix }}${short_sha}" >> "$GITHUB_ENV"
41 |       - name: Build Docker image
42 |         uses: docker/build-push-action@v5
43 |         with:
44 |           platforms: linux/${{ inputs.platform }}
45 |           context: ${{ inputs.context || '.' }}
46 |           push: false
47 |           tags: ${{ env.TAG }}
48 |           cache-from: type=gha,scope=computer-use-${{ inputs.platform }}
49 |           cache-to: type=gha,mode=max,scope=computer-use-${{ inputs.platform }}
50 |           load: true
51 |       # Publish Streamlit's port 8501, the same port the next step probes.
52 |       - name: Run container
53 |         run: docker run -d -p 8501:8501 ${{ env.TAG }}
54 |       - name: Check streamlit
55 |         run: |
56 |           timeout=60
57 |           start_time=$(date +%s)
58 |           docker_id=$(docker ps --filter "ancestor=${{ env.TAG }}" --format "{{.ID}}")
59 |           echo "docker_id=$docker_id" >> "$GITHUB_ENV"
60 |           while true; do
61 |             current_time=$(date +%s)
62 |             elapsed=$((current_time - start_time))
63 |             if [ $elapsed -ge $timeout ]; then
64 |               echo "Timeout reached. Container did not respond within $timeout seconds."
65 |               exit 1
66 |             fi
67 |             response=$(docker exec $docker_id curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:8501 || echo "000")
68 |             if [ "$response" = "200" ]; then
69 |               echo "Container responded with 200 OK"
70 |               exit 0
71 |             fi
72 |             # Wait between probes instead of spinning on `docker exec`.
73 |             sleep 1
74 |           done
75 |       - name: Check VNC
76 |         run: docker exec $docker_id nc localhost 5900 -z
77 |       - name: Check noVNC
78 |         run: docker exec $docker_id curl -s -o /dev/null -w "%{http_code}" http://localhost:6080 | grep -q 200 || exit 1
79 |       - name: Check landing page
80 |         run: docker exec $docker_id curl -s -o /dev/null -w "%{http_code}" http://localhost:8080 | grep -q 200 || exit 1
81 |       # Pull requests push only the SHA tag; every other event also pushes
82 |       # the platform-specific `latest` tag.
83 |       - name: Determine push tags
84 |         run: |
85 |           if [ "${{ github.event_name }}" == "pull_request" ]; then
86 |             echo "PUSH_TAGS=${TAG}-${{ inputs.platform }}" >> "$GITHUB_ENV"
87 |           else
88 |             echo "PUSH_TAGS=${TAG}-${{ inputs.platform }},${{ inputs.registry }}:${{ inputs.tag_prefix }}latest-${{ inputs.platform }}" >> "$GITHUB_ENV"
89 |           fi
90 |       - name: Push Docker image
91 |         uses: docker/build-push-action@v5
92 |         with:
93 |           platforms: linux/${{ inputs.platform }}
94 |           context: ${{ inputs.context || '.' }}
95 |           push: true
96 |           tags: ${{ env.PUSH_TAGS }}
97 |           cache-from: type=gha,scope=computer-use-${{ inputs.platform }}
98 |           cache-to: type=gha,mode=max,scope=computer-use-${{ inputs.platform }}
99 | 


--------------------------------------------------------------------------------
/.github/workflows/tests.yaml:
--------------------------------------------------------------------------------
 1 | name: tests
 2 | on:
 3 |   pull_request:
 4 |     paths:
 5 |       - .github/**
 6 |       - computer-use-demo/**
 7 |   push:
 8 |     branches:
 9 |       - main
10 |     paths:
11 |       - .github/**
12 |       - computer-use-demo/**
13 | jobs:
14 |   ruff:  # lint job
15 |     runs-on: ubuntu-latest
16 |     defaults:
17 |       run:
18 |         working-directory: computer-use-demo
19 |     steps:
20 |       - uses: actions/checkout@v4
21 |       - uses: astral-sh/ruff-action@v1
22 |   pyright:  # static type-check job
23 |     runs-on: ubuntu-latest
24 |     defaults:
25 |       run:
26 |         working-directory: computer-use-demo  # directory for this job's `run:` steps
27 |     steps:
28 |       - uses: actions/checkout@v4
29 |       - uses: actions/setup-python@v5
30 |         with:
31 |           cache: "pip"
32 |           python-version: "3.11.6"
33 |       - run: |
34 |           python -m venv .venv
35 |           source .venv/bin/activate
36 |           pip install -r dev-requirements.txt
37 |       - run: echo "$PWD/.venv/bin" >> $GITHUB_PATH  # put the venv's executables on PATH for later steps
38 |       - uses: jakebailey/pyright-action@v1
39 |   pytest:  # unit-test job
40 |     runs-on: ubuntu-latest
41 |     defaults:
42 |       run:
43 |         working-directory: computer-use-demo  # directory for this job's `run:` steps
44 |     steps:
45 |       - uses: actions/checkout@v4
46 |       - uses: actions/setup-python@v5
47 |         with:
48 |           cache: "pip"
49 |           python-version: "3.11.6"
50 |       - run: |
51 |           python -m venv .venv
52 |           source .venv/bin/activate
53 |           pip install -r dev-requirements.txt
54 |       - run: echo "$PWD/.venv/bin" >> $GITHUB_PATH  # put the venv's executables on PATH for later steps
55 |       - run: pytest tests --junitxml=junit/test-results.xml  # also writes a JUnit XML report
56 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | files: ^computer-use-demo/  # hooks only consider paths under computer-use-demo/
 2 | default_stages: [pre-commit, pre-push]  # hooks run at both commit and push time
 3 | repos:
 4 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 5 |     rev: v2.3.0
 6 |     hooks:
 7 |       - id: check-yaml
 8 |       - id: end-of-file-fixer
 9 |       - id: trailing-whitespace
10 |   - repo: https://github.com/astral-sh/ruff-pre-commit
11 |     rev: v0.6.7
12 |     hooks:  # the same `ruff` hook id is registered three times: autofix, format, lint
13 |       - id: ruff
14 |         name: Run `ruff` to autofix lint errors
15 |         args: [--fix-only]
16 |       - id: ruff
17 |         name: Run `ruff` to format code
18 |         entry: ruff format --force-exclude  # replaces the hook's command with the formatter
19 |       - id: ruff
20 |         name: Run `ruff` to lint code
21 |   - repo: https://github.com/RobertCraigie/pyright-python
22 |     rev: v1.1.384
23 |     hooks:
24 |       - id: pyright
25 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Anthropic
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Anthropic Computer Use <> Browserbase Demo
  2 | 
  3 | <div style="text-align: center;">
  4 |   <img src="anthropic-browserbase.png" alt="Anthropic Computer Use <> Browserbase Demo">
  5 | </div>
  6 | 
  7 | > [!CAUTION]
  8 | > Computer use is a beta feature. Please be aware that computer use poses unique risks that are distinct from standard API features or chat interfaces. These risks are heightened when using computer use to interact with the internet. To minimize risks, consider taking precautions such as:
  9 | >
 10 | > 1. Use a dedicated virtual machine or container with minimal privileges to prevent direct system attacks or accidents.
 11 | > 2. Avoid giving the model access to sensitive data, such as account login information, to prevent information theft.
 12 | > 3. Limit internet access to an allowlist of domains to reduce exposure to malicious content.
 13 | > 4. Ask a human to confirm decisions that may result in meaningful real-world consequences as well as any tasks requiring affirmative consent, such as accepting cookies, executing financial transactions, or agreeing to terms of service.
 14 | >
 15 | > In some circumstances, Claude will follow commands found in content even if it conflicts with the user's instructions. For example, instructions on webpages or contained in images may override user instructions or cause Claude to make mistakes. We suggest taking precautions to isolate Claude from sensitive data and actions to avoid risks related to prompt injection.
 16 | >
 17 | > Finally, please inform end users of relevant risks and obtain their consent prior to enabling computer use in your own products.
 18 | 
 19 | This repository helps you get started with computer use on Claude, with reference implementations of:
 20 | 
 21 | * Build files to create a Docker container with all necessary dependencies
 22 | * A computer use agent loop using the Anthropic API to access the updated Claude 3.5 Sonnet model
 23 | * Anthropic-defined computer use tools
 24 | * A streamlit app for interacting with the agent loop
 25 | 
 26 | > [!IMPORTANT]
 27 | > The Beta API used in this reference implementation is subject to change. Please refer to the [API release notes](https://docs.anthropic.com/en/release-notes/api) and the [Browserbase changelog](https://docs.browserbase.com/changelog) for the most up-to-date information.
 28 | 
 29 | > [!IMPORTANT]
 30 | > The components are weakly separated: the agent loop runs in the container being controlled by Claude, can only be used by one session at a time, and must be restarted or reset between sessions if necessary.
 31 | 
 32 | ## Quickstart: running the Docker container
 33 | 
 34 | ### Anthropic API
 35 | 
 36 | > [!TIP]
 37 | > You can find your API key in the [Anthropic Console](https://console.anthropic.com/).
 38 | 
 39 | ### Browserbase API
 40 | 
 41 | > [!TIP]
 42 | > You can find your API key and project ID in the [Browserbase Settings](https://www.browserbase.com/settings).
 43 | 
 44 | ### Instructions for building the docker image:
 45 | 
 46 | Go to the `computer-use-demo` directory:
 47 | 
 48 | ```bash
 49 | cd computer-use-demo
 50 | ```
 51 | 
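 52 | Add your Browserbase API key and Project ID to the `.env` file (see `computer_use_demo/.env.template` for the expected variables) or set them in `main()` in `browserbase.py`, then build the image: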
 53 | 
 54 | ```bash
 55 | docker build -t my-computer-use-demo .
 56 | ```
 57 | 
 58 | Run the container with your Anthropic API key:
 59 | 
 60 | ```bash
 61 | export ANTHROPIC_API_KEY=%your_api_key%
 62 | docker run \
 63 |     -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \
 64 |     -v $HOME/.anthropic:/home/computeruse/.anthropic \
 65 |     -p 5900:5900 \
 66 |     -p 8501:8501 \
 67 |     -p 6080:6080 \
 68 |     -p 8080:8080 \
 69 |     -it my-computer-use-demo
 70 | ```
 71 | 
 72 | ### Accessing the demo app
 73 | 
 74 | Once the container is running, open your browser to [http://localhost:8080](http://localhost:8080) to access the combined interface that includes both the agent chat and desktop view.
 75 | 
 76 | The container stores settings like the API key and custom system prompt in `~/.anthropic/`. Mount this directory to persist these settings between container runs.
 77 | 
 78 | Alternative access points:
 79 | 
 80 | - Streamlit interface only: [http://localhost:8501](http://localhost:8501)
 81 | - Desktop view only: [http://localhost:6080/vnc.html](http://localhost:6080/vnc.html)
 82 | - Direct VNC connection: `vnc://localhost:5900` (for VNC clients)
 83 | 
 84 | ## Screen size
 85 | 
 86 | Environment variables `WIDTH` and `HEIGHT` can be used to set the screen size. For example:
 87 | 
 88 | ```bash
 89 | docker run \
 90 |     -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \
 91 |     -v $HOME/.anthropic:/home/computeruse/.anthropic \
 92 |     -p 5900:5900 \
 93 |     -p 8501:8501 \
 94 |     -p 6080:6080 \
 95 |     -p 8080:8080 \
 96 |     -e WIDTH=1920 \
 97 |     -e HEIGHT=1080 \
 98 |     -it my-computer-use-demo
 99 | ```
100 | 
101 | We do not recommend sending screenshots in resolutions above [XGA/WXGA](https://en.wikipedia.org/wiki/Display_resolution_standards#XGA) to avoid issues related to [image resizing](https://docs.anthropic.com/en/docs/build-with-claude/vision#evaluate-image-size).
102 | 
103 | Relying on the image resizing behavior in the API will result in lower model accuracy and slower performance than implementing scaling in your tools directly. The `computer` tool implementation in this project demonstrates how to scale both images and coordinates from higher resolutions to the suggested resolutions.
104 | 
105 | ## Contributing
106 | 
107 | We welcome contributions to the Anthropic Computer Use <> Browserbase Demo repository! If you have ideas for new quickstart projects or improvements to existing ones, please open an issue or submit a pull request.
108 | 
109 | ## Community and Support
110 | 
111 | - Email [Browserbase Support](mailto:support@browserbase.com) for discussions and support
112 | - Check out the [Browserbase documentation](https://docs.browserbase.com) for additional help
113 | 
114 | ## License
115 | 
116 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
117 | 


--------------------------------------------------------------------------------
/anthropic-browserbase.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/browserbase-computer-use/8fc3e6686ac8853c1d2fd1d2145d0ab7853a583f/anthropic-browserbase.png


--------------------------------------------------------------------------------
/computer-use-demo/.gitignore:
--------------------------------------------------------------------------------
1 | .venv
2 | .ruff_cache
3 | __pycache__
4 | .pytest_cache
5 | .env
6 | env
7 | 


--------------------------------------------------------------------------------
/computer-use-demo/Dockerfile:
--------------------------------------------------------------------------------
  1 | FROM docker.io/ubuntu:22.04
  2 | 
  3 | ENV DEBIAN_FRONTEND=noninteractive
  4 | ENV DEBIAN_PRIORITY=high
  5 | 
  6 | RUN apt-get update && \
  7 |     apt-get -y upgrade && \
  8 |     apt-get -y install \
  9 |     build-essential \
 10 |     # UI Requirements
 11 |     xvfb \
 12 |     xterm \
 13 |     xdotool \
 14 |     scrot \
 15 |     imagemagick \
 16 |     sudo \
 17 |     mutter \
 18 |     x11vnc \
 19 |     # add w3m for debugging
 20 |     w3m \
 21 |     # Python/pyenv reqs (build-essential is already listed above)
 22 |     libssl-dev  \
 23 |     zlib1g-dev \
 24 |     libbz2-dev \
 25 |     libreadline-dev \
 26 |     libsqlite3-dev \
 27 |     curl \
 28 |     git \
 29 |     libncursesw5-dev \
 30 |     xz-utils \
 31 |     tk-dev \
 32 |     libxml2-dev \
 33 |     libxmlsec1-dev \
 34 |     libffi-dev \
 35 |     liblzma-dev \
 36 |     # Network tools
 37 |     net-tools \
 38 |     netcat \
 39 |     # PPA req
 40 |     software-properties-common && \
 41 |     # Userland apps
 42 |     # (-y: never wait for confirmation in this non-interactive build)
 43 |     sudo add-apt-repository -y ppa:mozillateam/ppa && \
 44 |     sudo apt-get install -y --no-install-recommends \
 45 |     libreoffice \
 46 |     firefox-esr \
 47 |     x11-apps \
 48 |     xpdf \
 49 |     gedit \
 50 |     xpaint \
 51 |     tint2 \
 52 |     galculator \
 53 |     pcmanfm \
 54 |     unzip && \
 55 |     apt-get clean
 56 | 
 57 | # Install noVNC
 58 | RUN git clone --branch v1.5.0 https://github.com/novnc/noVNC.git /opt/noVNC && \
 59 |     git clone --branch v0.12.0 https://github.com/novnc/websockify /opt/noVNC/utils/websockify && \
 60 |     ln -s /opt/noVNC/vnc.html /opt/noVNC/index.html
 61 | 
 62 | # setup user
 63 | ENV USERNAME=computeruse
 64 | ENV HOME=/home/$USERNAME
 65 | RUN useradd -m -s /bin/bash -d $HOME $USERNAME
 66 | RUN echo "${USERNAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
 67 | USER computeruse
 68 | WORKDIR $HOME
 69 | 
 70 | # setup python
 71 | RUN git clone https://github.com/pyenv/pyenv.git ~/.pyenv && \
 72 |     cd ~/.pyenv && src/configure && make -C src && cd .. && \
 73 |     echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc && \
 74 |     echo 'command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc && \
 75 |     echo 'eval "$(pyenv init -)"' >> ~/.bashrc
 76 | ENV PYENV_ROOT="$HOME/.pyenv"
 77 | ENV PATH="$PYENV_ROOT/bin:$PATH"
 78 | ENV PYENV_VERSION_MAJOR=3
 79 | ENV PYENV_VERSION_MINOR=11
 80 | ENV PYENV_VERSION_PATCH=6
 81 | ENV PYENV_VERSION=$PYENV_VERSION_MAJOR.$PYENV_VERSION_MINOR.$PYENV_VERSION_PATCH
 82 | RUN eval "$(pyenv init -)" && \
 83 |     pyenv install $PYENV_VERSION && \
 84 |     pyenv global $PYENV_VERSION && \
 85 |     pyenv rehash
 86 | 
 87 | ENV PATH="$HOME/.pyenv/shims:$HOME/.pyenv/bin:$PATH"
 88 | 
 89 | RUN python -m pip install --upgrade pip==23.1.2 setuptools==58.0.4 wheel==0.40.0 && \
 90 |     python -m pip config set global.disable-pip-version-check true
 91 | 
 92 | # only reinstall if requirements.txt changes
 93 | COPY --chown=$USERNAME:$USERNAME computer_use_demo/requirements.txt $HOME/computer_use_demo/requirements.txt
 94 | RUN python -m pip install -r $HOME/computer_use_demo/requirements.txt
 95 | 
 96 | # setup desktop env & app
 97 | COPY --chown=$USERNAME:$USERNAME image/ $HOME
 98 | COPY --chown=$USERNAME:$USERNAME computer_use_demo/ $HOME/computer_use_demo/
 99 | 
100 | ARG DISPLAY_NUM=1
101 | ARG HEIGHT=768
102 | ARG WIDTH=1024
103 | ENV DISPLAY_NUM=$DISPLAY_NUM
104 | ENV HEIGHT=$HEIGHT
105 | ENV WIDTH=$WIDTH
106 | 
107 | # This is the entrypoint script that starts all the services
108 | COPY --chown=$USERNAME:$USERNAME image/entrypoint.sh $HOME/entrypoint.sh
109 | RUN chmod +x $HOME/entrypoint.sh
110 | 
111 | # This is the Browserbase script that connects to Browserbase and opens the debugger URL
112 | COPY --chown=$USERNAME:$USERNAME computer_use_demo/tools/browserbase.py $HOME/computer_use_demo/tools/
113 | 
114 | # This is the script that opens the debugger URL
115 | COPY --chown=$USERNAME:$USERNAME image/open_debugger.sh $HOME/open_debugger.sh
116 | RUN chmod +x $HOME/open_debugger.sh
117 | 
118 | ENTRYPOINT [ "./entrypoint.sh" ]
119 | 


--------------------------------------------------------------------------------
/computer-use-demo/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2024 Anthropic, PBC.
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 | 


--------------------------------------------------------------------------------
/computer-use-demo/computer_use_demo/.env.template:
--------------------------------------------------------------------------------
1 | BROWSERBASE_PROJECT_ID=<your-browserbase-project-id>
2 | BROWSERBASE_API_KEY=<your-browserbase-api-key>


--------------------------------------------------------------------------------
/computer-use-demo/computer_use_demo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/browserbase-computer-use/8fc3e6686ac8853c1d2fd1d2145d0ab7853a583f/computer-use-demo/computer_use_demo/__init__.py


--------------------------------------------------------------------------------
/computer-use-demo/computer_use_demo/loop.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Agentic sampling loop that calls the Anthropic API and local implementation of anthropic-defined computer use tools.
  3 | """
  4 | 
  5 | import platform
  6 | from collections.abc import Callable
  7 | from datetime import datetime
  8 | from enum import StrEnum
  9 | from typing import Any, cast
 10 | 
 11 | from anthropic import Anthropic, AnthropicBedrock, AnthropicVertex, APIResponse
 12 | from anthropic.types import (
 13 |     ToolResultBlockParam,
 14 | )
 15 | from anthropic.types.beta import (
 16 |     BetaContentBlock,
 17 |     BetaContentBlockParam,
 18 |     BetaImageBlockParam,
 19 |     BetaMessage,
 20 |     BetaMessageParam,
 21 |     BetaTextBlockParam,
 22 |     BetaToolResultBlockParam,
 23 | )
 24 | 
 25 | from .tools import BashTool, ComputerTool, EditTool, ToolCollection, ToolResult
 26 | 
 27 | BETA_FLAG = "computer-use-2024-10-22"
 28 | 
 29 | 
 30 | class APIProvider(StrEnum):  # API backends that sampling_loop can build a client for
 31 |     ANTHROPIC = "anthropic"  # -> Anthropic(api_key=...)
 32 |     BEDROCK = "bedrock"  # -> AnthropicBedrock()
 33 |     VERTEX = "vertex"  # -> AnthropicVertex()
 34 | 
 35 | 
 36 | PROVIDER_TO_DEFAULT_MODEL_NAME: dict[APIProvider, str] = {
 37 |     APIProvider.ANTHROPIC: "claude-3-5-sonnet-20241022",
 38 |     APIProvider.BEDROCK: "anthropic.claude-3-5-sonnet-20241022-v2:0",
 39 |     APIProvider.VERTEX: "claude-3-5-sonnet-v2@20241022",
 40 | }
 41 | 
 42 | 
 43 | # This system prompt is optimized for the Docker environment in this repository and
 44 | # specific tool combinations enabled.
 45 | # We encourage modifying this system prompt to ensure the model has context for the
 46 | # environment it is running in, and to provide any additional information that may be
 47 | # helpful for the task at hand.
 48 | SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
 49 | * You are utilizing an Ubuntu virtual machine using {platform.machine()} architecture with internet access.
 50 | * You can feel free to install Ubuntu applications with your bash tool. Use curl instead of wget.
 51 | * Using bash tool you can start GUI applications, but you need to set export DISPLAY=:1 and use a 
 52 | subshell. For example "(DISPLAY=:1 xterm &)". GUI apps run with bash tool will appear within your 
 53 | desktop environment, but they may take some time to appear. Take a screenshot to confirm it did.
 54 | * A debug URL is automatically opened for you in Firefox. This is your primary interface for web interactions. Do not mention opening Firefox as it's already done for you.
 55 | * When using your bash tool with commands that are expected to output very large quantities of text, redirect into a tmp file and use str_replace_editor or `grep -n -B <lines before> -A <lines after> <query> <filename>` to confirm output.
 56 | * When viewing a page in the debug window, it can be helpful to zoom out so that you can see everything on the page. Make sure you scroll down to see everything before deciding something isn't available.
 57 | * When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
 58 | * The current date is {datetime.today().strftime('%A, %B %-d, %Y')}.
 59 | </SYSTEM_CAPABILITY>
 60 | 
 61 | <IMPORTANT>
 62 | * The debug URL is already open in Firefox. Do not mention opening Firefox or any other browser.
 63 | * If the item you are looking at is a pdf, if after taking a single screenshot of the pdf it seems that you want to read the entire document instead of trying to continue to read the pdf from your screenshots + navigation, determine the URL, use curl to download the pdf, install and use pdftotext to convert it to a text file, and then read that text file directly with your StrReplaceEditTool.
 64 | * The debug URL is your primary interface for web browsing. Use this interface for all web-related tasks.
 65 | * If you need to interact with web content, use the debug URL interface and describe the actions you want to take.
 66 | </IMPORTANT>"""
 67 | 
 68 | 
 69 | async def sampling_loop(
 70 |     *,
 71 |     model: str,
 72 |     provider: APIProvider,
 73 |     system_prompt_suffix: str,
 74 |     messages: list[BetaMessageParam],
 75 |     output_callback: Callable[[BetaContentBlock], None],
 76 |     tool_output_callback: Callable[[ToolResult, str], None],
 77 |     api_response_callback: Callable[[APIResponse[BetaMessage]], None],
 78 |     api_key: str,
 79 |     only_n_most_recent_images: int | None = None,
 80 |     max_tokens: int = 4096,
 81 | ):
 82 |     """
 83 |     Agentic sampling loop for the assistant/tool interaction of computer use.
 84 |     """
 85 |     tool_collection = ToolCollection(
 86 |         ComputerTool(),
 87 |         BashTool(),
 88 |         EditTool(),
 89 |     )
 90 |     system = (
 91 |         f"{SYSTEM_PROMPT}{' ' + system_prompt_suffix if system_prompt_suffix else ''}"
 92 |     )
 93 | 
 94 |     while True:
 95 |         if only_n_most_recent_images:
 96 |             _maybe_filter_to_n_most_recent_images(messages, only_n_most_recent_images)
 97 | 
 98 |         if provider == APIProvider.ANTHROPIC:
 99 |             client = Anthropic(api_key=api_key)
100 |         elif provider == APIProvider.VERTEX:
101 |             client = AnthropicVertex()
102 |         elif provider == APIProvider.BEDROCK:
103 |             client = AnthropicBedrock()
104 | 
105 |         # Call the API
106 |         # we use raw_response to provide debug information to streamlit. Your
107 |         # implementation may be able call the SDK directly with:
108 |         # `response = client.messages.create(...)` instead.
109 |         raw_response = client.beta.messages.with_raw_response.create(
110 |             max_tokens=max_tokens,
111 |             messages=messages,
112 |             model=model,
113 |             system=system,
114 |             tools=tool_collection.to_params(),
115 |             betas=["computer-use-2024-10-22"],
116 |         )
117 | 
118 |         api_response_callback(cast(APIResponse[BetaMessage], raw_response))
119 | 
120 |         response = raw_response.parse()
121 | 
122 |         messages.append(
123 |             {
124 |                 "role": "assistant",
125 |                 "content": cast(list[BetaContentBlockParam], response.content),
126 |             }
127 |         )
128 | 
129 |         tool_result_content: list[BetaToolResultBlockParam] = []
130 |         for content_block in cast(list[BetaContentBlock], response.content):
131 |             output_callback(content_block)
132 |             if content_block.type == "tool_use":
133 |                 result = await tool_collection.run(
134 |                     name=content_block.name,
135 |                     tool_input=cast(dict[str, Any], content_block.input),
136 |                 )
137 |                 tool_result_content.append(
138 |                     _make_api_tool_result(result, content_block.id)
139 |                 )
140 |                 tool_output_callback(result, content_block.id)
141 | 
142 |         if not tool_result_content:
143 |             return messages
144 | 
145 |         messages.append({"content": tool_result_content, "role": "user"})
146 | 
147 | 
def _maybe_filter_to_n_most_recent_images(
    messages: list[BetaMessageParam],
    images_to_keep: int,
    min_removal_threshold: int = 10,
):
    """
    Drop the oldest screenshots from the tool_result blocks of `messages`, in place.

    Screenshots are assumed to lose value as the conversation moves on, so only
    the last `images_to_keep` tool_result images are retained. The number of
    images removed is rounded down to a multiple of `min_removal_threshold` so
    that the prefix of the conversation changes less often, which reduces how
    often the implicit prompt cache is broken.

    Returns `messages` unchanged when `images_to_keep` is None; otherwise the
    tool_result blocks are mutated and nothing is returned.
    """
    if images_to_keep is None:
        return messages

    def _is_image(part) -> bool:
        return isinstance(part, dict) and part.get("type") == "image"

    # Gather every tool_result block, oldest first, across all list-valued messages.
    collected = []
    for message in messages:
        body = message["content"]
        if not isinstance(body, list):
            continue
        for item in body:
            if isinstance(item, dict) and item.get("type") == "tool_result":
                collected.append(item)
    tool_result_blocks = cast(list[ToolResultBlockParam], collected)

    image_count = 0
    for block in tool_result_blocks:
        for part in block.get("content", []):
            if _is_image(part):
                image_count += 1

    surplus = image_count - images_to_keep
    # for better cache behavior, we want to remove in chunks
    surplus -= surplus % min_removal_threshold

    # Walk the blocks oldest-first, discarding images until the surplus is used up.
    for block in tool_result_blocks:
        parts = block.get("content")
        if not isinstance(parts, list):
            continue
        kept = []
        for part in parts:
            if surplus > 0 and _is_image(part):
                surplus -= 1
            else:
                kept.append(part)
        block["content"] = kept
196 | 
def _make_api_tool_result(
    result: ToolResult, tool_use_id: str
) -> BetaToolResultBlockParam:
    """
    Build the API `tool_result` block for one tool invocation.

    When `result.error` is set, the block is flagged as an error and its content
    is the error text (a plain string). Otherwise the content is a list holding
    a text block for `result.output` and/or a base64 PNG image block for
    `result.base64_image`, whichever are present. Text is prefixed with the
    `<system>` note when `result.system` is set.
    """
    if result.error:
        failed = True
        payload = _maybe_prepend_system_tool_result(result, result.error)
    else:
        failed = False
        payload = []
        if result.output:
            text_block = {
                "type": "text",
                "text": _maybe_prepend_system_tool_result(result, result.output),
            }
            payload.append(text_block)
        if result.base64_image:
            image_source = {
                "type": "base64",
                "media_type": "image/png",
                "data": result.base64_image,
            }
            payload.append({"type": "image", "source": image_source})

    return {
        "type": "tool_result",
        "content": payload,
        "tool_use_id": tool_use_id,
        "is_error": failed,
    }
231 | 
232 | 
def _maybe_prepend_system_tool_result(result: ToolResult, result_text: str):
    """Return `result_text`, preceded by a `<system>` line when `result.system` is set."""
    if not result.system:
        return result_text
    return f"<system>{result.system}</system>\n{result_text}"
237 | 


--------------------------------------------------------------------------------
/computer-use-demo/computer_use_demo/requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit>=1.38.0
2 | anthropic[bedrock,vertex]>=0.37.1
3 | jsonschema==4.22.0
4 | boto3>=1.28.57
5 | google-auth<3,>=2
6 | requests==2.31.0
7 | playwright==1.48.0
8 | python-dotenv==1.0.0


--------------------------------------------------------------------------------
/computer-use-demo/computer_use_demo/streamlit.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Entrypoint for streamlit, see https://docs.streamlit.io/
  3 | """
  4 | 
  5 | import asyncio
  6 | import base64
  7 | import os
  8 | import subprocess
  9 | from datetime import datetime
 10 | from enum import StrEnum
 11 | from functools import partial
 12 | from pathlib import PosixPath
 13 | from typing import cast
 14 | 
 15 | import streamlit as st
 16 | from anthropic import APIResponse
 17 | from anthropic.types import (
 18 |     TextBlock,
 19 | )
 20 | from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock
 21 | from anthropic.types.tool_use_block import ToolUseBlock
 22 | from streamlit.delta_generator import DeltaGenerator
 23 | 
 24 | from computer_use_demo.loop import (
 25 |     PROVIDER_TO_DEFAULT_MODEL_NAME,
 26 |     APIProvider,
 27 |     sampling_loop,
 28 | )
 29 | from computer_use_demo.tools import ToolResult
 30 | 
# Directory under the user's home where load_from_storage/save_to_storage keep
# their files.
CONFIG_DIR = PosixPath("~/.anthropic").expanduser()
# Path of the persisted API key inside CONFIG_DIR.
API_KEY_FILE = CONFIG_DIR / "api_key"
# CSS injected into the page by main() via st.markdown(..., unsafe_allow_html=True).
STREAMLIT_STYLE = """
<style>
    /* Hide chat input while agent loop is running */
    .stApp[data-teststate=running] .stChatInput textarea,
    .stApp[data-test-script-state=running] .stChatInput textarea {
        display: none;
    }
     /* Hide the streamlit deploy button */
    .stDeployButton {
        visibility: hidden;
    }
</style>
"""

# Banner shown by main() unless the HIDE_WARNING environment variable is set.
WARNING_TEXT = "⚠️ Security Alert: Never provide access to sensitive accounts or data, as malicious web content can hijack Claude's behavior"
 48 | 
 49 | 
class Sender(StrEnum):
    """Chat roles used when storing and rendering messages in the UI."""

    # Message typed into the chat input.
    USER = "user"
    # Content blocks produced by the model.
    BOT = "assistant"
    # Output of a tool invocation.
    TOOL = "tool"
 54 | 
 55 | 
 56 | def setup_state():
 57 |     if "messages" not in st.session_state:
 58 |         st.session_state.messages = []
 59 |     if "api_key" not in st.session_state:
 60 |         # Try to load API key from file first, then environment
 61 |         st.session_state.api_key = load_from_storage("api_key") or os.getenv(
 62 |             "ANTHROPIC_API_KEY", ""
 63 |         )
 64 |     if "provider" not in st.session_state:
 65 |         st.session_state.provider = (
 66 |             os.getenv("API_PROVIDER", "anthropic") or APIProvider.ANTHROPIC
 67 |         )
 68 |     if "provider_radio" not in st.session_state:
 69 |         st.session_state.provider_radio = st.session_state.provider
 70 |     if "model" not in st.session_state:
 71 |         _reset_model()
 72 |     if "auth_validated" not in st.session_state:
 73 |         st.session_state.auth_validated = False
 74 |     if "responses" not in st.session_state:
 75 |         st.session_state.responses = {}
 76 |     if "tools" not in st.session_state:
 77 |         st.session_state.tools = {}
 78 |     if "only_n_most_recent_images" not in st.session_state:
 79 |         st.session_state.only_n_most_recent_images = 10
 80 |     if "custom_system_prompt" not in st.session_state:
 81 |         st.session_state.custom_system_prompt = load_from_storage("system_prompt") or ""
 82 |     if "hide_images" not in st.session_state:
 83 |         st.session_state.hide_images = False
 84 | 
 85 | 
 86 | def _reset_model():
 87 |     st.session_state.model = PROVIDER_TO_DEFAULT_MODEL_NAME[
 88 |         cast(APIProvider, st.session_state.provider)
 89 |     ]
 90 | 
 91 | 
 92 | async def main():
 93 |     """Render loop for streamlit"""
 94 |     setup_state()
 95 | 
 96 |     st.markdown(STREAMLIT_STYLE, unsafe_allow_html=True)
 97 | 
 98 |     st.title("Claude Computer <> Browserbase Use Demo")
 99 | 
100 |     if not os.getenv("HIDE_WARNING", False):
101 |         st.warning(WARNING_TEXT)
102 | 
103 |     with st.sidebar:
104 | 
105 |         def _reset_api_provider():
106 |             if st.session_state.provider_radio != st.session_state.provider:
107 |                 _reset_model()
108 |                 st.session_state.provider = st.session_state.provider_radio
109 |                 st.session_state.auth_validated = False
110 | 
111 |         provider_options = [option.value for option in APIProvider]
112 |         st.radio(
113 |             "API Provider",
114 |             options=provider_options,
115 |             key="provider_radio",
116 |             format_func=lambda x: x.title(),
117 |             on_change=_reset_api_provider,
118 |         )
119 | 
120 |         st.text_input("Model", key="model")
121 | 
122 |         if st.session_state.provider == APIProvider.ANTHROPIC:
123 |             st.text_input(
124 |                 "Anthropic API Key",
125 |                 type="password",
126 |                 key="api_key",
127 |                 on_change=lambda: save_to_storage("api_key", st.session_state.api_key),
128 |             )
129 | 
130 |         st.number_input(
131 |             "Only send N most recent images",
132 |             min_value=0,
133 |             key="only_n_most_recent_images",
134 |             help="To decrease the total tokens sent, remove older screenshots from the conversation",
135 |         )
136 |         st.text_area(
137 |             "Custom System Prompt Suffix",
138 |             key="custom_system_prompt",
139 |             help="Additional instructions to append to the system prompt. see computer_use_demo/loop.py for the base system prompt.",
140 |             on_change=lambda: save_to_storage(
141 |                 "system_prompt", st.session_state.custom_system_prompt
142 |             ),
143 |         )
144 |         st.checkbox("Hide screenshots", key="hide_images")
145 | 
146 |         if st.button("Reset", type="primary"):
147 |             with st.spinner("Resetting..."):
148 |                 st.session_state.clear()
149 |                 setup_state()
150 | 
151 |                 subprocess.run("pkill Xvfb; pkill tint2", shell=True)  # noqa: ASYNC221
152 |                 await asyncio.sleep(1)
153 |                 subprocess.run("./start_all.sh", shell=True)  # noqa: ASYNC221
154 | 
155 |     if not st.session_state.auth_validated:
156 |         if auth_error := validate_auth(
157 |             st.session_state.provider, st.session_state.api_key
158 |         ):
159 |             st.warning(f"Please resolve the following auth issue:\n\n{auth_error}")
160 |             return
161 |         else:
162 |             st.session_state.auth_validated = True
163 | 
164 |     chat, http_logs = st.tabs(["Chat", "HTTP Exchange Logs"])
165 |     new_message = st.chat_input(
166 |         "Type a message to send to Claude to control the computer..."
167 |     )
168 | 
169 |     with chat:
170 |         # render past chats
171 |         for message in st.session_state.messages:
172 |             if isinstance(message["content"], str):
173 |                 _render_message(message["role"], message["content"])
174 |             elif isinstance(message["content"], list):
175 |                 for block in message["content"]:
176 |                     # the tool result we send back to the Anthropic API isn't sufficient to render all details,
177 |                     # so we store the tool use responses
178 |                     if isinstance(block, dict) and block["type"] == "tool_result":
179 |                         _render_message(
180 |                             Sender.TOOL, st.session_state.tools[block["tool_use_id"]]
181 |                         )
182 |                     else:
183 |                         _render_message(
184 |                             message["role"],
185 |                             cast(BetaTextBlock | BetaToolUseBlock, block),
186 |                         )
187 | 
188 |         # render past http exchanges
189 |         for identity, response in st.session_state.responses.items():
190 |             _render_api_response(response, identity, http_logs)
191 | 
192 |         # render past chats
193 |         if new_message:
194 |             st.session_state.messages.append(
195 |                 {
196 |                     "role": Sender.USER,
197 |                     "content": [TextBlock(type="text", text=new_message)],
198 |                 }
199 |             )
200 |             _render_message(Sender.USER, new_message)
201 | 
202 |         try:
203 |             most_recent_message = st.session_state["messages"][-1]
204 |         except IndexError:
205 |             return
206 | 
207 |         if most_recent_message["role"] is not Sender.USER:
208 |             # we don't have a user message to respond to, exit early
209 |             return
210 | 
211 |         with st.spinner("Running Agent..."):
212 |             # run the agent sampling loop with the newest message
213 |             st.session_state.messages = await sampling_loop(
214 |                 system_prompt_suffix=st.session_state.custom_system_prompt,
215 |                 model=st.session_state.model,
216 |                 provider=st.session_state.provider,
217 |                 messages=st.session_state.messages,
218 |                 output_callback=partial(_render_message, Sender.BOT),
219 |                 tool_output_callback=partial(
220 |                     _tool_output_callback, tool_state=st.session_state.tools
221 |                 ),
222 |                 api_response_callback=partial(
223 |                     _api_response_callback,
224 |                     tab=http_logs,
225 |                     response_state=st.session_state.responses,
226 |                 ),
227 |                 api_key=st.session_state.api_key,
228 |                 only_n_most_recent_images=st.session_state.only_n_most_recent_images,
229 |             )
230 | 
231 | 
def validate_auth(provider: APIProvider, api_key: str | None):
    """
    Check that credentials for `provider` are available.

    Returns a human-readable message describing the first problem found, or
    None when nothing is wrong. The Anthropic provider needs a non-empty
    `api_key`; Bedrock needs credentials resolvable by boto3; Vertex needs the
    CLOUD_ML_REGION environment variable and Google default credentials.
    """
    if provider == APIProvider.ANTHROPIC and not api_key:
        return "Enter your Anthropic API key in the sidebar to continue."

    if provider == APIProvider.BEDROCK:
        # Imported lazily so the dependency is only touched when Bedrock is selected.
        import boto3

        aws_credentials = boto3.Session().get_credentials()
        if not aws_credentials:
            return "You must have AWS credentials set up to use the Bedrock API."

    if provider == APIProvider.VERTEX:
        import google.auth
        from google.auth.exceptions import DefaultCredentialsError

        region = os.environ.get("CLOUD_ML_REGION")
        if not region:
            return "Set the CLOUD_ML_REGION environment variable to use the Vertex API."

        required_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
        try:
            google.auth.default(scopes=required_scopes)
        except DefaultCredentialsError:
            return "Your google cloud credentials are not set up correctly."

    return None
253 | 
254 | 
def load_from_storage(filename: str) -> str | None:
    """
    Read `filename` from the storage directory.

    Returns the file's text with surrounding whitespace stripped, or None when
    the file is missing, is empty after stripping, or cannot be read (in which
    case a debug line is written to the page).
    """
    try:
        candidate = CONFIG_DIR / filename
        if not candidate.exists():
            return None
        contents = candidate.read_text().strip()
        return contents or None
    except Exception as exc:
        st.write(f"Debug: Error loading {filename}: {exc}")
        return None
266 | 
267 | 
def save_to_storage(filename: str, data: str) -> None:
    """
    Save data to a file in the storage directory.

    The storage directory is created if needed. The file is created and
    restricted to owner read/write *before* the data is written, so a secret
    such as the API key is never on disk with broader default permissions.
    Any failure is reported as a debug line on the page instead of being raised.
    """
    try:
        CONFIG_DIR.mkdir(parents=True, exist_ok=True)
        file_path = CONFIG_DIR / filename
        # Make sure the file exists with tight permissions first; chmod also
        # tightens a pre-existing file (touch's mode only applies on creation).
        file_path.touch(mode=0o600, exist_ok=True)
        # Ensure only user can read/write the file
        file_path.chmod(0o600)
        file_path.write_text(data)
    except Exception as e:
        st.write(f"Debug: Error saving {filename}: {e}")
278 | 
279 | 
def _api_response_callback(
    response: APIResponse[BetaMessage],
    tab: DeltaGenerator,
    response_state: dict[str, APIResponse[BetaMessage]],
):
    """
    Record a raw API response and show it in the given tab.

    The response is stored in `response_state` under the current local time in
    ISO format, and that same timestamp labels the rendered entry.
    """
    timestamp_key = datetime.now().isoformat()
    response_state[timestamp_key] = response
    _render_api_response(response, timestamp_key, tab)
291 | 
292 | 
def _tool_output_callback(
    tool_output: ToolResult, tool_id: str, tool_state: dict[str, ToolResult]
):
    """
    Remember a tool's result under its tool-use id, then render it.

    The stored result is what later lets the chat history redraw this tool call.
    """
    tool_state[tool_id] = tool_output
    _render_message(Sender.TOOL, tool_output)
299 | 
300 | 
def _render_api_response(
    response: APIResponse[BetaMessage], response_id: str, tab: DeltaGenerator
):
    """
    Draw one HTTP exchange inside `tab` as a collapsible section.

    The section shows the request line and headers, the request body as JSON,
    the response status and headers, and the response body as JSON.
    """
    separator = "\n\n"

    def _headline_with_headers(headline, headers) -> str:
        # Headline first, then one back-ticked "name: value" entry per header,
        # each separated by a blank line so markdown renders them as paragraphs.
        header_rows = separator.join(f"`{k}: {v}`" for k, v in headers.items())
        return f"`{headline}`{separator}{header_rows}"

    with tab:
        with st.expander(f"Request/Response ({response_id})"):
            request = response.http_request
            st.markdown(
                _headline_with_headers(
                    f"{request.method} {request.url}", request.headers
                )
            )
            st.json(request.read().decode())
            st.markdown(
                _headline_with_headers(
                    response.http_response.status_code, response.headers
                )
            )
            st.json(response.http_response.text)
317 | 
def _render_message(
    sender: Sender,
    message: str | BetaTextBlock | BetaToolUseBlock | ToolResult,
):
    """
    Convert input from the user or output from the agent to a streamlit message.

    Nothing is rendered for a falsy message, nor for a tool result that carries
    neither output nor error text while screenshots are hidden (such a result
    would otherwise appear as an empty chat bubble).

    Tool results show their output (as a code block for CLIResult, markdown
    otherwise), their error, and - unless screenshots are hidden - their image.
    Text blocks are written as text, tool-use blocks as a code block naming the
    tool and its input, and anything else is passed to markdown.
    """
    # streamlit's hotreloading breaks isinstance checks, so we need to check for class names
    is_tool_result = not isinstance(message, str) and (
        isinstance(message, ToolResult)
        or message.__class__.__name__ in ("ToolResult", "CLIResult", "ToolFailure")
    )
    if not message:
        return
    # ToolResult always *has* `error`/`output` attributes, so test their values
    # rather than their presence to detect a screenshot-only result.
    if (
        is_tool_result
        and st.session_state.hide_images
        and not getattr(message, "error", None)
        and not getattr(message, "output", None)
    ):
        return
    with st.chat_message(sender):
        if is_tool_result:
            message = cast(ToolResult, message)
            if message.output:
                if message.__class__.__name__ == "CLIResult":
                    st.code(message.output)
                else:
                    st.markdown(message.output)
            if message.error:
                st.error(message.error)
            if message.base64_image and not st.session_state.hide_images:
                st.image(base64.b64decode(message.base64_image))
        elif isinstance(message, BetaTextBlock) or isinstance(message, TextBlock):
            st.write(message.text)
        elif isinstance(message, BetaToolUseBlock) or isinstance(message, ToolUseBlock):
            st.code(f"Tool Use: {message.name}\nInput: {message.input}")
        else:
            st.markdown(message)
354 | 
355 | 
# main() is a coroutine, so it is driven by asyncio.run when executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
358 | 


--------------------------------------------------------------------------------
/computer-use-demo/computer_use_demo/tools/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base import CLIResult, ToolResult
 2 | from .bash import BashTool
 3 | from .collection import ToolCollection
 4 | from .computer import ComputerTool
 5 | from .edit import EditTool
 6 | 
 7 | __ALL__ = [
 8 |     BashTool,
 9 |     CLIResult,
10 |     ComputerTool,
11 |     EditTool,
12 |     ToolCollection,
13 |     ToolResult,
14 | ]
15 | 


--------------------------------------------------------------------------------
/computer-use-demo/computer_use_demo/tools/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABCMeta, abstractmethod
 2 | from dataclasses import dataclass, fields, replace
 3 | from typing import Any
 4 | 
 5 | from anthropic.types.beta import BetaToolUnionParam
 6 | 
 7 | 
class BaseAnthropicTool(metaclass=ABCMeta):
    """Abstract base class for Anthropic-defined tools.

    Concrete tools must implement both ``__call__`` (run the tool) and
    ``to_params`` (describe the tool to the API); the class cannot be
    instantiated until both are provided.
    """

    @abstractmethod
    def __call__(self, **kwargs) -> Any:
        """Executes the tool with the given arguments."""
        ...

    @abstractmethod
    def to_params(
        self,
    ) -> BetaToolUnionParam:
        """Return the parameter dict that describes this tool to the API."""
        raise NotImplementedError
21 | 
22 | 
23 | @dataclass(kw_only=True, frozen=True)
24 | class ToolResult:
25 |     """Represents the result of a tool execution."""
26 | 
27 |     output: str | None = None
28 |     error: str | None = None
29 |     base64_image: str | None = None
30 |     system: str | None = None
31 | 
32 |     def __bool__(self):
33 |         return any(getattr(self, field.name) for field in fields(self))
34 | 
35 |     def __add__(self, other: "ToolResult"):
36 |         def combine_fields(
37 |             field: str | None, other_field: str | None, concatenate: bool = True
38 |         ):
39 |             if field and other_field:
40 |                 if concatenate:
41 |                     return field + other_field
42 |                 raise ValueError("Cannot combine tool results")
43 |             return field or other_field
44 | 
45 |         return ToolResult(
46 |             output=combine_fields(self.output, other.output),
47 |             error=combine_fields(self.error, other.error),
48 |             base64_image=combine_fields(self.base64_image, other.base64_image, False),
49 |             system=combine_fields(self.system, other.system),
50 |         )
51 | 
52 |     def replace(self, **kwargs):
53 |         """Returns a new ToolResult with the given fields replaced."""
54 |         return replace(self, **kwargs)
55 | 
56 | 
class CLIResult(ToolResult):
    """A ToolResult that can be rendered as a CLI output.

    Adds no fields or behavior; the subclass only marks the result's kind.
    """
59 | 
60 | 
class ToolFailure(ToolResult):
    """A ToolResult that represents a failure.

    Adds no fields or behavior; the subclass only marks the result's kind.
    """
63 | 
64 | 
class ToolError(Exception):
    """Raised when a tool encounters an error."""

    def __init__(self, message: str) -> None:
        # Keep the text on `.message` so handlers can read it directly.
        self.message = message
70 | 


--------------------------------------------------------------------------------
/computer-use-demo/computer_use_demo/tools/bash.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import os
  3 | from typing import ClassVar, Literal
  4 | 
  5 | from anthropic.types.beta import BetaToolBash20241022Param
  6 | 
  7 | from .base import BaseAnthropicTool, CLIResult, ToolError, ToolResult
  8 | 
  9 | 
 10 | class _BashSession:
 11 |     """A session of a bash shell."""
 12 | 
 13 |     _started: bool
 14 |     _process: asyncio.subprocess.Process
 15 | 
 16 |     command: str = "/bin/bash"
 17 |     _output_delay: float = 0.2  # seconds
 18 |     _timeout: float = 120.0  # seconds
 19 |     _sentinel: str = "<<exit>>"
 20 | 
 21 |     def __init__(self):
 22 |         self._started = False
 23 |         self._timed_out = False
 24 | 
 25 |     async def start(self):
 26 |         if self._started:
 27 |             return
 28 | 
 29 |         self._process = await asyncio.create_subprocess_shell(
 30 |             self.command,
 31 |             preexec_fn=os.setsid,
 32 |             shell=True,
 33 |             bufsize=0,
 34 |             stdin=asyncio.subprocess.PIPE,
 35 |             stdout=asyncio.subprocess.PIPE,
 36 |             stderr=asyncio.subprocess.PIPE,
 37 |         )
 38 | 
 39 |         self._started = True
 40 | 
 41 |     def stop(self):
 42 |         """Terminate the bash shell."""
 43 |         if not self._started:
 44 |             raise ToolError("Session has not started.")
 45 |         if self._process.returncode is not None:
 46 |             return
 47 |         self._process.terminate()
 48 | 
 49 |     async def run(self, command: str):
 50 |         """Execute a command in the bash shell."""
 51 |         if not self._started:
 52 |             raise ToolError("Session has not started.")
 53 |         if self._process.returncode is not None:
 54 |             return ToolResult(
 55 |                 system="tool must be restarted",
 56 |                 error=f"bash has exited with returncode {self._process.returncode}",
 57 |             )
 58 |         if self._timed_out:
 59 |             raise ToolError(
 60 |                 f"timed out: bash has not returned in {self._timeout} seconds and must be restarted",
 61 |             )
 62 | 
 63 |         # we know these are not None because we created the process with PIPEs
 64 |         assert self._process.stdin
 65 |         assert self._process.stdout
 66 |         assert self._process.stderr
 67 | 
 68 |         # send command to the process
 69 |         self._process.stdin.write(
 70 |             command.encode() + f"; echo '{self._sentinel}'\n".encode()
 71 |         )
 72 |         await self._process.stdin.drain()
 73 | 
 74 |         # read output from the process, until the sentinel is found
 75 |         try:
 76 |             async with asyncio.timeout(self._timeout):
 77 |                 while True:
 78 |                     await asyncio.sleep(self._output_delay)
 79 |                     # if we read directly from stdout/stderr, it will wait forever for
 80 |                     # EOF. use the StreamReader buffer directly instead.
 81 |                     output = self._process.stdout._buffer.decode()  # pyright: ignore[reportAttributeAccessIssue]
 82 |                     if self._sentinel in output:
 83 |                         # strip the sentinel and break
 84 |                         output = output[: output.index(self._sentinel)]
 85 |                         break
 86 |         except asyncio.TimeoutError:
 87 |             self._timed_out = True
 88 |             raise ToolError(
 89 |                 f"timed out: bash has not returned in {self._timeout} seconds and must be restarted",
 90 |             ) from None
 91 | 
 92 |         if output.endswith("\n"):
 93 |             output = output[:-1]
 94 | 
 95 |         error = self._process.stderr._buffer.decode()  # pyright: ignore[reportAttributeAccessIssue]
 96 |         if error.endswith("\n"):
 97 |             error = error[:-1]
 98 | 
 99 |         # clear the buffers so that the next output can be read correctly
100 |         self._process.stdout._buffer.clear()  # pyright: ignore[reportAttributeAccessIssue]
101 |         self._process.stderr._buffer.clear()  # pyright: ignore[reportAttributeAccessIssue]
102 | 
103 |         return CLIResult(output=output, error=error)
104 | 
105 | 
class BashTool(BaseAnthropicTool):
    """
    A tool that allows the agent to run bash commands.
    The tool parameters are defined by Anthropic and are not editable.
    """

    _session: _BashSession | None
    name: ClassVar[Literal["bash"]] = "bash"
    api_type: ClassVar[Literal["bash_20241022"]] = "bash_20241022"

    def __init__(self):
        self._session = None
        super().__init__()

    async def _spawn_session(self) -> _BashSession:
        # Install a brand-new session on the tool and start its shell.
        session = _BashSession()
        self._session = session
        await session.start()
        return session

    async def __call__(
        self, command: str | None = None, restart: bool = False, **kwargs
    ):
        """
        Run `command` in the tool's bash session, or restart that session.

        With `restart` true, any existing session is stopped, a fresh one is
        started, and a notice is returned without running a command. Otherwise
        a session is created on first use and `command` is executed in it.
        Raises ToolError when neither a restart nor a command is requested.
        """
        if restart:
            if self._session:
                self._session.stop()
            await self._spawn_session()
            return ToolResult(system="tool has been restarted.")

        session = self._session
        if session is None:
            session = await self._spawn_session()

        if command is None:
            raise ToolError("no command provided.")
        return await session.run(command)

    def to_params(self) -> BetaToolBash20241022Param:
        """Return the API description of this tool: its type and name."""
        params: BetaToolBash20241022Param = {
            "type": self.api_type,
            "name": self.name,
        }
        return params
145 | 


--------------------------------------------------------------------------------
/computer-use-demo/computer_use_demo/tools/browserbase.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import sys
 3 | import json
 4 | from playwright.sync_api import sync_playwright, Playwright
 5 | import dotenv
 6 | import os
 7 | 
 8 | dotenv.load_dotenv()
 9 | 
10 | 
def create_session(project_id, api_key, timeout=30):
    """
    Create a Browserbase session for `project_id`.

    Args:
        project_id: Browserbase project the session belongs to.
        api_key: Browserbase API key, sent in the X-BB-API-Key header.
        timeout: Seconds to wait for the HTTP request before giving up, so an
            unresponsive server cannot block the script indefinitely.

    Returns:
        The new session's id, or None if the request failed or the response
        did not contain an "id" (the error is printed to stderr).
    """
    url = "https://www.browserbase.com/v1/sessions"
    payload = {"projectId": project_id}
    headers = {
        "X-BB-API-Key": api_key,
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(url, json=payload, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()["id"]
    except (requests.exceptions.RequestException, KeyError, ValueError) as e:
        # KeyError/ValueError cover a successful response with an unexpected body.
        print(f"Error creating session: {e}", file=sys.stderr)
        return None
26 | 
def get_debug_url(session_id, api_key, timeout=30):
    """
    Fetch the debug information for a Browserbase session.

    Args:
        session_id: Id of the session to inspect.
        api_key: Browserbase API key, sent in the X-BB-API-Key header.
        timeout: Seconds to wait for the HTTP request before giving up, so an
            unresponsive server cannot block the script indefinitely.

    Returns:
        The decoded JSON body of the response, or None if the request failed
        or the body was not valid JSON (the error is printed to stderr).
    """
    url = f"https://www.browserbase.com/v1/sessions/{session_id}/debug"
    headers = {"X-BB-API-Key": api_key}

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Error getting debug URL: {e}", file=sys.stderr)
        return None
38 |     
def connect_to_browserbase(playwright: Playwright, api_key, session_id):
    """Attach to the given Browserbase session over CDP and return the browser."""
    endpoint = (
        f"wss://connect.browserbase.com?apiKey={api_key}&sessionId={session_id}"
    )
    return playwright.chromium.connect_over_cdp(endpoint)
43 | 
def main():
    """
    Start a Browserbase session, open Google in it, and keep it alive.

    Reads BROWSERBASE_PROJECT_ID and BROWSERBASE_API_KEY from the environment
    (KeyError if either is missing), creates a session, connects to it with
    Playwright, navigates to google.com, writes the session's fullscreen
    debugger URL to /tmp/debugger_url.txt, and then idles until interrupted
    with Ctrl+C. The browser is closed on the way out even if any step after
    connecting fails. Progress messages go to stderr; the session id and debug
    info go to stdout.
    """
    print("Starting browserbase.py script", file=sys.stderr)

    project_id = os.environ["BROWSERBASE_PROJECT_ID"]
    api_key = os.environ["BROWSERBASE_API_KEY"]

    session_id = create_session(project_id, api_key)
    if not session_id:
        print("Failed to create session", file=sys.stderr)
        return

    print(f"Session ID: {session_id}")

    print("Connecting to Browserbase", file=sys.stderr)
    with sync_playwright() as playwright:
        browser = connect_to_browserbase(playwright, api_key, session_id)
        # Only report success once the connection actually exists.
        print("Connected to Browserbase", file=sys.stderr)
        try:
            context = browser.contexts[0]
            page = context.pages[0]

            print("Going to google", file=sys.stderr)
            page.goto('https://www.google.com')

            print("Getting debug URL", file=sys.stderr)
            debug_info = get_debug_url(session_id, api_key)
            if debug_info:
                print(json.dumps(debug_info, indent=2))
                with open('/tmp/debugger_url.txt', 'w') as f:
                    f.write(debug_info['debuggerFullscreenUrl'])
                print("Debug URL saved to /tmp/debugger_url.txt", file=sys.stderr)
            else:
                print("Failed to get debug URL", file=sys.stderr)

            print("Browser session is still active. Press Ctrl+C to exit.", file=sys.stderr)
            # Keep the script running
            while True:
                page.wait_for_timeout(1000)  # Wait for 1 second
        except KeyboardInterrupt:
            print("Closing browser and exiting", file=sys.stderr)
        finally:
            # Runs for every exit path after connecting, not just the wait loop.
            browser.close()

    print("Finished browserbase.py script", file=sys.stderr)
87 | 
88 | if __name__ == "__main__":
89 |     main()
90 | 
91 | 


--------------------------------------------------------------------------------
/computer-use-demo/computer_use_demo/tools/collection.py:
--------------------------------------------------------------------------------
 1 | """Collection classes for managing multiple tools."""
 2 | 
 3 | from typing import Any
 4 | 
 5 | from anthropic.types.beta import BetaToolUnionParam
 6 | 
 7 | from .base import (
 8 |     BaseAnthropicTool,
 9 |     ToolError,
10 |     ToolFailure,
11 |     ToolResult,
12 | )
13 | 
14 | 
class ToolCollection:
    """Holds a set of Anthropic-defined tools and dispatches calls by tool name."""

    def __init__(self, *tools: BaseAnthropicTool):
        self.tools = tools
        # Index every tool under the name it reports in its API parameters.
        self.tool_map = {}
        for tool in tools:
            self.tool_map[tool.to_params()["name"]] = tool

    def to_params(
        self,
    ) -> list[BetaToolUnionParam]:
        """Return the API parameter dict of every tool, in registration order."""
        params = []
        for tool in self.tools:
            params.append(tool.to_params())
        return params

    async def run(self, *, name: str, tool_input: dict[str, Any]) -> ToolResult:
        """Invoke the tool registered as ``name`` with ``tool_input`` as keyword arguments.

        An unknown name, or a ToolError raised by the tool, is returned as a
        ToolFailure instead of being raised.
        """
        selected = self.tool_map.get(name)
        if selected:
            try:
                return await selected(**tool_input)
            except ToolError as err:
                return ToolFailure(error=err.message)
        return ToolFailure(error=f"Tool {name} is invalid")
35 | 


--------------------------------------------------------------------------------
/computer-use-demo/computer_use_demo/tools/computer.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import base64
  3 | import os
  4 | import shlex
  5 | import shutil
  6 | from enum import StrEnum
  7 | from pathlib import Path
  8 | from typing import Literal, TypedDict
  9 | from uuid import uuid4
 10 | 
 11 | from anthropic.types.beta import BetaToolComputerUse20241022Param
 12 | 
 13 | from .base import BaseAnthropicTool, ToolError, ToolResult
 14 | from .run import run
 15 | 
 16 | OUTPUT_DIR = "/tmp/outputs"
 17 | 
 18 | TYPING_DELAY_MS = 12
 19 | TYPING_GROUP_SIZE = 50
 20 | 
 21 | Action = Literal[
 22 |     "key",
 23 |     "type",
 24 |     "mouse_move",
 25 |     "left_click",
 26 |     "left_click_drag",
 27 |     "right_click",
 28 |     "middle_click",
 29 |     "double_click",
 30 |     "screenshot",
 31 |     "cursor_position",
 32 | ]
 33 | 
 34 | 
 35 | class Resolution(TypedDict):
 36 |     width: int
 37 |     height: int
 38 | 
 39 | 
 40 | # sizes above XGA/WXGA are not recommended (see README.md)
 41 | # scale down to one of these targets if ComputerTool._scaling_enabled is set
 42 | MAX_SCALING_TARGETS: dict[str, Resolution] = {
 43 |     "XGA": Resolution(width=1024, height=768),  # 4:3
 44 |     "WXGA": Resolution(width=1280, height=800),  # 16:10
 45 |     "FWXGA": Resolution(width=1366, height=768),  # ~16:9
 46 | }
 47 | 
 48 | 
 49 | class ScalingSource(StrEnum):
 50 |     COMPUTER = "computer"
 51 |     API = "api"
 52 | 
 53 | 
 54 | class ComputerToolOptions(TypedDict):
 55 |     display_height_px: int
 56 |     display_width_px: int
 57 |     display_number: int | None
 58 | 
 59 | 
 60 | def chunks(s: str, chunk_size: int) -> list[str]:
 61 |     return [s[i : i + chunk_size] for i in range(0, len(s), chunk_size)]
 62 | 
 63 | 
 64 | class ComputerTool(BaseAnthropicTool):
 65 |     """
 66 |     A tool that allows the agent to interact with the screen, keyboard, and mouse of the current computer.
 67 |     The tool parameters are defined by Anthropic and are not editable.
 68 |     """
 69 | 
 70 |     name: Literal["computer"] = "computer"
 71 |     api_type: Literal["computer_20241022"] = "computer_20241022"
 72 |     width: int
 73 |     height: int
 74 |     display_num: int | None
 75 | 
 76 |     _screenshot_delay = 2.0
 77 |     _scaling_enabled = True
 78 | 
 79 |     @property
 80 |     def options(self) -> ComputerToolOptions:
 81 |         width, height = self.scale_coordinates(
 82 |             ScalingSource.COMPUTER, self.width, self.height
 83 |         )
 84 |         return {
 85 |             "display_width_px": width,
 86 |             "display_height_px": height,
 87 |             "display_number": self.display_num,
 88 |         }
 89 | 
 90 |     def to_params(self) -> BetaToolComputerUse20241022Param:
 91 |         return {"name": self.name, "type": self.api_type, **self.options}
 92 | 
 93 |     def __init__(self):
 94 |         super().__init__()
 95 | 
 96 |         self.width = int(os.getenv("WIDTH") or 0)
 97 |         self.height = int(os.getenv("HEIGHT") or 0)
 98 |         assert self.width and self.height, "WIDTH, HEIGHT must be set"
 99 |         if (display_num := os.getenv("DISPLAY_NUM")) is not None:
100 |             self.display_num = int(display_num)
101 |             self._display_prefix = f"DISPLAY=:{self.display_num} "
102 |         else:
103 |             self.display_num = None
104 |             self._display_prefix = ""
105 | 
106 |         self.xdotool = f"{self._display_prefix}xdotool"
107 | 
108 |     async def __call__(
109 |         self,
110 |         *,
111 |         action: Action,
112 |         text: str | None = None,
113 |         coordinate: tuple[int, int] | None = None,
114 |         **kwargs,
115 |     ):
116 |         if action in ("mouse_move", "left_click_drag"):
117 |             if coordinate is None:
118 |                 raise ToolError(f"coordinate is required for {action}")
119 |             if text is not None:
120 |                 raise ToolError(f"text is not accepted for {action}")
121 |             if not isinstance(coordinate, list) or len(coordinate) != 2:
122 |                 raise ToolError(f"{coordinate} must be a tuple of length 2")
123 |             if not all(isinstance(i, int) and i >= 0 for i in coordinate):
124 |                 raise ToolError(f"{coordinate} must be a tuple of non-negative ints")
125 | 
126 |             x, y = self.scale_coordinates(
127 |                 ScalingSource.API, coordinate[0], coordinate[1]
128 |             )
129 | 
130 |             if action == "mouse_move":
131 |                 return await self.shell(f"{self.xdotool} mousemove --sync {x} {y}")
132 |             elif action == "left_click_drag":
133 |                 return await self.shell(
134 |                     f"{self.xdotool} mousedown 1 mousemove --sync {x} {y} mouseup 1"
135 |                 )
136 | 
137 |         if action in ("key", "type"):
138 |             if text is None:
139 |                 raise ToolError(f"text is required for {action}")
140 |             if coordinate is not None:
141 |                 raise ToolError(f"coordinate is not accepted for {action}")
142 |             if not isinstance(text, str):
143 |                 raise ToolError(output=f"{text} must be a string")
144 | 
145 |             if action == "key":
146 |                 return await self.shell(f"{self.xdotool} key -- {text}")
147 |             elif action == "type":
148 |                 results: list[ToolResult] = []
149 |                 for chunk in chunks(text, TYPING_GROUP_SIZE):
150 |                     cmd = f"{self.xdotool} type --delay {TYPING_DELAY_MS} -- {shlex.quote(chunk)}"
151 |                     results.append(await self.shell(cmd, take_screenshot=False))
152 |                 screenshot_base64 = (await self.screenshot()).base64_image
153 |                 return ToolResult(
154 |                     output="".join(result.output or "" for result in results),
155 |                     error="".join(result.error or "" for result in results),
156 |                     base64_image=screenshot_base64,
157 |                 )
158 | 
159 |         if action in (
160 |             "left_click",
161 |             "right_click",
162 |             "double_click",
163 |             "middle_click",
164 |             "screenshot",
165 |             "cursor_position",
166 |         ):
167 |             if text is not None:
168 |                 raise ToolError(f"text is not accepted for {action}")
169 |             if coordinate is not None:
170 |                 raise ToolError(f"coordinate is not accepted for {action}")
171 | 
172 |             if action == "screenshot":
173 |                 return await self.screenshot()
174 |             elif action == "cursor_position":
175 |                 result = await self.shell(
176 |                     f"{self.xdotool} getmouselocation --shell",
177 |                     take_screenshot=False,
178 |                 )
179 |                 output = result.output or ""
180 |                 x, y = self.scale_coordinates(
181 |                     ScalingSource.COMPUTER,
182 |                     int(output.split("X=")[1].split("\n")[0]),
183 |                     int(output.split("Y=")[1].split("\n")[0]),
184 |                 )
185 |                 return result.replace(output=f"X={x},Y={y}")
186 |             else:
187 |                 click_arg = {
188 |                     "left_click": "1",
189 |                     "right_click": "3",
190 |                     "middle_click": "2",
191 |                     "double_click": "--repeat 2 --delay 500 1",
192 |                 }[action]
193 |                 return await self.shell(f"{self.xdotool} click {click_arg}")
194 | 
195 |         raise ToolError(f"Invalid action: {action}")
196 | 
197 |     async def screenshot(self):
198 |         """Take a screenshot of the current screen and return the base64 encoded image."""
199 |         output_dir = Path(OUTPUT_DIR)
200 |         output_dir.mkdir(parents=True, exist_ok=True)
201 |         path = output_dir / f"screenshot_{uuid4().hex}.png"
202 | 
203 |         # Try gnome-screenshot first
204 |         if shutil.which("gnome-screenshot"):
205 |             screenshot_cmd = f"{self._display_prefix}gnome-screenshot -f {path} -p"
206 |         else:
207 |             # Fall back to scrot if gnome-screenshot isn't available
208 |             screenshot_cmd = f"{self._display_prefix}scrot -p {path}"
209 | 
210 |         result = await self.shell(screenshot_cmd, take_screenshot=False)
211 |         if self._scaling_enabled:
212 |             x, y = self.scale_coordinates(
213 |                 ScalingSource.COMPUTER, self.width, self.height
214 |             )
215 |             await self.shell(
216 |                 f"convert {path} -resize {x}x{y}! {path}", take_screenshot=False
217 |             )
218 | 
219 |         if path.exists():
220 |             return result.replace(
221 |                 base64_image=base64.b64encode(path.read_bytes()).decode()
222 |             )
223 |         raise ToolError(f"Failed to take screenshot: {result.error}")
224 | 
225 |     async def shell(self, command: str, take_screenshot=True) -> ToolResult:
226 |         """Run a shell command and return the output, error, and optionally a screenshot."""
227 |         _, stdout, stderr = await run(command)
228 |         base64_image = None
229 | 
230 |         if take_screenshot:
231 |             # delay to let things settle before taking a screenshot
232 |             await asyncio.sleep(self._screenshot_delay)
233 |             base64_image = (await self.screenshot()).base64_image
234 | 
235 |         return ToolResult(output=stdout, error=stderr, base64_image=base64_image)
236 | 
237 |     def scale_coordinates(self, source: ScalingSource, x: int, y: int):
238 |         """Scale coordinates to a target maximum resolution."""
239 |         if not self._scaling_enabled:
240 |             return x, y
241 |         ratio = self.width / self.height
242 |         target_dimension = None
243 |         for dimension in MAX_SCALING_TARGETS.values():
244 |             # allow some error in the aspect ratio - not ratios are exactly 16:9
245 |             if abs(dimension["width"] / dimension["height"] - ratio) < 0.02:
246 |                 if dimension["width"] < self.width:
247 |                     target_dimension = dimension
248 |                 break
249 |         if target_dimension is None:
250 |             return x, y
251 |         # should be less than 1
252 |         x_scaling_factor = target_dimension["width"] / self.width
253 |         y_scaling_factor = target_dimension["height"] / self.height
254 |         if source == ScalingSource.API:
255 |             if x > self.width or y > self.height:
256 |                 raise ToolError(f"Coordinates {x}, {y} are out of bounds")
257 |             # scale up
258 |             return round(x / x_scaling_factor), round(y / y_scaling_factor)
259 |         # scale down
260 |         return round(x * x_scaling_factor), round(y * y_scaling_factor)
261 | 


--------------------------------------------------------------------------------
/computer-use-demo/computer_use_demo/tools/edit.py:
--------------------------------------------------------------------------------
  1 | from collections import defaultdict
  2 | from pathlib import Path
  3 | from typing import Literal, get_args
  4 | 
  5 | from anthropic.types.beta import BetaToolTextEditor20241022Param
  6 | 
  7 | from .base import BaseAnthropicTool, CLIResult, ToolError, ToolResult
  8 | from .run import maybe_truncate, run
  9 | 
 10 | Command = Literal[
 11 |     "view",
 12 |     "create",
 13 |     "str_replace",
 14 |     "insert",
 15 |     "undo_edit",
 16 | ]
 17 | SNIPPET_LINES: int = 4
 18 | 
 19 | 
 20 | class EditTool(BaseAnthropicTool):
 21 |     """
 22 |     An filesystem editor tool that allows the agent to view, create, and edit files.
 23 |     The tool parameters are defined by Anthropic and are not editable.
 24 |     """
 25 | 
 26 |     api_type: Literal["text_editor_20241022"] = "text_editor_20241022"
 27 |     name: Literal["str_replace_editor"] = "str_replace_editor"
 28 | 
 29 |     _file_history: dict[Path, list[str]]
 30 | 
 31 |     def __init__(self):
 32 |         self._file_history = defaultdict(list)
 33 |         super().__init__()
 34 | 
 35 |     def to_params(self) -> BetaToolTextEditor20241022Param:
 36 |         return {
 37 |             "name": self.name,
 38 |             "type": self.api_type,
 39 |         }
 40 | 
 41 |     async def __call__(
 42 |         self,
 43 |         *,
 44 |         command: Command,
 45 |         path: str,
 46 |         file_text: str | None = None,
 47 |         view_range: list[int] | None = None,
 48 |         old_str: str | None = None,
 49 |         new_str: str | None = None,
 50 |         insert_line: int | None = None,
 51 |         **kwargs,
 52 |     ):
 53 |         _path = Path(path)
 54 |         self.validate_path(command, _path)
 55 |         if command == "view":
 56 |             return await self.view(_path, view_range)
 57 |         elif command == "create":
 58 |             if not file_text:
 59 |                 raise ToolError("Parameter `file_text` is required for command: create")
 60 |             self.write_file(_path, file_text)
 61 |             self._file_history[_path].append(file_text)
 62 |             return ToolResult(output=f"File created successfully at: {_path}")
 63 |         elif command == "str_replace":
 64 |             if not old_str:
 65 |                 raise ToolError(
 66 |                     "Parameter `old_str` is required for command: str_replace"
 67 |                 )
 68 |             return self.str_replace(_path, old_str, new_str)
 69 |         elif command == "insert":
 70 |             if insert_line is None:
 71 |                 raise ToolError(
 72 |                     "Parameter `insert_line` is required for command: insert"
 73 |                 )
 74 |             if not new_str:
 75 |                 raise ToolError("Parameter `new_str` is required for command: insert")
 76 |             return self.insert(_path, insert_line, new_str)
 77 |         elif command == "undo_edit":
 78 |             return self.undo_edit(_path)
 79 |         raise ToolError(
 80 |             f'Unrecognized command {command}. The allowed commands for the {self.name} tool are: {", ".join(get_args(Command))}'
 81 |         )
 82 | 
 83 |     def validate_path(self, command: str, path: Path):
 84 |         """
 85 |         Check that the path/command combination is valid.
 86 |         """
 87 |         # Check if its an absolute path
 88 |         if not path.is_absolute():
 89 |             suggested_path = Path("") / path
 90 |             raise ToolError(
 91 |                 f"The path {path} is not an absolute path, it should start with `/`. Maybe you meant {suggested_path}?"
 92 |             )
 93 |         # Check if path exists
 94 |         if not path.exists() and command != "create":
 95 |             raise ToolError(
 96 |                 f"The path {path} does not exist. Please provide a valid path."
 97 |             )
 98 |         if path.exists() and command == "create":
 99 |             raise ToolError(
100 |                 f"File already exists at: {path}. Cannot overwrite files using command `create`."
101 |             )
102 |         # Check if the path points to a directory
103 |         if path.is_dir():
104 |             if command != "view":
105 |                 raise ToolError(
106 |                     f"The path {path} is a directory and only the `view` command can be used on directories"
107 |                 )
108 | 
109 |     async def view(self, path: Path, view_range: list[int] | None = None):
110 |         """Implement the view command"""
111 |         if path.is_dir():
112 |             if view_range:
113 |                 raise ToolError(
114 |                     "The `view_range` parameter is not allowed when `path` points to a directory."
115 |                 )
116 | 
117 |             _, stdout, stderr = await run(
118 |                 rf"find {path} -maxdepth 2 -not -path '*/\.*'"
119 |             )
120 |             if not stderr:
121 |                 stdout = f"Here's the files and directories up to 2 levels deep in {path}, excluding hidden items:\n{stdout}\n"
122 |             return CLIResult(output=stdout, error=stderr)
123 | 
124 |         file_content = self.read_file(path)
125 |         init_line = 1
126 |         if view_range:
127 |             if len(view_range) != 2 or not all(isinstance(i, int) for i in view_range):
128 |                 raise ToolError(
129 |                     "Invalid `view_range`. It should be a list of two integers."
130 |                 )
131 |             file_lines = file_content.split("\n")
132 |             n_lines_file = len(file_lines)
133 |             init_line, final_line = view_range
134 |             if init_line < 1 or init_line > n_lines_file:
135 |                 raise ToolError(
136 |                     f"Invalid `view_range`: {view_range}. It's first element `{init_line}` should be within the range of lines of the file: {[1, n_lines_file]}"
137 |                 )
138 |             if final_line > n_lines_file:
139 |                 raise ToolError(
140 |                     f"Invalid `view_range`: {view_range}. It's second element `{final_line}` should be smaller than the number of lines in the file: `{n_lines_file}`"
141 |                 )
142 |             if final_line != -1 and final_line < init_line:
143 |                 raise ToolError(
144 |                     f"Invalid `view_range`: {view_range}. It's second element `{final_line}` should be larger or equal than its first `{init_line}`"
145 |                 )
146 | 
147 |             if final_line == -1:
148 |                 file_content = "\n".join(file_lines[init_line - 1 :])
149 |             else:
150 |                 file_content = "\n".join(file_lines[init_line - 1 : final_line])
151 | 
152 |         return CLIResult(
153 |             output=self._make_output(file_content, str(path), init_line=init_line)
154 |         )
155 | 
156 |     def str_replace(self, path: Path, old_str: str, new_str: str | None):
157 |         """Implement the str_replace command, which replaces old_str with new_str in the file content"""
158 |         # Read the file content
159 |         file_content = self.read_file(path).expandtabs()
160 |         old_str = old_str.expandtabs()
161 |         new_str = new_str.expandtabs() if new_str is not None else ""
162 | 
163 |         # Check if old_str is unique in the file
164 |         occurrences = file_content.count(old_str)
165 |         if occurrences == 0:
166 |             raise ToolError(
167 |                 f"No replacement was performed, old_str `{old_str}` did not appear verbatim in {path}."
168 |             )
169 |         elif occurrences > 1:
170 |             file_content_lines = file_content.split("\n")
171 |             lines = [
172 |                 idx + 1
173 |                 for idx, line in enumerate(file_content_lines)
174 |                 if old_str in line
175 |             ]
176 |             raise ToolError(
177 |                 f"No replacement was performed. Multiple occurrences of old_str `{old_str}` in lines {lines}. Please ensure it is unique"
178 |             )
179 | 
180 |         # Replace old_str with new_str
181 |         new_file_content = file_content.replace(old_str, new_str)
182 | 
183 |         # Write the new content to the file
184 |         self.write_file(path, new_file_content)
185 | 
186 |         # Save the content to history
187 |         self._file_history[path].append(file_content)
188 | 
189 |         # Create a snippet of the edited section
190 |         replacement_line = file_content.split(old_str)[0].count("\n")
191 |         start_line = max(0, replacement_line - SNIPPET_LINES)
192 |         end_line = replacement_line + SNIPPET_LINES + new_str.count("\n")
193 |         snippet = "\n".join(new_file_content.split("\n")[start_line : end_line + 1])
194 | 
195 |         # Prepare the success message
196 |         success_msg = f"The file {path} has been edited. "
197 |         success_msg += self._make_output(
198 |             snippet, f"a snippet of {path}", start_line + 1
199 |         )
200 |         success_msg += "Review the changes and make sure they are as expected. Edit the file again if necessary."
201 | 
202 |         return CLIResult(output=success_msg)
203 | 
204 |     def insert(self, path: Path, insert_line: int, new_str: str):
205 |         """Implement the insert command, which inserts new_str at the specified line in the file content."""
206 |         file_text = self.read_file(path).expandtabs()
207 |         new_str = new_str.expandtabs()
208 |         file_text_lines = file_text.split("\n")
209 |         n_lines_file = len(file_text_lines)
210 | 
211 |         if insert_line < 0 or insert_line > n_lines_file:
212 |             raise ToolError(
213 |                 f"Invalid `insert_line` parameter: {insert_line}. It should be within the range of lines of the file: {[0, n_lines_file]}"
214 |             )
215 | 
216 |         new_str_lines = new_str.split("\n")
217 |         new_file_text_lines = (
218 |             file_text_lines[:insert_line]
219 |             + new_str_lines
220 |             + file_text_lines[insert_line:]
221 |         )
222 |         snippet_lines = (
223 |             file_text_lines[max(0, insert_line - SNIPPET_LINES) : insert_line]
224 |             + new_str_lines
225 |             + file_text_lines[insert_line : insert_line + SNIPPET_LINES]
226 |         )
227 | 
228 |         new_file_text = "\n".join(new_file_text_lines)
229 |         snippet = "\n".join(snippet_lines)
230 | 
231 |         self.write_file(path, new_file_text)
232 |         self._file_history[path].append(file_text)
233 | 
234 |         success_msg = f"The file {path} has been edited. "
235 |         success_msg += self._make_output(
236 |             snippet,
237 |             "a snippet of the edited file",
238 |             max(1, insert_line - SNIPPET_LINES + 1),
239 |         )
240 |         success_msg += "Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the file again if necessary."
241 |         return CLIResult(output=success_msg)
242 | 
243 |     def undo_edit(self, path: Path):
244 |         """Implement the undo_edit command."""
245 |         if not self._file_history[path]:
246 |             raise ToolError(f"No edit history found for {path}.")
247 | 
248 |         old_text = self._file_history[path].pop()
249 |         self.write_file(path, old_text)
250 | 
251 |         return CLIResult(
252 |             output=f"Last edit to {path} undone successfully. {self._make_output(old_text, str(path))}"
253 |         )
254 | 
255 |     def read_file(self, path: Path):
256 |         """Read the content of a file from a given path; raise a ToolError if an error occurs."""
257 |         try:
258 |             return path.read_text()
259 |         except Exception as e:
260 |             raise ToolError(f"Ran into {e} while trying to read {path}") from None
261 | 
262 |     def write_file(self, path: Path, file: str):
263 |         """Write the content of a file to a given path; raise a ToolError if an error occurs."""
264 |         try:
265 |             path.write_text(file)
266 |         except Exception as e:
267 |             raise ToolError(f"Ran into {e} while trying to write to {path}") from None
268 | 
269 |     def _make_output(
270 |         self,
271 |         file_content: str,
272 |         file_descriptor: str,
273 |         init_line: int = 1,
274 |         expand_tabs: bool = True,
275 |     ):
276 |         """Generate output for the CLI based on the content of a file."""
277 |         file_content = maybe_truncate(file_content)
278 |         if expand_tabs:
279 |             file_content = file_content.expandtabs()
280 |         file_content = "\n".join(
281 |             [
282 |                 f"{i + init_line:6}\t{line}"
283 |                 for i, line in enumerate(file_content.split("\n"))
284 |             ]
285 |         )
286 |         return (
287 |             f"Here's the result of running `cat -n` on {file_descriptor}:\n"
288 |             + file_content
289 |             + "\n"
290 |         )
291 | 


--------------------------------------------------------------------------------
/computer-use-demo/computer_use_demo/tools/run.py:
--------------------------------------------------------------------------------
 1 | """Utility to run shell commands asynchronously with a timeout."""
 2 | 
 3 | import asyncio
 4 | 
 5 | TRUNCATED_MESSAGE: str = "<response clipped><NOTE>To save on context only part of this file has been shown to you. You should retry this tool after you have searched inside the file with `grep -n` in order to find the line numbers of what you are looking for.</NOTE>"
 6 | MAX_RESPONSE_LEN: int = 16000
 7 | 
 8 | 
 9 | def maybe_truncate(content: str, truncate_after: int | None = MAX_RESPONSE_LEN):
10 |     """Truncate content and append a notice if content exceeds the specified length."""
11 |     return (
12 |         content
13 |         if not truncate_after or len(content) <= truncate_after
14 |         else content[:truncate_after] + TRUNCATED_MESSAGE
15 |     )
16 | 
17 | 
async def run(
    cmd: str,
    timeout: float | None = 120.0,  # seconds
    truncate_after: int | None = MAX_RESPONSE_LEN,
):
    """Run a shell command asynchronously with a timeout.

    Args:
        cmd: Command line passed to the shell.
        timeout: Seconds to wait for the command; ``None`` waits indefinitely.
        truncate_after: Maximum length of each returned stream before it is
            clipped by ``maybe_truncate``.

    Returns:
        A ``(returncode, stdout, stderr)`` tuple with both streams decoded to
        text.

    Raises:
        TimeoutError: if the command does not finish within ``timeout``; the
            process is killed first.
    """
    process = await asyncio.create_subprocess_shell(
        cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )

    try:
        stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout)
    except asyncio.TimeoutError as exc:
        try:
            process.kill()
        except ProcessLookupError:
            # The process already exited on its own.
            pass
        raise TimeoutError(
            f"Command '{cmd}' timed out after {timeout} seconds"
        ) from exc

    # Commands may emit bytes that are not valid UTF-8 (e.g. binary output).
    # Substitute those bytes instead of failing with UnicodeDecodeError.
    return (
        process.returncode or 0,
        maybe_truncate(stdout.decode(errors="replace"), truncate_after=truncate_after),
        maybe_truncate(stderr.decode(errors="replace"), truncate_after=truncate_after),
    )
43 | 


--------------------------------------------------------------------------------
/computer-use-demo/dev-requirements.txt:
--------------------------------------------------------------------------------
1 | -r computer_use_demo/requirements.txt
2 | ruff==0.6.7
3 | pre-commit==3.8.0
4 | pytest==8.3.3
5 | pytest-asyncio==0.23.6
6 | # don't use this file, wrong version


--------------------------------------------------------------------------------
/computer-use-demo/image/.config/tint2/applications/firefox-custom.desktop:
--------------------------------------------------------------------------------
1 | [Desktop Entry]
2 | Name=Firefox Custom
3 | Comment=Open Firefox with custom URL
4 | Exec=firefox-esr -new-window
5 | Icon=firefox-esr
6 | Terminal=false
7 | Type=Application
8 | Categories=Network;WebBrowser;
9 | 


--------------------------------------------------------------------------------
/computer-use-demo/image/.config/tint2/applications/gedit.desktop:
--------------------------------------------------------------------------------
1 | [Desktop Entry]
2 | Name=Gedit
3 | Comment=Open gedit
4 | Exec=gedit
5 | Icon=text-editor-symbolic
6 | Terminal=false
7 | Type=Application
8 | Categories=TextEditor;
9 | 


--------------------------------------------------------------------------------
/computer-use-demo/image/.config/tint2/applications/terminal.desktop:
--------------------------------------------------------------------------------
1 | [Desktop Entry]
2 | Name=Terminal
3 | Comment=Open Terminal
4 | Exec=xterm
5 | Icon=utilities-terminal
6 | Terminal=false
7 | Type=Application
8 | Categories=System;TerminalEmulator;
9 | 


--------------------------------------------------------------------------------
/computer-use-demo/image/.config/tint2/tint2rc:
--------------------------------------------------------------------------------
  1 | #-------------------------------------
  2 | # Panel
  3 | panel_items = TL
  4 | panel_size = 100% 60
  5 | panel_margin = 0 0
  6 | panel_padding = 2 0 2
  7 | panel_background_id = 1
  8 | wm_menu = 0
  9 | panel_dock = 0
 10 | panel_position = bottom center horizontal
 11 | panel_layer = top
 12 | panel_monitor = all
 13 | panel_shrink = 0
 14 | autohide = 0
 15 | autohide_show_timeout = 0
 16 | autohide_hide_timeout = 0.5
 17 | autohide_height = 2
 18 | strut_policy = follow_size
 19 | panel_window_name = tint2
 20 | disable_transparency = 1
 21 | mouse_effects = 1
 22 | font_shadow = 0
 23 | mouse_hover_icon_asb = 100 0 10
 24 | mouse_pressed_icon_asb = 100 0 0
 25 | scale_relative_to_dpi = 0
 26 | scale_relative_to_screen_height = 0
 27 | 
 28 | #-------------------------------------
 29 | # Taskbar
 30 | taskbar_mode = single_desktop
 31 | taskbar_hide_if_empty = 0
 32 | taskbar_padding = 0 0 2
 33 | taskbar_background_id = 0
 34 | taskbar_active_background_id = 0
 35 | taskbar_name = 1
 36 | taskbar_hide_inactive_tasks = 0
 37 | taskbar_hide_different_monitor = 0
 38 | taskbar_hide_different_desktop = 0
 39 | taskbar_always_show_all_desktop_tasks = 0
 40 | taskbar_name_padding = 4 2
 41 | taskbar_name_background_id = 0
 42 | taskbar_name_active_background_id = 0
 43 | taskbar_name_font_color = #e3e3e3 100
 44 | taskbar_name_active_font_color = #ffffff 100
 45 | taskbar_distribute_size = 0
 46 | taskbar_sort_order = none
 47 | task_align = left
 48 | 
 49 | #-------------------------------------
 50 | # Launcher
 51 | launcher_padding = 4 8 4
 52 | launcher_background_id = 0
 53 | launcher_icon_background_id = 0
 54 | launcher_icon_size = 48
 55 | launcher_icon_asb = 100 0 0
 56 | launcher_icon_theme_override = 0
 57 | startup_notifications = 1
 58 | launcher_tooltip = 1
 59 | 
 60 | #-------------------------------------
 61 | # Launcher icon
 62 | launcher_item_app = /usr/share/applications/libreoffice-calc.desktop
 63 | launcher_item_app = /home/computeruse/.config/tint2/applications/terminal.desktop
 64 | launcher_item_app = /home/computeruse/.config/tint2/applications/firefox-custom.desktop
 65 | launcher_item_app = /usr/share/applications/xpaint.desktop
 66 | launcher_item_app = /usr/share/applications/xpdf.desktop
 67 | launcher_item_app = /home/computeruse/.config/tint2/applications/gedit.desktop
 68 | launcher_item_app = /usr/share/applications/galculator.desktop
 69 | 
 70 | #-------------------------------------
 71 | # Background definitions
 72 | # ID 1
 73 | rounded = 0
 74 | border_width = 0
 75 | background_color = #000000 60
 76 | border_color = #000000 30
 77 | 
 78 | # ID 2
 79 | rounded = 4
 80 | border_width = 1
 81 | background_color = #777777 20
 82 | border_color = #777777 30
 83 | 
 84 | # ID 3
 85 | rounded = 4
 86 | border_width = 1
 87 | background_color = #777777 20
 88 | border_color = #ffffff 40
 89 | 
 90 | # ID 4
 91 | rounded = 4
 92 | border_width = 1
 93 | background_color = #aa4400 100
 94 | border_color = #aa7733 100
 95 | 
 96 | # ID 5
 97 | rounded = 4
 98 | border_width = 1
 99 | background_color = #aaaa00 100
100 | border_color = #aaaa00 100
101 | 


--------------------------------------------------------------------------------
/computer-use-demo/image/.streamlit/config.toml:
--------------------------------------------------------------------------------
 1 | [server]
 2 | fileWatcherType = "auto"
 3 | runOnSave = true
 4 | 
 5 | [browser]
 6 | gatherUsageStats = false
 7 | 
 8 | [theme]
 9 | primaryColor="#4361ee"
10 | backgroundColor="#f8f9fa"
11 | secondaryBackgroundColor="#e9ecef"
12 | textColor="#212529"
13 | font="sans serif"
14 | 


--------------------------------------------------------------------------------
/computer-use-demo/image/entrypoint.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | # Bring up the desktop stack and noVNC first; with set -e a failure in either aborts here
 4 | ./start_all.sh
 5 | ./novnc_startup.sh
 6 | # Serve the landing page in the background, logging stdout and stderr to one file
 7 | python http_server.py > /tmp/server_logs.txt 2>&1 &
 8 | 
 9 | # Run browserbase.py in the background and copy its combined stdout/stderr to a log file and to stderr
10 | python /home/computeruse/computer_use_demo/tools/browserbase.py 2>&1 | tee /tmp/browserbase_logs.txt >&2 &
11 | 
12 | # Open the debugger URL once it is available; backgrounded so startup is not blocked
13 | ./open_debugger.sh &
14 | # Start the Streamlit app on port 8501; only its stdout is redirected to the log file
15 | STREAMLIT_SERVER_PORT=8501 python -m streamlit run computer_use_demo/streamlit.py > /tmp/streamlit_stdout.log &
16 | 
17 | echo "✨ Computer Use Demo is ready!"
18 | echo "➡️  Open http://localhost:8080 in your browser to begin"
19 | 
20 | # Keep the container running
21 | tail -f /dev/null


--------------------------------------------------------------------------------
/computer-use-demo/image/http_server.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import socket
 3 | from http.server import HTTPServer, SimpleHTTPRequestHandler
 4 | 
 5 | 
 6 | class HTTPServerV6(HTTPServer):  # HTTPServer that binds an IPv6 socket
 7 |     address_family = socket.AF_INET6
 8 | 
 9 | 
10 | def run_server():
11 |     os.chdir(os.path.dirname(__file__) + "/static_content")
12 |     server_address = ("::", 8080)
13 |     httpd = HTTPServerV6(server_address, SimpleHTTPRequestHandler)
14 |     print("Starting HTTP server on port 8080...")  # noqa: T201
15 |     httpd.serve_forever()
16 | 
17 | 
18 | if __name__ == "__main__":
19 |     run_server()
20 | 


--------------------------------------------------------------------------------
/computer-use-demo/image/index.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html>
 3 |     <head>
 4 |         <title>Computer Use Demo</title>
 5 |         <meta name="permissions-policy" content="fullscreen=*" />
 6 |         <style>
 7 |             body {
 8 |                 margin: 0;
 9 |                 padding: 0;
10 |                 overflow: hidden;
11 |             }
12 |             .container {
13 |                 display: flex;
14 |                 height: 100vh;
15 |                 width: 100vw;
16 |             }
17 |             .left {
18 |                 flex: 1;
19 |                 border: none;
20 |                 height: 100vh;
21 |             }
22 |             .right {
23 |                 flex: 2;
24 |                 border: none;
25 |                 height: 100vh;
26 |             }
27 |         </style>
28 |     </head>
29 |     <body>
30 |         <div class="container">
31 |             <iframe
32 |                 src="http://localhost:8501"
33 |                 class="left"
34 |                 allow="fullscreen"
35 |             ></iframe>
36 |             <iframe
37 |                 src="http://localhost:6080/vnc.html?view_only=1&autoconnect=1&resize=scale"
38 |                 class="right"
39 |                 allow="fullscreen"
40 |             ></iframe>
41 |         </div>
42 |     </body>
43 | </html>
44 | 


--------------------------------------------------------------------------------
/computer-use-demo/image/mutter_startup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Launch the mutter window manager and wait until xdotool can see its window.
 3 | echo "starting mutter"
 4 | XDG_SESSION_TYPE=x11 mutter --replace --sm-disable 2>/tmp/mutter_stderr.log &
 5 | 
 6 | # Poll once per second, for up to 30 seconds, until a window of class "mutter" appears
 7 | timeout=30
 8 | while [ "$timeout" -gt 0 ]; do
 9 |     if xdotool search --class "mutter" >/dev/null 2>&1; then
10 |         break
11 |     fi
12 |     sleep 1
13 |     ((timeout--))
14 | done
15 | 
16 | # timeout reaches 0 only when every poll failed: show what mutter logged and fail
17 | if [ "$timeout" -eq 0 ]; then
18 |     echo "mutter stderr output:" >&2
19 |     cat /tmp/mutter_stderr.log >&2
20 |     exit 1
21 | fi
22 | 
23 | # The log is only diagnostic; -f keeps a missing file from failing the startup chain
24 | rm -f /tmp/mutter_stderr.log
25 | 


--------------------------------------------------------------------------------
/computer-use-demo/image/novnc_startup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Start the noVNC proxy in the background and wait for it to listen on port 6080.
 3 | echo "starting noVNC"
 4 | 
 5 | # Proxy the local VNC server (localhost:5900) to the web client; all output goes to /tmp/novnc.log
 6 | /opt/noVNC/utils/novnc_proxy \
 7 |     --vnc localhost:5900 \
 8 |     --listen 6080 \
 9 |     --web /opt/noVNC \
10 |     > /tmp/novnc.log 2>&1 &
11 | 
12 | # Poll once per second, for up to 10 seconds, until something listens on port 6080
13 | timeout=10
14 | while [ "$timeout" -gt 0 ]; do
15 |     if netstat -tuln | grep -q ":6080 "; then
16 |         break
17 |     fi
18 |     sleep 1
19 |     ((timeout--))
20 | done
21 | 
22 | # Only report success when the port actually opened; otherwise surface the
23 | # proxy log and fail, in the same way as the x11vnc startup script.
24 | if [ "$timeout" -eq 0 ]; then
25 |     echo "noVNC failed to start, log output:" >&2
26 |     cat /tmp/novnc.log >&2
27 |     exit 1
28 | fi
29 | 
30 | echo "noVNC started successfully"
31 | 


--------------------------------------------------------------------------------
/computer-use-demo/image/open_debugger.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Wait for the debugger URL to appear in /tmp/debugger_url.txt, then open it in Firefox.
 3 | 
 4 | # Block until the URL file has been created
 5 | while [ ! -f /tmp/debugger_url.txt ]; do
 6 |     sleep 1
 7 | done
 8 | 
 9 | # Read the URL from the file
10 | URL=$(cat /tmp/debugger_url.txt)
11 | 
12 | # Print the URL to the console
13 | echo "Debugger URL: $URL"
14 | 
15 | # Open Firefox in kiosk mode on the display used by start_all.sh (":${DISPLAY_NUM}"),
16 | # falling back to :1 when DISPLAY_NUM is not set.
17 | # Alternatives tried earlier: plain `firefox-esr "$URL"`, `firefox-esr --fullscreen`,
18 | # and `w3m` or `curl | less` inside an xterm.
19 | DISPLAY=":${DISPLAY_NUM:-1}" firefox-esr --kiosk "$URL" &


--------------------------------------------------------------------------------
/computer-use-demo/image/start_all.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Start the virtual desktop pieces in order; set -e stops at the first script that fails
 3 | set -e
 4 | # Exported so every child script below uses the same X display
 5 | export DISPLAY=:${DISPLAY_NUM}
 6 | ./xvfb_startup.sh
 7 | ./tint2_startup.sh
 8 | ./mutter_startup.sh
 9 | ./x11vnc_startup.sh
10 | 


--------------------------------------------------------------------------------
/computer-use-demo/image/static_content/index.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html>
 3 |     <head>
 4 |         <title>Computer Use Demo</title>
 5 |         <meta name="permissions-policy" content="fullscreen=*" />
 6 |         <style>
 7 |             body {
 8 |                 margin: 0;
 9 |                 padding: 0;
10 |                 overflow: hidden;
11 |             }
12 |             .container {
13 |                 display: flex;
14 |                 height: 100vh;
15 |                 width: 100vw;
16 |             }
17 |             .left {
18 |                 flex: 1;
19 |                 border: none;
20 |                 height: 100vh;
21 |             }
22 |             .right {
23 |                 flex: 2;
24 |                 border: none;
25 |                 height: 100vh;
26 |             }
27 |         </style>
28 |     </head>
29 |     <body>
30 |         <div class="container">
31 |             <iframe
32 |                 src="http://localhost:8501"
33 |                 class="left"
34 |                 allow="fullscreen"
35 |             ></iframe>
36 |             <iframe
37 |                 id="vnc"
38 |                 src="http://127.0.0.1:6080/vnc.html?&resize=scale&autoconnect=1&view_only=1&reconnect=1&reconnect_delay=2000"
39 |                 class="right"
40 |                 allow="fullscreen"
41 |             ></iframe>
42 |             <button
43 |                 id="toggleViewOnly"
44 |                 style="position: absolute; top: 10px; right: 10px; z-index: 1000"
45 |             >
46 |                 Toggle Screen Control (Off)
47 |             </button>
48 |             <script>
49 |                 document
50 |                     .getElementById("toggleViewOnly")
51 |                     .addEventListener("click", function () {
52 |                         var vncIframe = document.getElementById("vnc");
53 |                         var button = document.getElementById("toggleViewOnly");
54 |                         var currentSrc = vncIframe.src;
55 |                         if (currentSrc.includes("view_only=1")) {
56 |                             vncIframe.src = currentSrc.replace(
57 |                                 "view_only=1",
58 |                                 "view_only=0",
59 |                             );
60 |                             button.innerText = "Toggle Screen Control (On)";
61 |                         } else {
62 |                             vncIframe.src = currentSrc.replace(
63 |                                 "view_only=0",
64 |                                 "view_only=1",
65 |                             );
66 |                             button.innerText = "Toggle Screen Control (Off)";
67 |                         }
68 |                     });
69 |             </script>
70 |         </div>
71 |     </body>
72 | </html>
73 | 


--------------------------------------------------------------------------------
/computer-use-demo/image/tint2_startup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | echo "starting tint2 on display :$DISPLAY_NUM ..."
 3 | 
 4 | # Start tint2 in the background and capture its stderr for diagnostics
 5 | tint2 -c "$HOME/.config/tint2/tint2rc" 2>/tmp/tint2_stderr.log &
 6 | 
 7 | # Poll once per second, for up to 30 seconds, until xdotool finds a window of class "tint2"
 8 | timeout=30
 9 | while [ "$timeout" -gt 0 ]; do
10 |     if xdotool search --class "tint2" >/dev/null 2>&1; then
11 |         break
12 |     fi
13 |     sleep 1
14 |     ((timeout--))
15 | done
16 | # timeout reaches 0 only if tint2 never showed up: print its stderr and fail
17 | if [ "$timeout" -eq 0 ]; then
18 |     echo "tint2 stderr output:" >&2
19 |     cat /tmp/tint2_stderr.log >&2
20 |     exit 1
21 | fi
22 | 
23 | # The log was only needed for the failure path; -f keeps a missing file from failing the script
24 | rm -f /tmp/tint2_stderr.log
25 | 


--------------------------------------------------------------------------------
/computer-use-demo/image/x11vnc_startup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | echo "starting vnc"
 3 | # Run x11vnc on port 5900 in a background subshell, capturing its stderr for diagnostics
 4 | (x11vnc -display $DISPLAY \
 5 |     -forever \
 6 |     -shared \
 7 |     -wait 50 \
 8 |     -timeout 60 \
 9 |     -noxrecord \
10 |     -noxfixes \
11 |     -noxdamage \
12 |     -rfbport 5900 \
13 |     2>/tmp/x11vnc_stderr.log) &
14 | # $! is the PID of the background subshell above; the watchdog below polls it
15 | x11vnc_pid=$!
16 | 
17 | # Wait up to 10 seconds (one poll per second) for something to listen on port 5900
18 | timeout=10
19 | while [ $timeout -gt 0 ]; do
20 |     if netstat -tuln | grep -q ":5900 "; then
21 |         break
22 |     fi
23 |     sleep 1
24 |     ((timeout--))
25 | done
26 | # timeout is 0 only if the port never opened: report x11vnc's stderr and fail
27 | if [ $timeout -eq 0 ]; then
28 |     echo "x11vnc failed to start, stderr output:" >&2
29 |     cat /tmp/x11vnc_stderr.log >&2
30 |     exit 1
31 | fi
32 | # Startup succeeded: truncate the log (the file itself is kept)
33 | : > /tmp/x11vnc_stderr.log
34 | 
35 | # Watchdog: every 5 seconds check that the background x11vnc subshell still exists
36 | (
37 |     while true; do
38 |         if ! kill -0 $x11vnc_pid 2>/dev/null; then  # signal 0 only tests for existence
39 |             echo "x11vnc process crashed, restarting..." >&2
40 |             if [ -f /tmp/x11vnc_stderr.log ]; then
41 |                 echo "x11vnc stderr output:" >&2
42 |                 cat /tmp/x11vnc_stderr.log >&2
43 |                 rm /tmp/x11vnc_stderr.log
44 |             fi
45 |             exec "$0"  # replace this watchdog with a fresh run of the whole script
46 |         fi
47 |         sleep 5
48 |     done
49 | ) &
50 | 


--------------------------------------------------------------------------------
/computer-use-demo/image/xvfb_startup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e  # Exit on error
 3 | # Start Xvfb on $DISPLAY at ${WIDTH}x${HEIGHT} with 24-bit depth, unless the display's lock file already exists
 4 | DPI=96
 5 | RES_AND_DEPTH=${WIDTH}x${HEIGHT}x24
 6 | 
 7 | # Returns 0 when the X lock file for display $DISPLAY_NUM exists, 1 otherwise
 8 | check_xvfb_running() {
 9 |     if [ -e /tmp/.X${DISPLAY_NUM}-lock ]; then
10 |         return 0  # Xvfb is already running
11 |     else
12 |         return 1  # Xvfb is not running
13 |     fi
14 | }
15 | 
16 | # Polls xdpyinfo every 0.1s until it succeeds; returns 1 once more than $timeout seconds have passed
17 | wait_for_xvfb() {
18 |     local timeout=10
19 |     local start_time=$(date +%s)
20 |     while ! xdpyinfo >/dev/null 2>&1; do
21 |         if [ $(($(date +%s) - start_time)) -gt $timeout ]; then
22 |             echo "Xvfb failed to start within $timeout seconds" >&2
23 |             return 1
24 |         fi
25 |         sleep 0.1
26 |     done
27 |     return 0
28 | }
29 | 
30 | # Nothing to do when the lock file says the display is already up
31 | if check_xvfb_running; then
32 |     echo "Xvfb is already running on display ${DISPLAY}"
33 |     exit 0
34 | fi
35 | 
36 | # Start Xvfb in the background and keep its PID so it can be killed if it never becomes ready
37 | Xvfb $DISPLAY -ac -screen 0 $RES_AND_DEPTH -retro -dpi $DPI -nolisten tcp -nolisten unix &
38 | XVFB_PID=$!
39 | 
40 | # Report success, or kill the half-started server and exit with an error
41 | if wait_for_xvfb; then
42 |     echo "Xvfb started successfully on display ${DISPLAY}"
43 |     echo "Xvfb PID: $XVFB_PID"
44 | else
45 |     echo "Xvfb failed to start"
46 |     kill $XVFB_PID
47 |     exit 1
48 | fi
49 | 


--------------------------------------------------------------------------------
/computer-use-demo/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.pyright]
2 | venvPath = "."
3 | venv = ".venv"
4 | useLibraryCodeForTypes = false
5 | 
6 | [tool.pytest.ini_options]
7 | pythonpath = "."
8 | asyncio_mode = "auto"
9 | 


--------------------------------------------------------------------------------
/computer-use-demo/ruff.toml:
--------------------------------------------------------------------------------
 1 | extend-exclude = [".venv"]
 2 | 
 3 | [format]
 4 | docstring-code-format = true
 5 | 
 6 | [lint]
 7 | select = [
 8 |     "A",
 9 |     "ASYNC",
10 |     "B",
11 |     "E",
12 |     "F",
13 |     "I",
14 |     "PIE",
15 |     "RUF200",
16 |     "T20",
17 |     "UP",
18 |     "W",
19 | ]
20 | 
21 | ignore = ["E501", "ASYNC230"]
22 | 
23 | [lint.isort]
24 | combine-as-imports = true
25 | 


--------------------------------------------------------------------------------
/computer-use-demo/setup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Create a local .venv, install the dev requirements into it and install the pre-commit hooks.
 3 | 
 4 | # Stop at the first failing step: without this, a failed venv creation or activation
 5 | # would fall through to `pip install` running against whatever environment is active.
 6 | set -e
 7 | 
 8 | if ! command -v cargo &> /dev/null; then
 9 |     echo "Cargo (the package manager for Rust) is not present.  This is required for one of this module's dependencies."
10 |     echo "See https://www.rust-lang.org/tools/install for installation instructions."
11 |     exit 1
12 | fi
13 | 
14 | python3 -m venv .venv
15 | source .venv/bin/activate
16 | pip install --upgrade pip
17 | pip install -r dev-requirements.txt
18 | pre-commit install
19 | 


--------------------------------------------------------------------------------
/computer-use-demo/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from unittest import mock
 3 | 
 4 | import pytest
 5 | 
 6 | 
 7 | @pytest.fixture(autouse=True)
 8 | def mock_screen_dimensions():
 9 |     with mock.patch.dict(
10 |         os.environ, {"HEIGHT": "768", "WIDTH": "1024", "DISPLAY_NUM": "1"}
11 |     ):
12 |         yield
13 | 


--------------------------------------------------------------------------------
/computer-use-demo/tests/loop_test.py:
--------------------------------------------------------------------------------
 1 | from unittest import mock
 2 | 
 3 | from anthropic.types import TextBlock, ToolUseBlock
 4 | from anthropic.types.beta import BetaMessage, BetaMessageParam
 5 | 
 6 | from computer_use_demo.loop import APIProvider, sampling_loop
 7 | 
 8 | 
 9 | async def test_loop():
10 |     client = mock.Mock()
11 |     client.beta.messages.with_raw_response.create.return_value = mock.Mock()
12 |     client.beta.messages.with_raw_response.create.return_value.parse.side_effect = [
13 |         mock.Mock(
14 |             spec=BetaMessage,
15 |             content=[
16 |                 TextBlock(type="text", text="Hello"),
17 |                 ToolUseBlock(
18 |                     type="tool_use", id="1", name="computer", input={"action": "test"}
19 |                 ),
20 |             ],
21 |         ),
22 |         mock.Mock(spec=BetaMessage, content=[TextBlock(type="text", text="Done!")]),
23 |     ]
24 | 
25 |     tool_collection = mock.AsyncMock()
26 |     tool_collection.run.return_value = mock.Mock(
27 |         output="Tool output", error=None, base64_image=None
28 |     )
29 | 
30 |     output_callback = mock.Mock()
31 |     tool_output_callback = mock.Mock()
32 |     api_response_callback = mock.Mock()
33 | 
34 |     with mock.patch(
35 |         "computer_use_demo.loop.Anthropic", return_value=client
36 |     ), mock.patch(
37 |         "computer_use_demo.loop.ToolCollection", return_value=tool_collection
38 |     ):
39 |         messages: list[BetaMessageParam] = [{"role": "user", "content": "Test message"}]
40 |         result = await sampling_loop(
41 |             model="test-model",
42 |             provider=APIProvider.ANTHROPIC,
43 |             system_prompt_suffix="",
44 |             messages=messages,
45 |             output_callback=output_callback,
46 |             tool_output_callback=tool_output_callback,
47 |             api_response_callback=api_response_callback,
48 |             api_key="test-key",
49 |         )
50 | 
51 |         assert len(result) == 4
52 |         assert result[0] == {"role": "user", "content": "Test message"}
53 |         assert result[1]["role"] == "assistant"
54 |         assert result[2]["role"] == "user"
55 |         assert result[3]["role"] == "assistant"
56 | 
57 |         assert client.beta.messages.with_raw_response.create.call_count == 2
58 |         tool_collection.run.assert_called_once_with(
59 |             name="computer", tool_input={"action": "test"}
60 |         )
61 |         output_callback.assert_called_with(TextBlock(text="Done!", type="text"))
62 |         assert output_callback.call_count == 3
63 |         assert tool_output_callback.call_count == 1
64 |         assert api_response_callback.call_count == 2
65 | 


--------------------------------------------------------------------------------
/computer-use-demo/tests/streamlit_test.py:
--------------------------------------------------------------------------------
 1 | from unittest import mock
 2 | 
 3 | import pytest
 4 | from streamlit.testing.v1 import AppTest
 5 | 
 6 | from computer_use_demo.streamlit import Sender, TextBlock
 7 | 
 8 | 
 9 | @pytest.fixture
10 | def streamlit_app():  # AppTest harness built from the demo's Streamlit script
11 |     return AppTest.from_file("computer_use_demo/streamlit.py")
12 | 
13 | 
14 | def test_streamlit(streamlit_app: AppTest):
15 |     streamlit_app.run()
16 |     streamlit_app.text_input[1].set_value("sk-ant-0000000000000").run()
17 |     with mock.patch("computer_use_demo.loop.sampling_loop") as patch:
18 |         streamlit_app.chat_input[0].set_value("Hello").run()
19 |         assert patch.called
20 |         assert patch.call_args.kwargs["messages"] == [
21 |             {"role": Sender.USER, "content": [TextBlock(text="Hello", type="text")]}
22 |         ]
23 |         assert not streamlit_app.exception
24 | 


--------------------------------------------------------------------------------
/computer-use-demo/tests/tools/bash_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from computer_use_demo.tools.bash import BashTool, ToolError
 4 | 
 5 | 
 6 | @pytest.fixture
 7 | def bash_tool():  # new BashTool instance for each test that requests it
 8 |     return BashTool()
 9 | 
10 | 
11 | @pytest.mark.asyncio
12 | async def test_bash_tool_restart(bash_tool):
13 |     result = await bash_tool(restart=True)
14 |     assert result.system == "tool has been restarted."
15 | 
16 |     # Verify the tool can be used after restart
17 |     result = await bash_tool(command="echo 'Hello after restart'")
18 |     assert "Hello after restart" in result.output
19 | 
20 | 
21 | @pytest.mark.asyncio
22 | async def test_bash_tool_run_command(bash_tool):
23 |     result = await bash_tool(command="echo 'Hello, World!'")
24 |     assert result.output.strip() == "Hello, World!"
25 |     assert result.error == ""
26 | 
27 | 
28 | @pytest.mark.asyncio
29 | async def test_bash_tool_no_command(bash_tool):  # calling with no arguments must raise ToolError
30 |     with pytest.raises(ToolError, match="no command provided."):
31 |         await bash_tool()
32 | 
33 | 
34 | @pytest.mark.asyncio
35 | async def test_bash_tool_session_creation(bash_tool):
36 |     result = await bash_tool(command="echo 'Session created'")
37 |     assert bash_tool._session is not None
38 |     assert "Session created" in result.output
39 | 
40 | 
41 | @pytest.mark.asyncio
42 | async def test_bash_tool_session_reuse(bash_tool):
43 |     result1 = await bash_tool(command="echo 'First command'")
44 |     result2 = await bash_tool(command="echo 'Second command'")
45 | 
46 |     assert "First command" in result1.output
47 |     assert "Second command" in result2.output
48 | 
49 | 
50 | @pytest.mark.asyncio
51 | async def test_bash_tool_session_error(bash_tool):  # an unknown command is reported via `error`, not raised
52 |     result = await bash_tool(command="invalid_command_that_does_not_exist")
53 |     assert "command not found" in result.error
54 | 
55 | 
56 | @pytest.mark.asyncio
57 | async def test_bash_tool_non_zero_exit(bash_tool):
58 |     result = await bash_tool(command="bash -c 'exit 1'")
59 |     assert result.error.strip() == ""
60 |     assert result.output.strip() == ""
61 | 
62 | 
63 | @pytest.mark.asyncio
64 | async def test_bash_tool_timeout(bash_tool):
65 |     await bash_tool(command="echo 'Hello, World!'")
66 |     bash_tool._session._timeout = 0.1  # Set a very short timeout for testing
67 |     with pytest.raises(
68 |         ToolError,
69 |         match="timed out: bash has not returned in 0.1 seconds and must be restarted",
70 |     ):
71 |         await bash_tool(command="sleep 1")
72 | 


--------------------------------------------------------------------------------
/computer-use-demo/tests/tools/computer_test.py:
--------------------------------------------------------------------------------
  1 | from unittest.mock import AsyncMock, patch
  2 | 
  3 | import pytest
  4 | 
  5 | from computer_use_demo.tools.computer import (
  6 |     ComputerTool,
  7 |     ScalingSource,
  8 |     ToolError,
  9 |     ToolResult,
 10 | )
 11 | 
 12 | 
 13 | @pytest.fixture
 14 | def computer_tool():  # new ComputerTool instance for each test that requests it
 15 |     return ComputerTool()
 16 | 
 17 | 
 18 | @pytest.mark.asyncio
 19 | async def test_computer_tool_mouse_move(computer_tool):
 20 |     with patch.object(computer_tool, "shell", new_callable=AsyncMock) as mock_shell:
 21 |         mock_shell.return_value = ToolResult(output="Mouse moved")
 22 |         result = await computer_tool(action="mouse_move", coordinate=[100, 200])
 23 |         mock_shell.assert_called_once_with(
 24 |             f"{computer_tool.xdotool} mousemove --sync 100 200"
 25 |         )
 26 |         assert result.output == "Mouse moved"
 27 | 
 28 | 
 29 | @pytest.mark.asyncio
 30 | async def test_computer_tool_type(computer_tool):
 31 |     with (
 32 |         patch.object(computer_tool, "shell", new_callable=AsyncMock) as mock_shell,
 33 |         patch.object(
 34 |             computer_tool, "screenshot", new_callable=AsyncMock
 35 |         ) as mock_screenshot,
 36 |     ):
 37 |         mock_shell.return_value = ToolResult(output="Text typed")
 38 |         mock_screenshot.return_value = ToolResult(base64_image="base64_screenshot")
 39 |         result = await computer_tool(action="type", text="Hello, World!")
 40 |         assert mock_shell.call_count == 1
 41 |         assert "type --delay 12 -- 'Hello, World!'" in mock_shell.call_args[0][0]
 42 |         assert result.output == "Text typed"
 43 |         assert result.base64_image == "base64_screenshot"
 44 | 
 45 | 
 46 | @pytest.mark.asyncio
 47 | async def test_computer_tool_screenshot(computer_tool):
 48 |     with patch.object(
 49 |         computer_tool, "screenshot", new_callable=AsyncMock
 50 |     ) as mock_screenshot:
 51 |         mock_screenshot.return_value = ToolResult(base64_image="base64_screenshot")
 52 |         result = await computer_tool(action="screenshot")
 53 |         mock_screenshot.assert_called_once()
 54 |         assert result.base64_image == "base64_screenshot"
 55 | 
 56 | 
 57 | @pytest.mark.asyncio
 58 | async def test_computer_tool_scaling(computer_tool):
 59 |     computer_tool._scaling_enabled = True
 60 |     computer_tool.width = 1920
 61 |     computer_tool.height = 1080
 62 | 
 63 |     # Test scaling from API to computer
 64 |     x, y = computer_tool.scale_coordinates(ScalingSource.API, 1366, 768)
 65 |     assert x == 1920
 66 |     assert y == 1080
 67 | 
 68 |     # Test scaling from computer to API
 69 |     x, y = computer_tool.scale_coordinates(ScalingSource.COMPUTER, 1920, 1080)
 70 |     assert x == 1366
 71 |     assert y == 768
 72 | 
 73 |     # Test no scaling when disabled
 74 |     computer_tool._scaling_enabled = False
 75 |     x, y = computer_tool.scale_coordinates(ScalingSource.API, 1366, 768)
 76 |     assert x == 1366
 77 |     assert y == 768
 78 | 
 79 | 
 80 | @pytest.mark.asyncio
 81 | async def test_computer_tool_scaling_with_different_aspect_ratio(computer_tool):
 82 |     computer_tool._scaling_enabled = True
 83 |     computer_tool.width = 1920
 84 |     computer_tool.height = 1200  # 16:10 aspect ratio
 85 | 
 86 |     # Test scaling from API to computer
 87 |     x, y = computer_tool.scale_coordinates(ScalingSource.API, 1280, 800)
 88 |     assert x == 1920
 89 |     assert y == 1200
 90 | 
 91 |     # Test scaling from computer to API
 92 |     x, y = computer_tool.scale_coordinates(ScalingSource.COMPUTER, 1920, 1200)
 93 |     assert x == 1280
 94 |     assert y == 800
 95 | 
 96 | 
 97 | @pytest.mark.asyncio
 98 | async def test_computer_tool_no_scaling_for_unsupported_resolution(computer_tool):
 99 |     computer_tool._scaling_enabled = True
100 |     computer_tool.width = 4096
101 |     computer_tool.height = 2160
102 | 
103 |     # Test no scaling for unsupported resolution
104 |     x, y = computer_tool.scale_coordinates(ScalingSource.API, 4096, 2160)
105 |     assert x == 4096
106 |     assert y == 2160
107 | 
108 |     x, y = computer_tool.scale_coordinates(ScalingSource.COMPUTER, 4096, 2160)
109 |     assert x == 4096
110 |     assert y == 2160
111 | 
112 | 
113 | @pytest.mark.asyncio
114 | async def test_computer_tool_scaling_out_of_bounds(computer_tool):
115 |     computer_tool._scaling_enabled = True
116 |     computer_tool.width = 1920
117 |     computer_tool.height = 1080
118 | 
119 |     # Test scaling from API with out of bounds coordinates
120 |     with pytest.raises(ToolError, match="Coordinates .*, .* are out of bounds"):
121 |         x, y = computer_tool.scale_coordinates(ScalingSource.API, 2000, 1500)
122 | 
123 | 
124 | @pytest.mark.asyncio
125 | async def test_computer_tool_invalid_action(computer_tool):  # unknown action names raise ToolError
126 |     with pytest.raises(ToolError, match="Invalid action: invalid_action"):
127 |         await computer_tool(action="invalid_action")
128 | 
129 | 
130 | @pytest.mark.asyncio
131 | async def test_computer_tool_missing_coordinate(computer_tool):  # mouse_move without coordinate raises ToolError
132 |     with pytest.raises(ToolError, match="coordinate is required for mouse_move"):
133 |         await computer_tool(action="mouse_move")
134 | 
135 | 
136 | @pytest.mark.asyncio
137 | async def test_computer_tool_missing_text(computer_tool):
138 |     """The type action without a `text` argument raises ToolError."""
139 |     with pytest.raises(ToolError, match="text is required for type"):
140 |         await computer_tool(action="type")
141 | 


--------------------------------------------------------------------------------
/computer-use-demo/tests/tools/edit_test.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | from unittest.mock import patch
  3 | 
  4 | import pytest
  5 | 
  6 | from computer_use_demo.tools.base import CLIResult, ToolError, ToolResult
  7 | from computer_use_demo.tools.edit import EditTool
  8 | 
  9 | 
 10 | @pytest.mark.asyncio
 11 | async def test_view_command():
 12 |     edit_tool = EditTool()
 13 |     # Each scenario below runs the "view" command against patched pathlib.Path methods
 14 |     # Existing file: the patched file text must appear in the output
 15 |     with patch("pathlib.Path.exists", return_value=True), patch(
 16 |         "pathlib.Path.is_dir", return_value=False
 17 |     ), patch("pathlib.Path.read_text") as mock_read_text:
 18 |         mock_read_text.return_value = "File content"
 19 |         result = await edit_tool(command="view", path="/test/file.txt")
 20 |         assert isinstance(result, CLIResult)
 21 |         assert result.output
 22 |         assert "File content" in result.output
 23 | 
 24 |     # Directory: the listing returned by the patched `run` helper must appear in the output
 25 |     with patch("pathlib.Path.exists", return_value=True), patch(
 26 |         "pathlib.Path.is_dir", return_value=True
 27 |     ), patch("computer_use_demo.tools.edit.run") as mock_run:
 28 |         mock_run.return_value = (None, "file1.txt\nfile2.txt", None)
 29 |         result = await edit_tool(command="view", path="/test/dir")
 30 |         assert isinstance(result, CLIResult)
 31 |         assert result.output
 32 |         assert "file1.txt" in result.output
 33 |         assert "file2.txt" in result.output
 34 | 
 35 |     # view_range=[2, 3]: lines 2 and 3 appear, each prefixed by its line number and a tab
 36 |     with patch("pathlib.Path.exists", return_value=True), patch(
 37 |         "pathlib.Path.is_dir", return_value=False
 38 |     ), patch("pathlib.Path.read_text") as mock_read_text:
 39 |         mock_read_text.return_value = "Line 1\nLine 2\nLine 3\nLine 4"
 40 |         result = await edit_tool(
 41 |             command="view", path="/test/file.txt", view_range=[2, 3]
 42 |         )
 43 |         assert isinstance(result, CLIResult)
 44 |         assert result.output
 45 |         assert "\n     2\tLine 2\n     3\tLine 3\n" in result.output
 46 | 
 47 |     # A reversed range ([3, 2]) is rejected with ToolError
 48 |     with patch("pathlib.Path.exists", return_value=True), patch(
 49 |         "pathlib.Path.is_dir", return_value=False
 50 |     ), patch("pathlib.Path.read_text") as mock_read_text:
 51 |         mock_read_text.return_value = "Line 1\nLine 2\nLine 3\nLine 4"
 52 |         with pytest.raises(ToolError, match="Invalid `view_range`"):
 53 |             await edit_tool(command="view", path="/test/file.txt", view_range=[3, 2])
 54 | 
 55 |     # A path that does not exist is rejected with ToolError
 56 |     with patch("pathlib.Path.exists", return_value=False):
 57 |         with pytest.raises(ToolError, match="does not exist"):
 58 |             await edit_tool(command="view", path="/nonexistent/file.txt")
 59 | 
 60 |     # Passing view_range for a directory is rejected with ToolError
 61 |     with patch("pathlib.Path.exists", return_value=True), patch(
 62 |         "pathlib.Path.is_dir", return_value=True
 63 |     ):
 64 |         with pytest.raises(ToolError, match="view_range` parameter is not allowed"):
 65 |             await edit_tool(command="view", path="/test/dir", view_range=[1, 2])
 66 | 
 67 | 
 68 | @pytest.mark.asyncio
 69 | async def test_create_command():
 70 |     edit_tool = EditTool()
 71 | 
 72 |     # Test creating a new file with content
 73 |     with patch("pathlib.Path.exists", return_value=False), patch(
 74 |         "pathlib.Path.write_text"
 75 |     ) as mock_write_text:
 76 |         result = await edit_tool(
 77 |             command="create", path="/test/newfile.txt", file_text="New file content"
 78 |         )
 79 |         assert isinstance(result, ToolResult)
 80 |         assert result.output
 81 |         assert "File created successfully" in result.output
 82 |         mock_write_text.assert_called_once_with("New file content")
 83 | 
 84 |     # Test attempting to create a file without content
 85 |     with patch("pathlib.Path.exists", return_value=False):
 86 |         with pytest.raises(ToolError, match="Parameter `file_text` is required"):
 87 |             await edit_tool(command="create", path="/test/newfile.txt")
 88 | 
 89 |     # Test attempting to create a file that already exists
 90 |     with patch("pathlib.Path.exists", return_value=True):
 91 |         with pytest.raises(ToolError, match="File already exists"):
 92 |             await edit_tool(
 93 |                 command="create", path="/test/existingfile.txt", file_text="Content"
 94 |             )
 95 | 
 96 | 
 97 | @pytest.mark.asyncio
 98 | async def test_str_replace_command():
 99 |     edit_tool = EditTool()
100 | 
101 |     # Test replacing a unique string in a file
102 |     with patch("pathlib.Path.exists", return_value=True), patch(
103 |         "pathlib.Path.is_dir", return_value=False
104 |     ), patch("pathlib.Path.read_text") as mock_read_text, patch(
105 |         "pathlib.Path.write_text"
106 |     ) as mock_write_text:
107 |         mock_read_text.return_value = "Original content"
108 |         result = await edit_tool(
109 |             command="str_replace",
110 |             path="/test/file.txt",
111 |             old_str="Original",
112 |             new_str="New",
113 |         )
114 |         assert isinstance(result, CLIResult)
115 |         assert result.output
116 |         assert "has been edited" in result.output
117 |         mock_write_text.assert_called_once_with("New content")
118 | 
119 |     # Test attempting to replace a non-existent string
120 |     with patch("pathlib.Path.exists", return_value=True), patch(
121 |         "pathlib.Path.is_dir", return_value=False
122 |     ), patch("pathlib.Path.read_text") as mock_read_text:
123 |         mock_read_text.return_value = "Original content"
124 |         with pytest.raises(ToolError, match="did not appear verbatim"):
125 |             await edit_tool(
126 |                 command="str_replace",
127 |                 path="/test/file.txt",
128 |                 old_str="Nonexistent",
129 |                 new_str="New",
130 |             )
131 | 
132 |     # Test attempting to replace a string that appears multiple times
133 |     with patch("pathlib.Path.exists", return_value=True), patch(
134 |         "pathlib.Path.is_dir", return_value=False
135 |     ), patch("pathlib.Path.read_text") as mock_read_text:
136 |         mock_read_text.return_value = "Test test test"
137 |         with pytest.raises(ToolError, match="Multiple occurrences"):
138 |             await edit_tool(
139 |                 command="str_replace",
140 |                 path="/test/file.txt",
141 |                 old_str="test",
142 |                 new_str="example",
143 |             )
144 | 
145 |     edit_tool._file_history.clear()
146 |     # Verify that the file history is updated after replacement
147 |     with patch("pathlib.Path.exists", return_value=True), patch(
148 |         "pathlib.Path.is_dir", return_value=False
149 |     ), patch("pathlib.Path.read_text") as mock_read_text, patch(
150 |         "pathlib.Path.write_text"
151 |     ):
152 |         mock_read_text.return_value = "Original content"
153 |         await edit_tool(
154 |             command="str_replace",
155 |             path="/test/file.txt",
156 |             old_str="Original",
157 |             new_str="New",
158 |         )
159 |         assert edit_tool._file_history[Path("/test/file.txt")] == ["Original content"]
160 | 
161 | 
162 | @pytest.mark.asyncio
163 | async def test_insert_command():
164 |     edit_tool = EditTool()
165 | 
166 |     # Test inserting a string at a valid line number
167 |     with patch("pathlib.Path.exists", return_value=True), patch(
168 |         "pathlib.Path.is_dir", return_value=False
169 |     ), patch("pathlib.Path.read_text") as mock_read_text, patch(
170 |         "pathlib.Path.write_text"
171 |     ) as mock_write_text:
172 |         mock_read_text.return_value = "Line 1\nLine 2\nLine 3"
173 |         result = await edit_tool(
174 |             command="insert", path="/test/file.txt", insert_line=2, new_str="New Line"
175 |         )
176 |         assert isinstance(result, CLIResult)
177 |         assert result.output
178 |         assert "has been edited" in result.output
179 |         mock_write_text.assert_called_once_with("Line 1\nLine 2\nNew Line\nLine 3")
180 | 
181 |     # Test inserting a string at the beginning of the file (line 0)
182 |     with patch("pathlib.Path.exists", return_value=True), patch(
183 |         "pathlib.Path.is_dir", return_value=False
184 |     ), patch("pathlib.Path.read_text") as mock_read_text, patch(
185 |         "pathlib.Path.write_text"
186 |     ) as mock_write_text:
187 |         mock_read_text.return_value = "Line 1\nLine 2"
188 |         result = await edit_tool(
189 |             command="insert",
190 |             path="/test/file.txt",
191 |             insert_line=0,
192 |             new_str="New First Line",
193 |         )
194 |         assert isinstance(result, CLIResult)
195 |         assert result.output
196 |         assert "has been edited" in result.output
197 |         mock_write_text.assert_called_once_with("New First Line\nLine 1\nLine 2")
198 | 
199 |     # Test inserting a string at the end of the file
200 |     with patch("pathlib.Path.exists", return_value=True), patch(
201 |         "pathlib.Path.is_dir", return_value=False
202 |     ), patch("pathlib.Path.read_text") as mock_read_text, patch(
203 |         "pathlib.Path.write_text"
204 |     ) as mock_write_text:
205 |         mock_read_text.return_value = "Line 1\nLine 2"
206 |         result = await edit_tool(
207 |             command="insert",
208 |             path="/test/file.txt",
209 |             insert_line=2,
210 |             new_str="New Last Line",
211 |         )
212 |         assert isinstance(result, CLIResult)
213 |         assert result.output
214 |         assert "has been edited" in result.output
215 |         mock_write_text.assert_called_once_with("Line 1\nLine 2\nNew Last Line")
216 | 
217 |     # Test attempting to insert at an invalid line number
218 |     with patch("pathlib.Path.exists", return_value=True), patch(
219 |         "pathlib.Path.is_dir", return_value=False
220 |     ), patch("pathlib.Path.read_text") as mock_read_text:
221 |         mock_read_text.return_value = "Line 1\nLine 2"
222 |         with pytest.raises(ToolError, match="Invalid `insert_line` parameter"):
223 |             await edit_tool(
224 |                 command="insert",
225 |                 path="/test/file.txt",
226 |                 insert_line=5,
227 |                 new_str="Invalid Line",
228 |             )
229 | 
230 |     # Verify that the file history is updated after insertion
231 |     edit_tool._file_history.clear()
232 |     with patch("pathlib.Path.exists", return_value=True), patch(
233 |         "pathlib.Path.is_dir", return_value=False
234 |     ), patch("pathlib.Path.read_text") as mock_read_text, patch(
235 |         "pathlib.Path.write_text"
236 |     ):
237 |         mock_read_text.return_value = "Original content"
238 |         await edit_tool(
239 |             command="insert", path="/test/file.txt", insert_line=1, new_str="New Line"
240 |         )
241 |         assert edit_tool._file_history[Path("/test/file.txt")] == ["Original content"]
242 | 
243 | 
244 | @pytest.mark.asyncio
245 | async def test_undo_edit_command():
246 |     edit_tool = EditTool()
247 | 
248 |     # Test undoing a str_replace operation
249 |     with patch("pathlib.Path.exists", return_value=True), patch(
250 |         "pathlib.Path.is_dir", return_value=False
251 |     ), patch("pathlib.Path.read_text") as mock_read_text, patch(
252 |         "pathlib.Path.write_text"
253 |     ) as mock_write_text:
254 |         mock_read_text.return_value = "Original content"
255 |         await edit_tool(
256 |             command="str_replace",
257 |             path="/test/file.txt",
258 |             old_str="Original",
259 |             new_str="New",
260 |         )
261 |         mock_read_text.return_value = "New content"
262 |         result = await edit_tool(command="undo_edit", path="/test/file.txt")
263 |         assert isinstance(result, CLIResult)
264 |         assert result.output
265 |         assert "Last edit to /test/file.txt undone successfully" in result.output
266 |         mock_write_text.assert_called_with("Original content")
267 | 
268 |     # Test undoing an insert operation
269 |     edit_tool._file_history.clear()
270 |     with patch("pathlib.Path.exists", return_value=True), patch(
271 |         "pathlib.Path.is_dir", return_value=False
272 |     ), patch("pathlib.Path.read_text") as mock_read_text, patch(
273 |         "pathlib.Path.write_text"
274 |     ) as mock_write_text:
275 |         mock_read_text.return_value = "Line 1\nLine 2"
276 |         await edit_tool(
277 |             command="insert", path="/test/file.txt", insert_line=1, new_str="New Line"
278 |         )
279 |         mock_read_text.return_value = "Line 1\nNew Line\nLine 2"
280 |         result = await edit_tool(command="undo_edit", path="/test/file.txt")
281 |         assert isinstance(result, CLIResult)
282 |         assert result.output
283 |         assert "Last edit to /test/file.txt undone successfully" in result.output
284 |         mock_write_text.assert_called_with("Line 1\nLine 2")
285 | 
286 |     # Test attempting to undo when there's no history
287 |     edit_tool._file_history.clear()
288 |     with patch("pathlib.Path.exists", return_value=True), patch(
289 |         "pathlib.Path.is_dir", return_value=False
290 |     ):
291 |         with pytest.raises(ToolError, match="No edit history found"):
292 |             await edit_tool(command="undo_edit", path="/test/file.txt")
293 | 
294 | 
295 | @pytest.mark.asyncio
296 | async def test_validate_path():
297 |     edit_tool = EditTool()
298 | 
299 |     # Test with valid absolute paths
300 |     with patch("pathlib.Path.exists", return_value=True), patch(
301 |         "pathlib.Path.is_dir", return_value=False
302 |     ):
303 |         edit_tool.validate_path("view", Path("/valid/path.txt"))
304 | 
305 |     # Test with relative paths (should raise an error)
306 |     with pytest.raises(ToolError, match="not an absolute path"):
307 |         edit_tool.validate_path("view", Path("relative/path.txt"))
308 | 
309 |     # Test with non-existent paths for non-create commands (should raise an error)
310 |     with patch("pathlib.Path.exists", return_value=False):
311 |         with pytest.raises(ToolError, match="does not exist"):
312 |             edit_tool.validate_path("view", Path("/nonexistent/file.txt"))
313 | 
314 |     # Test with existing paths for create command (should raise an error)
315 |     with patch("pathlib.Path.exists", return_value=True):
316 |         with pytest.raises(ToolError, match="File already exists"):
317 |             edit_tool.validate_path("create", Path("/existing/file.txt"))
318 | 
319 |     # Test with directory paths for non-view commands (should raise an error)
320 |     with patch("pathlib.Path.exists", return_value=True), patch(
321 |         "pathlib.Path.is_dir", return_value=True
322 |     ):
323 |         with pytest.raises(ToolError, match="is a directory"):
324 |             edit_tool.validate_path("str_replace", Path("/directory/path"))
325 | 
326 |     # Test with directory path for view command (should not raise an error)
327 |     with patch("pathlib.Path.exists", return_value=True), patch(
328 |         "pathlib.Path.is_dir", return_value=True
329 |     ):
330 |         edit_tool.validate_path("view", Path("/directory/path"))
331 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | # Settings for the Pyright static type checker.
2 | [tool.pyright]
3 | # Per Pyright's configuration docs: directory that contains the virtual environment(s).
4 | venvPath = "computer-use-demo"
5 | # Per Pyright's configuration docs: name of the environment under venvPath to use.
6 | venv = ".venv"
7 | # Per Pyright's docs: do not analyze library source for types when stubs are missing.
8 | useLibraryCodeForTypes = false


--------------------------------------------------------------------------------