├── agentd
│   ├── __init__.py
│   ├── chromium.py
│   ├── logging_config.py
│   ├── firefox.py
│   ├── models.py
│   ├── celery_worker.py
│   ├── util.py
│   └── server.py
├── meta
│   ├── vendor-data
│   └── meta-data
├── tests
│   ├── __init__.py
│   └── test_server.py
├── .prettierignore
├── .python-version
├── .dockerignore
├── root_meta
│   ├── meta-data
│   └── user-data
├── scripts
│   ├── lint.py
│   └── build_docs.py
├── theme
│   ├── enable-compositing.desktop
│   ├── xfce4-desktop.xml
│   ├── xsettings.xml
│   ├── xfce4-panel.xml
│   └── xfwm4.xml
├── .flake8
├── conf
│   ├── xvfb.service
│   ├── websockify.service
│   ├── dconf.service
│   ├── gnome.service
│   ├── openbox.service
│   ├── lxqt.service
│   ├── agentd.service
│   ├── x11vnc.service
│   └── kasm
│       └── run
├── uvicorn_run
├── redis_run
├── user-data.tpl
├── docs
│   ├── index.rst
│   ├── Makefile
│   ├── browser.rst
│   ├── screenshots.rst
│   ├── make.bat
│   ├── conf.py
│   ├── keyboard.rst
│   ├── info.rst
│   ├── mouse.rst
│   └── recordings.rst
├── logging_config.yaml
├── xconf_run
├── .github
│   └── workflows
│       ├── poetry-lint.yml
│       ├── poetry-tests.yml
│       ├── poetry-docs.yml
│       └── docker-image.yml
├── remote_install_server.sh
├── install_deps.sh
├── install_desktop.sh
├── remote_install.sh
├── LICENSE
├── pyproject.toml
├── xfce4-desktop.xml
├── pack.sh
├── pack_server.sh
├── cloudbuild_old.yaml
├── CONTRIBUTING.md
├── Makefile
├── CODE_OF_CONDUCT.md
├── cloudbuild.yaml
├── .gitignore
├── install.sh
├── base.pkr.hcl
├── server.pkr.hcl
├── README.md
├── Dockerfile
└── Dockerfile.loaded
/agentd/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/meta/vendor-data:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | *.md
2 |
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.12.4
2 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | .venv
2 | .github
3 | docs/
4 | .recordings
--------------------------------------------------------------------------------
/meta/meta-data:
--------------------------------------------------------------------------------
1 | instance-id: agentd
2 | local-hostname: agentd
--------------------------------------------------------------------------------
/root_meta/meta-data:
--------------------------------------------------------------------------------
1 | instance-id: agentd
2 | local-hostname: agentd
--------------------------------------------------------------------------------
/scripts/lint.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 |
3 |
4 | def main():
5 |     subprocess.run(["black", "."])
6 |     # Propagate flake8's exit code so CI fails when lint errors are found
7 |     raise SystemExit(subprocess.run(["flake8", "."]).returncode)
8 |
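9 |
10 | # This allows the script to be run from the command line
11 | if __name__ == "__main__":
12 |     main()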
--------------------------------------------------------------------------------
/theme/enable-compositing.desktop:
--------------------------------------------------------------------------------
1 | [Desktop Entry]
2 | Type=Application
3 | Name=Enable xfwm4 Compositing
4 | Exec=xfconf-query -c xfwm4 -p /general/use_compositing -s true
5 | Terminal=false
6 | NoDisplay=true
--------------------------------------------------------------------------------
/root_meta/user-data:
--------------------------------------------------------------------------------
1 | #cloud-config
2 | password: ubuntu
3 | ssh_pwauth: true
4 | chpasswd:
5 | expire: false
6 | # users:
7 | # - name: agentsea
8 | # sudo: ALL=(ALL) NOPASSWD:ALL
9 | # groups: sudo
10 | # shell: /bin/bash
11 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 120
3 | ignore = E203, E266, E501, W503
4 | max-complexity = 18
5 | select = B,C,E,F,W,T4,B9
6 | exclude =
7 | .git,
8 | __pycache__,
9 | build,
10 | dist,
11 | .venv,
12 | .tox,
13 | .mypy_cache,
14 | .pytest_cache,
15 | .vscode,
16 |
--------------------------------------------------------------------------------
/conf/xvfb.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=X Virtual Frame Buffer Service
3 | After=network.target
4 |
5 | [Service]
6 | ExecStart=/usr/bin/Xvfb :99 -screen 0 1280x1024x24
7 | Environment="XAUTHORITY=/home/agentsea/.Xauthority" "DISPLAY=:99"
8 | User=agentsea
9 | Restart=on-failure
10 | RestartSec=2
11 |
12 | [Install]
13 | WantedBy=multi-user.target
--------------------------------------------------------------------------------
/conf/websockify.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=Websockify Service
3 | After=x11vnc.service network.target xvfb.service
4 |
5 | [Service]
6 | ExecStart=/usr/bin/websockify 6080 localhost:5900
7 | Restart=on-failure
8 | User=agentsea
9 | RestartSec=11s
10 | StartLimitBurst=5
11 | StartLimitIntervalSec=60s
12 |
13 | [Install]
14 | WantedBy=multi-user.target
15 |
--------------------------------------------------------------------------------
/conf/dconf.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=Apply dconf settings for GNOME
3 | Requires=gnome.service
4 | After=gnome.service
5 |
6 | [Service]
7 | Type=oneshot
8 | User=agentsea
9 | Environment="DISPLAY=:99"
10 | ExecStart=/bin/su agentsea -c "dconf write /org/gnome/initial-setup/done true"
11 |
12 | [Install]
13 | WantedBy=multi-user.target
14 |
--------------------------------------------------------------------------------
/conf/gnome.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=GNOME session on Xvfb
3 | Requires=xvfb.service
4 | After=xvfb.service
5 | PartOf=xvfb.service
6 |
7 | [Service]
8 | Type=forking
9 | User=agentsea
10 | Environment="DISPLAY=:99" "XAUTHORITY=/home/agentsea/.Xauthority"
11 | ExecStart=/usr/bin/dbus-launch gnome-session
12 | ExecStop=/usr/bin/killall gnome-session
13 |
14 | [Install]
15 | WantedBy=multi-user.target
--------------------------------------------------------------------------------
/conf/openbox.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=Openbox Window Manager
3 | Requires=xvfb.service
4 | After=xvfb.service network.target
5 |
6 | [Service]
7 | Type=simple
8 | User=agentsea
9 | Environment="DISPLAY=:99" "XAUTHORITY=/home/agentsea/.Xauthority"
10 | ExecStart=/usr/bin/openbox --config-file /home/agentsea/.config/openbox/rc.xml
11 | Restart=on-failure
12 | RestartSec=5
13 |
14 | [Install]
15 | WantedBy=multi-user.target
--------------------------------------------------------------------------------
/uvicorn_run:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -e
3 |
4 | echo "Test log message from uvicorn_run script" >&2
5 |
6 | cd /config/app
7 |
8 | chown -R abc:abc /config/app
9 |
10 | source ./pyenv_setup.sh
11 | source ./venv/bin/activate
12 |
13 | export DISPLAY=:1
14 |
15 | exec s6-setuidgid abc uvicorn agentd.server:app \
16 | --host 0.0.0.0 --port 8000 --log-level debug \
17 | --log-config /config/app/logging_config.yaml
--------------------------------------------------------------------------------
/conf/lxqt.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=Start LXQt on Xvfb
3 | Requires=xvfb.service
4 | After=xvfb.service network.target
5 |
6 | [Service]
7 | Type=simple
8 | User=agentsea
9 | Environment="DISPLAY=:99" "XAUTHORITY=/home/agentsea/.Xauthority"
10 | Environment="DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1001/bus"
11 | ExecStart=/usr/bin/startlxqt
12 | Restart=on-failure
13 | RestartSec=5
14 |
15 | [Install]
16 | WantedBy=multi-user.target
--------------------------------------------------------------------------------
/conf/agentd.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=An agent daemon that gives AI agents desktop access
3 | After=network.target xvfb.service
4 |
5 | [Service]
6 | User=agentsea
7 | Environment="DISPLAY=:99" "XAUTHORITY=/home/agentsea/.Xauthority"
8 | WorkingDirectory=/home/agentsea/agentd
9 | ExecStart=/home/agentsea/.local/bin/uvicorn agentd.server:app --host 0.0.0.0 --port 8000 --reload
10 | Restart=always
11 |
12 | [Install]
13 | WantedBy=graphical.target
--------------------------------------------------------------------------------
/redis_run:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -e
3 | echo "Test log message from redis_run script" >&2
4 |
5 |
6 | # Log the environment variables
7 | env > /config/app/logs/redis_env.log
8 |
9 | # Exec a shell to run Redis, piping output through sed to prefix each log line
10 | exec sh -c "redis-server \
11 | --bind 0.0.0.0 \
12 | --maxmemory ${MAXMEMORY:-512mb} \
13 | --maxmemory-policy allkeys-lru \
14 | --loglevel ${LOG_LEVEL:-notice} \
15 | 2>&1 | sed 's/^/[redis] /'"
--------------------------------------------------------------------------------
/user-data.tpl:
--------------------------------------------------------------------------------
1 | #cloud-config
2 | chpasswd:
3 | list: |
4 | agentsea:sailor
5 | expire: False
6 | users:
7 | - name: agentsea
8 | ssh_authorized_keys:
9 | - {{ ssh_public_key }}
10 | sudo: ALL=(ALL) NOPASSWD:ALL
11 | groups: sudo
12 | shell: /bin/bash
13 | runcmd:
14 | - growpart /dev/sda 1
15 | - resize2fs /dev/sda1
16 | - "curl -sSL https://raw.githubusercontent.com/agentsea/agentd/main/remote_install.sh | sudo bash"
17 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | AgentD API Reference
2 | ====================
3 |
4 | The ``AgentD`` daemon provides a number of HTTP endpoints for interacting with the VM.
5 |
6 | .. toctree::
7 | :maxdepth: 2
8 | :caption: API Reference
9 |
10 | info
11 | mouse
12 | keyboard
13 | browser
14 | screenshots
15 | recordings
16 |
17 | .. toctree::
18 | :maxdepth: 1
19 | :caption: ↪
20 |
21 | Go to User Guide
22 |
--------------------------------------------------------------------------------
/scripts/build_docs.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import subprocess
4 |
5 |
6 | def main():
7 | # Define the build directory (this is the default for Sphinx)
8 | build_dir = "docs/_build/html"
9 |
10 | # Check if the build directory exists and remove it
11 | if os.path.exists(build_dir):
12 | shutil.rmtree(build_dir)
13 |
14 | # Now, run the Sphinx build command
15 | subprocess.run(["sphinx-build", "-b", "html", "docs/", build_dir])
16 |
17 |
18 | # This allows the script to be run from the command line
19 | if __name__ == "__main__":
20 | main()
21 |
--------------------------------------------------------------------------------
/conf/x11vnc.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=x11vnc service
3 | After=display-manager.service network.target syslog.target xvfb.service
4 |
5 | [Service]
6 | Type=simple
7 | User=agentsea
8 | Environment="XAUTHORITY=/home/agentsea/.Xauthority" "DISPLAY=:99"
9 | ExecStartPre=/bin/sleep 10
10 | ExecStart=/usr/bin/x11vnc -forever -display :99 -auth /home/agentsea/.Xauthority -passwd agentsea123 -shared -verbose -rfbport 5900
11 | ExecStop=/usr/bin/killall x11vnc
12 | Restart=on-failure
13 | RestartSec=11s
14 | StartLimitBurst=5
15 | StartLimitIntervalSec=60s
16 |
17 | [Install]
18 | WantedBy=multi-user.target
19 |
--------------------------------------------------------------------------------
/logging_config.yaml:
--------------------------------------------------------------------------------
1 | version: 1
2 | disable_existing_loggers: False
3 |
4 | formatters:
5 | custom:
6 | format: "[uvicorn] %(asctime)s %(levelname)s %(message)s"
7 | datefmt: "%Y-%m-%d %H:%M:%S"
8 |
9 | handlers:
10 | console:
11 | class: logging.StreamHandler
12 | formatter: custom
13 | stream: ext://sys.stdout
14 |
15 | loggers:
16 | uvicorn:
17 | level: DEBUG
18 | handlers: [console]
19 | propagate: no
20 | uvicorn.error:
21 | level: DEBUG
22 | handlers: [console]
23 | propagate: no
24 | uvicorn.access:
25 | level: DEBUG
26 | handlers: [console]
27 | propagate: no
--------------------------------------------------------------------------------
/xconf_run:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # Debugging: show commands as they run
4 | set -x
5 |
6 | # 1) Make sure HOME points to the user’s config directory
7 | export HOME=/config
8 | export USER=abc
9 |
10 | # 2) Set DISPLAY and possibly XAUTHORITY
11 | export DISPLAY=:0
12 | export XAUTHORITY=/config/.Xauthority
13 |
14 | # Wait for xfwm4 (and the X server) to finish starting
15 | sleep 10
16 |
17 | echo "Setting compositing to true"
18 |
19 | # 3) Run xfconf-query as user "abc"
20 | exec s6-setuidgid abc xfconf-query -c xfwm4 -p /general/use_compositing -s true
21 | # exec s6-setuidgid abc xfwm4 --replace &
--------------------------------------------------------------------------------
/.github/workflows/poetry-lint.yml:
--------------------------------------------------------------------------------
1 | name: Poetry Lint
2 |
3 | on:
4 | push:
5 | branches: [ '**' ]
6 | pull_request:
7 | branches: [ '**' ]
8 |
9 | jobs:
10 | lint:
11 |
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - uses: actions/checkout@v2
16 |
17 | - name: Set up Python
18 | uses: actions/setup-python@v2
19 | with:
python-version: '3.12'
21 |
22 | - name: Install Poetry
23 | uses: snok/install-poetry@v1
24 |
25 | - name: Install dependencies
26 | run: |
27 | poetry install
28 |
29 | - name: Run lint
30 | run: |
31 | poetry run lint
32 |
33 |
--------------------------------------------------------------------------------
/.github/workflows/poetry-tests.yml:
--------------------------------------------------------------------------------
1 | name: Poetry Tests
2 |
3 | on:
4 | push:
5 | branches: [ '**' ]
6 | pull_request:
7 | branches: [ '**' ]
8 |
9 | jobs:
10 | test:
11 |
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - uses: actions/checkout@v2
16 |
17 | - name: Set up Python
18 | uses: actions/setup-python@v2
19 | with:
python-version: '3.12'
21 |
22 | - name: Install Poetry
23 | uses: snok/install-poetry@v1
24 |
25 | - name: Install dependencies
26 | run: |
27 | poetry install
28 |
29 | - name: Run tests
30 | uses: coactions/setup-xvfb@v1
31 | with:
32 | run: poetry run pytest
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/conf/kasm/run:
--------------------------------------------------------------------------------
1 | #!/usr/bin/with-contenv bash
2 |
3 | # Pass gpu flags if mounted
4 | if ls /dev/dri/renderD* 1> /dev/null 2>&1 && [ -z ${DISABLE_DRI+x} ] && ! which nvidia-smi; then
5 | HW3D="-hw3d"
6 | fi
7 | if [ -z ${DRINODE+x} ]; then
8 | DRINODE="/dev/dri/renderD128"
9 | fi
10 |
11 | exec s6-setuidgid abc \
12 | /usr/local/bin/Xvnc $DISPLAY \
13 | ${HW3D} \
14 | -PublicIP 127.0.0.1 \
15 | -drinode ${DRINODE} \
16 | -disableBasicAuth \
17 | -SecurityTypes None \
18 | -AlwaysShared \
19 | -http-header Cross-Origin-Embedder-Policy=require-corp \
20 | -http-header Cross-Origin-Opener-Policy=same-origin \
21 | -geometry 1280x800 \
22 | -sslOnly 0 \
23 | -RectThreads 0 \
24 | -websocketPort 6901 \
25 | -interface 0.0.0.0 \
26 | -Log *:stdout:10
--------------------------------------------------------------------------------
/docs/browser.rst:
--------------------------------------------------------------------------------
1 | Browser Operations
2 | ==================
3 |
4 | POST /open_url
5 | ^^^^^^^^^^^^^^
6 |
7 | The ``/open_url`` endpoint opens a specified URL in the Chromium browser.
8 |
9 | **Request:**
10 |
11 | .. code-block:: json
12 |
13 | {
14 | "url": "https://example.com"
15 | }
16 |
17 | Attributes:
18 |
19 | - ``url`` (str): The URL to be opened in the browser.
20 |
21 | **Response:**
22 |
23 | Returns a JSON response indicating the status of the operation.
24 |
25 | .. code-block:: json
26 |
27 | {
28 | "status": "success"
29 | }
30 |
31 | Possible ``status`` values:
32 |
33 | - ``success``: The URL was successfully opened in the browser.
34 | - ``error``: An error occurred while attempting to open the URL. An additional ``message`` field will provide details about the error.
35 |
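36 | **Example:**
37 |
38 | A minimal client sketch using only the Python standard library, assuming the daemon is reachable at ``localhost:8000`` (the port used by ``agentd.service``):
39 |
40 | .. code-block:: python
41 |
42 |     import json
43 |     import urllib.request
44 |
45 |     req = urllib.request.Request(
46 |         "http://localhost:8000/open_url",
47 |         data=json.dumps({"url": "https://example.com"}).encode(),
48 |         headers={"Content-Type": "application/json"},
49 |         method="POST",
50 |     )
51 |     with urllib.request.urlopen(req) as resp:
52 |         print(json.load(resp))  # e.g. {"status": "success"}
53 |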
--------------------------------------------------------------------------------
/docs/screenshots.rst:
--------------------------------------------------------------------------------
1 | Taking Screenshots
2 | ===================
3 |
4 | POST /screenshot
5 | ^^^^^^^^^^^^^^^^
6 |
7 | The ``/screenshot`` endpoint captures the current screen and returns an image.
8 |
9 | **Request:**
10 |
11 | No parameters required.
12 |
13 | **Response:**
14 |
15 | Returns a JSON response containing the screenshot image encoded in base64 and the file path where the screenshot is saved.
16 |
17 | .. code-block:: json
18 |
19 | {
20 | "status": "success",
21 | "image": "base64_encoded_image",
22 | "file_path": "path/to/screenshot.png"
23 | }
24 |
25 | Possible ``status`` values:
26 |
27 | - ``success``: The screenshot was successfully captured and returned.
28 | - ``error``: An error occurred while attempting to capture the screenshot. An additional ``message`` field will provide details about the error.
29 |
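30 | **Example:**
31 |
32 | A minimal sketch that requests a screenshot and writes the decoded image to disk, assuming the daemon is reachable at ``localhost:8000``:
33 |
34 | .. code-block:: python
35 |
36 |     import base64
37 |     import json
38 |     import urllib.request
39 |
40 |     req = urllib.request.Request("http://localhost:8000/screenshot", method="POST")
41 |     with urllib.request.urlopen(req) as resp:
42 |         payload = json.load(resp)
43 |
44 |     if payload["status"] == "success":
45 |         with open("screenshot.png", "wb") as f:
46 |             f.write(base64.b64decode(payload["image"]))
47 |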
--------------------------------------------------------------------------------
/remote_install_server.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Define where to clone the repository
4 | INSTALL_DIR="/home/agentsea/agentd"
5 | rm -rf $INSTALL_DIR
6 |
7 | # Clone the repository
8 | echo "Cloning repository into $INSTALL_DIR..."
9 | git clone https://github.com/agentsea/agentd.git "$INSTALL_DIR"
10 |
11 | # Check if git clone was successful
12 | if [ $? -ne 0 ]; then
13 |     echo "Failed to clone the repository. Please check your internet connection and repository URL."
14 |     exit 1
15 | fi
16 | chown -R agentsea:agentsea "$INSTALL_DIR"
17 |
18 | # Change directory to the cloned repository
19 | cd "$INSTALL_DIR"
20 |
21 | apt install -y xdotool
22 |
23 | # whoami
24 | # bash install_deps.sh
25 |
26 | # Assuming your script uses other scripts or configurations from the repo
27 | # Execute a specific script from the cloned repository
28 | echo "Installation completed."
29 |
--------------------------------------------------------------------------------
/.github/workflows/poetry-docs.yml:
--------------------------------------------------------------------------------
1 | name: Build and Deploy Sphinx Documentation
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 |
8 | jobs:
9 | build-and-deploy:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v2
13 |
14 | - name: Set up Python
15 | uses: actions/setup-python@v2
16 | with:
17 | python-version: '3.12'
18 |
19 | - name: Install Poetry
20 | uses: snok/install-poetry@v1
21 |
22 | - name: Install dependencies
23 | run: |
24 | poetry install
25 |
26 | - name: Build Sphinx Documentation
27 | run: |
28 | poetry run build-docs
29 |
30 | - name: Deploy to GitHub Pages
31 | uses: peaceiris/actions-gh-pages@v3
32 | with:
33 | github_token: ${{ secrets.GITHUB_TOKEN }}
34 | publish_dir: ./docs/_build/html
35 | publish_branch: gh-pages
36 |
--------------------------------------------------------------------------------
/install_deps.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ "$(whoami)" != "agentsea" ]; then
4 | echo "This script must be run as the user 'agentsea'. Exiting..."
5 | exit 1
6 | fi
7 |
8 | # Define the path to be added
9 | PATH_TO_ADD="/home/agentsea/.local/bin"
10 |
11 | # Define the profile file
12 | PROFILE_FILE="/home/agentsea/.bashrc"
13 |
14 | # Check if the path is already in the PATH variable within the profile file
15 | if ! grep -qxF "export PATH=\"\$PATH:$PATH_TO_ADD\"" $PROFILE_FILE; then
16 | # If the path is not in the file, append the export command to the profile file
17 | echo "export PATH=\"\$PATH:$PATH_TO_ADD\"" >> $PROFILE_FILE
18 | echo "Path $PATH_TO_ADD added to PATH permanently for user agentsea."
19 | else
20 | echo "Path $PATH_TO_ADD is already in PATH for user agentsea."
21 | fi
22 |
23 | export PATH="$PATH:$PATH_TO_ADD"
24 |
25 | python3 -m pip install mss "fastapi[all]" pyautogui pynput "uvicorn[standard]" psutil
26 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | echo.
16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | echo.installed, then set the SPHINXBUILD environment variable to point
18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | echo.may add the Sphinx directory to PATH.
20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from
22 | echo.https://www.sphinx-doc.org/
23 | exit /b 1
24 | )
25 |
26 | if "%1" == "" goto help
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/install_desktop.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [[ $EUID -ne 0 ]]; then
4 | echo "This script must be run as root (or with sudo). Exiting..."
5 | exit 1
6 | fi
7 |
8 | echo "creating user..."
9 | adduser --disabled-password --gecos '' agentsea
10 | chown -R agentsea:agentsea /home/agentsea
11 | echo 'agentsea ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/agentsea
12 |
13 | echo "installing base packages..."
14 | apt-get update
15 | apt-get install -y xvfb ubuntu-desktop x11vnc websockify python3-pip python3-dev python3-venv
16 | snap install chromium
17 |
18 | echo "setting up firewall..."
19 | ufw_status=$(ufw status | grep -o "inactive")
20 | if [ "$ufw_status" == "inactive" ]; then
21 | echo "UFW is inactive. Enabling..."
22 | ufw --force enable
23 | fi
24 |
25 | # ssh
26 | ufw allow 22/tcp
27 | ufw reload
28 |
29 |
30 | cloud-init clean --logs
31 | truncate -s 0 /etc/machine-id
32 | rm /var/lib/dbus/machine-id
33 | ln -s /etc/machine-id /var/lib/dbus/machine-id
--------------------------------------------------------------------------------
/remote_install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Define where to clone the repository
4 | INSTALL_DIR="/home/agentsea/agentd"
5 | if [ -d "$INSTALL_DIR" ]; then
6 | echo "$INSTALL_DIR already exists. Consider removing it first if you want a fresh install."
7 | exit 1
8 | fi
9 |
10 | # Clone the repository
11 | echo "Cloning repository into $INSTALL_DIR..."
12 | git clone https://github.com/agentsea/agentd.git "$INSTALL_DIR"
13 |
14 | # Check if git clone was successful
15 | if [ $? -ne 0 ]; then
16 | echo "Failed to clone the repository. Please check your internet connection and repository URL."
17 | exit 1
18 | fi
19 |
20 | # Change directory to the cloned repository
21 | cd "$INSTALL_DIR"
22 |
23 | # Assuming your script uses other scripts or configurations from the repo
24 | # Execute a specific script from the cloned repository
25 | echo "Running installation script from the cloned repository..."
26 | bash install.sh
27 |
28 | echo "Installation completed."
29 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Kentauros AI
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # For the full list of built-in configuration values, see the documentation:
4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
5 |
6 | import os
7 | import sys
8 |
9 | # -- Project information -----------------------------------------------------
10 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
11 |
12 | project = 'agentd'
13 | copyright = '2024, Kentauros AI'
14 | author = 'Kentauros AI'
15 | release = '0.1.0'
16 |
17 | extensions = [
18 | "sphinx.ext.autodoc",
19 | "sphinx.ext.viewcode",
20 | "sphinx.ext.napoleon",
21 | "recommonmark",
22 | ]
23 |
24 | source_suffix = [".rst", ".md"]
25 |
26 | templates_path = ["_templates"]
27 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
28 |
29 | # -- Options for HTML output -------------------------------------------------
30 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
31 |
32 | html_theme = "sphinx_rtd_theme"
33 |
34 | # -- Source files location ----------------------------------------------------
35 |
36 | sys.path.insert(0, os.path.abspath("../agentd"))
37 |
--------------------------------------------------------------------------------
/agentd/chromium.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import os
3 | import signal
4 |
5 |
6 | def is_chromium_running() -> list[int]:
7 | """
8 | Checks if Chromium is running and returns a list of PIDs.
9 | """
10 | try:
11 | output = subprocess.check_output(["pgrep", "-f", "chromium"])
12 | return [int(pid) for pid in output.decode().strip().split("\n")]
13 | except subprocess.CalledProcessError:
14 | return []
15 |
16 |
17 | def is_chromium_window_open() -> bool:
18 | try:
19 | output = subprocess.check_output(["wmctrl", "-l", "-x"])
20 | return "Chromium" in output.decode()
21 | except subprocess.CalledProcessError:
22 | return False
23 |
24 |
25 | def gracefully_terminate_chromium(pids: list):
26 | """
27 | Attempts to gracefully terminate Chromium processes given their PIDs.
28 | """
29 | for pid in pids:
30 | try:
31 | os.kill(pid, signal.SIGTERM)
32 | print(f"Sent SIGTERM to Chromium process {pid}.")
33 | except ProcessLookupError:
34 | print(f"Chromium process {pid} not found.")
35 | except Exception as e:
36 | print(f"Error terminating Chromium process {pid}: {e}")
37 |
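38 | if __name__ == "__main__":
39 |     # Illustrative sketch (not used by the server): list any running
40 |     # Chromium PIDs and ask each process to exit cleanly via SIGTERM.
41 |     pids = is_chromium_running()
42 |     print(f"Chromium PIDs: {pids}")
43 |     gracefully_terminate_chromium(pids)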
--------------------------------------------------------------------------------
/agentd/logging_config.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import logging.config
3 |
4 | LOGGING_CONFIG = {
5 | "version": 1,
6 | "disable_existing_loggers": False,
7 | "formatters": {
8 | "api_formatter": {
9 | "format": "[api] %(asctime)s %(levelname)s [%(funcName)s]: %(message)s",
10 | "datefmt": "%Y-%m-%d %H:%M:%S",
11 | },
12 | "recording_formatter": {
13 | "format": "[recording] %(asctime)s %(levelname)s [%(funcName)s]: %(message)s",
14 | "datefmt": "%Y-%m-%d %H:%M:%S",
15 | },
16 | },
17 | "handlers": {
18 | "api_console": {
19 | "class": "logging.StreamHandler",
20 | "formatter": "api_formatter",
21 | "stream": "ext://sys.stdout",
22 | },
23 | "recording_console": {
24 | "class": "logging.StreamHandler",
25 | "formatter": "recording_formatter",
26 | "stream": "ext://sys.stdout",
27 | },
28 | },
29 | "loggers": {
30 | "api": {
31 | "handlers": ["api_console"],
32 | "level": "INFO",
33 | "propagate": False,
34 | },
35 | "recording": {
36 | "handlers": ["recording_console"],
37 | "level": "INFO",
38 | "propagate": False,
39 | },
40 | },
41 | }
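42 |
43 |
44 | if __name__ == "__main__":
45 |     # Illustrative sketch (the server applies this config itself):
46 |     # install the dict config, then emit a line through each named logger.
47 |     logging.config.dictConfig(LOGGING_CONFIG)
48 |     logging.getLogger("api").info("api logger configured")
49 |     logging.getLogger("recording").info("recording logger configured")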
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "agentd"
3 | version = "0.1.0"
4 | description = "A daemon that makes a desktop OS accessible to AI agents"
5 | authors = ["Patrick Barker "]
6 | license = "MIT"
7 | readme = "README.md"
8 |
9 | [tool.poetry.dependencies]
10 | python = "^3.12"
11 | pyautogui = "^0.9.54"
12 | mss = "^9.0.1"
13 | pynput = "^1.7.6"
14 | psutil = "^5.9.8"
15 | python-xlib = "^0.33"
16 | pillow = "^10.4.0"
17 | pyscreeze = "^1.0.1"
18 | fastapi = {version = "0.109", extras = ["all"]}
19 | tiktoken = "0.7.0"
20 | celery = "^5.4.0"
21 | celery-types = "^0.22.0"
22 | redis = "^5.2.1"
23 | taskara = "^0.1.225"
24 |
25 | [tool.poetry.group.dev.dependencies]
26 | pytest = "^8.1.0"
27 | pytest-asyncio = "^0.23.5"
28 | flake8 = "^7.0.0"
29 | black = "^24.2.0"
30 | sphinx = "^7.2.6"
31 | sphinx-rtd-theme = "^2.0.0"
32 | recommonmark = "^0.7.1"
33 |
34 | [tool.pyright]
35 | reportUnknownParameterType = false
36 | reportMissingTypeArgument = false
37 | reportUnknownMemberType = false
38 | reportUnknownVariableType = false
39 | reportUnknownArgumentType = false
40 | reportPrivateUsage = false
41 | reportMissingParameterType = false
42 |
43 | [build-system]
44 | requires = ["poetry-core"]
45 | build-backend = "poetry.core.masonry.api"
46 |
47 | [tool.poetry.scripts]
48 | build-docs = "scripts.build_docs:main"
49 | lint = "scripts.lint:main"
50 |
--------------------------------------------------------------------------------
/theme/xfce4-desktop.xml:
--------------------------------------------------------------------------------
[xfce4-desktop xfconf channel XML; angle-bracketed markup was stripped during extraction]
--------------------------------------------------------------------------------
/xfce4-desktop.xml:
--------------------------------------------------------------------------------
[xfce4-desktop xfconf channel XML; angle-bracketed markup was stripped during extraction]
--------------------------------------------------------------------------------
/docs/keyboard.rst:
--------------------------------------------------------------------------------
1 | Keyboard Operations
2 | ====================
3 |
4 | POST /type_text
5 | ^^^^^^^^^^^^^^^
6 |
7 | The ``/type_text`` endpoint simulates typing text at the current cursor location.
8 |
9 | **Request:**
10 |
11 | .. code-block:: json
12 |
13 | {
14 | "text": "Hello, world!",
15 | "min_interval": 0.05,
16 | "max_interval": 0.25
17 | }
18 |
19 | Attributes:
20 |
21 | - ``text`` (str): The text to be typed.
22 | - ``min_interval`` (float, optional): The minimum interval between key presses. Defaults to 0.05 seconds.
23 | - ``max_interval`` (float, optional): The maximum interval between key presses. Defaults to 0.25 seconds.
24 |
25 | **Response:**
26 |
27 | Returns a JSON response indicating the status of the operation.
28 |
29 | .. code-block:: json
30 |
31 | {
32 | "status": "success"
33 | }
34 |
35 | Possible ``status`` values:
36 |
37 | - ``success``: The text was successfully typed at the current cursor location.
38 | - ``error``: An error occurred while attempting to type the text. An additional ``message`` field will provide details about the error.
39 |
40 | POST /press_key
41 | ^^^^^^^^^^^^^^^
42 |
43 | The ``/press_key`` endpoint simulates pressing a key on the keyboard.
44 |
45 | **Request:**
46 |
47 | .. code-block:: json
48 |
49 | {
50 | "key": "string"
51 | }
52 |
53 | **Response:**
54 |
55 | Returns a JSON response indicating the status of the operation.
56 |
57 | .. code-block:: json
58 |
59 | {
60 | "status": "success"
61 | }
62 |
63 | Possible ``status`` values:
64 |
65 | - ``success``: The key was successfully pressed.
66 | - ``error``: An error occurred while attempting to press the key. An additional ``message`` field will provide details about the error.
67 |
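68 | **Example:**
69 |
70 | A minimal sketch that types a string and then presses a key, assuming the daemon is reachable at ``localhost:8000`` (key names follow ``pyautogui`` conventions, e.g. ``"enter"``):
71 |
72 | .. code-block:: python
73 |
74 |     import json
75 |     import urllib.request
76 |
77 |     def post(path: str, body: dict) -> dict:
78 |         req = urllib.request.Request(
79 |             "http://localhost:8000" + path,
80 |             data=json.dumps(body).encode(),
81 |             headers={"Content-Type": "application/json"},
82 |             method="POST",
83 |         )
84 |         with urllib.request.urlopen(req) as resp:
85 |             return json.load(resp)
86 |
87 |     post("/type_text", {"text": "Hello, world!", "min_interval": 0.05, "max_interval": 0.25})
88 |     post("/press_key", {"key": "enter"})
89 |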
--------------------------------------------------------------------------------
/theme/xsettings.xml:
--------------------------------------------------------------------------------
[xsettings xfconf channel XML; angle-bracketed markup was stripped during extraction]
--------------------------------------------------------------------------------
/pack.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Default builder flags
4 | BUILD_QEMU=${BUILD_QEMU:-true}
5 | BUILD_EC2=${BUILD_EC2:-true}
6 | BUILD_GCE=${BUILD_GCE:-true}
7 |
8 | # Parse command-line arguments
9 | while [[ "$#" -gt 0 ]]; do
10 | case $1 in
11 | --no-qemu) BUILD_QEMU=false ;;
12 | --no-ec2) BUILD_EC2=false ;;
13 | --no-gce) BUILD_GCE=false ;;
14 | *) echo "Unknown parameter passed: $1"; exit 1 ;;
15 | esac
16 | shift
17 | done
18 |
19 | # Fetch the current GCP project ID
20 | export GCP_PROJECT_ID=$(gcloud config get-value project)
21 |
22 | # Fetch the current AWS region
23 | export AWS_REGION=$(aws configure get region)
24 |
25 | # Check if GCP_PROJECT_ID is not empty
26 | if [ -z "$GCP_PROJECT_ID" ]; then
27 | echo "GCP Project ID could not be found. Ensure you're logged in to gcloud and have a project set."
28 | exit 1
29 | fi
30 |
31 | # Check if AWS_REGION is not empty
32 | if [ -z "$AWS_REGION" ]; then
33 | echo "AWS Region could not be found. Ensure you're logged in to aws cli and have a default region set."
34 | exit 1
35 | fi
36 |
37 | rm -rf ~/.cache/packer
38 |
39 | # Initialize Packer configuration
40 | packer init base.pkr.hcl
41 |
42 | # Generate a timestamp
43 | TIMESTAMP=$(date +%Y%m%d%H%M%S)
44 |
45 | # Define the base directory for VM outputs
46 | BASE_DIR=".vms/jammy"
47 |
48 | # Create a unique output directory with the timestamp
49 | OUTPUT_DIRECTORY="${BASE_DIR}/${TIMESTAMP}"
50 |
51 | # Ensure the directory exists
52 | mkdir -p "${BASE_DIR}"
53 |
54 | # Run Packer with the current GCP project ID, AWS region, generated timestamp for version, and builder flags
55 | PACKER_LOG=1 packer build \
56 | -var 'gcp_project_id='"$GCP_PROJECT_ID" \
57 | -var 'aws_region='"$AWS_REGION" \
58 | -var 'version='"$TIMESTAMP" \
59 | -var "output_directory=${OUTPUT_DIRECTORY}" \
60 | -var 'build_qemu='"$BUILD_QEMU" \
61 | -var 'build_ec2='"$BUILD_EC2" \
62 | -var 'build_gce='"$BUILD_GCE" \
63 | base.pkr.hcl
64 |
65 | # gsutil cp .vms/jammy/latest/jammy.qcow2 gs://agentsea-vms/jammy/latest/agentd-jammy.qcow2
66 | # gsutil acl ch -u AllUsers:R gs://agentsea-vms/jammy/latest/agentd-jammy.qcow2
--------------------------------------------------------------------------------
/pack_server.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Default builder flags
4 | BUILD_QEMU=${BUILD_QEMU:-true}
5 | BUILD_EC2=${BUILD_EC2:-true}
6 | BUILD_GCE=${BUILD_GCE:-true}
7 |
8 | # Parse command-line arguments
9 | while [[ "$#" -gt 0 ]]; do
10 | case $1 in
11 | --no-qemu) BUILD_QEMU=false ;;
12 | --no-ec2) BUILD_EC2=false ;;
13 | --no-gce) BUILD_GCE=false ;;
14 | *) echo "Unknown parameter passed: $1"; exit 1 ;;
15 | esac
16 | shift
17 | done
18 |
19 | # Fetch the current GCP project ID
20 | export GCP_PROJECT_ID=$(gcloud config get-value project)
21 |
22 | # Fetch the current AWS region
23 | export AWS_REGION=$(aws configure get region)
24 |
25 | # Check if GCP_PROJECT_ID is not empty
26 | if [ -z "$GCP_PROJECT_ID" ]; then
27 | echo "GCP Project ID could not be found. Ensure you're logged in to gcloud and have a project set."
28 | exit 1
29 | fi
30 |
31 | # Check if AWS_REGION is not empty
32 | if [ -z "$AWS_REGION" ]; then
33 | echo "AWS Region could not be found. Ensure you're logged in to aws cli and have a default region set."
34 | exit 1
35 | fi
36 |
37 | rm -rf ~/.cache/packer
38 |
39 | # Initialize Packer configuration
40 | packer init server.pkr.hcl
41 |
42 | # Generate a timestamp
43 | TIMESTAMP=$(date +%Y%m%d%H%M%S)
44 |
45 | # Define the base directory for VM outputs
46 | BASE_DIR=".vms/jammy"
47 |
48 | # Create a unique output directory with the timestamp
49 | OUTPUT_DIRECTORY="${BASE_DIR}/${TIMESTAMP}"
50 |
51 | # Ensure the directory exists
52 | mkdir -p "${BASE_DIR}"
53 |
54 | # Run Packer with the current GCP project ID, AWS region, generated timestamp for version, and builder flags
55 | PACKER_LOG=1 packer build \
56 | -var 'gcp_project_id='"$GCP_PROJECT_ID" \
57 | -var 'aws_region='"$AWS_REGION" \
58 | -var 'version='"$TIMESTAMP" \
59 | -var "output_directory=${OUTPUT_DIRECTORY}" \
60 | -var 'build_qemu='"$BUILD_QEMU" \
61 | -var 'build_ec2='"$BUILD_EC2" \
62 | -var 'build_gce='"$BUILD_GCE" \
63 | server.pkr.hcl
64 |
65 | # gsutil cp .vms/jammy/latest/jammy.qcow2 gs://agentsea-vms/jammy/latest/agentd-jammy.qcow2
66 | # gsutil acl ch -u AllUsers:R gs://agentsea-vms/jammy/latest/agentd-jammy.qcow2
--------------------------------------------------------------------------------
/.github/workflows/docker-image.yml:
--------------------------------------------------------------------------------
1 | name: Docker Image CI
2 |
3 | on:
4 | push:
5 | branches:
6 | - recordings
7 |
8 | jobs:
9 | build:
10 |
11 | runs-on: ubuntu-arm64
12 |
13 | steps:
14 | - uses: actions/checkout@v4
15 |
16 | # Step 1: Set up Google Cloud authentication
17 | - name: Set up Google Cloud authentication
18 | uses: google-github-actions/auth@v2
19 | with:
20 | project_id: ${{ vars.PROJECT_ID }}
21 | credentials_json: ${{ secrets._GITHUB_ACTIONS_PUSH_IMAGES_KEY }}
22 |
23 | # Step 2: Set up Docker Buildx
24 | - name: Set up Docker Buildx
25 | run: |
26 | docker buildx create --name mybuilder --use
27 | docker buildx inspect --bootstrap
28 |
29 | # Step 3: Build and push for multiple architectures with caching
30 | - name: Build and Push
31 | run: |
32 | # Shorten the GitHub commit SHA (first 7 characters)
33 | SHORT_SHA=$(echo ${{ github.sha }} | cut -c1-7)
34 | echo "SHORT_SHA=${SHORT_SHA}" >> $GITHUB_ENV # Save SHORT_SHA to the environment for use in other steps
35 |
36 | docker buildx build \
37 | --platform linux/arm64 \
38 | -t us-docker.pkg.dev/${{ vars.PROJECT_ID }}/agentd/desktop-webtop:${{ env.SHORT_SHA }} \
39 | --build-arg PYTHON_VERSION=3.12.0 \
40 | --cache-from type=registry,ref=us-docker.pkg.dev/${{ vars.PROJECT_ID }}/agentd/desktop-webtop:buildcache \
41 | --cache-to type=registry,ref=us-docker.pkg.dev/${{ vars.PROJECT_ID }}/agentd/desktop-webtop:buildcache,mode=max \
42 | --push \
43 | .
44 |
45 | # Step 4: Verify the multi-arch image
46 | - name: Verify Images
47 | run: |
48 | docker buildx imagetools inspect us-docker.pkg.dev/${{ vars.PROJECT_ID }}/agentd/desktop-webtop:${{ env.SHORT_SHA }}
49 |
50 | # Optional: Set timeout and machine type (not directly supported in GitHub Actions, but can be controlled via runners)
51 | # timeout: "3600s" # GitHub actions does not directly support timeouts in YAML, can be controlled at job level.
52 | # options: machineType: "N1_HIGHCPU_32" # You would need to use a custom runner for machine type configuration.
53 |
--------------------------------------------------------------------------------
/cloudbuild_old.yaml:
--------------------------------------------------------------------------------
1 | steps:
2 | # Build for x86_64
3 | - name: "gcr.io/cloud-builders/docker"
4 | args: [
5 | "build",
6 | "--no-cache",
7 | "--pull",
8 | "--platform",
9 | "linux/amd64", # Specify platform explicitly
10 | "-f",
11 | "Dockerfile.amd64", # Your custom Dockerfile for x86_64
12 | "-t",
13 | "gcr.io/$PROJECT_ID/agentd-webtop:latest-amd64",
14 | ".",
15 | ]
16 |
17 | # Set up QEMU for ARM builds
18 | - name: "gcr.io/cloud-builders/docker"
19 | args:
20 | [
21 | "run",
22 | "--rm",
23 | "--privileged",
24 | "multiarch/qemu-user-static:register",
25 | "--reset",
26 | ]
27 |
28 | # Build for ARM64
29 | - name: "gcr.io/cloud-builders/docker"
30 | args: [
31 | "build",
32 | "--no-cache",
33 | "--pull",
34 | "--platform",
35 | "linux/arm64", # Specify platform explicitly
36 | "-f",
37 | "Dockerfile.arm64", # Your custom Dockerfile for ARM64
38 | "-t",
39 | "gcr.io/$PROJECT_ID/agentd-webtop:latest-arm64",
40 | ".",
41 | ]
42 |
43 | # Create and push a multi-arch manifest
44 | - name: "gcr.io/cloud-builders/docker"
45 | entrypoint: "bash"
46 | args:
47 | - "-c"
48 | - |
49 | echo '{ "experimental": true }' | sudo tee /etc/docker/daemon.json
50 | sudo service docker restart
51 | docker manifest create gcr.io/$PROJECT_ID/agentd-webtop:latest \
52 | gcr.io/$PROJECT_ID/agentd-webtop:latest-amd64 \
53 | gcr.io/$PROJECT_ID/agentd-webtop:latest-arm64
54 | docker manifest annotate gcr.io/$PROJECT_ID/agentd-webtop:latest \
55 | gcr.io/$PROJECT_ID/agentd-webtop:latest-amd64 --os linux --arch amd64
56 | docker manifest annotate gcr.io/$PROJECT_ID/agentd-webtop:latest \
57 | gcr.io/$PROJECT_ID/agentd-webtop:latest-arm64 --os linux --arch arm64
58 | docker manifest push gcr.io/$PROJECT_ID/agentd-webtop:latest
59 |
60 | # Images to be pushed to Google Container Registry
61 | images:
62 | - "gcr.io/$PROJECT_ID/agentd-webtop:latest-amd64"
63 | - "gcr.io/$PROJECT_ID/agentd-webtop:latest-arm64"
64 | - "gcr.io/$PROJECT_ID/agentd-webtop:latest"
65 |
66 | # Set a longer timeout for the build process (default is 10m)
67 | timeout: "3600s"
68 |
69 | # Use a larger machine type for faster builds
70 | options:
71 | machineType: "N1_HIGHCPU_8"
72 |
--------------------------------------------------------------------------------
/docs/info.rst:
--------------------------------------------------------------------------------
1 | System Information and Health
2 | =============================
3 |
4 | GET "/"
5 | ^^^^^^^
6 |
7 | The root endpoint returns a welcome message. This endpoint serves as a basic check to ensure
8 | the agent service is running and accessible.
9 |
10 | **Request:**
11 |
12 | No parameters required.
13 |
14 | **Response:**
15 |
16 | Returns a JSON response with a welcome message.
17 |
18 | .. code-block:: json
19 |
20 | {
21 | "message": "Agent in the shell"
22 | }
23 |
24 | GET /health
25 | ^^^^^^^^^^^
26 |
27 | The ``/health`` endpoint returns a health check for the agent service.
28 |
29 | **Request:**
30 |
31 | No parameters required.
32 |
33 | **Response:**
34 |
35 | Returns a JSON response with a health check.
36 |
37 | .. code-block:: json
38 |
39 | {
40 | "status": "ok"
41 | }
42 |
43 | GET /info
44 | ^^^^^^^^^
45 |
46 | The ``/info`` endpoint returns detailed information about the system where the agent is running.
47 |
48 | **Request:**
49 |
50 | No parameters required.
51 |
52 | **Response:**
53 |
54 | Returns a JSON response with the system information.
55 |
56 | .. code-block:: json
57 |
58 | {
59 | "last_activity_ts": 1625079600,
60 | "screen_size": {
61 | "x": 1920,
62 | "y": 1080
63 | },
64 | "os_info": "Linux 5.8.0-53-generic",
65 | "code_version": "a1b2c3d4"
66 | }
67 |
68 | The response includes the last activity timestamp (``last_activity_ts``), screen size (``screen_size``), operating system information (``os_info``), and the current code version (``code_version``).
69 |
70 | GET /screen_size
71 | ^^^^^^^^^^^^^^^^
72 |
73 | The ``/screen_size`` endpoint returns the current screen size of the system where the agent is running.
74 |
75 | **Request:**
76 |
77 | No parameters required.
78 |
79 | **Response:**
80 |
81 | Returns a JSON response with the screen size.
82 |
83 | .. code-block:: json
84 |
85 | {
86 | "x": 1920,
87 | "y": 1080
88 | }
89 |
90 | The response includes the width (``x``) and height (``y``) of the screen in pixels.
91 |
92 | GET /system_usage
93 | ^^^^^^^^^^^^^^^^^
94 |
95 | This endpoint retrieves the current system usage statistics.
96 |
97 | **Response:**
98 |
99 | Returns a JSON response containing the current system usage statistics including CPU, memory, and disk usage percentages.
100 |
101 | .. code-block:: json
102 |
103 | {
104 | "cpu_percent": 23.5,
105 | "memory_percent": 74.2,
106 | "disk_percent": 55.3
107 | }
108 |
109 | This endpoint allows you to monitor the health and performance of the system where the agent is running.
110 |
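111 | **Example:**
112 |
113 | A minimal sketch that checks ``/health`` and then prints usage statistics, assuming the daemon is reachable at ``localhost:8000``:
114 |
115 | .. code-block:: python
116 |
117 |     import json
118 |     import urllib.request
119 |
120 |     def get(path: str) -> dict:
121 |         with urllib.request.urlopen("http://localhost:8000" + path) as resp:
122 |             return json.load(resp)
123 |
124 |     if get("/health")["status"] == "ok":
125 |         usage = get("/system_usage")
126 |         print(f"cpu={usage['cpu_percent']}% mem={usage['memory_percent']}% disk={usage['disk_percent']}%")
127 |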
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | First off, thank you for considering contributing to this project. It's people like you that make it such a great tool.
4 |
5 | ## Code of Conduct
6 |
7 | This project adheres to a Code of Conduct that we expect project participants to adhere to. Please read [the full text](CODE_OF_CONDUCT.md) so that you can understand what actions will and will not be tolerated.
8 |
9 | ## What we are looking for
10 |
11 | This is an open-source project, and we welcome contributions of all kinds: new features, bug fixes, documentation, examples, or enhancements to existing features. We are always thrilled to receive contributions from the community.
12 |
13 | ## How to contribute
14 |
15 | If you've never contributed to an open-source project before, here are a few steps to get you started:
16 |
17 | ### Reporting Issues
18 |
19 | Before submitting a bug report or feature request, check to make sure it hasn't already been submitted. You can search through existing issues and pull requests to see if someone has reported one similar to yours.
20 |
21 | When you are creating a bug report, please include as much detail as possible.
22 |
23 | ### Pull Requests
24 |
25 | - Fork the repository and create your branch from `main`.
26 | - If you've added code that should be tested, add tests.
27 | - If you've changed APIs, update the documentation.
28 | - Ensure the test suite passes.
29 | - Make sure your code lints.
30 | - Issue that pull request!
31 |
32 | ### Getting started
33 |
34 | For something that is bigger than a one or two-line fix:
35 |
36 | 1. Create your own fork of the code.
37 | 2. Do the changes in your fork.
38 | 3. If you like the change and think the project could use it:
39 | - Be sure you have followed the code style for the project.
40 | - Note the Code of Conduct.
41 | - Send a pull request.
42 |
43 | ## How to report a bug
44 |
45 | If you find a security vulnerability, do NOT open an issue. Email github@kentauros.ai instead.
46 |
47 | In order to help us understand and resolve your issue quickly, please include as much information as possible, including:
48 |
49 | - A quick summary and/or background
50 | - Steps to reproduce
51 | - Be specific!
52 | - Provide sample code if you can.
53 | - What you expected would happen
54 | - What actually happens
55 | - Notes (possibly including why you think this might be happening or stuff you tried that didn't work)
56 |
57 | People *love* thorough bug reports. I'm not even kidding.
58 |
59 | ## How to suggest a feature or enhancement
60 |
61 | If you find yourself wishing for a feature that doesn't exist in the project, you are probably not alone. There are bound to be others out there with similar needs. Open an issue on our issues list on GitHub, which describes the feature you would like to see, why you need it, and how it should work.
62 |
63 | ## Code review process
64 |
65 | The core team looks at Pull Requests on a regular basis in a bi-weekly triage meeting. After feedback has been given, we expect responses within two weeks. After two weeks, we may close the pull request if it isn't showing any activity.
66 |
67 | ## Community
68 |
69 | Discussions about the project take place in this repository's Issues and Pull Requests sections. Anybody is welcome to join these conversations.
70 |
71 | Wherever possible, we use GitHub to discuss changes and keep the decision-making process open.
72 |
73 | ## Thank you!
74 |
75 | Thank you for contributing!
76 |
77 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | VMS_DIR := .vms
2 | JAMMY := $(VMS_DIR)/jammy.img
3 | META_DIR := ./meta
4 | TEMPLATE_FILE := user-data.tpl
5 | OUTPUT_FILE := $(META_DIR)/user-data
6 | SSH_KEY_FILE := $(shell [ -f ~/.ssh/id_rsa.pub ] && echo ~/.ssh/id_rsa.pub || echo ~/.ssh/id_ed25519.pub)
7 | JAMMY_LATEST := ./.vms/jammy/latest/jammy.qcow2
8 |
9 | $(JAMMY):
10 | @mkdir -p $(VMS_DIR)
11 | @test -f $(JAMMY) || (echo "Downloading jammy..." && curl -o $(JAMMY) https://cloud-images.ubuntu.com/jammy/current/jammy-server-cloudimg-amd64.img && echo "Download complete.")
12 | qemu-img resize $(JAMMY) +10G
13 |
14 | .PHONY: download-jammy
15 | download-jammy: $(JAMMY)
16 |
17 | .PHONY: prepare-user-data
18 | prepare-user-data:
19 | @mkdir -p $(META_DIR)
20 | @SSH_KEY=$$(cat $(SSH_KEY_FILE)); \
21 | sed "s|{{ ssh_public_key }}|$$SSH_KEY|" $(TEMPLATE_FILE) > $(OUTPUT_FILE)
22 | @echo "User-data file prepared at $(OUTPUT_FILE)."
23 |
24 | .PHONY: run-meta
25 | run-meta:
26 | python3 -m http.server 8060 --directory ./meta
27 |
28 | .PHONY: run-jammy
29 | run-jammy: prepare-user-data
30 | xorriso -as mkisofs -o cidata.iso -V "cidata" -J -r -iso-level 3 meta/
31 | qemu-system-x86_64 -nographic -hda $(JAMMY_LATEST) \
32 | -m 4G -smp 2 -netdev user,id=vmnet,hostfwd=tcp::6080-:6080,hostfwd=tcp::8000-:8000,hostfwd=tcp::2222-:22 \
33 | -device e1000,netdev=vmnet -cdrom cidata.iso
34 | # -smbios type=1,serial=ds='nocloud;s=http://10.0.2.2:8060/';
35 |
36 | .PHONY: clean
37 | clean:
38 | rm -rf $(VMS_DIR)
39 |
40 | .PHONY: pack
41 | pack: user-data
42 | ./pack.sh
43 |
44 | .PHONY: user-data
45 | user-data:
46 | # hdiutil makehybrid -o cidata.iso -hfs -joliet -iso -default-volume-name cidata root_meta/
47 | xorriso -as mkisofs -o cidata_root.iso -V "cidata" -J -r -iso-level 3 root_meta/
48 |
49 | .PHONY: push-latest
50 | push-latest:
51 | gsutil cp .vms/jammy/latest/jammy.qcow2 gs://agentsea-vms/jammy/latest/agentd-jammy.qcow2
52 | gsutil acl ch -u AllUsers:R gs://agentsea-vms/jammy/latest/agentd-jammy.qcow2
53 |
54 | .PHONY: exp-deps
55 | exp-deps:
56 | poetry export -f requirements.txt --output requirements.txt --without-hashes
57 |
58 | .PHONY: run-latest-auth
59 | run-latest-auth:
60 | docker run -d \
61 | --platform linux/arm64 \
62 | --name=webtop \
63 | --security-opt seccomp=unconfined \
64 | -e PUID=1000 \
65 | -e PGID=1000 \
66 | -e CUSTOM_USER=agentd \
67 | -e PASSWORD=agentd \
68 | -e TZ=Etc/UTC \
69 | -p 3000:3000 \
70 | -p 3001:3001 \
71 | -p 8000:8000 \
72 | --restart unless-stopped \
73 | us-docker.pkg.dev/agentsea-dev/agentd/desktop-webtop:efc7aed
74 |
75 | .PHONY: run-latest
76 | run-latest:
77 | docker run -d \
78 | --platform linux/arm64 \
79 | --name=webtop \
80 | --security-opt seccomp=unconfined \
81 | -e TZ=Etc/UTC \
82 | -p 3000:3000 \
83 | -p 3001:3001 \
84 | -p 8000:8000 \
85 | --restart unless-stopped \
86 | us-docker.pkg.dev/agentsea-dev/agentd/desktop-webtop:773b6aa
87 | # us-docker.pkg.dev/agentsea-dev/agentd/desktop-webtop:latest
88 |
89 |
90 | .PHONY: dev
91 | dev:
92 | docker run -d \
93 | --platform linux/arm64 \
94 | --name=webtop \
95 | --security-opt seccomp=unconfined \
96 | -e TZ=Etc/UTC \
97 | -p 3000:3000 \
98 | -p 3001:3001 \
99 | -p 8000:8000 \
100 | --restart unless-stopped \
101 | -v $(shell pwd)/agentd:/config/app/agentd \
102 | us-docker.pkg.dev/agentsea-dev/agentd/desktop-webtop:latest
--------------------------------------------------------------------------------
/agentd/firefox.py:
--------------------------------------------------------------------------------
1 | import os
2 | import signal
3 | import subprocess
4 |
5 |
6 | def is_firefox_running() -> list[int]:
7 | """
8 | Checks if Firefox is running and returns a list of PIDs.
9 | """
10 | try:
11 | output = subprocess.check_output(["pgrep", "-f", "firefox"])
12 | return [int(pid) for pid in output.decode().strip().split("\n")]
13 | except subprocess.CalledProcessError:
14 | return []
15 |
16 |
17 | def is_firefox_window_open() -> bool:
18 | try:
19 | output = subprocess.check_output(
20 | ["xdotool", "search", "--onlyvisible", "--class", "firefox"]
21 | )
22 | return bool(output.strip())
23 | except subprocess.CalledProcessError:
24 | return False
25 |
26 |
27 | def gracefully_terminate_firefox(pids: list):
28 | """
29 | Attempts to gracefully terminate Firefox processes given their PIDs.
30 | """
31 | for pid in pids:
32 | try:
33 | os.kill(pid, signal.SIGTERM)
34 | print(f"Sent SIGTERM to Firefox process {pid}.")
35 | except ProcessLookupError:
36 | print(f"Firefox process {pid} not found.")
37 | except Exception as e:
38 | print(f"Error terminating Firefox process {pid}: {e}")
39 |
40 |
41 | def maximize_firefox_window():
42 | """
43 | Maximizes the Firefox window by resizing it to the full screen size.
44 | """
45 | try:
46 | # Get the window ID(s) of the Firefox window(s)
47 | window_ids_output = subprocess.check_output(
48 | ["xdotool", "search", "--onlyvisible", "--class", "firefox"]
49 | )
50 | window_ids = window_ids_output.decode("utf-8").split()
51 |
52 | # Get the display geometry (screen width and height)
53 | geometry_output = subprocess.check_output(["xdotool", "getdisplaygeometry"])
54 | screen_width, screen_height = geometry_output.decode("utf-8").split()
55 |
56 | for window_id in window_ids:
57 | # Activate the window
58 | subprocess.run(
59 | ["xdotool", "windowactivate", "--sync", window_id], check=True
60 | )
61 |
62 | # Resize the window to match the screen dimensions
63 | subprocess.run(
64 | ["xdotool", "windowsize", window_id, screen_width, screen_height],
65 | check=True,
66 | )
67 |
68 | # Move the window to the top-left corner
69 | subprocess.run(["xdotool", "windowmove", window_id, "0", "0"], check=True)
70 |
71 | print(f"Maximized Firefox window with window ID {window_id}")
72 | except subprocess.CalledProcessError as e:
73 | print(f"Failed to maximize Firefox window: {e}")
74 |
75 |
76 | def close_firefox_window():
77 | """
78 | Closes the Firefox window gracefully using xdotool's windowclose command.
79 | """
80 | try:
81 | # Get the window ID(s) of the Firefox window(s)
82 | window_ids_output = subprocess.check_output(
83 | ["xdotool", "search", "--onlyvisible", "--class", "firefox"]
84 | )
85 | window_ids = window_ids_output.decode("utf-8").split()
86 |
87 | for window_id in window_ids:
88 | # Close the window
89 | subprocess.run(["xdotool", "windowclose", window_id], check=True)
90 |
91 | print(f"Closed Firefox window with window ID {window_id}")
92 | except subprocess.CalledProcessError as e:
93 | print(f"Failed to close Firefox window: {e}")
94 |
--------------------------------------------------------------------------------
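
The helpers in `agentd/firefox.py` compose naturally into a restart-safe routine. A minimal usage sketch (hypothetical glue code, not part of the repo):

```python
from agentd.firefox import (
    gracefully_terminate_firefox,
    is_firefox_running,
    is_firefox_window_open,
    maximize_firefox_window,
)


def ensure_firefox_maximized() -> None:
    # If a visible window exists, just maximize it.
    if is_firefox_window_open():
        maximize_firefox_window()
        return
    # Otherwise, clean up any stray window-less processes so the
    # next launch starts from a known state.
    pids = is_firefox_running()
    if pids:
        gracefully_terminate_firefox(pids)
```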
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
6 |
7 | ## Our Standards
8 |
9 | Examples of behavior that contributes to creating a positive environment include:
10 |
11 | - Using welcoming and inclusive language
12 | - Being respectful of differing viewpoints and experiences
13 | - Gracefully accepting constructive criticism
14 | - Focusing on what is best for the community
15 | - Showing empathy towards other community members
16 |
17 | Examples of unacceptable behavior by participants include:
18 |
19 | - The use of sexualized language or imagery and unwelcome sexual attention or advances
20 | - Trolling, insulting/derogatory comments, and personal or political attacks
21 | - Public or private harassment
22 | - Publishing others' private information, such as a physical or email address, without explicit permission
23 | - Other conduct which could reasonably be considered inappropriate in a professional setting
24 |
25 | ## Our Responsibilities
26 |
27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
28 |
29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned with this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
30 |
31 | ## Scope
32 |
33 | This Code of Conduct applies within all project spaces, including GitHub, and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.
34 |
35 | ## Enforcement
36 |
37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at github@kentauros.ai. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality regarding the reporter of an incident. Further details of specific enforcement policies may be posted separately.
38 |
39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
40 |
41 | ## Attribution
42 |
43 | This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
44 |
45 | Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity).
46 |
47 | For answers to common questions about this code of conduct, see https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations.
48 |
--------------------------------------------------------------------------------
/cloudbuild.yaml:
--------------------------------------------------------------------------------
1 | steps:
2 | # Set up QEMU for multi-architecture support
3 | # - name: "gcr.io/cloud-builders/docker"
4 | # entrypoint: "bash"
5 | # args:
6 | # - "-c"
7 | # - |
8 | # docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
9 |
10 | # Set up Docker Buildx
11 | - name: "gcr.io/cloud-builders/docker"
12 | entrypoint: "bash"
13 | args:
14 | - "-c"
15 | - |
16 | docker buildx create --name mybuilder --use
17 | docker buildx inspect --bootstrap
18 |
19 |   # Build and push the base image for arm64 and amd64 with caching
20 | - name: "gcr.io/cloud-builders/docker"
21 | id: Build and Push
22 | entrypoint: "bash"
23 | args:
24 | - "-c"
25 | - |
26 | docker buildx build \
27 | --platform linux/arm64,linux/amd64 \
28 | -t us-docker.pkg.dev/$PROJECT_ID/agentd/desktop-webtop:${SHORT_SHA} \
29 | --build-arg PYTHON_VERSION=3.12.0 \
30 | --cache-from type=registry,ref=us-docker.pkg.dev/$PROJECT_ID/agentd/desktop-webtop:buildcache \
31 | --cache-to type=registry,ref=us-docker.pkg.dev/$PROJECT_ID/agentd/desktop-webtop:buildcache,mode=max \
32 | --push \
33 | .
34 |
35 |   # Build and push the loaded image for arm64 and amd64 with caching
36 | - name: "gcr.io/cloud-builders/docker"
37 | id: Build and Push Loaded
38 | entrypoint: "bash"
39 | args:
40 | - "-c"
41 | - |
42 | docker buildx build \
43 | --platform linux/arm64,linux/amd64 \
44 | -f Dockerfile.loaded \
45 | -t us-docker.pkg.dev/$PROJECT_ID/agentd/desktop-webtop-loaded:${SHORT_SHA} \
46 | --build-arg PYTHON_VERSION=3.12.0 \
47 | --cache-from type=registry,ref=us-docker.pkg.dev/$PROJECT_ID/agentd/desktop-webtop-loaded:buildcache \
48 | --cache-to type=registry,ref=us-docker.pkg.dev/$PROJECT_ID/agentd/desktop-webtop-loaded:buildcache,mode=max \
49 | --push \
50 | .
51 |
52 | # - name: "gcr.io/cloud-builders/docker"
53 | # id: Build and Push AMD
54 | # entrypoint: "bash"
55 | # args:
56 | # - "-c"
57 | # - |
58 | # docker buildx build \
59 | # --platform linux/amd64 \
60 | # -t us-docker.pkg.dev/$PROJECT_ID/agentd/desktop-webtop:${SHORT_SHA} \
61 | # --build-arg PYTHON_VERSION=3.12.0 \
62 | # --cache-from type=registry,ref=us-docker.pkg.dev/$PROJECT_ID/agentd/desktop-webtop:buildcache \
63 | # --cache-to type=registry,ref=us-docker.pkg.dev/$PROJECT_ID/agentd/desktop-webtop:buildcache,mode=max \
64 | # --push \
65 | # .
66 |
67 | # Verify the multi-arch image
68 | - name: "gcr.io/cloud-builders/docker"
69 | id: Verify Images
70 | entrypoint: "bash"
71 | args:
72 | - "-c"
73 | - |
74 | docker buildx imagetools inspect us-docker.pkg.dev/$PROJECT_ID/agentd/desktop-webtop:${SHORT_SHA}
75 | if [ "$BRANCH_NAME" == "main" ]; then
76 | docker buildx imagetools inspect us-docker.pkg.dev/$PROJECT_ID/agentd/desktop-webtop:latest
77 | fi
78 |
79 | # Verify the multi-arch image
80 | - name: "gcr.io/cloud-builders/docker"
81 | id: Verify Images Loaded
82 | entrypoint: "bash"
83 | args:
84 | - "-c"
85 | - |
86 | docker buildx imagetools inspect us-docker.pkg.dev/$PROJECT_ID/agentd/desktop-webtop-loaded:${SHORT_SHA}
87 | if [ "$BRANCH_NAME" == "main" ]; then
88 | docker buildx imagetools inspect us-docker.pkg.dev/$PROJECT_ID/agentd/desktop-webtop-loaded:latest
89 | fi
90 |
91 | timeout: "3600s"
92 |
93 | options:
94 | machineType: "N1_HIGHCPU_32"
95 |
--------------------------------------------------------------------------------
/agentd/models.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, List, Optional
2 | from skillpacks import V1EnvState, V1Action
3 | from pydantic import BaseModel
4 |
5 |
6 | class OpenURLModel(BaseModel):
7 | url: str
8 |
9 |
10 | class ScreenSizeModel(BaseModel):
11 | x: int
12 | y: int
13 |
14 |
15 | class SystemInfoModel(BaseModel):
16 | last_activity_ts: int | None
17 | screen_size: ScreenSizeModel
18 | os_info: str
19 | code_version: str | None
20 |
21 |
22 | class MoveMouseModel(BaseModel):
23 | x: int
24 | y: int
25 | duration: float = 1.0
26 | tween: str = "easeInOutQuad"
27 |
28 |
29 | class ClickModel(BaseModel):
30 | button: str = "left"
31 | location: Optional[MoveMouseModel] = None
32 |
33 |
34 | class TypeTextModel(BaseModel):
35 | text: str
36 | min_interval: float = 0.05
37 | max_interval: float = 0.25
38 |
39 |
40 | class PressKeyModel(BaseModel):
41 | key: str
42 |
43 |
44 | class useSecretRequest(BaseModel):
45 | token: str
46 | server_address: str
47 | name: str
48 | field: str
49 |
50 | class getSecretRequest(BaseModel):
51 | token: str
52 | server_address: str
53 | owner_id: str
54 |
55 | class PressKeysModel(BaseModel):
56 | keys: List[str]
57 |
58 |
59 | class ScrollModel(BaseModel):
60 | clicks: int = 3
61 |
62 |
63 | class DragMouseModel(BaseModel):
64 | x: int
65 | y: int
66 |
67 |
68 | class ScreenshotResponseModel(BaseModel):
69 | status: str
70 | images: List[str]
71 |
72 |
73 | class CoordinatesModel(BaseModel):
74 | x: int
75 | y: int
76 |
77 |
78 | class RecordRequest(BaseModel):
79 | description: Optional[str] = None
80 | task_id: Optional[str] = None
81 | skill_id: Optional[str] = None
82 | token: str
83 | server_address: str
84 | owner_id: str
85 |
86 | class StopRequest(BaseModel):
87 | result: Optional[str] = None
88 | comment: Optional[str] = None
89 |
90 |
91 | class RecordResponse(BaseModel):
92 | task_id: str
93 |
94 |
95 | class ClickData(BaseModel):
96 | button: str
97 | pressed: bool
98 |
99 |
100 | class KeyData(BaseModel):
101 | key: str
102 |
103 |
104 | class TextData(BaseModel):
105 | text: str
106 |
107 |
108 | class ScrollData(BaseModel):
109 | dx: int
110 | dy: int
111 |
112 | class ActionDetails(BaseModel):
113 | x: float
114 | y: float
115 | action: V1Action
116 | end_stamp: Optional[float]
117 | start_state: Optional[V1EnvState]
118 | event_order: int
119 |
120 | class RecordedEvent(BaseModel):
121 | id: str
122 | type: str
123 | timestamp: float
124 | coordinates: CoordinatesModel
125 | before_screenshot_path: Optional[str] = None
126 | after_screenshot_path: Optional[str] = None
127 | before_screenshot_b64: Optional[str] = None
128 | after_screenshot_b64: Optional[str] = None
129 | click_data: Optional[ClickData] = None
130 | key_data: Optional[KeyData] = None
131 | scroll_data: Optional[ScrollData] = None
132 | text_data: Optional[TextData] = None
133 |
134 |
135 | class Recording(BaseModel):
136 | id: str
137 | description: Optional[str] = None
138 | start_time: float
139 | end_time: float
140 | events: List[RecordedEvent] = []
141 | task_id: str
142 |
143 |
144 | class Recordings(BaseModel):
145 | recordings: List[str]
146 |
147 |
148 | class Actions(BaseModel):
149 | actions: List[Dict[str, Any]]
150 |
151 |
152 | class SystemUsageModel(BaseModel):
153 | cpu_percent: float
154 | memory_percent: float
155 | disk_percent: float
156 |
--------------------------------------------------------------------------------
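
These Pydantic models define the request and response bodies for the HTTP API. For instance, a `/move_mouse` payload can be built and sent like this (a minimal sketch; the `localhost:8000` address is an assumption):

```python
import requests

from agentd.models import MoveMouseModel

# Build a typed payload; model_dump() converts it to a plain dict
# suitable for the JSON request body.
payload = MoveMouseModel(x=100, y=200, duration=0.5, tween="linear")

resp = requests.post("http://localhost:8000/move_mouse", json=payload.model_dump())
resp.raise_for_status()
print(resp.json())  # expected: {"status": "success"}
```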
/agentd/celery_worker.py:
--------------------------------------------------------------------------------
1 | from celery import Celery
2 | import requests
3 | from skillpacks import V1ActionEvent, ActionEvent
4 | from celery.app.task import Task
5 | from taskara.task import V1TaskUpdate, V1Task
6 | from taskara.task import Task as App_task
7 | Task.__class_getitem__ = classmethod(lambda cls, *args, **kwargs: cls) # type: ignore[attr-defined]
8 |
9 |
10 |
11 | # Create a new Celery application instance with Redis as the broker and a filesystem result backend
12 | celery_app = Celery('send_actions', broker='redis://localhost:6379/0', backend='file:///config/app/celery')
13 |
14 | celery_app.conf.update(
15 |     worker_concurrency=1,  # Keep concurrency at 1; raising it requires either changing the data model or enabling object locking.
16 | task_serializer='json', # Specify the task serializer if needed
17 | worker_max_memory_per_child=512000000
18 | )
19 |
20 |
21 | @celery_app.task
22 | def send_action(taskID, auth_token, owner_id, v1Task: dict, v1actionEvent: dict):
23 | print("send_action: starting send action function in worker")
24 | action = ActionEvent.from_v1(V1ActionEvent(**v1actionEvent))
25 | print(f"send_action: action {action.id} variable created in worker process")
26 | task = App_task.from_v1_remote_actions(V1Task(**v1Task), owner_id=owner_id, auth_token=auth_token)
27 | print(f"send_action: task {task.id} variable created in worker process")
28 | try:
29 | task.record_action_event(action)
30 | except Exception as e:
31 | print(f"send_action: record_action_event failed due to error: {e} for task ID: {taskID} and action {action.action.model_dump()} and event order {action.event_order}")
32 | print(f"send_action: finished sending action {action.id} for task {task.id}")
33 | return f"send_action: finished sending action {action.id} for task {task.id}"
34 |
35 | @celery_app.task
36 | def update_task(taskID, remote_address, auth_token, v1taskupdate: dict):
37 | print("update_task: starting update task function in worker")
38 |     print(f"update_task: task {taskID} will be updated with {v1taskupdate}")
39 | # Ensure the v1taskupdate dictionary matches the Pydantic model
40 | try:
41 | updateData = V1TaskUpdate(**v1taskupdate)
42 | except Exception as e:
43 | print(f"update_task: Error while parsing update data: {e}")
44 | raise
45 |
46 | print(f"update_task: Task {taskID} update {updateData.model_dump()} created in worker process")
47 |
48 | headers = {}
49 | if auth_token:
50 | headers["Authorization"] = f"Bearer {auth_token}"
51 | else:
52 | print("update_task: Error: no auth token!!")
53 |
54 | url = f"{remote_address}/v1/tasks/{taskID}"
55 | print(url, headers, "update_task: url and headers")
56 | try:
57 | response = requests.put(url, json=updateData.model_dump(), headers=headers)
58 | try:
59 | response.raise_for_status()
60 | except requests.exceptions.HTTPError as e:
61 |
62 | print(f"update_task: HTTP Error: {e}")
63 | print(f"update_task: Status Code: {response.status_code}")
64 | try:
65 | print(f"update_task: Response Body: {response.json()}")
66 | except ValueError:
67 | print(f"update_task: Raw Response: {response.text}")
68 | raise
69 | print(f"update_task: response: {response.__dict__}")
70 | print(f"update_task: response.status_code: {response.status_code}")
71 | try:
72 | response_json = response.json()
73 | print(f"update_task: response_json: {response_json}")
74 | return response_json
75 | except ValueError:
76 | print(f"update_task: Raw Response: {response.text}")
77 | return None
78 |
79 | except requests.RequestException as e:
80 | print(f"update_task: Request failed: {e}")
81 | raise e
82 |
83 | return "Something went wrong"
--------------------------------------------------------------------------------
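
Both functions are registered Celery tasks, so callers enqueue them with `.delay()` and a worker consuming the same Redis broker executes them asynchronously. A minimal sketch (all payload values below are placeholders and must match the `V1TaskUpdate` schema):

```python
from agentd.celery_worker import update_task

# Enqueue a task-status update; returns immediately with an AsyncResult.
result = update_task.delay(
    "task-123",                   # taskID (placeholder)
    "https://tasks.example.com",  # remote_address (placeholder)
    "secret-token",               # auth_token (placeholder)
    {"status": "in progress"},    # v1taskupdate dict, parsed as V1TaskUpdate
)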
/.gitignore:
--------------------------------------------------------------------------------
1 | screenshots/
2 | .recordings/
3 | .DS_Store
4 |
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 | cover/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | .pybuilder/
80 | target/
81 |
82 | # Jupyter Notebook
83 | .ipynb_checkpoints
84 |
85 | # IPython
86 | profile_default/
87 | ipython_config.py
88 |
89 | # pyenv
90 | # For a library or package, you might want to ignore these files since the code is
91 | # intended to run in multiple environments; otherwise, check them in:
92 | # .python-version
93 |
94 | # pipenv
95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
98 | # install all needed dependencies.
99 | #Pipfile.lock
100 |
101 | # poetry
102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
103 | # This is especially recommended for binary packages to ensure reproducibility, and is more
104 | # commonly ignored for libraries.
105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
106 | #poetry.lock
107 |
108 | # pdm
109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
110 | #pdm.lock
111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
112 | # in version control.
113 | # https://pdm.fming.dev/#use-with-ide
114 | .pdm.toml
115 |
116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
117 | __pypackages__/
118 |
119 | # Celery stuff
120 | celerybeat-schedule
121 | celerybeat.pid
122 |
123 | # SageMath parsed files
124 | *.sage.py
125 |
126 | # Environments
127 | .env
128 | .venv
129 | env/
130 | venv/
131 | ENV/
132 | env.bak/
133 | venv.bak/
134 |
135 | # Spyder project settings
136 | .spyderproject
137 | .spyproject
138 |
139 | # Rope project settings
140 | .ropeproject
141 |
142 | # mkdocs documentation
143 | /site
144 |
145 | # mypy
146 | .mypy_cache/
147 | .dmypy.json
148 | dmypy.json
149 |
150 | # Pyre type checker
151 | .pyre/
152 |
153 | # pytype static type analyzer
154 | .pytype/
155 |
156 | # Cython debug symbols
157 | cython_debug/
158 |
159 | # PyCharm
160 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
161 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
162 | # and can be added to the global gitignore or merged into this file. For a more nuclear
163 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
164 | #.idea/
165 |
166 | .vms
167 | meta/user-data
168 | log
169 | output-*
170 | artifacts
171 | cidata.iso
172 | cidata_root.iso
173 | .data
--------------------------------------------------------------------------------
/agentd/util.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pwd
3 | import subprocess
4 | import threading
5 | import queue
6 |
7 |
8 | def run_as_user(command, username):
9 | # Get the user's UID and GID
10 | pw_record = pwd.getpwnam(username)
11 | user_uid = pw_record.pw_uid
12 | user_gid = pw_record.pw_gid
13 |
14 | def preexec_fn():
15 | os.setgid(user_gid)
16 | os.setuid(user_uid)
17 |
18 | return subprocess.Popen(command, preexec_fn=preexec_fn)
19 |
20 | def log_subprocess_output(pipe, sub_process):
21 | for line in iter(pipe.readline, b''): # b'\n'-separated lines
22 | if line: # Check if the line is not empty
23 | print(f'from subprocess: {sub_process} got line: {line.strip()}', flush=True)
24 |
25 | class OrderLock:
26 | """
27 | A lock that ensures threads acquire the lock in FIFO (first-in, first-out) order
28 | using queue.Queue(). Each thread places an Event in the queue and waits for
29 | its Event to be set before proceeding to acquire the internal lock.
30 |
31 | This approach automates queue management, removing the need for manual
32 | Condition objects and notify/wait calls.
33 |
34 | Usage:
35 | order_lock = OrderLock()
36 |
37 | def worker(i):
38 | print(f"Worker {i} waiting for lock")
39 | with order_lock:
40 | print(f"Worker {i} acquired lock")
41 | time.sleep(1)
42 | print(f"Worker {i} released lock")
43 |
44 | threads = [threading.Thread(target=worker, args=(i,)) for i in range(5)]
45 | for t in threads:
46 | t.start()
47 | for t in threads:
48 | t.join()
49 |
50 | Behavior:
51 | 1. Thread enqueues a threading.Event (thread’s place in line).
52 | 2. If it’s the only event in the queue, it is immediately set.
53 | 3. The thread waits on the Event until it is set, then acquires the lock.
54 | 4. On release, the thread dequeues its own Event and sets the next Event
55 | in the queue (if any), transferring ownership of the lock to that thread.
56 |
57 | Note:
58 | - This enforces strict FIFO ordering.
59 | - If you don’t need ordering, a regular threading.Lock is simpler/faster.
60 | - If you need complex ordering (e.g., priority), you’ll need a more advanced approach.
61 | """
62 |
63 | def __init__(self):
64 | # Lock for the shared resource
65 | self._resource_lock = threading.Lock()
66 | # A queue of Event objects, one per waiting thread
67 | self._queue = queue.Queue()
68 | # Internal lock to ensure enqueue/dequeue operations are atomic
69 | self._queue_lock = threading.Lock()
70 |
71 | def acquire(self):
72 | """Acquire the lock in FIFO order."""
73 | my_event = threading.Event()
74 |
75 | with self._queue_lock:
76 | self._queue.put(my_event)
77 | # If this is the only event in the queue, allow the thread to proceed
78 | if self._queue.qsize() == 1:
79 | my_event.set()
80 |
81 | # Block until my_event is set, meaning it's this thread's turn
82 | my_event.wait()
83 | self._resource_lock.acquire()
84 |
85 | def release(self):
86 | """Release the lock, notify the next waiting thread (if any)."""
87 | self._resource_lock.release()
88 |
89 | with self._queue_lock:
90 | # Remove this thread’s event from the queue
91 | finished_event = self._queue.get()
92 | # Optional: sanity check
93 | # assert finished_event.is_set()
94 |
95 | # If there is another thread waiting, set its event
96 | if not self._queue.empty():
97 | next_event = self._queue.queue[0] # Peek at the next event
98 | next_event.set()
99 |
100 | def __enter__(self):
101 | self.acquire()
102 | return self
103 |
104 | def __exit__(self, exc_type, exc_val, exc_tb):
105 | self.release()
106 |
--------------------------------------------------------------------------------
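
`log_subprocess_output` iterates `b""`-terminated lines, so pair it with a `Popen` opened in binary mode (no `text=True`) and drain the pipe on a background thread. A minimal sketch (the `xdotool` command is just an example child process):

```python
import subprocess
import threading

from agentd.util import log_subprocess_output

# Open the child with a binary stdout pipe; text mode must stay off
# so readline() returns b"" at EOF and the logging loop terminates.
proc = subprocess.Popen(
    ["xdotool", "getdisplaygeometry"],
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
)
threading.Thread(
    target=log_subprocess_output,
    args=(proc.stdout, "xdotool"),
    daemon=True,
).start()
proc.wait()
```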
/theme/xfce4-panel.xml:
--------------------------------------------------------------------------------
1 | <!-- xfce4-panel configuration (XML content not preserved in this export) -->
--------------------------------------------------------------------------------
/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [[ $EUID -ne 0 ]]; then
4 | echo "This script must be run as root (or with sudo). Exiting..."
5 | exit 1
6 | fi
7 |
8 | echo "creating user..."
9 | adduser --disabled-password --gecos '' agentsea
10 | touch /home/agentsea/.bashrc
11 | touch /home/agentsea/.Xauthority
12 | chown -R agentsea:agentsea /home/agentsea
13 | echo 'agentsea ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/agentsea
14 |
15 |
16 | echo "Configuring .xprofile to disable screen saver..."
17 | cat > /home/agentsea/.xprofile < /home/agentsea/.config/lxqt/lxqt-powermanagement.conf
31 | chown -R agentsea:agentsea /home/agentsea/.config
32 |
33 | echo "installing base packages..."
34 | add-apt-repository universe
35 | apt-get update
36 | apt-get install -y xvfb x11vnc websockify python3-pip python3-dev python3-venv python3-tk software-properties-common ntp dbus-x11 openbox menu lxqt sddm lxqt-session wmctrl xdotool
37 | apt-get remove -y xscreensaver
38 |
39 | echo 'export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/1001/bus"' >> /home/agentsea/.profile
40 |
41 | echo "installing chromium"
42 | snap install chromium
43 | update-alternatives --install /usr/bin/x-www-browser x-www-browser /snap/bin/chromium 200
44 | update-alternatives --set x-www-browser /snap/bin/chromium
45 |
46 | echo "configuring lxqt"
47 | mkdir -p /etc/sddm.conf.d
48 | echo "[Autologin]" > /etc/sddm.conf.d/autologin.conf
49 | echo "User=agentsea" >> /etc/sddm.conf.d/autologin.conf
50 | echo "Session=lxqt.desktop" >> /etc/sddm.conf.d/autologin.conf
51 |
52 |
53 | echo -e "[Session]\nwindow_manager=openbox" > /home/agentsea/.config/lxqt/session.conf
54 |
55 | mkdir -p /home/agentsea/.config/openbox
56 | cp /etc/xdg/openbox/rc.xml /home/agentsea/.config/openbox/
57 |
58 | chown -R agentsea:agentsea /home/agentsea/.config
59 |
60 | su agentsea -c "xauth generate :99 . trusted"
61 | su agentsea -c "bash install_deps.sh"
62 |
63 | # Disable screen saver and DPMS
64 | echo "Disabling screen saver and DPMS..."
65 | su agentsea -c "xset s off"
66 | # su agentsea -c "xset -dpms"
67 |
68 | echo "copying services..."
69 | cp ./conf/agentd.service /etc/systemd/system/agentd.service
70 | cp ./conf/websockify.service /etc/systemd/system/websockify.service
71 | cp ./conf/x11vnc.service /lib/systemd/system/x11vnc.service
72 | cp ./conf/xvfb.service /lib/systemd/system/xvfb.service
73 | cp ./conf/openbox.service /lib/systemd/system/openbox.service
74 | cp ./conf/lxqt.service /lib/systemd/system/lxqt.service
75 |
76 | loginctl enable-linger agentsea
77 |
78 | echo "enabling services..."
79 | systemctl daemon-reload
80 | systemctl enable agentd.service
81 | systemctl enable websockify.service
82 | systemctl enable x11vnc.service
83 | systemctl enable xvfb.service
84 | systemctl enable openbox.service
85 | systemctl enable lxqt.service
86 | systemctl enable ntp
87 |
88 | restart_service_and_log() {
89 | local service_name="$1"
90 | echo "Restarting $service_name..."
91 | if systemctl restart "$service_name"; then
92 | echo "$service_name restarted successfully."
93 | else
94 | echo "Failed to restart $service_name. Here are the last 20 log lines:"
95 | journalctl -u "$service_name" --no-pager -n 20
96 | fi
97 | }
98 |
99 | echo "restarting services..."
100 | restart_service_and_log agentd.service
101 | restart_service_and_log websockify.service
102 | restart_service_and_log x11vnc.service
103 | restart_service_and_log xvfb.service
104 | restart_service_and_log openbox.service
105 | restart_service_and_log lxqt.service
106 | restart_service_and_log ntp
107 |
108 | echo "disabling firewall..."
109 | ufw disable
110 |
111 | su - agentsea -c 'bash -l -c "
112 | while [ -z \$(pgrep -u agentsea lxqt-session) ]; do
113 | echo Waiting for LXQt session to start...
114 | sleep 2
115 | done
116 |
117 | echo LXQt session started, setting icon as trusted...
118 | "'
119 |
120 |
121 | echo "Adding Firefox icon to desktop..."
122 | mkdir -p /home/agentsea/Desktop
123 | cat > /home/agentsea/Desktop/firefox.desktop <
2 | <!-- XML configuration (content not preserved in this export) -->
--------------------------------------------------------------------------------
/base.pkr.hcl:
--------------------------------------------------------------------------------
1 | packer {
2 | required_plugins {
3 | googlecompute = {
4 | source = "github.com/hashicorp/googlecompute"
5 | version = "~> 1"
6 | }
7 | }
8 | }
9 |
10 | packer {
11 | required_plugins {
12 | amazon = {
13 | source = "github.com/hashicorp/amazon"
14 | version = "~> 1"
15 | }
16 | }
17 | }
18 | variable "build_qemu" {
19 | type = bool
20 | default = true
21 | }
22 |
23 | variable "build_ec2" {
24 | type = bool
25 | default = true
26 | }
27 |
28 | variable "build_gce" {
29 | type = bool
30 | default = true
31 | }
32 |
33 | variable "gcp_project_id" {
34 | type = string
35 | default = "your-gcp-project-id"
36 | }
37 |
38 | variable "aws_region" {
39 | type = string
40 | default = "your-aws-region"
41 | }
42 |
43 | variable "output_directory" {
44 | type = string
45 | default = "output-ubuntu"
46 | }
47 |
48 | variable "cpu" {
49 | type = string
50 | default = "2"
51 | }
52 |
53 | variable "disk_size" {
54 | type = string
55 | default = "40000"
56 | }
57 |
58 | variable "headless" {
59 | type = string
60 | default = "true"
61 | }
62 |
63 | variable "iso_checksum" {
64 | type = string
65 | default = "d277aaac7a56ec02ea026a02d92fde2fc358048431749cb1031b62380cc93584"
66 | }
67 |
68 | variable "iso_url" {
69 | type = string
70 | default = "https://cloud-images.ubuntu.com/jammy/current/jammy-server-cloudimg-amd64.img"
71 | }
72 |
73 | variable "name" {
74 | type = string
75 | default = "jammy"
76 | }
77 |
78 | variable "ram" {
79 | type = string
80 | default = "2048"
81 | }
82 |
83 | variable "ssh_password" {
84 | type = string
85 | default = "ubuntu"
86 | }
87 |
88 | variable "ssh_username" {
89 | type = string
90 | default = "ubuntu"
91 | }
92 |
93 | variable "version" {
94 | type = string
95 | default = ""
96 | }
97 |
98 | variable "format" {
99 | type = string
100 | default = "qcow2"
101 | }
102 |
103 | source "qemu" "jammy" {
104 | # accelerator = "kvm"
105 | boot_command = []
106 | disk_compression = true
107 | disk_interface = "virtio"
108 | disk_image = true
109 | disk_size = var.disk_size
110 | format = var.format
111 | headless = var.headless
112 | iso_checksum = var.iso_checksum
113 | iso_url = var.iso_url
114 | net_device = "virtio-net"
115 | output_directory = "${var.output_directory}"
116 | qemuargs = [
117 | ["-m", "${var.ram}M"],
118 | ["-smp", "${var.cpu}"],
119 | ["-cdrom", "cidata_root.iso"]
120 | ]
121 | communicator = "ssh"
122 | shutdown_command = "echo '${var.ssh_password}' | sudo -S shutdown -P now"
123 | ssh_password = var.ssh_password
124 | ssh_username = var.ssh_username
125 | ssh_timeout = "10m"
126 | }
127 |
128 | source "amazon-ebs" "jammy" {
129 | ami_name = "agentd-ubuntu-22.04-${formatdate("YYYYMMDDHHmmss", timestamp())}"
130 | instance_type = "t2.micro"
131 | region = var.aws_region
132 | source_ami_filter {
133 | filters = {
134 | name = "ubuntu/images/*ubuntu-jammy-22.04-amd64-server-*"
135 | root-device-type = "ebs"
136 | virtualization-type = "hvm"
137 | }
138 | owners = ["099720109477"] # Ubuntu's owner ID
139 | most_recent = true
140 | }
141 | ssh_username = "ubuntu"
142 | }
143 |
144 | source "googlecompute" "ubuntu" {
145 | project_id = var.gcp_project_id
146 | source_image_family = "ubuntu-2204-lts"
147 | zone = "us-central1-a"
148 | ssh_username = "ubuntu"
149 | image_name = "agentd-ubuntu-22-04-${formatdate("YYYYMMDDHHmmss", timestamp())}"
150 | }
151 |
152 | build {
153 | // dynamic "source" {
154 | // for_each = var.build_qemu ? ["source.qemu.jammy"] : []
155 | // content {
156 | // source = source.value
157 | // }
158 | // }
159 |
160 | // dynamic "source" {
161 | // for_each = var.build_ec2 ? ["source.amazon-ebs.jammy"] : []
162 | // content {
163 | // source = source.value
164 | // }
165 | // }
166 |
167 | // dynamic "source" {
168 | // for_each = var.build_gce ? ["source.googlecompute.ubuntu"] : []
169 | // content {
170 | // source = source.value
171 | // }
172 | // }
173 | sources = [
174 | "source.qemu.jammy",
175 | "source.amazon-ebs.jammy",
176 | "source.googlecompute.ubuntu",
177 | ]
178 |
179 |
180 | provisioner "shell" {
181 | inline = [
182 | # Run install script
183 | "curl -sSL https://raw.githubusercontent.com/agentsea/agentd/main/remote_install.sh | sudo bash",
184 |
185 | # Prepare cloud-init to run on next boot for the QEMU image
186 | "sudo cloud-init clean --logs",
187 | "sudo truncate -s 0 /etc/machine-id",
188 | "sudo rm /var/lib/dbus/machine-id",
189 | "sudo ln -s /etc/machine-id /var/lib/dbus/machine-id",
190 |
191 | # Disable SSH password authentication
192 | "sudo sed -i 's/^#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config",
193 | "sudo sed -i 's/^PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config",
194 | "sudo systemctl restart sshd",
195 | ]
196 | }
197 |
198 | // post-processor "amazon-ami" {
199 | // region = var.aws_region
200 | // ami_users = ["all"]
201 | // only = ["source.amazon-ebs.jammy"]
202 | // }
203 |
204 | // post-processor "shell-local" {
205 | // inline = [
206 | // "gcloud compute images add-iam-policy-binding ${build.ImageName} --member='allAuthenticatedUsers' --role='roles/compute.imageUser'",
207 | // ]
208 | // only = ["source.googlecompute.ubuntu"]
209 | // }
210 |
211 | // post-processor "shell-local" {
212 | // only = ["source.qemu.jammy"]
213 | // inline = [
214 | // "echo \"copying artifacts to local latest directory...\"",
215 | // "mkdir -p \"${BASE_DIR}/latest\"",
216 | // "cp \"${OUTPUT_DIRECTORY}/packer-jammy\" \"${BASE_DIR}/latest/jammy.qcow2\"",
217 | // "echo 'copying artifacts to GCS...'",
218 | // "TIMESTAMP=$(date +%Y%m%d%H%M%S)",
219 | // "OUTPUT_DIR='output-ubuntu'",
220 | // // Commands for copying artifacts to GCS commented out for clarity
221 | // "gsutil cp \"gs://agentsea-vms/jammy/latest/agentd-jammy.qcow2\" \"gs://agentsea-vms/jammy/${TIMESTAMP}/agentd-jammy.qcow2\"",
222 | // "gsutil acl ch -u AllUsers:R \"gs://agentsea-vms/jammy/${TIMESTAMP}/agentd-jammy.qcow2\"",
223 | // ]
224 | // }
225 | }
226 |
--------------------------------------------------------------------------------
/server.pkr.hcl:
--------------------------------------------------------------------------------
1 | packer {
2 | required_plugins {
3 | googlecompute = {
4 | source = "github.com/hashicorp/googlecompute"
5 | version = "~> 1"
6 | }
7 | }
8 | }
9 |
10 | packer {
11 | required_plugins {
12 | amazon = {
13 | source = "github.com/hashicorp/amazon"
14 | version = "~> 1"
15 | }
16 | }
17 | }
18 | variable "build_qemu" {
19 | type = bool
20 | default = true
21 | }
22 |
23 | variable "build_ec2" {
24 | type = bool
25 | default = true
26 | }
27 |
28 | variable "build_gce" {
29 | type = bool
30 | default = true
31 | }
32 |
33 | variable "gcp_project_id" {
34 | type = string
35 | default = "your-gcp-project-id"
36 | }
37 |
38 | variable "aws_region" {
39 | type = string
40 | default = "your-aws-region"
41 | }
42 |
43 | variable "output_directory" {
44 | type = string
45 | default = "output-ubuntu"
46 | }
47 |
48 | variable "cpu" {
49 | type = string
50 | default = "2"
51 | }
52 |
53 | variable "disk_size" {
54 | type = string
55 | default = "40000"
56 | }
57 |
58 | variable "headless" {
59 | type = string
60 | default = "true"
61 | }
62 |
63 | variable "iso_checksum" {
64 | type = string
65 | default = "d277aaac7a56ec02ea026a02d92fde2fc358048431749cb1031b62380cc93584"
66 | }
67 |
68 | variable "iso_url" {
69 | type = string
70 | default = "https://cloud-images.ubuntu.com/jammy/current/jammy-server-cloudimg-amd64.img"
71 | }
72 |
73 | variable "name" {
74 | type = string
75 | default = "jammy"
76 | }
77 |
78 | variable "ram" {
79 | type = string
80 | default = "2048"
81 | }
82 |
83 | variable "ssh_password" {
84 | type = string
85 | default = "ubuntu"
86 | }
87 |
88 | variable "ssh_username" {
89 | type = string
90 | default = "ubuntu"
91 | }
92 |
93 | variable "version" {
94 | type = string
95 | default = ""
96 | }
97 |
98 | variable "format" {
99 | type = string
100 | default = "qcow2"
101 | }
102 |
103 | // source "qemu" "jammy" {
104 | // # accelerator = "kvm"
105 | // boot_command = []
106 | // disk_compression = true
107 | // disk_interface = "virtio"
108 | // disk_image = true
109 | // disk_size = var.disk_size
110 | // format = var.format
111 | // headless = var.headless
112 | // iso_checksum = var.iso_checksum
113 | // iso_url = var.iso_url
114 | // net_device = "virtio-net"
115 | // output_directory = "${var.output_directory}"
116 | // qemuargs = [
117 | // ["-m", "${var.ram}M"],
118 | // ["-smp", "${var.cpu}"],
119 | // ["-cdrom", "cidata_root.iso"]
120 | // ]
121 | // communicator = "ssh"
122 | // shutdown_command = "echo '${var.ssh_password}' | sudo -S shutdown -P now"
123 | // ssh_password = var.ssh_password
124 | // ssh_username = var.ssh_username
125 | // ssh_timeout = "10m"
126 | // }
127 |
128 | // source "amazon-ebs" "jammy" {
129 | // ami_name = "agentd-ubuntu-22.04-${formatdate("YYYYMMDDHHmmss", timestamp())}"
130 | // instance_type = "t2.micro"
131 | // region = var.aws_region
132 | // source_ami_filter {
133 | // filters = {
134 | // name = "ubuntu/images/*ubuntu-jammy-22.04-amd64-server-*"
135 | // root-device-type = "ebs"
136 | // virtualization-type = "hvm"
137 | // }
138 | // owners = ["099720109477"] # Ubuntu's owner ID
139 | // most_recent = true
140 | // }
141 | // ssh_username = "ubuntu"
142 | // }
143 |
144 | source "googlecompute" "ubuntu" {
145 | project_id = var.gcp_project_id
146 | source_image = "agentd-ubuntu-22-04-u20240530022848"
147 | zone = "us-central1-a"
148 | ssh_username = "ubuntu"
149 | image_name = "agentd-ubuntu-22-04-u${formatdate("YYYYMMDDHHmmss", timestamp())}"
150 | }
151 |
152 | build {
153 | // dynamic "source" {
154 | // for_each = var.build_qemu ? ["source.qemu.jammy"] : []
155 | // content {
156 | // source = source.value
157 | // }
158 | // }
159 |
160 | // dynamic "source" {
161 | // for_each = var.build_ec2 ? ["source.amazon-ebs.jammy"] : []
162 | // content {
163 | // source = source.value
164 | // }
165 | // }
166 |
167 | // dynamic "source" {
168 | // for_each = var.build_gce ? ["source.googlecompute.ubuntu"] : []
169 | // content {
170 | // source = source.value
171 | // }
172 | // }
173 |
174 | sources = [
175 | // "source.qemu.jammy",
176 | // "source.amazon-ebs.jammy",
177 | "source.googlecompute.ubuntu",
178 | ]
179 |
180 |
181 | provisioner "shell" {
182 | inline = [
183 | # Run install script
184 | "curl -sSL https://raw.githubusercontent.com/agentsea/agentd/main/remote_install_server.sh | sudo bash",
185 |
186 | # Prepare cloud-init to run on next boot for the QEMU image
187 | "sudo cloud-init clean --logs",
188 | "sudo truncate -s 0 /etc/machine-id",
189 | "sudo rm /var/lib/dbus/machine-id",
190 | "sudo ln -s /etc/machine-id /var/lib/dbus/machine-id",
191 |
192 | # Disable SSH password authentication
193 | "sudo sed -i 's/^#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config",
194 | "sudo sed -i 's/^PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config",
195 | "sudo systemctl restart sshd",
196 | ]
197 | }
198 | // post-processor "amazon-ami" {
199 | // region = var.aws_region
200 | // ami_users = ["all"]
201 | // only = ["source.amazon-ebs.jammy"]
202 | // }
203 |
204 | // post-processor "shell-local" {
205 | // inline = [
206 | // "gcloud compute images add-iam-policy-binding ${build.ImageName} --member='allAuthenticatedUsers' --role='roles/compute.imageUser'",
207 | // ]
208 | // only = ["source.googlecompute.ubuntu"]
209 | // }
210 |
211 | // post-processor "shell-local" {
212 | // only = ["source.qemu.jammy"]
213 | // inline = [
214 | // "echo \"copying artifacts to local latest directory...\"",
215 | // "mkdir -p \"${BASE_DIR}/latest\"",
216 | // "cp \"${OUTPUT_DIRECTORY}/packer-jammy\" \"${BASE_DIR}/latest/jammy.qcow2\"",
217 | // "echo 'copying artifacts to GCS...'",
218 | // "TIMESTAMP=$(date +%Y%m%d%H%M%S)",
219 | // "OUTPUT_DIR='output-ubuntu'",
220 | // // Commands for copying artifacts to GCS commented out for clarity
221 | // "gsutil cp \"gs://agentsea-vms/jammy/latest/agentd-jammy.qcow2\" \"gs://agentsea-vms/jammy/${TIMESTAMP}/agentd-jammy.qcow2\"",
222 | // "gsutil acl ch -u AllUsers:R \"gs://agentsea-vms/jammy/${TIMESTAMP}/agentd-jammy.qcow2\"",
223 | // ]
224 | // }
225 | }
226 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # agentd
2 |
3 | A daemon that makes a desktop OS accessible to AI agents.
4 |
5 | Explore the docs » · View Demo · Report Bug · Request Feature
6 |
25 | `AgentD` makes a desktop OS accessible to AI agents by exposing an HTTP API.
26 |
27 | For a higher level interface see [AgentDesk](https://github.com/agentsea/agentdesk).
28 |
29 | ## Usage
30 |
31 | `AgentD` is currently tested on the Ubuntu 22.04 cloud image.
32 |
33 | We recommend using one of our base VMs, which come preconfigured.
34 |
35 | ### Qemu
36 |
37 | For Qemu, download the qcow2 image:
38 | ```bash
39 | wget https://storage.googleapis.com/agentsea-vms/jammy/latest/agentd-jammy.qcow2
40 | ```
41 |
42 | To use the image, we need to make a [cloud-init](https://cloud-init.io/) iso with our user-data. See this [tutorial](https://cloudinit.readthedocs.io/en/latest/reference/datasources/nocloud.html); below is how it looks on macOS:
43 |
44 | ```bash
45 | xorriso -as mkisofs -o cidata.iso -V "cidata" -J -r -iso-level 3 meta/
46 | ```
47 | Then the image can be run with Qemu:
48 |
49 | ```bash
50 | qemu-system-x86_64 -nographic -hda ./agentd-jammy.qcow2 \
51 | -m 4G -smp 2 -netdev user,id=vmnet,hostfwd=tcp::6080-:6080,hostfwd=tcp::8000-:8000,hostfwd=tcp::2222-:22 \
52 | -device e1000,netdev=vmnet -cdrom cidata.iso
53 | ```
54 | Once running, the agentd service can be accessed:
55 |
56 | ```bash
57 | curl localhost:8000/health
58 | ```
59 | To log in to the machine:
60 |
61 | ```bash
62 | ssh -p 2222 agentsea@localhost
63 | ```
64 |
65 | ### AWS
66 | For AWS, use public AMI `ami-01a893c1530453073`.
67 |
68 | Create a cloud-init script with your ssh key:
69 |
70 | ```yaml
71 | #cloud-config
72 |
73 | users:
74 | - name: agentsea
75 | sudo: ['ALL=(ALL) NOPASSWD:ALL']
76 | groups: sudo
77 | ssh_authorized_keys:
78 | - your-ssh-public-key
79 |
80 | package_upgrade: true
81 | ```
82 |
83 | ```bash
84 | aws ec2 run-instances \
85 | --image-id ami-01a893c1530453073 \
86 | --count 1 \
87 | --instance-type t2.micro \
88 | --key-name $KEY_NAME \
89 | --security-group-ids $SG_NAME \
90 | --subnet-id $SUBNET_NAME \
91 | --user-data file://path/to/cloud-init-config.yaml
92 | ```
93 |
94 | ### GCE
95 |
96 | For GCE, use the public image `ubuntu-22-04-20240208044623`.
97 |
98 | ```bash
99 | gcloud compute instances create $NAME \
100 | --machine-type "n1-standard-1" \
101 | --image "ubuntu-22-04-20240208044623" \
102 | --image-project $PROJECT_ID \
103 | --zone $ZONE \
104 | --metadata ssh-keys="agentsea:$(cat path/to/your/public/ssh/key.pub)"
105 | ```
106 |
107 | ### Custom
108 |
109 | If you want to install on a fresh Ubuntu VM, use a [cloud images base](https://cloud-images.ubuntu.com/jammy/current/) qcow2 image.
110 |
111 | ```bash
112 | curl -sSL https://raw.githubusercontent.com/agentsea/agentd/main/remote_install.sh | sudo bash
113 | ```
114 |
115 | ## API Endpoints
116 |
117 | ### General
118 |
119 | - **GET /health** - Checks the API's health.
120 | - **Response:** `{"status": "ok"}`
121 |
122 | ### Mouse and Keyboard Control
123 |
124 | - **GET /mouse_coordinates** - Retrieves the current mouse coordinates.
125 |
126 | - **Response Model:** `CoordinatesModel`
127 |
128 | - **POST /move_mouse** - Moves the mouse to specified coordinates.
129 |
130 | - **Request Body:** `MoveMouseModel`
131 | - **Response:** `{"status": "success"}` or `{"status": "error", "message": ""}`
132 |
133 | - **POST /click** - Clicks at the current or specified location.
134 |
135 | - **Request Body:** `ClickModel`
136 | - **Response:** `{"status": "success"}` or raises `HTTPException`
137 |
138 | - **POST /double_click** - Performs a double-click at the current mouse location.
139 |
140 | - **Response:** `{"status": "success"}` or raises `HTTPException`
141 |
142 | - **POST /type_text** - Types the specified text.
143 |
144 | - **Request Body:** `TypeTextModel`
145 | - **Response:** `{"status": "success"}` or raises `HTTPException`
146 |
147 | - **POST /press_key** - Presses a specified key.
148 |
149 | - **Request Body:** `PressKeyModel`
150 | - **Response:** `{"status": "success"}` or raises `HTTPException`
151 |
152 | - **POST /scroll** - Scrolls the mouse wheel.
153 |
154 | - **Request Body:** `ScrollModel`
155 | - **Response:** `{"status": "success"}` or raises `HTTPException`
156 |
157 | - **POST /drag_mouse** - Drags the mouse to specified coordinates.
158 | - **Request Body:** `DragMouseModel`
159 | - **Response:** `{"status": "success"}` or raises `HTTPException`
160 |
161 | ### Web Browser Control
162 |
163 | - **POST /open_url** - Opens a URL in a Chromium-based browser.
164 | - **Request Body:** `OpenURLModel`
165 | - **Response:** `{"status": "success"}` or `{"status": "error", "message": ""}`
166 |
167 | ### Screen Capture
168 |
169 | - **POST /screenshot** - Takes a screenshot and returns it as a base64-encoded image.
170 | - **Response Model:** `ScreenshotResponseModel`
171 |
172 | ### Session Recording
173 |
174 | - **POST /recordings** - Starts a new recording session.
175 |
176 | - **Request Body:** `RecordRequest`
177 | - **Response Model:** `RecordResponse`
178 |
179 | - **GET /recordings** - Lists all recordings.
180 |
181 | - **Response Model:** `Recordings`
182 |
183 | - **POST /recordings/{session_id}/stop** - Stops a recording session.
184 |
185 | - **Path Variable:** `session_id`
186 | - **Response:** None (side effect: stops recording and saves to file)
187 |
188 | - **GET /recordings/{session_id}** - Retrieves information about a specific recording session.
189 |
190 | - **Path Variable:** `session_id`
191 | - **Response Model:** `Recording`
192 |
193 | - **GET /recordings/{session_id}/event/{event_id}** - Retrieves a specific event from a recording.
194 |
195 | - **Path Variables:** `session_id`, `event_id`
196 | - **Response Model:** `RecordedEvent`
197 |
198 | - **DELETE /recordings/{session_id}/event/{event_id}** - Deletes a specific event from a recording.
199 |
200 | - **Path Variables:** `session_id`, `event_id`
201 | - **Response Model:** `Recording`
202 |
203 | - **GET /active_sessions** - Lists IDs of all active recording sessions.
204 |
205 | - **Response Model:** `Recordings`
206 |
207 | - **GET /recordings/{session_id}/actions** - Retrieves all actions from a specific recording session.
208 | - **Path Variable:** `session_id`
209 | - **Response Model:** `Actions`
210 |
211 | ## Community
212 |
213 | Come join us on [Discord](https://discord.gg/hhaq7XYPS6).
214 |
215 | ## Developing
216 |
217 | To pack a fresh set of images:
218 |
219 | ```bash
220 | make pack
221 | ```
222 |
223 | To run from this repo:
224 |
225 | ```bash
226 | make run-jammy
227 | ```
228 |
--------------------------------------------------------------------------------
/docs/recordings.rst:
--------------------------------------------------------------------------------
1 | Making Recordings
2 | ==================
3 |
4 | POST /recordings
5 | ^^^^^^^^^^^^^^^^
6 |
7 | The ``/recordings`` endpoint starts a new recording session.
8 |
9 | **Request:**
10 |
11 | .. code-block:: json
12 |
13 | {
14 | "description": "string"
15 | }
16 |
17 | **Response:**
18 |
19 | Returns a JSON response containing the session ID of the newly started recording session.
20 |
21 | .. code-block:: json
22 |
23 | {
24 | "session_id": "uuid"
25 | }
26 |
27 | GET /recordings
28 | ^^^^^^^^^^^^^^^
29 |
30 | The ``/recordings`` endpoint retrieves a list of all recording sessions.
31 |
32 | **Request:**
33 |
34 | No parameters required.
35 |
36 | **Response:**
37 |
38 | Returns a JSON response containing a list of recording session IDs.
39 |
40 | .. code-block:: json
41 |
42 | {
43 | "recordings": [
44 | "uuid1",
45 | "uuid2",
46 | "uuid3"
47 | ]
48 | }
49 |
50 | This endpoint allows you to retrieve all the recording sessions that have been initiated.
51 |
52 | POST /recordings/{session_id}/stop
53 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
54 |
55 | The endpoint to stop a recording session.
56 |
57 | **Request:**
58 |
59 | Path Parameters:
60 | - ``session_id``: The unique identifier of the recording session to be stopped.
61 |
62 | **Response:**
63 |
64 | Returns a JSON response indicating the success of the operation.
65 |
66 | GET /recordings/{session_id}
67 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
68 |
69 | The endpoint to retrieve a specific recording session by its session ID.
70 |
71 | **Request:**
72 |
73 | Path Parameters:
74 | - ``session_id``: The unique identifier of the recording session to be retrieved.
75 |
76 | **Response:**
77 |
78 | Returns a JSON response containing the details of the specified recording session, including the session ID, description, start time, end time, and a list of recorded events.
79 |
80 | .. code-block:: json
81 |
82 | {
83 | "id": "uuid",
84 | "description": "Session Description",
85 | "start_time": 1622547600,
86 | "end_time": 1622547900,
87 | "events": [
88 | {
89 | "id": "uuid",
90 | "type": "click",
91 | "timestamp": 1622547605,
92 | "coordinates": {
93 | "x": 100,
94 | "y": 200
95 | },
96 | "screenshot_path": "path/to/screenshot",
97 | "click_data": {
98 | "button": "left",
99 | "pressed": true
100 | }
101 | },
102 | {
103 | "id": "uuid",
104 | "type": "key",
105 | "timestamp": 1622547610,
106 | "key_data": {
107 | "key": "a"
108 | }
109 | }
110 | ]
111 | }
112 |
113 | This endpoint allows you to retrieve detailed information about a specific recording session, including all the events that occurred during the session.
114 |
115 | GET /recordings/{session_id}/event/{event_id}
116 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
117 |
118 | The endpoint to retrieve a specific event from a recording session by its session ID and event ID.
119 |
120 | **Request:**
121 |
122 | Path Parameters:
123 | - ``session_id``: The unique identifier of the recording session.
124 | - ``event_id``: The unique identifier of the event within the recording session.
125 |
126 | **Response:**
127 |
128 | Returns a JSON response containing the details of the specified event, including the event ID, type, timestamp, coordinates, and any associated data such as click data, key data, scroll data, or text data.
129 |
130 | .. code-block:: json
131 |
132 | {
133 | "id": "uuid",
134 | "type": "click",
135 | "timestamp": 1622547605,
136 | "coordinates": {
137 | "x": 100,
138 | "y": 200
139 | },
140 | "screenshot_path": "path/to/screenshot",
141 | "click_data": {
142 | "button": "left",
143 | "pressed": true
144 | }
145 | }
146 |
147 | This endpoint allows you to retrieve detailed information about a specific event within a recording session.
148 |
149 | DELETE /recordings/{session_id}/event/{event_id}
150 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
151 |
152 | The endpoint to delete a specific event from a recording session by its session ID and event ID.
153 |
154 | **Request:**
155 |
156 | Path Parameters:
157 | - ``session_id``: The unique identifier of the recording session.
158 | - ``event_id``: The unique identifier of the event within the recording session.
159 |
160 | **Response:**
161 |
162 | Returns a JSON response containing the updated recording session details without the deleted event.
163 |
164 | .. code-block:: json
165 |
166 | {
167 | "id": "session_uuid",
168 | "description": "Session Description",
169 | "start_time": 1622547600,
170 | "end_time": 1622547615,
171 | "events": [
172 | {
173 | "id": "uuid",
174 | "type": "click",
175 | "timestamp": 1622547605,
176 | "coordinates": {
177 | "x": 100,
178 | "y": 200
179 | },
180 | "screenshot_path": "path/to/screenshot",
181 | "click_data": {
182 | "button": "left",
183 | "pressed": true
184 | }
185 | }
186 | // Other events
187 | ]
188 | }
189 |
190 | This endpoint allows you to delete a specific event from a recording session.
191 |
192 | GET /active_sessions
193 | ^^^^^^^^^^^^^^^^^^^^
194 |
195 | This endpoint lists all active recording sessions.
196 |
197 | **Response:**
198 |
199 | Returns a JSON response containing a list of session IDs for all active recording sessions.
200 |
201 | .. code-block:: json
202 |
203 | {
204 | "recordings": [
205 | "session_id_1",
206 | "session_id_2",
207 | // Other session IDs
208 | ]
209 | }
210 |
211 | This endpoint allows you to retrieve a list of all active recording sessions.
212 |
213 | GET /recordings/{session_id}/actions
214 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
215 |
216 | This endpoint retrieves a list of actions for a specific recording session.
217 |
218 | **Parameters:**
219 |
220 | - ``session_id``: The unique identifier for the recording session.
221 |
222 | **Response:**
223 |
224 | Returns a JSON response containing a list of actions for the specified recording session.
225 |
226 | .. code-block:: json
227 |
228 | {
229 | "actions": [
230 | {
231 | "id": "action_uuid",
232 | "type": "click",
233 | "timestamp": 1622547605,
234 | "details": {
235 | "coordinates": {
236 | "x": 100,
237 | "y": 200
238 | },
239 | "button": "left",
240 | "pressed": true
241 | }
242 | },
243 | {
244 | "id": "action_uuid",
245 | "type": "keypress",
246 | "timestamp": 1622547610,
247 | "details": {
248 | "key": "space"
249 | }
250 | }
251 | // Other actions
252 | ]
253 | }
254 |
255 | This endpoint allows you to retrieve a list of all actions (clicks, keypresses, etc.) that occurred during a specific recording session.
256 |
257 |
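A minimal client sketch of the full workflow (it assumes the service is reachable on ``localhost:8000``; field names follow the request and response bodies shown above):

.. code-block:: python

   import requests

   base = "http://localhost:8000"

   # Start a session, stop it, then fetch the recorded events.
   session_id = requests.post(
       f"{base}/recordings", json={"description": "demo"}
   ).json()["session_id"]
   requests.post(f"{base}/recordings/{session_id}/stop")
   events = requests.get(f"{base}/recordings/{session_id}").json()["events"]
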
--------------------------------------------------------------------------------
/tests/test_server.py:
--------------------------------------------------------------------------------
1 | from httpx import AsyncClient
2 | from unittest.mock import patch
3 | import pytest
4 | from agentd.server import app
5 | from agentd.recording import RecordingSession
6 |
7 |
8 | @pytest.mark.asyncio
9 | async def test_root():
10 | async with AsyncClient(app=app, base_url="http://test") as ac:
11 | response = await ac.get("/")
12 | assert response.status_code == 200
13 | assert response.json() == {"message": "Agent in the shell"}
14 |
15 |
16 | @pytest.mark.asyncio
17 | async def test_health():
18 | async with AsyncClient(app=app, base_url="http://test") as ac:
19 | response = await ac.get("/health")
20 | assert response.status_code == 200
21 | assert response.json() == {"status": "ok"}
22 |
23 |
24 | @pytest.mark.asyncio
25 | async def test_info():
26 | async with AsyncClient(app=app, base_url="http://test") as ac:
27 | response = await ac.get("/info")
28 | assert response.status_code == 200
29 | assert "last_activity_ts" in response.json()
30 | assert "screen_size" in response.json()
31 | assert "os_info" in response.json()
32 | assert "code_version" in response.json()
33 |
34 |
35 | @pytest.mark.asyncio
36 | async def test_screen_size():
37 | async with AsyncClient(app=app, base_url="http://test") as ac:
38 | response = await ac.get("/screen_size")
39 | assert response.status_code == 200
40 | assert "x" in response.json()
41 | assert "y" in response.json()
42 |
43 |
44 | @pytest.mark.asyncio
45 | async def test_mouse_coordinates():
46 | async with AsyncClient(app=app, base_url="http://test") as ac:
47 | response = await ac.get("/mouse_coordinates")
48 | assert response.status_code == 200
49 | assert "x" in response.json()
50 | assert "y" in response.json()
51 |
52 |
53 | @pytest.mark.asyncio
54 | async def test_system_usage():
55 | async with AsyncClient(app=app, base_url="http://test") as ac:
56 | response = await ac.get("/system_usage")
57 | assert response.status_code == 200
58 | assert "cpu_percent" in response.json()
59 | assert "memory_percent" in response.json()
60 | assert "disk_percent" in response.json()
61 |
62 |
63 | @pytest.mark.asyncio
64 | async def test_open_url():
65 |     with patch("agentd.server.is_firefox_running", return_value=False), patch(
66 |         "agentd.server.gracefully_terminate_firefox"
67 |     ) as mock_terminate, patch(
68 |         "agentd.server.is_firefox_window_open", return_value=True
69 |     ), patch("agentd.server.maximize_firefox_window"), patch(
70 |         "agentd.server.subprocess.Popen"
71 |     ) as mock_popen:
72 |         async with AsyncClient(app=app, base_url="http://test") as ac:
73 |             response = await ac.post("/v1/open_url", json={"url": "http://example.com"})
74 |
75 | assert response.status_code == 200
76 | assert response.json() == {"status": "success"}
77 | mock_terminate.assert_not_called()
78 | mock_popen.assert_called_once()
79 |
80 |
81 | @pytest.mark.asyncio
82 | async def test_move_mouse():
83 | async with AsyncClient(app=app, base_url="http://test") as ac:
84 | response = await ac.post(
85 |             "/v1/move_mouse", json={"x": 100, "y": 200, "duration": 1.0, "tween": "linear"}
86 | )
87 | assert response.status_code == 200
88 | assert response.json() == {"status": "success"}
89 |
90 |
91 | @pytest.mark.asyncio
92 | async def test_click():
93 | async with AsyncClient(app=app, base_url="http://test") as ac:
94 |         response = await ac.post("/v1/click", json={"button": "left"})
95 | assert response.status_code == 200
96 | assert response.json() == {"status": "success"}
97 |
98 |
99 | @pytest.mark.asyncio
100 | async def test_double_click():
101 | async with AsyncClient(app=app, base_url="http://test") as ac:
102 |         response = await ac.post("/v1/double_click", json={"button": "left"})
103 | assert response.status_code == 200
104 | assert response.json() == {"status": "success"}
105 |
106 |
107 | @pytest.mark.asyncio
108 | async def test_type_text():
109 | async with AsyncClient(app=app, base_url="http://test") as ac:
110 | response = await ac.post(
111 |             "/v1/type_text",
112 | json={"text": "hello", "min_interval": 0.05, "max_interval": 0.25},
113 | )
114 | assert response.status_code == 200
115 | assert response.json() == {"status": "success"}
116 |
117 |
118 | @pytest.mark.asyncio
119 | async def test_press_key():
120 | async with AsyncClient(app=app, base_url="http://test") as ac:
121 |         response = await ac.post("/v1/press_key", json={"key": "enter"})
122 | assert response.status_code == 200
123 | assert response.json() == {"status": "success"}
124 |
125 |
126 | @pytest.mark.asyncio
127 | async def test_scroll():
128 | async with AsyncClient(app=app, base_url="http://test") as ac:
129 |         response = await ac.post("/v1/scroll", json={"clicks": 3})
130 | assert response.status_code == 200
131 | assert response.json() == {"status": "success"}
132 |
133 |
134 | @pytest.mark.asyncio
135 | async def test_drag_mouse():
136 | with patch("agentd.server.pyautogui.dragTo") as mock_dragTo:
137 | async with AsyncClient(app=app, base_url="http://test") as ac:
138 |             response = await ac.post("/v1/drag_mouse", json={"x": 300, "y": 400})
139 |
140 | assert response.status_code == 200
141 | assert response.json() == {"status": "success"}
142 | mock_dragTo.assert_called_once_with(300, 400)
143 |
144 |
145 | @pytest.mark.asyncio
146 | async def test_take_screenshot():
147 | async with AsyncClient(app=app, base_url="http://test") as ac:
148 |         response = await ac.post("/v1/screenshot")
149 | assert response.status_code == 200
150 | assert "status" in response.json()
151 | assert response.json()["status"] == "success"
152 |     assert "images" in response.json()
153 |     assert isinstance(response.json()["images"], list)
154 |
155 |
156 | # The `mocker` fixture used below is provided by the pytest-mock plugin
157 | # (assumed to be installed). A hand-rolled MagicMock stand-in would record
158 | # `.patch()` calls without actually patching anything, so the real plugin
159 | # is needed for the workflow test to isolate the recording endpoints.
160 | 
161 | 
162 |
163 | @pytest.mark.asyncio
164 | async def test_recording_workflow(mocker):
165 | async with AsyncClient(app=app, base_url="http://test") as ac:
166 |
167 | # Test start recording
168 | description = "Test recording"
169 | response_start = await ac.post("/recordings", json={"description": description})
170 | assert response_start.status_code == 200
171 | assert "session_id" in response_start.json()
172 | session_id = response_start.json()["session_id"]
173 |
174 | # Test list recordings
175 | mocker.patch("agentd.server.list_recordings", return_value={"recordings": []})
176 | response_list = await ac.get("/recordings")
177 | assert response_list.status_code == 200
178 | recordings_list = response_list.json()["recordings"]
179 | assert session_id in recordings_list
180 |
181 | # Test stop recording
182 | mocker.patch(
183 | "agentd.server.sessions.get",
184 | return_value=RecordingSession(session_id, "Test"),
185 | )
186 | mocker.patch("agentd.server.RecordingSession.stop", return_value=None)
187 | mocker.patch(
188 | "agentd.server.RecordingSession.save_to_file", return_value="path/to/file"
189 | )
190 | response_stop = await ac.post(f"/recordings/{session_id}/stop")
191 | assert response_stop.status_code == 200
192 |
193 | # Test get recording
194 | response_get = await ac.get(f"/recordings/{session_id}")
195 | assert response_get.status_code == 200
196 | assert "id" in response_get.json()
197 | assert "end_time" in response_get.json()
198 |
199 | # Test delete event
200 | event_id = "test_event"
201 | session = RecordingSession(session_id, "Test")
202 | mocker.patch("agentd.server.sessions.get", return_value=session)
203 | mocker.patch("agentd.server.RecordingSession.delete_event", return_value=None)
204 | response_delete_event = await ac.delete(
205 | f"/recordings/{session_id}/event/{event_id}"
206 | )
207 | assert response_delete_event.status_code == 200
208 | assert "id" in response_delete_event.json()
209 |
210 | # Test get actions
211 | mocker.patch(
212 | "agentd.server.sessions.get",
213 | return_value=RecordingSession(session_id, "Test"),
214 | )
215 | response_get_actions = await ac.get(f"/recordings/{session_id}/actions")
216 | assert response_get_actions.status_code == 200
217 | assert "actions" in response_get_actions.json()
218 |
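219 | 
220 | # A minimal sketch of a test for the /v1/exec endpoint in agentd/server.py;
221 | # it assumes the test environment provides a shell with `echo` available.
222 | @pytest.mark.asyncio
223 | async def test_exec_command():
224 |     async with AsyncClient(app=app, base_url="http://test") as ac:
225 |         response = await ac.post("/v1/exec", json={"command": "echo hello"})
226 |     assert response.status_code == 200
227 |     assert response.json() == {"status": "success", "output": "hello"}
228 | 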
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM --platform=$TARGETPLATFORM lscr.io/linuxserver/webtop:latest@sha256:41109089fcf80d45b25e6e3d0d8a9ae9bd13568af2d020266e55c7159fc9f2eb
2 |
3 | RUN uname -m
4 | RUN cat /etc/alpine-release
5 |
6 | # Install necessary build tools and libraries
7 | RUN echo "http://dl-cdn.alpinelinux.org/alpine/v3.20/community" >> /etc/apk/repositories && \
8 | apk update && \
9 | apk add --no-cache \
10 | build-base \
11 | libffi-dev \
12 | openssl-dev \
13 | zlib-dev \
14 | bzip2-dev \
15 | readline-dev \
16 | sqlite-dev \
17 | ncurses-dev \
18 | xz-dev \
19 | bash \
20 | tk-dev \
21 | gdbm-dev \
22 | db-dev \
23 | libpcap-dev \
24 | linux-headers \
25 | curl \
26 | git \
27 | wget \
28 | scrot \
29 | xrandr \
30 | libx11 \
31 | libxext \
32 | libxcb \
33 | xauth \
34 | xwd \
35 | imagemagick \
36 | procps \
37 | xdotool \
38 | speech-dispatcher \
39 | xclip \
40 | gtk-murrine-engine \
41 | sassc \
42 | rsync \
43 | bc \
44 | optipng \
45 | zip \
46 | unzip \
47 | xmlstarlet \
48 | coreutils \
49 | glib-dev \
50 | libxml2-utils \
51 | mesa-gl \
52 | redis
53 |
54 | # Debug output: print the build-time working directory and user environment
55 | RUN pwd
56 | RUN echo $HOME
57 | RUN echo $USER
58 | RUN echo $LOGNAME
59 | RUN echo $SHELL
60 |
61 | RUN which readlink && readlink --version
62 |
63 | RUN mkdir -p /config/.themes /config/.icons /config/.wallpapers /config/.local /config/.config/gtk-3.0 /config/.config/glib-2.0 && \
64 | chown -R abc:abc /config/.themes /config/.icons /config/.wallpapers /config/.local /config/.config/gtk-3.0 /config/.config/glib-2.0
65 |
66 | # Set environment variables for Python installation
67 | ENV PYTHON_VERSION=3.12.1
68 | ENV PYENV_ROOT="/config/.pyenv"
69 | ENV PATH="$PYENV_ROOT/bin:$PATH"
70 |
71 | # Install pyenv as root
72 | RUN curl https://pyenv.run | bash
73 |
74 | # Change ownership of pyenv directories to user 'abc'
75 | RUN chown -R abc:abc /config/.pyenv
76 |
77 | # Create the application directory and set ownership to 'abc'
78 | RUN mkdir -p /config/app && chown -R abc:abc /config/app
79 |
80 | # Ensure the cache directory exists and is owned by 'abc'
81 | RUN mkdir -p /config/app/.cache && chown -R abc:abc /config/app/.cache
82 |
83 | # Switch to non-root user 'abc'
84 | USER abc
85 |
86 | # Create a shell script for environment setup
87 | RUN echo 'export PYENV_ROOT="/config/.pyenv"' > /config/app/pyenv_setup.sh && \
88 | echo 'export PATH="$PYENV_ROOT/bin:$PYENV_ROOT/shims:$PATH"' >> /config/app/pyenv_setup.sh && \
89 | echo 'eval "$(pyenv init --path)"' >> /config/app/pyenv_setup.sh && \
90 | echo 'eval "$(pyenv init -)"' >> /config/app/pyenv_setup.sh && \
91 | chmod +x /config/app/pyenv_setup.sh
92 |
93 | # Set working directory to '/config/app'
94 | WORKDIR /config/app
95 |
96 | # Copy project files (only pyproject.toml and poetry.lock to leverage caching)
97 | COPY --chown=abc:abc pyproject.toml README.md poetry.lock /config/app/
98 |
99 | # Install Python using pyenv as 'abc' by sourcing the setup script
100 | RUN XDG_CACHE_HOME=/config/app/.cache /bin/bash -c \
101 | "source /config/app/pyenv_setup.sh && pyenv install ${PYTHON_VERSION}" || \
102 | { echo "Build failed. Showing config.log:"; cat /tmp/python-build.*/Python-*/config.log; exit 1; }
103 |
104 | # Set the global Python version
105 | RUN XDG_CACHE_HOME=/config/app/.cache /bin/bash -c \
106 | "source /config/app/pyenv_setup.sh && pyenv global ${PYTHON_VERSION}"
107 |
108 | # Switch to user 'abc'
109 | USER abc
110 | RUN env
111 |
112 | # Install WhiteSur Themes and Wallpapers
113 | RUN export HOME=/config USER=abc LOGNAME=abc SHELL=/bin/bash && \
114 | \
115 | # Install WhiteSur GTK Theme
116 | git clone https://github.com/vinceliuice/WhiteSur-gtk-theme.git --depth=1 /config/.themes/WhiteSur-gtk-theme && \
117 | /bin/bash -ex /config/.themes/WhiteSur-gtk-theme/install.sh -d /config/.themes && \
118 | rm -rf /config/.themes/WhiteSur-gtk-theme && \
119 | \
120 | # Install WhiteSur Icon Theme
121 | git clone https://github.com/vinceliuice/WhiteSur-icon-theme.git --depth=1 /config/.icons/WhiteSur-icon-theme && \
122 | /bin/bash -ex /config/.icons/WhiteSur-icon-theme/install.sh -d /config/.icons && \
123 | rm -rf /config/.icons/WhiteSur-icon-theme && \
124 | \
125 | # Install WhiteSur Wallpapers
126 | git clone https://github.com/vinceliuice/WhiteSur-wallpapers.git --depth=1 /config/.wallpapers/WhiteSur-wallpapers && \
127 | /bin/bash -ex /config/.wallpapers/WhiteSur-wallpapers/install-wallpapers.sh -t monterey && \
128 | rm -rf /config/.wallpapers/WhiteSur-wallpapers
129 |
130 | RUN chown -R abc:abc /config/.themes /config/.icons /config/.local /config/.wallpapers
131 |
132 | # Copy (and overwrite) the Xfce desktop XML (wallpaper settings)
133 | COPY --chown=abc:abc ./theme/xfce4-desktop.xml /config/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-desktop.xml
134 |
135 | # Copy in xsettings.xml to set GTK theme, icon theme, cursor, and fonts
136 | COPY --chown=abc:abc ./theme/xsettings.xml /config/.config/xfce4/xfconf/xfce-perchannel-xml/xsettings.xml
137 |
138 | # Copy in xfwm4.xml to set the window manager theme and titlebar font
139 | COPY --chown=abc:abc ./theme/xfwm4.xml /config/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml
140 |
141 | # Copy in enable-compositing.desktop to enable compositing
142 | COPY --chown=abc:abc ./theme/enable-compositing.desktop /config/.config/autostart/enable-compositing.desktop
143 |
144 | # TODO: ?
145 | # VOLUME /config
146 |
147 | # Ensure 'abc' owns the pyenv directory after installation
148 | USER root
149 | RUN chown -R abc:abc /config/.pyenv
150 | USER abc
151 |
152 | # Create a virtual environment using the installed Python version
153 | RUN XDG_CACHE_HOME=/config/app/.cache /bin/bash -c \
154 | "source /config/app/pyenv_setup.sh && python -m venv /config/app/venv"
155 |
156 | # Update PATH to include the virtual environment's bin directory
157 | ENV PATH="/config/app/venv/bin:$PATH"
158 |
159 | # Set environment variable to prevent poetry from using keyring
160 | ENV POETRY_NO_KEYRING=1
161 |
162 | # Upgrade pip to the latest version
163 | RUN XDG_CACHE_HOME=/config/app/.cache /bin/bash -c \
164 | "source /config/app/pyenv_setup.sh && \
165 | source /config/app/venv/bin/activate && \
166 | pip install --no-cache-dir --upgrade pip"
167 |
168 | # Install project dependencies using Poetry
169 | RUN XDG_CACHE_HOME=/config/app/.cache \
170 | POETRY_CACHE_DIR=/config/app/.cache/pypoetry \
171 | /bin/bash -c "source /config/app/pyenv_setup.sh && \
172 | source /config/app/venv/bin/activate && \
173 | pip install --no-cache-dir poetry && \
174 | poetry install --no-root"
175 |
176 | # Copy the rest of your application code
177 | COPY --chown=abc:abc . /config/app/
178 |
179 | # Create the logs and recordings directories and set ownership to 'abc'
180 | RUN mkdir -p /config/app/logs && chown -R abc:abc /config/app/logs
181 | RUN mkdir -p /config/app/recordings && chown -R abc:abc /config/app/recordings
182 |
183 | # Switch back to root to set up the s6-overlay v3 services
184 | USER root
185 |
186 | ENV S6_LOGGING=1
187 | ENV S6_VERBOSITY=2
188 | ENV S6_KEEP_ENV=1
189 | ENV S6_RC_VERBOSE=1
190 |
191 | # Probably don't need, for compositing
192 | # COPY xconf_run /etc/s6-overlay/s6-rc.d/xconf/up
193 | # RUN echo 'oneshot' > /etc/s6-overlay/s6-rc.d/xconf/type
194 | # RUN ln -s ../xconf /etc/s6-overlay/s6-rc.d/user/contents.d/xconf
195 | # COPY ./theme/enable-compositing.desktop /etc/xdg/autostart/enable-compositing.desktop
196 |
197 |
198 | RUN touch /config/app/audit.log && chown abc:abc /config/app/audit.log && chmod 644 /config/app/audit.log
199 | RUN touch /config/app/logs/redis_env.log && chown abc:abc /config/app/logs/redis_env.log && chmod 644 /config/app/logs/redis_env.log
200 |
201 | RUN mkdir -p /config/app/logs/uvicorn && chown -R abc:abc /config/app/logs/uvicorn
202 |
203 | RUN mkdir -p /config/app/celery && chown -R abc:abc /config/app/celery && chmod 744 /config/app/celery
204 | RUN mkdir -p /config/.agentsea && chown -R abc:abc /config/.agentsea
205 | RUN mkdir -p /config/.agentsea/data && chown -R abc:abc /config/.agentsea/data
206 |
207 | # Create the s6-overlay v3 service directory for your application
208 | RUN mkdir -p /etc/s6-overlay/s6-rc.d/uvicorn
209 |
210 | # Create Redis service directory
211 | RUN mkdir -p /etc/s6-overlay/s6-rc.d/redis
212 |
213 | # Copy the s6-overlay v3 run script into the service directory
214 | COPY uvicorn_run /etc/s6-overlay/s6-rc.d/uvicorn/run
215 |
216 | # Copy the s6-overlay v3 run script into the service directory
217 | COPY redis_run /etc/s6-overlay/s6-rc.d/redis/run
218 |
219 | # Make the run script executable
220 | RUN chmod +x /etc/s6-overlay/s6-rc.d/uvicorn/run
221 |
222 | # Make the run script executable for redis
223 | RUN chmod +x /etc/s6-overlay/s6-rc.d/redis/run
224 |
225 | # Create the 'type' file for the service
226 | RUN echo 'longrun' > /etc/s6-overlay/s6-rc.d/uvicorn/type
227 |
228 | # Create the 'type' file for Redis service
229 | RUN echo 'longrun' > /etc/s6-overlay/s6-rc.d/redis/type
230 |
231 | # Enable the service by creating a symlink in the 'user' bundle
232 | RUN ln -s ../uvicorn /etc/s6-overlay/s6-rc.d/user/contents.d/uvicorn
233 |
234 | # Enable Redis service by creating a symlink in the 'user' bundle
235 | RUN ln -s ../redis /etc/s6-overlay/s6-rc.d/user/contents.d/redis
236 |
237 | RUN chown -R abc:abc /config/.agentsea/data
238 |
239 | COPY conf/kasm/run /etc/s6-overlay/s6-rc.d/svc-kasmvnc/run
240 |
241 | # Create the 'data' directory for the service and set the user
242 | # RUN mkdir -p /etc/s6-overlay/s6-rc.d/uvicorn/data && \
243 | # echo 'abc' > /etc/s6-overlay/s6-rc.d/uvicorn/data/user
244 |
245 | RUN echo 'abc' > /etc/s6-overlay/s6-rc.d/uvicorn/user
246 |
247 | # Set the user for Redis service
248 | RUN echo 'abc' > /etc/s6-overlay/s6-rc.d/redis/user
249 |
250 | ENV AGENTSEA_HOME=/config/.agentsea
251 |
252 | # Expose the port uvicorn is running on (if needed)
253 | EXPOSE 8000
254 |
255 | # Expose the Redis port; not strictly needed since Redis should only be used internally, but kept commented out just in case
256 | # EXPOSE 6379
--------------------------------------------------------------------------------
/Dockerfile.loaded:
--------------------------------------------------------------------------------
1 | FROM --platform=$TARGETPLATFORM lscr.io/linuxserver/webtop:latest@sha256:41109089fcf80d45b25e6e3d0d8a9ae9bd13568af2d020266e55c7159fc9f2eb
2 |
3 | RUN uname -m
4 | RUN cat /etc/alpine-release
5 |
6 | # Install necessary build tools and libraries
7 | RUN echo "http://dl-cdn.alpinelinux.org/alpine/v3.20/community" >> /etc/apk/repositories && \
8 | apk update && \
9 | apk add --no-cache \
10 | build-base \
11 | libffi-dev \
12 | openssl-dev \
13 | zlib-dev \
14 | bzip2-dev \
15 | readline-dev \
16 | sqlite-dev \
17 | ncurses-dev \
18 | xz-dev \
19 | bash \
20 | tk-dev \
21 | gdbm-dev \
22 | db-dev \
23 | libpcap-dev \
24 | linux-headers \
25 | curl \
26 | git \
27 | wget \
28 | scrot \
29 | xrandr \
30 | libx11 \
31 | libxext \
32 | libxcb \
33 | xauth \
34 | xwd \
35 | imagemagick \
36 | procps \
37 | xdotool \
38 | speech-dispatcher \
39 | xclip \
40 | gtk-murrine-engine \
41 | sassc \
42 | rsync \
43 | bc \
44 | optipng \
45 | zip \
46 | unzip \
47 | xmlstarlet \
48 | coreutils \
49 | glib-dev \
50 | libxml2-utils \
51 | mesa-gl \
52 | redis
53 |
54 | # Debug output: print the build-time working directory and user environment
55 | RUN pwd
56 | RUN echo $HOME
57 | RUN echo $USER
58 | RUN echo $LOGNAME
59 | RUN echo $SHELL
60 |
61 | RUN which readlink && readlink --version
62 |
63 | RUN mkdir -p /config/.themes /config/.icons /config/.wallpapers /config/.local /config/.config/gtk-3.0 /config/.config/glib-2.0 && \
64 | chown -R abc:abc /config/.themes /config/.icons /config/.wallpapers /config/.local /config/.config/gtk-3.0 /config/.config/glib-2.0
65 |
66 | # Set environment variables for Python installation
67 | ENV PYTHON_VERSION=3.12.1
68 | ENV PYENV_ROOT="/config/.pyenv"
69 | ENV PATH="$PYENV_ROOT/bin:$PATH"
70 |
71 | # Install pyenv as root
72 | RUN curl https://pyenv.run | bash
73 |
74 | # Change ownership of pyenv directories to user 'abc'
75 | RUN chown -R abc:abc /config/.pyenv
76 |
77 | # Create the application directory and set ownership to 'abc'
78 | RUN mkdir -p /config/app && chown -R abc:abc /config/app
79 |
80 | # Ensure the cache directory exists and is owned by 'abc'
81 | RUN mkdir -p /config/app/.cache && chown -R abc:abc /config/app/.cache
82 |
83 | # Switch to non-root user 'abc'
84 | USER abc
85 |
86 | # Create a shell script for environment setup
87 | RUN echo 'export PYENV_ROOT="/config/.pyenv"' > /config/app/pyenv_setup.sh && \
88 | echo 'export PATH="$PYENV_ROOT/bin:$PYENV_ROOT/shims:$PATH"' >> /config/app/pyenv_setup.sh && \
89 | echo 'eval "$(pyenv init --path)"' >> /config/app/pyenv_setup.sh && \
90 | echo 'eval "$(pyenv init -)"' >> /config/app/pyenv_setup.sh && \
91 | chmod +x /config/app/pyenv_setup.sh
92 |
93 | # Set working directory to '/config/app'
94 | WORKDIR /config/app
95 |
96 | # Copy project files (only pyproject.toml and poetry.lock to leverage caching)
97 | COPY --chown=abc:abc pyproject.toml README.md poetry.lock /config/app/
98 |
99 | # Install Python using pyenv as 'abc' by sourcing the setup script
100 | RUN XDG_CACHE_HOME=/config/app/.cache /bin/bash -c \
101 | "source /config/app/pyenv_setup.sh && pyenv install ${PYTHON_VERSION}" || \
102 | { echo "Build failed. Showing config.log:"; cat /tmp/python-build.*/Python-*/config.log; exit 1; }
103 |
104 | # Set the global Python version
105 | RUN XDG_CACHE_HOME=/config/app/.cache /bin/bash -c \
106 | "source /config/app/pyenv_setup.sh && pyenv global ${PYTHON_VERSION}"
107 |
108 | # Switch to user 'abc'
109 | USER abc
110 | RUN env
111 |
112 | # Install WhiteSur Themes and Wallpapers
113 | RUN export HOME=/config USER=abc LOGNAME=abc SHELL=/bin/bash && \
114 | \
115 | # Install WhiteSur GTK Theme
116 | git clone https://github.com/vinceliuice/WhiteSur-gtk-theme.git --depth=1 /config/.themes/WhiteSur-gtk-theme && \
117 | /bin/bash -ex /config/.themes/WhiteSur-gtk-theme/install.sh -d /config/.themes && \
118 | rm -rf /config/.themes/WhiteSur-gtk-theme && \
119 | \
120 | # Install WhiteSur Icon Theme
121 | git clone https://github.com/vinceliuice/WhiteSur-icon-theme.git --depth=1 /config/.icons/WhiteSur-icon-theme && \
122 | /bin/bash -ex /config/.icons/WhiteSur-icon-theme/install.sh -d /config/.icons && \
123 | rm -rf /config/.icons/WhiteSur-icon-theme && \
124 | \
125 | # Install WhiteSur Wallpapers
126 | git clone https://github.com/vinceliuice/WhiteSur-wallpapers.git --depth=1 /config/.wallpapers/WhiteSur-wallpapers && \
127 | /bin/bash -ex /config/.wallpapers/WhiteSur-wallpapers/install-wallpapers.sh -t monterey && \
128 | rm -rf /config/.wallpapers/WhiteSur-wallpapers
129 |
130 | RUN chown -R abc:abc /config/.themes /config/.icons /config/.local /config/.wallpapers
131 |
132 | # Copy (and overwrite) the Xfce desktop XML (wallpaper settings)
133 | COPY --chown=abc:abc ./theme/xfce4-desktop.xml /config/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-desktop.xml
134 |
135 | # Copy in xsettings.xml to set GTK theme, icon theme, cursor, and fonts
136 | COPY --chown=abc:abc ./theme/xsettings.xml /config/.config/xfce4/xfconf/xfce-perchannel-xml/xsettings.xml
137 |
138 | # Copy in xfwm4.xml to set the window manager theme and titlebar font
139 | COPY --chown=abc:abc ./theme/xfwm4.xml /config/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml
140 |
141 | # Copy in enable-compositing.desktop to enable compositing
142 | COPY --chown=abc:abc ./theme/enable-compositing.desktop /config/.config/autostart/enable-compositing.desktop
143 |
144 | # TODO: ?
145 | # VOLUME /config
146 |
147 | # Ensure 'abc' owns the pyenv directory after installation
148 | USER root
149 | RUN chown -R abc:abc /config/.pyenv
150 | USER abc
151 |
152 | # Create a virtual environment using the installed Python version
153 | RUN XDG_CACHE_HOME=/config/app/.cache /bin/bash -c \
154 | "source /config/app/pyenv_setup.sh && python -m venv /config/app/venv"
155 |
156 | # Update PATH to include the virtual environment's bin directory
157 | ENV PATH="/config/app/venv/bin:$PATH"
158 |
159 | # Set environment variable to prevent poetry from using keyring
160 | ENV POETRY_NO_KEYRING=1
161 |
162 | # Upgrade pip to the latest version
163 | RUN XDG_CACHE_HOME=/config/app/.cache /bin/bash -c \
164 | "source /config/app/pyenv_setup.sh && \
165 | source /config/app/venv/bin/activate && \
166 | pip install --no-cache-dir --upgrade pip"
167 |
168 | # Install project dependencies using Poetry
169 | RUN XDG_CACHE_HOME=/config/app/.cache \
170 | POETRY_CACHE_DIR=/config/app/.cache/pypoetry \
171 | /bin/bash -c "source /config/app/pyenv_setup.sh && \
172 | source /config/app/venv/bin/activate && \
173 | pip install --no-cache-dir poetry && \
174 | poetry install --no-root"
175 |
176 | # Copy the rest of your application code
177 | COPY --chown=abc:abc . /config/app/
178 |
179 | # Create the logs and recordings directories and set ownership to 'abc'
180 | RUN mkdir -p /config/app/logs && chown -R abc:abc /config/app/logs
181 | RUN mkdir -p /config/app/recordings && chown -R abc:abc /config/app/recordings
182 |
183 | # Switch back to root to set up the s6-overlay v3 services
184 | USER root
185 |
186 | ENV S6_LOGGING=1
187 | ENV S6_VERBOSITY=2
188 | ENV S6_KEEP_ENV=1
189 | ENV S6_RC_VERBOSE=1
190 |
191 | # Probably don't need, for compositing
192 | # COPY xconf_run /etc/s6-overlay/s6-rc.d/xconf/up
193 | # RUN echo 'oneshot' > /etc/s6-overlay/s6-rc.d/xconf/type
194 | # RUN ln -s ../xconf /etc/s6-overlay/s6-rc.d/user/contents.d/xconf
195 | # COPY ./theme/enable-compositing.desktop /etc/xdg/autostart/enable-compositing.desktop
196 |
197 |
198 | RUN touch /config/app/audit.log && chown abc:abc /config/app/audit.log && chmod 644 /config/app/audit.log
199 | RUN touch /config/app/logs/redis_env.log && chown abc:abc /config/app/logs/redis_env.log && chmod 644 /config/app/logs/redis_env.log
200 |
201 | RUN mkdir -p /config/app/logs/uvicorn && chown -R abc:abc /config/app/logs/uvicorn
202 |
203 | RUN mkdir -p /config/app/celery && chown -R abc:abc /config/app/celery && chmod 744 /config/app/celery
204 | RUN mkdir -p /config/.agentsea && chown -R abc:abc /config/.agentsea
205 | RUN mkdir -p /config/.agentsea/data && chown -R abc:abc /config/.agentsea/data
206 |
207 | # Create the s6-overlay v3 service directory for your application
208 | RUN mkdir -p /etc/s6-overlay/s6-rc.d/uvicorn
209 |
210 | # Create Redis service directory
211 | RUN mkdir -p /etc/s6-overlay/s6-rc.d/redis
212 |
213 | # Copy the s6-overlay v3 run script into the service directory
214 | COPY uvicorn_run /etc/s6-overlay/s6-rc.d/uvicorn/run
215 |
216 | # Copy the s6-overlay v3 run script into the service directory
217 | COPY redis_run /etc/s6-overlay/s6-rc.d/redis/run
218 |
219 | # Make the run script executable
220 | RUN chmod +x /etc/s6-overlay/s6-rc.d/uvicorn/run
221 |
222 | # Make the run script executable for redis
223 | RUN chmod +x /etc/s6-overlay/s6-rc.d/redis/run
224 |
225 | # Create the 'type' file for the service
226 | RUN echo 'longrun' > /etc/s6-overlay/s6-rc.d/uvicorn/type
227 |
228 | # Create the 'type' file for Redis service
229 | RUN echo 'longrun' > /etc/s6-overlay/s6-rc.d/redis/type
230 |
231 | # Enable the service by creating a symlink in the 'user' bundle
232 | RUN ln -s ../uvicorn /etc/s6-overlay/s6-rc.d/user/contents.d/uvicorn
233 |
234 | # Enable Redis service by creating a symlink in the 'user' bundle
235 | RUN ln -s ../redis /etc/s6-overlay/s6-rc.d/user/contents.d/redis
236 |
237 | RUN chown -R abc:abc /config/.agentsea/data
238 |
239 | COPY conf/kasm/run /etc/s6-overlay/s6-rc.d/svc-kasmvnc/run
240 |
241 | # Create the 'data' directory for the service and set the user
242 | # RUN mkdir -p /etc/s6-overlay/s6-rc.d/uvicorn/data && \
243 | # echo 'abc' > /etc/s6-overlay/s6-rc.d/uvicorn/data/user
244 |
245 | RUN echo 'abc' > /etc/s6-overlay/s6-rc.d/uvicorn/user
246 |
247 | # Set the user for Redis service
248 | RUN echo 'abc' > /etc/s6-overlay/s6-rc.d/redis/user
249 |
250 | ENV AGENTSEA_HOME=/config/.agentsea
251 |
252 | # Install extras
253 | RUN apk add --no-cache \
254 | libreoffice \
255 | gimp \
256 | inkscape \
257 | vlc \
258 | thunderbird \
259 | audacity \
260 | filezilla \
261 | evolution \
262 | kodi \
263 | handbrake \
264 | openmpi-dev
265 |
266 | RUN set -e; \
267 | mkdir -p /config/Desktop && \
268 | \
269 | ##### 1) Copy the selected launchers if they exist #####
270 | for file in /usr/share/applications/libreoffice-*.desktop \
271 | /usr/share/applications/gimp*.desktop \
272 | /usr/share/applications/inkscape*.desktop \
273 | /usr/share/applications/audacity.desktop \
274 | /usr/share/applications/kodi.desktop \
275 | /usr/share/applications/firefox.desktop \
276 | /usr/share/applications/mousepad.desktop; do \
277 | [ -e "$file" ] || continue; \
278 | name=$(basename "$file"); \
279 | target="/config/Desktop/$name"; \
280 | [ -f "$target" ] || cp "$file" "$target"; \
281 | done && \
282 | \
283 | ##### 2) Final permissions #####
284 | chmod +x /config/Desktop/*.desktop && \
285 | chown -R abc:abc /config/Desktop
286 |
287 | # Expose the port uvicorn is running on (if needed)
288 | EXPOSE 8000
289 |
290 | # Expose the Redis port; not strictly needed since Redis should only be used internally, but kept commented out just in case
291 | # EXPOSE 6379
--------------------------------------------------------------------------------
/agentd/server.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import getpass
3 | import logging
4 | import os
5 | import platform
6 | import random
7 | import subprocess
8 | import sys
9 | import threading
10 | import time
11 | import uuid
12 | from datetime import datetime
13 | from typing import Optional
14 | import pyperclip
15 | import requests
16 |
17 | import psutil
18 | import pyautogui
19 | from fastapi import Body, FastAPI, HTTPException, Request
20 | from fastapi.middleware.cors import CORSMiddleware
21 | from fastapi.responses import FileResponse
22 | from pydantic import BaseModel
23 | from taskara.task import Task
24 |
25 | from agentd.util import log_subprocess_output
26 |
27 | from .firefox import (
28 | gracefully_terminate_firefox,
29 | is_firefox_running,
30 | is_firefox_window_open,
31 | maximize_firefox_window,
32 | )
33 | from .models import (
34 | ClickModel,
35 | CoordinatesModel,
36 | DragMouseModel,
37 | MoveMouseModel,
38 | OpenURLModel,
39 | PressKeyModel,
40 | PressKeysModel,
41 | RecordRequest,
42 | RecordResponse,
43 | ScreenshotResponseModel,
44 | ScreenSizeModel,
45 | ScrollModel,
46 | SystemInfoModel,
47 | SystemUsageModel,
48 | TypeTextModel,
49 | StopRequest,
50 | useSecretRequest,
51 | getSecretRequest
52 | )
53 | from .recording import RecordingSession, lock
54 |
55 | # `logging` is already imported above; only the config submodule is needed here
56 | import logging.config
57 | from .logging_config import LOGGING_CONFIG  # or wherever you store the config
58 |
59 | logging.config.dictConfig(LOGGING_CONFIG)
60 |
61 | # Create logger instances
62 | api_logger = logging.getLogger("api")
63 |
64 | current_user: str = getpass.getuser()
65 | api_logger.info(f"current user: {current_user}")
66 |
67 | active_session: Optional[RecordingSession] = None
68 |
69 | app = FastAPI()
70 |
71 | app.add_middleware(
72 | CORSMiddleware,
73 | allow_origins=["*"],
74 | allow_credentials=True,
75 | allow_methods=["*"],
76 | allow_headers=["*"],
77 | )
78 |
79 |
80 | @app.middleware("http")
81 | async def log_requests(request: Request, call_next):
82 | # Log the request details
83 | api_logger.info(f"Method: {request.method} Path: {request.url.path}")
84 | response = await call_next(request)
85 | return response
86 |
87 |
88 | @app.get("/")
89 | async def root():
90 | return {"message": "Agent in the shell"}
91 |
92 |
93 | @app.get("/health")
94 | async def health():
95 | return {"status": "ok"}
96 |
97 |
98 | @app.get("/v1/info", response_model=SystemInfoModel)
99 | async def get_info():
100 | # Screen size
101 | width, height = pyautogui.size()
102 | screen_size = ScreenSizeModel(x=width, y=height)
103 |
104 | # OS Info
105 | os_info = f"{platform.system()} {platform.release()}"
106 |
107 | # Code Version (Git)
108 | try:
109 | code_version = (
110 | subprocess.check_output(["git", "rev-parse", "HEAD"])
111 | .decode("utf-8")
112 | .strip()
113 | )
114 | except Exception:
115 | code_version = None
116 |
117 | # Last Activity from log
118 | try:
119 | with open("audit.log", "r") as f:
120 | lines = f.readlines()
121 | last_activity_unix = None
122 | if lines:
123 | last_line = lines[-1]
124 | last_activity_str = last_line.split(" - ")[0]
125 | last_activity_datetime = datetime.strptime(
126 | last_activity_str, "%Y-%m-%d %H:%M:%S"
127 | )
128 | last_activity_unix = int(
129 | time.mktime(last_activity_datetime.timetuple())
130 | )
131 | except Exception:
132 | last_activity_unix = None
133 |
134 | return SystemInfoModel(
135 | last_activity_ts=last_activity_unix,
136 | screen_size=screen_size,
137 | os_info=os_info,
138 | code_version=code_version,
139 | )
140 |
141 |
142 | @app.get("/v1/screen_size")
143 | def get_screen_size() -> ScreenSizeModel:
144 | width, height = pyautogui.size()
145 | return ScreenSizeModel(x=width, y=height)
146 |
147 |
148 | @app.get("/v1/mouse_coordinates")
149 | async def mouse_coordinates() -> CoordinatesModel:
150 | x, y = pyautogui.position()
151 | return CoordinatesModel(x=x, y=y) # type: ignore
152 |
153 |
154 | @app.post("/v1/open_url")
155 | async def open_url(request: OpenURLModel):
156 | try:
157 | firefox_pids = is_firefox_running()
158 | if firefox_pids:
159 | api_logger.info("Firefox is running. Restarting it...")
160 | gracefully_terminate_firefox(firefox_pids)
161 | time.sleep(5)
162 |
163 | api_logger.info("Starting Firefox...")
164 | subprocess.Popen(
165 | [
166 | "firefox",
167 | request.url,
168 | ],
169 | stdout=sys.stdout,
170 | stderr=sys.stderr,
171 | )
172 |
173 | while not is_firefox_window_open():
174 | time.sleep(1)
175 | api_logger.info("Waiting for the Firefox window to open...")
176 |
177 | maximize_firefox_window()
178 |
179 | return {"status": "success"}
180 |
181 | except Exception as e:
182 | return {"status": "error", "message": str(e)}
183 |
184 |
185 | @app.post("/v1/move_mouse")
186 | async def move_mouse_to(request: MoveMouseModel):
187 | try:
188 | tween_func = getattr(pyautogui, request.tween, pyautogui.linear)
189 | pyautogui.moveTo(
190 | request.x, request.y, duration=request.duration, tween=tween_func
191 | )
192 | return {"status": "success"}
193 | except Exception as e:
194 | return {"status": "error", "message": str(e)}
195 |
196 |
197 | @app.post("/v1/click")
198 | async def click(request: ClickModel):
199 | if request.location:
200 | tween_func = getattr(pyautogui, request.location.tween, pyautogui.linear)
201 | pyautogui.moveTo(
202 | request.location.x,
203 | request.location.y,
204 | duration=request.location.duration,
205 | tween=tween_func,
206 | )
207 | try:
208 | pyautogui.click(button=request.button)
209 | return {"status": "success"}
210 | except Exception as e:
211 | raise HTTPException(status_code=500, detail=str(e))
212 |
213 |
214 | @app.post("/v1/double_click")
215 | async def double_click(request: ClickModel):
216 | if request.location:
217 | tween_func = getattr(pyautogui, request.location.tween, pyautogui.linear)
218 | pyautogui.moveTo(
219 | request.location.x,
220 | request.location.y,
221 | duration=request.location.duration,
222 | tween=tween_func,
223 | )
224 | try:
225 | pyautogui.doubleClick(button=request.button)
226 | return {"status": "success"}
227 | except Exception as e:
228 | raise HTTPException(status_code=500, detail=str(e))
229 |
230 |
231 | @app.post("/v1/type_text")
232 | async def type_text(request: TypeTextModel):
233 | try:
234 | for char in request.text:
235 | pyautogui.write(
236 | char,
237 | interval=random.uniform(request.min_interval, request.max_interval),
238 | )
239 | time.sleep(random.uniform(request.min_interval, request.max_interval))
240 | return {"status": "success"}
241 | except Exception as e:
242 | raise HTTPException(status_code=500, detail=str(e))
243 |
244 |
245 | @app.post("/v1/press_key")
246 | async def press_key(request: PressKeyModel):
247 | try:
248 | pyautogui.press(request.key)
249 | return {"status": "success"}
250 | except Exception as e:
251 | raise HTTPException(status_code=500, detail=str(e))
252 |
253 |
254 | @app.post("/v1/hot_key")
255 | async def hot_key(request: PressKeysModel):
256 | try:
257 | pyautogui.hotkey(*request.keys)
258 | return {"status": "success"}
259 | except Exception as e:
260 | raise HTTPException(status_code=500, detail=str(e))
261 |
262 |
263 | @app.post("/v1/scroll")
264 | async def scroll(request: ScrollModel):
265 | try:
266 | # clicks > 0: scrolls UP
267 | # clicks < 0: scrolls DOWN
268 | pyautogui.scroll(request.clicks)
269 | return {"status": "success"}
270 | except Exception as e:
271 | raise HTTPException(status_code=500, detail=str(e))
272 |
273 |
274 | @app.post("/v1/drag_mouse")
275 | async def drag_mouse(request: DragMouseModel):
276 | try:
277 | pyautogui.dragTo(request.x, request.y)
278 | return {"status": "success"}
279 | except Exception as e:
280 | raise HTTPException(status_code=500, detail=str(e))
281 |
282 |
283 | @app.post("/v1/screenshot", response_model=ScreenshotResponseModel)
284 | async def take_screenshot(
285 | count: int = 1, delay: float = 0.0
286 | ) -> ScreenshotResponseModel:
287 | try:
288 | os.environ["DISPLAY"] = ":1.0"
289 |
290 | # Create a directory for screenshots if it doesn't exist
291 | screenshots_dir = "screenshots"
292 | os.makedirs(screenshots_dir, exist_ok=True)
293 |
294 | file_paths = []
295 |
296 | # Loop for the number of screenshots specified by 'count'
297 | for i in range(count):
298 | # Generate a unique file name based on the current timestamp and index
299 | timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
300 | file_path = os.path.join(
301 | screenshots_dir, f"screenshot_{timestamp}_{i + 1}.png"
302 | )
303 |
304 | # Use scrot to take a screenshot with the cursor (-p flag)
305 | subprocess.run(["scrot", "-z", "-p", file_path], check=True)
306 |
307 | file_paths.append(file_path)
308 |
309 | # Delay between screenshots if specified
310 | if i < count - 1:
311 | time.sleep(delay)
312 |
313 | # Now that all screenshots are taken, read, encode, and delete them
314 | encoded_images = []
315 |
316 | for file_path in file_paths:
317 | # Read and encode the image
318 | with open(file_path, "rb") as image_file:
319 | encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
320 | encoded_images.append(encoded_image)
321 |
322 | # Delete the file after encoding
323 | os.remove(file_path)
324 |
325 | # Return the list of encoded images
326 | response = ScreenshotResponseModel(
327 | status="success",
328 | images=encoded_images, # List of all encoded images
329 | )
330 |
331 | return response
332 |
333 | except Exception as e:
334 | raise HTTPException(status_code=500, detail=str(e))
335 |
336 |
337 | @app.post("/v1/exec")
338 | async def exec_command(command: str = Body(..., embed=True)):
339 | try:
340 | # Execute the provided command
341 | result = subprocess.run(
342 | command,
343 | shell=True,
344 | stdout=subprocess.PIPE,
345 | stderr=subprocess.PIPE,
346 | text=True,
347 | )
348 |
349 | # Check if the command was successful
350 | if result.returncode == 0:
351 | return {"status": "success", "output": result.stdout.strip()}
352 | else:
353 | return {
354 | "status": "error",
355 | "output": result.stderr.strip(),
356 | "return_code": result.returncode,
357 | }
358 |
359 | except Exception as e:
360 | raise HTTPException(status_code=500, detail=str(e))
361 |
362 | @app.post("/v1/use_secret")
363 | async def use_secret(request: useSecretRequest):
364 | global active_session
365 | api_logger.info(f"using secret {request.name} and applying {request.field}")
366 | try:
367 | # Get the secret
368 | url = f"{request.server_address}/v1/secrets/search"
369 |         json_data = {"name": request.name}
370 |         headers = {"Authorization": f"bearer {request.token}"}
371 | response = requests.post(url, json=json_data, headers=headers)
372 | # Check the response status
373 | if response.status_code != 200:
374 | api_logger.info(f"secret fetch failed, name: {request.name}, status_code: {response.status_code} detail: {response.text}")
375 | raise HTTPException(
376 | status_code=response.status_code,
377 | detail=f"Failed to fetch secret: {response.text}",
378 | )
379 | secrets = response.json()
380 | secret = secrets["secrets"][0]
381 | api_logger.info(f"secret fetched: {secret['name']}")
382 | event_time = time.time()
383 | try:
384 |             # TODO: encrypt secret values in transit and decrypt with a private key from
385 |             # the system env, rotating it periodically. HTTPS already protects the wire,
386 |             # but an extra layer keeps real secret values out of the browser's network tab.
387 | try:
388 | password = secret["value"][request.field]
389 | except KeyError:
390 | raise HTTPException(
391 | status_code=400,
392 | detail=f"Field '{request.field}' not found in the secret."
393 | )
394 | if active_session:
395 | active_session.pause_listeners()
396 | else:
397 | api_logger.error("secret used but without active session")
398 |
399 | for char in password:
400 | pyautogui.write(
401 | char,
402 | # interval=random.uniform(request.min_interval, request.max_interval),
403 | )
404 | # time.sleep(random.uniform(request.min_interval, request.max_interval))
405 | pyperclip.copy(password) # TODO consider copy paste instead of writing
406 |             api_logger.info("Secret text copied to clipboard.")
407 |
408 | if active_session:
409 | active_session.resume_listeners()
410 | active_session.record_useSecret_action(secret_name=secret['name'], field=request.field, event_time=event_time)
411 | else:
412 | api_logger.error("secret used but without active session")
413 |
414 | return {"status": "success"}
415 |         except Exception as e:
416 |             if active_session:
417 |                 active_session.resume_listeners()
418 |             raise e if isinstance(e, HTTPException) else HTTPException(status_code=500, detail=str(e))
419 |
420 |     except Exception as e:
421 |         raise e if isinstance(e, HTTPException) else HTTPException(status_code=500, detail=str(e))
422 |
423 | @app.post("/v1/get_secrets")
424 | async def get_secret(request: getSecretRequest):
425 |     api_logger.info(f"getting secrets from: {request.server_address}")
426 | try:
427 | # Get the secret
428 | url = f"{request.server_address}/v1/secrets"
429 |         headers = {"Authorization": f"bearer {request.token}"}
430 | response = requests.get(url, headers=headers)
431 | # Check the response status
432 | try:
433 | response.raise_for_status()
434 | except requests.exceptions.HTTPError as e:
435 |             # A 4xx/5xx Response is falsy, so compare against None to report the real status
436 |             if response is not None:
437 | status_code = response.status_code
438 | error_message = response.text
439 | else:
440 | status_code = 500
441 | error_message = f"An unknown error occurred: {str(e)}"
442 | raise HTTPException(
443 | status_code=status_code,
444 | detail=f"Error: {error_message}"
445 | )
446 | secrets = response.json()
447 |         api_logger.info(f"fetched {len(secrets['secrets'])} secrets")
448 | result = [{"name": secret["name"], "fields": list(secret["value"].keys())} for secret in secrets["secrets"]]
449 | return result
450 |
451 | except requests.RequestException as e:
452 | # Handle general request exceptions
453 | raise HTTPException(
454 | status_code=500,
455 | detail=f"An unknown error occurred: {str(e)}"
456 | )
457 |
458 |
459 | @app.get("/v1/system_usage", response_model=SystemUsageModel)
460 | async def system_usage():
461 | cpu_percent = psutil.cpu_percent()
462 | memory = psutil.virtual_memory()
463 | disk = psutil.disk_usage("/")
464 |
465 | return SystemUsageModel(
466 | cpu_percent=cpu_percent, # type: ignore
467 | memory_percent=memory.percent,
468 | disk_percent=disk.percent,
469 | )
470 |
471 |
472 | ##
473 | ### Demonstrate
474 | ##
475 |
476 |
477 | @app.post("/v1/start_recording", response_model=RecordResponse)
478 | async def start_recording(request: RecordRequest):
479 | global active_session
480 | session_id = str(uuid.uuid4())
481 |
482 | if not request.description and not request.task_id:
483 | raise HTTPException(
484 | status_code=400,
485 | detail="Either description or task_id must be provided",
486 | )
487 |
488 | if request.description:
489 | task = Task(
490 | description=request.description,
491 | remote=request.server_address,
492 | auth_token=request.token,
493 | owner_id=request.owner_id,
494 | skill=request.skill_id
495 | )
496 | else:
497 | tasks = Task.find(
498 | remote=request.server_address,
499 | id=request.task_id,
500 | auth_token=request.token,
501 | owner_id=request.owner_id,
502 | )
503 | if not tasks:
504 | raise HTTPException(status_code=404, detail="Task not found")
505 | task = tasks[0]
506 | # launching celery worker
507 | command = ["celery", "-A", "agentd.celery_worker", "worker", "--loglevel=debug"]
508 | subProc = subprocess.Popen(
509 | command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
510 | )
511 | # starting new thread below to capture worker logs in our stdout for uvicorn
512 | threading.Thread(
513 | target=log_subprocess_output,
514 | args=(subProc.stdout, "celery_worker"),
515 | daemon=True,
516 | ).start()
517 |
518 | with lock:
519 | if active_session:
520 | raise HTTPException(
521 | status_code=400,
522 | detail="A recording session is already active. Stop it first",
523 | )
524 | session = RecordingSession(id=session_id, task=task)
525 | session.start()
526 | active_session = session
527 | return RecordResponse(task_id=task.id)
528 |
529 |
530 | @app.post("/v1/stop_recording")
531 | async def stop_recording(request: StopRequest):
532 | global active_session
533 | with lock:
534 | if not active_session:
535 | raise HTTPException(status_code=404, detail="Session not found")
536 |         active_session.stop(result=request.result, comment=request.comment)
537 | api_logger.info("Stopped recording session")
538 |
539 | active_session = None
540 | return
541 |
542 |
543 | ##
544 | ### Video Recording
545 | ##
546 |
547 | video_recording_process = video_recording_file = None  # track the process and its output file
548 | video_recording_lock = threading.Lock()
549 | video_recordings_dir = "video_recordings"
550 | os.makedirs(video_recordings_dir, exist_ok=True)
551 |
552 |
553 | class VideoRecordRequest(BaseModel):
554 | framerate: int
555 |
556 |
557 | class VideoRecordResponse(BaseModel):
558 | session_id: str
559 |
560 |
561 | class VideoRecordings(BaseModel):
562 | recordings: list[str]
563 |
564 |
565 | class VideoRecordModel(BaseModel):
566 | status: str
567 | file_path: str
568 |
569 |
570 | @app.post("/v1/start_video_recording", response_model=VideoRecordResponse)
571 | async def start_video_recording(request: VideoRecordRequest):
572 |     global video_recording_process, video_recording_file
573 | with video_recording_lock:
574 | if video_recording_process is not None:
575 | raise HTTPException(
576 | status_code=400, detail="Video recording is already in progress."
577 | )
578 |
579 | session_id = str(uuid.uuid4())
580 |         file_path = os.path.join(video_recordings_dir, f"{session_id}.mp4")
581 |         video_recording_file = file_path  # remembered so stop can return the real path
582 | video_recording_process = subprocess.Popen(
583 | [
584 | "ffmpeg",
585 | "-video_size",
586 | "1280x800", # TODO we need to make this configurable like framerate
587 | "-framerate",
588 | f"{request.framerate}",
589 | "-f",
590 | "x11grab",
591 | "-i",
592 | ":1",
593 | file_path,
594 | ]
595 | )
596 |
597 | return VideoRecordResponse(session_id=session_id)
598 |
599 |
600 | @app.post("/v1/stop_video_recording", response_model=VideoRecordModel)
601 | async def stop_video_recording():
602 |     global video_recording_process, video_recording_file
603 | with video_recording_lock:
604 | if video_recording_process is None:
605 | raise HTTPException(
606 | status_code=400, detail="No video recording in progress."
607 | )
608 |
609 |         video_recording_process.terminate()
610 |         video_recording_process.wait()  # let ffmpeg flush and finalize the file
611 |         video_recording_process = None
612 | 
613 |         # Return the path captured at start; a fresh UUID would name a nonexistent file
614 |         file_path, video_recording_file = video_recording_file, None
615 |         return VideoRecordModel(status="success", file_path=file_path)
616 |
617 |
618 | @app.get("/v1/video_recordings", response_model=VideoRecordings)
619 | async def list_video_recordings():
620 | recordings = os.listdir(video_recordings_dir)
621 | return VideoRecordings(recordings=recordings)
622 |
623 |
624 | @app.get("/v1/video_recordings/{session_id}", response_class=FileResponse)
625 | async def get_video_recording(session_id: str):
626 | file_path = os.path.join(video_recordings_dir, f"{session_id}.mp4")
627 | if not os.path.exists(file_path):
628 | raise HTTPException(status_code=404, detail="Recording not found.")
629 |
630 | return FileResponse(file_path, media_type="video/mp4", filename=f"{session_id}.mp4")
631 |
632 |
633 | @app.delete("/v1/video_recordings/{session_id}", response_model=VideoRecordModel)
634 | async def delete_video_recording(session_id: str):
635 | file_path = os.path.join(video_recordings_dir, f"{session_id}.mp4")
636 | if not os.path.exists(file_path):
637 | raise HTTPException(status_code=404, detail="Recording not found.")
638 |
639 | os.remove(file_path)
640 | return VideoRecordModel(status="success", file_path=file_path)
641 |
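642 | 
643 | # Minimal local entrypoint sketch; in the container the app is launched by the
644 | # s6 `uvicorn_run` service, so this block is only a development convenience.
645 | # Host and port here are assumptions (the Dockerfile exposes 8000).
646 | if __name__ == "__main__":
647 |     import uvicorn
648 | 
649 |     uvicorn.run(app, host="0.0.0.0", port=8000)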
--------------------------------------------------------------------------------