├── electron
├── icon.icns
├── icon.ico
├── icon.png
├── preload.js
├── package.json
├── electron-builder.yml
└── afterPack.js
├── scrcpy-server-v3.3.3
├── frontend
├── postcss.config.mjs
├── tsconfig.json
├── .prettierignore
├── index.html
├── src
│ ├── routes
│ │ ├── about.tsx
│ │ ├── index.tsx
│ │ └── __root.tsx
│ ├── main.tsx
│ ├── styles.css
│ ├── components
│ │ ├── ConfirmDialog.tsx
│ │ ├── Toast.tsx
│ │ ├── DeviceSidebar.tsx
│ │ └── DeviceCard.tsx
│ ├── routeTree.gen.ts
│ └── api.ts
├── vite.config.js
├── README.md
├── .gitignore
├── prettier.config.js
├── package.json
└── eslint.config.js
├── AutoGLM_GUI
├── server.py
├── resources
│ └── apks
│ │ ├── ADBKeyboard.apk
│ │ └── ADBKeyBoard.README.txt
├── exceptions.py
├── version.py
├── adb_plus
│ ├── __init__.py
│ ├── serial.py
│ ├── device.py
│ ├── ip.py
│ ├── touch.py
│ └── screenshot.py
├── config.py
├── state.py
├── __init__.py
├── api
│ ├── __init__.py
│ ├── control.py
│ └── devices.py
├── platform_utils.py
├── logger.py
├── schemas.py
└── __main__.py
├── phone_agent
├── model
│ ├── __init__.py
│ └── client.py
├── actions
│ └── __init__.py
├── __init__.py
├── config
│ ├── __init__.py
│ ├── i18n.py
│ ├── prompts_en.py
│ ├── prompts.py
│ ├── prompts_zh.py
│ └── apps.py
├── adb
│ ├── __init__.py
│ ├── input.py
│ ├── screenshot.py
│ └── device.py
└── agent.py
├── .vscode
└── settings.json
├── main.py
├── .gitignore
├── .github
├── actions
│ └── setup-python
│ │ └── action.yml
└── workflows
│ ├── release.yml
│ └── pr-lint.yml
├── scripts
├── pyi_rth_utf8.py
├── autoglm.spec
├── convert_icon.py
├── build.py
├── download_adb.py
├── release.py
└── build_electron.py
├── pyproject.toml
└── README.md
/electron/icon.icns:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suyiiyii/AutoGLM-GUI/HEAD/electron/icon.icns
--------------------------------------------------------------------------------
/electron/icon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suyiiyii/AutoGLM-GUI/HEAD/electron/icon.ico
--------------------------------------------------------------------------------
/electron/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suyiiyii/AutoGLM-GUI/HEAD/electron/icon.png
--------------------------------------------------------------------------------
/scrcpy-server-v3.3.3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suyiiyii/AutoGLM-GUI/HEAD/scrcpy-server-v3.3.3
--------------------------------------------------------------------------------
/frontend/postcss.config.mjs:
--------------------------------------------------------------------------------
1 | export default {
2 | plugins: {
3 | '@tailwindcss/postcss': {},
4 | },
5 | };
6 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/server.py:
--------------------------------------------------------------------------------
1 | """AutoGLM-GUI Backend API Server."""
2 |
3 | from AutoGLM_GUI.api import app
4 |
5 | __all__ = ["app"]
6 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/resources/apks/ADBKeyboard.apk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suyiiyii/AutoGLM-GUI/HEAD/AutoGLM_GUI/resources/apks/ADBKeyboard.apk
--------------------------------------------------------------------------------
/phone_agent/model/__init__.py:
--------------------------------------------------------------------------------
1 | """Model client module for AI inference."""
2 |
3 | from phone_agent.model.client import ModelClient, ModelConfig
4 |
5 | __all__ = ["ModelClient", "ModelConfig"]
6 |
--------------------------------------------------------------------------------
/phone_agent/actions/__init__.py:
--------------------------------------------------------------------------------
1 | """Action handling module for Phone Agent."""
2 |
3 | from phone_agent.actions.handler import ActionHandler, ActionResult
4 |
5 | __all__ = ["ActionHandler", "ActionResult"]
6 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/exceptions.py:
--------------------------------------------------------------------------------
1 | """Custom exceptions for AutoGLM-GUI."""
2 |
3 |
class DeviceNotAvailableError(Exception):
    """Signals that a device cannot be used right now.

    Raised when the device is not available, i.e. it is disconnected
    or offline. Carries no state beyond the standard exception message.
    """
8 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/version.py:
--------------------------------------------------------------------------------
1 | """Package version helper."""
2 |
3 | from importlib.metadata import version as get_version
4 |
# Resolve the installed distribution's version. Any failure of the metadata
# lookup is treated as "not installed" and falls back to the "dev" marker.
try:
    _installed_version = get_version("autoglm-gui")
except Exception:
    APP_VERSION = "dev"
else:
    APP_VERSION = _installed_version
9 |
--------------------------------------------------------------------------------
/frontend/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "strict": true,
4 | "esModuleInterop": true,
5 | "jsx": "react-jsx",
6 | "lib": ["DOM", "DOM.Iterable", "ES2022"],
7 | "skipLibCheck": true
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "files.watcherExclude": {
3 | "**/routeTree.gen.ts": true
4 | },
5 | "search.exclude": {
6 | "**/routeTree.gen.ts": true
7 | },
8 | "files.readonlyInclude": {
9 | "**/routeTree.gen.ts": true
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/frontend/.prettierignore:
--------------------------------------------------------------------------------
1 | # Dependencies
2 | node_modules
3 | pnpm-lock.yaml
4 |
5 | # Build outputs
6 | dist
7 | build
8 | .vite
9 |
10 | # Auto-generated files
11 | src/routeTree.gen.ts
12 |
13 | # Coverage
14 | coverage
15 |
16 | # Cache
17 | .eslintcache
18 | .tsbuildinfo
19 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/resources/apks/ADBKeyBoard.README.txt:
--------------------------------------------------------------------------------
1 | Note on Third-Party Components: This project includes ADBKeyBoard to support automated text input. ADBKeyBoard is licensed under GPL-2.0 and is developed by senzhk. The source code for ADBKeyBoard can be found at: https://github.com/senzhk/ADBKeyBoard
--------------------------------------------------------------------------------
/phone_agent/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Phone Agent - An AI-powered phone automation framework.
3 |
4 | This package provides tools for automating Android phone interactions
5 | using AI models for visual understanding and decision making.
6 | """
7 |
8 | from phone_agent.agent import PhoneAgent
9 |
10 | __version__ = "0.1.0"
11 | __all__ = ["PhoneAgent"]
12 |
--------------------------------------------------------------------------------
/frontend/index.html:
--------------------------------------------------------------------------------
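1 | <!doctype html>
2 | <html lang="en">
3 | <head>
4 | <meta charset="UTF-8" />
5 | <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6 | <title>AutoGLM GUI</title>
7 | </head>
8 | <body>
9 | <div id="app"></div>
10 | <script type="module" src="/src/main.tsx"></script>
11 | </body>
12 | </html>
13 |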
--------------------------------------------------------------------------------
/frontend/src/routes/about.tsx:
--------------------------------------------------------------------------------
1 | import { createFileRoute } from '@tanstack/react-router';
2 | import * as React from 'react';
3 |
4 | export const Route = createFileRoute('/about')({
5 | component: AboutComponent,
6 | });
7 |
/**
 * Static page rendered for the `/about` route.
 *
 * NOTE(review): the element tags were missing from this listing; the markup
 * below follows the TanStack Router file-based quickstart template that this
 * frontend is based on. Confirm the wrapper class name against the real file.
 */
function AboutComponent() {
  return (
    <div className="p-2">
      <h3>About</h3>
    </div>
  );
}
15 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | """AutoGLM-GUI Backend API Server.
2 |
3 | This module is kept for backward compatibility and development.
4 | For production use, run: autoglm-gui (or uvx autoglm-gui)
5 | """
6 |
7 | # Re-export app from the package
8 | from AutoGLM_GUI.server import app
9 |
10 | if __name__ == "__main__":
11 | import uvicorn
12 |
13 | uvicorn.run(app, host="0.0.0.0", port=8000)
14 |
--------------------------------------------------------------------------------
/frontend/vite.config.js:
--------------------------------------------------------------------------------
1 | import { defineConfig } from 'vite';
2 | import react from '@vitejs/plugin-react';
3 | import { tanstackRouter } from '@tanstack/router-plugin/vite';
4 |
5 | // https://vitejs.dev/config/
6 | export default defineConfig({
7 | plugins: [
8 | tanstackRouter({ target: 'react', autoCodeSplitting: true }),
9 | react(),
10 | ],
11 | server: {
12 | proxy: {
13 | '/api': {
14 | target: 'http://localhost:8000',
15 | changeOrigin: true,
16 | },
17 | },
18 | },
19 | });
20 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Python-generated files
2 | __pycache__/
3 | *.py[oc]
4 | build/
5 | dist/
6 | wheels/
7 | *.egg-info
8 |
9 | # Virtual environments
10 | .venv
11 | .python-version
12 |
13 | # Built static files (generated by scripts/build.py)
14 | AutoGLM_GUI/static/
15 |
16 | # Frontend
17 | frontend/node_modules/
18 | frontend/dist/
19 |
20 | # Electron
21 | electron/node_modules/
22 | electron/dist/
23 |
24 | # Build resources
25 | resources/
26 |
27 | # Logs
28 | *.log
29 | logs/
30 |
31 | # MCP
32 | .mcp.json
33 |
34 | # macOS
35 | .DS_Store
36 |
--------------------------------------------------------------------------------
/frontend/src/routes/index.tsx:
--------------------------------------------------------------------------------
1 | import { createFileRoute } from '@tanstack/react-router';
2 | import { useEffect } from 'react';
3 | import { useNavigate } from '@tanstack/react-router';
4 |
5 | export const Route = createFileRoute('/')({
6 | component: HomeComponent,
7 | });
8 |
/**
 * Landing component for `/`.
 *
 * Immediately redirects to `/chat` once mounted; the heading below is only
 * visible until that navigation takes effect.
 *
 * NOTE(review): the element tags were missing from this listing; the markup
 * follows the TanStack Router file-based quickstart template. Confirm the
 * wrapper class name against the real file.
 */
function HomeComponent() {
  const navigate = useNavigate();

  useEffect(() => {
    // `navigate` returns a promise; `void` marks it as intentionally
    // fire-and-forget rather than an accidentally unhandled promise.
    void navigate({ to: '/chat' });
  }, [navigate]);

  return (
    <div className="p-2">
      <h3>Welcome Home!</h3>
    </div>
  );
}
22 |
--------------------------------------------------------------------------------
/electron/preload.js:
--------------------------------------------------------------------------------
/**
 * Electron preload script.
 *
 * Runs inside the renderer process, but before any web page content is
 * loaded, so it can safely expose selected Node.js APIs to the renderer.
 *
 * The application currently needs no dedicated IPC channel because the
 * frontend talks to the backend directly over HTTP/WebSocket.
 */

const { contextBridge } = require('electron');

// Version details surfaced to the page (optional).
const { node, chrome, electron } = process.versions;

contextBridge.exposeInMainWorld('electronAPI', {
  versions: { node, chrome, electron },
  platform: process.platform,
});

console.log('Electron preload script loaded');
24 |
--------------------------------------------------------------------------------
/.github/actions/setup-python/action.yml:
--------------------------------------------------------------------------------
1 | name: Setup Python & uv
2 | description: Setup Python and uv package manager
3 |
4 | inputs:
5 | python-version:
6 | description: Python version to use
7 | required: false
8 | default: "3.11"
9 |
10 | runs:
11 | using: composite
12 | steps:
13 | - name: Install uv
14 | uses: astral-sh/setup-uv@v4
15 | with:
16 | enable-cache: true
17 |
18 | - name: Setup Python
19 | uses: actions/setup-python@v5
20 | with:
21 | python-version: ${{ inputs.python-version }}
22 |
23 | - name: Install dependencies
24 | shell: bash
25 | run: uv sync
26 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/adb_plus/__init__.py:
--------------------------------------------------------------------------------
1 | """Lightweight ADB helpers with a more robust screenshot implementation."""
2 |
3 | from .keyboard_installer import ADBKeyboardInstaller
4 | from .screenshot import Screenshot, capture_screenshot
5 | from .touch import touch_down, touch_move, touch_up
6 | from .ip import get_wifi_ip
7 | from .serial import get_device_serial
8 | from .device import check_device_available
9 |
10 | __all__ = [
11 | "ADBKeyboardInstaller",
12 | "Screenshot",
13 | "capture_screenshot",
14 | "touch_down",
15 | "touch_move",
16 | "touch_up",
17 | "get_wifi_ip",
18 | "get_device_serial",
19 | "check_device_available",
20 | ]
21 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/config.py:
--------------------------------------------------------------------------------
1 | """Application configuration singleton."""
2 |
3 | import os
4 | from dataclasses import dataclass
5 |
6 |
@dataclass
class AppConfig:
    """Global application configuration.

    Holds three string settings with built-in defaults; call
    ``refresh_from_env`` to overlay values from environment variables.
    """

    base_url: str = ""
    model_name: str = "autoglm-phone-9b"
    api_key: str = "EMPTY"

    def refresh_from_env(self):
        """Refresh settings from environment variables (useful in reload mode).

        Each ``AUTOGLM_*`` variable that is set replaces the matching
        attribute; a variable that is unset leaves the current value as is.
        """
        env_to_attr = (
            ("AUTOGLM_BASE_URL", "base_url"),
            ("AUTOGLM_MODEL_NAME", "model_name"),
            ("AUTOGLM_API_KEY", "api_key"),
        )
        for env_name, attr_name in env_to_attr:
            current = getattr(self, attr_name)
            setattr(self, attr_name, os.getenv(env_name, current))
20 |
21 |
22 | # Global singleton instance
23 | config = AppConfig()
24 |
--------------------------------------------------------------------------------
/frontend/src/main.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import ReactDOM from 'react-dom/client';
3 | import { RouterProvider, createRouter } from '@tanstack/react-router';
4 | import { routeTree } from './routeTree.gen';
5 | import './styles.css';
6 |
7 | // Set up a Router instance
8 | const router = createRouter({
9 | routeTree,
10 | defaultPreload: 'intent',
11 | scrollRestoration: true,
12 | });
13 |
14 | // Register things for typesafety
15 | declare module '@tanstack/react-router' {
16 | interface Register {
17 | router: typeof router;
18 | }
19 | }
20 |
const rootElement = document.getElementById('app');

// Mount only when the container exists and is still empty, so an already
// populated container is never overwritten.
if (rootElement && !rootElement.innerHTML) {
  const root = ReactDOM.createRoot(rootElement);
  // Render the application through the router instance created above.
  root.render(<RouterProvider router={router} />);
}
27 |
--------------------------------------------------------------------------------
/frontend/README.md:
--------------------------------------------------------------------------------
1 | # TanStack Router - File-Based Quickstart Example
2 |
3 | A quickstart example using file-based routing.
4 |
5 | - [TanStack Router Docs](https://tanstack.com/router)
6 |
7 | ## Start a new project based on this example
8 |
9 | To start a new project based on this example, run:
10 |
11 | ```sh
12 | npx gitpick TanStack/router/tree/main/examples/react/quickstart-file-based quickstart-file-based
13 | ```
14 |
15 | ## Getting Started
16 |
17 | Install dependencies:
18 |
19 | ```sh
20 | pnpm install
21 | ```
22 |
23 | Start the development server:
24 |
25 | ```sh
26 | pnpm dev
27 | ```
28 |
29 | ## Build
30 |
31 | Build for production:
32 |
33 | ```sh
34 | pnpm build
35 | ```
36 |
37 | ## About This Example
38 |
39 | This example demonstrates:
40 |
41 | - Quick setup with file-based routing
42 | - Automatic route generation
43 | - Minimal configuration
44 | - Type-safe routes
45 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/state.py:
--------------------------------------------------------------------------------
1 | """Shared runtime state for the AutoGLM-GUI API."""
2 |
3 | from __future__ import annotations
4 |
5 | import asyncio
6 | from typing import TYPE_CHECKING
7 |
8 | from AutoGLM_GUI.logger import logger
9 | from phone_agent.agent import AgentConfig
10 | from phone_agent.model import ModelConfig
11 |
12 | if TYPE_CHECKING:
13 | from AutoGLM_GUI.scrcpy_stream import ScrcpyStreamer
14 | from phone_agent import PhoneAgent
15 |
16 | # Agent instances keyed by device_id
17 | agents: dict[str, "PhoneAgent"] = {}
18 | # Cached configs to rebuild agents on reset
19 | agent_configs: dict[str, tuple[ModelConfig, AgentConfig]] = {}
20 |
21 | # Scrcpy streaming per device
22 | scrcpy_streamers: dict[str, "ScrcpyStreamer"] = {}
23 | scrcpy_locks: dict[str, asyncio.Lock] = {}
24 |
25 |
def non_blocking_takeover(message: str) -> None:
    """Record a takeover request as a warning instead of waiting on console input.

    Args:
        message: Text describing the takeover request.
    """
    warning_text = f"Takeover requested: {message}"
    logger.warning(warning_text)
29 |
--------------------------------------------------------------------------------
/phone_agent/config/__init__.py:
--------------------------------------------------------------------------------
1 | """Configuration module for Phone Agent."""
2 |
3 | from phone_agent.config.apps import APP_PACKAGES
4 | from phone_agent.config.i18n import get_message, get_messages
5 | from phone_agent.config.prompts_en import SYSTEM_PROMPT as SYSTEM_PROMPT_EN
6 | from phone_agent.config.prompts_zh import SYSTEM_PROMPT as SYSTEM_PROMPT_ZH
7 |
8 |
def get_system_prompt(lang: str = "cn") -> str:
    """
    Select the system prompt for a language.

    Args:
        lang: Language code. 'en' selects the English prompt; any other
            value (including the default 'cn') selects the Chinese prompt.

    Returns:
        System prompt string.
    """
    return SYSTEM_PROMPT_EN if lang == "en" else SYSTEM_PROMPT_ZH
22 |
23 |
24 | # Default to Chinese for backward compatibility
25 | SYSTEM_PROMPT = SYSTEM_PROMPT_ZH
26 |
27 | __all__ = [
28 | "APP_PACKAGES",
29 | "SYSTEM_PROMPT",
30 | "SYSTEM_PROMPT_ZH",
31 | "SYSTEM_PROMPT_EN",
32 | "get_system_prompt",
33 | "get_messages",
34 | "get_message",
35 | ]
36 |
--------------------------------------------------------------------------------
/scripts/pyi_rth_utf8.py:
--------------------------------------------------------------------------------
1 | """
2 | PyInstaller Runtime Hook - Force UTF-8 encoding on Windows
3 |
4 | This file is executed by PyInstaller BEFORE the main script,
5 | at the earliest possible moment, ensuring UTF-8 encoding is set
6 | before any user code runs.
7 |
8 | Reference: https://pyinstaller.org/en/stable/hooks.html#understanding-pyi-rth-hooks
9 | """
10 |
11 | import sys
12 | import os
13 |
14 | # Only apply on Windows
15 | if sys.platform == "win32":
16 | # Set environment variable for any subprocess
17 | os.environ["PYTHONIOENCODING"] = "utf-8"
18 |
19 | # Reconfigure stdout and stderr to UTF-8
20 | # This is the official Python 3.7+ way
21 | if hasattr(sys.stdout, "reconfigure"):
22 | sys.stdout.reconfigure(encoding="utf-8", errors="replace")
23 | sys.stderr.reconfigure(encoding="utf-8", errors="replace")
24 | else:
25 | # Fallback for Python < 3.7 (shouldn't happen with Python 3.11)
26 | import codecs
27 |
28 | sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach(), "replace")
29 | sys.stderr = codecs.getwriter("utf-8")(sys.stderr.detach(), "replace")
30 |
--------------------------------------------------------------------------------
/electron/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "autoglm-gui",
3 | "version": "0.5.0",
4 | "description": "AutoGLM GUI - AI-powered Android automation desktop app",
5 | "main": "main.js",
6 | "homepage": "https://github.com/suyiiyii/AutoGLM-GUI",
7 | "repository": {
8 | "type": "git",
9 | "url": "https://github.com/suyiiyii/AutoGLM-GUI.git"
10 | },
11 | "scripts": {
12 | "start": "electron .",
13 | "dev": "electron . --dev",
14 | "build": "electron-builder",
15 | "build:win": "electron-builder --win",
16 | "build:mac": "electron-builder --mac",
17 | "build:linux": "electron-builder --linux"
18 | },
19 | "keywords": [
20 | "autoglm",
21 | "android",
22 | "automation",
23 | "electron"
24 | ],
25 | "author": {
26 | "name": "suyiiyii",
27 | "email": "suyiiyii@gmail.com"
28 | },
29 | "license": "MIT",
30 | "devDependencies": {
31 | "electron": "^28.0.0",
32 | "electron-builder": "^24.9.0"
33 | },
34 | "packageManager": "pnpm@10.17.1+sha512.17c560fca4867ae9473a3899ad84a88334914f379be46d455cbf92e5cf4b39d34985d452d2583baf19967fa76cb5c17bc9e245529d0b98745721aa7200ecaf7a"
35 | }
36 |
--------------------------------------------------------------------------------
/phone_agent/adb/__init__.py:
--------------------------------------------------------------------------------
1 | """ADB utilities for Android device interaction."""
2 |
3 | from phone_agent.adb.connection import (
4 | ADBConnection,
5 | ConnectionType,
6 | DeviceInfo,
7 | list_devices,
8 | quick_connect,
9 | )
10 | from phone_agent.adb.device import (
11 | back,
12 | double_tap,
13 | get_current_app,
14 | home,
15 | launch_app,
16 | long_press,
17 | swipe,
18 | tap,
19 | )
20 | from phone_agent.adb.input import (
21 | clear_text,
22 | detect_and_set_adb_keyboard,
23 | restore_keyboard,
24 | type_text,
25 | )
26 | from phone_agent.adb.screenshot import get_screenshot
27 |
28 | __all__ = [
29 | # Screenshot
30 | "get_screenshot",
31 | # Input
32 | "type_text",
33 | "clear_text",
34 | "detect_and_set_adb_keyboard",
35 | "restore_keyboard",
36 | # Device control
37 | "get_current_app",
38 | "tap",
39 | "swipe",
40 | "back",
41 | "home",
42 | "double_tap",
43 | "long_press",
44 | "launch_app",
45 | # Connection management
46 | "ADBConnection",
47 | "DeviceInfo",
48 | "ConnectionType",
49 | "quick_connect",
50 | "list_devices",
51 | ]
52 |
--------------------------------------------------------------------------------
/electron/electron-builder.yml:
--------------------------------------------------------------------------------
1 | appId: com.autoglm.gui
2 | productName: AutoGLM GUI
3 | copyright: Copyright © 2025
4 |
5 | # 使用本地 Electron,避免网络下载
6 | electronDist: node_modules/electron/dist
7 |
8 | # 打包后设置可执行权限
9 | afterPack: ./afterPack.js
10 |
11 | directories:
12 | output: dist
13 | buildResources: build
14 |
15 | files:
16 | - "**/*"
17 | - "!node_modules"
18 |
19 | extraResources:
20 | - from: ../resources/backend
21 | to: backend
22 | - from: ../resources/adb
23 | to: adb
24 | - from: ../scrcpy-server-v3.3.3
25 | to: ./
26 |
27 | win:
28 | target:
29 | - nsis # 安装包(推荐)
30 | - portable # 便携版
31 | icon: icon.ico
32 | publisherName: AutoGLM-GUI Team
33 |
34 | nsis:
35 | oneClick: false
36 | allowToChangeInstallationDirectory: true
37 | createDesktopShortcut: true
38 | createStartMenuShortcut: true
39 | perMachine: false # 用户级安装,无需管理员权限
40 |
41 | mac:
42 | target:
43 | - dmg
44 | icon: icon.icns
45 | category: public.app-category.developer-tools
46 | # 开发阶段不签名,使用 xattr -cr 移除隔离属性
47 | identity: null
48 |
49 | linux:
50 | target:
51 | - AppImage # 最通用的格式,无需安装
52 | - deb # Debian/Ubuntu 系统
53 | - tar.gz # 便携版
54 | category: Development
55 | icon: icon.png
56 | maintainer: suyiiyii
57 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/adb_plus/serial.py:
--------------------------------------------------------------------------------
1 | """Get device serial number using ADB."""
2 |
3 | from AutoGLM_GUI.platform_utils import run_cmd_silently_sync
4 |
5 |
def get_device_serial(device_id: str, adb_path: str = "adb") -> str | None:
    """
    Look up the hardware serial number (``ro.serialno``) of a device.

    The value is read with ``getprop`` on the device, so the same lookup is
    used whether ``device_id`` is a USB serial or an ``IP:port`` WiFi address.

    Args:
        device_id: The device ID (can be USB serial or IP:port for WiFi)
        adb_path: Path to adb executable (default: "adb")

    Returns:
        The device hardware serial number, or None if the command fails,
        raises, or yields an empty/placeholder value.
    """
    getprop_cmd = [adb_path, "-s", device_id, "shell", "getprop", "ro.serialno"]

    try:
        completed = run_cmd_silently_sync(getprop_cmd, timeout=3)
        if completed.returncode != 0:
            return None

        value = completed.stdout.strip()
        # Reject empty output, adb error text and the "unknown" placeholder.
        rejected = not value or value.startswith("error:") or value == "unknown"
        return None if rejected else value
    except Exception:
        # Best-effort lookup: any failure is reported as "no serial".
        return None
36 |
--------------------------------------------------------------------------------
/frontend/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | .DS_Store
3 | dist
4 | dist-ssr
5 | *.local
6 |
7 | # Editor directories and files
8 | .vscode/*
9 | !.vscode/extensions.json
10 | .idea
11 | *.suo
12 | *.ntvs*
13 | *.njsproj
14 | *.sln
15 | *.sw?
16 |
17 | # OS generated files
18 | Thumbs.db
19 |
20 | # Logs
21 | logs
22 | *.log
23 | npm-debug.log*
24 | yarn-debug.log*
25 | yarn-error.log*
26 | pnpm-debug.log*
27 | lerna-debug.log*
28 |
29 | # Coverage directory used by tools like istanbul
30 | coverage/
31 | *.lcov
32 |
33 | # nyc test coverage
34 | .nyc_output
35 |
36 | # Dependency directories
37 | jspm_packages/
38 |
39 | # TypeScript cache
40 | *.tsbuildinfo
41 |
42 | # Optional npm cache directory
43 | .npm
44 |
45 | # Optional eslint cache
46 | .eslintcache
47 |
48 | # Optional stylelint cache
49 | .stylelintcache
50 |
51 | # Microbundle cache
52 | .rpt2_cache/
53 | .rts2_cache_cjs/
54 | .rts2_cache_es/
55 | .rts2_cache_umd/
56 |
57 | # Optional REPL history
58 | .node_repl_history
59 |
60 | # Output of 'npm pack'
61 | *.tgz
62 |
63 | # Yarn Integrity file
64 | .yarn-integrity
65 |
66 | # parcel-bundler cache (https://parceljs.org/)
67 | .cache
68 | .parcel-cache
69 |
70 | # Stores VSCode versions used for testing VSCode extensions
71 | .vscode-test
72 |
73 | # yarn v2
74 | .yarn/cache
75 | .yarn/unplugged
76 | .yarn/build-state.yml
77 | .yarn/install-state.gz
78 | .pnp.*
79 |
--------------------------------------------------------------------------------
/frontend/src/styles.css:
--------------------------------------------------------------------------------
1 | @import 'tailwindcss';
2 |
3 | @layer base {
4 | *,
5 | ::after,
6 | ::before,
7 | ::backdrop,
8 | ::file-selector-button {
9 | border-color: var(--color-gray-200, currentcolor);
10 | }
11 | }
12 |
13 | html {
14 | color-scheme: light dark;
15 | }
16 | * {
17 | @apply border-gray-200 dark:border-gray-800;
18 | }
19 | body {
20 | @apply bg-gray-50 text-gray-950 dark:bg-gray-900 dark:text-gray-200;
21 | }
22 |
23 | /* Ripple effect animation for ScrcpyPlayer */
24 | @keyframes ripple {
25 | 0% {
26 | width: 0;
27 | height: 0;
28 | opacity: 1;
29 | }
30 | 100% {
31 | width: 60px;
32 | height: 60px;
33 | opacity: 0;
34 | }
35 | }
36 |
37 | .ripple-circle {
38 | position: absolute;
39 | width: 60px;
40 | height: 60px;
41 | border-radius: 50%;
42 | background: radial-gradient(
43 | circle,
44 | rgba(59, 130, 246, 0.5) 0%,
45 | rgba(59, 130, 246, 0) 70%
46 | );
47 | border: 2px solid rgba(59, 130, 246, 0.8);
48 | animation: ripple 500ms ease-out;
49 | pointer-events: none;
50 | transform: translate(-50%, -50%);
51 | }
52 |
53 | /* Scroll trail animation with moving ball */
54 | @keyframes scrollTrail {
55 | 0% {
56 | background-position: 0% 0%;
57 | opacity: 0;
58 | }
59 | 5% {
60 | opacity: 1;
61 | }
62 | 95% {
63 | opacity: 1;
64 | }
65 | 100% {
66 | background-position: 0% 100%;
67 | opacity: 0;
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/frontend/prettier.config.js:
--------------------------------------------------------------------------------
1 | export default {
2 | // 2 spaces for indentation
3 | tabWidth: 2,
4 | useTabs: false,
5 |
6 | // Print semicolons
7 | semi: true,
8 |
9 | // Use single quotes for strings
10 | singleQuote: true,
11 |
12 | // Use trailing commas where valid in ES5 (objects, arrays, etc.)
13 | trailingComma: 'es5',
14 |
15 | // Print spaces between brackets in object literals
16 | bracketSpacing: true,
17 |
18 | // Put the > of a multi-line JSX element at the end of the last line
19 | bracketSameLine: false,
20 |
21 | // Include parentheses around a sole arrow function parameter
22 | arrowParens: 'avoid',
23 |
24 | // Format only files that have a pragma comment
25 | requirePragma: false,
26 |
27 | // Insert pragma comment at the top of formatted files
28 | insertPragma: false,
29 |
30 | // How to handle whitespace in prose
31 | proseWrap: 'preserve',
32 |
33 | // How to handle whitespace in HTML
34 | htmlWhitespaceSensitivity: 'css',
35 |
36 | // How to handle whitespace in Vue files
37 | vueIndentScriptAndStyle: false,
38 |
39 | // Line length that Prettier will try to maintain
40 | printWidth: 80,
41 |
42 | // End of line character
43 | endOfLine: 'lf',
44 |
45 | // Control whether Prettier formats quoted code embedded in the file
46 | embeddedLanguageFormatting: 'auto',
47 |
48 | // Enforce single attribute per line in HTML, Vue and JSX
49 | singleAttributePerLine: false,
50 | };
51 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/adb_plus/device.py:
--------------------------------------------------------------------------------
1 | """Device availability checking utilities."""
2 |
3 | import asyncio
4 |
5 | from AutoGLM_GUI.exceptions import DeviceNotAvailableError
6 | from AutoGLM_GUI.logger import logger
7 | from AutoGLM_GUI.platform_utils import run_cmd_silently
8 |
9 |
async def check_device_available(device_id: str | None = None) -> None:
    """Check if the device is available.

    Runs ``adb [-s <device_id>] get-state`` with a 5 second timeout and
    treats any state other than ``device`` as unavailable.

    Args:
        device_id: ADB device serial (None for default device)

    Raises:
        DeviceNotAvailableError: If the device is not reachable, the command
            times out, the adb executable is missing, or the check fails for
            any other reason. The triggering exception, when there is one, is
            attached as ``__cause__``.
    """
    cmd = ["adb"]
    if device_id:
        cmd.extend(["-s", device_id])
    cmd.append("get-state")

    try:
        result = await asyncio.wait_for(run_cmd_silently(cmd), timeout=5.0)

        state = result.stdout.strip() if result.stdout else ""
        error_output = result.stderr.strip() if result.stderr else ""

        # Check for common error patterns
        lowered_error = error_output.lower()
        if "not found" in lowered_error or "offline" in lowered_error:
            raise DeviceNotAvailableError(
                f"Device {device_id} is not available: {error_output}"
            )

        if state != "device":
            raise DeviceNotAvailableError(
                f"Device {device_id} is not available (state: {state or 'offline'})"
            )

        logger.debug(f"Device {device_id} is available (state: {state})")

    except asyncio.TimeoutError as exc:
        raise DeviceNotAvailableError(
            f"Device {device_id} connection timed out"
        ) from exc
    except FileNotFoundError as exc:
        raise DeviceNotAvailableError("ADB executable not found") from exc
    except DeviceNotAvailableError:
        # Already the right type (raised by the checks above); pass through.
        raise
    except Exception as exc:
        raise DeviceNotAvailableError(
            f"Failed to check device {device_id}: {exc}"
        ) from exc
51 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "autoglm-gui"
3 | version = "0.4.11"
4 | description = "Web GUI for AutoGLM Phone Agent - AI-powered Android automation"
5 | readme = "README.md"
6 | requires-python = ">=3.10"
7 | license = "MIT"
8 | authors = [{ name = "suyiiyii" }]
9 | keywords = ["autoglm", "phone-agent", "android", "automation", "ai", "gui"]
10 | classifiers = [
11 | "Development Status :: 3 - Alpha",
12 | "Environment :: Web Environment",
13 | "Framework :: FastAPI",
14 | "Intended Audience :: Developers",
15 | "License :: OSI Approved :: MIT License",
16 | "Operating System :: OS Independent",
17 | "Programming Language :: Python :: 3",
18 | "Programming Language :: Python :: 3.10",
19 | "Programming Language :: Python :: 3.11",
20 | "Programming Language :: Python :: 3.12",
21 | "Topic :: Software Development :: Libraries :: Python Modules",
22 | ]
23 | dependencies = [
24 | "fastapi>=0.124.0",
25 | "httpx[socks]>=0.28.1",
26 | "loguru>=0.7.3",
27 | "openai>=2.9.0",
28 | "pillow>=11.3.0",
29 | "uvicorn[standard]>=0.38.0",
30 | ]
31 |
32 | [project.urls]
33 | Homepage = "https://github.com/suyiiyii/AutoGLM-GUI"
34 | Repository = "https://github.com/suyiiyii/AutoGLM-GUI"
35 |
36 | [project.scripts]
37 | autoglm-gui = "AutoGLM_GUI.__main__:main"
38 |
39 | [build-system]
40 | requires = ["hatchling"]
41 | build-backend = "hatchling.build"
42 |
43 | [tool.hatch.build.targets.wheel]
44 | packages = ["AutoGLM_GUI", "phone_agent"]
45 | force-include = {"scrcpy-server-v3.3.3" = "scrcpy-server-v3.3.3"}
46 |
47 | [tool.hatch.build.targets.sdist]
48 | include = ["AutoGLM_GUI/**/*", "phone_agent/**/*", "scrcpy-server-v3.3.3"]
49 |
50 | [tool.hatch.build]
51 | artifacts = ["AutoGLM_GUI/static/**/*"]
52 |
53 | [tool.hatch.metadata]
54 | allow-direct-references = true
55 |
56 | [dependency-groups]
57 | dev = [
58 | "pyinstaller>=6.17.0",
59 | "ruff>=0.14.9",
60 | ]
61 |
--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "tanstack-router-react-example-quickstart-file-based",
3 | "private": true,
4 | "type": "module",
5 | "scripts": {
6 | "dev": "vite --port 3000",
7 | "build": "vite build && tsc --noEmit",
8 | "preview": "vite preview",
9 | "start": "vite",
10 | "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0",
11 | "lint:ignore-warnings": "eslint . --ext ts,tsx --report-unused-disable-directives",
12 | "lint:fix": "eslint . --ext ts,tsx --fix",
13 | "lint:inspect": "eslint . --ext ts,tsx --debug",
14 | "format": "prettier --write .",
15 | "format:check": "prettier --check .",
16 | "type-check": "tsc --noEmit"
17 | },
18 | "dependencies": {
19 | "@tailwindcss/postcss": "^4.1.15",
20 | "@tanstack/react-router": "^1.140.0",
21 | "@tanstack/react-router-devtools": "^1.140.0",
22 | "@tanstack/router-plugin": "^1.140.0",
23 | "jmuxer": "^2.1.0",
24 | "postcss": "^8.5.1",
25 | "react": "^19.2.1",
26 | "react-dom": "^19.0.0",
27 | "redaxios": "^0.5.1",
28 | "tailwindcss": "^4.1.17",
29 | "zod": "^3.24.2"
30 | },
31 | "devDependencies": {
32 | "@eslint/js": "^9.39.1",
33 | "@types/jmuxer": "^2.0.7",
34 | "@types/react": "^19.0.8",
35 | "@types/react-dom": "^19.0.3",
36 | "@typescript-eslint/eslint-plugin": "^8.49.0",
37 | "@typescript-eslint/parser": "^8.49.0",
38 | "@vitejs/plugin-react": "^4.3.4",
39 | "eslint": "^9.39.1",
40 | "eslint-config-prettier": "^10.1.8",
41 | "eslint-plugin-prettier": "^5.5.4",
42 | "eslint-plugin-react": "^7.37.5",
43 | "eslint-plugin-react-hooks": "^7.0.1",
44 | "eslint-plugin-react-refresh": "^0.4.24",
45 | "globals": "^16.5.0",
46 | "prettier": "^3.7.4",
47 | "typescript": "^5.7.2",
48 | "vite": "^7.1.7"
49 | },
50 | "packageManager": "pnpm@10.17.1+sha512.17c560fca4867ae9473a3899ad84a88334914f379be46d455cbf92e5cf4b39d34985d452d2583baf19967fa76cb5c17bc9e245529d0b98745721aa7200ecaf7a"
51 | }
52 |
--------------------------------------------------------------------------------
/phone_agent/config/i18n.py:
--------------------------------------------------------------------------------
1 | """Internationalization (i18n) module for Phone Agent UI messages."""
2 |
3 | # Chinese messages
4 | MESSAGES_ZH = {
5 | "thinking": "思考过程",
6 | "action": "执行动作",
7 | "task_completed": "任务完成",
8 | "done": "完成",
9 | "starting_task": "开始执行任务",
10 | "final_result": "最终结果",
11 | "task_result": "任务结果",
12 | "confirmation_required": "需要确认",
13 | "continue_prompt": "是否继续?(y/n)",
14 | "manual_operation_required": "需要人工操作",
15 | "manual_operation_hint": "请手动完成操作...",
16 | "press_enter_when_done": "完成后按回车继续",
17 | "connection_failed": "连接失败",
18 | "connection_successful": "连接成功",
19 | "step": "步骤",
20 | "task": "任务",
21 | "result": "结果",
22 | }
23 |
24 | # English messages
25 | MESSAGES_EN = {
26 | "thinking": "Thinking",
27 | "action": "Action",
28 | "task_completed": "Task Completed",
29 | "done": "Done",
30 | "starting_task": "Starting task",
31 | "final_result": "Final Result",
32 | "task_result": "Task Result",
33 | "confirmation_required": "Confirmation Required",
34 | "continue_prompt": "Continue? (y/n)",
35 | "manual_operation_required": "Manual Operation Required",
36 | "manual_operation_hint": "Please complete the operation manually...",
37 | "press_enter_when_done": "Press Enter when done",
38 | "connection_failed": "Connection Failed",
39 | "connection_successful": "Connection Successful",
40 | "step": "Step",
41 | "task": "Task",
42 | "result": "Result",
43 | }
44 |
45 |
def get_messages(lang: str = "cn") -> dict:
    """
    Select the UI message table for a language.

    Args:
        lang: Language code; 'en' selects English, any other value selects Chinese.

    Returns:
        The module-level message dictionary for that language (not a copy).
    """
    return MESSAGES_EN if lang == "en" else MESSAGES_ZH
59 |
60 |
def get_message(key: str, lang: str = "cn") -> str:
    """
    Look up one UI message.

    Args:
        key: Message key.
        lang: Language code; 'en' for English, anything else yields Chinese.

    Returns:
        The message for ``key``, or ``key`` itself when the table has no entry.
    """
    table = get_messages(lang)
    if key in table:
        return table[key]
    return key
74 |
--------------------------------------------------------------------------------
/frontend/src/components/ConfirmDialog.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 |
3 | interface ConfirmDialogProps {
4 | isOpen: boolean;
5 | title: string;
6 | content: string;
7 | onConfirm: () => void;
8 | onCancel: () => void;
9 | confirmText?: string;
10 | cancelText?: string;
11 | }
12 |
13 | export function ConfirmDialog({
14 | isOpen,
15 | title,
16 | content,
17 | onConfirm,
18 | onCancel,
19 | confirmText = '确认',
20 | cancelText = '取消',
21 | }: ConfirmDialogProps) {
22 | if (!isOpen) return null;
23 |
24 | return (
25 |
26 |
e.stopPropagation()}
29 | >
30 |
31 |
32 | {title}
33 |
34 |
35 | {content}
36 |
37 |
38 |
39 |
48 |
57 |
58 |
59 |
60 | );
61 | }
62 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/__init__.py:
--------------------------------------------------------------------------------
1 | """AutoGLM-GUI package metadata."""
2 |
3 | import subprocess
4 | import sys
5 | from functools import wraps
6 | from importlib import metadata
7 |
# Fix Windows console encoding - must run before any other import writes output.
if sys.platform == "win32":
    for _stream in (sys.stdout, sys.stderr):
        # The standard streams can be None (e.g. pythonw or a windowed build) or
        # replaced by objects that have neither .buffer nor .reconfigure();
        # skip those instead of crashing while the package is being imported.
        _reconfigure = getattr(_stream, "reconfigure", None)
        if callable(_reconfigure):
            # Switch the existing text stream to UTF-8 in place.
            _reconfigure(encoding="utf-8")
    del _stream, _reconfigure
14 |
15 |
16 | # ============================================================================
17 | # Fix Windows encoding issue: Force UTF-8 for all subprocess calls
18 | # ============================================================================
19 | # On Windows, subprocess defaults to GBK encoding which fails when ADB/scrcpy
20 | # output UTF-8 characters. This monkey patch ensures all subprocess calls
21 | # use UTF-8 encoding by default.
22 |
23 | _original_run = subprocess.run
24 | _original_popen = subprocess.Popen
25 |
26 |
@wraps(_original_run)
def _patched_run(*args, **kwargs):
    """Delegate to the real ``subprocess.run``, defaulting text-mode calls to UTF-8 on Windows."""
    text_mode = kwargs.get("text") or kwargs.get("universal_newlines")
    # Only fill in an encoding when the caller asked for text and did not pick one.
    if sys.platform == "win32" and text_mode and "encoding" not in kwargs:
        kwargs["encoding"] = "utf-8"
    return _original_run(*args, **kwargs)
36 |
37 |
class _PatchedPopen(_original_popen):
    """``subprocess.Popen`` subclass whose text-mode pipes default to UTF-8 on Windows."""

    def __init__(self, *args, **kwargs):
        wants_text = kwargs.get("text") or kwargs.get("universal_newlines")
        # Respect an explicit encoding; only supply one when the caller left it out.
        if sys.platform == "win32" and wants_text:
            kwargs.setdefault("encoding", "utf-8")
        super().__init__(*args, **kwargs)
48 |
49 |
50 | # Apply the patches globally
51 | subprocess.run = _patched_run
52 | subprocess.Popen = _PatchedPopen
53 |
54 | # ============================================================================
55 |
56 | # Expose package version at runtime; fall back to "unknown" during editable/dev runs
57 | try:
58 | __version__ = metadata.version("autoglm-gui")
59 | except metadata.PackageNotFoundError:
60 | __version__ = "unknown"
61 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | push:
5 | tags:
6 | - v*
7 |
8 | jobs:
9 | release:
10 | name: Release
11 | runs-on: ubuntu-latest
12 | environment: release
13 | permissions:
14 | id-token: write
15 | contents: write
16 | steps:
17 | - name: Checkout
18 | uses: actions/checkout@v4
19 |
20 | - name: Setup Python & uv
21 | uses: ./.github/actions/setup-python
22 |
23 | - name: Setup Node.js
24 | uses: actions/setup-node@v4
25 | with:
26 | node-version: "20"
27 |
28 | - name: Setup pnpm
29 | uses: pnpm/action-setup@v4
30 | with:
31 | version: 9
32 |
33 | - name: Get Version
34 | id: version
35 | run: |
36 | echo "VERSION=$(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)" >> $GITHUB_OUTPUT
37 | echo "TAG_VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT
38 | echo "TAG_NAME=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
39 |
40 | - name: Check Version
41 | if: steps.version.outputs.VERSION != steps.version.outputs.TAG_VERSION
42 | run: |
43 | echo "Version mismatch: ${{ steps.version.outputs.VERSION }} != ${{ steps.version.outputs.TAG_VERSION }}"
44 | exit 1
45 |
46 | - name: Build Frontend
47 | run: |
48 | cd frontend
49 | pnpm install
50 | pnpm build
51 |
52 | - name: Copy Static Files
53 | run: |
54 | mkdir -p AutoGLM_GUI/static
55 | cp -r frontend/dist/* AutoGLM_GUI/static/
56 |
57 | - name: Build Package
58 | run: uv build
59 |
60 | - name: Publish to PyPI
61 | uses: pypa/gh-action-pypi-publish@release/v1
62 |
63 | - name: Create Release if not exists
64 | run: |
65 | gh release view ${{ steps.version.outputs.TAG_NAME }} || \
66 | gh release create ${{ steps.version.outputs.TAG_NAME }} --title "${{ steps.version.outputs.TAG_NAME }}" --notes "Release ${{ steps.version.outputs.TAG_NAME }}"
67 | env:
68 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
69 |
70 | - name: Upload Release Asset
71 | run: gh release upload --clobber ${{ steps.version.outputs.TAG_NAME }} dist/*.tar.gz dist/*.whl
72 | env:
73 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
74 |
--------------------------------------------------------------------------------
/electron/afterPack.js:
--------------------------------------------------------------------------------
1 | /**
2 | * electron-builder afterPack hook
3 | * 在打包后设置可执行文件权限
4 | */
5 |
6 | const fs = require('fs');
7 | const path = require('path');
8 |
9 | exports.default = async function(context) {
10 | const { appOutDir, electronPlatformName } = context;
11 |
12 | console.log('Running afterPack hook...');
13 | console.log('Platform:', electronPlatformName);
14 | console.log('Output directory:', appOutDir);
15 |
16 | // 确定资源路径
17 | let resourcesPath;
18 | if (electronPlatformName === 'darwin') {
19 | const appName = context.packager.appInfo.productFilename;
20 | resourcesPath = path.join(appOutDir, `${appName}.app`, 'Contents', 'Resources');
21 | } else if (electronPlatformName === 'win32') {
22 | resourcesPath = path.join(appOutDir, 'resources');
23 | } else if (electronPlatformName === 'linux') {
24 | resourcesPath = path.join(appOutDir, 'resources');
25 | } else {
26 | console.log('Unsupported platform:', electronPlatformName);
27 | return;
28 | }
29 |
30 | console.log('Resources path:', resourcesPath);
31 |
32 | // 设置后端可执行文件权限
33 | const backendExe = path.join(
34 | resourcesPath,
35 | 'backend',
36 | electronPlatformName === 'win32' ? 'autoglm-gui.exe' : 'autoglm-gui'
37 | );
38 |
39 | if (fs.existsSync(backendExe)) {
40 | fs.chmodSync(backendExe, 0o755);
41 | console.log('✓ Set executable permission for backend:', backendExe);
42 | } else {
43 | console.warn('⚠ Backend executable not found:', backendExe);
44 | }
45 |
46 | // 设置 ADB 工具权限
47 | const platformName = electronPlatformName === 'win32' ? 'windows'
48 | : electronPlatformName === 'linux' ? 'linux'
49 | : 'darwin';
50 | const adbDir = path.join(resourcesPath, 'adb', platformName, 'platform-tools');
51 |
52 | if (fs.existsSync(adbDir)) {
53 | const adbFiles = ['adb', 'fastboot', 'etc1tool', 'hprof-conv', 'make_f2fs', 'make_f2fs_casefold', 'mke2fs', 'sqlite3'];
54 |
55 | for (const file of adbFiles) {
56 | const filePath = path.join(adbDir, electronPlatformName === 'win32' ? `${file}.exe` : file);
57 | if (fs.existsSync(filePath)) {
58 | fs.chmodSync(filePath, 0o755);
59 | console.log('✓ Set executable permission for:', file);
60 | }
61 | }
62 | } else {
63 | console.warn('⚠ ADB directory not found:', adbDir);
64 | }
65 |
66 | console.log('afterPack hook completed');
67 | };
68 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/adb_plus/ip.py:
--------------------------------------------------------------------------------
1 | """ADB IP helpers (prefer WiFi address, skip cellular interfaces)."""
2 |
3 | from __future__ import annotations
4 |
5 | import re
6 | import subprocess
7 | from typing import Optional
8 |
9 | __all__ = ["get_wifi_ip"]
10 |
11 |
def _run(adb_path: str, device_id: Optional[str], cmd: list[str]) -> str:
    """Run ``adb [-s device_id] shell <cmd>`` (5 s timeout) and return stdout followed by stderr."""
    target = ["-s", device_id] if device_id else []
    completed = subprocess.run(
        [adb_path, *target, "shell", *cmd],
        capture_output=True,
        text=True,
        timeout=5,
    )
    return (completed.stdout or "") + (completed.stderr or "")
20 |
21 |
22 | def _extract_ip(text: str) -> Optional[str]:
23 | m = re.search(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b", text)
24 | if not m:
25 | return None
26 | ip = m.group(0)
27 | if ip == "0.0.0.0":
28 | return None
29 | return ip
30 |
31 |
def get_wifi_ip(
    adb_path: str = "adb", device_id: Optional[str] = None
) -> Optional[str]:
    """
    Pick the device's WiFi IPv4 address, avoiding cellular interfaces.

    Strategy:
    - Ask `ip -4 route get 8.8.8.8` and take the `src` address of the first
      route line whose `dev` is not a ccmni*/rmnet* interface.
    - Otherwise scan `ip -4 addr show wlan0` for an address.

    Any error in either step is swallowed; None is returned when nothing
    suitable is found.
    """

    def _following(tokens: list[str], keyword: str) -> Optional[str]:
        # Token right after `keyword`, or None when the keyword is absent or last.
        if keyword not in tokens:
            return None
        position = tokens.index(keyword) + 1
        return tokens[position] if position < len(tokens) else None

    # Step 1: source address of the default route
    try:
        routed = _run(adb_path, device_id, ["ip", "-4", "route", "get", "8.8.8.8"])
        for entry in routed.splitlines():
            if "src" not in entry:
                continue
            tokens = entry.split()
            interface = _following(tokens, "dev")
            address = _following(tokens, "src")
            if not address or address == "0.0.0.0":
                continue
            if interface and interface.startswith(("ccmni", "rmnet")):
                continue
            return address
    except Exception:
        pass

    # Step 2: address bound to wlan0
    try:
        found = _extract_ip(
            _run(adb_path, device_id, ["ip", "-4", "addr", "show", "wlan0"])
        )
        if found:
            return found
    except Exception:
        pass

    return None
79 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/api/__init__.py:
--------------------------------------------------------------------------------
1 | """FastAPI application factory and route registration."""
2 |
3 | import sys
4 | from importlib.resources import files
5 | from pathlib import Path
6 |
7 | from fastapi import FastAPI
8 | from fastapi.middleware.cors import CORSMiddleware
9 | from fastapi.responses import FileResponse
10 | from fastapi.staticfiles import StaticFiles
11 |
12 | from AutoGLM_GUI.version import APP_VERSION
13 |
14 | from . import agents, control, devices, media
15 |
16 |
17 | def _get_static_dir() -> Path | None:
18 | """Locate packaged static assets."""
19 | # Priority 1: PyInstaller bundled path (for packaged executable)
20 | if getattr(sys, "_MEIPASS", None):
21 | bundled_static = Path(sys._MEIPASS) / "AutoGLM_GUI" / "static"
22 | if bundled_static.exists():
23 | return bundled_static
24 |
25 | # Priority 2: importlib.resources (for installed package)
26 | try:
27 | static_dir = files("AutoGLM_GUI").joinpath("static")
28 | if hasattr(static_dir, "_path"):
29 | path = Path(str(static_dir))
30 | if path.exists():
31 | return path
32 | path = Path(str(static_dir))
33 | if path.exists():
34 | return path
35 | except (TypeError, FileNotFoundError):
36 | pass
37 |
38 | return None
39 |
40 |
def create_app() -> FastAPI:
    """Build the FastAPI app with CORS, API routers and static assets.

    CORS is opened for ``http://localhost:3000`` only. When a static
    directory is found, ``/assets`` is mounted from it and a catch-all GET
    route serves files from inside that directory, falling back to
    ``index.html`` for every other path.

    Returns:
        The configured FastAPI application.
    """
    app = FastAPI(title="AutoGLM-GUI API", version=APP_VERSION)

    app.add_middleware(
        CORSMiddleware,
        allow_origins=["http://localhost:3000"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    app.include_router(agents.router)
    app.include_router(devices.router)
    app.include_router(control.router)
    app.include_router(media.router)

    static_dir = _get_static_dir()
    if static_dir is not None and static_dir.exists():
        # Resolve once so containment checks compare canonical paths.
        static_root = static_dir.resolve()

        assets_dir = static_root / "assets"
        if assets_dir.exists():
            app.mount("/assets", StaticFiles(directory=assets_dir), name="assets")

        @app.get("/{full_path:path}")
        async def serve_spa(full_path: str) -> FileResponse:
            # full_path comes straight from the request URL: "../" segments or
            # an absolute path would otherwise let the join escape the static
            # root and expose arbitrary files. Resolve, then require
            # containment before serving.
            try:
                candidate = (static_root / full_path).resolve()
            except (OSError, ValueError):
                candidate = None
            if (
                candidate is not None
                and candidate.is_relative_to(static_root)
                and candidate.is_file()
            ):
                return FileResponse(candidate)
            return FileResponse(static_root / "index.html")

    return app
72 |
73 |
74 | app = create_app()
75 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/platform_utils.py:
--------------------------------------------------------------------------------
1 | """Platform-aware subprocess helpers to avoid duplicated Windows branches."""
2 |
3 | import asyncio
4 | import platform
5 | import subprocess
6 | from typing import Any, Sequence
7 |
8 |
def is_windows() -> bool:
    """Report whether ``platform.system()`` identifies the OS as Windows."""
    system_name = platform.system()
    return system_name == "Windows"
12 |
13 |
def run_cmd_silently_sync(
    cmd: Sequence[str], timeout: float | None = None
) -> subprocess.CompletedProcess:
    """Synchronously run a command with its output captured rather than printed.

    A non-zero exit status does not raise; inspect ``returncode`` on the result.

    Args:
        cmd: Command and arguments as a sequence of strings
        timeout: Optional limit in seconds, forwarded to ``subprocess.run``

    Returns:
        CompletedProcess whose stdout/stderr hold the captured text
    """
    run_options = {
        "capture_output": True,
        "text": True,
        "check": False,
        "timeout": timeout,
    }
    return subprocess.run(cmd, **run_options)
31 |
32 |
async def run_cmd_silently(cmd: Sequence[str]) -> subprocess.CompletedProcess:
    """Run a command with output captured instead of printed; awaitable on all platforms.

    On Windows the blocking ``subprocess.run`` is executed in a worker thread;
    elsewhere an asyncio subprocess is used.

    Args:
        cmd: Command and arguments as a sequence of strings.

    Returns:
        CompletedProcess with ``stdout``/``stderr`` as ``str``. On the asyncio
        path, bytes that are not valid UTF-8 are replaced rather than raising.

    Raises:
        asyncio.CancelledError: Re-raised after the child process has been
            killed, when the awaiting task is cancelled (for example by
            ``asyncio.wait_for`` timing out).
    """
    if is_windows():
        # Avoid blocking the event loop with a blocking subprocess call on Windows.
        return await asyncio.to_thread(
            subprocess.run, cmd, capture_output=True, text=True, check=False
        )

    # Use PIPE on macOS/Linux to capture output
    process = await asyncio.create_subprocess_exec(
        *cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    try:
        stdout, stderr = await process.communicate()
    except asyncio.CancelledError:
        # The caller gave up on us; do not leave the child running in the background.
        if process.returncode is None:
            try:
                process.kill()
            except ProcessLookupError:
                pass  # child exited between the check and the kill
            await process.wait()
        raise

    # Decode bytes to string for API consistency across platforms; tool output
    # is not guaranteed to be valid UTF-8, so never fail on decoding.
    stdout_str = stdout.decode("utf-8", errors="replace") if stdout else ""
    stderr_str = stderr.decode("utf-8", errors="replace") if stderr else ""
    return_code = process.returncode if process.returncode is not None else -1
    return subprocess.CompletedProcess(cmd, return_code, stdout_str, stderr_str)
52 |
53 |
async def spawn_process(cmd: Sequence[str], *, capture_output: bool = False) -> Any:
    """Launch a process without waiting for it to finish.

    With ``capture_output`` both stdout and stderr are piped; otherwise they
    are left unredirected. Returns a ``subprocess.Popen`` on Windows and an
    asyncio subprocess object elsewhere.
    """
    stream_target = subprocess.PIPE if capture_output else None

    if not is_windows():
        return await asyncio.create_subprocess_exec(
            *cmd, stdout=stream_target, stderr=stream_target
        )

    return subprocess.Popen(cmd, stdout=stream_target, stderr=stream_target)
63 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/adb_plus/touch.py:
--------------------------------------------------------------------------------
1 | """Touch control utilities using ADB motion events for real-time dragging."""
2 |
3 | import subprocess
4 | import time
5 |
6 |
7 | def _get_adb_prefix(device_id: str | None, adb_path: str = "adb") -> list[str]:
8 | """Get ADB command prefix with optional device specifier."""
9 | if device_id:
10 | return [adb_path, "-s", device_id]
11 | return [adb_path]
12 |
13 |
def _send_motion_event(
    action: str,
    x: int,
    y: int,
    device_id: str | None,
    delay: float,
    adb_path: str,
) -> None:
    """Run ``adb shell input motionevent <action> <x> <y>``, then sleep ``delay`` seconds if positive."""
    subprocess.run(
        _get_adb_prefix(device_id, adb_path)
        + ["shell", "input", "motionevent", action, str(x), str(y)],
        capture_output=True,  # output is captured and discarded
    )
    if delay > 0:
        time.sleep(delay)


def touch_down(
    x: int,
    y: int,
    device_id: str | None = None,
    delay: float = 0.0,
    adb_path: str = "adb",
) -> None:
    """
    Send touch DOWN event at specified coordinates.

    Args:
        x: X coordinate.
        y: Y coordinate.
        device_id: Optional ADB device ID.
        delay: Delay in seconds after event (default: 0.0 for real-time).
        adb_path: Path to adb binary.
    """
    _send_motion_event("DOWN", x, y, device_id, delay, adb_path)


def touch_move(
    x: int,
    y: int,
    device_id: str | None = None,
    delay: float = 0.0,
    adb_path: str = "adb",
) -> None:
    """
    Send touch MOVE event at specified coordinates.

    Args:
        x: X coordinate.
        y: Y coordinate.
        device_id: Optional ADB device ID.
        delay: Delay in seconds after event (default: 0.0 for real-time).
        adb_path: Path to adb binary.
    """
    _send_motion_event("MOVE", x, y, device_id, delay, adb_path)


def touch_up(
    x: int,
    y: int,
    device_id: str | None = None,
    delay: float = 0.0,
    adb_path: str = "adb",
) -> None:
    """
    Send touch UP event at specified coordinates.

    Args:
        x: X coordinate.
        y: Y coordinate.
        device_id: Optional ADB device ID.
        delay: Delay in seconds after event (default: 0.0 for real-time).
        adb_path: Path to adb binary.
    """
    _send_motion_event("UP", x, y, device_id, delay, adb_path)
93 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/logger.py:
--------------------------------------------------------------------------------
1 | """
2 | Centralized logging configuration using loguru.
3 | """
4 |
5 | import sys
6 | from pathlib import Path
7 | from loguru import logger
8 |
9 | # Remove default handler
10 | logger.remove()
11 |
12 | # Default configuration - will be overridden by configure_logger()
13 | _configured = False
14 |
15 |
def configure_logger(
    console_level: str = "INFO",
    log_file: str | None = "logs/autoglm_{time:YYYY-MM-DD}.log",
    log_level: str = "DEBUG",
    rotation: str = "100 MB",
    retention: str = "7 days",
    compression: str = "zip",
) -> None:
    """
    Configure the global logger with console and file handlers.

    When ``log_file`` is given, a second ERROR-level file is written next to
    it. Its name is ``errors_`` plus whatever follows the first underscore of
    the log file name, or ``errors_`` plus the whole name when the name has
    no underscore.

    Args:
        console_level: Console output level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
        log_file: Log file path (None to disable file logging)
        log_level: File logging level
        rotation: Log rotation policy (e.g., "100 MB", "1 day")
        retention: Log retention policy (e.g., "7 days", "1 week")
        compression: Compression format for rotated logs (e.g., "zip", "gz")
    """
    global _configured

    line_format = "{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {name}:{function}:{line} - {message}"

    # Remove existing handlers if reconfiguring
    if _configured:
        logger.remove()

    # Console handler with colors
    logger.add(
        sys.stderr,
        format=line_format,
        level=console_level,
        colorize=True,
    )

    # File handler
    if log_file:
        # Create logs directory if it doesn't exist
        log_path = Path(log_file)
        log_path.parent.mkdir(parents=True, exist_ok=True)

        logger.add(
            log_file,
            rotation=rotation,
            retention=retention,
            compression=compression,
            level=log_level,
            format=line_format,
            encoding="utf-8",
        )

        # Separate error log file. partition() never fails, unlike
        # split('_', 1)[1], which raised IndexError for names such as "app.log".
        _, underscore, remainder = log_path.name.partition("_")
        error_name = f"errors_{remainder}" if underscore else f"errors_{log_path.name}"
        error_file = str(log_path.parent / error_name)
        logger.add(
            error_file,
            rotation="50 MB",
            retention="30 days",
            compression=compression,
            level="ERROR",
            format=line_format,
            backtrace=True,
            diagnose=True,
            encoding="utf-8",
        )

    _configured = True
80 |
81 |
82 | # Default initialization (can be reconfigured later)
83 | configure_logger()
84 |
85 | __all__ = ["logger", "configure_logger"]
86 |
--------------------------------------------------------------------------------
/phone_agent/config/prompts_en.py:
--------------------------------------------------------------------------------
1 | """System prompts for the AI agent."""
2 |
3 | from datetime import datetime
4 |
5 | today = datetime.today()
6 | formatted_date = today.strftime("%Y-%m-%d, %A")
7 |
8 | SYSTEM_PROMPT = (
9 | "The current date: "
10 | + formatted_date
11 | + """
12 | # Setup
13 | You are a professional Android operation agent assistant that can fulfill the user's high-level instructions. Given a screenshot of the Android interface at each step, you first analyze the situation, then plan the best course of action using Python-style pseudo-code.
14 |
15 | # More details about the code
16 | Your response format must be structured as follows:
17 |
18 | Think first: Use ... to analyze the current screen, identify key elements, and determine the most efficient action.
19 | Provide the action: Use ... to return a single line of pseudo-code representing the operation.
20 |
21 | Your output should STRICTLY follow the format:
22 |
23 | [Your thought]
24 |
25 |
26 | [Your operation code]
27 |
28 |
29 | - **Tap**
30 | Perform a tap action on a specified screen area. The element is a list of 2 integers, representing the coordinates of the tap point.
31 | **Example**:
32 |
33 | do(action="Tap", element=[x,y])
34 |
35 | - **Type**
36 | Enter text into the currently focused input field.
37 | **Example**:
38 |
39 | do(action="Type", text="Hello World")
40 |
41 | - **Swipe**
42 | Perform a swipe action with start point and end point.
43 | **Examples**:
44 |
45 | do(action="Swipe", start=[x1,y1], end=[x2,y2])
46 |
47 | - **Long Press**
48 | Perform a long press action on a specified screen area.
49 | You can add the element to the action to specify the long press area. The element is a list of 2 integers, representing the coordinates of the long press point.
50 | **Example**:
51 |
52 | do(action="Long Press", element=[x,y])
53 |
54 | - **Launch**
55 | Launch an app. Try to use launch action when you need to launch an app. Check the instruction to choose the right app before you use this action.
56 | **Example**:
57 |
58 | do(action="Launch", app="Settings")
59 |
60 | - **Back**
61 | Press the Back button to navigate to the previous screen.
62 | **Example**:
63 |
64 | do(action="Back")
65 |
66 | - **Finish**
67 | Terminate the program and optionally print a message.
68 | **Example**:
69 |
70 | finish(message="Task completed.")
71 |
72 |
73 |
74 | REMEMBER:
75 | - Think before you act: Always analyze the current UI and the best course of action before executing any step, and output in part.
76 | - Only ONE LINE of action in part per response: Each step must contain exactly one line of executable code.
77 | - Generate execution code strictly according to format requirements.
78 | """
79 | )
80 |
--------------------------------------------------------------------------------
/frontend/src/routeTree.gen.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable */
2 |
3 | // @ts-nocheck
4 |
5 | // noinspection JSUnusedGlobalSymbols
6 |
7 | // This file was automatically generated by TanStack Router.
8 | // You should NOT make any changes in this file as it will be overwritten.
9 | // Additionally, you should also exclude this file from your linter and/or formatter to prevent it from being checked or modified.
10 |
11 | import { Route as rootRouteImport } from './routes/__root'
12 | import { Route as ChatRouteImport } from './routes/chat'
13 | import { Route as AboutRouteImport } from './routes/about'
14 | import { Route as IndexRouteImport } from './routes/index'
15 |
16 | const ChatRoute = ChatRouteImport.update({
17 | id: '/chat',
18 | path: '/chat',
19 | getParentRoute: () => rootRouteImport,
20 | } as any)
21 | const AboutRoute = AboutRouteImport.update({
22 | id: '/about',
23 | path: '/about',
24 | getParentRoute: () => rootRouteImport,
25 | } as any)
26 | const IndexRoute = IndexRouteImport.update({
27 | id: '/',
28 | path: '/',
29 | getParentRoute: () => rootRouteImport,
30 | } as any)
31 |
32 | export interface FileRoutesByFullPath {
33 | '/': typeof IndexRoute
34 | '/about': typeof AboutRoute
35 | '/chat': typeof ChatRoute
36 | }
37 | export interface FileRoutesByTo {
38 | '/': typeof IndexRoute
39 | '/about': typeof AboutRoute
40 | '/chat': typeof ChatRoute
41 | }
42 | export interface FileRoutesById {
43 | __root__: typeof rootRouteImport
44 | '/': typeof IndexRoute
45 | '/about': typeof AboutRoute
46 | '/chat': typeof ChatRoute
47 | }
48 | export interface FileRouteTypes {
49 | fileRoutesByFullPath: FileRoutesByFullPath
50 | fullPaths: '/' | '/about' | '/chat'
51 | fileRoutesByTo: FileRoutesByTo
52 | to: '/' | '/about' | '/chat'
53 | id: '__root__' | '/' | '/about' | '/chat'
54 | fileRoutesById: FileRoutesById
55 | }
56 | export interface RootRouteChildren {
57 | IndexRoute: typeof IndexRoute
58 | AboutRoute: typeof AboutRoute
59 | ChatRoute: typeof ChatRoute
60 | }
61 |
62 | declare module '@tanstack/react-router' {
63 | interface FileRoutesByPath {
64 | '/chat': {
65 | id: '/chat'
66 | path: '/chat'
67 | fullPath: '/chat'
68 | preLoaderRoute: typeof ChatRouteImport
69 | parentRoute: typeof rootRouteImport
70 | }
71 | '/about': {
72 | id: '/about'
73 | path: '/about'
74 | fullPath: '/about'
75 | preLoaderRoute: typeof AboutRouteImport
76 | parentRoute: typeof rootRouteImport
77 | }
78 | '/': {
79 | id: '/'
80 | path: '/'
81 | fullPath: '/'
82 | preLoaderRoute: typeof IndexRouteImport
83 | parentRoute: typeof rootRouteImport
84 | }
85 | }
86 | }
87 |
88 | const rootRouteChildren: RootRouteChildren = {
89 | IndexRoute: IndexRoute,
90 | AboutRoute: AboutRoute,
91 | ChatRoute: ChatRoute,
92 | }
93 | export const routeTree = rootRouteImport
94 | ._addFileChildren(rootRouteChildren)
95 | ._addFileTypes()
96 |
--------------------------------------------------------------------------------
/frontend/src/components/Toast.tsx:
--------------------------------------------------------------------------------
1 | import React, { useEffect } from 'react';
2 |
3 | export type ToastType = 'success' | 'error' | 'info';
4 |
5 | interface ToastProps {
6 | message: string;
7 | type?: ToastType;
8 | onClose: () => void;
9 | duration?: number;
10 | }
11 |
12 | export function Toast({
13 | message,
14 | type = 'info',
15 | onClose,
16 | duration = 3000,
17 | }: ToastProps) {
18 | useEffect(() => {
19 | const timer = setTimeout(() => {
20 | onClose();
21 | }, duration);
22 | return () => clearTimeout(timer);
23 | }, [duration, onClose]);
24 |
25 | const bgColors = {
26 | success: 'bg-green-500',
27 | error: 'bg-red-500',
28 | info: 'bg-blue-500',
29 | };
30 |
31 | const icons = {
32 | success: (
33 |
46 | ),
47 | error: (
48 |
61 | ),
62 | info: (
63 |
76 | ),
77 | };
78 |
79 | return (
80 |
81 |
84 | {icons[type]}
85 |
{message}
86 |
104 |
105 |
106 | );
107 | }
108 |
--------------------------------------------------------------------------------
/frontend/src/routes/__root.tsx:
--------------------------------------------------------------------------------
1 | import * as React from 'react';
2 | import { Outlet, createRootRoute } from '@tanstack/react-router';
3 | import { TanStackRouterDevtools } from '@tanstack/react-router-devtools';
4 | import { getStatus } from '../api';
5 |
6 | export const Route = createRootRoute({
7 | component: RootComponent,
8 | });
9 |
10 | function Footer() {
11 | const [version, setVersion] = React.useState('...');
12 |
13 | React.useEffect(() => {
14 | getStatus()
15 | .then(status => setVersion(status.version))
16 | .catch(() => setVersion('unknown'));
17 | }, []);
18 |
19 | return (
20 |
54 | );
55 | }
56 |
57 | function RootComponent() {
58 | return (
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 | );
67 | }
68 |
--------------------------------------------------------------------------------
/scripts/autoglm.spec:
--------------------------------------------------------------------------------
1 | # -*- mode: python ; coding: utf-8 -*-
2 | """
3 | PyInstaller 配置文件 - AutoGLM-GUI 后端打包
4 |
5 | 使用方法:
6 | cd scripts
7 | pyinstaller autoglm.spec
8 |
9 | 输出目录:
10 | scripts/dist/autoglm-gui/
11 | """
12 |
13 | from pathlib import Path
14 |
15 | # 项目根目录(SPECPATH 是 spec 文件所在目录,由 PyInstaller 提供)
16 | ROOT_DIR = Path(SPECPATH).parent
17 |
18 | block_cipher = None
19 |
20 | a = Analysis(
21 | # 入口点:Python 后端的 __main__.py
22 | [str(ROOT_DIR / 'AutoGLM_GUI' / '__main__.py')],
23 |
24 | pathex=[],
25 |
26 | # 二进制文件
27 | binaries=[
28 | # scrcpy-server 二进制文件(必需)
29 | (str(ROOT_DIR / 'scrcpy-server-v3.3.3'), '.'),
30 | ],
31 |
32 | # 数据文件
33 | datas=[
34 | # 前端静态文件(必需)
35 | (str(ROOT_DIR / 'AutoGLM_GUI' / 'static'), 'AutoGLM_GUI/static'),
36 |
37 | # phone_agent 配置文件(prompts, apps 等)
38 | (str(ROOT_DIR / 'phone_agent' / 'config'), 'phone_agent/config'),
39 |
40 | # ADB Keyboard APK 及许可证文件(自动安装功能)
41 | (str(ROOT_DIR / 'AutoGLM_GUI' / 'resources' / 'apks'), 'AutoGLM_GUI/resources/apks'),
42 | ],
43 |
44 | # 隐藏导入:PyInstaller 无法自动检测的模块
45 | hiddenimports=[
46 | # uvicorn 相关
47 | 'uvicorn.logging',
48 | 'uvicorn.loops',
49 | 'uvicorn.loops.auto',
50 | 'uvicorn.loops.asyncio',
51 | 'uvicorn.protocols',
52 | 'uvicorn.protocols.http',
53 | 'uvicorn.protocols.http.auto',
54 | 'uvicorn.protocols.http.h11_impl',
55 | 'uvicorn.protocols.websockets',
56 | 'uvicorn.protocols.websockets.auto',
57 | 'uvicorn.protocols.websockets.websockets_impl',
58 | 'uvicorn.lifespan',
59 | 'uvicorn.lifespan.on',
60 |
61 | # FastAPI 相关
62 | 'fastapi.responses',
63 | 'fastapi.staticfiles',
64 |
65 | # 其他可能需要的模块
66 | 'PIL._tkinter_finder', # Pillow
67 | ],
68 |
69 | hookspath=[],
70 | hooksconfig={},
71 | # Runtime hook: 在主程序运行前强制设置 UTF-8 编码(Windows)
72 | runtime_hooks=[str(Path(SPECPATH) / 'pyi_rth_utf8.py')],
73 | excludes=[
74 | # 排除不需要的模块以减小体积
75 | 'tkinter',
76 | 'matplotlib',
77 | 'numpy', # 如果不需要的话
78 | ],
79 | win_no_prefer_redirects=False,
80 | win_private_assemblies=False,
81 | cipher=block_cipher,
82 | noarchive=False,
83 | )
84 |
85 | pyz = PYZ(
86 | a.pure,
87 | a.zipped_data,
88 | cipher=block_cipher
89 | )
90 |
91 | exe = EXE(
92 | pyz,
93 | a.scripts,
94 | [],
95 | exclude_binaries=True,
96 | name='autoglm-gui',
97 | debug=False,
98 | bootloader_ignore_signals=False,
99 | strip=False,
100 | upx=False, # 首次不启用 UPX 压缩,确保稳定性
101 | console=True, # 保留控制台窗口便于调试(生产版本可改为 False)
102 | disable_windowed_traceback=False,
103 | argv_emulation=False,
104 | target_arch=None,
105 | codesign_identity=None,
106 | entitlements_file=None,
107 | )
108 |
109 | coll = COLLECT(
110 | exe,
111 | a.binaries,
112 | a.zipfiles,
113 | a.datas,
114 | strip=False,
115 | upx=False,
116 | upx_exclude=[],
117 | name='autoglm-gui',
118 | )
119 |
--------------------------------------------------------------------------------
/scripts/convert_icon.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """将图标图片转换为 Windows .ico 和 macOS .icns 格式"""
3 |
4 | import sys
5 | from pathlib import Path
6 | from PIL import Image
7 |
8 | ROOT_DIR = Path(__file__).parent.parent
9 | ELECTRON_DIR = ROOT_DIR / "electron"
10 |
11 |
def create_ico(source_image_path: Path):
    """Create the multi-size Windows ``icon.ico`` inside ``ELECTRON_DIR``.

    The source image is converted to RGBA when necessary and scaled to a
    256x256 master frame. That single frame is handed to Pillow's ICO writer
    together with the list of sizes the icon should contain.

    Args:
        source_image_path: Path of the image to convert.
    """
    sizes = [256, 128, 64, 48, 32, 16]

    # The context manager releases the source file handle even if the
    # conversion or resize raises.
    with Image.open(source_image_path) as img:
        rgba = img if img.mode == "RGBA" else img.convert("RGBA")
        # Only the largest frame is ever passed to save(); pre-resizing the
        # smaller ones (as the previous implementation did) was dead work.
        master = rgba.resize((sizes[0], sizes[0]), Image.Resampling.LANCZOS)

    ico_path = ELECTRON_DIR / "icon.ico"
    master.save(ico_path, format="ICO", sizes=[(size, size) for size in sizes])
    print(f"✓ Created Windows icon: {ico_path}")
31 |
32 |
def create_png_for_icns(source_image_path: Path):
    """Write a 1024x1024 ``icon.png`` into ``ELECTRON_DIR`` and return its path.

    The PNG is the input for generating the macOS ``.icns`` file.

    Args:
        source_image_path: Path of the image to convert.

    Returns:
        Path of the PNG that was written.
    """
    source = Image.open(source_image_path)

    # Work in RGBA; images already in that mode are used as-is.
    rgba = source if source.mode == "RGBA" else source.convert("RGBA")
    scaled = rgba.resize((1024, 1024), Image.Resampling.LANCZOS)

    png_path = ELECTRON_DIR / "icon.png"
    scaled.save(png_path, format="PNG")
    print(f"✓ Created PNG icon: {png_path}")
    return png_path
48 |
49 |
def main():
    """Command-line entry point: convert one source image into app icons.

    Expects the source image path as the first command-line argument. Exits
    with status 1 when the argument is missing or the file does not exist.
    Otherwise writes ``icon.ico`` and ``icon.png`` into ``ELECTRON_DIR`` and
    prints the shell commands that turn the PNG into a macOS ``.icns``.
    """
    if len(sys.argv) < 2:
        # Name the required argument so the usage line is actually usable.
        print("Usage: uv run python scripts/convert_icon.py <source_image>")
        sys.exit(1)

    source_path = Path(sys.argv[1])
    if not source_path.exists():
        print(f"Error: Source image not found: {source_path}")
        sys.exit(1)

    print("\n" + "=" * 60)
    print(" 转换应用图标")
    print("=" * 60)
    print(f" 源文件: {source_path}")

    ELECTRON_DIR.mkdir(exist_ok=True)

    # Windows .ico
    create_ico(source_path)

    # PNG master (macOS needs extra tooling to turn it into .icns)
    create_png_for_icns(source_path)

    # (pixel size, iconset file label) for every entry of the iconset.
    iconset_entries = [
        (16, "16x16"),
        (32, "16x16@2x"),
        (32, "32x32"),
        (64, "32x32@2x"),
        (128, "128x128"),
        (256, "128x128@2x"),
        (256, "256x256"),
        (512, "256x256@2x"),
        (512, "512x512"),
        (1024, "512x512@2x"),
    ]

    print("\n" + "=" * 60)
    print(" 下一步: 生成 macOS .icns")
    print("=" * 60)
    print(" 运行以下命令:")
    print(f" cd {ELECTRON_DIR}")
    print(" mkdir -p icon.iconset")
    for px, label in iconset_entries:
        print(f" sips -z {px} {px} icon.png --out icon.iconset/icon_{label}.png")
    print(" iconutil -c icns icon.iconset")
    print(" rm -rf icon.iconset")
    print("=" * 60)
92 |
93 |
94 | if __name__ == "__main__":
95 | main()
96 |
--------------------------------------------------------------------------------
/phone_agent/adb/input.py:
--------------------------------------------------------------------------------
1 | """Input utilities for Android device text input."""
2 |
3 | import base64
4 | import subprocess
5 |
6 |
def type_text(text: str, device_id: str | None = None) -> None:
    """
    Send text to the focused input field through an ADB Keyboard broadcast.

    The text is UTF-8 encoded, base64 encoded, and delivered as the ``msg``
    extra of an ``ADB_INPUT_B64`` broadcast. The adb result is not inspected.

    Args:
        text: The text to type.
        device_id: Optional ADB device ID for multi-device setups.

    Note:
        Requires ADB Keyboard to be installed on the device.
        See: https://github.com/nicnocquee/AdbKeyboard
    """
    payload = base64.b64encode(text.encode("utf-8")).decode("utf-8")
    broadcast_cmd = [
        *_get_adb_prefix(device_id),
        "shell",
        "am",
        "broadcast",
        "-a",
        "ADB_INPUT_B64",
        "--es",
        "msg",
        payload,
    ]
    subprocess.run(broadcast_cmd, capture_output=True, text=True)
37 |
38 |
def clear_text(device_id: str | None = None) -> None:
    """
    Clear the focused input field by broadcasting ``ADB_CLEAR_TEXT``.

    The adb result is not inspected.

    Args:
        device_id: Optional ADB device ID for multi-device setups.
    """
    clear_cmd = [
        *_get_adb_prefix(device_id),
        "shell",
        "am",
        "broadcast",
        "-a",
        "ADB_CLEAR_TEXT",
    ]
    subprocess.run(clear_cmd, capture_output=True, text=True)
53 |
54 |
def detect_and_set_adb_keyboard(device_id: str | None = None) -> str:
    """
    Make ADB Keyboard the active IME and report what was active before.

    Args:
        device_id: Optional ADB device ID for multi-device setups.

    Returns:
        The stripped stdout+stderr of the ``default_input_method`` query,
        intended to be passed to ``restore_keyboard`` later.
    """
    adb_ime = "com.android.adbkeyboard/.AdbIME"
    prefix = _get_adb_prefix(device_id)

    # Ask the device which IME is currently the default.
    query = subprocess.run(
        prefix + ["shell", "settings", "get", "secure", "default_input_method"],
        capture_output=True,
        text=True,
    )
    previous_ime = (query.stdout + query.stderr).strip()

    # Only switch when ADB Keyboard is not already the default.
    already_active = adb_ime in previous_ime
    if not already_active:
        subprocess.run(
            prefix + ["shell", "ime", "set", adb_ime],
            capture_output=True,
            text=True,
        )

    # Warm up the keyboard with an empty input broadcast.
    type_text("", device_id)

    return previous_ime
87 |
88 |
def restore_keyboard(ime: str, device_id: str | None = None) -> None:
    """
    Switch the device back to a previously active IME via ``ime set``.

    The adb result is not inspected.

    Args:
        ime: The IME identifier to restore.
        device_id: Optional ADB device ID for multi-device setups.
    """
    restore_cmd = [*_get_adb_prefix(device_id), "shell", "ime", "set", ime]
    subprocess.run(restore_cmd, capture_output=True, text=True)
102 |
103 |
104 | def _get_adb_prefix(device_id: str | None) -> list:
105 | """Get ADB command prefix with optional device specifier."""
106 | if device_id:
107 | return ["adb", "-s", device_id]
108 | return ["adb"]
109 |
--------------------------------------------------------------------------------
/phone_agent/adb/screenshot.py:
--------------------------------------------------------------------------------
1 | """Screenshot utilities for capturing Android device screen."""
2 |
3 | import base64
4 | import os
5 | import subprocess
6 | import tempfile
7 | import uuid
8 | from dataclasses import dataclass
9 | from io import BytesIO
10 |
11 | from PIL import Image
12 |
13 |
@dataclass
class Screenshot:
    """Result of a screen capture, including the black fallback image."""

    # PNG bytes, base64 encoded and decoded to a UTF-8 string.
    base64_data: str
    # Image width in pixels.
    width: int
    # Image height in pixels.
    height: int
    # True when screencap reported a failure and the fallback image was used.
    is_sensitive: bool = False
22 |
23 |
def get_screenshot(device_id: str | None = None, timeout: int = 10) -> Screenshot:
    """
    Capture a screenshot from the connected Android device.

    The device writes the capture to ``/sdcard/tmp.png``, the file is pulled
    to a uniquely named local temp file, re-encoded as PNG, and returned as
    base64. The local temp file is always removed, including on failure.

    Args:
        device_id: Optional ADB device ID for multi-device setups.
        timeout: Timeout in seconds for the ``screencap`` command. The
            subsequent ``adb pull`` uses a fixed 5 second timeout.

    Returns:
        Screenshot object containing base64 data and dimensions.

    Note:
        No exception escapes this function. If screencap reports a failure
        (its output contains "Status: -1" or "Failed", e.g. on sensitive
        screens like payment pages), a black fallback image is returned with
        is_sensitive=True. Any other failure returns the black fallback with
        is_sensitive=False.
    """
    temp_path = os.path.join(tempfile.gettempdir(), f"screenshot_{uuid.uuid4()}.png")
    adb_prefix = _get_adb_prefix(device_id)

    try:
        # Capture on the device.
        result = subprocess.run(
            adb_prefix + ["shell", "screencap", "-p", "/sdcard/tmp.png"],
            capture_output=True,
            text=True,
            timeout=timeout,
        )

        # screencap signals a refused capture through its output text.
        output = result.stdout + result.stderr
        if "Status: -1" in output or "Failed" in output:
            return _create_fallback_screenshot(is_sensitive=True)

        # Pull the capture to the local temp path.
        subprocess.run(
            adb_prefix + ["pull", "/sdcard/tmp.png", temp_path],
            capture_output=True,
            text=True,
            timeout=5,
        )

        if not os.path.exists(temp_path):
            return _create_fallback_screenshot(is_sensitive=False)

        # Decode and re-encode inside a context manager so the file handle is
        # closed before the temp file is deleted.
        with Image.open(temp_path) as img:
            width, height = img.size
            buffered = BytesIO()
            img.save(buffered, format="PNG")
        base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8")

        return Screenshot(
            base64_data=base64_data, width=width, height=height, is_sensitive=False
        )

    except Exception as e:
        print(f"Screenshot error: {e}")
        return _create_fallback_screenshot(is_sensitive=False)

    finally:
        # Best-effort cleanup on every exit path; previously the temp file was
        # left behind whenever decoding or encoding raised.
        try:
            os.remove(temp_path)
        except OSError:
            pass
85 |
86 |
87 | def _get_adb_prefix(device_id: str | None) -> list:
88 | """Get ADB command prefix with optional device specifier."""
89 | if device_id:
90 | return ["adb", "-s", device_id]
91 | return ["adb"]
92 |
93 |
def _create_fallback_screenshot(is_sensitive: bool) -> Screenshot:
    """Build a black 1080x2400 PNG placeholder carrying the given sensitivity flag."""
    size = (1080, 2400)

    png_buffer = BytesIO()
    Image.new("RGB", size, color="black").save(png_buffer, format="PNG")
    encoded = base64.b64encode(png_buffer.getvalue()).decode("utf-8")

    return Screenshot(
        base64_data=encoded,
        width=size[0],
        height=size[1],
        is_sensitive=is_sensitive,
    )
109 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/api/control.py:
--------------------------------------------------------------------------------
1 | """Device control routes (tap/swipe/touch)."""
2 |
3 | from fastapi import APIRouter
4 |
5 | from AutoGLM_GUI.schemas import (
6 | SwipeRequest,
7 | SwipeResponse,
8 | TapRequest,
9 | TapResponse,
10 | TouchDownRequest,
11 | TouchDownResponse,
12 | TouchMoveRequest,
13 | TouchMoveResponse,
14 | TouchUpRequest,
15 | TouchUpResponse,
16 | )
17 |
18 | router = APIRouter()
19 |
20 |
@router.post("/api/control/tap", response_model=TapResponse)
def control_tap(request: TapRequest) -> TapResponse:
    """Tap the device at the requested coordinates.

    Any exception, including a failed import of the ADB helper, is reported
    through the ``error`` field of the response instead of being raised.
    """
    try:
        from phone_agent.adb import tap

        tap_args = {
            "x": request.x,
            "y": request.y,
            "device_id": request.device_id,
            "delay": request.delay,
        }
        tap(**tap_args)

        return TapResponse(success=True)
    except Exception as exc:
        return TapResponse(success=False, error=str(exc))
37 |
38 |
@router.post("/api/control/swipe", response_model=SwipeResponse)
def control_swipe(request: SwipeRequest) -> SwipeResponse:
    """Swipe on the device from the start point to the end point.

    Any exception, including a failed import of the ADB helper, is reported
    through the ``error`` field of the response instead of being raised.
    """
    try:
        from phone_agent.adb import swipe

        swipe_args = {
            "start_x": request.start_x,
            "start_y": request.start_y,
            "end_x": request.end_x,
            "end_y": request.end_y,
            "duration_ms": request.duration_ms,
            "device_id": request.device_id,
            "delay": request.delay,
        }
        swipe(**swipe_args)

        return SwipeResponse(success=True)
    except Exception as exc:
        return SwipeResponse(success=False, error=str(exc))
58 |
59 |
@router.post("/api/control/touch/down", response_model=TouchDownResponse)
def control_touch_down(request: TouchDownRequest) -> TouchDownResponse:
    """Send a touch DOWN event at the requested device coordinates.

    Any exception, including a failed import of the touch helper, is reported
    through the ``error`` field of the response instead of being raised.
    """
    try:
        from AutoGLM_GUI.adb_plus import touch_down

        touch_args = {
            "x": request.x,
            "y": request.y,
            "device_id": request.device_id,
            "delay": request.delay,
        }
        touch_down(**touch_args)

        return TouchDownResponse(success=True)
    except Exception as exc:
        return TouchDownResponse(success=False, error=str(exc))
76 |
77 |
@router.post("/api/control/touch/move", response_model=TouchMoveResponse)
def control_touch_move(request: TouchMoveRequest) -> TouchMoveResponse:
    """Send a touch MOVE event at the requested device coordinates.

    Any exception, including a failed import of the touch helper, is reported
    through the ``error`` field of the response instead of being raised.
    """
    try:
        from AutoGLM_GUI.adb_plus import touch_move

        touch_args = {
            "x": request.x,
            "y": request.y,
            "device_id": request.device_id,
            "delay": request.delay,
        }
        touch_move(**touch_args)

        return TouchMoveResponse(success=True)
    except Exception as exc:
        return TouchMoveResponse(success=False, error=str(exc))
94 |
95 |
@router.post("/api/control/touch/up", response_model=TouchUpResponse)
def control_touch_up(request: TouchUpRequest) -> TouchUpResponse:
    """Send a touch UP event at the requested device coordinates.

    Any exception, including a failed import of the touch helper, is reported
    through the ``error`` field of the response instead of being raised.
    """
    try:
        from AutoGLM_GUI.adb_plus import touch_up

        touch_args = {
            "x": request.x,
            "y": request.y,
            "device_id": request.device_id,
            "delay": request.delay,
        }
        touch_up(**touch_args)

        return TouchUpResponse(success=True)
    except Exception as exc:
        return TouchUpResponse(success=False, error=str(exc))
112 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/adb_plus/screenshot.py:
--------------------------------------------------------------------------------
1 | """Robust screenshot helper using `adb exec-out screencap -p`.
2 |
3 | Features:
4 | - Avoids temp files and uses exec-out to reduce corruption.
5 | - Leaves the exec-out bytes untouched (no CRLF normalization, which would corrupt the PNG signature).
6 | - Validates PNG signature/size and retries before falling back.
7 | """
8 |
9 | import base64
10 | import subprocess
11 | from dataclasses import dataclass
12 | from io import BytesIO
13 |
14 | from PIL import Image
15 |
16 |
17 | PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"
18 |
19 |
@dataclass
class Screenshot:
    """Result of a screen capture, including the black fallback image."""

    # PNG bytes, base64 encoded and decoded to a UTF-8 string.
    base64_data: str
    # Image width in pixels.
    width: int
    # Image height in pixels.
    height: int
    # Sensitivity flag; the helpers in this module never set it to True.
    is_sensitive: bool = False
28 |
29 |
def capture_screenshot(
    device_id: str | None = None,
    adb_path: str = "adb",
    timeout: int = 10,
    retries: int = 1,
) -> Screenshot:
    """
    Grab the device screen via ``adb exec-out screencap -p``.

    Each attempt must yield non-empty data that passes the PNG signature and
    length check and that Pillow can open and re-encode. The first attempt
    that does is returned.

    Args:
        device_id: Optional device serial.
        adb_path: Path to adb binary.
        timeout: Per-attempt timeout in seconds.
        retries: Extra attempts after the first try (at least one attempt is
            always made).

    Returns:
        Screenshot object; a black fallback image when every attempt fails.
    """
    remaining = max(1, retries + 1)
    while remaining > 0:
        remaining -= 1

        raw = _try_capture(device_id=device_id, adb_path=adb_path, timeout=timeout)

        # NOTE: Do NOT do CRLF normalization for binary PNG data from exec-out.
        # The PNG signature contains \r\n bytes that must be preserved.
        if not raw or not _is_valid_png(raw):
            continue

        try:
            image = Image.open(BytesIO(raw))
            width, height = image.size
            png_buffer = BytesIO()
            image.save(png_buffer, format="PNG")
            encoded = base64.b64encode(png_buffer.getvalue()).decode("utf-8")
            return Screenshot(base64_data=encoded, width=width, height=height)
        except Exception:
            # Undecodable data: move on to the next attempt.
            continue

    return _fallback_screenshot()
72 |
73 |
74 | def _try_capture(device_id: str | None, adb_path: str, timeout: int) -> bytes | None:
75 | """Run exec-out screencap and return raw bytes or None on failure."""
76 | cmd: list[str | bytes] = [adb_path]
77 | if device_id:
78 | cmd.extend(["-s", device_id])
79 | cmd.extend(["exec-out", "screencap", "-p"])
80 |
81 | try:
82 | result = subprocess.run(
83 | cmd,
84 | capture_output=True,
85 | timeout=timeout,
86 | )
87 | if result.returncode != 0:
88 | return None
89 | # stdout should hold the PNG data
90 | return result.stdout if isinstance(result.stdout, (bytes, bytearray)) else None
91 | except Exception:
92 | return None
93 |
94 |
def _is_valid_png(data: bytes) -> bool:
    """Cheap PNG sanity check: long enough and starts with the PNG signature."""
    # Must be longer than the signature plus 8 more bytes (header + IHDR length).
    if len(data) <= len(PNG_SIGNATURE) + 8:
        return False
    return data.startswith(PNG_SIGNATURE)
101 |
102 |
def _fallback_screenshot() -> Screenshot:
    """Build the black 1080x2400 PNG placeholder returned when capture fails."""
    size = (1080, 2400)

    png_buffer = BytesIO()
    Image.new("RGB", size, color="black").save(png_buffer, format="PNG")
    encoded = base64.b64encode(png_buffer.getvalue()).decode("utf-8")

    return Screenshot(
        base64_data=encoded, width=size[0], height=size[1], is_sensitive=False
    )
113 |
--------------------------------------------------------------------------------
/phone_agent/config/prompts.py:
--------------------------------------------------------------------------------
1 | """System prompts for the AI agent."""
2 |
3 | from datetime import datetime
4 |
5 | today = datetime.today()
6 | formatted_date = today.strftime("%Y年%m月%d日")
7 |
8 | SYSTEM_PROMPT = (
9 | "今天的日期是: "
10 | + formatted_date
11 | + """
12 | 你是一个智能体分析专家,可以根据操作历史和当前状态图执行一系列操作来完成任务。
13 | 你必须严格按照要求输出以下格式:
14 | {think}
15 | {action}
16 |
17 | 其中:
18 | - {think} 是对你为什么选择这个操作的简短推理说明。
19 | - {action} 是本次执行的具体操作指令,必须严格遵循下方定义的指令格式。
20 |
21 | 操作指令及其作用如下:
22 | - do(action="Launch", app="xxx")
23 | Launch是启动目标app的操作,这比通过主屏幕导航更快。此操作完成后,您将自动收到结果状态的截图。
24 | - do(action="Tap", element=[x,y])
25 | Tap是点击操作,点击屏幕上的特定点。可用此操作点击按钮、选择项目、从主屏幕打开应用程序,或与任何可点击的用户界面元素进行交互。坐标系统从左上角 (0,0) 开始到右下角(999,999)结束。此操作完成后,您将自动收到结果状态的截图。
26 | - do(action="Tap", element=[x,y], message="重要操作")
27 | 基本功能同Tap,点击涉及财产、支付、隐私等敏感按钮时触发。
28 | - do(action="Type", text="xxx")
29 | Type是输入操作,在当前聚焦的输入框中输入文本。使用此操作前,请确保输入框已被聚焦(先点击它)。输入的文本将像使用键盘输入一样输入。重要提示:手机可能正在使用 ADB 键盘,该键盘不会像普通键盘那样占用屏幕空间。要确认键盘已激活,请查看屏幕底部是否显示 'ADB Keyboard {ON}' 类似的文本,或者检查输入框是否处于激活/高亮状态。不要仅仅依赖视觉上的键盘显示。自动清除文本:当你使用输入操作时,输入框中现有的任何文本(包括占位符文本和实际输入)都会在输入新文本前自动清除。你无需在输入前手动清除文本——直接使用输入操作输入所需文本即可。操作完成后,你将自动收到结果状态的截图。
30 | - do(action="Type_Name", text="xxx")
31 | Type_Name是输入人名的操作,基本功能同Type。
32 | - do(action="Interact")
33 | Interact是当有多个满足条件的选项时而触发的交互操作,询问用户如何选择。
34 | - do(action="Swipe", start=[x1,y1], end=[x2,y2])
35 | Swipe是滑动操作,通过从起始坐标拖动到结束坐标来执行滑动手势。可用于滚动内容、在屏幕之间导航、下拉通知栏以及项目栏或进行基于手势的导航。坐标系统从左上角 (0,0) 开始到右下角(999,999)结束。滑动持续时间会自动调整以实现自然的移动。此操作完成后,您将自动收到结果状态的截图。
36 | - do(action="Note", message="True")
37 | 记录当前页面内容以便后续总结。
38 | - do(action="Call_API", instruction="xxx")
39 | 总结或评论当前页面或已记录的内容。
40 | - do(action="Long Press", element=[x,y])
41 | Long Pres是长按操作,在屏幕上的特定点长按指定时间。可用于触发上下文菜单、选择文本或激活长按交互。坐标系统从左上角 (0,0) 开始到右下角(999,999)结束。此操作完成后,您将自动收到结果状态的屏幕截图。
42 | - do(action="Double Tap", element=[x,y])
43 | Double Tap在屏幕上的特定点快速连续点按两次。使用此操作可以激活双击交互,如缩放、选择文本或打开项目。坐标系统从左上角 (0,0) 开始到右下角(999,999)结束。此操作完成后,您将自动收到结果状态的截图。
44 | - do(action="Take_over", message="xxx")
45 | Take_over是接管操作,表示在登录和验证阶段需要用户协助。
46 | - do(action="Back")
47 | 导航返回到上一个屏幕或关闭当前对话框。相当于按下 Android 的返回按钮。使用此操作可以从更深的屏幕返回、关闭弹出窗口或退出当前上下文。此操作完成后,您将自动收到结果状态的截图。
48 | - do(action="Home")
49 | Home是回到系统桌面的操作,相当于按下 Android 主屏幕按钮。使用此操作可退出当前应用并返回启动器,或从已知状态启动新任务。此操作完成后,您将自动收到结果状态的截图。
50 | - do(action="Wait", duration="x seconds")
51 | 等待页面加载,x为需要等待多少秒。
52 | - finish(message="xxx")
53 | finish是结束任务的操作,表示准确完整完成任务,message是终止信息。
54 |
55 | 必须遵循的规则:
56 | 1. 在执行任何操作前,先检查当前app是否是目标app,如果不是,先执行 Launch。
57 | 2. 如果进入到了无关页面,先执行 Back。如果执行Back后页面没有变化,请点击页面左上角的返回键进行返回,或者右上角的X号关闭。
58 | 3. 如果页面未加载出内容,最多连续 Wait 三次,否则执行 Back重新进入。
59 | 4. 如果页面显示网络问题,需要重新加载,请点击重新加载。
60 | 5. 如果当前页面找不到目标联系人、商品、店铺等信息,可以尝试 Swipe 滑动查找。
61 | 6. 遇到价格区间、时间区间等筛选条件,如果没有完全符合的,可以放宽要求。
62 | 7. 在做小红书总结类任务时一定要筛选图文笔记。
63 | 8. 购物车全选后再点击全选可以把状态设为全不选,在做购物车任务时,如果购物车里已经有商品被选中时,你需要点击全选后再点击取消全选,再去找需要购买或者删除的商品。
64 | 9. 在做外卖任务时,如果相应店铺购物车里已经有其他商品你需要先把购物车清空再去购买用户指定的外卖。
65 | 10. 在做点外卖任务时,如果用户需要点多个外卖,请尽量在同一店铺进行购买,如果无法找到可以下单,并说明某个商品未找到。
66 | 11. 请严格遵循用户意图执行任务,用户的特殊要求可以执行多次搜索,滑动查找。比如(i)用户要求点一杯咖啡,要咸的,你可以直接搜索咸咖啡,或者搜索咖啡后滑动查找咸的咖啡,比如海盐咖啡。(ii)用户要找到XX群,发一条消息,你可以先搜索XX群,找不到结果后,将"群"字去掉,搜索XX重试。(iii)用户要找到宠物友好的餐厅,你可以搜索餐厅,找到筛选,找到设施,选择可带宠物,或者直接搜索可带宠物,必要时可以使用AI搜索。
67 | 12. 在选择日期时,如果原滑动方向与预期日期越来越远,请向反方向滑动查找。
68 | 13. 执行任务过程中如果有多个可选择的项目栏,请逐个查找每个项目栏,直到完成任务,一定不要在同一项目栏多次查找,从而陷入死循环。
69 | 14. 在执行下一步操作前请一定要检查上一步的操作是否生效,如果点击没生效,可能因为app反应较慢,请先稍微等待一下,如果还是不生效请调整一下点击位置重试,如果仍然不生效请跳过这一步继续任务,并在finish message说明点击不生效。
70 | 15. 在执行任务中如果遇到滑动不生效的情况,请调整一下起始点位置,增大滑动距离重试,如果还是不生效,有可能是已经滑到底了,请继续向反方向滑动,直到顶部或底部,如果仍然没有符合要求的结果,请跳过这一步继续任务,并在finish message说明但没找到要求的项目。
71 | 16. 在做游戏任务时如果在战斗页面如果有自动战斗一定要开启自动战斗,如果多轮历史状态相似要检查自动战斗是否开启。
72 | 17. 如果没有合适的搜索结果,可能是因为搜索页面不对,请返回到搜索页面的上一级尝试重新搜索,如果尝试三次返回上一级搜索后仍然没有符合要求的结果,执行 finish(message="原因")。
73 | 18. 在结束任务前请一定要仔细检查任务是否完整准确的完成,如果出现错选、漏选、多选的情况,请返回之前的步骤进行纠正。
74 | """
75 | )
76 |
--------------------------------------------------------------------------------
/phone_agent/config/prompts_zh.py:
--------------------------------------------------------------------------------
1 | """System prompts for the AI agent."""
2 |
3 | from datetime import datetime
4 |
5 | today = datetime.today()
6 | weekday_names = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
7 | weekday = weekday_names[today.weekday()]
8 | formatted_date = today.strftime("%Y年%m月%d日") + " " + weekday
9 |
10 | SYSTEM_PROMPT = (
11 | "今天的日期是: "
12 | + formatted_date
13 | + """
14 | 你是一个智能体分析专家,可以根据操作历史和当前状态图执行一系列操作来完成任务。
15 | 你必须严格按照要求输出以下格式:
16 | {think}
17 | {action}
18 |
19 | 其中:
20 | - {think} 是对你为什么选择这个操作的简短推理说明。
21 | - {action} 是本次执行的具体操作指令,必须严格遵循下方定义的指令格式。
22 |
23 | 操作指令及其作用如下:
24 | - do(action="Launch", app="xxx")
25 | Launch是启动目标app的操作,这比通过主屏幕导航更快。此操作完成后,您将自动收到结果状态的截图。
26 | - do(action="Tap", element=[x,y])
27 | Tap是点击操作,点击屏幕上的特定点。可用此操作点击按钮、选择项目、从主屏幕打开应用程序,或与任何可点击的用户界面元素进行交互。坐标系统从左上角 (0,0) 开始到右下角(999,999)结束。此操作完成后,您将自动收到结果状态的截图。
28 | - do(action="Tap", element=[x,y], message="重要操作")
29 | 基本功能同Tap,点击涉及财产、支付、隐私等敏感按钮时触发。
30 | - do(action="Type", text="xxx")
31 | Type是输入操作,在当前聚焦的输入框中输入文本。使用此操作前,请确保输入框已被聚焦(先点击它)。输入的文本将像使用键盘输入一样输入。重要提示:手机可能正在使用 ADB 键盘,该键盘不会像普通键盘那样占用屏幕空间。要确认键盘已激活,请查看屏幕底部是否显示 'ADB Keyboard {ON}' 类似的文本,或者检查输入框是否处于激活/高亮状态。不要仅仅依赖视觉上的键盘显示。自动清除文本:当你使用输入操作时,输入框中现有的任何文本(包括占位符文本和实际输入)都会在输入新文本前自动清除。你无需在输入前手动清除文本——直接使用输入操作输入所需文本即可。操作完成后,你将自动收到结果状态的截图。
32 | - do(action="Type_Name", text="xxx")
33 | Type_Name是输入人名的操作,基本功能同Type。
34 | - do(action="Interact")
35 | Interact是当有多个满足条件的选项时而触发的交互操作,询问用户如何选择。
36 | - do(action="Swipe", start=[x1,y1], end=[x2,y2])
37 | Swipe是滑动操作,通过从起始坐标拖动到结束坐标来执行滑动手势。可用于滚动内容、在屏幕之间导航、下拉通知栏以及项目栏或进行基于手势的导航。坐标系统从左上角 (0,0) 开始到右下角(999,999)结束。滑动持续时间会自动调整以实现自然的移动。此操作完成后,您将自动收到结果状态的截图。
38 | - do(action="Note", message="True")
39 | 记录当前页面内容以便后续总结。
40 | - do(action="Call_API", instruction="xxx")
41 | 总结或评论当前页面或已记录的内容。
42 | - do(action="Long Press", element=[x,y])
43 | Long Pres是长按操作,在屏幕上的特定点长按指定时间。可用于触发上下文菜单、选择文本或激活长按交互。坐标系统从左上角 (0,0) 开始到右下角(999,999)结束。此操作完成后,您将自动收到结果状态的屏幕截图。
44 | - do(action="Double Tap", element=[x,y])
45 | Double Tap在屏幕上的特定点快速连续点按两次。使用此操作可以激活双击交互,如缩放、选择文本或打开项目。坐标系统从左上角 (0,0) 开始到右下角(999,999)结束。此操作完成后,您将自动收到结果状态的截图。
46 | - do(action="Take_over", message="xxx")
47 | Take_over是接管操作,表示在登录和验证阶段需要用户协助。
48 | - do(action="Back")
49 | 导航返回到上一个屏幕或关闭当前对话框。相当于按下 Android 的返回按钮。使用此操作可以从更深的屏幕返回、关闭弹出窗口或退出当前上下文。此操作完成后,您将自动收到结果状态的截图。
50 | - do(action="Home")
51 | Home是回到系统桌面的操作,相当于按下 Android 主屏幕按钮。使用此操作可退出当前应用并返回启动器,或从已知状态启动新任务。此操作完成后,您将自动收到结果状态的截图。
52 | - do(action="Wait", duration="x seconds")
53 | 等待页面加载,x为需要等待多少秒。
54 | - finish(message="xxx")
55 | finish是结束任务的操作,表示准确完整完成任务,message是终止信息。
56 |
57 | 必须遵循的规则:
58 | 1. 在执行任何操作前,先检查当前app是否是目标app,如果不是,先执行 Launch。
59 | 2. 如果进入到了无关页面,先执行 Back。如果执行Back后页面没有变化,请点击页面左上角的返回键进行返回,或者右上角的X号关闭。
60 | 3. 如果页面未加载出内容,最多连续 Wait 三次,否则执行 Back重新进入。
61 | 4. 如果页面显示网络问题,需要重新加载,请点击重新加载。
62 | 5. 如果当前页面找不到目标联系人、商品、店铺等信息,可以尝试 Swipe 滑动查找。
63 | 6. 遇到价格区间、时间区间等筛选条件,如果没有完全符合的,可以放宽要求。
64 | 7. 在做小红书总结类任务时一定要筛选图文笔记。
65 | 8. 购物车全选后再点击全选可以把状态设为全不选,在做购物车任务时,如果购物车里已经有商品被选中时,你需要点击全选后再点击取消全选,再去找需要购买或者删除的商品。
66 | 9. 在做外卖任务时,如果相应店铺购物车里已经有其他商品你需要先把购物车清空再去购买用户指定的外卖。
67 | 10. 在做点外卖任务时,如果用户需要点多个外卖,请尽量在同一店铺进行购买,如果无法找到可以下单,并说明某个商品未找到。
68 | 11. 请严格遵循用户意图执行任务,用户的特殊要求可以执行多次搜索,滑动查找。比如(i)用户要求点一杯咖啡,要咸的,你可以直接搜索咸咖啡,或者搜索咖啡后滑动查找咸的咖啡,比如海盐咖啡。(ii)用户要找到XX群,发一条消息,你可以先搜索XX群,找不到结果后,将"群"字去掉,搜索XX重试。(iii)用户要找到宠物友好的餐厅,你可以搜索餐厅,找到筛选,找到设施,选择可带宠物,或者直接搜索可带宠物,必要时可以使用AI搜索。
69 | 12. 在选择日期时,如果原滑动方向与预期日期越来越远,请向反方向滑动查找。
70 | 13. 执行任务过程中如果有多个可选择的项目栏,请逐个查找每个项目栏,直到完成任务,一定不要在同一项目栏多次查找,从而陷入死循环。
71 | 14. 在执行下一步操作前请一定要检查上一步的操作是否生效,如果点击没生效,可能因为app反应较慢,请先稍微等待一下,如果还是不生效请调整一下点击位置重试,如果仍然不生效请跳过这一步继续任务,并在finish message说明点击不生效。
72 | 15. 在执行任务中如果遇到滑动不生效的情况,请调整一下起始点位置,增大滑动距离重试,如果还是不生效,有可能是已经滑到底了,请继续向反方向滑动,直到顶部或底部,如果仍然没有符合要求的结果,请跳过这一步继续任务,并在finish message说明但没找到要求的项目。
73 | 16. 在做游戏任务时如果在战斗页面如果有自动战斗一定要开启自动战斗,如果多轮历史状态相似要检查自动战斗是否开启。
74 | 17. 如果没有合适的搜索结果,可能是因为搜索页面不对,请返回到搜索页面的上一级尝试重新搜索,如果尝试三次返回上一级搜索后仍然没有符合要求的结果,执行 finish(message="原因")。
75 | 18. 在结束任务前请一定要仔细检查任务是否完整准确的完成,如果出现错选、漏选、多选的情况,请返回之前的步骤进行纠正。
76 | """
77 | )
78 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/api/devices.py:
--------------------------------------------------------------------------------
1 | """Device discovery routes."""
2 |
3 | from fastapi import APIRouter
4 |
5 | from AutoGLM_GUI.adb_plus import get_wifi_ip, get_device_serial
6 |
7 | from AutoGLM_GUI.schemas import (
8 | DeviceListResponse,
9 | WiFiConnectRequest,
10 | WiFiConnectResponse,
11 | WiFiDisconnectRequest,
12 | WiFiDisconnectResponse,
13 | )
14 | from AutoGLM_GUI.state import agents
15 |
16 | router = APIRouter()
17 |
18 |
@router.get("/api/devices", response_model=DeviceListResponse)
def list_devices() -> DeviceListResponse:
    """List all ADB devices."""
    from phone_agent.adb import list_devices as adb_list, ADBConnection

    adb_devices = adb_list()
    conn = ADBConnection()

    def describe(device) -> dict:
        # Resolve the real hardware serial through the adb_plus helper.
        serial = get_device_serial(device.device_id, conn.adb_path)
        return {
            "id": device.device_id,
            "model": device.model or "Unknown",
            "status": device.status,
            "connection_type": device.connection_type.value,
            # A device counts as initialized once an agent is registered for it.
            "is_initialized": device.device_id in agents,
            "serial": serial,
        }

    return DeviceListResponse(devices=[describe(d) for d in adb_devices])
44 |
45 |
@router.post("/api/devices/connect_wifi", response_model=WiFiConnectResponse)
def connect_wifi(request: WiFiConnectRequest) -> WiFiConnectResponse:
    """Enable TCP/IP on a USB device and connect to it over WiFi."""
    from phone_agent.adb import ADBConnection, ConnectionType

    conn = ADBConnection()

    def failure(message: str, code: str) -> WiFiConnectResponse:
        # Shared shape of every unsuccessful response from this endpoint.
        return WiFiConnectResponse(success=False, message=message, error=code)

    # Look up the requested device. The original note says the first online
    # device is used when no device_id is supplied; that fallback lives in
    # ADBConnection.get_device_info and is not visible here.
    device = conn.get_device_info(request.device_id)
    if not device:
        return failure("No connected device found", "device_not_found")

    # Already connected over WiFi: nothing to do.
    if device.connection_type == ConnectionType.REMOTE:
        current_address = device.device_id
        return WiFiConnectResponse(
            success=True,
            message="Already connected over WiFi",
            device_id=current_address,
            address=current_address,
        )

    # 1) Enable tcpip mode on the device.
    enabled, detail = conn.enable_tcpip(port=request.port, device_id=device.device_id)
    if not enabled:
        return failure(detail or "Failed to enable tcpip", "tcpip")

    # 2) Resolve the device IP: local adb_plus WiFi lookup first, then the
    #    upstream connection helper as a fallback.
    ip = get_wifi_ip(conn.adb_path, device.device_id)
    if not ip:
        ip = conn.get_device_ip(device.device_id)
    if not ip:
        return failure("Failed to get device IP", "ip")

    address = f"{ip}:{request.port}"

    # 3) Connect over WiFi.
    connected, detail = conn.connect(address)
    if not connected:
        return failure(detail or "Failed to connect over WiFi", "connect")

    return WiFiConnectResponse(
        success=True,
        message="Switched to WiFi successfully",
        device_id=address,
        address=address,
    )
105 |
106 |
@router.post("/api/devices/disconnect_wifi", response_model=WiFiDisconnectResponse)
def disconnect_wifi(request: WiFiDisconnectRequest) -> WiFiDisconnectResponse:
    """Disconnect the WiFi (remote) ADB connection named by ``request.device_id``.

    The outcome reported by ``ADBConnection.disconnect`` is forwarded; a failed
    disconnect is tagged with the error code ``disconnect_failed``.
    """
    from phone_agent.adb import ADBConnection

    succeeded, detail = ADBConnection().disconnect(request.device_id)
    error_code = "disconnect_failed" if not succeeded else None

    return WiFiDisconnectResponse(success=succeeded, message=detail, error=error_code)
120 |
--------------------------------------------------------------------------------
/scripts/build.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """Build script for AutoGLM-GUI.
3 |
4 | This script builds the frontend and copies the dist files to the package.
5 | Run this before publishing to PyPI.
6 |
7 | Usage:
8 | uv run python scripts/build.py # Build frontend only
9 | uv run python scripts/build.py --pack # Build frontend and create package
10 | """
11 |
12 | import argparse
13 | import shutil
14 | import subprocess
15 | import sys
16 | from pathlib import Path
17 |
# Repository root: two levels up from this script (scripts/build.py).
ROOT_DIR = Path(__file__).parent.parent
# Frontend project that pnpm builds.
FRONTEND_DIR = ROOT_DIR.joinpath("frontend")
# Destination inside the Python package for the built frontend assets.
STATIC_DIR = ROOT_DIR.joinpath("AutoGLM_GUI", "static")
21 |
22 |
def build_frontend() -> bool:
    """Install the frontend dependencies and build the frontend with pnpm.

    Returns:
        True when ``pnpm install`` and ``pnpm build`` both exit with status 0.
        False when pnpm cannot be located or run, or when either step fails.
    """
    print("Building frontend...")

    # Resolve pnpm through PATH. shutil.which honours PATHEXT on Windows, where
    # an npm-installed pnpm is a ``pnpm.cmd`` shim that a bare "pnpm" argv
    # (without a shell) would fail to locate.
    pnpm = shutil.which("pnpm")
    if pnpm is None:
        print("Error: pnpm is not installed. Please install pnpm first.")
        return False

    # Make sure the resolved executable actually runs.
    try:
        subprocess.run([pnpm, "--version"], check=True, capture_output=True)
    except (subprocess.CalledProcessError, OSError):
        print("Error: pnpm is not installed. Please install pnpm first.")
        return False

    # Install dependencies
    print("Installing frontend dependencies...")
    result = subprocess.run([pnpm, "install"], cwd=FRONTEND_DIR)
    if result.returncode != 0:
        print("Error: Failed to install frontend dependencies.")
        return False

    # Build
    print("Building frontend...")
    result = subprocess.run([pnpm, "build"], cwd=FRONTEND_DIR)
    if result.returncode != 0:
        print("Error: Failed to build frontend.")
        return False

    return True
49 |
50 |
def copy_static_files() -> bool:
    """Replace the package's static directory with the frontend build output.

    Returns:
        True after ``FRONTEND_DIR / "dist"`` has been copied to ``STATIC_DIR``;
        False when the dist directory does not exist.
    """
    print("Copying static files to package...")

    built_assets = FRONTEND_DIR / "dist"
    if built_assets.exists():
        # Drop any previous copy first so stale files do not survive.
        if STATIC_DIR.exists():
            shutil.rmtree(STATIC_DIR)
        shutil.copytree(built_assets, STATIC_DIR)

        print(f"Static files copied to: {STATIC_DIR}")
        return True

    print(f"Error: Frontend dist directory not found: {built_assets}")
    return False
69 |
70 |
def build_package() -> bool:
    """Run ``uv build`` in the repository root after clearing ``dist/``.

    Returns:
        True when ``uv build`` exits with status 0, otherwise False.
    """
    print("Building Python package...")

    # Remove artifacts left over from an earlier build.
    previous_artifacts = ROOT_DIR / "dist"
    if previous_artifacts.exists():
        shutil.rmtree(previous_artifacts)

    completed = subprocess.run(["uv", "build"], cwd=ROOT_DIR)
    if completed.returncode == 0:
        return True

    print("Error: Failed to build package.")
    return False
86 |
87 |
def main() -> int:
    """Run the build stages and print follow-up instructions.

    Stages run in order (frontend build, static copy, and the package build
    when ``--pack`` is given); the first failing stage stops the run.

    Returns:
        0 when every stage succeeded, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description="Build AutoGLM-GUI for distribution")
    parser.add_argument(
        "--pack", action="store_true", help="Also build Python package after frontend"
    )
    args = parser.parse_args()

    rule = "=" * 50
    print(rule)
    print("AutoGLM-GUI Build Script")
    print(rule)

    stages = [build_frontend, copy_static_files]
    if args.pack:
        stages.append(build_package)
    for stage in stages:
        if not stage():
            return 1

    if args.pack:
        closing_lines = [
            "Package built in: dist/",
            "",
            "Next steps:",
            " 1. Test: uvx --from dist/autoglm_gui-*.whl autoglm-gui",
            " 2. Publish: uv publish",
        ]
    else:
        closing_lines = [
            "Next steps:",
            " 1. Test locally: uv run autoglm-gui",
            " 2. Build package: uv run python scripts/build.py --pack",
            " 3. Publish to PyPI: uv publish",
        ]

    print()
    print(rule)
    print("Build completed successfully!")
    print()
    for line in closing_lines:
        print(line)
    print(rule)

    return 0


if __name__ == "__main__":
    sys.exit(main())
132 |
--------------------------------------------------------------------------------
/scripts/download_adb.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | ADB 工具自动下载脚本
4 |
5 | 用法:
6 | uv run python scripts/download_adb.py # 下载所有平台(Windows + macOS)
7 | uv run python scripts/download_adb.py windows # 只下载 Windows
8 | uv run python scripts/download_adb.py darwin # 只下载 macOS
9 |
10 | 输出目录:
11 | resources/adb/windows/platform-tools/
12 | resources/adb/darwin/platform-tools/
13 | """
14 |
15 | import sys
16 | import urllib.request
17 | import zipfile
18 | from pathlib import Path
19 |
20 |
# Official Google download locations for Android Platform Tools, keyed by platform.
_PLATFORM_TOOLS_URL = (
    "https://dl.google.com/android/repository/platform-tools-latest-{}.zip"
)
ADB_URLS = {
    name: _PLATFORM_TOOLS_URL.format(name) for name in ("windows", "darwin", "linux")
}
27 |
28 |
def download_with_progress(url: str, output_path: Path) -> None:
    """Download ``url`` to ``output_path`` while printing a progress line.

    Args:
        url: Location to fetch with ``urllib.request.urlretrieve``.
        output_path: File the response body is written to.

    Raises:
        Exception: Any error from ``urlretrieve`` is re-raised after a failure
            message has been printed.
    """
    print(f" 下载: {url}")

    def reporthook(block_num, block_size, total_size):
        # Progress is only shown when the total size is known.
        if total_size > 0:
            # block_num * block_size counts whole blocks, so it overshoots on
            # the final partial block; clamp it so the line never reports
            # more downloaded than the total.
            downloaded = min(block_num * block_size, total_size)
            percent = downloaded * 100 / total_size
            mb_downloaded = downloaded / (1024 * 1024)
            mb_total = total_size / (1024 * 1024)
            print(
                f" 进度: {percent:.1f}% ({mb_downloaded:.1f}/{mb_total:.1f} MB)",
                end="\r",
            )

    try:
        urllib.request.urlretrieve(url, output_path, reporthook=reporthook)
        print()  # finish the carriage-return progress line
    except Exception as e:
        print(f"\n ❌ 下载失败: {e}")
        raise
50 |
51 |
def download_adb(platform: str) -> None:
    """Download and unpack the Android platform-tools archive for ``platform``.

    The archive is fetched into ``<repo root>/resources/adb/<platform>/``,
    extracted there and then deleted. Executable permission bits recorded in
    the archive are re-applied to the extracted files.

    Args:
        platform: A key of ``ADB_URLS``.

    Raises:
        ValueError: If ``platform`` is not a key of ``ADB_URLS``.
        Exception: Download and extraction errors are re-raised after a
            message has been printed.
    """
    url = ADB_URLS.get(platform)
    if not url:
        print(f"❌ 不支持的平台: {platform}")
        print(f" 支持的平台: {', '.join(ADB_URLS.keys())}")
        # Raise rather than return: a silent return is indistinguishable from
        # a successful download for the caller.
        raise ValueError(f"Unsupported platform: {platform}")

    # Project root directory
    root_dir = Path(__file__).parent.parent
    output_dir = root_dir / "resources" / "adb" / platform
    output_dir.mkdir(parents=True, exist_ok=True)

    zip_path = output_dir / "platform-tools.zip"

    print(f"\n{'=' * 60}")
    print(f"下载 ADB 工具 - {platform}")
    print(f"{'=' * 60}")

    # Download
    download_with_progress(url, zip_path)

    # Extract
    print(" 解压中...")
    try:
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            for member in zip_ref.infolist():
                extracted = Path(zip_ref.extract(member, output_dir))
                # zipfile does not restore Unix permission bits, which would
                # leave adb non-executable on macOS/Linux. The high 16 bits of
                # external_attr hold the Unix mode when the archive has one.
                exec_bits = (member.external_attr >> 16) & 0o111
                if exec_bits and extracted.is_file():
                    extracted.chmod(extracted.stat().st_mode | exec_bits)
        print(" ✓ 解压完成")
    except Exception as e:
        print(f" ❌ 解压失败: {e}")
        raise

    # Remove the downloaded archive
    zip_path.unlink()
    print(" ✓ 清理临时文件")

    # Verify that the adb executable is where it is expected
    platform_tools_dir = output_dir / "platform-tools"
    adb_exe = platform_tools_dir / ("adb.exe" if platform == "windows" else "adb")

    if adb_exe.exists():
        file_size = adb_exe.stat().st_size / (1024 * 1024)
        print(f" ✓ ADB 可执行文件: {adb_exe} ({file_size:.1f} MB)")
    else:
        print(" ⚠️ 警告: 未找到 ADB 可执行文件")

    print(f"\n✓ {platform.upper()} ADB 工具下载完成")
    print(f" 位置: {output_dir}")
100 |
101 |
def main():
    """Download ADB tools for each platform named on the command line.

    Without arguments the platforms ``windows`` and ``darwin`` are used.
    A summary is printed at the end and the process exits with status 1 when
    any platform failed.
    """
    requested = sys.argv[1:]
    platforms = requested or ["windows", "darwin"]
    rule = "=" * 60

    print("\n" + rule)
    print(" AutoGLM-GUI - ADB 工具下载器")
    print(rule)
    print(f" 目标平台: {', '.join(platforms)}")

    failed_platforms = []
    for platform in platforms:
        try:
            download_adb(platform)
        except Exception as e:
            print(f"\n❌ {platform} 下载失败: {e}")
            failed_platforms.append(platform)

    # Every platform that did not raise counts as a success.
    success_count = len(platforms) - len(failed_platforms)

    # Summary
    print("\n" + rule)
    print(" 下载总结")
    print(rule)
    print(f" 成功: {success_count}/{len(platforms)}")
    if failed_platforms:
        print(f" 失败: {', '.join(failed_platforms)}")
    print(rule)

    if failed_platforms:
        sys.exit(1)


if __name__ == "__main__":
    main()
138 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/schemas.py:
--------------------------------------------------------------------------------
1 | """Shared Pydantic models for the AutoGLM-GUI API."""
2 |
3 | from pydantic import BaseModel, Field
4 |
5 |
class APIModelConfig(BaseModel):
    """Model endpoint and sampling settings supplied through the API.

    The endpoint fields (``base_url``, ``api_key``, ``model_name``) are
    optional and default to None; the sampling fields carry numeric defaults.
    """

    base_url: str | None = None
    api_key: str | None = None
    model_name: str | None = None
    max_tokens: int = 3000
    temperature: float = 0.0
    top_p: float = 0.85
    frequency_penalty: float = 0.2
14 |
15 |
class APIAgentConfig(BaseModel):
    """Agent settings supplied through the API (step limit, device, language, prompt)."""

    max_steps: int = 100
    device_id: str | None = None
    lang: str = "cn"
    system_prompt: str | None = None
    verbose: bool = True
22 |
23 |
class InitRequest(BaseModel):
    """Initialization request carrying optional model and agent configuration.

    The fields are exposed under the aliases ``model_config`` and
    ``agent_config``.
    """

    model: APIModelConfig | None = Field(default=None, alias="model_config")
    agent: APIAgentConfig | None = Field(default=None, alias="agent_config")
27 |
28 |
class ChatRequest(BaseModel):
    """Chat request: a message addressed to one device."""

    message: str
    device_id: str  # device ID (required)
32 |
33 |
class ChatResponse(BaseModel):
    """Chat result: result text, number of steps and a success flag."""

    result: str
    steps: int
    success: bool
38 |
39 |
class StatusResponse(BaseModel):
    """Status payload: version string, initialization flag and step count."""

    version: str
    initialized: bool
    step_count: int
44 |
45 |
class ResetRequest(BaseModel):
    """Reset request naming the device to reset."""

    device_id: str  # device ID (required)
48 |
49 |
class ScreenshotRequest(BaseModel):
    """Screenshot request with an optional device ID."""

    device_id: str | None = None
52 |
53 |
class ScreenshotResponse(BaseModel):
    """Screenshot result: encoded image, its dimensions and status fields."""

    success: bool
    image: str  # base64 encoded PNG
    width: int
    height: int
    is_sensitive: bool
    error: str | None = None
61 |
62 |
class TapRequest(BaseModel):
    """Tap request: integer coordinates, optional device ID and a delay."""

    x: int
    y: int
    device_id: str | None = None
    delay: float = 0.0  # NOTE(review): presumably seconds to wait after the tap - confirm
68 |
69 |
class TapResponse(BaseModel):
    """Tap result: success flag and optional error text."""

    success: bool
    error: str | None = None
73 |
74 |
class SwipeRequest(BaseModel):
    """Swipe request: start and end coordinates, optional duration, device ID and delay."""

    start_x: int
    start_y: int
    end_x: int
    end_y: int
    duration_ms: int | None = None  # swipe duration in milliseconds; None leaves it unset
    device_id: str | None = None
    delay: float = 0.0  # NOTE(review): presumably seconds to wait after the swipe - confirm
83 |
84 |
class SwipeResponse(BaseModel):
    """Swipe result: success flag and optional error text."""

    success: bool
    error: str | None = None
88 |
89 |
class TouchDownRequest(BaseModel):
    """Touch-down request: coordinates, optional device ID and a delay."""

    x: int
    y: int
    device_id: str | None = None
    delay: float = 0.0  # NOTE(review): presumably seconds - confirm against the handler
95 |
96 |
class TouchDownResponse(BaseModel):
    """Touch-down result: success flag and optional error text."""

    success: bool
    error: str | None = None
100 |
101 |
class TouchMoveRequest(BaseModel):
    """Touch-move request: coordinates, optional device ID and a delay."""

    x: int
    y: int
    device_id: str | None = None
    delay: float = 0.0  # NOTE(review): presumably seconds - confirm against the handler
107 |
108 |
class TouchMoveResponse(BaseModel):
    """Touch-move result: success flag and optional error text."""

    success: bool
    error: str | None = None
112 |
113 |
class TouchUpRequest(BaseModel):
    """Touch-up request: coordinates, optional device ID and a delay."""

    x: int
    y: int
    device_id: str | None = None
    delay: float = 0.0  # NOTE(review): presumably seconds - confirm against the handler
119 |
120 |
class TouchUpResponse(BaseModel):
    """Touch-up result: success flag and optional error text."""

    success: bool
    error: str | None = None
124 |
125 |
class DeviceListResponse(BaseModel):
    """Device list result: one untyped dict per device."""

    devices: list[dict]
128 |
129 |
class ConfigResponse(BaseModel):
    """Response returned when the configuration is read."""

    base_url: str
    model_name: str
    api_key: str  # returns the actual value (plain text)
    source: str  # "CLI arguments" | "environment variables" | "config file (...)" | "default"
    conflicts: list[dict] | None = None  # configuration conflict details (optional)
    # Example of conflicts:
    # [
    #   {
    #     "field": "base_url",
    #     "file_value": "http://localhost:8080/v1",
    #     "override_value": "https://api.example.com",
    #     "override_source": "CLI arguments"
    #   }
    # ]
147 |
148 |
class ConfigSaveRequest(BaseModel):
    """Request to save the configuration.

    ``base_url`` is required; ``model_name`` defaults to ``autoglm-phone-9b``
    and ``api_key`` is optional.
    """

    base_url: str
    model_name: str = "autoglm-phone-9b"
    api_key: str | None = None
155 |
156 |
class WiFiConnectRequest(BaseModel):
    """Request to switch a device to an ADB-over-WiFi connection.

    ``device_id`` is optional. ``port`` is the TCP port used for tcpip mode
    and for the ``<ip>:<port>`` connect address.
    """

    device_id: str | None = None
    # Request input that ends up in the adb tcpip/connect commands, so values
    # outside the valid TCP port range are rejected at validation time.
    port: int = Field(default=5555, ge=1, le=65535)
160 |
161 |
class WiFiConnectResponse(BaseModel):
    """Result of a WiFi connect attempt.

    ``device_id`` and ``address`` are optional and default to None; ``error``
    is an optional short error code.
    """

    success: bool
    message: str
    device_id: str | None = None
    address: str | None = None
    error: str | None = None
168 |
169 |
class WiFiDisconnectRequest(BaseModel):
    """Request to disconnect the WiFi connection identified by ``device_id``."""

    device_id: str
172 |
173 |
class WiFiDisconnectResponse(BaseModel):
    """Result of a WiFi disconnect attempt: success flag, message and optional error code."""

    success: bool
    message: str
    error: str | None = None
178 |
--------------------------------------------------------------------------------
/frontend/eslint.config.js:
--------------------------------------------------------------------------------
1 | import js from '@eslint/js';
2 | import typescript from '@typescript-eslint/eslint-plugin';
3 | import typescriptParser from '@typescript-eslint/parser';
4 | import react from 'eslint-plugin-react';
5 | import reactHooks from 'eslint-plugin-react-hooks';
6 | import reactRefresh from 'eslint-plugin-react-refresh';
7 | import prettier from 'eslint-plugin-prettier';
8 | import prettierConfig from 'eslint-config-prettier';
9 | import globals from 'globals';
10 |
// Globals declared explicitly on top of the `globals` browser/node presets.
const explicitGlobals = Object.fromEntries(
  [
    'console',
    'process',
    'Buffer',
    '__dirname',
    '__filename',
    'module',
    'require',
    'exports',
    'global',
    'window',
    'document',
    'navigator',
    'localStorage',
    'sessionStorage',
    'setInterval',
    'clearInterval',
    'setTimeout',
    'clearTimeout',
    'HTMLDivElement',
    'HTMLElement',
    'HTMLInputElement',
    'HTMLVideoElement',
    'WebSocket',
    'AbortController',
    'TextDecoder',
    'TextEncoder',
    'fetch',
    'URL',
    'Blob',
    'File',
    'FileReader',
    'requestAnimationFrame',
    'cancelAnimationFrame',
    'NodeJS',
  ].map(name => [name, 'readonly'])
);

// Project overrides, applied after the preset rule sets below.
const typescriptRules = {
  '@typescript-eslint/no-unused-vars': ['error', { argsIgnorePattern: '^_' }],
  '@typescript-eslint/explicit-function-return-type': 'off',
  '@typescript-eslint/explicit-module-boundary-types': 'off',
  '@typescript-eslint/no-explicit-any': 'warn',
  '@typescript-eslint/no-non-null-assertion': 'warn',
  '@typescript-eslint/no-require-imports': 'off',
};

const reactRules = {
  // The JSX runtime of React 17+ needs no React import in scope.
  'react/react-in-jsx-scope': 'off',
  // Props are validated by TypeScript instead of prop-types.
  'react/prop-types': 'off',
  'react/jsx-uses-react': 'off',
  'react/jsx-key': 'error',
  'react/jsx-no-duplicate-props': 'error',
  'react-hooks/rules-of-hooks': 'error',
  // Allows Date.now() inside event handlers.
  'react-hooks/purity': 'off',
};

const generalRules = {
  // Console output is allowed for debugging.
  'no-console': 'off',
  'no-debugger': 'error',
  'prefer-const': 'error',
  'no-var': 'error',
  // Unused variables are reported by the TypeScript rule above.
  'no-unused-vars': 'off',
};

// Paths that are never linted.
const ignoredPaths = [
  'dist/**',
  'node_modules/**',
  'build/**',
  'coverage/**',
  '*.config.js',
  '*.config.ts',
  'vite.config.*',
  'tailwind.config.*',
];

export default [
  js.configs.recommended,
  {
    files: ['**/*.{js,jsx,ts,tsx}'],
    languageOptions: {
      parser: typescriptParser,
      parserOptions: {
        ecmaVersion: 'latest',
        sourceType: 'module',
        ecmaFeatures: { jsx: true },
      },
      globals: {
        ...globals.browser,
        ...globals.node,
        ...explicitGlobals,
      },
    },
    plugins: {
      '@typescript-eslint': typescript,
      react,
      'react-hooks': reactHooks,
      'react-refresh': reactRefresh,
      prettier,
    },
    rules: {
      // Preset rule sets first; later spreads override earlier ones.
      ...typescript.configs.recommended.rules,
      ...react.configs.recommended.rules,
      ...reactHooks.configs.recommended.rules,
      ...reactRefresh.configs.recommended.rules,
      ...prettierConfig.rules,

      ...typescriptRules,
      ...reactRules,
      ...generalRules,

      // Formatting problems are reported as lint errors.
      'prettier/prettier': 'error',
    },
    settings: {
      react: { version: 'detect' },
    },
  },
  {
    files: ['**/*.js'],
    rules: {
      '@typescript-eslint/no-require-imports': 'off',
    },
  },
  { ignores: ignoredPaths },
];
132 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AutoGLM-GUI
2 |
3 | AutoGLM 手机助手的现代化 Web 图形界面 - 让 AI 自动化操作 Android 设备变得简单
4 |
5 | 
6 | 
7 |
8 |
9 | 欢迎加入讨论交流群
10 |
11 | ## ✨ 特性
12 |
13 | - **多设备并发控制** - 同时管理和控制多个 Android 设备,设备间状态完全隔离
14 | - **对话式任务管理** - 通过聊天界面控制 Android 设备
15 | - **实时屏幕预览** - 基于 scrcpy 的低延迟视频流,随时查看设备正在执行的操作
16 | - **直接操控手机** - 在实时画面上直接点击、滑动操作,支持精准坐标转换和视觉反馈
17 | - **零配置部署** - 支持任何 OpenAI 兼容的 LLM API
18 | - **ADB 深度集成** - 通过 Android Debug Bridge 直接控制设备
19 | - **模块化界面** - 清晰的侧边栏 + 设备面板设计,功能分离明确
20 |
21 | ## 📸 界面预览
22 |
23 | ### 任务开始
24 | 
25 |
26 | ### 任务执行完成
27 | 
28 |
29 | ### 多设备控制
30 | 
31 |
32 | ## 🚀 快速开始
33 |
34 | ## 🎯 模型服务配置
35 |
36 | AutoGLM-GUI 只需要一个 OpenAI 兼容的模型服务。你可以:
37 |
38 | - 使用官方已托管的第三方服务
39 | - 智谱 BigModel:`--base-url https://open.bigmodel.cn/api/paas/v4`,`--model autoglm-phone`,`--apikey <你的 API Key>`
40 | - ModelScope:`--base-url https://api-inference.modelscope.cn/v1`,`--model ZhipuAI/AutoGLM-Phone-9B`,`--apikey <你的 API Key>`
41 | - 或自建服务:参考上游项目的[部署文档](https://github.com/zai-org/Open-AutoGLM/blob/main/README.md)用 vLLM/SGLang 部署 `zai-org/AutoGLM-Phone-9B`,启动 OpenAI 兼容端口后将 `--base-url` 指向你的服务。
42 |
43 | 示例:
44 |
45 | ```bash
46 | # 使用智谱 BigModel
47 | pip install autoglm-gui
48 | autoglm-gui \
49 | --base-url https://open.bigmodel.cn/api/paas/v4 \
50 | --model autoglm-phone \
51 | --apikey sk-xxxxx
52 |
53 | # 使用 ModelScope
54 | pip install autoglm-gui
55 | autoglm-gui \
56 | --base-url https://api-inference.modelscope.cn/v1 \
57 | --model ZhipuAI/AutoGLM-Phone-9B \
58 | --apikey sk-xxxxx
59 |
60 | # 指向你自建的 vLLM/SGLang 服务
61 | pip install autoglm-gui
62 | autoglm-gui --base-url http://localhost:8000/v1 --model autoglm-phone-9b
63 | ```
64 |
65 | ### 前置要求
66 |
67 | - Python 3.10+
68 | - 已开启 USB 调试的 Android 设备
69 | - 已安装 ADB 并添加到系统 PATH
70 | - 一个 OpenAI 兼容的 API 端点
71 |
72 | ### 快捷运行(推荐)
73 |
74 | **无需手动准备环境,直接安装运行:**
75 |
76 | ```bash
77 | # 通过 pip 安装并启动
78 | pip install autoglm-gui
79 | autoglm-gui --base-url http://localhost:8080/v1
80 | ```
81 |
82 | 也可以使用 uvx 免安装启动(需已安装 uv,[安装教程](https://docs.astral.sh/uv/getting-started/installation/)):
83 |
84 | ```bash
85 | uvx autoglm-gui --base-url http://localhost:8080/v1
86 | ```
87 |
88 | ### 传统安装
89 |
90 | ```bash
91 | # 从源码安装
git clone https://github.com/suyiiyii/AutoGLM-GUI.git
93 | cd AutoGLM-GUI
94 | uv sync
95 |
96 | # 构建前端(必须)
97 | uv run python scripts/build.py
98 |
99 | # 启动服务
100 | uv run autoglm-gui --base-url http://localhost:8080/v1
101 | ```
102 |
103 | 启动后,在浏览器中打开 http://localhost:8000 即可开始使用!
104 |
105 | ## 📖 使用说明
106 |
107 | ### 多设备管理
108 |
109 | AutoGLM-GUI 支持同时控制多个 Android 设备:
110 |
111 | 1. **设备列表** - 左侧边栏自动显示所有已连接的 ADB 设备
112 | 2. **设备选择** - 点击设备卡片切换到对应的控制面板
113 | 3. **状态指示** - 清晰显示每个设备的在线状态和初始化状态
114 | 4. **状态隔离** - 每个设备有独立的对话历史、配置和视频流
115 |
116 | **设备状态说明**:
117 | - 🟢 绿点:设备在线
118 | - ⚪ 灰点:设备离线
119 | - ✓ 标记:设备已初始化
120 |
121 | ### AI 自动化模式
122 |
123 | 1. **连接设备** - 启用 USB 调试并通过 ADB 连接设备(支持 USB 和 WiFi)
124 | 2. **选择设备** - 在左侧边栏选择要控制的设备
125 | 3. **初始化** - 点击"初始化设备"按钮配置 Agent
126 | 4. **对话** - 描述你想要做什么(例如:"去美团点一杯霸王茶姬的伯牙绝弦")
127 | 5. **观察** - Agent 会逐步执行操作,每一步的思考过程和动作都会实时显示
128 |
129 | ### 手动控制模式
130 |
131 | 除了 AI 自动化,你也可以直接在实时画面上操控手机:
132 |
133 | 1. **实时画面** - 设备面板右侧显示手机屏幕的实时视频流(基于 scrcpy)
134 | 2. **点击操作** - 直接点击画面中的任意位置,操作会立即发送到手机
135 | 3. **滑动手势** - 按住鼠标拖动实现滑动操作(支持滚轮滚动)
136 | 4. **视觉反馈** - 每次操作都会显示涟漪动画和成功/失败提示
137 | 5. **精准转换** - 自动处理屏幕缩放和坐标转换,确保操作位置准确
138 | 6. **显示模式** - 支持自动、视频流、截图三种显示模式切换
139 |
140 | **技术细节**:
141 | - 使用 scrcpy 提供低延迟(~30-50ms)的 H.264 视频流
142 | - 前端自动获取设备实际分辨率(如 1080x2400)
143 | - 智能处理视频流缩放(如 576x1280)与设备分辨率的映射
144 | - 支持 letterbox 黑边的精确坐标计算
145 | - 颗粒化触摸事件支持(DOWN、MOVE、UP)实现流畅的手势操作
146 |
147 | ## 🏗️ 架构设计
148 |
149 | ### 多设备并发架构
150 |
151 | AutoGLM-GUI 采用简化的多设备并发架构,支持同时管理多个 Android 设备:
152 |
153 | **后端设计**:
154 | - 使用字典管理多个 `PhoneAgent` 实例:`agents: dict[str, PhoneAgent]`
155 | - 每个设备有独立的 `scrcpy` 视频流实例
156 | - 设备级别的锁机制,避免不同设备间的阻塞
157 | - 所有 API 接口支持 `device_id` 参数进行设备路由
158 |
159 | **前端设计**:
160 | - 使用 `Map` 管理每个设备的独立状态
161 | - 组件化设计,功能职责清晰分离:
162 | - `DeviceCard` - 单个设备信息卡片
163 | - `DeviceSidebar` - 设备列表侧边栏
164 | - `DevicePanel` - 设备操作面板(ChatBox + Screen Monitor)
165 | - 设备状态完全隔离,互不影响
166 |
167 | **核心特点**:
168 | - ✅ 无任务队列,简化设计
169 | - ✅ 无复杂调度,每个设备独立运行
170 | - ✅ 实时 WebSocket 通信,支持流式响应
171 | - ✅ 自动设备发现和状态同步(每 3 秒刷新)
172 |
173 | ## 🛠️ 开发指南
174 |
175 | ### 快速开发
176 |
177 | ```bash
178 | # 后端开发(自动重载)
179 | uv run autoglm-gui --base-url http://localhost:8080/v1 --reload
180 |
181 | # 前端开发服务器(热重载)
cd frontend && pnpm dev
```
183 |
184 | ### 构建和打包
185 |
186 | ```bash
187 | # 仅构建前端
188 | uv run python scripts/build.py
189 |
190 | # 构建完整包
191 | uv run python scripts/build.py --pack
192 | ```
193 |
194 | ## 📝 开源协议
195 |
196 | Apache License 2.0
197 |
198 |
199 | ### 许可证说明
200 |
201 | AutoGLM-GUI 使用 MIT 许可证。但是,它打包了 ADB Keyboard APK (`com.android.adbkeyboard`),该组件使用 GPL-2.0 许可证。ADB Keyboard 组件作为独立工具使用,不影响 AutoGLM-GUI 本身的 MIT 许可。
202 |
203 | 详见:`AutoGLM_GUI/resources/apks/ADBKeyBoard.LICENSE.txt`
204 |
205 | ## 🙏 致谢
206 |
207 | 本项目基于 [Open-AutoGLM](https://github.com/zai-org/Open-AutoGLM) 构建,感谢 zai-org 团队在 AutoGLM 上的卓越工作。
208 |
--------------------------------------------------------------------------------
/phone_agent/adb/device.py:
--------------------------------------------------------------------------------
1 | """Device control utilities for Android automation."""
2 |
3 | import subprocess
4 | import time
5 |
6 | from phone_agent.config.apps import APP_PACKAGES
7 |
8 |
def get_current_app(device_id: str | None = None) -> str:
    """
    Report which known app currently has window focus.

    Runs ``dumpsys window`` on the device and scans the ``mCurrentFocus`` /
    ``mFocusedApp`` lines for a package listed in ``APP_PACKAGES``.

    Args:
        device_id: Optional ADB device ID for multi-device setups.

    Returns:
        The matching app name, or "System Home" when no known package is found.
    """
    window_dump = subprocess.run(
        _get_adb_prefix(device_id) + ["shell", "dumpsys", "window"],
        capture_output=True,
        text=True,
    ).stdout

    # Only the focus lines are relevant.
    focus_lines = (
        entry
        for entry in window_dump.split("\n")
        if "mCurrentFocus" in entry or "mFocusedApp" in entry
    )
    for entry in focus_lines:
        for app_name, package in APP_PACKAGES.items():
            if package in entry:
                return app_name

    return "System Home"
34 |
35 |
def tap(x: int, y: int, device_id: str | None = None, delay: float = 1.0) -> None:
    """
    Send a single tap to the device.

    Args:
        x: X coordinate.
        y: Y coordinate.
        device_id: Optional ADB device ID.
        delay: Seconds to sleep after the tap.
    """
    command = [*_get_adb_prefix(device_id), "shell", "input", "tap", str(x), str(y)]
    subprocess.run(command, capture_output=True)
    time.sleep(delay)
52 |
53 |
def double_tap(
    x: int, y: int, device_id: str | None = None, delay: float = 1.0
) -> None:
    """
    Send two taps to the same point, 0.1 seconds apart.

    Args:
        x: X coordinate.
        y: Y coordinate.
        device_id: Optional ADB device ID.
        delay: Seconds to sleep after the second tap.
    """
    tap_command = [
        *_get_adb_prefix(device_id),
        "shell",
        "input",
        "tap",
        str(x),
        str(y),
    ]

    subprocess.run(tap_command, capture_output=True)
    time.sleep(0.1)  # gap between the two taps
    subprocess.run(tap_command, capture_output=True)
    time.sleep(delay)
76 |
77 |
def long_press(
    x: int,
    y: int,
    duration_ms: int = 3000,
    device_id: str | None = None,
    delay: float = 1.0,
) -> None:
    """
    Hold a press at one point.

    Implemented as an ``input swipe`` whose start and end points are the same.

    Args:
        x: X coordinate.
        y: Y coordinate.
        duration_ms: How long to hold the press, in milliseconds.
        device_id: Optional ADB device ID.
        delay: Seconds to sleep after the press.
    """
    point = [str(x), str(y)]
    command = (
        _get_adb_prefix(device_id)
        + ["shell", "input", "swipe"]
        + point
        + point
        + [str(duration_ms)]
    )
    subprocess.run(command, capture_output=True)
    time.sleep(delay)
103 |
104 |
def swipe(
    start_x: int,
    start_y: int,
    end_x: int,
    end_y: int,
    duration_ms: int | None = None,
    device_id: str | None = None,
    delay: float = 1.0,
) -> None:
    """
    Swipe between two points.

    Args:
        start_x: Starting X coordinate.
        start_y: Starting Y coordinate.
        end_x: Ending X coordinate.
        end_y: Ending Y coordinate.
        duration_ms: Swipe duration in milliseconds. When None it is derived
            from the squared distance and clamped to 1000-2000 ms.
        device_id: Optional ADB device ID.
        delay: Seconds to sleep after the swipe.
    """
    adb_prefix = _get_adb_prefix(device_id)

    if duration_ms is None:
        # Squared distance / 1000, kept within [1000, 2000] ms.
        squared_distance = (start_x - end_x) ** 2 + (start_y - end_y) ** 2
        duration_ms = min(max(int(squared_distance / 1000), 1000), 2000)

    gesture = [str(value) for value in (start_x, start_y, end_x, end_y, duration_ms)]
    subprocess.run(
        adb_prefix + ["shell", "input", "swipe"] + gesture,
        capture_output=True,
    )
    time.sleep(delay)
149 |
150 |
def back(device_id: str | None = None, delay: float = 1.0) -> None:
    """
    Send the back key (keyevent 4).

    Args:
        device_id: Optional ADB device ID.
        delay: Seconds to sleep after the key press.
    """
    command = [*_get_adb_prefix(device_id), "shell", "input", "keyevent", "4"]
    subprocess.run(command, capture_output=True)
    time.sleep(delay)
165 |
166 |
def home(device_id: str | None = None, delay: float = 1.0) -> None:
    """
    Send the home key (KEYCODE_HOME).

    Args:
        device_id: Optional ADB device ID.
        delay: Seconds to sleep after the key press.
    """
    command = [
        *_get_adb_prefix(device_id),
        "shell",
        "input",
        "keyevent",
        "KEYCODE_HOME",
    ]
    subprocess.run(command, capture_output=True)
    time.sleep(delay)
181 |
182 |
def launch_app(app_name: str, device_id: str | None = None, delay: float = 1.0) -> bool:
    """
    Start an app through its launcher intent.

    Args:
        app_name: The app name (must be a key of APP_PACKAGES).
        device_id: Optional ADB device ID.
        delay: Seconds to sleep after issuing the launch command.

    Returns:
        False when the app name is unknown; True once the launch command has
        been issued (its exit status is not checked).
    """
    package = APP_PACKAGES.get(app_name) if app_name in APP_PACKAGES else None
    if app_name not in APP_PACKAGES:
        return False

    launch_command = _get_adb_prefix(device_id) + [
        "shell",
        "monkey",
        "-p",
        package,
        "-c",
        "android.intent.category.LAUNCHER",
        "1",
    ]
    subprocess.run(launch_command, capture_output=True)
    time.sleep(delay)
    return True
216 |
217 |
218 | def _get_adb_prefix(device_id: str | None) -> list:
219 | """Get ADB command prefix with optional device specifier."""
220 | if device_id:
221 | return ["adb", "-s", device_id]
222 | return ["adb"]
223 |
--------------------------------------------------------------------------------
/phone_agent/model/client.py:
--------------------------------------------------------------------------------
1 | """Model client for AI inference using OpenAI-compatible API."""
2 |
3 | import json
4 | from dataclasses import dataclass, field
5 | from typing import Any
6 |
7 | from openai import OpenAI
8 |
9 |
@dataclass
class ModelConfig:
    """Configuration for the AI model.

    Holds the endpoint settings used to construct the OpenAI client and the
    sampling parameters sent with each chat-completion request.
    """

    base_url: str = "http://localhost:8000/v1"
    api_key: str = "EMPTY"
    model_name: str = "autoglm-phone-9b"
    max_tokens: int = 3000
    temperature: float = 0.0
    top_p: float = 0.85
    frequency_penalty: float = 0.2
    # Passed as ``extra_body`` on each request; a fresh dict per instance.
    extra_body: dict[str, Any] = field(default_factory=dict)
22 |
23 |
@dataclass
class ModelResponse:
    """Response from the AI model, split into thinking and action parts."""

    thinking: str  # text preceding the action marker
    action: str  # the action expression extracted from the reply
    raw_content: str  # the unparsed message content
31 |
32 |
class ModelClient:
    """
    Client for interacting with OpenAI-compatible vision-language models.

    Args:
        config: Model configuration. A default ``ModelConfig`` is used when omitted.
    """

    def __init__(self, config: ModelConfig | None = None):
        self.config = config or ModelConfig()
        self.client = OpenAI(base_url=self.config.base_url, api_key=self.config.api_key)

    def request(self, messages: list[dict[str, Any]]) -> ModelResponse:
        """
        Send a non-streaming chat-completion request to the model.

        Args:
            messages: List of message dictionaries in OpenAI format.

        Returns:
            ModelResponse containing thinking, action and the raw content.

        Raises:
            Errors raised by the OpenAI client call propagate unchanged.
        """
        response = self.client.chat.completions.create(
            messages=messages,
            model=self.config.model_name,
            max_tokens=self.config.max_tokens,
            temperature=self.config.temperature,
            top_p=self.config.top_p,
            frequency_penalty=self.config.frequency_penalty,
            extra_body=self.config.extra_body,
            stream=False,
        )

        # The message content is optional in the OpenAI SDK; treat a missing
        # body as an empty string so parsing does not fail on None.
        raw_content = response.choices[0].message.content or ""

        # Parse thinking and action from response
        thinking, action = self._parse_response(raw_content)

        return ModelResponse(thinking=thinking, action=action, raw_content=raw_content)

    def _parse_response(self, content: str) -> tuple[str, str]:
        """
        Parse the model response into thinking and action parts.

        Parsing rules:
            1. If content contains 'finish(message=', everything before is thinking,
               everything from 'finish(message=' onwards is action.
            2. If rule 1 doesn't apply but content contains 'do(action=',
               everything before is thinking, everything from 'do(action=' onwards is action.
            3. Fallback: If content contains '<answer>', use legacy parsing with
               XML tags ('<think>...</think><answer>...</answer>').
            4. Otherwise, return empty thinking and full content as action.

        Args:
            content: Raw response content.

        Returns:
            Tuple of (thinking, action).
        """
        # Rule 1: Check for finish(message=
        if "finish(message=" in content:
            parts = content.split("finish(message=", 1)
            thinking = parts[0].strip()
            action = "finish(message=" + parts[1]
            return thinking, action

        # Rule 2: Check for do(action=
        if "do(action=" in content:
            parts = content.split("do(action=", 1)
            thinking = parts[0].strip()
            action = "do(action=" + parts[1]
            return thinking, action

        # Rule 3: Fallback to legacy XML tag parsing. The tag literals must be
        # non-empty: an empty separator matches every string and makes
        # str.split raise ValueError.
        if "<answer>" in content:
            parts = content.split("<answer>", 1)
            thinking = parts[0].replace("<think>", "").replace("</think>", "").strip()
            action = parts[1].replace("</answer>", "").strip()
            return thinking, action

        # Rule 4: No markers found, return content as action
        return "", content
117 |
118 |
119 | class MessageBuilder:
120 | """Helper class for building conversation messages."""
121 |
122 | @staticmethod
123 | def create_system_message(content: str) -> dict[str, Any]:
124 | """Create a system message."""
125 | return {"role": "system", "content": content}
126 |
127 | @staticmethod
128 | def create_user_message(
129 | text: str, image_base64: str | None = None
130 | ) -> dict[str, Any]:
131 | """
132 | Create a user message with optional image.
133 |
134 | Args:
135 | text: Text content.
136 | image_base64: Optional base64-encoded image.
137 |
138 | Returns:
139 | Message dictionary.
140 | """
141 | content = []
142 |
143 | if image_base64:
144 | content.append(
145 | {
146 | "type": "image_url",
147 | "image_url": {"url": f"data:image/png;base64,{image_base64}"},
148 | }
149 | )
150 |
151 | content.append({"type": "text", "text": text})
152 |
153 | return {"role": "user", "content": content}
154 |
155 | @staticmethod
156 | def create_assistant_message(content: str) -> dict[str, Any]:
157 | """Create an assistant message."""
158 | return {"role": "assistant", "content": content}
159 |
160 | @staticmethod
161 | def remove_images_from_message(message: dict[str, Any]) -> dict[str, Any]:
162 | """
163 | Remove image content from a message to save context space.
164 |
165 | Args:
166 | message: Message dictionary.
167 |
168 | Returns:
169 | Message with images removed.
170 | """
171 | if isinstance(message.get("content"), list):
172 | message["content"] = [
173 | item for item in message["content"] if item.get("type") == "text"
174 | ]
175 | return message
176 |
177 | @staticmethod
178 | def build_screen_info(current_app: str, **extra_info) -> str:
179 | """
180 | Build screen info string for the model.
181 |
182 | Args:
183 | current_app: Current app name.
184 | **extra_info: Additional info to include.
185 |
186 | Returns:
187 | JSON string with screen info.
188 | """
189 | info = {"current_app": current_app, **extra_info}
190 | return json.dumps(info, ensure_ascii=False)
191 |
--------------------------------------------------------------------------------
/AutoGLM_GUI/__main__.py:
--------------------------------------------------------------------------------
1 | """CLI entry point for AutoGLM-GUI."""
2 |
3 | import argparse
4 | import sys
5 | import socket
6 | import threading
7 | import time
8 | import webbrowser
9 |
10 | from AutoGLM_GUI import __version__
11 |
# Default configuration
# Model name quoted as the default in the --model help text of main().
DEFAULT_MODEL_NAME = "autoglm-phone-9b"
14 |
15 |
def find_available_port(
    start_port: int = 8000, max_attempts: int = 100, host: str = "127.0.0.1"
) -> int:
    """Find an available port starting from start_port.

    Each candidate is probed by binding a throwaway TCP socket to it; the
    socket is closed again before the port number is returned.

    Args:
        start_port: Port to start searching from
        max_attempts: Maximum number of ports to try
        host: Host to bind to (default: 127.0.0.1)

    Returns:
        An available port number

    Raises:
        RuntimeError: If no available port found within max_attempts
    """
    # bind() raises OverflowError (not OSError) for ports outside 0-65535, so
    # the scan is clamped to the valid range; an out-of-range request then ends
    # in the documented RuntimeError instead of an unexpected exception.
    max_port = 65535
    first_port = max(start_port, 0)
    last_port = min(start_port + max_attempts - 1, max_port)

    for port in range(first_port, last_port + 1):
        try:
            # Try to bind to the port
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.bind((host, port))
                return port
        except OSError:
            # Port is in use, try next one
            continue

    raise RuntimeError(
        f"Could not find available port in range {start_port}-{start_port + max_attempts - 1}"
    )
45 |
46 |
def open_browser(host: str, port: int, delay: float = 1.5) -> None:
    """Launch the default browser on the server URL from a background thread.

    The call returns immediately; the daemon thread waits ``delay`` seconds
    first so the server has time to come up.

    Args:
        host: Server host ("0.0.0.0" is opened as 127.0.0.1)
        port: Server port
        delay: Delay in seconds before opening browser
    """

    def _launch_after_delay():
        time.sleep(delay)
        if host == "0.0.0.0":
            target = f"http://127.0.0.1:{port}"
        else:
            target = f"http://{host}:{port}"
        try:
            webbrowser.open(target)
        except Exception as e:
            # Opening the browser is a convenience only; report and carry on.
            print(f"Could not open browser automatically: {e}", file=sys.stderr)

    threading.Thread(target=_launch_after_delay, daemon=True).start()
69 |
70 |
def main() -> None:
    """Start the AutoGLM-GUI server.

    Parses the command-line options, picks a free port when --port is not
    given, configures logging, resolves the model configuration through the
    config manager, prints a startup banner, optionally opens the browser and
    finally hands control to uvicorn.
    """
    parser = argparse.ArgumentParser(
        description="AutoGLM-GUI - Web GUI for AutoGLM Phone Agent"
    )
    parser.add_argument(
        "--base-url",
        required=False,
        help="Base URL of the model API (e.g., http://localhost:8080/v1)",
    )
    parser.add_argument(
        "--model",
        default=None,
        help=f"Model name to use (default: {DEFAULT_MODEL_NAME}, or from config file)",
    )
    parser.add_argument(
        "--apikey",
        default=None,
        help="API key for the model API (default: from AUTOGLM_API_KEY or unset)",
    )
    parser.add_argument(
        "--host",
        default="127.0.0.1",
        help="Host to bind the server to (default: 127.0.0.1)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=None,
        help="Port to bind the server to (default: auto-find starting from 8000)",
    )
    parser.add_argument(
        "--reload",
        action="store_true",
        help="Enable auto-reload for development",
    )
    parser.add_argument(
        "--no-browser",
        action="store_true",
        help="Do not open browser automatically",
    )
    parser.add_argument(
        "--log-level",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        default="INFO",
        help="Console log level (default: INFO)",
    )
    parser.add_argument(
        "--log-file",
        default="logs/autoglm_{time:YYYY-MM-DD}.log",
        help="Log file path (default: logs/autoglm_{time:YYYY-MM-DD}.log)",
    )
    parser.add_argument(
        "--no-log-file",
        action="store_true",
        help="Disable file logging",
    )

    args = parser.parse_args()

    # Auto-find available port if not specified
    if args.port is None:
        try:
            args.port = find_available_port(start_port=8000, host=args.host)
            print(f"\nAuto-detected available port: {args.port}\n")
        except RuntimeError as e:
            print(f"\nError: {e}", file=sys.stderr)
            sys.exit(1)

    # Imported here rather than at module top, i.e. only after argument
    # parsing and port detection have succeeded.
    import uvicorn

    from AutoGLM_GUI import server
    from AutoGLM_GUI.config import config
    from AutoGLM_GUI.config_manager import config_manager
    from AutoGLM_GUI.logger import configure_logger

    # Configure logging system
    configure_logger(
        console_level=args.log_level,
        log_file=None if args.no_log_file else args.log_file,
    )

    # ==================== Configuration system initialization ====================
    # Use the unified config manager (four-level priority: CLI > ENV > FILE > DEFAULT)

    # 1. Set the CLI-argument configuration (highest priority)
    config_manager.set_cli_config(
        base_url=args.base_url, model_name=args.model, api_key=args.apikey
    )

    # 2. Load the environment-variable configuration
    config_manager.load_env_config()

    # 3. Load the configuration file
    config_manager.load_file_config()

    # 4. Get the merged, effective configuration
    effective_config = config_manager.get_effective_config()

    # 5. Sync to environment variables (needed in reload mode)
    config_manager.sync_to_env()

    # 6. Refresh the legacy config object (keeps existing code compatible)
    config.refresh_from_env()

    # Get the configuration source
    config_source = config_manager.get_config_source()

    # Display startup banner
    print()
    print("=" * 50)
    print("  AutoGLM-GUI - Phone Agent Web Interface")
    print("=" * 50)
    print(f"  Version:    {__version__}")
    print()
    print(f"  Server:     http://{args.host}:{args.port}")
    print()
    print("  Model Configuration:")
    print(f"    Source:   {config_source.value}")
    print(f"    Base URL: {effective_config.base_url or '(not set)'}")
    print(f"    Model:    {effective_config.model_name}")
    # "EMPTY" is compared against as the marker for "no API key";
    # NOTE(review): presumably the config manager's placeholder value - confirm.
    if effective_config.api_key != "EMPTY":
        print("    API Key:  (configured)")
    print()

    # Warning if base_url is not configured
    if not effective_config.base_url:
        print("  [!] WARNING: base_url is not configured!")
        print("      Please configure via frontend or use --base-url")
        print()

    print("=" * 50)
    print("  Press Ctrl+C to stop")
    print("=" * 50)
    print()

    # Open browser automatically unless disabled
    if not args.no_browser:
        open_browser(args.host, args.port)

    # With --reload the app is passed as an import string instead of the
    # already-imported app object.
    uvicorn.run(
        server.app if not args.reload else "AutoGLM_GUI.server:app",
        host=args.host,
        port=args.port,
        reload=args.reload,
    )


if __name__ == "__main__":
    main()
221 |
--------------------------------------------------------------------------------
/frontend/src/components/DeviceSidebar.tsx:
--------------------------------------------------------------------------------
1 | import React, { useState, useEffect } from 'react';
2 | import { DeviceCard } from './DeviceCard';
3 | import type { Device } from '../api';
4 |
/**
 * Reads the persisted sidebar state from localStorage for the initial render.
 *
 * @returns The stored value when it is a real boolean; `false` when nothing is
 * stored, when the stored JSON is not a boolean, or when reading/parsing fails.
 */
const getInitialCollapsedState = (): boolean => {
  try {
    const saved = localStorage.getItem('sidebar-collapsed');
    if (saved === null) {
      return false;
    }
    // JSON.parse can yield any JSON value (null, string, number, ...), so the
    // result is validated rather than trusted to be a boolean.
    const parsed: unknown = JSON.parse(saved);
    return typeof parsed === 'boolean' ? parsed : false;
  } catch (error) {
    console.warn('Failed to load sidebar collapsed state:', error);
    return false;
  }
};
15 |
/** Props accepted by the DeviceSidebar component. */
interface DeviceSidebarProps {
  /** Devices to list; the sidebar also shows their count. */
  devices: Device[];
  /** Id of the currently selected device. NOTE(review): presumably used to highlight the matching card - confirm. */
  currentDeviceId: string;
  /** Called with the device id when a device is selected. */
  onSelectDevice: (deviceId: string) => void;
  /** NOTE(review): presumably triggered by the bottom action bar to open the configuration UI - confirm. */
  onOpenConfig: () => void;
  /** Called with the device id to connect it over WiFi; the result is awaited by the sidebar. */
  onConnectWifi: (deviceId: string) => void;
  /** Called with the device id to disconnect it from WiFi; the result is awaited by the sidebar. */
  onDisconnectWifi: (deviceId: string) => void;
}
24 |
25 | export function DeviceSidebar({
26 | devices,
27 | currentDeviceId,
28 | onSelectDevice,
29 | onOpenConfig,
30 | onConnectWifi,
31 | onDisconnectWifi,
32 | }: DeviceSidebarProps) {
33 | const [isCollapsed, setIsCollapsed] = useState(getInitialCollapsedState);
34 |
35 | useEffect(() => {
36 | localStorage.setItem('sidebar-collapsed', JSON.stringify(isCollapsed));
37 | }, [isCollapsed]);
38 |
39 | // 键盘快捷键支持
40 | useEffect(() => {
41 | const handleKeyDown = (event: KeyboardEvent) => {
42 | if ((event.metaKey || event.ctrlKey) && event.key === 'b') {
43 | event.preventDefault();
44 | setIsCollapsed(!isCollapsed);
45 | }
46 | };
47 | window.addEventListener('keydown', handleKeyDown);
48 | return () => window.removeEventListener('keydown', handleKeyDown);
49 | }, [isCollapsed]);
50 |
51 | const toggleCollapse = () => {
52 | setIsCollapsed(!isCollapsed);
53 | };
54 |
55 | return (
56 | <>
57 | {/* 半圆形展开按钮(当侧边栏隐藏时显示) */}
58 | {isCollapsed && (
59 |
78 | )}
79 |
80 | {/* 侧边栏主体 */}
81 |
84 | {/* 头部 */}
85 |
86 |
87 |
88 |
101 | 设备列表
102 |
103 |
104 | 共 {devices.length} 个设备
105 |
106 |
107 |
108 |
127 |
128 |
129 | {/* 设备列表 */}
130 |
131 | {devices.length === 0 ? (
132 |
133 |
146 |
未检测到设备
147 |
请连接 ADB 设备
148 |
149 | ) : (
150 | devices.map(device => (
151 |
onSelectDevice(device.id)}
160 | onConnectWifi={async () => {
161 | await onConnectWifi(device.id);
162 | }}
163 | onDisconnectWifi={async () => {
164 | await onDisconnectWifi(device.id);
165 | }}
166 | />
167 | ))
168 | )}
169 |
170 |
171 | {/* 底部操作栏 */}
172 |
173 |
198 |
199 |
200 | >
201 | );
202 | }
203 |
--------------------------------------------------------------------------------
/.github/workflows/pr-lint.yml:
--------------------------------------------------------------------------------
1 | name: PR Lint & Format Check
2 |
3 | on:
4 | pull_request:
5 | types: [opened, synchronize, reopened, ready_for_review]
6 | branches: [main, master]
7 |
8 | permissions:
9 | contents: read
  pull-requests: write # Needed to post the check results as a PR comment
11 |
12 | jobs:
13 | lint-and-format:
14 | name: Lint & Format Check
15 | runs-on: ubuntu-latest
16 |
17 | steps:
18 | - name: Checkout code
19 | uses: actions/checkout@v4
20 |
21 | - name: Set up Python
22 | uses: actions/setup-python@v4
23 | with:
24 | python-version: '3.11'
25 |
26 | - name: Set up Node.js
27 | uses: actions/setup-node@v4
28 | with:
29 | node-version: '18'
30 |
31 | - name: Set up pnpm
32 | uses: pnpm/action-setup@v3
33 | with:
34 | version: 10
35 |
36 | - name: Get pnpm store directory
37 | shell: bash
38 | run: |
39 | echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV
40 |
41 | - name: Set up pnpm cache
42 | uses: actions/cache@v4
43 | with:
44 | path: ${{ env.STORE_PATH }}
45 | key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }}
46 | restore-keys: |
47 | ${{ runner.os }}-pnpm-store-
48 |
49 | - name: Install uv
50 | uses: astral-sh/setup-uv@v3
51 | with:
52 | version: "latest"
53 |
54 | - name: Install backend dependencies
55 | run: |
56 | echo "📦 Installing backend dependencies..."
57 | uv sync --dev
58 |
59 | - name: Install frontend dependencies
60 | run: |
61 | echo "📦 Installing frontend dependencies..."
62 | cd frontend
63 | pnpm install
64 | cd ..
65 |
66 | - name: Run unified lint script (check-only mode)
67 | run: |
68 | echo "🚀 Running unified lint script in check-only mode..."
69 | uv run python scripts/lint.py --check-only
70 |
71 | - name: Run backend lint and format check
72 | if: always()
73 | run: |
74 | echo "🐍 Checking backend Python code..."
75 | uv run ruff check --output-format=github
76 |
77 | echo "🎨 Checking backend Python format..."
78 | uv run ruff format --check --diff
79 |
80 | - name: Run frontend lint and format check
81 | if: always()
82 | run: |
83 | echo "📱 Checking frontend JavaScript/TypeScript code..."
84 | cd frontend
85 | pnpm lint
86 |
87 | echo "🎨 Checking frontend format..."
88 | pnpm format:check
89 |
90 | echo "🔷 Running TypeScript type check..."
91 | pnpm type-check
92 | cd ..
93 |
94 | - name: Check for uncommitted changes
95 | if: always()
96 | run: |
97 | if [ -n "$(git status --porcelain)" ]; then
98 | echo "❌ There are uncommitted changes after running formatters!"
99 | echo "Please run the following locally to fix:"
100 | echo " uv run python scripts/lint.py"
101 | echo ""
102 | echo "Or run the individual commands:"
103 | echo " Backend: uv run ruff check --fix && uv run ruff format"
104 | echo " Frontend: cd frontend && pnpm lint --fix && pnpm format"
105 | echo ""
106 | echo "Files with changes:"
107 | git status --porcelain
108 | exit 1
109 | else
110 | echo "✅ No formatting changes detected"
111 | fi
112 |
113 | - name: Comment PR with results
        # Only try to comment on non-fork PRs (fork PRs have no write permission)
115 | if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
        continue-on-error: true # Do not block the workflow even if commenting fails
117 | uses: actions/github-script@v7
118 | with:
119 | script: |
120 | const { data: comments } = await github.rest.issues.listComments({
121 | owner: context.repo.owner,
122 | repo: context.repo.repo,
123 | issue_number: context.issue.number,
124 | });
125 |
126 | const botComment = comments.find(comment =>
127 | comment.user.type === 'Bot' &&
128 | comment.body.includes('🚀 Lint & Format Check Results')
129 | );
130 |
131 | const status = '${{ job.status }}' === 'success' ? '✅ PASSED' : '❌ FAILED';
132 | const color = '${{ job.status }}' === 'success' ? 'green' : 'red';
133 |
134 | const commentBody = `
135 | ## 🚀 Lint & Format Check Results: ${status}
136 |
137 | ### What was checked:
138 | - 🐍 **Backend**: Ruff linting + formatting
139 | - 📱 **Frontend**: ESLint + Prettier + TypeScript types
140 | - 🔧 **Unified**: Custom lint script validation
141 |
142 | ${'${{ job.status }}' === 'success' ? `
143 | ### ✅ All checks passed!
144 | Your code follows the project's style guidelines.
145 |
146 | ### Checks performed:
147 | - Backend Ruff lint ✅
148 | - Backend Ruff format ✅
149 | - Frontend ESLint ✅
150 | - Frontend Prettier format ✅
151 | - Frontend TypeScript types ✅
152 | - Unified lint script ✅
153 | ` : `
154 | ### ❌ Some checks failed!
155 |
156 | #### How to fix:
157 | 1. Run locally to auto-fix most issues:
158 | \`\`\`bash
159 | uv run python scripts/lint.py
160 | \`\`\`
161 |
162 | 2. Commit the fixes and push:
163 | \`\`\`bash
164 | git add .
165 | git commit -m "fix: apply lint and format fixes"
166 | git push
167 | \`\`\`
168 |
169 | 3. The checks will run again automatically.
170 |
171 | #### Individual fix commands:
172 | **Backend:**
173 | \`\`\`bash
174 | uv run ruff check --fix
175 | uv run ruff format
176 | \`\`\`
177 |
178 | **Frontend:**
179 | \`\`\`bash
180 | cd frontend
181 | pnpm lint --fix
182 | pnpm format
183 | cd ..
184 | \`\`\`
185 | `}
186 |
187 | ---
188 | *This is an automated check. For questions, ask in the PR discussion.*
189 | `;
190 |
191 | if (botComment) {
192 | await github.rest.issues.updateComment({
193 | owner: context.repo.owner,
194 | repo: context.repo.repo,
195 | comment_id: botComment.id,
196 | body: commentBody,
197 | });
198 | } else {
199 | await github.rest.issues.createComment({
200 | owner: context.repo.owner,
201 | repo: context.repo.repo,
202 | issue_number: context.issue.number,
203 | body: commentBody,
204 | });
205 | }
206 |
  # Build job - runs only after lint has passed
208 | build:
209 | name: Build Check
210 | runs-on: ubuntu-latest
211 | needs: lint-and-format
212 | if: always() && needs.lint-and-format.result == 'success'
213 |
214 | steps:
215 | - name: Checkout code
216 | uses: actions/checkout@v4
217 |
218 | - name: Set up Node.js
219 | uses: actions/setup-node@v4
220 | with:
221 | node-version: '18'
222 |
223 | - name: Set up pnpm
224 | uses: pnpm/action-setup@v3
225 | with:
226 | version: 10
227 |
228 | - name: Get pnpm store directory
229 | shell: bash
230 | run: |
231 | echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV
232 |
233 | - name: Set up pnpm cache
234 | uses: actions/cache@v4
235 | with:
236 | path: ${{ env.STORE_PATH }}
237 | key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }}
238 | restore-keys: |
239 | ${{ runner.os }}-pnpm-store-
240 |
241 | - name: Install frontend dependencies
242 | run: |
243 | cd frontend
244 | pnpm install
245 |
246 | - name: Build frontend
247 | run: |
248 | cd frontend
249 | pnpm build
250 |
251 | - name: Upload build artifacts
252 | uses: actions/upload-artifact@v4
253 | with:
254 | name: frontend-dist
255 | path: frontend/dist/
256 | retention-days: 7
257 |
--------------------------------------------------------------------------------
/phone_agent/agent.py:
--------------------------------------------------------------------------------
1 | """Main PhoneAgent class for orchestrating phone automation."""
2 |
3 | import json
4 | import traceback
5 | from dataclasses import dataclass
6 | from typing import Any, Callable
7 |
8 | from phone_agent.actions import ActionHandler
9 | from phone_agent.actions.handler import finish, parse_action
10 | from phone_agent.adb import get_current_app, get_screenshot
11 | from phone_agent.config import get_messages, get_system_prompt
12 | from phone_agent.model import ModelClient, ModelConfig
13 | from phone_agent.model.client import MessageBuilder
14 |
15 |
16 | @dataclass
17 | class AgentConfig:
18 | """Configuration for the PhoneAgent."""
19 |
20 | max_steps: int = 100
21 | device_id: str | None = None
22 | lang: str = "cn"
23 | system_prompt: str | None = None
24 | verbose: bool = True
25 |
26 | def __post_init__(self):
27 | if self.system_prompt is None:
28 | self.system_prompt = get_system_prompt(self.lang)
29 |
30 |
@dataclass
class StepResult:
    """Result of a single agent step, as returned by PhoneAgent.step()."""

    # Whether the step succeeded (False when the model request failed).
    success: bool
    # True when the agent should stop after this step.
    finished: bool
    # Parsed action dictionary, or None when no model response was obtained.
    action: dict[str, Any] | None
    # The model's reasoning text for this step ("" on model error).
    thinking: str
    # Optional message: the execution result's message, the action's own
    # "message" entry, or a model-error description.
    message: str | None = None
40 |
41 |
class PhoneAgent:
    """
    AI-powered agent for automating Android phone interactions.

    The agent uses a vision-language model to understand screen content
    and decide on actions to complete user tasks.

    Args:
        model_config: Configuration for the AI model.
        agent_config: Configuration for the agent behavior.
        confirmation_callback: Optional callback for sensitive action confirmation.
        takeover_callback: Optional callback for takeover requests.

    Example:
        >>> from phone_agent import PhoneAgent
        >>> from phone_agent.model import ModelConfig
        >>>
        >>> model_config = ModelConfig(base_url="http://localhost:8000/v1")
        >>> agent = PhoneAgent(model_config)
        >>> agent.run("Open WeChat and send a message to John")
    """

    def __init__(
        self,
        model_config: ModelConfig | None = None,
        agent_config: AgentConfig | None = None,
        confirmation_callback: Callable[[str], bool] | None = None,
        takeover_callback: Callable[[str], None] | None = None,
    ):
        self.model_config = model_config or ModelConfig()
        self.agent_config = agent_config or AgentConfig()

        self.model_client = ModelClient(self.model_config)
        self.action_handler = ActionHandler(
            device_id=self.agent_config.device_id,
            confirmation_callback=confirmation_callback,
            takeover_callback=takeover_callback,
        )

        # Conversation sent to the model, and the number of steps taken so far.
        self._context: list[dict[str, Any]] = []
        self._step_count = 0

    def run(self, task: str) -> str:
        """
        Run the agent to complete a task.

        Any previous conversation is discarded first. Steps are executed until
        one reports it is finished or ``max_steps`` steps have been taken.

        Args:
            task: Natural language description of the task.

        Returns:
            Final message from the agent, "Task completed" when the finishing
            step carries no message, or "Max steps reached".
        """
        self._context = []
        self._step_count = 0

        # First step with user prompt
        result = self._execute_step(task, is_first=True)

        if result.finished:
            return result.message or "Task completed"

        # Continue until finished or max steps reached
        while self._step_count < self.agent_config.max_steps:
            result = self._execute_step(is_first=False)

            if result.finished:
                return result.message or "Task completed"

        return "Max steps reached"

    def step(self, task: str | None = None) -> StepResult:
        """
        Execute a single step of the agent.

        Useful for manual control or debugging.

        Args:
            task: Task description (only needed for first step).

        Returns:
            StepResult with step details.

        Raises:
            ValueError: If the conversation is empty and no task is given.
        """
        is_first = len(self._context) == 0

        if is_first and not task:
            raise ValueError("Task is required for the first step")

        return self._execute_step(task, is_first)

    def reset(self) -> None:
        """Reset the agent state for a new task."""
        self._context = []
        self._step_count = 0

    def _execute_step(
        self, user_prompt: str | None = None, is_first: bool = False
    ) -> StepResult:
        """
        Execute a single step of the agent loop.

        One step captures the screen, asks the model for the next action,
        executes that action and records the exchange in the context.

        Args:
            user_prompt: Task text; only used when ``is_first`` is True.
            is_first: Whether this step opens a new conversation.

        Returns:
            StepResult describing the outcome. A failed model request yields a
            finished, unsuccessful result carrying the error text.
        """
        self._step_count += 1

        # Capture current screen state
        screenshot = get_screenshot(self.agent_config.device_id)
        current_app = get_current_app(self.agent_config.device_id)

        self._append_observation(user_prompt, is_first, current_app, screenshot)

        # Get model response
        try:
            response = self.model_client.request(self._context)
        except Exception as e:
            if self.agent_config.verbose:
                traceback.print_exc()
            return StepResult(
                success=False,
                finished=True,
                action=None,
                thinking="",
                message=f"Model error: {e}",
            )

        # Parse action from response; an unparsable reply ends the task with
        # the raw reply as the final message.
        try:
            action = parse_action(response.action)
        except ValueError:
            if self.agent_config.verbose:
                traceback.print_exc()
            action = finish(message=response.action)

        if self.agent_config.verbose:
            self._print_step(response, action)

        # Remove image from context to save space
        self._context[-1] = MessageBuilder.remove_images_from_message(self._context[-1])

        # Execute action; a failure is turned into a finish action that
        # carries the error text.
        try:
            result = self.action_handler.execute(
                action, screenshot.width, screenshot.height
            )
        except Exception as e:
            if self.agent_config.verbose:
                traceback.print_exc()
            result = self.action_handler.execute(
                finish(message=str(e)), screenshot.width, screenshot.height
            )

        # Add assistant response to context
        self._context.append(
            MessageBuilder.create_assistant_message(
                f"{response.thinking}{response.action}"
            )
        )

        # Check if finished
        finished = action.get("_metadata") == "finish" or result.should_finish

        if finished and self.agent_config.verbose:
            self._print_completion(result, action)

        return StepResult(
            success=result.success,
            finished=finished,
            action=action,
            thinking=response.thinking,
            message=result.message or action.get("message"),
        )

    def _append_observation(
        self, user_prompt: str | None, is_first: bool, current_app: str, screenshot
    ) -> None:
        """Append the user message (and, on the first step, the system prompt) for this step."""
        if is_first:
            self._context.append(
                MessageBuilder.create_system_message(self.agent_config.system_prompt)
            )

        screen_info = MessageBuilder.build_screen_info(current_app)
        if is_first:
            # The opening message carries the task itself.
            text_content = f"{user_prompt}\n\n{screen_info}"
        else:
            text_content = f"** Screen Info **\n\n{screen_info}"

        self._context.append(
            MessageBuilder.create_user_message(
                text=text_content, image_base64=screenshot.base64_data
            )
        )

    def _print_step(self, response, action: dict[str, Any]) -> None:
        """Print the model's thinking and the parsed action to the console."""
        msgs = get_messages(self.agent_config.lang)
        print("\n" + "=" * 50)
        print(f"💭 {msgs['thinking']}:")
        print("-" * 50)
        print(response.thinking)
        print("-" * 50)
        print(f"🎯 {msgs['action']}:")
        print(json.dumps(action, ensure_ascii=False, indent=2))
        print("=" * 50 + "\n")

    def _print_completion(self, result, action: dict[str, Any]) -> None:
        """Print the task-completed banner with the final message."""
        msgs = get_messages(self.agent_config.lang)
        print("\n" + "🎉 " + "=" * 48)
        print(
            f"✅ {msgs['task_completed']}: {result.message or action.get('message', msgs['done'])}"
        )
        print("=" * 50 + "\n")

    @property
    def context(self) -> list[dict[str, Any]]:
        """Get a shallow copy of the current conversation context."""
        return self._context.copy()

    @property
    def step_count(self) -> int:
        """Get the current step count."""
        return self._step_count
254 |
--------------------------------------------------------------------------------
/phone_agent/config/apps.py:
--------------------------------------------------------------------------------
1 | """App name to package name mapping for supported applications."""
2 |
# Maps the app names a user or the model may say (Chinese names, English names
# and common spelling/casing variants) to the Android package name to launch.
APP_PACKAGES: dict[str, str] = {
    # Social & Messaging
    "微信": "com.tencent.mm",
    "QQ": "com.tencent.mobileqq",
    "微博": "com.sina.weibo",
    # E-commerce
    "淘宝": "com.taobao.taobao",
    "京东": "com.jingdong.app.mall",
    "拼多多": "com.xunmeng.pinduoduo",
    "淘宝闪购": "com.taobao.taobao",
    "京东秒送": "com.jingdong.app.mall",
    # Lifestyle & Social
    "小红书": "com.xingin.xhs",
    "豆瓣": "com.douban.frodo",
    "知乎": "com.zhihu.android",
    # Maps & Navigation
    "高德地图": "com.autonavi.minimap",
    "百度地图": "com.baidu.BaiduMap",
    # Food & Services
    "美团": "com.sankuai.meituan",
    "大众点评": "com.dianping.v1",
    "饿了么": "me.ele",
    "肯德基": "com.yek.android.kfc.activitys",
    # Travel
    "携程": "ctrip.android.view",
    "铁路12306": "com.MobileTicket",
    "12306": "com.MobileTicket",
    "去哪儿": "com.Qunar",
    "去哪儿旅行": "com.Qunar",
    "滴滴出行": "com.sdu.did.psnger",
    # Video & Entertainment
    "bilibili": "tv.danmaku.bili",
    "抖音": "com.ss.android.ugc.aweme",
    "快手": "com.smile.gifmaker",
    "腾讯视频": "com.tencent.qqlive",
    "爱奇艺": "com.qiyi.video",
    "优酷视频": "com.youku.phone",
    "芒果TV": "com.hunantv.imgo.activity",
    "红果短剧": "com.phoenix.read",
    # Music & Audio
    "网易云音乐": "com.netease.cloudmusic",
    "QQ音乐": "com.tencent.qqmusic",
    "汽水音乐": "com.luna.music",
    "喜马拉雅": "com.ximalaya.ting.android",
    # Reading
    "番茄小说": "com.dragon.read",
    "番茄免费小说": "com.dragon.read",
    "七猫免费小说": "com.kmxs.reader",
    # Productivity
    "飞书": "com.ss.android.lark",
    "QQ邮箱": "com.tencent.androidqqmail",
    # AI & Tools
    "豆包": "com.larus.nova",
    # Health & Fitness
    "keep": "com.gotokeep.keep",
    "美柚": "com.lingan.seeyou",
    # News & Information
    "腾讯新闻": "com.tencent.news",
    "今日头条": "com.ss.android.article.news",
    # Real Estate
    "贝壳找房": "com.lianjia.beike",
    "安居客": "com.anjuke.android.app",
    # Finance
    "同花顺": "com.hexin.plat.android",
    # Games
    "星穹铁道": "com.miHoYo.hkrpg",
    "崩坏:星穹铁道": "com.miHoYo.hkrpg",
    "恋与深空": "com.papegames.lysk.cn",
    # System and English-named apps (alphabetical, with spelling variants)
    "AndroidSystemSettings": "com.android.settings",
    "Android System Settings": "com.android.settings",
    "Android-System-Settings": "com.android.settings",
    "Settings": "com.android.settings",
    "AudioRecorder": "com.android.soundrecorder",
    "audiorecorder": "com.android.soundrecorder",
    "Bluecoins": "com.rammigsoftware.bluecoins",
    "bluecoins": "com.rammigsoftware.bluecoins",
    "Broccoli": "com.flauschcode.broccoli",
    "broccoli": "com.flauschcode.broccoli",
    "Booking.com": "com.booking",
    "Booking": "com.booking",
    "booking.com": "com.booking",
    "booking": "com.booking",
    "BOOKING.COM": "com.booking",
    "Chrome": "com.android.chrome",
    "chrome": "com.android.chrome",
    "Google Chrome": "com.android.chrome",
    "Clock": "com.android.deskclock",
    "clock": "com.android.deskclock",
    "Contacts": "com.android.contacts",
    "contacts": "com.android.contacts",
    "Duolingo": "com.duolingo",
    "duolingo": "com.duolingo",
    "Expedia": "com.expedia.bookings",
    "expedia": "com.expedia.bookings",
    "Files": "com.android.fileexplorer",
    "files": "com.android.fileexplorer",
    "File Manager": "com.android.fileexplorer",
    "file manager": "com.android.fileexplorer",
    "gmail": "com.google.android.gm",
    "Gmail": "com.google.android.gm",
    "GoogleMail": "com.google.android.gm",
    "Google Mail": "com.google.android.gm",
    "GoogleFiles": "com.google.android.apps.nbu.files",
    "googlefiles": "com.google.android.apps.nbu.files",
    "FilesbyGoogle": "com.google.android.apps.nbu.files",
    "GoogleCalendar": "com.google.android.calendar",
    "Google-Calendar": "com.google.android.calendar",
    "Google Calendar": "com.google.android.calendar",
    "google-calendar": "com.google.android.calendar",
    "google calendar": "com.google.android.calendar",
    "GoogleChat": "com.google.android.apps.dynamite",
    "Google Chat": "com.google.android.apps.dynamite",
    "Google-Chat": "com.google.android.apps.dynamite",
    "GoogleClock": "com.google.android.deskclock",
    "Google Clock": "com.google.android.deskclock",
    "Google-Clock": "com.google.android.deskclock",
    "GoogleContacts": "com.google.android.contacts",
    "Google-Contacts": "com.google.android.contacts",
    "Google Contacts": "com.google.android.contacts",
    "google-contacts": "com.google.android.contacts",
    "google contacts": "com.google.android.contacts",
    "GoogleDocs": "com.google.android.apps.docs.editors.docs",
    "Google Docs": "com.google.android.apps.docs.editors.docs",
    "googledocs": "com.google.android.apps.docs.editors.docs",
    "google docs": "com.google.android.apps.docs.editors.docs",
    "Google Drive": "com.google.android.apps.docs",
    "Google-Drive": "com.google.android.apps.docs",
    "google drive": "com.google.android.apps.docs",
    "google-drive": "com.google.android.apps.docs",
    "GoogleDrive": "com.google.android.apps.docs",
    "Googledrive": "com.google.android.apps.docs",
    "googledrive": "com.google.android.apps.docs",
    "GoogleFit": "com.google.android.apps.fitness",
    "googlefit": "com.google.android.apps.fitness",
    "GoogleKeep": "com.google.android.keep",
    "googlekeep": "com.google.android.keep",
    "GoogleMaps": "com.google.android.apps.maps",
    "Google Maps": "com.google.android.apps.maps",
    "googlemaps": "com.google.android.apps.maps",
    "google maps": "com.google.android.apps.maps",
    "Google Play Books": "com.google.android.apps.books",
    "Google-Play-Books": "com.google.android.apps.books",
    "google play books": "com.google.android.apps.books",
    "google-play-books": "com.google.android.apps.books",
    "GooglePlayBooks": "com.google.android.apps.books",
    "googleplaybooks": "com.google.android.apps.books",
    "GooglePlayStore": "com.android.vending",
    "Google Play Store": "com.android.vending",
    "Google-Play-Store": "com.android.vending",
    "GoogleSlides": "com.google.android.apps.docs.editors.slides",
    "Google Slides": "com.google.android.apps.docs.editors.slides",
    "Google-Slides": "com.google.android.apps.docs.editors.slides",
    "GoogleTasks": "com.google.android.apps.tasks",
    "Google Tasks": "com.google.android.apps.tasks",
    "Google-Tasks": "com.google.android.apps.tasks",
    "Joplin": "net.cozic.joplin",
    "joplin": "net.cozic.joplin",
    "McDonald": "com.mcdonalds.app",
    "mcdonald": "com.mcdonalds.app",
    "Osmand": "net.osmand",
    "osmand": "net.osmand",
    "PiMusicPlayer": "com.Project100Pi.themusicplayer",
    "pimusicplayer": "com.Project100Pi.themusicplayer",
    "Quora": "com.quora.android",
    "quora": "com.quora.android",
    "Reddit": "com.reddit.frontpage",
    "reddit": "com.reddit.frontpage",
    "RetroMusic": "code.name.monkey.retromusic",
    "retromusic": "code.name.monkey.retromusic",
    # Previously pointed at a calculator package; this is the Simple Calendar
    # Pro id, from the same publisher namespace as SimpleSMSMessenger below.
    "SimpleCalendarPro": "com.simplemobiletools.calendar.pro",
    "SimpleSMSMessenger": "com.simplemobiletools.smsmessenger",
    "Telegram": "org.telegram.messenger",
    "temu": "com.einnovation.temu",
    "Temu": "com.einnovation.temu",
    "Tiktok": "com.zhiliaoapp.musically",
    "tiktok": "com.zhiliaoapp.musically",
    "Twitter": "com.twitter.android",
    "twitter": "com.twitter.android",
    "X": "com.twitter.android",
    "VLC": "org.videolan.vlc",
    "WeChat": "com.tencent.mm",
    "wechat": "com.tencent.mm",
    "Whatsapp": "com.whatsapp",
    "WhatsApp": "com.whatsapp",
}
189 |
190 |
def get_package_name(app_name: str) -> str | None:
    """
    Look up the Android package registered for an app name.

    The lookup is an exact, case-sensitive match against the keys of
    ``APP_PACKAGES``.

    Args:
        app_name: The display name of the app.

    Returns:
        The Android package name, or None if the name is not registered.
    """
    if app_name in APP_PACKAGES:
        return APP_PACKAGES[app_name]
    return None
202 |
203 |
def get_app_name(package_name: str) -> str | None:
    """
    Reverse lookup: find an app name that maps to the given package.

    Several names can map to the same package; the first one in the
    iteration order of ``APP_PACKAGES`` is returned.

    Args:
        package_name: The Android package name.

    Returns:
        The display name of the app, or None if no entry maps to it.
    """
    matching_names = (
        name for name, package in APP_PACKAGES.items() if package == package_name
    )
    return next(matching_names, None)
218 |
219 |
def list_supported_apps() -> list[str]:
    """
    Return every app name known to ``APP_PACKAGES``.

    Returns:
        A new list with all registered names (aliases included), in the
        mapping's iteration order.
    """
    return [name for name in APP_PACKAGES]
228 |
--------------------------------------------------------------------------------
/frontend/src/components/DeviceCard.tsx:
--------------------------------------------------------------------------------
1 | import React, { useState } from 'react';
2 | import { ConfirmDialog } from './ConfirmDialog';
3 |
/** Props accepted by `DeviceCard`. */
interface DeviceCardProps {
  id: string;
  model: string;
  /** Compared against `'device'` by the card to decide whether it is online. */
  status: string;
  /** The card checks for `'usb'` and `'remote'`. */
  connectionType?: string;
  isInitialized: boolean;
  isActive: boolean;
  onClick: () => void;
  /** Awaited after the user confirms the WiFi dialog; the result is ignored. */
  onConnectWifi?: () => Promise<void>;
  /** Awaited after the user confirms the disconnect dialog; the result is ignored. */
  onDisconnectWifi?: () => Promise<void>;
}
15 |
/**
 * Card for a single device, with optional WiFi connect/disconnect actions
 * that are each guarded by a confirmation dialog.
 *
 * NOTE(review): in this copy of the file the JSX element tags inside the
 * `return (...)` block are missing (only expressions and comments remain),
 * so the markup below is not valid as shown — restore it from version
 * control before editing.
 */
export function DeviceCard({
  id,
  model,
  status,
  connectionType,
  isInitialized,
  isActive,
  onClick,
  onConnectWifi,
  onDisconnectWifi,
}: DeviceCardProps) {
  // Flags derived from props.
  const isOnline = status === 'device';
  const isUsb = connectionType === 'usb';
  const isRemote = connectionType === 'remote';
  // `loading` is true while a connect/disconnect callback is being awaited.
  const [loading, setLoading] = useState(false);
  const [showWifiConfirm, setShowWifiConfirm] = useState(false);
  const [showDisconnectConfirm, setShowDisconnectConfirm] = useState(false);

  // Opens the WiFi confirmation dialog; stopPropagation keeps the click
  // from also reaching handlers on ancestor elements.
  const handleWifiClick = (e: React.MouseEvent) => {
    e.stopPropagation();
    if (loading || !onConnectWifi) return;
    setShowWifiConfirm(true);
  };

  // Opens the disconnect confirmation dialog (same guards as above).
  const handleDisconnectClick = (e: React.MouseEvent) => {
    e.stopPropagation();
    if (loading || !onDisconnectWifi) return;
    setShowDisconnectConfirm(true);
  };

  // Runs the connect callback; `finally` clears `loading` even if it rejects.
  const handleConfirmWifi = async () => {
    setShowWifiConfirm(false);
    setLoading(true);
    try {
      if (onConnectWifi) {
        await onConnectWifi();
      }
    } finally {
      setLoading(false);
    }
  };

  // Runs the disconnect callback; `finally` clears `loading` even if it rejects.
  const handleConfirmDisconnect = async () => {
    setShowDisconnectConfirm(false);
    setLoading(true);
    try {
      if (onDisconnectWifi) {
        await onDisconnectWifi();
      }
    } finally {
      setLoading(false);
    }
  };

  return (
    <>
      {
        if (e.key === 'Enter' || e.key === ' ') {
          onClick();
        }
      }}
      className={`w-full text-left px-4 py-3 rounded-xl transition-all duration-300 cursor-pointer border relative group ${
        isActive
          ? 'bg-blue-500 border-blue-500 text-white shadow-lg shadow-blue-500/20'
          : 'bg-white dark:bg-gray-800 border-transparent hover:bg-gray-50 dark:hover:bg-gray-700/50'
      }`}
      >

      {/* Status indicator */}


      {/* Device info */}



      {model || '未知设备'}



      {id}



      {/* Action buttons */}

      {isUsb && onConnectWifi && (

      )}

      {isRemote && onDisconnectWifi && (

      )}


      {/* Initialized badge */}
      {isInitialized && (

      )}



      setShowWifiConfirm(false)}
      />

      setShowDisconnectConfirm(false)}
      />
    >
  );
}
269 |
--------------------------------------------------------------------------------
/scripts/release.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """Release script for AutoGLM-GUI.
3 |
This script automates the release process:
1. Bumps the version in pyproject.toml and electron/package.json
2. Runs `uv sync`
3. Commits pyproject.toml, electron/package.json and uv.lock with git
4. Creates a git tag for the new version
8 |
9 | Usage:
10 | uv run python scripts/release.py # Bump patch version (0.1.3 -> 0.1.4)
11 | uv run python scripts/release.py --minor # Bump minor version (0.1.3 -> 0.2.0)
12 | uv run python scripts/release.py --major # Bump major version (0.1.3 -> 1.0.0)
13 | uv run python scripts/release.py --version 1.2.3 # Set specific version
14 | """
15 |
16 | import argparse
17 | import json
18 | import re
19 | import subprocess
20 | import sys
21 | from pathlib import Path
22 |
23 | ROOT_DIR = Path(__file__).parent.parent
24 | PYPROJECT_PATH = ROOT_DIR / "pyproject.toml"
25 | ELECTRON_PACKAGE_JSON_PATH = ROOT_DIR / "electron" / "package.json"
26 |
27 |
def get_current_version() -> str:
    """Extract the current version from pyproject.toml.

    The first line of the form ``version = "..."`` in the file is used.

    Returns:
        The version string found in pyproject.toml.

    Exits the process with status 1 (after printing an error) when the file
    is missing or contains no such line.
    """
    if not PYPROJECT_PATH.exists():
        print(f"Error: {PYPROJECT_PATH} not found.")
        sys.exit(1)

    # Read as UTF-8 explicitly instead of relying on the locale default,
    # matching how electron/package.json is read in this script.
    content = PYPROJECT_PATH.read_text(encoding="utf-8")
    match = re.search(r'^version\s*=\s*"([^"]+)"', content, re.MULTILINE)

    if not match:
        print("Error: Could not find version in pyproject.toml")
        sys.exit(1)

    return match.group(1)
42 |
43 |
def parse_version(version: str) -> tuple[int, int, int]:
    """Parse a ``MAJOR.MINOR.PATCH`` string into an integer tuple.

    Args:
        version: Version string made of exactly three dot-separated runs of
            ASCII digits, e.g. ``"1.2.3"``.

    Returns:
        ``(major, minor, patch)`` as integers.

    Exits the process with status 1 (after printing an error) when the string
    is not in that format.
    """
    # fullmatch rejects a trailing newline, which ``$`` with re.match would
    # accept; re.ASCII limits \d to 0-9.
    match = re.fullmatch(r"(\d+)\.(\d+)\.(\d+)", version, re.ASCII)
    if not match:
        print(f"Error: Invalid version format: {version}")
        sys.exit(1)

    major, minor, patch = (int(part) for part in match.groups())
    return major, minor, patch
52 |
53 |
def bump_version(
    current: str, bump_type: str = "patch", target_version: str | None = None
) -> str:
    """Compute the next version string.

    Args:
        current: Current version in ``MAJOR.MINOR.PATCH`` form.
        bump_type: ``"major"``, ``"minor"`` or ``"patch"``; ignored when
            ``target_version`` is given.
        target_version: Explicit version to use instead of bumping. It is
            validated with ``parse_version`` and returned unchanged.

    Returns:
        The new version string.

    Exits the process with status 1 when a version string is malformed or
    ``bump_type`` is not one of the three accepted values.
    """
    # Compare against None so that an explicitly supplied empty string is
    # rejected by validation instead of silently falling back to a bump.
    if target_version is not None:
        parse_version(target_version)
        return target_version

    major, minor, patch = parse_version(current)

    if bump_type == "major":
        return f"{major + 1}.0.0"
    elif bump_type == "minor":
        return f"{major}.{minor + 1}.0"
    elif bump_type == "patch":
        return f"{major}.{minor}.{patch + 1}"
    else:
        print(f"Error: Invalid bump type: {bump_type}")
        sys.exit(1)
73 |
74 |
def update_pyproject_version(new_version: str) -> bool:
    """Rewrite the version line in pyproject.toml.

    Only the first ``version = "..."`` line is replaced, which is the same
    line ``get_current_version`` reads; later lines of that form are left
    untouched.

    Args:
        new_version: Version string to write.

    Returns:
        True when the file was rewritten; False when no version line was
        found or the file already contains ``new_version``.
    """
    print(f"Updating pyproject.toml to version {new_version}...")

    # Explicit UTF-8 keeps reads and writes independent of the locale default.
    content = PYPROJECT_PATH.read_text(encoding="utf-8")
    new_content, replaced = re.subn(
        r'^version\s*=\s*"[^"]+"',
        # A callable replacement is inserted literally (no backslash escapes).
        lambda _match: f'version = "{new_version}"',
        content,
        count=1,
        flags=re.MULTILINE,
    )

    if replaced == 0 or content == new_content:
        print("Error: Failed to update version in pyproject.toml")
        return False

    PYPROJECT_PATH.write_text(new_content, encoding="utf-8")
    print(f'Updated pyproject.toml: version = "{new_version}"')
    return True
94 |
95 |
def update_electron_package_json_version(new_version: str) -> bool:
    """Set the ``version`` field of electron/package.json.

    Args:
        new_version: Version string to store.

    Returns:
        True when the file was updated, or when it does not exist (a warning
        is printed and the step is skipped); False when reading, parsing or
        writing fails.
    """
    print(f"Updating electron/package.json to version {new_version}...")

    manifest_path = ELECTRON_PACKAGE_JSON_PATH
    if not manifest_path.exists():
        print(f"Warning: {ELECTRON_PACKAGE_JSON_PATH} not found, skipping...")
        return True

    try:
        manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
        manifest["version"] = new_version

        # Two-space indent plus a trailing newline; non-ASCII text is kept
        # as-is rather than escaped.
        serialized = json.dumps(manifest, indent=2, ensure_ascii=False)
        manifest_path.write_text(serialized + "\n", encoding="utf-8")

        print(f'Updated electron/package.json: "version": "{new_version}"')
        return True
    except json.JSONDecodeError as e:
        print(f"Error: Failed to parse {ELECTRON_PACKAGE_JSON_PATH}: {e}")
        return False
    except Exception as e:
        print(f"Error: Failed to update {ELECTRON_PACKAGE_JSON_PATH}: {e}")
        return False
127 |
128 |
def git_commit_version(version: str, dry_run: bool = False) -> bool:
    """Stage the release files and commit them as ``release v<version>``.

    The staged files are pyproject.toml, electron/package.json and uv.lock.

    Args:
        version: Version being released; used in the commit message.
        dry_run: When True, only print the git commands that would run.

    Returns:
        True on success (or in dry-run mode), False when a git command fails
        or cannot be started.
    """
    print("Committing version bump to git...")

    # One list drives both the real command and the dry-run message, so the
    # two cannot drift apart.
    release_files = ["pyproject.toml", "electron/package.json", "uv.lock"]
    add_cmd = ["git", "add", *release_files]
    commit_message = f"release v{version}"

    if dry_run:
        print(f"[DRY RUN] Would run: {' '.join(add_cmd)}")
        print(f'[DRY RUN] Would run: git commit -m "{commit_message}"')
        return True

    try:
        # Stage the release files
        result = subprocess.run(
            add_cmd,
            cwd=ROOT_DIR,
            capture_output=True,
            text=True,
        )

        if result.returncode != 0:
            print(f"Error staging files: {result.stderr}")
            return False

        # Commit the change
        result = subprocess.run(
            ["git", "commit", "-m", commit_message],
            cwd=ROOT_DIR,
            capture_output=True,
            text=True,
        )

        if result.returncode != 0:
            print(f"Error creating commit: {result.stderr}")
            return False

        print(f"Committed: {commit_message}")
        return True

    except Exception as e:
        print(f"Error: {e}")
        return False
169 |
170 |
def create_git_tag(version: str, dry_run: bool = False) -> bool:
    """Create the annotated git tag ``v<version>``.

    Args:
        version: Version being released (without the leading ``v``).
        dry_run: When True, only print the tag that would be created.

    Returns:
        True on success (or in dry-run mode), False when ``git tag`` fails
        or cannot be started.
    """
    tag_name = f"v{version}"
    print(f"Creating git tag: {tag_name}...")

    if dry_run:
        print(f"[DRY RUN] Would create tag: {tag_name}")
        return True

    tag_command = ["git", "tag", "-a", tag_name, "-m", f"release {tag_name}"]
    try:
        completed = subprocess.run(
            tag_command, cwd=ROOT_DIR, capture_output=True, text=True
        )
        succeeded = completed.returncode == 0
        if succeeded:
            print(f"Created tag: {tag_name}")
        else:
            print(f"Error creating tag: {completed.stderr}")
        return succeeded
    except Exception as e:
        print(f"Error: {e}")
        return False
199 |
200 |
def run_uv_sync() -> bool:
    """Run ``uv sync`` in the repository root.

    Returns:
        True when the command exits with status 0, False when it fails or
        cannot be started.
    """
    print("Running uv sync...")

    try:
        completed = subprocess.run(
            ["uv", "sync"], cwd=ROOT_DIR, capture_output=True, text=True
        )
        succeeded = completed.returncode == 0
        if succeeded:
            print("Dependencies synchronized successfully.")
        else:
            print(f"Error running uv sync: {completed.stderr}")
        return succeeded
    except Exception as e:
        print(f"Error: {e}")
        return False
223 |
224 |
def main() -> int:
    """Run the release workflow from the command line.

    Steps: work out the new version, update pyproject.toml and
    electron/package.json, run ``uv sync``, commit, then tag. With
    ``--dry-run`` the file updates and ``uv sync`` are skipped and the git
    steps only print what they would do.

    Returns:
        0 on success, 1 as soon as any step reports failure.
    """
    parser = argparse.ArgumentParser(
        description="Release AutoGLM-GUI with version bump"
    )
    # The bump flags and --version are mutually exclusive.
    bump_group = parser.add_mutually_exclusive_group()
    for flag, help_text in (
        ("--major", "Bump major version (X.0.0)"),
        ("--minor", "Bump minor version (x.X.0)"),
        ("--patch", "Bump patch version (x.x.X) [default]"),
    ):
        bump_group.add_argument(flag, action="store_true", help=help_text)
    bump_group.add_argument(
        "--version", type=str, help="Set specific version (e.g., 1.2.3)"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be done without making changes",
    )
    args = parser.parse_args()

    separator = "=" * 50
    print(separator)
    print("AutoGLM-GUI Release Script")
    print(separator)

    current_version = get_current_version()
    print(f"Current version: {current_version}")

    # Anything other than --major/--minor falls back to a patch bump.
    bump_type = "major" if args.major else "minor" if args.minor else "patch"

    new_version = bump_version(current_version, bump_type, args.version)
    print(f"New version: {new_version}")
    print()

    if args.dry_run:
        print("[DRY RUN] No changes will be made")
        print()
    else:
        if not update_pyproject_version(new_version):
            return 1
        if not update_electron_package_json_version(new_version):
            return 1
        print()

        if not run_uv_sync():
            return 1

    if not git_commit_version(new_version, dry_run=args.dry_run):
        return 1
    print()

    if not create_git_tag(new_version, dry_run=args.dry_run):
        return 1

    print()
    print(separator)
    if args.dry_run:
        print("Dry run completed!")
    else:
        print("Release completed successfully!")
        print()
        print("Next steps:")
        print(" 1. Push changes: git push && git push origin v" + new_version)
        print(" 2. Build package: uv run python scripts/build.py --pack")
        print(" 3. Publish to PyPI: uv publish")
    print(separator)

    return 0
309 |
310 |
311 | if __name__ == "__main__":
312 | sys.exit(main())
313 |
--------------------------------------------------------------------------------
/scripts/build_electron.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | AutoGLM-GUI Electron 一键构建脚本
4 |
5 | 功能:
6 | 1. 检查环境依赖
7 | 2. 同步 Python 开发依赖
8 | 3. 构建前端
9 | 4. 下载 ADB 工具
10 | 5. 打包 Python 后端
11 | 6. 构建 Electron 应用
12 |
13 | 用法:
14 | uv run python scripts/build_electron.py [--skip-frontend] [--skip-adb] [--skip-backend]
15 | """
16 |
17 | import argparse
18 | import platform
19 | import shutil
20 | import subprocess
21 | import sys
22 | from pathlib import Path
23 |
24 | # 修复 Windows 编码问题
25 | if sys.platform == "win32":
26 | import codecs
27 |
28 | sys.stdout = codecs.getwriter("utf-8")(sys.stdout.buffer, "strict")
29 | sys.stderr = codecs.getwriter("utf-8")(sys.stderr.buffer, "strict")
30 |
31 |
class Color:
    """Terminal escape sequences used to colour this script's output."""

    # RESET is appended after coloured text by the print_* helpers.
    RESET = "\033[0m"
    BOLD = "\033[1m"
    RED = "\033[91m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"
    CYAN = "\033[96m"
42 |
43 |
def print_step(step: str, total: int, current: int):
    """Print a bold cyan ``[current/total] step`` header and a rule of 60 ``=``."""
    heading = f"[{current}/{total}] {step}"
    print(f"\n{Color.CYAN}{Color.BOLD}{heading}{Color.RESET}")
    print("=" * 60)
48 |
49 |
def print_success(message: str):
    """Print ``message`` in green, prefixed with a check mark."""
    line = f"{Color.GREEN}✓ {message}{Color.RESET}"
    print(line)
53 |
54 |
def print_error(message: str):
    """Print ``message`` in red, prefixed with a cross, to stderr."""
    line = f"{Color.RED}✗ {message}{Color.RESET}"
    print(line, file=sys.stderr)
58 |
59 |
def print_warning(message: str):
    """Print ``message`` in yellow, prefixed with a warning sign."""
    line = f"{Color.YELLOW}⚠ {message}{Color.RESET}"
    print(line)
63 |
64 |
def run_command(cmd: list[str], cwd: Path | None = None, check: bool = True) -> bool:
    """Echo a command, run it, and report whether it succeeded.

    The child's output is not captured.

    Args:
        cmd: Program and arguments.
        cwd: Working directory for the child process, if any.
        check: Passed to ``subprocess.run``; a resulting
            ``CalledProcessError`` is reported and turned into False.

    Returns:
        True when the command exits with status 0; False when it fails or
        the executable cannot be found.
    """
    print(f"{Color.BLUE}$ {' '.join(map(str, cmd))}{Color.RESET}")

    try:
        # On Windows, pnpm/npm are run through the shell.
        needs_shell = sys.platform == "win32" and cmd[0] in ["pnpm", "npm"]
        completed = subprocess.run(
            cmd,
            cwd=cwd,
            check=check,
            capture_output=False,
            text=True,
            shell=needs_shell,
        )
    except subprocess.CalledProcessError as e:
        print_error(f"命令执行失败: {e}")
        return False
    except FileNotFoundError:
        print_error(f"命令未找到: {cmd[0]}")
        return False

    return completed.returncode == 0
84 |
85 |
def check_command(cmd: str) -> bool:
    """Check whether a command is available by running ``<cmd> --version``.

    Args:
        cmd: Name or path of the executable to probe.

    Returns:
        True when the probe exits with status 0; False when it exits
        non-zero or the process cannot be started at all.
    """
    try:
        # On Windows the probe goes through the shell (needed for commands
        # such as pnpm).
        subprocess.run(
            [cmd, "--version"],
            capture_output=True,
            check=True,
            shell=(sys.platform == "win32"),
        )
        return True
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError as well as e.g. PermissionError
        # for a path that exists but is not executable.
        return False
99 |
100 |
class ElectronBuilder:
    """Runs the Electron build pipeline step by step.

    Every step method prints its own progress and returns True on success
    and False on failure; ``build`` runs them in order and stops at the
    first failure.
    """

    def __init__(self, args):
        """Store the parsed CLI arguments and derive project paths.

        Args:
            args: Namespace providing ``skip_frontend``, ``skip_adb`` and
                ``skip_backend``.
        """
        self.args = args

        project_root = Path(__file__).parent.parent
        self.root_dir = project_root
        self.frontend_dir = project_root / "frontend"
        self.scripts_dir = project_root / "scripts"
        self.electron_dir = project_root / "electron"
        self.resources_dir = project_root / "resources"

        # Platform information
        system_name = platform.system().lower()
        self.platform = system_name
        self.is_windows = system_name == "windows"
        self.is_macos = system_name == "darwin"
        self.is_linux = system_name == "linux"

    def check_environment(self) -> bool:
        """Verify that uv, node and pnpm are available; print hints if not."""
        print_step("检查环境依赖", 7, 1)

        required_tools = {
            "uv": "Python 包管理器",
            "node": "Node.js 运行时",
            "pnpm": "pnpm 包管理器",
        }
        install_hints = {
            "uv": " uv: curl -LsSf https://astral.sh/uv/install.sh | sh",
            "node": " Node.js: https://nodejs.org/",
            "pnpm": " pnpm: npm install -g pnpm",
        }

        missing_tools = []
        for tool, description in required_tools.items():
            if not check_command(tool):
                print_error(f"{description} ({tool}) 未安装")
                missing_tools.append(tool)
                continue
            print_success(f"{description} ({tool}) 已安装")

        if not missing_tools:
            return True

        print_error(f"\n缺少必需工具: {', '.join(missing_tools)}")
        print("\n安装指南:")
        for tool in ("uv", "node", "pnpm"):
            if tool in missing_tools:
                print(install_hints[tool])
        return False

    def sync_python_deps(self) -> bool:
        """Run ``uv sync --dev`` in the project root."""
        print_step("同步 Python 开发依赖", 7, 2)
        sync_command = ["uv", "sync", "--dev"]
        return run_command(sync_command, cwd=self.root_dir)

    def build_frontend(self) -> bool:
        """Install and build the frontend, then copy it into the backend."""
        print_step("构建前端", 7, 3)

        # Install dependencies, then build; stop at the first failure.
        pnpm_stages = (
            ("\n安装前端依赖...", ["pnpm", "install"]),
            ("\n构建前端代码...", ["pnpm", "build"]),
        )
        for announcement, command in pnpm_stages:
            print(announcement)
            if not run_command(command, cwd=self.frontend_dir):
                return False

        # Replace AutoGLM_GUI/static with the fresh frontend build output.
        print("\n复制前端到后端...")
        backend_static = self.root_dir / "AutoGLM_GUI" / "static"
        if backend_static.exists():
            shutil.rmtree(backend_static)
        shutil.copytree(self.frontend_dir / "dist", backend_static)
        print_success(f"前端已复制到 {backend_static}")

        return True

    def download_adb(self) -> bool:
        """Download ADB for the platform(s) associated with this host."""
        print_step("下载 ADB 工具", 7, 4)

        # Decide which platforms to download for.
        if self.is_windows:
            platforms = ["windows"]
        elif self.is_macos:
            # On macOS both the darwin and the windows tools are fetched.
            platforms = ["darwin", "windows"]
        elif self.is_linux:
            platforms = ["linux"]
        else:
            print_warning(f"未知平台 {self.platform},跳过 ADB 下载")
            return True

        for plat in platforms:
            print(f"\n下载 {plat} ADB...")
            download_command = ["uv", "run", "python", "scripts/download_adb.py", plat]
            if not run_command(download_command, cwd=self.root_dir):
                return False

        return True

    def build_backend(self) -> bool:
        """Package the Python backend with PyInstaller and copy the result."""
        print_step("打包 Python 后端", 7, 5)

        print("\n运行 PyInstaller...")
        if not run_command(["pyinstaller", "autoglm.spec"], cwd=self.scripts_dir):
            return False

        # Replace resources/backend with the PyInstaller output directory.
        print("\n复制后端到 resources...")
        backend_resources = self.resources_dir / "backend"
        if backend_resources.exists():
            shutil.rmtree(backend_resources)
        shutil.copytree(self.scripts_dir / "dist" / "autoglm-gui", backend_resources)
        print_success(f"后端已复制到 {backend_resources}")

        return True

    def build_electron(self) -> bool:
        """Install the Electron dependencies, build the app, list the output."""
        print_step("安装 Electron 依赖", 7, 6)
        if not run_command(["npm", "install"], cwd=self.electron_dir):
            return False

        print_step("构建 Electron 应用", 7, 7)
        # Build with publishing explicitly disabled.
        build_command = ["npm", "run", "build", "--", "--publish", "never"]
        if not run_command(build_command, cwd=self.electron_dir):
            return False

        rule = "=" * 60
        print("\n" + rule)
        print(f"{Color.GREEN}{Color.BOLD}✓ 构建完成!{Color.RESET}")
        print(rule)

        # List the build artifacts: files with their size in MB, and
        # non-hidden directories.
        dist_dir = self.electron_dir / "dist"
        if dist_dir.exists():
            print(f"\n构建产物位置: {dist_dir}")
            print("\n文件列表:")
            for item in sorted(dist_dir.iterdir()):
                if item.is_file():
                    size = item.stat().st_size / (1024 * 1024)
                    print(f" - {item.name} ({size:.1f} MB)")
                elif item.is_dir() and not item.name.startswith("."):
                    print(f" - {item.name}/ (目录)")

        return True

    def build(self) -> bool:
        """Run the whole pipeline, honouring the ``--skip-*`` flags.

        Returns:
            True when every step succeeds, False at the first failing step
            (its name is reported on stderr).
        """
        print(f"\n{Color.BOLD}AutoGLM-GUI Electron 构建工具{Color.RESET}")
        print(f"平台: {self.platform}")
        print(f"项目根目录: {self.root_dir}\n")

        # (step name, name of the skip flag on args or None,
        #  message printed when skipped, step method)
        pipeline = [
            ("环境检查", None, None, self.check_environment),
            ("Python 依赖", None, None, self.sync_python_deps),
            ("前端构建", "skip_frontend", "跳过前端构建", self.build_frontend),
            ("ADB 工具", "skip_adb", "跳过 ADB 下载", self.download_adb),
            ("后端打包", "skip_backend", "跳过后端打包", self.build_backend),
            ("Electron", None, None, self.build_electron),
        ]

        for step_name, skip_flag, skip_notice, step_func in pipeline:
            # The flag is read only when its step is reached.
            if skip_flag is not None and getattr(self.args, skip_flag):
                print_warning(skip_notice)
                continue
            if not step_func():
                print_error(f"\n构建失败: {step_name}")
                return False

        return True
296 |
297 |
def main():
    """Parse the ``--skip-*`` flags, run the build, and exit.

    Exit status is 0 when the build succeeds and 1 when it fails, is
    interrupted with Ctrl+C, or raises an unexpected exception (in which
    case the traceback is printed).
    """
    parser = argparse.ArgumentParser(description="AutoGLM-GUI Electron 一键构建脚本")
    skip_flags = (
        ("--skip-frontend", "跳过前端构建"),
        ("--skip-adb", "跳过 ADB 工具下载"),
        ("--skip-backend", "跳过后端打包"),
    )
    for flag, help_text in skip_flags:
        parser.add_argument(flag, action="store_true", help=help_text)
    args = parser.parse_args()

    builder = ElectronBuilder(args)

    try:
        exit_code = 0 if builder.build() else 1
        sys.exit(exit_code)
    except KeyboardInterrupt:
        print_error("\n\n构建已取消")
        sys.exit(1)
    except Exception as e:
        print_error(f"\n\n构建失败: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
319 |
320 |
321 | if __name__ == "__main__":
322 | main()
323 |
--------------------------------------------------------------------------------
/frontend/src/api.ts:
--------------------------------------------------------------------------------
1 | import axios from 'redaxios';
2 |
/** One device entry; the element type of `DeviceListResponse.devices`. */
export interface Device {
  id: string;
  model: string;
  status: string;
  connection_type: string;
  is_initialized: boolean;
  serial?: string; // Real serial number of the device
}

/** Wrapper object holding the device list. */
export interface DeviceListResponse {
  devices: Device[];
}

/** Result of a chat request (presumably the `/api/chat` response — confirm against the backend). */
export interface ChatResponse {
  result: string;
  steps: number;
  success: boolean;
}

/** Service status (presumably the `/api/status` response — confirm against the backend). */
export interface StatusResponse {
  version: string;
  initialized: boolean;
  step_count: number;
}
27 |
/** Optional model settings; carried as `model_config` in `InitRequest`. */
export interface APIModelConfig {
  base_url?: string;
  api_key?: string;
  model_name?: string;
  max_tokens?: number;
  temperature?: number;
  top_p?: number;
  frequency_penalty?: number;
}

/** Optional agent settings; carried as `agent_config` in `InitRequest`. */
export interface APIAgentConfig {
  max_steps?: number;
  device_id?: string | null;
  verbose?: boolean;
}

/** Body accepted by `initAgent`; both sections are optional. */
export interface InitRequest {
  model_config?: APIModelConfig;
  agent_config?: APIAgentConfig;
}
48 |
/** Request body for a screenshot; `device_id` may be omitted or null. */
export interface ScreenshotRequest {
  device_id?: string | null;
}

/** Screenshot result; `error` is optional. */
export interface ScreenshotResponse {
  success: boolean;
  image: string; // base64 encoded PNG
  width: number;
  height: number;
  is_sensitive: boolean;
  error?: string;
}
61 |
/** Payload passed to the `onStep` callback of `sendMessageStream`. */
export interface StepEvent {
  type: 'step';
  step: number;
  thinking: string;
  /**
   * Action data for this step. Its keys are not defined in this file, so
   * values are typed `unknown` and must be narrowed before use.
   */
  action: Record<string, unknown>;
  success: boolean;
  finished: boolean;
}

/** Payload passed to the `onDone` callback of `sendMessageStream`. */
export interface DoneEvent {
  type: 'done';
  message: string;
  steps: number;
  success: boolean;
}

/** Payload passed to the `onError` callback of `sendMessageStream`. */
export interface ErrorEvent {
  type: 'error';
  message: string;
}

/** Any stream event; discriminated by the `type` field. */
export type StreamEvent = StepEvent | DoneEvent | ErrorEvent;
84 |
/** Body fields of a tap request (see `sendTap`). */
export interface TapRequest {
  x: number;
  y: number;
  device_id?: string | null;
  delay?: number;
}

/** Result of a tap request; `error` is optional. */
export interface TapResponse {
  success: boolean;
  error?: string;
}

/** Body fields of a swipe request (see `sendSwipe`). */
export interface SwipeRequest {
  start_x: number;
  start_y: number;
  end_x: number;
  end_y: number;
  duration_ms?: number;
  device_id?: string | null;
  delay?: number;
}

/** Result of a swipe request; `error` is optional. */
export interface SwipeResponse {
  success: boolean;
  error?: string;
}

/** Body fields of a touch-down request (see `sendTouchDown`). */
export interface TouchDownRequest {
  x: number;
  y: number;
  device_id?: string | null;
  delay?: number;
}

/** Result of a touch-down request; `error` is optional. */
export interface TouchDownResponse {
  success: boolean;
  error?: string;
}

/** Body fields of a touch-move request (see `sendTouchMove`). */
export interface TouchMoveRequest {
  x: number;
  y: number;
  device_id?: string | null;
  delay?: number;
}

/** Result of a touch-move request; `error` is optional. */
export interface TouchMoveResponse {
  success: boolean;
  error?: string;
}

/** Body fields of a touch-up request (see `sendTouchUp`). */
export interface TouchUpRequest {
  x: number;
  y: number;
  device_id?: string | null;
  delay?: number;
}

/** Result of a touch-up request; `error` is optional. */
export interface TouchUpResponse {
  success: boolean;
  error?: string;
}
147 |
/** Payload accepted by `connectWifi`; both fields are optional. */
export interface WiFiConnectRequest {
  device_id?: string | null;
  port?: number;
}

/** Result of a WiFi connect request; `device_id`, `address` and `error` are optional. */
export interface WiFiConnectResponse {
  success: boolean;
  message: string;
  device_id?: string;
  address?: string;
  error?: string;
}

/** Result of a WiFi disconnect request; `error` is optional. */
export interface WiFiDisconnectResponse {
  success: boolean;
  message: string;
  error?: string;
}
166 |
/**
 * Fetches `GET /api/devices` and returns the response body as-is.
 *
 * @returns The device list wrapper object.
 */
export async function listDevices(): Promise<DeviceListResponse> {
  const res = await axios.get<DeviceListResponse>('/api/devices');
  return res.data;
}
171 |
/**
 * Fetches `GET /api/devices` and unwraps the `devices` array.
 *
 * @returns The devices listed in the response.
 */
export async function getDevices(): Promise<Device[]> {
  const response = await axios.get<DeviceListResponse>('/api/devices');
  return response.data.devices;
}
176 |
/**
 * Posts `payload` to `/api/devices/connect_wifi`.
 *
 * @param payload - Optional device id and port.
 * @returns The response body.
 */
export async function connectWifi(
  payload: WiFiConnectRequest
): Promise<WiFiConnectResponse> {
  const res = await axios.post<WiFiConnectResponse>(
    '/api/devices/connect_wifi',
    payload
  );
  return res.data;
}
186 |
/**
 * Posts `{ device_id }` to `/api/devices/disconnect_wifi`.
 *
 * @param deviceId - Id of the device to disconnect.
 * @returns The response body.
 */
export async function disconnectWifi(
  deviceId: string
): Promise<WiFiDisconnectResponse> {
  const response = await axios.post<WiFiDisconnectResponse>(
    '/api/devices/disconnect_wifi',
    {
      device_id: deviceId,
    }
  );
  return response.data;
}
198 |
/**
 * Posts the initialization settings to `/api/init`.
 *
 * @param config - Optional model/agent settings; an empty object is sent
 *   when omitted.
 * @returns The response body.
 */
export async function initAgent(
  config?: InitRequest
): Promise<{ success: boolean; message: string; device_id?: string }> {
  const body = config ?? {};
  const { data } = await axios.post('/api/init', body);
  return data;
}
205 |
/**
 * Posts `{ message }` to `/api/chat` and returns the response body.
 *
 * @param message - Text to send.
 */
export async function sendMessage(message: string): Promise<ChatResponse> {
  const res = await axios.post<ChatResponse>('/api/chat', { message });
  return res.data;
}
210 |
/**
 * Posts a chat message to `/api/chat/stream` and reads the response body as
 * a stream of `event: <name>` / `data: <json>` lines, forwarding each parsed
 * payload to the callback that matches the most recent event name
 * (`step`, `done` or `error`). Payloads for other event names are ignored.
 *
 * Transport failures (non-OK status, missing body, network errors) are
 * reported through `onError`; an abort triggered by `close()` is not.
 * A `data:` line that is not valid JSON is logged and skipped. An exception
 * thrown by a callback is logged and does not stop the stream.
 *
 * @param message - Text to send.
 * @param deviceId - Sent as `device_id` in the request body.
 * @param onStep - Called for each `step` event.
 * @param onDone - Called for each `done` event.
 * @param onError - Called for each `error` event and for transport failures.
 * @returns A handle whose `close()` aborts the request.
 */
export function sendMessageStream(
  message: string,
  deviceId: string,
  onStep: (event: StepEvent) => void,
  onDone: (event: DoneEvent) => void,
  onError: (event: ErrorEvent) => void
): { close: () => void } {
  const controller = new AbortController();

  // Name from the latest `event:` line. Declared outside the read loop so it
  // survives chunk boundaries (an `event:` line and its `data:` line may
  // arrive in different chunks).
  let eventType = 'message';

  /** Routes one parsed payload to the callback for the current event name. */
  const dispatch = (data: unknown): void => {
    if (eventType === 'step') {
      console.log('[SSE] Received step event:', data);
      onStep(data as StepEvent);
    } else if (eventType === 'done') {
      console.log('[SSE] Received done event:', data);
      onDone(data as DoneEvent);
    } else if (eventType === 'error') {
      console.log('[SSE] Received error event:', data);
      onError(data as ErrorEvent);
    }
  };

  /** Handles one complete line of the stream. */
  const handleLine = (line: string): void => {
    if (line.startsWith('event: ')) {
      eventType = line.slice(7).trim();
      return;
    }
    if (!line.startsWith('data: ')) {
      return;
    }

    let data: unknown;
    try {
      data = JSON.parse(line.slice(6));
    } catch (e) {
      console.error('Failed to parse SSE data:', line, e);
      return;
    }

    // Kept separate from the parse step so a throwing callback is not
    // misreported as a JSON parse failure.
    try {
      dispatch(data);
    } catch (e) {
      console.error('[SSE] Event handler threw:', e);
    }
  };

  /** Performs the request and feeds complete lines to `handleLine`. */
  const pump = async (): Promise<void> => {
    const response = await fetch('/api/chat/stream', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({ message, device_id: deviceId }),
      signal: controller.signal,
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    if (!response.body) {
      throw new Error('Response body is null');
    }

    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');

      // Keep the last piece: it may be an incomplete line.
      buffer = lines.pop() ?? '';

      for (const line of lines) {
        handleLine(line);
      }
    }

    // Flush the decoder and process a final line that was not terminated
    // by a newline; otherwise the last event of the stream could be lost.
    buffer += decoder.decode();
    if (buffer) {
      handleLine(buffer);
    }
  };

  pump().catch((error: unknown) => {
    const isAbort = error instanceof Error && error.name === 'AbortError';
    if (!isAbort) {
      const text = error instanceof Error ? error.message : String(error);
      onError({ type: 'error', message: text });
    }
  });

  return {
    close: () => controller.abort(),
  };
}
286 |
/** Fetches `GET /api/status` and returns the response body. */
export async function getStatus(): Promise<StatusResponse> {
  const res = await axios.get<StatusResponse>('/api/status');
  return res.data;
}
291 |
/**
 * Posts `{ device_id }` to `/api/reset`.
 *
 * @param deviceId - Id of the device to reset.
 * @returns The response body.
 */
export async function resetChat(deviceId: string): Promise<{
  success: boolean;
  message: string;
  device_id?: string;
}> {
  const body = { device_id: deviceId };
  const { data } = await axios.post('/api/reset', body);
  return data;
}
300 |
/**
 * Posts `{ device_id }` to `/api/screenshot`.
 *
 * @param deviceId - Target device; `null` is sent when omitted.
 * @returns The response body.
 */
export async function getScreenshot(
  deviceId?: string | null
): Promise<ScreenshotResponse> {
  const res = await axios.post<ScreenshotResponse>(
    '/api/screenshot',
    { device_id: deviceId ?? null },
    {}
  );
  return res.data;
}
311 |
/**
 * Post a tap request to `/api/control/tap`.
 *
 * Coordinates are rounded to the nearest integer before sending, matching
 * the swipe and touch-down/move/up requests in this module, so fractional
 * values are never forwarded.
 *
 * NOTE(review): the return annotation carries no type argument here —
 * confirm the intended response type.
 *
 * @param x - Horizontal coordinate; sent as `Math.round(x)`.
 * @param y - Vertical coordinate; sent as `Math.round(y)`.
 * @param deviceId - Optional device identifier; `undefined` and `null` are
 *   both sent as `device_id: null`.
 * @param delay - Forwarded unchanged as `delay`; defaults to `0`.
 * @returns The `data` field of the HTTP response.
 */
export async function sendTap(
  x: number,
  y: number,
  deviceId?: string | null,
  delay: number = 0
): Promise {
  const res = await axios.post('/api/control/tap', {
    x: Math.round(x),
    y: Math.round(y),
    device_id: deviceId ?? null,
    delay,
  });
  return res.data;
}
326 |
/**
 * Post a swipe request to `/api/control/swipe`.
 *
 * All four coordinates and the duration are rounded to integers. A falsy
 * `durationMs` (`undefined`, `0`, `NaN`) falls back to `300`. `delay` is
 * rounded to three decimal places. Failures are logged and then rethrown
 * unchanged.
 *
 * NOTE(review): the return annotation carries no type argument here —
 * confirm the intended response type.
 *
 * @param startX - Start horizontal coordinate, sent as `start_x`.
 * @param startY - Start vertical coordinate, sent as `start_y`.
 * @param endX - End horizontal coordinate, sent as `end_x`.
 * @param endY - End vertical coordinate, sent as `end_y`.
 * @param durationMs - Optional duration, sent as `duration_ms`.
 * @param deviceId - Optional device identifier; `undefined` and `null` are
 *   both sent as `device_id: null`.
 * @param delay - Sent as `delay` after rounding; defaults to `0`.
 * @returns The `data` field of the HTTP response.
 * @throws Whatever the HTTP call throws, after logging it.
 */
export async function sendSwipe(
  startX: number,
  startY: number,
  endX: number,
  endY: number,
  durationMs?: number,
  deviceId?: string | null,
  delay: number = 0
): Promise {
  const effectiveDuration = durationMs || 300;
  const body = {
    start_x: Math.round(startX),
    start_y: Math.round(startY),
    end_x: Math.round(endX),
    end_y: Math.round(endY),
    duration_ms: Math.round(effectiveDuration),
    device_id: deviceId ?? null,
    delay: Math.round(delay * 1000) / 1000,
  };

  try {
    const { data } = await axios.post('/api/control/swipe', body);
    return data;
  } catch (err) {
    console.error('[API] Swipe request failed:', err);
    throw err;
  }
}
357 |
/**
 * Post a touch-down request to `/api/control/touch/down`.
 *
 * NOTE(review): the return annotation carries no type argument here —
 * confirm the intended response type.
 *
 * @param x - Horizontal coordinate; sent as `Math.round(x)`.
 * @param y - Vertical coordinate; sent as `Math.round(y)`.
 * @param deviceId - Optional device identifier; `undefined` and `null` are
 *   both sent as `device_id: null`.
 * @param delay - Forwarded unchanged as `delay`; defaults to `0`.
 * @returns The `data` field of the HTTP response.
 */
export async function sendTouchDown(
  x: number,
  y: number,
  deviceId?: string | null,
  delay: number = 0
): Promise {
  const body = {
    x: Math.round(x),
    y: Math.round(y),
    device_id: deviceId ?? null,
    delay,
  };
  const { data } = await axios.post('/api/control/touch/down', body);
  return data;
}
372 |
/**
 * Post a touch-move request to `/api/control/touch/move`.
 *
 * NOTE(review): the return annotation carries no type argument here —
 * confirm the intended response type.
 *
 * @param x - Horizontal coordinate; sent as `Math.round(x)`.
 * @param y - Vertical coordinate; sent as `Math.round(y)`.
 * @param deviceId - Optional device identifier; `undefined` and `null` are
 *   both sent as `device_id: null`.
 * @param delay - Forwarded unchanged as `delay`; defaults to `0`.
 * @returns The `data` field of the HTTP response.
 */
export async function sendTouchMove(
  x: number,
  y: number,
  deviceId?: string | null,
  delay: number = 0
): Promise {
  const body = {
    x: Math.round(x),
    y: Math.round(y),
    device_id: deviceId ?? null,
    delay,
  };
  const { data } = await axios.post('/api/control/touch/move', body);
  return data;
}
387 |
/**
 * Post a touch-up request to `/api/control/touch/up`.
 *
 * NOTE(review): the return annotation carries no type argument here —
 * confirm the intended response type.
 *
 * @param x - Horizontal coordinate; sent as `Math.round(x)`.
 * @param y - Vertical coordinate; sent as `Math.round(y)`.
 * @param deviceId - Optional device identifier; `undefined` and `null` are
 *   both sent as `device_id: null`.
 * @param delay - Forwarded unchanged as `delay`; defaults to `0`.
 * @returns The `data` field of the HTTP response.
 */
export async function sendTouchUp(
  x: number,
  y: number,
  deviceId?: string | null,
  delay: number = 0
): Promise {
  const body = {
    x: Math.round(x),
    y: Math.round(y),
    device_id: deviceId ?? null,
    delay,
  };
  const { data } = await axios.post('/api/control/touch/up', body);
  return data;
}
402 |
403 | // Configuration Management
404 |
/**
 * Shape of the configuration payload returned by `GET /api/config`.
 *
 * Field meanings are not established in this file; the per-field notes are
 * assumptions to confirm against the backend.
 */
export interface ConfigResponse {
  /** Presumably the model service endpoint — TODO confirm. */
  base_url: string;
  /** Presumably the model identifier — TODO confirm. */
  model_name: string;
  /** NOTE(review): unclear whether this is the raw key or a masked form — confirm. */
  api_key: string;
  /** Presumably names where the configuration came from — TODO confirm. */
  source: string;
}

/**
 * Request body accepted by `saveConfig`, which posts it to `/api/config`.
 * `api_key` may be omitted.
 */
export interface ConfigSaveRequest {
  base_url: string;
  model_name: string;
  api_key?: string;
}

/**
 * Fetch the configuration from the backend.
 *
 * Sends `GET /api/config` and returns the response body. The return type is
 * declared as `ConfigResponse` so callers get a checked shape rather than an
 * unparameterised `Promise`.
 *
 * @returns The `data` field of the HTTP response.
 */
export async function getConfig(): Promise<ConfigResponse> {
  const res = await axios.get('/api/config');
  return res.data;
}
422 |
/**
 * Persist a configuration through the backend.
 *
 * Sends `POST /api/config` with `config` as the request body, unmodified.
 *
 * @param config - Settings to send.
 * @returns The `data` field of the HTTP response.
 */
export async function saveConfig(
  config: ConfigSaveRequest
): Promise<{ success: boolean; message: string }> {
  const { data } = await axios.post('/api/config', config);
  return data;
}
429 |
/**
 * Remove the configuration through the backend.
 *
 * Sends `DELETE /api/config` with no body.
 *
 * @returns The `data` field of the HTTP response.
 */
export async function deleteConfig(): Promise<{
  success: boolean;
  message: string;
}> {
  const { data } = await axios.delete('/api/config');
  return data;
}
437 |
--------------------------------------------------------------------------------