├── electron ├── icon.icns ├── icon.ico ├── icon.png ├── preload.js ├── package.json ├── electron-builder.yml └── afterPack.js ├── scrcpy-server-v3.3.3 ├── frontend ├── postcss.config.mjs ├── tsconfig.json ├── .prettierignore ├── index.html ├── src │ ├── routes │ │ ├── about.tsx │ │ ├── index.tsx │ │ └── __root.tsx │ ├── main.tsx │ ├── styles.css │ ├── components │ │ ├── ConfirmDialog.tsx │ │ ├── Toast.tsx │ │ ├── DeviceSidebar.tsx │ │ └── DeviceCard.tsx │ ├── routeTree.gen.ts │ └── api.ts ├── vite.config.js ├── README.md ├── .gitignore ├── prettier.config.js ├── package.json └── eslint.config.js ├── AutoGLM_GUI ├── server.py ├── resources │ └── apks │ │ ├── ADBKeyboard.apk │ │ └── ADBKeyBoard.README.txt ├── exceptions.py ├── version.py ├── adb_plus │ ├── __init__.py │ ├── serial.py │ ├── device.py │ ├── ip.py │ ├── touch.py │ └── screenshot.py ├── config.py ├── state.py ├── __init__.py ├── api │ ├── __init__.py │ ├── control.py │ └── devices.py ├── platform_utils.py ├── logger.py ├── schemas.py └── __main__.py ├── phone_agent ├── model │ ├── __init__.py │ └── client.py ├── actions │ └── __init__.py ├── __init__.py ├── config │ ├── __init__.py │ ├── i18n.py │ ├── prompts_en.py │ ├── prompts.py │ ├── prompts_zh.py │ └── apps.py ├── adb │ ├── __init__.py │ ├── input.py │ ├── screenshot.py │ └── device.py └── agent.py ├── .vscode └── settings.json ├── main.py ├── .gitignore ├── .github ├── actions │ └── setup-python │ │ └── action.yml └── workflows │ ├── release.yml │ └── pr-lint.yml ├── scripts ├── pyi_rth_utf8.py ├── autoglm.spec ├── convert_icon.py ├── build.py ├── download_adb.py ├── release.py └── build_electron.py ├── pyproject.toml └── README.md /electron/icon.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suyiiyii/AutoGLM-GUI/HEAD/electron/icon.icns -------------------------------------------------------------------------------- /electron/icon.ico: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/suyiiyii/AutoGLM-GUI/HEAD/electron/icon.ico -------------------------------------------------------------------------------- /electron/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suyiiyii/AutoGLM-GUI/HEAD/electron/icon.png -------------------------------------------------------------------------------- /scrcpy-server-v3.3.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suyiiyii/AutoGLM-GUI/HEAD/scrcpy-server-v3.3.3 -------------------------------------------------------------------------------- /frontend/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: { 3 | '@tailwindcss/postcss': {}, 4 | }, 5 | }; 6 | -------------------------------------------------------------------------------- /AutoGLM_GUI/server.py: -------------------------------------------------------------------------------- 1 | """AutoGLM-GUI Backend API Server.""" 2 | 3 | from AutoGLM_GUI.api import app 4 | 5 | __all__ = ["app"] 6 | -------------------------------------------------------------------------------- /AutoGLM_GUI/resources/apks/ADBKeyboard.apk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suyiiyii/AutoGLM-GUI/HEAD/AutoGLM_GUI/resources/apks/ADBKeyboard.apk -------------------------------------------------------------------------------- /phone_agent/model/__init__.py: -------------------------------------------------------------------------------- 1 | """Model client module for AI inference.""" 2 | 3 | from phone_agent.model.client import ModelClient, ModelConfig 4 | 5 | __all__ = ["ModelClient", "ModelConfig"] 6 | 
class DeviceNotAvailableError(Exception):
    """Signal that a device cannot be used.

    Raised when the device is not available (disconnected/offline).
    Carries no state beyond the standard exception message.
    """
-------------------------------------------------------------------------------- /frontend/.prettierignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | node_modules 3 | pnpm-lock.yaml 4 | 5 | # Build outputs 6 | dist 7 | build 8 | .vite 9 | 10 | # Auto-generated files 11 | src/routeTree.gen.ts 12 | 13 | # Coverage 14 | coverage 15 | 16 | # Cache 17 | .eslintcache 18 | .tsbuildinfo 19 | -------------------------------------------------------------------------------- /AutoGLM_GUI/resources/apks/ADBKeyBoard.README.txt: -------------------------------------------------------------------------------- 1 | Note on Third-Party Components: This project includes ADBKeyBoard to support automated text input. ADBKeyBoard is licensed under GPL-2.0 and is developed by senzhk. The source code for ADBKeyBoard can be found at: https://github.com/senzhk/ADBKeyBoard -------------------------------------------------------------------------------- /phone_agent/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Phone Agent - An AI-powered phone automation framework. 3 | 4 | This package provides tools for automating Android phone interactions 5 | using AI models for visual understanding and decision making. 6 | """ 7 | 8 | from phone_agent.agent import PhoneAgent 9 | 10 | __version__ = "0.1.0" 11 | __all__ = ["PhoneAgent"] 12 | -------------------------------------------------------------------------------- /frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | AutoGLM GUI 7 | 8 | 9 |
10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /frontend/src/routes/about.tsx: -------------------------------------------------------------------------------- 1 | import { createFileRoute } from '@tanstack/react-router'; 2 | import * as React from 'react'; 3 | 4 | export const Route = createFileRoute('/about')({ 5 | component: AboutComponent, 6 | }); 7 | 8 | function AboutComponent() { 9 | return ( 10 |
11 |

About

12 |
13 | ); 14 | } 15 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | """AutoGLM-GUI Backend API Server. 2 | 3 | This module is kept for backward compatibility and development. 4 | For production use, run: autoglm-gui (or uvx autoglm-gui) 5 | """ 6 | 7 | # Re-export app from the package 8 | from AutoGLM_GUI.server import app 9 | 10 | if __name__ == "__main__": 11 | import uvicorn 12 | 13 | uvicorn.run(app, host="0.0.0.0", port=8000) 14 | -------------------------------------------------------------------------------- /frontend/vite.config.js: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vite'; 2 | import react from '@vitejs/plugin-react'; 3 | import { tanstackRouter } from '@tanstack/router-plugin/vite'; 4 | 5 | // https://vitejs.dev/config/ 6 | export default defineConfig({ 7 | plugins: [ 8 | tanstackRouter({ target: 'react', autoCodeSplitting: true }), 9 | react(), 10 | ], 11 | server: { 12 | proxy: { 13 | '/api': { 14 | target: 'http://localhost:8000', 15 | changeOrigin: true, 16 | }, 17 | }, 18 | }, 19 | }); 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python-generated files 2 | __pycache__/ 3 | *.py[oc] 4 | build/ 5 | dist/ 6 | wheels/ 7 | *.egg-info 8 | 9 | # Virtual environments 10 | .venv 11 | .python-version 12 | 13 | # Built static files (generated by scripts/build.py) 14 | AutoGLM_GUI/static/ 15 | 16 | # Frontend 17 | frontend/node_modules/ 18 | frontend/dist/ 19 | 20 | # Electron 21 | electron/node_modules/ 22 | electron/dist/ 23 | 24 | # Build resources 25 | resources/ 26 | 27 | # Logs 28 | *.log 29 | logs/ 30 | 31 | # MCP 32 | .mcp.json 33 | 34 | # macOS 35 | .DS_Store 36 | 
-------------------------------------------------------------------------------- /frontend/src/routes/index.tsx: -------------------------------------------------------------------------------- 1 | import { createFileRoute } from '@tanstack/react-router'; 2 | import { useEffect } from 'react'; 3 | import { useNavigate } from '@tanstack/react-router'; 4 | 5 | export const Route = createFileRoute('/')({ 6 | component: HomeComponent, 7 | }); 8 | 9 | function HomeComponent() { 10 | const navigate = useNavigate(); 11 | 12 | useEffect(() => { 13 | navigate({ to: '/chat' }); 14 | }, [navigate]); 15 | 16 | return ( 17 |
18 |

Welcome Home!

19 |
/**
 * Electron preload script
 *
 * This script runs in the renderer process, but executes before the web
 * page content is loaded. It can safely expose selected Node.js APIs to
 * the renderer process.
 *
 * The application currently needs no special IPC communication, because
 * the frontend talks to the backend directly over HTTP/WebSocket.
 */

const { contextBridge } = require('electron');

// Expose version information (optional)
contextBridge.exposeInMainWorld('electronAPI', {
  versions: {
    node: process.versions.node,
    chrome: process.versions.chrome,
    electron: process.versions.electron
  },
  platform: process.platform
});

console.log('Electron preload script loaded');
@dataclass
class AppConfig:
    """Process-wide settings describing the model endpoint."""

    base_url: str = ""
    model_name: str = "autoglm-phone-9b"
    api_key: str = "EMPTY"

    def refresh_from_env(self):
        """Reload the settings from environment variables (useful in reload mode).

        Every ``AUTOGLM_*`` variable that is set replaces the matching
        attribute; an unset variable leaves the current value untouched.
        """
        env_to_field = (
            ("AUTOGLM_BASE_URL", "base_url"),
            ("AUTOGLM_MODEL_NAME", "model_name"),
            ("AUTOGLM_API_KEY", "api_key"),
        )
        for env_name, field_name in env_to_field:
            current = getattr(self, field_name)
            setattr(self, field_name, os.getenv(env_name, current))
const root = ReactDOM.createRoot(rootElement); 25 | root.render(); 26 | } 27 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | # TanStack Router - File-Based Quickstart Example 2 | 3 | A quickstart example using file-based routing. 4 | 5 | - [TanStack Router Docs](https://tanstack.com/router) 6 | 7 | ## Start a new project based on this example 8 | 9 | To start a new project based on this example, run: 10 | 11 | ```sh 12 | npx gitpick TanStack/router/tree/main/examples/react/quickstart-file-based quickstart-file-based 13 | ``` 14 | 15 | ## Getting Started 16 | 17 | Install dependencies: 18 | 19 | ```sh 20 | pnpm install 21 | ``` 22 | 23 | Start the development server: 24 | 25 | ```sh 26 | pnpm dev 27 | ``` 28 | 29 | ## Build 30 | 31 | Build for production: 32 | 33 | ```sh 34 | pnpm build 35 | ``` 36 | 37 | ## About This Example 38 | 39 | This example demonstrates: 40 | 41 | - Quick setup with file-based routing 42 | - Automatic route generation 43 | - Minimal configuration 44 | - Type-safe routes 45 | -------------------------------------------------------------------------------- /AutoGLM_GUI/state.py: -------------------------------------------------------------------------------- 1 | """Shared runtime state for the AutoGLM-GUI API.""" 2 | 3 | from __future__ import annotations 4 | 5 | import asyncio 6 | from typing import TYPE_CHECKING 7 | 8 | from AutoGLM_GUI.logger import logger 9 | from phone_agent.agent import AgentConfig 10 | from phone_agent.model import ModelConfig 11 | 12 | if TYPE_CHECKING: 13 | from AutoGLM_GUI.scrcpy_stream import ScrcpyStreamer 14 | from phone_agent import PhoneAgent 15 | 16 | # Agent instances keyed by device_id 17 | agents: dict[str, "PhoneAgent"] = {} 18 | # Cached configs to rebuild agents on reset 19 | agent_configs: dict[str, tuple[ModelConfig, AgentConfig]] = {} 20 | 21 | # Scrcpy streaming 
def get_system_prompt(lang: str = "cn") -> str:
    """
    Select the system prompt for a language.

    Args:
        lang: Language code. 'en' selects the English prompt; any other
            value (including the default 'cn') selects the Chinese prompt.

    Returns:
        System prompt string.
    """
    return SYSTEM_PROMPT_EN if lang == "en" else SYSTEM_PROMPT_ZH
7 | 8 | Reference: https://pyinstaller.org/en/stable/hooks.html#understanding-pyi-rth-hooks 9 | """ 10 | 11 | import sys 12 | import os 13 | 14 | # Only apply on Windows 15 | if sys.platform == "win32": 16 | # Set environment variable for any subprocess 17 | os.environ["PYTHONIOENCODING"] = "utf-8" 18 | 19 | # Reconfigure stdout and stderr to UTF-8 20 | # This is the official Python 3.7+ way 21 | if hasattr(sys.stdout, "reconfigure"): 22 | sys.stdout.reconfigure(encoding="utf-8", errors="replace") 23 | sys.stderr.reconfigure(encoding="utf-8", errors="replace") 24 | else: 25 | # Fallback for Python < 3.7 (shouldn't happen with Python 3.11) 26 | import codecs 27 | 28 | sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach(), "replace") 29 | sys.stderr = codecs.getwriter("utf-8")(sys.stderr.detach(), "replace") 30 | -------------------------------------------------------------------------------- /electron/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "autoglm-gui", 3 | "version": "0.5.0", 4 | "description": "AutoGLM GUI - AI-powered Android automation desktop app", 5 | "main": "main.js", 6 | "homepage": "https://github.com/suyiiyii/AutoGLM-GUI", 7 | "repository": { 8 | "type": "git", 9 | "url": "https://github.com/suyiiyii/AutoGLM-GUI.git" 10 | }, 11 | "scripts": { 12 | "start": "electron .", 13 | "dev": "electron . 
--dev", 14 | "build": "electron-builder", 15 | "build:win": "electron-builder --win", 16 | "build:mac": "electron-builder --mac", 17 | "build:linux": "electron-builder --linux" 18 | }, 19 | "keywords": [ 20 | "autoglm", 21 | "android", 22 | "automation", 23 | "electron" 24 | ], 25 | "author": { 26 | "name": "suyiiyii", 27 | "email": "suyiiyii@gmail.com" 28 | }, 29 | "license": "MIT", 30 | "devDependencies": { 31 | "electron": "^28.0.0", 32 | "electron-builder": "^24.9.0" 33 | }, 34 | "packageManager": "pnpm@10.17.1+sha512.17c560fca4867ae9473a3899ad84a88334914f379be46d455cbf92e5cf4b39d34985d452d2583baf19967fa76cb5c17bc9e245529d0b98745721aa7200ecaf7a" 35 | } 36 | -------------------------------------------------------------------------------- /phone_agent/adb/__init__.py: -------------------------------------------------------------------------------- 1 | """ADB utilities for Android device interaction.""" 2 | 3 | from phone_agent.adb.connection import ( 4 | ADBConnection, 5 | ConnectionType, 6 | DeviceInfo, 7 | list_devices, 8 | quick_connect, 9 | ) 10 | from phone_agent.adb.device import ( 11 | back, 12 | double_tap, 13 | get_current_app, 14 | home, 15 | launch_app, 16 | long_press, 17 | swipe, 18 | tap, 19 | ) 20 | from phone_agent.adb.input import ( 21 | clear_text, 22 | detect_and_set_adb_keyboard, 23 | restore_keyboard, 24 | type_text, 25 | ) 26 | from phone_agent.adb.screenshot import get_screenshot 27 | 28 | __all__ = [ 29 | # Screenshot 30 | "get_screenshot", 31 | # Input 32 | "type_text", 33 | "clear_text", 34 | "detect_and_set_adb_keyboard", 35 | "restore_keyboard", 36 | # Device control 37 | "get_current_app", 38 | "tap", 39 | "swipe", 40 | "back", 41 | "home", 42 | "double_tap", 43 | "long_press", 44 | "launch_app", 45 | # Connection management 46 | "ADBConnection", 47 | "DeviceInfo", 48 | "ConnectionType", 49 | "quick_connect", 50 | "list_devices", 51 | ] 52 | -------------------------------------------------------------------------------- 
def get_device_serial(device_id: str, adb_path: str = "adb") -> str | None:
    """
    Look up the hardware serial number (``ro.serialno``) of a device.

    The value is read through ``adb shell getprop``, so the same call is
    used for USB and WiFi connected devices.

    Args:
        device_id: The device ID (can be USB serial or IP:port for WiFi)
        adb_path: Path to adb executable (default: "adb")

    Returns:
        The device hardware serial number, or None if the command fails,
        raises, or reports an empty / "unknown" / "error:"-prefixed value.
    """
    command = [adb_path, "-s", device_id, "shell", "getprop", "ro.serialno"]
    try:
        outcome = run_cmd_silently_sync(command, timeout=3)
        if outcome.returncode != 0:
            return None

        value = outcome.stdout.strip()
        # Reject empty output, adb error text and the "unknown" placeholder.
        rejected = not value or value.startswith("error:") or value == "unknown"
        return None if rejected else value
    except Exception:
        # Best effort: any failure simply means the serial is unknown.
        return None
16 | 17 | # OS generated files 18 | Thumbs.db 19 | 20 | # Logs 21 | logs 22 | *.log 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | pnpm-debug.log* 27 | lerna-debug.log* 28 | 29 | # Coverage directory used by tools like istanbul 30 | coverage/ 31 | *.lcov 32 | 33 | # nyc test coverage 34 | .nyc_output 35 | 36 | # Dependency directories 37 | jspm_packages/ 38 | 39 | # TypeScript cache 40 | *.tsbuildinfo 41 | 42 | # Optional npm cache directory 43 | .npm 44 | 45 | # Optional eslint cache 46 | .eslintcache 47 | 48 | # Optional stylelint cache 49 | .stylelintcache 50 | 51 | # Microbundle cache 52 | .rpt2_cache/ 53 | .rts2_cache_cjs/ 54 | .rts2_cache_es/ 55 | .rts2_cache_umd/ 56 | 57 | # Optional REPL history 58 | .node_repl_history 59 | 60 | # Output of 'npm pack' 61 | *.tgz 62 | 63 | # Yarn Integrity file 64 | .yarn-integrity 65 | 66 | # parcel-bundler cache (https://parceljs.org/) 67 | .cache 68 | .parcel-cache 69 | 70 | # Stores VSCode versions used for testing VSCode extensions 71 | .vscode-test 72 | 73 | # yarn v2 74 | .yarn/cache 75 | .yarn/unplugged 76 | .yarn/build-state.yml 77 | .yarn/install-state.gz 78 | .pnp.* 79 | -------------------------------------------------------------------------------- /frontend/src/styles.css: -------------------------------------------------------------------------------- 1 | @import 'tailwindcss'; 2 | 3 | @layer base { 4 | *, 5 | ::after, 6 | ::before, 7 | ::backdrop, 8 | ::file-selector-button { 9 | border-color: var(--color-gray-200, currentcolor); 10 | } 11 | } 12 | 13 | html { 14 | color-scheme: light dark; 15 | } 16 | * { 17 | @apply border-gray-200 dark:border-gray-800; 18 | } 19 | body { 20 | @apply bg-gray-50 text-gray-950 dark:bg-gray-900 dark:text-gray-200; 21 | } 22 | 23 | /* Ripple effect animation for ScrcpyPlayer */ 24 | @keyframes ripple { 25 | 0% { 26 | width: 0; 27 | height: 0; 28 | opacity: 1; 29 | } 30 | 100% { 31 | width: 60px; 32 | height: 60px; 33 | opacity: 0; 34 | } 35 | } 36 | 37 | 
.ripple-circle { 38 | position: absolute; 39 | width: 60px; 40 | height: 60px; 41 | border-radius: 50%; 42 | background: radial-gradient( 43 | circle, 44 | rgba(59, 130, 246, 0.5) 0%, 45 | rgba(59, 130, 246, 0) 70% 46 | ); 47 | border: 2px solid rgba(59, 130, 246, 0.8); 48 | animation: ripple 500ms ease-out; 49 | pointer-events: none; 50 | transform: translate(-50%, -50%); 51 | } 52 | 53 | /* Scroll trail animation with moving ball */ 54 | @keyframes scrollTrail { 55 | 0% { 56 | background-position: 0% 0%; 57 | opacity: 0; 58 | } 59 | 5% { 60 | opacity: 1; 61 | } 62 | 95% { 63 | opacity: 1; 64 | } 65 | 100% { 66 | background-position: 0% 100%; 67 | opacity: 0; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /frontend/prettier.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | // 2 spaces for indentation 3 | tabWidth: 2, 4 | useTabs: false, 5 | 6 | // Print semicolons 7 | semi: true, 8 | 9 | // Use single quotes for strings 10 | singleQuote: true, 11 | 12 | // Use trailing commas where valid in ES5 (objects, arrays, etc.) 
async def check_device_available(device_id: str | None = None) -> None:
    """Check that an ADB device is connected and in the ``device`` state.

    Runs ``adb [-s <device_id>] get-state`` with a 5 second timeout and
    inspects its stdout/stderr.

    Args:
        device_id: ADB device serial (None for default device)

    Raises:
        DeviceNotAvailableError: If the adb executable is missing, the
            command times out, stderr mentions "not found"/"offline",
            the reported state is not ``device``, or any other error
            occurs while checking. The underlying exception, when there
            is one, is attached as ``__cause__``.
    """
    # Readable name for messages so the default device is not shown as "None".
    label = device_id if device_id else "(default)"

    cmd = ["adb"]
    if device_id:
        cmd.extend(["-s", device_id])
    cmd.append("get-state")

    try:
        result = await asyncio.wait_for(run_cmd_silently(cmd), timeout=5.0)

        state = result.stdout.strip() if result.stdout else ""
        error_output = result.stderr.strip() if result.stderr else ""

        # Check for common error patterns
        lowered_error = error_output.lower()
        if "not found" in lowered_error or "offline" in lowered_error:
            raise DeviceNotAvailableError(
                f"Device {label} is not available: {error_output}"
            )

        if state != "device":
            raise DeviceNotAvailableError(
                f"Device {label} is not available (state: {state or 'offline'})"
            )

        logger.debug(f"Device {label} is available (state: {state})")

    except asyncio.TimeoutError as e:
        raise DeviceNotAvailableError(f"Device {label} connection timed out") from e
    except FileNotFoundError as e:
        raise DeviceNotAvailableError("ADB executable not found") from e
    except DeviceNotAvailableError:
        # Already the right type: propagate untouched.
        raise
    except Exception as e:
        # Chain the original error so its traceback is kept for debugging.
        raise DeviceNotAvailableError(f"Failed to check device {label}: {e}") from e
| "License :: OSI Approved :: MIT License", 16 | "Operating System :: OS Independent", 17 | "Programming Language :: Python :: 3", 18 | "Programming Language :: Python :: 3.10", 19 | "Programming Language :: Python :: 3.11", 20 | "Programming Language :: Python :: 3.12", 21 | "Topic :: Software Development :: Libraries :: Python Modules", 22 | ] 23 | dependencies = [ 24 | "fastapi>=0.124.0", 25 | "httpx[socks]>=0.28.1", 26 | "loguru>=0.7.3", 27 | "openai>=2.9.0", 28 | "pillow>=11.3.0", 29 | "uvicorn[standard]>=0.38.0", 30 | ] 31 | 32 | [project.urls] 33 | Homepage = "https://github.com/suyiiyii/AutoGLM-GUI" 34 | Repository = "https://github.com/suyiiyii/AutoGLM-GUI" 35 | 36 | [project.scripts] 37 | autoglm-gui = "AutoGLM_GUI.__main__:main" 38 | 39 | [build-system] 40 | requires = ["hatchling"] 41 | build-backend = "hatchling.build" 42 | 43 | [tool.hatch.build.targets.wheel] 44 | packages = ["AutoGLM_GUI", "phone_agent"] 45 | force-include = {"scrcpy-server-v3.3.3" = "scrcpy-server-v3.3.3"} 46 | 47 | [tool.hatch.build.targets.sdist] 48 | include = ["AutoGLM_GUI/**/*", "phone_agent/**/*", "scrcpy-server-v3.3.3"] 49 | 50 | [tool.hatch.build] 51 | artifacts = ["AutoGLM_GUI/static/**/*"] 52 | 53 | [tool.hatch.metadata] 54 | allow-direct-references = true 55 | 56 | [dependency-groups] 57 | dev = [ 58 | "pyinstaller>=6.17.0", 59 | "ruff>=0.14.9", 60 | ] 61 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tanstack-router-react-example-quickstart-file-based", 3 | "private": true, 4 | "type": "module", 5 | "scripts": { 6 | "dev": "vite --port 3000", 7 | "build": "vite build && tsc --noEmit", 8 | "preview": "vite preview", 9 | "start": "vite", 10 | "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0", 11 | "lint:ignore-warnings": "eslint . 
# Chinese messages
MESSAGES_ZH = {
    "thinking": "思考过程",
    "action": "执行动作",
    "task_completed": "任务完成",
    "done": "完成",
    "starting_task": "开始执行任务",
    "final_result": "最终结果",
    "task_result": "任务结果",
    "confirmation_required": "需要确认",
    "continue_prompt": "是否继续?(y/n)",
    "manual_operation_required": "需要人工操作",
    "manual_operation_hint": "请手动完成操作...",
    "press_enter_when_done": "完成后按回车继续",
    "connection_failed": "连接失败",
    "connection_successful": "连接成功",
    "step": "步骤",
    "task": "任务",
    "result": "结果",
}

# English messages
MESSAGES_EN = {
    "thinking": "Thinking",
    "action": "Action",
    "task_completed": "Task Completed",
    "done": "Done",
    "starting_task": "Starting task",
    "final_result": "Final Result",
    "task_result": "Task Result",
    "confirmation_required": "Confirmation Required",
    "continue_prompt": "Continue? (y/n)",
    "manual_operation_required": "Manual Operation Required",
    "manual_operation_hint": "Please complete the operation manually...",
    "press_enter_when_done": "Press Enter when done",
    "connection_failed": "Connection Failed",
    "connection_successful": "Connection Successful",
    "step": "Step",
    "task": "Task",
    "result": "Result",
}


def get_messages(lang: str = "cn") -> dict:
    """
    Select the UI message table for a language.

    Args:
        lang: Language code. Exactly ``"en"`` selects the English table;
            every other value (including the default ``"cn"``) selects
            the Chinese table.

    Returns:
        The module-level message dictionary itself (not a copy).
    """
    return MESSAGES_EN if lang == "en" else MESSAGES_ZH


def get_message(key: str, lang: str = "cn") -> str:
    """
    Look up one UI message.

    Args:
        key: Message key.
        lang: Language code, interpreted as in ``get_messages``.

    Returns:
        The message for ``key``, or ``key`` itself when the selected
        table has no such entry.
    """
    table = get_messages(lang)
    if key in table:
        return table[key]
    # Unknown keys are echoed back so callers always get a printable string.
    return key
26 |
e.stopPropagation()} 29 | > 30 |
31 |

32 | {title} 33 |

34 |

35 | {content} 36 |

37 |
38 |
39 | 48 | 57 |
58 |
59 |
60 | ); 61 | } 62 | -------------------------------------------------------------------------------- /AutoGLM_GUI/__init__.py: -------------------------------------------------------------------------------- 1 | """AutoGLM-GUI package metadata.""" 2 | 3 | import subprocess 4 | import sys 5 | from functools import wraps 6 | from importlib import metadata 7 | 8 | # 修复 Windows 编码问题 - 必须在所有其他导入之前 9 | if sys.platform == "win32": 10 | import codecs 11 | 12 | sys.stdout = codecs.getwriter("utf-8")(sys.stdout.buffer, "strict") 13 | sys.stderr = codecs.getwriter("utf-8")(sys.stderr.buffer, "strict") 14 | 15 | 16 | # ============================================================================ 17 | # Fix Windows encoding issue: Force UTF-8 for all subprocess calls 18 | # ============================================================================ 19 | # On Windows, subprocess defaults to GBK encoding which fails when ADB/scrcpy 20 | # output UTF-8 characters. This monkey patch ensures all subprocess calls 21 | # use UTF-8 encoding by default. 
# Keep handles on the unpatched implementations; the wrappers below delegate to them.
_original_run = subprocess.run
_original_popen = subprocess.Popen


def _default_to_utf8(kwargs):
    """Add ``encoding="utf-8"`` to ``kwargs`` in place for text-mode calls on Windows.

    Nothing is changed on other platforms, for calls that did not request
    text mode (``text`` / ``universal_newlines``), or when the caller
    already supplied an ``encoding`` key.
    """
    if sys.platform != "win32":
        return
    wants_text = kwargs.get("text") or kwargs.get("universal_newlines")
    if wants_text and "encoding" not in kwargs:
        kwargs["encoding"] = "utf-8"


@wraps(_original_run)
def _patched_run(*args, **kwargs):
    """Patched subprocess.run that defaults to UTF-8 encoding on Windows."""
    _default_to_utf8(kwargs)
    return _original_run(*args, **kwargs)


class _PatchedPopen(_original_popen):
    """Patched subprocess.Popen that defaults to UTF-8 encoding on Windows."""

    def __init__(self, *args, **kwargs):
        _default_to_utf8(kwargs)
        super().__init__(*args, **kwargs)


# Apply the patches globally: every later lookup of subprocess.run /
# subprocess.Popen in this process resolves to the wrappers above.
subprocess.run = _patched_run
subprocess.Popen = _PatchedPopen

# ============================================================================

# Expose package version at runtime; fall back to "unknown" during editable/dev runs
try:
    __version__ = metadata.version("autoglm-gui")
except metadata.PackageNotFoundError:
    __version__ = "unknown"
Setup Node.js 24 | uses: actions/setup-node@v4 25 | with: 26 | node-version: "20" 27 | 28 | - name: Setup pnpm 29 | uses: pnpm/action-setup@v4 30 | with: 31 | version: 9 32 | 33 | - name: Get Version 34 | id: version 35 | run: | 36 | echo "VERSION=$(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)" >> $GITHUB_OUTPUT 37 | echo "TAG_VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT 38 | echo "TAG_NAME=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT 39 | 40 | - name: Check Version 41 | if: steps.version.outputs.VERSION != steps.version.outputs.TAG_VERSION 42 | run: | 43 | echo "Version mismatch: ${{ steps.version.outputs.VERSION }} != ${{ steps.version.outputs.TAG_VERSION }}" 44 | exit 1 45 | 46 | - name: Build Frontend 47 | run: | 48 | cd frontend 49 | pnpm install 50 | pnpm build 51 | 52 | - name: Copy Static Files 53 | run: | 54 | mkdir -p AutoGLM_GUI/static 55 | cp -r frontend/dist/* AutoGLM_GUI/static/ 56 | 57 | - name: Build Package 58 | run: uv build 59 | 60 | - name: Publish to PyPI 61 | uses: pypa/gh-action-pypi-publish@release/v1 62 | 63 | - name: Create Release if not exists 64 | run: | 65 | gh release view ${{ steps.version.outputs.TAG_NAME }} || \ 66 | gh release create ${{ steps.version.outputs.TAG_NAME }} --title "${{ steps.version.outputs.TAG_NAME }}" --notes "Release ${{ steps.version.outputs.TAG_NAME }}" 67 | env: 68 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 69 | 70 | - name: Upload Release Asset 71 | run: gh release upload --clobber ${{ steps.version.outputs.TAG_NAME }} dist/*.tar.gz dist/*.whl 72 | env: 73 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 74 | -------------------------------------------------------------------------------- /electron/afterPack.js: -------------------------------------------------------------------------------- 1 | /** 2 | * electron-builder afterPack hook 3 | * 在打包后设置可执行文件权限 4 | */ 5 | 6 | const fs = require('fs'); 7 | const path = require('path'); 8 | 9 | exports.default = async function(context) 
{ 10 | const { appOutDir, electronPlatformName } = context; 11 | 12 | console.log('Running afterPack hook...'); 13 | console.log('Platform:', electronPlatformName); 14 | console.log('Output directory:', appOutDir); 15 | 16 | // 确定资源路径 17 | let resourcesPath; 18 | if (electronPlatformName === 'darwin') { 19 | const appName = context.packager.appInfo.productFilename; 20 | resourcesPath = path.join(appOutDir, `${appName}.app`, 'Contents', 'Resources'); 21 | } else if (electronPlatformName === 'win32') { 22 | resourcesPath = path.join(appOutDir, 'resources'); 23 | } else if (electronPlatformName === 'linux') { 24 | resourcesPath = path.join(appOutDir, 'resources'); 25 | } else { 26 | console.log('Unsupported platform:', electronPlatformName); 27 | return; 28 | } 29 | 30 | console.log('Resources path:', resourcesPath); 31 | 32 | // 设置后端可执行文件权限 33 | const backendExe = path.join( 34 | resourcesPath, 35 | 'backend', 36 | electronPlatformName === 'win32' ? 'autoglm-gui.exe' : 'autoglm-gui' 37 | ); 38 | 39 | if (fs.existsSync(backendExe)) { 40 | fs.chmodSync(backendExe, 0o755); 41 | console.log('✓ Set executable permission for backend:', backendExe); 42 | } else { 43 | console.warn('⚠ Backend executable not found:', backendExe); 44 | } 45 | 46 | // 设置 ADB 工具权限 47 | const platformName = electronPlatformName === 'win32' ? 'windows' 48 | : electronPlatformName === 'linux' ? 'linux' 49 | : 'darwin'; 50 | const adbDir = path.join(resourcesPath, 'adb', platformName, 'platform-tools'); 51 | 52 | if (fs.existsSync(adbDir)) { 53 | const adbFiles = ['adb', 'fastboot', 'etc1tool', 'hprof-conv', 'make_f2fs', 'make_f2fs_casefold', 'mke2fs', 'sqlite3']; 54 | 55 | for (const file of adbFiles) { 56 | const filePath = path.join(adbDir, electronPlatformName === 'win32' ? 
__all__ = ["get_wifi_ip"]


def _run(adb_path: str, device_id: Optional[str], cmd: list[str]) -> str:
    """Run ``adb [-s device_id] shell <cmd>`` and return its combined output.

    Args:
        adb_path: adb executable to invoke.
        device_id: Device serial passed via ``-s``; omitted when falsy.
        cmd: Shell command tokens appended after ``shell``.

    Returns:
        stdout followed by stderr, as one string.

    Raises:
        Whatever ``subprocess.run`` raises, e.g. ``FileNotFoundError`` when
        the executable is missing or ``subprocess.TimeoutExpired`` after
        the 5 second timeout.
    """
    base_cmd = [adb_path]
    if device_id:
        base_cmd.extend(["-s", device_id])
    result = subprocess.run(
        base_cmd + ["shell", *cmd], capture_output=True, text=True, timeout=5
    )
    return (result.stdout or "") + (result.stderr or "")


def _is_usable_ipv4(candidate: str) -> bool:
    """Return True for a dotted quad with every octet in 0..255, excluding 0.0.0.0."""
    octets = candidate.split(".")
    if len(octets) != 4:
        return False
    for octet in octets:
        # isascii() keeps int() away from non-ASCII digit characters.
        if not (octet.isascii() and octet.isdigit() and int(octet) <= 255):
            return False
    return candidate != "0.0.0.0"


def _token_after(tokens: list[str], key: str) -> Optional[str]:
    """Return the token following the first ``key`` in ``tokens``, or None."""
    try:
        position = tokens.index(key)
    except ValueError:
        return None
    following = position + 1
    return tokens[following] if following < len(tokens) else None


def _extract_ip(text: str) -> Optional[str]:
    """Return the first dotted-quad in ``text`` if it is a usable IPv4 address.

    Only the first match is considered (later ones are typically broadcast
    addresses in ``ip addr`` output). ``0.0.0.0`` and quads with an octet
    above 255 yield None.
    """
    m = re.search(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b", text)
    if not m:
        return None
    ip = m.group(0)
    return ip if _is_usable_ipv4(ip) else None


def get_wifi_ip(
    adb_path: str = "adb", device_id: Optional[str] = None
) -> Optional[str]:
    """
    Prefer WiFi IP when multiple interfaces exist.

    - First try `ip -4 route get 8.8.8.8`, skip typical cellular interfaces (ccmni/rmnet).
    - Fallback to `ip -4 addr show wlan0`.

    Args:
        adb_path: adb executable to invoke.
        device_id: Optional device serial.

    Returns:
        The IPv4 address as a string, or None if no suitable IP is found
        or on error (this lookup is deliberately best-effort).
    """
    # 1) route
    try:
        route_out = _run(adb_path, device_id, ["ip", "-4", "route", "get", "8.8.8.8"])
    except Exception:
        # Best-effort: any adb failure just moves on to the fallback.
        route_out = ""

    for line in route_out.splitlines():
        parts = line.split()
        ip = _token_after(parts, "src")
        # The output mixes stdout and stderr, so make sure the token after
        # "src" really is an address before trusting it.
        if ip is None or not _is_usable_ipv4(ip):
            continue
        iface = _token_after(parts, "dev")
        if iface and iface.startswith(("ccmni", "rmnet")):
            continue
        return ip

    # 2) wlan0 addr
    try:
        addr_out = _run(adb_path, device_id, ["ip", "-4", "addr", "show", "wlan0"])
    except Exception:
        return None
    return _extract_ip(addr_out)
def _get_static_dir() -> Path | None:
    """Locate packaged static assets.

    Returns:
        The first existing candidate directory, or None when neither
        exists: the ``AutoGLM_GUI/static`` folder inside a PyInstaller
        bundle (``sys._MEIPASS``), then the ``static`` resource of the
        installed ``AutoGLM_GUI`` package.
    """
    # Priority 1: PyInstaller bundled path (for packaged executable)
    bundle_root = getattr(sys, "_MEIPASS", None)
    if bundle_root:
        bundled_static = Path(bundle_root) / "AutoGLM_GUI" / "static"
        if bundled_static.exists():
            return bundled_static

    # Priority 2: importlib.resources (for installed package)
    try:
        path = Path(str(files("AutoGLM_GUI").joinpath("static")))
        if path.exists():
            return path
    except (TypeError, FileNotFoundError):
        pass

    return None


def create_app() -> FastAPI:
    """Build the FastAPI app with routers and static assets.

    Registers CORS for the Vite dev server origin and the API routers.
    When a static directory is available, also mounts ``/assets`` and a
    catch-all route that serves files from that directory, falling back to
    ``index.html`` so client-side routes resolve.

    Returns:
        The configured FastAPI application.
    """
    app = FastAPI(title="AutoGLM-GUI API", version=APP_VERSION)

    app.add_middleware(
        CORSMiddleware,
        allow_origins=["http://localhost:3000"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    app.include_router(agents.router)
    app.include_router(devices.router)
    app.include_router(control.router)
    app.include_router(media.router)

    static_dir = _get_static_dir()
    if static_dir is not None and static_dir.exists():
        # Resolve once so containment checks compare canonical paths.
        static_root = static_dir.resolve()
        index_file = static_root / "index.html"

        assets_dir = static_root / "assets"
        if assets_dir.exists():
            app.mount("/assets", StaticFiles(directory=assets_dir), name="assets")

        @app.get("/{full_path:path}")
        async def serve_spa(full_path: str) -> FileResponse:
            # The path parameter is attacker-controlled and may contain ".."
            # segments; only serve files that resolve inside static_root.
            try:
                candidate = (static_root / full_path).resolve()
            except (OSError, ValueError):
                return FileResponse(index_file)
            if candidate.is_relative_to(static_root) and candidate.is_file():
                return FileResponse(candidate)
            return FileResponse(index_file)

    return app


app = create_app()
-------------------------------------------------------------------------------- 1 | """Platform-aware subprocess helpers to avoid duplicated Windows branches.""" 2 | 3 | import asyncio 4 | import platform 5 | import subprocess 6 | from typing import Any, Sequence 7 | 8 | 9 | def is_windows() -> bool: 10 | """Return True if running on Windows.""" 11 | return platform.system() == "Windows" 12 | 13 | 14 | def run_cmd_silently_sync( 15 | cmd: Sequence[str], timeout: float | None = None 16 | ) -> subprocess.CompletedProcess: 17 | """Run a command synchronously, suppressing output but preserving it in the result. 18 | 19 | This is the synchronous version that works on all platforms. 20 | 21 | Args: 22 | cmd: Command to run as a sequence of strings 23 | timeout: Optional timeout in seconds 24 | 25 | Returns: 26 | CompletedProcess with stdout/stderr captured 27 | """ 28 | return subprocess.run( 29 | cmd, capture_output=True, text=True, check=False, timeout=timeout 30 | ) 31 | 32 | 33 | async def run_cmd_silently(cmd: Sequence[str]) -> subprocess.CompletedProcess: 34 | """Run a command, suppressing output but preserving it in the result; safe for async contexts on all platforms.""" 35 | if is_windows(): 36 | # Avoid blocking the event loop with a blocking subprocess call on Windows. 
37 | return await asyncio.to_thread( 38 | subprocess.run, cmd, capture_output=True, text=True, check=False 39 | ) 40 | 41 | # Use PIPE on macOS/Linux to capture output 42 | process = await asyncio.create_subprocess_exec( 43 | *cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE 44 | ) 45 | stdout, stderr = await process.communicate() 46 | # Decode bytes to string for API consistency across platforms 47 | stdout_str = stdout.decode("utf-8") if stdout else "" 48 | stderr_str = stderr.decode("utf-8") if stderr else "" 49 | # Return CompletedProcess with stdout/stderr for API consistency across platforms 50 | return_code = process.returncode if process.returncode is not None else -1 51 | return subprocess.CompletedProcess(cmd, return_code, stdout_str, stderr_str) 52 | 53 | 54 | async def spawn_process(cmd: Sequence[str], *, capture_output: bool = False) -> Any: 55 | """Start a long-running process with optional stdio capture.""" 56 | stdout = subprocess.PIPE if capture_output else None 57 | stderr = subprocess.PIPE if capture_output else None 58 | 59 | if is_windows(): 60 | return subprocess.Popen(cmd, stdout=stdout, stderr=stderr) 61 | 62 | return await asyncio.create_subprocess_exec(*cmd, stdout=stdout, stderr=stderr) 63 | -------------------------------------------------------------------------------- /AutoGLM_GUI/adb_plus/touch.py: -------------------------------------------------------------------------------- 1 | """Touch control utilities using ADB motion events for real-time dragging.""" 2 | 3 | import subprocess 4 | import time 5 | 6 | 7 | def _get_adb_prefix(device_id: str | None, adb_path: str = "adb") -> list[str]: 8 | """Get ADB command prefix with optional device specifier.""" 9 | if device_id: 10 | return [adb_path, "-s", device_id] 11 | return [adb_path] 12 | 13 | 14 | def touch_down( 15 | x: int, 16 | y: int, 17 | device_id: str | None = None, 18 | delay: float = 0.0, 19 | adb_path: str = "adb", 20 | ) -> None: 21 | """ 22 | Send touch DOWN event at 
def touch_move(
    x: int,
    y: int,
    device_id: str | None = None,
    delay: float = 0.0,
    adb_path: str = "adb",
) -> None:
    """
    Issue an ``input motionevent MOVE`` at the given point through adb.

    The adb call's output is captured and discarded, and its exit status
    is not checked.

    Args:
        x: X coordinate.
        y: Y coordinate.
        device_id: Optional ADB device ID.
        delay: Seconds to sleep after the event; values <= 0 skip the sleep
            (default: 0.0 for real-time).
        adb_path: Path to adb binary.
    """
    command = [
        *_get_adb_prefix(device_id, adb_path),
        "shell",
        "input",
        "motionevent",
        "MOVE",
        str(x),
        str(y),
    ]
    subprocess.run(command, capture_output=True)

    if delay > 0:
        time.sleep(delay)
# Remove default handler
logger.remove()

# Default configuration - will be overridden by configure_logger()
_configured = False


def configure_logger(
    console_level: str = "INFO",
    log_file: str | None = "logs/autoglm_{time:YYYY-MM-DD}.log",
    log_level: str = "DEBUG",
    rotation: str = "100 MB",
    retention: str = "7 days",
    compression: str = "zip",
) -> None:
    """
    Configure the global logger with console and file handlers.

    When ``log_file`` is set, a second "errors" file is created next to it
    that receives only ERROR and above. Its name is ``errors_`` plus the
    part of the log file name after the first underscore, or plus the
    whole file name when there is no underscore.

    Args:
        console_level: Console output level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
        log_file: Log file path (None to disable file logging)
        log_level: File logging level
        rotation: Log rotation policy (e.g., "100 MB", "1 day")
        retention: Log retention policy (e.g., "7 days", "1 week")
        compression: Compression format for rotated logs (e.g., "zip", "gz")
    """
    global _configured

    # Remove existing handlers if reconfiguring
    if _configured:
        logger.remove()

    # One format shared by every sink so the outputs stay comparable.
    line_format = "{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {name}:{function}:{line} - {message}"

    # Console handler with colors
    logger.add(
        sys.stderr,
        format=line_format,
        level=console_level,
        colorize=True,
    )

    # File handler
    if log_file:
        # Create logs directory if it doesn't exist
        log_path = Path(log_file)
        log_path.parent.mkdir(parents=True, exist_ok=True)

        logger.add(
            log_file,
            rotation=rotation,
            retention=retention,
            compression=compression,
            level=log_level,
            format=line_format,
            encoding="utf-8",
        )

        # Separate error log file. partition() never raises, so a custom
        # log_file without an underscore (e.g. "app.log") no longer fails
        # with IndexError.
        _, underscore, suffix = log_path.name.partition("_")
        error_name = f"errors_{suffix}" if underscore else f"errors_{log_path.name}"
        error_file = str(log_path.parent / error_name)
        logger.add(
            error_file,
            rotation="50 MB",
            retention="30 days",
            compression=compression,
            level="ERROR",
            format=line_format,
            backtrace=True,
            diagnose=True,
            encoding="utf-8",
        )

    _configured = True


# Default initialization (can be reconfigured later)
configure_logger()

__all__ = ["logger", "configure_logger"]
"""System prompts for the AI agent.""" 2 | 3 | from datetime import datetime 4 | 5 | today = datetime.today() 6 | formatted_date = today.strftime("%Y-%m-%d, %A") 7 | 8 | SYSTEM_PROMPT = ( 9 | "The current date: " 10 | + formatted_date 11 | + """ 12 | # Setup 13 | You are a professional Android operation agent assistant that can fulfill the user's high-level instructions. Given a screenshot of the Android interface at each step, you first analyze the situation, then plan the best course of action using Python-style pseudo-code. 14 | 15 | # More details about the code 16 | Your response format must be structured as follows: 17 | 18 | Think first: Use ... to analyze the current screen, identify key elements, and determine the most efficient action. 19 | Provide the action: Use ... to return a single line of pseudo-code representing the operation. 20 | 21 | Your output should STRICTLY follow the format: 22 | 23 | [Your thought] 24 | 25 | 26 | [Your operation code] 27 | 28 | 29 | - **Tap** 30 | Perform a tap action on a specified screen area. The element is a list of 2 integers, representing the coordinates of the tap point. 31 | **Example**: 32 | 33 | do(action="Tap", element=[x,y]) 34 | 35 | - **Type** 36 | Enter text into the currently focused input field. 37 | **Example**: 38 | 39 | do(action="Type", text="Hello World") 40 | 41 | - **Swipe** 42 | Perform a swipe action with start point and end point. 43 | **Examples**: 44 | 45 | do(action="Swipe", start=[x1,y1], end=[x2,y2]) 46 | 47 | - **Long Press** 48 | Perform a long press action on a specified screen area. 49 | You can add the element to the action to specify the long press area. The element is a list of 2 integers, representing the coordinates of the long press point. 50 | **Example**: 51 | 52 | do(action="Long Press", element=[x,y]) 53 | 54 | - **Launch** 55 | Launch an app. Try to use launch action when you need to launch an app. Check the instruction to choose the right app before you use this action. 
56 | **Example**: 57 | 58 | do(action="Launch", app="Settings") 59 | 60 | - **Back** 61 | Press the Back button to navigate to the previous screen. 62 | **Example**: 63 | 64 | do(action="Back") 65 | 66 | - **Finish** 67 | Terminate the program and optionally print a message. 68 | **Example**: 69 | 70 | finish(message="Task completed.") 71 | 72 | 73 | 74 | REMEMBER: 75 | - Think before you act: Always analyze the current UI and the best course of action before executing any step, and output in part. 76 | - Only ONE LINE of action in part per response: Each step must contain exactly one line of executable code. 77 | - Generate execution code strictly according to format requirements. 78 | """ 79 | ) 80 | -------------------------------------------------------------------------------- /frontend/src/routeTree.gen.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | 3 | // @ts-nocheck 4 | 5 | // noinspection JSUnusedGlobalSymbols 6 | 7 | // This file was automatically generated by TanStack Router. 8 | // You should NOT make any changes in this file as it will be overwritten. 9 | // Additionally, you should also exclude this file from your linter and/or formatter to prevent it from being checked or modified. 
10 | 11 | import { Route as rootRouteImport } from './routes/__root' 12 | import { Route as ChatRouteImport } from './routes/chat' 13 | import { Route as AboutRouteImport } from './routes/about' 14 | import { Route as IndexRouteImport } from './routes/index' 15 | 16 | const ChatRoute = ChatRouteImport.update({ 17 | id: '/chat', 18 | path: '/chat', 19 | getParentRoute: () => rootRouteImport, 20 | } as any) 21 | const AboutRoute = AboutRouteImport.update({ 22 | id: '/about', 23 | path: '/about', 24 | getParentRoute: () => rootRouteImport, 25 | } as any) 26 | const IndexRoute = IndexRouteImport.update({ 27 | id: '/', 28 | path: '/', 29 | getParentRoute: () => rootRouteImport, 30 | } as any) 31 | 32 | export interface FileRoutesByFullPath { 33 | '/': typeof IndexRoute 34 | '/about': typeof AboutRoute 35 | '/chat': typeof ChatRoute 36 | } 37 | export interface FileRoutesByTo { 38 | '/': typeof IndexRoute 39 | '/about': typeof AboutRoute 40 | '/chat': typeof ChatRoute 41 | } 42 | export interface FileRoutesById { 43 | __root__: typeof rootRouteImport 44 | '/': typeof IndexRoute 45 | '/about': typeof AboutRoute 46 | '/chat': typeof ChatRoute 47 | } 48 | export interface FileRouteTypes { 49 | fileRoutesByFullPath: FileRoutesByFullPath 50 | fullPaths: '/' | '/about' | '/chat' 51 | fileRoutesByTo: FileRoutesByTo 52 | to: '/' | '/about' | '/chat' 53 | id: '__root__' | '/' | '/about' | '/chat' 54 | fileRoutesById: FileRoutesById 55 | } 56 | export interface RootRouteChildren { 57 | IndexRoute: typeof IndexRoute 58 | AboutRoute: typeof AboutRoute 59 | ChatRoute: typeof ChatRoute 60 | } 61 | 62 | declare module '@tanstack/react-router' { 63 | interface FileRoutesByPath { 64 | '/chat': { 65 | id: '/chat' 66 | path: '/chat' 67 | fullPath: '/chat' 68 | preLoaderRoute: typeof ChatRouteImport 69 | parentRoute: typeof rootRouteImport 70 | } 71 | '/about': { 72 | id: '/about' 73 | path: '/about' 74 | fullPath: '/about' 75 | preLoaderRoute: typeof AboutRouteImport 76 | parentRoute: 
typeof rootRouteImport 77 | } 78 | '/': { 79 | id: '/' 80 | path: '/' 81 | fullPath: '/' 82 | preLoaderRoute: typeof IndexRouteImport 83 | parentRoute: typeof rootRouteImport 84 | } 85 | } 86 | } 87 | 88 | const rootRouteChildren: RootRouteChildren = { 89 | IndexRoute: IndexRoute, 90 | AboutRoute: AboutRoute, 91 | ChatRoute: ChatRoute, 92 | } 93 | export const routeTree = rootRouteImport 94 | ._addFileChildren(rootRouteChildren) 95 | ._addFileTypes() 96 | -------------------------------------------------------------------------------- /frontend/src/components/Toast.tsx: -------------------------------------------------------------------------------- 1 | import React, { useEffect } from 'react'; 2 | 3 | export type ToastType = 'success' | 'error' | 'info'; 4 | 5 | interface ToastProps { 6 | message: string; 7 | type?: ToastType; 8 | onClose: () => void; 9 | duration?: number; 10 | } 11 | 12 | export function Toast({ 13 | message, 14 | type = 'info', 15 | onClose, 16 | duration = 3000, 17 | }: ToastProps) { 18 | useEffect(() => { 19 | const timer = setTimeout(() => { 20 | onClose(); 21 | }, duration); 22 | return () => clearTimeout(timer); 23 | }, [duration, onClose]); 24 | 25 | const bgColors = { 26 | success: 'bg-green-500', 27 | error: 'bg-red-500', 28 | info: 'bg-blue-500', 29 | }; 30 | 31 | const icons = { 32 | success: ( 33 | 39 | 45 | 46 | ), 47 | error: ( 48 | 54 | 60 | 61 | ), 62 | info: ( 63 | 69 | 75 | 76 | ), 77 | }; 78 | 79 | return ( 80 |
81 |
84 | {icons[type]} 85 | {message} 86 | 104 |
105 |
106 | ); 107 | } 108 | -------------------------------------------------------------------------------- /frontend/src/routes/__root.tsx: -------------------------------------------------------------------------------- 1 | import * as React from 'react'; 2 | import { Outlet, createRootRoute } from '@tanstack/react-router'; 3 | import { TanStackRouterDevtools } from '@tanstack/react-router-devtools'; 4 | import { getStatus } from '../api'; 5 | 6 | export const Route = createRootRoute({ 7 | component: RootComponent, 8 | }); 9 | 10 | function Footer() { 11 | const [version, setVersion] = React.useState('...'); 12 | 13 | React.useEffect(() => { 14 | getStatus() 15 | .then(status => setVersion(status.version)) 16 | .catch(() => setVersion('unknown')); 17 | }, []); 18 | 19 | return ( 20 | 54 | ); 55 | } 56 | 57 | function RootComponent() { 58 | return ( 59 |
60 |
61 | 62 |
63 |
64 | 65 |
66 | ); 67 | } 68 | -------------------------------------------------------------------------------- /scripts/autoglm.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | """ 3 | PyInstaller 配置文件 - AutoGLM-GUI 后端打包 4 | 5 | 使用方法: 6 | cd scripts 7 | pyinstaller autoglm.spec 8 | 9 | 输出目录: 10 | scripts/dist/autoglm-gui/ 11 | """ 12 | 13 | from pathlib import Path 14 | 15 | # 项目根目录(SPECPATH 是 spec 文件所在目录,由 PyInstaller 提供) 16 | ROOT_DIR = Path(SPECPATH).parent 17 | 18 | block_cipher = None 19 | 20 | a = Analysis( 21 | # 入口点:Python 后端的 __main__.py 22 | [str(ROOT_DIR / 'AutoGLM_GUI' / '__main__.py')], 23 | 24 | pathex=[], 25 | 26 | # 二进制文件 27 | binaries=[ 28 | # scrcpy-server 二进制文件(必需) 29 | (str(ROOT_DIR / 'scrcpy-server-v3.3.3'), '.'), 30 | ], 31 | 32 | # 数据文件 33 | datas=[ 34 | # 前端静态文件(必需) 35 | (str(ROOT_DIR / 'AutoGLM_GUI' / 'static'), 'AutoGLM_GUI/static'), 36 | 37 | # phone_agent 配置文件(prompts, apps 等) 38 | (str(ROOT_DIR / 'phone_agent' / 'config'), 'phone_agent/config'), 39 | 40 | # ADB Keyboard APK 及许可证文件(自动安装功能) 41 | (str(ROOT_DIR / 'AutoGLM_GUI' / 'resources' / 'apks'), 'AutoGLM_GUI/resources/apks'), 42 | ], 43 | 44 | # 隐藏导入:PyInstaller 无法自动检测的模块 45 | hiddenimports=[ 46 | # uvicorn 相关 47 | 'uvicorn.logging', 48 | 'uvicorn.loops', 49 | 'uvicorn.loops.auto', 50 | 'uvicorn.loops.asyncio', 51 | 'uvicorn.protocols', 52 | 'uvicorn.protocols.http', 53 | 'uvicorn.protocols.http.auto', 54 | 'uvicorn.protocols.http.h11_impl', 55 | 'uvicorn.protocols.websockets', 56 | 'uvicorn.protocols.websockets.auto', 57 | 'uvicorn.protocols.websockets.websockets_impl', 58 | 'uvicorn.lifespan', 59 | 'uvicorn.lifespan.on', 60 | 61 | # FastAPI 相关 62 | 'fastapi.responses', 63 | 'fastapi.staticfiles', 64 | 65 | # 其他可能需要的模块 66 | 'PIL._tkinter_finder', # Pillow 67 | ], 68 | 69 | hookspath=[], 70 | hooksconfig={}, 71 | # Runtime hook: 在主程序运行前强制设置 UTF-8 编码(Windows) 72 | runtime_hooks=[str(Path(SPECPATH) / 
def create_ico(source_image_path: Path):
    """Create the multi-size Windows ``icon.ico`` inside ``ELECTRON_DIR``.

    Args:
        source_image_path: Path of the source image to convert.

    The source is converted to RGBA (if needed), scaled to the largest icon
    size with LANCZOS resampling, and saved in ICO format with every entry of
    ``sizes`` requested. The path of the written file is printed.
    """
    # Icon sizes embedded in the .ico, largest first.
    sizes = [256, 128, 64, 48, 32, 16]
    largest = sizes[0]

    # Open through a context manager so the source file handle is released
    # as soon as the pixel data has been copied into the resized image.
    with Image.open(source_image_path) as source:
        # Make sure we work in RGBA mode.
        rgba = source if source.mode == "RGBA" else source.convert("RGBA")
        base = rgba.resize((largest, largest), Image.Resampling.LANCZOS)

    # Only the largest image is handed to the ICO writer; the writer produces
    # the smaller entries listed in ``sizes`` from it. The previous version
    # also pre-resized every size into a list that was never passed to
    # ``save``, which was wasted work.
    ico_path = ELECTRON_DIR / "icon.ico"
    base.save(ico_path, format="ICO", sizes=[(size, size) for size in sizes])
    print(f"✓ Created Windows icon: {ico_path}")
create_png_for_icns(source_image_path: Path): 34 | """创建 1024x1024 PNG(用于生成 .icns)""" 35 | img = Image.open(source_image_path) 36 | 37 | # 确保是 RGBA 模式 38 | if img.mode != "RGBA": 39 | img = img.convert("RGBA") 40 | 41 | # 调整为 1024x1024 42 | img_1024 = img.resize((1024, 1024), Image.Resampling.LANCZOS) 43 | 44 | png_path = ELECTRON_DIR / "icon.png" 45 | img_1024.save(png_path, format="PNG") 46 | print(f"✓ Created PNG icon: {png_path}") 47 | return png_path 48 | 49 | 50 | def main(): 51 | if len(sys.argv) < 2: 52 | print("Usage: uv run python scripts/convert_icon.py ") 53 | sys.exit(1) 54 | 55 | source_path = Path(sys.argv[1]) 56 | if not source_path.exists(): 57 | print(f"Error: Source image not found: {source_path}") 58 | sys.exit(1) 59 | 60 | print("\n" + "=" * 60) 61 | print(" 转换应用图标") 62 | print("=" * 60) 63 | print(f" 源文件: {source_path}") 64 | 65 | ELECTRON_DIR.mkdir(exist_ok=True) 66 | 67 | # 创建 Windows .ico 68 | create_ico(source_path) 69 | 70 | # 创建 PNG(macOS 需要额外工具转换为 .icns) 71 | create_png_for_icns(source_path) 72 | 73 | print("\n" + "=" * 60) 74 | print(" 下一步: 生成 macOS .icns") 75 | print("=" * 60) 76 | print(" 运行以下命令:") 77 | print(f" cd {ELECTRON_DIR}") 78 | print(" mkdir -p icon.iconset") 79 | print(" sips -z 16 16 icon.png --out icon.iconset/icon_16x16.png") 80 | print(" sips -z 32 32 icon.png --out icon.iconset/icon_16x16@2x.png") 81 | print(" sips -z 32 32 icon.png --out icon.iconset/icon_32x32.png") 82 | print(" sips -z 64 64 icon.png --out icon.iconset/icon_32x32@2x.png") 83 | print(" sips -z 128 128 icon.png --out icon.iconset/icon_128x128.png") 84 | print(" sips -z 256 256 icon.png --out icon.iconset/icon_128x128@2x.png") 85 | print(" sips -z 256 256 icon.png --out icon.iconset/icon_256x256.png") 86 | print(" sips -z 512 512 icon.png --out icon.iconset/icon_256x256@2x.png") 87 | print(" sips -z 512 512 icon.png --out icon.iconset/icon_512x512.png") 88 | print(" sips -z 1024 1024 icon.png --out icon.iconset/icon_512x512@2x.png") 89 | print(" 
def type_text(text: str, device_id: str | None = None) -> None:
    """
    Send text to the focused input field through the ADB Keyboard broadcast.

    The text is UTF-8 encoded, base64 encoded, and delivered as the ``msg``
    string extra of an ``ADB_INPUT_B64`` broadcast. The command output is
    captured and discarded, and the exit status is not checked.

    Args:
        text: The text to type.
        device_id: Optional ADB device ID for multi-device setups.

    Note:
        Requires ADB Keyboard to be installed on the device.
        See: https://github.com/nicnocquee/AdbKeyboard
    """
    command_prefix = _get_adb_prefix(device_id)
    payload = base64.b64encode(text.encode("utf-8")).decode("utf-8")

    broadcast_args = [
        "shell",
        "am",
        "broadcast",
        "-a",
        "ADB_INPUT_B64",
        "--es",
        "msg",
        payload,
    ]
    subprocess.run(command_prefix + broadcast_args, capture_output=True, text=True)
def get_screenshot(device_id: str | None = None, timeout: int = 10) -> Screenshot:
    """
    Capture a screenshot from the connected Android device.

    The device writes the capture to ``/sdcard/tmp.png``; the file is then
    pulled to a uniquely named local temp file, re-encoded as PNG and returned
    base64 encoded.

    Args:
        device_id: Optional ADB device ID for multi-device setups.
        timeout: Timeout in seconds for the ``screencap`` command (the
            ``adb pull`` step uses a fixed 5 second timeout).

    Returns:
        Screenshot object containing base64 data and dimensions.

    Note:
        No exception escapes this function. If ``screencap`` reports
        ``Status: -1`` or ``Failed`` (e.g. on sensitive screens like payment
        pages), a black fallback image is returned with is_sensitive=True.
        Any other failure (missing pulled file, timeout, decode error) yields
        a black fallback image with is_sensitive=False.
    """
    temp_path = os.path.join(tempfile.gettempdir(), f"screenshot_{uuid.uuid4()}.png")
    adb_prefix = _get_adb_prefix(device_id)

    try:
        # Execute screenshot command on the device
        result = subprocess.run(
            adb_prefix + ["shell", "screencap", "-p", "/sdcard/tmp.png"],
            capture_output=True,
            text=True,
            timeout=timeout,
        )

        # Check for screenshot failure (sensitive screen)
        output = result.stdout + result.stderr
        if "Status: -1" in output or "Failed" in output:
            return _create_fallback_screenshot(is_sensitive=True)

        # Pull screenshot to local temp path
        subprocess.run(
            adb_prefix + ["pull", "/sdcard/tmp.png", temp_path],
            capture_output=True,
            text=True,
            timeout=5,
        )

        if not os.path.exists(temp_path):
            return _create_fallback_screenshot(is_sensitive=False)

        # Read and re-encode the image; the context manager closes the file
        # handle before the temp file is deleted below.
        with Image.open(temp_path) as img:
            width, height = img.size
            buffered = BytesIO()
            img.save(buffered, format="PNG")
        base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8")

        return Screenshot(
            base64_data=base64_data, width=width, height=height, is_sensitive=False
        )

    except Exception as e:
        print(f"Screenshot error: {e}")
        return _create_fallback_screenshot(is_sensitive=False)

    finally:
        # Remove the temp file on every path. Previously it was deleted only
        # after a successful decode, so a failed decode left it behind.
        try:
            os.remove(temp_path)
        except OSError:
            # Best-effort cleanup: the file may never have been created.
            pass
def _create_fallback_screenshot(is_sensitive: bool) -> Screenshot:
    """Build the all-black 1080x2400 placeholder used when capture fails.

    Args:
        is_sensitive: Value copied into the returned Screenshot's
            ``is_sensitive`` field.

    Returns:
        Screenshot holding the base64-encoded PNG of the black image together
        with its width and height.
    """
    size = (1080, 2400)

    png_buffer = BytesIO()
    Image.new("RGB", size, color="black").save(png_buffer, format="PNG")
    encoded = base64.b64encode(png_buffer.getvalue()).decode("utf-8")

    placeholder_width, placeholder_height = size
    return Screenshot(
        base64_data=encoded,
        width=placeholder_width,
        height=placeholder_height,
        is_sensitive=is_sensitive,
    )
@router.post("/api/control/touch/down", response_model=TouchDownResponse)
def control_touch_down(request: TouchDownRequest) -> TouchDownResponse:
    """Send touch DOWN event at specified device coordinates.

    Forwards ``x``, ``y``, ``device_id`` and ``delay`` from the request to
    ``touch_down``. Any exception (including a failed import of the helper)
    is reported as ``success=False`` with the exception text in ``error``.
    """
    try:
        from AutoGLM_GUI.adb_plus import touch_down

        touch_kwargs = {
            "x": request.x,
            "y": request.y,
            "device_id": request.device_id,
            "delay": request.delay,
        }
        touch_down(**touch_kwargs)

        return TouchDownResponse(success=True)
    except Exception as exc:
        return TouchDownResponse(success=False, error=str(exc))
def capture_screenshot(
    device_id: str | None = None,
    adb_path: str = "adb",
    timeout: int = 10,
    retries: int = 1,
) -> Screenshot:
    """
    Capture a screenshot using adb exec-out, retrying before giving up.

    Args:
        device_id: Optional device serial.
        adb_path: Path to adb binary.
        timeout: Per-attempt timeout in seconds.
        retries: Extra attempts after the first try; at least one attempt is
            always made.

    Returns:
        Screenshot built from the first attempt that yields decodable PNG
        data; a black fallback image when every attempt fails.
    """
    remaining = max(1, retries + 1)
    while remaining > 0:
        remaining -= 1

        raw = _try_capture(device_id=device_id, adb_path=adb_path, timeout=timeout)

        # NOTE: Do NOT do CRLF normalization for binary PNG data from exec-out
        # The PNG signature contains \r\n bytes that must be preserved
        if not raw or not _is_valid_png(raw):
            continue

        try:
            image = Image.open(BytesIO(raw))
            width, height = image.size
            png_buffer = BytesIO()
            image.save(png_buffer, format="PNG")
            encoded = base64.b64encode(png_buffer.getvalue()).decode("utf-8")
            return Screenshot(base64_data=encoded, width=width, height=height)
        except Exception:
            # Decoding or re-encoding failed; move on to the next attempt.
            pass

    return _fallback_screenshot()
base64.b64encode(buffered.getvalue()).decode("utf-8") 110 | return Screenshot( 111 | base64_data=base64_data, width=width, height=height, is_sensitive=False 112 | ) 113 | -------------------------------------------------------------------------------- /phone_agent/config/prompts.py: -------------------------------------------------------------------------------- 1 | """System prompts for the AI agent.""" 2 | 3 | from datetime import datetime 4 | 5 | today = datetime.today() 6 | formatted_date = today.strftime("%Y年%m月%d日") 7 | 8 | SYSTEM_PROMPT = ( 9 | "今天的日期是: " 10 | + formatted_date 11 | + """ 12 | 你是一个智能体分析专家,可以根据操作历史和当前状态图执行一系列操作来完成任务。 13 | 你必须严格按照要求输出以下格式: 14 | {think} 15 | {action} 16 | 17 | 其中: 18 | - {think} 是对你为什么选择这个操作的简短推理说明。 19 | - {action} 是本次执行的具体操作指令,必须严格遵循下方定义的指令格式。 20 | 21 | 操作指令及其作用如下: 22 | - do(action="Launch", app="xxx") 23 | Launch是启动目标app的操作,这比通过主屏幕导航更快。此操作完成后,您将自动收到结果状态的截图。 24 | - do(action="Tap", element=[x,y]) 25 | Tap是点击操作,点击屏幕上的特定点。可用此操作点击按钮、选择项目、从主屏幕打开应用程序,或与任何可点击的用户界面元素进行交互。坐标系统从左上角 (0,0) 开始到右下角(999,999)结束。此操作完成后,您将自动收到结果状态的截图。 26 | - do(action="Tap", element=[x,y], message="重要操作") 27 | 基本功能同Tap,点击涉及财产、支付、隐私等敏感按钮时触发。 28 | - do(action="Type", text="xxx") 29 | Type是输入操作,在当前聚焦的输入框中输入文本。使用此操作前,请确保输入框已被聚焦(先点击它)。输入的文本将像使用键盘输入一样输入。重要提示:手机可能正在使用 ADB 键盘,该键盘不会像普通键盘那样占用屏幕空间。要确认键盘已激活,请查看屏幕底部是否显示 'ADB Keyboard {ON}' 类似的文本,或者检查输入框是否处于激活/高亮状态。不要仅仅依赖视觉上的键盘显示。自动清除文本:当你使用输入操作时,输入框中现有的任何文本(包括占位符文本和实际输入)都会在输入新文本前自动清除。你无需在输入前手动清除文本——直接使用输入操作输入所需文本即可。操作完成后,你将自动收到结果状态的截图。 30 | - do(action="Type_Name", text="xxx") 31 | Type_Name是输入人名的操作,基本功能同Type。 32 | - do(action="Interact") 33 | Interact是当有多个满足条件的选项时而触发的交互操作,询问用户如何选择。 34 | - do(action="Swipe", start=[x1,y1], end=[x2,y2]) 35 | Swipe是滑动操作,通过从起始坐标拖动到结束坐标来执行滑动手势。可用于滚动内容、在屏幕之间导航、下拉通知栏以及项目栏或进行基于手势的导航。坐标系统从左上角 (0,0) 开始到右下角(999,999)结束。滑动持续时间会自动调整以实现自然的移动。此操作完成后,您将自动收到结果状态的截图。 36 | - do(action="Note", message="True") 37 | 记录当前页面内容以便后续总结。 38 | - do(action="Call_API", instruction="xxx") 39 | 总结或评论当前页面或已记录的内容。 40 | - 
do(action="Long Press", element=[x,y]) 41 | Long Pres是长按操作,在屏幕上的特定点长按指定时间。可用于触发上下文菜单、选择文本或激活长按交互。坐标系统从左上角 (0,0) 开始到右下角(999,999)结束。此操作完成后,您将自动收到结果状态的屏幕截图。 42 | - do(action="Double Tap", element=[x,y]) 43 | Double Tap在屏幕上的特定点快速连续点按两次。使用此操作可以激活双击交互,如缩放、选择文本或打开项目。坐标系统从左上角 (0,0) 开始到右下角(999,999)结束。此操作完成后,您将自动收到结果状态的截图。 44 | - do(action="Take_over", message="xxx") 45 | Take_over是接管操作,表示在登录和验证阶段需要用户协助。 46 | - do(action="Back") 47 | 导航返回到上一个屏幕或关闭当前对话框。相当于按下 Android 的返回按钮。使用此操作可以从更深的屏幕返回、关闭弹出窗口或退出当前上下文。此操作完成后,您将自动收到结果状态的截图。 48 | - do(action="Home") 49 | Home是回到系统桌面的操作,相当于按下 Android 主屏幕按钮。使用此操作可退出当前应用并返回启动器,或从已知状态启动新任务。此操作完成后,您将自动收到结果状态的截图。 50 | - do(action="Wait", duration="x seconds") 51 | 等待页面加载,x为需要等待多少秒。 52 | - finish(message="xxx") 53 | finish是结束任务的操作,表示准确完整完成任务,message是终止信息。 54 | 55 | 必须遵循的规则: 56 | 1. 在执行任何操作前,先检查当前app是否是目标app,如果不是,先执行 Launch。 57 | 2. 如果进入到了无关页面,先执行 Back。如果执行Back后页面没有变化,请点击页面左上角的返回键进行返回,或者右上角的X号关闭。 58 | 3. 如果页面未加载出内容,最多连续 Wait 三次,否则执行 Back重新进入。 59 | 4. 如果页面显示网络问题,需要重新加载,请点击重新加载。 60 | 5. 如果当前页面找不到目标联系人、商品、店铺等信息,可以尝试 Swipe 滑动查找。 61 | 6. 遇到价格区间、时间区间等筛选条件,如果没有完全符合的,可以放宽要求。 62 | 7. 在做小红书总结类任务时一定要筛选图文笔记。 63 | 8. 购物车全选后再点击全选可以把状态设为全不选,在做购物车任务时,如果购物车里已经有商品被选中时,你需要点击全选后再点击取消全选,再去找需要购买或者删除的商品。 64 | 9. 在做外卖任务时,如果相应店铺购物车里已经有其他商品你需要先把购物车清空再去购买用户指定的外卖。 65 | 10. 在做点外卖任务时,如果用户需要点多个外卖,请尽量在同一店铺进行购买,如果无法找到可以下单,并说明某个商品未找到。 66 | 11. 请严格遵循用户意图执行任务,用户的特殊要求可以执行多次搜索,滑动查找。比如(i)用户要求点一杯咖啡,要咸的,你可以直接搜索咸咖啡,或者搜索咖啡后滑动查找咸的咖啡,比如海盐咖啡。(ii)用户要找到XX群,发一条消息,你可以先搜索XX群,找不到结果后,将"群"字去掉,搜索XX重试。(iii)用户要找到宠物友好的餐厅,你可以搜索餐厅,找到筛选,找到设施,选择可带宠物,或者直接搜索可带宠物,必要时可以使用AI搜索。 67 | 12. 在选择日期时,如果原滑动方向与预期日期越来越远,请向反方向滑动查找。 68 | 13. 执行任务过程中如果有多个可选择的项目栏,请逐个查找每个项目栏,直到完成任务,一定不要在同一项目栏多次查找,从而陷入死循环。 69 | 14. 在执行下一步操作前请一定要检查上一步的操作是否生效,如果点击没生效,可能因为app反应较慢,请先稍微等待一下,如果还是不生效请调整一下点击位置重试,如果仍然不生效请跳过这一步继续任务,并在finish message说明点击不生效。 70 | 15. 在执行任务中如果遇到滑动不生效的情况,请调整一下起始点位置,增大滑动距离重试,如果还是不生效,有可能是已经滑到底了,请继续向反方向滑动,直到顶部或底部,如果仍然没有符合要求的结果,请跳过这一步继续任务,并在finish message说明但没找到要求的项目。 71 | 16. 
在做游戏任务时如果在战斗页面如果有自动战斗一定要开启自动战斗,如果多轮历史状态相似要检查自动战斗是否开启。 72 | 17. 如果没有合适的搜索结果,可能是因为搜索页面不对,请返回到搜索页面的上一级尝试重新搜索,如果尝试三次返回上一级搜索后仍然没有符合要求的结果,执行 finish(message="原因")。 73 | 18. 在结束任务前请一定要仔细检查任务是否完整准确的完成,如果出现错选、漏选、多选的情况,请返回之前的步骤进行纠正。 74 | """ 75 | ) 76 | -------------------------------------------------------------------------------- /phone_agent/config/prompts_zh.py: -------------------------------------------------------------------------------- 1 | """System prompts for the AI agent.""" 2 | 3 | from datetime import datetime 4 | 5 | today = datetime.today() 6 | weekday_names = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"] 7 | weekday = weekday_names[today.weekday()] 8 | formatted_date = today.strftime("%Y年%m月%d日") + " " + weekday 9 | 10 | SYSTEM_PROMPT = ( 11 | "今天的日期是: " 12 | + formatted_date 13 | + """ 14 | 你是一个智能体分析专家,可以根据操作历史和当前状态图执行一系列操作来完成任务。 15 | 你必须严格按照要求输出以下格式: 16 | {think} 17 | {action} 18 | 19 | 其中: 20 | - {think} 是对你为什么选择这个操作的简短推理说明。 21 | - {action} 是本次执行的具体操作指令,必须严格遵循下方定义的指令格式。 22 | 23 | 操作指令及其作用如下: 24 | - do(action="Launch", app="xxx") 25 | Launch是启动目标app的操作,这比通过主屏幕导航更快。此操作完成后,您将自动收到结果状态的截图。 26 | - do(action="Tap", element=[x,y]) 27 | Tap是点击操作,点击屏幕上的特定点。可用此操作点击按钮、选择项目、从主屏幕打开应用程序,或与任何可点击的用户界面元素进行交互。坐标系统从左上角 (0,0) 开始到右下角(999,999)结束。此操作完成后,您将自动收到结果状态的截图。 28 | - do(action="Tap", element=[x,y], message="重要操作") 29 | 基本功能同Tap,点击涉及财产、支付、隐私等敏感按钮时触发。 30 | - do(action="Type", text="xxx") 31 | Type是输入操作,在当前聚焦的输入框中输入文本。使用此操作前,请确保输入框已被聚焦(先点击它)。输入的文本将像使用键盘输入一样输入。重要提示:手机可能正在使用 ADB 键盘,该键盘不会像普通键盘那样占用屏幕空间。要确认键盘已激活,请查看屏幕底部是否显示 'ADB Keyboard {ON}' 类似的文本,或者检查输入框是否处于激活/高亮状态。不要仅仅依赖视觉上的键盘显示。自动清除文本:当你使用输入操作时,输入框中现有的任何文本(包括占位符文本和实际输入)都会在输入新文本前自动清除。你无需在输入前手动清除文本——直接使用输入操作输入所需文本即可。操作完成后,你将自动收到结果状态的截图。 32 | - do(action="Type_Name", text="xxx") 33 | Type_Name是输入人名的操作,基本功能同Type。 34 | - do(action="Interact") 35 | Interact是当有多个满足条件的选项时而触发的交互操作,询问用户如何选择。 36 | - do(action="Swipe", start=[x1,y1], end=[x2,y2]) 37 | Swipe是滑动操作,通过从起始坐标拖动到结束坐标来执行滑动手势。可用于滚动内容、在屏幕之间导航、下拉通知栏以及项目栏或进行基于手势的导航。坐标系统从左上角 
(0,0) 开始到右下角(999,999)结束。滑动持续时间会自动调整以实现自然的移动。此操作完成后,您将自动收到结果状态的截图。 38 | - do(action="Note", message="True") 39 | 记录当前页面内容以便后续总结。 40 | - do(action="Call_API", instruction="xxx") 41 | 总结或评论当前页面或已记录的内容。 42 | - do(action="Long Press", element=[x,y]) 43 | Long Pres是长按操作,在屏幕上的特定点长按指定时间。可用于触发上下文菜单、选择文本或激活长按交互。坐标系统从左上角 (0,0) 开始到右下角(999,999)结束。此操作完成后,您将自动收到结果状态的屏幕截图。 44 | - do(action="Double Tap", element=[x,y]) 45 | Double Tap在屏幕上的特定点快速连续点按两次。使用此操作可以激活双击交互,如缩放、选择文本或打开项目。坐标系统从左上角 (0,0) 开始到右下角(999,999)结束。此操作完成后,您将自动收到结果状态的截图。 46 | - do(action="Take_over", message="xxx") 47 | Take_over是接管操作,表示在登录和验证阶段需要用户协助。 48 | - do(action="Back") 49 | 导航返回到上一个屏幕或关闭当前对话框。相当于按下 Android 的返回按钮。使用此操作可以从更深的屏幕返回、关闭弹出窗口或退出当前上下文。此操作完成后,您将自动收到结果状态的截图。 50 | - do(action="Home") 51 | Home是回到系统桌面的操作,相当于按下 Android 主屏幕按钮。使用此操作可退出当前应用并返回启动器,或从已知状态启动新任务。此操作完成后,您将自动收到结果状态的截图。 52 | - do(action="Wait", duration="x seconds") 53 | 等待页面加载,x为需要等待多少秒。 54 | - finish(message="xxx") 55 | finish是结束任务的操作,表示准确完整完成任务,message是终止信息。 56 | 57 | 必须遵循的规则: 58 | 1. 在执行任何操作前,先检查当前app是否是目标app,如果不是,先执行 Launch。 59 | 2. 如果进入到了无关页面,先执行 Back。如果执行Back后页面没有变化,请点击页面左上角的返回键进行返回,或者右上角的X号关闭。 60 | 3. 如果页面未加载出内容,最多连续 Wait 三次,否则执行 Back重新进入。 61 | 4. 如果页面显示网络问题,需要重新加载,请点击重新加载。 62 | 5. 如果当前页面找不到目标联系人、商品、店铺等信息,可以尝试 Swipe 滑动查找。 63 | 6. 遇到价格区间、时间区间等筛选条件,如果没有完全符合的,可以放宽要求。 64 | 7. 在做小红书总结类任务时一定要筛选图文笔记。 65 | 8. 购物车全选后再点击全选可以把状态设为全不选,在做购物车任务时,如果购物车里已经有商品被选中时,你需要点击全选后再点击取消全选,再去找需要购买或者删除的商品。 66 | 9. 在做外卖任务时,如果相应店铺购物车里已经有其他商品你需要先把购物车清空再去购买用户指定的外卖。 67 | 10. 在做点外卖任务时,如果用户需要点多个外卖,请尽量在同一店铺进行购买,如果无法找到可以下单,并说明某个商品未找到。 68 | 11. 请严格遵循用户意图执行任务,用户的特殊要求可以执行多次搜索,滑动查找。比如(i)用户要求点一杯咖啡,要咸的,你可以直接搜索咸咖啡,或者搜索咖啡后滑动查找咸的咖啡,比如海盐咖啡。(ii)用户要找到XX群,发一条消息,你可以先搜索XX群,找不到结果后,将"群"字去掉,搜索XX重试。(iii)用户要找到宠物友好的餐厅,你可以搜索餐厅,找到筛选,找到设施,选择可带宠物,或者直接搜索可带宠物,必要时可以使用AI搜索。 69 | 12. 在选择日期时,如果原滑动方向与预期日期越来越远,请向反方向滑动查找。 70 | 13. 执行任务过程中如果有多个可选择的项目栏,请逐个查找每个项目栏,直到完成任务,一定不要在同一项目栏多次查找,从而陷入死循环。 71 | 14. 
@router.post("/api/devices/connect_wifi", response_model=WiFiConnectResponse)
def connect_wifi(request: WiFiConnectRequest) -> WiFiConnectResponse:
    """Enable TCP/IP on a device and reconnect to it over WiFi.

    Steps: look up the device, return early if it is already a remote
    connection, enable tcpip on ``request.port``, resolve the device IP, then
    ``adb connect`` to ``ip:port``. Each failing step returns
    ``success=False`` with a step-specific ``error`` code
    (``device_not_found``, ``tcpip``, ``ip``, ``connect``).
    """
    from phone_agent.adb import ADBConnection, ConnectionType

    def _failed(message: str, error: str) -> WiFiConnectResponse:
        return WiFiConnectResponse(success=False, message=message, error=error)

    def _succeeded(message: str, address: str) -> WiFiConnectResponse:
        return WiFiConnectResponse(
            success=True, message=message, device_id=address, address=address
        )

    adb = ADBConnection()

    # Original note: prefer the device_id from the request, otherwise take the
    # first online device. NOTE(review): that fallback lives inside
    # get_device_info, which is not visible here; confirm.
    info = adb.get_device_info(request.device_id)
    if not info:
        return _failed("No connected device found", "device_not_found")

    # Already connected over WiFi: nothing to do.
    if info.connection_type == ConnectionType.REMOTE:
        return _succeeded("Already connected over WiFi", info.device_id)

    # 1) Enable tcpip
    enabled, detail = adb.enable_tcpip(port=request.port, device_id=info.device_id)
    if not enabled:
        return _failed(detail or "Failed to enable tcpip", "tcpip")

    # 2) Resolve the device IP: try the local adb_plus helper first, then fall
    #    back to the upstream ADBConnection lookup.
    device_ip = get_wifi_ip(adb.adb_path, info.device_id)
    if not device_ip:
        device_ip = adb.get_device_ip(info.device_id)
    if not device_ip:
        return _failed("Failed to get device IP", "ip")

    target = f"{device_ip}:{request.port}"

    # 3) Connect over WiFi
    connected, detail = adb.connect(target)
    if not connected:
        return _failed(detail or "Failed to connect over WiFi", "connect")

    return _succeeded("Switched to WiFi successfully", target)
def build_frontend() -> bool:
    """Build the frontend using pnpm.

    Runs ``pnpm install`` followed by ``pnpm build`` with ``FRONTEND_DIR`` as
    the working directory.

    Returns:
        True when both steps exit with status 0; False when pnpm cannot be
        found or run, or when either step fails. Progress and error messages
        are printed to stdout.
    """
    print("Building frontend...")

    # Resolve the executable to a full path first. On Windows, launching the
    # bare name "pnpm" without a shell does not find ``.cmd``/``.bat`` shims,
    # whereas shutil.which applies PATHEXT and returns the actual file.
    pnpm = shutil.which("pnpm")
    if pnpm is None:
        print("Error: pnpm is not installed. Please install pnpm first.")
        return False

    # Check that pnpm actually runs
    try:
        subprocess.run([pnpm, "--version"], check=True, capture_output=True)
    except (subprocess.CalledProcessError, OSError):
        print("Error: pnpm is not installed. Please install pnpm first.")
        return False

    # Install dependencies
    print("Installing frontend dependencies...")
    result = subprocess.run([pnpm, "install"], cwd=FRONTEND_DIR)
    if result.returncode != 0:
        print("Error: Failed to install frontend dependencies.")
        return False

    # Build
    print("Building frontend...")
    result = subprocess.run([pnpm, "build"], cwd=FRONTEND_DIR)
    if result.returncode != 0:
        print("Error: Failed to build frontend.")
        return False

    return True
| if not copy_static_files(): 104 | return 1 105 | 106 | if args.pack: 107 | if not build_package(): 108 | return 1 109 | 110 | print() 111 | print("=" * 50) 112 | print("Build completed successfully!") 113 | print() 114 | if args.pack: 115 | print("Package built in: dist/") 116 | print() 117 | print("Next steps:") 118 | print(" 1. Test: uvx --from dist/autoglm_gui-*.whl autoglm-gui") 119 | print(" 2. Publish: uv publish") 120 | else: 121 | print("Next steps:") 122 | print(" 1. Test locally: uv run autoglm-gui") 123 | print(" 2. Build package: uv run python scripts/build.py --pack") 124 | print(" 3. Publish to PyPI: uv publish") 125 | print("=" * 50) 126 | 127 | return 0 128 | 129 | 130 | if __name__ == "__main__": 131 | sys.exit(main()) 132 | -------------------------------------------------------------------------------- /scripts/download_adb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | ADB 工具自动下载脚本 4 | 5 | 用法: 6 | uv run python scripts/download_adb.py # 下载所有平台(Windows + macOS) 7 | uv run python scripts/download_adb.py windows # 只下载 Windows 8 | uv run python scripts/download_adb.py darwin # 只下载 macOS 9 | 10 | 输出目录: 11 | resources/adb/windows/platform-tools/ 12 | resources/adb/darwin/platform-tools/ 13 | """ 14 | 15 | import sys 16 | import urllib.request 17 | import zipfile 18 | from pathlib import Path 19 | 20 | 21 | # Google 官方 Android Platform Tools 下载地址 22 | ADB_URLS = { 23 | "windows": "https://dl.google.com/android/repository/platform-tools-latest-windows.zip", 24 | "darwin": "https://dl.google.com/android/repository/platform-tools-latest-darwin.zip", 25 | "linux": "https://dl.google.com/android/repository/platform-tools-latest-linux.zip", 26 | } 27 | 28 | 29 | def download_with_progress(url: str, output_path: Path) -> None: 30 | """下载文件并显示进度""" 31 | print(f" 下载: {url}") 32 | 33 | def reporthook(block_num, block_size, total_size): 34 | if total_size > 0: 35 | downloaded = 
block_num * block_size 36 | percent = min(100, downloaded * 100 / total_size) 37 | mb_downloaded = downloaded / (1024 * 1024) 38 | mb_total = total_size / (1024 * 1024) 39 | print( 40 | f" 进度: {percent:.1f}% ({mb_downloaded:.1f}/{mb_total:.1f} MB)", 41 | end="\r", 42 | ) 43 | 44 | try: 45 | urllib.request.urlretrieve(url, output_path, reporthook=reporthook) 46 | print() # 换行 47 | except Exception as e: 48 | print(f"\n ❌ 下载失败: {e}") 49 | raise 50 | 51 | 52 | def download_adb(platform: str) -> None: 53 | """下载并解压 ADB 工具""" 54 | url = ADB_URLS.get(platform) 55 | if not url: 56 | print(f"❌ 不支持的平台: {platform}") 57 | print(f" 支持的平台: {', '.join(ADB_URLS.keys())}") 58 | return 59 | 60 | # 项目根目录 61 | root_dir = Path(__file__).parent.parent 62 | output_dir = root_dir / "resources" / "adb" / platform 63 | output_dir.mkdir(parents=True, exist_ok=True) 64 | 65 | zip_path = output_dir / "platform-tools.zip" 66 | 67 | print(f"\n{'=' * 60}") 68 | print(f"下载 ADB 工具 - {platform}") 69 | print(f"{'=' * 60}") 70 | 71 | # 下载 72 | download_with_progress(url, zip_path) 73 | 74 | # 解压 75 | print(" 解压中...") 76 | try: 77 | with zipfile.ZipFile(zip_path, "r") as zip_ref: 78 | zip_ref.extractall(output_dir) 79 | print(" ✓ 解压完成") 80 | except Exception as e: 81 | print(f" ❌ 解压失败: {e}") 82 | raise 83 | 84 | # 删除 zip 文件 85 | zip_path.unlink() 86 | print(" ✓ 清理临时文件") 87 | 88 | # 验证 89 | platform_tools_dir = output_dir / "platform-tools" 90 | adb_exe = platform_tools_dir / ("adb.exe" if platform == "windows" else "adb") 91 | 92 | if adb_exe.exists(): 93 | file_size = adb_exe.stat().st_size / (1024 * 1024) 94 | print(f" ✓ ADB 可执行文件: {adb_exe} ({file_size:.1f} MB)") 95 | else: 96 | print(" ⚠️ 警告: 未找到 ADB 可执行文件") 97 | 98 | print(f"\n✓ {platform.upper()} ADB 工具下载完成") 99 | print(f" 位置: {output_dir}") 100 | 101 | 102 | def main(): 103 | """主函数""" 104 | # 默认下载所有平台 105 | platforms = sys.argv[1:] if len(sys.argv) > 1 else ["windows", "darwin"] 106 | 107 | print("\n" + "=" * 60) 108 | print(" AutoGLM-GUI - 
ADB 工具下载器") 109 | print("=" * 60) 110 | print(f" 目标平台: {', '.join(platforms)}") 111 | 112 | success_count = 0 113 | failed_platforms = [] 114 | 115 | for platform in platforms: 116 | try: 117 | download_adb(platform) 118 | success_count += 1 119 | except Exception as e: 120 | print(f"\n❌ {platform} 下载失败: {e}") 121 | failed_platforms.append(platform) 122 | 123 | # 总结 124 | print("\n" + "=" * 60) 125 | print(" 下载总结") 126 | print("=" * 60) 127 | print(f" 成功: {success_count}/{len(platforms)}") 128 | if failed_platforms: 129 | print(f" 失败: {', '.join(failed_platforms)}") 130 | print("=" * 60) 131 | 132 | if failed_platforms: 133 | sys.exit(1) 134 | 135 | 136 | if __name__ == "__main__": 137 | main() 138 | -------------------------------------------------------------------------------- /AutoGLM_GUI/schemas.py: -------------------------------------------------------------------------------- 1 | """Shared Pydantic models for the AutoGLM-GUI API.""" 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class APIModelConfig(BaseModel): 7 | base_url: str | None = None 8 | api_key: str | None = None 9 | model_name: str | None = None 10 | max_tokens: int = 3000 11 | temperature: float = 0.0 12 | top_p: float = 0.85 13 | frequency_penalty: float = 0.2 14 | 15 | 16 | class APIAgentConfig(BaseModel): 17 | max_steps: int = 100 18 | device_id: str | None = None 19 | lang: str = "cn" 20 | system_prompt: str | None = None 21 | verbose: bool = True 22 | 23 | 24 | class InitRequest(BaseModel): 25 | model: APIModelConfig | None = Field(default=None, alias="model_config") 26 | agent: APIAgentConfig | None = Field(default=None, alias="agent_config") 27 | 28 | 29 | class ChatRequest(BaseModel): 30 | message: str 31 | device_id: str # 设备 ID(必填) 32 | 33 | 34 | class ChatResponse(BaseModel): 35 | result: str 36 | steps: int 37 | success: bool 38 | 39 | 40 | class StatusResponse(BaseModel): 41 | version: str 42 | initialized: bool 43 | step_count: int 44 | 45 | 46 | class 
ResetRequest(BaseModel): 47 | device_id: str # 设备 ID(必填) 48 | 49 | 50 | class ScreenshotRequest(BaseModel): 51 | device_id: str | None = None 52 | 53 | 54 | class ScreenshotResponse(BaseModel): 55 | success: bool 56 | image: str # base64 encoded PNG 57 | width: int 58 | height: int 59 | is_sensitive: bool 60 | error: str | None = None 61 | 62 | 63 | class TapRequest(BaseModel): 64 | x: int 65 | y: int 66 | device_id: str | None = None 67 | delay: float = 0.0 68 | 69 | 70 | class TapResponse(BaseModel): 71 | success: bool 72 | error: str | None = None 73 | 74 | 75 | class SwipeRequest(BaseModel): 76 | start_x: int 77 | start_y: int 78 | end_x: int 79 | end_y: int 80 | duration_ms: int | None = None 81 | device_id: str | None = None 82 | delay: float = 0.0 83 | 84 | 85 | class SwipeResponse(BaseModel): 86 | success: bool 87 | error: str | None = None 88 | 89 | 90 | class TouchDownRequest(BaseModel): 91 | x: int 92 | y: int 93 | device_id: str | None = None 94 | delay: float = 0.0 95 | 96 | 97 | class TouchDownResponse(BaseModel): 98 | success: bool 99 | error: str | None = None 100 | 101 | 102 | class TouchMoveRequest(BaseModel): 103 | x: int 104 | y: int 105 | device_id: str | None = None 106 | delay: float = 0.0 107 | 108 | 109 | class TouchMoveResponse(BaseModel): 110 | success: bool 111 | error: str | None = None 112 | 113 | 114 | class TouchUpRequest(BaseModel): 115 | x: int 116 | y: int 117 | device_id: str | None = None 118 | delay: float = 0.0 119 | 120 | 121 | class TouchUpResponse(BaseModel): 122 | success: bool 123 | error: str | None = None 124 | 125 | 126 | class DeviceListResponse(BaseModel): 127 | devices: list[dict] 128 | 129 | 130 | class ConfigResponse(BaseModel): 131 | """配置读取响应.""" 132 | 133 | base_url: str 134 | model_name: str 135 | api_key: str # 返回实际值(明文) 136 | source: str # "CLI arguments" | "environment variables" | "config file (...)" | "default" 137 | conflicts: list[dict] | None = None # 配置冲突信息(可选) 138 | # conflicts 示例: 139 | # [ 140 | # 
{ 141 | # "field": "base_url", 142 | # "file_value": "http://localhost:8080/v1", 143 | # "override_value": "https://api.example.com", 144 | # "override_source": "CLI arguments" 145 | # } 146 | # ] 147 | 148 | 149 | class ConfigSaveRequest(BaseModel): 150 | """配置保存请求.""" 151 | 152 | base_url: str 153 | model_name: str = "autoglm-phone-9b" 154 | api_key: str | None = None 155 | 156 | 157 | class WiFiConnectRequest(BaseModel): 158 | device_id: str | None = None 159 | port: int = 5555 160 | 161 | 162 | class WiFiConnectResponse(BaseModel): 163 | success: bool 164 | message: str 165 | device_id: str | None = None 166 | address: str | None = None 167 | error: str | None = None 168 | 169 | 170 | class WiFiDisconnectRequest(BaseModel): 171 | device_id: str 172 | 173 | 174 | class WiFiDisconnectResponse(BaseModel): 175 | success: bool 176 | message: str 177 | error: str | None = None 178 | -------------------------------------------------------------------------------- /frontend/eslint.config.js: -------------------------------------------------------------------------------- 1 | import js from '@eslint/js'; 2 | import typescript from '@typescript-eslint/eslint-plugin'; 3 | import typescriptParser from '@typescript-eslint/parser'; 4 | import react from 'eslint-plugin-react'; 5 | import reactHooks from 'eslint-plugin-react-hooks'; 6 | import reactRefresh from 'eslint-plugin-react-refresh'; 7 | import prettier from 'eslint-plugin-prettier'; 8 | import prettierConfig from 'eslint-config-prettier'; 9 | import globals from 'globals'; 10 | 11 | export default [ 12 | js.configs.recommended, 13 | { 14 | files: ['**/*.{js,jsx,ts,tsx}'], 15 | languageOptions: { 16 | parser: typescriptParser, 17 | parserOptions: { 18 | ecmaVersion: 'latest', 19 | sourceType: 'module', 20 | ecmaFeatures: { 21 | jsx: true, 22 | }, 23 | }, 24 | globals: { 25 | ...globals.browser, 26 | ...globals.node, 27 | console: 'readonly', 28 | process: 'readonly', 29 | Buffer: 'readonly', 30 | __dirname: 'readonly', 
31 | __filename: 'readonly', 32 | module: 'readonly', 33 | require: 'readonly', 34 | exports: 'readonly', 35 | global: 'readonly', 36 | window: 'readonly', 37 | document: 'readonly', 38 | navigator: 'readonly', 39 | localStorage: 'readonly', 40 | sessionStorage: 'readonly', 41 | setInterval: 'readonly', 42 | clearInterval: 'readonly', 43 | setTimeout: 'readonly', 44 | clearTimeout: 'readonly', 45 | HTMLDivElement: 'readonly', 46 | HTMLElement: 'readonly', 47 | HTMLInputElement: 'readonly', 48 | HTMLVideoElement: 'readonly', 49 | WebSocket: 'readonly', 50 | AbortController: 'readonly', 51 | TextDecoder: 'readonly', 52 | TextEncoder: 'readonly', 53 | fetch: 'readonly', 54 | URL: 'readonly', 55 | Blob: 'readonly', 56 | File: 'readonly', 57 | FileReader: 'readonly', 58 | requestAnimationFrame: 'readonly', 59 | cancelAnimationFrame: 'readonly', 60 | NodeJS: 'readonly', 61 | }, 62 | }, 63 | plugins: { 64 | '@typescript-eslint': typescript, 65 | react, 66 | 'react-hooks': reactHooks, 67 | 'react-refresh': reactRefresh, 68 | prettier, 69 | }, 70 | rules: { 71 | ...typescript.configs.recommended.rules, 72 | ...react.configs.recommended.rules, 73 | ...reactHooks.configs.recommended.rules, 74 | ...reactRefresh.configs.recommended.rules, 75 | ...prettierConfig.rules, 76 | 77 | // TypeScript rules 78 | '@typescript-eslint/no-unused-vars': [ 79 | 'error', 80 | { argsIgnorePattern: '^_' }, 81 | ], 82 | '@typescript-eslint/explicit-function-return-type': 'off', 83 | '@typescript-eslint/explicit-module-boundary-types': 'off', 84 | '@typescript-eslint/no-explicit-any': 'warn', 85 | '@typescript-eslint/no-non-null-assertion': 'warn', 86 | '@typescript-eslint/no-require-imports': 'off', 87 | 88 | // React rules 89 | 'react/react-in-jsx-scope': 'off', // Not needed with React 17+ 90 | 'react/prop-types': 'off', // Using TypeScript for prop validation 91 | 'react/jsx-uses-react': 'off', // Not needed with React 17+ 92 | 'react/jsx-key': 'error', 93 | 'react/jsx-no-duplicate-props': 
'error', 94 | 'react-hooks/rules-of-hooks': 'error', // Enforce rules of hooks 95 | 'react-hooks/purity': 'off', // Allow Date.now() in event handlers 96 | 97 | // General rules 98 | 'no-console': 'off', // Allow console logs for debugging in this component 99 | 'no-debugger': 'error', 100 | 'prefer-const': 'error', 101 | 'no-var': 'error', 102 | 'no-unused-vars': 'off', // Let TypeScript handle this 103 | 104 | // Prettier 105 | 'prettier/prettier': 'error', 106 | }, 107 | settings: { 108 | react: { 109 | version: 'detect', 110 | }, 111 | }, 112 | }, 113 | { 114 | files: ['**/*.js'], 115 | rules: { 116 | '@typescript-eslint/no-require-imports': 'off', 117 | }, 118 | }, 119 | { 120 | ignores: [ 121 | 'dist/**', 122 | 'node_modules/**', 123 | 'build/**', 124 | 'coverage/**', 125 | '*.config.js', 126 | '*.config.ts', 127 | 'vite.config.*', 128 | 'tailwind.config.*', 129 | ], 130 | }, 131 | ]; 132 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AutoGLM-GUI 2 | 3 | AutoGLM 手机助手的现代化 Web 图形界面 - 让 AI 自动化操作 Android 设备变得简单 4 | 5 | ![Python](https://img.shields.io/badge/python-3.10+-blue.svg) 6 | ![License](https://img.shields.io/badge/license-Apache%202.0-green.svg) 7 |
8 | 9 | 欢迎加入讨论交流群 10 | 11 | ## ✨ 特性 12 | 13 | - **多设备并发控制** - 同时管理和控制多个 Android 设备,设备间状态完全隔离 14 | - **对话式任务管理** - 通过聊天界面控制 Android 设备 15 | - **实时屏幕预览** - 基于 scrcpy 的低延迟视频流,随时查看设备正在执行的操作 16 | - **直接操控手机** - 在实时画面上直接点击、滑动操作,支持精准坐标转换和视觉反馈 17 | - **零配置部署** - 支持任何 OpenAI 兼容的 LLM API 18 | - **ADB 深度集成** - 通过 Android Debug Bridge 直接控制设备 19 | - **模块化界面** - 清晰的侧边栏 + 设备面板设计,功能分离明确 20 | 21 | ## 📸 界面预览 22 | 23 | ### 任务开始 24 | ![任务开始](https://github.com/user-attachments/assets/b8cb6fbc-ca5b-452c-bcf4-7d5863d4577a) 25 | 26 | ### 任务执行完成 27 | ![任务结束](https://github.com/user-attachments/assets/b32f2e46-5340-42f5-a0db-0033729e1605) 28 | 29 | ### 多设备控制 30 | ![多设备控制](https://github.com/user-attachments/assets/f826736f-c41f-4d64-bf54-3ca65c69068d) 31 | 32 | ## 🚀 快速开始 33 | 34 | ## 🎯 模型服务配置 35 | 36 | AutoGLM-GUI 只需要一个 OpenAI 兼容的模型服务。你可以: 37 | 38 | - 使用官方已托管的第三方服务 39 | - 智谱 BigModel:`--base-url https://open.bigmodel.cn/api/paas/v4`,`--model autoglm-phone`,`--apikey <你的 API Key>` 40 | - ModelScope:`--base-url https://api-inference.modelscope.cn/v1`,`--model ZhipuAI/AutoGLM-Phone-9B`,`--apikey <你的 API Key>` 41 | - 或自建服务:参考上游项目的[部署文档](https://github.com/zai-org/Open-AutoGLM/blob/main/README.md)用 vLLM/SGLang 部署 `zai-org/AutoGLM-Phone-9B`,启动 OpenAI 兼容端口后将 `--base-url` 指向你的服务。 42 | 43 | 示例: 44 | 45 | ```bash 46 | # 使用智谱 BigModel 47 | pip install autoglm-gui 48 | autoglm-gui \ 49 | --base-url https://open.bigmodel.cn/api/paas/v4 \ 50 | --model autoglm-phone \ 51 | --apikey sk-xxxxx 52 | 53 | # 使用 ModelScope 54 | pip install autoglm-gui 55 | autoglm-gui \ 56 | --base-url https://api-inference.modelscope.cn/v1 \ 57 | --model ZhipuAI/AutoGLM-Phone-9B \ 58 | --apikey sk-xxxxx 59 | 60 | # 指向你自建的 vLLM/SGLang 服务 61 | pip install autoglm-gui 62 | autoglm-gui --base-url http://localhost:8000/v1 --model autoglm-phone-9b 63 | ``` 64 | 65 | ### 前置要求 66 | 67 | - Python 3.10+ 68 | - 已开启 USB 调试的 Android 设备 69 | - 已安装 ADB 并添加到系统 PATH 70 | - 一个 OpenAI 兼容的 API 端点 71 | 72 | ### 快捷运行(推荐) 73 | 74 | 
**无需手动准备环境,直接安装运行:** 75 | 76 | ```bash 77 | # 通过 pip 安装并启动 78 | pip install autoglm-gui 79 | autoglm-gui --base-url http://localhost:8080/v1 80 | ``` 81 | 82 | 也可以使用 uvx 免安装启动(需已安装 uv,[安装教程](https://docs.astral.sh/uv/getting-started/installation/)): 83 | 84 | ```bash 85 | uvx autoglm-gui --base-url http://localhost:8080/v1 86 | ``` 87 | 88 | ### 传统安装 89 | 90 | ```bash 91 | # 从源码安装 92 | git clone https://github.com/suyiiyii/AutoGLM-GUI.git 93 | cd AutoGLM-GUI 94 | uv sync 95 | 96 | # 构建前端(必须) 97 | uv run python scripts/build.py 98 | 99 | # 启动服务 100 | uv run autoglm-gui --base-url http://localhost:8080/v1 101 | ``` 102 | 103 | 启动后,在浏览器中打开 http://localhost:8000 即可开始使用! 104 | 105 | ## 📖 使用说明 106 | 107 | ### 多设备管理 108 | 109 | AutoGLM-GUI 支持同时控制多个 Android 设备: 110 | 111 | 1. **设备列表** - 左侧边栏自动显示所有已连接的 ADB 设备 112 | 2. **设备选择** - 点击设备卡片切换到对应的控制面板 113 | 3. **状态指示** - 清晰显示每个设备的在线状态和初始化状态 114 | 4. **状态隔离** - 每个设备有独立的对话历史、配置和视频流 115 | 116 | **设备状态说明**: 117 | - 🟢 绿点:设备在线 118 | - ⚪ 灰点:设备离线 119 | - ✓ 标记:设备已初始化 120 | 121 | ### AI 自动化模式 122 | 123 | 1. **连接设备** - 启用 USB 调试并通过 ADB 连接设备(支持 USB 和 WiFi) 124 | 2. **选择设备** - 在左侧边栏选择要控制的设备 125 | 3. **初始化** - 点击"初始化设备"按钮配置 Agent 126 | 4. **对话** - 描述你想要做什么(例如:"去美团点一杯霸王茶姬的伯牙绝弦") 127 | 5. **观察** - Agent 会逐步执行操作,每一步的思考过程和动作都会实时显示 128 | 129 | ### 手动控制模式 130 | 131 | 除了 AI 自动化,你也可以直接在实时画面上操控手机: 132 | 133 | 1. **实时画面** - 设备面板右侧显示手机屏幕的实时视频流(基于 scrcpy) 134 | 2. **点击操作** - 直接点击画面中的任意位置,操作会立即发送到手机 135 | 3. **滑动手势** - 按住鼠标拖动实现滑动操作(支持滚轮滚动) 136 | 4. **视觉反馈** - 每次操作都会显示涟漪动画和成功/失败提示 137 | 5. **精准转换** - 自动处理屏幕缩放和坐标转换,确保操作位置准确 138 | 6.
**显示模式** - 支持自动、视频流、截图三种显示模式切换 139 | 140 | **技术细节**: 141 | - 使用 scrcpy 提供低延迟(~30-50ms)的 H.264 视频流 142 | - 前端自动获取设备实际分辨率(如 1080x2400) 143 | - 智能处理视频流缩放(如 576x1280)与设备分辨率的映射 144 | - 支持 letterbox 黑边的精确坐标计算 145 | - 颗粒化触摸事件支持(DOWN、MOVE、UP)实现流畅的手势操作 146 | 147 | ## 🏗️ 架构设计 148 | 149 | ### 多设备并发架构 150 | 151 | AutoGLM-GUI 采用简化的多设备并发架构,支持同时管理多个 Android 设备: 152 | 153 | **后端设计**: 154 | - 使用字典管理多个 `PhoneAgent` 实例:`agents: dict[str, PhoneAgent]` 155 | - 每个设备有独立的 `scrcpy` 视频流实例 156 | - 设备级别的锁机制,避免不同设备间的阻塞 157 | - 所有 API 接口支持 `device_id` 参数进行设备路由 158 | 159 | **前端设计**: 160 | - 使用 `Map` 管理每个设备的独立状态 161 | - 组件化设计,功能职责清晰分离: 162 | - `DeviceCard` - 单个设备信息卡片 163 | - `DeviceSidebar` - 设备列表侧边栏 164 | - `DevicePanel` - 设备操作面板(ChatBox + Screen Monitor) 165 | - 设备状态完全隔离,互不影响 166 | 167 | **核心特点**: 168 | - ✅ 无任务队列,简化设计 169 | - ✅ 无复杂调度,每个设备独立运行 170 | - ✅ 实时 WebSocket 通信,支持流式响应 171 | - ✅ 自动设备发现和状态同步(每 3 秒刷新) 172 | 173 | ## 🛠️ 开发指南 174 | 175 | ### 快速开发 176 | 177 | ```bash 178 | # 后端开发(自动重载) 179 | uv run autoglm-gui --base-url http://localhost:8080/v1 --reload 180 | 181 | # 前端开发服务器(热重载) 182 | cd frontend && pnpm dev 183 | ``` 184 | ### 构建和打包 185 | 186 | ```bash 187 | # 仅构建前端 188 | uv run python scripts/build.py 189 | 190 | # 构建完整包 191 | uv run python scripts/build.py --pack 192 | ``` 193 | 194 | ## 📝 开源协议 195 | 196 | Apache License 2.0 197 | 198 | 199 | ### 许可证说明 200 | 201 | AutoGLM-GUI 使用 Apache 2.0 许可证。但是,它打包了 ADB Keyboard APK (`com.android.adbkeyboard`),该组件使用 GPL-2.0 许可证。ADB Keyboard 组件作为独立工具使用,不影响 AutoGLM-GUI 本身的 Apache 2.0 许可。 202 | 203 | 详见:`AutoGLM_GUI/resources/apks/ADBKeyBoard.LICENSE.txt` 204 | 205 | ## 🙏 致谢 206 | 207 | 本项目基于 [Open-AutoGLM](https://github.com/zai-org/Open-AutoGLM) 构建,感谢 zai-org 团队在 AutoGLM 上的卓越工作。 208 | -------------------------------------------------------------------------------- /phone_agent/adb/device.py: -------------------------------------------------------------------------------- 1 | """Device control utilities for Android automation.""" 2 | 3 | import subprocess 4 | import time 5
| 6 | from phone_agent.config.apps import APP_PACKAGES 7 | 8 | 9 | def get_current_app(device_id: str | None = None) -> str: 10 | """ 11 | Get the currently focused app name. 12 | 13 | Args: 14 | device_id: Optional ADB device ID for multi-device setups. 15 | 16 | Returns: 17 | The app name if recognized, otherwise "System Home". 18 | """ 19 | adb_prefix = _get_adb_prefix(device_id) 20 | 21 | result = subprocess.run( 22 | adb_prefix + ["shell", "dumpsys", "window"], capture_output=True, text=True 23 | ) 24 | output = result.stdout 25 | 26 | # Parse window focus info 27 | for line in output.split("\n"): 28 | if "mCurrentFocus" in line or "mFocusedApp" in line: 29 | for app_name, package in APP_PACKAGES.items(): 30 | if package in line: 31 | return app_name 32 | 33 | return "System Home" 34 | 35 | 36 | def tap(x: int, y: int, device_id: str | None = None, delay: float = 1.0) -> None: 37 | """ 38 | Tap at the specified coordinates. 39 | 40 | Args: 41 | x: X coordinate. 42 | y: Y coordinate. 43 | device_id: Optional ADB device ID. 44 | delay: Delay in seconds after tap. 45 | """ 46 | adb_prefix = _get_adb_prefix(device_id) 47 | 48 | subprocess.run( 49 | adb_prefix + ["shell", "input", "tap", str(x), str(y)], capture_output=True 50 | ) 51 | time.sleep(delay) 52 | 53 | 54 | def double_tap( 55 | x: int, y: int, device_id: str | None = None, delay: float = 1.0 56 | ) -> None: 57 | """ 58 | Double tap at the specified coordinates. 59 | 60 | Args: 61 | x: X coordinate. 62 | y: Y coordinate. 63 | device_id: Optional ADB device ID. 64 | delay: Delay in seconds after double tap. 
65 | """ 66 | adb_prefix = _get_adb_prefix(device_id) 67 | 68 | subprocess.run( 69 | adb_prefix + ["shell", "input", "tap", str(x), str(y)], capture_output=True 70 | ) 71 | time.sleep(0.1) 72 | subprocess.run( 73 | adb_prefix + ["shell", "input", "tap", str(x), str(y)], capture_output=True 74 | ) 75 | time.sleep(delay) 76 | 77 | 78 | def long_press( 79 | x: int, 80 | y: int, 81 | duration_ms: int = 3000, 82 | device_id: str | None = None, 83 | delay: float = 1.0, 84 | ) -> None: 85 | """ 86 | Long press at the specified coordinates. 87 | 88 | Args: 89 | x: X coordinate. 90 | y: Y coordinate. 91 | duration_ms: Duration of press in milliseconds. 92 | device_id: Optional ADB device ID. 93 | delay: Delay in seconds after long press. 94 | """ 95 | adb_prefix = _get_adb_prefix(device_id) 96 | 97 | subprocess.run( 98 | adb_prefix 99 | + ["shell", "input", "swipe", str(x), str(y), str(x), str(y), str(duration_ms)], 100 | capture_output=True, 101 | ) 102 | time.sleep(delay) 103 | 104 | 105 | def swipe( 106 | start_x: int, 107 | start_y: int, 108 | end_x: int, 109 | end_y: int, 110 | duration_ms: int | None = None, 111 | device_id: str | None = None, 112 | delay: float = 1.0, 113 | ) -> None: 114 | """ 115 | Swipe from start to end coordinates. 116 | 117 | Args: 118 | start_x: Starting X coordinate. 119 | start_y: Starting Y coordinate. 120 | end_x: Ending X coordinate. 121 | end_y: Ending Y coordinate. 122 | duration_ms: Duration of swipe in milliseconds (auto-calculated if None). 123 | device_id: Optional ADB device ID. 124 | delay: Delay in seconds after swipe. 
125 | """ 126 | adb_prefix = _get_adb_prefix(device_id) 127 | 128 | if duration_ms is None: 129 | # Calculate duration based on distance 130 | dist_sq = (start_x - end_x) ** 2 + (start_y - end_y) ** 2 131 | duration_ms = int(dist_sq / 1000) 132 | duration_ms = max(1000, min(duration_ms, 2000)) # Clamp between 1000-2000ms 133 | 134 | subprocess.run( 135 | adb_prefix 136 | + [ 137 | "shell", 138 | "input", 139 | "swipe", 140 | str(start_x), 141 | str(start_y), 142 | str(end_x), 143 | str(end_y), 144 | str(duration_ms), 145 | ], 146 | capture_output=True, 147 | ) 148 | time.sleep(delay) 149 | 150 | 151 | def back(device_id: str | None = None, delay: float = 1.0) -> None: 152 | """ 153 | Press the back button. 154 | 155 | Args: 156 | device_id: Optional ADB device ID. 157 | delay: Delay in seconds after pressing back. 158 | """ 159 | adb_prefix = _get_adb_prefix(device_id) 160 | 161 | subprocess.run( 162 | adb_prefix + ["shell", "input", "keyevent", "4"], capture_output=True 163 | ) 164 | time.sleep(delay) 165 | 166 | 167 | def home(device_id: str | None = None, delay: float = 1.0) -> None: 168 | """ 169 | Press the home button. 170 | 171 | Args: 172 | device_id: Optional ADB device ID. 173 | delay: Delay in seconds after pressing home. 174 | """ 175 | adb_prefix = _get_adb_prefix(device_id) 176 | 177 | subprocess.run( 178 | adb_prefix + ["shell", "input", "keyevent", "KEYCODE_HOME"], capture_output=True 179 | ) 180 | time.sleep(delay) 181 | 182 | 183 | def launch_app(app_name: str, device_id: str | None = None, delay: float = 1.0) -> bool: 184 | """ 185 | Launch an app by name. 186 | 187 | Args: 188 | app_name: The app name (must be in APP_PACKAGES). 189 | device_id: Optional ADB device ID. 190 | delay: Delay in seconds after launching. 191 | 192 | Returns: 193 | True if app was launched, False if app not found. 
194 | """ 195 | if app_name not in APP_PACKAGES: 196 | return False 197 | 198 | adb_prefix = _get_adb_prefix(device_id) 199 | package = APP_PACKAGES[app_name] 200 | 201 | subprocess.run( 202 | adb_prefix 203 | + [ 204 | "shell", 205 | "monkey", 206 | "-p", 207 | package, 208 | "-c", 209 | "android.intent.category.LAUNCHER", 210 | "1", 211 | ], 212 | capture_output=True, 213 | ) 214 | time.sleep(delay) 215 | return True 216 | 217 | 218 | def _get_adb_prefix(device_id: str | None) -> list: 219 | """Get ADB command prefix with optional device specifier.""" 220 | if device_id: 221 | return ["adb", "-s", device_id] 222 | return ["adb"] 223 | -------------------------------------------------------------------------------- /phone_agent/model/client.py: -------------------------------------------------------------------------------- 1 | """Model client for AI inference using OpenAI-compatible API.""" 2 | 3 | import json 4 | from dataclasses import dataclass, field 5 | from typing import Any 6 | 7 | from openai import OpenAI 8 | 9 | 10 | @dataclass 11 | class ModelConfig: 12 | """Configuration for the AI model.""" 13 | 14 | base_url: str = "http://localhost:8000/v1" 15 | api_key: str = "EMPTY" 16 | model_name: str = "autoglm-phone-9b" 17 | max_tokens: int = 3000 18 | temperature: float = 0.0 19 | top_p: float = 0.85 20 | frequency_penalty: float = 0.2 21 | extra_body: dict[str, Any] = field(default_factory=dict) 22 | 23 | 24 | @dataclass 25 | class ModelResponse: 26 | """Response from the AI model.""" 27 | 28 | thinking: str 29 | action: str 30 | raw_content: str 31 | 32 | 33 | class ModelClient: 34 | """ 35 | Client for interacting with OpenAI-compatible vision-language models. 36 | 37 | Args: 38 | config: Model configuration. 
39 | """ 40 | 41 | def __init__(self, config: ModelConfig | None = None): 42 | self.config = config or ModelConfig() 43 | self.client = OpenAI(base_url=self.config.base_url, api_key=self.config.api_key) 44 | 45 | def request(self, messages: list[dict[str, Any]]) -> ModelResponse: 46 | """ 47 | Send a request to the model. 48 | 49 | Args: 50 | messages: List of message dictionaries in OpenAI format. 51 | 52 | Returns: 53 | ModelResponse containing thinking and action. 54 | 55 | Raises: 56 | ValueError: If the response cannot be parsed. 57 | """ 58 | response = self.client.chat.completions.create( 59 | messages=messages, 60 | model=self.config.model_name, 61 | max_tokens=self.config.max_tokens, 62 | temperature=self.config.temperature, 63 | top_p=self.config.top_p, 64 | frequency_penalty=self.config.frequency_penalty, 65 | extra_body=self.config.extra_body, 66 | stream=False, 67 | ) 68 | 69 | raw_content = response.choices[0].message.content 70 | 71 | # Parse thinking and action from response 72 | thinking, action = self._parse_response(raw_content) 73 | 74 | return ModelResponse(thinking=thinking, action=action, raw_content=raw_content) 75 | 76 | def _parse_response(self, content: str) -> tuple[str, str]: 77 | """ 78 | Parse the model response into thinking and action parts. 79 | 80 | Parsing rules: 81 | 1. If content contains 'finish(message=', everything before is thinking, 82 | everything from 'finish(message=' onwards is action. 83 | 2. If rule 1 doesn't apply but content contains 'do(action=', 84 | everything before is thinking, everything from 'do(action=' onwards is action. 85 | 3. Fallback: If content contains '', use legacy parsing with XML tags. 86 | 4. Otherwise, return empty thinking and full content as action. 87 | 88 | Args: 89 | content: Raw response content. 90 | 91 | Returns: 92 | Tuple of (thinking, action). 
93 | """ 94 | # Rule 1: Check for finish(message= 95 | if "finish(message=" in content: 96 | parts = content.split("finish(message=", 1) 97 | thinking = parts[0].strip() 98 | action = "finish(message=" + parts[1] 99 | return thinking, action 100 | 101 | # Rule 2: Check for do(action= 102 | if "do(action=" in content: 103 | parts = content.split("do(action=", 1) 104 | thinking = parts[0].strip() 105 | action = "do(action=" + parts[1] 106 | return thinking, action 107 | 108 | # Rule 3: Fallback to legacy XML tag parsing 109 | if "" in content: 110 | parts = content.split("", 1) 111 | thinking = parts[0].replace("", "").replace("", "").strip() 112 | action = parts[1].replace("", "").strip() 113 | return thinking, action 114 | 115 | # Rule 4: No markers found, return content as action 116 | return "", content 117 | 118 | 119 | class MessageBuilder: 120 | """Helper class for building conversation messages.""" 121 | 122 | @staticmethod 123 | def create_system_message(content: str) -> dict[str, Any]: 124 | """Create a system message.""" 125 | return {"role": "system", "content": content} 126 | 127 | @staticmethod 128 | def create_user_message( 129 | text: str, image_base64: str | None = None 130 | ) -> dict[str, Any]: 131 | """ 132 | Create a user message with optional image. 133 | 134 | Args: 135 | text: Text content. 136 | image_base64: Optional base64-encoded image. 137 | 138 | Returns: 139 | Message dictionary. 
140 | """ 141 | content = [] 142 | 143 | if image_base64: 144 | content.append( 145 | { 146 | "type": "image_url", 147 | "image_url": {"url": f"data:image/png;base64,{image_base64}"}, 148 | } 149 | ) 150 | 151 | content.append({"type": "text", "text": text}) 152 | 153 | return {"role": "user", "content": content} 154 | 155 | @staticmethod 156 | def create_assistant_message(content: str) -> dict[str, Any]: 157 | """Create an assistant message.""" 158 | return {"role": "assistant", "content": content} 159 | 160 | @staticmethod 161 | def remove_images_from_message(message: dict[str, Any]) -> dict[str, Any]: 162 | """ 163 | Remove image content from a message to save context space. 164 | 165 | Args: 166 | message: Message dictionary. 167 | 168 | Returns: 169 | Message with images removed. 170 | """ 171 | if isinstance(message.get("content"), list): 172 | message["content"] = [ 173 | item for item in message["content"] if item.get("type") == "text" 174 | ] 175 | return message 176 | 177 | @staticmethod 178 | def build_screen_info(current_app: str, **extra_info) -> str: 179 | """ 180 | Build screen info string for the model. 181 | 182 | Args: 183 | current_app: Current app name. 184 | **extra_info: Additional info to include. 185 | 186 | Returns: 187 | JSON string with screen info. 
# Default configuration
DEFAULT_MODEL_NAME = "autoglm-phone-9b"

# Highest valid TCP port. socket.bind() raises OverflowError (which is not an
# OSError) for anything above it, so the port scan must never go past this.
_MAX_TCP_PORT = 65535


def find_available_port(
    start_port: int = 8000, max_attempts: int = 100, host: str = "127.0.0.1"
) -> int:
    """Find an available port starting from start_port.

    Ports are probed in ascending order by binding a temporary IPv4 TCP
    socket; the first port that binds successfully is returned. The scan is
    clamped to the valid TCP range, so a window that extends past 65535 ends
    in ``RuntimeError`` instead of leaking ``OverflowError`` from ``bind()``.

    Args:
        start_port: Port to start searching from
        max_attempts: Maximum number of ports to try
        host: Host to bind to (default: 127.0.0.1)

    Returns:
        An available port number

    Raises:
        RuntimeError: If no available port found within max_attempts
    """
    requested_last = start_port + max_attempts - 1
    scan_last = min(requested_last, _MAX_TCP_PORT)

    for port in range(start_port, scan_last + 1):
        try:
            # The probe socket is closed again as soon as the bind succeeds.
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
                probe.bind((host, port))
                return port
        except OSError:
            # Port is in use (or not bindable), try next one
            continue

    raise RuntimeError(
        f"Could not find available port in range {start_port}-{requested_last}"
    )


def open_browser(host: str, port: int, delay: float = 1.5) -> None:
    """Open browser after a delay to ensure server is ready.

    The work happens on a daemon thread so the caller can go on to start the
    server immediately. When ``host`` is the wildcard address ``0.0.0.0`` the
    browser is pointed at ``127.0.0.1`` instead.

    Args:
        host: Server host
        port: Server port
        delay: Delay in seconds before opening browser
    """

    def _open():
        time.sleep(delay)
        url = (
            f"http://127.0.0.1:{port}" if host == "0.0.0.0" else f"http://{host}:{port}"
        )
        try:
            webbrowser.open(url)
        except Exception as e:
            # Non-critical failure, just log it
            print(f"Could not open browser automatically: {e}", file=sys.stderr)

    thread = threading.Thread(target=_open, daemon=True)
    thread.start()


def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser used by :func:`main`."""
    parser = argparse.ArgumentParser(
        description="AutoGLM-GUI - Web GUI for AutoGLM Phone Agent"
    )
    parser.add_argument(
        "--base-url",
        required=False,
        help="Base URL of the model API (e.g., http://localhost:8080/v1)",
    )
    parser.add_argument(
        "--model",
        default=None,
        help=f"Model name to use (default: {DEFAULT_MODEL_NAME}, or from config file)",
    )
    parser.add_argument(
        "--apikey",
        default=None,
        help="API key for the model API (default: from AUTOGLM_API_KEY or unset)",
    )
    parser.add_argument(
        "--host",
        default="127.0.0.1",
        help="Host to bind the server to (default: 127.0.0.1)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=None,
        help="Port to bind the server to (default: auto-find starting from 8000)",
    )
    parser.add_argument(
        "--reload",
        action="store_true",
        help="Enable auto-reload for development",
    )
    parser.add_argument(
        "--no-browser",
        action="store_true",
        help="Do not open browser automatically",
    )
    parser.add_argument(
        "--log-level",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        default="INFO",
        help="Console log level (default: INFO)",
    )
    parser.add_argument(
        "--log-file",
        default="logs/autoglm_{time:YYYY-MM-DD}.log",
        help="Log file path (default: logs/autoglm_{time:YYYY-MM-DD}.log)",
    )
    parser.add_argument(
        "--no-log-file",
        action="store_true",
        help="Disable file logging",
    )
    return parser


def main() -> None:
    """Start the AutoGLM-GUI server.

    Parses the command line, picks a port when none was given, configures
    logging, resolves the model configuration, prints the startup banner,
    optionally opens the browser and finally hands control to uvicorn.
    Exits with status 1 when no free port can be found.
    """
    args = _build_arg_parser().parse_args()

    # Auto-find available port if not specified
    if args.port is None:
        try:
            args.port = find_available_port(start_port=8000, host=args.host)
            print(f"\nAuto-detected available port: {args.port}\n")
        except RuntimeError as e:
            print(f"\nError: {e}", file=sys.stderr)
            sys.exit(1)

    # Imported here rather than at module top, as in the original code.
    import uvicorn

    from AutoGLM_GUI import server
    from AutoGLM_GUI.config import config
    from AutoGLM_GUI.config_manager import config_manager
    from AutoGLM_GUI.logger import configure_logger

    # Configure logging system
    configure_logger(
        console_level=args.log_level,
        log_file=None if args.no_log_file else args.log_file,
    )

    # ==================== Configuration system initialization ====================
    # Use the unified config manager (four priority layers: CLI > ENV > FILE > DEFAULT)

    # 1. Apply CLI argument configuration (highest priority)
    config_manager.set_cli_config(
        base_url=args.base_url, model_name=args.model, api_key=args.apikey
    )

    # 2. Load configuration from environment variables
    config_manager.load_env_config()

    # 3. Load the configuration file
    config_manager.load_file_config()

    # 4. Get the merged, effective configuration
    effective_config = config_manager.get_effective_config()

    # 5. Sync to environment variables (needed for reload mode)
    config_manager.sync_to_env()

    # 6. Refresh the legacy config object (keeps existing code compatible)
    config.refresh_from_env()

    # Determine where the configuration came from
    config_source = config_manager.get_config_source()

    # Display startup banner
    # NOTE(review): banner padding is reproduced as visible in the reviewed
    # text; confirm the leading/alignment spaces against the on-disk file.
    print()
    print("=" * 50)
    print(" AutoGLM-GUI - Phone Agent Web Interface")
    print("=" * 50)
    print(f" Version: {__version__}")
    print()
    print(f" Server: http://{args.host}:{args.port}")
    print()
    print(" Model Configuration:")
    print(f" Source: {config_source.value}")
    print(f" Base URL: {effective_config.base_url or '(not set)'}")
    print(f" Model: {effective_config.model_name}")
    if effective_config.api_key != "EMPTY":
        print(" API Key: (configured)")
    print()

    # Warning if base_url is not configured
    if not effective_config.base_url:
        print(" [!] WARNING: base_url is not configured!")
        print(" Please configure via frontend or use --base-url")
        print()

    print("=" * 50)
    print(" Press Ctrl+C to stop")
    print("=" * 50)
    print()

    # Open browser automatically unless disabled
    if not args.no_browser:
        open_browser(args.host, args.port)

    # Reload mode is given the app as an import string instead of the object.
    uvicorn.run(
        server.app if not args.reload else "AutoGLM_GUI.server:app",
        host=args.host,
        port=args.port,
        reload=args.reload,
    )
JSON.parse(saved) : false; 10 | } catch (error) { 11 | console.warn('Failed to load sidebar collapsed state:', error); 12 | return false; 13 | } 14 | }; 15 | 16 | interface DeviceSidebarProps { 17 | devices: Device[]; 18 | currentDeviceId: string; 19 | onSelectDevice: (deviceId: string) => void; 20 | onOpenConfig: () => void; 21 | onConnectWifi: (deviceId: string) => void; 22 | onDisconnectWifi: (deviceId: string) => void; 23 | } 24 | 25 | export function DeviceSidebar({ 26 | devices, 27 | currentDeviceId, 28 | onSelectDevice, 29 | onOpenConfig, 30 | onConnectWifi, 31 | onDisconnectWifi, 32 | }: DeviceSidebarProps) { 33 | const [isCollapsed, setIsCollapsed] = useState(getInitialCollapsedState); 34 | 35 | useEffect(() => { 36 | localStorage.setItem('sidebar-collapsed', JSON.stringify(isCollapsed)); 37 | }, [isCollapsed]); 38 | 39 | // 键盘快捷键支持 40 | useEffect(() => { 41 | const handleKeyDown = (event: KeyboardEvent) => { 42 | if ((event.metaKey || event.ctrlKey) && event.key === 'b') { 43 | event.preventDefault(); 44 | setIsCollapsed(!isCollapsed); 45 | } 46 | }; 47 | window.addEventListener('keydown', handleKeyDown); 48 | return () => window.removeEventListener('keydown', handleKeyDown); 49 | }, [isCollapsed]); 50 | 51 | const toggleCollapse = () => { 52 | setIsCollapsed(!isCollapsed); 53 | }; 54 | 55 | return ( 56 | <> 57 | {/* 半圆形展开按钮(当侧边栏隐藏时显示) */} 58 | {isCollapsed && ( 59 | 78 | )} 79 | 80 | {/* 侧边栏主体 */} 81 |
84 | {/* 头部 */} 85 |
86 |
87 |

88 | 94 | 100 | 101 | 设备列表 102 |

103 |

104 | 共 {devices.length} 个设备 105 |

106 |
107 | 108 | 127 |
128 | 129 | {/* 设备列表 */} 130 |
131 | {devices.length === 0 ? ( 132 |
133 | 139 | 145 | 146 |

未检测到设备

147 |

请连接 ADB 设备

148 |
149 | ) : ( 150 | devices.map(device => ( 151 | onSelectDevice(device.id)} 160 | onConnectWifi={async () => { 161 | await onConnectWifi(device.id); 162 | }} 163 | onDisconnectWifi={async () => { 164 | await onDisconnectWifi(device.id); 165 | }} 166 | /> 167 | )) 168 | )} 169 |
170 | 171 | {/* 底部操作栏 */} 172 |
173 | 198 |
199 |
200 | 201 | ); 202 | } 203 | -------------------------------------------------------------------------------- /.github/workflows/pr-lint.yml: -------------------------------------------------------------------------------- 1 | name: PR Lint & Format Check 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize, reopened, ready_for_review] 6 | branches: [main, master] 7 | 8 | permissions: 9 | contents: read 10 | pull-requests: write # 用于在 PR 中评论结果 11 | 12 | jobs: 13 | lint-and-format: 14 | name: Lint & Format Check 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Checkout code 19 | uses: actions/checkout@v4 20 | 21 | - name: Set up Python 22 | uses: actions/setup-python@v4 23 | with: 24 | python-version: '3.11' 25 | 26 | - name: Set up Node.js 27 | uses: actions/setup-node@v4 28 | with: 29 | node-version: '18' 30 | 31 | - name: Set up pnpm 32 | uses: pnpm/action-setup@v3 33 | with: 34 | version: 10 35 | 36 | - name: Get pnpm store directory 37 | shell: bash 38 | run: | 39 | echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV 40 | 41 | - name: Set up pnpm cache 42 | uses: actions/cache@v4 43 | with: 44 | path: ${{ env.STORE_PATH }} 45 | key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }} 46 | restore-keys: | 47 | ${{ runner.os }}-pnpm-store- 48 | 49 | - name: Install uv 50 | uses: astral-sh/setup-uv@v3 51 | with: 52 | version: "latest" 53 | 54 | - name: Install backend dependencies 55 | run: | 56 | echo "📦 Installing backend dependencies..." 57 | uv sync --dev 58 | 59 | - name: Install frontend dependencies 60 | run: | 61 | echo "📦 Installing frontend dependencies..." 62 | cd frontend 63 | pnpm install 64 | cd .. 65 | 66 | - name: Run unified lint script (check-only mode) 67 | run: | 68 | echo "🚀 Running unified lint script in check-only mode..." 69 | uv run python scripts/lint.py --check-only 70 | 71 | - name: Run backend lint and format check 72 | if: always() 73 | run: | 74 | echo "🐍 Checking backend Python code..." 
75 | uv run ruff check --output-format=github 76 | 77 | echo "🎨 Checking backend Python format..." 78 | uv run ruff format --check --diff 79 | 80 | - name: Run frontend lint and format check 81 | if: always() 82 | run: | 83 | echo "📱 Checking frontend JavaScript/TypeScript code..." 84 | cd frontend 85 | pnpm lint 86 | 87 | echo "🎨 Checking frontend format..." 88 | pnpm format:check 89 | 90 | echo "🔷 Running TypeScript type check..." 91 | pnpm type-check 92 | cd .. 93 | 94 | - name: Check for uncommitted changes 95 | if: always() 96 | run: | 97 | if [ -n "$(git status --porcelain)" ]; then 98 | echo "❌ There are uncommitted changes after running formatters!" 99 | echo "Please run the following locally to fix:" 100 | echo " uv run python scripts/lint.py" 101 | echo "" 102 | echo "Or run the individual commands:" 103 | echo " Backend: uv run ruff check --fix && uv run ruff format" 104 | echo " Frontend: cd frontend && pnpm lint --fix && pnpm format" 105 | echo "" 106 | echo "Files with changes:" 107 | git status --porcelain 108 | exit 1 109 | else 110 | echo "✅ No formatting changes detected" 111 | fi 112 | 113 | - name: Comment PR with results 114 | # 只在非 fork PR 时尝试评论(fork PR 没有写权限) 115 | if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository 116 | continue-on-error: true # 即使评论失败也不阻塞 workflow 117 | uses: actions/github-script@v7 118 | with: 119 | script: | 120 | const { data: comments } = await github.rest.issues.listComments({ 121 | owner: context.repo.owner, 122 | repo: context.repo.repo, 123 | issue_number: context.issue.number, 124 | }); 125 | 126 | const botComment = comments.find(comment => 127 | comment.user.type === 'Bot' && 128 | comment.body.includes('🚀 Lint & Format Check Results') 129 | ); 130 | 131 | const status = '${{ job.status }}' === 'success' ? '✅ PASSED' : '❌ FAILED'; 132 | const color = '${{ job.status }}' === 'success' ? 
'green' : 'red'; 133 | 134 | const commentBody = ` 135 | ## 🚀 Lint & Format Check Results: ${status} 136 | 137 | ### What was checked: 138 | - 🐍 **Backend**: Ruff linting + formatting 139 | - 📱 **Frontend**: ESLint + Prettier + TypeScript types 140 | - 🔧 **Unified**: Custom lint script validation 141 | 142 | ${'${{ job.status }}' === 'success' ? ` 143 | ### ✅ All checks passed! 144 | Your code follows the project's style guidelines. 145 | 146 | ### Checks performed: 147 | - Backend Ruff lint ✅ 148 | - Backend Ruff format ✅ 149 | - Frontend ESLint ✅ 150 | - Frontend Prettier format ✅ 151 | - Frontend TypeScript types ✅ 152 | - Unified lint script ✅ 153 | ` : ` 154 | ### ❌ Some checks failed! 155 | 156 | #### How to fix: 157 | 1. Run locally to auto-fix most issues: 158 | \`\`\`bash 159 | uv run python scripts/lint.py 160 | \`\`\` 161 | 162 | 2. Commit the fixes and push: 163 | \`\`\`bash 164 | git add . 165 | git commit -m "fix: apply lint and format fixes" 166 | git push 167 | \`\`\` 168 | 169 | 3. The checks will run again automatically. 170 | 171 | #### Individual fix commands: 172 | **Backend:** 173 | \`\`\`bash 174 | uv run ruff check --fix 175 | uv run ruff format 176 | \`\`\` 177 | 178 | **Frontend:** 179 | \`\`\`bash 180 | cd frontend 181 | pnpm lint --fix 182 | pnpm format 183 | cd .. 184 | \`\`\` 185 | `} 186 | 187 | --- 188 | *This is an automated check. 
For questions, ask in the PR discussion.* 189 | `; 190 | 191 | if (botComment) { 192 | await github.rest.issues.updateComment({ 193 | owner: context.repo.owner, 194 | repo: context.repo.repo, 195 | comment_id: botComment.id, 196 | body: commentBody, 197 | }); 198 | } else { 199 | await github.rest.issues.createComment({ 200 | owner: context.repo.owner, 201 | repo: context.repo.repo, 202 | issue_number: context.issue.number, 203 | body: commentBody, 204 | }); 205 | } 206 | 207 | # 构建任务 - 仅在 lint 通过后运行 208 | build: 209 | name: Build Check 210 | runs-on: ubuntu-latest 211 | needs: lint-and-format 212 | if: always() && needs.lint-and-format.result == 'success' 213 | 214 | steps: 215 | - name: Checkout code 216 | uses: actions/checkout@v4 217 | 218 | - name: Set up Node.js 219 | uses: actions/setup-node@v4 220 | with: 221 | node-version: '18' 222 | 223 | - name: Set up pnpm 224 | uses: pnpm/action-setup@v3 225 | with: 226 | version: 10 227 | 228 | - name: Get pnpm store directory 229 | shell: bash 230 | run: | 231 | echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV 232 | 233 | - name: Set up pnpm cache 234 | uses: actions/cache@v4 235 | with: 236 | path: ${{ env.STORE_PATH }} 237 | key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }} 238 | restore-keys: | 239 | ${{ runner.os }}-pnpm-store- 240 | 241 | - name: Install frontend dependencies 242 | run: | 243 | cd frontend 244 | pnpm install 245 | 246 | - name: Build frontend 247 | run: | 248 | cd frontend 249 | pnpm build 250 | 251 | - name: Upload build artifacts 252 | uses: actions/upload-artifact@v4 253 | with: 254 | name: frontend-dist 255 | path: frontend/dist/ 256 | retention-days: 7 257 | -------------------------------------------------------------------------------- /phone_agent/agent.py: -------------------------------------------------------------------------------- 1 | """Main PhoneAgent class for orchestrating phone automation.""" 2 | 3 | import json 4 | import traceback 5 | from 
@dataclass
class AgentConfig:
    """Settings that control how a PhoneAgent runs."""

    max_steps: int = 100  # upper bound on steps taken by PhoneAgent.run()
    device_id: str | None = None  # forwarded to the adb helpers and ActionHandler
    lang: str = "cn"  # selects the default system prompt and console labels
    system_prompt: str | None = None  # filled in from ``lang`` when left as None
    verbose: bool = True  # print thinking/action/tracebacks to stdout

    def __post_init__(self):
        # Fall back to the language-specific default prompt.
        if self.system_prompt is None:
            self.system_prompt = get_system_prompt(self.lang)


@dataclass
class StepResult:
    """Outcome of one agent step."""

    success: bool  # success flag reported by the action handler
    finished: bool  # True once the task is over (done, or aborted on error)
    action: dict[str, Any] | None  # parsed action, or None if the model call failed
    thinking: str  # the model's reasoning text for this step
    message: str | None = None  # handler message, else the action's own message


class PhoneAgent:
    """
    AI-powered agent for automating Android phone interactions.

    Each step captures the screen, asks a vision-language model what to do
    next, and executes the action the model returns.

    Args:
        model_config: Configuration for the AI model.
        agent_config: Configuration for the agent behavior.
        confirmation_callback: Optional callback for sensitive action confirmation.
        takeover_callback: Optional callback for takeover requests.

    Example:
        >>> from phone_agent import PhoneAgent
        >>> from phone_agent.model import ModelConfig
        >>>
        >>> model_config = ModelConfig(base_url="http://localhost:8000/v1")
        >>> agent = PhoneAgent(model_config)
        >>> agent.run("Open WeChat and send a message to John")
    """

    def __init__(
        self,
        model_config: ModelConfig | None = None,
        agent_config: AgentConfig | None = None,
        confirmation_callback: Callable[[str], bool] | None = None,
        takeover_callback: Callable[[str], None] | None = None,
    ):
        self.model_config = model_config or ModelConfig()
        self.agent_config = agent_config or AgentConfig()

        self.model_client = ModelClient(self.model_config)
        self.action_handler = ActionHandler(
            device_id=self.agent_config.device_id,
            confirmation_callback=confirmation_callback,
            takeover_callback=takeover_callback,
        )

        # Conversation history sent to the model, and the number of steps taken.
        self._context: list[dict[str, Any]] = []
        self._step_count = 0

    def run(self, task: str) -> str:
        """
        Drive the agent until the task finishes or the step budget runs out.

        Args:
            task: Natural language description of the task.

        Returns:
            The final step's message (or "Task completed" when it has none),
            or "Max steps reached" if the budget was exhausted first.
        """
        self.reset()

        # The first step carries the user's task; later steps only get screen info.
        outcome = self._execute_step(task, is_first=True)
        while not outcome.finished:
            if self._step_count >= self.agent_config.max_steps:
                return "Max steps reached"
            outcome = self._execute_step(is_first=False)

        return outcome.message or "Task completed"

    def step(self, task: str | None = None) -> StepResult:
        """
        Execute a single step of the agent.

        Useful for manual control or debugging.

        Args:
            task: Task description (only needed for first step).

        Returns:
            StepResult with step details.

        Raises:
            ValueError: If the context is empty and no task was given.
        """
        starting_fresh = not self._context
        if starting_fresh and not task:
            raise ValueError("Task is required for the first step")

        return self._execute_step(task, starting_fresh)

    def reset(self) -> None:
        """Reset the agent state for a new task."""
        self._context = []
        self._step_count = 0

    def _execute_step(
        self, user_prompt: str | None = None, is_first: bool = False
    ) -> StepResult:
        """Run one capture -> model -> action cycle and return its result."""
        self._step_count += 1
        cfg = self.agent_config

        # Capture current screen state
        shot = get_screenshot(cfg.device_id)
        foreground_app = get_current_app(cfg.device_id)

        # The first turn is preceded by the system prompt and led by the task
        # text; every later turn is led by a fixed header instead.
        if is_first:
            self._context.append(
                MessageBuilder.create_system_message(cfg.system_prompt)
            )
            lead = user_prompt
        else:
            lead = "** Screen Info **"

        screen_info = MessageBuilder.build_screen_info(foreground_app)
        self._context.append(
            MessageBuilder.create_user_message(
                text=f"{lead}\n\n{screen_info}", image_base64=shot.base64_data
            )
        )

        # Get model response; a failure here ends the task.
        try:
            response = self.model_client.request(self._context)
        except Exception as e:
            if cfg.verbose:
                traceback.print_exc()
            return StepResult(
                success=False,
                finished=True,
                action=None,
                thinking="",
                message=f"Model error: {e}",
            )

        # Parse action from response; unparseable output becomes a finish
        # action that carries the raw text as its message.
        try:
            action = parse_action(response.action)
        except ValueError:
            if cfg.verbose:
                traceback.print_exc()
            action = finish(message=response.action)

        if cfg.verbose:
            # Print thinking process
            labels = get_messages(cfg.lang)
            print("\n" + "=" * 50)
            print(f"💭 {labels['thinking']}:")
            print("-" * 50)
            print(response.thinking)
            print("-" * 50)
            print(f"🎯 {labels['action']}:")
            print(json.dumps(action, ensure_ascii=False, indent=2))
            print("=" * 50 + "\n")

        # Remove image from context to save space
        self._context[-1] = MessageBuilder.remove_images_from_message(self._context[-1])

        # Execute action; if that raises, execute a finish action carrying
        # the error text instead.
        try:
            result = self.action_handler.execute(action, shot.width, shot.height)
        except Exception as e:
            if cfg.verbose:
                traceback.print_exc()
            result = self.action_handler.execute(
                finish(message=str(e)), shot.width, shot.height
            )

        # Add assistant response to context
        self._context.append(
            MessageBuilder.create_assistant_message(
                f"{response.thinking}{response.action}"
            )
        )

        # Check if finished
        finished = action.get("_metadata") == "finish" or result.should_finish

        if finished and cfg.verbose:
            labels = get_messages(cfg.lang)
            print("\n" + "🎉 " + "=" * 48)
            done_label = labels["task_completed"]
            summary = result.message or action.get("message", labels["done"])
            print(f"✅ {done_label}: {summary}")
            print("=" * 50 + "\n")

        return StepResult(
            success=result.success,
            finished=finished,
            action=action,
            thinking=response.thinking,
            message=result.message or action.get("message"),
        )

    @property
    def context(self) -> list[dict[str, Any]]:
        """Shallow copy of the conversation context (the message dicts are shared)."""
        return list(self._context)

    @property
    def step_count(self) -> int:
        """Number of steps executed since the last reset."""
        return self._step_count
Fitness 57 | "keep": "com.gotokeep.keep", 58 | "美柚": "com.lingan.seeyou", 59 | # News & Information 60 | "腾讯新闻": "com.tencent.news", 61 | "今日头条": "com.ss.android.article.news", 62 | # Real Estate 63 | "贝壳找房": "com.lianjia.beike", 64 | "安居客": "com.anjuke.android.app", 65 | # Finance 66 | "同花顺": "com.hexin.plat.android", 67 | # Games 68 | "星穹铁道": "com.miHoYo.hkrpg", 69 | "崩坏:星穹铁道": "com.miHoYo.hkrpg", 70 | "恋与深空": "com.papegames.lysk.cn", 71 | "AndroidSystemSettings": "com.android.settings", 72 | "Android System Settings": "com.android.settings", 73 | "Android System Settings": "com.android.settings", 74 | "Android-System-Settings": "com.android.settings", 75 | "Settings": "com.android.settings", 76 | "AudioRecorder": "com.android.soundrecorder", 77 | "audiorecorder": "com.android.soundrecorder", 78 | "Bluecoins": "com.rammigsoftware.bluecoins", 79 | "bluecoins": "com.rammigsoftware.bluecoins", 80 | "Broccoli": "com.flauschcode.broccoli", 81 | "broccoli": "com.flauschcode.broccoli", 82 | "Booking.com": "com.booking", 83 | "Booking": "com.booking", 84 | "booking.com": "com.booking", 85 | "booking": "com.booking", 86 | "BOOKING.COM": "com.booking", 87 | "Chrome": "com.android.chrome", 88 | "chrome": "com.android.chrome", 89 | "Google Chrome": "com.android.chrome", 90 | "Clock": "com.android.deskclock", 91 | "clock": "com.android.deskclock", 92 | "Contacts": "com.android.contacts", 93 | "contacts": "com.android.contacts", 94 | "Duolingo": "com.duolingo", 95 | "duolingo": "com.duolingo", 96 | "Expedia": "com.expedia.bookings", 97 | "expedia": "com.expedia.bookings", 98 | "Files": "com.android.fileexplorer", 99 | "files": "com.android.fileexplorer", 100 | "File Manager": "com.android.fileexplorer", 101 | "file manager": "com.android.fileexplorer", 102 | "gmail": "com.google.android.gm", 103 | "Gmail": "com.google.android.gm", 104 | "GoogleMail": "com.google.android.gm", 105 | "Google Mail": "com.google.android.gm", 106 | "GoogleFiles": "com.google.android.apps.nbu.files", 
107 | "googlefiles": "com.google.android.apps.nbu.files", 108 | "FilesbyGoogle": "com.google.android.apps.nbu.files", 109 | "GoogleCalendar": "com.google.android.calendar", 110 | "Google-Calendar": "com.google.android.calendar", 111 | "Google Calendar": "com.google.android.calendar", 112 | "google-calendar": "com.google.android.calendar", 113 | "google calendar": "com.google.android.calendar", 114 | "GoogleChat": "com.google.android.apps.dynamite", 115 | "Google Chat": "com.google.android.apps.dynamite", 116 | "Google-Chat": "com.google.android.apps.dynamite", 117 | "GoogleClock": "com.google.android.deskclock", 118 | "Google Clock": "com.google.android.deskclock", 119 | "Google-Clock": "com.google.android.deskclock", 120 | "GoogleContacts": "com.google.android.contacts", 121 | "Google-Contacts": "com.google.android.contacts", 122 | "Google Contacts": "com.google.android.contacts", 123 | "google-contacts": "com.google.android.contacts", 124 | "google contacts": "com.google.android.contacts", 125 | "GoogleDocs": "com.google.android.apps.docs.editors.docs", 126 | "Google Docs": "com.google.android.apps.docs.editors.docs", 127 | "googledocs": "com.google.android.apps.docs.editors.docs", 128 | "google docs": "com.google.android.apps.docs.editors.docs", 129 | "Google Drive": "com.google.android.apps.docs", 130 | "Google-Drive": "com.google.android.apps.docs", 131 | "google drive": "com.google.android.apps.docs", 132 | "google-drive": "com.google.android.apps.docs", 133 | "GoogleDrive": "com.google.android.apps.docs", 134 | "Googledrive": "com.google.android.apps.docs", 135 | "googledrive": "com.google.android.apps.docs", 136 | "GoogleFit": "com.google.android.apps.fitness", 137 | "googlefit": "com.google.android.apps.fitness", 138 | "GoogleKeep": "com.google.android.keep", 139 | "googlekeep": "com.google.android.keep", 140 | "GoogleMaps": "com.google.android.apps.maps", 141 | "Google Maps": "com.google.android.apps.maps", 142 | "googlemaps": 
"com.google.android.apps.maps", 143 | "google maps": "com.google.android.apps.maps", 144 | "Google Play Books": "com.google.android.apps.books", 145 | "Google-Play-Books": "com.google.android.apps.books", 146 | "google play books": "com.google.android.apps.books", 147 | "google-play-books": "com.google.android.apps.books", 148 | "GooglePlayBooks": "com.google.android.apps.books", 149 | "googleplaybooks": "com.google.android.apps.books", 150 | "GooglePlayStore": "com.android.vending", 151 | "Google Play Store": "com.android.vending", 152 | "Google-Play-Store": "com.android.vending", 153 | "GoogleSlides": "com.google.android.apps.docs.editors.slides", 154 | "Google Slides": "com.google.android.apps.docs.editors.slides", 155 | "Google-Slides": "com.google.android.apps.docs.editors.slides", 156 | "GoogleTasks": "com.google.android.apps.tasks", 157 | "Google Tasks": "com.google.android.apps.tasks", 158 | "Google-Tasks": "com.google.android.apps.tasks", 159 | "Joplin": "net.cozic.joplin", 160 | "joplin": "net.cozic.joplin", 161 | "McDonald": "com.mcdonalds.app", 162 | "mcdonald": "com.mcdonalds.app", 163 | "Osmand": "net.osmand", 164 | "osmand": "net.osmand", 165 | "PiMusicPlayer": "com.Project100Pi.themusicplayer", 166 | "pimusicplayer": "com.Project100Pi.themusicplayer", 167 | "Quora": "com.quora.android", 168 | "quora": "com.quora.android", 169 | "Reddit": "com.reddit.frontpage", 170 | "reddit": "com.reddit.frontpage", 171 | "RetroMusic": "code.name.monkey.retromusic", 172 | "retromusic": "code.name.monkey.retromusic", 173 | "SimpleCalendarPro": "com.scientificcalculatorplus.simplecalculator.basiccalculator.mathcalc", 174 | "SimpleSMSMessenger": "com.simplemobiletools.smsmessenger", 175 | "Telegram": "org.telegram.messenger", 176 | "temu": "com.einnovation.temu", 177 | "Temu": "com.einnovation.temu", 178 | "Tiktok": "com.zhiliaoapp.musically", 179 | "tiktok": "com.zhiliaoapp.musically", 180 | "Twitter": "com.twitter.android", 181 | "twitter": "com.twitter.android", 
def get_package_name(app_name: str) -> str | None:
    """
    Look up the Android package registered under an app name.

    The lookup is an exact, case-sensitive match against ``APP_PACKAGES``.

    Args:
        app_name: The display name of the app.

    Returns:
        The Android package name, or None if not found.
    """
    return APP_PACKAGES[app_name] if app_name in APP_PACKAGES else None


def get_app_name(package_name: str) -> str | None:
    """
    Reverse lookup: find an app name that maps to the given package.

    Several names can share one package; the first one in the mapping's
    insertion order is returned.

    Args:
        package_name: The Android package name.

    Returns:
        The display name of the app, or None if not found.
    """
    matching_names = (
        name for name, package in APP_PACKAGES.items() if package == package_name
    )
    return next(matching_names, None)


def list_supported_apps() -> list[str]:
    """
    List every app name known to the mapping.

    Returns:
        A new list of app names, in the mapping's insertion order.
    """
    return [*APP_PACKAGES]
setLoading(false); 67 | } 68 | }; 69 | 70 | return ( 71 | <> 72 |
{ 77 | if (e.key === 'Enter' || e.key === ' ') { 78 | onClick(); 79 | } 80 | }} 81 | className={`w-full text-left px-4 py-3 rounded-xl transition-all duration-300 cursor-pointer border relative group ${ 82 | isActive 83 | ? 'bg-blue-500 border-blue-500 text-white shadow-lg shadow-blue-500/20' 84 | : 'bg-white dark:bg-gray-800 border-transparent hover:bg-gray-50 dark:hover:bg-gray-700/50' 85 | }`} 86 | > 87 |
88 | {/* 状态指示器 */} 89 |
97 | 98 | {/* 设备信息 */} 99 |
100 |
101 | 106 | {model || '未知设备'} 107 | 108 |
109 | 114 | {id} 115 | 116 |
117 | 118 | {/* 操作按钮区 */} 119 |
120 | {isUsb && onConnectWifi && ( 121 | 169 | )} 170 | 171 | {isRemote && onDisconnectWifi && ( 172 | 220 | )} 221 |
222 | 223 | {/* 初始化状态标识 */} 224 | {isInitialized && ( 225 |
233 | 239 | 245 | 246 |
247 | )} 248 |
249 |
def get_current_version() -> str:
    """Extract the current version from pyproject.toml.

    Returns:
        The value of the first line of the form ``version = "..."`` that
        starts at the beginning of a line.

    Exits the process with status 1 if pyproject.toml does not exist or
    contains no such line.
    """
    if not PYPROJECT_PATH.exists():
        print(f"Error: {PYPROJECT_PATH} not found.")
        sys.exit(1)

    # TOML documents are UTF-8; do not depend on the platform's locale
    # encoding (matches how electron/package.json is read in this script).
    content = PYPROJECT_PATH.read_text(encoding="utf-8")
    match = re.search(r'^version\s*=\s*"([^"]+)"', content, re.MULTILINE)

    if not match:
        print("Error: Could not find version in pyproject.toml")
        sys.exit(1)

    return match.group(1)
def update_pyproject_version(new_version: str) -> bool:
    """Rewrite the project version in pyproject.toml.

    Only the first line of the form ``version = "..."`` is replaced, which is
    the same line ``get_current_version`` reads. Any later ``version = "..."``
    keys in other tables of the file are left untouched.

    Args:
        new_version: The version string to write.

    Returns:
        True if the file was rewritten, False if the substitution left the
        content unchanged (no version line found, or it already holds
        ``new_version``).
    """
    print(f"Updating pyproject.toml to version {new_version}...")

    # TOML documents are UTF-8; be explicit so the result does not depend on
    # the platform's locale encoding.
    content = PYPROJECT_PATH.read_text(encoding="utf-8")
    new_content = re.sub(
        r'^version\s*=\s*"[^"]+"',
        f'version = "{new_version}"',
        content,
        count=1,  # replace the first match only
        flags=re.MULTILINE,
    )

    if content == new_content:
        print("Error: Failed to update version in pyproject.toml")
        return False

    PYPROJECT_PATH.write_text(new_content, encoding="utf-8")
    print(f'Updated pyproject.toml: version = "{new_version}"')
    return True
def git_commit_version(version: str, dry_run: bool = False) -> bool:
    """Stage the files touched by a version bump and commit them.

    The candidate files are pyproject.toml, electron/package.json and uv.lock.
    Only candidates that exist in the project root are passed to ``git add``:
    the electron/package.json update step is allowed to skip a missing file,
    and ``git add`` fails on a path that matches nothing.

    Args:
        version: The new version; the commit message is ``release v<version>``.
        dry_run: When True, only print the commands that would be run.

    Returns:
        True on success (or in dry-run mode), False if staging or committing
        failed.
    """
    print("Committing version bump to git...")

    release_files = ["pyproject.toml", "electron/package.json", "uv.lock"]

    if dry_run:
        print(f"[DRY RUN] Would run: git add {' '.join(release_files)}")
        print(f'[DRY RUN] Would run: git commit -m "release v{version}"')
        return True

    try:
        # Skip candidates that are not present so a missing optional file
        # does not abort the release.
        to_stage = [name for name in release_files if (ROOT_DIR / name).exists()]
        if not to_stage:
            print("Error staging files: none of the release files exist")
            return False

        result = subprocess.run(
            ["git", "add", *to_stage],
            cwd=ROOT_DIR,
            capture_output=True,
            text=True,
        )

        if result.returncode != 0:
            print(f"Error staging files: {result.stderr}")
            return False

        # Commit the change
        result = subprocess.run(
            ["git", "commit", "-m", f"release v{version}"],
            cwd=ROOT_DIR,
            capture_output=True,
            text=True,
        )

        if result.returncode != 0:
            print(f"Error creating commit: {result.stderr}")
            return False

        print(f"Committed: release v{version}")
        return True

    except Exception as e:
        print(f"Error: {e}")
        return False
def run_uv_sync() -> bool:
    """Run ``uv sync`` in the project root.

    Returns:
        True if the command exited with status 0; False if it exited with a
        non-zero status or could not be run at all.
    """
    print("Running uv sync...")

    try:
        proc = subprocess.run(
            ["uv", "sync"],
            cwd=ROOT_DIR,
            capture_output=True,
            text=True,
        )
        if proc.returncode == 0:
            print("Dependencies synchronized successfully.")
            return True
        print(f"Error running uv sync: {proc.stderr}")
    except Exception as exc:
        print(f"Error: {exc}")

    return False
version: {current_version}") 257 | 258 | if args.major: 259 | bump_type = "major" 260 | elif args.minor: 261 | bump_type = "minor" 262 | else: 263 | bump_type = "patch" 264 | 265 | new_version = bump_version(current_version, bump_type, args.version) 266 | print(f"New version: {new_version}") 267 | print() 268 | 269 | if args.dry_run: 270 | print("[DRY RUN] No changes will be made") 271 | print() 272 | 273 | if not args.dry_run: 274 | # Update pyproject.toml 275 | if not update_pyproject_version(new_version): 276 | return 1 277 | 278 | # Update electron/package.json 279 | if not update_electron_package_json_version(new_version): 280 | return 1 281 | print() 282 | 283 | ## run uv sync 284 | if not args.dry_run: 285 | if not run_uv_sync(): 286 | return 1 287 | 288 | if not git_commit_version(new_version, dry_run=args.dry_run): 289 | return 1 290 | print() 291 | 292 | if not create_git_tag(new_version, dry_run=args.dry_run): 293 | return 1 294 | 295 | print() 296 | print("=" * 50) 297 | if args.dry_run: 298 | print("Dry run completed!") 299 | else: 300 | print("Release completed successfully!") 301 | print() 302 | print("Next steps:") 303 | print(" 1. Push changes: git push && git push origin v" + new_version) 304 | print(" 2. Build package: uv run python scripts/build.py --pack") 305 | print(" 3. Publish to PyPI: uv publish") 306 | print("=" * 50) 307 | 308 | return 0 309 | 310 | 311 | if __name__ == "__main__": 312 | sys.exit(main()) 313 | -------------------------------------------------------------------------------- /scripts/build_electron.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | AutoGLM-GUI Electron 一键构建脚本 4 | 5 | 功能: 6 | 1. 检查环境依赖 7 | 2. 同步 Python 开发依赖 8 | 3. 构建前端 9 | 4. 下载 ADB 工具 10 | 5. 打包 Python 后端 11 | 6. 
def run_command(cmd: list[str], cwd: Path | None = None, check: bool = True) -> bool:
    """Echo and run an external command, letting it write to our stdout/stderr.

    Args:
        cmd: The program and its arguments.
        cwd: Working directory for the command, or None to inherit ours.
        check: Passed to ``subprocess.run``; when True a non-zero exit status
            is reported as an error.

    Returns:
        True if the command exited with status 0, False if it failed or the
        executable could not be found.
    """
    cmd_str = " ".join(str(c) for c in cmd)
    # Flush before spawning: the child writes directly to the inherited
    # stdout, so anything still sitting in our buffer would otherwise appear
    # after the child's output.
    print(f"{Color.BLUE}$ {cmd_str}{Color.RESET}", flush=True)

    try:
        # On Windows, commands such as pnpm/npm have to be run through the shell
        use_shell = sys.platform == "win32" and cmd[0] in ["pnpm", "npm"]

        result = subprocess.run(
            cmd, cwd=cwd, check=check, capture_output=False, text=True, shell=use_shell
        )
        return result.returncode == 0
    except subprocess.CalledProcessError as e:
        print_error(f"命令执行失败: {e}")
        return False
    except FileNotFoundError:
        print_error(f"命令未找到: {cmd[0]}")
        return False
class ElectronBuilder:
    """Runs the Electron build pipeline step by step.

    The steps are: environment check, Python dependency sync, frontend build,
    ADB download, backend packaging and the Electron build itself. Every step
    method returns True on success and False on failure.
    """

    def __init__(self, args):
        """Store the parsed CLI options and derive project paths and platform flags."""
        self.args = args

        # Project layout, resolved relative to the directory above this script.
        root = Path(__file__).parent.parent
        self.root_dir = root
        self.frontend_dir = root / "frontend"
        self.scripts_dir = root / "scripts"
        self.electron_dir = root / "electron"
        self.resources_dir = root / "resources"

        # Platform information
        system_name = platform.system().lower()
        self.platform = system_name
        self.is_windows = system_name == "windows"
        self.is_macos = system_name == "darwin"
        self.is_linux = system_name == "linux"

    @staticmethod
    def _replace_tree(source: Path, destination: Path) -> None:
        """Replace ``destination`` with a fresh copy of ``source``."""
        if destination.exists():
            shutil.rmtree(destination)
        shutil.copytree(source, destination)

    def check_environment(self) -> bool:
        """Report which required tools are installed; False if any is missing."""
        print_step("检查环境依赖", 7, 1)

        # (tool, description shown in the report, install hint)
        tool_table = (
            ("uv", "Python 包管理器", " uv: curl -LsSf https://astral.sh/uv/install.sh | sh"),
            ("node", "Node.js 运行时", " Node.js: https://nodejs.org/"),
            ("pnpm", "pnpm 包管理器", " pnpm: npm install -g pnpm"),
        )

        absent = []
        for tool, description, _hint in tool_table:
            if not check_command(tool):
                print_error(f"{description} ({tool}) 未安装")
                absent.append(tool)
                continue
            print_success(f"{description} ({tool}) 已安装")

        if not absent:
            return True

        print_error(f"\n缺少必需工具: {', '.join(absent)}")
        print("\n安装指南:")
        for tool, _description, hint in tool_table:
            if tool in absent:
                print(hint)
        return False

    def sync_python_deps(self) -> bool:
        """Sync the Python development dependencies with ``uv sync --dev``."""
        print_step("同步 Python 开发依赖", 7, 2)
        sync_cmd = ["uv", "sync", "--dev"]
        return run_command(sync_cmd, cwd=self.root_dir)

    def build_frontend(self) -> bool:
        """Install and build the frontend, then copy its output into the backend package."""
        print_step("构建前端", 7, 3)

        # Install dependencies first, then build; stop at the first failure.
        pnpm_stages = (
            ("\n安装前端依赖...", ["pnpm", "install"]),
            ("\n构建前端代码...", ["pnpm", "build"]),
        )
        for banner, command in pnpm_stages:
            print(banner)
            if not run_command(command, cwd=self.frontend_dir):
                return False

        # Copy the frontend build output into the backend's static directory
        print("\n复制前端到后端...")
        backend_static = self.root_dir / "AutoGLM_GUI" / "static"
        self._replace_tree(self.frontend_dir / "dist", backend_static)
        print_success(f"前端已复制到 {backend_static}")

        return True

    def download_adb(self) -> bool:
        """Download the ADB tools for the platform(s) selected by the host OS."""
        print_step("下载 ADB 工具", 7, 4)

        # Decide which platforms to download for
        if self.is_windows:
            targets = ["windows"]
        elif self.is_macos:
            # Both platforms are built when running on macOS
            targets = ["darwin", "windows"]
        elif self.is_linux:
            # Linux downloads its own ADB
            targets = ["linux"]
        else:
            print_warning(f"未知平台 {self.platform},跳过 ADB 下载")
            return True

        for target in targets:
            print(f"\n下载 {target} ADB...")
            fetched = run_command(
                ["uv", "run", "python", "scripts/download_adb.py", target],
                cwd=self.root_dir,
            )
            if not fetched:
                return False

        return True

    def build_backend(self) -> bool:
        """Package the Python backend with PyInstaller and copy it into resources/backend."""
        print_step("打包 Python 后端", 7, 5)

        # Run PyInstaller
        print("\n运行 PyInstaller...")
        packaged = run_command(["pyinstaller", "autoglm.spec"], cwd=self.scripts_dir)
        if not packaged:
            return False

        # Copy the result into resources/backend
        print("\n复制后端到 resources...")
        backend_resources = self.resources_dir / "backend"
        self._replace_tree(self.scripts_dir / "dist" / "autoglm-gui", backend_resources)
        print_success(f"后端已复制到 {backend_resources}")

        return True

    def build_electron(self) -> bool:
        """Install the Electron dependencies, build the app and list the artifacts."""
        print_step("安装 Electron 依赖", 7, 6)

        # Install Electron dependencies
        installed = run_command(["npm", "install"], cwd=self.electron_dir)
        if not installed:
            return False

        print_step("构建 Electron 应用", 7, 7)

        # Build Electron (explicitly without publishing)
        built = run_command(
            ["npm", "run", "build", "--", "--publish", "never"],
            cwd=self.electron_dir,
        )
        if not built:
            return False

        # Show the build artifacts
        rule = "=" * 60
        print("\n" + rule)
        print(f"{Color.GREEN}{Color.BOLD}✓ 构建完成!{Color.RESET}")
        print(rule)

        dist_dir = self.electron_dir / "dist"
        if not dist_dir.exists():
            return True

        print(f"\n构建产物位置: {dist_dir}")
        print("\n文件列表:")
        for entry in sorted(dist_dir.iterdir()):
            if entry.is_file():
                megabytes = entry.stat().st_size / (1024 * 1024)
                print(f" - {entry.name} ({megabytes:.1f} MB)")
                continue
            if entry.is_dir() and not entry.name.startswith("."):
                print(f" - {entry.name}/ (目录)")

        return True

    def build(self) -> bool:
        """Run the whole pipeline, stopping at the first step that fails."""
        print(f"\n{Color.BOLD}AutoGLM-GUI Electron 构建工具{Color.RESET}")
        print(f"平台: {self.platform}")
        print(f"项目根目录: {self.root_dir}\n")

        # (step name, name of the CLI flag that skips it or None,
        #  message printed when skipped, callable that runs the step)
        pipeline = (
            ("环境检查", None, "", self.check_environment),
            ("Python 依赖", None, "", self.sync_python_deps),
            ("前端构建", "skip_frontend", "跳过前端构建", self.build_frontend),
            ("ADB 工具", "skip_adb", "跳过 ADB 下载", self.download_adb),
            ("后端打包", "skip_backend", "跳过后端打包", self.build_backend),
            ("Electron", None, "", self.build_electron),
        )

        for step_name, skip_flag, skip_note, action in pipeline:
            # The flag is read only when its step is reached.
            if skip_flag is not None and getattr(self.args, skip_flag):
                print_warning(skip_note)
                continue
            if not action():
                print_error(f"\n构建失败: {step_name}")
                return False

        return True
/**
 * JSON body for a swipe gesture. The field names match the payload that
 * `sendSwipe` posts to `/api/control/swipe`.
 */
export interface SwipeRequest {
  /** X coordinate where the swipe starts (`sendSwipe` rounds it to an integer). */
  start_x: number;
  /** Y coordinate where the swipe starts (`sendSwipe` rounds it to an integer). */
  start_y: number;
  /** X coordinate where the swipe ends (`sendSwipe` rounds it to an integer). */
  end_x: number;
  /** Y coordinate where the swipe ends (`sendSwipe` rounds it to an integer). */
  end_y: number;
  /** Swipe duration in milliseconds; `sendSwipe` sends 300 when the caller gives none. */
  duration_ms?: number;
  /** Target device id. NOTE(review): meaning of null/omitted is decided by the backend — confirm. */
  device_id?: string | null;
  /** Delay associated with the action. NOTE(review): unit is not visible here (presumably seconds) — confirm. */
  delay?: number;
}
/**
 * Fetch the device list from `GET /api/devices` and return just the
 * `devices` array of the response.
 */
export async function getDevices(): Promise<Device[]> {
  const response = await axios.get<DeviceListResponse>('/api/devices');
  return response.data.devices;
}
/**
 * Send a chat message to `POST /api/chat/stream` and dispatch the
 * server-sent events of the response to the given callbacks.
 *
 * @param message  The user message to send.
 * @param deviceId The device the message is addressed to.
 * @param onStep   Called for every `step` event.
 * @param onDone   Called for the `done` event.
 * @param onError  Called for an `error` event, and for transport failures
 *                 (non-2xx status, missing body, network error). Not called
 *                 when the request was aborted through `close()`.
 * @returns A handle whose `close()` aborts the request.
 */
export function sendMessageStream(
  message: string,
  deviceId: string,
  onStep: (event: StepEvent) => void,
  onDone: (event: DoneEvent) => void,
  onError: (event: ErrorEvent) => void
): { close: () => void } {
  const controller = new AbortController();

  // Name of the SSE event currently being read. Kept outside the read loop
  // so an `event:` line and its `data:` line stay paired even when they
  // arrive in different chunks.
  let eventType = 'message';

  // Hand a decoded payload to the callback registered for the current event.
  const dispatch = (data: unknown): void => {
    if (eventType === 'step') {
      console.log('[SSE] Received step event:', data);
      onStep(data as StepEvent);
    } else if (eventType === 'done') {
      console.log('[SSE] Received done event:', data);
      onDone(data as DoneEvent);
    } else if (eventType === 'error') {
      console.log('[SSE] Received error event:', data);
      onError(data as ErrorEvent);
    }
  };

  // Interpret one complete line of the stream.
  const processLine = (line: string): void => {
    if (line.startsWith('event: ')) {
      eventType = line.slice(7).trim();
      return;
    }
    if (!line.startsWith('data: ')) {
      return;
    }

    let data: unknown;
    try {
      data = JSON.parse(line.slice(6));
    } catch (e) {
      console.error('Failed to parse SSE data:', line, e);
      return;
    }

    // Handler failures are still swallowed so the stream keeps being read,
    // but they are no longer reported as JSON parse errors.
    try {
      dispatch(data);
    } catch (e) {
      console.error('[SSE] Event handler threw:', e);
    }
  };

  fetch('/api/chat/stream', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({ message, device_id: deviceId }),
    signal: controller.signal,
  })
    .then(async response => {
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }

      if (!response.body) {
        throw new Error('Response body is null');
      }

      const reader = response.body.getReader();
      const decoder = new TextDecoder();
      let buffer = '';

      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');

        // Keep the last line back; it may be incomplete.
        buffer = lines.pop() || '';

        for (const line of lines) {
          processLine(line);
        }
      }

      // End of stream: flush any bytes the decoder is still holding and
      // handle a final line that was not terminated by a newline.
      buffer += decoder.decode();
      if (buffer) {
        processLine(buffer);
      }
    })
    .catch(error => {
      if (error?.name !== 'AbortError') {
        onError({
          type: 'error',
          message: error instanceof Error ? error.message : String(error),
        });
      }
    });

  return {
    close: () => controller.abort(),
  };
}
/**
 * Post a touch-down event to `/api/control/touch/down`.
 *
 * @param x        X coordinate; rounded to an integer before sending.
 * @param y        Y coordinate; rounded to an integer before sending.
 * @param deviceId Target device id; sent as `null` when omitted.
 * @param delay    Forwarded unchanged as `delay` (defaults to 0).
 * @returns The response body returned by the backend.
 */
export async function sendTouchDown(
  x: number,
  y: number,
  deviceId?: string | null,
  delay: number = 0
): Promise<TouchDownResponse> {
  const res = await axios.post<TouchDownResponse>('/api/control/touch/down', {
    x: Math.round(x),
    y: Math.round(y),
    device_id: deviceId ?? null,
    delay,
  });
  return res.data;
}
/**
 * Fetch the current configuration from `GET /api/config`.
 *
 * @returns The response body returned by the backend.
 */
export async function getConfig(): Promise<ConfigResponse> {
  const res = await axios.get<ConfigResponse>('/api/config');
  return res.data;
}