├── .gitignore ├── README.md ├── mcp-client-cli-interface ├── README.md ├── main.ts ├── package-lock.json ├── package.json ├── query-processing-engine.ts ├── setup.ts ├── start-here.ts └── tests │ └── test-mcp.js ├── mcp-client-nextjs ├── .env.example ├── .gitignore ├── README.md ├── eslint.config.mjs ├── next.config.ts ├── package-lock.json ├── package.json ├── postcss.config.mjs ├── public │ ├── file.svg │ ├── globe.svg │ ├── next.svg │ ├── vercel.svg │ └── window.svg ├── src │ ├── app │ │ ├── api │ │ │ └── mcp │ │ │ │ ├── initialize │ │ │ │ └── route.ts │ │ │ │ ├── logs │ │ │ │ └── route.ts │ │ │ │ ├── query │ │ │ │ └── route.ts │ │ │ │ ├── route.ts │ │ │ │ └── tools │ │ │ │ └── route.ts │ │ ├── favicon.ico │ │ ├── globals.css │ │ ├── layout.tsx │ │ └── page.tsx │ ├── components │ │ └── mcp-interface.tsx │ └── lib │ │ └── mcp │ │ ├── log-buffer.ts │ │ ├── query-processing-engine.ts │ │ ├── setup.ts │ │ └── start-here.ts ├── tailwind.config.js └── tsconfig.json └── mcp-server-os-level ├── Cargo.toml ├── examples ├── test_click_by_role.rs ├── test_example.rs ├── test_get_all_apps.rs ├── test_get_arc_all_elements_custom.rs ├── test_get_arc_all_elements_sdk_count.rs ├── test_get_arc_interactable_elements.rs ├── test_get_arc_interactable_elements_list.rs ├── test_get_arc_text_sdk.rs ├── test_get_messages_and_send_message.rs └── test_get_messages_text_sdk.rs └── src ├── bin ├── handlers │ ├── click_by_index.rs │ ├── input_control.rs │ ├── list_elements_and_attributes.rs │ ├── mcp.rs │ ├── mod.rs │ ├── open_application.rs │ ├── open_url.rs │ ├── press_key_by_index.rs │ ├── type_by_index.rs │ └── utils.rs ├── mcp-bridge.ts ├── mod.rs ├── server.rs └── types.rs ├── desktop.rs ├── element.rs ├── errors.rs ├── lib.rs ├── locator.rs ├── platforms ├── linux.rs ├── macos.rs ├── mod.rs ├── tree_search.rs └── windows.rs ├── selector.rs └── tests.rs /.gitignore: -------------------------------------------------------------------------------- 1 | # node/typescript 2 | 
node_modules/ 3 | dist/ 4 | build/ 5 | *.log 6 | .npm 7 | .env 8 | .env.* 9 | *.tsbuildinfo 10 | 11 | # rust 12 | /target/ 13 | **/target/ 14 | Cargo.lock 15 | **/*.rs.bk 16 | 17 | # os specific 18 | .DS_Store 19 | Thumbs.db 20 | desktop.ini 21 | 22 | # editors 23 | .vscode/* 24 | !.vscode/settings.json 25 | !.vscode/tasks.json 26 | !.vscode/launch.json 27 | !.vscode/extensions.json 28 | .idea/ 29 | *.sublime-* 30 | 31 | # logs and databases 32 | *.log 33 | *.sql 34 | *.sqlite 35 | *.sqlite3 36 | 37 | # build artifacts 38 | *.o 39 | *.a 40 | *.so 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Computer Use AI SDK 2 | 3 | * We've built an MCP server that controls computer 4 | 5 | * You've heard of OpenAI's operator, you've heard of Claude's computer use. Now the open source alternative: Computer Use SDK from screenpipe. 6 | 7 | * It's native on macOS—no virtual machine bs, no guardrails. Use it with any app or website however you want. 8 | 9 | * No pixel-based bs—it relies on underlying desktop-rendered elements, making it much faster and far more reliable than pixel-based vision models. 10 | 11 | * You can now build your own agents getting started with our simple Hello World Template using our MCP server and client. 12 | 13 | * There are tools that our MCP Server provides out of the box: 14 | * Launch apps 15 | * Read content 16 | * Click 17 | * Enter text 18 | * Press keys 19 | 20 | * These will be computational primitives to allow the AI to control your computer and do your tasks for you. What will you build? 
Come check us out at https://screenpi.pe 21 | 22 | ## Demos 23 | 24 | agent sending a message 25 | 26 | https://github.com/user-attachments/assets/f8687500-9a8c-4a96-81b6-77562feff093 27 | 28 | get latest whatsapp messages 29 | ![Image](https://github.com/user-attachments/assets/6401c930-07e5-4459-b54c-a8c70fdca73f) 30 | 31 | open arc browser 32 | ![Image](https://github.com/user-attachments/assets/8656be95-951d-4f13-8ee9-41babb821abb) 33 | 34 | ## Get started 35 | 36 | ```bash 37 | git clone https://github.com/m13v/computer-use-ai-sdk.git 38 | cd computer-use-ai-sdk 39 | ``` 40 | 41 | ```bash 42 | # Install Rust (if not already installed) 43 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh 44 | # Install Node.js and npm (if not already installed) 45 | # Visit https://nodejs.org/ or use nvm 46 | ``` 47 | 48 | ```bash 49 | # run backend server 50 | cd mcp-server-os-level 51 | cargo run --bin server 52 | # keep it running 53 | ``` 54 | 55 | ### Option 1: CLI Interface 56 | 57 | ```bash 58 | # run CLI interface client in a new terminal (good for debugging) 59 | cd mcp-client-cli-interface 60 | npm install # install dependencies first 61 | 62 | # Set your Anthropic API key as an environment variable 63 | export ANTHROPIC_API_KEY=sk-ant-xxxx # Replace with your actual Anthropic API key 64 | # For Windows, use: set ANTHROPIC_API_KEY=sk-ant-xxxx 65 | # For permanent setup, add to your shell profile (.bashrc, .zshrc, etc.) 
66 | 67 | npx tsx main.ts 68 | ``` 69 | 70 | ### Option 2: Web app Interface 71 | 72 | ```bash 73 | # run web app client in a new terminal 74 | cd mcp-client-nextjs 75 | npm install # install dependencies first 76 | 77 | # Set API key via command line 78 | echo "ANTHROPIC_API_KEY=sk-ant-XXXXXXXX" > .env # replace XXXXXXXX with your actual key 79 | # Or append if you want to keep other env variables 80 | # echo "ANTHROPIC_API_KEY=sk-ant-XXXXXXXX" >> .env 81 | 82 | npm run dev 83 | # go to provided localhost web page 84 | ``` 85 | 86 | 87 | ## What do I do with it? 88 | 89 | - Build custom workflows of agents to perform various actions 90 | - Build custom UI to make it easy for users to automate their computer work 91 | - Save workflow and run in cron 92 | - Combine with other MCP servers to do something cool, e.g.: fill out a google sheet based on the history of people i talk to throughout the day 93 | 94 | ## Request features and endpoints in github issues 95 | 96 | https://github.com/m13v/computer-use-ai-sdk/issues/new/choose -------------------------------------------------------------------------------- /mcp-client-cli-interface/README.md: -------------------------------------------------------------------------------- 1 | This is a client example with a simple CLI interface that helps you get started and better understand how everything works -------------------------------------------------------------------------------- /mcp-client-cli-interface/main.ts: -------------------------------------------------------------------------------- 1 | import { desktopClient, log } from './start-here'; 2 | import { setupEnvironment } from './setup'; 3 | import { processUserQuery } from './query-processing-engine'; 4 | import readline from 'readline'; 5 | import inquirer from 'inquirer'; 6 | 7 | async function main() { 8 | // setup environment and check server 9 | await setupEnvironment(); 10 | 11 | // connect to rust mcp server 12 | await 
desktopClient.connect('http://localhost:8080/mcp'); 13 | 14 | // list available tools 15 | await desktopClient.listTools(); 16 | 17 | // create readline interface 18 | const rl = readline.createInterface({ 19 | input: process.stdin, 20 | output: process.stdout 21 | }); 22 | 23 | // start chat loop 24 | console.log('\n=== desktop control chat ==='); 25 | console.log('(type "exit" to quit)'); 26 | 27 | // show initial options 28 | showInitialOptions(rl); 29 | } 30 | 31 | // Show initial options 32 | function showInitialOptions(rl: readline.Interface) { 33 | console.log("\nselect how to start:"); 34 | 35 | const choices = [ 36 | "[type your own]", 37 | "send message to first dialogie in messages app. message is 'i'm testing computer-use-sdk'", 38 | "go to discord, click 'direct messages' dialogue, then send message 'i'm testing computer-use-sdk'" 39 | ]; 40 | 41 | inquirer.prompt([ 42 | { 43 | type: 'list', 44 | name: 'option', 45 | message: 'choose an option:', 46 | choices: choices 47 | } 48 | ]).then(answers => { 49 | log.debug(`selected option: ${answers.option}`); 50 | 51 | if (answers.option === "[type your own]") { 52 | // Ask for custom input 53 | askQuestion(rl); 54 | } else { 55 | // Use the selected option directly as the prompt 56 | log.highlight(`using prompt: "${answers.option}"`); 57 | processQuery(answers.option, rl); 58 | } 59 | }); 60 | } 61 | 62 | function processQuery(input: string, rl: readline.Interface) { 63 | if (input.toLowerCase() === 'exit') { 64 | log.info("shutting down..."); 65 | desktopClient.disconnect() 66 | .then(() => { 67 | rl.close(); 68 | process.exit(0); 69 | }); 70 | return; 71 | } 72 | 73 | log.highlight("\nprocessing..."); 74 | processUserQuery(input) 75 | .then(response => { 76 | // Only show success message if we actually got a valid response 77 | if (response && !response.startsWith('Error:')) { 78 | log.response(response); 79 | } 80 | askQuestion(rl); // Continue with normal flow 81 | }) 82 | .catch(error => { 83 | // Show 
error in red and with clear error prefix 84 | log.error(`query failed: ${error.message || error}`); 85 | askQuestion(rl); // Continue with normal flow 86 | }); 87 | } 88 | 89 | function askQuestion(rl: readline.Interface) { 90 | inquirer.prompt([ 91 | { 92 | type: 'input', 93 | name: 'query', 94 | message: 'query:', 95 | prefix: '' 96 | } 97 | ]).then(answers => { 98 | log.debug(`received input: "${answers.query}"`); 99 | processQuery(answers.query, rl); 100 | }).catch(err => { 101 | log.error("error getting input:", err); 102 | askQuestion(rl); // Try again 103 | }); 104 | } 105 | 106 | main().catch(error => log.error("fatal error:", error)); -------------------------------------------------------------------------------- /mcp-client-cli-interface/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "hello-world-mcp-client", 3 | "version": "1.0.0", 4 | "main": "index.js", 5 | "scripts": { 6 | "test": "echo \"Error: no test specified\" && exit 1" 7 | }, 8 | "keywords": [], 9 | "author": "", 10 | "license": "ISC", 11 | "description": "", 12 | "dependencies": { 13 | "@anthropic-ai/sdk": "^0.39.0", 14 | "@modelcontextprotocol/sdk": "^1.8.0", 15 | "axios": "^1.8.4", 16 | "dotenv": "^16.4.7", 17 | "inquirer": "^12.5.0" 18 | }, 19 | "type": "module", 20 | "devDependencies": { 21 | "@types/inquirer": "^9.0.7" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /mcp-client-cli-interface/setup.ts: -------------------------------------------------------------------------------- 1 | import dotenv from "dotenv"; 2 | import path from "path"; 3 | import fs from "fs"; 4 | import { exec } from "child_process"; 5 | import { promisify } from "util"; 6 | 7 | // Create the exec promise function 8 | const execPromise = promisify(exec); 9 | 10 | // Load environment variables 11 | export async function setupEnvironment() { 12 | // First try loading from .env file 13 | 
dotenv.config(); 14 | 15 | // Check if API key is set 16 | if (!process.env.ANTHROPIC_API_KEY) { 17 | // Try to load from config file 18 | const configDir = path.join(process.env.HOME || "", ".screenpipe"); 19 | const configPath = path.join(configDir, "config.json"); 20 | 21 | if (fs.existsSync(configPath)) { 22 | try { 23 | const config = JSON.parse(fs.readFileSync(configPath, "utf8")); 24 | if (config.anthropicApiKey) { 25 | process.env.ANTHROPIC_API_KEY = config.anthropicApiKey; 26 | } 27 | } catch (error) { 28 | console.error("error loading config:", error); 29 | } 30 | } 31 | 32 | // If still not set, show error 33 | if (!process.env.ANTHROPIC_API_KEY) { 34 | console.error( 35 | "missing ANTHROPIC_API_KEY - please set in .env file or config.json" 36 | ); 37 | process.exit(1); 38 | } 39 | } 40 | 41 | // Validate API key format 42 | const apiKey = process.env.ANTHROPIC_API_KEY; 43 | if (!apiKey.startsWith('sk-ant-')) { 44 | console.error("\n======================================"); 45 | console.error("invalid ANTHROPIC_API_KEY format"); 46 | console.error("api key should start with 'sk-ant-'"); 47 | console.error(`found: ${apiKey.substring(0, 7)}...`); 48 | console.error("please check your .env file or config.json"); 49 | console.error("======================================\n"); 50 | process.exit(1); // Exit immediately with error code 51 | } 52 | 53 | // check if rust mcp server is running 54 | const checkServer = async () => { 55 | try { 56 | // use the correct JSON-RPC format for MCP 57 | const payload = { 58 | jsonrpc: "2.0", 59 | id: "health-check", 60 | method: "initialize", 61 | params: { 62 | clientInfo: { 63 | name: "mcp-client-health-check", 64 | version: "1.0.0" 65 | }, 66 | capabilities: {} 67 | } 68 | }; 69 | 70 | console.log("checking mcp server connection..."); 71 | 72 | // Direct connection to 127.0.0.1:8080 since we've verified it works 73 | const curlCommand = `curl -s -X POST http://127.0.0.1:8080/mcp -H "Content-Type: application/json" -d 
'${JSON.stringify(payload)}'`; 74 | 75 | const { stdout, stderr } = await execPromise(curlCommand); 76 | 77 | if (stderr && stderr.length > 0) { 78 | console.error(`curl stderr: ${stderr}`); 79 | // Note: curl often writes progress info to stderr but still succeeds 80 | // Only fail if stdout is empty 81 | if (!stdout) { 82 | throw new Error(stderr); 83 | } 84 | } 85 | 86 | // Check if we got a valid JSON response 87 | try { 88 | const response = JSON.parse(stdout); 89 | if (response.result) { 90 | console.log("mcp server is running and responding properly"); 91 | return true; 92 | } 93 | } catch (jsonError) { 94 | console.error("invalid json response from server:", stdout.substring(0, 100)); 95 | throw new Error("Invalid JSON response from server"); 96 | } 97 | 98 | console.log("mcp server responded but with unexpected format"); 99 | return false; 100 | } catch (error) { 101 | console.error("failed to connect to mcp server at http://127.0.0.1:8080/mcp"); 102 | console.error(`error details: ${error.message || error}`); 103 | console.error("please ensure the rust server is running"); 104 | process.exit(1); 105 | } 106 | }; 107 | 108 | await checkServer(); 109 | } 110 | -------------------------------------------------------------------------------- /mcp-client-cli-interface/start-here.ts: -------------------------------------------------------------------------------- 1 | import Anthropic from "@anthropic-ai/sdk"; 2 | 3 | // enhanced logging utility with colors for better readability 4 | export const log = { 5 | info: (msg: string, ...args: any[]) => console.log(`\x1b[36m[info]\x1b[0m ${msg}`, ...args), 6 | success: (msg: string, ...args: any[]) => console.log(`\x1b[32m[success]\x1b[0m ${msg}`, ...args), 7 | error: (msg: string, ...args: any[]) => console.error(`\x1b[31m[error]\x1b[0m ${msg}`, ...args), 8 | warn: (msg: string, ...args: any[]) => console.log(`\x1b[33m[warn]\x1b[0m ${msg}`, ...args), 9 | debug: (msg: string, ...args: any[]) => 
console.log(`\x1b[90m[debug]\x1b[0m ${msg}`, ...args), 10 | // New logging methods for specific UI elements 11 | highlight: (msg: string, ...args: any[]) => console.log(`\x1b[1m\x1b[35m${msg}\x1b[0m`, ...args), 12 | iteration: (msg: string, ...args: any[]) => console.log(`\x1b[36m${msg}\x1b[0m`, ...args), 13 | response: (msg: string) => console.log(`\n\x1b[1m\x1b[37mresponse:\x1b[0m ${msg}`), 14 | tool: (name: string, result: any) => { 15 | // Truncate long results 16 | const truncateJSON = (obj: any, maxLength = 500): string => { 17 | if (obj === undefined || obj === null) { 18 | return String(obj); 19 | } 20 | const str = JSON.stringify(obj); 21 | if (str.length <= maxLength) return str; 22 | return str.substring(0, maxLength) + '... [truncated]'; 23 | }; 24 | 25 | // One-line format with truncation 26 | console.log(`\n\x1b[1m\x1b[37m${name} result:\x1b[0m ${truncateJSON(result)}`); 27 | } 28 | }; 29 | 30 | class DesktopControlClient { 31 | private connected = false; 32 | private serverUrl = ""; 33 | private requestId = 0; 34 | private anthropic = new Anthropic(); 35 | 36 | // Connect to the MCP server via http 37 | async connect(serverUrl: string) { 38 | log.info(`connecting to mcp server: ${serverUrl}`); 39 | 40 | try { 41 | this.serverUrl = serverUrl; 42 | const response = await this.makeRequest("initialize", {}); 43 | 44 | if (response.result) { 45 | this.connected = true; 46 | log.success('mcp client session established successfully'); 47 | return true; 48 | } else { 49 | log.error('failed to establish mcp client session:', response.error); 50 | return false; 51 | } 52 | } catch (error) { 53 | log.error('failed to establish mcp client session:', error); 54 | return false; 55 | } 56 | } 57 | 58 | // Make a JSON-RPC request 59 | private async makeRequest(method: string, params: any) { 60 | const id = `request-${++this.requestId}`; 61 | 62 | const response = await fetch("http://127.0.0.1:8080/mcp", { 63 | method: "POST", 64 | headers: { 65 | "Content-Type": 
"application/json" 66 | }, 67 | body: JSON.stringify({ 68 | jsonrpc: "2.0", 69 | id, 70 | method, 71 | params 72 | }) 73 | }); 74 | 75 | return await response.json(); 76 | } 77 | 78 | // Check if connected 79 | isConnected(): boolean { 80 | return this.connected; 81 | } 82 | 83 | // List available tools 84 | async listTools() { 85 | if (!this.isConnected()) { 86 | log.error('cannot list tools: not connected'); 87 | throw new Error('Not connected to MCP server'); 88 | } 89 | 90 | try { 91 | // In standard MCP, this would be tools/list 92 | // But our rust server exposes tools through initialize 93 | const response = await this.makeRequest("initialize", {}); 94 | const tools = response.result.capabilities.tools.functions; 95 | 96 | // Create simplified view - one line per tool 97 | log.info('available tools:'); 98 | tools.forEach((tool: any) => { 99 | const propertyNames = Object.keys(tool.parameters.properties || {}).join(', '); 100 | log.debug(`- ${tool.name}: ${propertyNames}`); 101 | }); 102 | 103 | return { tools }; 104 | } catch (error) { 105 | log.error('failed to list tools:', error); 106 | throw error; 107 | } 108 | } 109 | 110 | // Call a tool 111 | async callTool(name: string, args: Record) { 112 | if (!this.isConnected()) { 113 | log.error('cannot call tool: not connected'); 114 | throw new Error('Not connected to MCP server'); 115 | } 116 | 117 | log.info(`calling tool "${name}" with args: ${JSON.stringify(args)}`); 118 | 119 | try { 120 | const response = await this.makeRequest("executeToolFunction", { 121 | function: name, 122 | arguments: args 123 | }); 124 | 125 | // Check if result exists before logging 126 | if (response && 'result' in response) { 127 | log.tool(name, response.result); 128 | return response.result; 129 | } else { 130 | log.tool(name, response); // Log the entire response if result is missing 131 | return response; // Still return whatever we got 132 | } 133 | } catch (error) { 134 | log.error(`error calling tool "${name}":`, 
error); 135 | throw error; 136 | } 137 | } 138 | 139 | // Disconnect from the server 140 | async disconnect() { 141 | this.connected = false; 142 | log.success('mcp client session closed'); 143 | } 144 | } 145 | 146 | // Export an instance that can be used throughout your application 147 | export const desktopClient = new DesktopControlClient(); 148 | -------------------------------------------------------------------------------- /mcp-client-cli-interface/tests/test-mcp.js: -------------------------------------------------------------------------------- 1 | import axios from 'axios'; 2 | 3 | // Helper function to create an MCP request 4 | function createMCPRequest(method, params) { 5 | return { 6 | jsonrpc: '2.0', 7 | id: Math.floor(Math.random() * 10000), 8 | method, 9 | params 10 | }; 11 | } 12 | 13 | async function testMCP() { 14 | const mcpUrl = 'http://127.0.0.1:8080/mcp'; 15 | console.log('starting mcp test suite'); 16 | 17 | try { 18 | // 1. Initialize to get capabilities 19 | console.log('testing initialize...'); 20 | const initResponse = await axios.post(mcpUrl, createMCPRequest('initialize', { 21 | capabilities: { 22 | tools: { execution: true }, 23 | resources: {} 24 | } 25 | })); 26 | console.log('initialize response capabilities:', initResponse.data.result.capabilities.tools.functions.map(f => f.name)); 27 | 28 | // 2. Open an application (e.g., browser) 29 | console.log('\ntesting openApplication...'); 30 | const openAppResponse = await axios.post(mcpUrl, createMCPRequest('executeToolFunction', { 31 | function: 'openApplication', 32 | arguments: { 33 | app_name: 'Arc' 34 | } 35 | })); 36 | console.log('open application response:', openAppResponse.data); 37 | 38 | // Wait a moment for the app to open 39 | await new Promise(resolve => setTimeout(resolve, 2000)); 40 | 41 | // 3. 
Open a URL 42 | console.log('\ntesting openUrl...'); 43 | const openUrlResponse = await axios.post(mcpUrl, createMCPRequest('executeToolFunction', { 44 | function: 'openUrl', 45 | arguments: { 46 | url: 'https://example.com', 47 | browser: 'Arc' 48 | } 49 | })); 50 | console.log('open url response:', openUrlResponse.data); 51 | 52 | // Wait for page to load 53 | await new Promise(resolve => setTimeout(resolve, 3000)); 54 | 55 | // 4. List interactable elements 56 | console.log('\ntesting listInteractableElements...'); 57 | const listElementsResponse = await axios.post(mcpUrl, createMCPRequest('executeToolFunction', { 58 | function: 'listInteractableElements', 59 | arguments: { 60 | app_name: 'Arc', 61 | interactable_only: true, 62 | max_elements: 10 63 | } 64 | })); 65 | console.log('list elements response stats:', listElementsResponse.data.result.stats); 66 | 67 | if (listElementsResponse.data.result.elements.length > 0) { 68 | console.log('first element:', listElementsResponse.data.result.elements[0]); 69 | 70 | // 5. Click element by index 71 | console.log('\ntesting clickByIndex...'); 72 | const clickByIndexResponse = await axios.post(mcpUrl, createMCPRequest('executeToolFunction', { 73 | function: 'clickByIndex', 74 | arguments: { 75 | element_index: 0 76 | } 77 | })); 78 | console.log('click by index response:', clickByIndexResponse.data); 79 | 80 | // 6. Type text by index (only if we have a text field) 81 | const textField = listElementsResponse.data.result.elements.findIndex(el => 82 | el.role === 'AXTextField' || el.role === 'AXTextArea'); 83 | 84 | if (textField >= 0) { 85 | console.log('\ntesting typeByIndex...'); 86 | const typeByIndexResponse = await axios.post(mcpUrl, createMCPRequest('executeToolFunction', { 87 | function: 'typeByIndex', 88 | arguments: { 89 | element_index: textField, 90 | text: 'Hello from MCP test' 91 | } 92 | })); 93 | console.log('type by index response:', typeByIndexResponse.data); 94 | 95 | // 7. 
Press key by index 96 | console.log('\ntesting pressKeyByIndex...'); 97 | const pressKeyByIndexResponse = await axios.post(mcpUrl, createMCPRequest('executeToolFunction', { 98 | function: 'pressKeyByIndex', 99 | arguments: { 100 | element_index: textField, 101 | key_combo: 'Enter' 102 | } 103 | })); 104 | console.log('press key by index response:', pressKeyByIndexResponse.data); 105 | } else { 106 | console.log('no text fields found, skipping type and press key tests'); 107 | } 108 | } else { 109 | console.log('no elements found, skipping index-based operations'); 110 | } 111 | 112 | // 8. Scroll element 113 | console.log('\ntesting scrollElement...'); 114 | const scrollResponse = await axios.post(mcpUrl, createMCPRequest('executeToolFunction', { 115 | function: 'scrollElement', 116 | arguments: { 117 | selector: { 118 | app_name: 'Arc', 119 | locator: 'main' 120 | }, 121 | direction: 'down', 122 | amount: 100 123 | } 124 | })); 125 | console.log('scroll response:', scrollResponse.data); 126 | 127 | // 9. 
Input control 128 | console.log('\ntesting inputControl...'); 129 | const inputControlResponse = await axios.post(mcpUrl, createMCPRequest('executeToolFunction', { 130 | function: 'inputControl', 131 | arguments: { 132 | action: { 133 | type: 'KeyPress', 134 | data: 'Escape' 135 | } 136 | } 137 | })); 138 | console.log('input control response:', inputControlResponse.data); 139 | 140 | console.log('\nall tests completed'); 141 | } catch (error) { 142 | console.error('error during testing:', error.response?.data || error.message); 143 | } 144 | } 145 | 146 | testMCP(); -------------------------------------------------------------------------------- /mcp-client-nextjs/.env.example: -------------------------------------------------------------------------------- 1 | ANTHROPIC_API_KEY=sk-ant-XXXXXXXX -------------------------------------------------------------------------------- /mcp-client-nextjs/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.* 7 | .yarn/* 8 | !.yarn/patches 9 | !.yarn/plugins 10 | !.yarn/releases 11 | !.yarn/versions 12 | 13 | # testing 14 | /coverage 15 | 16 | # next.js 17 | /.next/ 18 | /out/ 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | *.pem 26 | 27 | # debug 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | .pnpm-debug.log* 32 | 33 | # env files (can opt-in for committing if needed) 34 | .env* 35 | !.env.example 36 | 37 | # vercel 38 | .vercel 39 | 40 | # typescript 41 | *.tsbuildinfo 42 | next-env.d.ts 43 | -------------------------------------------------------------------------------- /mcp-client-nextjs/README.md: -------------------------------------------------------------------------------- 1 | This is a Next.js app that serves as an MCP client -------------------------------------------------------------------------------- /mcp-client-nextjs/eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import { dirname } from "path"; 2 | import { fileURLToPath } from "url"; 3 | import { FlatCompat } from "@eslint/eslintrc"; 4 | 5 | const __filename = fileURLToPath(import.meta.url); 6 | const __dirname = dirname(__filename); 7 | 8 | const compat = new FlatCompat({ 9 | baseDirectory: __dirname, 10 | }); 11 | 12 | const eslintConfig = [ 13 | ...compat.extends("next/core-web-vitals", "next/typescript"), 14 | ]; 15 | 16 | export default eslintConfig; 17 | -------------------------------------------------------------------------------- /mcp-client-nextjs/next.config.ts: -------------------------------------------------------------------------------- 1 | import type { NextConfig } from "next"; 2 | 3 | const nextConfig: NextConfig = { 4 | /* config options here */ 5 | }; 6 | 7 | export default nextConfig; 8 | -------------------------------------------------------------------------------- /mcp-client-nextjs/package.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name": "hellow-world-mcp-client-nextjs", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev --turbopack", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@anthropic-ai/sdk": "^0.39.0", 13 | "@modelcontextprotocol/sdk": "^1.8.0", 14 | "axios": "^1.8.4", 15 | "dotenv": "^16.4.7", 16 | "lucide-react": "^0.485.0", 17 | "next": "15.2.4", 18 | "react": "^19.0.0", 19 | "react-dom": "^19.0.0" 20 | }, 21 | "devDependencies": { 22 | "@eslint/eslintrc": "^3", 23 | "@tailwindcss/postcss": "^4", 24 | "@types/node": "^20", 25 | "@types/react": "^19", 26 | "@types/react-dom": "^19", 27 | "eslint": "^9", 28 | "eslint-config-next": "15.2.4", 29 | "tailwindcss": "^4", 30 | "typescript": "^5" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /mcp-client-nextjs/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | const config = { 2 | plugins: ["@tailwindcss/postcss"], 3 | }; 4 | 5 | export default config; 6 | -------------------------------------------------------------------------------- /mcp-client-nextjs/public/file.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mcp-client-nextjs/public/globe.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mcp-client-nextjs/public/next.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mcp-client-nextjs/public/vercel.svg: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mcp-client-nextjs/public/window.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mcp-client-nextjs/src/app/api/mcp/initialize/route.ts: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { desktopClient } from '@/lib/mcp/start-here'; 3 | import { checkMCPServer } from '@/lib/mcp/setup'; 4 | 5 | // Shared state can be moved to a separate file if needed 6 | let isInitialized = false; 7 | 8 | export async function GET() { 9 | // Skip actual MCP connection during server startup 10 | if (process.env.NEXT_PHASE === 'phase-production-build') { 11 | return NextResponse.json({ status: 'skipped-during-build' }); 12 | } 13 | 14 | try { 15 | if (isInitialized) { 16 | console.log('mcp client already initialized'); 17 | return NextResponse.json({ status: 'connected' }); 18 | } 19 | 20 | console.log('initializing mcp client connection...'); 21 | 22 | // check if server is available 23 | const serverRunning = await checkMCPServer(); 24 | if (!serverRunning) { 25 | throw new Error('mcp server is not available'); 26 | } 27 | 28 | // connect to rust mcp server using ipv4 29 | await desktopClient.connect('http://127.0.0.1:8080/mcp'); 30 | 31 | // list available tools 32 | await desktopClient.listTools(); 33 | 34 | isInitialized = true; 35 | console.log('mcp client initialized successfully'); 36 | 37 | return NextResponse.json({ 38 | status: 'connected', 39 | message: 'mcp client initialized successfully' 40 | }); 41 | } catch (error) { 42 | console.error('failed to initialize mcp client:', error); 43 | return NextResponse.json( 44 | { 45 | status: 'error', 46 | error: `failed to initialize mcp client: ${error 
instanceof Error ? error.message : String(error)}` 47 | }, 48 | { status: 503 } 49 | ); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /mcp-client-nextjs/src/app/api/mcp/logs/route.ts: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { logBuffer } from '../../../../lib/mcp/log-buffer'; 3 | 4 | export async function GET(request: Request) { 5 | const url = new URL(request.url); 6 | const since = url.searchParams.get('since'); 7 | 8 | const logs = logBuffer.getLogs(since ? parseInt(since, 10) : undefined); 9 | 10 | return NextResponse.json({ logs }); 11 | } 12 | -------------------------------------------------------------------------------- /mcp-client-nextjs/src/app/api/mcp/query/route.ts: -------------------------------------------------------------------------------- 1 | import { NextRequest, NextResponse } from 'next/server'; 2 | import { processUserQuery } from '@/lib/mcp/query-processing-engine'; 3 | import { desktopClient, log } from '@/lib/mcp/start-here'; 4 | import { checkMCPServer } from '@/lib/mcp/setup'; 5 | 6 | export async function POST(request: NextRequest) { 7 | try { 8 | // Parse the request body 9 | const body = await request.json(); 10 | const { query } = body; 11 | 12 | if (!query) { 13 | return NextResponse.json( 14 | { status: 'error', error: 'query is required' }, 15 | { status: 400 } 16 | ); 17 | } 18 | 19 | log.info('received mcp query:', query); 20 | 21 | // Check if server is available 22 | const serverRunning = await checkMCPServer(); 23 | if (!serverRunning) { 24 | throw new Error('mcp server is not available'); 25 | } 26 | 27 | // Use the advanced query processing engine instead of direct client call 28 | log.highlight('processing query through agent loop'); 29 | 30 | try { 31 | const response = await processUserQuery(query); 32 | return NextResponse.json({ response }); 33 | } catch (error) { 34 
| log.error(`failed to process query: ${error instanceof Error ? error.message : String(error)}`); // catch variables are `unknown` under strict mode, so narrow before reading .message 35 | 36 | // Return proper error response with status code 37 | return NextResponse.json( 38 | { 39 | error: error instanceof Error ? error.message : String(error), 40 | status: 'error', 41 | details: String(error) 42 | }, 43 | { status: 500 } 44 | ); 45 | } 46 | } catch (error) { 47 | log.error(`error handling request: ${error}`); 48 | return NextResponse.json({ error: 'Internal server error' }, { status: 500 }); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /mcp-client-nextjs/src/app/api/mcp/route.ts: -------------------------------------------------------------------------------- 1 | import { NextRequest, NextResponse } from 'next/server'; 2 | import { desktopClient } from '@/lib/mcp/start-here'; 3 | import { processUserQuery } from '@/lib/mcp/query-processing-engine'; 4 | import { checkMCPServer } from '@/lib/mcp/setup'; 5 | 6 | let isInitialized = false; 7 | 8 | async function initialize() { 9 | if (isInitialized) return true; 10 | 11 | console.log('initializing mcp client connection...'); 12 | 13 | try { 14 | // check if server is available 15 | const serverRunning = await checkMCPServer(); 16 | if (!serverRunning) { 17 | throw new Error('mcp server is not available'); 18 | } 19 | 20 | // connect to rust mcp server using ipv4 21 | await desktopClient.connect('http://127.0.0.1:8080/mcp'); 22 | 23 | // list available tools 24 | await desktopClient.listTools(); 25 | 26 | isInitialized = true; 27 | console.log('mcp client initialized successfully'); 28 | return true; 29 | } catch (error) { 30 | console.error('failed to initialize mcp client:', error); 31 | return false; 32 | } 33 | } 34 | 35 | export async function POST(request: NextRequest) { 36 | try { 37 | const initialized = await initialize(); 38 | if (!initialized) { 39 | return NextResponse.json( 40 | { error: 'failed to initialize mcp client' }, 41 | { status: 503 } 42 | ); 43 | } 44 | 45 | const { query } = await request.json(); 46
| console.log('processing query:', query); 47 | 48 | if (!query) { 49 | return NextResponse.json( 50 | { error: 'query is required' }, 51 | { status: 400 } 52 | ); 53 | } 54 | 55 | const response = await processUserQuery(query); 56 | return NextResponse.json({ response }); 57 | } catch (error) { 58 | console.error('error in mcp api route:', error); 59 | return NextResponse.json( 60 | { error: 'failed to process query' }, 61 | { status: 500 } 62 | ); 63 | } 64 | } -------------------------------------------------------------------------------- /mcp-client-nextjs/src/app/api/mcp/tools/route.ts: -------------------------------------------------------------------------------- 1 | import { desktopClient } from '@/lib/mcp/start-here'; 2 | import { NextResponse } from 'next/server'; 3 | 4 | export async function GET() { 5 | try { 6 | // The listTools method already exists in your desktopClient 7 | const toolsResponse = await desktopClient.listTools(); 8 | 9 | // Format the tools into a simple array of tool names 10 | const toolNames = toolsResponse.tools.map((tool: any) => tool.name); 11 | 12 | console.log(`api/mcp/tools: returning ${toolNames.length} tools`); 13 | 14 | return NextResponse.json({ tools: toolNames }); 15 | } catch (error) { 16 | console.error('failed to get tools:', error); 17 | return NextResponse.json( 18 | { error: error instanceof Error ? 
error.message : 'Unknown error' }, 19 | { status: 500 } 20 | ); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /mcp-client-nextjs/src/app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mediar-ai/MCP-server-client-computer-use-ai-sdk/4c5866be6b8f55702651cea91dc4b5849cd899b3/mcp-client-nextjs/src/app/favicon.ico -------------------------------------------------------------------------------- /mcp-client-nextjs/src/app/globals.css: -------------------------------------------------------------------------------- 1 | @import "tailwindcss"; 2 | 3 | :root { 4 | --background: #ffffff; 5 | --foreground: #171717; 6 | } 7 | 8 | .dark { 9 | --background: #0a0a0a; 10 | --foreground: #ededed; 11 | } 12 | 13 | @theme inline { 14 | --color-background: var(--background); 15 | --color-foreground: var(--foreground); 16 | --font-sans: var(--font-geist-sans); 17 | --font-mono: var(--font-geist-mono); 18 | } 19 | 20 | body { 21 | background: var(--background); 22 | color: var(--foreground); 23 | font-family: Arial, Helvetica, sans-serif; 24 | } 25 | -------------------------------------------------------------------------------- /mcp-client-nextjs/src/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import type { Metadata } from "next"; 2 | import { Geist, Geist_Mono } from "next/font/google"; 3 | import "./globals.css"; 4 | 5 | const geistSans = Geist({ 6 | variable: "--font-geist-sans", 7 | subsets: ["latin"], 8 | }); 9 | 10 | const geistMono = Geist_Mono({ 11 | variable: "--font-geist-mono", 12 | subsets: ["latin"], 13 | }); 14 | 15 | export const metadata: Metadata = { 16 | title: "MCP Client", 17 | description: "Model Context Protocol Client Interface", 18 | }; 19 | 20 | export default function RootLayout({ 21 | children, 22 | }: Readonly<{ 23 | children: React.ReactNode; 24 | }>) { 25 | return ( 26 
| 27 | 30 | {children} 31 | 32 | 33 | ); 34 | } 35 | -------------------------------------------------------------------------------- /mcp-client-nextjs/src/app/page.tsx: -------------------------------------------------------------------------------- 1 | import MCPInterface from '@/components/mcp-interface'; 2 | 3 | export default function Home() { 4 | return ( 5 |
6 | 7 |
8 | ); 9 | } -------------------------------------------------------------------------------- /mcp-client-nextjs/src/lib/mcp/log-buffer.ts: -------------------------------------------------------------------------------- 1 | // Simple in-memory buffer to store logs for client retrieval 2 | class LogBuffer { 3 | private logs: { timestamp: number; level: string; message: string }[] = []; 4 | private maxLogs = 1000; // Limit buffer size to prevent memory issues 5 | 6 | addLog(level: string, message: string) { 7 | this.logs.push({ 8 | timestamp: Date.now(), 9 | level, 10 | message 11 | }); 12 | 13 | // Trim old logs if buffer gets too large 14 | if (this.logs.length > this.maxLogs) { 15 | this.logs = this.logs.slice(-this.maxLogs); 16 | } 17 | } 18 | 19 | getLogs(since?: number): { timestamp: number; level: string; message: string }[] { 20 | if (since) { 21 | return this.logs.filter(log => log.timestamp > since); 22 | } 23 | return [...this.logs]; 24 | } 25 | 26 | clear() { 27 | this.logs = []; 28 | } 29 | } 30 | 31 | // Export a singleton instance 32 | export const logBuffer = new LogBuffer(); 33 | 34 | // Export a utility function to clear logs when needed 35 | export const clearLogs = () => { 36 | console.log("clearing logs buffer"); 37 | logBuffer.clear(); 38 | }; -------------------------------------------------------------------------------- /mcp-client-nextjs/src/lib/mcp/setup.ts: -------------------------------------------------------------------------------- 1 | // Browser/Next.js compatible version of setup 2 | 3 | // Check if MCP server is running 4 | export async function checkMCPServer() { 5 | try { 6 | console.log("checking mcp server connection..."); 7 | 8 | // Direct connection to 127.0.0.1:8080 9 | const payload = { 10 | jsonrpc: "2.0", 11 | id: "health-check", 12 | method: "initialize", 13 | params: { 14 | clientInfo: { 15 | name: "mcp-client-health-check", 16 | version: "1.0.0" 17 | }, 18 | capabilities: {} 19 | } 20 | }; 21 | 22 | const response = 
await fetch('http://127.0.0.1:8080/mcp', { 23 | method: 'POST', 24 | headers: { 25 | 'Content-Type': 'application/json', 26 | }, 27 | body: JSON.stringify(payload), 28 | }); 29 | 30 | if (!response.ok) { 31 | throw new Error(`HTTP error: ${response.status}`); 32 | } 33 | 34 | const data = await response.json(); 35 | 36 | if (data.result) { 37 | console.log("mcp server is running and responding properly"); 38 | return true; 39 | } 40 | 41 | console.log("mcp server responded but with unexpected format"); 42 | return false; 43 | } catch (error) { // `error` is `unknown` under strict mode, so narrow before reading .message 44 | console.error("failed to connect to mcp server:", error instanceof Error ? error.message : String(error)); 45 | return false; 46 | } 47 | } 48 | 49 | // Setup environment - simplified for Next.js 50 | export async function setupEnvironment() { 51 | // API keys should be handled through Next.js environment variables 52 | // in .env.local files or deployment environment 53 | 54 | const serverRunning = await checkMCPServer(); 55 | if (!serverRunning) { 56 | console.error("mcp server check failed - functionality may be limited"); 57 | // Don't exit process in Next.js - just return false 58 | return false; 59 | } 60 | 61 | return true; 62 | } -------------------------------------------------------------------------------- /mcp-client-nextjs/src/lib/mcp/start-here.ts: -------------------------------------------------------------------------------- 1 | import Anthropic from "@anthropic-ai/sdk"; 2 | import { logBuffer } from './log-buffer'; 3 | 4 | // enhanced logging utility with colors for better readability 5 | export const log = { 6 | info: (msg: string, ...args: unknown[]) => { 7 | console.log(`\x1b[36m[info]\x1b[0m ${msg}`, ...args); 8 | logBuffer.addLog('info', formatLogMessage(msg, args)); 9 | }, 10 | success: (msg: string, ...args: unknown[]) => { 11 | console.log(`\x1b[32m[success]\x1b[0m ${msg}`, ...args); 12 | logBuffer.addLog('success', formatLogMessage(msg, args)); 13 | }, 14 | error: (msg: string, ...args: unknown[]) => { 15 |
console.error(`\x1b[31m[error]\x1b[0m ${msg}`, ...args); 16 | logBuffer.addLog('error', formatLogMessage(msg, args)); 17 | }, 18 | warn: (msg: string, ...args: unknown[]) => { 19 | console.log(`\x1b[33m[warn]\x1b[0m ${msg}`, ...args); 20 | logBuffer.addLog('warn', formatLogMessage(msg, args)); 21 | }, 22 | debug: (msg: string, ...args: unknown[]) => { 23 | console.log(`\x1b[90m[debug]\x1b[0m ${msg}`, ...args); 24 | logBuffer.addLog('debug', formatLogMessage(msg, args)); 25 | }, 26 | // New logging methods for specific UI elements 27 | highlight: (msg: string, ...args: unknown[]) => { 28 | console.log(`\x1b[1m\x1b[35m${msg}\x1b[0m`, ...args); 29 | logBuffer.addLog('highlight', formatLogMessage(msg, args)); 30 | }, 31 | iteration: (msg: string, ...args: unknown[]) => { 32 | console.log(`\x1b[36m${msg}\x1b[0m`, ...args); 33 | logBuffer.addLog('iteration', formatLogMessage(msg, args)); 34 | }, 35 | response: (msg: string) => { 36 | console.log(`\n\x1b[1m\x1b[37mresponse:\x1b[0m ${msg}`); 37 | logBuffer.addLog('response', msg); 38 | }, 39 | tool: (name: string, result: unknown) => { 40 | const truncatedResult = truncateJSON(result); 41 | if (typeof result === 'object' && result !== null && 'isError' in result) { 42 | console.log(`\x1b[31m[tool ${name}]\x1b[0m ${truncatedResult}`); 43 | logBuffer.addLog('tool-error', `[${name}] ${truncatedResult}`); 44 | } else { 45 | console.log(`\x1b[32m[tool ${name}]\x1b[0m ${truncatedResult}`); 46 | logBuffer.addLog('tool', `[${name}] ${truncatedResult}`); 47 | } 48 | } 49 | }; 50 | 51 | // Helper functions 52 | function formatLogMessage(msg: string, args: unknown[]): string { 53 | if (args.length === 0) return msg; 54 | 55 | try { 56 | const formattedArgs = args.map(arg => 57 | typeof arg === 'object' ? 
truncateJSON(arg) : String(arg) 58 | ).join(' '); 59 | return `${msg} ${formattedArgs}`; 60 | } catch (e) { 61 | return `${msg} [args formatting error]`; 62 | } 63 | } 64 | 65 | function truncateJSON(obj: unknown, maxLength = 500): string { 66 | try { 67 | const str = JSON.stringify(obj instanceof Error ? { name: obj.name, message: obj.message } : obj) ?? String(obj); // an Error would serialise to "{}" and undefined to undefined, so handle both explicitly 68 | if (str.length <= maxLength) return str; 69 | return str.substring(0, maxLength) + '... [truncated]'; 70 | } catch (e) { 71 | return '[unserializable object]'; 72 | } 73 | } 74 | 75 | type MCPResponse = { 76 | result?: unknown; 77 | error?: string; 78 | }; 79 | 80 | class DesktopControlClient { 81 | private connected = false; 82 | private serverUrl = ""; 83 | private requestId = 0; 84 | private anthropic = new Anthropic(); 85 | 86 | // Connect to the MCP server via http 87 | async connect(serverUrl: string) { 88 | log.info(`connecting to mcp server: ${serverUrl}`); 89 | 90 | try { 91 | this.serverUrl = serverUrl; 92 | const response = await this.makeRequest("initialize", {}); 93 | 94 | if (response.result) { 95 | this.connected = true; 96 | log.success('mcp client session established successfully'); 97 | return true; 98 | } else { 99 | log.error('failed to establish mcp client session:', response.error); 100 | return false; 101 | } 102 | } catch (error) { 103 | log.error('failed to establish mcp client session:', error); 104 | return false; 105 | } 106 | } 107 | 108 | // Make a JSON-RPC request to the URL passed to connect() (falls back to the local default endpoint) 109 | private async makeRequest(method: string, params: Record<string, unknown>) { 110 | const id = `request-${++this.requestId}`; 111 | 112 | const response = await fetch(this.serverUrl || "http://127.0.0.1:8080/mcp", { 113 | method: "POST", 114 | headers: { 115 | "Content-Type": "application/json" 116 | }, 117 | body: JSON.stringify({ 118 | jsonrpc: "2.0", 119 | id, 120 | method, 121 | params 122 | }) 123 | }); 124 | 125 | return await response.json() as MCPResponse; 126 | } 127 | 128 | // Check if connected 129 | isConnected(): boolean { 130 | return this.connected; 131 | } 132 | 133 | // List available tools 134 |
async listTools() { 135 | if (!this.isConnected()) { 136 | log.error('cannot list tools: not connected'); 137 | throw new Error('Not connected to MCP server'); 138 | } 139 | 140 | try { 141 | // In standard MCP, this would be tools/list 142 | // But our rust server exposes tools through initialize 143 | const response = await this.makeRequest("initialize", {}); 144 | const tools = (response.result as { capabilities: { tools: { functions: unknown[] } } }).capabilities.tools.functions; 145 | 146 | // Create simplified view - one line per tool 147 | log.info('available tools:'); 148 | tools.forEach((tool: Record) => { 149 | const params = tool.parameters as { properties?: Record }; 150 | const propertyNames = Object.keys(params.properties || {}).join(', '); 151 | log.debug(`- ${tool.name}: ${propertyNames}`); 152 | }); 153 | 154 | return { tools }; 155 | } catch (error) { 156 | log.error('failed to list tools:', error); 157 | throw error; 158 | } 159 | } 160 | 161 | // Call a tool 162 | async callTool(name: string, args: Record) { 163 | if (!this.isConnected()) { 164 | log.error('cannot call tool: not connected'); 165 | throw new Error('Not connected to MCP server'); 166 | } 167 | 168 | log.info(`calling tool "${name}" with args: ${JSON.stringify(args)}`); 169 | 170 | try { 171 | const response = await this.makeRequest("executeToolFunction", { 172 | function: name, 173 | arguments: args 174 | }); 175 | 176 | // Check if result exists before logging 177 | if (response && 'result' in response) { 178 | log.tool(name, response.result); 179 | return response.result; 180 | } else { 181 | log.tool(name, response); // Log the entire response if result is missing 182 | return response; // Still return whatever we got 183 | } 184 | } catch (error) { 185 | log.error(`error calling tool "${name}":`, error); 186 | throw error; 187 | } 188 | } 189 | 190 | // Disconnect from the server 191 | async disconnect() { 192 | this.connected = false; 193 | log.success('mcp client session 
closed'); 194 | } 195 | } 196 | 197 | // Export an instance that can be used throughout your application 198 | export const desktopClient = new DesktopControlClient(); 199 | -------------------------------------------------------------------------------- /mcp-client-nextjs/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | darkMode: 'class', 4 | content: [ 5 | "./src/**/*.{js,ts,jsx,tsx}", 6 | ], 7 | theme: { 8 | extend: {}, 9 | }, 10 | plugins: [], 11 | } 12 | -------------------------------------------------------------------------------- /mcp-client-nextjs/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2017", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "noEmit": true, 9 | "esModuleInterop": true, 10 | "module": "esnext", 11 | "moduleResolution": "bundler", 12 | "resolveJsonModule": true, 13 | "isolatedModules": true, 14 | "jsx": "preserve", 15 | "incremental": true, 16 | "plugins": [ 17 | { 18 | "name": "next" 19 | } 20 | ], 21 | "paths": { 22 | "@/*": ["./src/*"] 23 | } 24 | }, 25 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], 26 | "exclude": ["node_modules"] 27 | } 28 | -------------------------------------------------------------------------------- /mcp-server-os-level/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "computer-use-ai-sdk" 3 | version = "0.1.0" 4 | edition = "2021" 5 | description = "Desktop UI automation through accessibility APIs" 6 | authors = ["m13v, louis030195"] 7 | repository = "" 8 | 9 | [dependencies] 10 | # General dependencies 11 | tokio = { version = "1", features = ["full"] } 12 | anyhow = "1.0" 13 | tracing = "0.1" 14 | tracing-subscriber = 
"0.3" 15 | serde = { version = "1.0", features = ["derive"] } 16 | serde_json = "1.0" 17 | thiserror = "2.0" 18 | once_cell = "1.19" 19 | uuid = { version = "1.3", features = ["v4"] } 20 | chrono = "0.4" 21 | 22 | # Server dependencies 23 | axum = "0.6.20" 24 | tower-http = { version = "0.4.0", features = ["cors", "trace"] } 25 | 26 | # Common dependencies that might be needed based on original code 27 | log = "0.4" 28 | 29 | [target.'cfg(target_os = "macos")'.dependencies] 30 | # macOS specific dependencies 31 | accessibility-sys = { git = "https://github.com/eiz/accessibility.git", branch = "master" } 32 | accessibility = { git = "https://github.com/eiz/accessibility.git", branch = "master" } 33 | objc = "0.2.7" 34 | objc-foundation = "0.1.1" 35 | core-foundation = "0.10.0" 36 | core-graphics = { version = "0.24.0", features = ["highsierra"] } 37 | 38 | # Add Windows dependencies if needed 39 | [target.'cfg(target_os = "windows")'.dependencies] 40 | # windows crate with relevant features (if used) 41 | 42 | # Add Linux dependencies if needed 43 | [target.'cfg(target_os = "linux")'.dependencies] 44 | # dbus, atspi, etc. 
(if used) 45 | 46 | [[bin]] 47 | name = "server" 48 | path = "src/bin/server.rs" 49 | 50 | [features] 51 | cargo-clippy = [] 52 | -------------------------------------------------------------------------------- /mcp-server-os-level/examples/test_click_by_role.rs: -------------------------------------------------------------------------------- 1 | use computer_use_ai_sdk::{Selector, UIElement}; 2 | use computer_use_ai_sdk::platforms::AccessibilityEngine; 3 | use computer_use_ai_sdk::platforms::macos::{MacOSEngine, ClickMethodSelection}; 4 | use anyhow::Result; 5 | use tracing::{debug, info}; 6 | 7 | fn main() -> Result<()> { 8 | // Add direct console output 9 | println!("program starting..."); 10 | 11 | // Initialize tracing/logging with more verbosity 12 | tracing_subscriber::fmt() 13 | .with_max_level(tracing::Level::TRACE) 14 | .init(); 15 | 16 | println!("tracing initialized"); 17 | 18 | println!("creating accessibility engine..."); 19 | info!("creating accessibility engine..."); 20 | let engine = MacOSEngine::new(true, true)?; 21 | 22 | // Find Arc browser and focus it (similar to test_role.rs) 23 | info!("finding arc browser and focusing it..."); 24 | 25 | let arc_app = match engine.get_application_by_name("Arc") { 26 | Ok(app) => { 27 | info!("found arc with direct search"); 28 | app 29 | }, 30 | Err(_) => { 31 | info!("direct search failed, trying app list"); 32 | 33 | // Method 2: get all applications and filter 34 | let apps = engine.get_applications()?; 35 | info!("found {} applications", apps.len()); 36 | 37 | // Find arc in the app list 38 | let arc = apps.into_iter().find(|app| { 39 | app.attributes().label.as_ref().map_or(false, |label| 40 | label.contains("Arc")) 41 | }); 42 | 43 | match arc { 44 | Some(app) => { 45 | info!("found arc in app list"); 46 | 47 | // Try to bring it to focus 48 | engine.refresh_accessibility_tree(Some("Arc"))?; 49 | app 50 | }, 51 | None => { 52 | return Err(anyhow::anyhow!("couldn't find arc browser")); 53 | } 54 | } 55 
| } 56 | }; 57 | 58 | info!("looking for first element containing 'whatsapp2llm'..."); 59 | 60 | // Create a selector for any element with text 61 | let selector = Selector::Text("whatsapp2llm".to_string()); 62 | 63 | // Find elements and take only the first match 64 | let elements = engine.find_elements(&selector, Some(&arc_app))?; 65 | info!("search found {} elements, using first match", elements.len()); 66 | 67 | // Click the first matching element if found 68 | if let Some(element) = elements.first() { 69 | let attrs = element.attributes(); 70 | info!("Found match: role={}, label={:?}", attrs.role, attrs.label); 71 | 72 | // Get position info if available 73 | if let Ok((x, y, width, height)) = element.bounds() { 74 | info!(" position: ({}, {}), size: ({}, {})", x, y, width, height); 75 | } 76 | 77 | info!("attempting to click element with text 'whatsapp2llm' using mouse simulation..."); 78 | 79 | // Use mouse simulation specifically 80 | match element.click_with_method(ClickMethodSelection::MouseSimulation) { 81 | Ok(result) => { 82 | info!("mouse simulation click successful"); 83 | info!("click details: {}", result.details); 84 | 85 | if let Some((x, y)) = result.coordinates { 86 | info!("clicked at coordinates: ({:.1}, {:.1})", x, y); 87 | } 88 | 89 | // Add delay between clicks 90 | info!("waiting 500ms before second click..."); 91 | std::thread::sleep(std::time::Duration::from_millis(500)); 92 | 93 | // Second click also with mouse simulation 94 | match element.click_with_method(ClickMethodSelection::MouseSimulation) { 95 | Ok(result2) => { 96 | info!("second mouse simulation click successful"); 97 | info!("click details: {}", result2.details); 98 | 99 | if let Some((x, y)) = result2.coordinates { 100 | info!("second clicked at coordinates: ({:.1}, {:.1})", x, y); 101 | } 102 | }, 103 | Err(e) => { 104 | info!("second click failed: {:?}", e); 105 | } 106 | } 107 | }, 108 | Err(e) => { 109 | info!("first click failed: {:?}", e); 110 | } 111 | } 112 | } else 
{ 113 | info!("no elements with 'whatsapp2llm' text found to click"); 114 | } 115 | 116 | Ok(()) 117 | } 118 | -------------------------------------------------------------------------------- /mcp-server-os-level/examples/test_example.rs: -------------------------------------------------------------------------------- 1 | use computer_use_ai_sdk::{Selector, UIElement}; 2 | use computer_use_ai_sdk::platforms::AccessibilityEngine; 3 | use computer_use_ai_sdk::platforms::macos::MacOSEngine; 4 | 5 | fn print_element_tree(element: &UIElement, depth: usize) { 6 | let attrs = element.attributes(); 7 | let indent = " ".repeat(depth); 8 | let label = attrs.label.unwrap_or_default(); 9 | 10 | println!("{}role: {}, label: {}", indent, attrs.role, label); 11 | 12 | if depth < 3 { // limit depth for readability 13 | if let Ok(children) = element.children() { 14 | for child in children.iter().take(3) { // limit to 3 children 15 | print_element_tree(child, depth + 1); 16 | } 17 | } 18 | } 19 | } 20 | 21 | fn main() -> Result<(), Box> { 22 | // create engine with default settings 23 | println!("creating accessibility engine..."); 24 | let engine = MacOSEngine::new(true, true)?; 25 | 26 | // specifically find and focus the Arc browser 27 | println!("finding arc browser and focusing it..."); 28 | 29 | // method 1: direct application search + focus 30 | let arc_app = match engine.get_application_by_name("Arc") { 31 | Ok(app) => { 32 | println!("found arc with direct search"); 33 | app 34 | }, 35 | Err(_) => { 36 | println!("direct search failed, trying app list"); 37 | 38 | // method 2: get all applications and filter 39 | let apps = engine.get_applications()?; 40 | println!("found {} applications", apps.len()); 41 | 42 | // find arc in the app list 43 | let arc = apps.into_iter().find(|app| { 44 | app.attributes().label.as_ref().map_or(false, |label| 45 | label.contains("Arc")) 46 | }); 47 | 48 | match arc { 49 | Some(app) => { 50 | println!("found arc in app list"); 51 | 52 | // 
try to bring it to focus 53 | engine.refresh_accessibility_tree(Some("Arc"))?; 54 | app 55 | }, 56 | None => { 57 | return Err("couldn't find arc browser".into()); 58 | } 59 | } 60 | } 61 | }; 62 | 63 | println!("arc app info: {:?}", arc_app.attributes()); 64 | 65 | // test with wildcard role 66 | let selector = Selector::Role { 67 | role: "*".to_string(), 68 | name: None 69 | }; 70 | 71 | println!("finding elements with wildcard role in arc..."); 72 | 73 | // find elements within arc 74 | let elements = engine.find_elements(&selector, Some(&arc_app))?; 75 | println!("found {} elements with wildcard role", elements.len()); 76 | 77 | // print info about first few 78 | for (i, element) in elements.iter().take(10).enumerate() { 79 | let attrs = element.attributes(); 80 | println!("{}: role={}, label={:?}", i, attrs.role, attrs.label); 81 | } 82 | 83 | // get direct children as a comparison 84 | println!("\ngetting direct children of arc..."); 85 | if let Ok(children) = arc_app.children() { 86 | println!("arc has {} direct children", children.len()); 87 | 88 | // print first few children 89 | for (i, child) in children.iter().take(10).enumerate() { 90 | let attrs = child.attributes(); 91 | println!("child {}: role={}, label={:?}", i, attrs.role, attrs.label); 92 | 93 | // try to get grandchildren for first couple of children 94 | if i < 2 { 95 | if let Ok(grandchildren) = child.children() { 96 | println!(" child {} has {} children", i, grandchildren.len()); 97 | 98 | // print first few grandchildren 99 | for (j, grandchild) in grandchildren.iter().take(3).enumerate() { 100 | let gc_attrs = grandchild.attributes(); 101 | println!(" grandchild {}.{}: role={}, label={:?}", 102 | i, j, gc_attrs.role, gc_attrs.label); 103 | } 104 | } 105 | } 106 | } 107 | } 108 | 109 | Ok(()) 110 | } -------------------------------------------------------------------------------- /mcp-server-os-level/examples/test_get_all_apps.rs: 
-------------------------------------------------------------------------------- 1 | use tracing::{info, Level}; 2 | use tracing_subscriber::FmtSubscriber; 3 | 4 | use computer_use_ai_sdk::platforms::macos::MacOSEngine; 5 | use computer_use_ai_sdk::platforms::AccessibilityEngine; 6 | 7 | fn main() -> Result<(), Box> { 8 | // initialize logging 9 | let subscriber = FmtSubscriber::builder() 10 | .with_max_level(Level::INFO) 11 | .finish(); 12 | tracing::subscriber::set_global_default(subscriber)?; 13 | 14 | info!("fetching all running applications..."); 15 | 16 | // create accessibility engine - true for use_background_apps, false for activate_app 17 | let engine = MacOSEngine::new(true, false)?; 18 | 19 | // get all applications 20 | let apps = engine.get_applications()?; 21 | 22 | info!("found {} applications", apps.len()); 23 | 24 | // collect app details 25 | let mut app_details = Vec::new(); 26 | for app in apps { 27 | let attrs = app.attributes(); 28 | 29 | // get process id if available 30 | let pid = if let Some(Some(pid_value)) = attrs.properties.get("AXPid") { 31 | if let Some(pid_str) = pid_value.as_str() { 32 | pid_str.parse::().ok() 33 | } else { 34 | None 35 | } 36 | } else { 37 | None 38 | }; 39 | 40 | app_details.push((attrs.label.unwrap_or_default(), pid, attrs.role)); 41 | } 42 | 43 | // sort by name for easier viewing 44 | app_details.sort_by(|a, b| a.0.to_lowercase().cmp(&b.0.to_lowercase())); 45 | 46 | // print application details 47 | info!("application details:"); 48 | for (i, (name, pid, role)) in app_details.iter().enumerate() { 49 | info!("{}. 
'{}' (pid: {:?}, role: {})", i+1, name, pid, role); 50 | } 51 | 52 | Ok(()) 53 | } 54 | -------------------------------------------------------------------------------- /mcp-server-os-level/examples/test_get_arc_all_elements_custom.rs: -------------------------------------------------------------------------------- 1 | use computer_use_ai_sdk::{Selector, UIElement}; 2 | use computer_use_ai_sdk::platforms::AccessibilityEngine; 3 | use computer_use_ai_sdk::platforms::macos::MacOSEngine; 4 | use std::collections::VecDeque; 5 | use std::time::Instant; 6 | 7 | // Collect `root` and all of its descendants using an iterative, queue-based breadth-first traversal 8 | fn collect_all_elements(root: &UIElement) -> Vec<UIElement> { 9 | let mut all_elements = Vec::new(); 10 | let mut queue = VecDeque::new(); 11 | queue.push_back(root.clone()); 12 | 13 | let mut processed = 0; 14 | let start_time = Instant::now(); 15 | 16 | while let Some(element) = queue.pop_front() { 17 | all_elements.push(element.clone()); 18 | 19 | processed += 1; 20 | if processed % 100 == 0 { 21 | println!("processed {} elements so far ({:?} elapsed)", 22 | processed, start_time.elapsed()); 23 | } 24 | 25 | if let Ok(children) = element.children() { 26 | for child in children { 27 | queue.push_back(child); 28 | } 29 | } 30 | } 31 | 32 | println!("collected {} total elements in {:?}", 33 | all_elements.len(), start_time.elapsed()); 34 | all_elements 35 | } 36 | 37 | // Print summary of collected elements 38 | fn print_element_stats(elements: &[UIElement]) { 39 | println!("\nelement statistics:"); 40 | 41 | let mut role_counts = std::collections::HashMap::new(); 42 | 43 | for element in elements { 44 | let attrs = element.attributes(); 45 | let role = attrs.role.clone(); 46 | *role_counts.entry(role).or_insert(0) += 1; 47 | } 48 | 49 | println!("found {} unique element roles", role_counts.len()); 50 | 51 | // Sort roles by count (most frequent first) 52 | let mut roles: Vec<_> = role_counts.into_iter().collect(); 53 | roles.sort_by(|a, b|
b.1.cmp(&a.1)); 54 | 55 | println!("top 10 element roles:"); 56 | for (i, (role, count)) in roles.iter().take(10).enumerate() { 57 | println!(" {}: {} - {} instances", i+1, role, count); 58 | } 59 | } 60 | 61 | fn main() -> Result<(), Box> { 62 | // create engine with default settings 63 | println!("creating accessibility engine..."); 64 | let engine = MacOSEngine::new(true, true)?; 65 | 66 | // find arc browser 67 | println!("finding arc browser..."); 68 | let arc_app = match engine.get_application_by_name("Arc") { 69 | Ok(app) => { 70 | println!("found arc with direct search"); 71 | app 72 | }, 73 | Err(_) => { 74 | println!("direct search failed, trying app list"); 75 | 76 | // get all applications and filter 77 | let apps = engine.get_applications()?; 78 | println!("found {} applications", apps.len()); 79 | 80 | // find arc in the app list 81 | let arc = apps.into_iter().find(|app| { 82 | app.attributes().label.as_ref().map_or(false, |label| 83 | label.contains("Arc")) 84 | }); 85 | 86 | match arc { 87 | Some(app) => { 88 | println!("found arc in app list"); 89 | 90 | // try to bring it to focus 91 | engine.refresh_accessibility_tree(Some("Arc"))?; 92 | app 93 | }, 94 | None => { 95 | return Err("couldn't find arc browser".into()); 96 | } 97 | } 98 | } 99 | }; 100 | 101 | println!("arc app info: {:?}", arc_app.attributes()); 102 | 103 | // Get all elements from Arc 104 | println!("collecting all elements from arc (this may take a while)..."); 105 | let all_elements = collect_all_elements(&arc_app); 106 | 107 | // Print statistics about the elements 108 | print_element_stats(&all_elements); 109 | 110 | // Sample some elements to explore their structure 111 | println!("\nsampling elements by depth:"); 112 | 113 | // Some elements at different depth levels 114 | let depths = [0, 1, 2, 3, 5, 10]; // different depths to explore 115 | 116 | // Build depth map 117 | let mut elements_by_depth = std::collections::HashMap::new(); 118 | let mut queue = VecDeque::new(); 
119 | queue.push_back((arc_app.clone(), 0)); // (element, depth) 120 | 121 | while let Some((element, depth)) = queue.pop_front() { 122 | elements_by_depth.entry(depth).or_insert_with(Vec::new).push(element.clone()); 123 | 124 | if let Ok(children) = element.children() { 125 | for child in children { 126 | queue.push_back((child, depth + 1)); 127 | } 128 | } 129 | } 130 | 131 | // Display elements at each sample depth 132 | for &depth in &depths { 133 | if let Some(elements) = elements_by_depth.get(&depth) { 134 | println!("\n--- depth {} ({} elements) ---", depth, elements.len()); 135 | 136 | // Show first few elements at this depth 137 | for (i, element) in elements.iter().take(3).enumerate() { 138 | let attrs = element.attributes(); 139 | println!(" {}: role={}, label={:?}", i, attrs.role, attrs.label); 140 | 141 | } 142 | } else { 143 | println!("\n--- depth {} (no elements) ---", depth); 144 | } 145 | } 146 | 147 | // Show elements with specific roles 148 | println!("\nsearching for interesting element roles:"); 149 | let interesting_roles = ["AXButton", "AXTextField", "AXLink", "AXWebArea", "AXStaticText"]; 150 | 151 | for role in interesting_roles { 152 | let matching = all_elements.iter() 153 | .filter(|e| e.attributes().role == role) 154 | .collect::>(); 155 | 156 | println!("\nfound {} elements with role '{}'", matching.len(), role); 157 | 158 | // Show sample of these elements 159 | for (i, element) in matching.iter().take(3).enumerate() { 160 | let attrs = element.attributes(); 161 | println!(" {}: label={:?}", i, attrs.label); 162 | 163 | // Try to show description if available 164 | if let Some(desc) = &attrs.description { 165 | println!(" description: {}", desc); 166 | } 167 | 168 | // Try to show value if available 169 | if let Some(val) = &attrs.value { 170 | println!(" value: {:?}", val); 171 | } 172 | } 173 | } 174 | 175 | Ok(()) 176 | } -------------------------------------------------------------------------------- 
/mcp-server-os-level/examples/test_get_arc_all_elements_sdk_count.rs: -------------------------------------------------------------------------------- 1 | use std::time::Instant; 2 | use tracing::{debug, info, Level}; 3 | use tracing_subscriber::FmtSubscriber; 4 | 5 | use computer_use_ai_sdk::platforms::macos::MacOSEngine; 6 | use computer_use_ai_sdk::platforms::AccessibilityEngine; 7 | use computer_use_ai_sdk::Selector; 8 | 9 | fn main() -> Result<(), Box> { 10 | // Initialize logging with DEBUG level to see all logs 11 | let subscriber = FmtSubscriber::builder() 12 | .with_max_level(Level::DEBUG) // Changed from INFO to DEBUG 13 | .finish(); 14 | tracing::subscriber::set_global_default(subscriber)?; 15 | 16 | debug!("debug logging enabled"); 17 | info!("looking for arc browser..."); 18 | 19 | // Create accessibility engine 20 | let engine = MacOSEngine::new(true, false)?; 21 | 22 | // Get Arc browser application 23 | let arc_app = match engine.get_application_by_name("Arc") { 24 | Ok(app) => { 25 | info!("found arc browser!"); 26 | app 27 | } 28 | Err(e) => { 29 | info!("error finding arc browser: {:?} - is it running?", e); 30 | return Ok(()); 31 | } 32 | }; 33 | 34 | info!("starting element collection..."); 35 | let start = Instant::now(); 36 | 37 | // Use String::from("") for the text selector 38 | debug!("calling find_elements with empty text selector"); 39 | let elements = engine.find_elements(&Selector::Text(String::from("")), Some(&arc_app))?; 40 | 41 | let duration = start.elapsed(); 42 | 43 | info!("found {} elements in arc browser in {:?}", elements.len(), duration); 44 | 45 | Ok(()) 46 | } 47 | -------------------------------------------------------------------------------- /mcp-server-os-level/examples/test_get_arc_interactable_elements.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{HashMap, HashSet}; 2 | use std::time::Instant; 3 | use tracing::{debug, info, Level}; 4 | use 
tracing_subscriber::FmtSubscriber; 5 | 6 | use computer_use_ai_sdk::platforms::macos::MacOSEngine; 7 | use computer_use_ai_sdk::platforms::AccessibilityEngine; 8 | use computer_use_ai_sdk::Selector; 9 | 10 | fn main() -> Result<(), Box> { 11 | // Initialize logging with DEBUG level to see all logs 12 | let subscriber = FmtSubscriber::builder() 13 | .with_max_level(Level::DEBUG) 14 | .finish(); 15 | tracing::subscriber::set_global_default(subscriber)?; 16 | 17 | debug!("debug logging enabled"); 18 | info!("looking for arc browser..."); 19 | 20 | // Define interactivity categories with original macOS casing 21 | let definitely_interactable: HashSet<&str> = [ 22 | "AXButton", "AXMenuItem", "AXMenuBarItem", "AXCheckBox", "AXPopUpButton", 23 | "AXTextField", "AXTextArea", "AXComboBox", "AXLink", "AXScrollBar", 24 | "AXSlider", "AXRadioButtonGroup", "AXRadioButton", "AXSearchField", 25 | "AXTabGroup", "AXTabButton", "AXDisclosureButton", "AXStepper", 26 | "AXDisclosureTriangle", "AXIncrementor", "AXProgressIndicator" 27 | ].iter().cloned().collect(); 28 | 29 | let sometimes_interactable: HashSet<&str> = [ 30 | "AXImage", "AXCell", "AXSplitter", "AXRow", "AXStatusItem", 31 | "AXLevelIndicator", "AXColumnHeader", "AXRowHeader", "AXDocument", 32 | "AXDrawer", "AXOutline", "AXOutlineRow", "AXHandleElementProxy", 33 | "AXBrowser", "AXColumn", "AXGrid", "AXWebArea", "AXGenericElementProxy", 34 | "AXValueIndicator" 35 | ].iter().cloned().collect(); 36 | 37 | // Create accessibility engine 38 | let engine = MacOSEngine::new(true, false)?; 39 | 40 | // Get Arc browser application 41 | let arc_app = match engine.get_application_by_name("Arc") { 42 | Ok(app) => { 43 | info!("found arc browser!"); 44 | app 45 | } 46 | Err(e) => { 47 | info!("error finding arc browser: {:?} - is it running?", e); 48 | return Ok(()); 49 | } 50 | }; 51 | 52 | info!("starting element collection..."); 53 | let start = Instant::now(); 54 | 55 | // Use String::from("") for the text selector to get all 
elements 56 | debug!("calling find_elements with empty text selector"); 57 | let elements = engine.find_elements(&Selector::Text(String::from("")), Some(&arc_app))?; 58 | 59 | let duration = start.elapsed(); 60 | 61 | info!("found {} elements in arc browser in {:?}", elements.len(), duration); 62 | 63 | // Now organize elements by role 64 | let mut role_counts: HashMap = HashMap::new(); 65 | 66 | // Count elements by role - keep original casing 67 | for element in &elements { 68 | let role = element.role(); // Remove .to_lowercase() to preserve original casing 69 | *role_counts.entry(role).or_insert(0) += 1; 70 | } 71 | 72 | // Create category counts 73 | let mut definitely_interactable_count = 0; 74 | let mut sometimes_interactable_count = 0; 75 | let mut non_interactable_count = 0; 76 | 77 | // Prepare categorized data for display 78 | let mut definitely_interactable_roles: Vec<(String, usize)> = Vec::new(); 79 | let mut sometimes_interactable_roles: Vec<(String, usize)> = Vec::new(); 80 | let mut non_interactable_roles: Vec<(String, usize)> = Vec::new(); 81 | 82 | // Categorize each role and its count 83 | for (role, count) in &role_counts { 84 | if definitely_interactable.contains(role.as_str()) { 85 | definitely_interactable_roles.push((role.clone(), *count)); 86 | definitely_interactable_count += count; 87 | } else if sometimes_interactable.contains(role.as_str()) { 88 | sometimes_interactable_roles.push((role.clone(), *count)); 89 | sometimes_interactable_count += count; 90 | } else { 91 | non_interactable_roles.push((role.clone(), *count)); 92 | non_interactable_count += count; 93 | } 94 | } 95 | 96 | // Sort each category by count (highest first) 97 | definitely_interactable_roles.sort_by(|a, b| b.1.cmp(&a.1)); 98 | sometimes_interactable_roles.sort_by(|a, b| b.1.cmp(&a.1)); 99 | non_interactable_roles.sort_by(|a, b| b.1.cmp(&a.1)); 100 | 101 | // Display counts by category 102 | info!("element interactivity breakdown:"); 103 | info!(" definitely 
interactable: {} elements ({:.1}%)", 104 | definitely_interactable_count, 105 | (definitely_interactable_count as f64 / elements.len() as f64) * 100.0); 106 | 107 | for (role, count) in &definitely_interactable_roles { 108 | info!(" {}: {}", role, count); 109 | } 110 | 111 | info!(" sometimes interactable: {} elements ({:.1}%)", 112 | sometimes_interactable_count, 113 | (sometimes_interactable_count as f64 / elements.len() as f64) * 100.0); 114 | 115 | for (role, count) in &sometimes_interactable_roles { 116 | info!(" {}: {}", role, count); 117 | } 118 | 119 | info!(" non-interactable: {} elements ({:.1}%)", 120 | non_interactable_count, 121 | (non_interactable_count as f64 / elements.len() as f64) * 100.0); 122 | 123 | for (role, count) in &non_interactable_roles { 124 | info!(" {}: {}", role, count); 125 | } 126 | 127 | Ok(()) 128 | } -------------------------------------------------------------------------------- /mcp-server-os-level/examples/test_get_arc_text_sdk.rs: -------------------------------------------------------------------------------- 1 | use std::time::Instant; 2 | use tracing::{info, Level}; 3 | use tracing_subscriber::FmtSubscriber; 4 | 5 | use computer_use_ai_sdk::platforms::macos::MacOSEngine; 6 | use computer_use_ai_sdk::platforms::AccessibilityEngine; 7 | 8 | fn main() -> Result<(), Box> { 9 | // Initialize logging 10 | let subscriber = FmtSubscriber::builder() 11 | .with_max_level(Level::INFO) 12 | .finish(); 13 | tracing::subscriber::set_global_default(subscriber)?; 14 | 15 | info!("looking for arc browser..."); 16 | 17 | // Create accessibility engine 18 | let engine = MacOSEngine::new(true, false)?; 19 | 20 | // Get Arc browser application 21 | let arc_app = match engine.get_application_by_name("Arc") { 22 | Ok(app) => { 23 | info!("found arc browser!"); 24 | app 25 | } 26 | Err(e) => { 27 | info!("error finding arc browser: {:?} - is it running?", e); 28 | return Ok(()); 29 | } 30 | }; 31 | 32 | info!("extracting text from arc 
browser..."); 33 | let start = Instant::now(); 34 | 35 | // Get text directly using our improved method 36 | let text = arc_app.text(10)?; 37 | 38 | let duration = start.elapsed(); 39 | 40 | info!("extracted text from arc browser in {:?}", duration); 41 | info!("text length: {} characters", text.len()); 42 | info!("text content:\n{}", text); 43 | 44 | Ok(()) 45 | } -------------------------------------------------------------------------------- /mcp-server-os-level/examples/test_get_messages_and_send_message.rs: -------------------------------------------------------------------------------- 1 | use std::time::Instant; 2 | use tracing::{debug, info, Level}; 3 | use tracing_subscriber::FmtSubscriber; 4 | 5 | use computer_use_ai_sdk::platforms::macos::MacOSEngine; 6 | use computer_use_ai_sdk::platforms::AccessibilityEngine; 7 | use computer_use_ai_sdk::Selector; 8 | 9 | fn main() -> Result<(), Box> { 10 | // Initialize logging with DEBUG level to see all logs 11 | let subscriber = FmtSubscriber::builder() 12 | .with_max_level(Level::DEBUG) 13 | .finish(); 14 | tracing::subscriber::set_global_default(subscriber)?; 15 | 16 | debug!("debug logging enabled"); 17 | info!("looking for messages app..."); 18 | 19 | // Create accessibility engine with activate_app set to true 20 | // This helps refresh the accessibility tree and ensures app focus 21 | let engine = MacOSEngine::new(true, true)?; 22 | 23 | // Get Messages application 24 | let messages_app = match engine.get_application_by_name("Messages") { 25 | Ok(app) => { 26 | info!("found messages app!"); 27 | app 28 | } 29 | Err(e) => { 30 | info!("error finding messages app: {:?} - is it running?", e); 31 | return Ok(()); 32 | } 33 | }; 34 | 35 | info!("getting all elements from messages app..."); 36 | let start = Instant::now(); 37 | 38 | // Get all elements 39 | let all_elements = engine.find_elements(&Selector::Text(String::from("")), Some(&messages_app))?; 40 | 41 | info!("found {} total elements in messages app in 
{:?}", all_elements.len(), start.elapsed()); 42 | 43 | // Define sets of definitely and sometimes interactable roles 44 | let definitely_interactable = [ 45 | "AXButton", "AXMenuItem", "AXMenuBarItem", "AXCheckBox", "AXPopUpButton", 46 | "AXTextField", "AXTextArea", "AXComboBox", "AXLink", "AXScrollBar", 47 | ]; 48 | 49 | let sometimes_interactable = [ 50 | "AXImage", "AXCell", "AXSplitter", "AXRow", "AXStatusItem", 51 | ]; 52 | 53 | // Filter for interactable elements with text 54 | let interactable_elements: Vec<_> = all_elements.iter() 55 | .enumerate() 56 | .filter(|(_, element)| { 57 | let role = element.role(); 58 | let text = element.text(10).unwrap_or_default(); 59 | 60 | // Check if it has text 61 | let has_text = !text.is_empty(); 62 | 63 | // Check if it's interactable 64 | let is_interactable = definitely_interactable.contains(&role.as_str()) || 65 | sometimes_interactable.contains(&role.as_str()); 66 | 67 | has_text && is_interactable 68 | }) 69 | .collect(); 70 | 71 | info!("found {} interactable elements with text", interactable_elements.len()); 72 | 73 | // Log the first 10 interactable elements 74 | for (i, (original_index, element)) in interactable_elements.iter().take(10).enumerate() { 75 | let role = element.role(); 76 | let text = element.text(10).unwrap_or_default(); 77 | 78 | // Truncate text if it's too long for logging 79 | let text_preview = if text.len() > 50 { 80 | format!("{}...", &text[..47]) 81 | } else { 82 | text 83 | }; 84 | 85 | info!("[{}] index={}, role={}, text={}", i, original_index, role, text_preview); 86 | } 87 | 88 | // Look for text input field specifically with "Message" text 89 | info!("looking for text field with 'Message' text..."); 90 | let message_text_field = interactable_elements.iter() 91 | .find(|(_, element)| { 92 | let role = element.role(); 93 | let text = element.text(10).unwrap_or_default(); 94 | 95 | // Check if it's a text field/area containing "Message" 96 | (role == "textfield" || role == "textarea" || 
role == "AXTextField" || role == "AXTextArea") 97 | && text.contains("Message") 98 | }); 99 | 100 | if let Some((original_index, element)) = message_text_field { 101 | info!("found message text field at original index: {}", original_index); 102 | 103 | // Type "hello world" with smile emoji 104 | info!("typing message into text field..."); 105 | match element.type_text("hello world 😊") { 106 | Ok(_) => info!("successfully typed message"), 107 | Err(e) => info!("failed to type message: {:?}", e), 108 | } 109 | 110 | // Brief pause to see the text 111 | std::thread::sleep(std::time::Duration::from_millis(500)); 112 | 113 | // Press return key to send the message 114 | info!("pressing return key to send message..."); 115 | match element.press_key("return") { 116 | Ok(_) => info!("successfully pressed return key"), 117 | Err(e) => info!("failed to press return key: {:?}", e), 118 | } 119 | } else { 120 | info!("no message text field found. trying another approach..."); 121 | 122 | // Try using selector to find message text field directly 123 | info!("searching for message text field using selector..."); 124 | match engine.find_element(&Selector::Text(String::from("Message")), Some(&messages_app)) { 125 | Ok(element) => { 126 | info!("found message text field using selector"); 127 | info!("typing message into text field..."); 128 | match element.type_text("hello world 😊") { 129 | Ok(_) => info!("successfully typed message"), 130 | Err(e) => info!("failed to type message: {:?}", e), 131 | } 132 | 133 | std::thread::sleep(std::time::Duration::from_millis(500)); 134 | 135 | info!("pressing return key to send message..."); 136 | match element.press_key("return") { 137 | Ok(_) => info!("successfully pressed return key"), 138 | Err(e) => info!("failed to press return key: {:?}", e), 139 | } 140 | }, 141 | Err(e) => { 142 | info!("failed to find message text field using selector: {:?}", e); 143 | 144 | // Fall back to original behavior if needed 145 | // Check if we have 
enough elements to access index 60 (original functionality) 146 | let target_index = 63; 147 | if interactable_elements.len() > target_index { 148 | let (original_index, element) = &interactable_elements[target_index]; 149 | info!("pressing Return key on element at index {} (original index: {})", target_index, original_index); 150 | 151 | // Try to press Return key on the element 152 | match element.press_key("return") { 153 | Ok(_) => info!("successfully pressed Return key on element"), 154 | Err(e) => info!("failed to press Return key: {:?}", e), 155 | } 156 | } else { 157 | info!("not enough elements to access index {}, only have {}", target_index, interactable_elements.len()); 158 | } 159 | } 160 | } 161 | } 162 | 163 | info!("test completed successfully"); 164 | Ok(()) 165 | } -------------------------------------------------------------------------------- /mcp-server-os-level/examples/test_get_messages_text_sdk.rs: -------------------------------------------------------------------------------- 1 | use std::time::Instant; 2 | use tracing::{info, Level}; 3 | use tracing_subscriber::FmtSubscriber; 4 | 5 | use computer_use_ai_sdk::platforms::macos::MacOSEngine; 6 | use computer_use_ai_sdk::platforms::AccessibilityEngine; 7 | 8 | fn main() -> Result<(), Box> { 9 | // Initialize logging 10 | let subscriber = FmtSubscriber::builder() 11 | .with_max_level(Level::INFO) 12 | .finish(); 13 | tracing::subscriber::set_global_default(subscriber)?; 14 | 15 | info!("looking for messages app..."); 16 | 17 | // Create accessibility engine 18 | let engine = MacOSEngine::new(true, false)?; 19 | 20 | // Get Messages application 21 | let messages_app = match engine.get_application_by_name("Messages") { 22 | Ok(app) => { 23 | info!("found messages app!"); 24 | app 25 | } 26 | Err(e) => { 27 | info!("error finding messages app: {:?} - is it running?", e); 28 | return Ok(()); 29 | } 30 | }; 31 | 32 | info!("extracting text from messages app..."); 33 | let start = Instant::now(); 34 
| 35 | // Get text directly using our improved method 36 | let text = messages_app.text(10)?; 37 | 38 | let duration = start.elapsed(); 39 | 40 | info!("extracted text from messages app in {:?}", duration); 41 | info!("text length: {} characters", text.len()); 42 | info!("text content:\n{}", text); 43 | 44 | Ok(()) 45 | } -------------------------------------------------------------------------------- /mcp-server-os-level/src/bin/handlers/click_by_index.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | use axum::{ 3 | extract::{Json, State}, 4 | http::StatusCode, 5 | response::Json as JsonResponse, 6 | }; 7 | use serde::Serialize; 8 | use serde_json::json; 9 | use tracing::{debug, error}; 10 | use computer_use_ai_sdk::Desktop; 11 | 12 | use crate::types::{AppState, ClickByIndexRequest, ClickByIndexResponse, ListElementsAndAttributesResponse}; 13 | use crate::refresh_elements_and_attributes_after_action; 14 | 15 | // Response type that combines both click result and elements 16 | #[derive(Serialize)] 17 | pub struct ClickByIndexWithElementsResponse { 18 | pub click: ClickByIndexResponse, 19 | pub elements: Option, 20 | } 21 | 22 | pub async fn click_by_index_handler( 23 | State(state): State>, 24 | Json(request): Json, 25 | ) -> Result, (StatusCode, JsonResponse)> { 26 | // Get elements from cache 27 | let elements_opt = { 28 | let cache = state.element_cache.lock().await; 29 | cache.clone() 30 | }; 31 | 32 | // Check if cache exists 33 | if elements_opt.is_none() { 34 | return Err(( 35 | StatusCode::BAD_REQUEST, 36 | JsonResponse(json!({ 37 | "error": "no element cache found - you must call listInteractableElementsByIndex first to index the elements before using by-index operations" 38 | })), 39 | )); 40 | } 41 | 42 | match elements_opt { 43 | Some((elements, timestamp, app_name)) if timestamp.elapsed() < std::time::Duration::from_secs(30) => { 44 | // Use element_index directly 45 | if 
request.element_index < elements.len() { 46 | let element = &elements[request.element_index]; 47 | 48 | // Step 1: Try inputControl first (AppleScript) if bounds are available 49 | let bounds = element.bounds(); 50 | let input_control_success = if let Ok((x, y, width, height)) = bounds { 51 | debug!("attempting to click element at position [{}, {}] using inputControl", 52 | x + width/2.0, y + height/2.0); 53 | 54 | // Activate the app first 55 | debug!("activating app: {}", app_name); 56 | let desktop = match Desktop::new(false, true) { 57 | Ok(d) => d, 58 | Err(e) => { 59 | error!("failed to initialize desktop automation: {}", e); 60 | return Err(( 61 | StatusCode::INTERNAL_SERVER_ERROR, 62 | JsonResponse(json!({ 63 | "error": format!("failed to initialize desktop automation: {}", e) 64 | })), 65 | )); 66 | } 67 | }; 68 | 69 | // Get and activate the application 70 | let _ = match desktop.application(&app_name) { 71 | Ok(app) => app, 72 | Err(e) => { 73 | error!("application not found: {}", e); 74 | return Err(( 75 | StatusCode::NOT_FOUND, 76 | JsonResponse(json!({ 77 | "error": format!("application not found: {}", e) 78 | })), 79 | )); 80 | } 81 | }; 82 | 83 | // Calculate center of element 84 | let center_x = x + width/2.0; 85 | let center_y = y + height/2.0; 86 | 87 | use std::process::Command; 88 | 89 | // Use AppleScript to click at position 90 | let script = format!( 91 | "tell application \"System Events\" to click at {{round {}, round {}}}", 92 | center_x, center_y 93 | ); 94 | 95 | match Command::new("osascript").arg("-e").arg(script).output() { 96 | Ok(_) => { 97 | debug!("successfully clicked element using inputControl at [{}, {}]", 98 | center_x, center_y); 99 | true 100 | }, 101 | Err(e) => { 102 | debug!("failed to click using inputControl: {} - falling back to accessibility API", e); 103 | false 104 | } 105 | } 106 | } else { 107 | debug!("could not get element bounds - skipping inputControl approach"); 108 | false 109 | }; 110 | 111 | // Step 2: If 
inputControl failed, use accessibility API as fallback 112 | if !input_control_success { 113 | debug!("using accessibility API for clicking"); 114 | match element.click() { 115 | Ok(_) => { 116 | debug!("successfully clicked element using accessibility API"); 117 | }, 118 | Err(e) => { 119 | error!("failed to click element with accessibility API: {}", e); 120 | return Err(( 121 | StatusCode::INTERNAL_SERVER_ERROR, 122 | JsonResponse(json!({ 123 | "error": format!("failed to click element using both inputControl and accessibility API: {}", e) 124 | })), 125 | )); 126 | } 127 | } 128 | } 129 | 130 | // Create the success response based on which method worked 131 | let method_used = if input_control_success { "AppleScript" } else { "Accessibility API" }; 132 | let click_response = ClickByIndexResponse { 133 | success: true, 134 | message: format!( 135 | "successfully clicked element with role: {} (using {} method)", 136 | element.role(), method_used 137 | ), 138 | elements: None, // add the missing field 139 | }; 140 | 141 | // Get refreshed elements using the helper function 142 | let elements_response = refresh_elements_and_attributes_after_action(state, app_name.clone(), 500).await; 143 | 144 | // Return combined response 145 | Ok(JsonResponse(ClickByIndexWithElementsResponse { 146 | click: click_response, 147 | elements: elements_response, 148 | })) 149 | } else { 150 | error!( 151 | "element index out of bounds: {} (max: {})", 152 | request.element_index, 153 | elements.len() - 1 154 | ); 155 | Err(( 156 | StatusCode::BAD_REQUEST, 157 | JsonResponse(json!({ 158 | "error": format!("element index out of bounds: {} (max: {})", 159 | request.element_index, elements.len() - 1) 160 | })), 161 | )) 162 | } 163 | } 164 | Some(_) => { 165 | // Cache entry expired 166 | Err(( 167 | StatusCode::BAD_REQUEST, 168 | JsonResponse(json!({ 169 | "error": "cache entry expired, please list elements again" 170 | })), 171 | )) 172 | } 173 | None => { 174 | // Cache miss 175 | Err(( 
176 | StatusCode::NOT_FOUND, 177 | JsonResponse(json!({ 178 | "error": "no cache entry found, please list elements again" 179 | })), 180 | )) 181 | } 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/bin/handlers/input_control.rs: -------------------------------------------------------------------------------- 1 | use axum::{ 2 | extract::{Json, State}, 3 | http::StatusCode, 4 | response::Json as JsonResponse, 5 | }; 6 | use serde_json; 7 | use std::process::Command; 8 | use std::sync::Arc; 9 | use tokio::time::Duration; 10 | use tracing::{error, info}; 11 | 12 | use crate::types::*; 13 | use crate::AppState; 14 | 15 | // Define the handler for input control 16 | pub async fn input_control_handler( 17 | State(state): State>, 18 | Json(payload): Json, 19 | ) -> Result, (StatusCode, JsonResponse)> { 20 | info!("input control handler {:?}", payload); 21 | 22 | // Execute appropriate input action 23 | match payload.action { 24 | InputAction::KeyPress(key) => { 25 | // Add key name to key code mapping 26 | let key_code = match key.as_str() { 27 | "Tab" => "48", // Tab key code 28 | "Return" => "36", // Enter/Return key code 29 | "Space" => "49", // Space key code 30 | "Escape" => "53", // Escape key code 31 | // Add more key mappings as needed 32 | _ => key.as_str(), // Use as-is if it's already a number 33 | }; 34 | 35 | let script = format!("tell application \"System Events\" to key code {}", key_code); 36 | info!("executing key press script: {}", script); 37 | if let Err(e) = Command::new("osascript").arg("-e").arg(script).output() { 38 | error!("failed to press key: {}", e); 39 | return Err(( 40 | StatusCode::INTERNAL_SERVER_ERROR, 41 | JsonResponse(serde_json::json!({"error": format!("failed to press key: {}", e)})), 42 | )); 43 | } 44 | } 45 | InputAction::MouseMove { x, y } => { 46 | // Implement mouse move 47 | let script = format!("tell application \"System Events\" to set mouse 
position to {{{}, {}}}", x, y); 48 | if let Err(e) = Command::new("osascript").arg("-e").arg(script).output() { 49 | error!("failed to move mouse: {}", e); 50 | return Err(( 51 | StatusCode::INTERNAL_SERVER_ERROR, 52 | JsonResponse(serde_json::json!({"error": format!("failed to move mouse: {}", e)})), 53 | )); 54 | } 55 | } 56 | InputAction::MouseClick(button) => { 57 | // Implement mouse click 58 | let button_num = match button.as_str() { 59 | "left" => 1, 60 | "right" => 2, 61 | _ => { 62 | error!("unsupported mouse button: {}", button); 63 | return Err(( 64 | StatusCode::BAD_REQUEST, 65 | JsonResponse(serde_json::json!({"error": format!("unsupported mouse button: {}", button)})), 66 | )); 67 | } 68 | }; 69 | 70 | let script = format!("tell application \"System Events\" to click button {}", button_num); 71 | if let Err(e) = Command::new("osascript").arg("-e").arg(script).output() { 72 | error!("failed to click mouse: {}", e); 73 | return Err(( 74 | StatusCode::INTERNAL_SERVER_ERROR, 75 | JsonResponse(serde_json::json!({"error": format!("failed to click mouse: {}", e)})), 76 | )); 77 | } 78 | } 79 | InputAction::WriteText(text) => { 80 | // Implement text writing 81 | let script = format!("tell application \"System Events\" to keystroke \"{}\"", text); 82 | if let Err(e) = Command::new("osascript").arg("-e").arg(script).output() { 83 | error!("failed to write text: {}", e); 84 | return Err(( 85 | StatusCode::INTERNAL_SERVER_ERROR, 86 | JsonResponse(serde_json::json!({"error": format!("failed to write text: {}", e)})), 87 | )); 88 | } 89 | } 90 | } 91 | 92 | // Get elements from cache to find the active application 93 | let elements_response = { 94 | let cache = state.element_cache.lock().await; 95 | match &*cache { 96 | Some((_, _, cached_app_name)) => { 97 | // We have a cached app name, so let's refresh elements 98 | info!("refreshing elements for app: {}", cached_app_name); 99 | refresh_elements_and_attributes_after_action(state.clone(), 
cached_app_name.clone(), 500).await 100 | } 101 | None => { 102 | // No cache available, don't try to refresh elements 103 | info!("no element cache found, skipping element refresh"); 104 | None 105 | } 106 | } 107 | }; 108 | 109 | // Return combined response 110 | Ok(JsonResponse(InputControlWithElementsResponse { 111 | input: InputControlResponse { success: true }, 112 | elements: elements_response, 113 | })) 114 | } 115 | 116 | // Updated helper function to refresh elements after an action 117 | async fn refresh_elements_and_attributes_after_action( 118 | state: Arc, 119 | app_name: String, 120 | delay_ms: u64 121 | ) -> Option { 122 | // Small delay to allow UI to update after action 123 | info!("waiting for ui to update after action before listing elements and attributes"); 124 | tokio::time::sleep(Duration::from_millis(delay_ms)).await; 125 | 126 | // Create request to refresh the elements list 127 | let elements_request = ListInteractableElementsRequest { 128 | app_name, 129 | max_elements: None, 130 | use_background_apps: Some(false), 131 | activate_app: Some(true), 132 | }; 133 | 134 | // Call the new list elements handler 135 | match crate::handlers::list_elements_and_attributes::list_elements_and_attributes_handler(State(state), Json(elements_request)).await { 136 | Ok(response) => Some(response.0), 137 | Err(e) => { 138 | // Log the error but don't fail the whole request 139 | error!("failed to list elements and attributes after action: {:?}", e); 140 | None 141 | } 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/bin/handlers/mod.rs: -------------------------------------------------------------------------------- 1 | // Export all handlers 2 | pub mod mcp; 3 | pub mod click_by_index; 4 | pub mod type_by_index; 5 | pub mod press_key_by_index; 6 | pub mod open_application; 7 | pub mod open_url; 8 | pub mod input_control; 9 | pub mod utils; 10 | pub mod 
list_elements_and_attributes; 11 | 12 | // No re-exports since they're not being used 13 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/bin/handlers/open_application.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | use axum::{ 3 | extract::{Json, State}, 4 | http::StatusCode, 5 | response::Json as JsonResponse, 6 | }; 7 | use serde::Serialize; 8 | use serde_json::json; 9 | use computer_use_ai_sdk::Desktop; 10 | 11 | use crate::types::{AppState, OpenApplicationRequest, OpenApplicationResponse, ListElementsAndAttributesResponse}; 12 | use crate::refresh_elements_and_attributes_after_action; 13 | 14 | // Response type that combines both results 15 | #[derive(Serialize)] 16 | pub struct OpenApplicationWithElementsResponse { 17 | pub application: OpenApplicationResponse, 18 | pub elements: Option, 19 | } 20 | 21 | pub async fn open_application_handler( 22 | State(state): State>, 23 | Json(request): Json, 24 | ) -> Result, (StatusCode, JsonResponse)> { 25 | // Create Desktop automation instance 26 | let desktop = match Desktop::new(false, true) { 27 | Ok(desktop) => desktop, 28 | Err(err) => { 29 | return Err(( 30 | StatusCode::INTERNAL_SERVER_ERROR, 31 | JsonResponse(json!({"error": format!("failed to initialize automation: {}", err)})), 32 | )); 33 | } 34 | }; 35 | 36 | // Open the application 37 | match desktop.open_application(&request.app_name) { 38 | Ok(_) => { 39 | // Application opened successfully 40 | let app_response = OpenApplicationResponse { 41 | success: true, 42 | message: format!("successfully opened application: {}", request.app_name), 43 | }; 44 | 45 | // Get refreshed elements using the helper function - use a longer delay for app startup 46 | let mut elements_response = refresh_elements_and_attributes_after_action(state.clone(), request.app_name.clone(), 1000).await; 47 | 48 | // If elements retrieval failed, wait 500ms and 
retry once 49 | if elements_response.is_none() { 50 | log::info!("elements retrieval failed for {}, retrying after 500ms", request.app_name); 51 | tokio::time::sleep(tokio::time::Duration::from_millis(500)).await; 52 | elements_response = refresh_elements_and_attributes_after_action(state, request.app_name.clone(), 500).await; 53 | 54 | if elements_response.is_none() { 55 | log::warn!("elements retrieval failed for {} even after retry", request.app_name); 56 | } 57 | } 58 | 59 | // Return combined response 60 | Ok(JsonResponse(OpenApplicationWithElementsResponse { 61 | application: app_response, 62 | elements: elements_response, 63 | })) 64 | }, 65 | Err(err) => Err(( 66 | StatusCode::BAD_REQUEST, 67 | JsonResponse(json!({"error": format!("failed to open application: {}", err)})), 68 | )), 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/bin/handlers/open_url.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | use axum::{ 3 | extract::{Json, State}, 4 | http::StatusCode, 5 | response::Json as JsonResponse, 6 | }; 7 | use serde::{Deserialize, Serialize}; 8 | use serde_json::json; 9 | use tracing::{debug, error, info}; 10 | use computer_use_ai_sdk::Desktop; 11 | 12 | use crate::types::AppState; 13 | use crate::handlers::utils::refresh_elements_and_attributes_after_action; 14 | use crate::types::ListElementsAndAttributesResponse; 15 | 16 | #[derive(Deserialize, Clone)] 17 | pub struct OpenUrlRequest { 18 | pub url: String, 19 | pub browser: Option, 20 | } 21 | 22 | #[derive(Serialize)] 23 | pub struct OpenUrlResponse { 24 | pub success: bool, 25 | pub message: String, 26 | } 27 | 28 | // First, create a new response type that combines both results 29 | #[derive(Serialize)] 30 | pub struct OpenUrlWithElementsResponse { 31 | pub url: OpenUrlResponse, 32 | pub elements: Option, 33 | } 34 | 35 | pub async fn open_url_handler( 36 | 
State(state): State>, 37 | Json(request): Json, 38 | ) -> Result, (StatusCode, JsonResponse)> { 39 | info!("handling request to open url: {}", request.url); 40 | 41 | // Create Desktop automation instance 42 | let desktop = match Desktop::new(false, true) { 43 | Ok(desktop) => desktop, 44 | Err(err) => { 45 | error!("failed to initialize automation: {}", err); 46 | return Err(( 47 | StatusCode::INTERNAL_SERVER_ERROR, 48 | JsonResponse(json!({"error": format!("failed to initialize automation: {}", err)})), 49 | )); 50 | } 51 | }; 52 | 53 | // Open the URL 54 | let browser_ref = request.browser.as_deref(); 55 | 56 | if let Some(browser) = browser_ref { 57 | debug!("opening url {} in specified browser: {}", request.url, browser); 58 | } else { 59 | debug!("opening url {} in system default browser", request.url); 60 | } 61 | 62 | match desktop.open_url(&request.url, browser_ref) { 63 | Ok(_) => { 64 | // Wait for browser to start/activate 65 | tokio::time::sleep(tokio::time::Duration::from_millis(800)).await; 66 | 67 | // Determine which browser to use for refreshing elements 68 | let browser_for_refresh: Option = if let Some(browser) = &request.browser { 69 | // If user specified a browser, use that 70 | info!("using specified browser for refresh: {}", browser); 71 | 72 | // Map common browser names to possible variations 73 | let browser_search = match browser.as_str() { 74 | "Google Chrome" => "Chrome", 75 | "Microsoft Edge" => "Edge", 76 | _ => browser.as_str(), 77 | }; 78 | 79 | debug!("searching for browser as: {}", browser_search); 80 | 81 | if desktop.application(browser_search).is_ok() { 82 | info!("found browser with name: {}", browser_search); 83 | Some(browser_search.to_string()) 84 | } else { 85 | info!("could not find browser with name: {}", browser_search); 86 | None 87 | } 88 | } else { 89 | // Try to detect which browser is running 90 | let likely_browsers = ["Arc", "Safari", "Chrome", "Firefox", "Edge", "Opera", "Brave"]; 91 | let mut detected = None; 
92 | 93 | for browser in likely_browsers.iter() { 94 | match desktop.application(browser) { 95 | Ok(_) => { 96 | info!("detected browser for refresh: {}", browser); 97 | detected = Some(browser.to_string()); 98 | break; 99 | }, 100 | Err(_) => continue, 101 | } 102 | } 103 | 104 | // If we couldn't detect a specific browser, we don't do element refresh 105 | if detected.is_none() { 106 | info!("could not detect which browser was used - skipping element refresh"); 107 | } 108 | 109 | detected 110 | }; 111 | 112 | info!("successfully opened url: {}", request.url); 113 | 114 | // Create success response 115 | let url_response = OpenUrlResponse { 116 | success: true, 117 | message: if let Some(browser) = &browser_for_refresh { 118 | format!("successfully opened URL: {} in browser: {}", request.url, browser) 119 | } else { 120 | format!("successfully opened URL: {} in default browser (unknown)", request.url) 121 | }, 122 | }; 123 | 124 | // Only attempt to refresh elements if we know which browser to target 125 | let elements_response = if let Some(browser) = browser_for_refresh { 126 | refresh_elements_and_attributes_after_action(state, browser, 2000).await 127 | } else { 128 | // If we don't know which browser was used, don't try to refresh elements 129 | None 130 | }; 131 | 132 | // Return combined response 133 | Ok(JsonResponse(OpenUrlWithElementsResponse { 134 | url: url_response, 135 | elements: elements_response, 136 | })) 137 | }, 138 | Err(err) => { 139 | error!("failed to open url {}: {}", request.url, err); 140 | Err(( 141 | StatusCode::BAD_REQUEST, 142 | JsonResponse(json!({"error": format!("failed to open URL: {}", err)})), 143 | )) 144 | }, 145 | } 146 | } 147 | /* 148 | 149 | curl -X POST http://localhost:8080/api/open-url \ 150 | -H "Content-Type: application/json" \ 151 | -d '{"url": "https://twitter.com"}' \ 152 | | jq -r '"url opening:", 153 | " success: \(.url.success)", 154 | " message: \(.url.message)", 155 | "\nelements: \(if .elements then 156 | 
if .elements.elements then 157 | .elements.elements | map("\n [\(.index)]: \(.role)\(if .text then " \"\(.text)\"" else "" end)") | join("") 158 | else 159 | "\n no elements found" 160 | end 161 | else 162 | "\n no elements info available" 163 | end)", 164 | "\nstats summary: \(if .elements then 165 | "\n count: \(.elements.stats.count)", 166 | " with_text_count: \(.elements.stats.with_text_count)", 167 | " without_text_count: \(.elements.stats.without_text_count)", 168 | " excluded_count: \(.elements.stats.excluded_count)", 169 | " processing time: \(.elements.processing_time_seconds)s", 170 | " cache_id: \(.elements.cache_info.cache_id)", 171 | " expires_at: \(.elements.cache_info.expires_at)", 172 | " element_count: \(.elements.cache_info.element_count)" 173 | else 174 | "\n no stats available" 175 | end)"' 176 | 177 | */ -------------------------------------------------------------------------------- /mcp-server-os-level/src/bin/handlers/press_key_by_index.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | use axum::{ 3 | extract::{Json, State}, 4 | http::StatusCode, 5 | response::Json as JsonResponse, 6 | }; 7 | use serde::Serialize; 8 | use serde_json::json; 9 | use tracing::{debug, error}; 10 | use computer_use_ai_sdk::Desktop; 11 | 12 | use crate::types::{AppState, PressKeyByIndexRequest, PressKeyByIndexResponse, ListElementsAndAttributesResponse}; 13 | use crate::refresh_elements_and_attributes_after_action; 14 | 15 | // Response type that combines both results 16 | #[derive(Debug, Serialize)] 17 | pub struct PressKeyByIndexWithElementsResponse { 18 | pub press_key: PressKeyByIndexResponse, 19 | pub elements: Option, 20 | } 21 | 22 | pub async fn press_key_by_index_handler( 23 | State(state): State>, 24 | Json(request): Json, 25 | ) -> Result, (StatusCode, JsonResponse)> { 26 | debug!("pressing key combination by index: element_index={}, key_combo={}", 27 | request.element_index, 
request.key_combo); 28 | 29 | // Get elements from cache 30 | let elements_opt = { 31 | let cache = state.element_cache.lock().await; 32 | cache.clone() 33 | }; 34 | 35 | // Check if cache exists 36 | if elements_opt.is_none() { 37 | return Err(( 38 | StatusCode::BAD_REQUEST, 39 | JsonResponse(json!({ 40 | "error": "no element cache found - you must call listInteractableElementsByIndex first to index the elements before using by-index operations" 41 | })), 42 | )); 43 | } 44 | 45 | match elements_opt { 46 | Some((elements, timestamp, app_name)) if timestamp.elapsed() < std::time::Duration::from_secs(30) => { 47 | // Activate the app first 48 | debug!("activating app: {}", app_name); 49 | let desktop = match Desktop::new(false, true) { 50 | Ok(d) => d, 51 | Err(e) => { 52 | error!("failed to initialize desktop automation: {}", e); 53 | return Err(( 54 | StatusCode::INTERNAL_SERVER_ERROR, 55 | JsonResponse(json!({ 56 | "error": format!("failed to initialize desktop automation: {}", e) 57 | })), 58 | )); 59 | } 60 | }; 61 | 62 | // Get and activate the application 63 | let _ = match desktop.application(&app_name) { 64 | Ok(app) => app, 65 | Err(e) => { 66 | error!("application not found: {}", e); 67 | return Err(( 68 | StatusCode::NOT_FOUND, 69 | JsonResponse(json!({ 70 | "error": format!("application not found: {}", e) 71 | })), 72 | )); 73 | } 74 | }; 75 | 76 | // Use element_index directly 77 | if request.element_index < elements.len() { 78 | let element = &elements[request.element_index]; 79 | 80 | // Step 1: Try to click the element first to focus it 81 | if let Err(e) = element.click() { 82 | debug!("failed to click element before key press: {}", e); 83 | // Continue anyway 84 | } 85 | 86 | // Small delay to ensure element is focused 87 | std::thread::sleep(std::time::Duration::from_millis(100)); 88 | 89 | // Step 2: Try inputControl first (AppleScript) 90 | debug!("attempting to press key '{}' using inputControl (AppleScript)", request.key_combo); 91 | 92 | use 
std::process::Command; 93 | 94 | // Convert key combo to AppleScript format 95 | let key_script = convert_key_combo_to_applescript(&request.key_combo); 96 | 97 | let input_control_success = match Command::new("osascript").arg("-e").arg(key_script).output() { 98 | Ok(_) => { 99 | debug!("successfully pressed key '{}' using inputControl", request.key_combo); 100 | true 101 | }, 102 | Err(e) => { 103 | debug!("failed to press key using inputControl: {} - falling back to accessibility API", e); 104 | false 105 | } 106 | }; 107 | 108 | // Step 3: If inputControl failed, use accessibility API as fallback 109 | if !input_control_success { 110 | debug!("falling back to accessibility API for key press"); 111 | match element.press_key(&request.key_combo) { 112 | Ok(_) => { 113 | debug!("successfully pressed key '{}' using accessibility API", request.key_combo); 114 | }, 115 | Err(e) => { 116 | error!("failed to press key on element with accessibility API: {}", e); 117 | return Err(( 118 | StatusCode::INTERNAL_SERVER_ERROR, 119 | JsonResponse(json!({ 120 | "error": format!("failed to press key using both inputControl and accessibility API: {}", e) 121 | })), 122 | )); 123 | } 124 | } 125 | } 126 | 127 | // Create the success response based on which method worked 128 | let method_used = if input_control_success { "AppleScript" } else { "Accessibility API" }; 129 | let press_key_response = PressKeyByIndexResponse { 130 | success: true, 131 | message: format!( 132 | "successfully pressed key combination '{}' on element with role: {} (using {} method)", 133 | request.key_combo, 134 | element.role(), 135 | method_used 136 | ), 137 | }; 138 | 139 | // Get refreshed elements using the helper function 140 | let elements_response = refresh_elements_and_attributes_after_action(state, app_name.clone(), 500).await; 141 | 142 | // Return combined response 143 | Ok(JsonResponse(PressKeyByIndexWithElementsResponse { 144 | press_key: press_key_response, 145 | elements: elements_response, 146 
| })) 147 | } else { 148 | error!( 149 | "element index out of bounds: {} (max: {})", 150 | request.element_index, 151 | elements.len() - 1 152 | ); 153 | Err(( 154 | StatusCode::BAD_REQUEST, 155 | JsonResponse(json!({ 156 | "error": format!("element index out of bounds: {} (max: {})", 157 | request.element_index, elements.len() - 1) 158 | })), 159 | )) 160 | } 161 | } 162 | Some(_) => { 163 | // Cache entry expired 164 | Err(( 165 | StatusCode::BAD_REQUEST, 166 | JsonResponse(json!({ 167 | "error": "cache entry expired, please list elements again" 168 | })), 169 | )) 170 | } 171 | None => { 172 | // Cache miss 173 | Err(( 174 | StatusCode::NOT_FOUND, 175 | JsonResponse(json!({ 176 | "error": "no cache entry found, please list elements again" 177 | })), 178 | )) 179 | } 180 | } 181 | } 182 | 183 | // Helper function to convert key combo to AppleScript format 184 | fn convert_key_combo_to_applescript(key_combo: &str) -> String { 185 | // Split the key combo by "+" to handle modifiers 186 | let parts: Vec<&str> = key_combo.split('+').collect(); 187 | 188 | // Last part is usually the main key 189 | let main_key = parts.last().unwrap_or(&"").trim(); 190 | 191 | // Check for modifiers 192 | let has_command = parts.iter().any(|p| p.trim().eq_ignore_ascii_case("command") || p.trim().eq_ignore_ascii_case("cmd")); 193 | let has_shift = parts.iter().any(|p| p.trim().eq_ignore_ascii_case("shift")); 194 | let has_option = parts.iter().any(|p| p.trim().eq_ignore_ascii_case("option") || p.trim().eq_ignore_ascii_case("alt")); 195 | let has_control = parts.iter().any(|p| p.trim().eq_ignore_ascii_case("control") || p.trim().eq_ignore_ascii_case("ctrl")); 196 | 197 | // For special keys like Return, Tab, etc. 
198 | let special_key_mapping = match main_key.to_lowercase().as_str() { 199 | "return" | "enter" => "return", 200 | "tab" => "tab", 201 | "escape" | "esc" => "escape", 202 | "backspace" | "delete" => "delete", 203 | "space" => "space", 204 | "down" | "downarrow" => "down arrow", 205 | "up" | "uparrow" => "up arrow", 206 | "left" | "leftarrow" => "left arrow", 207 | "right" | "rightarrow" => "right arrow", 208 | _ => main_key, // use as is for regular keys 209 | }; 210 | 211 | // Build the AppleScript 212 | let mut script = String::from("tell application \"System Events\" to "); 213 | 214 | // For simple one-character keys 215 | if special_key_mapping.len() == 1 && !has_command && !has_shift && !has_option && !has_control { 216 | script.push_str(&format!("keystroke \"{}\"", special_key_mapping)); 217 | } else { 218 | // For key combinations or special keys 219 | script.push_str("key code "); 220 | 221 | // Map the key to AppleScript key code or use the name for special keys 222 | match special_key_mapping { 223 | "return" => script.push_str("36"), 224 | "tab" => script.push_str("48"), 225 | "escape" => script.push_str("53"), 226 | "delete" => script.push_str("51"), 227 | "space" => script.push_str("49"), 228 | "down arrow" => script.push_str("125"), 229 | "up arrow" => script.push_str("126"), 230 | "left arrow" => script.push_str("123"), 231 | "right arrow" => script.push_str("124"), 232 | _ => { 233 | // For single character keys 234 | if special_key_mapping.len() == 1 { 235 | // Get ASCII value 236 | let c = special_key_mapping.chars().next().unwrap(); 237 | // This is a simplification - a proper implementation would map characters to key codes 238 | // For letters, lowercase ASCII - 'a' + 0 would work 239 | if c.is_ascii_lowercase() { 240 | script.push_str(&format!("{}", (c as u8 - b'a') + 0)); 241 | } else if c.is_ascii_uppercase() { 242 | script.push_str(&format!("{}", (c as u8 - b'A') + 0)); 243 | } else { 244 | // This is a placeholder - you'd need a full 
mapping for all characters 245 | script.push_str(&format!("\"{}\"", c)); 246 | } 247 | } else { 248 | // For anything else, default to keystroke 249 | script = format!("tell application \"System Events\" to keystroke \"{}\"", special_key_mapping); 250 | } 251 | } 252 | } 253 | 254 | // Add modifiers 255 | if has_command || has_shift || has_option || has_control { 256 | script.push_str(" using {"); 257 | let mut modifiers = Vec::new(); 258 | if has_command { modifiers.push("command down"); } 259 | if has_shift { modifiers.push("shift down"); } 260 | if has_option { modifiers.push("option down"); } 261 | if has_control { modifiers.push("control down"); } 262 | script.push_str(&modifiers.join(", ")); 263 | script.push_str("}"); 264 | } 265 | } 266 | 267 | debug!("generated applescript: {}", script); 268 | script 269 | } 270 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/bin/handlers/type_by_index.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | use axum::{ 3 | extract::{Json, State}, 4 | http::StatusCode, 5 | response::Json as JsonResponse, 6 | }; 7 | use serde::Serialize; 8 | use serde_json::json; 9 | use tracing::{debug, error}; 10 | use computer_use_ai_sdk::Desktop; 11 | 12 | use crate::types::{AppState, TypeByIndexRequest, TypeByIndexResponse, ListElementsAndAttributesResponse}; 13 | use crate::refresh_elements_and_attributes_after_action; 14 | 15 | // Response type that combines both results 16 | #[derive(Serialize)] 17 | pub struct TypeByIndexWithElementsResponse { 18 | pub type_action: TypeByIndexResponse, 19 | pub elements: Option, 20 | } 21 | 22 | pub async fn type_by_index_handler( 23 | State(state): State>, 24 | Json(request): Json, 25 | ) -> Result, (StatusCode, JsonResponse)> { 26 | // Get elements from cache 27 | let elements_opt = { 28 | let cache = state.element_cache.lock().await; 29 | cache.clone() 30 | }; 31 | 32 | // Check if 
cache exists 33 | if elements_opt.is_none() { 34 | return Err(( 35 | StatusCode::BAD_REQUEST, 36 | JsonResponse(json!({ 37 | "error": "no element cache found - you must call listInteractableElementsByIndex first to index the elements before using by-index operations" 38 | })), 39 | )); 40 | } 41 | 42 | match elements_opt { 43 | Some((elements, timestamp, app_name)) if timestamp.elapsed() < std::time::Duration::from_secs(30) => { 44 | // Use element_index directly 45 | if request.element_index < elements.len() { 46 | let element = &elements[request.element_index]; 47 | 48 | // Step 1: Try inputControl first 49 | debug!("attempting to type text '{}' using inputControl (AppleScript)", request.text); 50 | 51 | // Activate the app first 52 | debug!("activating app: {}", app_name); 53 | let desktop = match Desktop::new(false, true) { 54 | Ok(d) => d, 55 | Err(e) => { 56 | error!("failed to initialize desktop automation: {}", e); 57 | return Err(( 58 | StatusCode::INTERNAL_SERVER_ERROR, 59 | JsonResponse(json!({ 60 | "error": format!("failed to initialize desktop automation: {}", e) 61 | })), 62 | )); 63 | } 64 | }; 65 | 66 | // Get and activate the application 67 | let _ = match desktop.application(&app_name) { 68 | Ok(app) => app, 69 | Err(e) => { 70 | error!("application not found: {}", e); 71 | return Err(( 72 | StatusCode::NOT_FOUND, 73 | JsonResponse(json!({ 74 | "error": format!("application not found: {}", e) 75 | })), 76 | )); 77 | } 78 | }; 79 | 80 | // Click the element first to ensure it has focus 81 | if let Err(e) = element.click() { 82 | debug!("failed to click element before typing: {}", e); 83 | // Continue anyway 84 | } 85 | 86 | // Small delay to ensure element is focused 87 | std::thread::sleep(std::time::Duration::from_millis(100)); 88 | 89 | // Use inputControl for text input using System Events 90 | use std::process::Command; 91 | 92 | // Escape any quotes in the text to avoid breaking the AppleScript 93 | let escaped_text = 
request.text.replace("\"", "\\\""); 94 | let script = format!("tell application \"System Events\" to keystroke \"{}\"", escaped_text); 95 | 96 | let input_control_success = match Command::new("osascript").arg("-e").arg(script).output() { 97 | Ok(_) => { 98 | debug!("successfully typed text '{}' using inputControl", request.text); 99 | true 100 | }, 101 | Err(e) => { 102 | debug!("failed to type text using inputControl: {} - falling back to AXValue", e); 103 | false 104 | } 105 | }; 106 | 107 | // Step 2: If inputControl failed, try AXValue as fallback 108 | if !input_control_success { 109 | debug!("falling back to AXValue for typing"); 110 | match element.type_text(&request.text) { 111 | Ok(_) => { 112 | debug!("successfully typed text '{}' into element with role: {} using AXValue", 113 | request.text, element.role()); 114 | 115 | // Add a small delay to ensure UI updates 116 | std::thread::sleep(std::time::Duration::from_millis(100)); 117 | 118 | // Verify text was actually set by reading it back 119 | let verification = match element.text(1) { 120 | Ok(actual_text) => { 121 | let contains_text = actual_text.contains(&request.text); 122 | if contains_text { 123 | debug!("verified text was set correctly: '{}'", actual_text); 124 | true 125 | } else { 126 | debug!("verification failed: expected '{}' but got '{}'", 127 | request.text, actual_text); 128 | false 129 | } 130 | }, 131 | Err(e) => { 132 | debug!("failed to verify text: {}", e); 133 | false 134 | } 135 | }; 136 | 137 | if !verification { 138 | error!("failed to verify text was set with AXValue after inputControl failure"); 139 | return Err(( 140 | StatusCode::INTERNAL_SERVER_ERROR, 141 | JsonResponse(json!({ 142 | "error": "failed to type text using both inputControl and AXValue methods" 143 | })), 144 | )); 145 | } 146 | }, 147 | Err(e) => { 148 | error!("failed to type text into element with AXValue: {}", e); 149 | return Err(( 150 | StatusCode::INTERNAL_SERVER_ERROR, 151 | JsonResponse(json!({ 152 | 
"error": format!("failed to type text using both inputControl and AXValue methods: {}", e) 153 | })), 154 | )); 155 | } 156 | } 157 | } 158 | 159 | // Create the success response based on which method worked 160 | let method_used = if input_control_success { "AppleScript" } else { "AXValue" }; 161 | let type_response = TypeByIndexResponse { 162 | success: true, 163 | message: format!( 164 | "successfully typed text into element with role: {} (using {} method)", 165 | element.role(), method_used 166 | ), 167 | }; 168 | 169 | // Get refreshed elements using the helper function 170 | let elements_response = refresh_elements_and_attributes_after_action(state, app_name.clone(), 500).await; 171 | 172 | // Return combined response 173 | Ok(JsonResponse(TypeByIndexWithElementsResponse { 174 | type_action: type_response, 175 | elements: elements_response, 176 | })) 177 | } else { 178 | error!( 179 | "element index out of bounds: {} (max: {})", 180 | request.element_index, 181 | elements.len() - 1 182 | ); 183 | Err(( 184 | StatusCode::BAD_REQUEST, 185 | JsonResponse(json!({ 186 | "error": format!("element index out of bounds: {} (max: {})", 187 | request.element_index, elements.len() - 1) 188 | })), 189 | )) 190 | } 191 | } 192 | Some(_) => { 193 | // Cache entry expired 194 | Err(( 195 | StatusCode::BAD_REQUEST, 196 | JsonResponse(json!({ 197 | "error": "cache entry expired, please list elements again" 198 | })), 199 | )) 200 | } 201 | None => { 202 | // Cache miss 203 | Err(( 204 | StatusCode::NOT_FOUND, 205 | JsonResponse(json!({ 206 | "error": "no cache entry found, please list elements again" 207 | })), 208 | )) 209 | } 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/bin/handlers/utils.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | use axum::extract::{Json, State}; 3 | use tokio::time::Duration; 4 | use tracing::{error, 
info}; 5 | 6 | use crate::types::*; 7 | use crate::AppState; 8 | 9 | use super::list_elements_and_attributes::list_elements_and_attributes_handler; 10 | 11 | 12 | pub async fn refresh_elements_and_attributes_after_action( 13 | state: Arc, 14 | app_name: String, 15 | delay_ms: u64, 16 | ) -> Option { 17 | // Add a small delay to allow UI to update 18 | info!("waiting for UI to update after action before listing elements and attributes"); 19 | tokio::time::sleep(Duration::from_millis(delay_ms)).await; 20 | 21 | // Create request for list elements and attributes 22 | let list_request = ListInteractableElementsRequest { 23 | app_name, 24 | max_elements: None, 25 | use_background_apps: Some(false), 26 | activate_app: Some(true), 27 | }; 28 | 29 | // Call the handler to get fresh elements 30 | match list_elements_and_attributes_handler(State(state), Json(list_request)).await { 31 | Ok(response) => Some(response.0), 32 | Err(e) => { 33 | error!("failed to refresh elements and attributes after action: {:?}", e); 34 | None 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/bin/mcp-bridge.ts: -------------------------------------------------------------------------------- 1 | // mcp-bridge.ts 2 | import { Server } from "@modelcontextprotocol/sdk/server/index.js"; 3 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; 4 | import { spawn } from "child_process"; 5 | import { z } from "zod"; 6 | 7 | // Path to your rust binary 8 | const RUST_BINARY = "/Users/matthewdi/Desktop/screenpipe/computer-use-ai-sdk/mcp-server-os-level/target/debug/server"; 9 | 10 | // Create server 11 | const server = new Server( 12 | { 13 | name: "ui-automation-bridge", 14 | version: "1.0.0", 15 | }, 16 | { 17 | capabilities: { 18 | tools: { 19 | // Define the same tools as your Rust server 20 | }, 21 | }, 22 | } 23 | ); 24 | 25 | // Start your Rust server in HTTP mode (not STDIO) 26 | const 
rustProcess = spawn(RUST_BINARY, [], { 27 | stdio: 'ignore' // Run in background 28 | }); 29 | 30 | // Set up clean exit 31 | process.on('exit', () => { 32 | rustProcess.kill(); 33 | }); 34 | 35 | // Define handlers that forward requests to your Rust HTTP endpoint 36 | server.setRequestHandler(/* ... */, async (request) => { 37 | // Forward the request to your Rust server running on HTTP 38 | const response = await fetch("http://127.0.0.1:8080/api/click-by-index", { 39 | method: "POST", 40 | headers: { "Content-Type": "application/json" }, 41 | body: JSON.stringify(request.params), 42 | }); 43 | 44 | const data = await response.json(); 45 | return data; 46 | }); 47 | 48 | // Start bridge server 49 | async function runServer() { 50 | const transport = new StdioServerTransport(); 51 | await server.connect(transport); 52 | console.error("UI Automation Bridge running on stdio"); 53 | } 54 | 55 | runServer().catch((error) => { 56 | console.error("Fatal error running server:", error); 57 | rustProcess.kill(); 58 | process.exit(1); 59 | }); -------------------------------------------------------------------------------- /mcp-server-os-level/src/bin/mod.rs: -------------------------------------------------------------------------------- 1 | // Define modules 2 | pub mod server; 3 | pub mod types; 4 | pub mod handlers; -------------------------------------------------------------------------------- /mcp-server-os-level/src/bin/server.rs: -------------------------------------------------------------------------------- 1 | use std::{net::SocketAddr, sync::Arc, io::{self, BufRead, BufReader, Write}}; 2 | 3 | use axum::{ 4 | routing::post, 5 | Router, 6 | }; 7 | use tokio::sync::Mutex; 8 | use tower_http::{cors::CorsLayer, trace::TraceLayer}; 9 | use tracing::{error, info, level_filters::LevelFilter}; 10 | use serde_json::{json, Value}; 11 | mod types; 12 | use types::*; 13 | mod handlers; 14 | 15 | // Import only the handlers actually used 16 | use handlers::mcp::mcp_handler; 
17 | use handlers::click_by_index::click_by_index_handler; 18 | use handlers::type_by_index::type_by_index_handler; 19 | use handlers::press_key_by_index::press_key_by_index_handler; 20 | use handlers::open_application::open_application_handler; 21 | use handlers::open_url::open_url_handler; 22 | use handlers::input_control::input_control_handler; 23 | use handlers::list_elements_and_attributes::list_elements_and_attributes_handler; 24 | use handlers::utils::*; 25 | 26 | // Import mcp_handler helpers but we'll call them directly 27 | use handlers::mcp::{handle_initialize, handle_execute_tool_function, mcp_error_response}; 28 | 29 | // ================ Main ================ 30 | 31 | #[tokio::main] 32 | async fn main() -> anyhow::Result<()> { 33 | // Check if we should use STDIO mode 34 | let use_stdio = std::env::args().any(|arg| arg == "--stdio"); 35 | 36 | // initialize tracing with different settings based on mode 37 | if use_stdio { 38 | // For STDIO mode, disable colors and only log to stderr 39 | tracing_subscriber::fmt() 40 | .with_max_level(LevelFilter::DEBUG) 41 | .with_ansi(false) // Disable ANSI color codes 42 | .with_writer(std::io::stderr) // Only write logs to stderr 43 | .init(); 44 | } else { 45 | // For HTTP mode, use default settings 46 | tracing_subscriber::fmt() 47 | .with_max_level(LevelFilter::DEBUG) 48 | .init(); 49 | } 50 | 51 | info!("starting ui automation server"); 52 | 53 | // Check permissions early - add this line 54 | check_os_permissions(); 55 | 56 | // Create app state 57 | let app_state = Arc::new(AppState { 58 | element_cache: Arc::new(Mutex::new(None)), 59 | }); 60 | 61 | if use_stdio { 62 | info!("running in STDIO mode for MCP"); 63 | // run_stdio_mode(app_state).await?; 64 | } else { 65 | info!("running in HTTP mode on port 8080"); 66 | run_http_server(app_state).await?; 67 | } 68 | 69 | Ok(()) 70 | } 71 | 72 | async fn run_http_server(app_state: Arc) -> anyhow::Result<()> { 73 | // Create CORS layer 74 | let cors = 
/// Checks, on macOS only, whether the process has accessibility permissions and
/// logs the outcome. On other platforms this function does nothing.
///
/// The first check is made with the prompt flag set. If permission is not yet
/// granted, the function sleeps two seconds and checks again without prompting.
/// Instructions are logged whenever permission is still missing or a check fails.
fn check_os_permissions() {
    #[cfg(target_os = "macos")]
    {
        use computer_use_ai_sdk::platforms::macos::check_accessibility_permissions;

        /// Logs how to grant the accessibility permission.
        fn log_permission_instructions() {
            info!("**************************************************************");
            info!("* ACCESSIBILITY PERMISSIONS REQUIRED *");
            info!("* Go to System Preferences > Security & Privacy > Privacy > *");
            info!("* Accessibility and add this application. *");
            info!("* Without this permission, UI automation will not function. *");
            info!("**************************************************************");
        }

        match check_accessibility_permissions(true) {
            Ok(true) => info!("accessibility permissions already granted"),
            Ok(false) => {
                info!("accessibility permissions: prompt shown to user");
                // Blocking sleep to give the user time to answer the prompt.
                std::thread::sleep(std::time::Duration::from_secs(2));

                // Re-check without prompting. The result must be inspected: an
                // `Ok(false)` means the permission is still missing.
                match check_accessibility_permissions(false) {
                    Ok(true) => info!("accessibility permissions now granted"),
                    Ok(false) => {
                        error!("accessibility permissions still not granted after prompt");
                        log_permission_instructions();
                    }
                    Err(e) => {
                        error!("accessibility permissions check failed: {}", e);
                        log_permission_instructions();
                    }
                }
            }
            Err(e) => {
                error!("accessibility permissions check failed: {}", e);
                log_permission_instructions();
            }
        }
    }
}
32 | pub selector: ElementSelector, 33 | } 34 | 35 | #[derive(Debug, Deserialize, Serialize)] 36 | pub struct TypeTextRequest { 37 | pub selector: ElementSelector, 38 | pub text: String, 39 | } 40 | 41 | #[derive(Debug, Deserialize, Serialize)] 42 | pub struct PressKeyRequest { 43 | pub selector: ElementSelector, 44 | pub key_combo: String, 45 | } 46 | 47 | #[derive(Debug, Deserialize, Serialize)] 48 | pub struct GetTextRequest { 49 | pub app_name: String, 50 | pub window_name: Option, 51 | pub max_depth: Option, 52 | pub use_background_apps: Option, 53 | pub activate_app: Option, 54 | } 55 | 56 | #[derive(Debug, Deserialize, Serialize)] 57 | pub struct ElementPosition { 58 | pub x: i32, 59 | pub y: i32, 60 | } 61 | 62 | #[derive(Debug, Deserialize, Serialize)] 63 | pub struct ElementSize { 64 | pub width: i32, 65 | pub height: i32, 66 | } 67 | 68 | #[derive(Debug, Deserialize, Serialize)] 69 | pub struct ElementInfo { 70 | pub id: Option, 71 | pub role: String, 72 | pub label: Option, 73 | pub description: Option, 74 | pub text: Option, 75 | pub position: Option, 76 | pub size: Option, 77 | pub properties: serde_json::Value, 78 | } 79 | 80 | #[derive(Debug, Serialize)] 81 | pub struct FindElementsResponse { 82 | pub data: Vec, 83 | } 84 | 85 | #[derive(Debug, Serialize)] 86 | pub struct ActionResponse { 87 | pub success: bool, 88 | pub message: String, 89 | } 90 | 91 | #[derive(Debug, Serialize)] 92 | pub struct GetTextResponse { 93 | pub success: bool, 94 | pub text: String, 95 | } 96 | 97 | // App state 98 | pub struct AppState { 99 | pub element_cache: Arc, Instant, String)>>>, 100 | } 101 | 102 | // MCP-specific types 103 | #[derive(Debug, Deserialize, Serialize)] 104 | pub struct MCPRequest { 105 | pub jsonrpc: String, 106 | pub id: Value, 107 | pub method: String, 108 | pub params: Option, 109 | } 110 | 111 | #[derive(Debug, Deserialize, Serialize)] 112 | pub struct MCPResponse { 113 | pub jsonrpc: String, 114 | pub id: Value, 115 | pub result: Value, 116 | 
} 117 | 118 | #[derive(Debug, Deserialize, Serialize)] 119 | pub struct MCPErrorResponse { 120 | pub jsonrpc: String, 121 | pub id: Value, 122 | pub error: MCPError, 123 | } 124 | 125 | #[derive(Debug, Deserialize, Serialize)] 126 | pub struct MCPError { 127 | pub code: i32, 128 | pub message: String, 129 | pub data: Option, 130 | } 131 | 132 | #[derive(Debug, Deserialize, Serialize)] 133 | pub struct InitializeParams { 134 | pub capabilities: ClientCapabilities, 135 | } 136 | 137 | #[derive(Debug, Deserialize, Serialize)] 138 | pub struct ClientCapabilities { 139 | // MCP client capabilities 140 | pub tools: Option, 141 | pub resources: Option, 142 | // Add other capabilities as needed 143 | } 144 | 145 | #[derive(Debug, Deserialize, Serialize)] 146 | pub struct ToolClientCapabilities { 147 | pub execution: bool, 148 | } 149 | 150 | #[derive(Debug, Deserialize, Serialize)] 151 | pub struct ResourceClientCapabilities { 152 | // Resource capabilities 153 | } 154 | 155 | #[derive(Debug, Deserialize, Serialize)] 156 | pub struct ServerCapabilities { 157 | pub tools: Option, 158 | pub resources: Option, 159 | // Add other capabilities as needed 160 | } 161 | 162 | #[derive(Debug, Deserialize, Serialize)] 163 | pub struct ToolServerCapabilities { 164 | pub functions: Vec, 165 | } 166 | 167 | #[derive(Debug, Deserialize, Serialize)] 168 | pub struct ResourceServerCapabilities { 169 | // Resource capabilities 170 | } 171 | 172 | #[derive(Debug, Deserialize, Serialize)] 173 | pub struct ToolFunctionDefinition { 174 | pub name: String, 175 | pub description: String, 176 | pub parameters: serde_json::Value, // JSON Schema 177 | } 178 | 179 | #[derive(Debug, Deserialize, Serialize)] 180 | pub struct ExecuteToolFunctionParams { 181 | pub function: String, 182 | pub arguments: Value, 183 | } 184 | 185 | // Types for scrolling 186 | #[derive(Debug, Deserialize, Serialize)] 187 | pub struct ScrollElementRequest { 188 | pub selector: Option, 189 | pub coordinates: Option, 190 | 
pub direction: String, 191 | pub amount: f64, 192 | } 193 | 194 | // Types for opening applications 195 | #[derive(Deserialize, Serialize)] 196 | pub struct OpenApplicationRequest { 197 | pub app_name: String, 198 | } 199 | 200 | #[derive(Serialize)] 201 | pub struct OpenApplicationResponse { 202 | pub success: bool, 203 | pub message: String, 204 | } 205 | 206 | // Types for opening URLs 207 | #[derive(Deserialize, Serialize)] 208 | pub struct OpenUrlRequest { 209 | pub url: String, 210 | pub browser: Option, 211 | } 212 | 213 | #[derive(Serialize)] 214 | pub struct OpenUrlResponse { 215 | pub success: bool, 216 | pub message: String, 217 | } 218 | 219 | // Types for interactable elements 220 | #[derive(Debug, Deserialize, Serialize)] 221 | pub struct ListInteractableElementsRequest { 222 | pub app_name: String, 223 | pub max_elements: Option, 224 | pub use_background_apps: Option, 225 | pub activate_app: Option, 226 | } 227 | 228 | #[derive(Debug, Serialize)] 229 | pub struct InteractableElement { 230 | pub index: usize, 231 | pub role: String, 232 | pub interactability: String, // "definite", "sometimes", "none" 233 | pub text: String, 234 | pub position: Option, 235 | pub size: Option, 236 | pub element_id: Option, 237 | } 238 | 239 | #[derive(Debug, Serialize)] 240 | pub struct ElementCacheInfo { 241 | pub cache_id: String, 242 | pub timestamp: String, 243 | pub expires_at: String, 244 | pub element_count: usize, 245 | pub ttl_seconds: u64, 246 | } 247 | 248 | // Remove old ElementStats and add new ElementStatistics struct 249 | #[derive(serde::Serialize, Debug)] 250 | pub struct ElementStatistics { 251 | pub count: usize, 252 | pub excluded_count: usize, 253 | pub excluded_non_interactable: usize, 254 | pub excluded_no_text: usize, 255 | pub with_text_count: usize, 256 | pub without_text_count: usize, 257 | pub top_roles: HashMap, 258 | pub properties: HashMap, 259 | } 260 | 261 | #[derive(serde::Serialize, Debug)] 262 | pub struct 
ListElementsAndAttributesResponse { 263 | pub elements: Vec, 264 | pub cache_info: ElementCacheInfo, 265 | pub stats: ElementStatistics, 266 | pub processing_time_seconds: String, 267 | } 268 | 269 | // Types for index-based operations 270 | #[derive(Debug, Deserialize, Serialize)] 271 | pub struct ClickByIndexRequest { 272 | pub element_index: usize, 273 | } 274 | 275 | #[derive(Debug, Serialize)] 276 | pub struct ClickByIndexResponse { 277 | pub success: bool, 278 | pub message: String, 279 | pub elements: Option, 280 | } 281 | 282 | #[derive(Debug, Deserialize, Serialize)] 283 | pub struct TypeByIndexRequest { 284 | pub element_index: usize, 285 | pub text: String, 286 | } 287 | 288 | #[derive(Debug, Serialize)] 289 | pub struct TypeByIndexResponse { 290 | pub success: bool, 291 | pub message: String, 292 | } 293 | 294 | #[derive(Debug, Deserialize, Serialize)] 295 | pub struct PressKeyByIndexRequest { 296 | pub element_index: usize, 297 | pub key_combo: String, 298 | } 299 | 300 | #[derive(Debug, Serialize)] 301 | pub struct PressKeyByIndexResponse { 302 | pub success: bool, 303 | pub message: String, 304 | } 305 | 306 | // Types for input control 307 | #[derive(Debug, Deserialize)] 308 | pub struct InputControlRequest { 309 | pub action: InputAction, 310 | } 311 | 312 | #[derive(Debug, Deserialize)] 313 | #[serde(tag = "type", content = "data")] 314 | pub enum InputAction { 315 | KeyPress(String), 316 | MouseMove { x: i32, y: i32 }, 317 | MouseClick(String), 318 | WriteText(String), 319 | } 320 | 321 | #[derive(Serialize)] 322 | pub struct InputControlResponse { 323 | pub success: bool, 324 | } 325 | 326 | // Combined response types 327 | #[derive(Serialize)] 328 | pub struct InputControlWithElementsResponse { 329 | pub input: InputControlResponse, 330 | pub elements: Option, 331 | } 332 | 333 | #[derive(Serialize)] 334 | pub struct ClickByIndexWithElementsResponse { 335 | pub click: ClickByIndexResponse, 336 | pub elements: Option, 337 | } 338 | 339 | 
#[derive(Serialize)] 340 | pub struct TypeByIndexWithElementsResponse { 341 | pub type_action: TypeByIndexResponse, 342 | pub elements: Option, 343 | } 344 | 345 | #[derive(Debug, Serialize)] 346 | pub struct PressKeyByIndexWithElementsResponse { 347 | pub press_key: PressKeyByIndexResponse, 348 | pub elements: Option, 349 | } 350 | 351 | #[derive(Serialize)] 352 | pub struct OpenApplicationWithElementsResponse { 353 | pub application: OpenApplicationResponse, 354 | pub elements: Option, 355 | } 356 | 357 | #[derive(Serialize)] 358 | pub struct OpenUrlWithElementsResponse { 359 | pub url: OpenUrlResponse, 360 | pub elements: Option, 361 | } 362 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/desktop.rs: -------------------------------------------------------------------------------- 1 | //! Desktop UI automation through accessibility APIs 2 | //! 3 | //! This module provides a cross-platform API for automating desktop applications 4 | //! through accessibility APIs, inspired by Playwright's web automation model. 
5 | 6 | use std::sync::Arc; 7 | 8 | mod element; 9 | mod errors; 10 | mod locator; 11 | pub mod platforms; 12 | mod selector; 13 | #[cfg(test)] 14 | mod tests; 15 | 16 | 17 | pub use element::{UIElement, UIElementAttributes}; 18 | pub use errors::AutomationError; 19 | pub use locator::Locator; 20 | pub use selector::Selector; 21 | 22 | // Define a new struct to hold click result information - move to module level 23 | pub struct ClickResult { 24 | pub method: String, 25 | pub coordinates: Option<(f64, f64)>, 26 | pub details: String, 27 | } 28 | 29 | /// The main entry point for UI automation 30 | pub struct Desktop { 31 | engine: Arc, 32 | } 33 | 34 | impl Desktop { 35 | /// Create a new instance with the default platform-specific implementation 36 | pub fn new(use_background_apps: bool, activate_app: bool) -> Result { 37 | let boxed_engine = platforms::create_engine(use_background_apps, activate_app)?; 38 | // Move the boxed engine into an Arc 39 | let engine = Arc::from(boxed_engine); 40 | Ok(Self { engine }) 41 | } 42 | 43 | /// Get the root UI element representing the entire desktop 44 | pub fn root(&self) -> UIElement { 45 | self.engine.get_root_element() 46 | } 47 | 48 | /// Create a locator to find elements matching the given selector 49 | pub fn locator(&self, selector: impl Into) -> Locator { 50 | Locator::new(Arc::clone(&self.engine), selector.into()) 51 | } 52 | 53 | /// Get the currently focused element 54 | pub fn focused_element(&self) -> Result { 55 | self.engine.get_focused_element() 56 | } 57 | 58 | /// List all running applications 59 | pub fn applications(&self) -> Result, AutomationError> { 60 | self.engine.get_applications() 61 | } 62 | 63 | /// Find an application by name 64 | pub fn application(&self, name: &str) -> Result { 65 | self.engine.get_application_by_name(name) 66 | } 67 | 68 | /// Open an application by name 69 | pub fn open_application(&self, app_name: &str) -> Result { 70 | self.engine.open_application(app_name) 71 | } 72 | 73 | 
/// Open a URL in a specified browser (or default browser if None) 74 | pub fn open_url(&self, url: &str, browser: Option<&str>) -> Result { 75 | self.engine.open_url(url, browser) 76 | } 77 | 78 | /// Scroll at a specific position on the screen 79 | pub fn scroll_at_position(&self, x: f64, y: f64, direction: &str, amount: f64) -> Result<(), AutomationError> { 80 | self.engine.scroll_at_position(x, y, direction, amount) 81 | } 82 | 83 | /// Scroll at the current mouse position 84 | pub fn scroll_at_current_position(&self, direction: &str, amount: f64) -> Result<(), AutomationError> { 85 | self.engine.scroll_at_current_position(direction, amount) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/element.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::AutomationError; 2 | use crate::selector::Selector; 3 | use std::collections::HashMap; 4 | use std::fmt::Debug; 5 | 6 | use super::{ClickResult, Locator}; 7 | 8 | /// Represents a UI element in a desktop application 9 | #[derive(Debug)] 10 | pub struct UIElement { 11 | inner: Box, 12 | } 13 | 14 | /// Attributes associated with a UI element 15 | #[derive(Debug)] 16 | pub struct UIElementAttributes { 17 | pub role: String, 18 | pub label: Option, 19 | pub value: Option, 20 | pub description: Option, 21 | pub properties: HashMap>, 22 | } 23 | 24 | /// Interface for platform-specific element implementations 25 | pub(crate) trait UIElementImpl: Send + Sync + Debug { 26 | fn object_id(&self) -> usize; 27 | fn id(&self) -> Option; 28 | fn role(&self) -> String; 29 | fn attributes(&self) -> UIElementAttributes; 30 | fn children(&self) -> Result, AutomationError>; 31 | fn parent(&self) -> Result, AutomationError>; 32 | fn bounds(&self) -> Result<(f64, f64, f64, f64), AutomationError>; // x, y, width, height 33 | fn click(&self) -> Result; 34 | fn double_click(&self) -> Result; 35 | fn 
right_click(&self) -> Result<(), AutomationError>; 36 | fn hover(&self) -> Result<(), AutomationError>; 37 | fn focus(&self) -> Result<(), AutomationError>; 38 | fn type_text(&self, text: &str) -> Result<(), AutomationError>; 39 | fn press_key(&self, key: &str) -> Result<(), AutomationError>; 40 | fn get_text(&self, max_depth: usize) -> Result; 41 | fn set_value(&self, value: &str) -> Result<(), AutomationError>; 42 | fn is_enabled(&self) -> Result; 43 | fn is_visible(&self) -> Result; 44 | fn is_focused(&self) -> Result; 45 | fn perform_action(&self, action: &str) -> Result<(), AutomationError>; 46 | fn as_any(&self) -> &dyn std::any::Any; 47 | fn create_locator(&self, selector: Selector) -> Result; 48 | fn scroll(&self, direction: &str, amount: f64) -> Result<(), AutomationError>; 49 | 50 | // Add a method to clone the box 51 | fn clone_box(&self) -> Box; 52 | } 53 | 54 | impl UIElement { 55 | /// Create a new UI element from a platform-specific implementation 56 | pub(crate) fn new(impl_: Box) -> Self { 57 | Self { inner: impl_ } 58 | } 59 | 60 | /// Get the element's ID 61 | pub fn id(&self) -> Option { 62 | self.inner.id() 63 | } 64 | 65 | /// Get the element's role (e.g., "button", "textfield") 66 | pub fn role(&self) -> String { 67 | self.inner.role() 68 | } 69 | 70 | /// Get all attributes of the element 71 | pub fn attributes(&self) -> UIElementAttributes { 72 | self.inner.attributes() 73 | } 74 | 75 | /// Get child elements 76 | pub fn children(&self) -> Result, AutomationError> { 77 | self.inner.children() 78 | } 79 | 80 | /// Get parent element 81 | pub fn parent(&self) -> Result, AutomationError> { 82 | self.inner.parent() 83 | } 84 | 85 | /// Get element bounds (x, y, width, height) 86 | pub fn bounds(&self) -> Result<(f64, f64, f64, f64), AutomationError> { 87 | self.inner.bounds() 88 | } 89 | 90 | /// Click on this element 91 | pub fn click(&self) -> Result { 92 | self.inner.click() 93 | } 94 | 95 | /// Double-click on this element 96 | pub fn 
double_click(&self) -> Result { 97 | self.inner.double_click() 98 | } 99 | 100 | /// Right-click on this element 101 | pub fn right_click(&self) -> Result<(), AutomationError> { 102 | self.inner.right_click() 103 | } 104 | 105 | /// Hover over this element 106 | pub fn hover(&self) -> Result<(), AutomationError> { 107 | self.inner.hover() 108 | } 109 | 110 | /// Focus this element 111 | pub fn focus(&self) -> Result<(), AutomationError> { 112 | self.inner.focus() 113 | } 114 | 115 | /// Type text into this element 116 | pub fn type_text(&self, text: &str) -> Result<(), AutomationError> { 117 | self.inner.type_text(text) 118 | } 119 | 120 | /// Press a key while this element is focused 121 | pub fn press_key(&self, key: &str) -> Result<(), AutomationError> { 122 | self.inner.press_key(key) 123 | } 124 | 125 | /// Get text content of this element 126 | pub fn text(&self, max_depth: usize) -> Result { 127 | self.inner.get_text(max_depth) 128 | } 129 | 130 | /// Set value of this element 131 | pub fn set_value(&self, value: &str) -> Result<(), AutomationError> { 132 | self.inner.set_value(value) 133 | } 134 | 135 | /// Check if element is enabled 136 | pub fn is_enabled(&self) -> Result { 137 | self.inner.is_enabled() 138 | } 139 | 140 | /// Check if element is visible 141 | pub fn is_visible(&self) -> Result { 142 | self.inner.is_visible() 143 | } 144 | 145 | /// Check if element is focused 146 | pub fn is_focused(&self) -> Result { 147 | self.inner.is_focused() 148 | } 149 | 150 | /// Perform a named action on this element 151 | pub fn perform_action(&self, action: &str) -> Result<(), AutomationError> { 152 | self.inner.perform_action(action) 153 | } 154 | 155 | /// Get the underlying implementation as a specific type 156 | pub(crate) fn as_any(&self) -> &dyn std::any::Any { 157 | self.inner.as_any() 158 | } 159 | 160 | /// Find elements matching the selector within this element 161 | pub fn locator(&self, selector: impl Into) -> Result { 162 | let selector = 
selector.into(); 163 | self.inner.create_locator(selector) 164 | } 165 | 166 | /// Scroll the element in a given direction 167 | pub fn scroll(&self, direction: &str, amount: f64) -> Result<(), AutomationError> { 168 | self.inner.scroll(direction, amount) 169 | } 170 | } 171 | 172 | impl PartialEq for UIElement { 173 | fn eq(&self, other: &Self) -> bool { 174 | self.inner.object_id() == other.inner.object_id() 175 | } 176 | } 177 | 178 | impl Eq for UIElement {} 179 | 180 | impl std::hash::Hash for UIElement { 181 | fn hash(&self, state: &mut H) { 182 | self.inner.object_id().hash(state); 183 | } 184 | } 185 | 186 | impl Clone for UIElement { 187 | fn clone(&self) -> Self { 188 | // We can't directly clone the inner Box, 189 | // but we can create a new UIElement with the same identity 190 | // that will behave the same way 191 | Self { 192 | inner: self.inner.clone_box(), 193 | } 194 | } 195 | } 196 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/errors.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | 3 | #[derive(Error, Debug)] 4 | pub enum AutomationError { 5 | #[error("Element not found: {0}")] 6 | ElementNotFound(String), 7 | 8 | #[error("Operation timed out: {0}")] 9 | Timeout(String), 10 | 11 | #[error("Permission denied: {0}")] 12 | PermissionDenied(String), 13 | 14 | #[error("Platform-specific error: {0}")] 15 | PlatformError(String), 16 | 17 | #[error("Unsupported operation: {0}")] 18 | UnsupportedOperation(String), 19 | 20 | #[error("Unsupported platform: {0}")] 21 | UnsupportedPlatform(String), 22 | 23 | #[error("Invalid argument: {0}")] 24 | InvalidArgument(String), 25 | 26 | #[error("Internal error: {0}")] 27 | Internal(String), 28 | } 29 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/lib.rs: 
-------------------------------------------------------------------------------- 1 | //! Desktop UI automation through accessibility APIs 2 | //! 3 | //! This module provides a cross-platform API for automating desktop applications 4 | //! through accessibility APIs, inspired by Playwright's web automation model. 5 | 6 | use std::sync::Arc; 7 | 8 | mod element; 9 | mod errors; 10 | mod locator; 11 | pub mod platforms; 12 | mod selector; 13 | #[cfg(test)] 14 | mod tests; 15 | 16 | pub use element::{UIElement, UIElementAttributes}; 17 | pub use errors::AutomationError; 18 | pub use locator::Locator; 19 | pub use selector::Selector; 20 | 21 | // Define a new struct to hold click result information - move to module level 22 | pub struct ClickResult { 23 | pub method: String, 24 | pub coordinates: Option<(f64, f64)>, 25 | pub details: String, 26 | } 27 | 28 | /// The main entry point for UI automation 29 | pub struct Desktop { 30 | engine: Arc, 31 | } 32 | 33 | impl Desktop { 34 | /// Create a new instance with the default platform-specific implementation 35 | pub fn new(use_background_apps: bool, activate_app: bool) -> Result { 36 | let boxed_engine = platforms::create_engine(use_background_apps, activate_app)?; 37 | // Move the boxed engine into an Arc 38 | let engine = Arc::from(boxed_engine); 39 | Ok(Self { engine }) 40 | } 41 | 42 | /// Get the root UI element representing the entire desktop 43 | pub fn root(&self) -> UIElement { 44 | self.engine.get_root_element() 45 | } 46 | 47 | /// Create a locator to find elements matching the given selector 48 | pub fn locator(&self, selector: impl Into) -> Locator { 49 | Locator::new(Arc::clone(&self.engine), selector.into()) 50 | } 51 | 52 | /// Get the currently focused element 53 | pub fn focused_element(&self) -> Result { 54 | self.engine.get_focused_element() 55 | } 56 | 57 | /// List all running applications 58 | pub fn applications(&self) -> Result, AutomationError> { 59 | self.engine.get_applications() 60 | } 61 | 62 
| /// Find an application by name 63 | pub fn application(&self, name: &str) -> Result { 64 | self.engine.get_application_by_name(name) 65 | } 66 | 67 | /// Open an application by name 68 | pub fn open_application(&self, app_name: &str) -> Result { 69 | self.engine.open_application(app_name) 70 | } 71 | 72 | /// Open a URL in a specified browser (or default browser if None) 73 | pub fn open_url(&self, url: &str, browser: Option<&str>) -> Result { 74 | self.engine.open_url(url, browser) 75 | } 76 | 77 | // /// Scroll at a specific position on screen 78 | // pub fn scroll_at_position(&self, x: f64, y: f64, direction: &str, amount: f64) -> Result<(), AutomationError> { 79 | // self.engine.scroll_at_position(x, y, direction, amount) 80 | // } 81 | 82 | // /// Scroll at the current mouse position 83 | // pub fn scroll_at_current_position(&self, direction: &str, amount: f64) -> Result<(), AutomationError> { 84 | // self.engine.scroll_at_current_position(direction, amount) 85 | // } 86 | } 87 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/locator.rs: -------------------------------------------------------------------------------- 1 | use crate::platforms::AccessibilityEngine; 2 | use crate::{AutomationError, Selector, UIElement}; 3 | use std::sync::Arc; 4 | use std::time::Duration; 5 | 6 | use super::ClickResult; 7 | 8 | /// A high-level API for finding and interacting with UI elements 9 | pub struct Locator { 10 | engine: Arc, 11 | selector: Selector, 12 | timeout: Duration, 13 | root: Option, 14 | } 15 | 16 | impl Locator { 17 | /// Create a new locator with the given selector 18 | pub(crate) fn new(engine: Arc, selector: Selector) -> Self { 19 | Self { 20 | engine, 21 | selector, 22 | timeout: Duration::from_secs(30), 23 | root: None, 24 | } 25 | } 26 | 27 | /// Set timeout for waiting operations 28 | pub fn timeout(mut self, timeout: Duration) -> Self { 29 | self.timeout = timeout; 30 | self 31 | } 32 | 33 | 
/// Set the root element for this locator 34 | pub fn within(mut self, element: UIElement) -> Self { 35 | self.root = Some(element); 36 | self 37 | } 38 | 39 | /// Get the first element matching this locator 40 | pub fn first(&self) -> Result, AutomationError> { 41 | let element = self 42 | .engine 43 | .find_element(&self.selector, self.root.as_ref())?; 44 | Ok(Some(element)) 45 | } 46 | 47 | /// Get all elements matching this locator 48 | pub fn all(&self) -> Result, AutomationError> { 49 | // Check if we can use platform-specific find_elements method 50 | if let Ok(elements) = self 51 | .engine 52 | .find_elements(&self.selector, self.root.as_ref()) 53 | { 54 | return Ok(elements); 55 | } 56 | 57 | // Fallback implementation - get the first element, then get its siblings 58 | // Note: This is a naive implementation and might not work correctly in all cases 59 | match self.first()? { 60 | Some(first) => { 61 | let result = vec![first]; 62 | // In a proper implementation, we would need to search for siblings 63 | // or implement a custom ElementCollector that gathers all matches 64 | Ok(result) 65 | } 66 | None => Ok(vec![]), 67 | } 68 | } 69 | 70 | /// Wait for an element to be available 71 | pub async fn wait(&self) -> Result { 72 | let start = std::time::Instant::now(); 73 | 74 | while start.elapsed() < self.timeout { 75 | if let Some(element) = self.first()? 
{ 76 | return Ok(element); 77 | } 78 | tokio::time::sleep(Duration::from_millis(50)).await; 79 | } 80 | 81 | Err(AutomationError::Timeout(format!( 82 | "Timed out waiting for selector: {:?}", 83 | self.selector 84 | ))) 85 | } 86 | 87 | /// Get a nested locator 88 | pub fn locator(&self, selector: impl Into) -> Locator { 89 | let selector = selector.into(); 90 | Locator { 91 | engine: self.engine.clone(), 92 | selector: Selector::Chain(vec![self.selector.clone(), selector]), 93 | timeout: self.timeout, 94 | root: self.root.clone(), 95 | } 96 | } 97 | 98 | // Convenience methods for common actions 99 | 100 | /// Click on the first matching element 101 | pub async fn click(&self) -> Result { 102 | self.wait().await?.click() 103 | } 104 | 105 | /// Type text into the first matching element 106 | pub async fn type_text(&self, text: &str) -> Result<(), AutomationError> { 107 | self.wait().await?.type_text(text) 108 | } 109 | 110 | /// Press a key on the first matching element 111 | pub async fn press_key(&self, key: &str) -> Result<(), AutomationError> { 112 | self.wait().await?.press_key(key) 113 | } 114 | 115 | /// Get text from the first matching element 116 | pub async fn text(&self, max_depth: usize) -> Result { 117 | self.wait().await?.text(max_depth) 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/platforms/linux.rs: -------------------------------------------------------------------------------- 1 | use crate::operator::element::UIElementImpl; 2 | use crate::operator::platforms::AccessibilityEngine; 3 | use crate::operator::ClickResult; 4 | use crate::operator::{AutomationError, Locator, Selector, UIElement, UIElementAttributes}; 5 | use std::fmt::Debug; 6 | 7 | pub struct LinuxEngine; 8 | 9 | impl LinuxEngine { 10 | pub fn new(use_background_apps: bool, activate_app: bool) -> Result { 11 | Err(AutomationError::UnsupportedPlatform( 12 | "Linux implementation is not yet 
available".to_string(), 13 | )) 14 | } 15 | } 16 | 17 | impl AccessibilityEngine for LinuxEngine { 18 | fn get_root_element(&self) -> UIElement { 19 | panic!("Linux implementation is not yet available") 20 | } 21 | 22 | fn get_focused_element(&self) -> Result { 23 | Err(AutomationError::UnsupportedPlatform( 24 | "Linux implementation is not yet available".to_string(), 25 | )) 26 | } 27 | 28 | fn get_applications(&self) -> Result, AutomationError> { 29 | Err(AutomationError::UnsupportedPlatform( 30 | "Linux implementation is not yet available".to_string(), 31 | )) 32 | } 33 | 34 | fn get_application_by_name(&self, _name: &str) -> Result { 35 | Err(AutomationError::UnsupportedPlatform( 36 | "Linux implementation is not yet available".to_string(), 37 | )) 38 | } 39 | 40 | fn find_element( 41 | &self, 42 | selector: &Selector, 43 | root: Option<&UIElement>, 44 | ) -> Result { 45 | Err(AutomationError::UnsupportedPlatform( 46 | "Linux implementation is not yet available".to_string(), 47 | )) 48 | } 49 | 50 | fn find_elements( 51 | &self, 52 | _selector: &Selector, 53 | _root: Option<&UIElement>, 54 | ) -> Result, AutomationError> { 55 | Err(AutomationError::UnsupportedPlatform( 56 | "Linux implementation is not yet available".to_string(), 57 | )) 58 | } 59 | 60 | fn open_application(&self, _app_name: &str) -> Result { 61 | Err(AutomationError::UnsupportedPlatform( 62 | "Linux implementation is not yet available".to_string(), 63 | )) 64 | } 65 | 66 | fn open_url(&self, _url: &str, _browser: Option<&str>) -> Result { 67 | Err(AutomationError::UnsupportedPlatform( 68 | "Linux implementation is not yet available".to_string(), 69 | )) 70 | } 71 | } 72 | 73 | // Placeholder LinuxUIElement that implements UIElementImpl 74 | pub struct LinuxUIElement; 75 | 76 | impl Debug for LinuxUIElement { 77 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 78 | f.debug_struct("LinuxUIElement").finish() 79 | } 80 | } 81 | 82 | impl UIElementImpl for LinuxUIElement { 83 
| fn object_id(&self) -> usize { 84 | 0 85 | } 86 | 87 | fn id(&self) -> Option { 88 | None 89 | } 90 | 91 | fn role(&self) -> String { 92 | "".to_string() 93 | } 94 | 95 | fn attributes(&self) -> UIElementAttributes { 96 | UIElementAttributes { 97 | role: "".to_string(), 98 | label: None, 99 | value: None, 100 | description: None, 101 | properties: std::collections::HashMap::new(), 102 | } 103 | } 104 | 105 | fn children(&self) -> Result, AutomationError> { 106 | Err(AutomationError::UnsupportedPlatform( 107 | "Linux implementation is not yet available".to_string(), 108 | )) 109 | } 110 | 111 | fn parent(&self) -> Result, AutomationError> { 112 | Err(AutomationError::UnsupportedPlatform( 113 | "Linux implementation is not yet available".to_string(), 114 | )) 115 | } 116 | 117 | fn bounds(&self) -> Result<(f64, f64, f64, f64), AutomationError> { 118 | Err(AutomationError::UnsupportedPlatform( 119 | "Linux implementation is not yet available".to_string(), 120 | )) 121 | } 122 | 123 | fn click(&self) -> Result { 124 | Err(AutomationError::UnsupportedPlatform( 125 | "Linux implementation is not yet available".to_string(), 126 | )) 127 | } 128 | 129 | fn double_click(&self) -> Result { 130 | Err(AutomationError::UnsupportedPlatform( 131 | "Linux implementation is not yet available".to_string(), 132 | )) 133 | } 134 | 135 | fn right_click(&self) -> Result<(), AutomationError> { 136 | Err(AutomationError::UnsupportedPlatform( 137 | "Linux implementation is not yet available".to_string(), 138 | )) 139 | } 140 | 141 | fn hover(&self) -> Result<(), AutomationError> { 142 | Err(AutomationError::UnsupportedPlatform( 143 | "Linux implementation is not yet available".to_string(), 144 | )) 145 | } 146 | 147 | fn focus(&self) -> Result<(), AutomationError> { 148 | Err(AutomationError::UnsupportedPlatform( 149 | "Linux implementation is not yet available".to_string(), 150 | )) 151 | } 152 | 153 | fn type_text(&self, _text: &str) -> Result<(), AutomationError> { 154 | 
Err(AutomationError::UnsupportedPlatform( 155 | "Linux implementation is not yet available".to_string(), 156 | )) 157 | } 158 | 159 | fn press_key(&self, _key: &str) -> Result<(), AutomationError> { 160 | Err(AutomationError::UnsupportedPlatform( 161 | "Linux implementation is not yet available".to_string(), 162 | )) 163 | } 164 | 165 | fn get_text(&self, max_depth: usize) -> Result { 166 | Err(AutomationError::UnsupportedPlatform( 167 | "Linux implementation is not yet available".to_string(), 168 | )) 169 | } 170 | 171 | fn set_value(&self, _value: &str) -> Result<(), AutomationError> { 172 | Err(AutomationError::UnsupportedPlatform( 173 | "Linux implementation is not yet available".to_string(), 174 | )) 175 | } 176 | 177 | fn is_enabled(&self) -> Result { 178 | Err(AutomationError::UnsupportedPlatform( 179 | "Linux implementation is not yet available".to_string(), 180 | )) 181 | } 182 | 183 | fn is_visible(&self) -> Result { 184 | Err(AutomationError::UnsupportedPlatform( 185 | "Linux implementation is not yet available".to_string(), 186 | )) 187 | } 188 | 189 | fn is_focused(&self) -> Result { 190 | Err(AutomationError::UnsupportedPlatform( 191 | "Linux implementation is not yet available".to_string(), 192 | )) 193 | } 194 | 195 | fn perform_action(&self, _action: &str) -> Result<(), AutomationError> { 196 | Err(AutomationError::UnsupportedPlatform( 197 | "Linux implementation is not yet available".to_string(), 198 | )) 199 | } 200 | 201 | fn as_any(&self) -> &dyn std::any::Any { 202 | self 203 | } 204 | 205 | fn create_locator(&self, _selector: Selector) -> Result { 206 | Err(AutomationError::UnsupportedPlatform( 207 | "Linux implementation is not yet available".to_string(), 208 | )) 209 | } 210 | 211 | fn clone_box(&self) -> Box { 212 | Box::new(LinuxUIElement) 213 | } 214 | } 215 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/platforms/mod.rs: 
-------------------------------------------------------------------------------- 1 | use crate::{AutomationError, Selector, UIElement}; 2 | 3 | /// The common trait that all platform-specific engines must implement 4 | pub trait AccessibilityEngine: Send + Sync { 5 | /// Get the root UI element 6 | fn get_root_element(&self) -> UIElement; 7 | 8 | #[cfg(target_os = "windows")] 9 | fn get_element_by_id(&self, _id: &str) -> Result; 10 | /// Get the currently focused element 11 | fn get_focused_element(&self) -> Result; 12 | 13 | /// Get all running applications 14 | fn get_applications(&self) -> Result, AutomationError>; 15 | 16 | /// Get application by name 17 | fn get_application_by_name(&self, name: &str) -> Result; 18 | 19 | /// Find elements using a selector 20 | fn find_element( 21 | &self, 22 | selector: &Selector, 23 | root: Option<&UIElement>, 24 | ) -> Result; 25 | 26 | /// Find all elements matching a selector 27 | /// Default implementation returns an UnsupportedOperation error, 28 | /// allowing platform-specific implementations to override as needed 29 | fn find_elements( 30 | &self, 31 | selector: &Selector, 32 | root: Option<&UIElement>, 33 | ) -> Result, AutomationError>; 34 | 35 | /// Open an application by name 36 | fn open_application(&self, app_name: &str) -> Result; 37 | 38 | /// Open a URL in a specified browser (or default if None) 39 | fn open_url(&self, url: &str, browser: Option<&str>) -> Result; 40 | 41 | /// Convert to Any for downcasting 42 | fn as_any(&self) -> &dyn std::any::Any; 43 | 44 | // //Scroll at a specific position on screen 45 | // fn scroll_at_position(&self, x: f64, y: f64, direction: &str, amount: f64) -> Result<(), AutomationError> { 46 | // Err(AutomationError::UnsupportedOperation("scroll_at_position not implemented for this platform".to_string())) 47 | // } 48 | 49 | // // Scroll at the current mouse position 50 | // fn scroll_at_current_position(&self, direction: &str, amount: f64) -> Result<(), AutomationError> { 51 | 
// Err(AutomationError::UnsupportedOperation("scroll_at_current_position not implemented for this platform".to_string())) 52 | // } 53 | } 54 | 55 | #[cfg(target_os = "linux")] 56 | mod linux; 57 | #[cfg(target_os = "macos")] 58 | pub mod macos; 59 | #[cfg(target_os = "macos")] 60 | pub mod tree_search; 61 | #[cfg(target_os = "windows")] 62 | mod windows; 63 | 64 | /// Create the appropriate engine for the current platform 65 | pub fn create_engine( 66 | use_background_apps: bool, 67 | activate_app: bool, 68 | ) -> Result, AutomationError> { 69 | #[cfg(target_os = "macos")] 70 | { 71 | return Ok(Box::new(macos::MacOSEngine::new( 72 | use_background_apps, 73 | activate_app, 74 | )?)); 75 | } 76 | #[cfg(target_os = "windows")] 77 | { 78 | return Ok(Box::new(windows::WindowsEngine::new( 79 | use_background_apps, 80 | activate_app, 81 | )?)); 82 | } 83 | #[cfg(target_os = "linux")] 84 | { 85 | return Ok(Box::new(linux::LinuxEngine::new( 86 | use_background_apps, 87 | activate_app, 88 | )?)); 89 | } 90 | #[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))] 91 | { 92 | return Err(AutomationError::UnsupportedPlatform( 93 | "Current platform is not supported".to_string(), 94 | )); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/platforms/tree_search.rs: -------------------------------------------------------------------------------- 1 | /// TLDR: default TreeWalker does not traverse windows, so we need to traverse windows manually 2 | use accessibility::{AXAttribute, AXUIElement, AXUIElementAttributes, Error}; 3 | use core_foundation::array::CFArray; 4 | use core_foundation::base::TCFType; 5 | use std::{ 6 | cell::{Cell, RefCell}, 7 | collections::HashSet, 8 | hash::{Hash, Hasher}, 9 | thread, 10 | time::{Duration, Instant}, 11 | }; 12 | use tracing::debug; 13 | 14 | pub trait TreeVisitor { 15 | fn enter_element(&self, element: &AXUIElement) -> TreeWalkerFlow; 16 | fn 
exit_element(&self, element: &AXUIElement); 17 | } 18 | 19 | pub struct TreeWalkerWithWindows { 20 | attr_children: AXAttribute>, 21 | visited: RefCell>, 22 | cycle_count: RefCell, 23 | } 24 | 25 | #[derive(Copy, Clone, PartialEq, Eq)] 26 | pub enum TreeWalkerFlow { 27 | Continue, 28 | SkipSubtree, 29 | Exit, 30 | } 31 | 32 | impl Default for TreeWalkerWithWindows { 33 | fn default() -> Self { 34 | Self { 35 | attr_children: AXAttribute::children(), 36 | visited: RefCell::new(HashSet::new()), 37 | cycle_count: RefCell::new(0), 38 | } 39 | } 40 | } 41 | 42 | impl TreeWalkerWithWindows { 43 | pub fn new() -> Self { 44 | Self::default() 45 | } 46 | 47 | pub fn walk(&self, root: &AXUIElement, visitor: &dyn TreeVisitor) { 48 | let _ = self.walk_one(root, visitor); 49 | } 50 | 51 | fn walk_one(&self, root: &AXUIElement, visitor: &dyn TreeVisitor) -> TreeWalkerFlow { 52 | // Create wrapper for the element 53 | let element_wrapper = AXUIElementWrapper { 54 | element: root.clone(), 55 | }; 56 | 57 | // Check if already visited 58 | if self.visited.borrow().contains(&element_wrapper) { 59 | // Increment cycle counter 60 | let mut count = self.cycle_count.borrow_mut(); 61 | *count += 1; 62 | 63 | return TreeWalkerFlow::SkipSubtree; 64 | } 65 | 66 | // Mark as visited 67 | self.visited.borrow_mut().insert(element_wrapper); 68 | 69 | let mut flow = visitor.enter_element(root); 70 | 71 | // debug!(target: "operator", "Walking element: {:?}", root.role()); 72 | 73 | if flow == TreeWalkerFlow::Continue { 74 | // First try to get windows (if this is an application element) 75 | let windows_result = root.windows(); 76 | if let Ok(windows) = &windows_result { 77 | for window in windows.iter() { 78 | // debug!(target: "operator", "Walking window: {:?}", window.title()); 79 | let window_flow = self.walk_one(&window, visitor); 80 | if window_flow == TreeWalkerFlow::Exit { 81 | flow = window_flow; 82 | break; 83 | } 84 | } 85 | } 86 | 87 | // TODO avoid duplicate main window walking 88 | 
// Try main window 89 | if flow != TreeWalkerFlow::Exit { 90 | if let Ok(main_window) = root.main_window() { 91 | // debug!(target: "operator", "Walking main window: {:?}", main_window.title()); 92 | let window_flow = self.walk_one(&main_window, visitor); 93 | if window_flow == TreeWalkerFlow::Exit { 94 | flow = window_flow; 95 | } 96 | } 97 | } 98 | 99 | // If we haven't exited yet, continue with regular children 100 | if flow == TreeWalkerFlow::Continue { 101 | if let Ok(children) = root.attribute(&self.attr_children) { 102 | for child in children.into_iter() { 103 | let child_flow = self.walk_one(&child, visitor); 104 | 105 | if child_flow == TreeWalkerFlow::Exit { 106 | flow = child_flow; 107 | break; 108 | } 109 | } 110 | } 111 | } 112 | } 113 | 114 | visitor.exit_element(root); 115 | flow 116 | } 117 | 118 | pub fn get_cycle_count(&self) -> usize { 119 | *self.cycle_count.borrow() 120 | } 121 | } 122 | 123 | pub struct ElementFinderWithWindows { 124 | root: AXUIElement, 125 | implicit_wait: Option, 126 | predicate: Box bool>, 127 | depth: Cell, 128 | cached: RefCell>, 129 | } 130 | 131 | impl ElementFinderWithWindows { 132 | pub fn new(root: &AXUIElement, predicate: F, implicit_wait: Option) -> Self 133 | where 134 | F: 'static + Fn(&AXUIElement) -> bool, 135 | { 136 | Self { 137 | root: root.clone(), 138 | predicate: Box::new(predicate), 139 | implicit_wait, 140 | depth: Cell::new(0), 141 | cached: RefCell::new(None), 142 | } 143 | } 144 | 145 | pub fn find(&self) -> Result { 146 | if let Some(result) = &*self.cached.borrow() { 147 | return Ok(result.clone()); 148 | } 149 | 150 | let mut deadline = Instant::now(); 151 | let walker = TreeWalkerWithWindows::new(); 152 | 153 | if let Some(implicit_wait) = &self.implicit_wait { 154 | deadline += *implicit_wait; 155 | } 156 | 157 | loop { 158 | if let Some(result) = &*self.cached.borrow() { 159 | return Ok(result.clone()); 160 | } 161 | 162 | walker.walk(&self.root, self); 163 | let now = Instant::now(); 164 | 
165 | if now >= deadline { 166 | return Err(Error::NotFound); 167 | } else { 168 | let time_left = deadline.saturating_duration_since(now); 169 | thread::sleep(std::cmp::min(time_left, Duration::from_millis(250))); 170 | } 171 | } 172 | } 173 | } 174 | 175 | const MAX_DEPTH: usize = 100; 176 | 177 | impl TreeVisitor for ElementFinderWithWindows { 178 | fn enter_element(&self, element: &AXUIElement) -> TreeWalkerFlow { 179 | self.depth.set(self.depth.get() + 1); 180 | 181 | if (self.predicate)(element) { 182 | self.cached.replace(Some(element.clone())); 183 | return TreeWalkerFlow::Exit; 184 | } 185 | 186 | if self.depth.get() > MAX_DEPTH { 187 | TreeWalkerFlow::SkipSubtree 188 | } else { 189 | TreeWalkerFlow::Continue 190 | } 191 | } 192 | 193 | fn exit_element(&self, _element: &AXUIElement) { 194 | self.depth.set(self.depth.get() - 1) 195 | } 196 | } 197 | 198 | pub struct ElementsCollectorWithWindows { 199 | root: AXUIElement, 200 | predicate: Box bool>, 201 | depth: Cell, 202 | matches: RefCell>, 203 | max_results: Option, 204 | max_depth: Option, 205 | } 206 | 207 | impl ElementsCollectorWithWindows { 208 | pub fn new(root: &AXUIElement, predicate: F) -> Self 209 | where 210 | F: 'static + Fn(&AXUIElement) -> bool, 211 | { 212 | Self { 213 | root: root.clone(), 214 | predicate: Box::new(predicate), 215 | depth: Cell::new(0), 216 | matches: RefCell::new(Vec::new()), 217 | max_results: None, 218 | max_depth: None, 219 | } 220 | } 221 | 222 | pub fn with_limits(mut self, max_results: Option, max_depth: Option) -> Self { 223 | self.max_results = max_results; 224 | self.max_depth = max_depth; 225 | self 226 | } 227 | 228 | pub fn find_all(&self) -> Vec { 229 | let walker = TreeWalkerWithWindows::new(); 230 | walker.walk(&self.root, self); 231 | 232 | // After traversal is done, log how many cycles were detected 233 | let cycles = walker.get_cycle_count(); 234 | if cycles > 0 { 235 | debug!(target: "operator", "UI traversal complete - detected {} cycles in the 
accessibility tree", cycles); 236 | } 237 | 238 | self.matches.borrow().clone() 239 | } 240 | 241 | pub fn with_max_results(self, max: Option) -> Self { 242 | Self { 243 | max_results: max, 244 | ..self 245 | } 246 | } 247 | 248 | pub fn with_max_depth(self, max: Option) -> Self { 249 | Self { 250 | max_depth: max, 251 | ..self 252 | } 253 | } 254 | } 255 | 256 | impl TreeVisitor for ElementsCollectorWithWindows { 257 | fn enter_element(&self, element: &AXUIElement) -> TreeWalkerFlow { 258 | self.depth.set(self.depth.get() + 1); 259 | 260 | if let Some(max_depth) = self.max_depth { 261 | if self.depth.get() > max_depth { 262 | return TreeWalkerFlow::SkipSubtree; 263 | } 264 | } else if self.depth.get() > MAX_DEPTH { 265 | return TreeWalkerFlow::SkipSubtree; 266 | } 267 | 268 | if (self.predicate)(element) { 269 | self.matches.borrow_mut().push(element.clone()); 270 | 271 | if let Some(max_results) = self.max_results { 272 | if self.matches.borrow().len() >= max_results { 273 | debug!(target: "operator", "Reached max_results limit of {}", max_results); 274 | return TreeWalkerFlow::Exit; 275 | } 276 | } 277 | } 278 | 279 | TreeWalkerFlow::Continue 280 | } 281 | 282 | fn exit_element(&self, _element: &AXUIElement) { 283 | self.depth.set(self.depth.get() - 1) 284 | } 285 | } 286 | 287 | // Add a wrapper struct similar to Swift 288 | struct AXUIElementWrapper { 289 | element: AXUIElement, 290 | } 291 | 292 | impl PartialEq for AXUIElementWrapper { 293 | fn eq(&self, other: &Self) -> bool { 294 | // Use Core Foundation's CFEqual for proper element comparison 295 | unsafe { 296 | let self_ref = self.element.as_concrete_TypeRef(); 297 | let other_ref = other.element.as_concrete_TypeRef(); 298 | 299 | // CFEqual returns a Boolean (u8), convert to bool 300 | core_foundation::base::CFEqual(self_ref as _, other_ref as _) != 0 301 | } 302 | } 303 | } 304 | 305 | impl Eq for AXUIElementWrapper {} 306 | 307 | impl Hash for AXUIElementWrapper { 308 | fn hash(&self, state: &mut H) 
{ 309 | // Use Core Foundation's CFHash for consistent hashing 310 | unsafe { 311 | let element_ref = self.element.as_concrete_TypeRef(); 312 | let hash_value = core_foundation::base::CFHash(element_ref as _); 313 | state.write_u64(hash_value as u64); 314 | } 315 | } 316 | } 317 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/platforms/windows.rs: -------------------------------------------------------------------------------- 1 | use crate::operator::element::UIElementImpl; 2 | use crate::operator::platforms::AccessibilityEngine; 3 | use crate::operator::{AutomationError, Locator, Selector, UIElement, UIElementAttributes}; 4 | use std::fmt::Debug; 5 | use crate::operator::ClickResult; 6 | 7 | pub struct WindowsEngine; 8 | 9 | impl WindowsEngine { 10 | pub fn new(use_background_apps: bool, activate_app: bool) -> Result { 11 | Err(AutomationError::UnsupportedPlatform( 12 | "Windows implementation is not yet available".to_string(), 13 | )) 14 | } 15 | } 16 | 17 | impl AccessibilityEngine for WindowsEngine { 18 | fn get_root_element(&self) -> UIElement { 19 | panic!("Windows implementation is not yet available") 20 | } 21 | 22 | fn get_element_by_id(&self, _id: &str) -> Result { 23 | Err(AutomationError::UnsupportedPlatform( 24 | "Windows implementation is not yet available".to_string(), 25 | )) 26 | } 27 | 28 | fn get_focused_element(&self) -> Result { 29 | Err(AutomationError::UnsupportedPlatform( 30 | "Windows implementation is not yet available".to_string(), 31 | )) 32 | } 33 | 34 | fn get_applications(&self) -> Result, AutomationError> { 35 | Err(AutomationError::UnsupportedPlatform( 36 | "Windows implementation is not yet available".to_string(), 37 | )) 38 | } 39 | 40 | fn get_application_by_name(&self, _name: &str) -> Result { 41 | Err(AutomationError::UnsupportedPlatform( 42 | "Windows implementation is not yet available".to_string(), 43 | )) 44 | } 45 | 46 | fn find_elements( 47 | &self, 48 | 
_selector: &Selector, 49 | _root: Option<&UIElement>, 50 | ) -> Result, AutomationError> { 51 | Err(AutomationError::UnsupportedPlatform( 52 | "Windows implementation is not yet available".to_string(), 53 | )) 54 | } 55 | 56 | fn find_element( 57 | &self, 58 | selector: &Selector, 59 | root: Option<&UIElement>, 60 | ) -> Result { 61 | Err(AutomationError::UnsupportedPlatform( 62 | "Windows implementation is not yet available".to_string(), 63 | )) 64 | } 65 | 66 | fn open_application(&self, _app_name: &str) -> Result { 67 | Err(AutomationError::UnsupportedPlatform( 68 | "Windows implementation is not yet available".to_string(), 69 | )) 70 | } 71 | 72 | fn open_url(&self, _url: &str, _browser: Option<&str>) -> Result { 73 | Err(AutomationError::UnsupportedPlatform( 74 | "Windows implementation is not yet available".to_string(), 75 | )) 76 | } 77 | } 78 | 79 | // Placeholder WindowsUIElement that implements UIElementImpl 80 | pub struct WindowsUIElement; 81 | 82 | impl Debug for WindowsUIElement { 83 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 84 | f.debug_struct("WindowsUIElement").finish() 85 | } 86 | } 87 | 88 | impl UIElementImpl for WindowsUIElement { 89 | fn object_id(&self) -> usize { 90 | 0 91 | } 92 | 93 | fn id(&self) -> Option { 94 | None 95 | } 96 | 97 | fn role(&self) -> String { 98 | "".to_string() 99 | } 100 | 101 | fn attributes(&self) -> UIElementAttributes { 102 | UIElementAttributes { 103 | role: "".to_string(), 104 | label: None, 105 | value: None, 106 | description: None, 107 | properties: std::collections::HashMap::new(), 108 | } 109 | } 110 | 111 | fn children(&self) -> Result, AutomationError> { 112 | Err(AutomationError::UnsupportedPlatform( 113 | "Windows implementation is not yet available".to_string(), 114 | )) 115 | } 116 | 117 | fn parent(&self) -> Result, AutomationError> { 118 | Err(AutomationError::UnsupportedPlatform( 119 | "Windows implementation is not yet available".to_string(), 120 | )) 121 | } 122 | 123 
| fn bounds(&self) -> Result<(f64, f64, f64, f64), AutomationError> { 124 | Err(AutomationError::UnsupportedPlatform( 125 | "Windows implementation is not yet available".to_string(), 126 | )) 127 | } 128 | 129 | fn click(&self) -> Result { 130 | Err(AutomationError::UnsupportedPlatform( 131 | "Windows implementation is not yet available".to_string(), 132 | )) 133 | } 134 | 135 | fn double_click(&self) -> Result { 136 | Err(AutomationError::UnsupportedPlatform( 137 | "Windows implementation is not yet available".to_string(), 138 | )) 139 | } 140 | 141 | fn right_click(&self) -> Result<(), AutomationError> { 142 | Err(AutomationError::UnsupportedPlatform( 143 | "Windows implementation is not yet available".to_string(), 144 | )) 145 | } 146 | 147 | fn hover(&self) -> Result<(), AutomationError> { 148 | Err(AutomationError::UnsupportedPlatform( 149 | "Windows implementation is not yet available".to_string(), 150 | )) 151 | } 152 | 153 | fn focus(&self) -> Result<(), AutomationError> { 154 | Err(AutomationError::UnsupportedPlatform( 155 | "Windows implementation is not yet available".to_string(), 156 | )) 157 | } 158 | 159 | fn type_text(&self, _text: &str) -> Result<(), AutomationError> { 160 | Err(AutomationError::UnsupportedPlatform( 161 | "Windows implementation is not yet available".to_string(), 162 | )) 163 | } 164 | 165 | fn press_key(&self, _key: &str) -> Result<(), AutomationError> { 166 | Err(AutomationError::UnsupportedPlatform( 167 | "Windows implementation is not yet available".to_string(), 168 | )) 169 | } 170 | 171 | fn get_text(&self, max_depth: usize) -> Result { 172 | Err(AutomationError::UnsupportedPlatform( 173 | "Windows implementation is not yet available".to_string(), 174 | )) 175 | } 176 | 177 | fn set_value(&self, _value: &str) -> Result<(), AutomationError> { 178 | Err(AutomationError::UnsupportedPlatform( 179 | "Windows implementation is not yet available".to_string(), 180 | )) 181 | } 182 | 183 | fn is_enabled(&self) -> Result { 184 | 
Err(AutomationError::UnsupportedPlatform( 185 | "Windows implementation is not yet available".to_string(), 186 | )) 187 | } 188 | 189 | fn is_visible(&self) -> Result { 190 | Err(AutomationError::UnsupportedPlatform( 191 | "Windows implementation is not yet available".to_string(), 192 | )) 193 | } 194 | 195 | fn is_focused(&self) -> Result { 196 | Err(AutomationError::UnsupportedPlatform( 197 | "Windows implementation is not yet available".to_string(), 198 | )) 199 | } 200 | 201 | fn perform_action(&self, _action: &str) -> Result<(), AutomationError> { 202 | Err(AutomationError::UnsupportedPlatform( 203 | "Windows implementation is not yet available".to_string(), 204 | )) 205 | } 206 | 207 | fn as_any(&self) -> &dyn std::any::Any { 208 | self 209 | } 210 | 211 | fn create_locator(&self, _selector: Selector) -> Result { 212 | Err(AutomationError::UnsupportedPlatform( 213 | "Windows implementation is not yet available".to_string(), 214 | )) 215 | } 216 | 217 | fn clone_box(&self) -> Box { 218 | Box::new(WindowsUIElement) 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/selector.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | 3 | /// Represents ways to locate a UI element 4 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 5 | pub enum Selector { 6 | /// Select by role and optional name 7 | Role { role: String, name: Option }, 8 | /// Select by accessibility ID 9 | Id(String), 10 | /// Select by name/label 11 | Name(String), 12 | /// Select by text content 13 | Text(String), 14 | /// Select using XPath-like query 15 | Path(String), 16 | /// Select by multiple attributes (key-value pairs) 17 | Attributes(BTreeMap), 18 | /// Filter current elements by a predicate 19 | Filter(usize), // Uses an ID to reference a filter predicate stored separately 20 | /// Chain multiple selectors 21 | Chain(Vec), 22 | } 23 | 24 | impl 
From<&str> for Selector { 25 | fn from(s: &str) -> Self { 26 | // Make common UI roles like "window", "button", etc. default to Role selectors 27 | // instead of Name selectors 28 | match s { 29 | "window" | "button" | "checkbox" | "menu" | "menuitem" | "menubar" | "textfield" 30 | | "input" => Selector::Role { 31 | role: s.to_string(), 32 | name: None, 33 | }, 34 | // starts with AX 35 | _ if s.starts_with("AX") => Selector::Role { 36 | role: s.to_string(), 37 | name: None, 38 | }, 39 | _ if s.contains(':') => { 40 | let parts: Vec<&str> = s.splitn(2, ':').collect(); 41 | Selector::Role { 42 | role: parts[0].to_string(), 43 | name: Some(parts[1].to_string()), 44 | } 45 | } 46 | _ if s.starts_with('#') => Selector::Id(s[1..].to_string()), 47 | _ if s.starts_with('/') => Selector::Path(s.to_string()), 48 | _ if s.starts_with("text:") => Selector::Text(s[5..].to_string()), 49 | _ => Selector::Name(s.to_string()), 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /mcp-server-os-level/src/tests.rs: -------------------------------------------------------------------------------- 1 | use tracing_subscriber::prelude::*; 2 | 3 | #[cfg(test)] 4 | mod tests { 5 | use super::*; 6 | use tracing_subscriber::{filter::LevelFilter, fmt, EnvFilter}; 7 | 8 | #[cfg(target_os = "macos")] 9 | mod macos_tests { 10 | use serde_json::Value; 11 | 12 | use crate::Desktop; 13 | 14 | use super::*; 15 | 16 | // Setup tracing for tests 17 | fn setup_tracing() { 18 | let filter = EnvFilter::from_default_env() 19 | .add_directive(LevelFilter::DEBUG.into()) 20 | .add_directive("operator=debug".parse().unwrap()); 21 | 22 | tracing_subscriber::registry() 23 | .with(fmt::layer()) 24 | .with(filter) 25 | .try_init() 26 | .unwrap_or_default(); 27 | } 28 | 29 | #[test] 30 | #[ignore] 31 | 32 | fn test_find_buttons_in_iphone_mirroring() { 33 | setup_tracing(); 34 | 35 | // Create a desktop automation instance 36 | let desktop = match 
Desktop::new(true, false) { 37 | Ok(d) => { 38 | println!("Successfully created Desktop automation"); 39 | d 40 | } 41 | Err(e) => { 42 | println!("Failed to create Desktop automation: {:?}", e); 43 | return; 44 | } 45 | }; 46 | 47 | let app = match desktop.application("Cursor") { 48 | Ok(w) => w, 49 | Err(e) => { 50 | println!("Failed to find application: {:?}", e); 51 | return; 52 | } 53 | }; 54 | println!("App: {:?}", app.attributes().label); 55 | 56 | let windows = app.locator("window").unwrap().all().unwrap_or_default(); 57 | println!("Found {} windows", windows.len()); 58 | 59 | // Print the window hierarchy to understand the structure 60 | println!("\n===== WINDOW HIERARCHY ====="); 61 | if let Ok(children) = app.children() { 62 | println!("App has {} direct children", children.len()); 63 | for (i, child) in children.iter().enumerate() { 64 | println!( 65 | "Child #{}: role={}, label={:?}, description={:?}", 66 | i, 67 | child.role(), 68 | child.attributes().label, 69 | child.attributes().description 70 | ); 71 | 72 | // Print the next level down to see buttons 73 | if let Ok(grandchildren) = child.children() { 74 | println!(" Has {} children", grandchildren.len()); 75 | for (j, grandchild) in grandchildren.iter().enumerate() { 76 | println!( 77 | " Grandchild #{}.{}: role={}, label={:?}, description={:?}", 78 | i, 79 | j, 80 | grandchild.role(), 81 | grandchild.attributes().label, 82 | grandchild.attributes().description 83 | ); 84 | 85 | // Try one more level 86 | if let Ok(great_grandchildren) = grandchild.children() { 87 | println!(" Has {} children", great_grandchildren.len()); 88 | for (k, ggc) in great_grandchildren.iter().take(5).enumerate() { 89 | println!( 90 | " Great-grandchild #{}.{}.{}: role={}, label={:?}", 91 | i, 92 | j, 93 | k, 94 | ggc.role(), 95 | ggc.attributes().label 96 | ); 97 | } 98 | if great_grandchildren.len() > 5 { 99 | println!(" ... 
and {} more", great_grandchildren.len() - 5); 100 | } 101 | } 102 | } 103 | } 104 | } 105 | } 106 | 107 | // Find buttons in the application window 108 | println!("\n===== BUTTON SEARCH RESULTS ====="); 109 | let buttons = match app.locator("button") { 110 | Ok(locator) => locator.all().unwrap_or_default(), 111 | Err(_) => Vec::new(), 112 | }; 113 | println!("Found {} buttons via locator API", buttons.len()); 114 | 115 | // Print details about each button by type 116 | let mut ax_button_count = 0; 117 | let mut ax_menu_item_count = 0; 118 | let mut ax_menu_bar_item_count = 0; 119 | let mut ax_static_text_count = 0; 120 | let mut ax_image_count = 0; 121 | let mut other_count = 0; 122 | 123 | for (i, button) in buttons.iter().enumerate() { 124 | let button_type = if let Some(props) = button.attributes().properties.get("AXRole") 125 | { 126 | let props_str = props.clone(); 127 | props_str.unwrap_or_default() 128 | } else { 129 | Value::String("unknown".to_string()) 130 | }; 131 | 132 | println!( 133 | "Button #{}: type={}, role={}, label={:?}, description={:?}", 134 | i, 135 | button_type, 136 | button.role(), 137 | button.attributes().label, 138 | button.attributes().description 139 | ); 140 | 141 | // if description is "Rust" then click it 142 | if button.attributes().description == Some("Rust".to_string()) { 143 | match button.click() { 144 | Ok(_) => println!("Clicked button: {:?}", button.attributes().label), 145 | Err(e) => println!("Failed to click button: {:?}", e), 146 | } 147 | } 148 | 149 | // Count by type 150 | match button_type.as_str() { 151 | Some("AXButton") => ax_button_count += 1, 152 | Some("AXMenuItem") => ax_menu_item_count += 1, 153 | Some("AXMenuBarItem") => ax_menu_bar_item_count += 1, 154 | Some("AXStaticText") => ax_static_text_count += 1, 155 | Some("AXImage") => ax_image_count += 1, 156 | _ => other_count += 1, 157 | } 158 | } 159 | 160 | // Print summary of button types 161 | println!("\n===== BUTTON TYPE SUMMARY ====="); 162 | 
println!("AXButton: {}", ax_button_count); 163 | println!("AXMenuItem: {}", ax_menu_item_count); 164 | println!("AXMenuBarItem: {}", ax_menu_bar_item_count); 165 | println!("AXStaticText: {}", ax_static_text_count); 166 | println!("AXImage: {}", ax_image_count); 167 | println!("Other: {}", other_count); 168 | println!("Total: {}", buttons.len()); 169 | 170 | // Make sure we found at least some buttons 171 | assert!(buttons.len() > 0, "No buttons found in iPhone Mirroring"); 172 | 173 | // Check that we found the standard menu bar items 174 | assert_eq!( 175 | ax_menu_bar_item_count, 6, 176 | "Should find exactly 6 menu bar items" 177 | ); 178 | } 179 | 180 | #[test] 181 | #[ignore] 182 | fn test_find_and_fill_text_inputs() { 183 | setup_tracing(); 184 | 185 | // Create a desktop automation instance 186 | let desktop = match Desktop::new(true, false) { 187 | Ok(d) => { 188 | println!("Successfully created Desktop automation"); 189 | d 190 | } 191 | Err(e) => { 192 | println!("Failed to create Desktop automation: {:?}", e); 193 | return; 194 | } 195 | }; 196 | 197 | let app = desktop.application("Arc").unwrap(); 198 | 199 | let children = app.children().unwrap(); 200 | 201 | println!("App children: {:?}", children.len()); 202 | 203 | for (i, child) in children.iter().enumerate() { 204 | println!("App child #{}: {:?}", i, child.role()); 205 | } 206 | 207 | let input = app.locator("window").unwrap().first().unwrap_or_default(); 208 | println!("found input: {:?}", input.is_some()); 209 | println!("found input: {:?}", input.unwrap().text(10).unwrap()); 210 | } 211 | 212 | #[test] 213 | #[ignore] 214 | fn test_find_and_fill_text_inputsv2() { 215 | setup_tracing(); 216 | 217 | // Create a desktop automation instance 218 | let desktop = match Desktop::new(true, true) { 219 | Ok(d) => { 220 | println!("Successfully created Desktop automation"); 221 | d 222 | } 223 | Err(e) => { 224 | println!("Failed to create Desktop automation: {:?}", e); 225 | return; 226 | } 227 | }; 228 
| 229 | let app = desktop.application("Arc").unwrap(); 230 | 231 | let children = app.children().unwrap(); 232 | 233 | println!("App children: {:?}", children.len()); 234 | 235 | for (i, child) in children.iter().enumerate() { 236 | println!("App child #{}: {:?}", i, child.role()); 237 | } 238 | 239 | let buttons = app.locator("AXButton").unwrap().all().unwrap_or_default(); 240 | for b in buttons { 241 | println!("b: {:?}", b.role()); 242 | println!("b: {:?}", b.attributes().label); 243 | let text = b.text(4).unwrap_or_default(); 244 | println!("b: {:?}", text); 245 | if text.contains("Click") { 246 | println!("clicking"); 247 | let _ = b.type_text("foo"); 248 | b.focus().unwrap(); 249 | if let Err(e) = b.click() { 250 | println!("failed to click: {:?}", e); 251 | } 252 | } 253 | } 254 | // input.focus().err().unwrap(); 255 | // let text = input.text(10).unwrap(); 256 | // println!("text: {:?}", text); 257 | 258 | // let children = input.children().unwrap(); 259 | // println!("children: {:?}", children.len()); 260 | } 261 | } 262 | } 263 | --------------------------------------------------------------------------------