├── .gitignore
├── README.md
├── mcp-client-cli-interface
    ├── README.md
    ├── main.ts
    ├── package-lock.json
    ├── package.json
    ├── query-processing-engine.ts
    ├── setup.ts
    ├── start-here.ts
    └── tests
    │   └── test-mcp.js
├── mcp-client-nextjs
    ├── .env.example
    ├── .gitignore
    ├── README.md
    ├── eslint.config.mjs
    ├── next.config.ts
    ├── package-lock.json
    ├── package.json
    ├── postcss.config.mjs
    ├── public
    │   ├── file.svg
    │   ├── globe.svg
    │   ├── next.svg
    │   ├── vercel.svg
    │   └── window.svg
    ├── src
    │   ├── app
    │   │   ├── api
    │   │   │   └── mcp
    │   │   │   │   ├── initialize
    │   │   │   │       └── route.ts
    │   │   │   │   ├── logs
    │   │   │   │       └── route.ts
    │   │   │   │   ├── query
    │   │   │   │       └── route.ts
    │   │   │   │   ├── route.ts
    │   │   │   │   └── tools
    │   │   │   │       └── route.ts
    │   │   ├── favicon.ico
    │   │   ├── globals.css
    │   │   ├── layout.tsx
    │   │   └── page.tsx
    │   ├── components
    │   │   └── mcp-interface.tsx
    │   └── lib
    │   │   └── mcp
    │   │       ├── log-buffer.ts
    │   │       ├── query-processing-engine.ts
    │   │       ├── setup.ts
    │   │       └── start-here.ts
    ├── tailwind.config.js
    └── tsconfig.json
└── mcp-server-os-level
    ├── Cargo.toml
    ├── examples
        ├── test_click_by_role.rs
        ├── test_example.rs
        ├── test_get_all_apps.rs
        ├── test_get_arc_all_elements_custom.rs
        ├── test_get_arc_all_elements_sdk_count.rs
        ├── test_get_arc_interactable_elements.rs
        ├── test_get_arc_interactable_elements_list.rs
        ├── test_get_arc_text_sdk.rs
        ├── test_get_messages_and_send_message.rs
        └── test_get_messages_text_sdk.rs
    └── src
        ├── bin
            ├── handlers
            │   ├── click_by_index.rs
            │   ├── input_control.rs
            │   ├── list_elements_and_attributes.rs
            │   ├── mcp.rs
            │   ├── mod.rs
            │   ├── open_application.rs
            │   ├── open_url.rs
            │   ├── press_key_by_index.rs
            │   ├── type_by_index.rs
            │   └── utils.rs
            ├── mcp-bridge.ts
            ├── mod.rs
            ├── server.rs
            └── types.rs
        ├── desktop.rs
        ├── element.rs
        ├── errors.rs
        ├── lib.rs
        ├── locator.rs
        ├── platforms
            ├── linux.rs
            ├── macos.rs
            ├── mod.rs
            ├── tree_search.rs
            └── windows.rs
        ├── selector.rs
        └── tests.rs


/.gitignore:
--------------------------------------------------------------------------------
 1 | # node/typescript
 2 | node_modules/
 3 | dist/
 4 | build/
 5 | *.log
 6 | .npm
 7 | .env
 8 | .env.*
 9 | *.tsbuildinfo
10 | 
11 | # rust
12 | /target/
13 | **/target/
14 | Cargo.lock
15 | **/*.rs.bk
16 | 
17 | # os specific
18 | .DS_Store
19 | Thumbs.db
20 | desktop.ini
21 | 
22 | # editors
23 | .vscode/*
24 | !.vscode/settings.json
25 | !.vscode/tasks.json
26 | !.vscode/launch.json
27 | !.vscode/extensions.json
28 | .idea/
29 | *.sublime-*
30 | 
31 | # logs and databases
32 | *.log
33 | *.sql
34 | *.sqlite
35 | *.sqlite3
36 | 
37 | # build artifacts
38 | *.o
39 | *.a
40 | *.so
41 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Computer Use AI SDK
 2 | 
 3 | * We've built an MCP server that controls your computer
 4 | 
 5 | * You've heard of OpenAI's operator, you've heard of Claude's computer use. Now the open source alternative: Computer Use SDK from screenpipe.
 6 | 
 7 | * It's native on macOS—no virtual machine bs, no guardrails. Use it with any app or website however you want.
 8 | 
 9 | * No pixel-based bs—it relies on underlying desktop-rendered elements, making it much faster and far more reliable than pixel-based vision models.
10 | 
11 | * You can now build your own agents: get started with our simple Hello World template, which uses our MCP server and client.
12 | 
13 | * There are tools that our MCP Server provides out of the box:
14 |     * Launch apps
15 |     * Read content
16 |     * Click
17 |     * Enter text
18 |     * Press keys
19 | 
20 | * These will be computational primitives to allow the AI to control your computer and do your tasks for you. What will you build? Come check us out at https://screenpi.pe
21 | 
22 | ## Demos
23 | 
24 | agent sending a message
25 | 
26 | https://github.com/user-attachments/assets/f8687500-9a8c-4a96-81b6-77562feff093
27 | 
28 | get latest whatsapp messages
29 | ![Image](https://github.com/user-attachments/assets/6401c930-07e5-4459-b54c-a8c70fdca73f)
30 | 
31 | open arc browser 
32 | ![Image](https://github.com/user-attachments/assets/8656be95-951d-4f13-8ee9-41babb821abb)
33 | 
34 | ## Get started
35 | 
36 | ```bash
37 | git clone https://github.com/m13v/computer-use-ai-sdk.git
38 | cd computer-use-ai-sdk
39 | ```
40 | 
41 | ```bash
42 | # Install Rust (if not already installed)
43 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
44 | # Install Node.js and npm (if not already installed)
45 | # Visit https://nodejs.org/ or use nvm
46 | ```
47 | 
48 | ```bash
49 | # run backend server
50 | cd mcp-server-os-level
51 | cargo run --bin server
52 | # keep it running
53 | ```
54 | 
55 | ### Option 1: CLI Interface
56 | 
57 | ```bash
58 | # run CLI interface client in a new terminal  (good for debugging)
59 | cd mcp-client-cli-interface
60 | npm install  # install dependencies first
61 | 
62 | # Set your Anthropic API key as an environment variable
63 | export ANTHROPIC_API_KEY=sk-ant-xxxx  # Replace with your actual Anthropic API key
64 | # For Windows, use: set ANTHROPIC_API_KEY=sk-ant-xxxx
65 | # For permanent setup, add to your shell profile (.bashrc, .zshrc, etc.)
66 | 
67 | npx tsx main.ts
68 | ```
69 | 
70 | ### Option 2: Web app Interface
71 | 
72 | ```bash
73 | # run web app client in a new terminal
74 | cd mcp-client-nextjs
75 | npm install  # install dependencies first
76 | 
77 | # Set API key via command line
78 | echo "ANTHROPIC_API_KEY=sk-ant-XXXXXXXX" > .env  # replace XXXXXXXX with your actual key
79 | # Or append if you want to keep other env variables
80 | # echo "ANTHROPIC_API_KEY=sk-ant-XXXXXXXX" >> .env
81 | 
82 | npm run dev
83 | # go to provided localhost web page
84 | ```
85 | 
86 | 
87 | ## What do I do with it?
88 | 
89 | - Build custom workflows of agents to perform various actions
90 | - Build custom UI to make it easy for users to automate their computer work
91 | - Save a workflow and run it with cron
92 | - Combine with other MCP servers to do something cool, e.g.: fill out a google sheet based on the history of people i talk to throughout the day
93 | 
94 | ## Request features and endpoints in github issues
95 | 
96 | https://github.com/m13v/computer-use-ai-sdk/issues/new/choose


--------------------------------------------------------------------------------
/mcp-client-cli-interface/README.md:
--------------------------------------------------------------------------------
1 | This is an example client with a simple CLI interface that helps you get started and better understand how everything works.


--------------------------------------------------------------------------------
/mcp-client-cli-interface/main.ts:
--------------------------------------------------------------------------------
  1 | import { desktopClient, log } from './start-here';
  2 | import { setupEnvironment } from './setup';
  3 | import { processUserQuery } from './query-processing-engine';
  4 | import readline from 'readline';
  5 | import inquirer from 'inquirer';
  6 | 
  7 | async function main() {
  8 |   // prerequisites first: api key loaded, rust mcp server reachable
  9 |   await setupEnvironment();
 10 | 
 11 |   // open the mcp session on the same 127.0.0.1 endpoint the health check uses
 12 |   await desktopClient.connect('http://127.0.0.1:8080/mcp');
 13 |   await desktopClient.listTools();
 14 | 
 15 |   // readline interface that is passed along to the prompt helpers
 16 |   const terminal = readline.createInterface({
 17 |     input: process.stdin,
 18 |     output: process.stdout
 19 |   });
 20 | 
 21 |   // chat banner
 22 |   const banner = ['\n=== desktop control chat ===', '(type "exit" to quit)'];
 23 |   for (const line of banner) {
 24 |     console.log(line);
 25 |   }
 26 | 
 27 |   // first prompt: preset tasks or free-form input
 28 |   showInitialOptions(terminal);
 29 | }
 30 | 
 31 | // Show initial options
 32 | function showInitialOptions(rl: readline.Interface) {
 33 |   // Offers two preset tasks plus a free-form entry as the first prompt.
 34 |   console.log("\nselect how to start:");
 35 |   const choices = [
 36 |     "[type your own]",
 37 |     "send message to first dialogue in messages app. message is 'i'm testing computer-use-sdk'",
 38 |     "go to discord, click 'direct messages' dialogue, then send message 'i'm testing computer-use-sdk'"
 39 |   ];
 40 | 
 41 |   inquirer.prompt([
 42 |     { type: 'list', name: 'option', message: 'choose an option:', choices }
 43 |   ]).then(answers => {
 44 |     log.debug(`selected option: ${answers.option}`);
 45 | 
 46 |     if (answers.option === "[type your own]") {
 47 |       // Ask for custom input
 48 |       askQuestion(rl);
 49 |     } else {
 50 |       // Use the selected option directly as the prompt
 51 |       log.highlight(`using prompt: "${answers.option}"`);
 52 |       processQuery(answers.option, rl);
 53 |     }
 54 |   }).catch(err => {
 55 |     // A rejected prompt (closed stdin, Ctrl+C) would otherwise surface as
 56 |     // an unhandled rejection; report it and stop.
 57 |     log.error("error getting input:", err);
 58 |     process.exit(1);
 59 |   });
 60 | }
 61 | 
 62 | function processQuery(input: string, rl: readline.Interface) {
 63 |   // Trim first so "exit " or " Exit" still quits instead of being sent on as a query.
 64 |   if (input.trim().toLowerCase() === 'exit') {
 65 |     log.info("shutting down...");
 66 |     desktopClient.disconnect()
 67 |       .then(() => {
 68 |         rl.close();
 69 |         process.exit(0);
 70 |       });
 71 |     return;
 72 |   }
 73 |   log.highlight("\nprocessing...");
 74 |   processUserQuery(input)
 75 |     .then(response => {
 76 |       // Only show success message if we actually got a valid response
 77 |       if (response && !response.startsWith('Error:')) {
 78 |         log.response(response);
 79 |       }
 80 |       askQuestion(rl); // Continue with normal flow
 81 |     })
 82 |     .catch(error => {
 83 |       // Show error in red and with clear error prefix
 84 |       log.error(`query failed: ${error.message || error}`);
 85 |       askQuestion(rl); // Continue with normal flow
 86 |     });
 87 | }
 88 | 
 89 | function askQuestion(rl: readline.Interface) {
 90 |   // Prompt for the next free-form query and forward it to processQuery.
 91 |   inquirer.prompt([
 92 |     { type: 'input', name: 'query', message: 'query:', prefix: '' }
 93 |   ]).then(answers => {
 94 |     log.debug(`received input: "${answers.query}"`);
 95 |     processQuery(answers.query, rl);
 96 |   }).catch(err => {
 97 |     // Ctrl+C makes inquirer reject the prompt with an ExitPromptError.
 98 |     // Retrying on that error would re-open the prompt forever and make
 99 |     // the CLI impossible to interrupt, so exit instead.
100 |     if (err?.name === 'ExitPromptError') process.exit(0);
101 |     log.error("error getting input:", err);
102 |     askQuestion(rl); // Try again
103 |   });
104 | }
105 | 
106 | main().catch(error => { log.error("fatal error:", error); process.exitCode = 1; }); // non-zero exit on fatal error


--------------------------------------------------------------------------------
/mcp-client-cli-interface/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "hello-world-mcp-client",
 3 |   "version": "1.0.0",
 4 |   "main": "index.js",
 5 |   "scripts": {
 6 |     "test": "echo \"Error: no test specified\" && exit 1"
 7 |   },
 8 |   "keywords": [],
 9 |   "author": "",
10 |   "license": "ISC",
11 |   "description": "",
12 |   "dependencies": {
13 |     "@anthropic-ai/sdk": "^0.39.0",
14 |     "@modelcontextprotocol/sdk": "^1.8.0",
15 |     "axios": "^1.8.4",
16 |     "dotenv": "^16.4.7",
17 |     "inquirer": "^12.5.0"
18 |   },
19 |   "type": "module",
20 |   "devDependencies": {
21 |     "@types/inquirer": "^9.0.7"
22 |   }
23 | }
24 | 


--------------------------------------------------------------------------------
/mcp-client-cli-interface/setup.ts:
--------------------------------------------------------------------------------
  1 | import dotenv from "dotenv";
  2 | import path from "path";
  3 | import fs from "fs";
  4 | import { exec } from "child_process";
  5 | import { promisify } from "util";
  6 | 
  7 | // Create the exec promise function
  8 | const execPromise = promisify(exec);
  9 | 
 10 | // Load environment variables
 11 | export async function setupEnvironment() {
 12 |   // Resolves and validates ANTHROPIC_API_KEY, then health-checks the MCP server; exits when the key is missing/malformed or the server is unreachable.
 13 |   // First try loading from .env file
 14 |   dotenv.config();
 15 | 
 16 |   // Check if API key is set
 17 |   if (!process.env.ANTHROPIC_API_KEY) {
 18 |     // Try to load from ~/.screenpipe/config.json (HOME is unset on Windows, so fall back to USERPROFILE)
 19 |     const homeDir = process.env.HOME || process.env.USERPROFILE || "";
 20 |     const configPath = path.join(homeDir, ".screenpipe", "config.json");
 21 | 
 22 |     if (fs.existsSync(configPath)) {
 23 |       try {
 24 |         const config = JSON.parse(fs.readFileSync(configPath, "utf8"));
 25 |         if (config.anthropicApiKey) {
 26 |           process.env.ANTHROPIC_API_KEY = config.anthropicApiKey;
 27 |         }
 28 |       } catch (error) {
 29 |         console.error("error loading config:", error);
 30 |       }
 31 |     }
 32 | 
 33 |     // If still not set, show error
 34 |     if (!process.env.ANTHROPIC_API_KEY) {
 35 |       console.error(
 36 |         "missing ANTHROPIC_API_KEY - please set in .env file or config.json"
 37 |       );
 38 |       process.exit(1);
 39 |     }
 40 |   }
 41 | 
 42 |   // Validate API key format
 43 |   const apiKey = process.env.ANTHROPIC_API_KEY;
 44 |   if (!apiKey.startsWith('sk-ant-')) {
 45 |     console.error("\n======================================");
 46 |     console.error("invalid ANTHROPIC_API_KEY format");
 47 |     console.error("api key should start with 'sk-ant-'");
 48 |     console.error(`found: ${apiKey.substring(0, 7)}...`);
 49 |     console.error("please check your .env file or config.json");
 50 |     console.error("======================================\n");
 51 |     process.exit(1); // Exit immediately with error code
 52 |   }
 53 | 
 54 |   // check if rust mcp server is running
 55 |   const checkServer = async () => {
 56 |     try {
 57 |       // use the correct JSON-RPC format for MCP
 58 |       const payload = {
 59 |         jsonrpc: "2.0",
 60 |         id: "health-check",
 61 |         method: "initialize",
 62 |         params: {
 63 |           clientInfo: {
 64 |             name: "mcp-client-health-check",
 65 |             version: "1.0.0"
 66 |           },
 67 |           capabilities: {}
 68 |         }
 69 |       };
 70 | 
 71 |       console.log("checking mcp server connection...");
 72 | 
 73 |       // Direct connection to 127.0.0.1:8080 since we've verified it works.
 74 |       // fetch is used instead of shelling out to curl: no external binary is
 75 |       // needed and the JSON body is not subject to shell quoting (a
 76 |       // single-quoted argument is not honored by Windows cmd).
 77 |       const httpResponse = await fetch("http://127.0.0.1:8080/mcp", {
 78 |         method: "POST",
 79 |         headers: { "Content-Type": "application/json" },
 80 |         body: JSON.stringify(payload)
 81 |       });
 82 |       const body = await httpResponse.text();
 83 | 
 84 |       // Check if we got a valid JSON response
 85 |       let response;
 86 |       try {
 87 |         response = JSON.parse(body);
 88 |       } catch (jsonError) {
 89 |         console.error("invalid json response from server:", body.substring(0, 100));
 90 |         throw new Error("Invalid JSON response from server");
 91 |       }
 92 | 
 93 |       if (response?.result) {
 94 |         console.log("mcp server is running and responding properly");
 95 |         return true;
 96 |       }
 97 | 
 98 |       console.log("mcp server responded but with unexpected format");
 99 |       return false;
100 |     } catch (error) {
101 |       console.error("failed to connect to mcp server at http://127.0.0.1:8080/mcp");
102 |       console.error(`error details: ${error instanceof Error ? error.message : error}`);
103 |       console.error("please ensure the rust server is running");
104 |       process.exit(1);
105 |     }
106 |   };
107 | 
108 |   await checkServer();
109 | }
110 |     


--------------------------------------------------------------------------------
/mcp-client-cli-interface/start-here.ts:
--------------------------------------------------------------------------------
  1 | import Anthropic from "@anthropic-ai/sdk";
  2 | 
  3 | // Logging helpers that wrap console output in ANSI color codes; the bracketed tag names the level.
  4 | export const log = {
  5 |   info: (msg: string, ...args: any[]) => console.log(`\x1b[36m[info]\x1b[0m ${msg}`, ...args), 
  6 |   success: (msg: string, ...args: any[]) => console.log(`\x1b[32m[success]\x1b[0m ${msg}`, ...args),
  7 |   error: (msg: string, ...args: any[]) => console.error(`\x1b[31m[error]\x1b[0m ${msg}`, ...args),
  8 |   warn: (msg: string, ...args: any[]) => console.log(`\x1b[33m[warn]\x1b[0m ${msg}`, ...args),
  9 |   debug: (msg: string, ...args: any[]) => console.log(`\x1b[90m[debug]\x1b[0m ${msg}`, ...args),
 10 |   // Untagged variants: highlight, iteration, response and tool print styled text without a [level] tag
 11 |   highlight: (msg: string, ...args: any[]) => console.log(`\x1b[1m\x1b[35m${msg}\x1b[0m`, ...args),
 12 |   iteration: (msg: string, ...args: any[]) => console.log(`\x1b[36m${msg}\x1b[0m`, ...args),
 13 |   response: (msg: string) => console.log(`\n\x1b[1m\x1b[37mresponse:\x1b[0m ${msg}`),
 14 |   tool: (name: string, result: any) => {
 15 |     // Serialize the result to JSON and cap it at maxLength characters; null/undefined are stringified directly
 16 |     const truncateJSON = (obj: any, maxLength = 500): string => {
 17 |       if (obj === undefined || obj === null) {
 18 |         return String(obj);
 19 |       }
 20 |       const str = JSON.stringify(obj);
 21 |       if (str.length <= maxLength) return str;
 22 |       return str.substring(0, maxLength) + '... [truncated]';
 23 |     };
 24 |     
 25 |     // Print the tool name and its (possibly truncated) result on one line
 26 |     console.log(`\n\x1b[1m\x1b[37m${name} result:\x1b[0m ${truncateJSON(result)}`);
 27 |   }
 28 | };
 29 | 
 30 | class DesktopControlClient {
 31 |   // Minimal JSON-RPC-over-HTTP client for the rust MCP server.
 32 |   private connected = false;
 33 |   private serverUrl = "";
 34 |   private requestId = 0;
 35 |   private anthropic = new Anthropic();
 36 |   // Connect to the MCP server via http; resolves to true on success, false otherwise
 37 |   async connect(serverUrl: string) {
 38 |     log.info(`connecting to mcp server: ${serverUrl}`);
 39 |     
 40 |     try {
 41 |       this.serverUrl = serverUrl;
 42 |       const response = await this.makeRequest("initialize", {});
 43 |       
 44 |       if (response.result) {
 45 |         this.connected = true;
 46 |         log.success('mcp client session established successfully');
 47 |         return true;
 48 |       } else {
 49 |         log.error('failed to establish mcp client session:', response.error);
 50 |         return false;
 51 |       }
 52 |     } catch (error) {
 53 |       log.error('failed to establish mcp client session:', error);
 54 |       return false;
 55 |     }
 56 |   }
 57 |   
 58 |   // Make a JSON-RPC request against the URL stored by connect()
 59 |   private async makeRequest(method: string, params: any) {
 60 |     const id = `request-${++this.requestId}`;
 61 |     // Fall back to the default local endpoint if connect() has not stored a URL yet.
 62 |     const response = await fetch(this.serverUrl || "http://127.0.0.1:8080/mcp", {
 63 |       method: "POST",
 64 |       headers: {
 65 |         "Content-Type": "application/json"
 66 |       },
 67 |       body: JSON.stringify({
 68 |         jsonrpc: "2.0",
 69 |         id,
 70 |         method,
 71 |         params
 72 |       })
 73 |     });
 74 |     
 75 |     return await response.json();
 76 |   }
 77 |   
 78 |   // Check if connected
 79 |   isConnected(): boolean {
 80 |     return this.connected;
 81 |   }
 82 |   
 83 |   // List available tools; throws when not connected or when the server reply carries no tool list
 84 |   async listTools() {
 85 |     if (!this.isConnected()) {
 86 |       log.error('cannot list tools: not connected');
 87 |       throw new Error('Not connected to MCP server');
 88 |     }
 89 |     
 90 |     try {
 91 |       // In standard MCP, this would be tools/list
 92 |       // But our rust server exposes tools through initialize
 93 |       const response = await this.makeRequest("initialize", {});
 94 |       const tools = response.result?.capabilities?.tools?.functions;
 95 |       if (!Array.isArray(tools)) throw new Error('initialize response did not include a tool list');
 96 |       // Create simplified view - one line per tool
 97 |       log.info('available tools:');
 98 |       tools.forEach((tool: any) => {
 99 |         const propertyNames = Object.keys(tool.parameters?.properties || {}).join(', ');
100 |         log.debug(`- ${tool.name}: ${propertyNames}`);
101 |       });
102 |       
103 |       return { tools };
104 |     } catch (error) {
105 |       log.error('failed to list tools:', error);
106 |       throw error;
107 |     }
108 |   }
109 |   
110 |   // Call a tool; returns response.result when present, otherwise the whole response
111 |   async callTool(name: string, args: Record<string, any>) {
112 |     if (!this.isConnected()) {
113 |       log.error('cannot call tool: not connected');
114 |       throw new Error('Not connected to MCP server');
115 |     }
116 |     
117 |     log.info(`calling tool "${name}" with args: ${JSON.stringify(args)}`);
118 |     
119 |     try {
120 |       const response = await this.makeRequest("executeToolFunction", {
121 |         function: name,
122 |         arguments: args
123 |       });
124 |       
125 |       // Check if result exists before logging
126 |       if (response && 'result' in response) {
127 |         log.tool(name, response.result);
128 |         return response.result;
129 |       } else {
130 |         log.tool(name, response); // Log the entire response if result is missing
131 |         return response; // Still return whatever we got
132 |       }
133 |     } catch (error) {
134 |       log.error(`error calling tool "${name}":`, error);
135 |       throw error;
136 |     }
137 |   }
138 |   
139 |   // Disconnect from the server (only clears the local connected flag)
140 |   async disconnect() {
141 |     this.connected = false;
142 |     log.success('mcp client session closed');
143 |   }
144 | }
145 | 
146 | // Export an instance that can be used throughout your application
147 | export const desktopClient = new DesktopControlClient();
148 | 


--------------------------------------------------------------------------------
/mcp-client-cli-interface/tests/test-mcp.js:
--------------------------------------------------------------------------------
  1 | import axios from 'axios';
  2 | 
  3 | // Helper function to create an MCP request
  4 | function createMCPRequest(method, params) {
  5 |   return {
  6 |     jsonrpc: '2.0',
  7 |     id: Math.floor(Math.random() * 10000),
  8 |     method,
  9 |     params
 10 |   };
 11 | }
 12 | 
 13 | async function testMCP() {
 14 |   const mcpUrl = 'http://127.0.0.1:8080/mcp';
 15 |   console.log('starting mcp test suite');
 16 | 
 17 |   try {
 18 |     // 1. Initialize to get capabilities
 19 |     console.log('testing initialize...');
 20 |     const initResponse = await axios.post(mcpUrl, createMCPRequest('initialize', {
 21 |       capabilities: {
 22 |         tools: { execution: true },
 23 |         resources: {}
 24 |       }
 25 |     }));
 26 |     console.log('initialize response capabilities:', initResponse.data.result.capabilities.tools.functions.map(f => f.name));
 27 | 
 28 |     // 2. Open an application (e.g., browser)
 29 |     console.log('\ntesting openApplication...');
 30 |     const openAppResponse = await axios.post(mcpUrl, createMCPRequest('executeToolFunction', {
 31 |       function: 'openApplication',
 32 |       arguments: {
 33 |         app_name: 'Arc'
 34 |       }
 35 |     }));
 36 |     console.log('open application response:', openAppResponse.data);
 37 | 
 38 |     // Wait a moment for the app to open
 39 |     await new Promise(resolve => setTimeout(resolve, 2000));
 40 | 
 41 |     // 3. Open a URL
 42 |     console.log('\ntesting openUrl...');
 43 |     const openUrlResponse = await axios.post(mcpUrl, createMCPRequest('executeToolFunction', {
 44 |       function: 'openUrl',
 45 |       arguments: {
 46 |         url: 'https://example.com',
 47 |         browser: 'Arc'
 48 |       }
 49 |     }));
 50 |     console.log('open url response:', openUrlResponse.data);
 51 | 
 52 |     // Wait for page to load
 53 |     await new Promise(resolve => setTimeout(resolve, 3000));
 54 | 
 55 |     // 4. List interactable elements
 56 |     console.log('\ntesting listInteractableElements...');
 57 |     const listElementsResponse = await axios.post(mcpUrl, createMCPRequest('executeToolFunction', {
 58 |       function: 'listInteractableElements',
 59 |       arguments: {
 60 |         app_name: 'Arc',
 61 |         interactable_only: true,
 62 |         max_elements: 10
 63 |       }
 64 |     }));
 65 |     console.log('list elements response stats:', listElementsResponse.data.result.stats);
 66 |     
 67 |     if (listElementsResponse.data.result.elements.length > 0) {
 68 |       console.log('first element:', listElementsResponse.data.result.elements[0]);
 69 | 
 70 |       // 5. Click element by index
 71 |       console.log('\ntesting clickByIndex...');
 72 |       const clickByIndexResponse = await axios.post(mcpUrl, createMCPRequest('executeToolFunction', {
 73 |         function: 'clickByIndex',
 74 |         arguments: {
 75 |           element_index: 0
 76 |         }
 77 |       }));
 78 |       console.log('click by index response:', clickByIndexResponse.data);
 79 | 
 80 |       // 6. Type text by index (only if we have a text field)
 81 |       const textField = listElementsResponse.data.result.elements.findIndex(el => 
 82 |         el.role === 'AXTextField' || el.role === 'AXTextArea');
 83 |       
 84 |       if (textField >= 0) {
 85 |         console.log('\ntesting typeByIndex...');
 86 |         const typeByIndexResponse = await axios.post(mcpUrl, createMCPRequest('executeToolFunction', {
 87 |           function: 'typeByIndex',
 88 |           arguments: {
 89 |             element_index: textField,
 90 |             text: 'Hello from MCP test'
 91 |           }
 92 |         }));
 93 |         console.log('type by index response:', typeByIndexResponse.data);
 94 | 
 95 |         // 7. Press key by index
 96 |         console.log('\ntesting pressKeyByIndex...');
 97 |         const pressKeyByIndexResponse = await axios.post(mcpUrl, createMCPRequest('executeToolFunction', {
 98 |           function: 'pressKeyByIndex',
 99 |           arguments: {
100 |             element_index: textField,
101 |             key_combo: 'Enter'
102 |           }
103 |         }));
104 |         console.log('press key by index response:', pressKeyByIndexResponse.data);
105 |       } else {
106 |         console.log('no text fields found, skipping type and press key tests');
107 |       }
108 |     } else {
109 |       console.log('no elements found, skipping index-based operations');
110 |     }
111 | 
112 |     // 8. Scroll element
113 |     console.log('\ntesting scrollElement...');
114 |     const scrollResponse = await axios.post(mcpUrl, createMCPRequest('executeToolFunction', {
115 |       function: 'scrollElement',
116 |       arguments: {
117 |         selector: {
118 |           app_name: 'Arc',
119 |           locator: 'main'
120 |         },
121 |         direction: 'down',
122 |         amount: 100
123 |       }
124 |     }));
125 |     console.log('scroll response:', scrollResponse.data);
126 | 
127 |     // 9. Input control
128 |     console.log('\ntesting inputControl...');
129 |     const inputControlResponse = await axios.post(mcpUrl, createMCPRequest('executeToolFunction', {
130 |       function: 'inputControl',
131 |       arguments: {
132 |         action: {
133 |           type: 'KeyPress',
134 |           data: 'Escape'
135 |         }
136 |       }
137 |     }));
138 |     console.log('input control response:', inputControlResponse.data);
139 | 
140 |     console.log('\nall tests completed');
141 |   } catch (error) {
142 |     console.error('error during testing:', error.response?.data || error.message);
143 |   }
144 | }
145 | 
146 | testMCP();


--------------------------------------------------------------------------------
/mcp-client-nextjs/.env.example:
--------------------------------------------------------------------------------
1 | ANTHROPIC_API_KEY=sk-ant-XXXXXXXX


--------------------------------------------------------------------------------
/mcp-client-nextjs/.gitignore:
--------------------------------------------------------------------------------
 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
 2 | 
 3 | # dependencies
 4 | /node_modules
 5 | /.pnp
 6 | .pnp.*
 7 | .yarn/*
 8 | !.yarn/patches
 9 | !.yarn/plugins
10 | !.yarn/releases
11 | !.yarn/versions
12 | 
13 | # testing
14 | /coverage
15 | 
16 | # next.js
17 | /.next/
18 | /out/
19 | 
20 | # production
21 | /build
22 | 
23 | # misc
24 | .DS_Store
25 | *.pem
26 | 
27 | # debug
28 | npm-debug.log*
29 | yarn-debug.log*
30 | yarn-error.log*
31 | .pnpm-debug.log*
32 | 
33 | # env files (can opt-in for committing if needed)
34 | .env*
35 | !.env.example
36 | 
37 | # vercel
38 | .vercel
39 | 
40 | # typescript
41 | *.tsbuildinfo
42 | next-env.d.ts
43 | 


--------------------------------------------------------------------------------
/mcp-client-nextjs/README.md:
--------------------------------------------------------------------------------
1 | This is a Next.js app that serves as an MCP client


--------------------------------------------------------------------------------
/mcp-client-nextjs/eslint.config.mjs:
--------------------------------------------------------------------------------
 1 | import { dirname } from "path";
 2 | import { fileURLToPath } from "url";
 3 | import { FlatCompat } from "@eslint/eslintrc";
 4 | 
 5 | const __filename = fileURLToPath(import.meta.url);
 6 | const __dirname = dirname(__filename);
 7 | 
 8 | const compat = new FlatCompat({
 9 |   baseDirectory: __dirname,
10 | });
11 | 
12 | const eslintConfig = [
13 |   ...compat.extends("next/core-web-vitals", "next/typescript"),
14 | ];
15 | 
16 | export default eslintConfig;
17 | 


--------------------------------------------------------------------------------
/mcp-client-nextjs/next.config.ts:
--------------------------------------------------------------------------------
1 | import type { NextConfig } from "next";
2 | 
3 | const nextConfig: NextConfig = {
4 |   /* config options here */
5 | };
6 | 
7 | export default nextConfig;
8 | 


--------------------------------------------------------------------------------
/mcp-client-nextjs/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "hellow-world-mcp-client-nextjs",
 3 |   "version": "0.1.0",
 4 |   "private": true,
 5 |   "scripts": {
 6 |     "dev": "next dev --turbopack",
 7 |     "build": "next build",
 8 |     "start": "next start",
 9 |     "lint": "next lint"
10 |   },
11 |   "dependencies": {
12 |     "@anthropic-ai/sdk": "^0.39.0",
13 |     "@modelcontextprotocol/sdk": "^1.8.0",
14 |     "axios": "^1.8.4",
15 |     "dotenv": "^16.4.7",
16 |     "lucide-react": "^0.485.0",
17 |     "next": "15.2.4",
18 |     "react": "^19.0.0",
19 |     "react-dom": "^19.0.0"
20 |   },
21 |   "devDependencies": {
22 |     "@eslint/eslintrc": "^3",
23 |     "@tailwindcss/postcss": "^4",
24 |     "@types/node": "^20",
25 |     "@types/react": "^19",
26 |     "@types/react-dom": "^19",
27 |     "eslint": "^9",
28 |     "eslint-config-next": "15.2.4",
29 |     "tailwindcss": "^4",
30 |     "typescript": "^5"
31 |   }
32 | }
33 | 


--------------------------------------------------------------------------------
/mcp-client-nextjs/postcss.config.mjs:
--------------------------------------------------------------------------------
1 | const config = {
2 |   plugins: ["@tailwindcss/postcss"],
3 | };
4 | 
5 | export default config;
6 | 


--------------------------------------------------------------------------------
/mcp-client-nextjs/public/file.svg:
--------------------------------------------------------------------------------
1 | <svg fill="none" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg"><path d="M14.5 13.5V5.41a1 1 0 0 0-.3-.7L9.8.29A1 1 0 0 0 9.08 0H1.5v13.5A2.5 2.5 0 0 0 4 16h8a2.5 2.5 0 0 0 2.5-2.5m-1.5 0v-7H8v-5H3v12a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1M9.5 5V2.12L12.38 5zM5.13 5h-.62v1.25h2.12V5zm-.62 3h7.12v1.25H4.5zm.62 3h-.62v1.25h7.12V11z" clip-rule="evenodd" fill="#666" fill-rule="evenodd"/></svg>


--------------------------------------------------------------------------------
/mcp-client-nextjs/public/globe.svg:
--------------------------------------------------------------------------------
1 | <svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><g clip-path="url(#a)"><path fill-rule="evenodd" clip-rule="evenodd" d="M10.27 14.1a6.5 6.5 0 0 0 3.67-3.45q-1.24.21-2.7.34-.31 1.83-.97 3.1M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16m.48-1.52a7 7 0 0 1-.96 0H7.5a4 4 0 0 1-.84-1.32q-.38-.89-.63-2.08a40 40 0 0 0 3.92 0q-.25 1.2-.63 2.08a4 4 0 0 1-.84 1.31zm2.94-4.76q1.66-.15 2.95-.43a7 7 0 0 0 0-2.58q-1.3-.27-2.95-.43a18 18 0 0 1 0 3.44m-1.27-3.54a17 17 0 0 1 0 3.64 39 39 0 0 1-4.3 0 17 17 0 0 1 0-3.64 39 39 0 0 1 4.3 0m1.1-1.17q1.45.13 2.69.34a6.5 6.5 0 0 0-3.67-3.44q.65 1.26.98 3.1M8.48 1.5l.01.02q.41.37.84 1.31.38.89.63 2.08a40 40 0 0 0-3.92 0q.25-1.2.63-2.08a4 4 0 0 1 .85-1.32 7 7 0 0 1 .96 0m-2.75.4a6.5 6.5 0 0 0-3.67 3.44 29 29 0 0 1 2.7-.34q.31-1.83.97-3.1M4.58 6.28q-1.66.16-2.95.43a7 7 0 0 0 0 2.58q1.3.27 2.95.43a18 18 0 0 1 0-3.44m.17 4.71q-1.45-.12-2.69-.34a6.5 6.5 0 0 0 3.67 3.44q-.65-1.27-.98-3.1" fill="#666"/></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h16v16H0z"/></clipPath></defs></svg>


--------------------------------------------------------------------------------
/mcp-client-nextjs/public/next.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>


--------------------------------------------------------------------------------
/mcp-client-nextjs/public/vercel.svg:
--------------------------------------------------------------------------------
1 | <svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1155 1000"><path d="m577.3 0 577.4 1000H0z" fill="#fff"/></svg>


--------------------------------------------------------------------------------
/mcp-client-nextjs/public/window.svg:
--------------------------------------------------------------------------------
1 | <svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path fill-rule="evenodd" clip-rule="evenodd" d="M1.5 2.5h13v10a1 1 0 0 1-1 1h-11a1 1 0 0 1-1-1zM0 1h16v11.5a2.5 2.5 0 0 1-2.5 2.5h-11A2.5 2.5 0 0 1 0 12.5zm3.75 4.5a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5M7 4.75a.75.75 0 1 1-1.5 0 .75.75 0 0 1 1.5 0m1.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5" fill="#666"/></svg>


--------------------------------------------------------------------------------
/mcp-client-nextjs/src/app/api/mcp/initialize/route.ts:
--------------------------------------------------------------------------------
 1 | import { NextResponse } from 'next/server';
 2 | import { desktopClient } from '@/lib/mcp/start-here';
 3 | import { checkMCPServer } from '@/lib/mcp/setup';
 4 | 
 5 | // Shared state can be moved to a separate file if needed
 6 | let isInitialized = false;
 7 | 
 8 | /**
 9 |  * GET handler: connects the shared desktop client to the MCP server (tracked by
10 |  * a module-level flag) and answers 503 with an error payload if any step fails.
11 |  */
12 | export async function GET() {
13 |   // Skip the real MCP connection while Next.js is in its production-build phase
14 |   if (process.env.NEXT_PHASE === 'phase-production-build') {
15 |     return NextResponse.json({ status: 'skipped-during-build' });
16 |   }
17 | 
18 |   try {
19 |     if (isInitialized) {
20 |       console.log('mcp client already initialized');
21 |       return NextResponse.json({ status: 'connected' });
22 |     }
23 |     console.log('initializing mcp client connection...');
24 | 
25 |     // check if server is available
26 |     if (!(await checkMCPServer())) {
27 |       throw new Error('mcp server is not available');
28 |     }
29 | 
30 |     // connect() reports failure by returning false instead of throwing
31 |     if (!(await desktopClient.connect('http://127.0.0.1:8080/mcp'))) {
32 |       throw new Error('could not establish mcp client session');
33 |     }
34 | 
35 |     // list available tools (throws if the client is not connected)
36 |     await desktopClient.listTools();
37 |     isInitialized = true;
38 |     console.log('mcp client initialized successfully');
39 |     return NextResponse.json({
40 |       status: 'connected',
41 |       message: 'mcp client initialized successfully'
42 |     });
43 |   } catch (error) {
44 |     const reason = error instanceof Error ? error.message : String(error);
45 |     console.error('failed to initialize mcp client:', error);
46 |     return NextResponse.json(
47 |       { status: 'error', error: `failed to initialize mcp client: ${reason}` },
48 |       { status: 503 }
49 |     );
50 |   }
51 | }
52 | 


--------------------------------------------------------------------------------
/mcp-client-nextjs/src/app/api/mcp/logs/route.ts:
--------------------------------------------------------------------------------
 1 | import { NextResponse } from 'next/server';
 2 | import { logBuffer } from '../../../../lib/mcp/log-buffer';
 3 | 
 4 | // GET handler: returns buffered logs, optionally only those newer than `?since=<timestamp>`.
 5 | export async function GET(request: Request) {
 6 |   const sinceParam = new URL(request.url).searchParams.get('since');
 7 |   // an absent or empty `since` means "no cutoff"
 8 |   const cutoff = sinceParam ? parseInt(sinceParam, 10) : undefined;
 9 | 
10 |   return NextResponse.json({ logs: logBuffer.getLogs(cutoff) });
11 | }
12 | 


--------------------------------------------------------------------------------
/mcp-client-nextjs/src/app/api/mcp/query/route.ts:
--------------------------------------------------------------------------------
 1 | import { NextRequest, NextResponse } from 'next/server';
 2 | import { processUserQuery } from '@/lib/mcp/query-processing-engine';
 3 | import { desktopClient, log } from '@/lib/mcp/start-here';
 4 | import { checkMCPServer } from '@/lib/mcp/setup';
 5 | 
 6 | /**
 7 |  * POST handler: runs the `query` string from the JSON body through the agent loop.
 8 |  * 400 when `query` is missing; 500 when the server is unreachable or processing fails.
 9 |  */
10 | export async function POST(request: NextRequest) {
11 |   try {
12 |     // Parse the request body
13 |     const { query } = await request.json();
14 | 
15 |     if (!query) {
16 |       return NextResponse.json(
17 |         { status: 'error', error: 'query is required' },
18 |         { status: 400 }
19 |       );
20 |     }
21 | 
22 |     log.info('received mcp query:', query);
23 | 
24 |     // Check if server is available
25 |     if (!(await checkMCPServer())) {
26 |       throw new Error('mcp server is not available');
27 |     }
28 | 
29 |     // Use the advanced query processing engine instead of direct client call
30 |     log.highlight('processing query through agent loop');
31 | 
32 |     try {
33 |       const response = await processUserQuery(query);
34 |       return NextResponse.json({ response });
35 |     } catch (error) {
36 |       // `error` is `unknown` under strict mode; narrow before reading `.message`
37 |       const message = error instanceof Error ? error.message : String(error);
38 |       log.error(`failed to process query: ${message}`);
39 | 
40 |       // Return proper error response with status code
41 |       return NextResponse.json(
42 |         { error: message, status: 'error', details: String(error) },
43 |         { status: 500 }
44 |       );
45 |     }
46 |   } catch (error) {
47 |     log.error(`error handling request: ${error}`);
48 |     return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
49 |   }
50 | }
51 | 


--------------------------------------------------------------------------------
/mcp-client-nextjs/src/app/api/mcp/route.ts:
--------------------------------------------------------------------------------
 1 | import { NextRequest, NextResponse } from 'next/server';
 2 | import { desktopClient } from '@/lib/mcp/start-here';
 3 | import { processUserQuery } from '@/lib/mcp/query-processing-engine';
 4 | import { checkMCPServer } from '@/lib/mcp/setup';
 5 | 
 6 | let isInitialized = false;
 7 | 
 8 | // Connects the shared desktop client once; resolves to false (never throws) on failure.
 9 | async function initialize() {
10 |   if (isInitialized) return true;
11 |   console.log('initializing mcp client connection...');
12 | 
13 |   try {
14 |     // check if server is available
15 |     if (!(await checkMCPServer())) {
16 |       throw new Error('mcp server is not available');
17 |     }
18 | 
19 |     // connect() signals failure by returning false, so check it explicitly
20 |     if (!(await desktopClient.connect('http://127.0.0.1:8080/mcp'))) {
21 |       throw new Error('could not establish mcp client session');
22 |     }
23 | 
24 |     // list available tools
25 |     await desktopClient.listTools();
26 |     isInitialized = true;
27 |     console.log('mcp client initialized successfully');
28 |     return true;
29 |   } catch (error) {
30 |     console.error('failed to initialize mcp client:', error);
31 |     return false;
32 |   }
33 | }
34 | 
35 | /**
36 |  * POST handler: lazily initializes the MCP client, then runs the body's `query`
37 |  * through processUserQuery.
38 |  * Status codes: 503 init failed, 400 missing query, 500 any thrown error.
39 |  */
40 | export async function POST(request: NextRequest) {
41 |   try {
42 |     const ready = await initialize();
43 |     if (!ready) {
44 |       const body = { error: 'failed to initialize mcp client' };
45 |       return NextResponse.json(body, { status: 503 });
46 |     }
47 | 
48 |     const payload = await request.json();
49 |     const query = payload.query;
50 |     console.log('processing query:', query);
51 | 
52 |     if (!query) {
53 |       const body = { error: 'query is required' };
54 |       return NextResponse.json(body, { status: 400 });
55 |     }
56 | 
57 |     const response = await processUserQuery(query);
58 |     return NextResponse.json({ response });
59 |   } catch (error) {
60 |     console.error('error in mcp api route:', error);
61 |     const body = { error: 'failed to process query' };
62 |     return NextResponse.json(body, { status: 500 });
63 |   }
64 | }


--------------------------------------------------------------------------------
/mcp-client-nextjs/src/app/api/mcp/tools/route.ts:
--------------------------------------------------------------------------------
 1 | import { desktopClient } from '@/lib/mcp/start-here';
 2 | import { NextResponse } from 'next/server';
 3 | 
 4 | // GET handler: lists the names of the tools reported by desktopClient.listTools().
 5 | export async function GET() {
 6 |   try {
 7 |     // The listTools method already exists in your desktopClient
 8 |     const toolsResponse = await desktopClient.listTools();
 9 | 
10 |     // Tools arrive untyped; describe only the field we read instead of using `any`
11 |     const toolNames = toolsResponse.tools.map((tool) => (tool as { name: string }).name);
12 | 
13 |     console.log(`api/mcp/tools: returning ${toolNames.length} tools`);
14 |     return NextResponse.json({ tools: toolNames });
15 |   } catch (error) {
16 |     console.error('failed to get tools:', error);
17 |     return NextResponse.json(
18 |       { error: error instanceof Error ? error.message : 'Unknown error' },
19 |       { status: 500 }
20 |     );
21 |   }
22 | }
23 | 


--------------------------------------------------------------------------------
/mcp-client-nextjs/src/app/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mediar-ai/MCP-server-client-computer-use-ai-sdk/4c5866be6b8f55702651cea91dc4b5849cd899b3/mcp-client-nextjs/src/app/favicon.ico


--------------------------------------------------------------------------------
/mcp-client-nextjs/src/app/globals.css:
--------------------------------------------------------------------------------
 1 | @import "tailwindcss";
 2 | 
 3 | :root {
 4 |   --background: #ffffff;
 5 |   --foreground: #171717;
 6 | }
 7 | 
 8 | .dark {
 9 |   --background: #0a0a0a;
10 |   --foreground: #ededed;
11 | }
12 | 
13 | @theme inline {
14 |   --color-background: var(--background);
15 |   --color-foreground: var(--foreground);
16 |   --font-sans: var(--font-geist-sans);
17 |   --font-mono: var(--font-geist-mono);
18 | }
19 | 
20 | body {
21 |   background: var(--background);
22 |   color: var(--foreground);
23 |   font-family: Arial, Helvetica, sans-serif;
24 | }
25 | 


--------------------------------------------------------------------------------
/mcp-client-nextjs/src/app/layout.tsx:
--------------------------------------------------------------------------------
 1 | import type { Metadata } from "next";
 2 | import { Geist, Geist_Mono } from "next/font/google";
 3 | import "./globals.css";
 4 | 
 5 | const geistSans = Geist({
 6 |   variable: "--font-geist-sans",
 7 |   subsets: ["latin"],
 8 | });
 9 | 
10 | const geistMono = Geist_Mono({
11 |   variable: "--font-geist-mono",
12 |   subsets: ["latin"],
13 | });
14 | 
15 | export const metadata: Metadata = {
16 |   title: "MCP Client",
17 |   description: "Model Context Protocol Client Interface",
18 | };
19 | 
20 | /**
21 |  * Root layout: puts the Geist font CSS variables on <body> and renders the
22 |  * active page inside it.
23 |  */
24 | export default function RootLayout(
25 |   props: Readonly<{ children: React.ReactNode }>
26 | ) {
27 |   const fontClasses = `${geistSans.variable} ${geistMono.variable} antialiased`;
28 | 
29 |   return (
30 |     <html lang="en" suppressHydrationWarning>
31 |       <body className={fontClasses}>{props.children}</body>
32 |     </html>
33 |   );
34 | }
35 | 


--------------------------------------------------------------------------------
/mcp-client-nextjs/src/app/page.tsx:
--------------------------------------------------------------------------------
1 | import MCPInterface from '@/components/mcp-interface';
2 | 
3 | // Landing page: renders the MCP interface inside a padded <main> element.
4 | export default function Home() {
5 |   const content = <MCPInterface />;
6 |   return (
7 |     <main className="min-h-screen p-4">{content}</main>
8 |   );
9 | }


--------------------------------------------------------------------------------
/mcp-client-nextjs/src/lib/mcp/log-buffer.ts:
--------------------------------------------------------------------------------
 1 | // Simple in-memory buffer to store logs for client retrieval
 2 | class LogBuffer {
 3 |     private logs: { timestamp: number; level: string; message: string }[] = [];
 4 |     private maxLogs = 1000; // cap on retained entries so memory stays bounded
 5 | 
 6 |     // Appends an entry stamped with the current time, then drops the oldest
 7 |     // entries if the buffer has grown past maxLogs.
 8 |     addLog(level: string, message: string) {
 9 |       const entry = { timestamp: Date.now(), level, message };
10 |       this.logs.push(entry);
11 | 
12 |       // keep only the newest maxLogs entries
13 |       const overflow = this.logs.length - this.maxLogs;
14 |       if (overflow > 0) this.logs = this.logs.slice(overflow);
15 |     }
16 | 
17 |     // Returns a new array: entries with timestamp > since, or every entry when
18 |     // `since` is omitted (or otherwise falsy, e.g. 0 or NaN).
19 |     getLogs(since?: number): { timestamp: number; level: string; message: string }[] {
20 |       return since
21 |         ? this.logs.filter(entry => entry.timestamp > since)
22 |         : this.logs.slice();
23 |     }
24 | 
25 |     // Empties the buffer.
26 |     clear() {
27 |       this.logs = [];
28 |     }
29 |   }
30 |   
31 |   // Export a singleton instance
32 |   export const logBuffer = new LogBuffer();
33 | 
34 |   // Utility export: prints "clearing logs buffer" to the console, then empties the shared logBuffer
35 |   export const clearLogs = () => {
36 |     console.log("clearing logs buffer");
37 |     logBuffer.clear();
38 |   };


--------------------------------------------------------------------------------
/mcp-client-nextjs/src/lib/mcp/setup.ts:
--------------------------------------------------------------------------------
 1 | // Browser/Next.js compatible version of setup
 2 | 
 3 | // Check if MCP server is running
 4 | // Probes the MCP server with a JSON-RPC "initialize" POST.
 5 | // Resolves to true only when the reply carries a truthy `result`; every failure
 6 | // (network error, non-2xx status, bad JSON) is logged and resolves to false.
 7 | export async function checkMCPServer() {
 8 |   try {
 9 |     console.log("checking mcp server connection...");
10 | 
11 |     // Direct connection to 127.0.0.1:8080
12 |     const payload = {
13 |       jsonrpc: "2.0",
14 |       id: "health-check",
15 |       method: "initialize",
16 |       params: {
17 |         clientInfo: { name: "mcp-client-health-check", version: "1.0.0" },
18 |         capabilities: {}
19 |       }
20 |     };
21 | 
22 |     const response = await fetch('http://127.0.0.1:8080/mcp', {
23 |       method: 'POST',
24 |       headers: { 'Content-Type': 'application/json' },
25 |       body: JSON.stringify(payload),
26 |     });
27 | 
28 |     if (!response.ok) {
29 |       throw new Error(`HTTP error: ${response.status}`);
30 |     }
31 | 
32 |     const data = await response.json();
33 | 
34 |     if (data.result) {
35 |       console.log("mcp server is running and responding properly");
36 |       return true;
37 |     }
38 | 
39 |     console.log("mcp server responded but with unexpected format");
40 |     return false;
41 |   } catch (error) {
42 |     // `error` is `unknown` under strict mode; narrow before reading `.message`
43 |     const reason = error instanceof Error ? error.message : String(error);
44 |     console.error("failed to connect to mcp server:", reason);
45 |     return false;
46 |   }
47 | }
48 | 
49 | // Setup environment - simplified for Next.js
50 | // Environment check for the Next.js build: resolves to whether the MCP server answered
51 | // its health probe. Reports failure to the caller instead of exiting the process.
52 | export async function setupEnvironment() {
53 |   // API keys should be handled through Next.js environment variables
54 |   // in .env.local files or deployment environment
55 | 
56 |   if (await checkMCPServer()) {
57 |     return true;
58 |   }
59 | 
60 |   console.error("mcp server check failed - functionality may be limited");
61 |   return false;
62 | }


--------------------------------------------------------------------------------
/mcp-client-nextjs/src/lib/mcp/start-here.ts:
--------------------------------------------------------------------------------
  1 | import Anthropic from "@anthropic-ai/sdk";
  2 | import { logBuffer } from './log-buffer';
  3 | 
  4 | // enhanced logging utility with colors for better readability
  5 | // Builds one logger: prints the decorated message to the console (console.error
  6 | // when `toStderr` is set), then mirrors the undecorated message into the log buffer.
  7 | const makeLogger = (
  8 |   level: string,
  9 |   decorate: (msg: string) => string,
 10 |   toStderr = false
 11 | ) =>
 12 |   (msg: string, ...args: unknown[]) => {
 13 |     const line = decorate(msg);
 14 |     if (toStderr) {
 15 |       console.error(line, ...args);
 16 |     } else {
 17 |       console.log(line, ...args);
 18 |     }
 19 |     logBuffer.addLog(level, formatLogMessage(msg, args));
 20 |   };
 21 | 
 22 | export const log = {
 23 |   info: makeLogger('info', (msg) => `\x1b[36m[info]\x1b[0m ${msg}`),
 24 |   success: makeLogger('success', (msg) => `\x1b[32m[success]\x1b[0m ${msg}`),
 25 |   error: makeLogger('error', (msg) => `\x1b[31m[error]\x1b[0m ${msg}`, true),
 26 |   warn: makeLogger('warn', (msg) => `\x1b[33m[warn]\x1b[0m ${msg}`),
 27 |   debug: makeLogger('debug', (msg) => `\x1b[90m[debug]\x1b[0m ${msg}`),
 28 |   // New logging methods for specific UI elements
 29 |   highlight: makeLogger('highlight', (msg) => `\x1b[1m\x1b[35m${msg}\x1b[0m`),
 30 |   iteration: makeLogger('iteration', (msg) => `\x1b[36m${msg}\x1b[0m`),
 31 |   // Prints the response text and buffers it verbatim
 32 |   response: (msg: string) => {
 33 |     console.log(`\n\x1b[1m\x1b[37mresponse:\x1b[0m ${msg}`);
 34 |     logBuffer.addLog('response', msg);
 35 |   },
 36 |   // Logs a tool result as truncated JSON; results carrying an `isError` key are
 37 |   // printed in red and buffered under 'tool-error' instead of 'tool'
 38 |   tool: (name: string, result: unknown) => {
 39 |     const summary = truncateJSON(result);
 40 |     const failed = typeof result === 'object' && result !== null && 'isError' in result;
 41 |     if (failed) {
 42 |       console.log(`\x1b[31m[tool ${name}]\x1b[0m ${summary}`);
 43 |       logBuffer.addLog('tool-error', `[${name}] ${summary}`);
 44 |       return;
 45 |     }
 46 |     console.log(`\x1b[32m[tool ${name}]\x1b[0m ${summary}`);
 47 |     logBuffer.addLog('tool', `[${name}] ${summary}`);
 48 |   }
 49 | };
 50 | 
 51 | // Helper functions
 52 | // Appends a printable form of each arg to `msg`: Errors as "name: message",
 53 | // other objects as truncated JSON, everything else via String().
 54 | function formatLogMessage(msg: string, args: unknown[]): string {
 55 |   if (args.length === 0) return msg;
 56 |   // JSON.stringify(new Error('x')) yields "{}", which would hide the failure reason
 57 |   const render = (arg: unknown) =>
 58 |     arg instanceof Error ? `${arg.name}: ${arg.message}`
 59 |       : typeof arg === 'object' ? truncateJSON(arg) : String(arg);
 60 |   try {
 61 |     return `${msg} ${args.map(render).join(' ')}`;
 62 |   } catch { return `${msg} [args formatting error]`; }
 63 | }
 64 | 
 65 | // JSON-encodes `obj`, cutting the text to maxLength chars plus a '... [truncated]'
 66 | // marker; values whose encoding throws fall back to a fixed placeholder.
 67 | function truncateJSON(obj: unknown, maxLength = 500): string {
 68 |   try {
 69 |     // stringify returns undefined (not a string) for undefined/functions/symbols
 70 |     const str = JSON.stringify(obj) ?? String(obj);
 71 |     return str.length <= maxLength ? str : str.substring(0, maxLength) + '... [truncated]';
 72 |   } catch { return '[unserializable object]'; }
 73 | }
 74 | 
 75 | type MCPResponse = {
 76 |   result?: unknown;
 77 |   error?: string;
 78 | };
 79 | 
 80 | // HTTP JSON-RPC client for the desktop-control MCP server. One shared instance is
 81 | // exported from this module; call connect() before listTools() or callTool().
 82 | class DesktopControlClient {
 83 |   private connected = false;
 84 |   private serverUrl = "";
 85 |   private requestId = 0;
 86 |   private anthropic = new Anthropic();
 87 | 
 88 |   // Connect to the MCP server via http. Resolves to true on success and to false
 89 |   // (after logging) on any failure; it never rejects.
 90 |   async connect(serverUrl: string) {
 91 |     log.info(`connecting to mcp server: ${serverUrl}`);
 92 | 
 93 |     try {
 94 |       this.serverUrl = serverUrl;
 95 |       const response = await this.makeRequest("initialize", {});
 96 | 
 97 |       if (response.result) {
 98 |         this.connected = true;
 99 |         log.success('mcp client session established successfully');
100 |         return true;
101 |       }
102 |       log.error('failed to establish mcp client session:', response.error);
103 |       return false;
104 |     } catch (error) {
105 |       log.error('failed to establish mcp client session:', error);
106 |       return false;
107 |     }
108 |   }
109 | 
110 |   // Make a JSON-RPC request to the URL passed to connect() and return the parsed
111 |   // JSON reply. Request ids are "request-1", "request-2", ... per client instance.
112 |   private async makeRequest(method: string, params: Record<string, unknown>) {
113 |     const id = `request-${++this.requestId}`;
114 | 
115 |     // Use the stored URL; hard-coding it here would silently ignore connect()'s argument
116 |     const response = await fetch(this.serverUrl, {
117 |       method: "POST",
118 |       headers: { "Content-Type": "application/json" },
119 |       body: JSON.stringify({ jsonrpc: "2.0", id, method, params })
120 |     });
121 | 
122 |     return await response.json() as MCPResponse;
123 |   }
124 | 
125 |   // Check if connected
126 |   isConnected(): boolean {
127 |     return this.connected;
128 |   }
129 | 
130 |   // List available tools. Throws when not connected or when the reply lacks
131 |   // result.capabilities.tools.functions.
132 |   async listTools() {
133 |     if (!this.isConnected()) {
134 |       log.error('cannot list tools: not connected');
135 |       throw new Error('Not connected to MCP server');
136 |     }
137 | 
138 |     try {
139 |       // In standard MCP, this would be tools/list
140 |       // But our rust server exposes tools through initialize
141 |       const response = await this.makeRequest("initialize", {});
142 |       const tools = (response.result as { capabilities: { tools: { functions: unknown[] } } }).capabilities.tools.functions;
143 | 
144 |       // Create simplified view - one line per tool
145 |       log.info('available tools:');
146 |       tools.forEach((tool) => {
147 |         // Entries are untyped; a tool that declares no parameters lists no properties
148 |         const { name, parameters } = tool as { name?: unknown; parameters?: { properties?: Record<string, unknown> } };
149 |         const propertyNames = Object.keys(parameters?.properties || {}).join(', ');
150 |         log.debug(`- ${name}: ${propertyNames}`);
151 |       });
152 | 
153 |       return { tools };
154 |     } catch (error) {
155 |       log.error('failed to list tools:', error);
156 |       throw error;
157 |     }
158 |   }
159 | 
160 |   // Call a tool by name. Resolves to the reply's `result` when present, otherwise to
161 |   // the whole reply; throws when not connected or when the request fails.
162 |   async callTool(name: string, args: Record<string, unknown>) {
163 |     if (!this.isConnected()) {
164 |       log.error('cannot call tool: not connected');
165 |       throw new Error('Not connected to MCP server');
166 |     }
167 | 
168 |     log.info(`calling tool "${name}" with args: ${JSON.stringify(args)}`);
169 | 
170 |     try {
171 |       const response = await this.makeRequest("executeToolFunction", {
172 |         function: name,
173 |         arguments: args
174 |       });
175 | 
176 |       // Check if result exists before logging
177 |       if (response && 'result' in response) {
178 |         log.tool(name, response.result);
179 |         return response.result;
180 |       } else {
181 |         log.tool(name, response); // Log the entire response if result is missing
182 |         return response; // Still return whatever we got
183 |       }
184 |     } catch (error) {
185 |       log.error(`error calling tool "${name}":`, error);
186 |       throw error;
187 |     }
188 |   }
189 | 
190 |   // Disconnect from the server (only clears the local connected flag)
191 |   async disconnect() {
192 |     this.connected = false;
193 |     log.success('mcp client session closed');
194 |   }
195 | }
196 | 
197 | // Export an instance that can be used throughout your application
198 | export const desktopClient = new DesktopControlClient();
199 | 


--------------------------------------------------------------------------------
/mcp-client-nextjs/tailwind.config.js:
--------------------------------------------------------------------------------
 1 | /** @type {import('tailwindcss').Config} */
 2 | module.exports = {
 3 |   darkMode: 'class',
 4 |   content: [
 5 |     "./src/**/*.{js,ts,jsx,tsx}",
 6 |   ],
 7 |   theme: {
 8 |     extend: {},
 9 |   },
10 |   plugins: [],
11 | }
12 | 


--------------------------------------------------------------------------------
/mcp-client-nextjs/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2017",
 4 |     "lib": ["dom", "dom.iterable", "esnext"],
 5 |     "allowJs": true,
 6 |     "skipLibCheck": true,
 7 |     "strict": true,
 8 |     "noEmit": true,
 9 |     "esModuleInterop": true,
10 |     "module": "esnext",
11 |     "moduleResolution": "bundler",
12 |     "resolveJsonModule": true,
13 |     "isolatedModules": true,
14 |     "jsx": "preserve",
15 |     "incremental": true,
16 |     "plugins": [
17 |       {
18 |         "name": "next"
19 |       }
20 |     ],
21 |     "paths": {
22 |       "@/*": ["./src/*"]
23 |     }
24 |   },
25 |   "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
26 |   "exclude": ["node_modules"]
27 | }
28 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "computer-use-ai-sdk"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | description = "Desktop UI automation through accessibility APIs"
 6 | authors = ["m13v, louis030195"]
 7 | repository = ""
 8 | 
 9 | [dependencies]
10 | # General dependencies
11 | tokio = { version = "1", features = ["full"] }
12 | anyhow = "1.0"
13 | tracing = "0.1"
14 | tracing-subscriber = "0.3"
15 | serde = { version = "1.0", features = ["derive"] }
16 | serde_json = "1.0"
17 | thiserror = "2.0"
18 | once_cell = "1.19"
19 | uuid = { version = "1.3", features = ["v4"] }
20 | chrono = "0.4"
21 | 
22 | # Server dependencies
23 | axum = "0.6.20"
24 | tower-http = { version = "0.4.0", features = ["cors", "trace"] }
25 | 
26 | # Common dependencies that might be needed based on original code
27 | log = "0.4"
28 | 
29 | [target.'cfg(target_os = "macos")'.dependencies]
30 | # macOS specific dependencies
31 | accessibility-sys = { git = "https://github.com/eiz/accessibility.git", branch = "master" }
32 | accessibility = { git = "https://github.com/eiz/accessibility.git", branch = "master" }
33 | objc = "0.2.7"
34 | objc-foundation = "0.1.1"
35 | core-foundation = "0.10.0"
36 | core-graphics = { version = "0.24.0", features = ["highsierra"] }
37 | 
38 | # Add Windows dependencies if needed
39 | [target.'cfg(target_os = "windows")'.dependencies]
40 | # windows crate with relevant features (if used)
41 | 
42 | # Add Linux dependencies if needed
43 | [target.'cfg(target_os = "linux")'.dependencies]
44 | # dbus, atspi, etc. (if used)
45 | 
46 | [[bin]]
47 | name = "server"
48 | path = "src/bin/server.rs"
49 | 
50 | [features]
51 | cargo-clippy = []
52 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/examples/test_click_by_role.rs:
--------------------------------------------------------------------------------
  1 | use computer_use_ai_sdk::{Selector, UIElement};
  2 | use computer_use_ai_sdk::platforms::AccessibilityEngine;
  3 | use computer_use_ai_sdk::platforms::macos::{MacOSEngine, ClickMethodSelection};
  4 | use anyhow::Result;
  5 | use tracing::{debug, info};
  6 | 
  7 | /// Example: find the Arc browser, look up the first element matching the
  8 | /// text "whatsapp2llm" and click it twice using mouse simulation.
  9 | ///
 10 | /// Engine creation, application listing, tree refresh and element search
 11 | /// errors are propagated; a missing Arc app is an error; click failures are
 12 | /// only logged.
 13 | fn main() -> Result<()> {
 14 |     // Plain stdout marker, printed before any tracing subscriber is installed
 15 |     println!("program starting...");
 16 | 
 17 |     // Install the fmt subscriber at the most verbose level
 18 |     tracing_subscriber::fmt()
 19 |         .with_max_level(tracing::Level::TRACE)
 20 |         .init();
 21 | 
 22 |     println!("tracing initialized");
 23 | 
 24 |     println!("creating accessibility engine...");
 25 |     info!("creating accessibility engine...");
 26 |     let engine = MacOSEngine::new(true, true)?;
 27 | 
 28 |     info!("finding arc browser and focusing it...");
 29 | 
 30 |     // First try a lookup by name; on failure fall back to scanning every app
 31 |     let arc_app = if let Ok(app) = engine.get_application_by_name("Arc") {
 32 |         info!("found arc with direct search");
 33 |         app
 34 |     } else {
 35 |         info!("direct search failed, trying app list");
 36 | 
 37 |         let apps = engine.get_applications()?;
 38 |         info!("found {} applications", apps.len());
 39 | 
 40 |         // Keep the first application whose label contains "Arc"
 41 |         let candidate = apps.into_iter().find(|app| {
 42 |             match app.attributes().label.as_ref() {
 43 |                 Some(label) => label.contains("Arc"),
 44 |                 None => false,
 45 |             }
 46 |         });
 47 | 
 48 |         let app = match candidate {
 49 |             Some(app) => app,
 50 |             None => return Err(anyhow::anyhow!("couldn't find arc browser")),
 51 |         };
 52 |         info!("found arc in app list");
 53 | 
 54 |         // Refresh the accessibility tree for Arc before handing the app back
 55 |         engine.refresh_accessibility_tree(Some("Arc"))?;
 56 |         app
 57 |     };
 58 | 
 59 |     info!("looking for first element containing 'whatsapp2llm'...");
 60 | 
 61 |     // Text selector for the target element
 62 |     let selector = Selector::Text("whatsapp2llm".to_string());
 63 | 
 64 |     let elements = engine.find_elements(&selector, Some(&arc_app))?;
 65 |     info!("search found {} elements, using first match", elements.len());
 66 | 
 67 |     // Nothing to click: log it and finish successfully
 68 |     let element = match elements.first() {
 69 |         Some(element) => element,
 70 |         None => {
 71 |             info!("no elements with 'whatsapp2llm' text found to click");
 72 |             return Ok(());
 73 |         }
 74 |     };
 75 | 
 76 |     let attrs = element.attributes();
 77 |     info!("Found match: role={}, label={:?}", attrs.role, attrs.label);
 78 | 
 79 |     // Bounds are optional diagnostics; a failure to read them is ignored
 80 |     if let Ok((x, y, width, height)) = element.bounds() {
 81 |         info!("  position: ({}, {}), size: ({}, {})", x, y, width, height);
 82 |     }
 83 | 
 84 |     info!("attempting to click element with text 'whatsapp2llm' using mouse simulation...");
 85 | 
 86 |     // First click; a failure is logged and ends the example without an error
 87 |     let first = match element.click_with_method(ClickMethodSelection::MouseSimulation) {
 88 |         Ok(outcome) => outcome,
 89 |         Err(e) => {
 90 |             info!("first click failed: {:?}", e);
 91 |             return Ok(());
 92 |         }
 93 |     };
 94 |     info!("mouse simulation click successful");
 95 |     info!("click details: {}", first.details);
 96 |     if let Some((x, y)) = first.coordinates {
 97 |         info!("clicked at coordinates: ({:.1}, {:.1})", x, y);
 98 |     }
 99 | 
100 |     // Pause between the two clicks
101 |     info!("waiting 500ms before second click...");
102 |     std::thread::sleep(std::time::Duration::from_millis(500));
103 | 
104 |     // The second click is only attempted after the first one succeeded
105 |     match element.click_with_method(ClickMethodSelection::MouseSimulation) {
106 |         Ok(second) => {
107 |             info!("second mouse simulation click successful");
108 |             info!("click details: {}", second.details);
109 |             if let Some((x, y)) = second.coordinates {
110 |                 info!("second clicked at coordinates: ({:.1}, {:.1})", x, y);
111 |             }
112 |         }
113 |         Err(e) => info!("second click failed: {:?}", e),
114 |     }
115 | 
116 |     Ok(())
117 | }
118 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/examples/test_example.rs:
--------------------------------------------------------------------------------
  1 | use computer_use_ai_sdk::{Selector, UIElement};
  2 | use computer_use_ai_sdk::platforms::AccessibilityEngine;
  3 | use computer_use_ai_sdk::platforms::macos::MacOSEngine;
  4 | 
  5 | /// Prints `element`'s role and label indented by `depth`, then recurses into
  6 | /// at most three children per node while `depth` is below 3.
  7 | fn print_element_tree(element: &UIElement, depth: usize) {
  8 |     let attrs = element.attributes();
  9 |     let label = attrs.label.unwrap_or_default();
 10 |     println!("{}role: {}, label: {}", "  ".repeat(depth), attrs.role, label);
 11 | 
 12 |     if depth >= 3 {
 13 |         return;
 14 |     }
 15 |     // Children that cannot be read are skipped silently
 16 |     if let Ok(children) = element.children() {
 17 |         children.iter().take(3).for_each(|child| print_element_tree(child, depth + 1));
 18 |     }
 19 | }
 20 | 
 21 | /// Example: locate the Arc browser, list elements matched by a wildcard role
 22 | /// selector, and compare with a manual walk of Arc's direct children.
 23 | fn main() -> Result<(), Box<dyn std::error::Error>> {
 24 |     println!("creating accessibility engine...");
 25 |     let engine = MacOSEngine::new(true, true)?;
 26 | 
 27 |     println!("finding arc browser and focusing it...");
 28 | 
 29 |     // Attempt 1: lookup by name. Attempt 2: scan the application list.
 30 |     let arc_app = if let Ok(app) = engine.get_application_by_name("Arc") {
 31 |         println!("found arc with direct search");
 32 |         app
 33 |     } else {
 34 |         println!("direct search failed, trying app list");
 35 | 
 36 |         let apps = engine.get_applications()?;
 37 |         println!("found {} applications", apps.len());
 38 | 
 39 |         let candidate = apps.into_iter().find(|app| {
 40 |             match app.attributes().label.as_ref() {
 41 |                 Some(label) => label.contains("Arc"),
 42 |                 None => false,
 43 |             }
 44 |         });
 45 | 
 46 |         let app = match candidate {
 47 |             Some(app) => app,
 48 |             None => return Err("couldn't find arc browser".into()),
 49 |         };
 50 |         println!("found arc in app list");
 51 | 
 52 |         engine.refresh_accessibility_tree(Some("Arc"))?;
 53 |         app
 54 |     };
 55 | 
 56 |     println!("arc app info: {:?}", arc_app.attributes());
 57 | 
 58 |     // "*" as the role, with no name filter
 59 |     let selector = Selector::Role {
 60 |         role: "*".to_string(),
 61 |         name: None,
 62 |     };
 63 | 
 64 |     println!("finding elements with wildcard role in arc...");
 65 | 
 66 |     let elements = engine.find_elements(&selector, Some(&arc_app))?;
 67 |     println!("found {} elements with wildcard role", elements.len());
 68 | 
 69 |     // Only the first ten matches are printed
 70 |     for (i, element) in elements.iter().enumerate().take(10) {
 71 |         let attrs = element.attributes();
 72 |         println!("{}: role={}, label={:?}", i, attrs.role, attrs.label);
 73 |     }
 74 | 
 75 |     println!("\ngetting direct children of arc...");
 76 |     let children = match arc_app.children() {
 77 |         Ok(children) => children,
 78 |         Err(_) => return Ok(()),
 79 |     };
 80 |     println!("arc has {} direct children", children.len());
 81 | 
 82 |     for (i, child) in children.iter().enumerate().take(10) {
 83 |         let attrs = child.attributes();
 84 |         println!("child {}: role={}, label={:?}", i, attrs.role, attrs.label);
 85 | 
 86 |         // Grandchildren are shown for the first two children only
 87 |         if i >= 2 {
 88 |             continue;
 89 |         }
 90 |         if let Ok(grandchildren) = child.children() {
 91 |             println!("  child {} has {} children", i, grandchildren.len());
 92 | 
 93 |             for (j, grandchild) in grandchildren.iter().enumerate().take(3) {
 94 |                 let gc_attrs = grandchild.attributes();
 95 |                 println!("    grandchild {}.{}: role={}, label={:?}",
 96 |                          i, j, gc_attrs.role, gc_attrs.label);
 97 |             }
 98 |         }
 99 |     }
100 | 
101 |     Ok(())
102 | }


--------------------------------------------------------------------------------
/mcp-server-os-level/examples/test_get_all_apps.rs:
--------------------------------------------------------------------------------
 1 | use tracing::{info, Level};
 2 | use tracing_subscriber::FmtSubscriber;
 3 | 
 4 | use computer_use_ai_sdk::platforms::macos::MacOSEngine;
 5 | use computer_use_ai_sdk::platforms::AccessibilityEngine;
 6 | 
 7 | /// Example: list every running application with its label, pid and role.
 8 | ///
 9 | /// Output is sorted case-insensitively by label. Errors from installing the
10 | /// subscriber, creating the engine or listing applications are returned.
11 | fn main() -> Result<(), Box<dyn std::error::Error>> {
12 |     // initialize logging
13 |     let subscriber = FmtSubscriber::builder()
14 |         .with_max_level(Level::INFO)
15 |         .finish();
16 |     tracing::subscriber::set_global_default(subscriber)?;
17 | 
18 |     info!("fetching all running applications...");
19 | 
20 |     // create accessibility engine - true for use_background_apps, false for activate_app
21 |     let engine = MacOSEngine::new(true, false)?;
22 | 
23 |     let apps = engine.get_applications()?;
24 |     info!("found {} applications", apps.len());
25 | 
26 |     // collect (label, pid, role) for each application
27 |     let mut app_details: Vec<_> = apps
28 |         .into_iter()
29 |         .map(|app| {
30 |             let attrs = app.attributes();
31 | 
32 |             // "AXPid" is used only when present, non-empty and a string that parses as i32
33 |             let pid = attrs
34 |                 .properties
35 |                 .get("AXPid")
36 |                 .and_then(|value| value.as_ref())
37 |                 .and_then(|value| value.as_str())
38 |                 .and_then(|text| text.parse::<i32>().ok());
39 | 
40 |             (attrs.label.unwrap_or_default(), pid, attrs.role)
41 |         })
42 |         .collect();
43 | 
44 |     // sort by name for easier viewing; the lowercase key is computed once per entry
45 |     app_details.sort_by_cached_key(|(name, _, _)| name.to_lowercase());
46 | 
47 |     info!("application details:");
48 |     for (i, (name, pid, role)) in app_details.iter().enumerate() {
49 |         info!("{}. '{}' (pid: {:?}, role: {})", i + 1, name, pid, role);
50 |     }
51 | 
52 |     Ok(())
53 | }
54 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/examples/test_get_arc_all_elements_custom.rs:
--------------------------------------------------------------------------------
  1 | use computer_use_ai_sdk::{Selector, UIElement};
  2 | use computer_use_ai_sdk::platforms::AccessibilityEngine;
  3 | use computer_use_ai_sdk::platforms::macos::MacOSEngine;
  4 | use std::collections::VecDeque;
  5 | use std::time::Instant;
  6 | 
  7 | /// Collects `root` and all of its descendants in breadth-first order using an
  8 | /// explicit queue (iterative, not recursive). Elements whose `children()` call
  9 | /// fails are kept but not expanded; progress is printed every 100 elements.
 10 | fn collect_all_elements(root: &UIElement) -> Vec<UIElement> {
 11 |     let mut all_elements = Vec::new();
 12 |     let mut queue = VecDeque::new();
 13 |     queue.push_back(root.clone());
 14 |     let start_time = Instant::now();
 15 | 
 16 |     while let Some(element) = queue.pop_front() {
 17 |         // `element` becomes entry number `processed` of the result
 18 |         let processed = all_elements.len() + 1;
 19 |         if processed % 100 == 0 {
 20 |             println!("processed {} elements so far ({:?} elapsed)",
 21 |                      processed, start_time.elapsed());
 22 |         }
 23 | 
 24 |         // Fetch the children first so `element` can be moved instead of cloned
 25 |         let children = element.children();
 26 |         all_elements.push(element);
 27 |         if let Ok(children) = children {
 28 |             queue.extend(children);
 29 |         }
 30 |     }
 31 | 
 32 |     println!("collected {} total elements in {:?}",
 33 |              all_elements.len(), start_time.elapsed());
 34 |     all_elements
 35 | }
 36 | 
 37 | /// Prints the number of distinct roles in `elements` and the ten most
 38 | /// frequent roles with their counts. Roles with equal counts are ordered by
 39 | /// name so the output does not depend on hash-map iteration order.
 40 | fn print_element_stats(elements: &[UIElement]) {
 41 |     println!("\nelement statistics:");
 42 | 
 43 |     let mut role_counts = std::collections::HashMap::new();
 44 |     for element in elements {
 45 |         // The role is taken straight from the attributes; no clone is needed
 46 |         *role_counts.entry(element.attributes().role).or_insert(0) += 1;
 47 |     }
 48 | 
 49 |     println!("found {} unique element roles", role_counts.len());
 50 | 
 51 |     // Most frequent first; ties broken alphabetically by role
 52 |     let mut roles: Vec<_> = role_counts.into_iter().collect();
 53 |     roles.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
 54 | 
 55 |     println!("top 10 element roles:");
 56 |     for (i, (role, count)) in roles.iter().take(10).enumerate() {
 57 |         println!("  {}: {} - {} instances", i + 1, role, count);
 58 |     }
 59 | }
 60 | 
 61 | /// Example: walk Arc's whole accessibility tree and print statistics, samples
 62 | /// per depth level and samples for a few selected roles.
 63 | fn main() -> Result<(), Box<dyn std::error::Error>> {
 64 |     println!("creating accessibility engine...");
 65 |     let engine = MacOSEngine::new(true, true)?;
 66 | 
 67 |     println!("finding arc browser...");
 68 |     let arc_app = if let Ok(app) = engine.get_application_by_name("Arc") {
 69 |         println!("found arc with direct search");
 70 |         app
 71 |     } else {
 72 |         println!("direct search failed, trying app list");
 73 | 
 74 |         let apps = engine.get_applications()?;
 75 |         println!("found {} applications", apps.len());
 76 | 
 77 |         // first application whose label contains "Arc"
 78 |         let candidate = apps.into_iter().find(|app| {
 79 |             match app.attributes().label.as_ref() {
 80 |                 Some(label) => label.contains("Arc"),
 81 |                 None => false,
 82 |             }
 83 |         });
 84 | 
 85 |         let app = match candidate {
 86 |             Some(app) => app,
 87 |             None => return Err("couldn't find arc browser".into()),
 88 |         };
 89 |         println!("found arc in app list");
 90 | 
 91 |         engine.refresh_accessibility_tree(Some("Arc"))?;
 92 |         app
 93 |     };
 94 | 
 95 |     println!("arc app info: {:?}", arc_app.attributes());
 96 | 
 97 |     println!("collecting all elements from arc (this may take a while)...");
 98 |     let all_elements = collect_all_elements(&arc_app);
 99 | 
100 |     print_element_stats(&all_elements);
101 | 
102 |     println!("\nsampling elements by depth:");
103 | 
104 |     // Second breadth-first walk, this time grouping elements by their depth
105 |     let mut elements_by_depth = std::collections::HashMap::new();
106 |     let mut pending = VecDeque::new();
107 |     pending.push_back((arc_app.clone(), 0)); // (element, depth)
108 | 
109 |     while let Some((element, depth)) = pending.pop_front() {
110 |         elements_by_depth.entry(depth).or_insert_with(Vec::new).push(element.clone());
111 | 
112 |         if let Ok(children) = element.children() {
113 |             pending.extend(children.into_iter().map(|child| (child, depth + 1)));
114 |         }
115 |     }
116 | 
117 |     // Depth levels to sample; up to three elements are shown for each
118 |     let sample_depths = [0, 1, 2, 3, 5, 10];
119 |     for depth in sample_depths.iter() {
120 |         match elements_by_depth.get(depth) {
121 |             Some(elements) => {
122 |                 println!("\n--- depth {} ({} elements) ---", depth, elements.len());
123 | 
124 |                 for (i, element) in elements.iter().enumerate().take(3) {
125 |                     let attrs = element.attributes();
126 |                     println!("  {}: role={}, label={:?}", i, attrs.role, attrs.label);
127 |                 }
128 |             }
129 |             None => println!("\n--- depth {} (no elements) ---", depth),
130 |         }
131 |     }
132 | 
133 |     println!("\nsearching for interesting element roles:");
134 | 
135 |     for role in ["AXButton", "AXTextField", "AXLink", "AXWebArea", "AXStaticText"] {
136 |         let matching: Vec<_> = all_elements
137 |             .iter()
138 |             .filter(|e| e.attributes().role == role)
139 |             .collect();
140 | 
141 |         println!("\nfound {} elements with role '{}'", matching.len(), role);
142 | 
143 |         for (i, element) in matching.iter().enumerate().take(3) {
144 |             let attrs = element.attributes();
145 |             println!("  {}: label={:?}", i, attrs.label);
146 | 
147 |             // description and value are printed only when present
148 |             if let Some(desc) = &attrs.description {
149 |                 println!("    description: {}", desc);
150 |             }
151 |             if let Some(val) = &attrs.value {
152 |                 println!("    value: {:?}", val);
153 |             }
154 |         }
155 |     }
156 | 
157 |     Ok(())
158 | }


--------------------------------------------------------------------------------
/mcp-server-os-level/examples/test_get_arc_all_elements_sdk_count.rs:
--------------------------------------------------------------------------------
 1 | use std::time::Instant;
 2 | use tracing::{debug, info, Level};
 3 | use tracing_subscriber::FmtSubscriber;
 4 | 
 5 | use computer_use_ai_sdk::platforms::macos::MacOSEngine;
 6 | use computer_use_ai_sdk::platforms::AccessibilityEngine;
 7 | use computer_use_ai_sdk::Selector;
 8 | 
 9 | /// Example: count the elements an empty text selector finds in Arc and time
10 | /// the search. A missing Arc app is logged and treated as success.
11 | fn main() -> Result<(), Box<dyn std::error::Error>> {
12 |     // DEBUG-level subscriber so debug! output is shown too
13 |     let subscriber = FmtSubscriber::builder()
14 |         .with_max_level(Level::DEBUG)
15 |         .finish();
16 |     tracing::subscriber::set_global_default(subscriber)?;
17 | 
18 |     debug!("debug logging enabled");
19 |     info!("looking for arc browser...");
20 | 
21 |     let engine = MacOSEngine::new(true, false)?;
22 | 
23 |     // Without Arc there is nothing to measure: log and exit cleanly
24 |     let arc_app = match engine.get_application_by_name("Arc") {
25 |         Err(e) => {
26 |             info!("error finding arc browser: {:?} - is it running?", e);
27 |             return Ok(());
28 |         }
29 |         Ok(app) => {
30 |             info!("found arc browser!");
31 |             app
32 |         }
33 |     };
34 | 
35 |     info!("starting element collection...");
36 |     let timer = Instant::now();
37 | 
38 |     debug!("calling find_elements with empty text selector");
39 |     let everything = Selector::Text(String::new());
40 |     let elements = engine.find_elements(&everything, Some(&arc_app))?;
41 |     let elapsed = timer.elapsed();
42 | 
43 |     info!("found {} elements in arc browser in {:?}", elements.len(), elapsed);
44 | 
45 |     Ok(())
46 | }
47 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/examples/test_get_arc_interactable_elements.rs:
--------------------------------------------------------------------------------
  1 | use std::collections::{HashMap, HashSet};
  2 | use std::time::Instant;
  3 | use tracing::{debug, info, Level};
  4 | use tracing_subscriber::FmtSubscriber;
  5 | 
  6 | use computer_use_ai_sdk::platforms::macos::MacOSEngine;
  7 | use computer_use_ai_sdk::platforms::AccessibilityEngine;
  8 | use computer_use_ai_sdk::Selector;
  9 | 
 10 | /// Example: classify every element found in Arc by how interactable its role
 11 | /// is and log per-category totals, percentages and per-role counts.
 12 | ///
 13 | /// A missing Arc app is logged and treated as success. Percentages are reported
 14 | /// as 0.0 when no elements were found, and roles with equal counts are listed
 15 | /// alphabetically so the output is stable between runs.
 16 | fn main() -> Result<(), Box<dyn std::error::Error>> {
 17 |     // Initialize logging with DEBUG level to see all logs
 18 |     let subscriber = FmtSubscriber::builder()
 19 |         .with_max_level(Level::DEBUG)
 20 |         .finish();
 21 |     tracing::subscriber::set_global_default(subscriber)?;
 22 | 
 23 |     debug!("debug logging enabled");
 24 |     info!("looking for arc browser...");
 25 | 
 26 |     // Role names keep the original macOS casing
 27 |     let definitely_interactable: HashSet<&str> = [
 28 |         "AXButton", "AXMenuItem", "AXMenuBarItem", "AXCheckBox", "AXPopUpButton",
 29 |         "AXTextField", "AXTextArea", "AXComboBox", "AXLink", "AXScrollBar",
 30 |         "AXSlider", "AXRadioButtonGroup", "AXRadioButton", "AXSearchField",
 31 |         "AXTabGroup", "AXTabButton", "AXDisclosureButton", "AXStepper",
 32 |         "AXDisclosureTriangle", "AXIncrementor", "AXProgressIndicator"
 33 |     ].iter().copied().collect();
 34 | 
 35 |     let sometimes_interactable: HashSet<&str> = [
 36 |         "AXImage", "AXCell", "AXSplitter", "AXRow", "AXStatusItem",
 37 |         "AXLevelIndicator", "AXColumnHeader", "AXRowHeader", "AXDocument",
 38 |         "AXDrawer", "AXOutline", "AXOutlineRow", "AXHandleElementProxy",
 39 |         "AXBrowser", "AXColumn", "AXGrid", "AXWebArea", "AXGenericElementProxy",
 40 |         "AXValueIndicator"
 41 |     ].iter().copied().collect();
 42 | 
 43 |     let engine = MacOSEngine::new(true, false)?;
 44 | 
 45 |     // A missing Arc app is logged and the example ends successfully
 46 |     let arc_app = match engine.get_application_by_name("Arc") {
 47 |         Ok(app) => {
 48 |             info!("found arc browser!");
 49 |             app
 50 |         }
 51 |         Err(e) => {
 52 |             info!("error finding arc browser: {:?} - is it running?", e);
 53 |             return Ok(());
 54 |         }
 55 |     };
 56 | 
 57 |     info!("starting element collection...");
 58 |     let start = Instant::now();
 59 | 
 60 |     // An empty text selector is used to get all elements
 61 |     debug!("calling find_elements with empty text selector");
 62 |     let elements = engine.find_elements(&Selector::Text(String::from("")), Some(&arc_app))?;
 63 |     let duration = start.elapsed();
 64 | 
 65 |     info!("found {} elements in arc browser in {:?}", elements.len(), duration);
 66 | 
 67 |     // Count elements by role, preserving the original casing
 68 |     let mut role_counts: HashMap<String, usize> = HashMap::new();
 69 |     for element in &elements {
 70 |         *role_counts.entry(element.role()).or_insert(0) += 1;
 71 |     }
 72 | 
 73 |     // Per-category element totals and (role, count) lists
 74 |     let mut definitely_interactable_count = 0usize;
 75 |     let mut sometimes_interactable_count = 0usize;
 76 |     let mut non_interactable_count = 0usize;
 77 |     let mut definitely_interactable_roles: Vec<(String, usize)> = Vec::new();
 78 |     let mut sometimes_interactable_roles: Vec<(String, usize)> = Vec::new();
 79 |     let mut non_interactable_roles: Vec<(String, usize)> = Vec::new();
 80 | 
 81 |     // The map is consumed here, so each role string is moved rather than cloned
 82 |     for (role, count) in role_counts {
 83 |         let (bucket, total) = if definitely_interactable.contains(role.as_str()) {
 84 |             (&mut definitely_interactable_roles, &mut definitely_interactable_count)
 85 |         } else if sometimes_interactable.contains(role.as_str()) {
 86 |             (&mut sometimes_interactable_roles, &mut sometimes_interactable_count)
 87 |         } else {
 88 |             (&mut non_interactable_roles, &mut non_interactable_count)
 89 |         };
 90 |         *total += count;
 91 |         bucket.push((role, count));
 92 |     }
 93 | 
 94 |     // Highest count first; equal counts alphabetically, because HashMap
 95 |     // iteration order is unspecified and would otherwise leak into the output
 96 |     let by_count_then_name =
 97 |         |a: &(String, usize), b: &(String, usize)| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0));
 98 |     definitely_interactable_roles.sort_by(by_count_then_name);
 99 |     sometimes_interactable_roles.sort_by(by_count_then_name);
100 |     non_interactable_roles.sort_by(by_count_then_name);
101 | 
102 |     // 0 / 0 would print "NaN%"; report 0.0 for an empty element list instead
103 |     let total_elements = elements.len();
104 |     let percent = |count: usize| {
105 |         if total_elements == 0 {
106 |             0.0
107 |         } else {
108 |             (count as f64 / total_elements as f64) * 100.0
109 |         }
110 |     };
111 | 
112 |     info!("element interactivity breakdown:");
113 |     info!("  definitely interactable: {} elements ({:.1}%)",
114 |           definitely_interactable_count, percent(definitely_interactable_count));
115 |     for (role, count) in &definitely_interactable_roles {
116 |         info!("    {}: {}", role, count);
117 |     }
118 | 
119 |     info!("  sometimes interactable: {} elements ({:.1}%)",
120 |           sometimes_interactable_count, percent(sometimes_interactable_count));
121 |     for (role, count) in &sometimes_interactable_roles {
122 |         info!("    {}: {}", role, count);
123 |     }
124 | 
125 |     info!("  non-interactable: {} elements ({:.1}%)",
126 |           non_interactable_count, percent(non_interactable_count));
127 |     for (role, count) in &non_interactable_roles {
128 |         info!("    {}: {}", role, count);
129 |     }
130 | 
131 |     Ok(())
132 | }


--------------------------------------------------------------------------------
/mcp-server-os-level/examples/test_get_arc_text_sdk.rs:
--------------------------------------------------------------------------------
 1 | use std::time::Instant;
 2 | use tracing::{info, Level};
 3 | use tracing_subscriber::FmtSubscriber;
 4 | 
 5 | use computer_use_ai_sdk::platforms::macos::MacOSEngine;
 6 | use computer_use_ai_sdk::platforms::AccessibilityEngine;
 7 | 
 8 | /// Example: extract the text of the Arc browser with `text(10)` and log how
 9 | /// long it took, its length in characters and the text itself. A missing Arc
10 | /// app is logged and treated as success.
11 | fn main() -> Result<(), Box<dyn std::error::Error>> {
12 |     // Initialize logging
13 |     let subscriber = FmtSubscriber::builder()
14 |         .with_max_level(Level::INFO)
15 |         .finish();
16 |     tracing::subscriber::set_global_default(subscriber)?;
17 | 
18 |     info!("looking for arc browser...");
19 | 
20 |     // Create accessibility engine
21 |     let engine = MacOSEngine::new(true, false)?;
22 | 
23 |     // Get Arc browser application
24 |     let arc_app = match engine.get_application_by_name("Arc") {
25 |         Ok(app) => {
26 |             info!("found arc browser!");
27 |             app
28 |         }
29 |         Err(e) => {
30 |             info!("error finding arc browser: {:?} - is it running?", e);
31 |             return Ok(());
32 |         }
33 |     };
34 | 
35 |     info!("extracting text from arc browser...");
36 |     let start = Instant::now();
37 | 
38 |     let text = arc_app.text(10)?;
39 | 
40 |     let duration = start.elapsed();
41 | 
42 |     info!("extracted text from arc browser in {:?}", duration);
43 |     // `len()` would report UTF-8 bytes; the message promises characters
44 |     info!("text length: {} characters", text.chars().count());
45 |     info!("text content:\n{}", text);
46 | 
47 |     Ok(())
48 | }


--------------------------------------------------------------------------------
/mcp-server-os-level/examples/test_get_messages_and_send_message.rs:
--------------------------------------------------------------------------------
  1 | use std::time::Instant;
  2 | use tracing::{debug, info, Level};
  3 | use tracing_subscriber::FmtSubscriber;
  4 | 
  5 | use computer_use_ai_sdk::platforms::macos::MacOSEngine;
  6 | use computer_use_ai_sdk::platforms::AccessibilityEngine;
  7 | use computer_use_ai_sdk::Selector;
  8 | 
  9 | fn main() -> Result<(), Box<dyn std::error::Error>> {
 10 |     // Initialize logging with DEBUG level to see all logs
 11 |     let subscriber = FmtSubscriber::builder()
 12 |         .with_max_level(Level::DEBUG)
 13 |         .finish();
 14 |     tracing::subscriber::set_global_default(subscriber)?;
 15 | 
 16 |     debug!("debug logging enabled");
 17 |     info!("looking for messages app...");
 18 |     
 19 |     // Create accessibility engine with activate_app set to true
 20 |     // This helps refresh the accessibility tree and ensures app focus
 21 |     let engine = MacOSEngine::new(true, true)?;
 22 |     
 23 |     // Get Messages application
 24 |     let messages_app = match engine.get_application_by_name("Messages") {
 25 |         Ok(app) => {
 26 |             info!("found messages app!");
 27 |             app
 28 |         }
 29 |         Err(e) => {
 30 |             info!("error finding messages app: {:?} - is it running?", e);
 31 |             return Ok(());
 32 |         }
 33 |     };
 34 | 
 35 |     info!("getting all elements from messages app...");
 36 |     let start = Instant::now();
 37 | 
 38 |     // Get all elements
 39 |     let all_elements = engine.find_elements(&Selector::Text(String::from("")), Some(&messages_app))?;
 40 |     
 41 |     info!("found {} total elements in messages app in {:?}", all_elements.len(), start.elapsed());
 42 |     
 43 |     // Define sets of definitely and sometimes interactable roles
 44 |     let definitely_interactable = [
 45 |         "AXButton", "AXMenuItem", "AXMenuBarItem", "AXCheckBox", "AXPopUpButton",
 46 |         "AXTextField", "AXTextArea", "AXComboBox", "AXLink", "AXScrollBar",
 47 |     ];
 48 |     
 49 |     let sometimes_interactable = [
 50 |         "AXImage", "AXCell", "AXSplitter", "AXRow", "AXStatusItem",
 51 |     ];
 52 |     
 53 |     // Filter for interactable elements with text
 54 |     let interactable_elements: Vec<_> = all_elements.iter()
 55 |         .enumerate()
 56 |         .filter(|(_, element)| {
 57 |             let role = element.role();
 58 |             let text = element.text(10).unwrap_or_default();
 59 |             
 60 |             // Check if it has text
 61 |             let has_text = !text.is_empty();
 62 |             
 63 |             // Check if it's interactable
 64 |             let is_interactable = definitely_interactable.contains(&role.as_str()) ||
 65 |                                  sometimes_interactable.contains(&role.as_str());
 66 |             
 67 |             has_text && is_interactable
 68 |         })
 69 |         .collect();
 70 |     
 71 |     info!("found {} interactable elements with text", interactable_elements.len());
 72 |     
 73 |     // Log the first 10 interactable elements
 74 |     for (i, (original_index, element)) in interactable_elements.iter().take(10).enumerate() {
 75 |         let role = element.role();
 76 |         let text = element.text(10).unwrap_or_default();
 77 |         
 78 |         // Truncate text if it's too long for logging
 79 |         let text_preview = if text.len() > 50 {
 80 |             format!("{}...", &text[..47])
 81 |         } else {
 82 |             text
 83 |         };
 84 |         
 85 |         info!("[{}] index={}, role={}, text={}", i, original_index, role, text_preview);
 86 |     }
 87 |     
 88 |     // Look for text input field specifically with "Message" text
 89 |     info!("looking for text field with 'Message' text...");
 90 |     let message_text_field = interactable_elements.iter()
 91 |         .find(|(_, element)| {
 92 |             let role = element.role();
 93 |             let text = element.text(10).unwrap_or_default();
 94 |             
 95 |             // Check if it's a text field/area containing "Message"
 96 |             (role == "textfield" || role == "textarea" || role == "AXTextField" || role == "AXTextArea") 
 97 |                 && text.contains("Message")
 98 |         });
 99 |     
100 |     if let Some((original_index, element)) = message_text_field {
101 |         info!("found message text field at original index: {}", original_index);
102 |         
103 |         // Type "hello world" with smile emoji
104 |         info!("typing message into text field...");
105 |         match element.type_text("hello world 😊") {
106 |             Ok(_) => info!("successfully typed message"),
107 |             Err(e) => info!("failed to type message: {:?}", e),
108 |         }
109 |         
110 |         // Brief pause to see the text
111 |         std::thread::sleep(std::time::Duration::from_millis(500));
112 |         
113 |         // Press return key to send the message
114 |         info!("pressing return key to send message...");
115 |         match element.press_key("return") {
116 |             Ok(_) => info!("successfully pressed return key"),
117 |             Err(e) => info!("failed to press return key: {:?}", e),
118 |         }
119 |     } else {
120 |         info!("no message text field found. trying another approach...");
121 |         
122 |         // Try using selector to find message text field directly
123 |         info!("searching for message text field using selector...");
124 |         match engine.find_element(&Selector::Text(String::from("Message")), Some(&messages_app)) {
125 |             Ok(element) => {
126 |                 info!("found message text field using selector");
127 |                 info!("typing message into text field...");
128 |                 match element.type_text("hello world 😊") {
129 |                     Ok(_) => info!("successfully typed message"),
130 |                     Err(e) => info!("failed to type message: {:?}", e),
131 |                 }
132 |                 
133 |                 std::thread::sleep(std::time::Duration::from_millis(500));
134 |                 
135 |                 info!("pressing return key to send message...");
136 |                 match element.press_key("return") {
137 |                     Ok(_) => info!("successfully pressed return key"),
138 |                     Err(e) => info!("failed to press return key: {:?}", e),
139 |                 }
140 |             },
141 |             Err(e) => {
142 |                 info!("failed to find message text field using selector: {:?}", e);
143 |                 
144 |                 // Fall back to original behavior if needed
145 |                 // Check if we have enough elements to access index 60 (original functionality)
146 |                 let target_index = 63;
147 |                 if interactable_elements.len() > target_index {
148 |                     let (original_index, element) = &interactable_elements[target_index];
149 |                     info!("pressing Return key on element at index {} (original index: {})", target_index, original_index);
150 |                     
151 |                     // Try to press Return key on the element
152 |                     match element.press_key("return") {
153 |                         Ok(_) => info!("successfully pressed Return key on element"),
154 |                         Err(e) => info!("failed to press Return key: {:?}", e),
155 |                     }
156 |                 } else {
157 |                     info!("not enough elements to access index {}, only have {}", target_index, interactable_elements.len());
158 |                 }
159 |             }
160 |         }
161 |     }
162 |     
163 |     info!("test completed successfully");
164 |     Ok(())
165 | }


--------------------------------------------------------------------------------
/mcp-server-os-level/examples/test_get_messages_text_sdk.rs:
--------------------------------------------------------------------------------
 1 | use std::time::Instant;
 2 | use tracing::{info, Level};
 3 | use tracing_subscriber::FmtSubscriber;
 4 | 
 5 | use computer_use_ai_sdk::platforms::macos::MacOSEngine;
 6 | use computer_use_ai_sdk::platforms::AccessibilityEngine;
 7 | 
 8 | fn main() -> Result<(), Box<dyn std::error::Error>> {
 9 |     // Initialize logging
10 |     let subscriber = FmtSubscriber::builder()
11 |         .with_max_level(Level::INFO)
12 |         .finish();
13 |     tracing::subscriber::set_global_default(subscriber)?;
14 | 
15 |     info!("looking for messages app...");
16 |     
17 |     // Create accessibility engine
18 |     let engine = MacOSEngine::new(true, false)?;
19 |     
20 |     // Get Messages application
21 |     let messages_app = match engine.get_application_by_name("Messages") {
22 |         Ok(app) => {
23 |             info!("found messages app!");
24 |             app
25 |         }
26 |         Err(e) => {
27 |             info!("error finding messages app: {:?} - is it running?", e);
28 |             return Ok(());
29 |         }
30 |     };
31 | 
32 |     info!("extracting text from messages app...");
33 |     let start = Instant::now();
34 | 
35 |     // Get text directly using our improved method
36 |     let text = messages_app.text(10)?;
37 |     
38 |     let duration = start.elapsed();
39 |     
40 |     info!("extracted text from messages app in {:?}", duration);
41 |     info!("text length: {} characters", text.len());
42 |     info!("text content:\n{}", text);
43 |     
44 |     Ok(())
45 | } 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/bin/handlers/click_by_index.rs:
--------------------------------------------------------------------------------
  1 | use std::sync::Arc;
  2 | use axum::{
  3 |     extract::{Json, State},
  4 |     http::StatusCode,
  5 |     response::Json as JsonResponse,
  6 | };
  7 | use serde::Serialize;
  8 | use serde_json::json;
  9 | use tracing::{debug, error};
 10 | use computer_use_ai_sdk::Desktop;
 11 | 
 12 | use crate::types::{AppState, ClickByIndexRequest, ClickByIndexResponse, ListElementsAndAttributesResponse};
 13 | use crate::refresh_elements_and_attributes_after_action;
 14 | 
/// Combined response of the click-by-index endpoint: the outcome of the click
/// plus the element listing gathered after the click.
#[derive(Serialize)]
pub struct ClickByIndexWithElementsResponse {
    /// Outcome of the click itself.
    pub click: ClickByIndexResponse,
    /// Element listing refreshed after the click; `None` when the refresh
    /// helper did not return a listing.
    pub elements: Option<ListElementsAndAttributesResponse>,
}
 21 | 
 22 | pub async fn click_by_index_handler(
 23 |     State(state): State<Arc<AppState>>,
 24 |     Json(request): Json<ClickByIndexRequest>,
 25 | ) -> Result<JsonResponse<ClickByIndexWithElementsResponse>, (StatusCode, JsonResponse<serde_json::Value>)> {
 26 |     // Get elements from cache
 27 |     let elements_opt = {
 28 |         let cache = state.element_cache.lock().await;
 29 |         cache.clone()
 30 |     };
 31 | 
 32 |     // Check if cache exists
 33 |     if elements_opt.is_none() {
 34 |         return Err((
 35 |             StatusCode::BAD_REQUEST,
 36 |             JsonResponse(json!({
 37 |                 "error": "no element cache found - you must call listInteractableElementsByIndex first to index the elements before using by-index operations"
 38 |             })),
 39 |         ));
 40 |     }
 41 | 
 42 |     match elements_opt {
 43 |         Some((elements, timestamp, app_name)) if timestamp.elapsed() < std::time::Duration::from_secs(30) => {
 44 |             // Use element_index directly
 45 |             if request.element_index < elements.len() {
 46 |                 let element = &elements[request.element_index];
 47 |                 
 48 |                 // Step 1: Try inputControl first (AppleScript) if bounds are available
 49 |                 let bounds = element.bounds();
 50 |                 let input_control_success = if let Ok((x, y, width, height)) = bounds {
 51 |                     debug!("attempting to click element at position [{}, {}] using inputControl", 
 52 |                           x + width/2.0, y + height/2.0);
 53 |                     
 54 |                     // Activate the app first
 55 |                     debug!("activating app: {}", app_name);
 56 |                     let desktop = match Desktop::new(false, true) {
 57 |                         Ok(d) => d,
 58 |                         Err(e) => {
 59 |                             error!("failed to initialize desktop automation: {}", e);
 60 |                             return Err((
 61 |                                 StatusCode::INTERNAL_SERVER_ERROR,
 62 |                                 JsonResponse(json!({
 63 |                                     "error": format!("failed to initialize desktop automation: {}", e)
 64 |                                 })),
 65 |                             ));
 66 |                         }
 67 |                     };
 68 | 
 69 |                     // Get and activate the application
 70 |                     let _ = match desktop.application(&app_name) {
 71 |                         Ok(app) => app,
 72 |                         Err(e) => {
 73 |                             error!("application not found: {}", e);
 74 |                             return Err((
 75 |                                 StatusCode::NOT_FOUND,
 76 |                                 JsonResponse(json!({
 77 |                                     "error": format!("application not found: {}", e)
 78 |                                 })),
 79 |                             ));
 80 |                         }
 81 |                     };
 82 |                     
 83 |                     // Calculate center of element
 84 |                     let center_x = x + width/2.0;
 85 |                     let center_y = y + height/2.0;
 86 |                     
 87 |                     use std::process::Command;
 88 |                     
 89 |                     // Use AppleScript to click at position
 90 |                     let script = format!(
 91 |                         "tell application \"System Events\" to click at {{round {}, round {}}}",
 92 |                         center_x, center_y
 93 |                     );
 94 |                     
 95 |                     match Command::new("osascript").arg("-e").arg(script).output() {
 96 |                         Ok(_) => {
 97 |                             debug!("successfully clicked element using inputControl at [{}, {}]",
 98 |                                   center_x, center_y);
 99 |                             true
100 |                         },
101 |                         Err(e) => {
102 |                             debug!("failed to click using inputControl: {} - falling back to accessibility API", e);
103 |                             false
104 |                         }
105 |                     }
106 |                 } else {
107 |                     debug!("could not get element bounds - skipping inputControl approach");
108 |                     false
109 |                 };
110 |                 
111 |                 // Step 2: If inputControl failed, use accessibility API as fallback
112 |                 if !input_control_success {
113 |                     debug!("using accessibility API for clicking");
114 |                     match element.click() {
115 |                         Ok(_) => {
116 |                             debug!("successfully clicked element using accessibility API");
117 |                         },
118 |                         Err(e) => {
119 |                             error!("failed to click element with accessibility API: {}", e);
120 |                             return Err((
121 |                                 StatusCode::INTERNAL_SERVER_ERROR,
122 |                                 JsonResponse(json!({
123 |                                     "error": format!("failed to click element using both inputControl and accessibility API: {}", e)
124 |                                 })),
125 |                             ));
126 |                         }
127 |                     }
128 |                 }
129 |                 
130 |                 // Create the success response based on which method worked
131 |                 let method_used = if input_control_success { "AppleScript" } else { "Accessibility API" };
132 |                 let click_response = ClickByIndexResponse {
133 |                     success: true,
134 |                     message: format!(
135 |                         "successfully clicked element with role: {} (using {} method)",
136 |                         element.role(), method_used
137 |                     ),
138 |                     elements: None,  // add the missing field
139 |                 };
140 |                 
141 |                 // Get refreshed elements using the helper function
142 |                 let elements_response = refresh_elements_and_attributes_after_action(state, app_name.clone(), 500).await;
143 |                 
144 |                 // Return combined response
145 |                 Ok(JsonResponse(ClickByIndexWithElementsResponse {
146 |                     click: click_response,
147 |                     elements: elements_response,
148 |                 }))
149 |             } else {
150 |                 error!(
151 |                     "element index out of bounds: {} (max: {})",
152 |                     request.element_index,
153 |                     elements.len() - 1
154 |                 );
155 |                 Err((
156 |                     StatusCode::BAD_REQUEST,
157 |                     JsonResponse(json!({
158 |                         "error": format!("element index out of bounds: {} (max: {})",
159 |                                         request.element_index, elements.len() - 1)
160 |                     })),
161 |                 ))
162 |             }
163 |         }
164 |         Some(_) => {
165 |             // Cache entry expired
166 |             Err((
167 |                 StatusCode::BAD_REQUEST,
168 |                 JsonResponse(json!({
169 |                     "error": "cache entry expired, please list elements again"
170 |                 })),
171 |             ))
172 |         }
173 |         None => {
174 |             // Cache miss
175 |             Err((
176 |                 StatusCode::NOT_FOUND,
177 |                 JsonResponse(json!({
178 |                     "error": "no cache entry found, please list elements again"
179 |                 })),
180 |             ))
181 |         }
182 |     }
183 | }
184 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/bin/handlers/input_control.rs:
--------------------------------------------------------------------------------
  1 | use axum::{
  2 |     extract::{Json, State},
  3 |     http::StatusCode,
  4 |     response::Json as JsonResponse,
  5 | };
  6 | use serde_json;
  7 | use std::process::Command;
  8 | use std::sync::Arc;
  9 | use tokio::time::Duration;
 10 | use tracing::{error, info};
 11 | 
 12 | use crate::types::*;
 13 | use crate::AppState;
 14 | 
 15 | // Define the handler for input control
 16 | pub async fn input_control_handler(
 17 |     State(state): State<Arc<AppState>>,
 18 |     Json(payload): Json<InputControlRequest>,
 19 | ) -> Result<JsonResponse<InputControlWithElementsResponse>, (StatusCode, JsonResponse<serde_json::Value>)> {
 20 |     info!("input control handler {:?}", payload);
 21 |     
 22 |     // Execute appropriate input action
 23 |     match payload.action {
 24 |         InputAction::KeyPress(key) => {
 25 |             // Add key name to key code mapping
 26 |             let key_code = match key.as_str() {
 27 |                 "Tab" => "48",      // Tab key code
 28 |                 "Return" => "36",   // Enter/Return key code
 29 |                 "Space" => "49",    // Space key code
 30 |                 "Escape" => "53",   // Escape key code
 31 |                 // Add more key mappings as needed
 32 |                 _ => key.as_str(),  // Use as-is if it's already a number
 33 |             };
 34 |             
 35 |             let script = format!("tell application \"System Events\" to key code {}", key_code);
 36 |             info!("executing key press script: {}", script);
 37 |             if let Err(e) = Command::new("osascript").arg("-e").arg(script).output() {
 38 |                 error!("failed to press key: {}", e);
 39 |                 return Err((
 40 |                     StatusCode::INTERNAL_SERVER_ERROR,
 41 |                     JsonResponse(serde_json::json!({"error": format!("failed to press key: {}", e)})),
 42 |                 ));
 43 |             }
 44 |         }
 45 |         InputAction::MouseMove { x, y } => {
 46 |             // Implement mouse move
 47 |             let script = format!("tell application \"System Events\" to set mouse position to {{{}, {}}}", x, y);
 48 |             if let Err(e) = Command::new("osascript").arg("-e").arg(script).output() {
 49 |                 error!("failed to move mouse: {}", e);
 50 |                 return Err((
 51 |                     StatusCode::INTERNAL_SERVER_ERROR,
 52 |                     JsonResponse(serde_json::json!({"error": format!("failed to move mouse: {}", e)})),
 53 |                 ));
 54 |             }
 55 |         }
 56 |         InputAction::MouseClick(button) => {
 57 |             // Implement mouse click
 58 |             let button_num = match button.as_str() {
 59 |                 "left" => 1,
 60 |                 "right" => 2,
 61 |                 _ => {
 62 |                     error!("unsupported mouse button: {}", button);
 63 |                     return Err((
 64 |                         StatusCode::BAD_REQUEST,
 65 |                         JsonResponse(serde_json::json!({"error": format!("unsupported mouse button: {}", button)})),
 66 |                     ));
 67 |                 }
 68 |             };
 69 |             
 70 |             let script = format!("tell application \"System Events\" to click button {}", button_num);
 71 |             if let Err(e) = Command::new("osascript").arg("-e").arg(script).output() {
 72 |                 error!("failed to click mouse: {}", e);
 73 |                 return Err((
 74 |                     StatusCode::INTERNAL_SERVER_ERROR,
 75 |                     JsonResponse(serde_json::json!({"error": format!("failed to click mouse: {}", e)})),
 76 |                 ));
 77 |             }
 78 |         }
 79 |         InputAction::WriteText(text) => {
 80 |             // Implement text writing
 81 |             let script = format!("tell application \"System Events\" to keystroke \"{}\"", text);
 82 |             if let Err(e) = Command::new("osascript").arg("-e").arg(script).output() {
 83 |                 error!("failed to write text: {}", e);
 84 |                 return Err((
 85 |                     StatusCode::INTERNAL_SERVER_ERROR,
 86 |                     JsonResponse(serde_json::json!({"error": format!("failed to write text: {}", e)})),
 87 |                 ));
 88 |             }
 89 |         }
 90 |     }
 91 | 
 92 |     // Get elements from cache to find the active application
 93 |     let elements_response = {
 94 |         let cache = state.element_cache.lock().await;
 95 |         match &*cache {
 96 |             Some((_, _, cached_app_name)) => {
 97 |                 // We have a cached app name, so let's refresh elements
 98 |                 info!("refreshing elements for app: {}", cached_app_name);
 99 |                 refresh_elements_and_attributes_after_action(state.clone(), cached_app_name.clone(), 500).await
100 |             }
101 |             None => {
102 |                 // No cache available, don't try to refresh elements
103 |                 info!("no element cache found, skipping element refresh");
104 |                 None
105 |             }
106 |         }
107 |     };
108 |     
109 |     // Return combined response
110 |     Ok(JsonResponse(InputControlWithElementsResponse {
111 |         input: InputControlResponse { success: true },
112 |         elements: elements_response,
113 |     }))
114 | }
115 | 
116 | // Updated helper function to refresh elements after an action
117 | async fn refresh_elements_and_attributes_after_action(
118 |     state: Arc<AppState>, 
119 |     app_name: String,
120 |     delay_ms: u64
121 | ) -> Option<ListElementsAndAttributesResponse> {
122 |     // Small delay to allow UI to update after action
123 |     info!("waiting for ui to update after action before listing elements and attributes");
124 |     tokio::time::sleep(Duration::from_millis(delay_ms)).await;
125 |     
126 |     // Create request to refresh the elements list
127 |     let elements_request = ListInteractableElementsRequest {
128 |         app_name,
129 |         max_elements: None,
130 |         use_background_apps: Some(false),
131 |         activate_app: Some(true),
132 |     };
133 |     
134 |     // Call the new list elements handler
135 |     match crate::handlers::list_elements_and_attributes::list_elements_and_attributes_handler(State(state), Json(elements_request)).await {
136 |         Ok(response) => Some(response.0),
137 |         Err(e) => {
138 |             // Log the error but don't fail the whole request
139 |             error!("failed to list elements and attributes after action: {:?}", e);
140 |             None
141 |         }
142 |     }
143 | }
144 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/bin/handlers/mod.rs:
--------------------------------------------------------------------------------
 1 | // Export all handlers
 2 | pub mod mcp;
 3 | pub mod click_by_index;
 4 | pub mod type_by_index;
 5 | pub mod press_key_by_index;
 6 | pub mod open_application;
 7 | pub mod open_url;
 8 | pub mod input_control;
 9 | pub mod utils;
10 | pub mod list_elements_and_attributes;
11 | 
12 | // No re-exports since they're not being used
13 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/bin/handlers/open_application.rs:
--------------------------------------------------------------------------------
 1 | use std::sync::Arc;
 2 | use axum::{
 3 |     extract::{Json, State},
 4 |     http::StatusCode,
 5 |     response::Json as JsonResponse,
 6 | };
 7 | use serde::Serialize;
 8 | use serde_json::json;
 9 | use computer_use_ai_sdk::Desktop;
10 | 
11 | use crate::types::{AppState, OpenApplicationRequest, OpenApplicationResponse, ListElementsAndAttributesResponse};
12 | use crate::refresh_elements_and_attributes_after_action;
13 | 
/// Combined response of the open-application endpoint: the outcome of opening
/// the application plus the element listing gathered afterwards.
#[derive(Serialize)]
pub struct OpenApplicationWithElementsResponse {
    /// Outcome of opening the application.
    pub application: OpenApplicationResponse,
    /// Element listing of the opened application; `None` when the refresh
    /// helper did not return a listing.
    pub elements: Option<ListElementsAndAttributesResponse>,
}
20 | 
21 | pub async fn open_application_handler(
22 |     State(state): State<Arc<AppState>>,
23 |     Json(request): Json<OpenApplicationRequest>,
24 | ) -> Result<JsonResponse<OpenApplicationWithElementsResponse>, (StatusCode, JsonResponse<serde_json::Value>)> {
25 |     // Create Desktop automation instance
26 |     let desktop = match Desktop::new(false, true) {
27 |         Ok(desktop) => desktop,
28 |         Err(err) => {
29 |             return Err((
30 |                 StatusCode::INTERNAL_SERVER_ERROR,
31 |                 JsonResponse(json!({"error": format!("failed to initialize automation: {}", err)})),
32 |             ));
33 |         }
34 |     };
35 | 
36 |     // Open the application
37 |     match desktop.open_application(&request.app_name) {
38 |         Ok(_) => {
39 |             // Application opened successfully
40 |             let app_response = OpenApplicationResponse {
41 |                 success: true,
42 |                 message: format!("successfully opened application: {}", request.app_name),
43 |             };
44 |             
45 |             // Get refreshed elements using the helper function - use a longer delay for app startup
46 |             let mut elements_response = refresh_elements_and_attributes_after_action(state.clone(), request.app_name.clone(), 1000).await;
47 |             
48 |             // If elements retrieval failed, wait 500ms and retry once
49 |             if elements_response.is_none() {
50 |                 log::info!("elements retrieval failed for {}, retrying after 500ms", request.app_name);
51 |                 tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
52 |                 elements_response = refresh_elements_and_attributes_after_action(state, request.app_name.clone(), 500).await;
53 |                 
54 |                 if elements_response.is_none() {
55 |                     log::warn!("elements retrieval failed for {} even after retry", request.app_name);
56 |                 }
57 |             }
58 |             
59 |             // Return combined response
60 |             Ok(JsonResponse(OpenApplicationWithElementsResponse {
61 |                 application: app_response,
62 |                 elements: elements_response,
63 |             }))
64 |         },
65 |         Err(err) => Err((
66 |             StatusCode::BAD_REQUEST,
67 |             JsonResponse(json!({"error": format!("failed to open application: {}", err)})),
68 |         )),
69 |     }
70 | }
71 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/bin/handlers/open_url.rs:
--------------------------------------------------------------------------------
  1 | use std::sync::Arc;
  2 | use axum::{
  3 |     extract::{Json, State},
  4 |     http::StatusCode,
  5 |     response::Json as JsonResponse,
  6 | };
  7 | use serde::{Deserialize, Serialize};
  8 | use serde_json::json;
  9 | use tracing::{debug, error, info};
 10 | use computer_use_ai_sdk::Desktop;
 11 | 
 12 | use crate::types::AppState;
 13 | use crate::handlers::utils::refresh_elements_and_attributes_after_action;
 14 | use crate::types::ListElementsAndAttributesResponse;
 15 | 
/// Request body of the open-url endpoint.
#[derive(Deserialize, Clone)]
pub struct OpenUrlRequest {
    /// URL to open.
    pub url: String,
    /// Browser to open the URL in; when `None` the handler lets the system
    /// default browser handle it.
    pub browser: Option<String>,
}
 21 | 
/// Outcome of opening a URL.
#[derive(Serialize)]
pub struct OpenUrlResponse {
    /// Whether the URL was opened.
    pub success: bool,
    /// Human-readable description of what was opened and in which browser.
    pub message: String,
}
 27 | 
/// Combined response of the open-url endpoint: the outcome of opening the URL
/// plus the browser's element listing gathered afterwards.
#[derive(Serialize)]
pub struct OpenUrlWithElementsResponse {
    /// Outcome of opening the URL.
    pub url: OpenUrlResponse,
    /// Element listing of the browser; `None` when no browser could be
    /// identified for the refresh or the refresh did not return a listing.
    pub elements: Option<ListElementsAndAttributesResponse>,
}
 34 | 
 35 | pub async fn open_url_handler(
 36 |     State(state): State<Arc<AppState>>,
 37 |     Json(request): Json<OpenUrlRequest>,
 38 | ) -> Result<JsonResponse<OpenUrlWithElementsResponse>, (StatusCode, JsonResponse<serde_json::Value>)> {
 39 |     info!("handling request to open url: {}", request.url);
 40 |     
 41 |     // Create Desktop automation instance
 42 |     let desktop = match Desktop::new(false, true) {
 43 |         Ok(desktop) => desktop,
 44 |         Err(err) => {
 45 |             error!("failed to initialize automation: {}", err);
 46 |             return Err((
 47 |                 StatusCode::INTERNAL_SERVER_ERROR,
 48 |                 JsonResponse(json!({"error": format!("failed to initialize automation: {}", err)})),
 49 |             ));
 50 |         }
 51 |     };
 52 | 
 53 |     // Open the URL
 54 |     let browser_ref = request.browser.as_deref();
 55 |     
 56 |     if let Some(browser) = browser_ref {
 57 |         debug!("opening url {} in specified browser: {}", request.url, browser);
 58 |     } else {
 59 |         debug!("opening url {} in system default browser", request.url);
 60 |     }
 61 |     
 62 |     match desktop.open_url(&request.url, browser_ref) {
 63 |         Ok(_) => {
 64 |             // Wait for browser to start/activate
 65 |             tokio::time::sleep(tokio::time::Duration::from_millis(800)).await;
 66 |             
 67 |             // Determine which browser to use for refreshing elements
 68 |             let browser_for_refresh: Option<String> = if let Some(browser) = &request.browser {
 69 |                 // If user specified a browser, use that
 70 |                 info!("using specified browser for refresh: {}", browser);
 71 |                 
 72 |                 // Map common browser names to possible variations
 73 |                 let browser_search = match browser.as_str() {
 74 |                     "Google Chrome" => "Chrome",
 75 |                     "Microsoft Edge" => "Edge",
 76 |                     _ => browser.as_str(),
 77 |                 };
 78 |                 
 79 |                 debug!("searching for browser as: {}", browser_search);
 80 |                 
 81 |                 if desktop.application(browser_search).is_ok() {
 82 |                     info!("found browser with name: {}", browser_search);
 83 |                     Some(browser_search.to_string())
 84 |                 } else {
 85 |                     info!("could not find browser with name: {}", browser_search);
 86 |                     None
 87 |                 }
 88 |             } else {
 89 |                 // Try to detect which browser is running
 90 |                 let likely_browsers = ["Arc", "Safari", "Chrome", "Firefox", "Edge", "Opera", "Brave"];
 91 |                 let mut detected = None;
 92 |                 
 93 |                 for browser in likely_browsers.iter() {
 94 |                     match desktop.application(browser) {
 95 |                         Ok(_) => {
 96 |                             info!("detected browser for refresh: {}", browser);
 97 |                             detected = Some(browser.to_string());
 98 |                             break;
 99 |                         },
100 |                         Err(_) => continue,
101 |                     }
102 |                 }
103 |                 
104 |                 // If we couldn't detect a specific browser, we don't do element refresh
105 |                 if detected.is_none() {
106 |                     info!("could not detect which browser was used - skipping element refresh");
107 |                 }
108 |                 
109 |                 detected
110 |             };
111 |             
112 |             info!("successfully opened url: {}", request.url);
113 |             
114 |             // Create success response
115 |             let url_response = OpenUrlResponse {
116 |                 success: true,
117 |                 message: if let Some(browser) = &browser_for_refresh {
118 |                     format!("successfully opened URL: {} in browser: {}", request.url, browser)
119 |                 } else {
120 |                     format!("successfully opened URL: {} in default browser (unknown)", request.url)
121 |                 },
122 |             };
123 |             
124 |             // Only attempt to refresh elements if we know which browser to target
125 |             let elements_response = if let Some(browser) = browser_for_refresh {
126 |                 refresh_elements_and_attributes_after_action(state, browser, 2000).await
127 |             } else {
128 |                 // If we don't know which browser was used, don't try to refresh elements
129 |                 None
130 |             };
131 |             
132 |             // Return combined response
133 |             Ok(JsonResponse(OpenUrlWithElementsResponse {
134 |                 url: url_response,
135 |                 elements: elements_response,
136 |             }))
137 |         },
138 |         Err(err) => {
139 |             error!("failed to open url {}: {}", request.url, err);
140 |             Err((
141 |                 StatusCode::BAD_REQUEST,
142 |                 JsonResponse(json!({"error": format!("failed to open URL: {}", err)})),
143 |             ))
144 |         },
145 |     }
146 | }
147 | /*
148 | 
149 | curl -X POST http://localhost:8080/api/open-url \
150 |   -H "Content-Type: application/json" \
151 |   -d '{"url": "https://twitter.com"}' \
152 |   | jq -r '"url opening:",
153 |     "  success: \(.url.success)",
154 |     "  message: \(.url.message)",
155 |     "\nelements: \(if .elements then
156 |       if .elements.elements then
157 |         .elements.elements | map("\n  [\(.index)]: \(.role)\(if .text then " \"\(.text)\"" else "" end)") | join("")
158 |       else
159 |         "\n  no elements found"
160 |       end
161 |     else
162 |       "\n  no elements info available"
163 |     end)",
164 |     "\nstats summary: \(if .elements then
165 |       "\n  count: \(.elements.stats.count)",
166 |       "  with_text_count: \(.elements.stats.with_text_count)",
167 |       "  without_text_count: \(.elements.stats.without_text_count)",
168 |       "  excluded_count: \(.elements.stats.excluded_count)",
169 |       "  processing time: \(.elements.processing_time_seconds)s",
170 |       "  cache_id: \(.elements.cache_info.cache_id)",
171 |       "  expires_at: \(.elements.cache_info.expires_at)",
172 |       "  element_count: \(.elements.cache_info.element_count)"
173 |     else
174 |       "\n  no stats available"
175 |     end)"'
176 |   
177 | */


--------------------------------------------------------------------------------
/mcp-server-os-level/src/bin/handlers/press_key_by_index.rs:
--------------------------------------------------------------------------------
  1 | use std::sync::Arc;
  2 | use axum::{
  3 |     extract::{Json, State},
  4 |     http::StatusCode,
  5 |     response::Json as JsonResponse,
  6 | };
  7 | use serde::Serialize;
  8 | use serde_json::json;
  9 | use tracing::{debug, error};
 10 | use computer_use_ai_sdk::Desktop;
 11 | 
 12 | use crate::types::{AppState, PressKeyByIndexRequest, PressKeyByIndexResponse, ListElementsAndAttributesResponse};
 13 | use crate::refresh_elements_and_attributes_after_action;
 14 | 
/// Combined payload returned by `press_key_by_index_handler`: the outcome of the
/// key press plus a fresh listing of the application's elements.
#[derive(Debug, Serialize)]
pub struct PressKeyByIndexWithElementsResponse {
    /// Result of the key press itself (success flag and human-readable message).
    pub press_key: PressKeyByIndexResponse,
    /// Elements re-listed after the key press; `None` when the refresh did not succeed.
    pub elements: Option<ListElementsAndAttributesResponse>,
}
 21 | 
 22 | pub async fn press_key_by_index_handler(
 23 |     State(state): State<Arc<AppState>>,
 24 |     Json(request): Json<PressKeyByIndexRequest>,
 25 | ) -> Result<JsonResponse<PressKeyByIndexWithElementsResponse>, (StatusCode, JsonResponse<serde_json::Value>)> {
 26 |     debug!("pressing key combination by index: element_index={}, key_combo={}", 
 27 |         request.element_index, request.key_combo);
 28 | 
 29 |     // Get elements from cache
 30 |     let elements_opt = {
 31 |         let cache = state.element_cache.lock().await;
 32 |         cache.clone()
 33 |     };
 34 | 
 35 |     // Check if cache exists
 36 |     if elements_opt.is_none() {
 37 |         return Err((
 38 |             StatusCode::BAD_REQUEST,
 39 |             JsonResponse(json!({
 40 |                 "error": "no element cache found - you must call listInteractableElementsByIndex first to index the elements before using by-index operations"
 41 |             })),
 42 |         ));
 43 |     }
 44 | 
 45 |     match elements_opt {
 46 |         Some((elements, timestamp, app_name)) if timestamp.elapsed() < std::time::Duration::from_secs(30) => {
 47 |             // Activate the app first
 48 |             debug!("activating app: {}", app_name);
 49 |             let desktop = match Desktop::new(false, true) {
 50 |                 Ok(d) => d,
 51 |                 Err(e) => {
 52 |                     error!("failed to initialize desktop automation: {}", e);
 53 |                     return Err((
 54 |                         StatusCode::INTERNAL_SERVER_ERROR,
 55 |                         JsonResponse(json!({
 56 |                             "error": format!("failed to initialize desktop automation: {}", e)
 57 |                         })),
 58 |                     ));
 59 |                 }
 60 |             };
 61 | 
 62 |             // Get and activate the application
 63 |             let _ = match desktop.application(&app_name) {
 64 |                 Ok(app) => app,
 65 |                 Err(e) => {
 66 |                     error!("application not found: {}", e);
 67 |                     return Err((
 68 |                         StatusCode::NOT_FOUND,
 69 |                         JsonResponse(json!({
 70 |                             "error": format!("application not found: {}", e)
 71 |                         })),
 72 |                     ));
 73 |                 }
 74 |             };
 75 | 
 76 |             // Use element_index directly
 77 |             if request.element_index < elements.len() {
 78 |                 let element = &elements[request.element_index];
 79 |                 
 80 |                 // Step 1: Try to click the element first to focus it
 81 |                 if let Err(e) = element.click() {
 82 |                     debug!("failed to click element before key press: {}", e);
 83 |                     // Continue anyway
 84 |                 }
 85 |                 
 86 |                 // Small delay to ensure element is focused
 87 |                 std::thread::sleep(std::time::Duration::from_millis(100));
 88 |                 
 89 |                 // Step 2: Try inputControl first (AppleScript)
 90 |                 debug!("attempting to press key '{}' using inputControl (AppleScript)", request.key_combo);
 91 |                 
 92 |                 use std::process::Command;
 93 |                 
 94 |                 // Convert key combo to AppleScript format
 95 |                 let key_script = convert_key_combo_to_applescript(&request.key_combo);
 96 |                 
 97 |                 let input_control_success = match Command::new("osascript").arg("-e").arg(key_script).output() {
 98 |                     Ok(_) => {
 99 |                         debug!("successfully pressed key '{}' using inputControl", request.key_combo);
100 |                         true
101 |                     },
102 |                     Err(e) => {
103 |                         debug!("failed to press key using inputControl: {} - falling back to accessibility API", e);
104 |                         false
105 |                     }
106 |                 };
107 |                 
108 |                 // Step 3: If inputControl failed, use accessibility API as fallback
109 |                 if !input_control_success {
110 |                     debug!("falling back to accessibility API for key press");
111 |                     match element.press_key(&request.key_combo) {
112 |                         Ok(_) => {
113 |                             debug!("successfully pressed key '{}' using accessibility API", request.key_combo);
114 |                         },
115 |                         Err(e) => {
116 |                             error!("failed to press key on element with accessibility API: {}", e);
117 |                             return Err((
118 |                                 StatusCode::INTERNAL_SERVER_ERROR,
119 |                                 JsonResponse(json!({
120 |                                     "error": format!("failed to press key using both inputControl and accessibility API: {}", e)
121 |                                 })),
122 |                             ));
123 |                         }
124 |                     }
125 |                 }
126 |                 
127 |                 // Create the success response based on which method worked
128 |                 let method_used = if input_control_success { "AppleScript" } else { "Accessibility API" };
129 |                 let press_key_response = PressKeyByIndexResponse {
130 |                     success: true,
131 |                     message: format!(
132 |                         "successfully pressed key combination '{}' on element with role: {} (using {} method)",
133 |                         request.key_combo,
134 |                         element.role(),
135 |                         method_used
136 |                     ),
137 |                 };
138 |                 
139 |                 // Get refreshed elements using the helper function
140 |                 let elements_response = refresh_elements_and_attributes_after_action(state, app_name.clone(), 500).await;
141 |                 
142 |                 // Return combined response
143 |                 Ok(JsonResponse(PressKeyByIndexWithElementsResponse {
144 |                     press_key: press_key_response,
145 |                     elements: elements_response,
146 |                 }))
147 |             } else {
148 |                 error!(
149 |                     "element index out of bounds: {} (max: {})",
150 |                     request.element_index,
151 |                     elements.len() - 1
152 |                 );
153 |                 Err((
154 |                     StatusCode::BAD_REQUEST,
155 |                     JsonResponse(json!({
156 |                         "error": format!("element index out of bounds: {} (max: {})",
157 |                                        request.element_index, elements.len() - 1)
158 |                     })),
159 |                 ))
160 |             }
161 |         }
162 |         Some(_) => {
163 |             // Cache entry expired
164 |             Err((
165 |                 StatusCode::BAD_REQUEST,
166 |                 JsonResponse(json!({
167 |                     "error": "cache entry expired, please list elements again"
168 |                 })),
169 |             ))
170 |         }
171 |         None => {
172 |             // Cache miss
173 |             Err((
174 |                 StatusCode::NOT_FOUND,
175 |                 JsonResponse(json!({
176 |                     "error": "no cache entry found, please list elements again"
177 |                 })),
178 |             ))
179 |         }
180 |     }
181 | }
182 | 
183 | // Helper function to convert key combo to AppleScript format
184 | fn convert_key_combo_to_applescript(key_combo: &str) -> String {
185 |     // Split the key combo by "+" to handle modifiers
186 |     let parts: Vec<&str> = key_combo.split('+').collect();
187 |     
188 |     // Last part is usually the main key
189 |     let main_key = parts.last().unwrap_or(&"").trim();
190 |     
191 |     // Check for modifiers
192 |     let has_command = parts.iter().any(|p| p.trim().eq_ignore_ascii_case("command") || p.trim().eq_ignore_ascii_case("cmd"));
193 |     let has_shift = parts.iter().any(|p| p.trim().eq_ignore_ascii_case("shift"));
194 |     let has_option = parts.iter().any(|p| p.trim().eq_ignore_ascii_case("option") || p.trim().eq_ignore_ascii_case("alt")); 
195 |     let has_control = parts.iter().any(|p| p.trim().eq_ignore_ascii_case("control") || p.trim().eq_ignore_ascii_case("ctrl"));
196 |     
197 |     // For special keys like Return, Tab, etc.
198 |     let special_key_mapping = match main_key.to_lowercase().as_str() {
199 |         "return" | "enter" => "return",
200 |         "tab" => "tab",
201 |         "escape" | "esc" => "escape",
202 |         "backspace" | "delete" => "delete",
203 |         "space" => "space",
204 |         "down" | "downarrow" => "down arrow",
205 |         "up" | "uparrow" => "up arrow",
206 |         "left" | "leftarrow" => "left arrow",
207 |         "right" | "rightarrow" => "right arrow",
208 |         _ => main_key,  // use as is for regular keys
209 |     };
210 |     
211 |     // Build the AppleScript
212 |     let mut script = String::from("tell application \"System Events\" to ");
213 |     
214 |     // For simple one-character keys
215 |     if special_key_mapping.len() == 1 && !has_command && !has_shift && !has_option && !has_control {
216 |         script.push_str(&format!("keystroke \"{}\"", special_key_mapping));
217 |     } else {
218 |         // For key combinations or special keys
219 |         script.push_str("key code ");
220 |         
221 |         // Map the key to AppleScript key code or use the name for special keys
222 |         match special_key_mapping {
223 |             "return" => script.push_str("36"),
224 |             "tab" => script.push_str("48"),
225 |             "escape" => script.push_str("53"),
226 |             "delete" => script.push_str("51"),
227 |             "space" => script.push_str("49"),
228 |             "down arrow" => script.push_str("125"),
229 |             "up arrow" => script.push_str("126"),
230 |             "left arrow" => script.push_str("123"),
231 |             "right arrow" => script.push_str("124"),
232 |             _ => {
233 |                 // For single character keys
234 |                 if special_key_mapping.len() == 1 {
235 |                     // Get ASCII value
236 |                     let c = special_key_mapping.chars().next().unwrap();
237 |                     // This is a simplification - a proper implementation would map characters to key codes
238 |                     // For letters, lowercase ASCII - 'a' + 0 would work
239 |                     if c.is_ascii_lowercase() {
240 |                         script.push_str(&format!("{}", (c as u8 - b'a') + 0));
241 |                     } else if c.is_ascii_uppercase() {
242 |                         script.push_str(&format!("{}", (c as u8 - b'A') + 0));
243 |                     } else {
244 |                         // This is a placeholder - you'd need a full mapping for all characters
245 |                         script.push_str(&format!("\"{}\"", c));
246 |                     }
247 |                 } else {
248 |                     // For anything else, default to keystroke
249 |                     script = format!("tell application \"System Events\" to keystroke \"{}\"", special_key_mapping);
250 |                 }
251 |             }
252 |         }
253 |         
254 |         // Add modifiers
255 |         if has_command || has_shift || has_option || has_control {
256 |             script.push_str(" using {");
257 |             let mut modifiers = Vec::new();
258 |             if has_command { modifiers.push("command down"); }
259 |             if has_shift { modifiers.push("shift down"); }
260 |             if has_option { modifiers.push("option down"); }
261 |             if has_control { modifiers.push("control down"); }
262 |             script.push_str(&modifiers.join(", "));
263 |             script.push_str("}");
264 |         }
265 |     }
266 |     
267 |     debug!("generated applescript: {}", script);
268 |     script
269 | }
270 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/bin/handlers/type_by_index.rs:
--------------------------------------------------------------------------------
  1 | use std::sync::Arc;
  2 | use axum::{
  3 |     extract::{Json, State},
  4 |     http::StatusCode,
  5 |     response::Json as JsonResponse,
  6 | };
  7 | use serde::Serialize;
  8 | use serde_json::json;
  9 | use tracing::{debug, error};
 10 | use computer_use_ai_sdk::Desktop;
 11 | 
 12 | use crate::types::{AppState, TypeByIndexRequest, TypeByIndexResponse, ListElementsAndAttributesResponse};
 13 | use crate::refresh_elements_and_attributes_after_action;
 14 | 
/// Combined payload returned by `type_by_index_handler`: the outcome of the typing
/// action plus a fresh listing of the application's elements.
#[derive(Serialize)]
pub struct TypeByIndexWithElementsResponse {
    /// Result of the typing action itself (success flag and human-readable message).
    pub type_action: TypeByIndexResponse,
    /// Elements re-listed after typing; `None` when the refresh did not succeed.
    pub elements: Option<ListElementsAndAttributesResponse>,
}
 21 | 
 22 | pub async fn type_by_index_handler(
 23 |     State(state): State<Arc<AppState>>,
 24 |     Json(request): Json<TypeByIndexRequest>,
 25 | ) -> Result<JsonResponse<TypeByIndexWithElementsResponse>, (StatusCode, JsonResponse<serde_json::Value>)> {
 26 |     // Get elements from cache
 27 |     let elements_opt = {
 28 |         let cache = state.element_cache.lock().await;
 29 |         cache.clone()
 30 |     };
 31 | 
 32 |     // Check if cache exists
 33 |     if elements_opt.is_none() {
 34 |         return Err((
 35 |             StatusCode::BAD_REQUEST,
 36 |             JsonResponse(json!({
 37 |                 "error": "no element cache found - you must call listInteractableElementsByIndex first to index the elements before using by-index operations"
 38 |             })),
 39 |         ));
 40 |     }
 41 | 
 42 |     match elements_opt {
 43 |         Some((elements, timestamp, app_name)) if timestamp.elapsed() < std::time::Duration::from_secs(30) => {
 44 |             // Use element_index directly
 45 |             if request.element_index < elements.len() {
 46 |                 let element = &elements[request.element_index];
 47 |                 
 48 |                 // Step 1: Try inputControl first
 49 |                 debug!("attempting to type text '{}' using inputControl (AppleScript)", request.text);
 50 | 
 51 |                 // Activate the app first
 52 |                 debug!("activating app: {}", app_name);
 53 |                 let desktop = match Desktop::new(false, true) {
 54 |                     Ok(d) => d,
 55 |                     Err(e) => {
 56 |                         error!("failed to initialize desktop automation: {}", e);
 57 |                         return Err((
 58 |                             StatusCode::INTERNAL_SERVER_ERROR,
 59 |                             JsonResponse(json!({
 60 |                                 "error": format!("failed to initialize desktop automation: {}", e)
 61 |                             })),
 62 |                         ));
 63 |                     }
 64 |                 };
 65 | 
 66 |                 // Get and activate the application
 67 |                 let _ = match desktop.application(&app_name) {
 68 |                     Ok(app) => app,
 69 |                     Err(e) => {
 70 |                         error!("application not found: {}", e);
 71 |                         return Err((
 72 |                             StatusCode::NOT_FOUND,
 73 |                             JsonResponse(json!({
 74 |                                 "error": format!("application not found: {}", e)
 75 |                             })),
 76 |                         ));
 77 |                     }
 78 |                 };
 79 | 
 80 |                 // Click the element first to ensure it has focus
 81 |                 if let Err(e) = element.click() {
 82 |                     debug!("failed to click element before typing: {}", e);
 83 |                     // Continue anyway
 84 |                 }
 85 | 
 86 |                 // Small delay to ensure element is focused
 87 |                 std::thread::sleep(std::time::Duration::from_millis(100));
 88 | 
 89 |                 // Use inputControl for text input using System Events
 90 |                 use std::process::Command;
 91 | 
 92 |                 // Escape any quotes in the text to avoid breaking the AppleScript
 93 |                 let escaped_text = request.text.replace("\"", "\\\"");
 94 |                 let script = format!("tell application \"System Events\" to keystroke \"{}\"", escaped_text);
 95 | 
 96 |                 let input_control_success = match Command::new("osascript").arg("-e").arg(script).output() {
 97 |                     Ok(_) => {
 98 |                         debug!("successfully typed text '{}' using inputControl", request.text);
 99 |                         true
100 |                     },
101 |                     Err(e) => {
102 |                         debug!("failed to type text using inputControl: {} - falling back to AXValue", e);
103 |                         false
104 |                     }
105 |                 };
106 | 
107 |                 // Step 2: If inputControl failed, try AXValue as fallback
108 |                 if !input_control_success {
109 |                     debug!("falling back to AXValue for typing");
110 |                     match element.type_text(&request.text) {
111 |                         Ok(_) => {
112 |                             debug!("successfully typed text '{}' into element with role: {} using AXValue", 
113 |                                   request.text, element.role());
114 |                             
115 |                             // Add a small delay to ensure UI updates
116 |                             std::thread::sleep(std::time::Duration::from_millis(100));
117 |                             
118 |                             // Verify text was actually set by reading it back
119 |                             let verification = match element.text(1) {
120 |                                 Ok(actual_text) => {
121 |                                     let contains_text = actual_text.contains(&request.text);
122 |                                     if contains_text {
123 |                                         debug!("verified text was set correctly: '{}'", actual_text);
124 |                                         true
125 |                                     } else {
126 |                                         debug!("verification failed: expected '{}' but got '{}'", 
127 |                                               request.text, actual_text);
128 |                                         false
129 |                                     }
130 |                                 },
131 |                                 Err(e) => {
132 |                                     debug!("failed to verify text: {}", e);
133 |                                     false
134 |                                 }
135 |                             };
136 |                             
137 |                             if !verification {
138 |                                 error!("failed to verify text was set with AXValue after inputControl failure");
139 |                                 return Err((
140 |                                     StatusCode::INTERNAL_SERVER_ERROR,
141 |                                     JsonResponse(json!({
142 |                                         "error": "failed to type text using both inputControl and AXValue methods"
143 |                                     })),
144 |                                 ));
145 |                             }
146 |                         },
147 |                         Err(e) => {
148 |                             error!("failed to type text into element with AXValue: {}", e);
149 |                             return Err((
150 |                                 StatusCode::INTERNAL_SERVER_ERROR,
151 |                                 JsonResponse(json!({
152 |                                     "error": format!("failed to type text using both inputControl and AXValue methods: {}", e)
153 |                                 })),
154 |                             ));
155 |                         }
156 |                     }
157 |                 }
158 | 
159 |                 // Create the success response based on which method worked
160 |                 let method_used = if input_control_success { "AppleScript" } else { "AXValue" };
161 |                 let type_response = TypeByIndexResponse {
162 |                     success: true,
163 |                     message: format!(
164 |                         "successfully typed text into element with role: {} (using {} method)",
165 |                         element.role(), method_used
166 |                     ),
167 |                 };
168 |                 
169 |                 // Get refreshed elements using the helper function
170 |                 let elements_response = refresh_elements_and_attributes_after_action(state, app_name.clone(), 500).await;
171 |                 
172 |                 // Return combined response
173 |                 Ok(JsonResponse(TypeByIndexWithElementsResponse {
174 |                     type_action: type_response,
175 |                     elements: elements_response,
176 |                 }))
177 |             } else {
178 |                 error!(
179 |                     "element index out of bounds: {} (max: {})",
180 |                     request.element_index,
181 |                     elements.len() - 1
182 |                 );
183 |                 Err((
184 |                     StatusCode::BAD_REQUEST,
185 |                     JsonResponse(json!({
186 |                         "error": format!("element index out of bounds: {} (max: {})",
187 |                                         request.element_index, elements.len() - 1)
188 |                     })),
189 |                 ))
190 |             }
191 |         }
192 |         Some(_) => {
193 |             // Cache entry expired
194 |             Err((
195 |                 StatusCode::BAD_REQUEST,
196 |                 JsonResponse(json!({
197 |                     "error": "cache entry expired, please list elements again"
198 |                 })),
199 |             ))
200 |         }
201 |         None => {
202 |             // Cache miss
203 |             Err((
204 |                 StatusCode::NOT_FOUND,
205 |                 JsonResponse(json!({
206 |                     "error": "no cache entry found, please list elements again"
207 |                 })),
208 |             ))
209 |         }
210 |     }
211 | }
212 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/bin/handlers/utils.rs:
--------------------------------------------------------------------------------
 1 | use std::sync::Arc;
 2 | use axum::extract::{Json, State};
 3 | use tokio::time::Duration;
 4 | use tracing::{error, info};
 5 | 
 6 | use crate::types::*;
 7 | use crate::AppState;
 8 | 
 9 | use super::list_elements_and_attributes::list_elements_and_attributes_handler;
10 | 
11 | 
12 | pub async fn refresh_elements_and_attributes_after_action(
13 |     state: Arc<AppState>,
14 |     app_name: String,
15 |     delay_ms: u64,
16 | ) -> Option<ListElementsAndAttributesResponse> {
17 |     // Add a small delay to allow UI to update
18 |     info!("waiting for UI to update after action before listing elements and attributes");
19 |     tokio::time::sleep(Duration::from_millis(delay_ms)).await;
20 |     
21 |     // Create request for list elements and attributes
22 |     let list_request = ListInteractableElementsRequest {
23 |         app_name,
24 |         max_elements: None,
25 |         use_background_apps: Some(false),
26 |         activate_app: Some(true),
27 |     };
28 |     
29 |     // Call the handler to get fresh elements
30 |     match list_elements_and_attributes_handler(State(state), Json(list_request)).await {
31 |         Ok(response) => Some(response.0),
32 |         Err(e) => {
33 |             error!("failed to refresh elements and attributes after action: {:?}", e);
34 |             None
35 |         }
36 |     }
37 | }
38 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/bin/mcp-bridge.ts:
--------------------------------------------------------------------------------
 1 | // mcp-bridge.ts
 2 | import { Server } from "@modelcontextprotocol/sdk/server/index.js";
 3 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 4 | import { spawn } from "child_process";
 5 | import { z } from "zod";
 6 | 
 7 | // Path to your rust binary
 8 | const RUST_BINARY = "/Users/matthewdi/Desktop/screenpipe/computer-use-ai-sdk/mcp-server-os-level/target/debug/server";
 9 | 
10 | // Create server
11 | const server = new Server(
12 |   {
13 |     name: "ui-automation-bridge",
14 |     version: "1.0.0",
15 |   },
16 |   {
17 |     capabilities: {
18 |       tools: {
19 |         // Define the same tools as your Rust server
20 |       },
21 |     },
22 |   }
23 | );
24 | 
25 | // Start your Rust server in HTTP mode (not STDIO)
26 | const rustProcess = spawn(RUST_BINARY, [], {
27 |   stdio: 'ignore' // Run in background
28 | });
29 | 
30 | // Set up clean exit
31 | process.on('exit', () => {
32 |   rustProcess.kill();
33 | });
34 | 
// Forward incoming requests to the Rust server's HTTP API.
// NOTE(review): the `/* ... */` schema argument is still a placeholder and must be
// replaced with a real request schema before this file will compile. Every request
// is currently forwarded to the click-by-index endpoint.
server.setRequestHandler(/* ... */, async (request) => {
  const response = await fetch("http://127.0.0.1:8080/api/click-by-index", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(request.params),
  });

  // The Rust handlers answer failures with 4xx/5xx and an error body; surface those
  // as errors instead of returning them as if they were successful results.
  if (!response.ok) {
    const detail = await response.text();
    throw new Error(`Rust server responded with HTTP ${response.status}: ${detail}`);
  }

  return await response.json();
});
47 | 
// Connects the bridge server to a stdio transport and reports readiness on stderr.
async function runServer() {
  await server.connect(new StdioServerTransport());
  console.error("UI Automation Bridge running on stdio");
}

// Launch the bridge; on a startup failure, log it, stop the Rust child process and
// exit with a non-zero status.
runServer().catch((startupError) => {
  console.error("Fatal error running server:", startupError);
  rustProcess.kill();
  process.exit(1);
});


--------------------------------------------------------------------------------
/mcp-server-os-level/src/bin/mod.rs:
--------------------------------------------------------------------------------
1 | // Define modules
2 | pub mod server;
3 | pub mod types;
4 | pub mod handlers;


--------------------------------------------------------------------------------
/mcp-server-os-level/src/bin/server.rs:
--------------------------------------------------------------------------------
  1 | use std::{net::SocketAddr, sync::Arc, io::{self, BufRead, BufReader, Write}};
  2 | 
  3 | use axum::{
  4 |     routing::post,
  5 |     Router,
  6 | };
  7 | use tokio::sync::Mutex;
  8 | use tower_http::{cors::CorsLayer, trace::TraceLayer};
  9 | use tracing::{error, info, level_filters::LevelFilter};
 10 | use serde_json::{json, Value};
 11 | mod types;
 12 | use types::*;
 13 | mod handlers;
 14 | 
 15 | // Import only the handlers actually used
 16 | use handlers::mcp::mcp_handler;
 17 | use handlers::click_by_index::click_by_index_handler;
 18 | use handlers::type_by_index::type_by_index_handler;
 19 | use handlers::press_key_by_index::press_key_by_index_handler;
 20 | use handlers::open_application::open_application_handler;
 21 | use handlers::open_url::open_url_handler;
 22 | use handlers::input_control::input_control_handler;
 23 | use handlers::list_elements_and_attributes::list_elements_and_attributes_handler;
 24 | use handlers::utils::*;
 25 | 
 26 | // Import mcp_handler helpers but we'll call them directly
 27 | use handlers::mcp::{handle_initialize, handle_execute_tool_function, mcp_error_response};
 28 | 
 29 | // ================ Main ================
 30 | 
 31 | #[tokio::main]
 32 | async fn main() -> anyhow::Result<()> {
 33 |     // Check if we should use STDIO mode
 34 |     let use_stdio = std::env::args().any(|arg| arg == "--stdio");
 35 |     
 36 |     // initialize tracing with different settings based on mode
 37 |     if use_stdio {
 38 |         // For STDIO mode, disable colors and only log to stderr
 39 |         tracing_subscriber::fmt()
 40 |             .with_max_level(LevelFilter::DEBUG)
 41 |             .with_ansi(false)  // Disable ANSI color codes
 42 |             .with_writer(std::io::stderr)  // Only write logs to stderr
 43 |             .init();
 44 |     } else {
 45 |         // For HTTP mode, use default settings
 46 |         tracing_subscriber::fmt()
 47 |             .with_max_level(LevelFilter::DEBUG)
 48 |             .init();
 49 |     }
 50 |     
 51 |     info!("starting ui automation server");
 52 |     
 53 |     // Check permissions early - add this line
 54 |     check_os_permissions();
 55 |     
 56 |     // Create app state
 57 |     let app_state = Arc::new(AppState {
 58 |         element_cache: Arc::new(Mutex::new(None)),
 59 |     });
 60 | 
 61 |     if use_stdio {
 62 |         info!("running in STDIO mode for MCP");
 63 |         // run_stdio_mode(app_state).await?;
 64 |     } else {
 65 |         info!("running in HTTP mode on port 8080");
 66 |         run_http_server(app_state).await?;
 67 |     }
 68 |     
 69 |     Ok(())
 70 | }
 71 | 
 72 | async fn run_http_server(app_state: Arc<AppState>) -> anyhow::Result<()> {
 73 |     // Create CORS layer
 74 |     let cors = CorsLayer::very_permissive();
 75 |     
 76 |     // Create router with both existing and MCP endpoints plus new endpoints
 77 |     let app = Router::new()
 78 |         .route("/mcp", post(mcp_handler))
 79 |         .route("/api/click-by-index", post(click_by_index_handler))
 80 |         .route("/api/type-by-index", post(type_by_index_handler))
 81 |         .route("/api/press-key-by-index", post(press_key_by_index_handler))
 82 |         .route("/api/open-application", post(open_application_handler))
 83 |         .route("/api/open-url", post(open_url_handler))
 84 |         .route("/api/input-control", post(input_control_handler))
 85 |         .route("/api/list-elements-and-attributes", post(list_elements_and_attributes_handler))
 86 |         .with_state(app_state)
 87 |         .layer(cors)
 88 |         .layer(TraceLayer::new_for_http());
 89 |     
 90 |     // Get the address to bind to
 91 |     let addr = SocketAddr::from(([0, 0, 0, 0], 8080));
 92 |     info!("listening on {}", addr);
 93 |     
 94 |     // Start the server
 95 |     axum::Server::bind(&addr)
 96 |         .serve(app.into_make_service())
 97 |         .await?;
 98 |     
 99 |     Ok(())
100 | }
101 | 
/// Checks, and on macOS prompts for, the accessibility permission.
///
/// On macOS the permission is first checked with the prompt enabled. If it
/// is not yet granted, the function sleeps for two seconds and checks again
/// without prompting. When the permission is still missing, or a check
/// fails, an instruction banner is logged. On other platforms this is a no-op.
///
/// NOTE(review): this blocks the calling thread for up to two seconds via
/// `std::thread::sleep`; it is called once from `main` at startup.
fn check_os_permissions() {
    // Only check on macOS
    #[cfg(target_os = "macos")]
    {
        use computer_use_ai_sdk::platforms::macos::check_accessibility_permissions;

        /// Logs the banner telling the user how to grant the permission.
        fn log_permission_instructions() {
            info!("**************************************************************");
            info!("* ACCESSIBILITY PERMISSIONS REQUIRED                          *");
            info!("* Go to System Preferences > Security & Privacy > Privacy >   *");
            info!("* Accessibility and add this application.                     *");
            info!("* Without this permission, UI automation will not function.   *");
            info!("**************************************************************");
        }

        match check_accessibility_permissions(true) {
            Ok(true) => info!("accessibility permissions already granted"),
            Ok(false) => {
                info!("accessibility permissions: prompt shown to user");
                // Sleep to give user time to respond to the prompt
                std::thread::sleep(std::time::Duration::from_secs(2));

                // Check again without prompt. The boolean must be inspected:
                // `Ok(false)` means the check ran but permission is missing.
                match check_accessibility_permissions(false) {
                    Ok(true) => info!("accessibility permissions now granted"),
                    Ok(false) => {
                        error!("accessibility permissions still not granted");
                        log_permission_instructions();
                    }
                    Err(e) => {
                        error!("accessibility permissions check failed: {}", e);
                        log_permission_instructions();
                    }
                }
            }
            Err(e) => {
                error!("accessibility permissions check failed: {}", e);
                log_permission_instructions();
            }
        }
    }
}
145 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/bin/types.rs:
--------------------------------------------------------------------------------
  1 | use std::{collections::HashMap, sync::Arc, time::Instant};
  2 | use computer_use_ai_sdk::UIElement;
  3 | use serde::{Deserialize, Serialize};
  4 | use tokio::sync::Mutex;
  5 | use serde_json::Value;
  6 | 
  7 | // ================ Types ================
  8 | 
/// Serde-(de)serializable description of which UI element a request targets.
///
/// Only `app_name` and `locator` are required; every other field is optional.
// NOTE(review): how each optional field narrows the match is decided by the
// handlers, which are not visible here.
#[derive(Debug, Deserialize, Serialize)]
pub struct ElementSelector {
    pub app_name: String,
    pub window_name: Option<String>,
    pub locator: String,
    pub index: Option<usize>,
    pub text: Option<String>,
    pub label: Option<String>,
    pub description: Option<String>,
    pub element_id: Option<String>,
    pub use_background_apps: Option<bool>,
    pub activate_app: Option<bool>,
}
 22 | 
/// Request payload pairing an [`ElementSelector`] with optional
/// `max_results` and `max_depth` limits.
#[derive(Debug, Deserialize, Serialize)]
pub struct FindElementsRequest {
    pub selector: ElementSelector,
    pub max_results: Option<usize>,
    pub max_depth: Option<usize>,
}
 29 | 
/// Request payload carrying only the [`ElementSelector`] of the element to act on.
#[derive(Debug, Deserialize, Serialize)]
pub struct ClickElementRequest {
    pub selector: ElementSelector,
}
 34 | 
/// Request payload pairing an [`ElementSelector`] with a `text` string.
#[derive(Debug, Deserialize, Serialize)]
pub struct TypeTextRequest {
    pub selector: ElementSelector,
    pub text: String,
}
 40 | 
/// Request payload pairing an [`ElementSelector`] with a `key_combo` string.
// NOTE(review): the accepted key-combo syntax is defined by the handler — confirm there.
#[derive(Debug, Deserialize, Serialize)]
pub struct PressKeyRequest {
    pub selector: ElementSelector,
    pub key_combo: String,
}
 46 | 
/// Request payload naming an application (required) plus optional window
/// name, depth limit, and two optional boolean flags.
#[derive(Debug, Deserialize, Serialize)]
pub struct GetTextRequest {
    pub app_name: String,
    pub window_name: Option<String>,
    pub max_depth: Option<usize>,
    pub use_background_apps: Option<bool>,
    pub activate_app: Option<bool>,
}
 55 | 
/// Integer `x`/`y` coordinate pair.
// NOTE(review): coordinate origin and units are not established in this file.
#[derive(Debug, Deserialize, Serialize)]
pub struct ElementPosition {
    pub x: i32,
    pub y: i32,
}
 61 | 
/// Integer `width`/`height` pair.
#[derive(Debug, Deserialize, Serialize)]
pub struct ElementSize {
    pub width: i32,
    pub height: i32,
}
 67 | 
/// Serializable snapshot of one UI element.
///
/// `role` and `properties` are always present; the remaining fields are optional.
#[derive(Debug, Deserialize, Serialize)]
pub struct ElementInfo {
    pub id: Option<String>,
    pub role: String,
    pub label: Option<String>,
    pub description: Option<String>,
    pub text: Option<String>,
    pub position: Option<ElementPosition>,
    pub size: Option<ElementSize>,
    /// Free-form JSON value holding additional properties.
    pub properties: serde_json::Value,
}
 79 | 
/// Serialize-only response wrapping a list of [`ElementInfo`] under `data`.
#[derive(Debug, Serialize)]
pub struct FindElementsResponse {
    pub data: Vec<ElementInfo>,
}
 84 | 
/// Serialize-only response with a `success` flag and a `message` string.
#[derive(Debug, Serialize)]
pub struct ActionResponse {
    pub success: bool,
    pub message: String,
}
 90 | 
/// Serialize-only response with a `success` flag and a `text` string.
#[derive(Debug, Serialize)]
pub struct GetTextResponse {
    pub success: bool,
    pub text: String,
}
 96 | 
/// Application state; `main` creates it once and passes it to the router.
pub struct AppState {
    /// Optional cache behind an async mutex: a list of [`UIElement`]s, an
    /// [`Instant`], and a `String`.
    // NOTE(review): presumably (elements, time cached, app name or cache id) —
    // confirm against the handlers that fill it.
    pub element_cache: Arc<Mutex<Option<(Vec<UIElement>, Instant, String)>>>,
}
101 | 
102 | // MCP-specific types
/// Incoming MCP request envelope.
///
/// The field set (`jsonrpc`, `id`, `method`, optional `params`) appears to
/// mirror a JSON-RPC request object.
#[derive(Debug, Deserialize, Serialize)]
pub struct MCPRequest {
    pub jsonrpc: String,
    pub id: Value,
    pub method: String,
    pub params: Option<Value>,
}
110 | 
/// MCP response envelope carrying a `result` value alongside `jsonrpc` and `id`.
#[derive(Debug, Deserialize, Serialize)]
pub struct MCPResponse {
    pub jsonrpc: String,
    pub id: Value,
    pub result: Value,
}
117 | 
/// MCP error envelope carrying an [`MCPError`] alongside `jsonrpc` and `id`.
#[derive(Debug, Deserialize, Serialize)]
pub struct MCPErrorResponse {
    pub jsonrpc: String,
    pub id: Value,
    pub error: MCPError,
}
124 | 
/// Error object embedded in [`MCPErrorResponse`]: a numeric `code`, a
/// `message`, and optional extra `data`.
#[derive(Debug, Deserialize, Serialize)]
pub struct MCPError {
    pub code: i32,
    pub message: String,
    pub data: Option<Value>,
}
131 | 
/// Parameters holding the client's declared [`ClientCapabilities`].
#[derive(Debug, Deserialize, Serialize)]
pub struct InitializeParams {
    pub capabilities: ClientCapabilities,
}
136 | 
/// Capabilities advertised by an MCP client; both groups are optional.
#[derive(Debug, Deserialize, Serialize)]
pub struct ClientCapabilities {
    // MCP client capabilities
    pub tools: Option<ToolClientCapabilities>,
    pub resources: Option<ResourceClientCapabilities>,
    // Add other capabilities as needed
}
144 | 
/// Tool-related client capability flags; currently a single `execution` boolean.
#[derive(Debug, Deserialize, Serialize)]
pub struct ToolClientCapabilities {
    pub execution: bool,
}
149 | 
/// Placeholder for resource-related client capabilities; it has no fields yet.
#[derive(Debug, Deserialize, Serialize)]
pub struct ResourceClientCapabilities {
    // Resource capabilities
}
154 | 
/// Capabilities advertised by this server; both groups are optional.
#[derive(Debug, Deserialize, Serialize)]
pub struct ServerCapabilities {
    pub tools: Option<ToolServerCapabilities>,
    pub resources: Option<ResourceServerCapabilities>,
    // Add other capabilities as needed
}
161 | 
/// Tool capabilities of the server: a list of [`ToolFunctionDefinition`]s.
#[derive(Debug, Deserialize, Serialize)]
pub struct ToolServerCapabilities {
    pub functions: Vec<ToolFunctionDefinition>,
}
166 | 
/// Placeholder for resource-related server capabilities; it has no fields yet.
#[derive(Debug, Deserialize, Serialize)]
pub struct ResourceServerCapabilities {
    // Resource capabilities
}
171 | 
/// Description of one tool function: its name, a description, and its
/// parameters expressed as a JSON Schema value.
#[derive(Debug, Deserialize, Serialize)]
pub struct ToolFunctionDefinition {
    pub name: String,
    pub description: String,
    pub parameters: serde_json::Value, // JSON Schema
}
178 | 
/// Parameters naming a tool `function` and carrying its `arguments` as
/// untyped JSON.
#[derive(Debug, Deserialize, Serialize)]
pub struct ExecuteToolFunctionParams {
    pub function: String,
    pub arguments: Value,
}
184 | 
185 | // Types for scrolling
/// Scroll request: an optional element `selector`, optional screen
/// `coordinates`, a `direction` string, and an `amount`.
// NOTE(review): which of `selector`/`coordinates` wins when both are set, and
// the accepted `direction` values, are decided by the handler — confirm there.
#[derive(Debug, Deserialize, Serialize)]
pub struct ScrollElementRequest {
    pub selector: Option<ElementSelector>,
    pub coordinates: Option<ElementPosition>,
    pub direction: String,
    pub amount: f64,
}
193 | 
194 | // Types for opening applications
195 | #[derive(Deserialize, Serialize)]
196 | pub struct OpenApplicationRequest {
197 |     pub app_name: String,
198 | }
199 | 
200 | #[derive(Serialize)]
201 | pub struct OpenApplicationResponse {
202 |     pub success: bool,
203 |     pub message: String,
204 | }
205 | 
206 | // Types for opening URLs
207 | #[derive(Deserialize, Serialize)]
208 | pub struct OpenUrlRequest {
209 |     pub url: String,
210 |     pub browser: Option<String>,
211 | }
212 | 
213 | #[derive(Serialize)]
214 | pub struct OpenUrlResponse {
215 |     pub success: bool,
216 |     pub message: String,
217 | }
218 | 
219 | // Types for interactable elements
/// Request payload naming an application (required) plus an optional element
/// limit and two optional boolean flags.
#[derive(Debug, Deserialize, Serialize)]
pub struct ListInteractableElementsRequest {
    pub app_name: String,
    pub max_elements: Option<usize>,
    pub use_background_apps: Option<bool>,
    pub activate_app: Option<bool>,
}
227 | 
/// Serialize-only description of one element in an indexed element listing.
#[derive(Debug, Serialize)]
pub struct InteractableElement {
    pub index: usize,
    pub role: String,
    pub interactability: String, // "definite", "sometimes", "none"
    pub text: String,
    pub position: Option<ElementPosition>,
    pub size: Option<ElementSize>,
    pub element_id: Option<String>,
}
238 | 
/// Serialize-only metadata about a cached element listing.
// NOTE(review): the string format of `timestamp`/`expires_at` is set by the
// code that builds this value — confirm there.
#[derive(Debug, Serialize)]
pub struct ElementCacheInfo {
    pub cache_id: String,
    pub timestamp: String,
    pub expires_at: String,
    pub element_count: usize,
    pub ttl_seconds: u64,
}
247 | 
/// Serialize-only counters summarising an element listing: totals, exclusion
/// counts, with/without-text counts, and per-key tallies for roles and properties.
#[derive(serde::Serialize, Debug)]
pub struct ElementStatistics {
    pub count: usize,
    pub excluded_count: usize,
    pub excluded_non_interactable: usize,
    pub excluded_no_text: usize,
    pub with_text_count: usize,
    pub without_text_count: usize,
    /// Tally keyed by string (presumably role name → occurrences).
    pub top_roles: HashMap<String, u32>,
    /// Tally keyed by string (presumably property name → occurrences).
    pub properties: HashMap<String, u32>,
}
260 | 
/// Serialize-only response bundling the listed elements (as raw JSON values),
/// cache metadata, statistics, and a processing-time string.
#[derive(serde::Serialize, Debug)]
pub struct ListElementsAndAttributesResponse {
    pub elements: Vec<serde_json::Value>,
    pub cache_info: ElementCacheInfo,
    pub stats: ElementStatistics,
    pub processing_time_seconds: String,
}
268 | 
269 | // Types for index-based operations
/// Request payload identifying an element by its numeric `element_index`.
// NOTE(review): presumably an index into the most recent cached listing — confirm in the handler.
#[derive(Debug, Deserialize, Serialize)]
pub struct ClickByIndexRequest {
    pub element_index: usize,
}
274 | 
/// Serialize-only response with a `success` flag, a `message`, and an
/// optional element listing.
#[derive(Debug, Serialize)]
pub struct ClickByIndexResponse {
    pub success: bool,
    pub message: String,
    pub elements: Option<ListElementsAndAttributesResponse>,
}
281 | 
/// Request payload pairing a numeric `element_index` with a `text` string.
#[derive(Debug, Deserialize, Serialize)]
pub struct TypeByIndexRequest {
    pub element_index: usize,
    pub text: String,
}
287 | 
/// Serialize-only response with a `success` flag and a `message` string.
#[derive(Debug, Serialize)]
pub struct TypeByIndexResponse {
    pub success: bool,
    pub message: String,
}
293 | 
/// Request payload pairing a numeric `element_index` with a `key_combo` string.
#[derive(Debug, Deserialize, Serialize)]
pub struct PressKeyByIndexRequest {
    pub element_index: usize,
    pub key_combo: String,
}
299 | 
/// Serialize-only response with a `success` flag and a `message` string.
#[derive(Debug, Serialize)]
pub struct PressKeyByIndexResponse {
    pub success: bool,
    pub message: String,
}
305 | 
306 | // Types for input control
/// Deserialize-only request wrapping a single [`InputAction`].
#[derive(Debug, Deserialize)]
pub struct InputControlRequest {
    pub action: InputAction,
}
311 | 
/// A low-level input action.
///
/// Deserialized with serde's adjacently tagged representation: the variant
/// name is read from the `"type"` key and its payload from the `"data"` key.
#[derive(Debug, Deserialize)]
#[serde(tag = "type", content = "data")]
pub enum InputAction {
    /// Carries a single string payload.
    KeyPress(String),
    /// Carries integer `x`/`y` coordinates.
    MouseMove { x: i32, y: i32 },
    /// Carries a single string payload.
    // NOTE(review): presumably a button name — confirm in the handler.
    MouseClick(String),
    /// Carries the text as a single string payload.
    WriteText(String),
}
320 | 
321 | #[derive(Serialize)]
322 | pub struct InputControlResponse {
323 |     pub success: bool,
324 | }
325 | 
326 | // Combined response types
/// Combined response: the [`InputControlResponse`] plus an optional element listing.
#[derive(Serialize)]
pub struct InputControlWithElementsResponse {
    pub input: InputControlResponse,
    pub elements: Option<ListElementsAndAttributesResponse>,
}
332 | 
333 | #[derive(Serialize)]
334 | pub struct ClickByIndexWithElementsResponse {
335 |     pub click: ClickByIndexResponse,
336 |     pub elements: Option<ListElementsAndAttributesResponse>,
337 | }
338 | 
339 | #[derive(Serialize)]
340 | pub struct TypeByIndexWithElementsResponse {
341 |     pub type_action: TypeByIndexResponse,
342 |     pub elements: Option<ListElementsAndAttributesResponse>,
343 | }
344 | 
/// Combined response: the [`PressKeyByIndexResponse`] plus an optional element listing.
#[derive(Debug, Serialize)]
pub struct PressKeyByIndexWithElementsResponse {
    pub press_key: PressKeyByIndexResponse,
    pub elements: Option<ListElementsAndAttributesResponse>,
}
350 | 
/// Combined response: the [`OpenApplicationResponse`] plus an optional element listing.
#[derive(Serialize)]
pub struct OpenApplicationWithElementsResponse {
    pub application: OpenApplicationResponse,
    pub elements: Option<ListElementsAndAttributesResponse>,
}
356 | 
/// Combined response: the [`OpenUrlResponse`] plus an optional element listing.
#[derive(Serialize)]
pub struct OpenUrlWithElementsResponse {
    pub url: OpenUrlResponse,
    pub elements: Option<ListElementsAndAttributesResponse>,
}
362 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/desktop.rs:
--------------------------------------------------------------------------------
 1 | //! Desktop UI automation through accessibility APIs
 2 | //!
 3 | //! This module provides a cross-platform API for automating desktop applications
 4 | //! through accessibility APIs, inspired by Playwright's web automation model.
 5 | 
 6 | use std::sync::Arc;
 7 | 
 8 | mod element;
 9 | mod errors;
10 | mod locator;
11 | pub mod platforms;
12 | mod selector;
13 | #[cfg(test)]
14 | mod tests;
15 | 
16 | 
17 | pub use element::{UIElement, UIElementAttributes};
18 | pub use errors::AutomationError;
19 | pub use locator::Locator;
20 | pub use selector::Selector;
21 | 
/// Outcome of a click operation.
///
/// Derives `Debug`, `Clone` and `PartialEq` so results can be logged, copied
/// and compared in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct ClickResult {
    /// Name of the click method that was used.
    pub method: String,
    /// Coordinates associated with the click, when available.
    pub coordinates: Option<(f64, f64)>,
    /// Free-form details about the click.
    pub details: String,
}
28 | 
/// The main entry point for UI automation.
///
/// Holds the platform accessibility engine behind a shared `Arc`; the
/// [`Locator`]s it creates receive a clone of that handle.
pub struct Desktop {
    engine: Arc<dyn platforms::AccessibilityEngine>,
}
33 | 
34 | impl Desktop {
35 |     /// Create a new instance with the default platform-specific implementation
36 |     pub fn new(use_background_apps: bool, activate_app: bool) -> Result<Self, AutomationError> {
37 |         let boxed_engine = platforms::create_engine(use_background_apps, activate_app)?;
38 |         // Move the boxed engine into an Arc
39 |         let engine = Arc::from(boxed_engine);
40 |         Ok(Self { engine })
41 |     }
42 | 
43 |     /// Get the root UI element representing the entire desktop
44 |     pub fn root(&self) -> UIElement {
45 |         self.engine.get_root_element()
46 |     }
47 | 
48 |     /// Create a locator to find elements matching the given selector
49 |     pub fn locator(&self, selector: impl Into<Selector>) -> Locator {
50 |         Locator::new(Arc::clone(&self.engine), selector.into())
51 |     }
52 | 
53 |     /// Get the currently focused element
54 |     pub fn focused_element(&self) -> Result<UIElement, AutomationError> {
55 |         self.engine.get_focused_element()
56 |     }
57 | 
58 |     /// List all running applications
59 |     pub fn applications(&self) -> Result<Vec<UIElement>, AutomationError> {
60 |         self.engine.get_applications()
61 |     }
62 | 
63 |     /// Find an application by name
64 |     pub fn application(&self, name: &str) -> Result<UIElement, AutomationError> {
65 |         self.engine.get_application_by_name(name)
66 |     }
67 | 
68 |     /// Open an application by name
69 |     pub fn open_application(&self, app_name: &str) -> Result<UIElement, AutomationError> {
70 |         self.engine.open_application(app_name)
71 |     }
72 | 
73 |     /// Open a URL in a specified browser (or default browser if None)
74 |     pub fn open_url(&self, url: &str, browser: Option<&str>) -> Result<UIElement, AutomationError> {
75 |         self.engine.open_url(url, browser)
76 |     }
77 | 
78 |     /// Scroll at a specific position on the screen
79 |     pub fn scroll_at_position(&self, x: f64, y: f64, direction: &str, amount: f64) -> Result<(), AutomationError> {
80 |         self.engine.scroll_at_position(x, y, direction, amount)
81 |     }
82 | 
83 |     /// Scroll at the current mouse position
84 |     pub fn scroll_at_current_position(&self, direction: &str, amount: f64) -> Result<(), AutomationError> {
85 |         self.engine.scroll_at_current_position(direction, amount)
86 |     }
87 | }
88 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/element.rs:
--------------------------------------------------------------------------------
  1 | use crate::errors::AutomationError;
  2 | use crate::selector::Selector;
  3 | use std::collections::HashMap;
  4 | use std::fmt::Debug;
  5 | 
  6 | use super::{ClickResult, Locator};
  7 | 
/// Represents a UI element in a desktop application.
///
/// A thin wrapper that owns a boxed platform-specific implementation of
/// [`UIElementImpl`]; its methods delegate to that implementation.
#[derive(Debug)]
pub struct UIElement {
    inner: Box<dyn UIElementImpl>,
}
 13 | 
/// Attributes associated with a UI element.
///
/// `role` is always present; `label`, `value` and `description` are optional.
#[derive(Debug)]
pub struct UIElementAttributes {
    pub role: String,
    pub label: Option<String>,
    pub value: Option<String>,
    pub description: Option<String>,
    /// Additional named properties; a key may map to `None`.
    pub properties: HashMap<String, Option<serde_json::Value>>,
}
 23 | 
/// Interface for platform-specific element implementations.
///
/// [`UIElement`] forwards its public methods to this trait. Implementations
/// must be `Send + Sync + Debug`.
pub(crate) trait UIElementImpl: Send + Sync + Debug {
    /// Identity value used by `UIElement`'s `PartialEq` and `Hash` impls.
    fn object_id(&self) -> usize;
    /// Optional string identifier of the element.
    fn id(&self) -> Option<String>;
    /// Role string of the element.
    fn role(&self) -> String;
    /// Snapshot of the element's attributes.
    fn attributes(&self) -> UIElementAttributes;
    /// Child elements.
    fn children(&self) -> Result<Vec<UIElement>, AutomationError>;
    /// Parent element, or `None` when there is none.
    fn parent(&self) -> Result<Option<UIElement>, AutomationError>;
    fn bounds(&self) -> Result<(f64, f64, f64, f64), AutomationError>; // x, y, width, height
    /// Clicks the element and reports how the click was performed.
    fn click(&self) -> Result<ClickResult, AutomationError>;
    /// Double-clicks the element.
    fn double_click(&self) -> Result<ClickResult, AutomationError>;
    /// Right-clicks the element.
    fn right_click(&self) -> Result<(), AutomationError>;
    /// Hovers over the element.
    fn hover(&self) -> Result<(), AutomationError>;
    /// Gives the element focus.
    fn focus(&self) -> Result<(), AutomationError>;
    /// Types `text` into the element.
    fn type_text(&self, text: &str) -> Result<(), AutomationError>;
    /// Presses `key` on the element.
    fn press_key(&self, key: &str) -> Result<(), AutomationError>;
    /// Text content of the element, gathered with the given `max_depth` limit.
    fn get_text(&self, max_depth: usize) -> Result<String, AutomationError>;
    /// Sets the element's value.
    fn set_value(&self, value: &str) -> Result<(), AutomationError>;
    /// Whether the element is enabled.
    fn is_enabled(&self) -> Result<bool, AutomationError>;
    /// Whether the element is visible.
    fn is_visible(&self) -> Result<bool, AutomationError>;
    /// Whether the element is focused.
    fn is_focused(&self) -> Result<bool, AutomationError>;
    /// Performs the named action on the element.
    fn perform_action(&self, action: &str) -> Result<(), AutomationError>;
    /// Exposes the concrete implementation for downcasting.
    fn as_any(&self) -> &dyn std::any::Any;
    /// Creates a [`Locator`] for `selector` on behalf of this element.
    fn create_locator(&self, selector: Selector) -> Result<Locator, AutomationError>;
    /// Scrolls the element in `direction` by `amount`.
    fn scroll(&self, direction: &str, amount: f64) -> Result<(), AutomationError>;

    /// Clones the implementation into a new box; this backs `UIElement: Clone`.
    fn clone_box(&self) -> Box<dyn UIElementImpl>;
}
 53 | 
 54 | impl UIElement {
 55 |     /// Create a new UI element from a platform-specific implementation
 56 |     pub(crate) fn new(impl_: Box<dyn UIElementImpl>) -> Self {
 57 |         Self { inner: impl_ }
 58 |     }
 59 | 
 60 |     /// Get the element's ID
 61 |     pub fn id(&self) -> Option<String> {
 62 |         self.inner.id()
 63 |     }
 64 | 
 65 |     /// Get the element's role (e.g., "button", "textfield")
 66 |     pub fn role(&self) -> String {
 67 |         self.inner.role()
 68 |     }
 69 | 
 70 |     /// Get all attributes of the element
 71 |     pub fn attributes(&self) -> UIElementAttributes {
 72 |         self.inner.attributes()
 73 |     }
 74 | 
 75 |     /// Get child elements
 76 |     pub fn children(&self) -> Result<Vec<UIElement>, AutomationError> {
 77 |         self.inner.children()
 78 |     }
 79 | 
 80 |     /// Get parent element
 81 |     pub fn parent(&self) -> Result<Option<UIElement>, AutomationError> {
 82 |         self.inner.parent()
 83 |     }
 84 | 
 85 |     /// Get element bounds (x, y, width, height)
 86 |     pub fn bounds(&self) -> Result<(f64, f64, f64, f64), AutomationError> {
 87 |         self.inner.bounds()
 88 |     }
 89 | 
 90 |     /// Click on this element
 91 |     pub fn click(&self) -> Result<ClickResult, AutomationError> {
 92 |         self.inner.click()
 93 |     }
 94 | 
 95 |     /// Double-click on this element
 96 |     pub fn double_click(&self) -> Result<ClickResult, AutomationError> {
 97 |         self.inner.double_click()
 98 |     }
 99 | 
100 |     /// Right-click on this element
101 |     pub fn right_click(&self) -> Result<(), AutomationError> {
102 |         self.inner.right_click()
103 |     }
104 | 
105 |     /// Hover over this element
106 |     pub fn hover(&self) -> Result<(), AutomationError> {
107 |         self.inner.hover()
108 |     }
109 | 
110 |     /// Focus this element
111 |     pub fn focus(&self) -> Result<(), AutomationError> {
112 |         self.inner.focus()
113 |     }
114 | 
115 |     /// Type text into this element
116 |     pub fn type_text(&self, text: &str) -> Result<(), AutomationError> {
117 |         self.inner.type_text(text)
118 |     }
119 | 
120 |     /// Press a key while this element is focused
121 |     pub fn press_key(&self, key: &str) -> Result<(), AutomationError> {
122 |         self.inner.press_key(key)
123 |     }
124 | 
125 |     /// Get text content of this element
126 |     pub fn text(&self, max_depth: usize) -> Result<String, AutomationError> {
127 |         self.inner.get_text(max_depth)
128 |     }
129 | 
130 |     /// Set value of this element
131 |     pub fn set_value(&self, value: &str) -> Result<(), AutomationError> {
132 |         self.inner.set_value(value)
133 |     }
134 | 
135 |     /// Check if element is enabled
136 |     pub fn is_enabled(&self) -> Result<bool, AutomationError> {
137 |         self.inner.is_enabled()
138 |     }
139 | 
140 |     /// Check if element is visible
141 |     pub fn is_visible(&self) -> Result<bool, AutomationError> {
142 |         self.inner.is_visible()
143 |     }
144 | 
145 |     /// Check if element is focused
146 |     pub fn is_focused(&self) -> Result<bool, AutomationError> {
147 |         self.inner.is_focused()
148 |     }
149 | 
150 |     /// Perform a named action on this element
151 |     pub fn perform_action(&self, action: &str) -> Result<(), AutomationError> {
152 |         self.inner.perform_action(action)
153 |     }
154 | 
155 |     /// Get the underlying implementation as a specific type
156 |     pub(crate) fn as_any(&self) -> &dyn std::any::Any {
157 |         self.inner.as_any()
158 |     }
159 | 
160 |     /// Find elements matching the selector within this element
161 |     pub fn locator(&self, selector: impl Into<Selector>) -> Result<Locator, AutomationError> {
162 |         let selector = selector.into();
163 |         self.inner.create_locator(selector)
164 |     }
165 | 
166 |     /// Scroll the element in a given direction
167 |     pub fn scroll(&self, direction: &str, amount: f64) -> Result<(), AutomationError> {
168 |         self.inner.scroll(direction, amount)
169 |     }
170 | }
171 | 
172 | impl PartialEq for UIElement {
173 |     fn eq(&self, other: &Self) -> bool {
174 |         self.inner.object_id() == other.inner.object_id()
175 |     }
176 | }
177 | 
// Equality compares `usize` object ids, so it is a total equivalence relation.
impl Eq for UIElement {}
179 | 
180 | impl std::hash::Hash for UIElement {
181 |     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
182 |         self.inner.object_id().hash(state);
183 |     }
184 | }
185 | 
186 | impl Clone for UIElement {
187 |     fn clone(&self) -> Self {
188 |         // We can't directly clone the inner Box<dyn UIElementImpl>,
189 |         // but we can create a new UIElement with the same identity
190 |         // that will behave the same way
191 |         Self {
192 |             inner: self.inner.clone_box(),
193 |         }
194 |     }
195 | }
196 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/errors.rs:
--------------------------------------------------------------------------------
 1 | use thiserror::Error;
 2 | 
/// Error type returned by the automation API.
///
/// Each variant carries a free-form message that is appended to the variant's
/// fixed prefix by the `thiserror`-generated `Display` implementation.
#[derive(Error, Debug)]
pub enum AutomationError {
    /// No element matched.
    #[error("Element not found: {0}")]
    ElementNotFound(String),

    /// An operation did not finish in time (returned by `Locator::wait`).
    #[error("Operation timed out: {0}")]
    Timeout(String),

    /// A required permission is missing.
    #[error("Permission denied: {0}")]
    PermissionDenied(String),

    /// Failure reported by the platform layer.
    #[error("Platform-specific error: {0}")]
    PlatformError(String),

    /// The requested operation is not supported.
    #[error("Unsupported operation: {0}")]
    UnsupportedOperation(String),

    /// The current platform is not supported.
    #[error("Unsupported platform: {0}")]
    UnsupportedPlatform(String),

    /// An argument was invalid.
    #[error("Invalid argument: {0}")]
    InvalidArgument(String),

    /// Internal failure.
    #[error("Internal error: {0}")]
    Internal(String),
}
29 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/lib.rs:
--------------------------------------------------------------------------------
 1 | //! Desktop UI automation through accessibility APIs
 2 | //!
 3 | //! This module provides a cross-platform API for automating desktop applications
 4 | //! through accessibility APIs, inspired by Playwright's web automation model.
 5 | 
 6 | use std::sync::Arc;
 7 | 
 8 | mod element;
 9 | mod errors;
10 | mod locator;
11 | pub mod platforms;
12 | mod selector;
13 | #[cfg(test)]
14 | mod tests;
15 | 
16 | pub use element::{UIElement, UIElementAttributes};
17 | pub use errors::AutomationError;
18 | pub use locator::Locator;
19 | pub use selector::Selector;
20 | 
/// Outcome of a click operation.
///
/// Derives `Debug`, `Clone` and `PartialEq` so results can be logged, copied
/// and compared in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct ClickResult {
    /// Name of the click method that was used.
    pub method: String,
    /// Coordinates associated with the click, when available.
    pub coordinates: Option<(f64, f64)>,
    /// Free-form details about the click.
    pub details: String,
}
27 | 
/// The main entry point for UI automation.
///
/// Holds the platform accessibility engine behind a shared `Arc`; the
/// [`Locator`]s it creates receive a clone of that handle.
pub struct Desktop {
    engine: Arc<dyn platforms::AccessibilityEngine>,
}
32 | 
33 | impl Desktop {
34 |     /// Create a new instance with the default platform-specific implementation
35 |     pub fn new(use_background_apps: bool, activate_app: bool) -> Result<Self, AutomationError> {
36 |         let boxed_engine = platforms::create_engine(use_background_apps, activate_app)?;
37 |         // Move the boxed engine into an Arc
38 |         let engine = Arc::from(boxed_engine);
39 |         Ok(Self { engine })
40 |     }
41 | 
42 |     /// Get the root UI element representing the entire desktop
43 |     pub fn root(&self) -> UIElement {
44 |         self.engine.get_root_element()
45 |     }
46 | 
47 |     /// Create a locator to find elements matching the given selector
48 |     pub fn locator(&self, selector: impl Into<Selector>) -> Locator {
49 |         Locator::new(Arc::clone(&self.engine), selector.into())
50 |     }
51 | 
52 |     /// Get the currently focused element
53 |     pub fn focused_element(&self) -> Result<UIElement, AutomationError> {
54 |         self.engine.get_focused_element()
55 |     }
56 | 
57 |     /// List all running applications
58 |     pub fn applications(&self) -> Result<Vec<UIElement>, AutomationError> {
59 |         self.engine.get_applications()
60 |     }
61 | 
62 |     /// Find an application by name
63 |     pub fn application(&self, name: &str) -> Result<UIElement, AutomationError> {
64 |         self.engine.get_application_by_name(name)
65 |     }
66 | 
67 |     /// Open an application by name
68 |     pub fn open_application(&self, app_name: &str) -> Result<UIElement, AutomationError> {
69 |         self.engine.open_application(app_name)
70 |     }
71 | 
72 |     /// Open a URL in a specified browser (or default browser if None)
73 |     pub fn open_url(&self, url: &str, browser: Option<&str>) -> Result<UIElement, AutomationError> {
74 |         self.engine.open_url(url, browser)
75 |     }
76 | 
77 |     // /// Scroll at a specific position on screen
78 |     // pub fn scroll_at_position(&self, x: f64, y: f64, direction: &str, amount: f64) -> Result<(), AutomationError> {
79 |     //     self.engine.scroll_at_position(x, y, direction, amount)
80 |     // }
81 | 
82 |     // /// Scroll at the current mouse position
83 |     // pub fn scroll_at_current_position(&self, direction: &str, amount: f64) -> Result<(), AutomationError> {
84 |     //     self.engine.scroll_at_current_position(direction, amount)
85 |     // }
86 | }
87 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/locator.rs:
--------------------------------------------------------------------------------
  1 | use crate::platforms::AccessibilityEngine;
  2 | use crate::{AutomationError, Selector, UIElement};
  3 | use std::sync::Arc;
  4 | use std::time::Duration;
  5 | 
  6 | use super::ClickResult;
  7 | 
/// A high-level API for finding and interacting with UI elements.
pub struct Locator {
    /// Shared accessibility engine used to run the searches.
    engine: Arc<dyn AccessibilityEngine>,
    /// Selector describing the element(s) to find.
    selector: Selector,
    /// Upper bound on how long `wait` keeps polling (30 seconds by default).
    timeout: Duration,
    /// Optional root element passed to the engine along with the selector.
    root: Option<UIElement>,
}
 15 | 
 16 | impl Locator {
 17 |     /// Create a new locator with the given selector
 18 |     pub(crate) fn new(engine: Arc<dyn AccessibilityEngine>, selector: Selector) -> Self {
 19 |         Self {
 20 |             engine,
 21 |             selector,
 22 |             timeout: Duration::from_secs(30),
 23 |             root: None,
 24 |         }
 25 |     }
 26 | 
 27 |     /// Set timeout for waiting operations
 28 |     pub fn timeout(mut self, timeout: Duration) -> Self {
 29 |         self.timeout = timeout;
 30 |         self
 31 |     }
 32 | 
 33 |     /// Set the root element for this locator
 34 |     pub fn within(mut self, element: UIElement) -> Self {
 35 |         self.root = Some(element);
 36 |         self
 37 |     }
 38 | 
 39 |     /// Get the first element matching this locator
 40 |     pub fn first(&self) -> Result<Option<UIElement>, AutomationError> {
 41 |         let element = self
 42 |             .engine
 43 |             .find_element(&self.selector, self.root.as_ref())?;
 44 |         Ok(Some(element))
 45 |     }
 46 | 
 47 |     /// Get all elements matching this locator
 48 |     pub fn all(&self) -> Result<Vec<UIElement>, AutomationError> {
 49 |         // Check if we can use platform-specific find_elements method
 50 |         if let Ok(elements) = self
 51 |             .engine
 52 |             .find_elements(&self.selector, self.root.as_ref())
 53 |         {
 54 |             return Ok(elements);
 55 |         }
 56 | 
 57 |         // Fallback implementation - get the first element, then get its siblings
 58 |         // Note: This is a naive implementation and might not work correctly in all cases
 59 |         match self.first()? {
 60 |             Some(first) => {
 61 |                 let result = vec![first];
 62 |                 // In a proper implementation, we would need to search for siblings
 63 |                 // or implement a custom ElementCollector that gathers all matches
 64 |                 Ok(result)
 65 |             }
 66 |             None => Ok(vec![]),
 67 |         }
 68 |     }
 69 | 
 70 |     /// Wait for an element to be available
 71 |     pub async fn wait(&self) -> Result<UIElement, AutomationError> {
 72 |         let start = std::time::Instant::now();
 73 | 
 74 |         while start.elapsed() < self.timeout {
 75 |             if let Some(element) = self.first()? {
 76 |                 return Ok(element);
 77 |             }
 78 |             tokio::time::sleep(Duration::from_millis(50)).await;
 79 |         }
 80 | 
 81 |         Err(AutomationError::Timeout(format!(
 82 |             "Timed out waiting for selector: {:?}",
 83 |             self.selector
 84 |         )))
 85 |     }
 86 | 
 87 |     /// Get a nested locator
 88 |     pub fn locator(&self, selector: impl Into<Selector>) -> Locator {
 89 |         let selector = selector.into();
 90 |         Locator {
 91 |             engine: self.engine.clone(),
 92 |             selector: Selector::Chain(vec![self.selector.clone(), selector]),
 93 |             timeout: self.timeout,
 94 |             root: self.root.clone(),
 95 |         }
 96 |     }
 97 | 
 98 |     // Convenience methods for common actions
 99 | 
100 |     /// Click on the first matching element
101 |     pub async fn click(&self) -> Result<ClickResult, AutomationError> {
102 |         self.wait().await?.click()
103 |     }
104 | 
105 |     /// Type text into the first matching element
106 |     pub async fn type_text(&self, text: &str) -> Result<(), AutomationError> {
107 |         self.wait().await?.type_text(text)
108 |     }
109 | 
110 |     /// Press a key on the first matching element
111 |     pub async fn press_key(&self, key: &str) -> Result<(), AutomationError> {
112 |         self.wait().await?.press_key(key)
113 |     }
114 | 
115 |     /// Get text from the first matching element
116 |     pub async fn text(&self, max_depth: usize) -> Result<String, AutomationError> {
117 |         self.wait().await?.text(max_depth)
118 |     }
119 | }
120 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/platforms/linux.rs:
--------------------------------------------------------------------------------
  1 | use crate::operator::element::UIElementImpl;
  2 | use crate::operator::platforms::AccessibilityEngine;
  3 | use crate::operator::ClickResult;
  4 | use crate::operator::{AutomationError, Locator, Selector, UIElement, UIElementAttributes};
  5 | use std::fmt::Debug;
  6 | 
  7 | /// Placeholder engine for Linux: construction and every fallible operation
  8 | /// report `AutomationError::UnsupportedPlatform`; `get_root_element` panics.
  9 | pub struct LinuxEngine;
 10 | 
 11 | impl LinuxEngine {
 12 |     /// Always fails, because no Linux backend exists yet.
 13 |     ///
 14 |     /// Both flags are ignored; they are accepted so that `create_engine` can
 15 |     /// call every platform constructor with the same arguments.
 16 |     pub fn new(_use_background_apps: bool, _activate_app: bool) -> Result<Self, AutomationError> {
 17 |         Self::unsupported()
 18 |     }
 19 | 
 20 |     /// Build the error shared by every stubbed operation.
 21 |     fn unsupported<T>() -> Result<T, AutomationError> {
 22 |         Err(AutomationError::UnsupportedPlatform(
 23 |             "Linux implementation is not yet available".to_string(),
 24 |         ))
 25 |     }
 26 | }
 27 | 
 28 | impl AccessibilityEngine for LinuxEngine {
 29 |     fn get_root_element(&self) -> UIElement {
 30 |         // The trait method returns a bare `UIElement`, so there is no way to
 31 |         // hand back an error from here.
 32 |         panic!("Linux implementation is not yet available")
 33 |     }
 34 | 
 35 |     fn get_focused_element(&self) -> Result<UIElement, AutomationError> {
 36 |         Self::unsupported()
 37 |     }
 38 | 
 39 |     fn get_applications(&self) -> Result<Vec<UIElement>, AutomationError> {
 40 |         Self::unsupported()
 41 |     }
 42 | 
 43 |     fn get_application_by_name(&self, _name: &str) -> Result<UIElement, AutomationError> {
 44 |         Self::unsupported()
 45 |     }
 46 | 
 47 |     fn find_element(
 48 |         &self,
 49 |         _selector: &Selector,
 50 |         _root: Option<&UIElement>,
 51 |     ) -> Result<UIElement, AutomationError> {
 52 |         Self::unsupported()
 53 |     }
 54 | 
 55 |     fn find_elements(
 56 |         &self,
 57 |         _selector: &Selector,
 58 |         _root: Option<&UIElement>,
 59 |     ) -> Result<Vec<UIElement>, AutomationError> {
 60 |         Self::unsupported()
 61 |     }
 62 | 
 63 |     fn open_application(&self, _app_name: &str) -> Result<UIElement, AutomationError> {
 64 |         Self::unsupported()
 65 |     }
 66 | 
 67 |     fn open_url(&self, _url: &str, _browser: Option<&str>) -> Result<UIElement, AutomationError> {
 68 |         Self::unsupported()
 69 |     }
 70 | 
 71 |     /// Required by `AccessibilityEngine` (the trait declares it without a
 72 |     /// default body); exposes the engine for downcasting.
 73 |     fn as_any(&self) -> &dyn std::any::Any {
 74 |         self
 75 |     }
 76 | }
 72 | 
 73 | /// Placeholder `UIElementImpl` for Linux. It carries no data: the identity and
 74 | /// attribute getters return empty values, and every fallible operation reports
 75 | /// `AutomationError::UnsupportedPlatform`.
 76 | pub struct LinuxUIElement;
 77 | 
 78 | impl LinuxUIElement {
 79 |     /// Build the error shared by every stubbed operation.
 80 |     fn unsupported<T>() -> Result<T, AutomationError> {
 81 |         Err(AutomationError::UnsupportedPlatform(
 82 |             "Linux implementation is not yet available".to_string(),
 83 |         ))
 84 |     }
 85 | }
 86 | 
 87 | impl Debug for LinuxUIElement {
 88 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 89 |         // Same output as `debug_struct(..).finish()` on a field-less struct.
 90 |         f.write_str("LinuxUIElement")
 91 |     }
 92 | }
 93 | 
 94 | impl UIElementImpl for LinuxUIElement {
 95 |     fn object_id(&self) -> usize {
 96 |         0
 97 |     }
 98 | 
 99 |     fn id(&self) -> Option<String> {
100 |         None
101 |     }
102 | 
103 |     fn role(&self) -> String {
104 |         String::new()
105 |     }
106 | 
107 |     fn attributes(&self) -> UIElementAttributes {
108 |         let role = String::new();
109 |         let properties = std::collections::HashMap::new();
110 |         UIElementAttributes {
111 |             role,
112 |             label: None,
113 |             value: None,
114 |             description: None,
115 |             properties,
116 |         }
117 |     }
118 | 
119 |     fn children(&self) -> Result<Vec<UIElement>, AutomationError> {
120 |         Self::unsupported()
121 |     }
122 | 
123 |     fn parent(&self) -> Result<Option<UIElement>, AutomationError> {
124 |         Self::unsupported()
125 |     }
126 | 
127 |     fn bounds(&self) -> Result<(f64, f64, f64, f64), AutomationError> {
128 |         Self::unsupported()
129 |     }
130 | 
131 |     fn click(&self) -> Result<ClickResult, AutomationError> {
132 |         Self::unsupported()
133 |     }
134 | 
135 |     fn double_click(&self) -> Result<ClickResult, AutomationError> {
136 |         Self::unsupported()
137 |     }
138 | 
139 |     fn right_click(&self) -> Result<(), AutomationError> {
140 |         Self::unsupported()
141 |     }
142 | 
143 |     fn hover(&self) -> Result<(), AutomationError> {
144 |         Self::unsupported()
145 |     }
146 | 
147 |     fn focus(&self) -> Result<(), AutomationError> {
148 |         Self::unsupported()
149 |     }
150 | 
151 |     fn type_text(&self, _text: &str) -> Result<(), AutomationError> {
152 |         Self::unsupported()
153 |     }
154 | 
155 |     fn press_key(&self, _key: &str) -> Result<(), AutomationError> {
156 |         Self::unsupported()
157 |     }
158 | 
159 |     fn get_text(&self, max_depth: usize) -> Result<String, AutomationError> {
160 |         Self::unsupported()
161 |     }
162 | 
163 |     fn set_value(&self, _value: &str) -> Result<(), AutomationError> {
164 |         Self::unsupported()
165 |     }
166 | 
167 |     fn is_enabled(&self) -> Result<bool, AutomationError> {
168 |         Self::unsupported()
169 |     }
170 | 
171 |     fn is_visible(&self) -> Result<bool, AutomationError> {
172 |         Self::unsupported()
173 |     }
174 | 
175 |     fn is_focused(&self) -> Result<bool, AutomationError> {
176 |         Self::unsupported()
177 |     }
178 | 
179 |     fn perform_action(&self, _action: &str) -> Result<(), AutomationError> {
180 |         Self::unsupported()
181 |     }
182 | 
183 |     fn as_any(&self) -> &dyn std::any::Any {
184 |         self
185 |     }
186 | 
187 |     fn create_locator(&self, _selector: Selector) -> Result<Locator, AutomationError> {
188 |         Self::unsupported()
189 |     }
190 | 
191 |     fn clone_box(&self) -> Box<dyn UIElementImpl> {
192 |         Box::new(Self)
193 |     }
194 | }
215 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/platforms/mod.rs:
--------------------------------------------------------------------------------
 1 | use crate::{AutomationError, Selector, UIElement};
 2 | 
 3 | /// The common trait that all platform-specific engines must implement
 4 | pub trait AccessibilityEngine: Send + Sync {
 5 |     /// Get the root UI element (no `Result`: an implementation cannot report an error here)
 6 |     fn get_root_element(&self) -> UIElement;
 7 | 
 8 |     #[cfg(target_os = "windows")] // this method is part of the trait only in Windows builds
 9 |     fn get_element_by_id(&self, _id: &str) -> Result<UIElement, AutomationError>;
10 |     /// Get the currently focused element
11 |     fn get_focused_element(&self) -> Result<UIElement, AutomationError>;
12 | 
13 |     /// Get all running applications
14 |     fn get_applications(&self) -> Result<Vec<UIElement>, AutomationError>;
15 | 
16 |     /// Get application by name
17 |     fn get_application_by_name(&self, name: &str) -> Result<UIElement, AutomationError>;
18 | 
19 |     /// Find a single element using a selector; `root` is the caller's optional root element
20 |     fn find_element(
21 |         &self,
22 |         selector: &Selector,
23 |         root: Option<&UIElement>,
24 |     ) -> Result<UIElement, AutomationError>;
25 | 
26 |     /// Find all elements matching a selector
27 |     /// This is a required method (it has no default body in this trait),
28 |     /// so every platform engine has to provide its own implementation
29 |     fn find_elements(
30 |         &self,
31 |         selector: &Selector,
32 |         root: Option<&UIElement>,
33 |     ) -> Result<Vec<UIElement>, AutomationError>;
34 | 
35 |     /// Open an application by name
36 |     fn open_application(&self, app_name: &str) -> Result<UIElement, AutomationError>;
37 | 
38 |     /// Open a URL in a specified browser (or default if None)
39 |     fn open_url(&self, url: &str, browser: Option<&str>) -> Result<UIElement, AutomationError>;
40 | 
41 |     /// Convert to Any for downcasting
42 |     fn as_any(&self) -> &dyn std::any::Any;
43 | 
44 |     // //Scroll at a specific position on screen
45 |     // fn scroll_at_position(&self, x: f64, y: f64, direction: &str, amount: f64) -> Result<(), AutomationError> {
46 |     //     Err(AutomationError::UnsupportedOperation("scroll_at_position not implemented for this platform".to_string()))
47 |     // }
48 | 
49 |     // // Scroll at the current mouse position
50 |     // fn scroll_at_current_position(&self, direction: &str, amount: f64) -> Result<(), AutomationError> {
51 |     //     Err(AutomationError::UnsupportedOperation("scroll_at_current_position not implemented for this platform".to_string()))
52 |     // }
53 | }
54 | 
55 | #[cfg(target_os = "linux")]
56 | mod linux;
57 | #[cfg(target_os = "macos")]
58 | pub mod macos;
59 | #[cfg(target_os = "macos")]
60 | pub mod tree_search;
61 | #[cfg(target_os = "windows")]
62 | mod windows;
63 | 
64 | /// Create the appropriate engine for the current platform.
65 | ///
66 | /// Exactly one platform branch is compiled in. On macOS, Windows and Linux the
67 | /// matching engine is constructed and boxed (a constructor error is returned
68 | /// as-is); on any other target the function returns
69 | /// `AutomationError::UnsupportedPlatform`.
70 | pub fn create_engine(
71 |     use_background_apps: bool,
72 |     activate_app: bool,
73 | ) -> Result<Box<dyn AccessibilityEngine>, AutomationError> {
74 |     #[cfg(target_os = "macos")]
75 |     let engine = macos::MacOSEngine::new(use_background_apps, activate_app)?;
76 |     #[cfg(target_os = "windows")]
77 |     let engine = windows::WindowsEngine::new(use_background_apps, activate_app)?;
78 |     #[cfg(target_os = "linux")]
79 |     let engine = linux::LinuxEngine::new(use_background_apps, activate_app)?;
80 | 
81 |     #[cfg(any(target_os = "macos", target_os = "windows", target_os = "linux"))]
82 |     return Ok(Box::new(engine));
83 | 
84 |     #[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))]
85 |     return Err(AutomationError::UnsupportedPlatform(
86 |         "Current platform is not supported".to_string(),
87 |     ));
88 | }
97 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/platforms/tree_search.rs:
--------------------------------------------------------------------------------
  1 | //! TL;DR: the default TreeWalker does not traverse windows, so this module traverses them manually.
  2 | use accessibility::{AXAttribute, AXUIElement, AXUIElementAttributes, Error};
  3 | use core_foundation::array::CFArray;
  4 | use core_foundation::base::TCFType;
  5 | use std::{
  6 |     cell::{Cell, RefCell},
  7 |     collections::HashSet,
  8 |     hash::{Hash, Hasher},
  9 |     thread,
 10 |     time::{Duration, Instant},
 11 | };
 12 | use tracing::debug;
 13 | 
 14 | pub trait TreeVisitor { // callbacks invoked by `TreeWalkerWithWindows` while it walks a tree
 15 |     fn enter_element(&self, element: &AXUIElement) -> TreeWalkerFlow; // called once per newly entered element; the result steers the walk
 16 |     fn exit_element(&self, element: &AXUIElement); // called after the element was handled, whatever `enter_element` returned
 17 | }
 18 | 
 19 | /// Depth-first walker over an accessibility tree that descends into an
 20 | /// element's windows and main window as well as its regular children.
 21 | ///
 22 | /// A walker enters each element at most once: reaching an element again skips
 23 | /// it and bumps the cycle counter.
 24 | pub struct TreeWalkerWithWindows {
 25 |     /// Attribute used to read an element's regular children.
 26 |     attr_children: AXAttribute<CFArray<AXUIElement>>,
 27 |     /// Elements this walker has already entered.
 28 |     visited: RefCell<HashSet<AXUIElementWrapper>>,
 29 |     /// Number of times an already-entered element was reached again.
 30 |     cycle_count: RefCell<usize>,
 31 | }
 32 | 
 33 | /// Answer of [`TreeVisitor::enter_element`] that steers the walk.
 34 | #[derive(Copy, Clone, PartialEq, Eq)]
 35 | pub enum TreeWalkerFlow {
 36 |     /// Descend into the element's windows, main window and children.
 37 |     Continue,
 38 |     /// Leave the element without descending into it.
 39 |     SkipSubtree,
 40 |     /// Stop the walk; the walker unwinds without entering further elements.
 41 |     Exit,
 42 | }
 43 | 
 44 | impl Default for TreeWalkerWithWindows {
 45 |     /// Walker with an empty visited set and a cycle count of zero.
 46 |     fn default() -> Self {
 47 |         Self {
 48 |             attr_children: AXAttribute::children(),
 49 |             visited: RefCell::default(),
 50 |             cycle_count: RefCell::default(),
 51 |         }
 52 |     }
 53 | }
 54 | 
 55 | impl TreeWalkerWithWindows {
 56 |     /// Same as [`Default::default`].
 57 |     pub fn new() -> Self {
 58 |         Self::default()
 59 |     }
 60 | 
 61 |     /// Walk `root` and everything reachable from it, reporting every newly
 62 |     /// entered element to `visitor`. The final flow value is discarded.
 63 |     pub fn walk(&self, root: &AXUIElement, visitor: &dyn TreeVisitor) {
 64 |         self.walk_one(root, visitor);
 65 |     }
 66 | 
 67 |     /// Visit `root` and, if the visitor agrees, everything reachable from it.
 68 |     ///
 69 |     /// Returns `SkipSubtree` for an element that was entered before, otherwise
 70 |     /// the visitor's answer, turned into `Exit` when a descendant asked to stop.
 71 |     fn walk_one(&self, root: &AXUIElement, visitor: &dyn TreeVisitor) -> TreeWalkerFlow {
 72 |         // `insert` reports whether the element is new, which doubles as the
 73 |         // "already visited" test.
 74 |         let first_visit = self.visited.borrow_mut().insert(AXUIElementWrapper {
 75 |             element: root.clone(),
 76 |         });
 77 |         if !first_visit {
 78 |             *self.cycle_count.borrow_mut() += 1;
 79 |             return TreeWalkerFlow::SkipSubtree;
 80 |         }
 81 | 
 82 |         let entered = visitor.enter_element(root);
 83 |         let flow = match entered {
 84 |             TreeWalkerFlow::Continue => self.descend(root, visitor),
 85 |             other => other,
 86 |         };
 87 | 
 88 |         // Paired with the `enter_element` call above, whatever the outcome.
 89 |         visitor.exit_element(root);
 90 |         flow
 91 |     }
 92 | 
 93 |     /// Walk the windows, then the main window, then the regular children of
 94 |     /// `root`.
 95 |     ///
 96 |     /// Returns `Exit` as soon as one of them asks to stop, `Continue` otherwise.
 97 |     fn descend(&self, root: &AXUIElement, visitor: &dyn TreeVisitor) -> TreeWalkerFlow {
 98 |         // First try to get windows (if this is an application element)
 99 |         if let Ok(windows) = root.windows() {
100 |             for window in windows.iter() {
101 |                 if self.walk_one(&window, visitor) == TreeWalkerFlow::Exit {
102 |                     return TreeWalkerFlow::Exit;
103 |                 }
104 |             }
105 |         }
106 | 
107 |         // TODO avoid duplicate main window walking
108 |         // If the main window was already walked as part of the window list,
109 |         // the visited set turns this second walk into a skip.
110 |         if let Ok(main_window) = root.main_window() {
111 |             if self.walk_one(&main_window, visitor) == TreeWalkerFlow::Exit {
112 |                 return TreeWalkerFlow::Exit;
113 |             }
114 |         }
115 | 
116 |         // Finally the regular children.
117 |         if let Ok(children) = root.attribute(&self.attr_children) {
118 |             for child in children.into_iter() {
119 |                 if self.walk_one(&child, visitor) == TreeWalkerFlow::Exit {
120 |                     return TreeWalkerFlow::Exit;
121 |                 }
122 |             }
123 |         }
124 | 
125 |         TreeWalkerFlow::Continue
126 |     }
127 | 
128 |     /// Number of times this walker reached an element it had already entered.
129 |     pub fn get_cycle_count(&self) -> usize {
130 |         *self.cycle_count.borrow()
131 |     }
132 | }
122 | 
123 | /// Searches the tree below `root` for the first element accepted by a
124 | /// predicate and caches the hit, optionally retrying for a while.
125 | pub struct ElementFinderWithWindows {
126 |     /// Element the search starts from.
127 |     root: AXUIElement,
128 |     /// How long `find` keeps retrying; `None` means a single pass.
129 |     implicit_wait: Option<Duration>,
130 |     /// Decides whether an element is the one being looked for.
131 |     predicate: Box<dyn Fn(&AXUIElement) -> bool>,
132 |     /// Current nesting depth of the running walk.
133 |     depth: Cell<usize>,
134 |     /// First element accepted by `predicate`, once found.
135 |     cached: RefCell<Option<AXUIElement>>,
136 | }
137 | 
138 | impl ElementFinderWithWindows {
139 |     /// Create a finder; nothing is searched until [`Self::find`] is called.
140 |     pub fn new<F>(root: &AXUIElement, predicate: F, implicit_wait: Option<Duration>) -> Self
141 |     where
142 |         F: 'static + Fn(&AXUIElement) -> bool,
143 |     {
144 |         Self {
145 |             root: root.clone(),
146 |             predicate: Box::new(predicate),
147 |             implicit_wait,
148 |             depth: Cell::new(0),
149 |             cached: RefCell::new(None),
150 |         }
151 |     }
152 | 
153 |     /// Return the first element accepted by the predicate.
154 |     ///
155 |     /// A cached hit is returned immediately. Otherwise the tree is walked; if
156 |     /// nothing matched and an implicit wait is configured, the walk is repeated
157 |     /// (sleeping at most 250 ms in between) until the wait has elapsed.
158 |     ///
159 |     /// # Errors
160 |     /// Returns `Error::NotFound` when no pass produced a match.
161 |     pub fn find(&self) -> Result<AXUIElement, Error> {
162 |         if let Some(hit) = self.cached.borrow().as_ref() {
163 |             return Ok(hit.clone());
164 |         }
165 | 
166 |         // `None` means "no waiting": exactly one pass over the tree is made.
167 |         let deadline = self.implicit_wait.map(|wait| Instant::now() + wait);
168 | 
169 |         loop {
170 |             // A fresh walker per pass: a walker remembers every element it has
171 |             // entered, so a reused one would skip `root` on the next pass and
172 |             // the retry would inspect nothing.
173 |             TreeWalkerWithWindows::new().walk(&self.root, self);
174 | 
175 |             // Look at the result before the deadline, so a hit is never
176 |             // reported as `NotFound`, however long the pass took.
177 |             if let Some(hit) = self.cached.borrow().as_ref() {
178 |                 return Ok(hit.clone());
179 |             }
180 | 
181 |             let now = Instant::now();
182 |             match deadline {
183 |                 Some(deadline) if now < deadline => {
184 |                     let time_left = deadline.saturating_duration_since(now);
185 |                     thread::sleep(std::cmp::min(time_left, Duration::from_millis(250)));
186 |                 }
187 |                 _ => return Err(Error::NotFound),
188 |             }
189 |         }
190 |     }
191 | }
192 | 
193 | /// Depth beyond which the visitors in this file stop descending by default.
194 | const MAX_DEPTH: usize = 100;
195 | 
196 | impl TreeVisitor for ElementFinderWithWindows {
197 |     /// Track the depth, remember the element if the predicate accepts it (and
198 |     /// stop the walk), otherwise continue unless `MAX_DEPTH` is exceeded.
199 |     fn enter_element(&self, element: &AXUIElement) -> TreeWalkerFlow {
200 |         self.depth.set(self.depth.get() + 1);
201 | 
202 |         if (self.predicate)(element) {
203 |             self.cached.replace(Some(element.clone()));
204 |             return TreeWalkerFlow::Exit;
205 |         }
206 | 
207 |         if self.depth.get() > MAX_DEPTH {
208 |             TreeWalkerFlow::SkipSubtree
209 |         } else {
210 |             TreeWalkerFlow::Continue
211 |         }
212 |     }
213 | 
214 |     /// Undo the depth increment made in `enter_element`.
215 |     fn exit_element(&self, _element: &AXUIElement) {
216 |         self.depth.set(self.depth.get() - 1)
217 |     }
218 | }
197 | 
198 | /// Collects every element below `root` that a predicate accepts, with
199 | /// optional limits on the number of results and on the search depth.
200 | pub struct ElementsCollectorWithWindows {
201 |     /// Element the search starts from.
202 |     root: AXUIElement,
203 |     /// Decides whether an element belongs in the result.
204 |     predicate: Box<dyn Fn(&AXUIElement) -> bool>,
205 |     /// Current nesting depth of the running walk.
206 |     depth: Cell<usize>,
207 |     /// Elements accepted so far by the running walk.
208 |     matches: RefCell<Vec<AXUIElement>>,
209 |     /// Stop the walk once this many elements were collected; `None` = no limit.
210 |     max_results: Option<usize>,
211 |     /// Depth limit; `None` falls back to `MAX_DEPTH`.
212 |     max_depth: Option<usize>,
213 | }
214 | 
215 | impl ElementsCollectorWithWindows {
216 |     /// Create a collector without result or depth limits of its own.
217 |     pub fn new<F>(root: &AXUIElement, predicate: F) -> Self
218 |     where
219 |         F: 'static + Fn(&AXUIElement) -> bool,
220 |     {
221 |         Self {
222 |             root: root.clone(),
223 |             predicate: Box::new(predicate),
224 |             depth: Cell::new(0),
225 |             matches: RefCell::new(Vec::new()),
226 |             max_results: None,
227 |             max_depth: None,
228 |         }
229 |     }
230 | 
231 |     /// Set both limits at once.
232 |     pub fn with_limits(mut self, max_results: Option<usize>, max_depth: Option<usize>) -> Self {
233 |         self.max_results = max_results;
234 |         self.max_depth = max_depth;
235 |         self
236 |     }
237 | 
238 |     /// Walk the tree and return every accepted element, in visiting order.
239 |     ///
240 |     /// Each call starts from an empty result list, so calling this twice
241 |     /// returns the same elements twice instead of appending the second run to
242 |     /// the first.
243 |     pub fn find_all(&self) -> Vec<AXUIElement> {
244 |         self.matches.borrow_mut().clear();
245 | 
246 |         let walker = TreeWalkerWithWindows::new();
247 |         walker.walk(&self.root, self);
248 | 
249 |         // After traversal is done, log how many cycles were detected
250 |         let cycles = walker.get_cycle_count();
251 |         if cycles > 0 {
252 |             debug!(target: "operator", "UI traversal complete - detected {} cycles in the accessibility tree", cycles);
253 |         }
254 | 
255 |         self.matches.borrow().clone()
256 |     }
257 | 
258 |     /// Set only the result limit.
259 |     pub fn with_max_results(self, max: Option<usize>) -> Self {
260 |         Self {
261 |             max_results: max,
262 |             ..self
263 |         }
264 |     }
265 | 
266 |     /// Set only the depth limit.
267 |     pub fn with_max_depth(self, max: Option<usize>) -> Self {
268 |         Self {
269 |             max_depth: max,
270 |             ..self
271 |         }
272 |     }
273 | }
274 | 
275 | impl TreeVisitor for ElementsCollectorWithWindows {
276 |     /// Track the depth, skip elements beyond the depth limit, and collect the
277 |     /// element if the predicate accepts it. Returns `Exit` once the result
278 |     /// limit is reached.
279 |     fn enter_element(&self, element: &AXUIElement) -> TreeWalkerFlow {
280 |         self.depth.set(self.depth.get() + 1);
281 | 
282 |         // An explicit limit replaces the default instead of adding to it.
283 |         let depth_limit = self.max_depth.unwrap_or(MAX_DEPTH);
284 |         if self.depth.get() > depth_limit {
285 |             return TreeWalkerFlow::SkipSubtree;
286 |         }
287 | 
288 |         if (self.predicate)(element) {
289 |             let mut matches = self.matches.borrow_mut();
290 |             match self.max_results {
291 |                 Some(max_results) => {
292 |                     // Never store more than the limit, not even for a limit of 0.
293 |                     if matches.len() < max_results {
294 |                         matches.push(element.clone());
295 |                     }
296 |                     if matches.len() >= max_results {
297 |                         debug!(target: "operator", "Reached max_results limit of {}", max_results);
298 |                         return TreeWalkerFlow::Exit;
299 |                     }
300 |                 }
301 |                 None => matches.push(element.clone()),
302 |             }
303 |         }
304 | 
305 |         TreeWalkerFlow::Continue
306 |     }
307 | 
308 |     /// Undo the depth increment made in `enter_element`.
309 |     fn exit_element(&self, _element: &AXUIElement) {
310 |         self.depth.set(self.depth.get() - 1)
311 |     }
312 | }
286 | 
287 | /// Key type for the walker's visited set: gives `AXUIElement` the `Eq` and
288 | /// `Hash` impls a `HashSet` needs, based on Core Foundation's own notions.
289 | struct AXUIElementWrapper {
290 |     element: AXUIElement,
291 | }
292 | 
293 | impl PartialEq for AXUIElementWrapper {
294 |     fn eq(&self, other: &Self) -> bool {
295 |         let lhs = self.element.as_concrete_TypeRef();
296 |         let rhs = other.element.as_concrete_TypeRef();
297 | 
298 |         // SAFETY: both references belong to elements that `self` and `other`
299 |         // keep alive for the duration of the call.
300 |         let equal = unsafe { core_foundation::base::CFEqual(lhs as _, rhs as _) };
301 | 
302 |         // CFEqual returns a Boolean (u8), convert to bool
303 |         equal != 0
304 |     }
305 | }
306 | 
307 | impl Eq for AXUIElementWrapper {}
308 | 
309 | impl Hash for AXUIElementWrapper {
310 |     fn hash<H: Hasher>(&self, state: &mut H) {
311 |         let element_ref = self.element.as_concrete_TypeRef();
312 |         // SAFETY: `element_ref` belongs to `self.element`, which is alive here.
313 |         let hash_value = unsafe { core_foundation::base::CFHash(element_ref as _) };
314 |         state.write_u64(hash_value as u64);
315 |     }
316 | }
317 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/platforms/windows.rs:
--------------------------------------------------------------------------------
  1 | use crate::operator::element::UIElementImpl;
  2 | use crate::operator::platforms::AccessibilityEngine;
  3 | use crate::operator::{AutomationError, Locator, Selector, UIElement, UIElementAttributes};
  4 | use std::fmt::Debug;
  5 | use crate::operator::ClickResult;
  6 | 
  7 | /// Placeholder engine for Windows: construction and every fallible operation
  8 | /// report `AutomationError::UnsupportedPlatform`; `get_root_element` panics.
  9 | pub struct WindowsEngine;
 10 | 
 11 | impl WindowsEngine {
 12 |     /// Always fails, because no Windows backend exists yet.
 13 |     ///
 14 |     /// Both flags are ignored; they are accepted so that `create_engine` can
 15 |     /// call every platform constructor with the same arguments.
 16 |     pub fn new(_use_background_apps: bool, _activate_app: bool) -> Result<Self, AutomationError> {
 17 |         Self::unsupported()
 18 |     }
 19 | 
 20 |     /// Build the error shared by every stubbed operation.
 21 |     fn unsupported<T>() -> Result<T, AutomationError> {
 22 |         Err(AutomationError::UnsupportedPlatform(
 23 |             "Windows implementation is not yet available".to_string(),
 24 |         ))
 25 |     }
 26 | }
 27 | 
 28 | impl AccessibilityEngine for WindowsEngine {
 29 |     fn get_root_element(&self) -> UIElement {
 30 |         // The trait method returns a bare `UIElement`, so there is no way to
 31 |         // hand back an error from here.
 32 |         panic!("Windows implementation is not yet available")
 33 |     }
 34 | 
 35 |     fn get_element_by_id(&self, _id: &str) -> Result<UIElement, AutomationError> {
 36 |         Self::unsupported()
 37 |     }
 38 | 
 39 |     fn get_focused_element(&self) -> Result<UIElement, AutomationError> {
 40 |         Self::unsupported()
 41 |     }
 42 | 
 43 |     fn get_applications(&self) -> Result<Vec<UIElement>, AutomationError> {
 44 |         Self::unsupported()
 45 |     }
 46 | 
 47 |     fn get_application_by_name(&self, _name: &str) -> Result<UIElement, AutomationError> {
 48 |         Self::unsupported()
 49 |     }
 50 | 
 51 |     fn find_elements(
 52 |         &self,
 53 |         _selector: &Selector,
 54 |         _root: Option<&UIElement>,
 55 |     ) -> Result<Vec<UIElement>, AutomationError> {
 56 |         Self::unsupported()
 57 |     }
 58 | 
 59 |     fn find_element(
 60 |         &self,
 61 |         _selector: &Selector,
 62 |         _root: Option<&UIElement>,
 63 |     ) -> Result<UIElement, AutomationError> {
 64 |         Self::unsupported()
 65 |     }
 66 | 
 67 |     fn open_application(&self, _app_name: &str) -> Result<UIElement, AutomationError> {
 68 |         Self::unsupported()
 69 |     }
 70 | 
 71 |     fn open_url(&self, _url: &str, _browser: Option<&str>) -> Result<UIElement, AutomationError> {
 72 |         Self::unsupported()
 73 |     }
 74 | 
 75 |     /// Required by `AccessibilityEngine` (the trait declares it without a
 76 |     /// default body); exposes the engine for downcasting.
 77 |     fn as_any(&self) -> &dyn std::any::Any {
 78 |         self
 79 |     }
 80 | }
 78 | 
 79 | /// Placeholder element type for Windows; it carries no data and debug-prints as its bare type name.
 80 | pub struct WindowsUIElement;
 81 | 
 82 | impl Debug for WindowsUIElement {
 83 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 84 |         f.write_str("WindowsUIElement")
 85 |     }
 86 | }
 87 | 
 88 | impl UIElementImpl for WindowsUIElement {
 89 |     fn object_id(&self) -> usize {
 90 |         0
 91 |     }
 92 | 
 93 |     fn id(&self) -> Option<String> {
 94 |         None
 95 |     }
 96 | 
 97 |     fn role(&self) -> String {
 98 |         "".to_string()
 99 |     }
100 | 
101 |     fn attributes(&self) -> UIElementAttributes {
102 |         UIElementAttributes {
103 |             role: "".to_string(),
104 |             label: None,
105 |             value: None,
106 |             description: None,
107 |             properties: std::collections::HashMap::new(),
108 |         }
109 |     }
110 | 
111 |     fn children(&self) -> Result<Vec<UIElement>, AutomationError> {
112 |         Err(AutomationError::UnsupportedPlatform(
113 |             "Windows implementation is not yet available".to_string(),
114 |         ))
115 |     }
116 | 
117 |     fn parent(&self) -> Result<Option<UIElement>, AutomationError> {
118 |         Err(AutomationError::UnsupportedPlatform(
119 |             "Windows implementation is not yet available".to_string(),
120 |         ))
121 |     }
122 | 
123 |     fn bounds(&self) -> Result<(f64, f64, f64, f64), AutomationError> {
124 |         Err(AutomationError::UnsupportedPlatform(
125 |             "Windows implementation is not yet available".to_string(),
126 |         ))
127 |     }
128 | 
129 |     fn click(&self) -> Result<ClickResult, AutomationError> {
130 |         Err(AutomationError::UnsupportedPlatform(
131 |             "Windows implementation is not yet available".to_string(),
132 |         ))
133 |     }
134 | 
135 |     fn double_click(&self) -> Result<ClickResult, AutomationError> {
136 |         Err(AutomationError::UnsupportedPlatform(
137 |             "Windows implementation is not yet available".to_string(),
138 |         ))
139 |     }
140 | 
141 |     fn right_click(&self) -> Result<(), AutomationError> {
142 |         Err(AutomationError::UnsupportedPlatform(
143 |             "Windows implementation is not yet available".to_string(),
144 |         ))
145 |     }
146 | 
147 |     fn hover(&self) -> Result<(), AutomationError> {
148 |         Err(AutomationError::UnsupportedPlatform(
149 |             "Windows implementation is not yet available".to_string(),
150 |         ))
151 |     }
152 | 
153 |     fn focus(&self) -> Result<(), AutomationError> {
154 |         Err(AutomationError::UnsupportedPlatform(
155 |             "Windows implementation is not yet available".to_string(),
156 |         ))
157 |     }
158 | 
159 |     fn type_text(&self, _text: &str) -> Result<(), AutomationError> {
160 |         Err(AutomationError::UnsupportedPlatform(
161 |             "Windows implementation is not yet available".to_string(),
162 |         ))
163 |     }
164 | 
165 |     fn press_key(&self, _key: &str) -> Result<(), AutomationError> {
166 |         Err(AutomationError::UnsupportedPlatform(
167 |             "Windows implementation is not yet available".to_string(),
168 |         ))
169 |     }
170 | 
171 |     fn get_text(&self, max_depth: usize) -> Result<String, AutomationError> {
172 |         Err(AutomationError::UnsupportedPlatform(
173 |             "Windows implementation is not yet available".to_string(),
174 |         ))
175 |     }
176 | 
177 |     fn set_value(&self, _value: &str) -> Result<(), AutomationError> {
178 |         Err(AutomationError::UnsupportedPlatform(
179 |             "Windows implementation is not yet available".to_string(),
180 |         ))
181 |     }
182 | 
183 |     fn is_enabled(&self) -> Result<bool, AutomationError> {
184 |         Err(AutomationError::UnsupportedPlatform(
185 |             "Windows implementation is not yet available".to_string(),
186 |         ))
187 |     }
188 | 
189 |     fn is_visible(&self) -> Result<bool, AutomationError> {
190 |         Err(AutomationError::UnsupportedPlatform(
191 |             "Windows implementation is not yet available".to_string(),
192 |         ))
193 |     }
194 | 
195 |     fn is_focused(&self) -> Result<bool, AutomationError> {
196 |         Err(AutomationError::UnsupportedPlatform(
197 |             "Windows implementation is not yet available".to_string(),
198 |         ))
199 |     }
200 | 
201 |     fn perform_action(&self, _action: &str) -> Result<(), AutomationError> {
202 |         Err(AutomationError::UnsupportedPlatform(
203 |             "Windows implementation is not yet available".to_string(),
204 |         ))
205 |     }
206 | 
207 |     fn as_any(&self) -> &dyn std::any::Any {
208 |         self
209 |     }
210 | 
211 |     fn create_locator(&self, _selector: Selector) -> Result<Locator, AutomationError> {
212 |         Err(AutomationError::UnsupportedPlatform(
213 |             "Windows implementation is not yet available".to_string(),
214 |         ))
215 |     }
216 | 
217 |     fn clone_box(&self) -> Box<dyn UIElementImpl> {
218 |         Box::new(WindowsUIElement)
219 |     }
220 | }
221 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/selector.rs:
--------------------------------------------------------------------------------
 1 | use std::collections::BTreeMap;
 2 | 
 3 | /// Represents ways to locate a UI element
 4 | #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 5 | pub enum Selector {
 6 |     /// Select by role and optional name
 7 |     Role { role: String, name: Option<String> },
 8 |     /// Select by accessibility ID
 9 |     Id(String),
10 |     /// Select by name/label
11 |     Name(String),
12 |     /// Select by text content
13 |     Text(String),
14 |     /// Select using XPath-like query
15 |     Path(String),
16 |     /// Select by multiple attributes (key-value pairs)
17 |     Attributes(BTreeMap<String, String>),
18 |     /// Filter current elements by a predicate
19 |     Filter(usize), // Uses an ID to reference a filter predicate stored separately
20 |     /// Chain multiple selectors
21 |     Chain(Vec<Selector>),
22 | }
23 | 
24 | impl From<&str> for Selector {
25 |     fn from(s: &str) -> Self {
26 |         // Make common UI roles like "window", "button", etc. default to Role selectors
27 |         // instead of Name selectors
28 |         match s {
29 |             "window" | "button" | "checkbox" | "menu" | "menuitem" | "menubar" | "textfield"
30 |             | "input" => Selector::Role {
31 |                 role: s.to_string(),
32 |                 name: None,
33 |             },
34 |             // starts with AX
35 |             _ if s.starts_with("AX") => Selector::Role {
36 |                 role: s.to_string(),
37 |                 name: None,
38 |             },
39 |             _ if s.contains(':') => {
40 |                 let parts: Vec<&str> = s.splitn(2, ':').collect();
41 |                 Selector::Role {
42 |                     role: parts[0].to_string(),
43 |                     name: Some(parts[1].to_string()),
44 |                 }
45 |             }
46 |             _ if s.starts_with('#') => Selector::Id(s[1..].to_string()),
47 |             _ if s.starts_with('/') => Selector::Path(s.to_string()),
48 |             _ if s.starts_with("text:") => Selector::Text(s[5..].to_string()),
49 |             _ => Selector::Name(s.to_string()),
50 |         }
51 |     }
52 | }
53 | 


--------------------------------------------------------------------------------
/mcp-server-os-level/src/tests.rs:
--------------------------------------------------------------------------------
  1 | use tracing_subscriber::prelude::*;
  2 | 
  3 | #[cfg(test)]
  4 | mod tests {
  5 |     use super::*;
  6 |     use tracing_subscriber::{filter::LevelFilter, fmt, EnvFilter};
  7 | 
  8 |     #[cfg(target_os = "macos")]
  9 |     mod macos_tests {
 10 |         use serde_json::Value;
 11 | 
 12 |         use crate::Desktop;
 13 | 
 14 |         use super::*;
 15 | 
 16 |         // Setup tracing for tests
 17 |         fn setup_tracing() {
 18 |             let filter = EnvFilter::from_default_env()
 19 |                 .add_directive(LevelFilter::DEBUG.into())
 20 |                 .add_directive("operator=debug".parse().unwrap());
 21 | 
 22 |             tracing_subscriber::registry()
 23 |                 .with(fmt::layer())
 24 |                 .with(filter)
 25 |                 .try_init()
 26 |                 .unwrap_or_default();
 27 |         }
 28 | 
 29 |         #[test]
 30 |         #[ignore]
 31 | 
 32 |         fn test_find_buttons_in_iphone_mirroring() {
 33 |             setup_tracing();
 34 | 
 35 |             // Create a desktop automation instance
 36 |             let desktop = match Desktop::new(true, false) {
 37 |                 Ok(d) => {
 38 |                     println!("Successfully created Desktop automation");
 39 |                     d
 40 |                 }
 41 |                 Err(e) => {
 42 |                     println!("Failed to create Desktop automation: {:?}", e);
 43 |                     return;
 44 |                 }
 45 |             };
 46 | 
 47 |             let app = match desktop.application("Cursor") {
 48 |                 Ok(w) => w,
 49 |                 Err(e) => {
 50 |                     println!("Failed to find application: {:?}", e);
 51 |                     return;
 52 |                 }
 53 |             };
 54 |             println!("App: {:?}", app.attributes().label);
 55 | 
 56 |             let windows = app.locator("window").unwrap().all().unwrap_or_default();
 57 |             println!("Found {} windows", windows.len());
 58 | 
 59 |             // Print the window hierarchy to understand the structure
 60 |             println!("\n===== WINDOW HIERARCHY =====");
 61 |             if let Ok(children) = app.children() {
 62 |                 println!("App has {} direct children", children.len());
 63 |                 for (i, child) in children.iter().enumerate() {
 64 |                     println!(
 65 |                         "Child #{}: role={}, label={:?}, description={:?}",
 66 |                         i,
 67 |                         child.role(),
 68 |                         child.attributes().label,
 69 |                         child.attributes().description
 70 |                     );
 71 | 
 72 |                     // Print the next level down to see buttons
 73 |                     if let Ok(grandchildren) = child.children() {
 74 |                         println!("  Has {} children", grandchildren.len());
 75 |                         for (j, grandchild) in grandchildren.iter().enumerate() {
 76 |                             println!(
 77 |                                 "  Grandchild #{}.{}: role={}, label={:?}, description={:?}",
 78 |                                 i,
 79 |                                 j,
 80 |                                 grandchild.role(),
 81 |                                 grandchild.attributes().label,
 82 |                                 grandchild.attributes().description
 83 |                             );
 84 | 
 85 |                             // Try one more level
 86 |                             if let Ok(great_grandchildren) = grandchild.children() {
 87 |                                 println!("    Has {} children", great_grandchildren.len());
 88 |                                 for (k, ggc) in great_grandchildren.iter().take(5).enumerate() {
 89 |                                     println!(
 90 |                                         "    Great-grandchild #{}.{}.{}: role={}, label={:?}",
 91 |                                         i,
 92 |                                         j,
 93 |                                         k,
 94 |                                         ggc.role(),
 95 |                                         ggc.attributes().label
 96 |                                     );
 97 |                                 }
 98 |                                 if great_grandchildren.len() > 5 {
 99 |                                     println!("    ... and {} more", great_grandchildren.len() - 5);
100 |                                 }
101 |                             }
102 |                         }
103 |                     }
104 |                 }
105 |             }
106 | 
107 |             // Find buttons in the application window
108 |             println!("\n===== BUTTON SEARCH RESULTS =====");
109 |             let buttons = match app.locator("button") {
110 |                 Ok(locator) => locator.all().unwrap_or_default(),
111 |                 Err(_) => Vec::new(),
112 |             };
113 |             println!("Found {} buttons via locator API", buttons.len());
114 | 
115 |             // Print details about each button by type
116 |             let mut ax_button_count = 0;
117 |             let mut ax_menu_item_count = 0;
118 |             let mut ax_menu_bar_item_count = 0;
119 |             let mut ax_static_text_count = 0;
120 |             let mut ax_image_count = 0;
121 |             let mut other_count = 0;
122 | 
123 |             for (i, button) in buttons.iter().enumerate() {
124 |                 let button_type = if let Some(props) = button.attributes().properties.get("AXRole")
125 |                 {
126 |                     let props_str = props.clone();
127 |                     props_str.unwrap_or_default()
128 |                 } else {
129 |                     Value::String("unknown".to_string())
130 |                 };
131 | 
132 |                 println!(
133 |                     "Button #{}: type={}, role={}, label={:?}, description={:?}",
134 |                     i,
135 |                     button_type,
136 |                     button.role(),
137 |                     button.attributes().label,
138 |                     button.attributes().description
139 |                 );
140 | 
141 |                 // if description is "Rust" then click it
142 |                 if button.attributes().description == Some("Rust".to_string()) {
143 |                     match button.click() {
144 |                         Ok(_) => println!("Clicked button: {:?}", button.attributes().label),
145 |                         Err(e) => println!("Failed to click button: {:?}", e),
146 |                     }
147 |                 }
148 | 
149 |                 // Count by type
150 |                 match button_type.as_str() {
151 |                     Some("AXButton") => ax_button_count += 1,
152 |                     Some("AXMenuItem") => ax_menu_item_count += 1,
153 |                     Some("AXMenuBarItem") => ax_menu_bar_item_count += 1,
154 |                     Some("AXStaticText") => ax_static_text_count += 1,
155 |                     Some("AXImage") => ax_image_count += 1,
156 |                     _ => other_count += 1,
157 |                 }
158 |             }
159 | 
160 |             // Print summary of button types
161 |             println!("\n===== BUTTON TYPE SUMMARY =====");
162 |             println!("AXButton: {}", ax_button_count);
163 |             println!("AXMenuItem: {}", ax_menu_item_count);
164 |             println!("AXMenuBarItem: {}", ax_menu_bar_item_count);
165 |             println!("AXStaticText: {}", ax_static_text_count);
166 |             println!("AXImage: {}", ax_image_count);
167 |             println!("Other: {}", other_count);
168 |             println!("Total: {}", buttons.len());
169 | 
170 |             // Make sure we found at least some buttons
171 |             assert!(buttons.len() > 0, "No buttons found in iPhone Mirroring");
172 | 
173 |             // Check that we found the standard menu bar items
174 |             assert_eq!(
175 |                 ax_menu_bar_item_count, 6,
176 |                 "Should find exactly 6 menu bar items"
177 |             );
178 |         }
179 | 
180 |         #[test]
181 |         #[ignore]
182 |         fn test_find_and_fill_text_inputs() {
183 |             setup_tracing();
184 | 
185 |             // Create a desktop automation instance
186 |             let desktop = match Desktop::new(true, false) {
187 |                 Ok(d) => {
188 |                     println!("Successfully created Desktop automation");
189 |                     d
190 |                 }
191 |                 Err(e) => {
192 |                     println!("Failed to create Desktop automation: {:?}", e);
193 |                     return;
194 |                 }
195 |             };
196 | 
197 |             let app = desktop.application("Arc").unwrap();
198 | 
199 |             let children = app.children().unwrap();
200 | 
201 |             println!("App children: {:?}", children.len());
202 | 
203 |             for (i, child) in children.iter().enumerate() {
204 |                 println!("App child #{}: {:?}", i, child.role());
205 |             }
206 | 
207 |             let input = app.locator("window").unwrap().first().unwrap_or_default();
208 |             println!("found input: {:?}", input.is_some());
209 |             println!("found input: {:?}", input.unwrap().text(10).unwrap());
210 |         }
211 | 
212 |         #[test]
213 |         #[ignore]
214 |         fn test_find_and_fill_text_inputsv2() {
215 |             setup_tracing();
216 | 
217 |             // Create a desktop automation instance
218 |             let desktop = match Desktop::new(true, true) {
219 |                 Ok(d) => {
220 |                     println!("Successfully created Desktop automation");
221 |                     d
222 |                 }
223 |                 Err(e) => {
224 |                     println!("Failed to create Desktop automation: {:?}", e);
225 |                     return;
226 |                 }
227 |             };
228 | 
229 |             let app = desktop.application("Arc").unwrap();
230 | 
231 |             let children = app.children().unwrap();
232 | 
233 |             println!("App children: {:?}", children.len());
234 | 
235 |             for (i, child) in children.iter().enumerate() {
236 |                 println!("App child #{}: {:?}", i, child.role());
237 |             }
238 | 
239 |             let buttons = app.locator("AXButton").unwrap().all().unwrap_or_default();
240 |             for b in buttons {
241 |                 println!("b: {:?}", b.role());
242 |                 println!("b: {:?}", b.attributes().label);
243 |                 let text = b.text(4).unwrap_or_default();
244 |                 println!("b: {:?}", text);
245 |                 if text.contains("Click") {
246 |                     println!("clicking");
247 |                     let _ = b.type_text("foo");
248 |                     b.focus().unwrap();
249 |                     if let Err(e) = b.click() {
250 |                         println!("failed to click: {:?}", e);
251 |                     }
252 |                 }
253 |             }
254 |             // input.focus().err().unwrap();
255 |             // let text = input.text(10).unwrap();
256 |             // println!("text: {:?}", text);
257 | 
258 |             // let children = input.children().unwrap();
259 |             // println!("children: {:?}", children.len());
260 |         }
261 |     }
262 | }
263 | 


--------------------------------------------------------------------------------