├── .build └── arm64-apple-macosx │ └── debug │ └── mcp-server-macos-use ├── .gitignore ├── Package.swift ├── Package.resolved ├── LICENSE ├── README.md └── Sources └── main.swift /.build/arm64-apple-macosx/debug/mcp-server-macos-use: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mediar-ai/mcp-server-macos-use/HEAD/.build/arm64-apple-macosx/debug/mcp-server-macos-use -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /.build 3 | .build 4 | # But DO track the specific executable 5 | !/.build/debug/mcp-server-macos-use 6 | /Packages 7 | xcuserdata/ 8 | DerivedData/ 9 | .swiftpm/configuration/registries.json 10 | .swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata 11 | .netrc 12 | -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version: 5.9 2 | // The swift-tools-version declares the minimum version of Swift required to build this package. 3 | 4 | import PackageDescription 5 | 6 | let package = Package( 7 | name: "mcp-server-macos-use", 8 | platforms: [ 9 | .macOS(.v13) 10 | ], 11 | dependencies: [ 12 | .package(url: "https://github.com/modelcontextprotocol/swift-sdk.git", from: "0.7.1"), 13 | .package(url: "https://github.com/mediar-ai/MacosUseSDK.git", branch: "main") 14 | ], 15 | targets: [ 16 | // Targets are the basic building blocks of a package, defining a module or a test suite. 17 | // Targets can depend on other targets in this package and products from dependencies. 
18 | .executableTarget( 19 | name: "mcp-server-macos-use", 20 | dependencies: [ 21 | .product(name: "MCP", package: "swift-sdk"), 22 | .product(name: "MacosUseSDK", package: "MacosUseSDK") 23 | ] 24 | ), 25 | ] 26 | ) 27 | -------------------------------------------------------------------------------- /Package.resolved: -------------------------------------------------------------------------------- 1 | { 2 | "pins" : [ 3 | { 4 | "identity" : "eventsource", 5 | "kind" : "remoteSourceControl", 6 | "location" : "https://github.com/mattt/eventsource.git", 7 | "state" : { 8 | "revision" : "07957602bb99a5355c810187e66e6ce378a1057d", 9 | "version" : "1.1.1" 10 | } 11 | }, 12 | { 13 | "identity" : "macosusesdk", 14 | "kind" : "remoteSourceControl", 15 | "location" : "https://github.com/mediar-ai/MacosUseSDK.git", 16 | "state" : { 17 | "branch" : "main", 18 | "revision" : "5d810b557dc73f8fb930767671b0cfcd989a23e7" 19 | } 20 | }, 21 | { 22 | "identity" : "swift-log", 23 | "kind" : "remoteSourceControl", 24 | "location" : "https://github.com/apple/swift-log.git", 25 | "state" : { 26 | "revision" : "ce592ae52f982c847a4efc0dd881cc9eb32d29f2", 27 | "version" : "1.6.4" 28 | } 29 | }, 30 | { 31 | "identity" : "swift-sdk", 32 | "kind" : "remoteSourceControl", 33 | "location" : "https://github.com/modelcontextprotocol/swift-sdk.git", 34 | "state" : { 35 | "revision" : "f1b50e6de22b5206068bb09851d585f560d893c4", 36 | "version" : "0.10.1" 37 | } 38 | }, 39 | { 40 | "identity" : "swift-system", 41 | "kind" : "remoteSourceControl", 42 | "location" : "https://github.com/apple/swift-system.git", 43 | "state" : { 44 | "revision" : "890830fff1a577dc83134890c7984020c5f6b43b", 45 | "version" : "1.6.2" 46 | } 47 | } 48 | ], 49 | "version" : 2 50 | } 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Business Source License 1.1 2 | 3 | TERMS 4 | 5 | The Licensor 
hereby grants you the right to copy, modify, create derivative works, redistribute, and make non-production use of the Licensed Work. The Licensor may make an Additional Use Grant, above, permitting limited production use. 6 | 7 | Effective on the Change Date, or the fourth anniversary of the first publicly available distribution of a specific version of the Licensed Work under this License, whichever comes first, the Licensor hereby grants you rights under the terms of the Change License, and the rights granted in the paragraph above terminate. 8 | 9 | If your use of the Licensed Work does not comply with the requirements currently in effect as described in this License, you must purchase a commercial license from the Licensor, its affiliated entities, or authorized resellers, or you must refrain from using the Licensed Work. 10 | 11 | All copies of the original and modified Licensed Work, and derivative works of the Licensed Work, are subject to this License. This License applies separately for each version of the Licensed Work and the Change Date may vary for each version of the Licensed Work released by Licensor. 12 | 13 | You must conspicuously display this License on each original or modified copy of the Licensed Work. If you receive the Licensed Work in original or modified form from a third party, the terms and conditions set forth in this License apply to your use of that work. 14 | 15 | Any use of the Licensed Work in violation of this License will automatically terminate your rights under this License for the current and all other versions of the Licensed Work. 16 | 17 | This License does not grant you any right in any trademark or logo of Licensor or its affiliates (provided that you may use a trademark or logo of Licensor as expressly required by this License). 18 | 19 | TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON AN "AS IS" BASIS. 
LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND TITLE. 20 | 21 | MariaDB hereby grants you permission to use this License's text to license your works, and to refer to it using the trademark "Business Source License", as long as you comply with the Covenants of Licensor below. 22 | 23 | COVENANTS OF LICENSOR 24 | 25 | In consideration of the right to use this License's text and the "Business Source License" name and trademark, Licensor covenants to MariaDB, and to all other recipients of the licensed work to be provided by Licensor: 26 | 27 | 1. To specify as the Change License the GPL Version 2.0 or any later version, or a license that is compatible with GPL Version 2.0 or a later version, where "compatible" means that software provided under the Change License can be included in a program with software provided under GPL Version 2.0 or a later version. Licensor may specify additional Change Licenses without limitation. 28 | 2. To either: (a) specify an additional grant of rights to use that does not impose any additional restriction on the right granted in this License, as the Additional Use Grant; or (b) insert the text "None". 29 | 3. Not to modify this License in any other way. 30 | 31 | ------------------------- 32 | 33 | Licensed Work: screenpipe Computer Agent 34 | Licensor: Mediar, Inc. 35 | Additional Use Grant: Production use is permitted for non-commercial, educational purposes only 36 | Change Date: April 9, 2028 37 | Change License: MIT License -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mcp-server-macos-use 2 | 3 | Model Context Protocol (MCP) server in Swift. 
It allows controlling macOS applications by leveraging the accessibility APIs, primarily through the `MacosUseSDK`. 4 | 5 | You can use it with Claude Desktop or any other compatible MCP client. 6 | 7 | The server listens for MCP commands over standard input/output (`stdio`) and exposes several tools to interact with applications. 8 | 9 | 10 | https://github.com/user-attachments/assets/b43622a3-3d20-4026-b02f-e9add06afe2b 11 | 12 | ## Available Tools 13 | 14 | The server exposes the following tools via the `CallTool` MCP method: 15 | 16 | 1. **`macos-use_open_application_and_traverse`** 17 | * **Description:** Opens or activates a specified application and then traverses its accessibility tree. 18 | * **Parameters:** 19 | * `identifier` (String, Required): The application's name, bundle ID, or file path. 20 | 21 | 2. **`macos-use_click_and_traverse`** 22 | * **Description:** Simulates a mouse click at specific coordinates within the window of the target application (identified by PID) and then traverses its accessibility tree. 23 | * **Parameters:** 24 | * `pid` (Number, Required): The Process ID (PID) of the target application. 25 | * `x` (Number, Required): The X-coordinate for the click (relative to the window/screen, depending on SDK behavior). 26 | * `y` (Number, Required): The Y-coordinate for the click. 27 | 28 | 3. **`macos-use_type_and_traverse`** 29 | * **Description:** Simulates typing text into the target application (identified by PID) and then traverses its accessibility tree. 30 | * **Parameters:** 31 | * `pid` (Number, Required): The Process ID (PID) of the target application. 32 | * `text` (String, Required): The text to be typed. 33 | 34 | 4. **`macos-use_press_key_and_traverse`** 35 | * **Description:** Simulates pressing a specific keyboard key (e.g., 'Enter', 'Tab', 'a', 'B') with optional modifier keys held down, targeting the application specified by PID, and then traverses its accessibility tree.
36 | * **Parameters:** 37 | * `pid` (Number, Required): The Process ID (PID) of the target application. 38 | * `keyName` (String, Required): The name of the key (e.g., `Return`, `Escape`, `ArrowUp`, `Delete`, `a`, `B`). Case-sensitive for letters if no modifiers are active. 39 | * `modifierFlags` (Array, Optional): An array of modifier keys to hold during the press. Valid values: `CapsLock` (or `Caps`), `Shift`, `Control` (or `Ctrl`), `Option` (or `Opt`, `Alt`), `Command` (or `Cmd`), `Function` (or `Fn`), `NumericPad` (or `Numpad`), `Help`. 40 | 41 | 5. **`macos-use_refresh_traversal`** 42 | * **Description:** Only performs the accessibility tree traversal for the specified application (identified by PID). Useful for getting the current UI state without performing an action. 43 | * **Parameters:** 44 | * `pid` (Number, Required): The Process ID (PID) of the application to traverse. 45 | 46 | **Common Optional Parameters (for `CallTool`)** 47 | 48 | These can potentially be passed in the `arguments` object for any tool call to override default `MacosUseSDK` behavior (refer to `ActionOptions` in the code): 49 | 50 | * `traverseBefore` (Boolean, Optional): Traverse accessibility tree before the primary action. 51 | * `traverseAfter` (Boolean, Optional): Traverse accessibility tree after the primary action (usually defaults to true). 52 | * `showDiff` (Boolean, Optional): Include a diff between traversals (if applicable). 53 | * `onlyVisibleElements` (Boolean, Optional): Limit traversal to visible elements. 54 | * `showAnimation` (Boolean, Optional): Show visual feedback animation for actions. 55 | * `animationDuration` (Number, Optional): Duration of the feedback animation. 56 | * `delayAfterAction` (Number, Optional): Add a delay after performing the action. 
57 | 58 | ## Dependencies 59 | 60 | * `MacosUseSDK` (Assumed local or external Swift package providing macOS control functionality) 61 | 62 | ## Building and Running 63 | 64 | ```bash 65 | # Example build command (adjust as needed, use 'debug' for development) 66 | swift build -c debug # Or 'release' for production 67 | 68 | # Running the server (it communicates via stdin/stdout) 69 | ./.build/debug/mcp-server-macos-use 70 | ``` 71 | 72 | **Integrating with Clients (Example: Claude Desktop)** 73 | 74 | Once built, you need to tell your client application where to find the server executable. For example, to configure Claude Desktop, you might add the following to its configuration: 75 | 76 | ```json 77 | { 78 | "mcpServers": { 79 | "mcp-server-macos-use": { 80 | "command": "/path/to/your/project/mcp-server-macos-use/.build/debug/mcp-server-macos-use" 81 | } 82 | } 83 | } 84 | ``` 85 | 86 | *Replace `/path/to/your/project/` with the actual absolute path to your `mcp-server-macos-use` directory.* 87 | 88 | ## Help 89 | 90 | Reach out to matt@mediar.ai 91 | Discord: m13v_ 92 | 93 | 94 | ## Plans 95 | 96 | Happy to tailor the server for your needs, feel free to open an issue or reach out 97 | -------------------------------------------------------------------------------- /Sources/main.swift: --------------------------------------------------------------------------------
import MCP
import Foundation
import CoreGraphics // Still needed for CGPoint, CGEventFlags
import MacosUseSDK // <-- Import the SDK

// MARK: - JSON serialization

/// Encodes `value` as a JSON string (pretty-printed, keys sorted, slashes unescaped).
///
/// - Parameter value: Any `Encodable` value.
/// - Returns: The JSON text, or `nil` when encoding fails; the failure is logged to stderr.
func serializeToJsonString<T: Encodable>(_ value: T) -> String? {
    let encoder = JSONEncoder()
    // Pretty printing and sorted keys make the output stable and easy to read while debugging.
    encoder.outputFormatting = [.prettyPrinted, .sortedKeys, .withoutEscapingSlashes]
    do {
        let jsonData = try encoder.encode(value)
        return String(data: jsonData, encoding: .utf8)
    } catch {
        fputs("error: serializeToJsonString: failed to encode value to JSON: \(error)\n", stderr)
        return nil
    }
}

// MARK: - Argument coercion (shared by the required/optional helpers)

/// Converts an MCP `Value` to `Double`, accepting both `.int` and `.double`.
///
/// - Parameters:
///   - value: The raw argument value.
///   - key: Argument name, used in log and error messages.
///   - kind: `"required"` or `"optional"`, interpolated into the error message.
///   - caller: Name of the public helper, used as the log prefix.
/// - Throws: `MCPError.invalidParams` when `value` is neither an int nor a double.
private func coerceToDouble(_ value: Value, key: String, kind: String, caller: String) throws -> Double {
    switch value {
    case .int(let intValue):
        fputs("log: \(caller): converting int \(intValue) to double for key '\(key)'\n", stderr)
        return Double(intValue)
    case .double(let doubleValue):
        return doubleValue
    default:
        throw MCPError.invalidParams("Invalid type for \(kind) number argument: '\(key)', expected Int or Double, got \(value)")
    }
}

/// Converts an MCP `Value` to `Int`, accepting `.int` and any `.double` that is an exact integer.
///
/// - Parameters:
///   - value: The raw argument value.
///   - key: Argument name, used in log and error messages.
///   - kind: `"required"` or `"optional"`, interpolated into the error message.
///   - caller: Name of the public helper, used as the log prefix.
/// - Throws: `MCPError.invalidParams` for a non-integral double or any non-numeric value.
private func coerceToInt(_ value: Value, key: String, kind: String, caller: String) throws -> Int {
    switch value {
    case .int(let intValue):
        return intValue
    case .double(let doubleValue):
        // JSON clients frequently send integers as doubles (e.g. 1234.0); accept only exact ones.
        guard let intValue = Int(exactly: doubleValue) else {
            fputs("warning: \(caller): received non-exact double \(doubleValue) for key '\(key)', expecting integer.\n", stderr)
            throw MCPError.invalidParams("Invalid type for \(kind) integer argument: '\(key)', received non-exact Double \(doubleValue)")
        }
        fputs("log: \(caller): converting exact double \(doubleValue) to int for key '\(key)'\n", stderr)
        return intValue
    default:
        throw MCPError.invalidParams("Invalid type for \(kind) integer argument: '\(key)', expected Int or exact Double, got \(value)")
    }
}

// MARK: - Required arguments

/// Returns the string stored under `key`.
///
/// - Throws: `MCPError.invalidParams` when the key is missing or its value is not a string.
func getRequiredString(from args: [String: Value]?, key: String) throws -> String {
    guard let val = args?[key]?.stringValue else {
        throw MCPError.invalidParams("Missing or invalid required string argument: '\(key)'")
    }
    return val
}

/// Returns the number stored under `key` as a `Double` (ints are widened).
///
/// - Throws: `MCPError.invalidParams` when the key is missing or its value is not numeric.
func getRequiredDouble(from args: [String: Value]?, key: String) throws -> Double {
    guard let value = args?[key] else {
        throw MCPError.invalidParams("Missing required number argument: '\(key)'")
    }
    return try coerceToDouble(value, key: key, kind: "required", caller: "getRequiredDouble")
}

/// Returns the integer stored under `key`; a double is accepted only if it is an exact integer.
///
/// - Throws: `MCPError.invalidParams` when the key is missing or its value is not an exact integer.
func getRequiredInt(from args: [String: Value]?, key: String) throws -> Int {
    guard let value = args?[key] else {
        throw MCPError.invalidParams("Missing required integer argument: '\(key)'")
    }
    return try coerceToInt(value, key: key, kind: "required", caller: "getRequiredInt")
}

// MARK: - Optional arguments

/// Returns the number stored under `key` as a `Double`, or `nil` when the key is absent or null.
///
/// - Throws: `MCPError.invalidParams` when a non-null value is present but not numeric.
func getOptionalDouble(from args: [String: Value]?, key: String) throws -> Double? {
    // A missing key and an explicit null are both valid ways to omit an optional argument.
    guard let value = args?[key], !value.isNull else { return nil }
    return try coerceToDouble(value, key: key, kind: "optional", caller: "getOptionalDouble")
}

/// Returns the integer stored under `key`, or `nil` when the key is absent or null.
///
/// - Throws: `MCPError.invalidParams` when a non-null value is present but is not an exact integer.
func getOptionalInt(from args: [String: Value]?, key: String) throws -> Int? {
    guard let value = args?[key], !value.isNull else { return nil }
    return try coerceToInt(value, key: key, kind: "optional", caller: "getOptionalInt")
}

/// Returns the boolean stored under `key`, or `nil` when the key is absent or null.
///
/// - Throws: `MCPError.invalidParams` when a non-null value is present but is not a boolean.
func getOptionalBool(from args: [String: Value]?, key: String) throws -> Bool? {
    guard let value = args?[key], !value.isNull else { return nil }
    guard let boolValue = value.boolValue else {
        throw MCPError.invalidParams("Invalid type for optional boolean argument: '\(key)', expected Bool, got \(value)")
    }
    return boolValue
}

// MARK: - Modifier flags

/// Parses the optional `modifierFlags` argument into `CGEventFlags`.
///
/// Matching is case-insensitive. Recognised names: `capslock`/`caps`, `shift`, `control`/`ctrl`,
/// `option`/`opt`/`alt`, `command`/`cmd`, `help`, `function`/`fn`, `numericpad`/`numpad`.
/// Unknown names are logged to stderr and ignored.
///
/// - Parameter value: The raw argument; `nil` or null means "no modifiers".
/// - Returns: The union of all recognised flags (empty when none were supplied).
/// - Throws: `MCPError.invalidParams` when the value is present but not an array, or when the
///   array contains a non-string element.
func parseFlags(from value: Value?) throws -> CGEventFlags {
    // Absent or explicit null: no modifiers requested.
    guard let value, !value.isNull else { return [] }

    // A present-but-malformed value must not silently degrade to "no modifiers": that would turn
    // e.g. Cmd+Q into a bare "q" key press. Reject it like the other optional-argument helpers do.
    guard let arrayValue = value.arrayValue else {
        throw MCPError.invalidParams("Invalid type for optional argument 'modifierFlags': expected an array of strings, got \(value)")
    }

    var flags: CGEventFlags = []
    for flagValue in arrayValue {
        guard let flagString = flagValue.stringValue else {
            throw MCPError.invalidParams("Invalid modifierFlags array: contains non-string element \(flagValue)")
        }
        switch flagString.lowercased() {
        // Standard modifiers
        case "capslock", "caps": flags.insert(.maskAlphaShift)
        case "shift": flags.insert(.maskShift)
        case "control", "ctrl": flags.insert(.maskControl)
        case "option", "opt", "alt": flags.insert(.maskAlternate)
        case "command", "cmd": flags.insert(.maskCommand)
        // Other potentially useful flags
        case "help": flags.insert(.maskHelp)
        case "function", "fn": flags.insert(.maskSecondaryFn)
        case "numericpad", "numpad": flags.insert(.maskNumericPad)
        default:
            // Deliberately lenient: an unknown name is reported but does not fail the call.
            fputs("warning: parseFlags: unknown modifier flag string '\(flagString)', ignoring.\n", stderr)
        }
    }
    return flags
}

// MARK: - Tool definitions

/// Names of the tools exposed by this server; shared by the tool list and the `CallTool` dispatcher.
private enum ToolName {
    static let openApplication = "macos-use_open_application_and_traverse"
    static let click = "macos-use_click_and_traverse"
    static let typeText = "macos-use_type_and_traverse"
    static let pressKey = "macos-use_press_key_and_traverse"
    static let refreshTraversal = "macos-use_refresh_traversal"
}

/// Builds a JSON-schema property entry of the form `{ "type": ..., "description": ... }`.
private func makePropertySchema(type: String, description: String) -> Value {
    .object(["type": .string(type), "description": .string(description)])
}

/// Builds a JSON-schema object with the given properties and list of required property names.
private func makeObjectSchema(properties: [String: Value], required: [String]) -> Value {
    .object([
        "type": .string("object"),
        "properties": .object(properties),
        "required": .array(required.map { Value.string($0) })
    ])
}

/// Creates the tools advertised through `ListTools`, in the order they are reported to clients.
private func makeTools() -> [Tool] {
    let targetPid = makePropertySchema(type: "number", description: "REQUIRED. PID of the target application window.")

    let openAppTool = Tool(
        name: ToolName.openApplication,
        description: "Opens/activates an application and then traverses its accessibility tree.",
        inputSchema: makeObjectSchema(
            properties: [
                "identifier": makePropertySchema(type: "string", description: "REQUIRED. App name, path, or bundle ID.")
            ],
            required: ["identifier"]
        )
    )

    let clickTool = Tool(
        name: ToolName.click,
        description: "Simulates a click at the given coordinates within the app specified by PID, then traverses its accessibility tree.",
        inputSchema: makeObjectSchema(
            properties: [
                "pid": targetPid,
                "x": makePropertySchema(type: "number", description: "REQUIRED. X coordinate for the click."),
                "y": makePropertySchema(type: "number", description: "REQUIRED. Y coordinate for the click.")
            ],
            required: ["pid", "x", "y"]
        )
    )

    let typeTool = Tool(
        name: ToolName.typeText,
        description: "Simulates typing text into the app specified by PID, then traverses its accessibility tree.",
        inputSchema: makeObjectSchema(
            properties: [
                "pid": targetPid,
                "text": makePropertySchema(type: "string", description: "REQUIRED. Text to type.")
            ],
            required: ["pid", "text"]
        )
    )

    let pressKeyTool = Tool(
        name: ToolName.pressKey,
        description: "Simulates pressing a specific key (like Return, Enter, Escape, Tab, Arrow Keys, regular characters) with optional modifiers, then traverses the accessibility tree.",
        inputSchema: makeObjectSchema(
            properties: [
                "pid": targetPid,
                "keyName": makePropertySchema(
                    type: "string",
                    description: "REQUIRED. Name of the key to press (e.g., 'Return', 'Enter', 'Escape', 'Tab', 'ArrowUp', 'Delete', 'a', 'B'). Case-sensitive for letter keys if no modifiers used."
                ),
                // Optional array of strings.
                "modifierFlags": .object([
                    "type": .string("array"),
                    "description": .string("OPTIONAL. Modifier keys to hold (e.g., ['Command', 'Shift']). Valid: CapsLock, Shift, Control, Option, Command, Function, NumericPad, Help."),
                    "items": .object(["type": .string("string")])
                ])
            ],
            required: ["pid", "keyName"]
        )
    )

    let refreshTool = Tool(
        name: ToolName.refreshTraversal,
        description: "Traverses the accessibility tree of the application specified by PID.",
        inputSchema: makeObjectSchema(
            properties: [
                "pid": makePropertySchema(type: "number", description: "REQUIRED. PID of the application to traverse.")
            ],
            required: ["pid"]
        )
    )

    return [openAppTool, clickTool, typeTool, pressKeyTool, refreshTool]
}

// MARK: - CallTool argument parsing

/// Reads the optional `pid` argument and narrows it to `pid_t`.
///
/// - Returns: The pid, or `nil` when the argument is absent or null.
/// - Throws: `MCPError.invalidParams` when the value is not an integer or does not fit in `pid_t`.
private func parsePid(from arguments: [String: Value]?) throws -> pid_t? {
    guard let rawPid = try getOptionalInt(from: arguments, key: "pid") else { return nil }
    guard let pid = pid_t(exactly: rawPid) else {
        fputs("error: handler(CallTool): PID value \(rawPid) is out of range for pid_t (Int32).\n", stderr)
        throw MCPError.invalidParams("PID value \(rawPid) is out of range.")
    }
    return pid
}

/// Unwraps a pid that a tool requires.
///
/// - Parameters:
///   - pid: The parsed `pid` argument, if any.
///   - toolLabel: Short tool label interpolated into the error message (e.g. `"click"`).
/// - Throws: `MCPError.invalidParams` when `pid` is `nil`.
private func requirePid(_ pid: pid_t?, toolLabel: String) throws -> pid_t {
    guard let pid else {
        throw MCPError.invalidParams("Missing required 'pid' for \(toolLabel) tool")
    }
    return pid
}

/// Builds the SDK `ActionOptions` for a tool call: SDK defaults, overridden by any of the optional
/// arguments (`traverseBefore`, `traverseAfter`, `showDiff`, `onlyVisibleElements`,
/// `showAnimation`, `animationDuration`, `delayAfterAction`), then passed through `validated()`.
///
/// - Parameters:
///   - arguments: The raw `CallTool` arguments.
///   - pid: The pid to traverse, if one was supplied.
/// - Throws: `MCPError.invalidParams` when an override has the wrong type.
private func makeActionOptions(from arguments: [String: Value]?, pid: pid_t?) throws -> ActionOptions {
    var options = ActionOptions() // Start with default options
    options.pidForTraversal = pid

    options.traverseBefore = try getOptionalBool(from: arguments, key: "traverseBefore") ?? options.traverseBefore
    options.traverseAfter = try getOptionalBool(from: arguments, key: "traverseAfter") ?? options.traverseAfter
    options.showDiff = try getOptionalBool(from: arguments, key: "showDiff") ?? options.showDiff
    options.onlyVisibleElements = try getOptionalBool(from: arguments, key: "onlyVisibleElements") ?? options.onlyVisibleElements
    options.showAnimation = try getOptionalBool(from: arguments, key: "showAnimation") ?? options.showAnimation
    options.animationDuration = try getOptionalDouble(from: arguments, key: "animationDuration") ?? options.animationDuration
    options.delayAfterAction = try getOptionalDouble(from: arguments, key: "delayAfterAction") ?? options.delayAfterAction

    return options.validated()
}

// MARK: - Server setup

/// Creates the MCP server, registers all method handlers, and starts it on a stdio transport.
///
/// - Returns: The started server; the caller awaits `waitUntilCompleted()` on it.
/// - Throws: Whatever `server.start(transport:)` throws.
func setupAndStartServer() async throws -> Server {
    fputs("log: setupAndStartServer: entering function.\n", stderr)

    // --- Aggregate list of tools ---
    let allTools = makeTools()
    fputs("log: setupAndStartServer: defined \(allTools.count) tools: \(allTools.map { $0.name })\n", stderr)

    let server = Server(
        name: "SwiftMacOSServerDirect",
        version: "1.3.0",
        capabilities: .init(
            tools: .init(listChanged: true)
        )
    )
    fputs("log: setupAndStartServer: server instance created (\(server.name)) version \(server.version).\n", stderr)

    // --- Dummy Handlers (ReadResource, ListResources, ListPrompts) ---
    // Kept so clients that probe these MCP methods get a well-formed (empty/dummy) answer.
    await server.withMethodHandler(ReadResource.self) { params in
        let uri = params.uri
        fputs("log: handler(ReadResource): received request for uri: \(uri) (dummy handler)\n", stderr)
        return .init(contents: [.text("dummy content for \(uri)", uri: uri)])
    }
    fputs("log: setupAndStartServer: registered ReadResource handler (dummy).\n", stderr)

    await server.withMethodHandler(ListResources.self) { _ in
        fputs("log: handler(ListResources): received request (dummy handler).\n", stderr)
        return ListResources.Result(resources: [])
    }
    fputs("log: setupAndStartServer: registered ListResources handler (dummy).\n", stderr)

    await server.withMethodHandler(ListPrompts.self) { _ in
        fputs("log: handler(ListPrompts): received request (dummy handler).\n", stderr)
        return ListPrompts.Result(prompts: [])
    }
    fputs("log: setupAndStartServer: registered ListPrompts handler (dummy).\n", stderr)

    // --- ListTools Handler ---
    await server.withMethodHandler(ListTools.self) { _ in
        fputs("log: handler(ListTools): received request.\n", stderr)
        let result = ListTools.Result(tools: allTools)
        fputs("log: handler(ListTools): responding with \(result.tools.count) tools: \(result.tools.map { $0.name })\n", stderr)
        return result
    }
    fputs("log: setupAndStartServer: registered ListTools handler.\n", stderr)

    // --- CallTool Handler (direct SDK call) ---
    await server.withMethodHandler(CallTool.self) { params in
        fputs("log: handler(CallTool): received request for tool: \(params.name).\n", stderr)
        fputs("log: handler(CallTool): arguments received (raw MCP): \(params.arguments?.debugDescription ?? "nil")\n", stderr)

        do {
            // --- Determine Action and Options from MCP Params ---
            // PID is required for click, type, press, refresh; optional only for open.
            let requestedPid = try parsePid(from: params.arguments)
            var options = try makeActionOptions(from: params.arguments, pid: requestedPid)
            fputs("log: handler(CallTool): constructed ActionOptions: \(options)\n", stderr)

            let primaryAction: PrimaryAction
            switch params.name {
            case ToolName.openApplication:
                let identifier = try getRequiredString(from: params.arguments, key: "identifier")
                primaryAction = .open(identifier: identifier)

            case ToolName.click:
                let pid = try requirePid(requestedPid, toolLabel: "click")
                let x = try getRequiredDouble(from: params.arguments, key: "x")
                let y = try getRequiredDouble(from: params.arguments, key: "y")
                primaryAction = .input(action: .click(point: CGPoint(x: x, y: y)))
                options.pidForTraversal = pid // Re-affirm after validation

            case ToolName.typeText:
                let pid = try requirePid(requestedPid, toolLabel: "type")
                let text = try getRequiredString(from: params.arguments, key: "text")
                primaryAction = .input(action: .type(text: text))
                options.pidForTraversal = pid // Re-affirm after validation

            case ToolName.pressKey:
                let pid = try requirePid(requestedPid, toolLabel: "press key")
                let keyName = try getRequiredString(from: params.arguments, key: "keyName")
                let flags = try parseFlags(from: params.arguments?["modifierFlags"])
                fputs("log: handler(CallTool): parsed modifierFlags: \(flags)\n", stderr)
                primaryAction = .input(action: .press(keyName: keyName, flags: flags))
                options.pidForTraversal = pid // Re-affirm after validation

            case ToolName.refreshTraversal:
                let pid = try requirePid(requestedPid, toolLabel: "refresh")
                primaryAction = .traverseOnly
                options.pidForTraversal = pid // Re-affirm after validation

            default:
                fputs("error: handler(CallTool): received request for unknown or unsupported tool: \(params.name)\n", stderr)
                throw MCPError.methodNotFound(params.name)
            }

            fputs("log: handler(CallTool): constructed PrimaryAction: \(primaryAction)\n", stderr)

            // --- Execute the Action using MacosUseSDK ---
            // The SDK call is hopped onto the main actor through a Task whose value is awaited.
            let finalOptions = options
            let actionResult: ActionResult = await Task { @MainActor in
                fputs("log: handler(CallTool): executing performAction on MainActor via Task...\n", stderr)
                return await performAction(action: primaryAction, optionsInput: finalOptions)
            }.value
            fputs("log: handler(CallTool): performAction task completed.\n", stderr)

            // --- Serialize the ActionResult to JSON ---
            guard let resultJsonString = serializeToJsonString(actionResult) else {
                fputs("error: handler(CallTool): failed to serialize ActionResult to JSON for tool \(params.name).\n", stderr)
                throw MCPError.internalError("failed to serialize ActionResult to JSON")
            }
            fputs("log: handler(CallTool): successfully serialized ActionResult to JSON string:\n\(resultJsonString)\n", stderr)

            // --- Determine if it was an error overall ---
            // Traversal errors only count when that traversal was actually requested.
            let isError = actionResult.primaryActionError != nil ||
                (finalOptions.traverseBefore && actionResult.traversalBeforeError != nil) ||
                (finalOptions.traverseAfter && actionResult.traversalAfterError != nil)

            if isError {
                fputs("warning: handler(CallTool): Action resulted in an error state (primary: \(actionResult.primaryActionError ?? "nil"), before: \(actionResult.traversalBeforeError ?? "nil"), after: \(actionResult.traversalAfterError ?? "nil")).\n", stderr)
            }

            // --- Return the JSON result ---
            let content: [Tool.Content] = [.text(resultJsonString)]
            return .init(content: content, isError: isError)

        } catch let error as MCPError {
            fputs("error: handler(CallTool): MCPError occurred processing MCP params for tool '\(params.name)': \(error)\n", stderr)
            return .init(content: [.text("Error processing parameters for tool '\(params.name)': \(error.localizedDescription)")], isError: true)
        } catch {
            fputs("error: handler(CallTool): Unexpected error occurred setting up call for tool '\(params.name)': \(error)\n", stderr)
            return .init(content: [.text("Unexpected setup error executing tool '\(params.name)': \(error.localizedDescription)")], isError: true)
        }
    }
    fputs("log: setupAndStartServer: registered CallTool handler.\n", stderr)

    // --- Transport and Start ---
    let transport = StdioTransport()
    fputs("log: setupAndStartServer: created StdioTransport.\n", stderr)

    fputs("log: setupAndStartServer: calling server.start()...\n", stderr)
    try await server.start(transport: transport)
    fputs("log: setupAndStartServer: server.start() completed (background task launched).\n", stderr)

    fputs("log: setupAndStartServer: returning server instance.\n", stderr)
    return server
}

// MARK: - Entry point

/// Process entry point: starts the server and blocks until it stops.
@main
struct MCPServer {
    /// Starts the server, waits for it to complete, and exits with status 0 on a clean stop or
    /// status 1 when setup fails.
    static func main() async {
        fputs("log: main: starting server (async).\n", stderr)

        let server: Server
        do {
            fputs("log: main: calling setupAndStartServer()...\n", stderr)
            server = try await setupAndStartServer()
            fputs("log: main: setupAndStartServer() successful, server instance obtained.\n", stderr)

            fputs("log: main: server started, calling server.waitUntilCompleted()...\n", stderr)
            await server.waitUntilCompleted() // Waits until the server loop finishes/errors
            fputs("log: main: server.waitUntilCompleted() returned. Server has stopped.\n", stderr)
        } catch {
            fputs("error: main: server setup or run failed: \(error)\n", stderr)
            if let mcpError = error as? MCPError {
                fputs("error: main: MCPError details: \(mcpError.localizedDescription)\n", stderr)
            }
            exit(1) // Exit with error code
        }

        fputs("log: main: Server processing finished gracefully. Exiting.\n", stderr)
        exit(0) // Exit cleanly
    }
}
--------------------------------------------------------------------------------