├── .gitignore ├── LICENSE ├── Package.swift ├── README.md ├── Sources ├── ActionTool │ └── main.swift ├── AppOpenerTool │ └── main.swift ├── HighlightTraversalTool │ └── main.swift ├── InputControllerTool │ └── main.swift ├── MacosUseSDK │ ├── AccessibilityTraversal.swift │ ├── ActionCoordinator.swift │ ├── AppOpener.swift │ ├── CombinedActions.swift │ ├── DrawVisuals.swift │ ├── HighlightInput.swift │ └── InputController.swift ├── TraversalTool │ └── main.swift └── VisualInputTool │ └── main.swift └── Tests └── MacosUseSDKTests ├── CombinedActionsDiffTests.swift └── CombinedActionsFocusVisualizationTests.swift /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /.build 3 | /Packages 4 | xcuserdata/ 5 | DerivedData/ 6 | .swiftpm/configuration/registries.json 7 | .swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata 8 | .netrc 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 mediar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/// Name shared by the package, its library product, and its library target.
let sdkName = "MacosUseSDK"

/// Command-line tools shipped with the package. Every entry becomes an executable product and an
/// executable target of the same name whose only dependency is the SDK library.
let toolNames = [
    "TraversalTool",
    "HighlightTraversalTool",
    "InputControllerTool",
    "VisualInputTool",
    "AppOpenerTool",
    "ActionTool",
]

let package = Package(
    name: sdkName,
    platforms: [
        .macOS(.v12)
    ],
    // Products define the executables and libraries a package produces, making them visible to
    // other packages. The library comes first, followed by one executable per tool.
    products: [Product.library(name: sdkName, targets: [sdkName])]
        + toolNames.map { Product.executable(name: $0, targets: [$0]) },
    dependencies: [
        // Add any external package dependencies here later if needed
    ],
    // Targets are the basic building blocks of a package, defining a module or a test suite.
    // Order: the SDK library, one executable target per tool, then the test target.
    targets: [
        Target.target(
            name: sdkName,
            dependencies: [],
            linkerSettings: [
                .linkedFramework("AppKit"),
                .linkedFramework("ApplicationServices"),
            ]
        )
    ]
        + toolNames.map { Target.executableTarget(name: $0, dependencies: ["MacosUseSDK"]) }
        + [Target.testTarget(name: "MacosUseSDKTests", dependencies: ["MacosUseSDK"])]
)
4 | 5 | 6 | https://github.com/user-attachments/assets/d8dc75ba-5b15-492c-bb40-d2bc5b65483e 7 | 8 | Highlight whatever is happening on the computer: text elements, clicks, typing 9 | ![Image](https://github.com/user-attachments/assets/9e182bbc-bd30-4285-984a-207a58b32bc0) 10 | 11 | Listen to changes in the UI, elements changed, text changed 12 | ![Image](https://github.com/user-attachments/assets/4a972dfa-ce4d-4b1a-9781-43379375b313) 13 | 14 | ## Building the Tools 15 | 16 | To build the command-line tools provided by this package, navigate to the root directory (`MacosUseSDK`) in your terminal and run: 17 | 18 | ```bash 19 | swift build 20 | ``` 21 | 22 | This will compile the tools and place the executables in the `.build/debug/` directory (or `.build/release/` if you use `swift build -c release`). You can run them directly from there (e.g., `.build/debug/TraversalTool`) or use `swift run <ToolName>`. 23 | 24 | ## Available Tools 25 | 26 | All tools output informational logs and timing data to `stderr`. Primary output (like PIDs or JSON data) is sent to `stdout`. 27 | 28 | ### AppOpenerTool 29 | 30 | * **Purpose:** Opens or activates a macOS application by its name, bundle ID, or full path. Outputs the application's PID on success. 31 | * **Usage:** `AppOpenerTool <AppName | BundleID | Path>` 32 | * **Examples:** 33 | ```bash 34 | # Open by name 35 | swift run AppOpenerTool Calculator 36 | # Open by bundle ID 37 | swift run AppOpenerTool com.apple.Terminal 38 | # Open by path 39 | swift run AppOpenerTool /System/Applications/Utilities/Terminal.app 40 | # Example output (stdout) 41 | # 54321 42 | ``` 43 | 44 | ### TraversalTool 45 | 46 | * **Purpose:** Traverses the accessibility tree of a running application (specified by PID) and outputs a JSON representation of the UI elements to `stdout`. 47 | * **Usage:** `TraversalTool [--visible-only] <PID>` 48 | * **Options:** 49 | * `--visible-only`: Only include elements that have a position and size (are geometrically visible). 
50 | * **Examples:** 51 | ```bash 52 | # Get only visible elements for Messages app 53 | swift run TraversalTool --visible-only $(swift run AppOpenerTool Messages) 54 | ``` 55 | 56 | ### HighlightTraversalTool 57 | 58 | * **Purpose:** Traverses the accessibility tree of a running application (specified by PID) and draws temporary red boxes around all visible UI elements. Also outputs traversal data (JSON) to `stdout`. Useful for debugging accessibility structures. 59 | * **Usage:** `HighlightTraversalTool <PID> [--duration <seconds>]` 60 | * **Options:** 61 | * `--duration <seconds>`: Specifies how long the highlights remain visible (default: 3.0 seconds). 62 | * **Examples:** 63 | ```bash 64 | # Combine with AppOpenerTool to open Messages and highlight it 65 | swift run HighlightTraversalTool $(swift run AppOpenerTool Messages) --duration 5 66 | ``` 67 | *Note: This tool needs to keep running for the duration specified to manage the highlights.* 68 | 69 | ### InputControllerTool 70 | 71 | * **Purpose:** Simulates keyboard and mouse input events without visual feedback. 72 | * **Usage:** See `swift run InputControllerTool --help` (or just run without args) for actions. 73 | * **Examples:** 74 | ```bash 75 | # Press the Enter key 76 | swift run InputControllerTool keypress enter 77 | # Simulate Cmd+C (Copy) 78 | swift run InputControllerTool keypress cmd+c 79 | # Simulate Shift+Tab 80 | swift run InputControllerTool keypress shift+tab 81 | # Left click at screen coordinates (100, 250) 82 | swift run InputControllerTool click 100 250 83 | # Double click at screen coordinates (150, 300) 84 | swift run InputControllerTool doubleclick 150 300 85 | # Right click at screen coordinates (200, 350) 86 | swift run InputControllerTool rightclick 200 350 87 | # Move mouse cursor to (500, 500) 88 | swift run InputControllerTool mousemove 500 500 89 | # Type the text "Hello World!" 90 | swift run InputControllerTool writetext "Hello World!" 
91 | ``` 92 | 93 | ### VisualInputTool 94 | 95 | * **Purpose:** Simulates keyboard and mouse input events *with* visual feedback (currently a pulsing green circle for mouse actions). 96 | * **Usage:** Similar to `InputControllerTool`, but adds a `--duration` option for the visual effect. See `swift run VisualInputTool --help`. 97 | * **Options:** 98 | * `--duration <seconds>`: How long the visual feedback effect lasts (default: 0.5 seconds). 99 | * **Examples:** 100 | ```bash 101 | # Left click at (100, 250) with default 0.5s feedback 102 | swift run VisualInputTool click 100 250 103 | # Right click at (800, 400) with 2 second feedback 104 | swift run VisualInputTool rightclick 800 400 --duration 2.0 105 | # Move mouse to (500, 500) with 1 second feedback 106 | swift run VisualInputTool mousemove 500 500 --duration 1.0 107 | # Keypress and writetext (currently NO visualization implemented) 108 | swift run VisualInputTool keypress cmd+c 109 | swift run VisualInputTool writetext "Testing" 110 | ``` 111 | *Note: This tool needs to keep running for the duration specified to display the visual feedback.* 112 | 113 | ### Running Tests 114 | 115 | To run only specific tests or test classes, use the `--filter` option. 116 | To run a specific test method, provide the full identifier `TestClassName/testMethodName`. 117 | 118 | ```bash 119 | swift test 120 | # Example: Run only the multiply test in CombinedActionsDiffTests 121 | swift test --filter CombinedActionsDiffTests/testCalculatorMultiplyWithActionAndTraversalHighlight 122 | # Example: Run all tests in CombinedActionsFocusVisualizationTests 123 | swift test --filter CombinedActionsFocusVisualizationTests 124 | ``` 125 | 126 | 127 | ## Using the Library 128 | 129 | You can also use `MacosUseSDK` as a dependency in your own Swift projects. 
Add it to your `Package.swift` dependencies: 130 | 131 | ```swift 132 | dependencies: [ 133 | .package(url: "/* path or URL to your MacosUseSDK repo */", from: "1.0.0"), 134 | ] 135 | ``` 136 | 137 | And add `MacosUseSDK` to your target's dependencies: 138 | 139 | ```swift 140 | .target( 141 | name: "YourApp", 142 | dependencies: ["MacosUseSDK"]), 143 | ``` 144 | 145 | Then import and use the public functions: 146 | 147 | ```swift 148 | import MacosUseSDK 149 | import Foundation // For Dispatch etc. 150 | 151 | // Example: Get elements from Calculator app 152 | Task { 153 | do { 154 | // Find Calculator PID (replace with actual logic or use AppOpenerTool output) 155 | // let calcPID: Int32 = ... 156 | // let response = try MacosUseSDK.traverseAccessibilityTree(pid: calcPID, onlyVisibleElements: true) 157 | // print("Found \(response.elements.count) visible elements.") 158 | 159 | // Example: Click at a point 160 | let point = CGPoint(x: 100, y: 200) 161 | try MacosUseSDK.clickMouse(at: point) 162 | 163 | // Example: Click with visual feedback (needs main thread for UI) 164 | DispatchQueue.main.async { 165 | do { 166 | try MacosUseSDK.clickMouseAndVisualize(at: point, duration: 1.0) 167 | } catch { 168 | print("Visualization error: \(error)") 169 | } 170 | } 171 | 172 | } catch { 173 | print("MacosUseSDK Error: \(error)") 174 | } 175 | } 176 | 177 | // Remember to keep the run loop active if using async UI functions like highlightVisibleElements or *AndVisualize 178 | // RunLoop.main.run() // Or use within an @main Application structure 179 | ``` 180 | 181 | ## License 182 | 183 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
/// Demo command-line tool that chains two SDK actions: it opens an application, types a line of
/// text into it, and prints the accessibility-tree diff produced by the typing.
///
/// An `@main` struct is used so that the entry point can be `async`.
@main
struct ActionTool {

    /// Application the demo opens and then types into. Every log line and heading is built from
    /// this constant so the output always names the application that is actually launched.
    private static let targetAppName = "Messages"

    /// Text typed into the target application once it is open.
    private static let textToType = "Hello world from ActionTool!"

    /// Length of the final pause before `exit(0)` (1 second); see the end of `main()`.
    private static let finalPauseNanoseconds: UInt64 = 1_000_000_000

    /// Runs the demo: opens the target app, types into it with diffing and animation enabled,
    /// prints the result, then pauses briefly before exiting with status 0.
    static func main() async {
        fputs("info: ActionTool started.\n", stderr)

        // --- Example 1: open the app, type, traverse with diff ---
        let openAction = PrimaryAction.open(identifier: targetAppName)
        let openOptions = ActionOptions(
            traverseBefore: true,
            traverseAfter: true,
            showDiff: false,            // the diff is only requested for the typing step
            onlyVisibleElements: true,
            showAnimation: false,
            delayAfterAction: 0.0       // the typing step follows immediately
        )

        fputs("\n--- Running Example 1: Open \(targetAppName) ---\n", stderr)
        let openResult = await performAction(action: openAction, optionsInput: openOptions)

        if let pid = openResult.openResult?.pid, openResult.primaryActionError == nil {
            fputs("info: \(targetAppName) opened/activated (PID: \(pid)). Now preparing to type...\n", stderr)

            let typeAction = PrimaryAction.input(action: .type(text: textToType))
            let typeOptions = ActionOptions(
                traverseBefore: true,       // "before" state for the diff
                traverseAfter: true,        // "after" state for the diff
                showDiff: true,
                onlyVisibleElements: true,
                showAnimation: true,
                animationDuration: 0.8,
                pidForTraversal: pid,       // IMPORTANT: traverse the app that was just opened
                delayAfterAction: 0.0       // raise if the app needs time to render before 'traverseAfter'
            )

            fputs("\n--- Running Example 1: Type into \(targetAppName) (with Diff & Animation) ---\n", stderr)
            let typeResult = await performAction(action: typeAction, optionsInput: typeOptions)

            print("\n--- \(targetAppName) Type Result (including Diff) ---")
            printResult(typeResult)
        } else {
            fputs("error: Failed to open \(targetAppName) or get PID. Aborting typing.\n", stderr)
            print("\n--- \(targetAppName) Open Result (Failed) ---")
            printResult(openResult) // Print the result even on failure
        }

        // CRITICAL WAIT FOR ASYNCHRONOUS VISUALIZATIONS
        //
        // SDK functions such as `showVisualFeedback` and `drawHighlightBoxes` schedule their UI
        // work with `DispatchQueue.main.async` and return immediately. Calling `exit(0)` right
        // after the last `performAction` can therefore end the process before that work runs or
        // before its animations finish, so feedback may never appear or may be cut off.
        // This pause keeps the process alive long enough for it to be seen; adjust
        // `finalPauseNanoseconds` if animations are consistently cut short.
        //
        // NOTE: overlay windows are intentionally not closed explicitly (doing so near `exit(0)`
        // caused crashes); the operating system cleans them up when the process exits.
        fputs("info: Main logic complete. Pausing to allow async animations to complete before exiting...\n", stderr)
        try? await Task.sleep(nanoseconds: finalPauseNanoseconds)

        fputs("\ninfo: ActionTool finished.\n", stderr)
        exit(0) // Exit cleanly
    }

    /// Prints the traversal diff of `result` to stdout as pretty-printed JSON.
    ///
    /// If JSON encoding fails, a manual summary of the diff is printed instead. If `result` has
    /// no diff, a placeholder line is printed followed by whichever of the traversal/primary
    /// action errors are present.
    ///
    /// - Parameter result: The action result whose diff (or errors) should be printed.
    static func printResult(_ result: ActionResult) {
        // Flush on every exit path so stdout is complete before the caller's `exit(0)`.
        defer { fflush(stdout) }

        print("\n--- Traversal Diff ---")

        guard let diff = result.traversalDiff else {
            print(" (No diff calculated or available in this result object)")
            if let err = result.traversalBeforeError { print(" Traversal Before Error: \(err)") }
            if let err = result.traversalAfterError { print(" Traversal After Error: \(err)") }
            if let err = result.primaryActionError { print(" Primary Action Error: \(err)") }
            return
        }

        let encoder = JSONEncoder()
        encoder.outputFormatting = [.prettyPrinted, .sortedKeys]

        do {
            let jsonData = try encoder.encode(diff)
            if let jsonString = String(data: jsonData, encoding: .utf8) {
                print(jsonString)
            } else {
                fputs("error: Failed to convert diff JSON data to string.\n", stderr)
            }
        } catch {
            fputs("error: Failed to encode TraversalDiff to JSON: \(error)\n", stderr)

            // Fallback: print a manual summary.
            print(" Added (\(diff.added.count))")
            print(" Removed (\(diff.removed.count))")
            print(" Modified (\(diff.modified.count))")
            for modification in diff.modified {
                print(" - Role: \(modification.before.role)")
                for change in modification.changes {
                    guard change.attributeName == "text" else {
                        // Non-text attributes: plain old -> new.
                        print(" - \(change.attributeName): \(change.oldValue ?? "nil") -> \(change.newValue ?? "nil")")
                        continue
                    }
                    // Text attributes: prefer the simple added/removed form, never both.
                    if let added = change.addedText {
                        print(" - text added: \"\(added)\"")
                    } else if let removed = change.removedText {
                        print(" - text removed: \"\(removed)\"")
                    } else {
                        print(" - text changed (complex)")
                    }
                }
            }
        }
    }
}
/// Entry point: validates that exactly one application identifier was passed, asks the SDK to
/// open or activate that application, and prints the resulting PID to stdout.
///
/// Exits with status 0 on success, and with status 1 on a usage error or on any error thrown by
/// `MacosUseSDK.openApplication(identifier:)`. Declared `async` because that call is awaited.
static func main() async {
    // --- Argument Parsing ---
    let arguments = CommandLine.arguments
    guard arguments.count == 2 else {
        // Fall back to the tool's name rather than indexing an argument list that could be empty.
        let scriptName = arguments.first.map { URL(fileURLWithPath: $0).lastPathComponent } ?? "AppOpenerTool"
        fputs("usage: \(scriptName) <AppName | BundleID | Path>\n", stderr)
        fputs("example (name): \(scriptName) Calculator\n", stderr)
        fputs("example (path): \(scriptName) /System/Applications/Utilities/Terminal.app\n", stderr)
        fputs("example (bundleID): \(scriptName) com.apple.Terminal\n", stderr)
        exit(1)
    }
    let appIdentifier = arguments[1]

    // --- Call Library Function ---
    fputs("info: calling MacosUseSDK.openApplication for identifier: '\(appIdentifier)'\n", stderr)
    do {
        let result = try await MacosUseSDK.openApplication(identifier: appIdentifier)

        // --- Output PID on Success ---
        // Only the PID goes to stdout so the tool can be used in command substitution,
        // e.g. `$(swift run AppOpenerTool Messages)`.
        print(result.pid)
        exit(0)
    } catch let error as MacosUseSDKError.AppOpenerError {
        // Errors specific to the AppOpener module.
        fputs("❌ Error (AppOpener): \(error.localizedDescription)\n", stderr)
        exit(1)
    } catch let error as MacosUseSDKError {
        // Other SDK errors.
        fputs("❌ Error (MacosUseSDK): \(error.localizedDescription)\n", stderr)
        exit(1)
    } catch {
        // Anything else.
        fputs("❌ An unexpected error occurred: \(error.localizedDescription)\n", stderr)
        exit(1)
    }
}
/// Parses the tool's command-line arguments: one PID and an optional `--duration <seconds>`.
///
/// The executable path (first argument) is skipped. The PID must be a positive 32-bit integer
/// and may appear before or after the flag. The duration must be a finite number greater than
/// zero; non-finite values such as `inf` are rejected because the caller converts the duration
/// to an integer nanosecond count, which would trap.
///
/// - Returns: `(pid, duration)` on success, with `duration == nil` when the flag was not given.
///   On any parsing error an explanation is written to stderr and `(nil, nil)` is returned.
func parseArguments() -> (pid: Int32?, duration: Double?) {
    var pid: Int32?
    var duration: Double?

    // Skip the executable path; pull the flag's value from the same iterator.
    var remaining = CommandLine.arguments.dropFirst().makeIterator()
    while let arg = remaining.next() {
        if arg == "--duration" {
            guard let rawValue = remaining.next() else {
                fputs("error: Missing value after --duration flag.\n", stderr)
                return (nil, nil)
            }
            guard let seconds = Double(rawValue), seconds.isFinite, seconds > 0 else {
                fputs("error: Invalid value provided after --duration.\n", stderr)
                return (nil, nil)
            }
            duration = seconds
        } else if pid == nil, let pidValue = Int32(arg), pidValue > 0 {
            pid = pidValue
        } else {
            // Second PID, non-positive PID, unknown flag, or non-numeric argument.
            fputs("error: Unexpected argument '\(arg)'.\n", stderr)
            return (nil, nil)
        }
    }

    guard pid != nil else {
        fputs("error: Missing required PID argument.\n", stderr)
        return (nil, nil)
    }

    return (pid, duration)
}
// --- Main Execution Logic ---

// 1. Parse arguments. On failure the parser has already written the specific problem to stderr,
//    so only the usage text is added here.
let (parsedPID, parsedDuration) = parseArguments()

guard let targetPID = parsedPID else {
    fputs("\nusage: HighlightTraversalTool <PID> [--duration <seconds>]\n", stderr)
    fputs("  <PID>: Process ID of the application to highlight.\n", stderr)
    fputs("  --duration <seconds>: How long the highlights should stay visible (default: 3.0).\n", stderr)
    fputs("\nexample: HighlightTraversalTool 14154 --duration 5\n", stderr)
    exit(1)
}

// Use the provided duration, or the documented default of 3 seconds.
let highlightDuration = parsedDuration ?? 3.0

fputs("info: Target PID: \(targetPID), Highlight Duration: \(highlightDuration) seconds.\n", stderr)

// The async work runs in a Task; `RunLoop.main.run()` at the bottom keeps the process alive and
// keeps the main thread servicing UI work until the Task calls `exit`.
Task {
    do {
        // 2. Traverse first; the highlights are drawn from the traversal result.
        fputs("info: Calling traverseAccessibilityTree (visible only)...\n", stderr)
        let responseData = try await MacosUseSDK.traverseAccessibilityTree(pid: targetPID, onlyVisibleElements: true)
        fputs("info: Traversal complete. Found \(responseData.elements.count) visible elements.\n", stderr)

        // 3. Draw the highlights. The call is made on the main actor because
        //    drawHighlightBoxes requires it.
        fputs("info: Calling drawHighlightBoxes with \(responseData.elements.count) elements...\n", stderr)
        await MainActor.run {
            MacosUseSDK.drawHighlightBoxes(for: responseData.elements, duration: highlightDuration)
        }
        fputs("info: drawHighlightBoxes call dispatched successfully.\n", stderr)
        fputs("      Overlays appear/disappear asynchronously on the main thread.\n", stderr)

        // 4. Encode the traversal response as compact JSON.
        //    Set `encoder.outputFormatting = [.prettyPrinted, .sortedKeys]` for readable output.
        fputs("info: Encoding traversal response to JSON...\n", stderr)
        let encoder = JSONEncoder()
        let jsonData = try encoder.encode(responseData)

        // 5. Print the JSON to stdout (the tool's primary output).
        guard let jsonString = String(data: jsonData, encoding: .utf8) else {
            throw MacosUseSDKError.internalError("Failed to convert JSON data to UTF-8 string.")
        }
        print(jsonString)
        fputs("info: Successfully printed JSON response to stdout.\n", stderr)

        // 6. Stay alive slightly longer than the highlight duration, then exit.
        //    The task is suspended rather than blocking the main thread, so the run loop started
        //    below stays free to run the overlay work scheduled on the main thread.
        let waitTime = highlightDuration + 1.0
        fputs("info: Keeping the tool alive for \(waitTime) seconds to allow UI updates...\n", stderr)
        // Clamp before converting: UInt64(_:) traps on values it cannot represent.
        let waitNanoseconds = UInt64(min(waitTime * 1_000_000_000, Double(UInt64.max / 2)))
        try await Task.sleep(nanoseconds: waitNanoseconds)

        fputs("info: Run loop finished. Tool exiting normally.\n", stderr)
        exit(0) // Success

    } catch let error as MacosUseSDKError {
        // Specific SDK errors
        fputs("❌ Error from MacosUseSDK: \(error.localizedDescription)\n", stderr)
        exit(1)
    } catch {
        // Other errors (e.g., JSON encoding failure)
        fputs("❌ An unexpected error occurred: \(error.localizedDescription)\n", stderr)
        exit(1)
    }
}

// Keep the process alive (and the main thread servicing UI work) until the Task exits.
RunLoop.main.run()

/*

swift run HighlightTraversalTool $(swift run AppOpenerTool Messages) --duration 5

*/
// --- Argument Parsing and Main Logic ---
let arguments = CommandLine.arguments
let scriptName = arguments.first ?? "InputControllerTool"

// Usage text: printed when no/unknown action is given and embedded in argument-count errors.
// In a key combo the key is the LAST `+`-separated part; everything before it is a modifier.
let usage = """
usage: \(scriptName) <action> [options...]

actions:
  keypress [modifier+...]<key>   Simulate pressing a key (e.g., 'return', 'a', 'f1', 'cmd+c', 'shift+tab').
                                 Supported modifiers: cmd, shift, opt, ctrl, fn.
  click <x> <y>                  Simulate a left mouse click at screen coordinates.
  doubleclick <x> <y>            Simulate a left mouse double-click at screen coordinates.
  rightclick <x> <y>             Simulate a right mouse click at screen coordinates.
  mousemove <x> <y>              Move the mouse cursor to screen coordinates.
  writetext <text>               Simulate typing a string of text.

Examples:
  \(scriptName) keypress enter
  \(scriptName) keypress cmd+shift+4
  \(scriptName) click 100 250
  \(scriptName) writetext "Hello World!"
"""

/// Maps a lower-cased modifier name from a key combo (e.g. "cmd") to its event flag.
/// Returns `nil` when the name is not a supported modifier.
func modifierFlag(named name: String) -> CGEventFlags? {
    switch name {
    case "cmd", "command": return .maskCommand
    case "shift": return .maskShift
    case "opt", "option", "alt": return .maskAlternate
    case "ctrl", "control": return .maskControl
    case "fn", "function": return .maskSecondaryFn // Note: 'fn' might need special handling or accessibility settings
    default: return nil
    }
}

// An action name is required. The usage text has no trailing newline of its own, so one is
// added to keep the following log line on a separate line.
guard arguments.count > 1 else {
    fputs(usage + "\n", stderr)
    finish(success: false, message: "No action specified.")
}

let action = arguments[1].lowercased()
log("Action: \(action)")

// --- Action Handling ---
// Every path below ends in `finish(...)`, which never returns, either directly or through one
// of the catch clauses.
do {
    switch action {
    case "keypress":
        guard arguments.count == 3 else {
            throw MacosUseSDKError.inputInvalidArgument("'keypress' requires exactly one argument: <key_combo>\n\(usage)")
        }
        let keyCombo = arguments[2]
        log("Key Combo Argument: '\(keyCombo)'")

        // Split "cmd+shift+4" into lower-cased, trimmed parts.
        let parts = keyCombo
            .split(separator: "+")
            .map { String($0).trimmingCharacters(in: .whitespacesAndNewlines).lowercased() }

        // The last part is the key itself.
        guard let keyPart = parts.last else {
            throw MacosUseSDKError.inputInvalidArgument("Invalid key combination format: '\(keyCombo)'")
        }
        log("Parsing key part: '\(keyPart)'")
        let keyCode = MacosUseSDK.mapKeyNameToKeyCode(keyPart)

        // Everything before the key is a modifier. An unknown modifier is reported before an
        // unknown key.
        var flags: CGEventFlags = []
        let modifierNames = parts.dropLast()
        if !modifierNames.isEmpty {
            log("Parsing modifiers: \(modifierNames.joined(separator: ", "))")
            for name in modifierNames {
                guard let flag = modifierFlag(named: name) else {
                    throw MacosUseSDKError.inputInvalidArgument("Unknown modifier: '\(name)' in '\(keyCombo)'")
                }
                flags.insert(flag)
            }
        }

        guard let finalKeyCode = keyCode else {
            throw MacosUseSDKError.inputInvalidArgument("Unknown key name or invalid key code: '\(keyPart)' in '\(keyCombo)'")
        }

        log("Calling pressKey library function...")
        try MacosUseSDK.pressKey(keyCode: finalKeyCode, flags: flags)
        finish(success: true, message: "Key press '\(keyCombo)' simulated.")

    case "click", "doubleclick", "rightclick", "mousemove":
        guard arguments.count == 4 else {
            throw MacosUseSDKError.inputInvalidArgument("'\(action)' requires exactly two arguments: <x> <y>\n\(usage)")
        }
        guard let x = Double(arguments[2]), let y = Double(arguments[3]) else {
            throw MacosUseSDKError.inputInvalidArgument("Invalid coordinates for '\(action)'. x and y must be numbers.")
        }
        let point = CGPoint(x: x, y: y)
        log("Coordinates: (\(x), \(y))")

        log("Calling \(action) library function...")
        switch action {
        case "click": try MacosUseSDK.clickMouse(at: point)
        case "doubleclick": try MacosUseSDK.doubleClickMouse(at: point)
        case "rightclick": try MacosUseSDK.rightClickMouse(at: point)
        case "mousemove": try MacosUseSDK.moveMouse(to: point)
        default:
            // Unreachable: the enclosing case matches only the four names above. Fail loudly
            // instead of reporting success for an action that was never performed.
            throw MacosUseSDKError.internalError("Unhandled mouse action '\(action)'")
        }
        finish(success: true, message: "\(action) simulated at (\(x), \(y)).")

    case "writetext":
        guard arguments.count == 3 else {
            throw MacosUseSDKError.inputInvalidArgument("'writetext' requires exactly one argument: <text>\n\(usage)")
        }
        let text = arguments[2]
        log("Text Argument: \"\(text)\"")
        log("Calling writeText library function...")
        try MacosUseSDK.writeText(text)
        finish(success: true, message: "Text writing simulated.")

    default:
        fputs(usage + "\n", stderr)
        throw MacosUseSDKError.inputInvalidArgument("Unknown action '\(action)'")
    }

} catch let error as MacosUseSDKError {
    // Handle specific SDK errors
    finish(success: false, message: "MacosUseSDK Error: \(error.localizedDescription)")
} catch {
    // Handle other unexpected errors
    finish(success: false, message: "An unexpected error occurred: \(error.localizedDescription)")
}

/*
# Example: Open Calculator and type 2*3=
swift run AppOpenerTool Calculator
# (Wait a moment or use the PID from above if needed)
swift run InputControllerTool writetext "2*3="
*/
--------------------------------------------------------------------------------
// The Swift Programming Language
// https://docs.swift.org/swift-book

import AppKit // For NSWorkspace, NSRunningApplication, NSApplication
import Foundation // For basic types, JSONEncoder, Date
import ApplicationServices // For Accessibility API (AXUIElement, etc.)

// --- Error Enum ---

/// Errors reported by the SDK, each with a human-readable description.
public enum MacosUseSDKError: Error, LocalizedError {
    case accessibilityDenied
    case appNotFound(pid: Int32)
    case jsonEncodingFailed(Error)
    case internalError(String) // For unexpected issues

    /// Message exposed through `LocalizedError` (and therefore `localizedDescription`).
    public var errorDescription: String? {
        let message: String = switch self {
        case .accessibilityDenied:
            "Accessibility access is denied. Please grant permissions in System Settings > Privacy & Security > Accessibility."
        case .appNotFound(let pid):
            "No running application found with PID \(pid)."
        case .jsonEncodingFailed(let underlyingError):
            "Failed to encode response to JSON: \(underlyingError.localizedDescription)"
        case .internalError(let message):
            "Internal SDK error: \(message)"
        }
        return message
    }
}


// --- Public Data Structures for API Response ---

/// One accessibility element as reported by a traversal.
///
/// Equality and hashing cover every stored property (role, text and geometry),
/// which is what lets a `Set<ElementData>` drop exact duplicates. The compiler
/// synthesizes both from the stored properties, in declaration order.
public struct ElementData: Codable, Hashable, Sendable {
    /// Accessibility role, optionally followed by the role description in parentheses.
    public var role: String
    /// Combined textual content of the element, if any.
    public var text: String?
    /// Horizontal position, when geometry is available.
    public var x: Double?
    /// Vertical position, when geometry is available.
    public var y: Double?
    /// Width, when it is greater than zero.
    public var width: Double?
    /// Height, when it is greater than zero.
    public var height: Double?
}

/// Counters gathered while walking an accessibility tree.
/// Property names are snake_case because they are the `Codable` keys of the response.
public struct Statistics: Codable, Sendable {
    /// Number of collected elements (set once the traversal has finished).
    public var count: Int = 0
    /// Number of visited elements that were not collected.
    public var excluded_count: Int = 0
    /// Excluded elements whose role is in the non-interactable set.
    public var excluded_non_interactable: Int = 0
    /// Excluded elements that carried no text.
    public var excluded_no_text: Int = 0
    /// Collected elements that have text.
    public var with_text_count: Int = 0
    /// Collected elements that have no text.
    public var without_text_count: Int = 0
    /// Visited elements with a position and a positive width and height.
    public var visible_elements_count: Int = 0
    /// Number of visited elements per raw accessibility role.
    public var role_counts: [String: Int] = [:]
}

/// Full result of one traversal.
public struct ResponseData: Codable, Sendable {
    /// Localized name of the traversed application (or a PID-based placeholder).
    public let app_name: String
    /// Collected elements, sorted top-to-bottom and then left-to-right.
    public var elements: [ElementData]
    /// Counters describing what was visited, collected and excluded.
    public var stats: Statistics
    /// Total traversal time in seconds, preformatted with two decimals.
    public let processing_time_seconds: String
}


// --- Main Public Function ---

/// Traverses the accessibility tree of an application specified by its PID.
///
/// - Parameter pid: The Process ID (PID) of the target application.
/// - Parameter onlyVisibleElements: If true, only collects elements with valid position and size. Defaults to false.
/// - Returns: A `ResponseData` struct containing the collected elements, statistics, and timing information.
/// - Throws: `MacosUseSDKError` if accessibility is denied, the app is not found, or an internal error occurs.
public func traverseAccessibilityTree(pid: Int32, onlyVisibleElements: Bool = false) throws -> ResponseData {
    // All state lives in a throwaway operation object, so every call starts clean.
    let operation = AccessibilityTraversalOperation(pid: pid, onlyVisibleElements: onlyVisibleElements)
    return try operation.executeTraversal()
}


// --- Internal Implementation Detail ---

/// Encapsulates the state and logic of a single traversal operation.
fileprivate final class AccessibilityTraversalOperation {
    let pid: Int32
    let onlyVisibleElements: Bool
    /// Elements already walked; guards against cycles and repeated subtrees.
    var visitedElements: Set<AXUIElement> = []
    /// Elements that passed filtering; a set, so exact duplicates collapse.
    var collectedElements: Set<ElementData> = []
    var statistics: Statistics = Statistics()
    /// Start of the step currently being timed by `logStepCompletion`.
    var stepStartTime: Date = Date()
    /// Elements deeper than this are not walked.
    let maxDepth = 100

    // Define roles considered non-interactable by default
    let nonInteractableRoles: Set<String> = [
        "AXGroup", "AXStaticText", "AXUnknown", "AXSeparator",
        "AXHeading", "AXLayoutArea", "AXHelpTag", "AXGrowArea",
        "AXOutline", "AXScrollArea", "AXSplitGroup", "AXSplitter",
        "AXToolbar", "AXDisclosureTriangle",
    ]

    init(pid: Int32, onlyVisibleElements: Bool) {
        self.pid = pid
        self.onlyVisibleElements = onlyVisibleElements
    }

    // --- Main Execution Method ---

    /// Runs the whole traversal: permission check, app lookup, optional
    /// activation, tree walk, sorting and response assembly.
    ///
    /// - Returns: The collected elements (sorted by y, then x), statistics and timing.
    /// - Throws: `MacosUseSDKError.accessibilityDenied` when the process is not trusted,
    ///   `MacosUseSDKError.appNotFound` when no running application has `pid`.
    func executeTraversal() throws -> ResponseData {
        let overallStartTime = Date()
        fputs("info: starting traversal for pid: \(pid) (Visible Only: \(onlyVisibleElements))\n", stderr)
        stepStartTime = Date() // Initialize step timer

        // 1. Accessibility Check
        fputs("info: checking accessibility permissions...\n", stderr)
        let checkOptions = ["AXTrustedCheckOptionPrompt": kCFBooleanTrue] as CFDictionary
        guard AXIsProcessTrustedWithOptions(checkOptions) else {
            fputs("❌ error: accessibility access is denied.\n", stderr)
            fputs(" please grant permissions in system settings > privacy & security > accessibility.\n", stderr)
            throw MacosUseSDKError.accessibilityDenied
        }
        logStepCompletion("checking accessibility permissions (granted)")

        // 2. Find Application by PID and Create AXUIElement
        guard let runningApp = NSRunningApplication(processIdentifier: pid) else {
            fputs("error: no running application found with pid \(pid).\n", stderr)
            throw MacosUseSDKError.appNotFound(pid: pid)
        }
        let targetAppName = runningApp.localizedName ?? "App (PID: \(pid))"
        let appElement = AXUIElementCreateApplication(pid)

        // 3. Activate App if needed (only regular apps that are not already active)
        if runningApp.activationPolicy == NSApplication.ActivationPolicy.regular, !runningApp.isActive {
            runningApp.activate()
            // Consider adding a small delay or a check loop if activation timing is critical
            logStepCompletion("activating application '\(targetAppName)'")
        }

        // 4. Start Traversal
        walkElementTree(element: appElement, depth: 0)
        logStepCompletion("traversing accessibility tree (\(collectedElements.count) elements collected)")

        // 5. Process Results: top-to-bottom, then left-to-right; elements without
        //    a coordinate sort last on that axis.
        let sortedElements = collectedElements.sorted {
            let y0 = $0.y ?? Double.greatestFiniteMagnitude
            let y1 = $1.y ?? Double.greatestFiniteMagnitude
            if y0 != y1 { return y0 < y1 }
            let x0 = $0.x ?? Double.greatestFiniteMagnitude
            let x1 = $1.x ?? Double.greatestFiniteMagnitude
            return x0 < x1
        }

        // Set the final count statistic
        statistics.count = sortedElements.count

        // --- Calculate Total Time ---
        let totalProcessingTime = Date().timeIntervalSince(overallStartTime)
        let formattedTime = String(format: "%.2f", totalProcessingTime)
        fputs("info: total execution time: \(formattedTime) seconds\n", stderr)

        // 6. Prepare Response (JSON encoding, if needed, is left to the caller)
        return ResponseData(
            app_name: targetAppName,
            elements: sortedElements,
            stats: statistics,
            processing_time_seconds: formattedTime
        )
    }


    // --- Helper Functions (now methods of the class) ---

    /// Copies one attribute value from `element`.
    /// - Returns: The value on success; `nil` for every `AXError`, including
    ///   unsupported attributes and attributes without a value.
    func copyAttributeValue(element: AXUIElement, attribute: String) -> CFTypeRef? {
        var value: CFTypeRef?
        let result = AXUIElementCopyAttributeValue(element, attribute as CFString, &value)
        return result == .success ? value : nil
    }

    /// Returns `value` as a `String` when it is a `CFString`; every other type
    /// (including `AXValue`) yields `nil`.
    func getStringValue(_ value: CFTypeRef?) -> String? {
        guard let value, CFGetTypeID(value) == CFStringGetTypeID() else { return nil }
        // The type ID was checked above, so this cast cannot fail.
        return (value as! CFString) as String
    }

    /// Extracts a `CGPoint` from an `AXValue`; `nil` for any other input.
    func getCGPointValue(_ value: CFTypeRef?) -> CGPoint? {
        guard let value, CFGetTypeID(value) == AXValueGetTypeID() else { return nil }
        let axValue = value as! AXValue
        var pointValue = CGPoint.zero
        return AXValueGetValue(axValue, .cgPoint, &pointValue) ? pointValue : nil
    }

    /// Extracts a `CGSize` from an `AXValue`; `nil` for any other input.
    func getCGSizeValue(_ value: CFTypeRef?) -> CGSize? {
        guard let value, CFGetTypeID(value) == AXValueGetTypeID() else { return nil }
        let axValue = value as! AXValue
        var sizeValue = CGSize.zero
        return AXValueGetValue(axValue, .cgSize, &sizeValue) ? sizeValue : nil
    }

    /// Reads role, role description, text and geometry of `element`.
    ///
    /// `text` is the space-joined, trimmed combination of the non-blank string
    /// values of value, title, description, "AXLabel" and "AXHelp" (in that
    /// order), or `nil` when none is present; `allTextParts` holds the parts.
    func extractElementAttributes(element: AXUIElement) -> (role: String, roleDesc: String?, text: String?, allTextParts: [String], position: CGPoint?, size: CGSize?) {
        // A missing or non-string role falls back to "AXUnknown".
        let role = getStringValue(copyAttributeValue(element: element, attribute: kAXRoleAttribute as String)) ?? "AXUnknown"
        let roleDesc = getStringValue(copyAttributeValue(element: element, attribute: kAXRoleDescriptionAttribute as String))

        let textAttributes = [
            kAXValueAttribute as String, kAXTitleAttribute as String, kAXDescriptionAttribute as String,
            "AXLabel", "AXHelp",
        ]
        var textParts: [String] = []
        for attr in textAttributes {
            if let text = getStringValue(copyAttributeValue(element: element, attribute: attr)),
               !text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
                textParts.append(text)
            }
        }
        let combinedText = textParts.isEmpty ? nil : textParts.joined(separator: " ").trimmingCharacters(in: .whitespacesAndNewlines)

        let position = getCGPointValue(copyAttributeValue(element: element, attribute: kAXPositionAttribute as String))
        let size = getCGSizeValue(copyAttributeValue(element: element, attribute: kAXSizeAttribute as String))

        return (role, roleDesc, combinedText, textParts, position, size)
    }

    /// Recursively walks `element` and its windows, main window and children,
    /// updating `statistics` and `collectedElements` along the way.
    ///
    /// An element is collected when it is interactable or has text, and, if
    /// `onlyVisibleElements` is set, also has a position and positive size.
    func walkElementTree(element: AXUIElement, depth: Int) {
        // 1. Check for cycles and depth limit
        guard !visitedElements.contains(element), depth <= maxDepth else { return }
        visitedElements.insert(element)

        // 2. Process the current element
        let (role, roleDesc, combinedText, _, position, size) = extractElementAttributes(element: element)
        let hasText = !(combinedText ?? "").isEmpty
        let isNonInteractable = nonInteractableRoles.contains(role)
        let roleWithoutAX = role.starts(with: "AX") ? String(role.dropFirst(2)) : role

        statistics.role_counts[role, default: 0] += 1

        // 3. Determine Geometry and Visibility
        // Geometry is reported when at least one dimension is positive, but a
        // non-positive dimension is reported as nil.
        var finalX: Double? = nil
        var finalY: Double? = nil
        var finalWidth: Double? = nil
        var finalHeight: Double? = nil
        if let p = position, let s = size, s.width > 0 || s.height > 0 {
            finalX = Double(p.x)
            finalY = Double(p.y)
            finalWidth = s.width > 0 ? Double(s.width) : nil
            finalHeight = s.height > 0 ? Double(s.height) : nil
        }
        let isGeometricallyVisible = finalX != nil && finalY != nil && finalWidth != nil && finalHeight != nil

        // Always update the visible_elements_count stat based on geometry, regardless of collection
        if isGeometricallyVisible {
            statistics.visible_elements_count += 1
        }

        // 4. Apply Filtering Logic
        // Append the role description unless it merely repeats the role name.
        var displayRole = role
        if let desc = roleDesc, !desc.isEmpty, !desc.elementsEqual(roleWithoutAX) {
            displayRole = "\(role) (\(desc))"
        }

        let passesOriginalFilter = !isNonInteractable || hasText
        let shouldCollectElement = passesOriginalFilter && (!onlyVisibleElements || isGeometricallyVisible)

        if shouldCollectElement {
            let elementData = ElementData(
                role: displayRole, text: combinedText,
                x: finalX, y: finalY, width: finalWidth, height: finalHeight
            )
            // Update text counts only for newly collected (non-duplicate) elements
            if collectedElements.insert(elementData).inserted {
                if hasText {
                    statistics.with_text_count += 1
                } else {
                    statistics.without_text_count += 1
                }
            }
        } else {
            // Update exclusion counts
            statistics.excluded_count += 1
            // Note: these two reason counters are bumped for every excluded element,
            // so an element excluded *only* for being invisible still contributes to
            // them. This keeps the stats simple; refine if needed.
            if isNonInteractable { statistics.excluded_non_interactable += 1 }
            if !hasText { statistics.excluded_no_text += 1 }
        }

        // 5. Recursively traverse children, windows, main window
        // a) Windows
        if let windows = copyAttributeValue(element: element, attribute: kAXWindowsAttribute as String) as? [AXUIElement] {
            for windowElement in windows where !visitedElements.contains(windowElement) {
                walkElementTree(element: windowElement, depth: depth + 1)
            }
        }

        // b) Main Window
        if let mainWindowValue = copyAttributeValue(element: element, attribute: kAXMainWindowAttribute as String),
           CFGetTypeID(mainWindowValue) == AXUIElementGetTypeID() {
            let mainWindowElement = mainWindowValue as! AXUIElement
            if !visitedElements.contains(mainWindowElement) {
                walkElementTree(element: mainWindowElement, depth: depth + 1)
            }
        }

        // c) Regular Children
        if let children = copyAttributeValue(element: element, attribute: kAXChildrenAttribute as String) as? [AXUIElement] {
            for childElement in children where !visitedElements.contains(childElement) {
                walkElementTree(element: childElement, depth: depth + 1)
            }
        }
    }


    /// Logs how long the step that just finished took and restarts the step timer.
    func logStepCompletion(_ stepDescription: String) {
        let endTime = Date()
        let duration = endTime.timeIntervalSince(stepStartTime)
        let durationStr = String(format: "%.3f", duration)
        fputs("info: [\(durationStr)s] finished '\(stepDescription)'\n", stderr)
        stepStartTime = endTime // Reset start time for the next step
    }
} // End of AccessibilityTraversalOperation class
--------------------------------------------------------------------------------
/Sources/MacosUseSDK/ActionCoordinator.swift:
--------------------------------------------------------------------------------
import Foundation
import CoreGraphics
import AppKit // For NSWorkspace, NSRunningApplication, CGPoint, etc.

// --- Enums and Structs for Orchestration ---

/// Defines the specific type of user input simulation.
public enum InputAction: Sendable {
    case click(point: CGPoint)
    case doubleClick(point: CGPoint)
    case rightClick(point: CGPoint)
    case type(text: String)
    // Use keyName for easier specification, maps to CGKeyCode internally
    case press(keyName: String, flags: CGEventFlags = [])
    case move(to: CGPoint)
}

/// Defines the main action to be performed.
public enum PrimaryAction: Sendable {
    // Identifier can be name, bundleID, or path
    case open(identifier: String)
    // Encapsulates various input types
    case input(action: InputAction)
    // If only traversal is needed, specify PID via options
    case traverseOnly
}

/// Configuration options for the orchestrated action.
public struct ActionOptions: Sendable {
    /// Perform traversal before the primary action. Required if `showDiff` is true.
    public var traverseBefore: Bool = false
    /// Perform traversal after the primary action. Required if `showDiff` is true.
    public var traverseAfter: Bool = false
    /// Calculate and return the difference between before/after traversals. Implies `traverseBefore` and `traverseAfter`.
    public var showDiff: Bool = false
    /// Filter traversals to only include elements with position and size > 0.
    public var onlyVisibleElements: Bool = false
    /// Show visual feedback for input actions (e.g., click pulse, typing caption) AND highlight elements found in the *final* traversal.
    public var showAnimation: Bool = true // Consolidated flag
    /// Duration for input animations and element highlighting.
    public var animationDuration: Double = 0.8
    /// Explicitly provide the PID for traversal if the primary action isn't `open`. Required if traversing without opening.
    public var pidForTraversal: pid_t? = nil
    /// Delay in seconds *after* the primary action completes, but *before* the 'after' traversal starts.
    public var delayAfterAction: Double = 0.2

    /// Returns a copy whose flags are mutually consistent: when `showDiff` is
    /// set, both traversals are switched on. Otherwise the copy is unchanged.
    public func validated() -> ActionOptions {
        guard showDiff else { return self }
        var normalized = self
        normalized.traverseBefore = true
        normalized.traverseAfter = true
        return normalized
    }

    /// Memberwise initializer; every parameter defaults to the property's default value.
    public init(
        traverseBefore: Bool = false,
        traverseAfter: Bool = false,
        showDiff: Bool = false,
        onlyVisibleElements: Bool = false,
        showAnimation: Bool = true,
        animationDuration: Double = 0.8,
        pidForTraversal: pid_t? = nil,
        delayAfterAction: Double = 0.2
    ) {
        self.traverseBefore = traverseBefore
        self.traverseAfter = traverseAfter
        self.showDiff = showDiff
        self.onlyVisibleElements = onlyVisibleElements
        self.showAnimation = showAnimation // Use the new flag
        self.animationDuration = animationDuration
        self.pidForTraversal = pidForTraversal
        self.delayAfterAction = delayAfterAction
    }
}


/// Contains the results of the orchestrated action.
public struct ActionResult: Codable, Sendable {
    /// Result from the `openApplication` action, if performed.
    public var openResult: AppOpenerResult?
    /// The PID used for traversals. Determined by `open` or provided in options.
    public var traversalPid: pid_t?
    /// Traversal data captured *before* the primary action.
    public var traversalBefore: ResponseData?
    /// Traversal data captured *after* the primary action.
    public var traversalAfter: ResponseData?
    /// The calculated difference between traversals, if requested.
    public var traversalDiff: TraversalDiff?
    /// Description of any error encountered during the primary action (open/input).
    public var primaryActionError: String?
    /// Description of any error encountered during the 'before' traversal.
    public var traversalBeforeError: String?
    /// Description of any error encountered during the 'after' traversal.
    public var traversalAfterError: String?

    /// Memberwise initializer; every field defaults to `nil`.
    public init(
        openResult: AppOpenerResult? = nil,
        traversalPid: pid_t? = nil,
        traversalBefore: ResponseData? = nil,
        traversalAfter: ResponseData? = nil,
        traversalDiff: TraversalDiff? = nil,
        primaryActionError: String? = nil,
        traversalBeforeError: String? = nil,
        traversalAfterError: String? = nil
    ) {
        self.openResult = openResult
        self.traversalPid = traversalPid
        self.traversalBefore = traversalBefore
        self.traversalAfter = traversalAfter
        self.traversalDiff = traversalDiff
        self.primaryActionError = primaryActionError
        self.traversalBeforeError = traversalBeforeError
        self.traversalAfterError = traversalAfterError
    }
}


// --- Action Coordinator Logic ---

/// Orchestrates application opening, input simulation, and accessibility traversal.
/// Requires running on the main actor due to UI interactions.
///
/// - Parameters:
///   - action: The primary action to perform (`PrimaryAction`).
///   - optionsInput: Configuration for the action execution (`ActionOptions`); it is
///     normalized with `validated()` before use.
/// - Returns: An `ActionResult` containing the results of the steps performed. The
///   function does not throw; failures are recorded in the result's error fields.
@MainActor
public func performAction(
    action: PrimaryAction,
    optionsInput: ActionOptions = ActionOptions()
) async -> ActionResult { // Changed to return ActionResult directly, errors are stored within it
    let options = optionsInput.validated() // Ensure options are consistent (e.g., showDiff implies traversals)
    var result = ActionResult()
    var effectivePid: pid_t? = options.pidForTraversal
    var primaryActionError: Error?
= nil // Temporary storage for Error objects 122 | var primaryActionExecuted: Bool = false // Flag to track if primary action ran 123 | 124 | fputs("info: [Coordinator] Starting action: \(action) with options: \(options)\n", stderr) 125 | 126 | // --- 1. Determine Target PID & Execute Open Action --- 127 | if case .open(let identifier) = action { 128 | fputs("info: [Coordinator] Primary action is 'open', attempting to get PID for '\(identifier)'...\n", stderr) 129 | do { 130 | let openRes = try await openApplication(identifier: identifier) 131 | result.openResult = openRes 132 | effectivePid = openRes.pid 133 | fputs("info: [Coordinator] App opened successfully. PID: \(effectivePid!).\n", stderr) 134 | primaryActionExecuted = true // Mark 'open' as executed 135 | // REMOVED Delay specific to open 136 | } catch { 137 | fputs("error: [Coordinator] Failed to open application '\(identifier)': \(error.localizedDescription)\n", stderr) 138 | primaryActionError = error 139 | if effectivePid == nil { 140 | result.primaryActionError = error.localizedDescription 141 | fputs("warning: [Coordinator] Cannot proceed with PID-dependent steps (traversal) due to open failure and no provided PID.\n", stderr) 142 | return result 143 | } else { 144 | fputs("warning: [Coordinator] Open failed, but continuing with provided PID \(effectivePid!).\n", stderr) 145 | } 146 | } 147 | } 148 | 149 | result.traversalPid = effectivePid 150 | 151 | // --- Check if PID is available for traversal --- 152 | guard let pid = effectivePid, (options.traverseBefore || options.traverseAfter || options.showAnimation) else { 153 | if options.traverseBefore || options.traverseAfter || options.showAnimation { 154 | fputs("warning: [Coordinator] Traversal or animation requested, but no PID could be determined (app open failed or PID not provided).\n", stderr) 155 | if options.traverseBefore { result.traversalBeforeError = "PID unavailable" } 156 | if options.traverseAfter { result.traversalAfterError = "PID 
unavailable" } 157 | } else { 158 | fputs("info: [Coordinator] No PID determined and no traversal/animation requested. Proceeding with primary action only (if applicable).\n", stderr) 159 | } 160 | // If primary action was *not* open, execute it now if PID wasn't available/needed 161 | if case .input(let inputAction) = action { 162 | fputs("info: [Coordinator] Executing primary input action (no PID context available/needed for traversal)...\n", stderr) 163 | do { 164 | try await executeInputAction(inputAction, options: options) 165 | primaryActionExecuted = true // Mark 'input' as executed 166 | } catch { 167 | fputs("error: [Coordinator] Failed to execute input action: \(error.localizedDescription)\n", stderr) 168 | primaryActionError = error 169 | } 170 | } else if case .traverseOnly = action { 171 | // Nothing to execute, no action here. primaryActionExecuted remains false. 172 | } 173 | 174 | // Apply generic delay if an action was executed *and* a delay is set, 175 | // even if no traversal follows (though less common use case). 176 | if primaryActionExecuted && options.delayAfterAction > 0 { 177 | fputs("info: [Coordinator] Primary action finished. Applying delay: \(options.delayAfterAction)s (before exiting due to no PID/traversal/animation)\n", stderr) 178 | try? await Task.sleep(nanoseconds: UInt64(options.delayAfterAction * 1_000_000_000)) 179 | } 180 | 181 | result.primaryActionError = primaryActionError?.localizedDescription 182 | return result 183 | } 184 | 185 | fputs("info: [Coordinator] Effective PID for subsequent steps: \(pid)\n", stderr) 186 | 187 | // --- 2. Traverse Before --- 188 | if options.traverseBefore { 189 | fputs("info: [Coordinator] Performing pre-action traversal for PID \(pid)...\n", stderr) 190 | do { 191 | result.traversalBefore = try traverseAccessibilityTree(pid: pid, onlyVisibleElements: options.onlyVisibleElements) 192 | fputs("info: [Coordinator] Pre-action traversal complete. 
Elements: \(result.traversalBefore?.elements.count ?? 0)\n", stderr) 193 | } catch { 194 | fputs("error: [Coordinator] Pre-action traversal failed: \(error.localizedDescription)\n", stderr) 195 | result.traversalBeforeError = error.localizedDescription 196 | } 197 | } 198 | 199 | // --- 3. Execute Primary Input Action (if not 'open' or 'traverseOnly') --- 200 | if case .input(let inputAction) = action { 201 | fputs("info: [Coordinator] Executing primary input action...\n", stderr) 202 | do { 203 | try await executeInputAction(inputAction, options: options) 204 | primaryActionExecuted = true // Mark 'input' as executed 205 | } catch { 206 | fputs("error: [Coordinator] Failed to execute input action: \(error.localizedDescription)\n", stderr) 207 | primaryActionError = error 208 | } 209 | } else if case .traverseOnly = action { 210 | fputs("info: [Coordinator] Primary action is 'traverseOnly', skipping action execution.\n", stderr) 211 | } // 'open' action was handled earlier 212 | 213 | // --- 4. Apply Delay AFTER Action, BEFORE Traverse After --- 214 | // Apply delay only if an action was actually executed and delay > 0 215 | if primaryActionExecuted && options.delayAfterAction > 0 { 216 | fputs("info: [Coordinator] Primary action finished. Applying delay: \(options.delayAfterAction)s (before post-action traversal)\n", stderr) 217 | try? await Task.sleep(nanoseconds: UInt64(options.delayAfterAction * 1_000_000_000)) 218 | } 219 | 220 | 221 | // --- 5. Traverse After --- 222 | var finalTraversalData: ResponseData? = nil 223 | if options.traverseAfter { 224 | fputs("info: [Coordinator] Performing post-action traversal for PID \(pid)...\n", stderr) 225 | do { 226 | let traversalData = try traverseAccessibilityTree(pid: pid, onlyVisibleElements: options.onlyVisibleElements) 227 | result.traversalAfter = traversalData 228 | finalTraversalData = traversalData // Keep for highlighting 229 | fputs("info: [Coordinator] Post-action traversal complete. 
Elements: \(traversalData.elements.count)\n", stderr) 230 | } catch { 231 | fputs("error: [Coordinator] Post-action traversal failed: \(error.localizedDescription)\n", stderr) 232 | result.traversalAfterError = error.localizedDescription 233 | } 234 | } 235 | 236 | // --- 6. Calculate Diff --- 237 | if options.showDiff { 238 | fputs("info: [Coordinator] Calculating detailed traversal diff...\n", stderr) 239 | if let beforeElements = result.traversalBefore?.elements, let afterElements = result.traversalAfter?.elements { 240 | 241 | // --- DETAILED DIFF LOGIC START --- 242 | var added: [ElementData] = [] 243 | var removed: [ElementData] = [] 244 | var modified: [ModifiedElement] = [] 245 | 246 | // FIX: Use let for afterElements copy, since we iterate it but don't mutate this copy directly 247 | let remainingAfter = afterElements 248 | var matchedAfterIndices = Set() // Keep track of matched 'after' elements 249 | 250 | let positionTolerance: Double = 5.0 // Max distance in points to consider a position match 251 | 252 | // Iterate through 'before' elements to find matches or mark as removed 253 | for beforeElement in beforeElements { 254 | var bestMatchIndex: Int? 
= nil 255 | var smallestDistanceSq: Double = .greatestFiniteMagnitude 256 | 257 | // Find potential matches in the 'after' list 258 | for (index, afterElement) in remainingAfter.enumerated() { 259 | // Skip if already matched or role doesn't match 260 | if matchedAfterIndices.contains(index) || beforeElement.role != afterElement.role { 261 | continue 262 | } 263 | 264 | // Check position proximity (if coordinates exist) 265 | if let bx = beforeElement.x, let by = beforeElement.y, let ax = afterElement.x, let ay = afterElement.y { 266 | let dx = bx - ax 267 | let dy = by - ay 268 | let distanceSq = (dx * dx) + (dy * dy) 269 | 270 | if distanceSq <= (positionTolerance * positionTolerance) { 271 | // Found a plausible match based on role and position 272 | // If multiple are close, pick the closest one 273 | if distanceSq < smallestDistanceSq { 274 | smallestDistanceSq = distanceSq 275 | bestMatchIndex = index 276 | } 277 | } 278 | } else if beforeElement.x == nil && afterElement.x == nil && beforeElement.y == nil && afterElement.y == nil { 279 | // If *both* lack position, consider them potentially matched if role matches (and text?) 280 | // For now, let's focus on positional matching primarily. 
281 | // Maybe add a fallback: if role matches AND text matches (and text exists) 282 | if let bt = beforeElement.text, let at = afterElement.text, bt == at { 283 | if bestMatchIndex == nil { // Only if no positional match found yet 284 | bestMatchIndex = index 285 | // Don't update smallestDistanceSq here as it's not a positional match 286 | } 287 | } 288 | } 289 | } // End inner loop through 'after' elements 290 | 291 | if let matchIndex = bestMatchIndex { 292 | // Found a match 293 | let afterElement = remainingAfter[matchIndex] 294 | matchedAfterIndices.insert(matchIndex) // Mark as matched 295 | 296 | // --- UPDATED Attribute Comparison --- 297 | var attributeChanges: [AttributeChangeDetail] = [] 298 | 299 | // Handle TEXT change specifically using the dedicated initializer 300 | if beforeElement.text != afterElement.text { 301 | attributeChanges.append(AttributeChangeDetail(textBefore: beforeElement.text, textAfter: afterElement.text)) 302 | } 303 | 304 | // Handle other attributes using generic/double initializers 305 | if !areDoublesEqual(beforeElement.x, afterElement.x) { 306 | attributeChanges.append(AttributeChangeDetail(attribute: "x", before: beforeElement.x, after: afterElement.x)) 307 | } 308 | if !areDoublesEqual(beforeElement.y, afterElement.y) { 309 | attributeChanges.append(AttributeChangeDetail(attribute: "y", before: beforeElement.y, after: afterElement.y)) 310 | } 311 | if !areDoublesEqual(beforeElement.width, afterElement.width) { 312 | attributeChanges.append(AttributeChangeDetail(attribute: "width", before: beforeElement.width, after: afterElement.width)) 313 | } 314 | if !areDoublesEqual(beforeElement.height, afterElement.height) { 315 | attributeChanges.append(AttributeChangeDetail(attribute: "height", before: beforeElement.height, after: afterElement.height)) 316 | } 317 | // --- End Updated Attribute Comparison --- 318 | 319 | if !attributeChanges.isEmpty { 320 | modified.append(ModifiedElement(before: beforeElement, after: 
afterElement, changes: attributeChanges)) 321 | } 322 | } else { 323 | // No match found for this 'before' element, it was removed 324 | removed.append(beforeElement) 325 | } 326 | } // End outer loop through 'before' elements 327 | 328 | // Any 'after' elements not matched are 'added' 329 | for (index, afterElement) in remainingAfter.enumerated() { 330 | if !matchedAfterIndices.contains(index) { 331 | added.append(afterElement) 332 | } 333 | } 334 | 335 | // Assign to result (using the TraversalDiff struct from CombinedActions.swift) 336 | result.traversalDiff = TraversalDiff(added: added, removed: removed, modified: modified) 337 | fputs("info: [Coordinator] Detailed diff calculated: Added=\(added.count), Removed=\(removed.count), Modified=\(modified.count)\n", stderr) 338 | // --- DETAILED DIFF LOGIC END --- 339 | 340 | } else { 341 | fputs("warning: [Coordinator] Cannot calculate detailed diff because one or both traversals failed or were not performed.\n", stderr) 342 | } 343 | } 344 | 345 | // --- 7. Highlight Target Elements (Now controlled by showAnimation) --- 346 | if options.showAnimation { 347 | if let elementsToHighlight = finalTraversalData?.elements, !elementsToHighlight.isEmpty { 348 | fputs("info: [Coordinator] Highlighting \(elementsToHighlight.count) elements from final traversal (showAnimation=true)...\n", stderr) 349 | // No need for try/catch as drawHighlightBoxes is async and handles errors internally via logs 350 | drawHighlightBoxes(for: elementsToHighlight, duration: options.animationDuration) 351 | // Note: Highlighting starts and runs async, this function returns before it finishes. 
352 | } else if finalTraversalData == nil && options.traverseAfter { 353 | fputs("warning: [Coordinator] Animation requested, but post-action traversal failed or was skipped (cannot highlight).\n", stderr) 354 | } else { 355 | fputs("info: [Coordinator] Animation requested, but no elements found in the final traversal to highlight.\n", stderr) 356 | } 357 | } else { 358 | fputs("info: [Coordinator] Skipping element highlighting (showAnimation=false).\n", stderr) 359 | } 360 | 361 | // Store any primary action error encountered 362 | result.primaryActionError = primaryActionError?.localizedDescription 363 | 364 | fputs("info: [Coordinator] Action sequence finished.\n", stderr) 365 | return result 366 | } 367 | 368 |
/// Runs a single `InputAction` through the matching input-simulation routine.
///
/// Each case has two paths: when `options.showAnimation` is `true` the
/// `...AndVisualize` variant is called, otherwise the plain simulation is called.
/// The chosen path is logged to stderr before the call is made.
///
/// - Parameters:
///   - action: The input action to simulate.
///   - options: Supplies `showAnimation` and `animationDuration`.
/// - Throws: `MacosUseSDKError.inputInvalidArgument` when the key name of a `.press`
///   action cannot be mapped to a key code; otherwise whatever the invoked
///   simulation routine throws.
@MainActor
private func executeInputAction(_ action: InputAction, options: ActionOptions) async throws {
    let animated = options.showAnimation
    let duration = options.animationDuration

    switch action {
    case .click(let point):
        guard animated else {
            fputs("log: simulating click at \(point) (no visualization)\n", stderr)
            try clickMouse(at: point)
            return
        }
        fputs("log: simulating click AND visualizing at \(point) (duration: \(options.animationDuration))\n", stderr)
        try clickMouseAndVisualize(at: point, duration: duration)

    case .doubleClick(let point):
        guard animated else {
            fputs("log: simulating double-click at \(point) (no visualization)\n", stderr)
            try doubleClickMouse(at: point)
            return
        }
        fputs("log: simulating double-click AND visualizing at \(point) (duration: \(options.animationDuration))\n", stderr)
        try doubleClickMouseAndVisualize(at: point, duration: duration)

    case .rightClick(let point):
        guard animated else {
            fputs("log: simulating right-click at \(point) (no visualization)\n", stderr)
            try rightClickMouse(at: point)
            return
        }
        fputs("log: simulating right-click AND visualizing at \(point) (duration: \(options.animationDuration))\n", stderr)
        try rightClickMouseAndVisualize(at: point, duration: duration)

    case .type(let text):
        guard animated else {
            fputs("log: simulating text writing \"\(text)\" (no visualization)\n", stderr)
            try writeText(text)
            return
        }
        fputs("log: simulating text writing AND visualizing caption \"\(text)\" (auto duration)\n", stderr)
        // `nil` (not `animationDuration`) so the visualizer computes its own duration.
        try writeTextAndVisualize(text, duration: nil)

    case .press(let keyName, let flags):
        // Resolve the key first: an unknown name fails before anything is logged or simulated.
        guard let keyCode = mapKeyNameToKeyCode(keyName) else {
            throw MacosUseSDKError.inputInvalidArgument("Unknown key name: \(keyName)")
        }
        guard animated else {
            fputs("log: simulating key press \(keyName) (\(keyCode)) (no visualization)\n", stderr)
            try pressKey(keyCode: keyCode, flags: flags)
            return
        }
        fputs("log: simulating key press \(keyName) (\(keyCode)) AND visualizing (duration: \(options.animationDuration))\n", stderr)
        try pressKeyAndVisualize(keyCode: keyCode, flags: flags, duration: duration)

    case .move(let point):
        guard animated else {
            fputs("log: simulating mouse move to \(point) (no visualization)\n", stderr)
            try moveMouse(to: point)
            return
        }
        fputs("log: simulating mouse move AND visualizing to \(point) (duration: \(options.animationDuration))\n", stderr)
        try moveMouseAndVisualize(to: point, duration: duration)
    }
}

427 | // --- ADD Helper function for comparing optional Doubles --- 428 | fileprivate func areDoublesEqual(_ d1: Double?, _ d2: Double?, tolerance: Double = 0.01) -> Bool { 429 | switch (d1, d2) { 430 | case (nil, nil): 431 | return true // Both nil are considered equal in this context 432 | case (let val1?, let val2?): 433 | // Use tolerance for floating point 
comparison if both exist 434 | return abs(val1 - val2) < tolerance 435 | default: 436 | return false // One is nil, the other is not 437 | } 438 | } 439 | -------------------------------------------------------------------------------- /Sources/MacosUseSDK/AppOpener.swift: --------------------------------------------------------------------------------
import AppKit
import Foundation

// MARK: - Errors

// Define potential errors during app opening
public extension MacosUseSDKError {
    /// Failures reported while resolving, launching, or activating an application.
    enum AppOpenerError: Error, LocalizedError {
        case appNotFound(identifier: String)
        case invalidPath(path: String)
        case activationFailed(identifier: String, underlyingError: Error?)
        case pidLookupFailed(identifier: String)
        case unexpectedNilURL

        /// Human-readable message for each case; `activationFailed` appends the
        /// underlying error's description when one is attached.
        public var errorDescription: String? {
            switch self {
            case .appNotFound(let id):
                return "Application not found for identifier: '\(id)'"
            case .invalidPath(let path):
                return "Provided path does not appear to be a valid application bundle: '\(path)'"
            case .activationFailed(let id, let err):
                let base = "Failed to open/activate application '\(id)'"
                guard let err else { return base }
                return "\(base): \(err.localizedDescription)"
            case .pidLookupFailed(let id):
                return "Could not determine PID for application '\(id)' after activation attempt."
            case .unexpectedNilURL:
                return "Internal error: Application URL became nil unexpectedly."
            }
        }
    }
}

// MARK: - Result

/// Successful outcome of `openApplication(identifier:)`.
public struct AppOpenerResult: Codable, Sendable {
    /// Process identifier of the running application instance.
    public let pid: pid_t
    /// Name resolved for the application; falls back to the caller's identifier.
    public let appName: String
    /// Total wall-clock time of the operation in seconds, formatted with `%.3f`.
    public let processingTimeSeconds: String
}

// MARK: - Operation

/// Stateful helper that performs one open/activate request.
///
/// A class instance is used so the per-step timer (`stepStartTime`) survives across
/// the `await` points in `execute()`.
@MainActor
private final class AppOpenerOperation {
    /// What the discovery step learned about the target application.
    private struct DiscoveredApp {
        let url: URL
        let bundleIdentifier: String?
        let name: String?
    }

    let appIdentifier: String
    let overallStartTime: Date = Date()
    var stepStartTime: Date

    init(identifier: String) {
        self.appIdentifier = identifier
        self.stepStartTime = overallStartTime // Initialize step timer
        fputs("info: starting AppOpenerOperation for: \(identifier)\n", stderr)
    }

    /// Logs how long the step that just finished took and restarts the step timer.
    func logStepCompletion(_ stepDescription: String) {
        let endTime = Date()
        let duration = endTime.timeIntervalSince(stepStartTime)
        let durationStr = String(format: "%.3f", duration)
        fputs("info: [\(durationStr)s] finished '\(stepDescription)'\n", stderr)
        stepStartTime = endTime // Reset for next step
    }

    /// Seconds elapsed since the operation was created, formatted with `%.3f`.
    private func elapsedTimeString() -> String {
        String(format: "%.3f", Date().timeIntervalSince(overallStartTime))
    }

    /// Returns the bundle's `CFBundleName`, or its bundle identifier when no name is set.
    ///
    /// `object(forInfoDictionaryKey:)` is used instead of `localizedInfoDictionary`:
    /// the latter only contains keys from the bundle's localized strings file, so apps
    /// that do not localize `CFBundleName` would always fall through to the bundle ID.
    private func displayName(of bundle: Bundle) -> String? {
        (bundle.object(forInfoDictionaryKey: "CFBundleName") as? String) ?? bundle.bundleIdentifier
    }

    /// Treats the identifier as a filesystem path to an `.app` bundle.
    ///
    /// - Returns: The discovered app, or `nil` when the identifier does not look like a
    ///   path or does not point at an existing `.app` directory.
    private func discoverFromPath() -> DiscoveredApp? {
        guard appIdentifier.hasSuffix(".app"), appIdentifier.contains("/") else { return nil }

        fputs("info: interpreting '\(appIdentifier)' as a path.\n", stderr)
        let candidate = URL(fileURLWithPath: appIdentifier)
        var isDirectory: ObjCBool = false
        guard FileManager.default.fileExists(atPath: candidate.path, isDirectory: &isDirectory),
              isDirectory.boolValue,
              candidate.pathExtension == "app"
        else {
            fputs("warning: provided path does not appear to be a valid application bundle: \(appIdentifier). Will try as name/bundleID.\n", stderr)
            return nil
        }

        fputs("info: path confirmed as valid application bundle: \(candidate.path)\n", stderr)
        guard let bundle = Bundle(url: candidate) else {
            return DiscoveredApp(url: candidate, bundleIdentifier: nil, name: nil)
        }
        let name = displayName(of: bundle)
        fputs("info: derived bundleID: \(bundle.bundleIdentifier ?? "nil"), name: \(name ?? "nil") from path\n", stderr)
        return DiscoveredApp(url: candidate, bundleIdentifier: bundle.bundleIdentifier, name: name)
    }

    /// Treats the identifier as a bundle ID first, then as an application name looked
    /// up in the standard application directories.
    ///
    /// - Throws: `AppOpenerError.appNotFound` when neither lookup yields a URL.
    private func discoverByBundleIDOrName() throws -> DiscoveredApp {
        let workspace = NSWorkspace.shared
        fputs("info: interpreting '\(appIdentifier)' as an application name or bundleID, searching...\n", stderr)

        if let url = workspace.urlForApplication(withBundleIdentifier: appIdentifier) {
            fputs("info: found application url via bundleID '\(appIdentifier)': \(url.path)\n", stderr)
            var name: String?
            if let bundle = Bundle(url: url) {
                name = displayName(of: bundle)
            }
            return DiscoveredApp(url: url, bundleIdentifier: appIdentifier, name: name)
        }

        // NOTE(review): `urlForApplication(toOpen:)` is documented as returning the default
        // app for opening a URL; confirm it is the intended API for a by-name lookup.
        let searchDirectories = ["/Applications", "/System/Applications", "/System/Applications/Utilities"]
        for directory in searchDirectories {
            let candidate = URL(fileURLWithPath: "\(directory)/\(appIdentifier).app")
            guard let url = workspace.urlForApplication(toOpen: candidate) else { continue }

            fputs("info: found application url via name search '\(appIdentifier)': \(url.path)\n", stderr)
            guard let bundle = Bundle(url: url) else {
                return DiscoveredApp(url: url, bundleIdentifier: nil, name: nil)
            }
            let name = displayName(of: bundle)
            fputs("info: derived bundleID: \(bundle.bundleIdentifier ?? "nil"), name: \(name ?? "nil") from found URL\n", stderr)
            return DiscoveredApp(url: url, bundleIdentifier: bundle.bundleIdentifier, name: name)
        }

        logStepCompletion("application discovery (failed)")
        throw MacosUseSDKError.AppOpenerError.appNotFound(identifier: appIdentifier)
    }

    /// Looks for an already-running instance of `app`, by bundle ID when one is known
    /// and otherwise by comparing bundle/executable URLs.
    private func findRunningPID(of app: DiscoveredApp) -> pid_t? {
        if let bundleID = app.bundleIdentifier {
            fputs("info: checking running applications for bundle id: \(bundleID)\n", stderr)
            guard let pid = NSRunningApplication.runningApplications(withBundleIdentifier: bundleID).first?.processIdentifier else {
                fputs("info: no running instance found for bundle id \(bundleID) before activation attempt.\n", stderr)
                return nil
            }
            fputs("info: found running instance with pid \(pid) for bundle id \(bundleID).\n", stderr)
            return pid
        }

        fputs("warning: no bundle identifier, attempting lookup by URL: \(app.url.path)\n", stderr)
        let target = app.url.standardizedFileURL
        let match = NSWorkspace.shared.runningApplications.first {
            $0.bundleURL?.standardizedFileURL == target || $0.executableURL?.standardizedFileURL == target
        }
        guard let pid = match?.processIdentifier else {
            fputs("info: no running instance found by URL before activation attempt.\n", stderr)
            return nil
        }
        fputs("info: found running instance with pid \(pid) matching URL.\n", stderr)
        return pid
    }

    /// Resolves the application, opens or activates it, and reports its PID.
    ///
    /// A PID found before activation takes precedence over the one returned by the
    /// launch call. If the launch call fails but a running instance had been found,
    /// that instance's PID is returned instead of throwing.
    ///
    /// - Returns: The PID, resolved name, and total processing time.
    /// - Throws: `AppOpenerError.appNotFound` when discovery fails, or
    ///   `AppOpenerError.activationFailed` when the launch fails and no running
    ///   instance was found beforehand.
    func execute() async throws -> AppOpenerResult {
        // --- 1. Application Discovery ---
        let app: DiscoveredApp
        if let fromPath = discoverFromPath() {
            app = fromPath
        } else {
            app = try discoverByBundleIDOrName()
        }
        logStepCompletion("application discovery (url: \(app.url.path), bundleID: \(app.bundleIdentifier ?? "nil"))")

        // Settle the name once so later steps work with a non-optional value.
        var resolvedName = app.name
        if resolvedName == nil, let bundle = Bundle(url: app.url) {
            resolvedName = displayName(of: bundle)
        }
        let appName = resolvedName ?? appIdentifier

        // --- 2. Pre-find PID if running ---
        let existingPID = findRunningPID(of: app)
        let existingPIDDescription = existingPID.map { String($0) } ?? "none found"
        logStepCompletion("pre-finding existing process (pid: \(existingPIDDescription))")

        // --- 3. Open/Activate Application ---
        fputs("info: attempting to open/activate application: \(appName)\n", stderr)
        let workspace = NSWorkspace.shared
        let appURL = app.url
        let configuration = NSWorkspace.OpenConfiguration()

        let launchedPID: pid_t
        do {
            // Await the async call AND extract the PID within an explicit MainActor Task.
            // This replaces MainActor.run which caused issues in Swift 6.1 with async closures.
            launchedPID = try await Task { @MainActor in
                fputs("info: [Task @MainActor] executing workspace.openApplication...\n", stderr)
                let runningApp = try await workspace.openApplication(at: appURL, configuration: configuration)
                // Read the PID here so only the Sendable pid_t leaves the task.
                let pid = runningApp.processIdentifier
                fputs("info: [Task @MainActor] got pid \(pid) from NSRunningApplication.\n", stderr)
                return pid
            }.value
        } catch {
            logStepCompletion("opening/activating application (failed)")
            fputs("error: activation call failed: \(error.localizedDescription)\n", stderr)

            guard let existingPID else {
                fputs("error: PID could not be determined after activation failure.\n", stderr)
                fputs("info: total processing time (on failure): \(elapsedTimeString()) seconds\n", stderr)
                throw MacosUseSDKError.AppOpenerError.activationFailed(identifier: appIdentifier, underlyingError: error)
            }

            fputs("warning: activation failed, but PID \(existingPID) was found beforehand. Assuming it's running.\n", stderr)
            let formattedTime = elapsedTimeString()
            fputs("info: total processing time: \(formattedTime) seconds\n", stderr)
            return AppOpenerResult(pid: existingPID, appName: appName, processingTimeSeconds: formattedTime)
        }
        logStepCompletion("opening/activating application async call completed")

        // --- 4. Determine Final PID ---
        let finalPID: pid_t
        if let existingPID {
            finalPID = existingPID
            fputs("info: using pre-found pid \(existingPID).\n", stderr)
        } else {
            finalPID = launchedPID
            fputs("info: using pid \(finalPID) from newly launched/activated application instance.\n", stderr)
        }
        logStepCompletion("determining final pid (using \(finalPID))")

        // --- 5. Prepare Result ---
        let formattedTime = elapsedTimeString()
        fputs("success: application '\(appName)' active (pid: \(finalPID)).\n", stderr)
        fputs("info: total processing time: \(formattedTime) seconds\n", stderr)
        return AppOpenerResult(pid: finalPID, appName: appName, processingTimeSeconds: formattedTime)
    }
}


/// Opens or activates a macOS application identified by its name, bundle ID, or full path.
/// Outputs detailed logs to stderr.
///
/// - Parameter identifier: The application name (e.g., "Calculator"), bundle ID (e.g., "com.apple.calculator"), or full path (e.g., "/System/Applications/Calculator.app").
/// - Returns: An `AppOpenerResult` containing the PID, application name, and processing time on success.
/// - Throws: `MacosUseSDKError.AppOpenerError` if the application cannot be found, activated, or its PID determined.
@MainActor
public func openApplication(identifier: String) async throws -> AppOpenerResult {
    // Validate and forward the same trimmed value, so an identifier padded with
    // whitespace cannot pass validation and then fail discovery.
    let trimmedIdentifier = identifier.trimmingCharacters(in: .whitespacesAndNewlines)
    guard !trimmedIdentifier.isEmpty else {
        throw MacosUseSDKError.AppOpenerError.appNotFound(identifier: "(empty)")
    }

    // Create an instance of the helper class and execute its logic
    let operation = AppOpenerOperation(identifier: trimmedIdentifier)
    return try await operation.execute()
}

// --- IMPORTANT: Ensure no other executable code (like the old script lines) exists below this line in the file ---
// --- Remove any leftover 'if', 'guard', 'logStepCompletion', 'workspace.openApplication', 'RunLoop.main.run' calls from the top level ---
-------------------------------------------------------------------------------- /Sources/MacosUseSDK/CombinedActions.swift: --------------------------------------------------------------------------------
import Foundation // Needed for fputs, etc.
import CoreGraphics // Needed for CGPoint, CGKeyCode, CGEventFlags

/// Represents a change in a specific attribute of an accessibility element.
public struct AttributeChangeDetail: Codable, Sendable {
    public let attributeName: String

    // --- Fields for Simple Text Diff ---
    /// Text added (e.g., if newValue = oldValue + addedText). Populated only for text attribute changes.
    public let addedText: String?
    /// Text removed (e.g., if oldValue = newValue + removedText). Populated only for text attribute changes.
    public let removedText: String?

    // --- Fallback Fields ---
    /// Full old value, used for non-text attributes. The text initializer leaves this `nil`.
    public let oldValue: String?
    /// Full new value, used for non-text attributes. The text initializer leaves this `nil`.
    public let newValue: String?

    // --- Initializers ---

    /// Creates a change record for a non-text attribute from any describable value.
    ///
    /// Text changes should go through `init(textBefore:textAfter:)`; if this initializer
    /// is called with `attribute == "text"` a warning is logged and the plain old/new
    /// values are stored as a fallback.
    ///
    /// - Parameters:
    ///   - attribute: Name of the attribute that changed.
    ///   - before: Value before the change, stored via its `description`.
    ///   - after: Value after the change, stored via its `description`.
    init<T: CustomStringConvertible>(attribute: String, before: T?, after: T?) {
        if attribute == "text" {
            fputs("warning: Generic AttributeChangeDetail initializer called for 'text'. Use text-specific init.\n", stderr)
        }
        self.attributeName = attribute
        self.oldValue = before.map { $0.description }
        self.newValue = after.map { $0.description }
        self.addedText = nil // Not applicable
        self.removedText = nil // Not applicable
    }

    /// Creates a change record for a `Double` attribute (position/size).
    ///
    /// - Parameters:
    ///   - attribute: Name of the attribute that changed.
    ///   - before: Value before the change.
    ///   - after: Value after the change.
    ///   - format: `String(format:)` pattern applied to both values.
    init(attribute: String, before: Double?, after: Double?, format: String = "%.1f") {
        self.attributeName = attribute
        self.oldValue = before.map { String(format: format, $0) }
        self.newValue = after.map { String(format: format, $0) }
        self.addedText = nil
        self.removedText = nil
    }

    /// Creates a change record for the `text` attribute using a character-level diff.
    ///
    /// `nil` is treated as the empty string on both sides. Inserted and removed
    /// characters are collected in reading order (lowest offset first).
    ///
    /// - Parameters:
    ///   - textBefore: Text before the change.
    ///   - textAfter: Text after the change.
    init(textBefore: String?, textAfter: String?) {
        let old = textBefore ?? ""
        let new = textAfter ?? ""
        let diff = new.difference(from: old)

        func character(of change: CollectionDifference<Character>.Change) -> Character {
            switch change {
            case .insert(_, let element, _), .remove(_, let element, _):
                return element
            }
        }

        // Read `insertions` / `removals` rather than iterating `diff` itself: iterating
        // the difference yields removals from the highest offset down, which would
        // spell the removed text backwards.
        let added = String(diff.insertions.map(character(of:)))
        let removed = String(diff.removals.map(character(of:)))

        self.attributeName = "text"
        self.addedText = added.isEmpty ? nil : added
        self.removedText = removed.isEmpty ? nil : removed

        // The granular diff is the whole payload for text changes; the full values are
        // left out to avoid redundancy in the output.
        self.oldValue = nil
        self.newValue = nil
    }
}

/// Represents an element identified as potentially the same logical entity
/// across two traversals, but with modified attributes.
public struct ModifiedElement: Codable, Sendable {
    /// The element data from the 'before' traversal.
    public let before: ElementData
    /// The element data from the 'after' traversal.
    public let after: ElementData
    /// A list detailing the specific attributes that changed.
    public let changes: [AttributeChangeDetail]
}

/// Represents the difference between two accessibility traversals,
/// now including added, removed, and modified elements with attribute details.
public struct TraversalDiff: Codable, Sendable {
    public let added: [ElementData]
    public let removed: [ElementData]
    /// Elements identified as modified, along with their specific changes.
    public let modified: [ModifiedElement]
}

/// Holds the results of an action performed between two accessibility traversals:
/// the traversal taken after the action and the calculated difference.
public struct ActionDiffResult: Codable, Sendable {
    public let afterAction: ResponseData
    public let diff: TraversalDiff
}

115 | /// Defines combined, higher-level actions using the SDK's core functionalities. 116 | public enum CombinedActions { 117 | 118 | /// Opens or activates an application and then immediately traverses its accessibility tree. 119 | /// 120 | /// This combines the functionality of `openApplication` and `traverseAccessibilityTree`. 121 | /// Logs detailed steps to stderr. 122 | /// 123 | /// - Parameters: 124 | /// - identifier: The application name (e.g., "Calculator"), bundle ID (e.g., "com.apple.calculator"), or full path (e.g., "/System/Applications/Calculator.app"). 125 | /// - onlyVisibleElements: If true, the traversal only collects elements with valid position and size. Defaults to false. 126 | /// - Returns: A `ResponseData` struct containing the collected elements, statistics, and timing information from the traversal. 127 | /// - Throws: `MacosUseSDKError` if either the application opening/activation or the accessibility traversal fails. 128 | @MainActor // Ensures UI-related parts like activation happen on the main thread 129 | public static func openAndTraverseApp(identifier: String, onlyVisibleElements: Bool = false) async throws -> ResponseData { 130 | fputs("info: starting combined action 'openAndTraverseApp' for identifier: '\(identifier)'\n", stderr) 131 | 132 | // Step 1: Open or Activate the Application 133 | fputs("info: calling openApplication...\n", stderr) 134 | let openResult = try await MacosUseSDK.openApplication(identifier: identifier) 135 | fputs("info: openApplication completed successfully. 
PID: \(openResult.pid), App Name: \(openResult.appName)\n", stderr) 136 | 137 | // Step 2: Traverse the Accessibility Tree of the opened/activated application 138 | fputs("info: calling traverseAccessibilityTree for PID \(openResult.pid) (Visible Only: \(onlyVisibleElements))...\n", stderr) 139 | let traversalResult = try MacosUseSDK.traverseAccessibilityTree(pid: openResult.pid, onlyVisibleElements: onlyVisibleElements) 140 | fputs("info: traverseAccessibilityTree completed successfully.\n", stderr) 141 | 142 | // Step 3: Return the traversal result 143 | fputs("info: combined action 'openAndTraverseApp' finished.\n", stderr) 144 | return traversalResult 145 | } 146 | 147 | // --- Input Action followed by Traversal --- 148 | 149 | /// Simulates a left mouse click at the specified coordinates, then traverses the accessibility tree of the target application. 150 | /// 151 | /// - Parameters: 152 | /// - point: The `CGPoint` where the click should occur (screen coordinates). 153 | /// - pid: The Process ID (PID) of the application to traverse after the click. 154 | /// - onlyVisibleElements: If true, the traversal only collects elements with valid position and size. Defaults to false. 155 | /// - Returns: A `ResponseData` struct containing the collected elements, statistics, and timing information from the traversal. 156 | /// - Throws: `MacosUseSDKError` if the click simulation or the accessibility traversal fails. 
157 | @MainActor // Added for consistency, although core CGEvent might not strictly require it 158 | public static func clickAndTraverseApp(point: CGPoint, pid: Int32, onlyVisibleElements: Bool = false) async throws -> ResponseData { 159 | fputs("info: starting combined action 'clickAndTraverseApp' at (\(point.x), \(point.y)) for PID \(pid)\n", stderr) 160 | 161 | // Step 1: Perform the click 162 | fputs("info: calling clickMouse...\n", stderr) 163 | try MacosUseSDK.clickMouse(at: point) 164 | fputs("info: clickMouse completed successfully.\n", stderr) 165 | 166 | // Add a small delay to allow UI to potentially update after the click 167 | try await Task.sleep(nanoseconds: 100_000_000) // 100 milliseconds 168 | 169 | // Step 2: Traverse the Accessibility Tree 170 | fputs("info: calling traverseAccessibilityTree for PID \(pid) (Visible Only: \(onlyVisibleElements))...\n", stderr) 171 | let traversalResult = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements) 172 | fputs("info: traverseAccessibilityTree completed successfully.\n", stderr) 173 | 174 | // Step 3: Return the traversal result 175 | fputs("info: combined action 'clickAndTraverseApp' finished.\n", stderr) 176 | return traversalResult 177 | } 178 | 179 | /// Simulates pressing a key with optional modifiers, then traverses the accessibility tree of the target application. 180 | /// 181 | /// - Parameters: 182 | /// - keyCode: The `CGKeyCode` of the key to press. 183 | /// - flags: The modifier flags (`CGEventFlags`) to apply. 184 | /// - pid: The Process ID (PID) of the application to traverse after the key press. 185 | /// - onlyVisibleElements: If true, the traversal only collects elements with valid position and size. Defaults to false. 186 | /// - Returns: A `ResponseData` struct containing the collected elements, statistics, and timing information from the traversal. 
/// - Throws: `MacosUseSDKError` if the key press simulation or the accessibility traversal fails.
@MainActor
public static func pressKeyAndTraverseApp(
    keyCode: CGKeyCode,
    flags: CGEventFlags = [],
    pid: Int32,
    onlyVisibleElements: Bool = false
) async throws -> ResponseData {
    fputs("info: starting combined action 'pressKeyAndTraverseApp' (key: \(keyCode), flags: \(flags.rawValue)) for PID \(pid)\n", stderr)

    // Send the key press; if it throws, no traversal is attempted.
    fputs("info: calling pressKey...\n", stderr)
    try MacosUseSDK.pressKey(keyCode: keyCode, flags: flags)
    fputs("info: pressKey completed successfully.\n", stderr)

    // Fixed 100 ms pause before inspecting the UI.
    // `Task.sleep(nanoseconds:)` throws if the surrounding task is cancelled while waiting.
    let settleDelayNanoseconds: UInt64 = 100_000_000
    try await Task.sleep(nanoseconds: settleDelayNanoseconds)

    // Snapshot the accessibility tree of the requested process after the key press.
    fputs("info: calling traverseAccessibilityTree for PID \(pid) (Visible Only: \(onlyVisibleElements))...\n", stderr)
    let postKeySnapshot = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
    fputs("info: traverseAccessibilityTree completed successfully.\n", stderr)

    // Hand the snapshot back to the caller unchanged.
    fputs("info: combined action 'pressKeyAndTraverseApp' finished.\n", stderr)
    return postKeySnapshot
}

/// Simulates typing text, then traverses the accessibility tree of the target application.
///
/// - Parameters:
///   - text: The `String` to type.
///   - pid: The Process ID (PID) of the application to traverse after typing the text.
///   - onlyVisibleElements: If true, the traversal only collects elements with valid position and size. Defaults to false.
/// - Returns: A `ResponseData` struct containing the collected elements, statistics, and timing information from the traversal.
/// - Throws: `MacosUseSDKError` if the text writing simulation or the accessibility traversal fails.
@MainActor
public static func writeTextAndTraverseApp(
    text: String,
    pid: Int32,
    onlyVisibleElements: Bool = false
) async throws -> ResponseData {
    fputs("info: starting combined action 'writeTextAndTraverseApp' (text: \"\(text)\") for PID \(pid)\n", stderr)

    // Type the text; if it throws, no traversal is attempted.
    fputs("info: calling writeText...\n", stderr)
    try MacosUseSDK.writeText(text)
    fputs("info: writeText completed successfully.\n", stderr)

    // Fixed 100 ms pause before inspecting the UI.
    // `Task.sleep(nanoseconds:)` throws if the surrounding task is cancelled while waiting.
    let settleDelayNanoseconds: UInt64 = 100_000_000
    try await Task.sleep(nanoseconds: settleDelayNanoseconds)

    // Snapshot the accessibility tree of the requested process after typing.
    fputs("info: calling traverseAccessibilityTree for PID \(pid) (Visible Only: \(onlyVisibleElements))...\n", stderr)
    let postTypingSnapshot = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
    fputs("info: traverseAccessibilityTree completed successfully.\n", stderr)

    // Hand the snapshot back to the caller unchanged.
    fputs("info: combined action 'writeTextAndTraverseApp' finished.\n", stderr)
    return postTypingSnapshot
}

// You can add similar functions for doubleClick, rightClick, moveMouse etc. if needed

// --- Helper Function for Diffing ---

/// Computes which elements appeared and which disappeared between two traversals.
///
/// Both lists are turned into sets, so duplicates within a list collapse and the
/// comparison relies on `ElementData`'s `Hashable` conformance. The `modified`
/// list of the result is always empty here.
///
/// - Parameters:
///   - beforeElements: The list of elements from the first traversal.
///   - afterElements: The list of elements from the second traversal.
/// - Returns: A `TraversalDiff` whose `added` and `removed` lists are ordered by `elementSortPredicate`.
private static func calculateDiff(beforeElements: [ElementData], afterElements: [ElementData]) -> TraversalDiff {
    fputs("debug: calculating diff between \(beforeElements.count) (before) and \(afterElements.count) (after) elements.\n", stderr)

    let previous = Set(beforeElements)
    let current = Set(afterElements)

    // In the second traversal only => added.
    let appeared = current.subtracting(previous)
    fputs("debug: diff calculation - found \(appeared.count) added elements.\n", stderr)

    // In the first traversal only => removed.
    let vanished = previous.subtracting(current)
    fputs("debug: diff calculation - found \(vanished.count) removed elements.\n", stderr)

    // Sorting gives the otherwise unordered set differences a consistent order.
    return TraversalDiff(
        added: appeared.sorted(by: elementSortPredicate),
        removed: vanished.sorted(by: elementSortPredicate),
        modified: []
    )
}

// Ordering used for diff output: ascending `y`, then ascending `x`.
// A missing coordinate is treated as `Double.greatestFiniteMagnitude`, so such elements sort last.
// (The original note says this matches AccessibilityTraversalOperation; that code is not visible here.)
private static var elementSortPredicate: (ElementData, ElementData) -> Bool {
    let missingCoordinate = Double.greatestFiniteMagnitude
    return { lhs, rhs in
        let lhsY = lhs.y ?? missingCoordinate
        let rhsY = rhs.y ?? missingCoordinate
        guard lhsY == rhsY else { return lhsY < rhsY }
        return (lhs.x ?? missingCoordinate) < (rhs.x ?? missingCoordinate)
    }
}


// --- Combined Actions with Diffing ---

/// Performs a left mouse click, bracketed by accessibility traversals, and returns the diff.
///
/// - Parameters:
///   - point: The `CGPoint` where the click should occur (screen coordinates).
///   - pid: The Process ID (PID) of the application to traverse.
///   - onlyVisibleElements: If true, traversals only collect elements with valid position/size. Defaults to false.
///   - delayAfterActionNano: Nanoseconds to wait after the action before the second traversal. Default 100ms.
/// - Returns: An `ActionDiffResult` holding the traversal taken after the click (`afterAction`) and the
///   diff against the traversal taken before it. The before-traversal itself is not part of the result.
/// - Throws: `MacosUseSDKError` if any step (traversal, click) fails. The wait between click and second
///   traversal uses `Task.sleep`, which throws if the task is cancelled.
@MainActor
public static func clickWithDiff(
    point: CGPoint,
    pid: Int32,
    onlyVisibleElements: Bool = false,
    delayAfterActionNano: UInt64 = 100_000_000 // 100 ms default
) async throws -> ActionDiffResult {
    fputs("info: starting combined action 'clickWithDiff' at (\(point.x), \(point.y)) for PID \(pid)\n", stderr)

    // 1. Baseline snapshot, taken before anything is touched.
    fputs("info: calling traverseAccessibilityTree (before action)...\n", stderr)
    let baseline = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
    fputs("info: traversal (before action) completed.\n", stderr)

    // 2. The click itself.
    fputs("info: calling clickMouse...\n", stderr)
    try MacosUseSDK.clickMouse(at: point)
    fputs("info: clickMouse completed successfully.\n", stderr)

    // 3. Give the UI time to update.
    let delaySeconds = Double(delayAfterActionNano) / 1_000_000_000.0
    fputs("info: waiting \(delaySeconds) seconds after action...\n", stderr)
    try await Task.sleep(nanoseconds: delayAfterActionNano)

    // 4. Snapshot after the click.
    fputs("info: calling traverseAccessibilityTree (after action)...\n", stderr)
    let updated = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
    fputs("info: traversal (after action) completed.\n", stderr)

    // 5. Compare the two element lists.
    fputs("info: calculating traversal diff...\n", stderr)
    let changes = calculateDiff(beforeElements: baseline.elements, afterElements: updated.elements)
    fputs("info: diff calculation completed.\n", stderr)

    // 6. Package the after-snapshot together with the diff.
    let outcome = ActionDiffResult(afterAction: updated, diff: changes)
    fputs("info: combined action 'clickWithDiff' finished.\n", stderr)
    return outcome
}

/// Presses a key, bracketed by accessibility traversals, and returns the diff.
///
/// - Parameters:
///   - keyCode: The `CGKeyCode` of the key to press.
///   - flags: The modifier flags (`CGEventFlags`).
///   - pid: The Process ID (PID) of the application to traverse.
///   - onlyVisibleElements: If true, traversals only collect elements with valid position/size. Defaults to false.
///   - delayAfterActionNano: Nanoseconds to wait after the action before the second traversal. Default 100ms.
/// - Returns: An `ActionDiffResult` holding the traversal taken after the key press (`afterAction`) and the
///   diff against the traversal taken before it. The before-traversal itself is not part of the result.
/// - Throws: `MacosUseSDKError` if any step fails. The wait uses `Task.sleep`, which throws if the task is cancelled.
@MainActor
public static func pressKeyWithDiff(
    keyCode: CGKeyCode,
    flags: CGEventFlags = [],
    pid: Int32,
    onlyVisibleElements: Bool = false,
    delayAfterActionNano: UInt64 = 100_000_000 // 100 ms default
) async throws -> ActionDiffResult {
    fputs("info: starting combined action 'pressKeyWithDiff' (key: \(keyCode), flags: \(flags.rawValue)) for PID \(pid)\n", stderr)

    // 1. Baseline snapshot, taken before anything is touched.
    fputs("info: calling traverseAccessibilityTree (before action)...\n", stderr)
    let baseline = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
    fputs("info: traversal (before action) completed.\n", stderr)

    // 2. The key press itself.
    fputs("info: calling pressKey...\n", stderr)
    try MacosUseSDK.pressKey(keyCode: keyCode, flags: flags)
    fputs("info: pressKey completed successfully.\n", stderr)

    // 3. Give the UI time to update.
    let delaySeconds = Double(delayAfterActionNano) / 1_000_000_000.0
    fputs("info: waiting \(delaySeconds) seconds after action...\n", stderr)
    try await Task.sleep(nanoseconds: delayAfterActionNano)

    // 4. Snapshot after the key press.
    fputs("info: calling traverseAccessibilityTree (after action)...\n", stderr)
    let updated = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
    fputs("info: traversal (after action) completed.\n", stderr)

    // 5. Compare the two element lists.
    fputs("info: calculating traversal diff...\n", stderr)
    let changes = calculateDiff(beforeElements: baseline.elements, afterElements: updated.elements)
    fputs("info: diff calculation completed.\n", stderr)

    // 6. Package the after-snapshot together with the diff.
    let outcome = ActionDiffResult(afterAction: updated, diff: changes)
    fputs("info: combined action 'pressKeyWithDiff' finished.\n", stderr)
    return outcome
}

/// Types text, bracketed by accessibility traversals, and returns the diff.
///
/// - Parameters:
///   - text: The `String` to type.
///   - pid: The Process ID (PID) of the application to traverse.
///   - onlyVisibleElements: If true, traversals only collect elements with valid position/size. Defaults to false.
///   - delayAfterActionNano: Nanoseconds to wait after the action before the second traversal. Default 100ms.
/// - Returns: An `ActionDiffResult` holding the traversal taken after typing (`afterAction`) and the
///   diff against the traversal taken before it. The before-traversal itself is not part of the result.
/// - Throws: `MacosUseSDKError` if any step fails.
@MainActor
public static func writeTextWithDiff(
    text: String,
    pid: Int32,
    onlyVisibleElements: Bool = false,
    delayAfterActionNano: UInt64 = 100_000_000 // 100 ms default
) async throws -> ActionDiffResult {
    fputs("info: starting combined action 'writeTextWithDiff' (text: \"\(text)\") for PID \(pid)\n", stderr)

    // 1. Baseline snapshot, taken before anything is touched.
    fputs("info: calling traverseAccessibilityTree (before action)...\n", stderr)
    let baseline = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
    fputs("info: traversal (before action) completed.\n", stderr)

    // 2. Type the text.
    fputs("info: calling writeText...\n", stderr)
    try MacosUseSDK.writeText(text)
    fputs("info: writeText completed successfully.\n", stderr)

    // 3. Give the UI time to update.
    let delaySeconds = Double(delayAfterActionNano) / 1_000_000_000.0
    fputs("info: waiting \(delaySeconds) seconds after action...\n", stderr)
    try await Task.sleep(nanoseconds: delayAfterActionNano)

    // 4. Snapshot after typing.
    fputs("info: calling traverseAccessibilityTree (after action)...\n", stderr)
    let updated = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
    fputs("info: traversal (after action) completed.\n", stderr)

    // 5. Compare the two element lists.
    fputs("info: calculating traversal diff...\n", stderr)
    let changes = calculateDiff(beforeElements: baseline.elements, afterElements: updated.elements)
    fputs("info: diff calculation completed.\n", stderr)

    // 6. Package the after-snapshot together with the diff.
    let outcome = ActionDiffResult(afterAction: updated, diff: changes)
    fputs("info: combined action 'writeTextWithDiff' finished.\n", stderr)
    return outcome
}

// Add similar '...WithDiff' functions for doubleClick, rightClick, etc. as needed


// --- NEW: Combined Actions with Action Visualization AND Traversal Highlighting ---

/// Performs a left click with visual feedback, bracketed by traversals (before action, after action),
/// highlights the elements from the second traversal, and returns the diff.
///
/// - Parameters:
///   - point: The `CGPoint` where the click should occur.
///   - pid: The Process ID (PID) of the application.
///   - onlyVisibleElements: If true, traversals only collect elements with valid position/size. Default false.
///   - actionHighlightDuration: Duration (seconds) for the click's visual feedback pulse. Default 0.5s.
///   - traversalHighlightDuration: Duration (seconds) for highlighting elements found in the second traversal. Default 3.0s.
///   - delayAfterActionNano: Nanoseconds to wait after the click before the second traversal. Default 100ms.
/// - Returns: An `ActionDiffResult` containing the second traversal's data and the diff.
/// - Throws: `MacosUseSDKError` if any step fails.
@MainActor
public static func clickWithActionAndTraversalHighlight(
    point: CGPoint,
    pid: Int32,
    onlyVisibleElements: Bool = false,
    actionHighlightDuration: Double = 0.5, // How long the click pulse stays visible
    traversalHighlightDuration: Double = 3.0, // How long the element highlights stay visible
    delayAfterActionNano: UInt64 = 100_000_000 // 100 ms default
) async throws -> ActionDiffResult {
    fputs("info: starting combined action 'clickWithActionAndTraversalHighlight' at (\(point.x), \(point.y)) for PID \(pid)\n", stderr)

    // 1. Baseline snapshot, taken before anything is touched.
    fputs("info: calling traverseAccessibilityTree (before action)...\n", stderr)
    let baseline = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
    fputs("info: traversal (before action) completed.\n", stderr)

    // 2a. The click itself (input simulation only, no visuals).
    fputs("info: calling clickMouse...\n", stderr)
    try MacosUseSDK.clickMouse(at: point)
    fputs("info: clickMouse completed successfully.\n", stderr)

    // 2b. Click visualization. It is scheduled as an unstructured main-actor task,
    // so this function does not wait for the pulse to be shown or to finish.
    fputs("info: dispatching showVisualFeedback for click (duration: \(actionHighlightDuration)s)...\n", stderr)
    Task { @MainActor in
        MacosUseSDK.showVisualFeedback(at: point, type: .circle, duration: actionHighlightDuration)
    }
    fputs("info: showVisualFeedback for click dispatched.\n", stderr)

    // 3. Give the UI time to update before the second traversal.
    let delaySeconds = Double(delayAfterActionNano) / 1_000_000_000.0
    fputs("info: waiting \(delaySeconds) seconds after action...\n", stderr)
    try await Task.sleep(nanoseconds: delayAfterActionNano)

    // 4. Snapshot after the click (plain traversal, no highlighting involved yet).
    fputs("info: calling traverseAccessibilityTree (after action)...\n", stderr)
    let updated = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
    fputs("info: traversal (after action) completed.\n", stderr)

    // 5. Compare the two element lists.
    fputs("info: calculating traversal diff...\n", stderr)
    let changes = calculateDiff(beforeElements: baseline.elements, afterElements: updated.elements)
    fputs("info: diff calculation completed.\n", stderr)

    // 6. Highlight every element of the after-snapshot.
    // NOTE(review): per the original author's note, drawHighlightBoxes returns right after
    // dispatching the UI work and cleans up later; its implementation is not visible here.
    fputs("info: calling drawHighlightBoxes (duration: \(traversalHighlightDuration)s) for afterTraversal elements...\n", stderr)
    drawHighlightBoxes(for: updated.elements, duration: traversalHighlightDuration)
    fputs("info: drawHighlightBoxes dispatched highlight drawing.\n", stderr)

    // 7. Package the after-snapshot together with the diff.
    let outcome = ActionDiffResult(afterAction: updated, diff: changes)
    fputs("info: combined action 'clickWithActionAndTraversalHighlight' finished returning result.\n", stderr)
    return outcome
}


/// Presses a key with visual feedback (caption), bracketed by traversals (before action, after action),
/// highlights the elements from the second traversal, and returns the diff.
///
/// - Parameters:
///   - keyCode: The `CGKeyCode` of the key to press.
///   - flags: The modifier flags (`CGEventFlags`).
///   - pid: The Process ID (PID) of the application.
///   - onlyVisibleElements: If true, traversals only collect elements with valid position/size. Default false.
///   - actionHighlightDuration: Duration (seconds) for the key press visual feedback caption. Default 0.8s.
///   - traversalHighlightDuration: Duration (seconds) for highlighting elements found in the second traversal. Default 3.0s.
///   - delayAfterActionNano: Nanoseconds to wait after the key press before the second traversal. Default 100ms.
/// - Returns: An `ActionDiffResult` containing the second traversal's data and the diff.
/// - Throws: `MacosUseSDKError` if any step fails.
@MainActor
public static func pressKeyWithActionAndTraversalHighlight(
    keyCode: CGKeyCode,
    flags: CGEventFlags = [],
    pid: Int32,
    onlyVisibleElements: Bool = false,
    actionHighlightDuration: Double = 0.8, // How long the caption stays visible
    traversalHighlightDuration: Double = 3.0, // How long the element highlights stay visible
    delayAfterActionNano: UInt64 = 100_000_000 // 100 ms default
) async throws -> ActionDiffResult {
    fputs("info: starting combined action 'pressKeyWithActionAndTraversalHighlight' (key: \(keyCode), flags: \(flags.rawValue)) for PID \(pid)\n", stderr)

    // 1. Baseline snapshot, taken before anything is touched.
    fputs("info: calling traverseAccessibilityTree (before action)...\n", stderr)
    let baseline = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
    fputs("info: traversal (before action) completed.\n", stderr)

    // 2a. The key press itself (input simulation only, no visuals).
    fputs("info: calling pressKey (key: \(keyCode), flags: \(flags.rawValue))...\n", stderr)
    try MacosUseSDK.pressKey(keyCode: keyCode, flags: flags)
    fputs("info: pressKey completed successfully.\n", stderr)

    // 2b. Caption visualization at the screen center. It is scheduled as an unstructured
    // main-actor task, so this function does not wait for it.
    let keyPressCaption = "[KEY PRESS]"
    let keyPressCaptionSize = CGSize(width: 250, height: 80)
    fputs("info: dispatching showVisualFeedback for key press (duration: \(actionHighlightDuration)s)...\n", stderr)
    Task { @MainActor in
        guard let screenCenter = getMainScreenCenter() else {
            // No screen center available: skip the caption, the action itself already happened.
            fputs("warning: [\(#function)] could not get screen center for key press caption.\n", stderr)
            return
        }
        MacosUseSDK.showVisualFeedback(
            at: screenCenter,
            type: .caption(text: keyPressCaption),
            size: keyPressCaptionSize,
            duration: actionHighlightDuration
        )
    }
    fputs("info: showVisualFeedback for key press dispatched.\n", stderr)

    // 3. Give the UI time to update before the second traversal.
    let delaySeconds = Double(delayAfterActionNano) / 1_000_000_000.0
    fputs("info: waiting \(delaySeconds) seconds after action...\n", stderr)
    try await Task.sleep(nanoseconds: delayAfterActionNano)

    // 4. Snapshot after the key press.
    fputs("info: calling traverseAccessibilityTree (after action)...\n", stderr)
    let updated = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
    fputs("info: traversal (after action) completed.\n", stderr)

    // 5. Compare the two element lists.
    fputs("info: calculating traversal diff...\n", stderr)
    let changes = calculateDiff(beforeElements: baseline.elements, afterElements: updated.elements)
    fputs("info: diff calculation completed.\n", stderr)

    // 6. Highlight every element of the after-snapshot.
    // NOTE(review): highlight cleanup is said to happen asynchronously later; not visible here.
    fputs("info: calling drawHighlightBoxes (duration: \(traversalHighlightDuration)s) for afterTraversal elements...\n", stderr)
    drawHighlightBoxes(for: updated.elements, duration: traversalHighlightDuration)
    fputs("info: drawHighlightBoxes dispatched highlight drawing.\n", stderr)

    // 7. Package the after-snapshot together with the diff.
    let outcome = ActionDiffResult(afterAction: updated, diff: changes)
    fputs("info: combined action 'pressKeyWithActionAndTraversalHighlight' finished returning result.\n", stderr)
    return outcome
}

/// Types text with visual feedback (caption), bracketed by traversals (before action, after action),
/// highlights the elements from the second traversal, and returns the diff.
///
/// - Parameters:
///   - text: The `String` to type. It is also the text shown in the caption.
///   - pid: The Process ID (PID) of the application.
///   - onlyVisibleElements: If true, traversals only collect elements with valid position/size. Default false.
///   - actionHighlightDuration: Duration (seconds) for the text input visual feedback caption. When `nil`,
///     it is computed as `max(1.0, 0.5 + 0.05 * text.count)`.
///   - traversalHighlightDuration: Duration (seconds) for highlighting elements found in the second traversal. Default 3.0s.
///   - delayAfterActionNano: Nanoseconds to wait after typing before the second traversal. Default 100ms.
/// - Returns: An `ActionDiffResult` containing the second traversal's data and the diff.
/// - Throws: `MacosUseSDKError` if any step fails.
@MainActor
public static func writeTextWithActionAndTraversalHighlight(
    text: String,
    pid: Int32,
    onlyVisibleElements: Bool = false,
    actionHighlightDuration: Double? = nil, // Caption duration; derived from the text length when nil
    traversalHighlightDuration: Double = 3.0, // How long the element highlights stay visible
    delayAfterActionNano: UInt64 = 100_000_000 // 100 ms default
) async throws -> ActionDiffResult {
    fputs("info: starting combined action 'writeTextWithActionAndTraversalHighlight' (text: \"\(text)\") for PID \(pid)\n", stderr)

    // 1. Baseline snapshot, taken before anything is touched.
    fputs("info: calling traverseAccessibilityTree (before action)...\n", stderr)
    let baseline = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
    fputs("info: traversal (before action) completed.\n", stderr)

    // 2a. Type the text (input simulation only, no visuals).
    fputs("info: calling writeText (\"\(text)\")...\n", stderr)
    try MacosUseSDK.writeText(text)
    fputs("info: writeText completed successfully.\n", stderr)

    // 2b. Caption visualization. Longer text keeps the caption up longer, with a 1 s floor,
    // unless the caller supplied an explicit duration.
    let captionDuration = actionHighlightDuration ?? max(1.0, 0.5 + Double(text.count) * 0.05)
    let typedTextCaptionSize = CGSize(width: 450, height: 100)
    fputs("info: dispatching showVisualFeedback for write text (duration: \(captionDuration)s)...\n", stderr)
    Task { @MainActor in
        guard let screenCenter = getMainScreenCenter() else {
            // No screen center available: skip the caption, the typing itself already happened.
            fputs("warning: [\(#function)] could not get screen center for write text caption.\n", stderr)
            return
        }
        MacosUseSDK.showVisualFeedback(
            at: screenCenter,
            type: .caption(text: text), // The caption shows exactly what was typed
            size: typedTextCaptionSize,
            duration: captionDuration
        )
    }
    fputs("info: showVisualFeedback for write text dispatched.\n", stderr)

    // 3. Give the UI time to update before the second traversal.
    let delaySeconds = Double(delayAfterActionNano) / 1_000_000_000.0
    fputs("info: waiting \(delaySeconds) seconds after action...\n", stderr)
    try await Task.sleep(nanoseconds: delayAfterActionNano)

    // 4. Snapshot after typing.
    fputs("info: calling traverseAccessibilityTree (after action)...\n", stderr)
    let updated = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
    fputs("info: traversal (after action) completed.\n", stderr)

    // 5. Compare the two element lists.
    fputs("info: calculating traversal diff...\n", stderr)
    let changes = calculateDiff(beforeElements: baseline.elements, afterElements: updated.elements)
    fputs("info: diff calculation completed.\n", stderr)

    // 6. Highlight every element of the after-snapshot.
    // NOTE(review): highlight cleanup is said to happen asynchronously later; not visible here.
    fputs("info: calling drawHighlightBoxes (duration: \(traversalHighlightDuration)s) for afterTraversal elements...\n", stderr)
    drawHighlightBoxes(for: updated.elements, duration: traversalHighlightDuration)
    fputs("info: drawHighlightBoxes dispatched highlight drawing.\n", stderr)

    // 7. Package the after-snapshot together with the diff.
    let outcome = ActionDiffResult(afterAction: updated, diff: changes)
    fputs("info: combined action 'writeTextWithActionAndTraversalHighlight' finished returning result.\n", stderr)
    return outcome
}

}

--------------------------------------------------------------------------------
/Sources/MacosUseSDK/DrawVisuals.swift:
--------------------------------------------------------------------------------
// REMOVED: #!/usr/bin/env swift
// REMOVED: import Cocoa
import AppKit
import Foundation

// Define types of visual feedback
public enum FeedbackType {
    case box(text: String) // Existing box with optional text
    case circle // New simple circle
    case caption(text: String) // New type for large screen-center text
}

// Custom view that renders one piece of visual feedback, selected by `feedbackType`:
// a red framed box with a (truncated) label, a filled green circle, or a caption on a dark rounded background.
internal class OverlayView: NSView {
    var feedbackType: FeedbackType = .box(text: "") // What to draw, including any associated text

    // Constants for drawing
    let padding: CGFloat = 10 // Increased padding for caption
    let frameLineWidth: CGFloat = 2
    let circleRadius: CGFloat = 15 // Radius for the circle feedback
    let captionFontSize: CGFloat = 36 // Font size for caption
    let captionBackgroundColor = NSColor.black.withAlphaComponent(0.6) // Semi-transparent black background
    let captionTextColor = NSColor.white

    // Creates the view with the feedback it should draw.
    init(frame frameRect: NSRect, type: FeedbackType) {
        feedbackType = type
        super.init(frame: frameRect)
    }

    // Loading from a nib/storyboard is not supported.
    required init?(coder: NSCoder) {
        fatalError("init(coder:) has not been implemented")
    }

    // Dispatches to the drawing routine that matches the current feedback type.
    override func draw(_ dirtyRect: NSRect) {
        super.draw(dirtyRect)

        switch feedbackType {
        case .box(let label):
            drawBox(with: label)
        case .circle:
            drawCircle()
        case .caption(let caption):
            drawCaption(with: caption)
        }
    }

    // Fills a green circle centered in the view, shrunk if the view is smaller than the nominal diameter.
    private func drawCircle() {
        NSColor.green.setFill()

        // Never let the circle exceed the view in either dimension.
        let radius = min(circleRadius, bounds.width / 2.0, bounds.height / 2.0)
        guard radius > 0 else { return } // Nothing sensible to draw in a degenerate view

        let discRect = NSRect(
            x: bounds.midX - radius,
            y: bounds.midY - radius,
            width: radius * 2,
            height: radius * 2
        )
        NSBezierPath(ovalIn: discRect).fill()
    }

    // Strokes a red frame around the view and, if `displayText` is non-empty, draws it in small
    // red type inside the frame, shortening it with an ellipsis when it is too wide.
    private func drawBox(with displayText: String) {
        // --- Frame ---
        NSColor.red.setStroke()
        // Inset by half the line width so the full stroke stays inside the bounds.
        let halfStroke = frameLineWidth / 2.0
        let outline = NSBezierPath(rect: bounds.insetBy(dx: halfStroke, dy: halfStroke))
        outline.lineWidth = frameLineWidth
        outline.stroke()

        // --- Label ---
        guard !displayText.isEmpty else { return }

        // Small font, since many overlays may be on screen at once.
        let labelAttributes: [NSAttributedString.Key: Any] = [
            .font: NSFont.systemFont(ofSize: 10.0),
            .foregroundColor: NSColor.red
        ]

        // Room left for text once frame lines and padding on both sides are removed.
        let availableWidth = max(0, bounds.width - (frameLineWidth * 2.0) - (padding * 2.0))
        var label = displayText
        var labelSize = label.size(withAttributes: labelAttributes)

        // Truncate only when there is some width to fit into; with zero width the text is left as is.
        if labelSize.width > availableWidth && availableWidth > 0 {
            let ellipsis = "…"
            let ellipsisWidth = ellipsis.size(withAttributes: labelAttributes).width

            // Drop trailing characters until the remainder plus the ellipsis fits.
            while !label.isEmpty && (label.size(withAttributes: labelAttributes).width + ellipsisWidth > availableWidth) {
                label.removeLast()
            }
            label += ellipsis
            labelSize = label.size(withAttributes: labelAttributes)
        }

        // Text taller than this is not handled specially; it is simply drawn as is.
        let availableHeight = max(0, bounds.height - (frameLineWidth * 2.0) - (padding * 2.0))

        // X starts after the frame line and padding. Y pushes the text to the far end of the
        // padded area, which the original note calls top-aligned (assuming an unflipped view).
        let labelOrigin = NSPoint(
            x: frameLineWidth + padding,
            y: frameLineWidth + padding + (availableHeight - labelSize.height)
        )
        (label as NSString).draw(at: labelOrigin, withAttributes: labelAttributes)
    }

    // Fills a rounded, semi-transparent background and draws `text` on it, horizontally
    // centered via the paragraph style and vertically centered within the padded area.
    // Logs to stderr on every draw, including warnings when the text does not fit.
    private func drawCaption(with text: String) {
        fputs("debug: OverlayView drawing caption: '\(text)'\n", stderr)

        // Background (inset mirrors the box frame, leaving room should a border be added later).
        captionBackgroundColor.setFill()
        let backgroundRect = bounds.insetBy(dx: frameLineWidth / 2.0, dy: frameLineWidth / 2.0)
        NSBezierPath(roundedRect: backgroundRect, xRadius: 8, yRadius: 8).fill()

        guard !text.isEmpty else {
            fputs("debug: OverlayView no caption text to draw.\n", stderr)
            return
        }

        let centered = NSMutableParagraphStyle()
        centered.alignment = .center

        let captionAttributes: [NSAttributedString.Key: Any] = [
            .font: NSFont.systemFont(ofSize: captionFontSize, weight: .medium),
            .foregroundColor: captionTextColor,
            .paragraphStyle: centered
        ]

        // Area left for text after padding; oversized text is only reported, not truncated.
        let availableRect = bounds.insetBy(dx: padding, dy: padding)
        let textSize = text.size(withAttributes: captionAttributes)

        if textSize.width > availableRect.width && availableRect.width > 0 {
            fputs("warning: Caption text '\(text)' (\(textSize.width)) wider than available \(availableRect.width), may clip.\n", stderr)
        }
        if textSize.height > availableRect.height {
            fputs("warning: Caption text '\(text)' (\(textSize.height)) taller than available \(availableRect.height), may clip.\n", stderr)
        }

        // Full available width (the paragraph style centers horizontally), vertically centered.
        let textRect = NSRect(
            x: availableRect.origin.x,
            y: availableRect.origin.y + (availableRect.height - textSize.height) / 2.0,
            width: availableRect.width,
            height: textSize.height
        )

        fputs("debug: OverlayView drawing caption text '\(text)' in rect \(textRect)\n", stderr)
        (text as NSString).draw(in: textRect, withAttributes: captionAttributes)
    }
}

// --- REMOVED AppDelegate Class Definition ---

// --- REMOVED Top-Level Application Entry Point Code (app creation, delegate, argument parsing, app.run) ---


// --- Internal Window Creation Helper ---
// Creates a configured, borderless overlay window but does not show it.
// ADDED: @MainActor annotation to ensure UI operations run on the main thread
/// Builds a transparent, borderless, floating window whose content view is an
/// `OverlayView` of the given feedback `type`. The window is returned un-shown.
/// - Parameters:
///   - frame: Window frame in AppKit (bottom-left origin) screen coordinates.
///   - type: The feedback kind handed to the `OverlayView`.
/// - Returns: The configured window; the caller is responsible for ordering it on screen.
@MainActor
internal func createOverlayWindow(frame: NSRect, type: FeedbackType) -> NSWindow {
    fputs("debug: Creating overlay window with frame: \(frame), type: \(type)\n", stderr) // Log includes type now
    let window = NSWindow(
        contentRect: frame,
        styleMask: [.borderless],
        backing: .buffered,
        defer: false
    )

    // Transparent, floating overlay. The background is clear for every feedback type;
    // a caption paints its own background inside the view.
    window.isOpaque = false
    window.backgroundColor = .clear
    window.hasShadow = false // No window shadow
    window.level = .floating // Keep above normal windows
    window.collectionBehavior = [.canJoinAllSpaces, .stationary, .ignoresCycle] // Visible on all spaces
    window.isMovableByWindowBackground = false // Prevent accidental dragging

    // Replace the default content view with the custom drawing view.
    let overlayFrame = window.contentView?.bounds ?? NSRect(origin: .zero, size: frame.size)
    window.contentView = OverlayView(frame: overlayFrame, type: type)

    return window
}

// --- Helper Function to Get Main Screen Center (Moved from HighlightInput.swift) ---
/// Gets the center point of the main screen, expressed in the top-left-origin screen
/// coordinates that `showVisualFeedback(at:type:size:duration:)` expects.
///
/// `NSScreen.frame` is reported in AppKit's bottom-left-origin space, so the Y value is
/// flipped against the primary screen's height (`NSScreen.screens.first`). When the main
/// screen *is* the primary screen the result equals the plain `midX`/`midY` of its frame.
/// - Returns: The center point, or `nil` if no main screen is available.
public func getMainScreenCenter() -> CGPoint? {
    guard let mainScreen = NSScreen.main else {
        fputs("error: could not get main screen.\n", stderr)
        return nil
    }
    let screenRect = mainScreen.frame
    // The primary screen anchors both coordinate spaces; fall back to the main screen's
    // own height if the screen list is unexpectedly empty.
    let primaryHeight = NSScreen.screens.first?.frame.height ?? screenRect.height
    return CGPoint(x: screenRect.midX, y: primaryHeight - screenRect.midY)
}

/// Adds the click pulse to `layer`: scale 0.7 → `maxScale` together with opacity 0.8 → 0
/// over `duration` seconds, holding the final (fully transparent) state afterwards.
private func addCirclePulseAnimation(to layer: CALayer?, maxScale: Double, duration: Double) {
    let scaleAnimation = CABasicAnimation(keyPath: "transform.scale")
    scaleAnimation.fromValue = 0.7
    scaleAnimation.toValue = maxScale
    scaleAnimation.duration = duration

    let opacityAnimation = CABasicAnimation(keyPath: "opacity")
    opacityAnimation.fromValue = 0.8
    opacityAnimation.toValue = 0.0
    opacityAnimation.duration = duration

    let animationGroup = CAAnimationGroup()
    animationGroup.animations = [scaleAnimation, opacityAnimation]
    animationGroup.duration = duration
    animationGroup.timingFunction = CAMediaTimingFunction(name: .easeOut)
    // Keep the end state so the circle does not pop back after the animation.
    animationGroup.fillMode = .forwards
    animationGroup.isRemovedOnCompletion = false
    layer?.add(animationGroup, forKey: "pulseFadeEffect")
}

/// Adds the caption animations to `layer`: a 0.2 s scale-up/fade-in entrance, and a
/// 0.3 s fade-out scheduled to finish around `duration` seconds (never starting before
/// the entrance has completed).
private func addCaptionAnimations(to layer: CALayer?, duration: Double) {
    // --- Entrance (scale up & fade in) ---
    let entranceDuration = 0.2
    let scaleInAnimation = CABasicAnimation(keyPath: "transform.scale")
    scaleInAnimation.fromValue = 0.7 // Start slightly smaller
    scaleInAnimation.toValue = 1.0   // Scale to normal size
    scaleInAnimation.duration = entranceDuration

    let fadeInAnimation = CABasicAnimation(keyPath: "opacity")
    fadeInAnimation.fromValue = 0.0 // Start fully transparent
    fadeInAnimation.toValue = 1.0   // Fade to fully opaque
    fadeInAnimation.duration = entranceDuration

    let entranceGroup = CAAnimationGroup()
    entranceGroup.animations = [scaleInAnimation, fadeInAnimation]
    entranceGroup.duration = entranceDuration
    entranceGroup.timingFunction = CAMediaTimingFunction(name: .easeOut)
    // `.backwards` applies the initial (small, transparent) state before the animation starts.
    // The default removal on completion is wanted here: the layer returns to its normal state.
    entranceGroup.fillMode = .backwards
    layer?.add(entranceGroup, forKey: "captionEntranceEffect")

    // --- Fade-out (starts near the end) ---
    let fadeOutDuration = 0.3
    let fadeOutStartTime = max(entranceDuration, duration - fadeOutDuration)

    let fadeOutAnimation = CABasicAnimation(keyPath: "opacity")
    fadeOutAnimation.fromValue = 1.0
    fadeOutAnimation.toValue = 0.0
    fadeOutAnimation.duration = fadeOutDuration
    fadeOutAnimation.beginTime = CACurrentMediaTime() + fadeOutStartTime
    fadeOutAnimation.fillMode = .forwards          // Stay transparent afterwards
    fadeOutAnimation.isRemovedOnCompletion = false // The window itself is never closed here
    layer?.add(fadeOutAnimation, forKey: "captionFadeOut")
}

// --- Public API Function for Simple Visual Feedback ---
/// Displays a visual indicator (a pulsing circle, a caption, or a box) centered on a screen point.
///
/// Circles pulse and fade out; captions scale/fade in and later fade out. Other types are
/// shown without animation. The overlay window is **not** closed by this function: after
/// the animation ends the content is transparent, but the window object stays alive until
/// the hosting process exits.
///
/// - Parameters:
///   - point: Center of the feedback in top-left-origin screen coordinates (the space used
///     for the mouse events elsewhere in this SDK). For captions this is usually the value
///     returned by `getMainScreenCenter()`.
///   - type: The kind of feedback to display (`FeedbackType`).
///   - size: Overlay window size for non-circle types. Ignored for `.circle`, whose window
///     size is derived from the animation's maximum scale plus padding.
///   - duration: Length of the circle animation, or time until the caption has faded out,
///     in seconds.
@MainActor
public func showVisualFeedback(at point: CGPoint, type: FeedbackType, size: CGSize = CGSize(width: 30, height: 30), duration: Double = 0.5) {
    // Defensive: callers that bypass actor checking may still arrive off the main thread.
    guard Thread.isMainThread else {
        fputs("warning: showVisualFeedback called off main thread, dispatching. Point: \(point), Type: \(type)\n", stderr)
        DispatchQueue.main.async {
            showVisualFeedback(at: point, type: type, size: size, duration: duration)
        }
        return
    }

    // --- Calculate required window size ---
    let maxCircleScale = 1.8           // Single source of truth for size and animation
    let circleRadius: CGFloat = 15.0   // The base radius defined in OverlayView
    let circlePadding: CGFloat = 100.0 // Extra room around the fully scaled circle

    let effectiveSize: CGSize
    if case .circle = type {
        let maxDiameter = circleRadius * 2.0 * CGFloat(maxCircleScale)
        let paddedSize = ceil(maxDiameter + circlePadding)
        effectiveSize = CGSize(width: paddedSize, height: paddedSize)
        fputs("info: showVisualFeedback using calculated size \(effectiveSize) for .circle type (ignores input size \(size)).\n", stderr)
    } else {
        effectiveSize = size
        fputs("info: showVisualFeedback called for point \(point), type \(type), size \(effectiveSize), duration \(duration)s.\n", stderr)
    }

    // --- Coordinate conversion (top-left origin -> AppKit bottom-left origin) ---
    // Both spaces are anchored to the PRIMARY screen (index 0 of `NSScreen.screens`), so its
    // height is the flip axis. `NSScreen.main` is the focused screen and gives wrong results
    // whenever the focus is on a secondary display.
    let screenHeight = NSScreen.screens.first?.frame.height ?? 0
    if screenHeight == 0 {
        fputs("warning: Could not get primary screen height, coordinates might be incorrect.\n", stderr)
    }
    let originX = point.x - (effectiveSize.width / 2.0)
    let originY = screenHeight - point.y - (effectiveSize.height / 2.0)
    let frame = NSRect(x: originX, y: originY, width: effectiveSize.width, height: effectiveSize.height)
    fputs("debug: Creating feedback window with AppKit frame: \(frame)\n", stderr)

    // --- Create and show the window ---
    let window = createOverlayWindow(frame: frame, type: type)
    window.makeKeyAndOrderFront(nil)

    // --- Apply animation (circle and caption only) ---
    if let overlayView = window.contentView as? OverlayView {
        overlayView.wantsLayer = true // The animations are attached to the view's layer
        // NOTE(review): the scale animations use whatever anchor point AppKit gives the
        // backing layer; confirm the pulse visually stays centered on `point`.

        if case .circle = type {
            fputs("debug: Applying pulse/fade animation to circle overlay layer.\n", stderr)
            addCirclePulseAnimation(to: overlayView.layer, maxScale: maxCircleScale, duration: duration)
        } else if case .caption = type {
            fputs("debug: Applying entrance and fade-out animations to caption overlay layer.\n", stderr)
            addCaptionAnimations(to: overlayView.layer, duration: duration)
        } else {
            fputs("debug: Animation skipped (unhandled FeedbackType or view issue).\n", stderr)
        }
    } else {
        fputs("warning: Could not get OverlayView from window content for animation.\n", stderr)
    }

    fputs("debug: Visual feedback window displayed. It will remain until the tool exits.\n", stderr)
}

// --- NEW Public API Function for Drawing Highlight Boxes ---
/// Draws overlay windows (highlight boxes) around the specified accessibility elements.
///
/// The overlays are not removed by this function; they stay on screen until the hosting process exits.
/// This function *only* draws; it does not perform accessibility traversal.
/// Call `traverseAccessibilityTree` first to get the `ElementData`.
///
/// - Important: This function schedules UI work on the main dispatch queue.
///   It should be called from a context where the main run loop is active.
///   The function itself returns immediately; the overlays appear asynchronously.
///
/// - Parameter elementsToHighlightInput: (argument label `for`) The `ElementData` values to highlight.
///   Only elements with valid geometry (x, y, width > 0, height > 0) are highlighted;
///   element coordinates are treated as top-left-origin screen coordinates.
/// - Parameter duration: Intended on-screen time in seconds (default 3.0). NOTE: this
///   implementation only logs the value and never closes the windows; they remain until
///   the process exits.
@MainActor // Ensure UI work happens on the main thread
public func drawHighlightBoxes(for elementsToHighlightInput: [ElementData], duration: Double = 3.0) {
    fputs("info: drawHighlightBoxes called for \(elementsToHighlightInput.count) elements, duration \(duration)s.\n", stderr)

    // Keep only elements that carry a position and a strictly positive size.
    let elementsToHighlight = elementsToHighlightInput.filter { element in
        guard element.x != nil, element.y != nil,
              let width = element.width, width > 0,
              let height = element.height, height > 0 else { return false }
        return true
    }

    guard !elementsToHighlight.isEmpty else {
        fputs("info: No elements with valid geometry provided to highlight.\n", stderr)
        return
    }

    fputs("info: Filtered down to \(elementsToHighlight.count) elements with valid geometry to highlight.\n", stderr)

    // Window creation is deferred to the next main-queue turn so this call returns at once.
    DispatchQueue.main.async { // This block executes on the main actor
        var overlayWindows: [NSWindow] = []

        fputs("info: [Main Thread] Creating \(elementsToHighlight.count) overlay windows...\n", stderr)

        // Flip Y against the PRIMARY screen (index 0 of `NSScreen.screens`): it anchors both the
        // top-left and the AppKit bottom-left coordinate spaces. `NSScreen.main` is merely the
        // focused screen and yields wrong positions when that is a secondary display.
        let screenHeight = NSScreen.screens.first?.frame.height ?? 0
        if screenHeight == 0 {
            fputs("warning: [Main Thread] Could not get primary screen height, coordinates might be incorrect.\n", stderr)
        } else {
            fputs("debug: [Main Thread] Primary screen height for coordinate conversion: \(screenHeight)\n", stderr)
        }

        for element in elementsToHighlight {
            // The filter above guarantees these are present; bind instead of force-unwrapping.
            guard let originalX = element.x, let originalY = element.y,
                  let elementWidth = element.width, let elementHeight = element.height else { continue }

            let convertedY = screenHeight - originalY - elementHeight
            let frame = NSRect(x: originalX, y: convertedY, width: elementWidth, height: elementHeight)

            // Label with the element's text when it has any, otherwise with its role.
            let textToShow = element.text.flatMap { $0.isEmpty ? nil : $0 } ?? element.role
            let feedbackType: FeedbackType = .box(text: textToShow)

            let window = createOverlayWindow(frame: frame, type: feedbackType)
            overlayWindows.append(window)
            window.makeKeyAndOrderFront(nil)
        }

        fputs("info: [Main Thread] Displayed \(overlayWindows.count) overlays. They will remain until the tool exits.\n", stderr)
    } // End of DispatchQueue.main.async block

    fputs("info: drawHighlightBoxes finished synchronous part, dispatched UI updates.\n", stderr)
}
--------------------------------------------------------------------------------
/Sources/MacosUseSDK/HighlightInput.swift:
--------------------------------------------------------------------------------
import Foundation
import CoreGraphics
import AppKit // For DispatchQueue, showVisualFeedback

// --- Public Functions Combining Input Simulation and Visualization ---

/// Simulates a left mouse click at the specified coordinates and shows visual feedback.
/// - Parameters:
///   - point: The `CGPoint` where the click should occur.
///   - duration: How long the visual feedback should last (in seconds). Default is 0.5s.
/// - Throws: `MacosUseSDKError` if the click simulation fails. The visualization is
///   dispatched asynchronously and cannot report errors to the caller.
public func clickMouseAndVisualize(at point: CGPoint, duration: Double = 0.5) throws {
    let startMessage = "log: simulating left click AND visualize at: (\(point.x), \(point.y)), duration: \(duration)s\n"
    fputs(startMessage, stderr)

    // Input first; if it throws, no feedback is scheduled.
    try clickMouse(at: point)

    // Hop to the main queue, then onto the main actor, to draw the pulse.
    DispatchQueue.main.async {
        Task { @MainActor in
            showVisualFeedback(at: point, type: .circle, duration: duration)
        }
    }

    fputs("log: left click simulation and visualization dispatched.\n", stderr)
}

/// Simulates a left mouse double click at the specified coordinates and shows visual feedback.
/// - Parameters:
///   - point: The `CGPoint` where the double click should occur.
///   - duration: How long the visual feedback should last (in seconds). Default is 0.5s.
/// - Throws: `MacosUseSDKError` if the double-click simulation fails. The visualization is
///   dispatched asynchronously and cannot report errors to the caller.
public func doubleClickMouseAndVisualize(at point: CGPoint, duration: Double = 0.5) throws {
    let startMessage = "log: simulating double-click AND visualize at: (\(point.x), \(point.y)), duration: \(duration)s\n"
    fputs(startMessage, stderr)

    // Input first; if it throws, no feedback is scheduled.
    try doubleClickMouse(at: point)

    // Hop to the main queue, then onto the main actor, to draw the pulse.
    DispatchQueue.main.async {
        Task { @MainActor in
            showVisualFeedback(at: point, type: .circle, duration: duration)
        }
    }

    fputs("log: double-click simulation and visualization dispatched.\n", stderr)
}

/// Simulates a right mouse click at the specified coordinates and shows visual feedback.
/// - Parameters:
///   - point: The `CGPoint` where the right click should occur.
///   - duration: How long the visual feedback should last (in seconds). Default is 0.5s.
/// - Throws: `MacosUseSDKError` if the right-click simulation fails. The visualization is
///   dispatched asynchronously and cannot report errors to the caller.
public func rightClickMouseAndVisualize(at point: CGPoint, duration: Double = 0.5) throws {
    let startMessage = "log: simulating right-click AND visualize at: (\(point.x), \(point.y)), duration: \(duration)s\n"
    fputs(startMessage, stderr)

    // Input first; if it throws, no feedback is scheduled.
    try rightClickMouse(at: point)

    // Hop to the main queue, then onto the main actor, to draw the pulse.
    DispatchQueue.main.async {
        Task { @MainActor in
            showVisualFeedback(at: point, type: .circle, duration: duration)
        }
    }

    fputs("log: right-click simulation and visualization dispatched.\n", stderr)
}

/// Moves the mouse cursor to the specified coordinates and shows brief visual feedback at the destination.
/// - Parameters:
///   - point: The `CGPoint` to move the cursor to.
///   - duration: How long the visual feedback should last (in seconds). Default is 0.5s.
/// - Throws: `MacosUseSDKError` if the move simulation fails. The visualization is
///   dispatched asynchronously and cannot report errors to the caller.
public func moveMouseAndVisualize(to point: CGPoint, duration: Double = 0.5) throws {
    let startMessage = "log: moving mouse AND visualize to: (\(point.x), \(point.y)), duration: \(duration)s\n"
    fputs(startMessage, stderr)

    // Input first; if it throws, no feedback is scheduled.
    try moveMouse(to: point)

    // Hop to the main queue, then onto the main actor, to draw the pulse.
    DispatchQueue.main.async {
        Task { @MainActor in
            showVisualFeedback(at: point, type: .circle, duration: duration)
        }
    }

    fputs("log: mouse move simulation and visualization dispatched.\n", stderr)
}

/// Simulates pressing and releasing a key with optional modifiers. Shows a caption at screen center.
/// - Parameters:
///   - keyCode: The `CGKeyCode` of the key to press.
///   - flags: The modifier flags (`CGEventFlags`).
///   - duration: How long the visual feedback should last (in seconds). Default is 0.8s.
/// - Throws: `MacosUseSDKError` if simulation fails.
public func pressKeyAndVisualize(keyCode: CGKeyCode, flags: CGEventFlags = [], duration: Double = 0.8) throws {
    // The caption is a fixed label; the pressed key itself is not rendered.
    let captionText = "[KEY PRESS]"
    let captionSize = CGSize(width: 250, height: 80)

    fputs("log: simulating key press (code: \(keyCode), flags: \(flags.rawValue)) AND visualizing caption '\(captionText)', duration: \(duration)s\n", stderr)

    // Input first; if it throws, no caption is scheduled.
    try pressKey(keyCode: keyCode, flags: flags)

    // Caption is drawn on the main actor, centered on the main screen.
    DispatchQueue.main.async {
        Task { @MainActor in
            guard let screenCenter = getMainScreenCenter() else {
                fputs("warning: [Main Thread] could not get main screen center for key press caption visualization.\n", stderr)
                return
            }
            fputs("log: [Main Thread] Displaying key press caption at screen center: \(screenCenter).\n", stderr)
            showVisualFeedback(at: screenCenter,
                               type: .caption(text: captionText),
                               size: captionSize,
                               duration: duration)
        }
    }

    fputs("log: key press simulation complete, caption visualization dispatched.\n", stderr)
}

/// Simulates typing a string of text. Shows a caption of the text at screen center.
/// - Parameters:
///   - text: The `String` to type.
///   - duration: How long the visual feedback should last (in seconds). When `nil`, the
///     duration is `0.5 + 0.05 * text.count` seconds, but never less than 1.0s.
/// - Throws: `MacosUseSDKError` if simulation fails.
public func writeTextAndVisualize(_ text: String, duration: Double? = nil) throws {
    // Longer text gets a longer caption unless the caller fixes the duration.
    let minimumDuration = 1.0
    let finalDuration = duration ?? max(minimumDuration, 0.5 + Double(text.count) * 0.05)
    let captionSize = CGSize(width: 450, height: 100) // Fixed for now; could be made dynamic later

    fputs("log: simulating text writing AND visualizing caption: \"\(text)\", duration: \(finalDuration)s\n", stderr)

    // Input first; if it throws, no caption is scheduled.
    try writeText(text)

    // Caption is drawn on the main actor, centered on the main screen.
    DispatchQueue.main.async {
        Task { @MainActor in
            guard let screenCenter = getMainScreenCenter() else {
                fputs("warning: [Main Thread] could not get main screen center for text writing caption visualization.\n", stderr)
                return
            }
            fputs("log: [Main Thread] Displaying text writing caption at screen center: \(screenCenter).\n", stderr)
            showVisualFeedback(at: screenCenter,
                               type: .caption(text: text), // The typed text is the caption
                               size: captionSize,
                               duration: finalDuration)
        }
    }

    fputs("log: text writing simulation complete, caption visualization dispatched.\n", stderr)
}

// --- Helper Function to Get Main Screen Center ---
// REMOVED: Entire fileprivate getMainScreenCenter() function definition.
// The public version in DrawVisuals.swift is used instead.
--------------------------------------------------------------------------------
/Sources/MacosUseSDK/InputController.swift:
--------------------------------------------------------------------------------
// #!/usr/bin/swift - Don't need this when it's part of a library

import Foundation
import CoreGraphics
import AppKit // Needed for Process and potentially other things later

// --- Add new Error Cases for Input Control ---
/// Convenience factories for input-related failures. Each one wraps its message in
/// `MacosUseSDKError.internalError` with a category prefix.
public extension MacosUseSDKError {
    /// An argument passed to an input function was invalid.
    static func inputInvalidArgument(_ message: String) -> MacosUseSDKError {
        let description = "Input Argument Error: \(message)"
        return .internalError(description)
    }

    /// Creating or posting a simulated input event failed.
    static func inputSimulationFailed(_ message: String) -> MacosUseSDKError {
        let description = "Input Simulation Failed: \(message)"
        return .internalError(description)
    }

    /// An `osascript` invocation finished with an error status.
    static func osascriptExecutionFailed(status: Int32, message: String = "") -> MacosUseSDKError {
        let description = "osascript execution failed with status \(status). \(message)"
        return .internalError(description)
    }
}


// --- Constants for Key Codes ---
// These match the constants used in the Rust macos.rs code for consistency
public let KEY_RETURN: CGKeyCode = 36
public let KEY_TAB: CGKeyCode = 48
public let KEY_SPACE: CGKeyCode = 49
public let KEY_DELETE: CGKeyCode = 51 // Matches 'delete' (backspace on many keyboards)
public let KEY_ESCAPE: CGKeyCode = 53
public let KEY_ARROW_LEFT: CGKeyCode = 123
public let KEY_ARROW_RIGHT: CGKeyCode = 124
public let KEY_ARROW_DOWN: CGKeyCode = 125
public let KEY_ARROW_UP: CGKeyCode = 126
// Add other key codes as needed (consider making them public if the tool needs direct access)

// --- Helper Functions (Internal or Fileprivate) ---

// Logs messages to stderr for debugging/status - keep internal or remove if tool handles logging
// fileprivate func log(_ message: String) { // Make fileprivate or remove
//     fputs("log: \(message)\n", stderr)
// }

/// Returns a `CGEventSource` bound to the HID system state.
/// - Throws: `MacosUseSDKError.inputSimulationFailed` when the source cannot be created.
fileprivate func createEventSource() throws -> CGEventSource {
    if let eventSource = CGEventSource(stateID: .hidSystemState) {
        return eventSource
    }
    throw MacosUseSDKError.inputSimulationFailed("failed to create event source")
}

/// Posts `event` to the HID event tap, then pauses for 15 ms.
/// - Parameters:
///   - event: The event to post; `nil` means its creation failed upstream.
///   - actionDescription: Short description of the action, used in the error message.
/// - Throws: `MacosUseSDKError.inputSimulationFailed` when `event` is `nil`.
fileprivate func postEvent(_ event: CGEvent?, actionDescription: String) throws {
    guard let readyEvent = event else {
        throw MacosUseSDKError.inputSimulationFailed("failed to create \(actionDescription) event")
    }
    readyEvent.post(tap: .cghidEventTap)
    // Small pause after every post (raised from 10 ms); some applications need it.
    usleep(15_000)
}

// --- Public Input Simulation Functions ---

/// Simulates pressing and releasing a key with optional modifier flags.
/// - Parameters:
///   - keyCode: The `CGKeyCode` of the key to press.
///   - flags: The modifier flags (`CGEventFlags`) to apply (e.g., `.maskCommand`, `.maskShift`).
/// - Throws: `MacosUseSDKError` if the event source cannot be created or the event cannot be posted.
public func pressKey(keyCode: CGKeyCode, flags: CGEventFlags = []) throws {
    fputs("log: simulating key press: (code: \(keyCode), flags: \(flags.rawValue))\n", stderr)
    let source = try createEventSource()

    // Key down, then key up; both carry the same modifier flags.
    // The pause between the two events is provided by `postEvent`.
    let phases: [(isDown: Bool, label: String)] = [(true, "key down"), (false, "key up")]
    for phase in phases {
        let keyEvent = CGEvent(keyboardEventSource: source, virtualKey: keyCode, keyDown: phase.isDown)
        keyEvent?.flags = flags
        try postEvent(keyEvent, actionDescription: "\(phase.label) (code: \(keyCode), flags: \(flags.rawValue))")
    }

    fputs("log: key press simulation complete.\n", stderr)
}

/// Simulates a left mouse click at the specified screen coordinates.
/// Does not move the cursor first. Call `moveMouse` beforehand if needed.
/// - Parameter point: The `CGPoint` where the click should occur.
/// - Throws: `MacosUseSDKError` if the event source cannot be created or the event cannot be posted.
public func clickMouse(at point: CGPoint) throws {
    fputs("log: simulating left click at: (\(point.x), \(point.y))\n", stderr)
    let source = try createEventSource()

    // Left button down, then up, both at `point`.
    // The pause between the two events is provided by `postEvent`.
    let phases: [(type: CGEventType, label: String)] = [
        (.leftMouseDown, "mouse down"),
        (.leftMouseUp, "mouse up")
    ]
    for phase in phases {
        let mouseEvent = CGEvent(mouseEventSource: source,
                                 mouseType: phase.type,
                                 mouseCursorPosition: point,
                                 mouseButton: .left)
        try postEvent(mouseEvent, actionDescription: "\(phase.label) at (\(point.x), \(point.y))")
    }

    fputs("log: left click simulation complete.\n", stderr)
}

/// Simulates a left mouse double click at the specified screen coordinates.
/// Does not move the cursor first. Call `moveMouse` beforehand if needed.
/// - Parameter point: The `CGPoint` where the double click should occur.
/// - Throws: `MacosUseSDKError` if the event source cannot be created or the event cannot be posted.
public func doubleClickMouse(at point: CGPoint) throws {
    fputs("log: simulating double-click at: (\(point.x), \(point.y))\n", stderr)
    let source = try createEventSource()
    let location = "(\(point.x), \(point.y))"

    // There is no dedicated double-click event type. A double click is two down/up pairs
    // whose click-state field counts 1, then 2. Posting only a pair tagged "2" skips the
    // first click entirely, which some applications do not recognise as a double click.
    // The pause after each event is provided by `postEvent`.
    for clickState in Int64(1)...2 {
        let downEvent = CGEvent(mouseEventSource: source,
                                mouseType: .leftMouseDown,
                                mouseCursorPosition: point,
                                mouseButton: .left)
        downEvent?.setIntegerValueField(.mouseEventClickState, value: clickState)
        try postEvent(downEvent, actionDescription: "double click down (click \(clickState) of 2) at \(location)")

        let upEvent = CGEvent(mouseEventSource: source,
                              mouseType: .leftMouseUp,
                              mouseCursorPosition: point,
                              mouseButton: .left)
        upEvent?.setIntegerValueField(.mouseEventClickState, value: clickState)
        try postEvent(upEvent, actionDescription: "double click up (click \(clickState) of 2) at \(location)")
    }

    fputs("log: double-click simulation complete.\n", stderr)
}

/// Simulates a right mouse click at the specified screen coordinates.
/// Does not move the cursor first. Call `moveMouse` beforehand if needed.
/// - Parameter point: The `CGPoint` where the right click should occur.
/// - Throws: `MacosUseSDKError` if the event source cannot be created or the event cannot be posted.
public func rightClickMouse(at point: CGPoint) throws {
    fputs("log: simulating right-click at: (\(point.x), \(point.y))\n", stderr)
    let eventSource = try createEventSource()

    // Press the RIGHT button at the target location.
    let pressEvent = CGEvent(
        mouseEventSource: eventSource,
        mouseType: .rightMouseDown,
        mouseCursorPosition: point,
        mouseButton: .right
    )
    try postEvent(pressEvent, actionDescription: "right mouse down at (\(point.x), \(point.y))")

    // Release the RIGHT button at the same location.
    let releaseEvent = CGEvent(
        mouseEventSource: eventSource,
        mouseType: .rightMouseUp,
        mouseCursorPosition: point,
        mouseButton: .right
    )
    try postEvent(releaseEvent, actionDescription: "right mouse up at (\(point.x), \(point.y))")

    fputs("log: right-click simulation complete.\n", stderr)
}

/// Moves the mouse cursor to the specified screen coordinates.
/// - Parameter point: The `CGPoint` to move the cursor to.
/// - Throws: `MacosUseSDKError` if the event source cannot be created or the event cannot be posted.
public func moveMouse(to point: CGPoint) throws {
    fputs("log: moving mouse to: (\(point.x), \(point.y))\n", stderr)
    let eventSource = try createEventSource()

    // The initializer requires a button argument; `.left` is passed as a
    // placeholder for the `.mouseMoved` event.
    let moveEvent = CGEvent(
        mouseEventSource: eventSource,
        mouseType: .mouseMoved,
        mouseCursorPosition: point,
        mouseButton: .left
    )
    try postEvent(moveEvent, actionDescription: "mouse move to (\(point.x), \(point.y))")

    fputs("log: mouse move simulation complete.\n", stderr)
}

/// Simulates typing a string of text using AppleScript `keystroke`.
/// This is generally more reliable for arbitrary text than simulating individual key presses.
/// - Parameter text: The `String` to type.
/// - Throws: `MacosUseSDKError.inputSimulationFailed` if `osascript` cannot be launched, or
///   `MacosUseSDKError.osascriptExecutionFailed` if it runs but exits with a non-zero status.
public func writeText(_ text: String) throws {
    // Using AppleScript's 'keystroke' is simplest for arbitrary text,
    // as it handles character mapping, keyboard layouts, etc.
    // A pure CGEvent approach would require complex character-to-keycode+flags mapping.
    fputs("log: simulating text writing: \"\(text)\" (using AppleScript)\n", stderr)

    // Escape backslashes first, then double quotes, so the text is a valid AppleScript string literal.
    let escapedText = text
        .replacingOccurrences(of: "\\", with: "\\\\")
        .replacingOccurrences(of: "\"", with: "\\\"")
    let script = "tell application \"System Events\" to keystroke \"\(escapedText)\""

    let process = Process()
    process.executableURL = URL(fileURLWithPath: "/usr/bin/osascript")
    process.arguments = ["-e", script]

    // Capture potential errors from osascript
    let errorPipe = Pipe()
    process.standardError = errorPipe

    // Only the launch itself is wrapped: a non-zero exit status must surface as
    // `osascriptExecutionFailed` instead of being re-wrapped by a catch-all handler.
    do {
        try process.run()
    } catch {
        throw MacosUseSDKError.inputSimulationFailed("failed to execute osascript for writetext: \(error.localizedDescription)")
    }

    // Drain stderr before waiting so a chatty child cannot block on a full pipe.
    let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile()
    process.waitUntilExit()
    let errorString = String(data: errorData, encoding: .utf8)?
        .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""

    guard process.terminationStatus == 0 else {
        fputs("error: osascript command failed with status \(process.terminationStatus)\n", stderr)
        if !errorString.isEmpty {
            fputs("error details (osascript): \(errorString)\n", stderr)
        }
        throw MacosUseSDKError.osascriptExecutionFailed(status: process.terminationStatus, message: errorString)
    }
    fputs("log: text writing simulation complete.\n", stderr)
}


/// Maps common key names (case-insensitive) or a numeric string to their `CGKeyCode`.
///
/// Known names are matched first, so single digits such as `"1"` resolve to the
/// key code of that digit key; any other string is parsed as a raw key code number.
/// - Parameter keyName: The name of the key (e.g., "return", "a", "esc") or a string representation of the key code number.
/// - Returns: The corresponding `CGKeyCode` or `nil` if the name is not recognized and cannot be parsed as a number.
public func mapKeyNameToKeyCode(_ keyName: String) -> CGKeyCode? {
    switch keyName.lowercased() {
    // Special Keys
    case "return", "enter": return KEY_RETURN
    case "tab": return KEY_TAB
    case "space": return KEY_SPACE
    case "delete", "backspace": return KEY_DELETE
    case "escape", "esc": return KEY_ESCAPE
    case "left": return KEY_ARROW_LEFT
    case "right": return KEY_ARROW_RIGHT
    case "down": return KEY_ARROW_DOWN
    case "up": return KEY_ARROW_UP

    // Letters (Standard US QWERTY Layout Key Codes) - Assuming US QWERTY. Might need adjustments for others.
    case "a": return 0
    case "b": return 11
    case "c": return 8
    case "d": return 2
    case "e": return 14
    case "f": return 3
    case "g": return 5
    case "h": return 4
    case "i": return 34
    case "j": return 38
    case "k": return 40
    case "l": return 37
    case "m": return 46
    case "n": return 45
    case "o": return 31
    case "p": return 35
    case "q": return 12
    case "r": return 15
    case "s": return 1
    case "t": return 17
    case "u": return 32
    case "v": return 9
    case "w": return 13
    case "x": return 7
    case "y": return 16
    case "z": return 6

    // Numbers (Main Keyboard Row)
    case "1": return 18
    case "2": return 19
    case "3": return 20
    case "4": return 21
    case "5": return 23
    case "6": return 22
    case "7": return 26
    case "8": return 28
    case "9": return 25
    case "0": return 29

    // Symbols (Common - May vary significantly by layout)
    case "-": return 27
    case "=": return 24
    case "[": return 33
    case "]": return 30
    case "\\": return 42 // Backslash
    case ";": return 41
    case "'": return 39 // Quote
    case ",": return 43
    case ".": return 47
    case "/": return 44
    case "`": return 50 // Grave accent / Tilde

    // Function Keys
    case "f1": return 122
    case "f2": return 120
    case "f3": return 99
    case "f4": return 118
    case "f5": return 96
    case "f6": return 97
    case "f7": return 98
    case "f8": return 100
    case "f9": return 101
    case "f10": return 109
    case "f11": return 103
    case "f12": return 111
    // Add F13-F20 if needed

    default:
        // If not a known name, attempt to interpret it as a raw key code number
        fputs("log: key '\(keyName)' not explicitly mapped, attempting conversion to CGKeyCode number.\n", stderr)
        return CGKeyCode(keyName) // Returns nil if conversion fails
    }
}

// --- Removed Main Script Logic ---
// The argument parsing, switch statement, fail(), completeSuccessfully(), startTime
// and related logic have been removed from this file. They will be handled by the
// InputControllerTool executable's main.swift.

// --- Retained Helper Structures/Functions if needed by public API ---
// (e.g., mapKeyNameToKeyCode is now public)
--------------------------------------------------------------------------------
/Sources/TraversalTool/main.swift:
--------------------------------------------------------------------------------
import Foundation
import MacosUseSDK // Import your library

// --- Main Execution Logic ---

// 1. Argument Parsing
var arguments = CommandLine.arguments
var onlyVisible = false
var pidString: String? = nil

// Remove the executable name
arguments.removeFirst()

// Check for the flag and remove it if found
if let flagIndex = arguments.firstIndex(of: "--visible-only") {
    onlyVisible = true
    arguments.remove(at: flagIndex)
    fputs("info: '--visible-only' flag detected.\n", stderr)
}

// The remaining argument should be the PID
if arguments.count == 1 {
    pidString = arguments[0]
}

// Anything other than exactly one numeric argument is a usage error.
guard let pidStr = pidString, let appPID = Int32(pidStr) else {
    fputs("usage: TraversalTool [--visible-only] <pid>\n", stderr)
    fputs("error: expected a valid process id (pid) as the argument.\n", stderr)
    fputs("example (all elements): TraversalTool 14154\n", stderr)
    fputs("example (visible only): TraversalTool --visible-only 14154\n", stderr)
    exit(1)
}

// 2.
// Call the Library Function
do {
    fputs("info: calling traverseAccessibilityTree for pid \(appPID) (Visible Only: \(onlyVisible))...\n", stderr)
    // Pass the parsed 'onlyVisible' flag to the library function
    let responseData = try MacosUseSDK.traverseAccessibilityTree(pid: appPID, onlyVisibleElements: onlyVisible)
    fputs("info: successfully received response from traverseAccessibilityTree.\n", stderr)

    // 3. Encode the result to JSON
    let encoder = JSONEncoder()
    encoder.outputFormatting = [.prettyPrinted, .sortedKeys]

    let jsonData = try encoder.encode(responseData)

    // 4. Print JSON to standard output
    if let jsonString = String(data: jsonData, encoding: .utf8) {
        print(jsonString)
        exit(0) // Success
    } else {
        fputs("error: failed to convert response data to json string.\n", stderr)
        exit(1)
    }

} catch let error as MacosUseSDKError {
    fputs("❌ Error from MacosUseSDK: \(error.localizedDescription)\n", stderr)
    exit(1)
} catch {
    fputs("❌ An unexpected error occurred: \(error.localizedDescription)\n", stderr)
    exit(1)
}

/*
# Example: Get visible elements from Messages app
swift run TraversalTool --visible-only $(swift run AppOpenerTool Messages)
*/
--------------------------------------------------------------------------------
/Sources/VisualInputTool/main.swift:
--------------------------------------------------------------------------------
import Foundation
import CoreGraphics // For CGPoint, CGEventFlags
import MacosUseSDK // Import the library
import AppKit // Required for RunLoop, NSScreen

// --- Start Time ---
let startTime = Date() // Record start time for the tool's execution

// --- Tool-specific Logging ---
/// Writes a message to stderr, prefixed with the tool name.
func log(_ message: String) {
    fputs("VisualInputTool: \(message)\n", stderr)
}

// --- Tool-specific Exiting ---
/// Logs the final status and the elapsed time since `startTime`.
/// Does not terminate the process; the caller decides when to exit.
func finish(success: Bool, message: String? = nil) {
    if let msg = message {
        log(success ? "✅ Success: \(msg)" : "❌ Error: \(msg)")
    }
    let endTime = Date()
    let processingTime = endTime.timeIntervalSince(startTime)
    let formattedTime = String(format: "%.3f", processingTime)
    fputs("VisualInputTool: total execution time (before wait): \(formattedTime) seconds\n", stderr)
    // Don't exit immediately, let RunLoop finish
}

// --- Argument Parsing Helper ---
/// Parses the command line into an action, its arguments, and an optional `--duration` value.
///
/// - Returns: The lowercased action (or `nil` if none was given), the remaining
///   positional arguments, and the requested duration in seconds. A duration of `0`
///   means "not specified", so each action can fall back to its own default.
func parseArguments() -> (action: String?, args: [String], duration: Double) {
    var action: String? = nil
    var actionArgs: [String] = []
    // 0 = not specified. Returning a positive default here would mask the
    // per-action defaults (0.8s for keypress, calculated for writetext).
    var duration: Double = 0
    var waitingForDurationValue = false

    for arg in CommandLine.arguments.dropFirst() { // Skip executable path
        if waitingForDurationValue {
            if let durationValue = Double(arg), durationValue > 0 {
                duration = durationValue
                log("Parsed duration: \(duration) seconds")
            } else {
                // Keep "not specified" so the action's default applies.
                fputs("error: Invalid value provided after --duration.\n", stderr)
            }
            waitingForDurationValue = false
        } else if arg == "--duration" {
            waitingForDurationValue = true
        } else if action == nil {
            let parsedAction = arg.lowercased()
            action = parsedAction
            log("Parsed action: \(parsedAction)")
        } else {
            actionArgs.append(arg)
        }
    }

    if waitingForDurationValue {
        fputs("error: Missing value after --duration flag. Using the action's default duration.\n", stderr)
    }
    if action == nil {
        fputs("error: No action specified.\n", stderr)
    }

    log("Parsed action arguments: \(actionArgs)")
    return (action, actionArgs, duration)
}


// --- Main Logic ---
let scriptName = CommandLine.arguments.first ?? "VisualInputTool"
let usage = """
usage: \(scriptName) <action> [options...] [--duration <seconds>]

actions:
  keypress <key_name_or_code>[+modifier...]  Simulate key press AND show caption visualization.
  click <x> <y>                              Simulate left click AND show circle visualization.
  doubleclick <x> <y>                        Simulate double-click AND show circle visualization.
  rightclick <x> <y>                         Simulate right click AND show circle visualization.
  mousemove <x> <y>                          Move mouse AND show circle visualization at destination.
  writetext <text_to_type>                   Simulate typing text AND show caption visualization.

options:
  --duration <seconds>   How long the visual effect should last (default: 0.5s for mouse, 0.8s for keypress, calculated for writetext).

Examples:
  \(scriptName) click 100 250
  \(scriptName) click 500 500 --duration 1.5
  \(scriptName) keypress cmd+shift+4 --duration 1.0
  \(scriptName) writetext "Hello There"

"""

let (requestedAction, actionArgs, parsedDuration) = parseArguments()

guard let action = requestedAction else {
    fputs(usage, stderr)
    exit(1)
}

// --- Action Handling ---
var success = false
var message: String? = nil
var requiresRunLoopWait = true // Default to true, as all actions now have visualization

// Variable to hold the actual duration used for visualization
var visualizationDuration: Double = 0.5 // Default fallback

// Use a Task for the main logic to easily call async/await and @MainActor functions
Task {
    do {
        switch action {
        case "keypress":
            guard actionArgs.count == 1 else {
                throw MacosUseSDKError.inputInvalidArgument("'keypress' requires exactly one argument: <key_name_or_code_with_modifiers>\n\(usage)")
            }
            let keyCombo = actionArgs[0]
            log("Processing key combo: '\(keyCombo)'")
            // (Parsing logic copied from InputControllerTool)
            // The last "+"-separated part is the key; everything before it is a modifier.
            var flags: CGEventFlags = []
            let parts = keyCombo.split(separator: "+").map { String($0).trimmingCharacters(in: .whitespacesAndNewlines).lowercased() }
            guard let keyPart = parts.last else {
                throw MacosUseSDKError.inputInvalidArgument("Invalid key combination format: '\(keyCombo)'")
            }
            let keyCode = MacosUseSDK.mapKeyNameToKeyCode(keyPart)
            let modifiers = parts.dropLast()
            if !modifiers.isEmpty {
                log("Parsing modifiers: \(modifiers.joined(separator: ", "))")
                for modifier in modifiers {
                    switch modifier {
                    case "cmd", "command": flags.insert(.maskCommand)
                    case "shift": flags.insert(.maskShift)
                    case "opt", "option", "alt": flags.insert(.maskAlternate)
                    case "ctrl", "control": flags.insert(.maskControl)
                    case "fn", "function": flags.insert(.maskSecondaryFn)
                    default: throw MacosUseSDKError.inputInvalidArgument("Unknown modifier: '\(modifier)' in '\(keyCombo)'")
                    }
                }
            }
            guard let finalKeyCode = keyCode else {
                throw MacosUseSDKError.inputInvalidArgument("Unknown key name or invalid key code: '\(keyPart)' in '\(keyCombo)'")
            }

            visualizationDuration = parsedDuration > 0 ? parsedDuration : 0.8 // Use parsed or default 0.8s

            log("Calling pressKey library function...")
            try MacosUseSDK.pressKey(keyCode: finalKeyCode, flags: flags) // Input simulation

            log("Dispatching showVisualFeedback for keypress...")
            // Dispatch visualization separately
            let captionText = "[KEY PRESS]"
            let captionSize = CGSize(width: 250, height: 80)
            if let screenCenter = MacosUseSDK.getMainScreenCenter() {
                MacosUseSDK.showVisualFeedback(
                    at: screenCenter,
                    type: .caption(text: captionText),
                    size: captionSize,
                    duration: visualizationDuration
                )
            } else {
                fputs("warning: could not get screen center for key press caption.\n", stderr)
                requiresRunLoopWait = false // Don't wait if viz failed
            }

            success = true
            message = "Key press '\(keyCombo)' simulated with visualization."

        case "click", "doubleclick", "rightclick", "mousemove":
            guard actionArgs.count == 2 else {
                throw MacosUseSDKError.inputInvalidArgument("'\(action)' requires exactly two arguments: <x> <y>\n\(usage)")
            }
            guard let x = Double(actionArgs[0]), let y = Double(actionArgs[1]) else {
                throw MacosUseSDKError.inputInvalidArgument("Invalid coordinates for '\(action)'. x and y must be numbers.")
            }
            let point = CGPoint(x: x, y: y)
            log("Coordinates: (\(x), \(y))")

            visualizationDuration = parsedDuration > 0 ? parsedDuration : 0.5 // Use parsed or default 0.5s

            log("Calling \(action) library function...") // Now refers to the input-only function
            switch action {
            case "click": try MacosUseSDK.clickMouse(at: point)
            case "doubleclick": try MacosUseSDK.doubleClickMouse(at: point)
            case "rightclick": try MacosUseSDK.rightClickMouse(at: point)
            case "mousemove": try MacosUseSDK.moveMouse(to: point)
            default: break // Unreachable: the outer case already restricts `action`
            }

            log("Dispatching showVisualFeedback for \(action)...")
            // Dispatch visualization separately
            MacosUseSDK.showVisualFeedback(at: point, type: .circle, duration: visualizationDuration)

            success = true
            message = "\(action) simulated at (\(x), \(y)) with visualization."


        case "writetext":
            guard actionArgs.count == 1 else {
                throw MacosUseSDKError.inputInvalidArgument("'writetext' requires exactly one argument: <text_to_type>\n\(usage)")
            }
            let text = actionArgs[0]
            log("Text Argument: \"\(text)\"")

            // Calculate duration if not specified: at least 1s, growing with text length
            let defaultDuration = 1.0
            let calculatedDuration = max(defaultDuration, 0.5 + Double(text.count) * 0.05)
            visualizationDuration = parsedDuration > 0 ? parsedDuration : calculatedDuration // Use parsed or calculated

            log("Calling writeText library function...")
            try MacosUseSDK.writeText(text) // Input simulation

            log("Dispatching showVisualFeedback for writetext...")
            // Dispatch visualization separately
            let captionSize = CGSize(width: 450, height: 100)
            if let screenCenter = MacosUseSDK.getMainScreenCenter() {
                MacosUseSDK.showVisualFeedback(
                    at: screenCenter,
                    type: .caption(text: text), // Show actual text
                    size: captionSize,
                    duration: visualizationDuration
                )
            } else {
                fputs("warning: could not get screen center for write text caption.\n", stderr)
                requiresRunLoopWait = false // Don't wait if viz failed
            }

            success = true
            message = "Text writing simulated with visualization."

        default:
            fputs(usage, stderr)
            throw MacosUseSDKError.inputInvalidArgument("Unknown action '\(action)'")
        }

        // --- Log final status before potentially waiting ---
        finish(success: success, message: message)

        // --- Keep Main Thread Alive for Visualization (if needed) ---
        if requiresRunLoopWait {
            let waitTime = visualizationDuration + 0.5 // Wait slightly longer
            log("Waiting for \(waitTime) seconds for visualization to complete...")
            // Use RunLoop directly since we are in a Task that might not be on the main thread initially
            DispatchQueue.main.async {
                RunLoop.main.run(until: Date(timeIntervalSinceNow: waitTime))
                log("Run loop finished. Exiting.")
                exit(0) // Exit normally after waiting
            }
            // Keep the task alive until the run loop finishes
            try await Task.sleep(nanoseconds: UInt64((waitTime + 0.1) * 1_000_000_000))
            // Fallback exit if the run loop mechanism doesn't exit
            exit(0)

        } else {
            log("No visualization triggered or viz failed, exiting immediately.")
            exit(0) // Exit normally without waiting
        }

    } catch let error as MacosUseSDKError {
        finish(success: false, message: "MacosUseSDK Error: \(error.localizedDescription)")
        exit(1) // Exit with error
    } catch {
        finish(success: false, message: "An unexpected error occurred: \(error.localizedDescription)")
        exit(1) // Exit with error
    }
}

// Keep the main thread running to allow the Task to execute
RunLoop.main.run()
--------------------------------------------------------------------------------
/Tests/MacosUseSDKTests/CombinedActionsDiffTests.swift:
--------------------------------------------------------------------------------
import XCTest
@testable import MacosUseSDK // Use @testable to access internal stuff if needed, otherwise just import
import AppKit // For NSWorkspace, NSRunningApplication

final class CombinedActionsDiffTests: XCTestCase {

    var calculatorPID: pid_t?
    var calculatorApp: NSRunningApplication?
9 | 10 | // Launch Calculator before each test 11 | override func setUp() async throws { 12 | // Ensure accessibility is granted (cannot check programmatically easily, user must pre-authorize) 13 | fputs("info: Test setup - Launching Calculator...\n", stderr) 14 | // Note: Using NSWorkspace directly here to avoid SDK dependency loop if openApplication fails 15 | let calcURL = URL(fileURLWithPath: "/System/Applications/Calculator.app") 16 | // Configuration to activate it 17 | let config = NSWorkspace.OpenConfiguration() 18 | config.activates = true 19 | calculatorApp = try await NSWorkspace.shared.openApplication(at: calcURL, configuration: config) 20 | calculatorPID = calculatorApp?.processIdentifier 21 | XCTAssertNotNil(calculatorPID, "Failed to get Calculator PID") 22 | fputs("info: Test setup - Calculator launched with PID \(calculatorPID!)\n", stderr) 23 | // Give it a moment to fully launch and settle 24 | try await Task.sleep(nanoseconds: 1_000_000_000) // 1 second 25 | } 26 | 27 | // Quit Calculator after each test 28 | override func tearDown() async throws { 29 | fputs("info: Test teardown - Terminating Calculator (PID: \(calculatorPID ?? -1))...\n", stderr) 30 | calculatorApp?.terminate() 31 | // Give it more time to terminate AND allow any remaining async SDK tasks (like animations) to naturally cease. 
32 | fputs("info: Test teardown - Waiting 1.5 seconds for app termination and UI settling...\n", stderr) 33 | try await Task.sleep(nanoseconds: 1_500_000_000) // 1.5 seconds 34 | calculatorApp = nil 35 | calculatorPID = nil 36 | fputs("info: Test teardown - Finished.\n", stderr) 37 | } 38 | 39 | // Test: Type '2*3=' with action viz + traversal highlight and print the diff 40 | @MainActor 41 | func testCalculatorMultiplyWithActionAndTraversalHighlight() async throws { 42 | guard let pid = calculatorPID else { 43 | XCTFail("Calculator PID not available") 44 | return 45 | } 46 | 47 | fputs("\ninfo: === Starting testCalculatorMultiplyWithActionAndTraversalHighlight ===\n", stderr) 48 | 49 | // --- Define durations for test --- 50 | let testActionHighlightDuration: Double = 0.4 51 | let testTraversalHighlightDuration: Double = 2.0 // Duration passed to SDK function 52 | let testDelayNano: UInt64 = 150_000_000 53 | 54 | // --- Action Sequence with Highlighting --- 55 | fputs("info: Test run - Calling writeTextWithActionAndTraversalHighlight for '2*3='...\n", stderr) 56 | let result = try await CombinedActions.writeTextWithActionAndTraversalHighlight( 57 | text: "2*3=", 58 | pid: pid, 59 | onlyVisibleElements: true, 60 | actionHighlightDuration: testActionHighlightDuration, 61 | traversalHighlightDuration: testTraversalHighlightDuration, // Pass 2.0s duration 62 | delayAfterActionNano: testDelayNano 63 | ) 64 | fputs("info: Test run - writeTextWithActionAndTraversalHighlight returned (highlighting may start appearing).\n", stderr) 65 | 66 | // --- Print Diff --- 67 | fputs("info: --- Traversal Diff Results (Highlighted) ---\n", stderr) 68 | 69 | fputs("info: Added Elements (\(result.diff.added.count)):\n", stderr) 70 | if result.diff.added.isEmpty { 71 | fputs("info: (None)\n", stderr) 72 | } else { 73 | for element in result.diff.added { 74 | fputs("info: + Role: \(element.role), Text: \(element.text ?? "nil"), Pos: (\(element.x ?? -1), \(element.y ?? 
-1)), Size: (\(element.width ?? -1) x \(element.height ?? -1))\n", stderr) 75 | } 76 | } 77 | 78 | fputs("info: Removed Elements (\(result.diff.removed.count)):\n", stderr) 79 | if result.diff.removed.isEmpty { 80 | fputs("info: (None)\n", stderr) 81 | } else { 82 | for element in result.diff.removed { 83 | fputs("info: - Role: \(element.role), Text: \(element.text ?? "nil"), Pos: (\(element.x ?? -1), \(element.y ?? -1)), Size: (\(element.width ?? -1) x \(element.height ?? -1))\n", stderr) 84 | } 85 | } 86 | fputs("info: --- End Diff Results (Highlighted) ---\n", stderr) 87 | 88 | // --- Wait for Traversal Highlighting Animations BEFORE Test Ends --- 89 | // The SDK no longer explicitly closes highlight windows, relying on OS cleanup. 90 | // This wait ensures the highlight *animations* have sufficient time to visually 91 | // complete before tearDown terminates the Calculator app. It also provides 92 | // a buffer for general UI settling. 93 | let highlightCompletionWaitSeconds = testTraversalHighlightDuration + 0.2 // Wait slightly longer than animation 94 | fputs("info: Test run - Waiting \(highlightCompletionWaitSeconds) seconds for traversal highlighting animations to complete...\n", stderr) 95 | try await Task.sleep(nanoseconds: UInt64(highlightCompletionWaitSeconds * 1_000_000_000)) 96 | fputs("info: Test run - Traversal highlight animation wait finished. Proceeding to finish test function.\n", stderr) 97 | // --- END WAIT --- 98 | 99 | fputs("info: === Finished testCalculatorMultiplyWithActionAndTraversalHighlight ===\n", stderr) 100 | } 101 | // --- END TEST --- 102 | 103 | // Add more test methods for clickWithDiff, pressKeyWithDiff etc. 104 | // You can add similar tests for clickWithActionAndTraversalHighlight and pressKeyWithActionAndTraversalHighlight 105 | // For click tests, you might need to first traverse to find the coordinates of a button 106 | // (e.g., the '5' button) and then pass those coordinates to the click function. 
}
--------------------------------------------------------------------------------
/Tests/MacosUseSDKTests/CombinedActionsFocusVisualizationTests.swift:
--------------------------------------------------------------------------------
import XCTest
@testable import MacosUseSDK
import AppKit

/// UI tests that drive TextEdit to observe the focus-based action visualization.
/// They require accessibility permissions to be granted beforehand.
final class CombinedActionsFocusVisualizationTests: XCTestCase {

    var textEditPID: pid_t?
    var textEditApp: NSRunningApplication?
    var temporaryFileURL: URL?

    // Launch TextEdit before each test, opening a temporary file
    override func setUp() async throws {
        // Create a uniquely named temporary file URL
        let fileURL = FileManager.default.temporaryDirectory
            .appendingPathComponent("testFocus_\(UUID().uuidString).txt")
        temporaryFileURL = fileURL

        // Create an empty file; a thrown error fails the test right away instead of
        // letting it continue with a half-initialized fixture.
        try "".write(to: fileURL, atomically: true, encoding: .utf8)
        fputs("info: Focus Test Setup - Created temporary file at: \(fileURL.path)\n", stderr)

        // Ensure accessibility is granted (user must pre-authorize)
        fputs("info: Focus Test Setup - Launching TextEdit to open temporary file...\n", stderr)

        let textEditAppURL = URL(fileURLWithPath: "/System/Applications/TextEdit.app")
        let config = NSWorkspace.OpenConfiguration()
        config.activates = true // Ensure it comes to the front and likely grabs focus

        // Open the temporary file with TextEdit
        textEditApp = try await NSWorkspace.shared.open(
            [fileURL], // Pass the URL of the file to open in an array
            withApplicationAt: textEditAppURL,
            configuration: config
        )

        textEditPID = textEditApp?.processIdentifier
        // XCTUnwrap fails the test by throwing instead of crashing on a nil PID.
        let pid = try XCTUnwrap(textEditPID, "Failed to get TextEdit PID")
        fputs("info: Focus Test Setup - TextEdit launched with PID \(pid) opening \(fileURL.lastPathComponent)\n", stderr)

        // Give it time to fully launch, open the file, and potentially set initial focus
        try await Task.sleep(nanoseconds: 1_500_000_000) // 1.5 seconds
    }

    // Quit TextEdit and delete the temporary file after each test
    override func tearDown() async throws {
        fputs("info: Focus Test Teardown - Terminating TextEdit (PID: \(textEditPID ?? -1)) and cleaning up file...\n", stderr)

        // --- Close TextEdit Document via AppleScript ---
        // NOTE(review): `process id` may address System Events' own element id rather
        // than the unix pid — confirm the script targets the intended process.
        if let pid = textEditPID {
            let script = """
            tell application "System Events"
                tell process id \(pid)
                    try
                        # Get the front window (document)
                        set frontWindow to first window

                        # Check if it's the document window we opened
                        # This might need adjustment based on exact window naming
                        if name of frontWindow contains "testFocus_" then
                            # Perform close action (Command-W)
                            keystroke "w" using {command down}
                            delay 0.2 # Small delay

                            # Check if a "Don't Save" sheet appeared (unlikely for empty/unchanged file)
                            if exists sheet 1 of frontWindow then
                                key code 36 # Return key code (usually selects default like "Don't Save")
                                delay 0.2
                            end if
                        end if
                    end try
                end tell
            end tell
            tell application "TextEdit" to if it is running then quit saving no # Add 'saving no' for clarity
            """
            let process = Process()
            process.executableURL = URL(fileURLWithPath: "/usr/bin/osascript")
            process.arguments = ["-e", script]
            do {
                try process.run()
                process.waitUntilExit()
                fputs("info: Focus Test Teardown - Attempted clean close via AppleScript (Status: \(process.terminationStatus))\n", stderr)
            } catch {
                fputs("error: Focus Test Teardown - AppleScript execution failed: \(error)\n", stderr)
            }
        }

        // Fallback or alternative: Force terminate if still running
        if textEditApp?.isTerminated == false {
            fputs("info: Focus Test Teardown - Forcing termination...\n", stderr)
            textEditApp?.forceTerminate()
            // Add a small delay after force termination
            try await Task.sleep(nanoseconds: 200_000_000) // 0.2 seconds
        }

        // --- Delete the temporary file ---
        if let fileURL = temporaryFileURL {
            do {
                try FileManager.default.removeItem(at: fileURL)
                fputs("info: Focus Test Teardown - Successfully deleted temporary file: \(fileURL.path)\n", stderr)
            } catch {
                // Log error but don't fail the test teardown for this
                fputs("warning: Focus Test Teardown - Could not delete temporary file: \(error)\n", stderr)
            }
            temporaryFileURL = nil // Clear the reference
        }

        // Allow time for termination and general settling before next test.
        try await Task.sleep(nanoseconds: 300_000_000) // 0.3 seconds
        textEditApp = nil
        textEditPID = nil
        fputs("info: Focus Test Teardown - Finished.\n", stderr)
    }

    // Test: Write text to TextEdit, expecting focus to be on the text area
    // Verify by checking logs for the "successfully found focused element center" message.
    @MainActor
    func testTextEditFocusAndWriteVisualization() async throws {
        let pid = try XCTUnwrap(textEditPID, "TextEdit PID not available")

        fputs("\ninfo: === Starting testTextEditFocusAndWriteVisualization ===\n", stderr)

        // --- Define durations ---
        let testActionHighlightDuration: Double = 0.6
        let testTraversalHighlightDuration: Double = 1.5 // Shorter for this test
        let testDelayNano: UInt64 = 200_000_000 // 0.2s
        let observationDelaySeconds: Double = 1.0 // Time to observe action visualization

        // --- Action Sequence ---
        // We expect TextEdit's main text view to have focus after activation in setUp.
        fputs("info: Test run - Calling writeTextWithActionAndTraversalHighlight for 'Hello TextEdit!'...\n", stderr)
        // The returned diff is not inspected here; the focus is on the visualization attempt.
        _ = try await CombinedActions.writeTextWithActionAndTraversalHighlight(
            text: "Hello TextEdit!",
            pid: pid,
            onlyVisibleElements: true, // Doesn't affect focus check, but standard for combined action
            actionHighlightDuration: testActionHighlightDuration,
            traversalHighlightDuration: testTraversalHighlightDuration,
            delayAfterActionNano: testDelayNano
        )
        fputs("info: Test run - writeTextWithActionAndTraversalHighlight returned.\n", stderr)
        fputs("info: Test run - Check logs above for 'successfully found focused element center' from writeTextAndVisualize.\n", stderr)

        // --- Short Wait for Visual Observation ---
        // Allows time to visually observe the action/highlight animations before teardown.
        fputs("info: Test run - Waiting \(observationDelaySeconds) seconds for visual observation...\n", stderr)
        try await Task.sleep(nanoseconds: UInt64(observationDelaySeconds * 1_000_000_000))
        fputs("info: Test run - Observation wait finished.\n", stderr)

        fputs("info: === Finished testTextEditFocusAndWriteVisualization ===\n", stderr)
        // Teardown will handle closing TextEdit.
    }
}
--------------------------------------------------------------------------------