├── .gitignore
├── LICENSE
├── Package.swift
├── README.md
├── Sources
    ├── ActionTool
    │   └── main.swift
    ├── AppOpenerTool
    │   └── main.swift
    ├── HighlightTraversalTool
    │   └── main.swift
    ├── InputControllerTool
    │   └── main.swift
    ├── MacosUseSDK
    │   ├── AccessibilityTraversal.swift
    │   ├── ActionCoordinator.swift
    │   ├── AppOpener.swift
    │   ├── CombinedActions.swift
    │   ├── DrawVisuals.swift
    │   ├── HighlightInput.swift
    │   └── InputController.swift
    ├── TraversalTool
    │   └── main.swift
    └── VisualInputTool
        └── main.swift
└── Tests
    └── MacosUseSDKTests
        ├── CombinedActionsDiffTests.swift
        └── CombinedActionsFocusVisualizationTests.swift


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | /.build
3 | /Packages
4 | xcuserdata/
5 | DerivedData/
6 | .swiftpm/configuration/registries.json
7 | .swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata
8 | .netrc
9 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 mediar
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Package.swift:
--------------------------------------------------------------------------------
 1 | // swift-tools-version: 6.0
 2 | // The swift-tools-version declares the minimum version of Swift required to build this package.
 3 | 
 4 | import PackageDescription
 5 | 
 6 | // Package manifest: one library product plus six command-line tool executables.
 7 | let package = Package(
 8 |     name: "MacosUseSDK",
 9 |     // Minimum supported platform: macOS 12.
10 |     platforms: [.macOS(.v12)],
11 |     products: [
12 |         // The SDK itself, importable by other packages.
13 |         .library(name: "MacosUseSDK", targets: ["MacosUseSDK"]),
14 |         // Command-line tools; each product is backed by the executable target of the same name.
15 |         .executable(name: "TraversalTool", targets: ["TraversalTool"]),
16 |         .executable(name: "HighlightTraversalTool", targets: ["HighlightTraversalTool"]),
17 |         .executable(name: "InputControllerTool", targets: ["InputControllerTool"]),
18 |         .executable(name: "VisualInputTool", targets: ["VisualInputTool"]),
19 |         .executable(name: "AppOpenerTool", targets: ["AppOpenerTool"]),
20 |         .executable(name: "ActionTool", targets: ["ActionTool"]),
21 |     ],
22 |     // No external package dependencies at present; add them here if that changes.
23 |     dependencies: [],
24 |     targets: [
25 |         // Library target: no package dependencies, links two system frameworks.
26 |         .target(
27 |             name: "MacosUseSDK",
28 |             dependencies: [],
29 |             linkerSettings: [
30 |                 .linkedFramework("AppKit"),
31 |                 .linkedFramework("ApplicationServices"),
32 |             ]
33 |         ),
34 |         // Tool targets: every executable depends only on the SDK library.
35 |         .executableTarget(name: "TraversalTool", dependencies: ["MacosUseSDK"]),
36 |         .executableTarget(name: "HighlightTraversalTool", dependencies: ["MacosUseSDK"]),
37 |         .executableTarget(name: "InputControllerTool", dependencies: ["MacosUseSDK"]),
38 |         .executableTarget(name: "VisualInputTool", dependencies: ["MacosUseSDK"]),
39 |         .executableTarget(name: "AppOpenerTool", dependencies: ["MacosUseSDK"]),
40 |         .executableTarget(name: "ActionTool", dependencies: ["MacosUseSDK"]),
41 |         // Unit tests for the SDK library.
42 |         .testTarget(name: "MacosUseSDKTests", dependencies: ["MacosUseSDK"]),
43 |     ]
44 | )
45 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # MacosUseSDK
  2 | 
  3 | Library and command-line tools to traverse the macOS accessibility tree and simulate user input actions. Allows interaction with UI elements of other applications.
  4 | 
  5 | 
  6 | https://github.com/user-attachments/assets/d8dc75ba-5b15-492c-bb40-d2bc5b65483e
  7 | 
  8 | Highlight whatever is happening on the computer: text elements, clicks, typing
  9 | ![Image](https://github.com/user-attachments/assets/9e182bbc-bd30-4285-984a-207a58b32bc0)
 10 | 
 11 | Listen for changes in the UI: elements changed, text changed
 12 | ![Image](https://github.com/user-attachments/assets/4a972dfa-ce4d-4b1a-9781-43379375b313)
 13 | 
 14 | ## Building the Tools
 15 | 
 16 | To build the command-line tools provided by this package, navigate to the root directory (`MacosUseSDK`) in your terminal and run:
 17 | 
 18 | ```bash
 19 | swift build
 20 | ```
 21 | 
 22 | This will compile the tools and place the executables in the `.build/debug/` directory (or `.build/release/` if you use `swift build -c release`). You can run them directly from there (e.g., `.build/debug/TraversalTool`) or use `swift run <ToolName>`.
 23 | 
 24 | ## Available Tools
 25 | 
 26 | All tools output informational logs and timing data to `stderr`. Primary output (like PIDs or JSON data) is sent to `stdout`.
 27 | 
 28 | ### AppOpenerTool
 29 | 
 30 | *   **Purpose:** Opens or activates a macOS application by its name, bundle ID, or full path. Outputs the application's PID on success.
 31 | *   **Usage:** `AppOpenerTool <Application Name | Bundle ID | Path>`
 32 | *   **Examples:**
 33 |     ```bash
 34 |     # Open by name
 35 |     swift run AppOpenerTool Calculator
 36 |     # Open by bundle ID
 37 |     swift run AppOpenerTool com.apple.Terminal
 38 |     # Open by path
 39 |     swift run AppOpenerTool /System/Applications/Utilities/Terminal.app
 40 |     # Example output (stdout)
 41 |     # 54321 
 42 |     ```
 43 | 
 44 | ### TraversalTool
 45 | 
 46 | *   **Purpose:** Traverses the accessibility tree of a running application (specified by PID) and outputs a JSON representation of the UI elements to `stdout`.
 47 | *   **Usage:** `TraversalTool [--visible-only] <PID>`
 48 | *   **Options:**
 49 |     *   `--visible-only`: Only include elements that have a position and size (are geometrically visible).
 50 | *   **Examples:**
 51 |     ```bash
 52 |     # Get only visible elements for Messages app
 53 |     swift run TraversalTool --visible-only $(swift run AppOpenerTool Messages)
 54 |     ```
 55 | 
 56 | ### HighlightTraversalTool
 57 | 
 58 | *   **Purpose:** Traverses the accessibility tree of a running application (specified by PID) and draws temporary red boxes around all visible UI elements. Also outputs traversal data (JSON) to `stdout`. Useful for debugging accessibility structures.
 59 | *   **Usage:** `HighlightTraversalTool <PID> [--duration <seconds>]`
 60 | *   **Options:**
 61 |     *   `--duration <seconds>`: Specifies how long the highlights remain visible (default: 3.0 seconds).
 62 | *   **Examples:**
 63 |     ```bash
 64 |     # Combine with AppOpenerTool to open Messages and highlight it
 65 |     swift run HighlightTraversalTool $(swift run AppOpenerTool Messages) --duration 5
 66 |     ```
 67 |     *Note: This tool needs to keep running for the duration specified to manage the highlights.*
 68 | 
 69 | ### InputControllerTool
 70 | 
 71 | *   **Purpose:** Simulates keyboard and mouse input events without visual feedback.
 72 | *   **Usage:** See `swift run InputControllerTool --help` (or just run without args) for actions.
 73 | *   **Examples:**
 74 |     ```bash
 75 |     # Press the Enter key
 76 |     swift run InputControllerTool keypress enter
 77 |     # Simulate Cmd+C (Copy)
 78 |     swift run InputControllerTool keypress cmd+c
 79 |     # Simulate Shift+Tab
 80 |     swift run InputControllerTool keypress shift+tab
 81 |     # Left click at screen coordinates (100, 250)
 82 |     swift run InputControllerTool click 100 250
 83 |     # Double click at screen coordinates (150, 300)
 84 |     swift run InputControllerTool doubleclick 150 300
 85 |     # Right click at screen coordinates (200, 350)
 86 |     swift run InputControllerTool rightclick 200 350
 87 |     # Move mouse cursor to (500, 500)
 88 |     swift run InputControllerTool mousemove 500 500
 89 |     # Type the text "Hello World!"
 90 |     swift run InputControllerTool writetext "Hello World!"
 91 |     ```
 92 | 
 93 | ### VisualInputTool
 94 | 
 95 | *   **Purpose:** Simulates keyboard and mouse input events *with* visual feedback (currently a pulsing green circle for mouse actions).
 96 | *   **Usage:** Similar to `InputControllerTool`, but adds a `--duration` option for the visual effect. See `swift run VisualInputTool --help`.
 97 | *   **Options:**
 98 |     *   `--duration <seconds>`: How long the visual feedback effect lasts (default: 0.5 seconds).
 99 | *   **Examples:**
100 |     ```bash
101 |     # Left click at (100, 250) with default 0.5s feedback
102 |     swift run VisualInputTool click 100 250
103 |     # Right click at (800, 400) with 2 second feedback
104 |     swift run VisualInputTool rightclick 800 400 --duration 2.0
105 |     # Move mouse to (500, 500) with 1 second feedback
106 |     swift run VisualInputTool mousemove 500 500 --duration 1.0
107 |     # Keypress and writetext (currently NO visualization implemented)
108 |     swift run VisualInputTool keypress cmd+c
109 |     swift run VisualInputTool writetext "Testing"
110 |     ```
111 |     *Note: This tool needs to keep running for the duration specified to display the visual feedback.*
112 | 
113 | ### Running Tests
114 | 
115 | To run only specific tests or test classes, use the `--filter` option.
116 | To run a specific test method, provide the full identifier `TestClassName/testMethodName`.
117 | 
118 | ```bash
119 | swift test
120 | # Example: Run only the multiply test in CombinedActionsDiffTests
121 | swift test --filter CombinedActionsDiffTests/testCalculatorMultiplyWithActionAndTraversalHighlight
122 | # Example: Run all tests in CombinedActionsFocusVisualizationTests
123 | swift test --filter CombinedActionsFocusVisualizationTests
124 | ```
125 | 
126 | 
127 | ## Using the Library
128 | 
129 | You can also use `MacosUseSDK` as a dependency in your own Swift projects. Add it to your `Package.swift` dependencies:
130 | 
131 | ```swift
132 | dependencies: [
133 |     .package(url: "/* path or URL to your MacosUseSDK repo */", from: "1.0.0"),
134 | ]
135 | ```
136 | 
137 | And add `MacosUseSDK` to your target's dependencies:
138 | 
139 | ```swift
140 | .target(
141 |     name: "YourApp",
142 |     dependencies: ["MacosUseSDK"]),
143 | ```
144 | 
145 | Then import and use the public functions:
146 | 
147 | ```swift
148 | import MacosUseSDK
149 | import Foundation // For Dispatch etc.
150 | 
151 | // Example: Get elements from Calculator app
152 | Task {
153 |     do {
154 |         // Find Calculator PID (replace with actual logic or use AppOpenerTool output)
155 |         // let calcPID: Int32 = ... 
156 |         // let response = try MacosUseSDK.traverseAccessibilityTree(pid: calcPID, onlyVisibleElements: true)
157 |         // print("Found \(response.elements.count) visible elements.")
158 | 
159 |         // Example: Click at a point
160 |         let point = CGPoint(x: 100, y: 200)
161 |         try MacosUseSDK.clickMouse(at: point)
162 | 
163 |         // Example: Click with visual feedback (needs main thread for UI)
164 |         DispatchQueue.main.async {
165 |             do {
166 |                  try MacosUseSDK.clickMouseAndVisualize(at: point, duration: 1.0)
167 |             } catch {
168 |                  print("Visualization error: \(error)")
169 |             }
170 |         }
171 | 
172 |     } catch {
173 |         print("MacosUseSDK Error: \(error)")
174 |     }
175 | }
176 | 
177 | // Remember to keep the run loop active if using async UI functions like highlightVisibleElements or *AndVisualize
178 | // RunLoop.main.run() // Or use within an @main Application structure
179 | ```
180 | 
181 | ## License
182 | 
183 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
184 | 


--------------------------------------------------------------------------------
/Sources/ActionTool/main.swift:
--------------------------------------------------------------------------------
  1 | import MacosUseSDK
  2 | import Foundation // For exit, FileHandle
  3 | import CoreGraphics // For CGPoint, CGEventFlags
  4 | 
  5 | /// Demo driver for `performAction`: opens an application, types into it, and prints the resulting UI diff.
  6 | @main
  7 | struct ActionTool {
  8 | 
  9 |     /// Identifier of the application the demo opens and then types into.
 10 |     /// Log lines and result headers interpolate this value so they always name the app actually used.
 11 |     static let appIdentifier = "Messages"
 12 | 
 13 |     /// Pause before exiting (1 second) so asynchronously scheduled visuals get a chance to run.
 14 |     static let exitGraceNanoseconds: UInt64 = 1_000_000_000
 15 | 
 16 |     /// Runs the open step and, if it yields a PID without a primary-action error, the typing step.
 17 |     /// Result headers and diffs go to stdout; progress and error messages go to stderr.
 18 |     static func main() async {
 19 |         fputs("info: ActionTool started.\n", stderr)
 20 | 
 21 |         // --- Example 1, step 1: open (or activate) the application ---
 22 |         let openAction = PrimaryAction.open(identifier: appIdentifier)
 23 |         let openOptions = ActionOptions(
 24 |             traverseBefore: true,
 25 |             traverseAfter: true,
 26 |             showDiff: false,      // No diff is requested for the open step itself
 27 |             onlyVisibleElements: true,
 28 |             showAnimation: false,
 29 |             delayAfterAction: 0.0 // Go straight on to the typing step
 30 |         )
 31 | 
 32 |         fputs("\n--- Running Example 1: Open \(appIdentifier) ---\n", stderr)
 33 |         let openResult = await performAction(action: openAction, optionsInput: openOptions)
 34 | 
 35 |         if let pid = openResult.openResult?.pid, openResult.primaryActionError == nil {
 36 |             fputs("info: \(appIdentifier) opened/activated (PID: \(pid)). Now preparing to type...\n", stderr)
 37 | 
 38 |             // --- Example 1, step 2: type text and diff the UI before/after ---
 39 |             let typeAction = PrimaryAction.input(action: .type(text: "Hello world from ActionTool!"))
 40 |             let typeOptions = ActionOptions(
 41 |                 traverseBefore: true,    // "Before" snapshot for the diff
 42 |                 traverseAfter: true,     // "After" snapshot for the diff
 43 |                 showDiff: true,
 44 |                 onlyVisibleElements: true,
 45 |                 showAnimation: true,
 46 |                 animationDuration: 0.8,
 47 |                 pidForTraversal: pid,    // Must be the PID returned by the open step
 48 |                 delayAfterAction: 0.0    // Raise this if the app needs time to render before the "after" traversal
 49 |             )
 50 | 
 51 |             fputs("\n--- Running Example 1: Type into \(appIdentifier) (with Diff & Animation) ---\n", stderr)
 52 |             let typeResult = await performAction(action: typeAction, optionsInput: typeOptions)
 53 | 
 54 |             print("\n--- \(appIdentifier) Type Result (including Diff) ---")
 55 |             printResult(typeResult)
 56 |         } else {
 57 |             fputs("error: Failed to open \(appIdentifier) or get PID. Aborting typing.\n", stderr)
 58 |             print("\n--- \(appIdentifier) Open Result (Failed) ---")
 59 |             printResult(openResult) // Still report whatever errors the result carries
 60 |         }
 61 | 
 62 |         // WHY WE WAIT BEFORE EXITING
 63 |         // SDK visuals (e.g. `showVisualFeedback`, `drawHighlightBoxes`) are understood to be
 64 |         // scheduled with `DispatchQueue.main.async`, returning before anything is drawn (the SDK
 65 |         // sources are outside this file). Calling `exit(0)` right away could therefore end the
 66 |         // process before the visuals appear, or cut them off mid-animation. Sleeping keeps the
 67 |         // process alive long enough; adjust `exitGraceNanoseconds` if animations are cut short.
 68 |         // Overlay windows are reportedly not closed explicitly by the SDK (doing so near
 69 |         // `exit(0)` caused crashes); the OS reclaims them when the process exits.
 70 |         fputs("info: Main logic complete. Pausing to allow async animations to complete before exiting...\n", stderr)
 71 |         try? await Task.sleep(nanoseconds: exitGraceNanoseconds)
 72 | 
 73 |         fputs("\ninfo: ActionTool finished.\n", stderr)
 74 |         exit(0)
 75 |     }
 76 | 
 77 |     /// Prints the traversal diff carried by `result` to stdout as pretty-printed, key-sorted JSON.
 78 |     ///
 79 |     /// If JSON encoding fails, a manual plain-text summary is printed instead. If the result has no
 80 |     /// diff, a placeholder line is printed, followed by any errors recorded on the result.
 81 |     /// - Parameter result: The outcome of a `performAction` call.
 82 |     static func printResult(_ result: ActionResult) {
 83 |         defer { fflush(stdout) } // Flush stdout on every exit path
 84 |         print("\n--- Traversal Diff ---")
 85 | 
 86 |         guard let diff = result.traversalDiff else {
 87 |             print("  (No diff calculated or available in this result object)")
 88 |             if let err = result.traversalBeforeError { print("  Traversal Before Error: \(err)") }
 89 |             if let err = result.traversalAfterError { print("  Traversal After Error: \(err)") }
 90 |             if let err = result.primaryActionError { print("  Primary Action Error: \(err)") }
 91 |             return
 92 |         }
 93 | 
 94 |         let encoder = JSONEncoder()
 95 |         encoder.outputFormatting = [.prettyPrinted, .sortedKeys]
 96 |         do {
 97 |             let jsonData = try encoder.encode(diff)
 98 |             if let jsonString = String(data: jsonData, encoding: .utf8) {
 99 |                 print(jsonString)
100 |             } else {
101 |                 fputs("error: Failed to convert diff JSON data to string.\n", stderr)
102 |             }
103 |         } catch {
104 |             fputs("error: Failed to encode TraversalDiff to JSON: \(error)\n", stderr)
105 |             // Fallback: plain-text summary of the diff
106 |             print("  Added (\(diff.added.count))")
107 |             print("  Removed (\(diff.removed.count))")
108 |             print("  Modified (\(diff.modified.count))")
109 |             for mod in diff.modified {
110 |                 print("    - Role: \(mod.before.role)")
111 |                 for change in mod.changes {
112 |                     guard change.attributeName == "text" else {
113 |                         print("      - \(change.attributeName): \(change.oldValue ?? "nil") -> \(change.newValue ?? "nil")")
114 |                         continue
115 |                     }
116 |                     // Text changes: prefer the simple added/removed form; never print both.
117 |                     if let added = change.addedText {
118 |                         print("      - text added: \"\(added)\"")
119 |                     } else if let removed = change.removedText {
120 |                         print("      - text removed: \"\(removed)\"")
121 |                     } else {
122 |                         print("      - text changed (complex)")
123 |                     }
124 |                 }
125 |             }
126 |         }
127 |     }
128 | }
129 | 


--------------------------------------------------------------------------------
/Sources/AppOpenerTool/main.swift:
--------------------------------------------------------------------------------
 1 | // main.swift for AppOpenerTool
 2 | // Script to open or activate a specified macOS application by name, bundle ID, or path.
 3 | // Prints the PID to stdout on success (launch or activation); diagnostic and timing logs go to stderr.
 4 | 
 5 | import AppKit // Needed for NSWorkspace, NSApplication, NSRunningApplication
 6 | import Foundation
 7 | import MacosUseSDK // Import the library
 8 | 
 9 | /// Command-line tool that opens or activates one application and prints its PID to stdout.
10 | /// Usage text, progress logs, and errors are written to stderr.
11 | @main
12 | @MainActor
13 | struct AppOpenerTool {
14 | 
15 |     /// Reference timestamp for the tool's timers.
16 |     static let startTime = Date()
17 |     /// Start of the step currently being timed; advanced by `logStepCompletion(_:)`.
18 |     static var stepStartTime = startTime
19 | 
20 |     /// Logs to stderr how long the step `stepDescription` took, then restarts the step timer.
21 |     static func logStepCompletion(_ stepDescription: String) {
22 |         let now = Date()
23 |         // Seconds since the previous step boundary, formatted with three decimal places.
24 |         let elapsed = String(format: "%.3f", now.timeIntervalSince(stepStartTime))
25 |         fputs("info: [\(elapsed)s] finished '\(stepDescription)'\n", stderr)
26 |         stepStartTime = now
27 |     }
28 | 
29 |     /// Validates the command line, asks the SDK to open the application, and exits with
30 |     /// status 0 (PID printed to stdout) or 1 (error or usage text written to stderr).
31 |     static func main() async {
32 |         let arguments = CommandLine.arguments
33 | 
34 |         // Exactly one argument besides the executable path is expected.
35 |         if arguments.count != 2 {
36 |             let scriptName = URL(fileURLWithPath: arguments[0]).lastPathComponent
37 |             fputs("usage: \(scriptName) <Application Name, Bundle ID, or Path>\n", stderr)
38 |             fputs("example (name): \(scriptName) Calculator\n", stderr)
39 |             fputs("example (path): \(scriptName) /System/Applications/Utilities/Terminal.app\n", stderr)
40 |             fputs("example (bundleID): \(scriptName) com.apple.Terminal\n", stderr)
41 |             exit(1)
42 |         }
43 |         let appIdentifier = arguments[1]
44 | 
45 |         /// Reports `message` on stderr and terminates with a failure status.
46 |         func fail(_ message: String) -> Never {
47 |             fputs(message, stderr)
48 |             exit(1)
49 |         }
50 | 
51 |         fputs("info: calling MacosUseSDK.openApplication for identifier: '\(appIdentifier)'\n", stderr)
52 |         do {
53 |             let result = try await MacosUseSDK.openApplication(identifier: appIdentifier)
54 |             // stdout carries only the PID so callers can capture it, e.g. via `$(...)`.
55 |             print(result.pid)
56 |             exit(0)
57 |         } catch let error as MacosUseSDKError.AppOpenerError {
58 |             // Failure reported by the app-opener part of the SDK
59 |             fail("❌ Error (AppOpener): \(error.localizedDescription)\n")
60 |         } catch let error as MacosUseSDKError {
61 |             // Any other SDK error
62 |             fail("❌ Error (MacosUseSDK): \(error.localizedDescription)\n")
63 |         } catch {
64 |             // Anything unexpected
65 |             fail("❌ An unexpected error occurred: \(error.localizedDescription)\n")
66 |         }
67 |     }
68 | } // End of struct AppOpenerTool
69 | 
70 | /*
71 | swift run AppOpenerTool Messages
72 | */


--------------------------------------------------------------------------------
/Sources/HighlightTraversalTool/main.swift:
--------------------------------------------------------------------------------
  1 | import Foundation
  2 | import AppKit // Required for NSApplication and RunLoop
  3 | import MacosUseSDK // Your library
  4 | 
  5 | /// Parses `CommandLine.arguments` into a target PID and an optional `--duration` value.
  6 | /// The duration must be finite and in `(0, maxDuration]`: `Double("inf")` parses successfully,
  7 | /// and the script later converts the wait time to `UInt64` nanoseconds, which would trap.
  8 | /// - Returns: `(pid, duration)`, or `(nil, nil)` after logging an error to stderr.
  9 | func parseArguments() -> (pid: Int32?, duration: Double?) {
 10 |     let maxDuration = 1_000_000_000.0 // Seconds; well below the UInt64-nanosecond limit
 11 |     var pid: Int32?
 12 |     var duration: Double?
 13 |     var waitingForDurationValue = false
 14 | 
 15 |     for arg in CommandLine.arguments.dropFirst() { // dropFirst skips the executable path
 16 |         if waitingForDurationValue {
 17 |             guard let value = Double(arg), value.isFinite, value > 0, value <= maxDuration else {
 18 |                 fputs("error: Invalid value provided after --duration.\n", stderr)
 19 |                 return (nil, nil)
 20 |             }
 21 |             duration = value
 22 |             waitingForDurationValue = false
 23 |         } else if arg == "--duration" {
 24 |             waitingForDurationValue = true
 25 |         } else if pid == nil, let pidValue = Int32(arg) {
 26 |             pid = pidValue
 27 |         } else {
 28 |             fputs("error: Unexpected argument '\(arg)'.\n", stderr)
 29 |             return (nil, nil)
 30 |         }
 31 |     }
 32 | 
 33 |     // A trailing `--duration` without a value is an error.
 34 |     if waitingForDurationValue {
 35 |         fputs("error: Missing value after --duration flag.\n", stderr)
 36 |         return (nil, nil)
 37 |     }
 38 |     // The PID is mandatory.
 39 |     guard pid != nil else {
 40 |         fputs("error: Missing required PID argument.\n", stderr)
 41 |         return (nil, nil)
 42 |     }
 43 | 
 44 |     return (pid, duration)
 45 | }
 46 | 
 47 | // --- Main Execution Logic ---
 48 | 
 49 | // 1. Parse Arguments
 50 | let (parsedPID, parsedDuration) = parseArguments()
 51 | 
 52 | guard let targetPID = parsedPID else {
 53 |     // Error messages printed by parser
 54 |     fputs("\nusage: HighlightTraversalTool <PID> [--duration <seconds>]\n", stderr)
 55 |     fputs("  <PID>: Process ID of the application to highlight.\n", stderr)
 56 |     fputs("  --duration <seconds>: How long the highlights should stay visible (default: 3.0).\n", stderr)
 57 |     fputs("\nexample: HighlightTraversalTool 14154 --duration 5\n", stderr)
 58 |     exit(1)
 59 | }
 60 | 
 61 | // Use provided duration or default
 62 | let highlightDuration = parsedDuration ?? 3.0
 63 | 
 64 | fputs("info: Target PID: \(targetPID), Highlight Duration: \(highlightDuration) seconds.\n", stderr)
 65 | 
 66 | // Wrap async calls in a Task
 67 | Task {
 68 |     do {
 69 |         // 2. Perform Traversal FIRST
 70 |         fputs("info: Calling traverseAccessibilityTree (visible only)...\n", stderr)
 71 |         let responseData = try await MacosUseSDK.traverseAccessibilityTree(pid: targetPID, onlyVisibleElements: true)
 72 |         fputs("info: Traversal complete. Found \(responseData.elements.count) visible elements.\n", stderr)
 73 | 
 74 |         // 3. Dispatch Highlighting using the traversal results
 75 |         fputs("info: Calling drawHighlightBoxes with \(responseData.elements.count) elements...\n", stderr)
 76 |         // Ensure this call happens on the main actor, drawHighlightBoxes requires it.
 77 |         // Since we are in a Task, explicitly hop to MainActor.
 78 |         await MainActor.run {
 79 |             MacosUseSDK.drawHighlightBoxes(for: responseData.elements, duration: highlightDuration)
 80 |         }
 81 |         fputs("info: drawHighlightBoxes call dispatched successfully.\n", stderr)
 82 |         fputs("      Overlays appear/disappear asynchronously on the main thread.\n", stderr)
 83 | 
 84 |         // 4. Encode the ResponseData to JSON
 85 |         fputs("info: Encoding traversal response to JSON...\n", stderr)
 86 |         let encoder = JSONEncoder()
 87 |         // Optionally make the output prettier
 88 |         // encoder.outputFormatting = [.prettyPrinted, .sortedKeys] // Uncomment for human-readable JSON
 89 |         let jsonData = try encoder.encode(responseData)
 90 | 
 91 |         // 5. Print JSON to standard output
 92 |         guard let jsonString = String(data: jsonData, encoding: .utf8) else {
 93 |             throw MacosUseSDKError.internalError("Failed to convert JSON data to UTF-8 string.")
 94 |         }
 95 |         print(jsonString) // Print JSON to stdout
 96 |         fputs("info: Successfully printed JSON response to stdout.\n", stderr)
 97 | 
 98 |         // 6. Keep the Main Thread Alive for UI Updates
 99 |         // IMPORTANT: Still need this for the visual highlights to appear/disappear
100 |         // We need to schedule this *after* the async work above has potentially returned.
101 |         let waitTime = highlightDuration + 1.0 // Wait a bit longer than the effect
102 |         fputs("info: Keeping the tool alive for \(waitTime) seconds to allow UI updates...\n", stderr)
103 |         // Use DispatchQueue.main.async to schedule the RunLoop wait on the main thread
104 |         DispatchQueue.main.async {
105 |             RunLoop.main.run(until: Date(timeIntervalSinceNow: waitTime))
106 |             fputs("info: Run loop finished. Tool exiting normally.\n", stderr)
107 |             exit(0) // Success
108 |         }
109 |         // Allow the Task itself to stay alive while the main thread waits
110 |          try await Task.sleep(nanoseconds: UInt64((waitTime + 0.1) * 1_000_000_000))
111 |          // Fallback exit if runloop doesn't trigger exit
112 |          exit(0)
113 | 
114 |     } catch let error as MacosUseSDKError {
115 |         // Specific SDK errors
116 |         fputs("❌ Error from MacosUseSDK: \(error.localizedDescription)\n", stderr)
117 |         exit(1)
118 |     } catch {
119 |         // Other errors (e.g., JSON encoding failure)
120 |         fputs("❌ An unexpected error occurred: \(error.localizedDescription)\n", stderr)
121 |         exit(1)
122 |     }
123 | }
124 | 
125 | // Run the main run loop so the asynchronous Task above can execute; the Task ends the process via exit()
126 | RunLoop.main.run()
127 | 
128 | /*
129 | 
130 | swift run HighlightTraversalTool $(swift run AppOpenerTool Messages) --duration 5
131 | 
132 | */


--------------------------------------------------------------------------------
/Sources/InputControllerTool/main.swift:
--------------------------------------------------------------------------------
  1 | import Foundation
  2 | import CoreGraphics // For CGPoint, CGEventFlags
  3 | import MacosUseSDK // Import the library
  4 | 
  5 | // --- Start Time ---
  6 | let startTime = Date() // Captured at launch; finish() reports the elapsed time relative to this value
  7 | 
  8 | // --- Helper Function for Logging ---
  9 | /// Writes `message` to stderr as a single line prefixed with "InputControllerTool: ".
 10 | func log(_ message: String) {
 11 |     fputs("InputControllerTool: \(message)\n", stderr)
 12 | }
 13 | 
 14 | // --- Helper Function for Exiting ---
 15 | /// Logs the optional outcome `message` and the total run time, then exits with status 0 on success or 1 on failure.
 16 | func finish(success: Bool, message: String? = nil) -> Never {
 17 |     // Report the outcome only when the caller supplied a message.
 18 |     if let outcome = message {
 19 |         let line = success ? "✅ Success: \(outcome)" : "❌ Error: \(outcome)"
 20 |         log(line)
 21 |     }
 22 | 
 23 |     // Elapsed wall-clock time since `startTime`, printed with three decimal places.
 24 |     let elapsed = Date().timeIntervalSince(startTime)
 25 |     let elapsedText = String(format: "%.3f", elapsed)
 26 |     fputs("InputControllerTool: total execution time: \(elapsedText) seconds\n", stderr)
 27 | 
 28 |     exit(success ? 0 : 1)
 29 | }
 30 | 
 31 | 
 32 | // --- Argument Parsing and Main Logic ---
 33 | let arguments = CommandLine.arguments
 34 | let scriptName = arguments.first ?? "InputControllerTool" // Program name as invoked; shown in the usage text
 35 | 
 36 | // Usage text. Modifiers come before the key ("cmd+c"), matching how the keypress action parses its argument.
 37 | let usage = """
 38 | usage: \(scriptName) <action> [options...]
 39 | 
 40 | actions:
 41 |   keypress [modifier+...]<key_name_or_code>   Simulate pressing a key (e.g., 'return', 'a', 'f1', 'cmd+c', 'shift+tab').
 42 |                                              Supported modifiers: cmd, shift, opt, ctrl, fn.
 43 |   click <x> <y>                 Simulate a left mouse click at screen coordinates.
 44 |   doubleclick <x> <y>           Simulate a left mouse double-click at screen coordinates.
 45 |   rightclick <x> <y>            Simulate a right mouse click at screen coordinates.
 46 |   mousemove <x> <y>             Move the mouse cursor to screen coordinates.
 47 |   writetext <text_to_type>      Simulate typing a string of text.
 48 | 
 49 | Examples:
 50 |   \(scriptName) keypress enter
 51 |   \(scriptName) keypress cmd+shift+4
 52 |   \(scriptName) click 100 250
 53 |   \(scriptName) writetext "Hello World!"
 54 | """
 55 | 
 56 | // Require an action argument. `usage` has no trailing newline, so one is added to keep the next log line separate.
 57 | guard arguments.count > 1 else {
 58 |     fputs(usage + "\n", stderr)
 59 |     finish(success: false, message: "No action specified.")
 60 | }
 61 | 
 62 | let action = arguments[1].lowercased() // Lowercased, so action names match case-insensitively
 63 | log("Action: \(action)")
 64 | 
 65 | // --- Action Handling ---
 66 | // Each branch validates its arguments, calls the matching MacosUseSDK function, and exits through finish().
 67 | do {
 68 |     switch action {
 69 |     case "keypress":
 70 |         guard arguments.count == 3 else {
 71 |             throw MacosUseSDKError.inputInvalidArgument("'keypress' requires exactly one argument: <key_name_or_code_with_modifiers>\n\(usage)")
 72 |         }
 73 |         let keyCombo = arguments[2]
 74 |         log("Key Combo Argument: '\(keyCombo)'")
 75 |         var flags: CGEventFlags = []
 76 | 
 77 |         // Split "modifier+...+key" into trimmed, lowercased parts (split drops empty parts such as the tail of "cmd+")
 78 |         let parts = keyCombo.split(separator: "+").map { String($0).trimmingCharacters(in: .whitespacesAndNewlines).lowercased() }
 79 | 
 80 |         // The last part is the key
 81 |         guard let keyPart = parts.last else {
 82 |             throw MacosUseSDKError.inputInvalidArgument("Invalid key combination format: '\(keyCombo)'")
 83 |         }
 84 |         log("Parsing key part: '\(keyPart)'")
 85 |         let keyCode: CGKeyCode? = MacosUseSDK.mapKeyNameToKeyCode(keyPart) // Use library function
 86 | 
 87 |         // Every part before the key is a modifier; an unknown modifier is reported before an unknown key
 88 |         let modifiers = parts.dropLast()
 89 |         if !modifiers.isEmpty {
 90 |             log("Parsing modifiers: \(modifiers.joined(separator: ", "))")
 91 |             for modifier in modifiers {
 92 |                 switch modifier {
 93 |                     case "cmd", "command": flags.insert(.maskCommand)
 94 |                     case "shift": flags.insert(.maskShift)
 95 |                     case "opt", "option", "alt": flags.insert(.maskAlternate)
 96 |                     case "ctrl", "control": flags.insert(.maskControl)
 97 |                     case "fn", "function": flags.insert(.maskSecondaryFn) // Note: 'fn' might need special handling or accessibility settings
 98 |                     default: throw MacosUseSDKError.inputInvalidArgument("Unknown modifier: '\(modifier)' in '\(keyCombo)'")
 99 |                 }
100 |             }
101 |         }
102 | 
103 |         guard let finalKeyCode = keyCode else {
104 |             throw MacosUseSDKError.inputInvalidArgument("Unknown key name or invalid key code: '\(keyPart)' in '\(keyCombo)'")
105 |         }
106 | 
107 |         log("Calling pressKey library function...")
108 |         try MacosUseSDK.pressKey(keyCode: finalKeyCode, flags: flags)
109 |         finish(success: true, message: "Key press '\(keyCombo)' simulated.")
110 | 
111 |     case "click", "doubleclick", "rightclick", "mousemove":
112 |         guard arguments.count == 4 else {
113 |              throw MacosUseSDKError.inputInvalidArgument("'\(action)' requires exactly two arguments: <x> <y>\n\(usage)")
114 |         }
115 |         guard let x = Double(arguments[2]), let y = Double(arguments[3]) else {
116 |             throw MacosUseSDKError.inputInvalidArgument("Invalid coordinates for '\(action)'. x and y must be numbers.")
117 |         }
118 |         let point = CGPoint(x: x, y: y)
119 |         log("Coordinates: (\(x), \(y))")
120 | 
121 |         log("Calling \(action) library function...")
122 |         switch action {
123 |             case "click":       try MacosUseSDK.clickMouse(at: point)
124 |             case "doubleclick": try MacosUseSDK.doubleClickMouse(at: point)
125 |             case "rightclick":  try MacosUseSDK.rightClickMouse(at: point)
126 |             case "mousemove":   try MacosUseSDK.moveMouse(to: point)
127 |             default: throw MacosUseSDKError.internalError("Unhandled pointer action '\(action)'") // Should not happen; never report success for it
128 |         }
129 |         finish(success: true, message: "\(action) simulated at (\(x), \(y)).")
130 | 
131 | 
132 |     case "writetext":
133 |          guard arguments.count == 3 else {
134 |             throw MacosUseSDKError.inputInvalidArgument("'writetext' requires exactly one argument: <text_to_type>\n\(usage)")
135 |         }
136 |         let text = arguments[2]
137 |         log("Text Argument: \"\(text)\"")
138 |         log("Calling writeText library function...")
139 |         try MacosUseSDK.writeText(text)
140 |         finish(success: true, message: "Text writing simulated.")
141 | 
142 |     default:
143 |         fputs(usage + "\n", stderr) // `usage` has no trailing newline; keep the error line below on its own line
144 |         throw MacosUseSDKError.inputInvalidArgument("Unknown action '\(action)'")
145 |     }
146 | 
147 | } catch let error as MacosUseSDKError {
148 |     // Handle specific SDK errors
149 |     finish(success: false, message: "MacosUseSDK Error: \(error.localizedDescription)")
150 | } catch {
151 |     // Handle other unexpected errors
152 |      finish(success: false, message: "An unexpected error occurred: \(error.localizedDescription)")
153 | }
154 | 
155 | // Should not be reached due to finish() calls, but satisfies the compiler
156 | exit(0)
157 | 
158 | /*
159 | # Example: Open Calculator and type 2*3=
160 | swift run AppOpenerTool Calculator
161 | # (Wait a moment or use the PID from above if needed)
162 | swift run InputControllerTool writetext "2*3="
163 | */
164 | 


--------------------------------------------------------------------------------
/Sources/MacosUseSDK/AccessibilityTraversal.swift:
--------------------------------------------------------------------------------
  1 | // The Swift Programming Language
  2 | // https://docs.swift.org/swift-book
  3 | 
  4 | import AppKit // For NSWorkspace, NSRunningApplication, NSApplication
  5 | import Foundation // For basic types, JSONEncoder, Date
  6 | import ApplicationServices // For Accessibility API (AXUIElement, etc.)
  7 | 
  8 | // --- Error Enum ---
  9 | /// Errors reported by the SDK; `errorDescription` supplies the message for each case.
 10 | public enum MacosUseSDKError: Error, LocalizedError {
 11 |     case accessibilityDenied          // The process is not trusted for Accessibility access
 12 |     case appNotFound(pid: Int32)      // No running application matches the PID
 13 |     case jsonEncodingFailed(Error)    // Carries the underlying encoding error
 14 |     case internalError(String)        // For unexpected issues
 15 | 
 16 |     /// Human-readable message for the error.
 17 |     public var errorDescription: String? {
 18 |         let description: String
 19 |         switch self {
 20 |         case .accessibilityDenied: description = "Accessibility access is denied. Please grant permissions in System Settings > Privacy & Security > Accessibility."
 21 |         case .appNotFound(let missingPID): description = "No running application found with PID \(missingPID)."
 22 |         case .jsonEncodingFailed(let cause): description = "Failed to encode response to JSON: \(cause.localizedDescription)"
 23 |         case .internalError(let detail): description = "Internal SDK error: \(detail)"
 24 |         }
 25 |         return description
 26 |     }
 27 | }
 28 | 
 29 | 
 30 | // --- Public Data Structures for API Response ---
 31 | 
 32 | /// One accessibility element as returned to callers: a role label, optional combined text,
 33 | /// and an optional frame (position and size).
 34 | public struct ElementData: Codable, Hashable, Sendable {
 35 |     public var role: String
 36 |     public var text: String?
 37 |     public var x: Double?
 38 |     public var y: Double?
 39 |     public var width: Double?
 40 |     public var height: Double?
 41 | 
 42 |     // Hashable: feed every stored property to the hasher, in declaration order, so the type works in a Set
 43 |     public func hash(into hasher: inout Hasher) {
 44 |         hasher.combine(role)
 45 |         hasher.combine(text)
 46 |         for component in [x, y, width, height] {
 47 |             hasher.combine(component)
 48 |         }
 49 |     }
 50 | 
 51 |     // Equatable: two elements are equal only when all six stored properties match
 52 |     public static func == (lhs: ElementData, rhs: ElementData) -> Bool {
 53 |         let left = (lhs.role, lhs.text, lhs.x, lhs.y, lhs.width, lhs.height)
 54 |         let right = (rhs.role, rhs.text, rhs.x, rhs.y, rhs.width, rhs.height)
 55 |         return left == right
 56 |     }
 57 | }
 58 | 
 59 | public struct Statistics: Codable, Sendable { // Counters gathered during one traversal
 60 |     public var count: Int = 0 // Number of elements in the final, sorted result
 61 |     public var excluded_count: Int = 0 // Visited elements that were not collected
 62 |     public var excluded_non_interactable: Int = 0 // Excluded elements whose role is in the non-interactable set
 63 |     public var excluded_no_text: Int = 0 // Excluded elements that had no text
 64 |     public var with_text_count: Int = 0 // Newly collected elements that have text
 65 |     public var without_text_count: Int = 0 // Newly collected elements that have no text
 66 |     public var visible_elements_count: Int = 0 // Visited elements with a position and positive width and height, collected or not
 67 |     public var role_counts: [String: Int] = [:] // Visited elements per raw role string
 68 | }
 69 | 
 70 | public struct ResponseData: Codable, Sendable { // Result of one traversal
 71 |     public let app_name: String // The app's localized name, or "App (PID: <pid>)" when it has none
 72 |     public var elements: [ElementData] // Collected elements, sorted by y and then x
 73 |     public var stats: Statistics // Counters accumulated during the traversal
 74 |     public let processing_time_seconds: String // Total traversal time in seconds, formatted with two decimals
 75 | }
 76 | 
 77 | 
 78 | // --- Main Public Function ---
 79 | 
 80 | /// Walks the accessibility tree of the application identified by `pid` and returns what was collected.
 81 | ///
 82 | /// - Parameter pid: The Process ID (PID) of the target application.
 83 | /// - Parameter onlyVisibleElements: When true, only elements with a position and a positive width and height are collected. Defaults to false.
 84 | /// - Returns: A `ResponseData` value holding the collected elements, statistics, and the processing time.
 85 | /// - Throws: `MacosUseSDKError.accessibilityDenied` when the process is not trusted for Accessibility, or `.appNotFound` when no app has this PID.
 86 | public func traverseAccessibilityTree(pid: Int32, onlyVisibleElements: Bool = false) throws -> ResponseData {
 87 |     // A fresh operation object per call keeps the traversal state (visited set, statistics) separate between calls.
 88 |     return try AccessibilityTraversalOperation(pid: pid, onlyVisibleElements: onlyVisibleElements).executeTraversal()
 89 | }
 90 | 
 91 | 
 92 | // --- Internal Implementation Detail ---
 93 | 
 94 | // Class to encapsulate the state and logic of a single traversal operation
 95 | fileprivate class AccessibilityTraversalOperation {
 96 |     let pid: Int32 // PID of the application to traverse
 97 |     let onlyVisibleElements: Bool // When true, elements without usable geometry are not collected
 98 |     var visitedElements: Set<AXUIElement> = [] // Elements already walked; prevents revisiting and cycles
 99 |     var collectedElements: Set<ElementData> = [] // Collected results; the Set drops duplicates
100 |     var statistics: Statistics = Statistics() // Counters updated while walking
101 |     var stepStartTime: Date = Date() // Start of the step currently being timed by logStepCompletion
102 |     let maxDepth = 100 // Elements nested deeper than this are not processed
103 | 
104 |     // Roles considered non-interactable by default; such elements are collected only when they have text
105 |     let nonInteractableRoles: Set<String> = [
106 |         "AXGroup", "AXStaticText", "AXUnknown", "AXSeparator",
107 |         "AXHeading", "AXLayoutArea", "AXHelpTag", "AXGrowArea",
108 |         "AXOutline", "AXScrollArea", "AXSplitGroup", "AXSplitter",
109 |         "AXToolbar", "AXDisclosureTriangle",
110 |     ]
111 | 
112 |     init(pid: Int32, onlyVisibleElements: Bool) { // Stores the target PID and the visibility filter flag
113 |         self.pid = pid
114 |         self.onlyVisibleElements = onlyVisibleElements
115 |     }
116 | 
117 |     /// Runs one traversal: permission check, app lookup, optional activation, tree walk, sorting, response assembly.
118 |     func executeTraversal() throws -> ResponseData {
119 |         let overallStartTime = Date()
120 |         fputs("info: starting traversal for pid: \(pid) (Visible Only: \(onlyVisibleElements))\n", stderr)
121 |         stepStartTime = Date() // Initialize step timer
122 | 
123 |         // 1. Accessibility Check (the options below set the prompt option to true)
124 |         fputs("info: checking accessibility permissions...\n", stderr)
125 |         let checkOptions = ["AXTrustedCheckOptionPrompt": kCFBooleanTrue] as CFDictionary
126 |         let isTrusted = AXIsProcessTrustedWithOptions(checkOptions)
127 | 
128 |         if !isTrusted {
129 |             fputs("❌ error: accessibility access is denied.\n", stderr)
130 |             fputs("       please grant permissions in system settings > privacy & security > accessibility.\n", stderr)
131 |             throw MacosUseSDKError.accessibilityDenied
132 |         }
133 |         logStepCompletion("checking accessibility permissions (granted)")
134 | 
135 |         // 2. Find Application by PID and Create AXUIElement
136 |         guard let runningApp = NSRunningApplication(processIdentifier: pid) else {
137 |             fputs("error: no running application found with pid \(pid).\n", stderr)
138 |             throw MacosUseSDKError.appNotFound(pid: pid)
139 |         }
140 |         let targetAppName = runningApp.localizedName ?? "App (PID: \(pid))"
141 |         let appElement = AXUIElementCreateApplication(pid)
142 |         // No step-completion log here, so the lookup time is included in the next logged step
143 | 
144 |         // 3. Activate App if needed (only apps with the regular activation policy that are not already active)
145 |         var didActivate = false
146 |         if runningApp.activationPolicy == NSApplication.ActivationPolicy.regular {
147 |             if !runningApp.isActive {
148 |                 // fputs("info: activating application '\(targetAppName)'...\n", stderr) // Optional start log
149 |                 runningApp.activate()
150 |                 // Consider adding a small delay or a check loop if activation timing is critical
151 |                 // Thread.sleep(forTimeInterval: 0.2)
152 |                 didActivate = true
153 |             }
154 |         }
155 |         if didActivate {
156 |             logStepCompletion("activating application '\(targetAppName)'")
157 |         }
158 | 
159 |         // 4. Start Traversal
160 |         // Start at the application element; walkElementTree recurses into windows, the main window, and children
161 |         walkElementTree(element: appElement, depth: 0)
162 |         logStepCompletion("traversing accessibility tree (\(collectedElements.count) elements collected)")
163 | 
164 |         // 5. Process Results
165 |         // Sort by y, then by x; an element missing a coordinate is ordered as if it had the largest finite value
166 |         let sortedElements = collectedElements.sorted {
167 |             let y0 = $0.y ?? Double.greatestFiniteMagnitude
168 |             let y1 = $1.y ?? Double.greatestFiniteMagnitude
169 |             if y0 != y1 { return y0 < y1 }
170 |             let x0 = $0.x ?? Double.greatestFiniteMagnitude
171 |             let x1 = $1.x ?? Double.greatestFiniteMagnitude
172 |             return x0 < x1
173 |         }
174 |         // logStepCompletion("sorting \(sortedElements.count) elements") // Log implicitly
175 | 
176 |         // Set the final count statistic
177 |         statistics.count = sortedElements.count
178 | 
179 |         // --- Calculate Total Time ---
180 |         let overallEndTime = Date()
181 |         let totalProcessingTime = overallEndTime.timeIntervalSince(overallStartTime)
182 |         let formattedTime = String(format: "%.2f", totalProcessingTime)
183 |         fputs("info: total execution time: \(formattedTime) seconds\n", stderr)
184 | 
185 |         // 6. Prepare Response
186 |         let response = ResponseData(
187 |             app_name: targetAppName,
188 |             elements: sortedElements,
189 |             stats: statistics,
190 |             processing_time_seconds: formattedTime
191 |         )
192 | 
193 |         return response
194 |         // JSON encoding will be handled by the caller of the library function if needed
195 |     }
196 | 
197 | 
198 |     // --- Helper Functions (now methods of the class) ---
199 | 
200 |     /// Reads `attribute` from `element`, returning nil for any result other than `.success`.
201 |     func copyAttributeValue(element: AXUIElement, attribute: String) -> CFTypeRef? {
202 |         var rawValue: CFTypeRef?
203 |         let status = AXUIElementCopyAttributeValue(element, attribute as CFString, &rawValue)
204 |         // Unsupported attributes, missing values, and every other failure are
205 |         // all reported to the caller the same way: as nil, without logging.
206 |         guard status == .success else {
207 |             return nil
208 |         }
209 |         return rawValue
210 |     }
211 | 
212 |     /// Converts a Core Foundation value to a Swift `String` when it is a CFString.
213 |     ///
214 |     /// Every other input, including nil and AXValue wrappers, yields nil.
215 |     func getStringValue(_ value: CFTypeRef?) -> String? {
216 |         guard let candidate = value else { return nil }
217 |         // Check the runtime type ID before the forced cast to CFString.
218 |         guard CFGetTypeID(candidate) == CFStringGetTypeID() else {
219 |             // Not a string (for example an AXValue): no generic string conversion is attempted.
220 |             return nil
221 |         }
222 |         let cfText = candidate as! CFString
223 |         return cfText as String
224 |     }
225 | 
226 |     /// Unwraps a `CGPoint` from an AXValue; returns nil for nil input, other value types, or a failed extraction.
227 |     func getCGPointValue(_ value: CFTypeRef?) -> CGPoint? {
228 |         guard let candidate = value else { return nil }
229 |         guard CFGetTypeID(candidate) == AXValueGetTypeID() else { return nil }
230 | 
231 |         // AXValueGetValue returns false when the wrapped value cannot be read as a CGPoint.
232 |         var point = CGPoint.zero
233 |         let extracted = AXValueGetValue(candidate as! AXValue, .cgPoint, &point)
234 | 
235 |         return extracted ? point : nil
236 |     }
237 | 
238 |     /// Unwraps a `CGSize` from an AXValue; returns nil for nil input, other value types, or a failed extraction.
239 |     func getCGSizeValue(_ value: CFTypeRef?) -> CGSize? {
240 |         guard let candidate = value else { return nil }
241 |         guard CFGetTypeID(candidate) == AXValueGetTypeID() else { return nil }
242 | 
243 |         // AXValueGetValue returns false when the wrapped value cannot be read as a CGSize.
244 |         var dimensions = CGSize.zero
245 |         let extracted = AXValueGetValue(candidate as! AXValue, .cgSize, &dimensions)
246 | 
247 |         return extracted ? dimensions : nil
248 |     }
249 | 
250 |     /// Reads the role, role description, text attributes, position, and size of `element`.
251 |     ///
252 |     /// - Parameter element: The accessibility element to query.
253 |     /// - Returns: A tuple where `role` falls back to "AXUnknown", `text` is the non-blank text attributes
254 |     ///   joined with single spaces and trimmed (nil when there are none), `allTextParts` lists those
255 |     ///   attributes individually, and `position` / `size` are nil when they cannot be read.
256 |     func extractElementAttributes(element: AXUIElement) -> (role: String, roleDesc: String?, text: String?, allTextParts: [String], position: CGPoint?, size: CGSize?) {
257 |         // Role, defaulting to "AXUnknown" when the attribute is missing or is not a string
258 |         let roleValue = copyAttributeValue(element: element, attribute: kAXRoleAttribute as String)
259 |         let role = getStringValue(roleValue) ?? "AXUnknown"
260 | 
261 |         // Role description (optional)
262 |         let roleDescValue = copyAttributeValue(element: element, attribute: kAXRoleDescriptionAttribute as String)
263 |         let roleDesc = getStringValue(roleDescValue)
264 | 
265 |         // Text: query these attributes in order and keep each value that is a non-blank string
266 |         let textAttributeNames = [
267 |             kAXValueAttribute as String, kAXTitleAttribute as String, kAXDescriptionAttribute as String,
268 |             "AXLabel", "AXHelp",
269 |         ]
270 |         let textParts: [String] = textAttributeNames.compactMap { (name: String) -> String? in
271 |             let rawValue = copyAttributeValue(element: element, attribute: name)
272 |             guard let text = getStringValue(rawValue) else { return nil }
273 |             let isBlank = text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
274 |             return isBlank ? nil : text
275 |         }
276 | 
277 |         // Combined text is nil rather than "" when no attribute produced text
278 |         let combinedText: String?
279 |         if textParts.isEmpty {
280 |             combinedText = nil
281 |         } else {
282 |             combinedText = textParts.joined(separator: " ").trimmingCharacters(in: .whitespacesAndNewlines)
283 |         }
284 | 
285 |         // Geometry: each extractor returns nil for a missing attribute or a value of the wrong kind
286 |         let positionValue = copyAttributeValue(element: element, attribute: kAXPositionAttribute as String)
287 |         let position = getCGPointValue(positionValue)
288 | 
289 |         let sizeValue = copyAttributeValue(element: element, attribute: kAXSizeAttribute as String)
290 |         let size = getCGSizeValue(sizeValue)
291 | 
292 |         return (role, roleDesc, combinedText, textParts, position, size)
293 |     }
294 | 
295 |     /// Recursively visits `element`, then its windows, main window, and children.
296 |     ///
297 |     /// Every visited element updates `statistics`: `role_counts` always, `visible_elements_count` when it
298 |     /// has a position plus a positive width and height, and then either the text counters (for newly
299 |     /// collected elements) or the exclusion counters (for elements that are not collected).
300 |     ///
301 |     /// - Parameter element: The accessibility element to process.
302 |     /// - Parameter depth: Nesting level of `element`; elements deeper than `maxDepth` are skipped.
303 |     func walkElementTree(element: AXUIElement, depth: Int) {
304 |         // 1. Check for cycles and depth limit
305 |         if visitedElements.contains(element) || depth > maxDepth {
306 |             // Already processed, or nested deeper than maxDepth: nothing to do
307 |             return
308 |         }
309 |         visitedElements.insert(element)
310 | 
311 |         // 2. Process the current element
312 |         let (role, roleDesc, combinedText, _, position, size) = extractElementAttributes(element: element)
313 |         // nil and "" both mean "no text"; written without a force-unwrap
314 |         let hasText = !(combinedText ?? "").isEmpty
315 |         let isNonInteractable = nonInteractableRoles.contains(role)
316 |         // Role with its "AX" prefix removed, used to spot role descriptions that merely repeat the role
317 |         let roleWithoutAX = role.starts(with: "AX") ? String(role.dropFirst(2)) : role
318 | 
319 |         statistics.role_counts[role, default: 0] += 1
320 | 
321 |         // 3. Determine Geometry and Visibility
322 |         // Coordinates are kept only when a position exists and at least one dimension is positive;
323 |         // a non-positive dimension becomes nil, so "visible" below requires both to be positive.
324 |         var finalX: Double? = nil
325 |         var finalY: Double? = nil
326 |         var finalWidth: Double? = nil
327 |         var finalHeight: Double? = nil
328 |         if let p = position, let s = size, s.width > 0 || s.height > 0 {
329 |             finalX = Double(p.x)
330 |             finalY = Double(p.y)
331 |             finalWidth = s.width > 0 ? Double(s.width) : nil
332 |             finalHeight = s.height > 0 ? Double(s.height) : nil
333 |         }
334 |         let isGeometricallyVisible = finalX != nil && finalY != nil && finalWidth != nil && finalHeight != nil
335 | 
336 |         // Always update the visible_elements_count stat based on geometry, regardless of collection
337 |         if isGeometricallyVisible {
338 |             statistics.visible_elements_count += 1
339 |         }
340 | 
341 |         // 4. Apply Filtering Logic
342 |         // Append the role description unless it is empty or equal to the role without its "AX" prefix
343 |         var displayRole = role
344 |         if let desc = roleDesc, !desc.isEmpty, !desc.elementsEqual(roleWithoutAX) {
345 |             displayRole = "\(role) (\(desc))"
346 |         }
347 | 
348 |         // Base filter: keep every interactable role; keep non-interactable roles only when they have text
349 |         let passesOriginalFilter = !isNonInteractable || hasText
350 | 
351 |         // With onlyVisibleElements set, the element must also be geometrically visible
352 |         let shouldCollectElement = passesOriginalFilter && (!onlyVisibleElements || isGeometricallyVisible)
353 | 
354 |         if shouldCollectElement {
355 |             let elementData = ElementData(
356 |                 role: displayRole, text: combinedText,
357 |                 x: finalX, y: finalY, width: finalWidth, height: finalHeight
358 |             )
359 | 
360 |             // The Set rejects duplicates; text counts are updated only for newly inserted elements
361 |             if collectedElements.insert(elementData).inserted {
362 |                 if hasText {
363 |                     statistics.with_text_count += 1
364 |                 } else {
365 |                     statistics.without_text_count += 1
366 |                 }
367 |             }
368 |         } else {
369 |             // An element lands here when it fails the role/text filter, or passes it but is not
370 |             // geometrically visible while onlyVisibleElements is set.
371 |             // Update exclusion counts
372 |             statistics.excluded_count += 1
373 | 
374 |             // Note: The specific exclusion reasons (non-interactable, no-text) might be slightly less precise
375 |             // if an element is excluded *only* because it's invisible, but this keeps the stats simple.
376 |             // We can refine this if needed.
377 |             if isNonInteractable {
378 |                 statistics.excluded_non_interactable += 1
379 |             }
380 |             if !hasText { statistics.excluded_no_text += 1 }
381 |         }
382 | 
383 |         // 5. Recursively traverse children, windows, main window
384 |         // The where-clauses and the check in (b) skip elements that were already visited;
385 |         // walkElementTree performs the same check again on entry.
386 |         // a) Windows
387 |         if let windowsValue = copyAttributeValue(element: element, attribute: kAXWindowsAttribute as String) {
388 |             // A value that does not bridge to [AXUIElement] is ignored
389 |             if let windowsArray = windowsValue as? [AXUIElement] {
390 |                 for windowElement in windowsArray where !visitedElements.contains(windowElement) {
391 |                     walkElementTree(element: windowElement, depth: depth + 1)
392 |                 }
393 |             }
394 |         }
395 | 
396 |         // b) Main Window
397 |         if let mainWindowValue = copyAttributeValue(element: element, attribute: kAXMainWindowAttribute as String) {
398 |             // The type ID check guards the forced cast below; values of any other type are ignored
399 |             if CFGetTypeID(mainWindowValue) == AXUIElementGetTypeID() {
400 |                  let mainWindowElement = mainWindowValue as! AXUIElement
401 |                  if !visitedElements.contains(mainWindowElement) {
402 |                      walkElementTree(element: mainWindowElement, depth: depth + 1)
403 |                  }
404 |             }
405 |         }
406 | 
407 |         // c) Regular Children
408 |         if let childrenValue = copyAttributeValue(element: element, attribute: kAXChildrenAttribute as String) {
409 |             // A value that does not bridge to [AXUIElement] is ignored
410 |             if let childrenArray = childrenValue as? [AXUIElement] {
411 |                 for childElement in childrenArray where !visitedElements.contains(childElement) {
412 |                     walkElementTree(element: childElement, depth: depth + 1)
413 |                 }
414 |             }
415 |         }
416 |     }
417 | 
418 | 
419 |     /// Logs how long the step that just finished took, then restarts the step timer.
420 |     func logStepCompletion(_ stepDescription: String) {
421 |         let now = Date()
422 |         let elapsedText = String(format: "%.3f", now.timeIntervalSince(stepStartTime))
423 |         fputs("info: [\(elapsedText)s] finished '\(stepDescription)'\n", stderr)
424 |         // The next step is timed from the moment this one was logged as finished.
425 |         stepStartTime = now
426 |     }
427 | } // End of AccessibilityTraversalOperation class
428 | 


--------------------------------------------------------------------------------
/Sources/MacosUseSDK/ActionCoordinator.swift:
--------------------------------------------------------------------------------
  1 | import Foundation
  2 | import CoreGraphics
  3 | import AppKit // For NSWorkspace, NSRunningApplication, CGPoint, etc.
  4 | 
  5 | // --- Enums and Structs for Orchestration ---
  6 | 
  7 | /// Defines the specific type of user input simulation.
  8 | public enum InputAction: Sendable {
  9 |     case click(point: CGPoint)
 10 |     case doubleClick(point: CGPoint)
 11 |     case rightClick(point: CGPoint)
 12 |     case type(text: String)
 13 |     // Takes a key name rather than a raw key code (mapped to a CGKeyCode internally); `flags` defaults to no modifiers
 14 |     case press(keyName: String, flags: CGEventFlags = [])
 15 |     case move(to: CGPoint)
 16 | }
 17 | 
 18 | /// Defines the main action to be performed.
 19 | public enum PrimaryAction: Sendable {
 20 |     // Open an application; the identifier can be a name, bundle ID, or path
 21 |     case open(identifier: String)
 22 |     // Simulate user input; the associated InputAction selects the kind of input
 23 |     case input(action: InputAction)
 24 |     // No primary action, traversal only; the PID is supplied through ActionOptions.pidForTraversal
 25 |     case traverseOnly
 26 | }
 27 | 
 28 | /// Switches and timings that shape a single `performAction` run.
 29 | public struct ActionOptions: Sendable {
 30 |     /// Capture an accessibility traversal before the primary action runs (forced on by `showDiff`).
 31 |     public var traverseBefore: Bool = false
 32 |     /// Capture an accessibility traversal once the primary action has run (forced on by `showDiff`).
 33 |     public var traverseAfter: Bool = false
 34 |     /// Compute the before/after traversal difference; `validated()` switches both traversals on for it.
 35 |     public var showDiff: Bool = false
 36 |     /// Forwarded to every traversal as its `onlyVisibleElements` argument.
 37 |     public var onlyVisibleElements: Bool = false
 38 |     /// Visualize input actions and highlight the elements found by the final traversal.
 39 |     public var showAnimation: Bool = true
 40 |     /// Duration passed to input visualizations and to element highlighting.
 41 |     public var animationDuration: Double = 0.8
 42 |     /// PID to traverse when the primary action is not `open`.
 43 |     public var pidForTraversal: pid_t? = nil
 44 |     /// Pause, in seconds, after the primary action and before the 'after' traversal.
 45 |     public var delayAfterAction: Double = 0.2
 46 | 
 47 |     /// Returns a copy in which `showDiff` implies both traversals; every other field is left as is.
 48 |     public func validated() -> ActionOptions {
 49 |         guard showDiff else { return self }
 50 |         var normalized = self
 51 |         normalized.traverseBefore = true
 52 |         normalized.traverseAfter = true
 53 |         return normalized
 54 |     }
 55 | 
 56 |     /// Memberwise initializer; each argument defaults to the matching property default.
 57 |     public init(traverseBefore: Bool = false, traverseAfter: Bool = false, showDiff: Bool = false, onlyVisibleElements: Bool = false, showAnimation: Bool = true, animationDuration: Double = 0.8, pidForTraversal: pid_t? = nil, delayAfterAction: Double = 0.2) {
 58 |         self.traverseBefore = traverseBefore
 59 |         self.traverseAfter = traverseAfter
 60 |         self.showDiff = showDiff
 61 |         self.onlyVisibleElements = onlyVisibleElements
 62 |         self.showAnimation = showAnimation
 63 |         self.animationDuration = animationDuration
 64 |         self.pidForTraversal = pidForTraversal
 65 |         self.delayAfterAction = delayAfterAction
 66 |     }
 67 | }
 68 | 
 69 | 
 70 | /// Everything `performAction` reports back: per-step outputs plus per-step error strings.
 71 | public struct ActionResult: Codable, Sendable {
 72 |     /// Result of `openApplication`, set when the primary action was `open` and it succeeded.
 73 |     public var openResult: AppOpenerResult?
 74 |     /// The PID used for traversals: taken from the open result or from `ActionOptions.pidForTraversal`.
 75 |     public var traversalPid: pid_t?
 76 |     /// Traversal data captured *before* the primary action.
 77 |     public var traversalBefore: ResponseData?
 78 |     /// Traversal data captured *after* the primary action.
 79 |     public var traversalAfter: ResponseData?
 80 |     /// The calculated difference between traversals, if requested and both traversals are available.
 81 |     public var traversalDiff: TraversalDiff?
 82 |     /// Description of any error raised by the primary action (open/input). Traversal errors have their own fields below.
 83 |     public var primaryActionError: String?
 84 |     /// Description of any error encountered during the 'before' traversal.
 85 |     public var traversalBeforeError: String?
 86 |     /// Description of any error encountered during the 'after' traversal.
 87 |     public var traversalAfterError: String?
 88 | 
 89 |      // Memberwise initializer; every field defaults to nil.
 90 |      public init(openResult: AppOpenerResult? = nil, traversalPid: pid_t? = nil, traversalBefore: ResponseData? = nil, traversalAfter: ResponseData? = nil, traversalDiff: TraversalDiff? = nil, primaryActionError: String? = nil, traversalBeforeError: String? = nil, traversalAfterError: String? = nil) {
 91 |          self.openResult = openResult
 92 |          self.traversalPid = traversalPid
 93 |          self.traversalBefore = traversalBefore
 94 |          self.traversalAfter = traversalAfter
 95 |          self.traversalDiff = traversalDiff
 96 |          self.primaryActionError = primaryActionError
 97 |          self.traversalBeforeError = traversalBeforeError
 98 |          self.traversalAfterError = traversalAfterError
 99 |      }
100 | }
101 | 
102 | 
103 | // --- Action Coordinator Logic ---
104 | 
105 | /// Orchestrates application opening, input simulation, and accessibility traversal.
106 | /// Runs on the main actor. Failures are never thrown: they are logged to stderr and recorded in the
107 | /// returned `ActionResult` (`primaryActionError`, `traversalBeforeError`, `traversalAfterError`).
108 | /// - Parameters:
109 | ///   - action: The primary action to perform (`PrimaryAction`).
110 | ///   - optionsInput: Configuration for the run; normalized through `ActionOptions.validated()` before use.
111 | /// - Returns: An `ActionResult` holding whatever each executed step produced. When no PID is known, or neither
112 | ///   traversal nor animation is requested, only the primary action (and its delay) runs before returning.
113 | @MainActor
114 | public func performAction(
115 |     action: PrimaryAction,
116 |     optionsInput: ActionOptions = ActionOptions()
117 | ) async -> ActionResult {
118 |     let options = optionsInput.validated() // showDiff implies both traversals
119 |     var result = ActionResult()
120 |     var effectivePid: pid_t? = options.pidForTraversal
121 |     var primaryActionError: Error? = nil // Copied into the result as a string at each exit point
122 |     var primaryActionExecuted: Bool = false // Gates the post-action delay
123 |     let maxDelayNanoseconds = Double(Int64.max) // Cap so UInt64(_:) cannot trap on a huge or infinite delay
124 |     fputs("info: [Coordinator] Starting action: \(action) with options: \(options)\n", stderr)
125 | 
126 |     // --- 1. Determine Target PID & Execute Open Action ---
127 |     if case .open(let identifier) = action {
128 |         fputs("info: [Coordinator] Primary action is 'open', attempting to get PID for '\(identifier)'...\n", stderr)
129 |         do {
130 |             let openRes = try await openApplication(identifier: identifier)
131 |             result.openResult = openRes
132 |             effectivePid = openRes.pid
133 |             fputs("info: [Coordinator] App opened successfully. PID: \(openRes.pid).\n", stderr)
134 |             primaryActionExecuted = true // Mark 'open' as executed
135 |             // No open-specific delay: the generic delayAfterAction below covers it.
136 |         } catch {
137 |             fputs("error: [Coordinator] Failed to open application '\(identifier)': \(error.localizedDescription)\n", stderr)
138 |             primaryActionError = error
139 |             // Without any PID nothing downstream can run, so report the failure and stop.
140 |             guard let fallbackPid = effectivePid else {
141 |                 result.primaryActionError = error.localizedDescription
142 |                 fputs("warning: [Coordinator] Cannot proceed with PID-dependent steps (traversal) due to open failure and no provided PID.\n", stderr)
143 |                 return result
144 |             }
145 |             fputs("warning: [Coordinator] Open failed, but continuing with provided PID \(fallbackPid).\n", stderr)
146 |         }
147 |     }
148 | 
149 |     result.traversalPid = effectivePid
150 | 
151 |     // --- Check if PID is available for traversal ---
152 |     guard let pid = effectivePid, (options.traverseBefore || options.traverseAfter || options.showAnimation) else {
153 |         if options.traverseBefore || options.traverseAfter || options.showAnimation {
154 |             fputs("warning: [Coordinator] Traversal or animation requested, but no PID could be determined (app open failed or PID not provided).\n", stderr)
155 |             if options.traverseBefore { result.traversalBeforeError = "PID unavailable" }
156 |             if options.traverseAfter { result.traversalAfterError = "PID unavailable" }
157 |         } else {
158 |             fputs("info: [Coordinator] No PID determined and no traversal/animation requested. Proceeding with primary action only (if applicable).\n", stderr)
159 |         }
160 |         // An input action needs no PID, so it still runs on this early-exit path.
161 |         if case .input(let inputAction) = action {
162 |             fputs("info: [Coordinator] Executing primary input action (no PID context available/needed for traversal)...\n", stderr)
163 |             do {
164 |                 try await executeInputAction(inputAction, options: options)
165 |                 primaryActionExecuted = true // Mark 'input' as executed
166 |             } catch {
167 |                 fputs("error: [Coordinator] Failed to execute input action: \(error.localizedDescription)\n", stderr)
168 |                 primaryActionError = error
169 |             }
170 |         }
171 |         // `.traverseOnly` has nothing to execute, and `.open` already ran in step 1.
172 | 
173 |         // Apply the generic delay whenever an action ran, even though no traversal follows.
174 |         // The `> 0` test also rejects NaN; the nanosecond count is clamped because UInt64(_:)
175 |         // traps on values it cannot represent.
176 |         if primaryActionExecuted && options.delayAfterAction > 0 {
177 |             fputs("info: [Coordinator] Primary action finished. Applying delay: \(options.delayAfterAction)s (before exiting due to no PID/traversal/animation)\n", stderr)
178 |             try? await Task.sleep(nanoseconds: UInt64(min(options.delayAfterAction * 1_000_000_000, maxDelayNanoseconds)))
179 |         }
180 | 
181 |         result.primaryActionError = primaryActionError?.localizedDescription
182 |         return result
183 |     }
184 | 
185 |     fputs("info: [Coordinator] Effective PID for subsequent steps: \(pid)\n", stderr)
186 | 
187 |     // --- 2. Traverse Before ---
188 |     if options.traverseBefore {
189 |         fputs("info: [Coordinator] Performing pre-action traversal for PID \(pid)...\n", stderr)
190 |         do {
191 |             result.traversalBefore = try traverseAccessibilityTree(pid: pid, onlyVisibleElements: options.onlyVisibleElements)
192 |             fputs("info: [Coordinator] Pre-action traversal complete. Elements: \(result.traversalBefore?.elements.count ?? 0)\n", stderr)
193 |         } catch {
194 |             fputs("error: [Coordinator] Pre-action traversal failed: \(error.localizedDescription)\n", stderr)
195 |             result.traversalBeforeError = error.localizedDescription
196 |         }
197 |     }
198 | 
199 |     // --- 3. Execute Primary Input Action (if not 'open' or 'traverseOnly') ---
200 |     if case .input(let inputAction) = action {
201 |         fputs("info: [Coordinator] Executing primary input action...\n", stderr)
202 |         do {
203 |             try await executeInputAction(inputAction, options: options)
204 |             primaryActionExecuted = true // Mark 'input' as executed
205 |         } catch {
206 |             fputs("error: [Coordinator] Failed to execute input action: \(error.localizedDescription)\n", stderr)
207 |             primaryActionError = error
208 |         }
209 |     } else if case .traverseOnly = action {
210 |         fputs("info: [Coordinator] Primary action is 'traverseOnly', skipping action execution.\n", stderr)
211 |     } // 'open' action was handled earlier
212 | 
213 |     // --- 4. Apply Delay AFTER Action, BEFORE Traverse After ---
214 |     // Only when an action actually ran and the delay is positive; nanoseconds are clamped to avoid a conversion trap.
215 |     if primaryActionExecuted && options.delayAfterAction > 0 {
216 |         fputs("info: [Coordinator] Primary action finished. Applying delay: \(options.delayAfterAction)s (before post-action traversal)\n", stderr)
217 |         try? await Task.sleep(nanoseconds: UInt64(min(options.delayAfterAction * 1_000_000_000, maxDelayNanoseconds)))
218 |     }
219 | 
220 | 
221 |     // --- 5. Traverse After ---
222 |     var finalTraversalData: ResponseData? = nil
223 |     if options.traverseAfter {
224 |         fputs("info: [Coordinator] Performing post-action traversal for PID \(pid)...\n", stderr)
225 |         do {
226 |             let traversalData = try traverseAccessibilityTree(pid: pid, onlyVisibleElements: options.onlyVisibleElements)
227 |             result.traversalAfter = traversalData
228 |             finalTraversalData = traversalData // Keep for highlighting
229 |             fputs("info: [Coordinator] Post-action traversal complete. Elements: \(traversalData.elements.count)\n", stderr)
230 |         } catch {
231 |             fputs("error: [Coordinator] Post-action traversal failed: \(error.localizedDescription)\n", stderr)
232 |             result.traversalAfterError = error.localizedDescription
233 |         }
234 |     }
235 | 
236 |     // --- 6. Calculate Diff ---
237 |     if options.showDiff {
238 |         fputs("info: [Coordinator] Calculating detailed traversal diff...\n", stderr)
239 |         if let beforeElements = result.traversalBefore?.elements, let afterElements = result.traversalAfter?.elements {
240 | 
241 |             // --- DETAILED DIFF LOGIC START ---
242 |             var added: [ElementData] = []
243 |             var removed: [ElementData] = []
244 |             var modified: [ModifiedElement] = []
245 | 
246 |             // Read-only view of the 'after' list; matches are tracked by index rather than by removing elements.
247 |             let remainingAfter = afterElements
248 |             var matchedAfterIndices = Set<Int>() // Keep track of matched 'after' elements
249 | 
250 |             let positionTolerance: Double = 5.0 // Max distance in points to consider a position match
251 | 
252 |             // Iterate through 'before' elements to find matches or mark as removed
253 |             for beforeElement in beforeElements {
254 |                 var bestMatchIndex: Int? = nil
255 |                 var smallestDistanceSq: Double = .greatestFiniteMagnitude
256 | 
257 |                 // Find potential matches in the 'after' list
258 |                 for (index, afterElement) in remainingAfter.enumerated() {
259 |                     // Skip if already matched or role doesn't match
260 |                     if matchedAfterIndices.contains(index) || beforeElement.role != afterElement.role {
261 |                         continue
262 |                     }
263 | 
264 |                     // Check position proximity (if coordinates exist)
265 |                     if let bx = beforeElement.x, let by = beforeElement.y, let ax = afterElement.x, let ay = afterElement.y {
266 |                         let dx = bx - ax
267 |                         let dy = by - ay
268 |                         let distanceSq = (dx * dx) + (dy * dy)
269 | 
270 |                         if distanceSq <= (positionTolerance * positionTolerance) {
271 |                             // Found a plausible match based on role and position
272 |                             // If multiple are close, pick the closest one
273 |                             if distanceSq < smallestDistanceSq {
274 |                                 smallestDistanceSq = distanceSq
275 |                                 bestMatchIndex = index
276 |                             }
277 |                         }
278 |                     } else if beforeElement.x == nil && afterElement.x == nil && beforeElement.y == nil && afterElement.y == nil {
279 |                         // Neither element has a position, so positional matching is impossible.
280 |                         // Fallback: treat them as the same element when the role matches (checked above)
281 |                         // and both carry identical, non-nil text.
282 |                         if let bt = beforeElement.text, let at = afterElement.text, bt == at {
283 |                             if bestMatchIndex == nil { // Only if no match has been found yet
284 |                                 bestMatchIndex = index
285 |                                 // Don't update smallestDistanceSq here as it's not a positional match
286 |                             }
287 |                         }
288 |                     }
289 |                 } // End inner loop through 'after' elements
290 | 
291 |                 if let matchIndex = bestMatchIndex {
292 |                     // Found a match
293 |                     let afterElement = remainingAfter[matchIndex]
294 |                     matchedAfterIndices.insert(matchIndex) // Mark as matched
295 | 
296 |                     // --- Attribute Comparison ---
297 |                     var attributeChanges: [AttributeChangeDetail] = []
298 | 
299 |                     // Handle TEXT change specifically using the dedicated initializer
300 |                     if beforeElement.text != afterElement.text {
301 |                         attributeChanges.append(AttributeChangeDetail(textBefore: beforeElement.text, textAfter: afterElement.text))
302 |                     }
303 | 
304 |                     // Geometry attributes are compared with a small tolerance (see areDoublesEqual)
305 |                     if !areDoublesEqual(beforeElement.x, afterElement.x) {
306 |                         attributeChanges.append(AttributeChangeDetail(attribute: "x", before: beforeElement.x, after: afterElement.x))
307 |                     }
308 |                     if !areDoublesEqual(beforeElement.y, afterElement.y) {
309 |                         attributeChanges.append(AttributeChangeDetail(attribute: "y", before: beforeElement.y, after: afterElement.y))
310 |                     }
311 |                     if !areDoublesEqual(beforeElement.width, afterElement.width) {
312 |                         attributeChanges.append(AttributeChangeDetail(attribute: "width", before: beforeElement.width, after: afterElement.width))
313 |                     }
314 |                     if !areDoublesEqual(beforeElement.height, afterElement.height) {
315 |                         attributeChanges.append(AttributeChangeDetail(attribute: "height", before: beforeElement.height, after: afterElement.height))
316 |                     }
317 |                     // --- End Attribute Comparison ---
318 | 
319 |                     if !attributeChanges.isEmpty {
320 |                         modified.append(ModifiedElement(before: beforeElement, after: afterElement, changes: attributeChanges))
321 |                     }
322 |                 } else {
323 |                     // No match found for this 'before' element, it was removed
324 |                     removed.append(beforeElement)
325 |                 }
326 |             } // End outer loop through 'before' elements
327 | 
328 |             // Any 'after' elements not matched are 'added'
329 |             for (index, afterElement) in remainingAfter.enumerated() {
330 |                 if !matchedAfterIndices.contains(index) {
331 |                     added.append(afterElement)
332 |                 }
333 |             }
334 | 
335 |             // Assign to result (using the TraversalDiff struct from CombinedActions.swift)
336 |             result.traversalDiff = TraversalDiff(added: added, removed: removed, modified: modified)
337 |             fputs("info: [Coordinator] Detailed diff calculated: Added=\(added.count), Removed=\(removed.count), Modified=\(modified.count)\n", stderr)
338 |             // --- DETAILED DIFF LOGIC END ---
339 | 
340 |         } else {
341 |             fputs("warning: [Coordinator] Cannot calculate detailed diff because one or both traversals failed or were not performed.\n", stderr)
342 |         }
343 |     }
344 | 
345 |     // --- 7. Highlight Target Elements (controlled by showAnimation) ---
346 |     if options.showAnimation {
347 |         if let elementsToHighlight = finalTraversalData?.elements, !elementsToHighlight.isEmpty {
348 |             fputs("info: [Coordinator] Highlighting \(elementsToHighlight.count) elements from final traversal (showAnimation=true)...\n", stderr)
349 |             // Called without `try` or `await`; highlighting is not waited for here.
350 |             drawHighlightBoxes(for: elementsToHighlight, duration: options.animationDuration)
351 |             // NOTE(review): the original comment says highlighting continues after this function returns — confirm in DrawVisuals.swift.
352 |         } else if finalTraversalData == nil && options.traverseAfter {
353 |             fputs("warning: [Coordinator] Animation requested, but post-action traversal failed or was skipped (cannot highlight).\n", stderr)
354 |         } else {
355 |             fputs("info: [Coordinator] Animation requested, but no elements found in the final traversal to highlight.\n", stderr)
356 |         }
357 |     } else {
358 |         fputs("info: [Coordinator] Skipping element highlighting (showAnimation=false).\n", stderr)
359 |     }
360 | 
361 |     // Store any primary action error encountered
362 |     result.primaryActionError = primaryActionError?.localizedDescription
363 | 
364 |     fputs("info: [Coordinator] Action sequence finished.\n", stderr)
365 |     return result
366 | }
367 | 
368 | 
369 | /// Executes one `InputAction` by calling the matching SDK input function.
370 | /// With `options.showAnimation` set, the `...AndVisualize` variant runs; otherwise the plain variant does.
371 | /// - Throws: `MacosUseSDKError.inputInvalidArgument` when a key name cannot be mapped to a key code,
372 | ///   and any error propagated by the input function that is called.
373 | @MainActor
374 | private func executeInputAction(_ action: InputAction, options: ActionOptions) async throws {
375 |     // Switching on (gesture, animate?) gives every combination its own flat case.
376 |     switch (action, options.showAnimation) {
377 |     // Left click
378 |     case (.click(let point), true):
379 |         fputs("log: simulating click AND visualizing at \(point) (duration: \(options.animationDuration))\n", stderr)
380 |         try clickMouseAndVisualize(at: point, duration: options.animationDuration)
381 |     case (.click(let point), false):
382 |         fputs("log: simulating click at \(point) (no visualization)\n", stderr)
383 |         try clickMouse(at: point)
384 |     // Double click
385 |     case (.doubleClick(let point), true):
386 |         fputs("log: simulating double-click AND visualizing at \(point) (duration: \(options.animationDuration))\n", stderr)
387 |         try doubleClickMouseAndVisualize(at: point, duration: options.animationDuration)
388 |     case (.doubleClick(let point), false):
389 |         fputs("log: simulating double-click at \(point) (no visualization)\n", stderr)
390 |         try doubleClickMouse(at: point)
391 |     // Right click
392 |     case (.rightClick(let point), true):
393 |         fputs("log: simulating right-click AND visualizing at \(point) (duration: \(options.animationDuration))\n", stderr)
394 |         try rightClickMouseAndVisualize(at: point, duration: options.animationDuration)
395 |     case (.rightClick(let point), false):
396 |         fputs("log: simulating right-click at \(point) (no visualization)\n", stderr)
397 |         try rightClickMouse(at: point)
398 |     // Typing: the visualized variant receives `nil` so it picks its own caption duration.
399 |     case (.type(let text), true):
400 |         fputs("log: simulating text writing AND visualizing caption \"\(text)\" (auto duration)\n", stderr)
401 |         try writeTextAndVisualize(text, duration: nil)
402 |     case (.type(let text), false):
403 |         fputs("log: simulating text writing \"\(text)\" (no visualization)\n", stderr)
404 |         try writeText(text)
405 |     // Key press: the name must resolve to a key code before anything is logged or sent.
406 |     case (.press(let keyName, let flags), let animated):
407 |         guard let keyCode = mapKeyNameToKeyCode(keyName) else {
408 |             throw MacosUseSDKError.inputInvalidArgument("Unknown key name: \(keyName)")
409 |         }
410 |         if animated {
411 |             fputs("log: simulating key press \(keyName) (\(keyCode)) AND visualizing (duration: \(options.animationDuration))\n", stderr)
412 |             try pressKeyAndVisualize(keyCode: keyCode, flags: flags, duration: options.animationDuration)
413 |         } else {
414 |             fputs("log: simulating key press \(keyName) (\(keyCode)) (no visualization)\n", stderr)
415 |             try pressKey(keyCode: keyCode, flags: flags)
416 |         }
417 |     // Mouse move
418 |     case (.move(let point), true):
419 |         fputs("log: simulating mouse move AND visualizing to \(point) (duration: \(options.animationDuration))\n", stderr)
420 |         try moveMouseAndVisualize(to: point, duration: options.animationDuration)
421 |     case (.move(let point), false):
422 |         fputs("log: simulating mouse move to \(point) (no visualization)\n", stderr)
423 |         try moveMouse(to: point)
424 |     }
425 | }
426 | 
427 | /// Compares two optional Doubles for the traversal diff.
428 | /// Two `nil`s count as equal, a `nil` never equals a value, and two values are equal when
429 | /// they differ by strictly less than `tolerance`.
430 | fileprivate func areDoublesEqual(_ d1: Double?, _ d2: Double?, tolerance: Double = 0.01) -> Bool {
431 |     guard let lhs = d1, let rhs = d2 else {
432 |         // At least one side is missing: equal only when both are.
433 |         return d1 == nil && d2 == nil
434 |     }
435 |     // Both present: compare within the tolerance band (strict inequality).
436 |     let gap = abs(lhs - rhs)
437 |     return gap < tolerance
438 | }
439 | 


--------------------------------------------------------------------------------
/Sources/MacosUseSDK/AppOpener.swift:
--------------------------------------------------------------------------------
  1 | import AppKit
  2 | import Foundation
  3 | 
  4 | // Errors that can be raised while locating, launching, or identifying an application.
  5 | public extension MacosUseSDKError {
  6 |     /// Failure modes of the app-opening flow; most cases carry the identifier or path they concern.
  7 |     enum AppOpenerError: Error, LocalizedError {
  8 |         case appNotFound(identifier: String)
  9 |         case invalidPath(path: String)
 10 |         case activationFailed(identifier: String, underlyingError: Error?)
 11 |         case pidLookupFailed(identifier: String)
 12 |         case unexpectedNilURL
 13 | 
 14 |         /// Message for each case; `LocalizedError` exposes it through `localizedDescription`.
 15 |         public var errorDescription: String? {
 16 |             switch self {
 17 |             case .appNotFound(let identifier):
 18 |                 return "Application not found for identifier: '\(identifier)'"
 19 |             case .invalidPath(let path):
 20 |                 return "Provided path does not appear to be a valid application bundle: '\(path)'"
 21 |             // The activation message gains a ": <cause>" suffix only when an underlying error exists.
 22 |             case .activationFailed(let identifier, .none):
 23 |                 return "Failed to open/activate application '\(identifier)'"
 24 |             case .activationFailed(let identifier, let cause?):
 25 |                 return "Failed to open/activate application '\(identifier)': \(cause.localizedDescription)"
 26 |             case .pidLookupFailed(let identifier):
 27 |                 return "Could not determine PID for application '\(identifier)' after activation attempt."
 28 |             case .unexpectedNilURL:
 29 |                 return "Internal error: Application URL became nil unexpectedly."
 30 |             }
 31 |         }
 32 |     }
 33 | }
 34 | 
 35 | // Payload returned when an application is opened successfully (`performAction` reads `pid` from it).
 36 | public struct AppOpenerResult: Codable, Sendable {
 37 |     public let pid: pid_t // Used by `performAction` as the PID for subsequent traversals.
 38 |     public let appName: String
 39 |     public let processingTimeSeconds: String // Held as a String; presumably pre-formatted seconds — confirm where the result is built.
 40 | }
 41 | 
 42 | // --- Private Helper Class for State Management ---
 43 | // Using a class instance allows managing state like stepStartTime across async calls
 44 | @MainActor
 45 | private class AppOpenerOperation {
 46 |     let appIdentifier: String
 47 |     let overallStartTime: Date = Date()
 48 |     var stepStartTime: Date
 49 | 
 50 |     init(identifier: String) {
 51 |         fputs("info: starting AppOpenerOperation for: \(identifier)\n", stderr)
 52 |         appIdentifier = identifier
 53 |         stepStartTime = overallStartTime // The first step is timed from the operation's start.
 54 |     }
 55 | 
 56 |     // Helper to log step completion times (Method definition)
 57 |     func logStepCompletion(_ stepDescription: String) {
 58 |         let finishedAt = Date()
 59 |         // Elapsed seconds since the previous step boundary, rendered with three decimals.
 60 |         let elapsedText = String(format: "%.3f", finishedAt.timeIntervalSince(stepStartTime))
 61 |         fputs("info: [\(elapsedText)s] finished '\(stepDescription)'\n", stderr)
 62 |         stepStartTime = finishedAt // The following step is measured from here.
 63 |     }
 64 | 
 65 |     // Main logic function using async/await (Method definition)
 66 |     func execute() async throws -> AppOpenerResult {
 67 |         // --- All the application discovery, PID finding, and activation logic goes *inside* this method ---
 68 |         let workspace = NSWorkspace.shared // Define workspace locally within the method
 69 |         var appURL: URL?
 70 |         var foundPID: pid_t?
 71 |         var bundleIdentifier: String?
 72 |         var finalAppName: String?
 73 | 
 74 |         // --- 1. Application Discovery ---
 75 |         // (Path checking logic...)
 76 |         if appIdentifier.hasSuffix(".app") && appIdentifier.contains("/") {
 77 |              fputs("info: interpreting '\(appIdentifier)' as a path.\n", stderr)
 78 |              let potentialURL = URL(fileURLWithPath: appIdentifier)
 79 |              var isDirectory: ObjCBool = false
 80 |              if FileManager.default.fileExists(atPath: potentialURL.path, isDirectory: &isDirectory)
 81 |                  && isDirectory.boolValue && potentialURL.pathExtension == "app"
 82 |              {
 83 |                  appURL = potentialURL
 84 |                  fputs("info: path confirmed as valid application bundle: \(potentialURL.path)\n", stderr)
 85 |                  if let bundle = Bundle(url: potentialURL) {
 86 |                      bundleIdentifier = bundle.bundleIdentifier
 87 |                      finalAppName = bundle.localizedInfoDictionary?["CFBundleName"] as? String ?? bundle.bundleIdentifier
 88 |                      fputs("info: derived bundleID: \(bundleIdentifier ?? "nil"), name: \(finalAppName ?? "nil") from path\n", stderr)
 89 |                  }
 90 |              } else {
 91 |                   fputs("warning: provided path does not appear to be a valid application bundle: \(appIdentifier). Will try as name/bundleID.\n", stderr)
 92 |              }
 93 |          }
 94 | 
 95 |         // (Name/BundleID search logic...)
 96 |          if appURL == nil {
 97 |              fputs("info: interpreting '\(appIdentifier)' as an application name or bundleID, searching...\n", stderr)
 98 |               if let foundURL = workspace.urlForApplication(withBundleIdentifier: appIdentifier) {
 99 |                   appURL = foundURL
100 |                   bundleIdentifier = appIdentifier
101 |                   fputs("info: found application url via bundleID '\(appIdentifier)': \(foundURL.path)\n", stderr)
102 |                   if let bundle = Bundle(url: foundURL) {
103 |                      finalAppName = bundle.localizedInfoDictionary?["CFBundleName"] as? String ?? bundle.bundleIdentifier
104 |                   }
105 |               } else if let foundURLByName = workspace.urlForApplication(toOpen: URL(fileURLWithPath: "/Applications/\(appIdentifier).app")) ??
106 |                                              workspace.urlForApplication(toOpen: URL(fileURLWithPath: "/System/Applications/\(appIdentifier).app")) ??
107 |                                              workspace.urlForApplication(toOpen: URL(fileURLWithPath: "/System/Applications/Utilities/\(appIdentifier).app"))
108 |               {
109 |                   appURL = foundURLByName
110 |                   fputs("info: found application url via name search '\(appIdentifier)': \(foundURLByName.path)\n", stderr)
111 |                   if let bundle = Bundle(url: foundURLByName) {
112 |                       bundleIdentifier = bundle.bundleIdentifier
113 |                       finalAppName = bundle.localizedInfoDictionary?["CFBundleName"] as? String ?? bundle.bundleIdentifier
114 |                       fputs("info: derived bundleID: \(bundleIdentifier ?? "nil"), name: \(finalAppName ?? "nil") from found URL\n", stderr)
115 |                   }
116 |               } else {
117 |                   logStepCompletion("application discovery (failed)") // Call method
118 |                   throw MacosUseSDKError.AppOpenerError.appNotFound(identifier: appIdentifier)
119 |               }
120 |          }
121 |         logStepCompletion("application discovery (url: \(appURL?.path ?? "nil"), bundleID: \(bundleIdentifier ?? "nil"))") // Call method
122 | 
123 |         // (Guard statement logic...)
124 |         guard let finalAppURL = appURL else {
125 |              fputs("error: unexpected error - application url is nil before launch attempt.\n", stderr)
126 |             throw MacosUseSDKError.AppOpenerError.unexpectedNilURL
127 |         }
128 |         // (Final app name determination...)
129 |          if finalAppName == nil {
130 |               if let bundle = Bundle(url: finalAppURL) {
131 |                    finalAppName = bundle.localizedInfoDictionary?["CFBundleName"] as? String ?? bundle.bundleIdentifier
132 |               }
133 |               finalAppName = finalAppName ?? appIdentifier
134 |          }
135 | 
136 | 
137 |         // --- 2. Pre-find PID if running ---
138 |         // (PID finding logic...)
139 |         if let bID = bundleIdentifier {
140 |              fputs("info: checking running applications for bundle id: \(bID)\n", stderr)
141 |              if let runningApp = NSRunningApplication.runningApplications(withBundleIdentifier: bID).first {
142 |                  foundPID = runningApp.processIdentifier
143 |                  fputs("info: found running instance with pid \(foundPID!) for bundle id \(bID).\n", stderr)
144 |              } else {
145 |                  fputs("info: no running instance found for bundle id \(bID) before activation attempt.\n", stderr)
146 |              }
147 |          } else {
148 |              fputs("warning: no bundle identifier, attempting lookup by URL: \(finalAppURL.path)\n", stderr)
149 |              for app in workspace.runningApplications {
150 |                  if app.bundleURL?.standardizedFileURL == finalAppURL.standardizedFileURL || app.executableURL?.standardizedFileURL == finalAppURL.standardizedFileURL {
151 |                      foundPID = app.processIdentifier
152 |                      fputs("info: found running instance with pid \(foundPID!) matching URL.\n", stderr)
153 |                      break
154 |                  }
155 |              }
156 |              if foundPID == nil {
157 |                  fputs("info: no running instance found by URL before activation attempt.\n", stderr)
158 |              }
159 |          }
160 |         logStepCompletion("pre-finding existing process (pid: \(foundPID.map(String.init) ?? "none found"))") // Call method
161 | 
162 |         // --- 3. Open/Activate Application ---
163 |         // (Activation logic...)
164 |         fputs("info: attempting to open/activate application: \(finalAppName ?? appIdentifier)\n", stderr)
165 |         let configuration = NSWorkspace.OpenConfiguration() // Define configuration locally
166 | 
167 |         do {
168 |             // Await the async call AND extract the PID within an explicit MainActor Task
169 |             // This replaces MainActor.run which caused issues in Swift 6.1 with async closures
170 |             let pidAfterOpen = try await Task { @MainActor in
171 |                 fputs("info: [Task @MainActor] executing workspace.openApplication...\n", stderr)
172 |                 // The await happens *inside* the MainActor Task block
173 |                 let runningApp = try await workspace.openApplication(at: finalAppURL, configuration: configuration)
174 |                 // Access the non-Sendable property *inside* the MainActor Task block
175 |                 let pid = runningApp.processIdentifier
176 |                 fputs("info: [Task @MainActor] got pid \(pid) from NSRunningApplication.\n", stderr)
177 |                 // Return the Sendable pid_t
178 |                 return pid
179 |             }.value // Await the result of the Task
180 | 
181 |             logStepCompletion("opening/activating application async call completed")
182 | 
183 |              // --- 4. Determine Final PID ---
184 |              var finalPID: pid_t? = nil
185 | 
186 |              if let pid = foundPID {
187 |                  finalPID = pid
188 |                  fputs("info: using pre-found pid \(pid).\n", stderr)
189 |              } else {
190 |                  // Use the PID extracted immediately after the await
191 |                  finalPID = pidAfterOpen
192 |                  fputs("info: using pid \(finalPID!) from newly launched/activated application instance.\n", stderr)
193 |                  foundPID = finalPID // Update foundPID if it was initially nil
194 |              }
195 |              logStepCompletion("determining final pid (using \(finalPID!))") // Call method
196 | 
197 |              // --- 5. Prepare Result ---
198 |              let endTime = Date()
199 |              let processingTime = endTime.timeIntervalSince(overallStartTime)
200 |              let formattedTime = String(format: "%.3f", processingTime)
201 | 
202 |              fputs("success: application '\(finalAppName ?? appIdentifier)' active (pid: \(finalPID!)).\n", stderr)
203 |              fputs("info: total processing time: \(formattedTime) seconds\n", stderr)
204 | 
205 |              return AppOpenerResult(
206 |                  pid: finalPID!,
207 |                  appName: finalAppName ?? appIdentifier,
208 |                  processingTimeSeconds: formattedTime
209 |              )
210 | 
211 |         } catch {
212 |              logStepCompletion("opening/activating application (failed)") // Call method
213 |              fputs("error: activation call failed: \(error.localizedDescription)\n", stderr)
214 | 
215 |              if let pid = foundPID {
216 |                  fputs("warning: activation failed, but PID \(pid) was found beforehand. Assuming it's running.\n", stderr)
217 |                  let endTime = Date()
218 |                  let processingTime = endTime.timeIntervalSince(overallStartTime)
219 |                  let formattedTime = String(format: "%.3f", processingTime)
220 |                  fputs("info: total processing time: \(formattedTime) seconds\n", stderr)
221 |                  return AppOpenerResult(
222 |                      pid: pid,
223 |                      appName: finalAppName ?? appIdentifier,
224 |                      processingTimeSeconds: formattedTime
225 |                  )
226 |              } else {
227 |                  fputs("error: PID could not be determined after activation failure.\n", stderr)
228 |                   let endTime = Date()
229 |                   let processingTime = endTime.timeIntervalSince(overallStartTime)
230 |                   let formattedTime = String(format: "%.3f", processingTime)
231 |                   fputs("info: total processing time (on failure): \(formattedTime) seconds\n", stderr)
232 |                  throw MacosUseSDKError.AppOpenerError.activationFailed(identifier: appIdentifier, underlyingError: error)
233 |              }
234 |         }
235 |         // --- End of logic inside execute method ---
236 |     } // End of execute() method
237 | } // End of AppOpenerOperation class
238 | 
239 | 
240 | /// Launches (or activates, if already running) the macOS application named by `identifier`.
241 | /// The work is delegated to `AppOpenerOperation`, which logs its progress to stderr.
242 | ///
243 | /// - Parameter identifier: An application name (e.g., "Calculator"), a bundle ID (e.g., "com.apple.calculator"), or a full path (e.g., "/System/Applications/Calculator.app").
244 | /// - Returns: An `AppOpenerResult` carrying the PID, the application name, and the processing time.
245 | /// - Throws: `MacosUseSDKError.AppOpenerError.appNotFound` when `identifier` is empty or only whitespace; otherwise whatever `AppOpenerOperation.execute()` throws.
246 | @MainActor
247 | public func openApplication(identifier: String) async throws -> AppOpenerResult {
248 |     // Reject identifiers that contain nothing but whitespace/newlines.
249 |     let trimmedIdentifier = identifier.trimmingCharacters(in: .whitespacesAndNewlines)
250 |     if trimmedIdentifier.isEmpty {
251 |         throw MacosUseSDKError.AppOpenerError.appNotFound(identifier: "(empty)")
252 |     }
253 | 
254 |     // The original (untrimmed) identifier is what gets handed to the operation.
255 |     return try await AppOpenerOperation(identifier: identifier).execute()
256 | }
257 | 
258 | // --- IMPORTANT: Ensure no other executable code (like the old script lines) exists below this line in the file ---
259 | // --- Remove any leftover 'if', 'guard', 'logStepCompletion', 'workspace.openApplication', 'RunLoop.main.run' calls from the top level ---
260 | 


--------------------------------------------------------------------------------
/Sources/MacosUseSDK/CombinedActions.swift:
--------------------------------------------------------------------------------
  1 | import Foundation // Needed for fputs, etc.
  2 | import CoreGraphics // Needed for CGPoint, CGKeyCode, CGEventFlags
  3 | 
  4 | /// Describes how a single attribute of an accessibility element changed between two traversals.
  5 | public struct AttributeChangeDetail: Codable, Sendable {
  6 |     /// Name of the attribute that changed (text changes always use "text").
  7 |     public let attributeName: String
  8 | 
  9 |     // --- Fields for Text Diff ---
 10 |     /// Characters inserted going from the old text to the new text, in reading order.
 11 |     /// `nil` for non-text attributes or when nothing was inserted.
 12 |     public let addedText: String?
 13 |     /// Characters removed going from the old text to the new text, in reading order.
 14 |     /// `nil` for non-text attributes or when nothing was removed.
 15 |     public let removedText: String?
 16 | 
 17 |     // --- Fallback Fields ---
 18 |     /// Full old value; populated by the old/new initializers, always `nil` for text diffs.
 19 |     public let oldValue: String?
 20 |     /// Full new value; populated by the old/new initializers, always `nil` for text diffs.
 21 |     public let newValue: String?
 22 | 
 23 |     // --- Initializers ---
 24 | 
 25 |     /// Records a plain old/new change for any attribute whose value has a textual description.
 26 |     /// - Parameters:
 27 |     ///   - attribute: Name of the attribute. Passing "text" still works but logs a warning,
 28 |     ///     because text changes should go through `init(textBefore:textAfter:)`.
 29 |     ///   - before: Value before the change, or `nil` if absent.
 30 |     ///   - after: Value after the change, or `nil` if absent.
 31 |     init<T: CustomStringConvertible>(attribute: String, before: T?, after: T?) {
 32 |         if attribute == "text" {
 33 |             // Misuse is tolerated: warn, then fall through to the same old/new capture.
 34 |             fputs("warning: Generic AttributeChangeDetail initializer called for 'text'. Use text-specific init.\n", stderr)
 35 |         }
 36 |         self.attributeName = attribute
 37 |         self.oldValue = before?.description
 38 |         self.newValue = after?.description
 39 |         self.addedText = nil // Not applicable
 40 |         self.removedText = nil // Not applicable
 41 |     }
 42 | 
 43 |     /// Records a change of a floating-point attribute (e.g. position/size), formatting both values.
 44 |     /// - Parameters:
 45 |     ///   - attribute: Name of the attribute.
 46 |     ///   - before: Value before the change, or `nil` if absent.
 47 |     ///   - after: Value after the change, or `nil` if absent.
 48 |     ///   - format: `String(format:)` pattern applied to both values. Defaults to one decimal place.
 49 |     init(attribute: String, before: Double?, after: Double?, format: String = "%.1f") {
 50 |         self.attributeName = attribute
 51 |         self.oldValue = before.map { String(format: format, $0) }
 52 |         self.newValue = after.map { String(format: format, $0) }
 53 |         self.addedText = nil
 54 |         self.removedText = nil
 55 |     }
 56 | 
 57 |     /// Records a text change as the characters inserted and removed between the two strings.
 58 |     ///
 59 |     /// `nil` inputs are treated as empty strings. `oldValue`/`newValue` are left `nil` so the
 60 |     /// output does not repeat the full text alongside the diff.
 61 |     ///
 62 |     /// - Parameters:
 63 |     ///   - textBefore: Text before the change.
 64 |     ///   - textAfter: Text after the change.
 65 |     init(textBefore: String?, textAfter: String?) {
 66 |         self.attributeName = "text"
 67 | 
 68 |         let diff = (textAfter ?? "").difference(from: textBefore ?? "")
 69 | 
 70 |         // Read `insertions`/`removals` rather than iterating `diff` itself: iterating a
 71 |         // CollectionDifference yields removals from the highest offset down, which would
 72 |         // spell the removed text backwards. Both properties are ordered by ascending offset.
 73 |         let addedChars: [Character] = diff.insertions.compactMap { change -> Character? in
 74 |             if case .insert(_, let element, _) = change { return element }
 75 |             return nil
 76 |         }
 77 |         let removedChars: [Character] = diff.removals.compactMap { change -> Character? in
 78 |             if case .remove(_, let element, _) = change { return element }
 79 |             return nil
 80 |         }
 81 |         self.addedText = addedChars.isEmpty ? nil : String(addedChars)
 82 |         self.removedText = removedChars.isEmpty ? nil : String(removedChars)
 83 |         self.oldValue = nil
 84 |         self.newValue = nil
 85 |     }
 86 | }
 87 | 
 88 | /// Pairs the 'before' and 'after' snapshots of an element that was identified as potentially
 89 | /// the same logical entity across two traversals, but whose attributes differ.
 90 | public struct ModifiedElement: Codable, Sendable {
 91 |     /// The element as captured by the 'before' traversal.
 92 |     public let before: ElementData
 93 |     /// The element as captured by the 'after' traversal.
 94 |     public let after: ElementData
 95 |     /// Details of the specific attributes that changed between `before` and `after`.
 96 |     public let changes: [AttributeChangeDetail]
 97 | }
 98 | 
 99 | /// The outcome of comparing two accessibility traversals: elements that were added,
100 | /// removed, or modified (the latter carrying per-attribute change details).
101 | public struct TraversalDiff: Codable, Sendable {
102 |     public let added: [ElementData] // Present in the 'after' traversal but not in the 'before' one
103 |     public let removed: [ElementData] // Present in the 'before' traversal but not in the 'after' one
104 |     /// Elements identified as modified, along with their specific changes.
105 |     public let modified: [ModifiedElement]
106 | }
107 | 
108 | /// Result of an action bracketed by two accessibility traversals: the traversal taken
109 | /// after the action, plus the diff against the traversal taken before it (not stored here).
110 | public struct ActionDiffResult: Codable, Sendable {
111 |     public let afterAction: ResponseData // Traversal captured after the action was performed
112 |     public let diff: TraversalDiff // Changes relative to the pre-action traversal
113 | }
114 | 
115 | /// Defines combined, higher-level actions using the SDK's core functionalities.
116 | public enum CombinedActions {
117 | 
118 |     /// Brings an application up (launching or activating it) and then captures its accessibility tree.
119 |     ///
120 |     /// Chains `openApplication` into `traverseAccessibilityTree`, using the PID reported by the
121 |     /// open step as the traversal target. Every step is logged to stderr.
122 |     ///
123 |     /// - Parameters:
124 |     ///   - identifier: The application name (e.g., "Calculator"), bundle ID (e.g., "com.apple.calculator"), or full path (e.g., "/System/Applications/Calculator.app").
125 |     ///   - onlyVisibleElements: Forwarded to the traversal; if true, only elements with valid position and size are collected. Defaults to false.
126 |     /// - Returns: The `ResponseData` produced by the traversal.
127 |     /// - Throws: Rethrows any error from opening/activating the application or from the traversal.
128 |     @MainActor // Activation is UI-related, so the whole sequence stays on the main actor
129 |     public static func openAndTraverseApp(identifier: String, onlyVisibleElements: Bool = false) async throws -> ResponseData {
130 |         fputs("info: starting combined action 'openAndTraverseApp' for identifier: '\(identifier)'\n", stderr)
131 | 
132 |         // Launch or activate first; the traversal needs the resulting PID.
133 |         fputs("info: calling openApplication...\n", stderr)
134 |         let launched = try await MacosUseSDK.openApplication(identifier: identifier)
135 |         let targetPID = launched.pid
136 |         fputs("info: openApplication completed successfully. PID: \(targetPID), App Name: \(launched.appName)\n", stderr)
137 | 
138 |         // Snapshot the accessibility tree of that process.
139 |         fputs("info: calling traverseAccessibilityTree for PID \(targetPID) (Visible Only: \(onlyVisibleElements))...\n", stderr)
140 |         let snapshot = try MacosUseSDK.traverseAccessibilityTree(pid: targetPID, onlyVisibleElements: onlyVisibleElements)
141 |         fputs("info: traverseAccessibilityTree completed successfully.\n", stderr)
142 | 
143 |         fputs("info: combined action 'openAndTraverseApp' finished.\n", stderr)
144 |         return snapshot
145 |     }
146 | 
147 |     // --- Input Action followed by Traversal ---
148 | 
149 |     /// Performs a left mouse click at a screen location, pauses briefly, then captures the target application's accessibility tree.
150 |     ///
151 |     /// - Parameters:
152 |     ///   - point: The `CGPoint` (screen coordinates) where the click is issued.
153 |     ///   - pid: The Process ID (PID) of the application traversed after the click.
154 |     ///   - onlyVisibleElements: Forwarded to the traversal; if true, only elements with valid position and size are collected. Defaults to false.
155 |     /// - Returns: The `ResponseData` produced by the traversal.
156 |     /// - Throws: Rethrows errors from the click or the traversal; the pause throws if the task is cancelled.
157 |     @MainActor // Kept for consistency with the other combined actions
158 |     public static func clickAndTraverseApp(point: CGPoint, pid: Int32, onlyVisibleElements: Bool = false) async throws -> ResponseData {
159 |         fputs("info: starting combined action 'clickAndTraverseApp' at (\(point.x), \(point.y)) for PID \(pid)\n", stderr)
160 | 
161 |         // Issue the click.
162 |         fputs("info: calling clickMouse...\n", stderr)
163 |         try MacosUseSDK.clickMouse(at: point)
164 |         fputs("info: clickMouse completed successfully.\n", stderr)
165 | 
166 |         // Give the UI a moment to react before reading it back.
167 |         let settleDelayNanos: UInt64 = 100_000_000 // 100 milliseconds
168 |         try await Task.sleep(nanoseconds: settleDelayNanos)
169 | 
170 |         // Read the accessibility tree of the target process.
171 |         fputs("info: calling traverseAccessibilityTree for PID \(pid) (Visible Only: \(onlyVisibleElements))...\n", stderr)
172 |         let snapshot = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
173 |         fputs("info: traverseAccessibilityTree completed successfully.\n", stderr)
174 | 
175 |         fputs("info: combined action 'clickAndTraverseApp' finished.\n", stderr)
176 |         return snapshot
177 |     }
178 | 
179 |     /// Presses a key (with optional modifiers), pauses briefly, then captures the target application's accessibility tree.
180 |     ///
181 |     /// - Parameters:
182 |     ///   - keyCode: The `CGKeyCode` of the key to press.
183 |     ///   - flags: The modifier flags (`CGEventFlags`) applied to the key press. Defaults to none.
184 |     ///   - pid: The Process ID (PID) of the application traversed after the key press.
185 |     ///   - onlyVisibleElements: Forwarded to the traversal; if true, only elements with valid position and size are collected. Defaults to false.
186 |     /// - Returns: The `ResponseData` produced by the traversal.
187 |     /// - Throws: Rethrows errors from the key press or the traversal; the pause throws if the task is cancelled.
188 |     @MainActor
189 |     public static func pressKeyAndTraverseApp(keyCode: CGKeyCode, flags: CGEventFlags = [], pid: Int32, onlyVisibleElements: Bool = false) async throws -> ResponseData {
190 |         fputs("info: starting combined action 'pressKeyAndTraverseApp' (key: \(keyCode), flags: \(flags.rawValue)) for PID \(pid)\n", stderr)
191 | 
192 |         // Send the key press.
193 |         fputs("info: calling pressKey...\n", stderr)
194 |         try MacosUseSDK.pressKey(keyCode: keyCode, flags: flags)
195 |         fputs("info: pressKey completed successfully.\n", stderr)
196 | 
197 |         // Give the UI a moment to react before reading it back.
198 |         let settleDelayNanos: UInt64 = 100_000_000 // 100 milliseconds
199 |         try await Task.sleep(nanoseconds: settleDelayNanos)
200 | 
201 |         // Read the accessibility tree of the target process.
202 |         fputs("info: calling traverseAccessibilityTree for PID \(pid) (Visible Only: \(onlyVisibleElements))...\n", stderr)
203 |         let snapshot = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
204 |         fputs("info: traverseAccessibilityTree completed successfully.\n", stderr)
205 | 
206 |         fputs("info: combined action 'pressKeyAndTraverseApp' finished.\n", stderr)
207 |         return snapshot
208 |     }
209 | 
210 |     /// Types the given text, pauses briefly, then captures the target application's accessibility tree.
211 |     ///
212 |     /// - Parameters:
213 |     ///   - text: The `String` to type. Note that it is echoed verbatim in the stderr log.
214 |     ///   - pid: The Process ID (PID) of the application traversed after typing.
215 |     ///   - onlyVisibleElements: Forwarded to the traversal; if true, only elements with valid position and size are collected. Defaults to false.
216 |     /// - Returns: The `ResponseData` produced by the traversal.
217 |     /// - Throws: Rethrows errors from the text writing or the traversal; the pause throws if the task is cancelled.
218 |     @MainActor
219 |     public static func writeTextAndTraverseApp(text: String, pid: Int32, onlyVisibleElements: Bool = false) async throws -> ResponseData {
220 |         fputs("info: starting combined action 'writeTextAndTraverseApp' (text: \"\(text)\") for PID \(pid)\n", stderr)
221 | 
222 |         // Type the text.
223 |         fputs("info: calling writeText...\n", stderr)
224 |         try MacosUseSDK.writeText(text)
225 |         fputs("info: writeText completed successfully.\n", stderr)
226 | 
227 |         // Give the UI a moment to react before reading it back.
228 |         let settleDelayNanos: UInt64 = 100_000_000 // 100 milliseconds
229 |         try await Task.sleep(nanoseconds: settleDelayNanos)
230 | 
231 |         // Read the accessibility tree of the target process.
232 |         fputs("info: calling traverseAccessibilityTree for PID \(pid) (Visible Only: \(onlyVisibleElements))...\n", stderr)
233 |         let snapshot = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
234 |         fputs("info: traverseAccessibilityTree completed successfully.\n", stderr)
235 | 
236 |         fputs("info: combined action 'writeTextAndTraverseApp' finished.\n", stderr)
237 |         return snapshot
238 |     }
239 | 
240 |      // You can add similar functions for doubleClick, rightClick, moveMouse etc. if needed
241 | 
242 |     // --- Helper Function for Diffing ---
243 | 
244 |     /// Diffs two traversals by set membership: anything only in `afterElements` counts as added,
245 |     /// anything only in `beforeElements` counts as removed. Duplicates collapse because sets are used.
246 |     /// - Parameters:
247 |     ///   - beforeElements: The list of elements from the first traversal.
248 |     ///   - afterElements: The list of elements from the second traversal.
249 |     /// - Returns: A `TraversalDiff` with sorted `added`/`removed` lists; `modified` is always empty here.
250 |     private static func calculateDiff(beforeElements: [ElementData], afterElements: [ElementData]) -> TraversalDiff {
251 |         fputs("debug: calculating diff between \(beforeElements.count) (before) and \(afterElements.count) (after) elements.\n", stderr)
252 |         // Build a set per side for the comparison. Relies on ElementData being Hashable.
253 |         let earlier = Set(beforeElements)
254 |         let later = Set(afterElements)
255 | 
256 |         // Only in the later traversal => added.
257 |         let onlyInLater = later.subtracting(earlier)
258 |         fputs("debug: diff calculation - found \(onlyInLater.count) added elements.\n", stderr)
259 | 
260 |         // Only in the earlier traversal => removed.
261 |         let onlyInEarlier = earlier.subtracting(later)
262 |         fputs("debug: diff calculation - found \(onlyInEarlier.count) removed elements.\n", stderr)
263 | 
264 |         // Order both lists with the shared predicate for more consistent output.
265 |         let addedSorted = onlyInLater.sorted(by: elementSortPredicate)
266 |         let removedSorted = onlyInEarlier.sorted(by: elementSortPredicate)
267 | 
268 |         return TraversalDiff(added: addedSorted, removed: removedSorted, modified: [])
269 |     }
270 | 
271 |     // Ordering used for diff output: ascending `y`, then ascending `x` when `y` ties.
272 |     // (Original note: consistent with AccessibilityTraversalOperation.)
273 |     private static var elementSortPredicate: (ElementData, ElementData) -> Bool {
274 |         // A missing coordinate is replaced by the largest finite Double, so it sorts after finite values.
275 |         let missing = Double.greatestFiniteMagnitude
276 |         return { lhs, rhs in
277 |             let lhsKey = (lhs.y ?? missing, lhs.x ?? missing)
278 |             let rhsKey = (rhs.y ?? missing, rhs.x ?? missing)
279 |             return lhsKey < rhsKey // Tuple comparison is lexicographic: y first, x on a tie
280 |         }
281 |     }
282 | 
283 | 
284 |     // --- Combined Actions with Diffing ---
285 | 
286 |     /// Traverses the application, performs a left mouse click, traverses again, and reports what changed.
287 |     ///
288 |     /// - Parameters:
289 |     ///   - point: The `CGPoint` (screen coordinates) where the click is issued.
290 |     ///   - pid: The Process ID (PID) of the application to traverse.
291 |     ///   - onlyVisibleElements: Forwarded to both traversals; if true, only elements with valid position/size are collected. Defaults to false.
292 |     ///   - delayAfterActionNano: Nanoseconds to wait between the click and the second traversal. Default 100ms.
293 |     /// - Returns: An `ActionDiffResult` holding the traversal taken after the click and the diff against the one taken before it.
294 |     /// - Throws: Rethrows errors from either traversal or the click; the wait throws if the task is cancelled.
295 |     @MainActor
296 |     public static func clickWithDiff(
297 |         point: CGPoint,
298 |         pid: Int32,
299 |         onlyVisibleElements: Bool = false,
300 |         delayAfterActionNano: UInt64 = 100_000_000 // 100 ms default
301 |     ) async throws -> ActionDiffResult {
302 |         fputs("info: starting combined action 'clickWithDiff' at (\(point.x), \(point.y)) for PID \(pid)\n", stderr)
303 | 
304 |         // Baseline: the tree as it looks before anything is clicked.
305 |         fputs("info: calling traverseAccessibilityTree (before action)...\n", stderr)
306 |         let baseline = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
307 |         fputs("info: traversal (before action) completed.\n", stderr)
308 | 
309 |         // The action itself.
310 |         fputs("info: calling clickMouse...\n", stderr)
311 |         try MacosUseSDK.clickMouse(at: point)
312 |         fputs("info: clickMouse completed successfully.\n", stderr)
313 | 
314 |         // Let the UI settle for the requested time.
315 |         let delaySeconds = Double(delayAfterActionNano) / 1_000_000_000.0
316 |         fputs("info: waiting \(delaySeconds) seconds after action...\n", stderr)
317 |         try await Task.sleep(nanoseconds: delayAfterActionNano)
318 | 
319 |         // Second snapshot, taken after the click.
320 |         fputs("info: calling traverseAccessibilityTree (after action)...\n", stderr)
321 |         let updated = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
322 |         fputs("info: traversal (after action) completed.\n", stderr)
323 | 
324 |         // Compare the two element lists.
325 |         fputs("info: calculating traversal diff...\n", stderr)
326 |         let diff = calculateDiff(beforeElements: baseline.elements, afterElements: updated.elements)
327 |         fputs("info: diff calculation completed.\n", stderr)
328 | 
329 |         // The baseline only feeds the diff; the result carries the post-action traversal.
330 |         fputs("info: combined action 'clickWithDiff' finished.\n", stderr)
331 |         return ActionDiffResult(
332 |             afterAction: updated,
333 |             diff: diff
334 |         )
335 |     }
336 | 
337 |     /// Traverses the application, presses a key, traverses again, and reports what changed.
338 |     ///
339 |     /// - Parameters:
340 |     ///   - keyCode: The `CGKeyCode` of the key to press.
341 |     ///   - flags: The modifier flags (`CGEventFlags`) applied to the key press. Defaults to none.
342 |     ///   - pid: The Process ID (PID) of the application to traverse.
343 |     ///   - onlyVisibleElements: Forwarded to both traversals; if true, only elements with valid position/size are collected. Defaults to false.
344 |     ///   - delayAfterActionNano: Nanoseconds to wait between the key press and the second traversal. Default 100ms.
345 |     /// - Returns: An `ActionDiffResult` holding the traversal taken after the key press and the diff against the one taken before it.
346 |     /// - Throws: Rethrows errors from either traversal or the key press; the wait throws if the task is cancelled.
347 |     @MainActor
348 |     public static func pressKeyWithDiff(
349 |         keyCode: CGKeyCode,
350 |         flags: CGEventFlags = [],
351 |         pid: Int32,
352 |         onlyVisibleElements: Bool = false,
353 |         delayAfterActionNano: UInt64 = 100_000_000 // 100 ms default
354 |     ) async throws -> ActionDiffResult {
355 |         fputs("info: starting combined action 'pressKeyWithDiff' (key: \(keyCode), flags: \(flags.rawValue)) for PID \(pid)\n", stderr)
356 | 
357 |         // Baseline: the tree as it looks before the key is pressed.
358 |         fputs("info: calling traverseAccessibilityTree (before action)...\n", stderr)
359 |         let baseline = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
360 |         fputs("info: traversal (before action) completed.\n", stderr)
361 | 
362 |         // The action itself.
363 |         fputs("info: calling pressKey...\n", stderr)
364 |         try MacosUseSDK.pressKey(keyCode: keyCode, flags: flags)
365 |         fputs("info: pressKey completed successfully.\n", stderr)
366 | 
367 |         // Let the UI settle for the requested time.
368 |         let delaySeconds = Double(delayAfterActionNano) / 1_000_000_000.0
369 |         fputs("info: waiting \(delaySeconds) seconds after action...\n", stderr)
370 |         try await Task.sleep(nanoseconds: delayAfterActionNano)
371 | 
372 |         // Second snapshot, taken after the key press.
373 |         fputs("info: calling traverseAccessibilityTree (after action)...\n", stderr)
374 |         let updated = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
375 |         fputs("info: traversal (after action) completed.\n", stderr)
376 | 
377 |         // Compare the two element lists.
378 |         fputs("info: calculating traversal diff...\n", stderr)
379 |         let diff = calculateDiff(beforeElements: baseline.elements, afterElements: updated.elements)
380 |         fputs("info: diff calculation completed.\n", stderr)
381 | 
382 |         // The baseline only feeds the diff; the result carries the post-action traversal.
383 |         fputs("info: combined action 'pressKeyWithDiff' finished.\n", stderr)
384 |         return ActionDiffResult(
385 |             afterAction: updated,
386 |             diff: diff
387 |         )
388 |     }
389 | 
390 |     /// Snapshots the accessibility tree, types `text`, waits, snapshots again, and reports what changed.
391 |     ///
392 |     /// - Parameters:
393 |     ///   - text: The `String` to type.
394 |     ///   - pid: The Process ID (PID) of the application to traverse.
395 |     ///   - onlyVisibleElements: Passed unchanged to both `traverseAccessibilityTree` calls. Defaults to false.
396 |     ///   - delayAfterActionNano: Nanoseconds to sleep after typing, before the second traversal. Default 100ms.
397 |     /// - Returns: An `ActionDiffResult` built from the post-typing traversal and the before/after diff.
398 |     /// - Throws: Any error thrown by the traversals, by `writeText`, or by `Task.sleep` (e.g. on cancellation).
399 |     @MainActor
400 |     public static func writeTextWithDiff(
401 |         text: String,
402 |         pid: Int32,
403 |         onlyVisibleElements: Bool = false,
404 |         delayAfterActionNano: UInt64 = 100_000_000 // 100 ms default
405 |     ) async throws -> ActionDiffResult {
406 |         fputs("info: starting combined action 'writeTextWithDiff' (text: \"\(text)\") for PID \(pid)\n", stderr)
407 | 
408 |         // 1. Capture the tree as it looks before anything is typed.
409 |         fputs("info: calling traverseAccessibilityTree (before action)...\n", stderr)
410 |         let snapshotBefore = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
411 |         fputs("info: traversal (before action) completed.\n", stderr)
412 | 
413 |         // 2. Type the text.
414 |         fputs("info: calling writeText...\n", stderr)
415 |         try MacosUseSDK.writeText(text)
416 |         fputs("info: writeText completed successfully.\n", stderr)
417 | 
418 |         // 3. Give the UI time to react before looking at it again.
419 |         let pauseSeconds = Double(delayAfterActionNano) / 1_000_000_000.0
420 |         fputs("info: waiting \(pauseSeconds) seconds after action...\n", stderr)
421 |         try await Task.sleep(nanoseconds: delayAfterActionNano)
422 | 
423 |         // 4. Capture the tree again.
424 |         fputs("info: calling traverseAccessibilityTree (after action)...\n", stderr)
425 |         let snapshotAfter = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
426 |         fputs("info: traversal (after action) completed.\n", stderr)
427 | 
428 |         // 5. Compare the two captures.
429 |         fputs("info: calculating traversal diff...\n", stderr)
430 |         let changes = calculateDiff(beforeElements: snapshotBefore.elements, afterElements: snapshotAfter.elements)
431 |         fputs("info: diff calculation completed.\n", stderr)
432 | 
433 |         // 6. Hand back the second capture together with the diff.
434 |         fputs("info: combined action 'writeTextWithDiff' finished.\n", stderr)
435 |         return ActionDiffResult(
436 |             afterAction: snapshotAfter,
437 |             diff: changes
438 |         )
439 |     }
440 | 
441 |      // Add similar '...WithDiff' functions for doubleClick, rightClick, etc. as needed
442 | 
443 | 
444 |     // --- NEW: Combined Actions with Action Visualization AND Traversal Highlighting ---
445 | 
446 |     /// Performs a left click with visual feedback, bracketed by traversals (before action, after action),
447 |     /// highlights the elements from the second traversal, and returns that traversal together with the diff.
448 |     ///
449 |     /// - Parameters:
450 |     ///   - point: The `CGPoint` where the click should occur; the click feedback is shown at the same point.
451 |     ///   - pid: The Process ID (PID) of the application.
452 |     ///   - onlyVisibleElements: Forwarded unchanged to both `traverseAccessibilityTree` calls. Default false.
453 |     ///   - actionHighlightDuration: Duration (seconds) passed to `showVisualFeedback` for the click's `.circle` feedback. Default 0.5s.
454 |     ///   - traversalHighlightDuration: Duration (seconds) passed to `drawHighlightBoxes` for the second traversal's elements. Default 3.0s.
455 |     ///   - delayAfterActionNano: Nanoseconds to wait after the click before the second traversal. Default 100ms.
456 |     /// - Returns: An `ActionDiffResult` containing the second traversal's data and the diff.
457 |     /// - Throws: Any error thrown by the traversals, by `clickMouse`, or by `Task.sleep` (e.g. on cancellation).
458 |     @MainActor
459 |     public static func clickWithActionAndTraversalHighlight(
460 |         point: CGPoint,
461 |         pid: Int32,
462 |         onlyVisibleElements: Bool = false,
463 |         actionHighlightDuration: Double = 0.5, // Duration for the click pulse
464 |         traversalHighlightDuration: Double = 3.0, // Duration for highlighting elements
465 |         delayAfterActionNano: UInt64 = 100_000_000 // 100 ms default
466 |     ) async throws -> ActionDiffResult {
467 |         fputs("info: starting combined action 'clickWithActionAndTraversalHighlight' at (\(point.x), \(point.y)) for PID \(pid)\n", stderr)
468 | 
469 |         // Step 1: Traverse Before Action
470 |         fputs("info: calling traverseAccessibilityTree (before action)...\n", stderr)
471 |         let beforeTraversal = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
472 |         fputs("info: traversal (before action) completed.\n", stderr)
473 | 
474 |         // Step 2a: Perform the Click (Input Simulation Only)
475 |         fputs("info: calling clickMouse...\n", stderr)
476 |         try MacosUseSDK.clickMouse(at: point)
477 |         fputs("info: clickMouse completed successfully.\n", stderr)
478 | 
479 |         // Step 2b: Show Click Visualization
480 |         fputs("info: dispatching showVisualFeedback for click (duration: \(actionHighlightDuration)s)...\n", stderr)
481 |         // This function is itself @MainActor-isolated, so the @MainActor `showVisualFeedback` can be called
482 |         // directly. Wrapping it in an unstructured `Task` added nothing except postponing the overlay until
483 |         // this function's next suspension point.
484 |         MacosUseSDK.showVisualFeedback(at: point, type: .circle, duration: actionHighlightDuration)
485 |         fputs("info: showVisualFeedback for click dispatched.\n", stderr)
486 | 
487 | 
488 |         // Step 3: Wait for UI to Update (after action, before second traversal)
489 |         fputs("info: waiting \(Double(delayAfterActionNano) / 1_000_000_000.0) seconds after action...\n", stderr)
490 |         try await Task.sleep(nanoseconds: delayAfterActionNano)
491 | 
492 |         // Step 4: Traverse After Action (Standard Traversal)
493 |         fputs("info: calling traverseAccessibilityTree (after action)...\n", stderr)
494 |         let afterTraversal = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
495 |         fputs("info: traversal (after action) completed.\n", stderr)
496 | 
497 |         // Step 5: Calculate Diff using data from the two traversals
498 |         fputs("info: calculating traversal diff...\n", stderr)
499 |         let diff = calculateDiff(beforeElements: beforeTraversal.elements, afterElements: afterTraversal.elements)
500 |         fputs("info: diff calculation completed.\n", stderr)
501 | 
502 |         // Step 6: Dispatch Highlighting of the "After" Elements
503 |         fputs("info: calling drawHighlightBoxes (duration: \(traversalHighlightDuration)s) for afterTraversal elements...\n", stderr)
504 |         // This call returns immediately after dispatching the UI work.
505 |         // It uses the @MainActor function drawHighlightBoxes.
506 |         drawHighlightBoxes(for: afterTraversal.elements, duration: traversalHighlightDuration)
507 |         fputs("info: drawHighlightBoxes dispatched highlight drawing.\n", stderr)
508 | 
509 |         // Step 7: Prepare and Return Result (using data from the *second* traversal)
510 |         let result = ActionDiffResult(
511 |             afterAction: afterTraversal, // Contains data from the second traversal
512 |             diff: diff
513 |         )
514 |         fputs("info: combined action 'clickWithActionAndTraversalHighlight' finished returning result.\n", stderr)
515 |         // IMPORTANT: Highlighting cleanup happens asynchronously later.
516 |         return result
517 |     }
518 | 
519 | 
520 |     /// Presses a key with visual feedback (caption), bracketed by traversals (before action, after action),
521 |     /// highlights the elements from the second traversal, and returns that traversal together with the diff.
522 |     ///
523 |     /// - Parameters:
524 |     ///   - keyCode: The `CGKeyCode` of the key to press.
525 |     ///   - flags: The modifier flags (`CGEventFlags`). Default is no modifiers.
526 |     ///   - pid: The Process ID (PID) of the application.
527 |     ///   - onlyVisibleElements: Forwarded unchanged to both `traverseAccessibilityTree` calls. Default false.
528 |     ///   - actionHighlightDuration: Duration (seconds) for the "[KEY PRESS]" caption shown at the main screen's center. Default 0.8s.
529 |     ///   - traversalHighlightDuration: Duration (seconds) passed to `drawHighlightBoxes` for the second traversal's elements. Default 3.0s.
530 |     ///   - delayAfterActionNano: Nanoseconds to wait after the key press before the second traversal. Default 100ms.
531 |     /// - Returns: An `ActionDiffResult` containing the second traversal's data and the diff.
532 |     /// - Throws: Any error thrown by the traversals, by `pressKey`, or by `Task.sleep` (e.g. on cancellation).
533 |     @MainActor
534 |     public static func pressKeyWithActionAndTraversalHighlight(
535 |         keyCode: CGKeyCode,
536 |         flags: CGEventFlags = [],
537 |         pid: Int32,
538 |         onlyVisibleElements: Bool = false,
539 |         actionHighlightDuration: Double = 0.8, // Duration for visualization caption
540 |         traversalHighlightDuration: Double = 3.0, // Duration for highlighting elements
541 |         delayAfterActionNano: UInt64 = 100_000_000 // 100 ms default
542 |     ) async throws -> ActionDiffResult {
543 |         fputs("info: starting combined action 'pressKeyWithActionAndTraversalHighlight' (key: \(keyCode), flags: \(flags.rawValue)) for PID \(pid)\n", stderr)
544 | 
545 |         // Step 1: Traverse Before Action
546 |         fputs("info: calling traverseAccessibilityTree (before action)...\n", stderr)
547 |         let beforeTraversal = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
548 |         fputs("info: traversal (before action) completed.\n", stderr)
549 | 
550 |         // Step 2a: Perform the Key Press (Input Simulation Only)
551 |         fputs("info: calling pressKey (key: \(keyCode), flags: \(flags.rawValue))...\n", stderr)
552 |         try MacosUseSDK.pressKey(keyCode: keyCode, flags: flags)
553 |         fputs("info: pressKey completed successfully.\n", stderr)
554 | 
555 |         // Step 2b: Show Key Press Visualization (Caption)
556 |         let captionText = "[KEY PRESS]"
557 |         let captionSize = CGSize(width: 250, height: 80) // Keep caption size definition here or centralize
558 |         fputs("info: dispatching showVisualFeedback for key press (duration: \(actionHighlightDuration)s)...\n", stderr)
559 |         // Already on the main actor: call the feedback helper directly rather than through an unstructured
560 |         // `Task`, which would only defer the caption until this function next suspends.
561 |         if let screenCenter = getMainScreenCenter() {
562 |             MacosUseSDK.showVisualFeedback(
563 |                 at: screenCenter,
564 |                 type: .caption(text: captionText),
565 |                 size: captionSize,
566 |                 duration: actionHighlightDuration
567 |             )
568 |         } else {
569 |             // Non-fatal: the key press itself has already been sent; only the caption is skipped.
570 |             fputs("warning: [\(#function)] could not get screen center for key press caption.\n", stderr)
571 |         }
572 |         fputs("info: showVisualFeedback for key press dispatched.\n", stderr)
573 | 
574 | 
575 |         // Step 3: Wait for UI to Update
576 |         fputs("info: waiting \(Double(delayAfterActionNano) / 1_000_000_000.0) seconds after action...\n", stderr)
577 |         try await Task.sleep(nanoseconds: delayAfterActionNano)
578 | 
579 |         // Step 4: Traverse After Action
580 |         fputs("info: calling traverseAccessibilityTree (after action)...\n", stderr)
581 |         let afterTraversal = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
582 |         fputs("info: traversal (after action) completed.\n", stderr)
583 | 
584 |         // Step 5: Calculate Diff
585 |         fputs("info: calculating traversal diff...\n", stderr)
586 |         let diff = calculateDiff(beforeElements: beforeTraversal.elements, afterElements: afterTraversal.elements)
587 |         fputs("info: diff calculation completed.\n", stderr)
588 | 
589 |         // Step 6: Dispatch Highlighting of the "After" Elements
590 |         fputs("info: calling drawHighlightBoxes (duration: \(traversalHighlightDuration)s) for afterTraversal elements...\n", stderr)
591 |         drawHighlightBoxes(for: afterTraversal.elements, duration: traversalHighlightDuration)
592 |         fputs("info: drawHighlightBoxes dispatched highlight drawing.\n", stderr)
593 | 
594 | 
595 |         // Step 7: Prepare and Return Result
596 |         let result = ActionDiffResult(
597 |             afterAction: afterTraversal,
598 |             diff: diff
599 |         )
600 |         fputs("info: combined action 'pressKeyWithActionAndTraversalHighlight' finished returning result.\n", stderr)
601 |         // IMPORTANT: Highlighting cleanup happens asynchronously later.
602 |         return result
603 |     }
604 | 
605 |     /// Types text with visual feedback (caption), bracketed by traversals (before action, after action),
606 |     /// highlights the elements from the second traversal, and returns that traversal together with the diff.
607 |     ///
608 |     /// - Parameters:
609 |     ///   - text: The `String` to type; it is also the text shown in the caption.
610 |     ///   - pid: The Process ID (PID) of the application.
611 |     ///   - onlyVisibleElements: Forwarded unchanged to both `traverseAccessibilityTree` calls. Default false.
612 |     ///   - actionHighlightDuration: Caption duration in seconds. When nil, `max(1.0, 0.5 + 0.05 * text.count)` is used.
613 |     ///   - traversalHighlightDuration: Duration (seconds) passed to `drawHighlightBoxes` for the second traversal's elements. Default 3.0s.
614 |     ///   - delayAfterActionNano: Nanoseconds to wait after typing before the second traversal. Default 100ms.
615 |     /// - Returns: An `ActionDiffResult` containing the second traversal's data and the diff.
616 |     /// - Throws: Any error thrown by the traversals, by `writeText`, or by `Task.sleep` (e.g. on cancellation).
617 |     @MainActor
618 |     public static func writeTextWithActionAndTraversalHighlight(
619 |         text: String,
620 |         pid: Int32,
621 |         onlyVisibleElements: Bool = false,
622 |         actionHighlightDuration: Double? = nil, // Duration for visualization caption (optional, calculated if nil)
623 |         traversalHighlightDuration: Double = 3.0, // Duration for highlighting elements
624 |         delayAfterActionNano: UInt64 = 100_000_000 // 100 ms default
625 |     ) async throws -> ActionDiffResult {
626 |         fputs("info: starting combined action 'writeTextWithActionAndTraversalHighlight' (text: \"\(text)\") for PID \(pid)\n", stderr)
627 | 
628 |         // Step 1: Traverse Before Action
629 |         fputs("info: calling traverseAccessibilityTree (before action)...\n", stderr)
630 |         let beforeTraversal = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
631 |         fputs("info: traversal (before action) completed.\n", stderr)
632 | 
633 |         // Step 2a: Perform the Text Writing (Input Simulation Only)
634 |         fputs("info: calling writeText (\"\(text)\")...\n", stderr)
635 |         try MacosUseSDK.writeText(text)
636 |         fputs("info: writeText completed successfully.\n", stderr)
637 | 
638 |         // Step 2b: Show Text Writing Visualization (Caption)
639 |         let defaultDuration = 1.0
640 |         let calculatedDuration = max(defaultDuration, 0.5 + Double(text.count) * 0.05) // Longer text stays up longer
641 |         let finalDuration = actionHighlightDuration ?? calculatedDuration // Use provided or calculated duration
642 |         let captionSize = CGSize(width: 450, height: 100) // Keep caption size definition here or centralize
643 |         fputs("info: dispatching showVisualFeedback for write text (duration: \(finalDuration)s)...\n", stderr)
644 |         // Already on the main actor: call the feedback helper directly rather than through an unstructured
645 |         // `Task`, which would only defer the caption until this function next suspends.
646 |         if let screenCenter = getMainScreenCenter() {
647 |             MacosUseSDK.showVisualFeedback(
648 |                 at: screenCenter,
649 |                 type: .caption(text: text), // Show the actual typed text
650 |                 size: captionSize,
651 |                 duration: finalDuration
652 |             )
653 |         } else {
654 |             // Non-fatal: the text has already been typed; only the caption is skipped.
655 |             fputs("warning: [\(#function)] could not get screen center for write text caption.\n", stderr)
656 |         }
657 |         fputs("info: showVisualFeedback for write text dispatched.\n", stderr)
658 | 
659 | 
660 |         // Step 3: Wait for UI to Update
661 |         fputs("info: waiting \(Double(delayAfterActionNano) / 1_000_000_000.0) seconds after action...\n", stderr)
662 |         try await Task.sleep(nanoseconds: delayAfterActionNano)
663 | 
664 |         // Step 4: Traverse After Action
665 |         fputs("info: calling traverseAccessibilityTree (after action)...\n", stderr)
666 |         let afterTraversal = try MacosUseSDK.traverseAccessibilityTree(pid: pid, onlyVisibleElements: onlyVisibleElements)
667 |         fputs("info: traversal (after action) completed.\n", stderr)
668 | 
669 |         // Step 5: Calculate Diff
670 |         fputs("info: calculating traversal diff...\n", stderr)
671 |         let diff = calculateDiff(beforeElements: beforeTraversal.elements, afterElements: afterTraversal.elements)
672 |         fputs("info: diff calculation completed.\n", stderr)
673 | 
674 |         // Step 6: Dispatch Highlighting of the "After" Elements
675 |         fputs("info: calling drawHighlightBoxes (duration: \(traversalHighlightDuration)s) for afterTraversal elements...\n", stderr)
676 |         drawHighlightBoxes(for: afterTraversal.elements, duration: traversalHighlightDuration)
677 |         fputs("info: drawHighlightBoxes dispatched highlight drawing.\n", stderr)
678 | 
679 |         // Step 7: Prepare and Return Result
680 |         let result = ActionDiffResult(
681 |             afterAction: afterTraversal,
682 |             diff: diff
683 |         )
684 |         fputs("info: combined action 'writeTextWithActionAndTraversalHighlight' finished returning result.\n", stderr)
685 |         // IMPORTANT: Highlighting cleanup happens asynchronously later.
686 |         return result
687 |     }
688 | 
689 | }
690 | 


--------------------------------------------------------------------------------
/Sources/MacosUseSDK/DrawVisuals.swift:
--------------------------------------------------------------------------------
  1 | // REMOVED: #!/usr/bin/env swift
  2 | // REMOVED: import Cocoa
  3 | import AppKit
  4 | import Foundation
  5 | 
  6 | /// Kinds of transient on-screen feedback that `OverlayView` knows how to draw.
  7 | public enum FeedbackType {
  8 |     case box(text: String) // Red outlined frame; `text` is drawn inside it unless it is empty
  9 |     case circle           // Filled green circle centered in the overlay
 10 |     case caption(text: String) // Large white text on a semi-transparent black rounded background
 11 | }
 12 | 
 13 | /// Custom view that renders one `FeedbackType` (outlined box with label, filled circle, or caption).
 14 | internal class OverlayView: NSView {
 15 |     var feedbackType: FeedbackType = .box(text: "") // Property to hold the type and data
 16 | 
 17 |     // Constants for drawing
 18 |     let padding: CGFloat = 10 // Increased padding for caption
 19 |     let frameLineWidth: CGFloat = 2
 20 |     let circleRadius: CGFloat = 15 // Radius for the circle feedback
 21 |     let captionFontSize: CGFloat = 36 // Font size for caption
 22 |     let captionBackgroundColor = NSColor.black.withAlphaComponent(0.6) // Semi-transparent black background
 23 |     let captionTextColor = NSColor.white
 24 | 
 25 |     /// Renders the feedback selected by `feedbackType`.
 26 |     override func draw(_ dirtyRect: NSRect) {
 27 |         super.draw(dirtyRect)
 28 | 
 29 |         switch feedbackType {
 30 |         case .box(let displayText):
 31 |             drawBox(with: displayText)
 32 |         case .circle:
 33 |             drawCircle()
 34 |         case .caption(let captionText):
 35 |             drawCaption(with: captionText)
 36 |         }
 37 |     }
 38 | 
 39 |     /// Fills a green circle centered in the view, shrinking the radius when the view is smaller than the circle.
 40 |     private func drawCircle() {
 41 |         // fputs("debug: OverlayView drawing circle\n", stderr)
 42 |         // fputs("debug: Setting circle fill color to green.\n", stderr) // Updated log message
 43 |         NSColor.green.setFill() // Set fill color instead of stroke
 44 | 
 45 |         let center = NSPoint(x: bounds.midX, y: bounds.midY)
 46 |         // Ensure the circle fits within the bounds if bounds are smaller than diameter
 47 |         let effectiveRadius = min(circleRadius, bounds.width / 2.0, bounds.height / 2.0)
 48 |         guard effectiveRadius > 0 else { return } // Don't draw if too small
 49 | 
 50 |         let circleRect = NSRect(x: center.x - effectiveRadius, y: center.y - effectiveRadius,
 51 |                                 width: effectiveRadius * 2, height: effectiveRadius * 2)
 52 |         let path = NSBezierPath(ovalIn: circleRect)
 53 |         // path.lineWidth = frameLineWidth // No longer needed for fill
 54 |         path.fill() // Fill the path instead of stroking it
 55 |     }
 56 | 
 57 |     /// Strokes a red frame inside the bounds and draws `displayText` (ellipsized if too wide) near its top-left.
 58 |     private func drawBox(with displayText: String) {
 59 |         // --- Frame Drawing ---
 60 |         NSColor.red.setStroke()
 61 |         let frameInset = frameLineWidth / 2.0
 62 |         let frameRect = bounds.insetBy(dx: frameInset, dy: frameInset)
 63 |         let path = NSBezierPath(rect: frameRect)
 64 |         path.lineWidth = frameLineWidth
 65 |         path.stroke()
 66 |         // fputs("debug: OverlayView drew frame at \(frameRect)\n", stderr)
 67 | 
 68 |         // --- Text Drawing with Truncation ---
 69 |         if !displayText.isEmpty {
 70 |             // Define text attributes
 71 |             let textColor = NSColor.red
 72 |             // Slightly smaller font for potentially many overlays
 73 |             let textFont = NSFont.systemFont(ofSize: 10.0) // NSFont.smallSystemFontSize)
 74 |             let textAttributes: [NSAttributedString.Key: Any] = [
 75 |                 .font: textFont,
 76 |                 .foregroundColor: textColor
 77 |             ]
 78 | 
 79 |             // Calculate available width for text (bounds - frame lines - padding on both sides)
 80 |             let availableWidth = max(0, bounds.width - (frameLineWidth * 2.0) - (padding * 2.0))
 81 |             var stringToDraw = displayText
 82 |             var textSize = stringToDraw.size(withAttributes: textAttributes)
 83 | 
 84 |             // Check if truncation is needed
 85 |             if textSize.width > availableWidth && availableWidth > 0 {
 86 |                 // fputs("debug: OverlayView truncating text '\(stringToDraw)' (\(textSize.width)) > available \(availableWidth)\n", stderr)
 87 |                 let ellipsis = "…" // Use ellipsis character
 88 |                 let ellipsisSize = ellipsis.size(withAttributes: textAttributes)
 89 | 
 90 |                 // Keep removing characters until text + ellipsis fits
 91 |                 while !stringToDraw.isEmpty && (stringToDraw.size(withAttributes: textAttributes).width + ellipsisSize.width > availableWidth) {
 92 |                     stringToDraw.removeLast()
 93 |                 }
 94 |                 stringToDraw += ellipsis
 95 |                 textSize = stringToDraw.size(withAttributes: textAttributes) // Recalculate size
 96 |                 // fputs("debug: OverlayView truncated to '\(stringToDraw)' (\(textSize.width))\n", stderr)
 97 |             }
 98 | 
 99 |             // Vertical space left for text; clamped to 0 when the box is shorter than frame lines + padding.
100 |             let availableHeight = max(0, bounds.height - (frameLineWidth * 2.0) - (padding * 2.0))
101 | 
102 |             // Position the (potentially truncated) text at the top-left of the padded area.
103 |             // X: frame line width + padding from the left edge.
104 |             // Y: raised by the unused height, which top-aligns the text (this view does not flip its coordinates).
105 |             let textX = frameLineWidth + padding
106 |             let textY = frameLineWidth + padding + (availableHeight - textSize.height) // Top align
107 |             let textPoint = NSPoint(x: textX, y: textY)
108 | 
109 |             // Draw the text string
110 |             // fputs("debug: OverlayView drawing text '\(stringToDraw)' at \(textPoint)\n", stderr)
111 |             (stringToDraw as NSString).draw(at: textPoint, withAttributes: textAttributes)
112 |         }
113 |     }
114 | 
115 |     /// Fills a rounded, semi-transparent background and draws `text` centered on it in the caption font.
116 |     /// Text wider than the padded area is shortened with a trailing ellipsis instead of wrapping out of view.
117 |     private func drawCaption(with text: String) {
118 |         fputs("debug: OverlayView drawing caption: '\(text)'\n", stderr)
119 | 
120 |         // Draw background
121 |         captionBackgroundColor.setFill()
122 |         let backgroundRect = bounds.insetBy(dx: frameLineWidth / 2.0, dy: frameLineWidth / 2.0) // Adjust for potential border line width if we add one later
123 |         let backgroundPath = NSBezierPath(roundedRect: backgroundRect, xRadius: 8, yRadius: 8) // Rounded corners
124 |         backgroundPath.fill()
125 | 
126 |         // --- Text Drawing ---
127 |         if !text.isEmpty {
128 |             // Define text attributes
129 |             let textFont = NSFont.systemFont(ofSize: captionFontSize, weight: .medium)
130 |             let paragraphStyle = NSMutableParagraphStyle()
131 |             paragraphStyle.alignment = .center // Center align text
132 |             // The text rect below is only one text-height tall, so over-wide text must truncate rather than wrap.
133 |             paragraphStyle.lineBreakMode = .byTruncatingTail
134 | 
135 |             let textAttributes: [NSAttributedString.Key: Any] = [
136 |                 .font: textFont,
137 |                 .foregroundColor: captionTextColor,
138 |                 .paragraphStyle: paragraphStyle
139 |             ]
140 | 
141 |             // Calculate available area for text (bounds - padding)
142 |             let availableRect = bounds.insetBy(dx: padding, dy: padding)
143 |             let stringToDraw = text
144 |             let textSize = stringToDraw.size(withAttributes: textAttributes)
145 | 
146 |             // Warn when the text does not fit the available area (though less likely for centered captions)
147 |             if textSize.width > availableRect.width && availableRect.width > 0 {
148 |                 fputs("warning: Caption text '\(stringToDraw)' (\(textSize.width)) wider than available \(availableRect.width), may clip.\n", stderr)
149 |                 // The paragraph style's tail truncation shortens the drawn text with an ellipsis
150 |             }
151 |             if textSize.height > availableRect.height {
152 |                 fputs("warning: Caption text '\(stringToDraw)' (\(textSize.height)) taller than available \(availableRect.height), may clip.\n", stderr)
153 |             }
154 | 
155 |             // Full-width rect, vertically centered in the available area; the paragraph style centers horizontally
156 |             let textX = availableRect.origin.x
157 |             let textY = availableRect.origin.y + (availableRect.height - textSize.height) / 2.0 // Center vertically
158 |             let textRect = NSRect(x: textX, y: textY, width: availableRect.width, height: textSize.height)
159 | 
160 | 
161 |             // Draw the text string centered
162 |             fputs("debug: OverlayView drawing caption text '\(stringToDraw)' in rect \(textRect)\n", stderr)
163 |             (stringToDraw as NSString).draw(in: textRect, withAttributes: textAttributes)
164 |         } else {
165 |             fputs("debug: OverlayView no caption text to draw.\n", stderr)
166 |         }
167 |     }
168 | 
169 |     /// Creates a view with the given frame that will draw `type`.
170 |     init(frame frameRect: NSRect, type: FeedbackType) {
171 |         self.feedbackType = type
172 |         super.init(frame: frameRect)
173 |         // fputs("debug: OverlayView initialized with frame \(frameRect) type \(type)\n", stderr)
174 |     }
175 | 
176 |     required init?(coder: NSCoder) {
177 |         fatalError("init(coder:) has not been implemented")
178 |     }
179 | }
180 | 
181 | // --- REMOVED AppDelegate Class Definition ---
182 | 
183 | // --- REMOVED Top-Level Application Entry Point Code (app creation, delegate, argument parsing, app.run) ---
184 | 
185 | 
186 | // --- Internal Window Creation Helper ---
187 | /// Creates a configured, borderless overlay window hosting an `OverlayView`, without showing it.
188 | ///
189 | /// - Parameters:
190 | ///   - frame: Content rectangle for the window, passed straight to the `NSWindow` initializer.
191 | ///   - type: Feedback to render; forwarded to the `OverlayView` that becomes the content view.
192 | /// - Returns: The configured window. This function does not order it on screen.
193 | @MainActor
194 | internal func createOverlayWindow(frame: NSRect, type: FeedbackType) -> NSWindow {
195 |     fputs("debug: Creating overlay window with frame: \(frame), type: \(type)\n", stderr) // Log includes type now
196 |     let window = NSWindow(
197 |         contentRect: frame,
198 |         styleMask: [.borderless],
199 |         backing: .buffered,
200 |         defer: false
201 |     )
202 | 
203 |     // Configuration for transparent, floating overlay
204 |     window.isOpaque = false
205 |     // Clear for every feedback type: `OverlayView` paints all visible content itself, including the
206 |     // caption's translucent background, so no per-type branching is needed here.
207 |     window.backgroundColor = .clear
208 |     window.hasShadow = false        // No window shadow
209 |     window.level = .floating        // Keep above normal windows
210 |     window.collectionBehavior = [.canJoinAllSpaces, .stationary, .ignoresCycle] // Visible on all spaces
211 |     window.isMovableByWindowBackground = false // Prevent accidental dragging
212 | 
213 |     // Create and set the custom view
214 |     let overlayFrame = window.contentView?.bounds ?? NSRect(origin: .zero, size: frame.size)
215 |     let overlayView = OverlayView(frame: overlayFrame, type: type)
216 |     window.contentView = overlayView
217 |     // fputs("debug: Set OverlayView with frame \(overlayFrame) for window.\n", stderr)
218 | 
219 |     return window
220 | }
221 | 
222 | // --- Helper Function to Get Main Screen Center (Moved from HighlightInput.swift) ---
223 | /// Returns the midpoint of the frame of `NSScreen.main`.
224 | ///
225 | /// The point is expressed in the same AppKit screen coordinates as the frame it is derived from.
226 | /// - Returns: The frame's (`midX`, `midY`) as a `CGPoint`, or nil (after logging an error) when there is
227 | ///   no main screen.
228 | public func getMainScreenCenter() -> CGPoint? {
229 |     if let mainFrame = NSScreen.main?.frame {
230 |         // `midX` / `midY` are the frame's midpoints along each axis.
231 |         // fputs("debug: calculated main screen center from rect \(mainFrame)\n", stderr)
232 |         return CGPoint(x: mainFrame.midX, y: mainFrame.midY)
233 |     }
234 | 
235 |     // No main screen available: report it and let the caller decide what to do.
236 |     fputs("error: could not get main screen.\n", stderr)
237 |     return nil
238 | }
239 | 
// --- Public API Function for Simple Visual Feedback ---
/// Displays a visual indicator (e.g., a circle, a caption) in an overlay window centered on the given point.
///
/// Circles get a pulse/fade animation; captions get a short entrance animation followed by a delayed
/// fade-out; any other type is shown without animation. This function does not close the overlay
/// window: after a circle or caption animation finishes the layer is left fully transparent, and the
/// window itself stays around until the process exits.
///
/// - Parameters:
///   - point: The center point (`CGPoint`) for the feedback, in screen coordinates with a top-left origin
///     (the Y value is flipped against the primary screen's height before positioning the window).
///   - type: The type of feedback to display (`FeedbackType`).
///   - size: The desired size (width/height) of the overlay window. Ignored for `.circle`, where the size is
///     derived from the animation's maximum scale plus padding. Consider larger values for captions.
///   - duration: Animation timing in seconds. The circle pulse lasts this long; the caption fade-out is
///     scheduled so that it ends at this time, but never starts before the 0.2 s entrance has completed.
@MainActor // Ensure this runs on the main thread
public func showVisualFeedback(at point: CGPoint, type: FeedbackType, size: CGSize = CGSize(width: 30, height: 30), duration: Double = 0.5) {
    // Requires main thread for UI work
    guard Thread.isMainThread else {
        fputs("warning: showVisualFeedback called off main thread, dispatching. Point: \(point), Type: \(type)\n", stderr)
        DispatchQueue.main.async {
            showVisualFeedback(at: point, type: type, size: size, duration: duration)
        }
        return
    }

    // --- Calculate Required Size ---
    // Single source of truth for the pulse's final scale; also used by the animation below.
    let maxCircleScale: CGFloat = 1.8
    // Base circle radius; expected to match what OverlayView draws (not visible here — confirm).
    let circleRadius: CGFloat = 15.0
    // Extra room added to the window edge length so the scaled circle is not clipped.
    let circlePadding: CGFloat = 100.0

    let effectiveSize: CGSize
    if case .circle = type {
        // Diameter at maximum scale plus padding, rounded up to whole points.
        let maxDiameter = circleRadius * 2.0 * maxCircleScale
        let paddedSize = ceil(maxDiameter + circlePadding)
        effectiveSize = CGSize(width: paddedSize, height: paddedSize)
        fputs("info: showVisualFeedback using calculated size \(effectiveSize) for .circle type (ignores input size \(size)).\n", stderr)
    } else {
        // Use provided or default size for other types (box, caption)
        effectiveSize = size
        fputs("info: showVisualFeedback called for point \(point), type \(type), size \(effectiveSize), duration \(duration)s.\n", stderr)
    }

    // --- Coordinate Conversion (Using AppKit bottom-left origin) ---
    // Top-left-origin global coordinates are measured from the primary display, so the flip must use
    // the primary screen's height (`screens.first`). `NSScreen.main` is the screen with the key
    // window and can be a different display on multi-monitor setups.
    let screenHeight = NSScreen.screens.first?.frame.height ?? 0
    if screenHeight == 0 {
        fputs("warning: Could not get main screen height, coordinates might be incorrect.\n", stderr)
    }
    // Calculate origin based on the center point provided and the *effective* size
    let originX = point.x - (effectiveSize.width / 2.0)
    let originY = screenHeight - point.y - (effectiveSize.height / 2.0) // Convert Y from top-left to bottom-left
    let frame = NSRect(x: originX, y: originY, width: effectiveSize.width, height: effectiveSize.height)
    fputs("debug: Creating feedback window with AppKit frame: \(frame)\n", stderr)

    // --- Create Window ---
    let window = createOverlayWindow(frame: frame, type: type)

    // --- Make Window Visible ---
    window.makeKeyAndOrderFront(nil)

    // --- Apply Animation (Only for Circle or Caption Type) ---
    guard let overlayView = window.contentView as? OverlayView else {
        // contentView isn't the expected OverlayView or is nil
        fputs("warning: Could not get OverlayView from window content for animation.\n", stderr)
        fputs("debug: Visual feedback window displayed. It will remain until the tool exits.\n", stderr)
        return
    }
    overlayView.wantsLayer = true // Ensure the view has a layer for animation

    if case .circle = type {
        fputs("debug: Applying pulse/fade animation to circle overlay layer.\n", stderr)
        // --- Circle Pulse/Fade Animation ---
        let scaleAnimation = CABasicAnimation(keyPath: "transform.scale")
        scaleAnimation.fromValue = 0.7
        scaleAnimation.toValue = maxCircleScale
        scaleAnimation.duration = duration

        let opacityAnimation = CABasicAnimation(keyPath: "opacity")
        opacityAnimation.fromValue = 0.8
        opacityAnimation.toValue = 0.0
        opacityAnimation.duration = duration

        let animationGroup = CAAnimationGroup()
        animationGroup.animations = [scaleAnimation, opacityAnimation]
        animationGroup.duration = duration
        animationGroup.timingFunction = CAMediaTimingFunction(name: .easeOut)
        // Hold the final (transparent, scaled) state instead of snapping back to the model values.
        animationGroup.fillMode = .forwards
        animationGroup.isRemovedOnCompletion = false
        overlayView.layer?.add(animationGroup, forKey: "pulseFadeEffect")

    } else if case .caption = type {
        fputs("debug: Applying entrance and fade-out animations to caption overlay layer.\n", stderr)

        // --- Caption Entrance Animation (Scale Up & Fade In) ---
        let entranceDuration = 0.2 // Duration for the entrance effect
        let scaleInAnimation = CABasicAnimation(keyPath: "transform.scale")
        scaleInAnimation.fromValue = 0.7 // Start slightly smaller
        scaleInAnimation.toValue = 1.0   // Scale to normal size
        scaleInAnimation.duration = entranceDuration

        let fadeInAnimation = CABasicAnimation(keyPath: "opacity")
        fadeInAnimation.fromValue = 0.0 // Start fully transparent
        fadeInAnimation.toValue = 1.0   // Fade to fully opaque
        fadeInAnimation.duration = entranceDuration

        let entranceGroup = CAAnimationGroup()
        entranceGroup.animations = [scaleInAnimation, fadeInAnimation]
        entranceGroup.duration = entranceDuration
        entranceGroup.timingFunction = CAMediaTimingFunction(name: .easeOut)
        // `.backwards` applies the initial state (small, transparent) before the animation starts.
        // The default `isRemovedOnCompletion = true` restores the layer's normal state afterwards.
        entranceGroup.fillMode = .backwards
        overlayView.layer?.add(entranceGroup, forKey: "captionEntranceEffect")

        // --- Caption Fade-Out Animation (Starts near the end) ---
        let fadeOutDuration = 0.3 // Duration of the fade-out
        // Ensure fade-out doesn't start before entrance completes if total duration is very short
        let fadeOutStartTime = max(entranceDuration, duration - fadeOutDuration)

        let fadeOutAnimation = CABasicAnimation(keyPath: "opacity")
        fadeOutAnimation.fromValue = 1.0 // Start opaque
        fadeOutAnimation.toValue = 0.0   // Fade to transparent
        fadeOutAnimation.duration = fadeOutDuration
        // Use CACurrentMediaTime() + delay to schedule the start
        fadeOutAnimation.beginTime = CACurrentMediaTime() + fadeOutStartTime
        fadeOutAnimation.fillMode = .forwards // Keep final state (transparent)
        fadeOutAnimation.isRemovedOnCompletion = false // Keep the transparent state applied
        overlayView.layer?.add(fadeOutAnimation, forKey: "captionFadeOut")

    } else {
        // A type without specific animation handling (e.g. box)
        fputs("debug: Animation skipped (unhandled FeedbackType or view issue).\n", stderr)
    }

    fputs("debug: Visual feedback window displayed. It will remain until the tool exits.\n", stderr)
}
373 | 
// --- NEW Public API Function for Drawing Highlight Boxes ---
/// Draws overlay windows (highlight boxes) around the specified accessibility elements.
///
/// This function *only* draws; it does not perform accessibility traversal.
/// Call `traverseAccessibilityTree` first to get the `ElementData`.
/// Each box is labelled with the element's text, or with its role when the text is missing or empty.
///
/// - Important: This function schedules UI work on the main dispatch queue.
///              It should be called from a context where the main run loop is active.
///              The function itself returns immediately; the overlays appear asynchronously.
///              The overlays are not removed by this function and stay until the process exits.
///
/// - Parameter elementsToHighlightInput: An array of `ElementData` representing the elements to highlight.
///                                 Only elements with valid geometry (x, y, width > 0, height > 0) will be highlighted.
/// - Parameter duration: Intended visibility time in seconds (default 3.0). Currently it is only reported in the
///                       log output; no removal of the overlays is scheduled here.
@MainActor // Ensure UI work happens on the main thread
public func drawHighlightBoxes(for elementsToHighlightInput: [ElementData], duration: Double = 3.0) {
    fputs("info: drawHighlightBoxes called for \(elementsToHighlightInput.count) elements, duration \(duration)s.\n", stderr)

    // 1. Keep only elements that carry a position and a strictly positive size.
    let elementsToHighlight = elementsToHighlightInput.filter { element in
        guard element.x != nil, element.y != nil,
              let width = element.width, let height = element.height else {
            return false
        }
        return width > 0 && height > 0
    }

    // 2. Check if there's anything to highlight
    if elementsToHighlight.isEmpty {
        fputs("info: No elements with valid geometry provided to highlight.\n", stderr)
        return // Nothing to do
    }

    fputs("info: Filtered down to \(elementsToHighlight.count) elements with valid geometry to highlight.\n", stderr)

    // 3. Dispatch UI work to the main thread asynchronously
    DispatchQueue.main.async { // This block executes on the main actor
        var overlayWindows: [NSWindow] = []

        fputs("info: [Main Thread] Creating \(elementsToHighlight.count) overlay windows...\n", stderr)

        // Element geometry uses a top-left origin measured from the primary display, so the flip to
        // AppKit's bottom-left origin must use the primary screen (`screens.first`), not
        // `NSScreen.main` (the screen holding the key window).
        let screenHeight = NSScreen.screens.first?.frame.height ?? 0
        if screenHeight == 0 {
             fputs("warning: [Main Thread] Could not get main screen height, coordinates might be incorrect.\n", stderr)
        } else {
            fputs("debug: [Main Thread] Main screen height for coordinate conversion: \(screenHeight)\n", stderr)
        }

        for element in elementsToHighlight {
            // The filter above guarantees these are present; bind them instead of force-unwrapping.
            guard let originalX = element.x, let originalY = element.y,
                  let elementWidth = element.width, let elementHeight = element.height else {
                continue
            }
            let convertedY = screenHeight - originalY - elementHeight
            let frame = NSRect(x: originalX, y: convertedY, width: elementWidth, height: elementHeight)
            // Prefer the element's non-empty text; otherwise fall back to its role.
            let textToShow = element.text.flatMap { $0.isEmpty ? nil : $0 } ?? element.role
            let feedbackType: FeedbackType = .box(text: textToShow)

            // Use the @MainActor function safely within this async block
            let window = createOverlayWindow(frame: frame, type: feedbackType)
            overlayWindows.append(window)
            window.makeKeyAndOrderFront(nil)
        }

        fputs("info: [Main Thread] Displayed \(overlayWindows.count) overlays. They will remain until the tool exits.\n", stderr)

    } // End of DispatchQueue.main.async block

    // 4. Return immediately after dispatching UI work
    fputs("info: drawHighlightBoxes finished synchronous part, dispatched UI updates.\n", stderr)
}


--------------------------------------------------------------------------------
/Sources/MacosUseSDK/HighlightInput.swift:
--------------------------------------------------------------------------------
  1 | import Foundation
  2 | import CoreGraphics
  3 | import AppKit // For DispatchQueue, showVisualFeedback
  4 | 
  5 | // --- Public Functions Combining Input Simulation and Visualization ---
  6 | 
  7 | /// Simulates a left mouse click at the specified coordinates and shows visual feedback.
  8 | /// - Parameters:
  9 | ///   - point: The `CGPoint` where the click should occur.
 10 | ///   - duration: How long the visual feedback should last (in seconds). Default is 0.5s.
 11 | /// - Throws: `MacosUseSDKError` if simulation or visualization fails.
 12 | public func clickMouseAndVisualize(at point: CGPoint, duration: Double = 0.5) throws {
 13 |     fputs("log: simulating left click AND visualize at: (\(point.x), \(point.y)), duration: \(duration)s\n", stderr)
 14 |     // Call the original input function
 15 |     try clickMouse(at: point)
 16 | 
 17 |     // Restore the correct async dispatch:
 18 |     DispatchQueue.main.async {
 19 |         Task { @MainActor in
 20 |             // Ensure FeedbackType is used if it's public/internal enum
 21 |             showVisualFeedback(at: point, type: FeedbackType.circle, duration: duration)
 22 |         }
 23 |     }
 24 |     fputs("log: left click simulation and visualization dispatched.\n", stderr)
 25 | }
 26 | 
 27 | /// Simulates a left mouse double click at the specified coordinates and shows visual feedback.
 28 | /// - Parameters:
 29 | ///   - point: The `CGPoint` where the double click should occur.
 30 | ///   - duration: How long the visual feedback should last (in seconds). Default is 0.5s.
 31 | /// - Throws: `MacosUseSDKError` if simulation or visualization fails.
 32 | public func doubleClickMouseAndVisualize(at point: CGPoint, duration: Double = 0.5) throws {
 33 |     fputs("log: simulating double-click AND visualize at: (\(point.x), \(point.y)), duration: \(duration)s\n", stderr)
 34 |     // Call the original input function
 35 |     try doubleClickMouse(at: point)
 36 |     // Schedule visualization on the main thread
 37 |     DispatchQueue.main.async {
 38 |         Task { @MainActor in
 39 |              showVisualFeedback(at: point, type: FeedbackType.circle, duration: duration)
 40 |         }
 41 |     }
 42 |     fputs("log: double-click simulation and visualization dispatched.\n", stderr)
 43 | }
 44 | 
 45 | /// Simulates a right mouse click at the specified coordinates and shows visual feedback.
 46 | /// - Parameters:
 47 | ///   - point: The `CGPoint` where the right click should occur.
 48 | ///   - duration: How long the visual feedback should last (in seconds). Default is 0.5s.
 49 | /// - Throws: `MacosUseSDKError` if simulation or visualization fails.
 50 | public func rightClickMouseAndVisualize(at point: CGPoint, duration: Double = 0.5) throws {
 51 |      fputs("log: simulating right-click AND visualize at: (\(point.x), \(point.y)), duration: \(duration)s\n", stderr)
 52 |      // Call the original input function
 53 |     try rightClickMouse(at: point)
 54 |     // Schedule visualization on the main thread
 55 |     DispatchQueue.main.async {
 56 |         Task { @MainActor in
 57 |             showVisualFeedback(at: point, type: FeedbackType.circle, duration: duration)
 58 |         }
 59 |     }
 60 |      fputs("log: right-click simulation and visualization dispatched.\n", stderr)
 61 | }
 62 | 
 63 | /// Moves the mouse cursor to the specified coordinates and shows brief visual feedback at the destination.
 64 | /// - Parameters:
 65 | ///   - point: The `CGPoint` to move the cursor to.
 66 | ///   - duration: How long the visual feedback should last (in seconds). Default is 0.5s.
 67 | /// - Throws: `MacosUseSDKError` if simulation or visualization fails.
 68 | public func moveMouseAndVisualize(to point: CGPoint, duration: Double = 0.5) throws {
 69 |      fputs("log: moving mouse AND visualize to: (\(point.x), \(point.y)), duration: \(duration)s\n", stderr)
 70 |      // Call the original input function
 71 |     try moveMouse(to: point)
 72 |     // Schedule visualization on the main thread
 73 |     DispatchQueue.main.async {
 74 |          Task { @MainActor in
 75 |             showVisualFeedback(at: point, type: FeedbackType.circle, duration: duration)
 76 |          }
 77 |     }
 78 |      fputs("log: mouse move simulation and visualization dispatched.\n", stderr)
 79 | }
 80 | 
 81 | /// Simulates pressing and releasing a key with optional modifiers. Shows a caption at screen center.
 82 | /// - Parameters:
 83 | ///   - keyCode: The `CGKeyCode` of the key to press.
 84 | ///   - flags: The modifier flags (`CGEventFlags`).
 85 | ///   - duration: How long the visual feedback should last (in seconds). Default is 0.8s.
 86 | /// - Throws: `MacosUseSDKError` if simulation fails.
 87 | public func pressKeyAndVisualize(keyCode: CGKeyCode, flags: CGEventFlags = [], duration: Double = 0.8) throws {
 88 |     // Define caption constants
 89 |     let captionText = "[KEY PRESS]"
 90 |     let captionSize = CGSize(width: 250, height: 80) // Size for the key press caption
 91 | 
 92 |     fputs("log: simulating key press (code: \(keyCode), flags: \(flags.rawValue)) AND visualizing caption '\(captionText)', duration: \(duration)s\n", stderr)
 93 |     // Call the original input function first
 94 |     try pressKey(keyCode: keyCode, flags: flags)
 95 | 
 96 |     // Always dispatch caption visualization to the main thread at screen center
 97 |     DispatchQueue.main.async {
 98 |         Task { @MainActor in
 99 |             // Get screen center for caption placement
100 |             if let screenCenter = getMainScreenCenter() {
101 |                 fputs("log: [Main Thread] Displaying key press caption at screen center: \(screenCenter).\n", stderr)
102 |                 // Show the caption feedback
103 |                 showVisualFeedback(
104 |                     at: screenCenter,
105 |                     type: .caption(text: captionText),
106 |                     size: captionSize,
107 |                     duration: duration
108 |                 )
109 |             } else {
110 |                 fputs("warning: [Main Thread] could not get main screen center for key press caption visualization.\n", stderr)
111 |             }
112 |         }
113 |     }
114 |     fputs("log: key press simulation complete, caption visualization dispatched.\n", stderr)
115 | }
116 | 
/// Types `text` via `writeText(_:)`, then schedules a caption showing that text at the point returned by
/// `getMainScreenCenter()`.
/// - Parameters:
///   - text: The `String` to type; also shown as the caption.
///   - duration: Duration in seconds forwarded to `showVisualFeedback`. When `nil`, it is derived from the
///     text length as `max(1.0, 0.5 + 0.05 * text.count)`.
/// - Throws: Whatever `writeText(_:)` throws. The caption is dispatched asynchronously; if the screen
///   center cannot be determined a warning is logged and no caption is shown.
public func writeTextAndVisualize(_ text: String, duration: Double? = nil) throws {
    let minimumDuration = 1.0
    // Length-based estimate: 0.5s base plus 0.05s per character, never below the minimum.
    let lengthBasedDuration = max(minimumDuration, 0.5 + Double(text.count) * 0.05)
    let finalDuration = duration ?? lengthBasedDuration
    let captionSize = CGSize(width: 450, height: 100) // Fixed overlay size for typed-text captions

    fputs("log: simulating text writing AND visualizing caption: \"\(text)\", duration: \(finalDuration)s\n", stderr)

    // Input first: a failure here propagates before any overlay work is queued.
    try writeText(text)

    DispatchQueue.main.async {
        Task { @MainActor in
            guard let screenCenter = getMainScreenCenter() else {
                fputs("warning: [Main Thread] could not get main screen center for text writing caption visualization.\n", stderr)
                return
            }
            fputs("log: [Main Thread] Displaying text writing caption at screen center: \(screenCenter).\n", stderr)
            showVisualFeedback(
                at: screenCenter,
                type: .caption(text: text),
                size: captionSize,
                duration: finalDuration
            )
        }
    }

    fputs("log: text writing simulation complete, caption visualization dispatched.\n", stderr)
}
154 | 
155 | // --- Helper Function to Get Main Screen Center ---
156 | // REMOVED: Entire fileprivate getMainScreenCenter() function definition.
157 | // The internal version in DrawVisuals.swift will be used instead.
158 | 


--------------------------------------------------------------------------------
/Sources/MacosUseSDK/InputController.swift:
--------------------------------------------------------------------------------
  1 | // #!/usr/bin/swift - Don't need this when it's part of a library
  2 | 
  3 | import Foundation
  4 | import CoreGraphics
  5 | import AppKit // Needed for Process and potentially other things later
  6 | 
  7 | // --- Add new Error Cases for Input Control ---
  8 | public extension MacosUseSDKError {
  9 |     // Add specific error cases relevant to InputController
 10 |     static func inputInvalidArgument(_ message: String) -> MacosUseSDKError {
 11 |         .internalError("Input Argument Error: \(message)") // Reuse internalError or create specific types
 12 |     }
 13 |     static func inputSimulationFailed(_ message: String) -> MacosUseSDKError {
 14 |         .internalError("Input Simulation Failed: \(message)")
 15 |     }
 16 |      static func osascriptExecutionFailed(status: Int32, message: String = "") -> MacosUseSDKError {
 17 |         .internalError("osascript execution failed with status \(status). \(message)")
 18 |     }
 19 | }
 20 | 
 21 | 
 22 | // --- Constants for Key Codes ---
 23 | // These match the constants used in the Rust macos.rs code for consistency
 24 | public let KEY_RETURN: CGKeyCode = 36
 25 | public let KEY_TAB: CGKeyCode = 48
 26 | public let KEY_SPACE: CGKeyCode = 49
 27 | public let KEY_DELETE: CGKeyCode = 51 // Matches 'delete' (backspace on many keyboards)
 28 | public let KEY_ESCAPE: CGKeyCode = 53
 29 | public let KEY_ARROW_LEFT: CGKeyCode = 123
 30 | public let KEY_ARROW_RIGHT: CGKeyCode = 124
 31 | public let KEY_ARROW_DOWN: CGKeyCode = 125
 32 | public let KEY_ARROW_UP: CGKeyCode = 126
 33 | // Add other key codes as needed (consider making them public if the tool needs direct access)
 34 | 
 35 | // --- Helper Functions (Internal or Fileprivate) ---
 36 | 
 37 | // Logs messages to stderr for debugging/status - keep internal or remove if tool handles logging
 38 | // fileprivate func log(_ message: String) { // Make fileprivate or remove
 39 | //     fputs("log: \(message)\n", stderr)
 40 | // }
 41 | 
 42 | // Creates a CGEventSource or throws
 43 | fileprivate func createEventSource() throws -> CGEventSource {
 44 |     guard let source = CGEventSource(stateID: .hidSystemState) else {
 45 |         throw MacosUseSDKError.inputSimulationFailed("failed to create event source")
 46 |     }
 47 |     return source
 48 | }
 49 | 
 50 | // Posts a CGEvent or throws
 51 | fileprivate func postEvent(_ event: CGEvent?, actionDescription: String) throws {
 52 |     guard let event = event else {
 53 |         throw MacosUseSDKError.inputSimulationFailed("failed to create \(actionDescription) event")
 54 |     }
 55 |     event.post(tap: .cghidEventTap)
 56 |     // Add a small delay after posting, crucial for some applications
 57 |     usleep(15_000) // 15 milliseconds, slightly increased from 10ms
 58 | }
 59 | 
 60 | // --- Public Input Simulation Functions ---
 61 | 
 62 | /// Simulates pressing and releasing a key with optional modifier flags.
 63 | /// - Parameters:
 64 | ///   - keyCode: The `CGKeyCode` of the key to press.
 65 | ///   - flags: The modifier flags (`CGEventFlags`) to apply (e.g., `.maskCommand`, `.maskShift`).
 66 | /// - Throws: `MacosUseSDKError` if the event source cannot be created or the event cannot be posted.
 67 | public func pressKey(keyCode: CGKeyCode, flags: CGEventFlags = []) throws {
 68 |     fputs("log: simulating key press: (code: \(keyCode), flags: \(flags.rawValue))\n", stderr) // Log action
 69 |     let source = try createEventSource()
 70 | 
 71 |     let keyDown = CGEvent(keyboardEventSource: source, virtualKey: keyCode, keyDown: true)
 72 |     keyDown?.flags = flags // Apply modifier flags
 73 |     try postEvent(keyDown, actionDescription: "key down (code: \(keyCode), flags: \(flags.rawValue))")
 74 | 
 75 |     // Short delay between key down and key up is often necessary
 76 |     // usleep(10_000) // Delay moved into postEvent
 77 | 
 78 |     let keyUp = CGEvent(keyboardEventSource: source, virtualKey: keyCode, keyDown: false)
 79 |     keyUp?.flags = flags // Apply modifier flags for key up as well
 80 |     try postEvent(keyUp, actionDescription: "key up (code: \(keyCode), flags: \(flags.rawValue))")
 81 |     fputs("log: key press simulation complete.\n", stderr)
 82 | }
 83 | 
 84 | /// Simulates a left mouse click at the specified screen coordinates.
 85 | /// Does not move the cursor first. Call `moveMouse` beforehand if needed.
 86 | /// - Parameter point: The `CGPoint` where the click should occur.
 87 | /// - Throws: `MacosUseSDKError` if the event source cannot be created or the event cannot be posted.
 88 | public func clickMouse(at point: CGPoint) throws {
 89 |     fputs("log: simulating left click at: (\(point.x), \(point.y))\n", stderr) // Log action
 90 |     let source = try createEventSource()
 91 | 
 92 |     // Create and post mouse down event
 93 |     let mouseDown = CGEvent(mouseEventSource: source, mouseType: .leftMouseDown, mouseCursorPosition: point, mouseButton: .left)
 94 |     try postEvent(mouseDown, actionDescription: "mouse down at (\(point.x), \(point.y))")
 95 | 
 96 |     // Short delay - moved into postEvent
 97 |     // usleep(10_000)
 98 | 
 99 |     // Create and post mouse up event
100 |     let mouseUp = CGEvent(mouseEventSource: source, mouseType: .leftMouseUp, mouseCursorPosition: point, mouseButton: .left)
101 |     try postEvent(mouseUp, actionDescription: "mouse up at (\(point.x), \(point.y))")
102 |     fputs("log: left click simulation complete.\n", stderr)
103 | }
104 | 
/// Posts one left-button down/up pair at `point` with the click-state field of both events set to 2.
/// No separate move event is posted first; call `moveMouse` beforehand if needed.
/// - Parameter point: The `CGPoint` used as the cursor position of both events.
/// - Throws: `MacosUseSDKError` if the event source or either mouse event cannot be created.
public func doubleClickMouse(at point: CGPoint) throws {
    fputs("log: simulating double-click at: (\(point.x), \(point.y))\n", stderr)
    let eventSource = try createEventSource()

    // Creates a left-button event of the given type, marks it as the second click, and posts it.
    func send(_ eventType: CGEventType, describedAs description: String) throws {
        let mouseEvent = CGEvent(mouseEventSource: eventSource, mouseType: eventType, mouseCursorPosition: point, mouseButton: .left)
        mouseEvent?.setIntegerValueField(.mouseEventClickState, value: 2)
        try postEvent(mouseEvent, actionDescription: description)
    }

    try send(.leftMouseDown, describedAs: "double click down at (\(point.x), \(point.y))")
    try send(.leftMouseUp, describedAs: "double click up at (\(point.x), \(point.y))")
    fputs("log: double-click simulation complete.\n", stderr)
}
125 | 
/// Posts a right-button mouse-down event followed by a mouse-up event at `point`.
/// No separate move event is posted first; call `moveMouse` beforehand if needed.
/// - Parameter point: The `CGPoint` used as the cursor position of both events.
/// - Throws: `MacosUseSDKError` if the event source or either mouse event cannot be created.
public func rightClickMouse(at point: CGPoint) throws {
    fputs("log: simulating right-click at: (\(point.x), \(point.y))\n", stderr)
    let eventSource = try createEventSource()

    // Down then up with the RIGHT button; postEvent supplies the short pause after each one.
    let phases: [(CGEventType, String)] = [
        (.rightMouseDown, "right mouse down at (\(point.x), \(point.y))"),
        (.rightMouseUp, "right mouse up at (\(point.x), \(point.y))")
    ]
    for (eventType, description) in phases {
        let mouseEvent = CGEvent(mouseEventSource: eventSource, mouseType: eventType, mouseCursorPosition: point, mouseButton: .right)
        try postEvent(mouseEvent, actionDescription: description)
    }
    fputs("log: right-click simulation complete.\n", stderr)
}
147 | 
/// Posts a single `.mouseMoved` event whose cursor position is `point`.
/// - Parameter point: The `CGPoint` to move the cursor to.
/// - Throws: `MacosUseSDKError` if the event source or the move event cannot be created.
public func moveMouse(to point: CGPoint) throws {
    fputs("log: moving mouse to: (\(point.x), \(point.y))\n", stderr)
    let eventSource = try createEventSource()

    // The initializer requires a button argument; `.left` is passed as a placeholder for a move.
    try postEvent(
        CGEvent(mouseEventSource: eventSource, mouseType: .mouseMoved, mouseCursorPosition: point, mouseButton: .left),
        actionDescription: "mouse move to (\(point.x), \(point.y))"
    )
    fputs("log: mouse move simulation complete.\n", stderr)
}
160 | 
/// Simulates typing a string of text using AppleScript `keystroke`.
///
/// The text is embedded in a `tell application "System Events" to keystroke "…"` script (with
/// backslashes and double quotes escaped) and executed through `/usr/bin/osascript`. The call blocks
/// until `osascript` exits.
///
/// - Parameter text: The `String` to type.
/// - Throws: `MacosUseSDKError.inputSimulationFailed` if `osascript` cannot be launched, or
///   `MacosUseSDKError.osascriptExecutionFailed` (carrying the exit status and captured stderr) if it
///   exits with a non-zero status.
public func writeText(_ text: String) throws {
    // AppleScript's 'keystroke' handles character mapping and keyboard layouts; a pure CGEvent
    // approach would need a character-to-keycode+flags mapping.
    fputs("log: simulating text writing: \"\(text)\" (using AppleScript)\n", stderr)

    // Escape backslashes first, then double quotes, so the text is a valid AppleScript string literal.
    let escapedText = text.replacingOccurrences(of: "\\", with: "\\\\").replacingOccurrences(of: "\"", with: "\\\"")
    let script = "tell application \"System Events\" to keystroke \"\(escapedText)\""

    let process = Process()
    process.executableURL = URL(fileURLWithPath: "/usr/bin/osascript")
    process.arguments = ["-e", script]

    // Capture potential errors from osascript
    let errorPipe = Pipe()
    process.standardError = errorPipe

    // Only the launch is wrapped here, so a later non-zero exit status is reported as
    // osascriptExecutionFailed instead of being re-wrapped as a launch failure.
    do {
        try process.run()
    } catch {
        throw MacosUseSDKError.inputSimulationFailed("failed to execute osascript for writetext: \(error.localizedDescription)")
    }

    // Drain stderr before waiting: reading to EOF returns once the child closes the pipe, and it
    // keeps a chatty child from stalling on a full pipe buffer while we sit in waitUntilExit().
    let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile()
    process.waitUntilExit()
    let errorString = String(data: errorData, encoding: .utf8)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""

    guard process.terminationStatus == 0 else {
        fputs("error: osascript command failed with status \(process.terminationStatus)\n", stderr)
        if !errorString.isEmpty {
            fputs("error details (osascript): \(errorString)\n", stderr)
        }
        throw MacosUseSDKError.osascriptExecutionFailed(status: process.terminationStatus, message: errorString)
    }
    fputs("log: text writing simulation complete.\n", stderr)
}
206 | 
207 | 
/// Lookup table from lower-cased key names to virtual key codes.
/// Letter, digit and symbol codes assume a standard US QWERTY layout and may differ on other layouts.
private let keyCodesByLowercasedName: [String: CGKeyCode] = [
    // Special keys
    "return": KEY_RETURN, "enter": KEY_RETURN,
    "tab": KEY_TAB,
    "space": KEY_SPACE,
    "delete": KEY_DELETE, "backspace": KEY_DELETE,
    "escape": KEY_ESCAPE, "esc": KEY_ESCAPE,
    "left": KEY_ARROW_LEFT,
    "right": KEY_ARROW_RIGHT,
    "down": KEY_ARROW_DOWN,
    "up": KEY_ARROW_UP,

    // Letters
    "a": 0, "b": 11, "c": 8, "d": 2, "e": 14, "f": 3, "g": 5, "h": 4, "i": 34,
    "j": 38, "k": 40, "l": 37, "m": 46, "n": 45, "o": 31, "p": 35, "q": 12, "r": 15,
    "s": 1, "t": 17, "u": 32, "v": 9, "w": 13, "x": 7, "y": 16, "z": 6,

    // Digits (main keyboard row)
    "1": 18, "2": 19, "3": 20, "4": 21, "5": 23,
    "6": 22, "7": 26, "8": 28, "9": 25, "0": 29,

    // Symbols
    "-": 27, "=": 24, "[": 33, "]": 30,
    "\\": 42, // Backslash
    ";": 41,
    "'": 39,  // Quote
    ",": 43, ".": 47, "/": 44,
    "`": 50,  // Grave accent / Tilde

    // Function keys (F13-F20 are not mapped)
    "f1": 122, "f2": 120, "f3": 99, "f4": 118, "f5": 96, "f6": 97,
    "f7": 98, "f8": 100, "f9": 101, "f10": 109, "f11": 103, "f12": 111,
]

/// Resolves a key name (case-insensitive) or a numeric string to a `CGKeyCode`.
/// - Parameter keyName: A key name such as "return", "a" or "esc", or the decimal key code as a string.
///   Single digits "0"-"9" are treated as the digit keys, not as raw key codes.
/// - Returns: The matching `CGKeyCode`, or `nil` when the name is unknown and is not a valid number.
public func mapKeyNameToKeyCode(_ keyName: String) -> CGKeyCode? {
    if let namedCode = keyCodesByLowercasedName[keyName.lowercased()] {
        return namedCode
    }

    // Not a known name: fall back to interpreting the input as a raw key code number.
    fputs("log: key '\(keyName)' not explicitly mapped, attempting conversion to CGKeyCode number.\n", stderr)
    return CGKeyCode(keyName) // nil if the string is not a valid number
}
299 | 
300 | // --- Removed Main Script Logic ---
301 | // The argument parsing, switch statement, fail(), completeSuccessfully(), startTime
302 | // and related logic have been removed from this file. They will be handled by the
303 | // InputControllerTool executable's main.swift.
304 | 
305 | // --- Retained Helper Structures/Functions if needed by public API ---
306 | // (e.g., mapKeyNameToKeyCode is now public)


--------------------------------------------------------------------------------
/Sources/TraversalTool/main.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | import MacosUseSDK // Import your library
 3 | 
 4 | // --- Main Execution Logic ---
 5 | 
 6 | // 1. Argument Parsing
 7 | var arguments = CommandLine.arguments
 8 | var onlyVisible = false
 9 | var pidString: String? = nil
10 | 
11 | // Remove the executable name
12 | arguments.removeFirst()
13 | 
14 | // Check for the flag and remove it if found
15 | if let flagIndex = arguments.firstIndex(of: "--visible-only") {
16 |     onlyVisible = true
17 |     arguments.remove(at: flagIndex)
18 |     fputs("info: '--visible-only' flag detected.\n", stderr)
19 | }
20 | 
21 | // The remaining argument should be the PID
22 | if arguments.count == 1 {
23 |     pidString = arguments[0]
24 | }
25 | 
26 | guard let pidStr = pidString, let appPID = Int32(pidStr) else {
27 |     fputs("usage: TraversalTool [--visible-only] <PID>\n", stderr)
28 |     fputs("error: expected a valid process id (pid) as the argument.\n", stderr)
29 |     fputs("example (all elements): TraversalTool 14154\n", stderr)
30 |     fputs("example (visible only): TraversalTool --visible-only 14154\n", stderr)
31 |     exit(1)
32 | }
33 | 
34 | // 2. Call the Library Function
35 | do {
36 |     fputs("info: calling traverseAccessibilityTree for pid \(appPID) (Visible Only: \(onlyVisible))...\n", stderr)
37 |     // MODIFIED: Pass the parsed 'onlyVisible' flag to the library function
38 |     let responseData = try MacosUseSDK.traverseAccessibilityTree(pid: appPID, onlyVisibleElements: onlyVisible)
39 |     fputs("info: successfully received response from traverseAccessibilityTree.\n", stderr)
40 | 
41 |     // 3. Encode the result to JSON
42 |     let encoder = JSONEncoder()
43 |     encoder.outputFormatting = [.prettyPrinted, .sortedKeys]
44 | 
45 |     let jsonData = try encoder.encode(responseData)
46 | 
47 |     // 4. Print JSON to standard output
48 |     if let jsonString = String(data: jsonData, encoding: .utf8) {
49 |         print(jsonString)
50 |         exit(0) // Success
51 |     } else {
52 |         fputs("error: failed to convert response data to json string.\n", stderr)
53 |         exit(1)
54 |     }
55 | 
56 | } catch let error as MacosUseSDKError {
57 |     fputs("❌ Error from MacosUseSDK: \(error.localizedDescription)\n", stderr)
58 |     exit(1)
59 | } catch {
60 |     fputs("❌ An unexpected error occurred: \(error.localizedDescription)\n", stderr)
61 |     exit(1)
62 | }
63 | 
64 | /*
65 | # Example: Get visible elements from Messages app
66 | swift run TraversalTool --visible-only $(swift run AppOpenerTool Messages)
67 | */ 


--------------------------------------------------------------------------------
/Sources/VisualInputTool/main.swift:
--------------------------------------------------------------------------------
  1 | import Foundation
  2 | import CoreGraphics // For CGPoint, CGEventFlags
  3 | import MacosUseSDK // Import the library
  4 | import AppKit // Required for RunLoop, NSScreen
  5 | 
  6 | // --- Start Time ---
  7 | let startTime = Date() // Record start time for the tool's execution
  8 | 
  9 | // --- Tool-specific Logging ---
 10 | func log(_ message: String) {
 11 |     fputs("VisualInputTool: \(message)\n", stderr)
 12 | }
 13 | 
 14 | // --- Tool-specific Exiting ---
 15 | func finish(success: Bool, message: String? = nil) {
 16 |     if let msg = message {
 17 |         log(success ? "✅ Success: \(msg)" : "❌ Error: \(msg)")
 18 |     }
 19 |     let endTime = Date()
 20 |     let processingTime = endTime.timeIntervalSince(startTime)
 21 |     let formattedTime = String(format: "%.3f", processingTime)
 22 |     fputs("VisualInputTool: total execution time (before wait): \(formattedTime) seconds\n", stderr)
 23 |     // Don't exit immediately, let RunLoop finish
 24 | }
 25 | 
 26 | // --- Argument Parsing Helper ---
 27 | // Parses standard input actions AND an optional --duration flag
 28 | func parseArguments() -> (action: String?, args: [String], duration: Double) {
 29 |     var action: String? = nil
 30 |     var actionArgs: [String] = []
 31 |     var duration: Double = 0.5 // Default duration for visualization
 32 |     var waitingForDurationValue = false
 33 |     let allArgs = CommandLine.arguments.dropFirst() // Skip executable path
 34 | 
 35 |     for arg in allArgs {
 36 |         if waitingForDurationValue {
 37 |             if let durationValue = Double(arg), durationValue > 0 {
 38 |                 duration = durationValue
 39 |                 log("Parsed duration: \(duration) seconds")
 40 |             } else {
 41 |                 fputs("error: Invalid value provided after --duration.\n", stderr)
 42 |                 // Return error indication or default? Let's keep default and log error.
 43 |             }
 44 |             waitingForDurationValue = false
 45 |         } else if arg == "--duration" {
 46 |             waitingForDurationValue = true
 47 |         } else if action == nil {
 48 |             action = arg.lowercased()
 49 |             log("Parsed action: \(action!)")
 50 |         } else {
 51 |             actionArgs.append(arg)
 52 |         }
 53 |     }
 54 | 
 55 |     if waitingForDurationValue {
 56 |         fputs("error: Missing value after --duration flag. Using default \(duration)s.\n", stderr)
 57 |     }
 58 |     if action == nil {
 59 |          fputs("error: No action specified.\n", stderr)
 60 |     }
 61 | 
 62 |     log("Parsed action arguments: \(actionArgs)")
 63 |     return (action, actionArgs, duration)
 64 | }
 65 | 
 66 | 
 67 | // --- Main Logic ---
 68 | let scriptName = CommandLine.arguments.first ?? "VisualInputTool"
 69 | let usage = """
 70 | usage: \(scriptName) <action> [options...] [--duration <seconds>]
 71 | 
 72 | actions:
 73 |   keypress <key_name_or_code>[+modifier...]   Simulate key press AND show caption visualization.
 74 |   click <x> <y>                 Simulate left click AND show circle visualization.
 75 |   doubleclick <x> <y>           Simulate double-click AND show circle visualization.
 76 |   rightclick <x> <y>            Simulate right click AND show circle visualization.
 77 |   mousemove <x> <y>             Move mouse AND show circle visualization at destination.
 78 |   writetext <text_to_type>      Simulate typing text AND show caption visualization.
 79 | 
 80 | options:
 81 |   --duration <seconds>          How long the visual effect should last (default: 0.5s for mouse, 0.8s for keypress, calculated for writetext).
 82 | 
 83 | Examples:
 84 |   \(scriptName) click 100 250
 85 |   \(scriptName) click 500 500 --duration 1.5
 86 |   \(scriptName) keypress cmd+shift+4 --duration 1.0
 87 |   \(scriptName) writetext "Hello There"
 88 | """
 89 | 
 90 | let (action, actionArgs, parsedDuration) = parseArguments()
 91 | 
 92 | guard let action = action else {
 93 |     fputs(usage, stderr)
 94 |     exit(1)
 95 | }
 96 | 
 97 | // --- Action Handling ---
 98 | var success = false
 99 | var message: String? = nil
100 | var requiresRunLoopWait = true // Default to true, as all actions now have visualization
101 | 
102 | // Variable to hold the actual duration used for visualization
103 | var visualizationDuration: Double = 0.5 // Default fallback
104 | 
105 | // Use a Task for the main logic to easily call async/await and @MainActor functions
106 | Task {
107 |     do {
108 |         switch action {
109 |         case "keypress":
110 |             guard actionArgs.count == 1 else {
111 |                 throw MacosUseSDKError.inputInvalidArgument("'keypress' requires exactly one argument: <key_name_or_code_with_modifiers>\n\(usage)")
112 |             }
113 |             let keyCombo = actionArgs[0]
114 |             log("Processing key combo: '\(keyCombo)'")
115 |             // (Parsing logic copied from InputControllerTool)
116 |             var keyCode: CGKeyCode?
117 |             var flags: CGEventFlags = []
118 |             let parts = keyCombo.split(separator: "+").map { String($0).trimmingCharacters(in: .whitespacesAndNewlines).lowercased() }
119 |             guard let keyPart = parts.last else {
120 |                 throw MacosUseSDKError.inputInvalidArgument("Invalid key combination format: '\(keyCombo)'")
121 |             }
122 |             keyCode = MacosUseSDK.mapKeyNameToKeyCode(keyPart)
123 |             if parts.count > 1 {
124 |                 log("Parsing modifiers: \(parts.dropLast().joined(separator: ", "))")
125 |                 for i in 0..<(parts.count - 1) {
126 |                     switch parts[i] {
127 |                         case "cmd", "command": flags.insert(.maskCommand)
128 |                         case "shift": flags.insert(.maskShift)
129 |                         case "opt", "option", "alt": flags.insert(.maskAlternate)
130 |                         case "ctrl", "control": flags.insert(.maskControl)
131 |                         case "fn", "function": flags.insert(.maskSecondaryFn)
132 |                         default: throw MacosUseSDKError.inputInvalidArgument("Unknown modifier: '\(parts[i])' in '\(keyCombo)'")
133 |                     }
134 |                 }
135 |             }
136 |             guard let finalKeyCode = keyCode else {
137 |                 throw MacosUseSDKError.inputInvalidArgument("Unknown key name or invalid key code: '\(keyPart)' in '\(keyCombo)'")
138 |             }
139 | 
140 |             visualizationDuration = parsedDuration > 0 ? parsedDuration : 0.8 // Use parsed or default 0.8s
141 | 
142 |             log("Calling pressKey library function...")
143 |             try MacosUseSDK.pressKey(keyCode: finalKeyCode, flags: flags) // Input simulation
144 | 
145 |             log("Dispatching showVisualFeedback for keypress...")
146 |             // Dispatch visualization separately (@MainActor is handled by showVisualFeedback)
147 |             let captionText = "[KEY PRESS]"
148 |             let captionSize = CGSize(width: 250, height: 80)
149 |             if let screenCenter = MacosUseSDK.getMainScreenCenter() {
150 |                 MacosUseSDK.showVisualFeedback(
151 |                     at: screenCenter,
152 |                     type: .caption(text: captionText),
153 |                     size: captionSize,
154 |                     duration: visualizationDuration
155 |                 )
156 |             } else {
157 |                 fputs("warning: could not get screen center for key press caption.\n", stderr)
158 |                 requiresRunLoopWait = false // Don't wait if viz failed
159 |             }
160 | 
161 |             success = true
162 |             message = "Key press '\(keyCombo)' simulated with visualization."
163 | 
164 |         case "click", "doubleclick", "rightclick", "mousemove":
165 |             guard actionArgs.count == 2 else {
166 |                 throw MacosUseSDKError.inputInvalidArgument("'\(action)' requires exactly two arguments: <x> <y>\n\(usage)")
167 |             }
168 |             guard let x = Double(actionArgs[0]), let y = Double(actionArgs[1]) else {
169 |                 throw MacosUseSDKError.inputInvalidArgument("Invalid coordinates for '\(action)'. x and y must be numbers.")
170 |             }
171 |             let point = CGPoint(x: x, y: y)
172 |             log("Coordinates: (\(x), \(y))")
173 | 
174 |             visualizationDuration = parsedDuration > 0 ? parsedDuration : 0.5 // Use parsed or default 0.5s
175 | 
176 |             log("Calling \(action) library function...") // Now refers to the input-only function
177 |             switch action {
178 |                 case "click":       try MacosUseSDK.clickMouse(at: point)
179 |                 case "doubleclick": try MacosUseSDK.doubleClickMouse(at: point)
180 |                 case "rightclick":  try MacosUseSDK.rightClickMouse(at: point)
181 |                 case "mousemove":   try MacosUseSDK.moveMouse(to: point)
182 |                 default: break // Should not happen
183 |             }
184 | 
185 |             log("Dispatching showVisualFeedback for \(action)...")
186 |             // Dispatch visualization separately
187 |             MacosUseSDK.showVisualFeedback(at: point, type: .circle, duration: visualizationDuration)
188 | 
189 |             success = true
190 |             message = "\(action) simulated at (\(x), \(y)) with visualization."
191 | 
192 | 
193 |         case "writetext":
194 |             guard actionArgs.count == 1 else {
195 |                 throw MacosUseSDKError.inputInvalidArgument("'writetext' requires exactly one argument: <text_to_type>\n\(usage)")
196 |             }
197 |             let text = actionArgs[0]
198 |             log("Text Argument: \"\(text)\"")
199 | 
200 |             // Calculate duration if not specified
201 |             let defaultDuration = 1.0
202 |             let calculatedDuration = max(defaultDuration, 0.5 + Double(text.count) * 0.05)
203 |             visualizationDuration = parsedDuration > 0 ? parsedDuration : calculatedDuration // Use parsed or calculated
204 | 
205 |             log("Calling writeText library function...")
206 |             try MacosUseSDK.writeText(text) // Input simulation
207 | 
208 |             log("Dispatching showVisualFeedback for writetext...")
209 |             // Dispatch visualization separately
210 |             let captionSize = CGSize(width: 450, height: 100)
211 |             if let screenCenter = MacosUseSDK.getMainScreenCenter() {
212 |                 MacosUseSDK.showVisualFeedback(
213 |                     at: screenCenter,
214 |                     type: .caption(text: text), // Show actual text
215 |                     size: captionSize,
216 |                     duration: visualizationDuration
217 |                 )
218 |             } else {
219 |                 fputs("warning: could not get screen center for write text caption.\n", stderr)
220 |                 requiresRunLoopWait = false // Don't wait if viz failed
221 |             }
222 | 
223 |             success = true
224 |             message = "Text writing simulated with visualization."
225 | 
226 |         default:
227 |             fputs(usage, stderr)
228 |             throw MacosUseSDKError.inputInvalidArgument("Unknown action '\(action)'")
229 |         }
230 | 
231 |         // --- Log final status before potentially waiting ---
232 |         finish(success: success, message: message)
233 | 
234 |         // --- Keep Main Thread Alive for Visualization (if needed) ---
235 |         if requiresRunLoopWait {
236 |             let waitTime = visualizationDuration + 0.5 // Wait slightly longer
237 |             log("Waiting for \(waitTime) seconds for visualization to complete...")
238 |             // Use RunLoop directly since we are in a Task that might not be on the main thread initially
239 |             DispatchQueue.main.async {
240 |                 RunLoop.main.run(until: Date(timeIntervalSinceNow: waitTime))
241 |                 log("Run loop finished. Exiting.")
242 |                 exit(0) // Exit normally after waiting
243 |             }
244 |             // Keep the task alive until the run loop finishes
245 |             try await Task.sleep(nanoseconds: UInt64((waitTime + 0.1) * 1_000_000_000))
246 |             // Fallback exit if the run loop mechanism doesn't exit
247 |             exit(0)
248 | 
249 |         } else {
250 |             log("No visualization triggered or viz failed, exiting immediately.")
251 |             exit(0) // Exit normally without waiting
252 |         }
253 | 
254 |     } catch let error as MacosUseSDKError {
255 |         finish(success: false, message: "MacosUseSDK Error: \(error.localizedDescription)")
256 |         exit(1) // Exit with error
257 |     } catch {
258 |         finish(success: false, message: "An unexpected error occurred: \(error.localizedDescription)")
259 |         exit(1) // Exit with error
260 |     }
261 | }
262 | 
263 | // Keep the main thread running to allow the Task to execute
264 | RunLoop.main.run()
265 | 


--------------------------------------------------------------------------------
/Tests/MacosUseSDKTests/CombinedActionsDiffTests.swift:
--------------------------------------------------------------------------------
  1 | import XCTest
  2 | @testable import MacosUseSDK // Use @testable to access internal stuff if needed, otherwise just import
  3 | import AppKit // For NSWorkspace, NSRunningApplication
  4 | 
  5 | final class CombinedActionsDiffTests: XCTestCase {
  6 | 
  7 |     var calculatorPID: pid_t?
  8 |     var calculatorApp: NSRunningApplication?
  9 | 
 10 |     // Launch Calculator before each test
 11 |     override func setUp() async throws {
 12 |         // Ensure accessibility is granted (cannot check programmatically easily, user must pre-authorize)
 13 |         fputs("info: Test setup - Launching Calculator...\n", stderr)
 14 |         // Note: Using NSWorkspace directly here to avoid SDK dependency loop if openApplication fails
 15 |         let calcURL = URL(fileURLWithPath: "/System/Applications/Calculator.app")
 16 |         // Configuration to activate it
 17 |         let config = NSWorkspace.OpenConfiguration()
 18 |         config.activates = true
 19 |         calculatorApp = try await NSWorkspace.shared.openApplication(at: calcURL, configuration: config)
 20 |         calculatorPID = calculatorApp?.processIdentifier
 21 |         XCTAssertNotNil(calculatorPID, "Failed to get Calculator PID")
 22 |         fputs("info: Test setup - Calculator launched with PID \(calculatorPID!)\n", stderr)
 23 |         // Give it a moment to fully launch and settle
 24 |         try await Task.sleep(nanoseconds: 1_000_000_000) // 1 second
 25 |     }
 26 | 
 27 |     // Quit Calculator after each test
 28 |     override func tearDown() async throws {
 29 |         fputs("info: Test teardown - Terminating Calculator (PID: \(calculatorPID ?? -1))...\n", stderr)
 30 |         calculatorApp?.terminate()
 31 |         // Give it more time to terminate AND allow any remaining async SDK tasks (like animations) to naturally cease.
 32 |         fputs("info: Test teardown - Waiting 1.5 seconds for app termination and UI settling...\n", stderr)
 33 |         try await Task.sleep(nanoseconds: 1_500_000_000) // 1.5 seconds
 34 |         calculatorApp = nil
 35 |         calculatorPID = nil
 36 |         fputs("info: Test teardown - Finished.\n", stderr)
 37 |     }
 38 | 
 39 |     // Test: Type '2*3=' with action viz + traversal highlight and print the diff
 40 |     @MainActor
 41 |     func testCalculatorMultiplyWithActionAndTraversalHighlight() async throws {
 42 |         guard let pid = calculatorPID else {
 43 |             XCTFail("Calculator PID not available")
 44 |             return
 45 |         }
 46 | 
 47 |         fputs("\ninfo: === Starting testCalculatorMultiplyWithActionAndTraversalHighlight ===\n", stderr)
 48 | 
 49 |         // --- Define durations for test ---
 50 |         let testActionHighlightDuration: Double = 0.4
 51 |         let testTraversalHighlightDuration: Double = 2.0 // Duration passed to SDK function
 52 |         let testDelayNano: UInt64 = 150_000_000
 53 | 
 54 |         // --- Action Sequence with Highlighting ---
 55 |         fputs("info: Test run - Calling writeTextWithActionAndTraversalHighlight for '2*3='...\n", stderr)
 56 |         let result = try await CombinedActions.writeTextWithActionAndTraversalHighlight(
 57 |             text: "2*3=",
 58 |             pid: pid,
 59 |             onlyVisibleElements: true,
 60 |             actionHighlightDuration: testActionHighlightDuration,
 61 |             traversalHighlightDuration: testTraversalHighlightDuration, // Pass 2.0s duration
 62 |             delayAfterActionNano: testDelayNano
 63 |         )
 64 |         fputs("info: Test run - writeTextWithActionAndTraversalHighlight returned (highlighting may start appearing).\n", stderr)
 65 | 
 66 |         // --- Print Diff ---
 67 |         fputs("info: --- Traversal Diff Results (Highlighted) ---\n", stderr)
 68 | 
 69 |         fputs("info: Added Elements (\(result.diff.added.count)):\n", stderr)
 70 |         if result.diff.added.isEmpty {
 71 |             fputs("info:   (None)\n", stderr)
 72 |         } else {
 73 |             for element in result.diff.added {
 74 |                 fputs("info:   + Role: \(element.role), Text: \(element.text ?? "nil"), Pos: (\(element.x ?? -1), \(element.y ?? -1)), Size: (\(element.width ?? -1) x \(element.height ?? -1))\n", stderr)
 75 |             }
 76 |         }
 77 | 
 78 |         fputs("info: Removed Elements (\(result.diff.removed.count)):\n", stderr)
 79 |         if result.diff.removed.isEmpty {
 80 |              fputs("info:   (None)\n", stderr)
 81 |         } else {
 82 |             for element in result.diff.removed {
 83 |                  fputs("info:   - Role: \(element.role), Text: \(element.text ?? "nil"), Pos: (\(element.x ?? -1), \(element.y ?? -1)), Size: (\(element.width ?? -1) x \(element.height ?? -1))\n", stderr)
 84 |             }
 85 |         }
 86 |         fputs("info: --- End Diff Results (Highlighted) ---\n", stderr)
 87 | 
 88 |         // --- Wait for Traversal Highlighting Animations BEFORE Test Ends ---
 89 |         // The SDK no longer explicitly closes highlight windows, relying on OS cleanup.
 90 |         // This wait ensures the highlight *animations* have sufficient time to visually
 91 |         // complete before tearDown terminates the Calculator app. It also provides
 92 |         // a buffer for general UI settling.
 93 |         let highlightCompletionWaitSeconds = testTraversalHighlightDuration + 0.2 // Wait slightly longer than animation
 94 |         fputs("info: Test run - Waiting \(highlightCompletionWaitSeconds) seconds for traversal highlighting animations to complete...\n", stderr)
 95 |         try await Task.sleep(nanoseconds: UInt64(highlightCompletionWaitSeconds * 1_000_000_000))
 96 |         fputs("info: Test run - Traversal highlight animation wait finished. Proceeding to finish test function.\n", stderr)
 97 |         // --- END WAIT ---
 98 | 
 99 |         fputs("info: === Finished testCalculatorMultiplyWithActionAndTraversalHighlight ===\n", stderr)
100 |     }
101 |     // --- END TEST ---
102 | 
103 |     // Add more test methods for clickWithDiff, pressKeyWithDiff etc.
104 |     // You can add similar tests for clickWithActionAndTraversalHighlight and pressKeyWithActionAndTraversalHighlight
105 |     // For click tests, you might need to first traverse to find the coordinates of a button
106 |     // (e.g., the '5' button) and then pass those coordinates to the click function.
107 | }
108 | 


--------------------------------------------------------------------------------
/Tests/MacosUseSDKTests/CombinedActionsFocusVisualizationTests.swift:
--------------------------------------------------------------------------------
  1 | import XCTest
  2 | @testable import MacosUseSDK
  3 | import AppKit
  4 | 
  5 | final class CombinedActionsFocusVisualizationTests: XCTestCase {
  6 | 
  7 |     var textEditPID: pid_t?
  8 |     var textEditApp: NSRunningApplication?
  9 |     var temporaryFileURL: URL?
 10 | 
 11 |     // Launch TextEdit before each test, opening a temporary file
 12 |     override func setUp() async throws {
 13 |         // Create a temporary file URL
 14 |         temporaryFileURL = FileManager.default.temporaryDirectory
 15 |             .appendingPathComponent("testFocus_\(UUID().uuidString).txt") // Unique name
 16 | 
 17 |         guard let fileURL = temporaryFileURL else {
 18 |             XCTFail("Failed to create temporary file URL")
 19 |             return
 20 |         }
 21 | 
 22 |         // Create an empty file
 23 |         do {
 24 |             try "".write(to: fileURL, atomically: true, encoding: .utf8)
 25 |             fputs("info: Focus Test Setup - Created temporary file at: \(fileURL.path)\n", stderr)
 26 |         } catch {
 27 |             XCTFail("Failed to create temporary file: \(error)")
 28 |             return
 29 |         }
 30 | 
 31 |         // Ensure accessibility is granted (user must pre-authorize)
 32 |         fputs("info: Focus Test Setup - Launching TextEdit to open temporary file...\n", stderr)
 33 | 
 34 |         let textEditAppURL = URL(fileURLWithPath: "/System/Applications/TextEdit.app")
 35 |         let config = NSWorkspace.OpenConfiguration()
 36 |         config.activates = true // Ensure it comes to the front and likely grabs focus
 37 | 
 38 |         // Open the temporary file with TextEdit
 39 |         textEditApp = try await NSWorkspace.shared.open(
 40 |             [fileURL], // Pass the URL of the file to open in an array
 41 |             withApplicationAt: textEditAppURL,
 42 |             configuration: config
 43 |         )
 44 | 
 45 |         textEditPID = textEditApp?.processIdentifier
 46 |         XCTAssertNotNil(textEditPID, "Failed to get TextEdit PID")
 47 |         fputs("info: Focus Test Setup - TextEdit launched with PID \(textEditPID!) opening \(fileURL.lastPathComponent)\n", stderr)
 48 | 
 49 |         // Give it time to fully launch, open the file, and potentially set initial focus
 50 |         try await Task.sleep(nanoseconds: 1_500_000_000) // 1.5 seconds
 51 |     }
 52 | 
 53 |     // Quit TextEdit and delete the temporary file after each test
 54 |     override func tearDown() async throws {
 55 |         fputs("info: Focus Test Teardown - Terminating TextEdit (PID: \(textEditPID ?? -1)) and cleaning up file...\n", stderr)
 56 | 
 57 |         // --- Close TextEdit Document (AppleScript part remains the same) ---
 58 |         if let pid = textEditPID {
 59 |             let script = """
 60 |             tell application "System Events"
 61 |                 tell process id \(pid)
 62 |                     try
 63 |                         # Get the front window (document)
 64 |                         set frontWindow to first window
 65 | 
 66 |                         # Check if it's the document window we opened
 67 |                         # This might need adjustment based on exact window naming
 68 |                         if name of frontWindow contains "testFocus_" then
 69 |                            # Perform close action (Command-W)
 70 |                            keystroke "w" using {command down}
 71 |                            delay 0.2 # Small delay
 72 | 
 73 |                            # Check if a "Don't Save" sheet appeared (unlikely for empty/unchanged file)
 74 |                            if exists sheet 1 of frontWindow then
 75 |                                key code 36 # Return key code (usually selects default like "Don't Save")
 76 |                                delay 0.2
 77 |                            end if
 78 |                         end if
 79 |                     end try
 80 |                 end tell
 81 |             end tell
 82 |             tell application "TextEdit" to if it is running then quit saving no # Add 'saving no' for clarity
 83 |             """
 84 |             let process = Process()
 85 |             process.executableURL = URL(fileURLWithPath: "/usr/bin/osascript")
 86 |             process.arguments = ["-e", script]
 87 |             do {
 88 |                 try process.run()
 89 |                 process.waitUntilExit()
 90 |                  fputs("info: Focus Test Teardown - Attempted clean close via AppleScript (Status: \(process.terminationStatus))\n", stderr)
 91 |             } catch {
 92 |                  fputs("error: Focus Test Teardown - AppleScript execution failed: \(error)\n", stderr)
 93 |             }
 94 |         }
 95 | 
 96 |         // Fallback or alternative: Force terminate if still running
 97 |         if textEditApp?.isTerminated == false {
 98 |             fputs("info: Focus Test Teardown - Forcing termination...\n", stderr)
 99 |             textEditApp?.forceTerminate()
100 |             // Add a small delay after force termination
101 |              try await Task.sleep(nanoseconds: 200_000_000) // 0.2 seconds
102 |         }
103 | 
104 |         // --- Delete the temporary file ---
105 |         if let fileURL = temporaryFileURL {
106 |             do {
107 |                 try FileManager.default.removeItem(at: fileURL)
108 |                 fputs("info: Focus Test Teardown - Successfully deleted temporary file: \(fileURL.path)\n", stderr)
109 |             } catch {
110 |                 // Log error but don't fail the test teardown for this
111 |                 fputs("warning: Focus Test Teardown - Could not delete temporary file: \(error)\n", stderr)
112 |             }
113 |             temporaryFileURL = nil // Clear the reference
114 |         }
115 | 
116 |         // Allow time for termination and general settling before next test.
117 |         try await Task.sleep(nanoseconds: 300_000_000) // 0.3 seconds
118 |         textEditApp = nil
119 |         textEditPID = nil
120 |         fputs("info: Focus Test Teardown - Finished.\n", stderr)
121 |     }
122 | 
123 |     // Test: Write text to TextEdit, expecting focus to be on the text area
124 |     // Verify by checking logs for the "successfully found focused element center" message.
125 |     @MainActor
126 |     func testTextEditFocusAndWriteVisualization() async throws {
127 |         guard let pid = textEditPID else {
128 |             XCTFail("TextEdit PID not available")
129 |             return
130 |         }
131 | 
132 |         fputs("\ninfo: === Starting testTextEditFocusAndWriteVisualization ===\n", stderr)
133 | 
134 |         // --- Define durations ---
135 |         let testActionHighlightDuration: Double = 0.6
136 |         let testTraversalHighlightDuration: Double = 1.5 // Shorter for this test
137 |         let testDelayNano: UInt64 = 200_000_000 // 0.2s
138 |         let observationDelaySeconds: Double = 1.0 // Time to observe action visualization
139 | 
140 |         // --- Action Sequence ---
141 |         // We expect TextEdit's main text view to have focus after activation in setUp.
142 |         fputs("info: Test run - Calling writeTextWithActionAndTraversalHighlight for 'Hello TextEdit!'...\n", stderr)
143 |         let result = try await CombinedActions.writeTextWithActionAndTraversalHighlight(
144 |             text: "Hello TextEdit!",
145 |             pid: pid,
146 |             onlyVisibleElements: true, // Doesn't affect focus check, but standard for combined action
147 |             actionHighlightDuration: testActionHighlightDuration,
148 |             traversalHighlightDuration: testTraversalHighlightDuration,
149 |             delayAfterActionNano: testDelayNano
150 |         )
151 |         fputs("info: Test run - writeTextWithActionAndTraversalHighlight returned.\n", stderr)
152 |         fputs("info: Test run - Check logs above for 'successfully found focused element center' from writeTextAndVisualize.\n", stderr)
153 |         // You can examine result.diff if needed, but the focus is on the visualization attempt.
154 | 
155 |         // --- Short Wait for Visual Observation ---
156 |         // Allows time to visually observe the action/highlight animations before teardown.
157 |         fputs("info: Test run - Waiting \(observationDelaySeconds) seconds for visual observation...\n", stderr)
158 |         try await Task.sleep(nanoseconds: UInt64(observationDelaySeconds * 1_000_000_000))
159 |         fputs("info: Test run - Observation wait finished.\n", stderr)
160 | 
161 |         fputs("info: === Finished testTextEditFocusAndWriteVisualization ===\n", stderr)
162 |         // Teardown will handle closing TextEdit.
163 |     }
164 | }
165 | 


--------------------------------------------------------------------------------