├── .gitignore ├── LICENSE ├── README.md ├── WhisperKit-Sample.xcodeproj ├── project.pbxproj ├── project.xcworkspace │ ├── contents.xcworkspacedata │ └── xcshareddata │ │ └── swiftpm │ │ └── Package.resolved └── xcshareddata │ └── xcschemes │ └── WhisperKit-Sample.xcscheme └── WhisperKit-Sample ├── Assets.xcassets ├── AccentColor.colorset │ └── Contents.json ├── AppIcon.appiconset │ └── Contents.json └── Contents.json ├── ContentView.swift ├── Info.plist ├── Preview Content └── Preview Assets.xcassets │ └── Contents.json ├── TranscriptionService.swift ├── WhisperKit-Sample.entitlements ├── WhisperingApp.swift └── test.wav /.gitignore: ---------------------------------------------------------------------------------------------------- 1 | # Xcode 2 | # 3 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 4 | 5 | ## User settings 6 | xcuserdata/ 7 | 8 | ## Obj-C/Swift specific 9 | *.hmap 10 | 11 | ## App packaging 12 | *.ipa 13 | *.dSYM.zip 14 | *.dSYM 15 | 16 | ## Playgrounds 17 | timeline.xctimeline 18 | playground.xcworkspace 19 | 20 | # Swift Package Manager 21 | # 22 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. 23 | # Packages/ 24 | # Package.pins 25 | # Package.resolved 26 | # *.xcodeproj 27 | # 28 | # Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata 29 | # hence it is not needed unless you have added a package configuration file to your project 30 | # .swiftpm 31 | 32 | .build/ 33 | 34 | # CocoaPods 35 | # 36 | # We recommend against adding the Pods directory to your .gitignore. 
However 37 | # you should judge for yourself, the pros and cons are mentioned at: 38 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 39 | # 40 | # Pods/ 41 | # 42 | # Add this line if you want to avoid checking in source code from the Xcode workspace 43 | # *.xcworkspace 44 | 45 | # Carthage 46 | # 47 | # Add this line if you want to avoid checking in source code from Carthage dependencies. 48 | # Carthage/Checkouts 49 | 50 | Carthage/Build/ 51 | 52 | # fastlane 53 | # 54 | # It is recommended to not store the screenshots in the git repo. 55 | # Instead, use fastlane to re-generate the screenshots whenever they are needed. 56 | # For more information about the recommended setup visit: 57 | # https://docs.fastlane.tools/best-practices/source-control/#source-control 58 | 59 | fastlane/report.xml 60 | fastlane/Preview.html 61 | fastlane/screenshots/**/*.png 62 | fastlane/test_output 63 | ---------------------------------------------------------------------------------------------------- /LICENSE: ---------------------------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Rudrank Riyam 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | ---------------------------------------------------------------------------------------------------- /README.md: ---------------------------------------------------------------------------------------------------- 1 | # WhisperKit Sample 2 | 3 | This is a sample SwiftUI project showing how to use [**WhisperKit**](https://github.com/argmaxinc/WhisperKit) to transcribe audio across Apple platforms. The app has audio recording and transcription, using WhisperKit and AVFoundation. 4 | 5 | ## Features 6 | 7 | - Record audio and view transcription results almost instantly. 8 | - Simple UI with controls for starting and stopping recordings. 9 | - Can be used across various Apple platforms with SwiftUI. 10 | 11 | ## Running the App 12 | 13 | ```bash 14 | git clone https://github.com/rudrankriyam/whispering.git 15 | cd whispering 16 | ``` 17 | 18 | Open the project in Xcode, build and run the app on a **real** device. 19 | 20 | ## Usage 21 | 22 | - Record Audio: Tap the microphone icon to start recording audio. The app will first ask for microphone permission and then download the model. The recording status will be indicated by a visual waveform and a “Recording…” label. 23 | - Transcribe: Once recording is stopped, the audio file will be transcribed, and results will appear in the text view. 24 | 25 | ## License 26 | 27 | This project is licensed under the MIT License - see the LICENSE file for details. 28 | 29 | --- 30 | 31 | Enjoy using WhisperKit-Sample! 
If you have questions or feedback, feel free to reach out to me on [X @rudrankriyam](https://x.com/rudrankriyam)! 32 | ---------------------------------------------------------------------------------------------------- /WhisperKit-Sample.xcodeproj/project.pbxproj: ---------------------------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 77; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 0A125A692CE0C3EE0044215E /* WhisperKit in Frameworks */ = {isa = PBXBuildFile; productRef = 0A125A682CE0C3EE0044215E /* WhisperKit */; }; 11 | /* End PBXBuildFile section */ 12 | 13 | /* Begin PBXFileReference section */ 14 | 0ADDF2952CE0C3BF008793AC /* WhisperKit-Sample.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "WhisperKit-Sample.app"; sourceTree = BUILT_PRODUCTS_DIR; }; 15 | /* End PBXFileReference section */ 16 | 17 | /* Begin PBXFileSystemSynchronizedBuildFileExceptionSet section */ 18 | 0AA843562CE74DFB0043F1DA /* Exceptions for "WhisperKit-Sample" folder in "WhisperKit-Sample" target */ = { 19 | isa = PBXFileSystemSynchronizedBuildFileExceptionSet; 20 | membershipExceptions = ( 21 | Info.plist, 22 | ); 23 | target = 0ADDF2942CE0C3BF008793AC /* WhisperKit-Sample */; 24 | }; 25 | /* End PBXFileSystemSynchronizedBuildFileExceptionSet section */ 26 | 27 | /* Begin PBXFileSystemSynchronizedRootGroup section */ 28 | 0ADDF2972CE0C3BF008793AC /* WhisperKit-Sample */ = { 29 | isa = PBXFileSystemSynchronizedRootGroup; 30 | exceptions = ( 31 | 0AA843562CE74DFB0043F1DA /* Exceptions for "WhisperKit-Sample" folder in "WhisperKit-Sample" target */, 32 | ); 33 | path = "WhisperKit-Sample"; 34 | sourceTree = ""; 35 | }; 36 | /* End PBXFileSystemSynchronizedRootGroup section */ 37 | 38 | /* Begin PBXFrameworksBuildPhase section */ 39 | 0ADDF2922CE0C3BF008793AC /* Frameworks */ = { 40 | isa = 
PBXFrameworksBuildPhase; 41 | buildActionMask = 2147483647; 42 | files = ( 43 | 0A125A692CE0C3EE0044215E /* WhisperKit in Frameworks */, 44 | ); 45 | runOnlyForDeploymentPostprocessing = 0; 46 | }; 47 | /* End PBXFrameworksBuildPhase section */ 48 | 49 | /* Begin PBXGroup section */ 50 | 0ADDF28C2CE0C3BF008793AC = { 51 | isa = PBXGroup; 52 | children = ( 53 | 0ADDF2972CE0C3BF008793AC /* WhisperKit-Sample */, 54 | 0ADDF2962CE0C3BF008793AC /* Products */, 55 | ); 56 | sourceTree = ""; 57 | }; 58 | 0ADDF2962CE0C3BF008793AC /* Products */ = { 59 | isa = PBXGroup; 60 | children = ( 61 | 0ADDF2952CE0C3BF008793AC /* WhisperKit-Sample.app */, 62 | ); 63 | name = Products; 64 | sourceTree = ""; 65 | }; 66 | /* End PBXGroup section */ 67 | 68 | /* Begin PBXNativeTarget section */ 69 | 0ADDF2942CE0C3BF008793AC /* WhisperKit-Sample */ = { 70 | isa = PBXNativeTarget; 71 | buildConfigurationList = 0ADDF2A32CE0C3C0008793AC /* Build configuration list for PBXNativeTarget "WhisperKit-Sample" */; 72 | buildPhases = ( 73 | 0ADDF2912CE0C3BF008793AC /* Sources */, 74 | 0ADDF2922CE0C3BF008793AC /* Frameworks */, 75 | 0ADDF2932CE0C3BF008793AC /* Resources */, 76 | ); 77 | buildRules = ( 78 | ); 79 | dependencies = ( 80 | ); 81 | fileSystemSynchronizedGroups = ( 82 | 0ADDF2972CE0C3BF008793AC /* WhisperKit-Sample */, 83 | ); 84 | name = "WhisperKit-Sample"; 85 | packageProductDependencies = ( 86 | 0A125A682CE0C3EE0044215E /* WhisperKit */, 87 | ); 88 | productName = Whispering; 89 | productReference = 0ADDF2952CE0C3BF008793AC /* WhisperKit-Sample.app */; 90 | productType = "com.apple.product-type.application"; 91 | }; 92 | /* End PBXNativeTarget section */ 93 | 94 | /* Begin PBXProject section */ 95 | 0ADDF28D2CE0C3BF008793AC /* Project object */ = { 96 | isa = PBXProject; 97 | attributes = { 98 | BuildIndependentTargetsInParallel = 1; 99 | LastSwiftUpdateCheck = 1620; 100 | LastUpgradeCheck = 1620; 101 | TargetAttributes = { 102 | 0ADDF2942CE0C3BF008793AC = { 103 | CreatedOnToolsVersion = 
16.2; 104 | }; 105 | }; 106 | }; 107 | buildConfigurationList = 0ADDF2902CE0C3BF008793AC /* Build configuration list for PBXProject "WhisperKit-Sample" */; 108 | developmentRegion = en; 109 | hasScannedForEncodings = 0; 110 | knownRegions = ( 111 | en, 112 | Base, 113 | ); 114 | mainGroup = 0ADDF28C2CE0C3BF008793AC; 115 | minimizedProjectReferenceProxies = 1; 116 | packageReferences = ( 117 | 0A125A672CE0C3EE0044215E /* XCRemoteSwiftPackageReference "WhisperKit" */, 118 | ); 119 | preferredProjectObjectVersion = 77; 120 | productRefGroup = 0ADDF2962CE0C3BF008793AC /* Products */; 121 | projectDirPath = ""; 122 | projectRoot = ""; 123 | targets = ( 124 | 0ADDF2942CE0C3BF008793AC /* WhisperKit-Sample */, 125 | ); 126 | }; 127 | /* End PBXProject section */ 128 | 129 | /* Begin PBXResourcesBuildPhase section */ 130 | 0ADDF2932CE0C3BF008793AC /* Resources */ = { 131 | isa = PBXResourcesBuildPhase; 132 | buildActionMask = 2147483647; 133 | files = ( 134 | ); 135 | runOnlyForDeploymentPostprocessing = 0; 136 | }; 137 | /* End PBXResourcesBuildPhase section */ 138 | 139 | /* Begin PBXSourcesBuildPhase section */ 140 | 0ADDF2912CE0C3BF008793AC /* Sources */ = { 141 | isa = PBXSourcesBuildPhase; 142 | buildActionMask = 2147483647; 143 | files = ( 144 | ); 145 | runOnlyForDeploymentPostprocessing = 0; 146 | }; 147 | /* End PBXSourcesBuildPhase section */ 148 | 149 | /* Begin XCBuildConfiguration section */ 150 | 0ADDF2A12CE0C3C0008793AC /* Debug */ = { 151 | isa = XCBuildConfiguration; 152 | buildSettings = { 153 | ALWAYS_SEARCH_USER_PATHS = NO; 154 | ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES; 155 | CLANG_ANALYZER_NONNULL = YES; 156 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 157 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; 158 | CLANG_ENABLE_MODULES = YES; 159 | CLANG_ENABLE_OBJC_ARC = YES; 160 | CLANG_ENABLE_OBJC_WEAK = YES; 161 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 162 | CLANG_WARN_BOOL_CONVERSION = YES; 163 | 
CLANG_WARN_COMMA = YES; 164 | CLANG_WARN_CONSTANT_CONVERSION = YES; 165 | CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; 166 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 167 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 168 | CLANG_WARN_EMPTY_BODY = YES; 169 | CLANG_WARN_ENUM_CONVERSION = YES; 170 | CLANG_WARN_INFINITE_RECURSION = YES; 171 | CLANG_WARN_INT_CONVERSION = YES; 172 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; 173 | CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; 174 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; 175 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 176 | CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; 177 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 178 | CLANG_WARN_STRICT_PROTOTYPES = YES; 179 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 180 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 181 | CLANG_WARN_UNREACHABLE_CODE = YES; 182 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 183 | COPY_PHASE_STRIP = NO; 184 | DEBUG_INFORMATION_FORMAT = dwarf; 185 | ENABLE_STRICT_OBJC_MSGSEND = YES; 186 | ENABLE_TESTABILITY = YES; 187 | ENABLE_USER_SCRIPT_SANDBOXING = YES; 188 | GCC_C_LANGUAGE_STANDARD = gnu17; 189 | GCC_DYNAMIC_NO_PIC = NO; 190 | GCC_NO_COMMON_BLOCKS = YES; 191 | GCC_OPTIMIZATION_LEVEL = 0; 192 | GCC_PREPROCESSOR_DEFINITIONS = ( 193 | "DEBUG=1", 194 | "$(inherited)", 195 | ); 196 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 197 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 198 | GCC_WARN_UNDECLARED_SELECTOR = YES; 199 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 200 | GCC_WARN_UNUSED_FUNCTION = YES; 201 | GCC_WARN_UNUSED_VARIABLE = YES; 202 | IPHONEOS_DEPLOYMENT_TARGET = 18.2; 203 | LOCALIZATION_PREFERS_STRING_CATALOGS = YES; 204 | MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; 205 | MTL_FAST_MATH = YES; 206 | ONLY_ACTIVE_ARCH = YES; 207 | SDKROOT = iphoneos; 208 | SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)"; 209 | SWIFT_OPTIMIZATION_LEVEL = "-Onone"; 210 | }; 211 | name = Debug; 212 | }; 213 | 0ADDF2A22CE0C3C0008793AC /* Release */ = { 214 
| isa = XCBuildConfiguration; 215 | buildSettings = { 216 | ALWAYS_SEARCH_USER_PATHS = NO; 217 | ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES; 218 | CLANG_ANALYZER_NONNULL = YES; 219 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 220 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; 221 | CLANG_ENABLE_MODULES = YES; 222 | CLANG_ENABLE_OBJC_ARC = YES; 223 | CLANG_ENABLE_OBJC_WEAK = YES; 224 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 225 | CLANG_WARN_BOOL_CONVERSION = YES; 226 | CLANG_WARN_COMMA = YES; 227 | CLANG_WARN_CONSTANT_CONVERSION = YES; 228 | CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; 229 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 230 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 231 | CLANG_WARN_EMPTY_BODY = YES; 232 | CLANG_WARN_ENUM_CONVERSION = YES; 233 | CLANG_WARN_INFINITE_RECURSION = YES; 234 | CLANG_WARN_INT_CONVERSION = YES; 235 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; 236 | CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; 237 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; 238 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 239 | CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; 240 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 241 | CLANG_WARN_STRICT_PROTOTYPES = YES; 242 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 243 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 244 | CLANG_WARN_UNREACHABLE_CODE = YES; 245 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 246 | COPY_PHASE_STRIP = NO; 247 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 248 | ENABLE_NS_ASSERTIONS = NO; 249 | ENABLE_STRICT_OBJC_MSGSEND = YES; 250 | ENABLE_USER_SCRIPT_SANDBOXING = YES; 251 | GCC_C_LANGUAGE_STANDARD = gnu17; 252 | GCC_NO_COMMON_BLOCKS = YES; 253 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 254 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 255 | GCC_WARN_UNDECLARED_SELECTOR = YES; 256 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 257 | GCC_WARN_UNUSED_FUNCTION = YES; 258 | GCC_WARN_UNUSED_VARIABLE = YES; 259 | IPHONEOS_DEPLOYMENT_TARGET = 18.2; 
260 | LOCALIZATION_PREFERS_STRING_CATALOGS = YES; 261 | MTL_ENABLE_DEBUG_INFO = NO; 262 | MTL_FAST_MATH = YES; 263 | SDKROOT = iphoneos; 264 | SWIFT_COMPILATION_MODE = wholemodule; 265 | VALIDATE_PRODUCT = YES; 266 | }; 267 | name = Release; 268 | }; 269 | 0ADDF2A42CE0C3C0008793AC /* Debug */ = { 270 | isa = XCBuildConfiguration; 271 | buildSettings = { 272 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 273 | ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; 274 | CODE_SIGN_ENTITLEMENTS = "WhisperKit-Sample/WhisperKit-Sample.entitlements"; 275 | CODE_SIGN_STYLE = Automatic; 276 | CURRENT_PROJECT_VERSION = 1; 277 | DEVELOPMENT_ASSET_PATHS = "\"WhisperKit-Sample/Preview Content\""; 278 | DEVELOPMENT_TEAM = YQZQG7N4WG; 279 | ENABLE_HARDENED_RUNTIME = YES; 280 | ENABLE_PREVIEWS = YES; 281 | GENERATE_INFOPLIST_FILE = YES; 282 | INFOPLIST_FILE = "WhisperKit-Sample/Info.plist"; 283 | INFOPLIST_KEY_NSAppleEventsUsageDescription = "Whispering needs accessibility access to listen for the F5 key to start/stop recording."; 284 | INFOPLIST_KEY_NSMicrophoneUsageDescription = "We need access to your microphone to record audio for transcription"; 285 | INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES; 286 | INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; 287 | INFOPLIST_KEY_UILaunchScreen_Generation = YES; 288 | INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; 289 | INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; 290 | IPHONEOS_DEPLOYMENT_TARGET = 16.0; 291 | LD_RUNPATH_SEARCH_PATHS = ( 292 | "$(inherited)", 293 | "@executable_path/Frameworks", 294 | ); 295 | MACOSX_DEPLOYMENT_TARGET = 13.0; 296 | MARKETING_VERSION = 1.0; 297 | PRODUCT_BUNDLE_IDENTIFIER = 
"com.rudrankriyam.whisperkit-sample"; 298 | PRODUCT_NAME = "$(TARGET_NAME)"; 299 | REGISTER_APP_GROUPS = NO; 300 | SUPPORTED_PLATFORMS = "iphoneos iphonesimulator xros xrsimulator"; 301 | SUPPORTS_MACCATALYST = NO; 302 | SUPPORTS_MAC_DESIGNED_FOR_IPHONE_IPAD = NO; 303 | SUPPORTS_XR_DESIGNED_FOR_IPHONE_IPAD = NO; 304 | SWIFT_EMIT_LOC_STRINGS = YES; 305 | SWIFT_VERSION = 5.0; 306 | TARGETED_DEVICE_FAMILY = "1,2,7"; 307 | TVOS_DEPLOYMENT_TARGET = 16.0; 308 | XROS_DEPLOYMENT_TARGET = 1.0; 309 | }; 310 | name = Debug; 311 | }; 312 | 0ADDF2A52CE0C3C0008793AC /* Release */ = { 313 | isa = XCBuildConfiguration; 314 | buildSettings = { 315 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 316 | ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; 317 | CODE_SIGN_ENTITLEMENTS = "WhisperKit-Sample/WhisperKit-Sample.entitlements"; 318 | CODE_SIGN_STYLE = Automatic; 319 | CURRENT_PROJECT_VERSION = 1; 320 | DEVELOPMENT_ASSET_PATHS = "\"WhisperKit-Sample/Preview Content\""; 321 | DEVELOPMENT_TEAM = YQZQG7N4WG; 322 | ENABLE_HARDENED_RUNTIME = YES; 323 | ENABLE_PREVIEWS = YES; 324 | GENERATE_INFOPLIST_FILE = YES; 325 | INFOPLIST_FILE = "WhisperKit-Sample/Info.plist"; 326 | INFOPLIST_KEY_NSAppleEventsUsageDescription = "Whispering needs accessibility access to listen for the F5 key to start/stop recording."; 327 | INFOPLIST_KEY_NSMicrophoneUsageDescription = "We need access to your microphone to record audio for transcription"; 328 | INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES; 329 | INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; 330 | INFOPLIST_KEY_UILaunchScreen_Generation = YES; 331 | INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; 332 | INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft 
UIInterfaceOrientationLandscapeRight"; 333 | IPHONEOS_DEPLOYMENT_TARGET = 16.0; 334 | LD_RUNPATH_SEARCH_PATHS = ( 335 | "$(inherited)", 336 | "@executable_path/Frameworks", 337 | ); 338 | MACOSX_DEPLOYMENT_TARGET = 13.0; 339 | MARKETING_VERSION = 1.0; 340 | PRODUCT_BUNDLE_IDENTIFIER = "com.rudrankriyam.whisperkit-sample"; 341 | PRODUCT_NAME = "$(TARGET_NAME)"; 342 | REGISTER_APP_GROUPS = NO; 343 | SUPPORTED_PLATFORMS = "iphoneos iphonesimulator xros xrsimulator"; 344 | SUPPORTS_MACCATALYST = NO; 345 | SUPPORTS_MAC_DESIGNED_FOR_IPHONE_IPAD = NO; 346 | SUPPORTS_XR_DESIGNED_FOR_IPHONE_IPAD = NO; 347 | SWIFT_EMIT_LOC_STRINGS = YES; 348 | SWIFT_VERSION = 5.0; 349 | TARGETED_DEVICE_FAMILY = "1,2,7"; 350 | TVOS_DEPLOYMENT_TARGET = 16.0; 351 | XROS_DEPLOYMENT_TARGET = 1.0; 352 | }; 353 | name = Release; 354 | }; 355 | /* End XCBuildConfiguration section */ 356 | 357 | /* Begin XCConfigurationList section */ 358 | 0ADDF2902CE0C3BF008793AC /* Build configuration list for PBXProject "WhisperKit-Sample" */ = { 359 | isa = XCConfigurationList; 360 | buildConfigurations = ( 361 | 0ADDF2A12CE0C3C0008793AC /* Debug */, 362 | 0ADDF2A22CE0C3C0008793AC /* Release */, 363 | ); 364 | defaultConfigurationIsVisible = 0; 365 | defaultConfigurationName = Release; 366 | }; 367 | 0ADDF2A32CE0C3C0008793AC /* Build configuration list for PBXNativeTarget "WhisperKit-Sample" */ = { 368 | isa = XCConfigurationList; 369 | buildConfigurations = ( 370 | 0ADDF2A42CE0C3C0008793AC /* Debug */, 371 | 0ADDF2A52CE0C3C0008793AC /* Release */, 372 | ); 373 | defaultConfigurationIsVisible = 0; 374 | defaultConfigurationName = Release; 375 | }; 376 | /* End XCConfigurationList section */ 377 | 378 | /* Begin XCRemoteSwiftPackageReference section */ 379 | 0A125A672CE0C3EE0044215E /* XCRemoteSwiftPackageReference "WhisperKit" */ = { 380 | isa = XCRemoteSwiftPackageReference; 381 | repositoryURL = "https://github.com/argmaxinc/WhisperKit.git"; 382 | requirement = { 383 | branch = main; 384 | kind = branch; 385 | 
}; 386 | }; 387 | /* End XCRemoteSwiftPackageReference section */ 388 | 389 | /* Begin XCSwiftPackageProductDependency section */ 390 | 0A125A682CE0C3EE0044215E /* WhisperKit */ = { 391 | isa = XCSwiftPackageProductDependency; 392 | package = 0A125A672CE0C3EE0044215E /* XCRemoteSwiftPackageReference "WhisperKit" */; 393 | productName = WhisperKit; 394 | }; 395 | /* End XCSwiftPackageProductDependency section */ 396 | }; 397 | rootObject = 0ADDF28D2CE0C3BF008793AC /* Project object */; 398 | } 399 | ---------------------------------------------------------------------------------------------------- /WhisperKit-Sample.xcodeproj/project.xcworkspace/contents.xcworkspacedata: ---------------------------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | ---------------------------------------------------------------------------------------------------- /WhisperKit-Sample.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved: ---------------------------------------------------------------------------------------------------- 1 | { 2 | "originHash" : "be2b508cf91e00ccfdfd83e8177b94c9aee963c0dbf9b604f03fa19a3d85c0d4", 3 | "pins" : [ 4 | { 5 | "identity" : "swift-argument-parser", 6 | "kind" : "remoteSourceControl", 7 | "location" : "https://github.com/apple/swift-argument-parser.git", 8 | "state" : { 9 | "revision" : "c8ed701b513cf5177118a175d85fbbbcd707ab41", 10 | "version" : "1.3.0" 11 | } 12 | }, 13 | { 14 | "identity" : "swift-transformers", 15 | "kind" : "remoteSourceControl", 16 | "location" : "https://github.com/huggingface/swift-transformers.git", 17 | "state" : { 18 | "revision" : "fc6543263e4caed9bf6107466d625cfae9357f08", 19 | "version" : "0.1.8" 20 | } 21 | }, 22 | { 23 | "identity" : "whisperkit", 24 | "kind" : "remoteSourceControl", 25 | "location" : "https://github.com/argmaxinc/WhisperKit.git", 26 | "state" : { 27 | "branch" : "main", 28 | "revision" : "5254d82dbb6de4326d5fbdc5f5a730a50bc1ed08" 29 
| } 30 | } 31 | ], 32 | "version" : 3 33 | } 34 | ---------------------------------------------------------------------------------------------------- /WhisperKit-Sample.xcodeproj/xcshareddata/xcschemes/WhisperKit-Sample.xcscheme: ---------------------------------------------------------------------------------------------------- 1 | 2 | 5 | 9 | 10 | 16 | 22 | 23 | 24 | 25 | 26 | 32 | 33 | 43 | 45 | 51 | 52 | 53 | 54 | 60 | 62 | 68 | 69 | 70 | 71 | 73 | 74 | 77 | 78 | 79 | ---------------------------------------------------------------------------------------------------- /WhisperKit-Sample/Assets.xcassets/AccentColor.colorset/Contents.json: ---------------------------------------------------------------------------------------------------- 1 | { 2 | "colors" : [ 3 | { 4 | "idiom" : "universal" 5 | } 6 | ], 7 | "info" : { 8 | "author" : "xcode", 9 | "version" : 1 10 | } 11 | } 12 | ---------------------------------------------------------------------------------------------------- /WhisperKit-Sample/Assets.xcassets/AppIcon.appiconset/Contents.json: ---------------------------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "universal", 5 | "platform" : "ios", 6 | "size" : "1024x1024" 7 | }, 8 | { 9 | "appearances" : [ 10 | { 11 | "appearance" : "luminosity", 12 | "value" : "dark" 13 | } 14 | ], 15 | "idiom" : "universal", 16 | "platform" : "ios", 17 | "size" : "1024x1024" 18 | }, 19 | { 20 | "appearances" : [ 21 | { 22 | "appearance" : "luminosity", 23 | "value" : "tinted" 24 | } 25 | ], 26 | "idiom" : "universal", 27 | "platform" : "ios", 28 | "size" : "1024x1024" 29 | } 30 | ], 31 | "info" : { 32 | "author" : "xcode", 33 | "version" : 1 34 | } 35 | } 36 | ---------------------------------------------------------------------------------------------------- /WhisperKit-Sample/Assets.xcassets/Contents.json: 
/// Root view of the sample app.
///
/// Shows a progress indicator while `TranscriptionService.isTranscribing` is
/// true, otherwise a microphone/stop button, followed by the most recent
/// transcription text (when non-empty). On macOS it also lists the
/// permissions the service reports as missing.
struct ContentView: View {
    @StateObject private var transcriptionService = TranscriptionService()

    var body: some View {
        VStack {
            if transcriptionService.isTranscribing {
                ProgressView("Transcribing...")
            } else {
                // Tapping the button toggles recording. Previously the action
                // was empty, which left no way to record on platforms where
                // the F5 keyboard monitor is compiled out (`#if os(macOS)`).
                Button(action: toggleRecording) {
                    Image(systemName: transcriptionService.isRecording ? "stop.circle.fill" : "mic.circle.fill")
                        .font(.system(size: 44))
                        .symbolRenderingMode(.multicolor)
                }
                .disabled(transcriptionService.isTranscribing)
            }

            if !transcriptionService.transcriptionResult.isEmpty {
                Text(transcriptionService.transcriptionResult)
                    .padding()
                    .frame(maxWidth: .infinity, alignment: .leading)
            }

            #if os(macOS)
            if !transcriptionService.hasInputMonitoringPermission ||
                !transcriptionService.hasAccessibilityPermissions {
                VStack(spacing: 12) {
                    Text("Required Permissions")
                        .font(.headline)

                    if !transcriptionService.hasInputMonitoringPermission {
                        PermissionRow(
                            title: "Input Monitoring",
                            description: "Required to detect F5 key press",
                            action: {
                                // NOTE(review): the row's button is labelled
                                // "Open Settings" but this only calls
                                // `checkPermission()` — confirm that helper
                                // actually prompts / opens System Settings.
                                InputMonitoringPermission.shared.checkPermission()
                            }
                        )
                    }

                    if !transcriptionService.hasAccessibilityPermissions {
                        PermissionRow(
                            title: "Accessibility",
                            description: "Required to paste transcribed text",
                            action: {
                                // Deep link to the Accessibility privacy pane.
                                // Guarded instead of force-unwrapped so a bad
                                // URL string can never crash the app.
                                if let settingsURL = URL(string: "x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility") {
                                    NSWorkspace.shared.open(settingsURL)
                                }
                            }
                        )
                    }
                }
                .padding()
                .background(Color.gray.opacity(0.1))
                .cornerRadius(10)
                .padding()
            }
            #endif
        }
        .padding()
    }

    /// Starts a recording, or stops the current one and transcribes it.
    ///
    /// Uses only the service's non-private API and mirrors the state
    /// transitions of the service's F5 handler: `isRecording` /
    /// `isTranscribing` are flipped around the async calls, and a failed
    /// transcription is surfaced through `transcriptionResult`.
    private func toggleRecording() {
        Task { @MainActor in
            if transcriptionService.isRecording {
                transcriptionService.isRecording = false
                transcriptionService.isTranscribing = true

                if let recordedURL = await transcriptionService.stopRecording() {
                    do {
                        transcriptionService.transcriptionResult =
                            try await transcriptionService.transcribe(audio: recordedURL)
                    } catch {
                        transcriptionService.transcriptionResult =
                            "Transcription failed: \(error.localizedDescription)"
                    }
                }
                transcriptionService.isTranscribing = false
            } else {
                transcriptionService.isRecording = true
                transcriptionService.transcriptionResult = ""
                await transcriptionService.startRecording()
            }
        }
    }
}

/// One entry in the "Required Permissions" list: a title, a short
/// explanation, and an "Open Settings" button that runs `action`.
struct PermissionRow: View {
    /// Name of the permission, shown in bold.
    let title: String
    /// Why the permission is needed, shown in caption style.
    let description: String
    /// Invoked when the "Open Settings" button is pressed.
    let action: () -> Void

    var body: some View {
        VStack(alignment: .leading) {
            Text(title)
                .font(.subheadline)
                .bold()
            Text(description)
                .font(.caption)
                .foregroundColor(.secondary)
            Button("Open Settings") {
                action()
            }
            .padding(.top, 4)
        }
        .frame(maxWidth: .infinity, alignment: .leading)
        .padding()
        .background(Color.white.opacity(0.5))
        .cornerRadius(8)
    }
}

#Preview {
    ContentView()
}
hasInputMonitoringPermission = false 8 | @Published var hasAccessibilityPermissions = false 9 | @Published var isRecording = false 10 | @Published var isTranscribing = false 11 | @Published var transcriptionResult = "" 12 | 13 | // MARK: - Private Properties 14 | private var whisperKit: WhisperKit? 15 | private var audioRecorder: AVAudioRecorder? 16 | private var keyboardMonitor: Any? 17 | private var recordingURL: URL? 18 | 19 | #if os(iOS) 20 | private var recordingSession: AVAudioSession? 21 | #endif 22 | 23 | // MARK: - Initialization 24 | init() { 25 | print("🚀 TranscriptionService: Initializing...") 26 | checkPermissions() 27 | setupAudioSession() 28 | setupKeyboardMonitor() 29 | } 30 | 31 | // MARK: - Permission Handling 32 | private func checkPermissions() { 33 | #if os(macOS) 34 | print("🔍 Checking all required permissions...") 35 | 36 | // Check Input Monitoring 37 | hasInputMonitoringPermission = inputMonitoring.checkPermission() 38 | if !hasInputMonitoringPermission { 39 | print("⚠️ Input Monitoring permission needed") 40 | inputMonitoring.requestPermission() 41 | } 42 | 43 | // Check Accessibility 44 | let options = [kAXTrustedCheckOptionPrompt.takeUnretainedValue() as String: true] 45 | let trusted = AXIsProcessTrustedWithOptions(options as CFDictionary) 46 | hasAccessibilityPermissions = trusted 47 | print("🔐 Accessibility permission status: \(trusted)") 48 | 49 | // Re-check after a delay to catch permission changes 50 | DispatchQueue.main.asyncAfter(deadline: .now() + 1) { [weak self] in 51 | self?.hasInputMonitoringPermission = self?.inputMonitoring.checkPermission() ?? 
false 52 | self?.hasAccessibilityPermissions = AXIsProcessTrusted() 53 | } 54 | #endif 55 | } 56 |
    // MARK: - Audio Session Setup

    /// Configures and activates the shared `AVAudioSession` for recording.
    ///
    /// Only does work on iOS; on other platforms it just logs. Errors are
    /// logged rather than thrown.
    private func setupAudioSession() {
        print("🎙 Setting up audio session...")
        #if os(iOS)
        let session = AVAudioSession.sharedInstance()
        recordingSession = session
        do {
            try session.setCategory(.playAndRecord, mode: .default)
            try session.setActive(true)
            print("✅ Audio session setup complete")
        } catch {
            print("❌ Failed to set up recording session: \(error)")
        }
        #endif
    }

    // MARK: - Keyboard Monitoring

    #if os(macOS)
    /// Token returned by `NSEvent.addLocalMonitorForEvents`; kept so the
    /// in-app monitor can be removed again (see `deinit`).
    private var localKeyboardMonitor: Any?
    #endif

    /// Installs the local (app active) and global (app in background) F5 monitors.
    ///
    /// Does nothing when Input Monitoring permission is missing. Safe to call
    /// more than once: monitors from a previous call are removed first.
    private func setupKeyboardMonitor() {
        #if os(macOS)
        print("⌨️ Setting up keyboard monitor...")

        guard inputMonitoring.checkPermission() else {
            print("⚠️ Cannot setup keyboard monitor - missing Input Monitoring permission")
            return
        }

        // Remove monitors left over from an earlier call so that a single key
        // press can never trigger the handler twice.
        if let monitor = localKeyboardMonitor {
            NSEvent.removeMonitor(monitor)
            localKeyboardMonitor = nil
        }
        if let monitor = keyboardMonitor {
            NSEvent.removeMonitor(monitor)
            keyboardMonitor = nil
        }

        let f5KeyCode: UInt16 = 96 // F5

        // NOTE: key codes of unrelated keys are deliberately not logged; the
        // global monitor sees every keystroke typed in other applications.

        // Local monitor for when app is active
        localKeyboardMonitor = NSEvent.addLocalMonitorForEvents(matching: .keyDown) { [weak self] event in
            guard event.keyCode == f5KeyCode else { return event }
            // Holding the key produces auto-repeat keyDown events; reacting to
            // them would toggle recording on and off in rapid succession.
            if !event.isARepeat {
                print("🎯 F5 key pressed (local)")
                self?.handleF5Press()
            }
            return nil // Consume the event
        }

        // Global monitor for when app is in background
        keyboardMonitor = NSEvent.addGlobalMonitorForEvents(matching: .keyDown) { [weak self] event in
            guard event.keyCode == f5KeyCode, !event.isARepeat else { return }
            print("🎯 F5 key pressed (global)")
            self?.handleF5Press()
        }
        print("✅ Keyboard monitors successfully set up")
        #endif
    }

    // MARK: - F5 Key Handling

    /// Toggles recording in response to F5.
    ///
    /// - While recording: stops, transcribes the file and pastes the text.
    /// - While a transcription is running: the press is ignored, because a new
    ///   recording would overwrite the file that is currently being read.
    /// - Otherwise: starts a new recording.
    private func handleF5Press() {
        print("🎙 F5 Press Handler: Processing F5 key press")

        Task { @MainActor in
            if audioRecorder?.isRecording == true {
                print("🛑 Stopping recording...")
                isRecording = false
                isTranscribing = true

                if let recordingURL = await stopRecording() {
                    print("🔤 Starting transcription...")
                    do {
                        let text = try await transcribe(audio: recordingURL)
                        transcriptionResult = text
                        print("✅ Transcription completed: \(text)")
                        // Nothing was recognised: do not clobber the clipboard
                        // or send a paste keystroke for an empty string.
                        if !text.isEmpty {
                            pasteTranscribedText(text)
                        }
                    } catch {
                        print("❌ Transcription failed: \(error)")
                        transcriptionResult = "Transcription failed: \(error.localizedDescription)"
                    }
                }
                isTranscribing = false
            } else if isTranscribing {
                print("⏳ Transcription in progress - ignoring F5 press")
            } else {
                print("▶️ Starting recording...")
                transcriptionResult = ""
                await startRecording()
                // Reflect what actually happened: the recorder may have failed to start.
                isRecording = audioRecorder?.isRecording == true
            }
        }
    }

    // MARK: - Recording Functions

    /// Starts recording 16 kHz mono linear PCM to `recording.wav` in the
    /// documents directory.
    ///
    /// Failures are logged, not thrown. After this returns,
    /// `audioRecorder?.isRecording` tells whether recording is running.
    func startRecording() async {
        print("🎙 Starting recording process...")
        let settings: [String: Any] = [
            AVFormatIDKey: Int(kAudioFormatLinearPCM),
            AVSampleRateKey: 16000,
            AVNumberOfChannelsKey: 1,
            AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue
        ]

        do {
            let documentsPath = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
            let url = documentsPath.appendingPathComponent("recording.wav")
            recordingURL = url

            print("📝 Recording to URL: \(url.path)")
            let recorder = try AVAudioRecorder(url: url, settings: settings)
            audioRecorder = recorder

            // `record()` reports failure through its return value, not by throwing.
            guard recorder.record() else {
                print("❌ Failed to start recording: recorder did not start")
                audioRecorder = nil
                return
            }
            print("✅ Recording started successfully")
        } catch {
            print("❌ Failed to start recording: \(error)")
        }
    }

    /// Stops the active recorder.
    ///
    /// - Returns: The URL of the recorded file, or `nil` when there is no
    ///   recorder or no recording URL.
    func stopRecording() async -> URL? {
        print("🛑 Stopping recording process...")

        guard let activeRecorder = audioRecorder, let fileURL = recordingURL else {
            print("⚠️ No active recorder or URL found")
            return nil
        }

        activeRecorder.stop()
        print("✅ Recording stopped successfully")
        print("📍 Recording saved at: \(fileURL.path)")

        return fileURL
    }

    // MARK: - Transcription

    /// Transcribes the audio file at `url`, creating the WhisperKit instance
    /// (model "base") on first use.
    ///
    /// - Parameter url: Location of the audio file to transcribe.
    /// - Returns: The segment texts, each trimmed and joined by single spaces;
    ///   `"No transcription"` when no result is available.
    /// - Throws: Whatever WhisperKit initialisation or transcription throws.
    func transcribe(audio url: URL) async throws -> String {
        print("🎯 Transcribing audio from: \(url.path)")
        if whisperKit == nil {
            print("🔄 Initializing WhisperKit...")
            whisperKit = try await WhisperKit(model: "base")
        }

        print("📝 Starting transcription process...")
        guard let segments = try await whisperKit?.transcribe(audioPath: url.path) else {
            return "No transcription"
        }
        print("✅ Transcription successful")

        // Trim each segment before joining so that padding around segments
        // does not become leading whitespace or double spaces in the result.
        return segments
            .map { $0.text.trimmingCharacters(in: .whitespacesAndNewlines) }
            .filter { !$0.isEmpty }
            .joined(separator: " ")
    }

    // MARK: - Paste Handling

    /// Puts `text` on the general pasteboard and simulates Cmd+V (macOS only).
    ///
    /// The previous pasteboard contents are replaced. If the key events cannot
    /// be created, the text stays on the pasteboard and a failure is logged.
    private func pasteTranscribedText(_ text: String) {
        print("📋 Attempting to paste text: \(text)")

        #if os(macOS)
        let pasteboard = NSPasteboard.general
        pasteboard.clearContents()
        pasteboard.setString(text, forType: .string)

        if !AXIsProcessTrusted() {
            print("⚠️ Accessibility permission not granted - the paste keystroke may be ignored (text is on the clipboard)")
        }

        // Simulate Cmd+V keystroke
        let source = CGEventSource(stateID: .hidSystemState)
        let commandKey: CGKeyCode = 0x37 // Command key (⌘)
        let vKey: CGKeyCode = 0x09       // V key

        guard
            let cmdKeyDown = CGEvent(keyboardEventSource: source, virtualKey: commandKey, keyDown: true),
            let vKeyDown = CGEvent(keyboardEventSource: source, virtualKey: vKey, keyDown: true),
            let vKeyUp = CGEvent(keyboardEventSource: source, virtualKey: vKey, keyDown: false),
            let cmdKeyUp = CGEvent(keyboardEventSource: source, virtualKey: commandKey, keyDown: false)
        else {
            // Previously a nil event was skipped silently and success was still logged.
            print("❌ Failed to create paste key events")
            return
        }

        // Set command flag for V key events
        vKeyDown.flags = .maskCommand
        vKeyUp.flags = .maskCommand

        // Post the events in sequence
        cmdKeyDown.post(tap: .cghidEventTap)
        vKeyDown.post(tap: .cghidEventTap)
        vKeyUp.post(tap: .cghidEventTap)
        cmdKeyUp.post(tap: .cghidEventTap)

        print("✅ Paste command sent successfully")
        #endif
    }

    // MARK: - Cleanup

    /// Removes both keyboard monitors so their handlers stop being invoked.
    deinit {
        print("♻️ TranscriptionService: Cleaning up resources")
        #if os(macOS)
        if let monitor = localKeyboardMonitor {
            NSEvent.removeMonitor(monitor)
        }
        if let monitor = keyboardMonitor {
            NSEvent.removeMonitor(monitor)
            print("🧹 Removed keyboard monitor")
        }
        #endif
    }
}
236 | ---------------------------------------------------------------------------------------------------- /WhisperKit-Sample/WhisperKit-Sample.entitlements: ---------------------------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | com.apple.security.app-sandbox 6 | 7 | com.apple.security.automation.apple-events 8 | 9 | com.apple.security.device.audio-input 10 | 11 | com.apple.security.network.client 12 | 13 | com.apple.security.network.server 14 | 15 | com.apple.security.temporary-exception.apple-events 16 | 17 | com.apple.systempreferences 18 | 19 | com.apple.security.temporary-exception.mach-register.global-name 20 | 21 | com.apple.coreservices.launchservicesd 22 | 23 | com.apple.security.temporary-exception.input-monitoring 24 | 25 | 26 | ---------------------------------------------------------------------------------------------------- /WhisperKit-Sample/WhisperingApp.swift: ---------------------------------------------------------------------------------------------------- 1 | // 2 | // WhisperingApp.swift 3 | // Whispering 4 | // 5 | // Created by Rudrank Riyam on 11/10/24.
6 | // 7 | 8 | import SwiftUI 9 | 10 |
/// Application entry point: one window group whose root view is `ContentView`.
@main
struct WhisperKitSampleApp: App {
    /// The app's single scene.
    var body: some Scene {
        return WindowGroup {
            ContentView()
        }
    }
}
18 | ---------------------------------------------------------------------------------------------------- /WhisperKit-Sample/test.wav: ---------------------------------------------------------------------------------------------------- https://raw.githubusercontent.com/rudrankriyam/WhisperKit-Sample/main/WhisperKit-Sample/test.wav