├── .gitignore ├── Pytorch-CoreML-Spectrogram.xcodeproj ├── project.pbxproj ├── project.xcworkspace │ ├── contents.xcworkspacedata │ └── xcshareddata │ │ └── IDEWorkspaceChecks.plist └── xcuserdata │ └── gerald.xcuserdatad │ └── xcschemes │ └── xcschememanagement.plist ├── Pytorch-CoreML-Spectrogram ├── AppDelegate.swift ├── Assets.xcassets │ ├── AppIcon.appiconset │ │ └── Contents.json │ └── Contents.json ├── Base.lproj │ ├── LaunchScreen.storyboard │ └── Main.storyboard ├── ConvertSpectrogram.swift ├── DrawSpecView.swift ├── Info.plist ├── SceneDelegate.swift ├── ViewController.swift └── wave__melspec.mlmodel ├── Pytorch-CoreML-SpectrogramTests ├── Info.plist ├── Pytorch_CoreML_SpectrogramTests.swift ├── bonjour.wav └── melspec_out.bonjour.json ├── README.md └── python ├── model.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | *~ 4 | xcuserdata 5 | -------------------------------------------------------------------------------- /Pytorch-CoreML-Spectrogram.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 50; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 74568D0D2484589000210F85 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 74568D0C2484589000210F85 /* AppDelegate.swift */; }; 11 | 74568D0F2484589000210F85 /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 74568D0E2484589000210F85 /* SceneDelegate.swift */; }; 12 | 74568D112484589000210F85 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 74568D102484589000210F85 /* ViewController.swift */; }; 13 | 74568D142484589000210F85 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 74568D122484589000210F85 /* Main.storyboard */; }; 14 | 74568D162484589200210F85 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 74568D152484589200210F85 /* Assets.xcassets */; }; 15 | 74568D192484589200210F85 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 74568D172484589200210F85 /* LaunchScreen.storyboard */; }; 16 | 74568D242484589200210F85 /* Pytorch_CoreML_SpectrogramTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 74568D232484589200210F85 /* Pytorch_CoreML_SpectrogramTests.swift */; }; 17 | 74568D2F248461F100210F85 /* bonjour.wav in Resources */ = {isa = PBXBuildFile; fileRef = 74568D2E248461F100210F85 /* bonjour.wav */; }; 18 | 74568D362484691C00210F85 /* ConvertSpectrogram.swift in Sources */ = {isa = PBXBuildFile; fileRef = 74568D352484691C00210F85 /* ConvertSpectrogram.swift */; }; 19 | 74568D372484691C00210F85 /* ConvertSpectrogram.swift in Sources */ = {isa = PBXBuildFile; fileRef = 74568D352484691C00210F85 /* ConvertSpectrogram.swift */; }; 20 | 74568D392484693000210F85 /* DrawSpecView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 74568D382484693000210F85 /* DrawSpecView.swift */; }; 21 | 74568D3B248472FD00210F85 /* melspec_out.bonjour.json in Resources */ = {isa = PBXBuildFile; fileRef = 74568D3A248472FD00210F85 /* melspec_out.bonjour.json */; }; 22 | 74568D3D2484730C00210F85 /* wave__melspec.mlmodel in Sources */ = {isa = PBXBuildFile; fileRef = 74568D3C2484730C00210F85 /* wave__melspec.mlmodel */; }; 23 | 74568D3E2484730C00210F85 /* wave__melspec.mlmodel in Sources */ = {isa = PBXBuildFile; fileRef = 74568D3C2484730C00210F85 /* wave__melspec.mlmodel */; 
}; 24 | /* End PBXBuildFile section */ 25 | 26 | /* Begin PBXContainerItemProxy section */ 27 | 74568D202484589200210F85 /* PBXContainerItemProxy */ = { 28 | isa = PBXContainerItemProxy; 29 | containerPortal = 74568D012484589000210F85 /* Project object */; 30 | proxyType = 1; 31 | remoteGlobalIDString = 74568D082484589000210F85; 32 | remoteInfo = "Pytorch-CoreML-Spectrogram"; 33 | }; 34 | /* End PBXContainerItemProxy section */ 35 | 36 | /* Begin PBXFileReference section */ 37 | 74568D092484589000210F85 /* Pytorch-CoreML-Spectrogram.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "Pytorch-CoreML-Spectrogram.app"; sourceTree = BUILT_PRODUCTS_DIR; }; 38 | 74568D0C2484589000210F85 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; 39 | 74568D0E2484589000210F85 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = ""; }; 40 | 74568D102484589000210F85 /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = ""; }; 41 | 74568D132484589000210F85 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = ""; }; 42 | 74568D152484589200210F85 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; 43 | 74568D182484589200210F85 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = ""; }; 44 | 74568D1A2484589200210F85 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 45 | 74568D1F2484589200210F85 /* Pytorch-CoreML-SpectrogramTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = "Pytorch-CoreML-SpectrogramTests.xctest"; sourceTree = BUILT_PRODUCTS_DIR; }; 46 | 74568D232484589200210F85 /* Pytorch_CoreML_SpectrogramTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Pytorch_CoreML_SpectrogramTests.swift; sourceTree = ""; }; 47 | 74568D252484589200210F85 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 48 | 74568D2E248461F100210F85 /* bonjour.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = bonjour.wav; sourceTree = ""; }; 49 | 74568D352484691C00210F85 /* ConvertSpectrogram.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ConvertSpectrogram.swift; sourceTree = ""; }; 50 | 74568D382484693000210F85 /* DrawSpecView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = DrawSpecView.swift; sourceTree = ""; }; 51 | 74568D3A248472FD00210F85 /* melspec_out.bonjour.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = melspec_out.bonjour.json; sourceTree = ""; }; 52 | 74568D3C2484730C00210F85 /* wave__melspec.mlmodel */ = {isa = PBXFileReference; lastKnownFileType = file.mlmodel; path = wave__melspec.mlmodel; sourceTree = ""; }; 53 | /* End PBXFileReference section */ 54 | 55 | /* Begin PBXFrameworksBuildPhase section */ 56 | 74568D062484589000210F85 /* Frameworks */ = { 57 | isa = PBXFrameworksBuildPhase; 58 | 
buildActionMask = 2147483647; 59 | files = ( 60 | ); 61 | runOnlyForDeploymentPostprocessing = 0; 62 | }; 63 | 74568D1C2484589200210F85 /* Frameworks */ = { 64 | isa = PBXFrameworksBuildPhase; 65 | buildActionMask = 2147483647; 66 | files = ( 67 | ); 68 | runOnlyForDeploymentPostprocessing = 0; 69 | }; 70 | /* End PBXFrameworksBuildPhase section */ 71 | 72 | /* Begin PBXGroup section */ 73 | 74568D002484589000210F85 = { 74 | isa = PBXGroup; 75 | children = ( 76 | 74568D0B2484589000210F85 /* Pytorch-CoreML-Spectrogram */, 77 | 74568D222484589200210F85 /* Pytorch-CoreML-SpectrogramTests */, 78 | 74568D0A2484589000210F85 /* Products */, 79 | ); 80 | sourceTree = ""; 81 | }; 82 | 74568D0A2484589000210F85 /* Products */ = { 83 | isa = PBXGroup; 84 | children = ( 85 | 74568D092484589000210F85 /* Pytorch-CoreML-Spectrogram.app */, 86 | 74568D1F2484589200210F85 /* Pytorch-CoreML-SpectrogramTests.xctest */, 87 | ); 88 | name = Products; 89 | sourceTree = ""; 90 | }; 91 | 74568D0B2484589000210F85 /* Pytorch-CoreML-Spectrogram */ = { 92 | isa = PBXGroup; 93 | children = ( 94 | 74568D0C2484589000210F85 /* AppDelegate.swift */, 95 | 74568D0E2484589000210F85 /* SceneDelegate.swift */, 96 | 74568D102484589000210F85 /* ViewController.swift */, 97 | 74568D352484691C00210F85 /* ConvertSpectrogram.swift */, 98 | 74568D3C2484730C00210F85 /* wave__melspec.mlmodel */, 99 | 74568D122484589000210F85 /* Main.storyboard */, 100 | 74568D382484693000210F85 /* DrawSpecView.swift */, 101 | 74568D152484589200210F85 /* Assets.xcassets */, 102 | 74568D172484589200210F85 /* LaunchScreen.storyboard */, 103 | 74568D1A2484589200210F85 /* Info.plist */, 104 | ); 105 | path = "Pytorch-CoreML-Spectrogram"; 106 | sourceTree = ""; 107 | }; 108 | 74568D222484589200210F85 /* Pytorch-CoreML-SpectrogramTests */ = { 109 | isa = PBXGroup; 110 | children = ( 111 | 74568D232484589200210F85 /* Pytorch_CoreML_SpectrogramTests.swift */, 112 | 74568D2E248461F100210F85 /* bonjour.wav */, 113 | 74568D3A248472FD00210F85 /* melspec_out.bonjour.json */, 114 | 74568D252484589200210F85 /* Info.plist */, 115 | ); 116 | path = "Pytorch-CoreML-SpectrogramTests"; 117 | sourceTree = ""; 118 | }; 119 | /* End PBXGroup section */ 120 | 121 | /* Begin PBXNativeTarget section */ 122 | 74568D082484589000210F85 /* Pytorch-CoreML-Spectrogram */ = { 123 | isa = PBXNativeTarget; 124 | buildConfigurationList = 74568D282484589200210F85 /* Build configuration list for PBXNativeTarget "Pytorch-CoreML-Spectrogram" */; 125 | buildPhases = ( 126 | 74568D052484589000210F85 /* Sources */, 127 | 74568D062484589000210F85 /* Frameworks */, 128 | 74568D072484589000210F85 /* Resources */, 129 | ); 130 | buildRules = ( 131 | ); 132 | dependencies = ( 133 | ); 134 | name = "Pytorch-CoreML-Spectrogram"; 135 | productName = "Pytorch-CoreML-Spectrogram"; 136 | productReference = 74568D092484589000210F85 /* Pytorch-CoreML-Spectrogram.app */; 137 | productType = "com.apple.product-type.application"; 138 | }; 139 | 74568D1E2484589200210F85 /* Pytorch-CoreML-SpectrogramTests */ = { 140 | isa = PBXNativeTarget; 141 | buildConfigurationList = 74568D2B2484589200210F85 /* Build configuration list for PBXNativeTarget "Pytorch-CoreML-SpectrogramTests" */; 142 | buildPhases = ( 143 | 74568D1B2484589200210F85 /* Sources */, 144 | 74568D1C2484589200210F85 /* Frameworks */, 145 | 74568D1D2484589200210F85 /* Resources */, 146 | ); 147 | buildRules = ( 148 | ); 149 | dependencies = ( 150 | 74568D212484589200210F85 /* PBXTargetDependency */, 151 | ); 152 | name = "Pytorch-CoreML-SpectrogramTests"; 
153 | productName = "Pytorch-CoreML-SpectrogramTests"; 154 | productReference = 74568D1F2484589200210F85 /* Pytorch-CoreML-SpectrogramTests.xctest */; 155 | productType = "com.apple.product-type.bundle.unit-test"; 156 | }; 157 | /* End PBXNativeTarget section */ 158 | 159 | /* Begin PBXProject section */ 160 | 74568D012484589000210F85 /* Project object */ = { 161 | isa = PBXProject; 162 | attributes = { 163 | LastSwiftUpdateCheck = 1140; 164 | LastUpgradeCheck = 1140; 165 | ORGANIZATIONNAME = Gerald; 166 | TargetAttributes = { 167 | 74568D082484589000210F85 = { 168 | CreatedOnToolsVersion = 11.4.1; 169 | }; 170 | 74568D1E2484589200210F85 = { 171 | CreatedOnToolsVersion = 11.4.1; 172 | TestTargetID = 74568D082484589000210F85; 173 | }; 174 | }; 175 | }; 176 | buildConfigurationList = 74568D042484589000210F85 /* Build configuration list for PBXProject "Pytorch-CoreML-Spectrogram" */; 177 | compatibilityVersion = "Xcode 9.3"; 178 | developmentRegion = en; 179 | hasScannedForEncodings = 0; 180 | knownRegions = ( 181 | en, 182 | Base, 183 | ); 184 | mainGroup = 74568D002484589000210F85; 185 | productRefGroup = 74568D0A2484589000210F85 /* Products */; 186 | projectDirPath = ""; 187 | projectRoot = ""; 188 | targets = ( 189 | 74568D082484589000210F85 /* Pytorch-CoreML-Spectrogram */, 190 | 74568D1E2484589200210F85 /* Pytorch-CoreML-SpectrogramTests */, 191 | ); 192 | }; 193 | /* End PBXProject section */ 194 | 195 | /* Begin PBXResourcesBuildPhase section */ 196 | 74568D072484589000210F85 /* Resources */ = { 197 | isa = PBXResourcesBuildPhase; 198 | buildActionMask = 2147483647; 199 | files = ( 200 | 74568D192484589200210F85 /* LaunchScreen.storyboard in Resources */, 201 | 74568D162484589200210F85 /* Assets.xcassets in Resources */, 202 | 74568D142484589000210F85 /* Main.storyboard in Resources */, 203 | ); 204 | runOnlyForDeploymentPostprocessing = 0; 205 | }; 206 | 74568D1D2484589200210F85 /* Resources */ = { 207 | isa = PBXResourcesBuildPhase; 208 | buildActionMask = 2147483647; 209 | files = ( 210 | 74568D2F248461F100210F85 /* bonjour.wav in Resources */, 211 | 74568D3B248472FD00210F85 /* melspec_out.bonjour.json in Resources */, 212 | ); 213 | runOnlyForDeploymentPostprocessing = 0; 214 | }; 215 | /* End PBXResourcesBuildPhase section */ 216 | 217 | /* Begin PBXSourcesBuildPhase section */ 218 | 74568D052484589000210F85 /* Sources */ = { 219 | isa = PBXSourcesBuildPhase; 220 | buildActionMask = 2147483647; 221 | files = ( 222 | 74568D112484589000210F85 /* ViewController.swift in Sources */, 223 | 74568D392484693000210F85 /* DrawSpecView.swift in Sources */, 224 | 74568D362484691C00210F85 /* ConvertSpectrogram.swift in Sources */, 225 | 74568D0D2484589000210F85 /* AppDelegate.swift in Sources */, 226 | 74568D0F2484589000210F85 /* SceneDelegate.swift in Sources */, 227 | 74568D3D2484730C00210F85 /* wave__melspec.mlmodel in Sources */, 228 | ); 229 | runOnlyForDeploymentPostprocessing = 0; 230 | }; 231 | 74568D1B2484589200210F85 /* Sources */ = { 232 | isa = PBXSourcesBuildPhase; 233 | buildActionMask = 2147483647; 234 | files = ( 235 | 74568D242484589200210F85 /* Pytorch_CoreML_SpectrogramTests.swift in Sources */, 236 | 74568D372484691C00210F85 /* ConvertSpectrogram.swift in Sources */, 237 | 74568D3E2484730C00210F85 /* wave__melspec.mlmodel in Sources */, 238 | ); 239 | runOnlyForDeploymentPostprocessing = 0; 240 | }; 241 | /* End PBXSourcesBuildPhase section */ 242 | 243 | /* Begin PBXTargetDependency section */ 244 | 74568D212484589200210F85 /* PBXTargetDependency */ = { 245 | isa = 
PBXTargetDependency; 246 | target = 74568D082484589000210F85 /* Pytorch-CoreML-Spectrogram */; 247 | targetProxy = 74568D202484589200210F85 /* PBXContainerItemProxy */; 248 | }; 249 | /* End PBXTargetDependency section */ 250 | 251 | /* Begin PBXVariantGroup section */ 252 | 74568D122484589000210F85 /* Main.storyboard */ = { 253 | isa = PBXVariantGroup; 254 | children = ( 255 | 74568D132484589000210F85 /* Base */, 256 | ); 257 | name = Main.storyboard; 258 | sourceTree = ""; 259 | }; 260 | 74568D172484589200210F85 /* LaunchScreen.storyboard */ = { 261 | isa = PBXVariantGroup; 262 | children = ( 263 | 74568D182484589200210F85 /* Base */, 264 | ); 265 | name = LaunchScreen.storyboard; 266 | sourceTree = ""; 267 | }; 268 | /* End PBXVariantGroup section */ 269 | 270 | /* Begin XCBuildConfiguration section */ 271 | 74568D262484589200210F85 /* Debug */ = { 272 | isa = XCBuildConfiguration; 273 | buildSettings = { 274 | ALWAYS_SEARCH_USER_PATHS = NO; 275 | CLANG_ANALYZER_NONNULL = YES; 276 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 277 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 278 | CLANG_CXX_LIBRARY = "libc++"; 279 | CLANG_ENABLE_MODULES = YES; 280 | CLANG_ENABLE_OBJC_ARC = YES; 281 | CLANG_ENABLE_OBJC_WEAK = YES; 282 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 283 | CLANG_WARN_BOOL_CONVERSION = YES; 284 | CLANG_WARN_COMMA = YES; 285 | CLANG_WARN_CONSTANT_CONVERSION = YES; 286 | CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; 287 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 288 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 289 | CLANG_WARN_EMPTY_BODY = YES; 290 | CLANG_WARN_ENUM_CONVERSION = YES; 291 | CLANG_WARN_INFINITE_RECURSION = YES; 292 | CLANG_WARN_INT_CONVERSION = YES; 293 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; 294 | CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; 295 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; 296 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 297 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 298 | CLANG_WARN_STRICT_PROTOTYPES = YES; 299 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 300 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 301 | CLANG_WARN_UNREACHABLE_CODE = YES; 302 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 303 | COPY_PHASE_STRIP = NO; 304 | DEBUG_INFORMATION_FORMAT = dwarf; 305 | ENABLE_STRICT_OBJC_MSGSEND = YES; 306 | ENABLE_TESTABILITY = YES; 307 | GCC_C_LANGUAGE_STANDARD = gnu11; 308 | GCC_DYNAMIC_NO_PIC = NO; 309 | GCC_NO_COMMON_BLOCKS = YES; 310 | GCC_OPTIMIZATION_LEVEL = 0; 311 | GCC_PREPROCESSOR_DEFINITIONS = ( 312 | "DEBUG=1", 313 | "$(inherited)", 314 | ); 315 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 316 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 317 | GCC_WARN_UNDECLARED_SELECTOR = YES; 318 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 319 | GCC_WARN_UNUSED_FUNCTION = YES; 320 | GCC_WARN_UNUSED_VARIABLE = YES; 321 | IPHONEOS_DEPLOYMENT_TARGET = 13.0; 322 | MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; 323 | MTL_FAST_MATH = YES; 324 | ONLY_ACTIVE_ARCH = YES; 325 | SDKROOT = iphoneos; 326 | SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; 327 | SWIFT_OPTIMIZATION_LEVEL = "-Onone"; 328 | }; 329 | name = Debug; 330 | }; 331 | 74568D272484589200210F85 /* Release */ = { 332 | isa = XCBuildConfiguration; 333 | buildSettings = { 334 | ALWAYS_SEARCH_USER_PATHS = NO; 335 | CLANG_ANALYZER_NONNULL = YES; 336 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 337 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 338 | CLANG_CXX_LIBRARY = "libc++"; 339 | CLANG_ENABLE_MODULES = YES; 340 | CLANG_ENABLE_OBJC_ARC = YES; 341 | CLANG_ENABLE_OBJC_WEAK = 
YES; 342 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 343 | CLANG_WARN_BOOL_CONVERSION = YES; 344 | CLANG_WARN_COMMA = YES; 345 | CLANG_WARN_CONSTANT_CONVERSION = YES; 346 | CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; 347 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 348 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 349 | CLANG_WARN_EMPTY_BODY = YES; 350 | CLANG_WARN_ENUM_CONVERSION = YES; 351 | CLANG_WARN_INFINITE_RECURSION = YES; 352 | CLANG_WARN_INT_CONVERSION = YES; 353 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; 354 | CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; 355 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; 356 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 357 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 358 | CLANG_WARN_STRICT_PROTOTYPES = YES; 359 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 360 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 361 | CLANG_WARN_UNREACHABLE_CODE = YES; 362 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 363 | COPY_PHASE_STRIP = NO; 364 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 365 | ENABLE_NS_ASSERTIONS = NO; 366 | ENABLE_STRICT_OBJC_MSGSEND = YES; 367 | GCC_C_LANGUAGE_STANDARD = gnu11; 368 | GCC_NO_COMMON_BLOCKS = YES; 369 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 370 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 371 | GCC_WARN_UNDECLARED_SELECTOR = YES; 372 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 373 | GCC_WARN_UNUSED_FUNCTION = YES; 374 | GCC_WARN_UNUSED_VARIABLE = YES; 375 | IPHONEOS_DEPLOYMENT_TARGET = 13.0; 376 | MTL_ENABLE_DEBUG_INFO = NO; 377 | MTL_FAST_MATH = YES; 378 | SDKROOT = iphoneos; 379 | SWIFT_COMPILATION_MODE = wholemodule; 380 | SWIFT_OPTIMIZATION_LEVEL = "-O"; 381 | VALIDATE_PRODUCT = YES; 382 | }; 383 | name = Release; 384 | }; 385 | 74568D292484589200210F85 /* Debug */ = { 386 | isa = XCBuildConfiguration; 387 | buildSettings = { 388 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 389 | CODE_SIGN_STYLE = Automatic; 390 | DEVELOPMENT_TEAM = K6KE3UBBT4; 391 | INFOPLIST_FILE = "Pytorch-CoreML-Spectrogram/Info.plist"; 392 | LD_RUNPATH_SEARCH_PATHS = ( 393 | "$(inherited)", 394 | "@executable_path/Frameworks", 395 | ); 396 | PRODUCT_BUNDLE_IDENTIFIER = "my.testco.Pytorch-CoreML-Spectrogram"; 397 | PRODUCT_NAME = "$(TARGET_NAME)"; 398 | SWIFT_VERSION = 5.0; 399 | TARGETED_DEVICE_FAMILY = "1,2"; 400 | }; 401 | name = Debug; 402 | }; 403 | 74568D2A2484589200210F85 /* Release */ = { 404 | isa = XCBuildConfiguration; 405 | buildSettings = { 406 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 407 | CODE_SIGN_STYLE = Automatic; 408 | DEVELOPMENT_TEAM = K6KE3UBBT4; 409 | INFOPLIST_FILE = "Pytorch-CoreML-Spectrogram/Info.plist"; 410 | LD_RUNPATH_SEARCH_PATHS = ( 411 | "$(inherited)", 412 | "@executable_path/Frameworks", 413 | ); 414 | PRODUCT_BUNDLE_IDENTIFIER = "my.testco.Pytorch-CoreML-Spectrogram"; 415 | PRODUCT_NAME = "$(TARGET_NAME)"; 416 | SWIFT_VERSION = 5.0; 417 | TARGETED_DEVICE_FAMILY = "1,2"; 418 | }; 419 | name = Release; 420 | }; 421 | 74568D2C2484589200210F85 /* Debug */ = { 422 | isa = XCBuildConfiguration; 423 | buildSettings = { 424 | ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; 425 | BUNDLE_LOADER = "$(TEST_HOST)"; 426 | CODE_SIGN_STYLE = Automatic; 427 | DEVELOPMENT_TEAM = K6KE3UBBT4; 428 | INFOPLIST_FILE = "Pytorch-CoreML-SpectrogramTests/Info.plist"; 429 | IPHONEOS_DEPLOYMENT_TARGET = 13.0; 430 | LD_RUNPATH_SEARCH_PATHS = ( 431 | "$(inherited)", 432 | "@executable_path/Frameworks", 433 | "@loader_path/Frameworks", 434 | ); 435 | PRODUCT_BUNDLE_IDENTIFIER = "my.testco.Pytorch-CoreML-SpectrogramTests"; 436 | 
PRODUCT_NAME = "$(TARGET_NAME)"; 437 | SWIFT_VERSION = 5.0; 438 | TARGETED_DEVICE_FAMILY = "1,2"; 439 | TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Pytorch-CoreML-Spectrogram.app/Pytorch-CoreML-Spectrogram"; 440 | }; 441 | name = Debug; 442 | }; 443 | 74568D2D2484589200210F85 /* Release */ = { 444 | isa = XCBuildConfiguration; 445 | buildSettings = { 446 | ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; 447 | BUNDLE_LOADER = "$(TEST_HOST)"; 448 | CODE_SIGN_STYLE = Automatic; 449 | DEVELOPMENT_TEAM = K6KE3UBBT4; 450 | INFOPLIST_FILE = "Pytorch-CoreML-SpectrogramTests/Info.plist"; 451 | IPHONEOS_DEPLOYMENT_TARGET = 13.0; 452 | LD_RUNPATH_SEARCH_PATHS = ( 453 | "$(inherited)", 454 | "@executable_path/Frameworks", 455 | "@loader_path/Frameworks", 456 | ); 457 | PRODUCT_BUNDLE_IDENTIFIER = "my.testco.Pytorch-CoreML-SpectrogramTests"; 458 | PRODUCT_NAME = "$(TARGET_NAME)"; 459 | SWIFT_VERSION = 5.0; 460 | TARGETED_DEVICE_FAMILY = "1,2"; 461 | TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Pytorch-CoreML-Spectrogram.app/Pytorch-CoreML-Spectrogram"; 462 | }; 463 | name = Release; 464 | }; 465 | /* End XCBuildConfiguration section */ 466 | 467 | /* Begin XCConfigurationList section */ 468 | 74568D042484589000210F85 /* Build configuration list for PBXProject "Pytorch-CoreML-Spectrogram" */ = { 469 | isa = XCConfigurationList; 470 | buildConfigurations = ( 471 | 74568D262484589200210F85 /* Debug */, 472 | 74568D272484589200210F85 /* Release */, 473 | ); 474 | defaultConfigurationIsVisible = 0; 475 | defaultConfigurationName = Release; 476 | }; 477 | 74568D282484589200210F85 /* Build configuration list for PBXNativeTarget "Pytorch-CoreML-Spectrogram" */ = { 478 | isa = XCConfigurationList; 479 | buildConfigurations = ( 480 | 74568D292484589200210F85 /* Debug */, 481 | 74568D2A2484589200210F85 /* Release */, 482 | ); 483 | defaultConfigurationIsVisible = 0; 484 | defaultConfigurationName = Release; 485 | }; 486 | 74568D2B2484589200210F85 /* Build configuration list for PBXNativeTarget "Pytorch-CoreML-SpectrogramTests" */ = { 487 | isa = XCConfigurationList; 488 | buildConfigurations = ( 489 | 74568D2C2484589200210F85 /* Debug */, 490 | 74568D2D2484589200210F85 /* Release */, 491 | ); 492 | defaultConfigurationIsVisible = 0; 493 | defaultConfigurationName = Release; 494 | }; 495 | /* End XCConfigurationList section */ 496 | }; 497 | rootObject = 74568D012484589000210F85 /* Project object */; 498 | } 499 | -------------------------------------------------------------------------------- /Pytorch-CoreML-Spectrogram.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /Pytorch-CoreML-Spectrogram.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | IDEDidComputeMac32BitWarning 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /Pytorch-CoreML-Spectrogram.xcodeproj/xcuserdata/gerald.xcuserdatad/xcschemes/xcschememanagement.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SchemeUserState 6 | 7 | Pytorch-CoreML-Spectrogram.xcscheme_^#shared#^_ 8 | 9 | orderHint 10 | 0 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /Pytorch-CoreML-Spectrogram/AppDelegate.swift: 
-------------------------------------------------------------------------------- 1 | // 2 | // AppDelegate.swift 3 | // Pytorch-CoreML-Spectrogram 4 | // 5 | // Created by Gerald on 5/31/20. 6 | // Copyright © 2020 Gerald. All rights reserved. 7 | // 8 | 9 | import UIKit 10 | 11 | @UIApplicationMain 12 | class AppDelegate: UIResponder, UIApplicationDelegate { 13 | 14 | func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool { 15 | // Override point for customization after application launch. 16 | return true 17 | } 18 | 19 | // MARK: UISceneSession Lifecycle 20 | 21 | func application(_ application: UIApplication, configurationForConnecting connectingSceneSession: UISceneSession, options: UIScene.ConnectionOptions) -> UISceneConfiguration { 22 | // Called when a new scene session is being created. 23 | // Use this method to select a configuration to create the new scene with. 24 | return UISceneConfiguration(name: "Default Configuration", sessionRole: connectingSceneSession.role) 25 | } 26 | 27 | func application(_ application: UIApplication, didDiscardSceneSessions sceneSessions: Set) { 28 | // Called when the user discards a scene session. 29 | // If any sessions were discarded while the application was not running, this will be called shortly after application:didFinishLaunchingWithOptions. 30 | // Use this method to release any resources that were specific to the discarded scenes, as they will not return. 31 | } 32 | 33 | 34 | } 35 | 36 | -------------------------------------------------------------------------------- /Pytorch-CoreML-Spectrogram/Assets.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "iphone", 5 | "scale" : "2x", 6 | "size" : "20x20" 7 | }, 8 | { 9 | "idiom" : "iphone", 10 | "scale" : "3x", 11 | "size" : "20x20" 12 | }, 13 | { 14 | "idiom" : "iphone", 15 | "scale" : "2x", 16 | "size" : "29x29" 17 | }, 18 | { 19 | "idiom" : "iphone", 20 | "scale" : "3x", 21 | "size" : "29x29" 22 | }, 23 | { 24 | "idiom" : "iphone", 25 | "scale" : "2x", 26 | "size" : "40x40" 27 | }, 28 | { 29 | "idiom" : "iphone", 30 | "scale" : "3x", 31 | "size" : "40x40" 32 | }, 33 | { 34 | "idiom" : "iphone", 35 | "scale" : "2x", 36 | "size" : "60x60" 37 | }, 38 | { 39 | "idiom" : "iphone", 40 | "scale" : "3x", 41 | "size" : "60x60" 42 | }, 43 | { 44 | "idiom" : "ipad", 45 | "scale" : "1x", 46 | "size" : "20x20" 47 | }, 48 | { 49 | "idiom" : "ipad", 50 | "scale" : "2x", 51 | "size" : "20x20" 52 | }, 53 | { 54 | "idiom" : "ipad", 55 | "scale" : "1x", 56 | "size" : "29x29" 57 | }, 58 | { 59 | "idiom" : "ipad", 60 | "scale" : "2x", 61 | "size" : "29x29" 62 | }, 63 | { 64 | "idiom" : "ipad", 65 | "scale" : "1x", 66 | "size" : "40x40" 67 | }, 68 | { 69 | "idiom" : "ipad", 70 | "scale" : "2x", 71 | "size" : "40x40" 72 | }, 73 | { 74 | "idiom" : "ipad", 75 | "scale" : "1x", 76 | "size" : "76x76" 77 | }, 78 | { 79 | "idiom" : "ipad", 80 | "scale" : "2x", 81 | "size" : "76x76" 82 | }, 83 | { 84 | "idiom" : "ipad", 85 | "scale" : "2x", 86 | "size" : "83.5x83.5" 87 | }, 88 | { 89 | "idiom" : "ios-marketing", 90 | "scale" : "1x", 91 | "size" : "1024x1024" 92 | } 93 | ], 94 | "info" : { 95 | "author" : "xcode", 96 | "version" : 1 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /Pytorch-CoreML-Spectrogram/Assets.xcassets/Contents.json: 
--------------------------------------------------------------------------------
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
--------------------------------------------------------------------------------
/Pytorch-CoreML-Spectrogram/Base.lproj/LaunchScreen.storyboard:
--------------------------------------------------------------------------------
[storyboard XML lost in extraction]
--------------------------------------------------------------------------------
/Pytorch-CoreML-Spectrogram/Base.lproj/Main.storyboard:
--------------------------------------------------------------------------------
[storyboard XML lost in extraction]
--------------------------------------------------------------------------------
/Pytorch-CoreML-Spectrogram/ConvertSpectrogram.swift:
--------------------------------------------------------------------------------
//
//  ConvertSpectrogram.swift
//  CoreML_Audio_Analysis
//
//  Created by Gerald on 5/30/20.
//  Copyright © 2020 Gerald. All rights reserved.
//
//  from https://github.com/tucan9389/DepthPrediction-CoreML/blob/master/DepthPrediction-CoreML/heatmapProcessor.swift

import CoreML

class SpectrogramConverter {
    func convertTo2DArray(from spectrogram: MLMultiArray) -> Array<Array<Float32>> {
        guard spectrogram.shape.count == 4 else {
            print("spectrogram's shape is invalid. \(spectrogram.shape)")
            return []
        }
        let spectrogram_w = spectrogram.shape[2].intValue
        let spectrogram_h = spectrogram.shape[3].intValue

        var converted_spectrogram: Array<Array<Float32>> = Array(repeating: Array(repeating: 0.0, count: spectrogram_h), count: spectrogram_w)

        var minimumValue: Float32 = Float32.greatestFiniteMagnitude
        var maximumValue: Float32 = -Float32.greatestFiniteMagnitude

        for i in 0..<spectrogram_w {
            for j in 0..<spectrogram_h {
                let val = spectrogram[ i * spectrogram_h + j ].floatValue
                guard val > 0 else { continue }
                converted_spectrogram[i][spectrogram_h-j-1] = val // origin at bottom

                if minimumValue > val {
                    minimumValue = val
                }
                if maximumValue < val {
                    maximumValue = val
                }
            }
        }

        maximumValue = max( -15.0, maximumValue ) // for improved contrast on device
        var minmaxGap = maximumValue - minimumValue

        // print( "minmax \(minmaxGap) \(maximumValue) \(minimumValue)")

        if ( minmaxGap == 0 ) {
            minmaxGap = 1.0
        }
        // rescale every cell into [0, 1] using the min/max found above
        for i in 0..<spectrogram_w {
            for j in 0..<spectrogram_h {
                converted_spectrogram[i][j] = (converted_spectrogram[i][j] - minimumValue) / minmaxGap
            }
        }
        return converted_spectrogram
    }
}
--------------------------------------------------------------------------------
/Pytorch-CoreML-Spectrogram/DrawSpecView.swift:
--------------------------------------------------------------------------------
//
//  DrawSpecView.swift
//

import UIKit

class DrawSpecView: UIView {

    var spectrogram: Array<Array<Float32>>? = nil {
        didSet {
            DispatchQueue.main.async {
                self.setNeedsDisplay()
            }
        }
    }

    override func draw(_ rect: CGRect) {

        if let ctx = UIGraphicsGetCurrentContext() {

            ctx.clear(rect);

            guard let spectrogram = self.spectrogram else { return }

            let size = self.bounds.size
            let spectrogram_w = spectrogram.count
            let spectrogram_h = spectrogram.first?.count ?? 0
            let w = size.width / CGFloat(spectrogram_w)
            let h = size.height / CGFloat(spectrogram_h)

            for j in 0..<spectrogram_h {
                for i in 0..<spectrogram_w {
                    var alpha: CGFloat = CGFloat(spectrogram[i][j])
                    if alpha > 1 {
                        alpha = 1
                    } else if alpha < 0 {
                        alpha = 0
                    }

                    let rect: CGRect = CGRect(x: CGFloat(i) * w, y: CGFloat(j) * h, width: w, height: h)

                    // color
                    let hue: CGFloat = (1.0-alpha) * (240.0 / 360.0)
                    let color: UIColor = UIColor(hue: hue, saturation: 1, brightness: 1, alpha: 0.94)

                    // gray
                    // let color: UIColor = UIColor(white: 1-alpha, alpha: 1)

                    let bpath: UIBezierPath = UIBezierPath(rect: rect)

                    color.set()
                    bpath.fill()
                }
            }
        }
    } // end of draw(rect:)

}
--------------------------------------------------------------------------------
/Pytorch-CoreML-Spectrogram/Info.plist:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>CFBundleDevelopmentRegion</key>
	<string>$(DEVELOPMENT_LANGUAGE)</string>
	<key>CFBundleExecutable</key>
	<string>$(EXECUTABLE_NAME)</string>
	<key>CFBundleIdentifier</key>
	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
	<key>CFBundleInfoDictionaryVersion</key>
	<string>6.0</string>
	<key>CFBundleName</key>
	<string>$(PRODUCT_NAME)</string>
	<key>CFBundlePackageType</key>
	<string>$(PRODUCT_BUNDLE_PACKAGE_TYPE)</string>
	<key>CFBundleShortVersionString</key>
	<string>1.0</string>
	<key>CFBundleVersion</key>
	<string>1</string>
	<key>LSRequiresIPhoneOS</key>
	<true/>
	<key>NSMicrophoneUsageDescription</key>
	<string>Microphone for capturing audio.</string>
	<key>UIApplicationSceneManifest</key>
	<dict>
		<key>UIApplicationSupportsMultipleScenes</key>
		<false/>
		<key>UISceneConfigurations</key>
		<dict>
			<key>UIWindowSceneSessionRoleApplication</key>
			<array>
				<dict>
					<key>UISceneConfigurationName</key>
					<string>Default Configuration</string>
					<key>UISceneDelegateClassName</key>
					<string>$(PRODUCT_MODULE_NAME).SceneDelegate</string>
					<key>UISceneStoryboardFile</key>
					<string>Main</string>
				</dict>
			</array>
		</dict>
	</dict>
	<key>UILaunchStoryboardName</key>
	<string>LaunchScreen</string>
	<key>UIMainStoryboardFile</key>
	<string>Main</string>
	<key>UIRequiredDeviceCapabilities</key>
	<array>
		<string>armv7</string>
	</array>
	<key>UISupportedInterfaceOrientations</key>
	<array>
		<string>UIInterfaceOrientationPortrait</string>
		<string>UIInterfaceOrientationLandscapeLeft</string>
		<string>UIInterfaceOrientationLandscapeRight</string>
	</array>
	<key>UISupportedInterfaceOrientations~ipad</key>
	<array>
		<string>UIInterfaceOrientationPortrait</string>
		<string>UIInterfaceOrientationPortraitUpsideDown</string>
		<string>UIInterfaceOrientationLandscapeLeft</string>
		<string>UIInterfaceOrientationLandscapeRight</string>
	</array>
</dict>
</plist>
--------------------------------------------------------------------------------
/Pytorch-CoreML-Spectrogram/SceneDelegate.swift:
--------------------------------------------------------------------------------
//
//  SceneDelegate.swift
//  Pytorch-CoreML-Spectrogram
//
//  Created by Gerald on 5/31/20.
//  Copyright © 2020 Gerald. All rights reserved.
//

import UIKit

class SceneDelegate: UIResponder, UIWindowSceneDelegate {

    var window: UIWindow?


    func scene(_ scene: UIScene, willConnectTo session: UISceneSession, options connectionOptions: UIScene.ConnectionOptions) {
        // Use this method to optionally configure and attach the UIWindow `window` to the provided UIWindowScene `scene`.
        // If using a storyboard, the `window` property will automatically be initialized and attached to the scene.
        // This delegate does not imply the connecting scene or session are new (see `application:configurationForConnectingSceneSession` instead).
        guard let _ = (scene as? UIWindowScene) else { return }
    }

    func sceneDidDisconnect(_ scene: UIScene) {
        // Called as the scene is being released by the system.
        // This occurs shortly after the scene enters the background, or when its session is discarded.
26 | // Release any resources associated with this scene that can be re-created the next time the scene connects. 27 | // The scene may re-connect later, as its session was not neccessarily discarded (see `application:didDiscardSceneSessions` instead). 28 | } 29 | 30 | func sceneDidBecomeActive(_ scene: UIScene) { 31 | // Called when the scene has moved from an inactive state to an active state. 32 | // Use this method to restart any tasks that were paused (or not yet started) when the scene was inactive. 33 | } 34 | 35 | func sceneWillResignActive(_ scene: UIScene) { 36 | // Called when the scene will move from an active state to an inactive state. 37 | // This may occur due to temporary interruptions (ex. an incoming phone call). 38 | } 39 | 40 | func sceneWillEnterForeground(_ scene: UIScene) { 41 | // Called as the scene transitions from the background to the foreground. 42 | // Use this method to undo the changes made on entering the background. 43 | } 44 | 45 | func sceneDidEnterBackground(_ scene: UIScene) { 46 | // Called as the scene transitions from the foreground to the background. 47 | // Use this method to save data, release shared resources, and store enough scene-specific state information 48 | // to restore the scene back to its current state. 49 | } 50 | 51 | 52 | } 53 | 54 | -------------------------------------------------------------------------------- /Pytorch-CoreML-Spectrogram/ViewController.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ViewController.swift 3 | // Pytorch-CoreML-Spectrogram 4 | // 5 | // Created by Gerald on 5/24/20. 6 | // Copyright © 2020 Gerald. All rights reserved. 7 | // 8 | 9 | // reference: https://developer.apple.com/documentation/speech/recognizing_speech_in_live_audio 10 | 11 | import UIKit 12 | import AVKit 13 | import CoreML 14 | 15 | class ViewController: UIViewController { 16 | 17 | @IBOutlet weak var drawSpecView: DrawSpecView! 18 | 19 | // set up for audio 20 | private let audioEngine = AVAudioEngine() 21 | // specify the audio samples format the CoreML model 22 | private let desiredAudioFormat: AVAudioFormat = { 23 | let avAudioChannelLayout = AVAudioChannelLayout(layoutTag: kAudioChannelLayoutTag_Mono)! 24 | return AVAudioFormat( 25 | commonFormat: .pcmFormatFloat32, 26 | sampleRate: Double( 32000 ), // as specified when creating the Pytorch model 27 | interleaved: true, 28 | channelLayout: avAudioChannelLayout 29 | ) 30 | }() 31 | 32 | // create a queue to do analysis on a separate thread 33 | private let analysisQueue = DispatchQueue(label: "com.myco.AnalysisQueue") 34 | 35 | // instantiate our model 36 | 37 | 38 | var model : wave__melspec? = nil 39 | typealias NetworkInput = wave__melspecInput 40 | typealias NetworkOutput = wave__melspecOutput 41 | 42 | // semaphore to protect the CoreML model 43 | let semaphore = DispatchSemaphore(value: 1) 44 | 45 | // for rendering our spectrogram 46 | let spec_converter = SpectrogramConverter() 47 | 48 | override func viewDidLoad() { 49 | super.viewDidLoad() 50 | // Do any additional setup after loading the view. 51 | load_model() 52 | } 53 | 54 | override func viewDidAppear(_ animated: Bool) { 55 | startAudioEngine() 56 | } 57 | 58 | private func load_model() { 59 | let config = MLModelConfiguration() 60 | config.computeUnits = .all 61 | do { 62 | self.model = try wave__melspec( configuration: config ) 63 | } catch { 64 | fatalError( "unable to load ML model!" 
 )
        }

    }

    // audio capture via microphone
    private func startAudioEngine() {

        // https://stackoverflow.com/questions/48831411/converting-avaudiopcmbuffer-to-another-avaudiopcmbuffer
        // more info at https://medium.com/@prianka.kariat/changing-the-format-of-ios-avaudioengine-mic-input-c183459cab63

        let inputNode = audioEngine.inputNode
        let originalAudioFormat: AVAudioFormat = inputNode.inputFormat(forBus: 0)
        // input is in 44.1kHz, 2 channels

        let downSampleRate: Double = desiredAudioFormat.sampleRate
        let ratio: Float = Float(originalAudioFormat.sampleRate)/Float(downSampleRate)

        // print( "input sr: \(originalAudioFormat.sampleRate) ch: \(originalAudioFormat.channelCount)" )
        // print( "desired sr: \(desiredAudioFormat.sampleRate) ch: \(desiredAudioFormat.channelCount) ratio \(ratio)" )

        guard let formatConverter = AVAudioConverter(from: originalAudioFormat, to: desiredAudioFormat) else {
            fatalError( "unable to create formatConverter!" )
        }

        // start audio capture by installing a Tap
        inputNode.installTap(
            onBus: 0,
            bufferSize: AVAudioFrameCount(downSampleRate * 2),
            format: originalAudioFormat
        ) {
            (buffer: AVAudioPCMBuffer!, time: AVAudioTime!) in
            // closure to process the captured audio, buffer size dictated by AudioEngine/device

            let capacity = UInt32(Float(buffer.frameCapacity)/ratio)

            guard let pcmBuffer = AVAudioPCMBuffer(
                pcmFormat: self.desiredAudioFormat,
                frameCapacity: capacity) else {
                print("Failed to create pcm buffer")
                return
            }

            let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
                outStatus.pointee = AVAudioConverterInputStatus.haveData
                return buffer
            }

            // convert the captured samples into the format our model needs
            var error: NSError?
            let status: AVAudioConverterOutputStatus = formatConverter.convert(
                to: pcmBuffer,
                error: &error,
                withInputFrom: inputBlock)

            if status == .error {
                if let unwrappedError: NSError = error {
                    print("Error \(unwrappedError)")
                }
                return
            }

            // we now have the audio in mono, at the 32000 sample rate the CoreML model needs;
            // convert audio samples into MLMultiArray format for CoreML models
            let channelData = pcmBuffer.floatChannelData
            let output_samples = Int(pcmBuffer.frameLength)
            let channelDataPointer = channelData!.pointee

            // print( "converted from \(buffer.frameLength) to len \(output_samples) val[0] \(channelDataPointer[0]) \(channelDataPointer[output_samples-1])" )

            let audioData = try! MLMultiArray( shape: [1, output_samples as NSNumber], dataType: .float32 )
            let ptr = UnsafeMutablePointer<Float32>(OpaquePointer(audioData.dataPointer))
            for i in 0..<output_samples {
                ptr[i] = channelDataPointer[i]
            }
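            // --- The remainder of this tap closure, the rest of startAudioEngine(),
            // --- and the closing of this class were lost in extraction. Below is a
            // --- minimal sketch of what the missing code plausibly does, inferred
            // --- from the properties declared above (analysisQueue, semaphore,
            // --- spec_converter, drawSpecView) and from the identical
            // --- model-invocation code in the unit tests; it is an assumption,
            // --- not the original source:
            //
            //     self.analysisQueue.async {
            //         // allow only one prediction in flight at a time
            //         self.semaphore.wait()
            //         defer { self.semaphore.signal() }
            //
            //         let inputs: [String: Any] = [ "input.1": audioData ]
            //         let provider = try! MLDictionaryFeatureProvider(dictionary: inputs)
            //         let raw_outputs = try! self.model!.model.prediction(from: provider)
            //         let outputs = NetworkOutput(features: raw_outputs)
            //
            //         // normalize the [1, 1, time, mel] output and hand it to the view
            //         self.drawSpecView.spectrogram = self.spec_converter.convertTo2DArray(from: outputs._25)
            //     }
            // }
            //
            // audioEngine.prepare()
            // try! audioEngine.start()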
--------------------------------------------------------------------------------
/Pytorch-CoreML-SpectrogramTests/Info.plist:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>CFBundleDevelopmentRegion</key>
	<string>$(DEVELOPMENT_LANGUAGE)</string>
	<key>CFBundleExecutable</key>
	<string>$(EXECUTABLE_NAME)</string>
	<key>CFBundleIdentifier</key>
	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
	<key>CFBundleInfoDictionaryVersion</key>
	<string>6.0</string>
	<key>CFBundleName</key>
	<string>$(PRODUCT_NAME)</string>
	<key>CFBundlePackageType</key>
	<string>$(PRODUCT_BUNDLE_PACKAGE_TYPE)</string>
	<key>CFBundleShortVersionString</key>
	<string>1.0</string>
	<key>CFBundleVersion</key>
	<string>1</string>
</dict>
</plist>
--------------------------------------------------------------------------------
/Pytorch-CoreML-SpectrogramTests/Pytorch_CoreML_SpectrogramTests.swift:
--------------------------------------------------------------------------------
//
//  Pytorch_CoreML_SpectrogramTests.swift
//  Pytorch-CoreML-SpectrogramTests
//
//  Created by Gerald on 5/31/20.
//  Copyright © 2020 Gerald. All rights reserved.
//

import XCTest
import AVFoundation
import CoreML

@testable import Pytorch_CoreML_Spectrogram

class Pytorch_CoreML_SpectrogramTests: XCTestCase {

    override func setUpWithError() throws {
        // Put setup code here. This method is called before the invocation of each test method in the class.
    }

    override func tearDownWithError() throws {
        // Put teardown code here. This method is called after the invocation of each test method in the class.
    }

    func test_wav__spectrogram() {
        // instantiate our CoreML model
        let model = wave__melspec()
        typealias NetworkInput = wave__melspecInput
        typealias NetworkOutput = wave__melspecOutput

        // read in the expected model output from JSON
        let bundle = Bundle(for: Pytorch_CoreML_SpectrogramTests.self)
        let path = bundle.path(forResource: "melspec_out.bonjour", ofType: "json")
        let data = try! Data(contentsOf: URL(fileURLWithPath: path!))
        let expected_spectrogram: [[NSNumber]] = try! JSONSerialization.jsonObject(with: data) as! [[NSNumber]]

        print( "expected spec: \(expected_spectrogram.count) \(expected_spectrogram[0].count)")

        // read the input shapes of our model
        let inputName = "input.1"
        let inputConstraint: MLFeatureDescription = model.model.modelDescription
            .inputDescriptionsByName[inputName]!

        let input_batch_size: Int = Int(truncating: (inputConstraint.multiArrayConstraint?.shape[0])! )
        let input_samples: Int = Int(truncating: (inputConstraint.multiArrayConstraint?.shape[1])! )
        // print( "inputCon: \(input_batch_size) \(input_samples)")

        // read the same WAV file used in PyTorch
        let testBundle = Bundle(for: type(of: self))
        guard let filePath = testBundle.path(forResource: "bonjour", ofType: "wav") else {
            fatalError( "error opening bonjour.wav" )
        }

        // Read wav file
        var wav_file: AVAudioFile!
        do {
            let fileUrl = URL(fileURLWithPath: filePath )
            wav_file = try AVAudioFile( forReading: fileUrl )
        } catch {
            fatalError("Could not open wav file.")
        }

        let buffer = AVAudioPCMBuffer(pcmFormat: wav_file.processingFormat,
                                      frameCapacity: UInt32(wav_file.length))
        do {
            try wav_file.read(into: buffer!)
        } catch {
            fatalError("Error reading buffer.")
        }

        guard let bufferData = buffer?.floatChannelData![0] else {
            fatalError("Can not get a float handle to buffer")
        }


        // allocate a ML Array & copy samples over
        let array_shape = [input_batch_size as NSNumber, input_samples as NSNumber]
        let audioData = try!
MLMultiArray(shape: array_shape, dataType: MLMultiArrayDataType.float32 )
        let ptr = UnsafeMutablePointer<Float32>(OpaquePointer(audioData.dataPointer))
        for i in 0..<input_samples {
            ptr[i] = bufferData[i]
        }

        let inputs: [String: Any] = [
            inputName: audioData,
        ]
        // container for ML Model inputs
        let provider = try! MLDictionaryFeatureProvider(dictionary: inputs)

        // Send the waveform samples into the model to generate the Spectrogram
        let raw_outputs = try! model.model.prediction(from: provider)

        // convert raw dictionary into our model's output object
        let outputs = NetworkOutput( features: raw_outputs )
        // the output we're interested in is "_25"
        let output_spectrogram: MLMultiArray = outputs._25
        print( "outputs: \(output_spectrogram.shape)") // [1, 1, 41, 64]

        // sanity check the shapes of our output
        XCTAssertTrue( Int( truncating: output_spectrogram.shape[2] ) == expected_spectrogram.count,
                       "incorrect shape[2]! \(output_spectrogram.shape[2]) \(expected_spectrogram.count)" )
        XCTAssertTrue( Int( truncating: output_spectrogram.shape[3] ) == expected_spectrogram[0].count,
                       "incorrect shape[3]! \(output_spectrogram.shape[3]) \(expected_spectrogram[0].count)" )

        // compare every element of our spectrogram with those from the JSON file
        // [element-wise comparison loop lost in extraction — see the sketch after this file]
    }

    // [start of the SpectrogramConverter test lost in extraction; only the
    //  fragments below survived — see the sketch after this file]
        let ptr = UnsafeMutablePointer<Float32>(OpaquePointer(spectrogram.dataPointer))
        // [fixture-filling loop and convertTo2DArray call lost in extraction]
        XCTAssertTrue( converted_spec[0].min()! >= Float32(0.0), "converted spec min incorrect!" )
        XCTAssertTrue( converted_spec[0].max()! <= Float32(1.0), "converted spec max incorrect!" )

    }

    func test_inference_time() throws {
        // This is an example of a performance test case.
        let model = wave__melspec()

        let array_shape: [NSNumber] = [1, 12800]
        let audioData = try! MLMultiArray(shape: array_shape, dataType: MLMultiArrayDataType.float32 )
        let inputs: [String: Any] = [
            "input.1": audioData,
        ]
        // container for ML Model inputs
        let provider = try! MLDictionaryFeatureProvider(dictionary: inputs)

        self.measure {
            // Put the code you want to measure the time of here.
            let N = 100
            let start_time = CACurrentMediaTime()
            let options = MLPredictionOptions()
            // options.usesCPUOnly = true
            for _ in 0..<N {
        // [prediction loop body and the rest of this file lost in extraction;
        //  the melspec_out.bonjour.json and README.md sections are missing as well]
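    // --- Several spans of this test file were lost in extraction: the
    // --- element-wise comparison loop in test_wav__spectrogram, most of the
    // --- SpectrogramConverter test, and the tail of test_inference_time.
    // --- Below is a minimal sketch of what the comparison loop plausibly did;
    // --- the loop structure is an assumption — only the fixture and output
    // --- names come from the surviving code:
    //
    //     for i in 0..<expected_spectrogram.count {
    //         for j in 0..<expected_spectrogram[0].count {
    //             let expected = expected_spectrogram[i][j].floatValue
    //             let index: [NSNumber] = [0, 0, i as NSNumber, j as NSNumber]
    //             let predicted = output_spectrogram[index].floatValue
    //             XCTAssertEqual(predicted, expected, accuracy: 0.1,
    //                            "spectrogram mismatch at (\(i), \(j))")
    //         }
    //     }
    //
    // --- and of the converter test body, which presumably filled an
    // --- MLMultiArray with spectrogram-like values and ran it through
    // --- SpectrogramConverter before the surviving min/max asserts:
    //
    //     let spectrogram = try! MLMultiArray(shape: [1, 1, 41, 64], dataType: .float32)
    //     let ptr = UnsafeMutablePointer<Float32>(OpaquePointer(spectrogram.dataPointer))
    //     for i in 0..<(41*64) { ptr[i] = Float32.random(in: -80.0...0.0) }
    //     let converted_spec = SpectrogramConverter().convertTo2DArray(from: spectrogram)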
--------------------------------------------------------------------------------
/python/model.py:
--------------------------------------------------------------------------------
import numpy as np
import torch
from torch import nn
import torchlibrosa

class WaveToLogmelSpectrogram(nn.Module):

    def __init__(
            self,
            sample_rate=32000,
            n_fft=1024,
            hop_length=320,
            mel_bins=64,
            fmin=50,
            fmax=14000,
    ):
        super(WaveToLogmelSpectrogram, self).__init__()

        window = 'hann'
        center = True
        pad_mode = 'reflect'
        ref = 1.0
        amin = 1e-10
        top_db = None

        self.spec_extractor = torchlibrosa.stft.Spectrogram(
            n_fft=n_fft,
            hop_length=hop_length,
            win_length=n_fft,
            window=window,
            center=center,
            pad_mode=pad_mode,
            freeze_parameters=True
        )

        self.logmel_extractor = torchlibrosa.stft.LogmelFilterBank(
            sr=sample_rate,
            n_fft=n_fft,
            n_mels=mel_bins,
            fmin=fmin,
            fmax=fmax,
            ref=ref,
            amin=amin,
            top_db=top_db,
            freeze_parameters=True
        )


        self.input_name = 'input.1'
        self.output_name = '25' # looked up via Netron

    def forward( self, x ):
        x = self.spec_extractor( x )
        return self.logmel_extractor( x )

    def gen_torch_output( self, sample_input ):
        self.eval()
        with torch.no_grad():
            torch_output = self( torch.from_numpy( sample_input ) )
            torch_output = torch_output.cpu().detach().numpy()
        return torch_output

    def convert_to_onnx( self, filename_onnx, sample_input ):

        input_names = [ self.input_name ]
        output_names = [ self.output_name ]

        torch.onnx.export(
            self,
            torch.from_numpy( sample_input ),
            filename_onnx,
            input_names=input_names,
            output_names=output_names,
            # operator_export_type=OperatorExportTypes.ONNX
        )

    def gen_onnx_output( self, filename_onnx, sample_input ):
        import onnxruntime

        session = onnxruntime.InferenceSession( filename_onnx, None )

        input_name = session.get_inputs()[0].name
        # output_names = [ item.name for item in session.get_outputs() ]

        raw_result = session.run([], {input_name: sample_input})

        return raw_result[0]


    def convert_to_coreml( self, fn_mlmodel, sample_input, plot_specs=True ):
        import onnx
        import onnx_coreml

        torch_output = self.gen_torch_output( sample_input )
        # print( 'torch_output: shape %s\nsample %s ' % ( torch_output.shape, torch_output[:, :, :3, :3] ) )
        print( 'torch_output: shape ', ( torch_output.shape ) ) # e.g. (1, 1, 41, 64) for a 12800-sample input

        # first convert to ONNX
        filename_onnx = '/tmp/wave__melspec_model.onnx'
        self.convert_to_onnx( filename_onnx, sample_input )

        onnx_output = self.gen_onnx_output( filename_onnx, sample_input )

        # set up for Core ML export
        convert_params = dict(
            predicted_feature_name = [],
            minimum_ios_deployment_target='13',
        )

        mlmodel = onnx_coreml.convert(
            model=filename_onnx,
            **convert_params,
        )

        assert mlmodel != None, 'CoreML Conversion failed'

        mlmodel.save( fn_mlmodel )

        model_inputs = {
            self.input_name : sample_input
        }
        # do forward pass
        mlmodel_outputs = mlmodel.predict(model_inputs, useCPUOnly=True)

        # fetch the spectrogram from output dictionary
        mlmodel_output = mlmodel_outputs[
            self.output_name ]
        # print( 'mlmodel_output: shape %s \nsample %s ' % ( mlmodel_output.shape, mlmodel_output[:,:,:3, :3] ) )
        print( 'mlmodel_output: shape ', ( mlmodel_output.shape ) )

        assert torch_output.shape == mlmodel_output.shape

        print( 'sum diff ', np.sum( np.abs( torch_output-mlmodel_output) ), np.max( np.abs( torch_output-mlmodel_output) ) )
        assert np.allclose( torch_output, mlmodel_output, atol=2, rtol=2 ) # big tolerance due to log scale

        print( 'Successful MLModel conversion to %s!' % fn_mlmodel )

        if plot_specs:
            plot_spectrograms( torch_output, onnx_output, mlmodel_output )

        return mlmodel_output

def load_wav_file( fn_wav ):
    import soundfile as sf

    data, samplerate = sf.read( fn_wav )
    return data

def save_ml_model_output_as_json( fn_output, mlmodel_output ):
    import json
    with open( fn_output, 'w' ) as fp:
        json.dump( mlmodel_output.tolist(), fp )

def plot_spectrograms( torch_output, onnx_output, mlmodel_output ):
    import matplotlib
    matplotlib.use('TkAgg')
    import matplotlib.pyplot as plt

    def spec__image( spectrogram ):
        return spectrogram[0,0,...].T

    fig = plt.figure( figsize=(8,8) )

    a = fig.add_subplot(3, 1, 1)
    a.imshow( spec__image( torch_output ), aspect='auto', origin='lower', cmap='jet')
    a.set_title( 'Pytorch' )
    a.tick_params( axis='x', which='both', bottom=False, top=False, labelbottom=False)

    a = fig.add_subplot(3, 1, 2)
    a.imshow( spec__image( onnx_output ), aspect='auto', origin='lower', cmap='jet')
    a.set_title( 'ONNX' )
    a.tick_params( axis='x', which='both', bottom=False, top=False, labelbottom=False)

    a = fig.add_subplot(3, 1, 3)
    a.imshow( spec__image( mlmodel_output ), aspect='auto', origin='lower', cmap='jet')
    a.set_title( 'Core ML' )

    plt.show()


if __name__ == '__main__':
    import sys
    fn_sample_wav = sys.argv[1]
    fn_mlmodel = sys.argv[2]
    fn_model_output = sys.argv[3]

    num_samples = 12800 # hack, load samples same length as iOS audio buffer
    waveform = load_wav_file( fn_sample_wav )
    sample_input = waveform[ :num_samples ].astype( dtype=np.float32 )
    # shape: (samples_num,)

    # add batch dimension
    sample_input = np.expand_dims( sample_input, axis=0 )
    # shape: (batch_size, samples_num)

    model = WaveToLogmelSpectrogram()

    # filename_onnx = '/tmp/wave__sound_events_model.onnx'
    # model.convert_to_onnx( filename_onnx, sample_input )

    mlmodel_output = model.convert_to_coreml( fn_mlmodel, sample_input )
    # shape: (1, 1, num_frames, mel_bins)
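    # sanity-check the time dimension (an added check, not in the original
    # script): with center=True, the STFT pads by n_fft//2 on each side, so it
    # yields num_samples/hop_length + 1 frames — 12800/320 + 1 = 41 here, which
    # matches the [1, 1, 41, 64] shape printed by the Xcode unit test.
    expected_frames = num_samples // 320 + 1  # hop_length = 320
    assert mlmodel_output.shape[2] == expected_frames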

    save_ml_model_output_as_json( fn_model_output, mlmodel_output[0,0,...])

'''
# example command:
python model.py ../Pytorch-CoreML-SpectrogramTests/bonjour.wav /tmp/wave__melspec.mlmodel /tmp/melspec_out.bonjour.json
'''

--------------------------------------------------------------------------------
/python/requirements.txt:
--------------------------------------------------------------------------------
torch
librosa
# torchlibrosa
-e git://github.com/ml-illustrated/torchlibrosa.git#egg=torchlibrosa
SoundFile
onnx
onnxruntime
onnx-coreml
coremltools
matplotlib
numba==0.48.*
--------------------------------------------------------------------------------
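A quick way to exercise the exported artifacts outside Xcode — a sketch, not part of the original repo; it assumes the file paths from the example command above and the same coremltools predict API already used in model.py:

import json
import numpy as np
import coremltools
import soundfile as sf

# load the exported Core ML model and the same clip used by the unit tests
mlmodel = coremltools.models.MLModel( '/tmp/wave__melspec.mlmodel' )
data, _ = sf.read( '../Pytorch-CoreML-SpectrogramTests/bonjour.wav' )
sample_input = np.expand_dims( data[:12800].astype(np.float32), axis=0 )

# run prediction; '25' is the output name looked up via Netron in model.py
outputs = mlmodel.predict( { 'input.1': sample_input }, useCPUOnly=True )
spectrogram = outputs['25']

# compare against the JSON fixture written by model.py
with open( '/tmp/melspec_out.bonjour.json' ) as fp:
    expected = np.array( json.load(fp) )
print( 'max abs diff:', np.max( np.abs( spectrogram[0, 0] - expected ) ) )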