├── .gitignore
├── .gitmodules
├── Pytorch-CoreML-Sound-Classification.xcodeproj
    ├── project.pbxproj
    ├── project.xcworkspace
    │   ├── contents.xcworkspacedata
    │   └── xcshareddata
    │   │   └── IDEWorkspaceChecks.plist
    ├── xcshareddata
    │   └── xcschemes
    │   │   └── Pytorch-CoreML-Sound-Classification.xcscheme
    └── xcuserdata
    │   └── gerald.xcuserdatad
    │       ├── xcdebugger
    │           └── Breakpoints_v2.xcbkptlist
    │       └── xcschemes
    │           └── xcschememanagement.plist
├── Pytorch-CoreML-Sound-Classification
    ├── AppDelegate.swift
    ├── Assets.xcassets
    │   ├── AppIcon.appiconset
    │   │   └── Contents.json
    │   └── Contents.json
    ├── Base.lproj
    │   ├── LaunchScreen.storyboard
    │   └── Main.storyboard
    ├── ConvertSpectrogram.swift
    ├── DrawSpecView.swift
    ├── Info.plist
    ├── PANN.mlmodel
    ├── PANN_labels.json
    ├── SceneDelegate.swift
    └── ViewController.swift
├── Pytorch-CoreML-Sound-ClassificationTests
    ├── Info.plist
    ├── PANN_out.ring_hello.json
    ├── Pytorch_CoreML_Sound_ClassificationTests.swift
    └── ring_hello.wav
├── README.md
└── python
    ├── export.log
    ├── export.py
    └── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *.pyc
3 | *~
4 | xcuserdata
5 | *.pth


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "python/audioset_tagging_cnn"]
2 | 	path = python/audioset_tagging_cnn
3 | 	url = https://github.com/qiuqiangkong/audioset_tagging_cnn
4 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification.xcodeproj/project.pbxproj:
--------------------------------------------------------------------------------
  1 | // !$*UTF8*$!
  2 | {
  3 | 	archiveVersion = 1;
  4 | 	classes = {
  5 | 	};
  6 | 	objectVersion = 50;
  7 | 	objects = {
  8 | 
  9 | /* Begin PBXBuildFile section */
 10 | 		741B2DA32496E94800020939 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 741B2DA22496E94800020939 /* AppDelegate.swift */; };
 11 | 		741B2DA52496E94800020939 /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 741B2DA42496E94800020939 /* SceneDelegate.swift */; };
 12 | 		741B2DA72496E94800020939 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 741B2DA62496E94800020939 /* ViewController.swift */; };
 13 | 		741B2DAA2496E94800020939 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 741B2DA82496E94800020939 /* Main.storyboard */; };
 14 | 		741B2DAC2496E94B00020939 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 741B2DAB2496E94B00020939 /* Assets.xcassets */; };
 15 | 		741B2DAF2496E94B00020939 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 741B2DAD2496E94B00020939 /* LaunchScreen.storyboard */; };
 16 | 		741B2DBA2496E94B00020939 /* Pytorch_CoreML_Sound_ClassificationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 741B2DB92496E94B00020939 /* Pytorch_CoreML_Sound_ClassificationTests.swift */; };
 17 | 		741B2DCC2496EC2700020939 /* DrawSpecView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 741B2DCB2496EC2700020939 /* DrawSpecView.swift */; };
 18 | 		741B2DCE2496ED5100020939 /* ConvertSpectrogram.swift in Sources */ = {isa = PBXBuildFile; fileRef = 741B2DCD2496ED5100020939 /* ConvertSpectrogram.swift */; };
 19 | 		741B2DCF2496ED5100020939 /* ConvertSpectrogram.swift in Sources */ = {isa = PBXBuildFile; fileRef = 741B2DCD2496ED5100020939 /* ConvertSpectrogram.swift */; };
 20 | 		74C725A024A982D40010AC26 /* ring_hello.wav in Resources */ = {isa = PBXBuildFile; fileRef = 74C7259F24A982D40010AC26 /* ring_hello.wav */; };
 21 | 		74C725A324A982E20010AC26 /* PANN.mlmodel in Sources */ = {isa = PBXBuildFile; fileRef = 74C725A124A982E20010AC26 /* PANN.mlmodel */; };
 22 | 		74C725A424A982E20010AC26 /* PANN.mlmodel in Sources */ = {isa = PBXBuildFile; fileRef = 74C725A124A982E20010AC26 /* PANN.mlmodel */; };
 23 | 		74C725A524A982E20010AC26 /* PANN_labels.json in Resources */ = {isa = PBXBuildFile; fileRef = 74C725A224A982E20010AC26 /* PANN_labels.json */; };
 24 | 		74C725A624A982E20010AC26 /* PANN_labels.json in Resources */ = {isa = PBXBuildFile; fileRef = 74C725A224A982E20010AC26 /* PANN_labels.json */; };
 25 | 		74C725A824A982ED0010AC26 /* PANN_out.ring_hello.json in Resources */ = {isa = PBXBuildFile; fileRef = 74C725A724A982ED0010AC26 /* PANN_out.ring_hello.json */; };
 26 | /* End PBXBuildFile section */
 27 | 
 28 | /* Begin PBXContainerItemProxy section */
 29 | 		741B2DB62496E94B00020939 /* PBXContainerItemProxy */ = {
 30 | 			isa = PBXContainerItemProxy;
 31 | 			containerPortal = 741B2D972496E94800020939 /* Project object */;
 32 | 			proxyType = 1;
 33 | 			remoteGlobalIDString = 741B2D9E2496E94800020939;
 34 | 			remoteInfo = "Pytorch-CoreML-Sound-Classification";
 35 | 		};
 36 | /* End PBXContainerItemProxy section */
 37 | 
 38 | /* Begin PBXFileReference section */
 39 | 		741B2D9F2496E94800020939 /* Pytorch-CoreML-Sound-Classification.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "Pytorch-CoreML-Sound-Classification.app"; sourceTree = BUILT_PRODUCTS_DIR; };
 40 | 		741B2DA22496E94800020939 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
 41 | 		741B2DA42496E94800020939 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = "<group>"; };
 42 | 		741B2DA62496E94800020939 /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; };
 43 | 		741B2DA92496E94800020939 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
 44 | 		741B2DAB2496E94B00020939 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
 45 | 		741B2DAE2496E94B00020939 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
 46 | 		741B2DB02496E94B00020939 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
 47 | 		741B2DB52496E94B00020939 /* Pytorch-CoreML-Sound-ClassificationTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = "Pytorch-CoreML-Sound-ClassificationTests.xctest"; sourceTree = BUILT_PRODUCTS_DIR; };
 48 | 		741B2DB92496E94B00020939 /* Pytorch_CoreML_Sound_ClassificationTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Pytorch_CoreML_Sound_ClassificationTests.swift; sourceTree = "<group>"; };
 49 | 		741B2DBB2496E94B00020939 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
 50 | 		741B2DCB2496EC2700020939 /* DrawSpecView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = DrawSpecView.swift; sourceTree = "<group>"; };
 51 | 		741B2DCD2496ED5100020939 /* ConvertSpectrogram.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ConvertSpectrogram.swift; sourceTree = "<group>"; };
 52 | 		74C7259F24A982D40010AC26 /* ring_hello.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = ring_hello.wav; sourceTree = "<group>"; };
 53 | 		74C725A124A982E20010AC26 /* PANN.mlmodel */ = {isa = PBXFileReference; lastKnownFileType = file.mlmodel; path = PANN.mlmodel; sourceTree = "<group>"; };
 54 | 		74C725A224A982E20010AC26 /* PANN_labels.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = PANN_labels.json; sourceTree = "<group>"; };
 55 | 		74C725A724A982ED0010AC26 /* PANN_out.ring_hello.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = PANN_out.ring_hello.json; sourceTree = "<group>"; };
 56 | /* End PBXFileReference section */
 57 | 
 58 | /* Begin PBXFrameworksBuildPhase section */
 59 | 		741B2D9C2496E94800020939 /* Frameworks */ = {
 60 | 			isa = PBXFrameworksBuildPhase;
 61 | 			buildActionMask = 2147483647;
 62 | 			files = (
 63 | 			);
 64 | 			runOnlyForDeploymentPostprocessing = 0;
 65 | 		};
 66 | 		741B2DB22496E94B00020939 /* Frameworks */ = {
 67 | 			isa = PBXFrameworksBuildPhase;
 68 | 			buildActionMask = 2147483647;
 69 | 			files = (
 70 | 			);
 71 | 			runOnlyForDeploymentPostprocessing = 0;
 72 | 		};
 73 | /* End PBXFrameworksBuildPhase section */
 74 | 
 75 | /* Begin PBXGroup section */
 76 | 		741B2D962496E94800020939 = {
 77 | 			isa = PBXGroup;
 78 | 			children = (
 79 | 				741B2DA12496E94800020939 /* Pytorch-CoreML-Sound-Classification */,
 80 | 				741B2DB82496E94B00020939 /* Pytorch-CoreML-Sound-ClassificationTests */,
 81 | 				741B2DA02496E94800020939 /* Products */,
 82 | 			);
 83 | 			sourceTree = "<group>";
 84 | 		};
 85 | 		741B2DA02496E94800020939 /* Products */ = {
 86 | 			isa = PBXGroup;
 87 | 			children = (
 88 | 				741B2D9F2496E94800020939 /* Pytorch-CoreML-Sound-Classification.app */,
 89 | 				741B2DB52496E94B00020939 /* Pytorch-CoreML-Sound-ClassificationTests.xctest */,
 90 | 			);
 91 | 			name = Products;
 92 | 			sourceTree = "<group>";
 93 | 		};
 94 | 		741B2DA12496E94800020939 /* Pytorch-CoreML-Sound-Classification */ = {
 95 | 			isa = PBXGroup;
 96 | 			children = (
 97 | 				741B2DA22496E94800020939 /* AppDelegate.swift */,
 98 | 				741B2DA42496E94800020939 /* SceneDelegate.swift */,
 99 | 				741B2DA62496E94800020939 /* ViewController.swift */,
100 | 				741B2DCD2496ED5100020939 /* ConvertSpectrogram.swift */,
101 | 				741B2DCB2496EC2700020939 /* DrawSpecView.swift */,
102 | 				74C725A224A982E20010AC26 /* PANN_labels.json */,
103 | 				74C725A124A982E20010AC26 /* PANN.mlmodel */,
104 | 				741B2DA82496E94800020939 /* Main.storyboard */,
105 | 				741B2DAB2496E94B00020939 /* Assets.xcassets */,
106 | 				741B2DAD2496E94B00020939 /* LaunchScreen.storyboard */,
107 | 				741B2DB02496E94B00020939 /* Info.plist */,
108 | 			);
109 | 			path = "Pytorch-CoreML-Sound-Classification";
110 | 			sourceTree = "<group>";
111 | 		};
112 | 		741B2DB82496E94B00020939 /* Pytorch-CoreML-Sound-ClassificationTests */ = {
113 | 			isa = PBXGroup;
114 | 			children = (
115 | 				74C7259F24A982D40010AC26 /* ring_hello.wav */,
116 | 				74C725A724A982ED0010AC26 /* PANN_out.ring_hello.json */,
117 | 				741B2DB92496E94B00020939 /* Pytorch_CoreML_Sound_ClassificationTests.swift */,
118 | 				741B2DBB2496E94B00020939 /* Info.plist */,
119 | 			);
120 | 			path = "Pytorch-CoreML-Sound-ClassificationTests";
121 | 			sourceTree = "<group>";
122 | 		};
123 | /* End PBXGroup section */
124 | 
125 | /* Begin PBXNativeTarget section */
126 | 		741B2D9E2496E94800020939 /* Pytorch-CoreML-Sound-Classification */ = {
127 | 			isa = PBXNativeTarget;
128 | 			buildConfigurationList = 741B2DBE2496E94B00020939 /* Build configuration list for PBXNativeTarget "Pytorch-CoreML-Sound-Classification" */;
129 | 			buildPhases = (
130 | 				741B2D9B2496E94800020939 /* Sources */,
131 | 				741B2D9C2496E94800020939 /* Frameworks */,
132 | 				741B2D9D2496E94800020939 /* Resources */,
133 | 			);
134 | 			buildRules = (
135 | 			);
136 | 			dependencies = (
137 | 			);
138 | 			name = "Pytorch-CoreML-Sound-Classification";
139 | 			productName = "Pytorch-CoreML-Sound-Classification";
140 | 			productReference = 741B2D9F2496E94800020939 /* Pytorch-CoreML-Sound-Classification.app */;
141 | 			productType = "com.apple.product-type.application";
142 | 		};
143 | 		741B2DB42496E94B00020939 /* Pytorch-CoreML-Sound-ClassificationTests */ = {
144 | 			isa = PBXNativeTarget;
145 | 			buildConfigurationList = 741B2DC12496E94B00020939 /* Build configuration list for PBXNativeTarget "Pytorch-CoreML-Sound-ClassificationTests" */;
146 | 			buildPhases = (
147 | 				741B2DB12496E94B00020939 /* Sources */,
148 | 				741B2DB22496E94B00020939 /* Frameworks */,
149 | 				741B2DB32496E94B00020939 /* Resources */,
150 | 			);
151 | 			buildRules = (
152 | 			);
153 | 			dependencies = (
154 | 				741B2DB72496E94B00020939 /* PBXTargetDependency */,
155 | 			);
156 | 			name = "Pytorch-CoreML-Sound-ClassificationTests";
157 | 			productName = "Pytorch-CoreML-Sound-ClassificationTests";
158 | 			productReference = 741B2DB52496E94B00020939 /* Pytorch-CoreML-Sound-ClassificationTests.xctest */;
159 | 			productType = "com.apple.product-type.bundle.unit-test";
160 | 		};
161 | /* End PBXNativeTarget section */
162 | 
163 | /* Begin PBXProject section */
164 | 		741B2D972496E94800020939 /* Project object */ = {
165 | 			isa = PBXProject;
166 | 			attributes = {
167 | 				LastSwiftUpdateCheck = 1140;
168 | 				LastUpgradeCheck = 1140;
169 | 				ORGANIZATIONNAME = Gerald;
170 | 				TargetAttributes = {
171 | 					741B2D9E2496E94800020939 = {
172 | 						CreatedOnToolsVersion = 11.4.1;
173 | 					};
174 | 					741B2DB42496E94B00020939 = {
175 | 						CreatedOnToolsVersion = 11.4.1;
176 | 						TestTargetID = 741B2D9E2496E94800020939;
177 | 					};
178 | 				};
179 | 			};
180 | 			buildConfigurationList = 741B2D9A2496E94800020939 /* Build configuration list for PBXProject "Pytorch-CoreML-Sound-Classification" */;
181 | 			compatibilityVersion = "Xcode 9.3";
182 | 			developmentRegion = en;
183 | 			hasScannedForEncodings = 0;
184 | 			knownRegions = (
185 | 				en,
186 | 				Base,
187 | 			);
188 | 			mainGroup = 741B2D962496E94800020939;
189 | 			productRefGroup = 741B2DA02496E94800020939 /* Products */;
190 | 			projectDirPath = "";
191 | 			projectRoot = "";
192 | 			targets = (
193 | 				741B2D9E2496E94800020939 /* Pytorch-CoreML-Sound-Classification */,
194 | 				741B2DB42496E94B00020939 /* Pytorch-CoreML-Sound-ClassificationTests */,
195 | 			);
196 | 		};
197 | /* End PBXProject section */
198 | 
199 | /* Begin PBXResourcesBuildPhase section */
200 | 		741B2D9D2496E94800020939 /* Resources */ = {
201 | 			isa = PBXResourcesBuildPhase;
202 | 			buildActionMask = 2147483647;
203 | 			files = (
204 | 				741B2DAF2496E94B00020939 /* LaunchScreen.storyboard in Resources */,
205 | 				741B2DAC2496E94B00020939 /* Assets.xcassets in Resources */,
206 | 				741B2DAA2496E94800020939 /* Main.storyboard in Resources */,
207 | 				74C725A524A982E20010AC26 /* PANN_labels.json in Resources */,
208 | 			);
209 | 			runOnlyForDeploymentPostprocessing = 0;
210 | 		};
211 | 		741B2DB32496E94B00020939 /* Resources */ = {
212 | 			isa = PBXResourcesBuildPhase;
213 | 			buildActionMask = 2147483647;
214 | 			files = (
215 | 				74C725A024A982D40010AC26 /* ring_hello.wav in Resources */,
216 | 				74C725A624A982E20010AC26 /* PANN_labels.json in Resources */,
217 | 				74C725A824A982ED0010AC26 /* PANN_out.ring_hello.json in Resources */,
218 | 			);
219 | 			runOnlyForDeploymentPostprocessing = 0;
220 | 		};
221 | /* End PBXResourcesBuildPhase section */
222 | 
223 | /* Begin PBXSourcesBuildPhase section */
224 | 		741B2D9B2496E94800020939 /* Sources */ = {
225 | 			isa = PBXSourcesBuildPhase;
226 | 			buildActionMask = 2147483647;
227 | 			files = (
228 | 				741B2DA72496E94800020939 /* ViewController.swift in Sources */,
229 | 				741B2DCC2496EC2700020939 /* DrawSpecView.swift in Sources */,
230 | 				741B2DA32496E94800020939 /* AppDelegate.swift in Sources */,
231 | 				74C725A324A982E20010AC26 /* PANN.mlmodel in Sources */,
232 | 				741B2DA52496E94800020939 /* SceneDelegate.swift in Sources */,
233 | 				741B2DCE2496ED5100020939 /* ConvertSpectrogram.swift in Sources */,
234 | 			);
235 | 			runOnlyForDeploymentPostprocessing = 0;
236 | 		};
237 | 		741B2DB12496E94B00020939 /* Sources */ = {
238 | 			isa = PBXSourcesBuildPhase;
239 | 			buildActionMask = 2147483647;
240 | 			files = (
241 | 				74C725A424A982E20010AC26 /* PANN.mlmodel in Sources */,
242 | 				741B2DBA2496E94B00020939 /* Pytorch_CoreML_Sound_ClassificationTests.swift in Sources */,
243 | 				741B2DCF2496ED5100020939 /* ConvertSpectrogram.swift in Sources */,
244 | 			);
245 | 			runOnlyForDeploymentPostprocessing = 0;
246 | 		};
247 | /* End PBXSourcesBuildPhase section */
248 | 
249 | /* Begin PBXTargetDependency section */
250 | 		741B2DB72496E94B00020939 /* PBXTargetDependency */ = {
251 | 			isa = PBXTargetDependency;
252 | 			target = 741B2D9E2496E94800020939 /* Pytorch-CoreML-Sound-Classification */;
253 | 			targetProxy = 741B2DB62496E94B00020939 /* PBXContainerItemProxy */;
254 | 		};
255 | /* End PBXTargetDependency section */
256 | 
257 | /* Begin PBXVariantGroup section */
258 | 		741B2DA82496E94800020939 /* Main.storyboard */ = {
259 | 			isa = PBXVariantGroup;
260 | 			children = (
261 | 				741B2DA92496E94800020939 /* Base */,
262 | 			);
263 | 			name = Main.storyboard;
264 | 			sourceTree = "<group>";
265 | 		};
266 | 		741B2DAD2496E94B00020939 /* LaunchScreen.storyboard */ = {
267 | 			isa = PBXVariantGroup;
268 | 			children = (
269 | 				741B2DAE2496E94B00020939 /* Base */,
270 | 			);
271 | 			name = LaunchScreen.storyboard;
272 | 			sourceTree = "<group>";
273 | 		};
274 | /* End PBXVariantGroup section */
275 | 
276 | /* Begin XCBuildConfiguration section */
277 | 		741B2DBC2496E94B00020939 /* Debug */ = {
278 | 			isa = XCBuildConfiguration;
279 | 			buildSettings = {
280 | 				ALWAYS_SEARCH_USER_PATHS = NO;
281 | 				CLANG_ANALYZER_NONNULL = YES;
282 | 				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
283 | 				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
284 | 				CLANG_CXX_LIBRARY = "libc++";
285 | 				CLANG_ENABLE_MODULES = YES;
286 | 				CLANG_ENABLE_OBJC_ARC = YES;
287 | 				CLANG_ENABLE_OBJC_WEAK = YES;
288 | 				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
289 | 				CLANG_WARN_BOOL_CONVERSION = YES;
290 | 				CLANG_WARN_COMMA = YES;
291 | 				CLANG_WARN_CONSTANT_CONVERSION = YES;
292 | 				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
293 | 				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
294 | 				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
295 | 				CLANG_WARN_EMPTY_BODY = YES;
296 | 				CLANG_WARN_ENUM_CONVERSION = YES;
297 | 				CLANG_WARN_INFINITE_RECURSION = YES;
298 | 				CLANG_WARN_INT_CONVERSION = YES;
299 | 				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
300 | 				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
301 | 				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
302 | 				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
303 | 				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
304 | 				CLANG_WARN_STRICT_PROTOTYPES = YES;
305 | 				CLANG_WARN_SUSPICIOUS_MOVE = YES;
306 | 				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
307 | 				CLANG_WARN_UNREACHABLE_CODE = YES;
308 | 				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
309 | 				COPY_PHASE_STRIP = NO;
310 | 				DEBUG_INFORMATION_FORMAT = dwarf;
311 | 				ENABLE_STRICT_OBJC_MSGSEND = YES;
312 | 				ENABLE_TESTABILITY = YES;
313 | 				GCC_C_LANGUAGE_STANDARD = gnu11;
314 | 				GCC_DYNAMIC_NO_PIC = NO;
315 | 				GCC_NO_COMMON_BLOCKS = YES;
316 | 				GCC_OPTIMIZATION_LEVEL = 0;
317 | 				GCC_PREPROCESSOR_DEFINITIONS = (
318 | 					"DEBUG=1",
319 | 					"$(inherited)",
320 | 				);
321 | 				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
322 | 				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
323 | 				GCC_WARN_UNDECLARED_SELECTOR = YES;
324 | 				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
325 | 				GCC_WARN_UNUSED_FUNCTION = YES;
326 | 				GCC_WARN_UNUSED_VARIABLE = YES;
327 | 				IPHONEOS_DEPLOYMENT_TARGET = 13.4;
328 | 				MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
329 | 				MTL_FAST_MATH = YES;
330 | 				ONLY_ACTIVE_ARCH = YES;
331 | 				SDKROOT = iphoneos;
332 | 				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
333 | 				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
334 | 			};
335 | 			name = Debug;
336 | 		};
337 | 		741B2DBD2496E94B00020939 /* Release */ = {
338 | 			isa = XCBuildConfiguration;
339 | 			buildSettings = {
340 | 				ALWAYS_SEARCH_USER_PATHS = NO;
341 | 				CLANG_ANALYZER_NONNULL = YES;
342 | 				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
343 | 				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
344 | 				CLANG_CXX_LIBRARY = "libc++";
345 | 				CLANG_ENABLE_MODULES = YES;
346 | 				CLANG_ENABLE_OBJC_ARC = YES;
347 | 				CLANG_ENABLE_OBJC_WEAK = YES;
348 | 				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
349 | 				CLANG_WARN_BOOL_CONVERSION = YES;
350 | 				CLANG_WARN_COMMA = YES;
351 | 				CLANG_WARN_CONSTANT_CONVERSION = YES;
352 | 				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
353 | 				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
354 | 				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
355 | 				CLANG_WARN_EMPTY_BODY = YES;
356 | 				CLANG_WARN_ENUM_CONVERSION = YES;
357 | 				CLANG_WARN_INFINITE_RECURSION = YES;
358 | 				CLANG_WARN_INT_CONVERSION = YES;
359 | 				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
360 | 				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
361 | 				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
362 | 				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
363 | 				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
364 | 				CLANG_WARN_STRICT_PROTOTYPES = YES;
365 | 				CLANG_WARN_SUSPICIOUS_MOVE = YES;
366 | 				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
367 | 				CLANG_WARN_UNREACHABLE_CODE = YES;
368 | 				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
369 | 				COPY_PHASE_STRIP = NO;
370 | 				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
371 | 				ENABLE_NS_ASSERTIONS = NO;
372 | 				ENABLE_STRICT_OBJC_MSGSEND = YES;
373 | 				GCC_C_LANGUAGE_STANDARD = gnu11;
374 | 				GCC_NO_COMMON_BLOCKS = YES;
375 | 				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
376 | 				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
377 | 				GCC_WARN_UNDECLARED_SELECTOR = YES;
378 | 				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
379 | 				GCC_WARN_UNUSED_FUNCTION = YES;
380 | 				GCC_WARN_UNUSED_VARIABLE = YES;
381 | 				IPHONEOS_DEPLOYMENT_TARGET = 13.4;
382 | 				MTL_ENABLE_DEBUG_INFO = NO;
383 | 				MTL_FAST_MATH = YES;
384 | 				SDKROOT = iphoneos;
385 | 				SWIFT_COMPILATION_MODE = wholemodule;
386 | 				SWIFT_OPTIMIZATION_LEVEL = "-O";
387 | 				VALIDATE_PRODUCT = YES;
388 | 			};
389 | 			name = Release;
390 | 		};
391 | 		741B2DBF2496E94B00020939 /* Debug */ = {
392 | 			isa = XCBuildConfiguration;
393 | 			buildSettings = {
394 | 				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
395 | 				CODE_SIGN_STYLE = Automatic;
396 | 				DEVELOPMENT_TEAM = K6KE3UBBT4;
397 | 				INFOPLIST_FILE = "Pytorch-CoreML-Sound-Classification/Info.plist";
398 | 				IPHONEOS_DEPLOYMENT_TARGET = 13.0;
399 | 				LD_RUNPATH_SEARCH_PATHS = (
400 | 					"$(inherited)",
401 | 					"@executable_path/Frameworks",
402 | 				);
403 | 				PRODUCT_BUNDLE_IDENTIFIER = "my.testco.Pytorch-CoreML-Sound-Classification";
404 | 				PRODUCT_NAME = "$(TARGET_NAME)";
405 | 				SWIFT_VERSION = 5.0;
406 | 				TARGETED_DEVICE_FAMILY = "1,2";
407 | 			};
408 | 			name = Debug;
409 | 		};
410 | 		741B2DC02496E94B00020939 /* Release */ = {
411 | 			isa = XCBuildConfiguration;
412 | 			buildSettings = {
413 | 				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
414 | 				CODE_SIGN_STYLE = Automatic;
415 | 				DEVELOPMENT_TEAM = K6KE3UBBT4;
416 | 				INFOPLIST_FILE = "Pytorch-CoreML-Sound-Classification/Info.plist";
417 | 				IPHONEOS_DEPLOYMENT_TARGET = 13.0;
418 | 				LD_RUNPATH_SEARCH_PATHS = (
419 | 					"$(inherited)",
420 | 					"@executable_path/Frameworks",
421 | 				);
422 | 				PRODUCT_BUNDLE_IDENTIFIER = "my.testco.Pytorch-CoreML-Sound-Classification";
423 | 				PRODUCT_NAME = "$(TARGET_NAME)";
424 | 				SWIFT_VERSION = 5.0;
425 | 				TARGETED_DEVICE_FAMILY = "1,2";
426 | 			};
427 | 			name = Release;
428 | 		};
429 | 		741B2DC22496E94B00020939 /* Debug */ = {
430 | 			isa = XCBuildConfiguration;
431 | 			buildSettings = {
432 | 				ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
433 | 				BUNDLE_LOADER = "$(TEST_HOST)";
434 | 				CODE_SIGN_STYLE = Automatic;
435 | 				DEVELOPMENT_TEAM = K6KE3UBBT4;
436 | 				INFOPLIST_FILE = "Pytorch-CoreML-Sound-ClassificationTests/Info.plist";
437 | 				IPHONEOS_DEPLOYMENT_TARGET = 13.4;
438 | 				LD_RUNPATH_SEARCH_PATHS = (
439 | 					"$(inherited)",
440 | 					"@executable_path/Frameworks",
441 | 					"@loader_path/Frameworks",
442 | 				);
443 | 				PRODUCT_BUNDLE_IDENTIFIER = "my.testco.Pytorch-CoreML-Sound-ClassificationTests";
444 | 				PRODUCT_NAME = "$(TARGET_NAME)";
445 | 				SWIFT_VERSION = 5.0;
446 | 				TARGETED_DEVICE_FAMILY = "1,2";
447 | 				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Pytorch-CoreML-Sound-Classification.app/Pytorch-CoreML-Sound-Classification";
448 | 			};
449 | 			name = Debug;
450 | 		};
451 | 		741B2DC32496E94B00020939 /* Release */ = {
452 | 			isa = XCBuildConfiguration;
453 | 			buildSettings = {
454 | 				ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
455 | 				BUNDLE_LOADER = "$(TEST_HOST)";
456 | 				CODE_SIGN_STYLE = Automatic;
457 | 				DEVELOPMENT_TEAM = K6KE3UBBT4;
458 | 				INFOPLIST_FILE = "Pytorch-CoreML-Sound-ClassificationTests/Info.plist";
459 | 				IPHONEOS_DEPLOYMENT_TARGET = 13.4;
460 | 				LD_RUNPATH_SEARCH_PATHS = (
461 | 					"$(inherited)",
462 | 					"@executable_path/Frameworks",
463 | 					"@loader_path/Frameworks",
464 | 				);
465 | 				PRODUCT_BUNDLE_IDENTIFIER = "my.testco.Pytorch-CoreML-Sound-ClassificationTests";
466 | 				PRODUCT_NAME = "$(TARGET_NAME)";
467 | 				SWIFT_VERSION = 5.0;
468 | 				TARGETED_DEVICE_FAMILY = "1,2";
469 | 				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Pytorch-CoreML-Sound-Classification.app/Pytorch-CoreML-Sound-Classification";
470 | 			};
471 | 			name = Release;
472 | 		};
473 | /* End XCBuildConfiguration section */
474 | 
475 | /* Begin XCConfigurationList section */
476 | 		741B2D9A2496E94800020939 /* Build configuration list for PBXProject "Pytorch-CoreML-Sound-Classification" */ = {
477 | 			isa = XCConfigurationList;
478 | 			buildConfigurations = (
479 | 				741B2DBC2496E94B00020939 /* Debug */,
480 | 				741B2DBD2496E94B00020939 /* Release */,
481 | 			);
482 | 			defaultConfigurationIsVisible = 0;
483 | 			defaultConfigurationName = Release;
484 | 		};
485 | 		741B2DBE2496E94B00020939 /* Build configuration list for PBXNativeTarget "Pytorch-CoreML-Sound-Classification" */ = {
486 | 			isa = XCConfigurationList;
487 | 			buildConfigurations = (
488 | 				741B2DBF2496E94B00020939 /* Debug */,
489 | 				741B2DC02496E94B00020939 /* Release */,
490 | 			);
491 | 			defaultConfigurationIsVisible = 0;
492 | 			defaultConfigurationName = Release;
493 | 		};
494 | 		741B2DC12496E94B00020939 /* Build configuration list for PBXNativeTarget "Pytorch-CoreML-Sound-ClassificationTests" */ = {
495 | 			isa = XCConfigurationList;
496 | 			buildConfigurations = (
497 | 				741B2DC22496E94B00020939 /* Debug */,
498 | 				741B2DC32496E94B00020939 /* Release */,
499 | 			);
500 | 			defaultConfigurationIsVisible = 0;
501 | 			defaultConfigurationName = Release;
502 | 		};
503 | /* End XCConfigurationList section */
504 | 	};
505 | 	rootObject = 741B2D972496E94800020939 /* Project object */;
506 | }
507 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification.xcodeproj/project.xcworkspace/contents.xcworkspacedata:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <Workspace
3 |    version = "1.0">
4 |    <FileRef
5 |       location = "self:Pytorch-CoreML-Sound-Classification.xcodeproj">
6 |    </FileRef>
7 | </Workspace>
8 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3 | <plist version="1.0">
4 | <dict>
5 | 	<key>IDEDidComputeMac32BitWarning</key>
6 | 	<true/>
7 | </dict>
8 | </plist>
9 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification.xcodeproj/xcshareddata/xcschemes/Pytorch-CoreML-Sound-Classification.xcscheme:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <Scheme
 3 |    LastUpgradeVersion = "1140"
 4 |    version = "1.3">
 5 |    <BuildAction
 6 |       parallelizeBuildables = "YES"
 7 |       buildImplicitDependencies = "YES">
 8 |       <BuildActionEntries>
 9 |          <BuildActionEntry
10 |             buildForTesting = "YES"
11 |             buildForRunning = "YES"
12 |             buildForProfiling = "YES"
13 |             buildForArchiving = "YES"
14 |             buildForAnalyzing = "YES">
15 |             <BuildableReference
16 |                BuildableIdentifier = "primary"
17 |                BlueprintIdentifier = "741B2D9E2496E94800020939"
18 |                BuildableName = "Pytorch-CoreML-Sound-Classification.app"
19 |                BlueprintName = "Pytorch-CoreML-Sound-Classification"
20 |                ReferencedContainer = "container:Pytorch-CoreML-Sound-Classification.xcodeproj">
21 |             </BuildableReference>
22 |          </BuildActionEntry>
23 |       </BuildActionEntries>
24 |    </BuildAction>
25 |    <TestAction
26 |       buildConfiguration = "Debug"
27 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
28 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
29 |       shouldUseLaunchSchemeArgsEnv = "YES">
30 |       <Testables>
31 |          <TestableReference
32 |             skipped = "NO">
33 |             <BuildableReference
34 |                BuildableIdentifier = "primary"
35 |                BlueprintIdentifier = "741B2DB42496E94B00020939"
36 |                BuildableName = "Pytorch-CoreML-Sound-ClassificationTests.xctest"
37 |                BlueprintName = "Pytorch-CoreML-Sound-ClassificationTests"
38 |                ReferencedContainer = "container:Pytorch-CoreML-Sound-Classification.xcodeproj">
39 |             </BuildableReference>
40 |          </TestableReference>
41 |       </Testables>
42 |    </TestAction>
43 |    <LaunchAction
44 |       buildConfiguration = "Debug"
45 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
46 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
47 |       launchStyle = "0"
48 |       useCustomWorkingDirectory = "NO"
49 |       ignoresPersistentStateOnLaunch = "NO"
50 |       debugDocumentVersioning = "YES"
51 |       debugServiceExtension = "internal"
52 |       allowLocationSimulation = "YES">
53 |       <BuildableProductRunnable
54 |          runnableDebuggingMode = "0">
55 |          <BuildableReference
56 |             BuildableIdentifier = "primary"
57 |             BlueprintIdentifier = "741B2D9E2496E94800020939"
58 |             BuildableName = "Pytorch-CoreML-Sound-Classification.app"
59 |             BlueprintName = "Pytorch-CoreML-Sound-Classification"
60 |             ReferencedContainer = "container:Pytorch-CoreML-Sound-Classification.xcodeproj">
61 |          </BuildableReference>
62 |       </BuildableProductRunnable>
63 |    </LaunchAction>
64 |    <ProfileAction
65 |       buildConfiguration = "Release"
66 |       shouldUseLaunchSchemeArgsEnv = "YES"
67 |       savedToolIdentifier = ""
68 |       useCustomWorkingDirectory = "NO"
69 |       debugDocumentVersioning = "YES">
70 |       <BuildableProductRunnable
71 |          runnableDebuggingMode = "0">
72 |          <BuildableReference
73 |             BuildableIdentifier = "primary"
74 |             BlueprintIdentifier = "741B2D9E2496E94800020939"
75 |             BuildableName = "Pytorch-CoreML-Sound-Classification.app"
76 |             BlueprintName = "Pytorch-CoreML-Sound-Classification"
77 |             ReferencedContainer = "container:Pytorch-CoreML-Sound-Classification.xcodeproj">
78 |          </BuildableReference>
79 |       </BuildableProductRunnable>
80 |    </ProfileAction>
81 |    <AnalyzeAction
82 |       buildConfiguration = "Debug">
83 |    </AnalyzeAction>
84 |    <ArchiveAction
85 |       buildConfiguration = "Release"
86 |       revealArchiveInOrganizer = "YES">
87 |    </ArchiveAction>
88 | </Scheme>
89 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification.xcodeproj/xcuserdata/gerald.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <Bucket
 3 |    uuid = "2B67870E-F4C2-40D4-93D5-0D0C77D1274C"
 4 |    type = "1"
 5 |    version = "2.0">
 6 |    <Breakpoints>
 7 |       <BreakpointProxy
 8 |          BreakpointExtensionID = "Xcode.Breakpoint.SymbolicBreakpoint">
 9 |          <BreakpointContent
10 |             uuid = "ED7D818C-AB20-4197-9010-61CD48999F9D"
11 |             shouldBeEnabled = "Yes"
12 |             nameForDebugger = "ANE"
13 |             ignoreCount = "0"
14 |             continueAfterRunningActions = "No"
15 |             symbolName = "-[_ANEModel program]"
16 |             moduleName = "">
17 |             <Locations>
18 |             </Locations>
19 |          </BreakpointContent>
20 |       </BreakpointProxy>
21 |       <BreakpointProxy
22 |          BreakpointExtensionID = "Xcode.Breakpoint.SymbolicBreakpoint">
23 |          <BreakpointContent
24 |             uuid = "423AF4EC-695D-4190-A932-552A9BB45CA5"
25 |             shouldBeEnabled = "Yes"
26 |             nameForDebugger = "GPU"
27 |             ignoreCount = "0"
28 |             continueAfterRunningActions = "No"
29 |             symbolName = "Espresso::MPSEngine"
30 |             moduleName = "">
31 |             <Locations>
32 |             </Locations>
33 |          </BreakpointContent>
34 |       </BreakpointProxy>
35 |       <BreakpointProxy
36 |          BreakpointExtensionID = "Xcode.Breakpoint.SymbolicBreakpoint">
37 |          <BreakpointContent
38 |             uuid = "7C23C746-87CA-494A-A06C-88D15E4946C0"
39 |             shouldBeEnabled = "Yes"
40 |             nameForDebugger = "ANE 2"
41 |             ignoreCount = "0"
42 |             continueAfterRunningActions = "No"
43 |             symbolName = "Espresso::ANERuntimeEngine"
44 |             moduleName = "">
45 |             <Locations>
46 |             </Locations>
47 |          </BreakpointContent>
48 |       </BreakpointProxy>
49 |       <BreakpointProxy
50 |          BreakpointExtensionID = "Xcode.Breakpoint.SymbolicBreakpoint">
51 |          <BreakpointContent
52 |             uuid = "73A0C327-84E2-4F86-9D74-2E3697F2E08E"
53 |             shouldBeEnabled = "Yes"
54 |             nameForDebugger = "CPU"
55 |             ignoreCount = "0"
56 |             continueAfterRunningActions = "No"
57 |             symbolName = "Espresso::BNNSEngine"
58 |             moduleName = "">
59 |             <Locations>
60 |             </Locations>
61 |          </BreakpointContent>
62 |       </BreakpointProxy>
63 |       <BreakpointProxy
64 |          BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
65 |          <BreakpointContent
66 |             uuid = "05A57D5D-9624-43B0-8A12-C077C96BF662"
67 |             shouldBeEnabled = "No"
68 |             ignoreCount = "0"
69 |             continueAfterRunningActions = "No"
70 |             filePath = "Pytorch-CoreML-Sound-Classification/ViewController.swift"
71 |             startingColumnNumber = "9223372036854775807"
72 |             endingColumnNumber = "9223372036854775807"
73 |             startingLineNumber = "187"
74 |             endingLineNumber = "187"
75 |             landmarkName = "predict_provider(provider:)"
76 |             landmarkType = "7">
77 |          </BreakpointContent>
78 |       </BreakpointProxy>
79 |    </Breakpoints>
80 | </Bucket>
81 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification.xcodeproj/xcuserdata/gerald.xcuserdatad/xcschemes/xcschememanagement.plist:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 3 | <plist version="1.0">
 4 | <dict>
 5 | 	<key>SchemeUserState</key>
 6 | 	<dict>
 7 | 		<key>Pytorch-CoreML-Sound-Classification.xcscheme_^#shared#^_</key>
 8 | 		<dict>
 9 | 			<key>orderHint</key>
10 | 			<integer>0</integer>
11 | 		</dict>
12 | 	</dict>
13 | 	<key>SuppressBuildableAutocreation</key>
14 | 	<dict>
15 | 		<key>741B2D9E2496E94800020939</key>
16 | 		<dict>
17 | 			<key>primary</key>
18 | 			<true/>
19 | 		</dict>
20 | 		<key>741B2DB42496E94B00020939</key>
21 | 		<dict>
22 | 			<key>primary</key>
23 | 			<true/>
24 | 		</dict>
25 | 	</dict>
26 | </dict>
27 | </plist>
28 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification/AppDelegate.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  AppDelegate.swift
 3 | //  Pytorch-CoreML-Sound-Classification
 4 | //
 5 | //  Created by Gerald on 6/14/20.
 6 | //  Copyright © 2020 Gerald. All rights reserved.
 7 | //
 8 | 
 9 | import UIKit
10 | 
11 | @UIApplicationMain
12 | class AppDelegate: UIResponder, UIApplicationDelegate {
13 | 
14 | 
15 | 
16 |     func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool {
17 |         // Override point for customization after application launch.
18 |         return true
19 |     }
20 | 
21 |     // MARK: UISceneSession Lifecycle
22 | 
23 |     func application(_ application: UIApplication, configurationForConnecting connectingSceneSession: UISceneSession, options: UIScene.ConnectionOptions) -> UISceneConfiguration {
24 |         // Called when a new scene session is being created.
25 |         // Use this method to select a configuration to create the new scene with.
26 |         return UISceneConfiguration(name: "Default Configuration", sessionRole: connectingSceneSession.role)
27 |     }
28 | 
29 |     func application(_ application: UIApplication, didDiscardSceneSessions sceneSessions: Set<UISceneSession>) {
30 |         // Called when the user discards a scene session.
31 |         // If any sessions were discarded while the application was not running, this will be called shortly after application:didFinishLaunchingWithOptions.
32 |         // Use this method to release any resources that were specific to the discarded scenes, as they will not return.
33 |     }
34 | 
35 | 
36 | }
37 | 
38 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification/Assets.xcassets/AppIcon.appiconset/Contents.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "images" : [
 3 |     {
 4 |       "idiom" : "iphone",
 5 |       "scale" : "2x",
 6 |       "size" : "20x20"
 7 |     },
 8 |     {
 9 |       "idiom" : "iphone",
10 |       "scale" : "3x",
11 |       "size" : "20x20"
12 |     },
13 |     {
14 |       "idiom" : "iphone",
15 |       "scale" : "2x",
16 |       "size" : "29x29"
17 |     },
18 |     {
19 |       "idiom" : "iphone",
20 |       "scale" : "3x",
21 |       "size" : "29x29"
22 |     },
23 |     {
24 |       "idiom" : "iphone",
25 |       "scale" : "2x",
26 |       "size" : "40x40"
27 |     },
28 |     {
29 |       "idiom" : "iphone",
30 |       "scale" : "3x",
31 |       "size" : "40x40"
32 |     },
33 |     {
34 |       "idiom" : "iphone",
35 |       "scale" : "2x",
36 |       "size" : "60x60"
37 |     },
38 |     {
39 |       "idiom" : "iphone",
40 |       "scale" : "3x",
41 |       "size" : "60x60"
42 |     },
43 |     {
44 |       "idiom" : "ipad",
45 |       "scale" : "1x",
46 |       "size" : "20x20"
47 |     },
48 |     {
49 |       "idiom" : "ipad",
50 |       "scale" : "2x",
51 |       "size" : "20x20"
52 |     },
53 |     {
54 |       "idiom" : "ipad",
55 |       "scale" : "1x",
56 |       "size" : "29x29"
57 |     },
58 |     {
59 |       "idiom" : "ipad",
60 |       "scale" : "2x",
61 |       "size" : "29x29"
62 |     },
63 |     {
64 |       "idiom" : "ipad",
65 |       "scale" : "1x",
66 |       "size" : "40x40"
67 |     },
68 |     {
69 |       "idiom" : "ipad",
70 |       "scale" : "2x",
71 |       "size" : "40x40"
72 |     },
73 |     {
74 |       "idiom" : "ipad",
75 |       "scale" : "1x",
76 |       "size" : "76x76"
77 |     },
78 |     {
79 |       "idiom" : "ipad",
80 |       "scale" : "2x",
81 |       "size" : "76x76"
82 |     },
83 |     {
84 |       "idiom" : "ipad",
85 |       "scale" : "2x",
86 |       "size" : "83.5x83.5"
87 |     },
88 |     {
89 |       "idiom" : "ios-marketing",
90 |       "scale" : "1x",
91 |       "size" : "1024x1024"
92 |     }
93 |   ],
94 |   "info" : {
95 |     "author" : "xcode",
96 |     "version" : 1
97 |   }
98 | }
99 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification/Assets.xcassets/Contents.json:
--------------------------------------------------------------------------------
1 | {
2 |   "info" : {
3 |     "author" : "xcode",
4 |     "version" : 1
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification/Base.lproj/LaunchScreen.storyboard:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 2 | <document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
 3 |     <dependencies>
 4 |         <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
 5 |         <capability name="Safe area layout guides" minToolsVersion="9.0"/>
 6 |         <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
 7 |     </dependencies>
 8 |     <scenes>
 9 |         <!--View Controller-->
10 |         <scene sceneID="EHf-IW-A2E">
11 |             <objects>
12 |                 <viewController id="01J-lp-oVM" sceneMemberID="viewController">
13 |                     <view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
14 |                         <rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
15 |                         <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
16 |                         <color key="backgroundColor" xcode11CocoaTouchSystemColor="systemBackgroundColor" cocoaTouchSystemColor="whiteColor"/>
17 |                         <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
18 |                     </view>
19 |                 </viewController>
20 |                 <placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
21 |             </objects>
22 |             <point key="canvasLocation" x="53" y="375"/>
23 |         </scene>
24 |     </scenes>
25 | </document>
26 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification/Base.lproj/Main.storyboard:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="16096" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
 3 |     <device id="retina6_1" orientation="portrait" appearance="light"/>
 4 |     <dependencies>
 5 |         <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="16087"/>
 6 |         <capability name="Safe area layout guides" minToolsVersion="9.0"/>
 7 |         <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
 8 |     </dependencies>
 9 |     <scenes>
10 |         <!--View Controller-->
11 |         <scene sceneID="tne-QT-ifu">
12 |             <objects>
13 |                 <viewController id="BYZ-38-t0r" customClass="ViewController" customModule="Pytorch_CoreML_Sound_Classification" customModuleProvider="target" sceneMemberID="viewController">
14 |                     <view key="view" contentMode="scaleToFill" id="8bC-Xf-vdC">
15 |                         <rect key="frame" x="0.0" y="0.0" width="414" height="896"/>
16 |                         <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
17 |                         <subviews>
18 |                             <label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="CoreML Sound Classification Demo" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" id="d99-zB-oNV">
19 |                                 <rect key="frame" x="9" y="80" width="397" height="29"/>
20 |                                 <autoresizingMask key="autoresizingMask" flexibleMaxY="YES"/>
21 |                                 <fontDescription key="fontDescription" type="boldSystem" pointSize="24"/>
22 |                                 <color key="textColor" systemColor="systemBlueColor" red="0.0" green="0.47843137250000001" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
23 |                                 <nil key="highlightedColor"/>
24 |                             </label>
25 |                             <view contentMode="scaleToFill" fixedFrame="YES" translatesAutoresizingMaskIntoConstraints="NO" id="mw6-eD-Gfy" customClass="DrawSpecView" customModule="Pytorch_CoreML_Sound_Classification" customModuleProvider="target">
26 |                                 <rect key="frame" x="0.0" y="187" width="414" height="245"/>
27 |                                 <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
28 |                                 <color key="backgroundColor" systemColor="systemBackgroundColor" cocoaTouchSystemColor="whiteColor"/>
29 |                             </view>
30 |                             <tableView clipsSubviews="YES" contentMode="scaleToFill" fixedFrame="YES" alwaysBounceVertical="YES" dataMode="prototypes" style="plain" separatorStyle="default" rowHeight="30" estimatedRowHeight="-1" sectionHeaderHeight="28" sectionFooterHeight="28" translatesAutoresizingMaskIntoConstraints="NO" id="PXM-Vu-OOL">
31 |                                 <rect key="frame" x="0.0" y="440" width="414" height="138"/>
32 |                                 <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
33 |                                 <color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
34 |                                 <prototypes>
35 |                                     <tableViewCell clipsSubviews="YES" contentMode="scaleToFill" preservesSuperviewLayoutMargins="YES" selectionStyle="default" indentationWidth="10" reuseIdentifier="LabelCell" textLabel="5bO-DP-nFF" detailTextLabel="nfy-Am-Iin" rowHeight="30" style="IBUITableViewCellStyleValue1" id="Xhc-4U-Z5K">
36 |                                         <rect key="frame" x="0.0" y="28" width="414" height="30"/>
37 |                                         <autoresizingMask key="autoresizingMask"/>
38 |                                         <tableViewCellContentView key="contentView" opaque="NO" clipsSubviews="YES" multipleTouchEnabled="YES" contentMode="center" preservesSuperviewLayoutMargins="YES" insetsLayoutMarginsFromSafeArea="NO" tableViewCell="Xhc-4U-Z5K" id="tgi-3T-Hfe">
39 |                                             <rect key="frame" x="0.0" y="0.0" width="414" height="30"/>
40 |                                             <autoresizingMask key="autoresizingMask"/>
41 |                                             <subviews>
42 |                                                 <label opaque="NO" multipleTouchEnabled="YES" contentMode="left" insetsLayoutMarginsFromSafeArea="NO" text="Title" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" id="5bO-DP-nFF">
43 |                                                     <rect key="frame" x="20" y="9" width="36.5" height="12"/>
44 |                                                     <autoresizingMask key="autoresizingMask"/>
45 |                                                     <fontDescription key="fontDescription" name="Courier-Bold" family="Courier" pointSize="12"/>
46 |                                                     <nil key="textColor"/>
47 |                                                     <nil key="highlightedColor"/>
48 |                                                 </label>
49 |                                                 <label opaque="NO" multipleTouchEnabled="YES" contentMode="left" insetsLayoutMarginsFromSafeArea="NO" text="Detail" textAlignment="right" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" id="nfy-Am-Iin">
50 |                                                     <rect key="frame" x="350.5" y="9" width="43.5" height="12"/>
51 |                                                     <autoresizingMask key="autoresizingMask"/>
52 |                                                     <fontDescription key="fontDescription" name="Courier" family="Courier" pointSize="12"/>
53 |                                                     <nil key="textColor"/>
54 |                                                     <nil key="highlightedColor"/>
55 |                                                 </label>
56 |                                             </subviews>
57 |                                         </tableViewCellContentView>
58 |                                     </tableViewCell>
59 |                                 </prototypes>
60 |                             </tableView>
61 |                         </subviews>
62 |                         <color key="backgroundColor" systemColor="systemBackgroundColor" cocoaTouchSystemColor="whiteColor"/>
63 |                         <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
64 |                     </view>
65 |                     <connections>
66 |                         <outlet property="drawSpecView" destination="mw6-eD-Gfy" id="N1d-ca-LTe"/>
67 |                         <outlet property="labelsTableView" destination="PXM-Vu-OOL" id="BO9-os-ygP"/>
68 |                     </connections>
69 |                 </viewController>
70 |                 <placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
71 |             </objects>
72 |             <point key="canvasLocation" x="137.68115942028987" y="101.78571428571428"/>
73 |         </scene>
74 |     </scenes>
75 | </document>
76 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification/ConvertSpectrogram.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  ConvertSpectrogram.swift
 3 | //  CoreML_Audio_Analysis
 4 | //
 5 | //  Created by Gerald on 5/30/20.
 6 | //  Copyright © 2020 Gerald. All rights reserved.
 7 | //
 8 | // from https://github.com/tucan9389/DepthPrediction-CoreML/blob/master/DepthPrediction-CoreML/heatmapProcessor.swift
 9 | import CoreML
10 | 
11 | class SpectrogramConverter {
12 |     func convertTo2DArray(from spectrogram: MLMultiArray) -> Array<Array<Float32>> {
13 |         guard spectrogram.shape.count == 4 else {
14 |             print("spectrogram's shape is invalid. \(spectrogram.shape)")
15 |             return []
16 |         }
17 |         let spectrogram_w = spectrogram.shape[2].intValue
18 |         let spectrogram_h = spectrogram.shape[3].intValue
19 |         
20 |         var converted_spectrogram: Array<Array<Float32>> = Array(repeating: Array(repeating: 0.0, count: spectrogram_h), count: spectrogram_w)
21 |         
22 |         var minimumValue: Float32 = Float32.greatestFiniteMagnitude
23 |         var maximumValue: Float32 = -Float32.greatestFiniteMagnitude
24 |         
25 |         for i in 0..<spectrogram_w {
26 |             for j in 0..<spectrogram_h {
27 |                 let index = [ 0, 0, i as NSNumber, j as NSNumber ] // i*(spectrogram_h) + j
28 |                 let val = spectrogram[index].floatValue
29 |                 // guard val > 0 else { continue }
30 |                 converted_spectrogram[i][spectrogram_h-j-1] = val // origin at bottom
31 |                 
32 |                 if minimumValue > val {
33 |                     minimumValue = val
34 |                 }
35 |                 if maximumValue < val {
36 |                     maximumValue = val
37 |                 }
38 |             }
39 |         }
40 |         
41 |         maximumValue = max( -15.0, maximumValue ) // for improved contrast on device
42 |         var minmaxGap = maximumValue - minimumValue
43 |         
44 |         // print( "minmax \(minmaxGap) \(maximumValue) \(minimumValue)")
45 |         
46 |         if ( minmaxGap == 0 ) {
47 |             minmaxGap = 1.0
48 |         }
49 |         for i in 0..<spectrogram_h {
50 |             for j in 0..<spectrogram_w {
51 |                 converted_spectrogram[j][i] = (converted_spectrogram[j][i] - minimumValue) / minmaxGap
52 |             }
53 |         }
54 |         
55 |         return converted_spectrogram
56 |     }
57 | }
58 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification/DrawSpecView.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  DrawSpecView.swift
 3 | //  CoreML_Audio_Analysis
 4 | //
 5 | //  Created by Gerald on 5/30/20.
 6 | //  Copyright © 2020 Gerald. All rights reserved.
 7 | //
 8 | // adapted from https://github.com/tucan9389/DepthPrediction-CoreML/blob/master/DepthPrediction-CoreML/DrawingHeatmapView.swift
 9 | 
10 | import Foundation
11 | 
12 | import UIKit
13 | 
14 | class DrawSpecView: UIView {
15 |     
16 |     var spectrogram: Array<Array<Float32>>? = nil {
17 |         didSet {
18 |             DispatchQueue.main.async {
19 |                 self.setNeedsDisplay()
20 |             }
21 |         }
22 |     }
23 | 
24 |     override func draw(_ rect: CGRect) {
25 |         
26 |         if let ctx = UIGraphicsGetCurrentContext() {
27 |             
28 |             ctx.clear(rect);
29 |             
30 |             guard let spectrogram = self.spectrogram else { return }
31 |             
32 |             let size = self.bounds.size
33 |             let spectrogram_w = spectrogram.count
34 |             let spectrogram_h = spectrogram.first?.count ?? 0
35 |             let w = size.width / CGFloat(spectrogram_w)
36 |             let h = size.height / CGFloat(spectrogram_h)
37 |             
38 |             for j in 0..<spectrogram_h {
39 |                 for i in 0..<spectrogram_w {
40 |                     let value = spectrogram[i][j]
41 |                     var alpha: CGFloat = CGFloat(value)
42 |                     if alpha > 1 {
43 |                         alpha = 1
44 |                     } else if alpha < 0 {
45 |                         alpha = 0
46 |                     }
47 |                     
48 |                     let rect: CGRect = CGRect(x: CGFloat(i) * w, y: CGFloat(j) * h, width: w, height: h)
49 |                     
50 |                     // color
51 |                     let hue: CGFloat = (1.0-alpha) * (240.0 / 360.0)
52 |                     let color: UIColor = UIColor(hue: hue, saturation: 1, brightness: 1, alpha: 0.94)
53 |                     
54 |                     // gray
55 |                     // let color: UIColor = UIColor(white: 1-alpha, alpha: 1)
56 |                     
57 |                     let bpath: UIBezierPath = UIBezierPath(rect: rect)
58 |                     
59 |                     color.set()
60 |                     bpath.fill()
61 |                 }
62 |             }
63 |         }
64 |     } // end of draw(rect:)
65 | 
66 | }
67 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification/Info.plist:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 3 | <plist version="1.0">
 4 | <dict>
 5 | 	<key>CFBundleDevelopmentRegion</key>
 6 | 	<string>$(DEVELOPMENT_LANGUAGE)</string>
 7 | 	<key>CFBundleExecutable</key>
 8 | 	<string>$(EXECUTABLE_NAME)</string>
 9 | 	<key>CFBundleIdentifier</key>
10 | 	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
11 | 	<key>CFBundleInfoDictionaryVersion</key>
12 | 	<string>6.0</string>
13 | 	<key>CFBundleName</key>
14 | 	<string>$(PRODUCT_NAME)</string>
15 | 	<key>CFBundlePackageType</key>
16 | 	<string>$(PRODUCT_BUNDLE_PACKAGE_TYPE)</string>
17 | 	<key>CFBundleShortVersionString</key>
18 | 	<string>1.0</string>
19 | 	<key>CFBundleVersion</key>
20 | 	<string>1</string>
21 | 	<key>LSRequiresIPhoneOS</key>
22 | 	<true/>
23 | 	<key>NSMicrophoneUsageDescription</key>
24 | 	<string>Capture audio.</string>
25 | 	<key>UIApplicationSceneManifest</key>
26 | 	<dict>
27 | 		<key>UIApplicationSupportsMultipleScenes</key>
28 | 		<false/>
29 | 		<key>UISceneConfigurations</key>
30 | 		<dict>
31 | 			<key>UIWindowSceneSessionRoleApplication</key>
32 | 			<array>
33 | 				<dict>
34 | 					<key>UISceneConfigurationName</key>
35 | 					<string>Default Configuration</string>
36 | 					<key>UISceneDelegateClassName</key>
37 | 					<string>$(PRODUCT_MODULE_NAME).SceneDelegate</string>
38 | 					<key>UISceneStoryboardFile</key>
39 | 					<string>Main</string>
40 | 				</dict>
41 | 			</array>
42 | 		</dict>
43 | 	</dict>
44 | 	<key>UILaunchStoryboardName</key>
45 | 	<string>LaunchScreen</string>
46 | 	<key>UIMainStoryboardFile</key>
47 | 	<string>Main</string>
48 | 	<key>UIRequiredDeviceCapabilities</key>
49 | 	<array>
50 | 		<string>armv7</string>
51 | 	</array>
52 | 	<key>UISupportedInterfaceOrientations</key>
53 | 	<array>
54 | 		<string>UIInterfaceOrientationPortrait</string>
55 | 	</array>
56 | 	<key>UISupportedInterfaceOrientations~ipad</key>
57 | 	<array>
58 | 		<string>UIInterfaceOrientationPortrait</string>
59 | 		<string>UIInterfaceOrientationPortraitUpsideDown</string>
60 | 		<string>UIInterfaceOrientationLandscapeLeft</string>
61 | 		<string>UIInterfaceOrientationLandscapeRight</string>
62 | 	</array>
63 | </dict>
64 | </plist>
65 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification/PANN.mlmodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ml-illustrated/Pytorch-CoreML-Sound-Classification/9c5b268dbc3926494e5c1d6611da316dbefe770a/Pytorch-CoreML-Sound-Classification/PANN.mlmodel


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification/PANN_labels.json:
--------------------------------------------------------------------------------
1 | ["Speech", "Male speech, man speaking", "Female speech, woman speaking", "Child speech, kid speaking", "Conversation", "Narration, monologue", "Babbling", "Speech synthesizer", "Shout", "Bellow", "Whoop", "Yell", "Battle cry", "Children shouting", "Screaming", "Whispering", "Laughter", "Baby laughter", "Giggle", "Snicker", "Belly laugh", "Chuckle, chortle", "Crying, sobbing", "Baby cry, infant cry", "Whimper", "Wail, moan", "Sigh", "Singing", "Choir", "Yodeling", "Chant", "Mantra", "Male singing", "Female singing", "Child singing", "Synthetic singing", "Rapping", "Humming", "Groan", "Grunt", "Whistling", "Breathing", "Wheeze", "Snoring", "Gasp", "Pant", "Snort", "Cough", "Throat clearing", "Sneeze", "Sniff", "Run", "Shuffle", "Walk, footsteps", "Chewing, mastication", "Biting", "Gargling", "Stomach rumble", "Burping, eructation", "Hiccup", "Fart", "Hands", "Finger snapping", "Clapping", "Heart sounds, heartbeat", "Heart murmur", "Cheering", "Applause", "Chatter", "Crowd", "Hubbub, speech noise, speech babble", "Children playing", "Animal", "Domestic animals, pets", "Dog", "Bark", "Yip", "Howl", "Bow-wow", "Growling", "Whimper (dog)", "Cat", "Purr", "Meow", "Hiss", "Caterwaul", "Livestock, farm animals, working animals", "Horse", "Clip-clop", "Neigh, whinny", "Cattle, bovinae", "Moo", "Cowbell", "Pig", "Oink", "Goat", "Bleat", "Sheep", "Fowl", "Chicken, rooster", "Cluck", "Crowing, cock-a-doodle-doo", "Turkey", "Gobble", "Duck", "Quack", "Goose", "Honk", "Wild animals", "Roaring cats (lions, tigers)", "Roar", "Bird", "Bird vocalization, bird call, bird song", "Chirp, tweet", "Squawk", "Pigeon, dove", "Coo", "Crow", "Caw", "Owl", "Hoot", "Bird flight, flapping wings", "Canidae, dogs, wolves", "Rodents, rats, mice", "Mouse", "Patter", "Insect", "Cricket", "Mosquito", "Fly, housefly", "Buzz", "Bee, wasp, etc.", "Frog", "Croak", "Snake", "Rattle", "Whale vocalization", "Music", "Musical instrument", "Plucked string instrument", "Guitar", "Electric guitar", "Bass guitar", "Acoustic guitar", "Steel guitar, slide guitar", "Tapping (guitar technique)", "Strum", "Banjo", "Sitar", "Mandolin", "Zither", "Ukulele", "Keyboard (musical)", "Piano", "Electric piano", "Organ", "Electronic organ", "Hammond organ", "Synthesizer", "Sampler", "Harpsichord", "Percussion", "Drum kit", "Drum machine", "Drum", "Snare drum", "Rimshot", "Drum roll", "Bass drum", "Timpani", "Tabla", "Cymbal", "Hi-hat", "Wood block", "Tambourine", "Rattle (instrument)", "Maraca", "Gong", "Tubular bells", "Mallet percussion", "Marimba, xylophone", "Glockenspiel", "Vibraphone", "Steelpan", "Orchestra", "Brass instrument", "French horn", "Trumpet", "Trombone", "Bowed string instrument", "String section", "Violin, fiddle", "Pizzicato", "Cello", "Double bass", "Wind instrument, woodwind instrument", "Flute", "Saxophone", "Clarinet", "Harp", "Bell", "Church bell", "Jingle bell", "Bicycle bell", "Tuning fork", "Chime", "Wind chime", "Change ringing (campanology)", "Harmonica", "Accordion", "Bagpipes", "Didgeridoo", "Shofar", "Theremin", "Singing bowl", "Scratching (performance technique)", "Pop music", "Hip hop music", "Beatboxing", "Rock music", "Heavy metal", "Punk rock", "Grunge", "Progressive rock", "Rock and roll", "Psychedelic rock", "Rhythm and blues", "Soul music", "Reggae", "Country", "Swing music", "Bluegrass", "Funk", "Folk music", "Middle Eastern music", "Jazz", "Disco", "Classical music", "Opera", "Electronic music", "House music", "Techno", "Dubstep", "Drum and bass", "Electronica", "Electronic dance music", "Ambient music", "Trance music", "Music of Latin America", "Salsa music", "Flamenco", "Blues", "Music for children", "New-age music", "Vocal music", "A capella", "Music of Africa", "Afrobeat", "Christian music", "Gospel music", "Music of Asia", "Carnatic music", "Music of Bollywood", "Ska", "Traditional music", "Independent music", "Song", "Background music", "Theme music", "Jingle (music)", "Soundtrack music", "Lullaby", "Video game music", "Christmas music", "Dance music", "Wedding music", "Happy music", "Funny music", "Sad music", "Tender music", "Exciting music", "Angry music", "Scary music", "Wind", "Rustling leaves", "Wind noise (microphone)", "Thunderstorm", "Thunder", "Water", "Rain", "Raindrop", "Rain on surface", "Stream", "Waterfall", "Ocean", "Waves, surf", "Steam", "Gurgling", "Fire", "Crackle", "Vehicle", "Boat, Water vehicle", "Sailboat, sailing ship", "Rowboat, canoe, kayak", "Motorboat, speedboat", "Ship", "Motor vehicle (road)", "Car", "Vehicle horn, car horn, honking", "Toot", "Car alarm", "Power windows, electric windows", "Skidding", "Tire squeal", "Car passing by", "Race car, auto racing", "Truck", "Air brake", "Air horn, truck horn", "Reversing beeps", "Ice cream truck, ice cream van", "Bus", "Emergency vehicle", "Police car (siren)", "Ambulance (siren)", "Fire engine, fire truck (siren)", "Motorcycle", "Traffic noise, roadway noise", "Rail transport", "Train", "Train whistle", "Train horn", "Railroad car, train wagon", "Train wheels squealing", "Subway, metro, underground", "Aircraft", "Aircraft engine", "Jet engine", "Propeller, airscrew", "Helicopter", "Fixed-wing aircraft, airplane", "Bicycle", "Skateboard", "Engine", "Light engine (high frequency)", "Dental drill, dentist's drill", "Lawn mower", "Chainsaw", "Medium engine (mid frequency)", "Heavy engine (low frequency)", "Engine knocking", "Engine starting", "Idling", "Accelerating, revving, vroom", "Door", "Doorbell", "Ding-dong", "Sliding door", "Slam", "Knock", "Tap", "Squeak", "Cupboard open or close", "Drawer open or close", "Dishes, pots, and pans", "Cutlery, silverware", "Chopping (food)", "Frying (food)", "Microwave oven", "Blender", "Water tap, faucet", "Sink (filling or washing)", "Bathtub (filling or washing)", "Hair dryer", "Toilet flush", "Toothbrush", "Electric toothbrush", "Vacuum cleaner", "Zipper (clothing)", "Keys jangling", "Coin (dropping)", "Scissors", "Electric shaver, electric razor", "Shuffling cards", "Typing", "Typewriter", "Computer keyboard", "Writing", "Alarm", "Telephone", "Telephone bell ringing", "Ringtone", "Telephone dialing, DTMF", "Dial tone", "Busy signal", "Alarm clock", "Siren", "Civil defense siren", "Buzzer", "Smoke detector, smoke alarm", "Fire alarm", "Foghorn", "Whistle", "Steam whistle", "Mechanisms", "Ratchet, pawl", "Clock", "Tick", "Tick-tock", "Gears", "Pulleys", "Sewing machine", "Mechanical fan", "Air conditioning", "Cash register", "Printer", "Camera", "Single-lens reflex camera", "Tools", "Hammer", "Jackhammer", "Sawing", "Filing (rasp)", "Sanding", "Power tool", "Drill", "Explosion", "Gunshot, gunfire", "Machine gun", "Fusillade", "Artillery fire", "Cap gun", "Fireworks", "Firecracker", "Burst, pop", "Eruption", "Boom", "Wood", "Chop", "Splinter", "Crack", "Glass", "Chink, clink", "Shatter", "Liquid", "Splash, splatter", "Slosh", "Squish", "Drip", "Pour", "Trickle, dribble", "Gush", "Fill (with liquid)", "Spray", "Pump (liquid)", "Stir", "Boiling", "Sonar", "Arrow", "Whoosh, swoosh, swish", "Thump, thud", "Thunk", "Electronic tuner", "Effects unit", "Chorus effect", "Basketball bounce", "Bang", "Slap, smack", "Whack, thwack", "Smash, crash", "Breaking", "Bouncing", "Whip", "Flap", "Scratch", "Scrape", "Rub", "Roll", "Crushing", "Crumpling, crinkling", "Tearing", "Beep, bleep", "Ping", "Ding", "Clang", "Squeal", "Creak", "Rustle", "Whir", "Clatter", "Sizzle", "Clicking", "Clickety-clack", "Rumble", "Plop", "Jingle, tinkle", "Hum", "Zing", "Boing", "Crunch", "Silence", "Sine wave", "Harmonic", "Chirp tone", "Sound effect", "Pulse", "Inside, small room", "Inside, large room or hall", "Inside, public space", "Outside, urban or manmade", "Outside, rural or natural", "Reverberation", "Echo", "Noise", "Environmental noise", "Static", "Mains hum", "Distortion", "Sidetone", "Cacophony", "White noise", "Pink noise", "Throbbing", "Vibration", "Television", "Radio", "Field recording"]


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification/SceneDelegate.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  SceneDelegate.swift
 3 | //  Pytorch-CoreML-Sound-Classification
 4 | //
 5 | //  Created by Gerald on 6/14/20.
 6 | //  Copyright © 2020 Gerald. All rights reserved.
 7 | //
 8 | 
 9 | import UIKit
10 | 
11 | class SceneDelegate: UIResponder, UIWindowSceneDelegate {
12 | 
13 |     var window: UIWindow?
14 | 
15 | 
16 |     func scene(_ scene: UIScene, willConnectTo session: UISceneSession, options connectionOptions: UIScene.ConnectionOptions) {
17 |         // Use this method to optionally configure and attach the UIWindow `window` to the provided UIWindowScene `scene`.
18 |         // If using a storyboard, the `window` property will automatically be initialized and attached to the scene.
19 |         // This delegate does not imply the connecting scene or session are new (see `application:configurationForConnectingSceneSession` instead).
20 |         guard let _ = (scene as? UIWindowScene) else { return }
21 |     }
22 | 
23 |     func sceneDidDisconnect(_ scene: UIScene) {
24 |         // Called as the scene is being released by the system.
25 |         // This occurs shortly after the scene enters the background, or when its session is discarded.
26 |         // Release any resources associated with this scene that can be re-created the next time the scene connects.
27 |         // The scene may re-connect later, as its session was not neccessarily discarded (see `application:didDiscardSceneSessions` instead).
28 |     }
29 | 
30 |     func sceneDidBecomeActive(_ scene: UIScene) {
31 |         // Called when the scene has moved from an inactive state to an active state.
32 |         // Use this method to restart any tasks that were paused (or not yet started) when the scene was inactive.
33 |     }
34 | 
35 |     func sceneWillResignActive(_ scene: UIScene) {
36 |         // Called when the scene will move from an active state to an inactive state.
37 |         // This may occur due to temporary interruptions (ex. an incoming phone call).
38 |     }
39 | 
40 |     func sceneWillEnterForeground(_ scene: UIScene) {
41 |         // Called as the scene transitions from the background to the foreground.
42 |         // Use this method to undo the changes made on entering the background.
43 |     }
44 | 
45 |     func sceneDidEnterBackground(_ scene: UIScene) {
46 |         // Called as the scene transitions from the foreground to the background.
47 |         // Use this method to save data, release shared resources, and store enough scene-specific state information
48 |         // to restore the scene back to its current state.
49 |     }
50 | 
51 | 
52 | }
53 | 
54 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-Classification/ViewController.swift:
--------------------------------------------------------------------------------
  1 | //
  2 | //  ViewController.swift
  3 | //  Pytorch-CoreML-Sound-Classification
  4 | //
  5 | //  Created by Gerald on 6/14/20.
  6 | //  Copyright © 2020 Gerald. All rights reserved.
  7 | //
  8 | 
  9 | import UIKit
 10 | import AVKit
 11 | import CoreML
 12 | 
 13 | class ViewController: UIViewController {
 14 | 
 15 |     @IBOutlet weak var drawSpecView: DrawSpecView!
 16 |     
 17 |     @IBOutlet weak var labelsTableView: UITableView!
 18 |     
 19 |     // set up for audio
 20 |   private let audioEngine = AVAudioEngine()
 21 |   // specify the audio samples format the CoreML model
 22 |   private let desiredAudioFormat: AVAudioFormat = {
 23 |       let avAudioChannelLayout = AVAudioChannelLayout(layoutTag: kAudioChannelLayoutTag_Mono)!
 24 |       return AVAudioFormat(
 25 |           commonFormat: .pcmFormatFloat32,
 26 |           sampleRate: Double( 32000 ), // as specified when creating the Pytorch model
 27 |           interleaved: true,
 28 |           channelLayout: avAudioChannelLayout
 29 |       )
 30 |   }()
 31 |   
 32 |   // create a queue to do analysis on a separate thread
 33 |   private let analysisQueue = DispatchQueue(label: "com.myco.AnalysisQueue")
 34 |   
 35 |   // instantiate our model
 36 | 
 37 | 
 38 |   var model : PANN? = nil
 39 |   typealias NetworkInput = PANNInput
 40 |   typealias NetworkOutput = PANNOutput
 41 |   var class_labels: NSArray?
 42 | 
 43 |   typealias OutputClass = ( String, Float32, Int )
 44 |   private var tableData: [OutputClass?] = []
 45 | 
 46 | 
 47 |   // semaphore to protect the CoreML model
 48 |   let semaphore = DispatchSemaphore(value: 1)
 49 | 
 50 |   // for rendering our spectrogram
 51 |   let spec_converter = SpectrogramConverter()
 52 | 
 53 |   override func viewDidLoad() {
 54 |       super.viewDidLoad()
 55 |       // Do any additional setup after loading the view.
 56 |       load_model()
 57 |     
 58 |     // setup tableview datasource on bottom
 59 |     labelsTableView.dataSource = self
 60 | 
 61 |   }
 62 |   
 63 |   override func viewDidAppear(_ animated: Bool) {
 64 |       startAudioEngine()
 65 |   }
 66 |   
 67 |   private func load_model() {
 68 |       let config = MLModelConfiguration()
 69 |       config.computeUnits = .all
 70 |       do {
 71 |           self.model = try PANN( configuration: config )
 72 |       } catch {
 73 |           fatalError( "unable to load ML model!" )
 74 |       }
 75 | 
 76 |     guard let path = Bundle.main.path(forResource:"PANN_labels", ofType: "json") else {
 77 |         return
 78 |     }
 79 | 
 80 | 
 81 |     if let JSONData = try? Data(contentsOf: URL(fileURLWithPath: path)) {
 82 |         self.class_labels = try! JSONSerialization.jsonObject(with: JSONData, options: .mutableContainers) as? NSArray
 83 |     }
 84 | 
 85 |   }
 86 |   
 87 |   // audio capture via microphone
 88 |   private func startAudioEngine() {
 89 |       
 90 |       // https://stackoverflow.com/questions/48831411/converting-avaudiopcmbuffer-to-another-avaudiopcmbuffer
 91 |       // more info at https://medium.com/@prianka.kariat/changing-the-format-of-ios-avaudioengine-mic-input-c183459cab63
 92 |       
 93 |       let inputNode = audioEngine.inputNode
 94 |       let originalAudioFormat: AVAudioFormat = inputNode.inputFormat(forBus: 0)
 95 |       // input is in 44.1kHz, 2 channels
 96 | 
 97 |       let downSampleRate: Double = desiredAudioFormat.sampleRate
 98 |       let ratio: Float = Float(originalAudioFormat.sampleRate)/Float(downSampleRate)
 99 | 
100 |       // print( "input sr: \(originalAudioFormat.sampleRate) ch: \(originalAudioFormat.channelCount)" )
101 |       // print( "desired sr: \(desiredAudioFormat.sampleRate) ch: \(desiredAudioFormat.channelCount) ratio \(ratio)" )
102 |       
103 |       guard let formatConverter =  AVAudioConverter(from:originalAudioFormat, to: desiredAudioFormat) else {
104 |           fatalError( "unable to create formatConverter!" )
105 |       }
106 | 
107 |       // start audio capture by installing a Tap
108 |       inputNode.installTap(
109 |           onBus: 0,
110 |           bufferSize: AVAudioFrameCount(downSampleRate * 2),
111 |           format: originalAudioFormat
112 |       ) {
113 |           (buffer: AVAudioPCMBuffer!, time: AVAudioTime!) in
114 |           // closure to process the captured audio, buffer size dictated by AudioEngine/device
115 |            
116 |           let capacity = UInt32(Float(buffer.frameCapacity)/ratio)
117 | 
118 |           guard let pcmBuffer = AVAudioPCMBuffer(
119 |               pcmFormat: self.desiredAudioFormat,
120 |               frameCapacity: capacity) else {
121 |             print("Failed to create pcm buffer")
122 |             return
123 |           }
124 |           
125 |           let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
126 |             outStatus.pointee = AVAudioConverterInputStatus.haveData
127 |             return buffer
128 |           }
129 | 
130 |           // convert input samples into the one our model needs
131 |           var error: NSError?
132 |           let status: AVAudioConverterOutputStatus = formatConverter.convert(
133 |               to: pcmBuffer,
134 |               error: &error,
135 |               withInputFrom: inputBlock)
136 | 
137 |           if status == .error {
138 |               if let unwrappedError: NSError = error {
139 |                   print("Error \(unwrappedError)")
140 |               }
141 |               return
142 |           }
143 |           
144 |           // we now have the audio in mono, 32000 sample rate the CoreML model needs
145 |           // convert audio samples into MLMultiArray format for CoreML models
146 |           let channelData = pcmBuffer.floatChannelData
147 |           let output_samples = Int(pcmBuffer.frameLength)
148 |           let channelDataPointer = channelData!.pointee
149 |           
150 |           //print( "converted from \(buffer.frameLength) to len \(output_samples) val[0] \(channelDataPointer[0]) \(channelDataPointer[output_samples-1])" )
151 | 
152 |           let audioData = try! MLMultiArray( shape: [1, output_samples as NSNumber], dataType: .float32 )
153 |           let ptr = UnsafeMutablePointer<Float32>(OpaquePointer(audioData.dataPointer))
154 |           for i in 0..<output_samples {
155 |               ptr[i] = Float32(channelDataPointer[i])
156 |           }
157 | 
158 |           // prepare the input dictionary
159 |           let inputs: [String: Any] = [
160 |               "input.1": audioData,
161 |           ]
162 |           // container for ML Model inputs
163 |           let provider = try! MLDictionaryFeatureProvider(dictionary: inputs)
164 |                  
165 |           // wait in case CoreML model is busy
166 |           self.semaphore.wait()
167 | 
168 |           self.analysisQueue.async {
169 |               // send this sample to CoreML to generate melspectrogram
170 |               self.predict_provider(provider: provider)
171 |           }
172 |       } // installTap
173 | 
174 |       // ready to start the actual audio capture
175 |       audioEngine.prepare()
176 |       do {
177 |           try audioEngine.start()
178 |       }
179 |       catch {
180 |          print(error.localizedDescription)
181 |       }
182 |   } // end startAudioEngine
183 |   
184 |   
185 |   func predict_provider(provider: MLDictionaryFeatureProvider ) {
186 | 
187 |     if let outFeatures = try? self.model?.model.prediction(from: provider) {
188 |       // release the semaphore as soon as the model is done
189 |       self.semaphore.signal()
190 | 
191 |       let outputs = NetworkOutput(features: outFeatures)
192 |     
193 |         let output_clipwise: MLMultiArray = outputs.clip_output
194 |         
195 |         let pointer = UnsafeMutablePointer<Float32>(OpaquePointer(output_clipwise.dataPointer))
196 | 
197 |         let num_classes = self.class_labels!.count
198 |         var max_class: Int = -1
199 |         var max_class_prob: Float32 = 0.0
200 |         for i in 0..<num_classes {
201 |             let val = Float32( pointer[i] )
202 |             if val > max_class_prob {
203 |                 max_class_prob = val
204 |                 max_class = i
205 |             }
206 |         }
207 |         let max_class_label: String = (self.class_labels?[max_class]) as! String
208 |         // print( "max: \(max_class) \(max_class_prob) \(max_class_label)" )
209 |         let row = OutputClass( max_class_label, max_class_prob, max_class )
210 |         /*
211 |         var max_class_2: Int = -1
212 |         var max_class_prob_2: Float32 = 0.0
213 |         for i in 0..<num_classes {
214 |             if i == max_class {
215 |                 continue
216 |             }
217 |             let val = Float32( pointer[i] )
218 |             if val > max_class_prob_2 {
219 |                 max_class_prob_2 = val
220 |                 max_class_2 = i
221 |             }
222 |         }
223 |         let max_class_label_2: String = (self.class_labels?[max_class_2]) as! String
224 |         // print( "max: \(max_class) \(max_class_prob) \(max_class_label)" )
225 |         let row_2 = OutputClass( max_class_label_2, max_class_prob_2, max_class_2 )
226 |         */
227 |         let predicted_classes = [ row ]
228 |         
229 |         DispatchQueue.main.sync {
230 |             self.showPredictedClasses(with: predicted_classes)
231 |         }
232 |         
233 |     
234 |       let output_spectrogram: MLMultiArray = outputs.melspec
235 | 
236 |       // melspectrogram is in MLMultiArray in decibels. Convert to 0..1 for visualization
237 |       // and then pass the converted spectrogram to the UI element drawSpecView
238 |       drawSpecView.spectrogram = spec_converter.convertTo2DArray(from: output_spectrogram)
239 |     } else {
240 |         self.semaphore.signal()
241 |     }
242 |   }
243 |     func showPredictedClasses(with predicted_classes : [OutputClass] ) {
244 |         self.tableData = predicted_classes
245 |         // print( "data: \(predicted_classes[0])" )
246 |         self.labelsTableView.reloadData()
247 |     }
248 | 
249 | }
250 | 
251 | 
252 | 
253 | // MARK: - UITableView Data Source
254 | extension ViewController: UITableViewDataSource {
255 |     func tableView(_ tableView: UITableView, numberOfRowsInSection section: Int) -> Int {
256 |         return tableData.count// > 0 ? 1 : 0
257 |     }
258 |     
259 |     func tableView(_ tableView: UITableView, cellForRowAt indexPath: IndexPath) -> UITableViewCell {
260 |         let cell: UITableViewCell = tableView.dequeueReusableCell(withIdentifier: "LabelCell", for: indexPath)
261 |         if let row = tableData[indexPath.row] {
262 |             cell.textLabel?.text = row.0
263 |             let probText: String = "\(String(format: "%.1f%%", row.1*100))"
264 |             cell.detailTextLabel?.text = "(\(probText))"
265 |         } else {
266 |             cell.detailTextLabel?.text = "N/A"
267 |         }
268 |         return cell
269 |     }
270 | }
271 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-ClassificationTests/Info.plist:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 3 | <plist version="1.0">
 4 | <dict>
 5 | 	<key>CFBundleDevelopmentRegion</key>
 6 | 	<string>$(DEVELOPMENT_LANGUAGE)</string>
 7 | 	<key>CFBundleExecutable</key>
 8 | 	<string>$(EXECUTABLE_NAME)</string>
 9 | 	<key>CFBundleIdentifier</key>
10 | 	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
11 | 	<key>CFBundleInfoDictionaryVersion</key>
12 | 	<string>6.0</string>
13 | 	<key>CFBundleName</key>
14 | 	<string>$(PRODUCT_NAME)</string>
15 | 	<key>CFBundlePackageType</key>
16 | 	<string>$(PRODUCT_BUNDLE_PACKAGE_TYPE)</string>
17 | 	<key>CFBundleShortVersionString</key>
18 | 	<string>1.0</string>
19 | 	<key>CFBundleVersion</key>
20 | 	<string>1</string>
21 | </dict>
22 | </plist>
23 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-ClassificationTests/Pytorch_CoreML_Sound_ClassificationTests.swift:
--------------------------------------------------------------------------------
  1 | //
  2 | //  Pytorch_CoreML_Sound_ClassificationTests.swift
  3 | //  Pytorch-CoreML-Sound-ClassificationTests
  4 | //
  5 | //  Created by Gerald on 6/14/20.
  6 | //  Copyright © 2020 Gerald. All rights reserved.
  7 | //
  8 | 
  9 | import XCTest
 10 | import AVFoundation
 11 | import CoreML
 12 | 
 13 | @testable import Pytorch_CoreML_Sound_Classification
 14 | 
 15 | class Pytorch_CoreML_Sound_ClassificationTests: XCTestCase {
 16 | 
 17 |     func test_labels() throws {
 18 |         
 19 |         let bundle = Bundle(for: Pytorch_CoreML_Sound_ClassificationTests.self)
 20 |         let path = bundle.path(forResource: "PANN_labels", ofType: "json")
 21 | 
 22 |         var labels: NSArray?
 23 |         if let JSONData = try? Data(contentsOf: URL(fileURLWithPath: path!)) {
 24 |             labels = try! JSONSerialization.jsonObject(with: JSONData, options: .mutableContainers) as? NSArray
 25 |         }
 26 |         /*
 27 |         for i in 0..<labels!.count {
 28 |             print( "\(i) \(labels![i])")
 29 |         }
 30 |         */
 31 |         XCTAssertTrue( labels![0] as! String == "Speech", "incorrect first label!" )
 32 |         XCTAssertTrue( labels![500] as! String == "Silence", "incorrect Silence label!" )
 33 | 
 34 |     }
 35 |     
 36 |     func test_model_inference() throws {
 37 |         
 38 |         let model = PANN()
 39 |         typealias NetworkInput = PANNInput
 40 |         typealias NetworkOutput = PANNOutput
 41 | 
 42 |         // read in the expected model output from JSON
 43 |         let bundle = Bundle(for: Pytorch_CoreML_Sound_ClassificationTests.self)
 44 |         let path = bundle.path(forResource: "PANN_out.ring_hello", ofType: "json")
 45 | 
 46 |         var expected_outputs: NSArray?
 47 |         if let JSONData = try? Data(contentsOf: URL(fileURLWithPath: path!)) {
 48 |             expected_outputs = try! JSONSerialization.jsonObject(with: JSONData, options: .mutableContainers) as? NSArray
 49 |         }
 50 |         let expected_clipwise: [NSNumber] = expected_outputs![0] as! [NSNumber]
 51 |         print( "expected clipwise: \(expected_clipwise.count)" )
 52 | 
 53 |         let expected_spectrogram: [[NSNumber]] = expected_outputs![2] as! [[NSNumber]]
 54 |         print( "expected spec: \(expected_spectrogram.count) \(expected_spectrogram[0].count)")
 55 | 
 56 |         // read the input shapes of our model
 57 |         let inputName = "input.1"
 58 |         let inputConstraint: MLFeatureDescription = model.model.modelDescription
 59 |             .inputDescriptionsByName[inputName]!
 60 | 
 61 |         let input_batch_size: Int = Int(truncating: (inputConstraint.multiArrayConstraint?.shape[0])! )
 62 |         let input_samples: Int = Int(truncating: (inputConstraint.multiArrayConstraint?.shape[1])! )
 63 |         // print( "inputCon: \(input_batch_size) \(input_samples)")
 64 | 
 65 |         // read the same WAV file used in PyTorch
 66 |         let testBundle = Bundle(for: type(of: self))
 67 |         guard let filePath = testBundle.path(forResource: "ring_hello", ofType: "wav") else {
 68 |             fatalError( "error opening ring_hello.wav" )
 69 |         }
 70 |         
 71 |         // Read wav file
 72 |         var wav_file:AVAudioFile!
 73 |         do {
 74 |            let fileUrl = URL(fileURLWithPath: filePath )
 75 |            wav_file = try AVAudioFile( forReading:fileUrl )
 76 |         } catch {
 77 |            fatalError("Could not open wav file.")
 78 |         }
 79 | 
 80 |         print("wav file length: \(wav_file.length)")
 81 | 
 82 |         let buffer = AVAudioPCMBuffer(pcmFormat: wav_file.processingFormat,
 83 |                                       frameCapacity: UInt32(wav_file.length))
 84 |         do {
 85 |            try wav_file.read(into:buffer!)
 86 |         } catch{
 87 |            fatalError("Error reading buffer.")
 88 |         }
 89 |         
 90 |         guard let bufferData = buffer?.floatChannelData![0] else {
 91 |            fatalError("Can not get a float handle to buffer")
 92 |         }
 93 |             
 94 | 
 95 |         // allocate a ML Array & copy samples over
 96 |         let array_shape = [input_batch_size as NSNumber, input_samples as NSNumber]
 97 |         let audioData = try! MLMultiArray(shape: array_shape, dataType: MLMultiArrayDataType.float32 )
 98 |         let ptr = UnsafeMutablePointer<Float32>(OpaquePointer(audioData.dataPointer))
 99 |         for i in 0..<input_samples {
100 |             ptr[i] = Float32(bufferData[i])
101 |         }
102 | 
103 |         // create the input dictionary as { 'input.1' : [<wave floats>] }
104 |         let inputs: [String: Any] = [
105 |             inputName: audioData,
106 |         ]
107 |         // container for ML Model inputs
108 |         let provider = try! MLDictionaryFeatureProvider(dictionary: inputs)
109 |         
110 |         // Send the wavefor samples into the model to generate the Spectrogram
111 |         let raw_outputs = try! model.model.prediction(from: provider)
112 |         
113 |         // convert raw dictionary into our model's output object
114 |         let outputs = NetworkOutput( features: raw_outputs )
115 | 
116 |         
117 |         let output_clipwise: MLMultiArray = outputs.clip_output
118 |         print( "clip outputs: \(output_clipwise.shape)") // [1, 527]
119 | 
120 |         // sanity check the shapes of our output
121 |         XCTAssertTrue( Int( truncating: output_clipwise.shape[1] ) == expected_clipwise.count,
122 |                        "incorrect shape[1]! \(output_clipwise.shape[1]) \(expected_clipwise.count)" )
123 | 
124 |         
125 |         // compare every element of our spectrogram with those from the JSON file
126 |         for i in 0..<expected_clipwise.count {
127 |             let test_idx: [NSNumber] = [ 0, NSNumber(value: i) ]
128 |             let val = output_clipwise[ test_idx ].floatValue
129 |             // print( "clipwise vals \(i) \(val), \(expected_clipwise[i].floatValue)" )
130 |             
131 |             XCTAssertLessThan( abs( val - expected_clipwise[i].floatValue ), 0.1,
132 |                                "clipwise vals different at \(i)! \(val), \(expected_clipwise[i].floatValue)" )
133 |             
134 |         }
135 |         
136 |         
137 |         let output_spectrogram: MLMultiArray = outputs.melspec
138 |         print( "outputs: \(output_spectrogram.shape)") // [1, 1, 28, 64]
139 | 
140 |         // sanity check the shapes of our output
141 |         XCTAssertTrue( Int( truncating: output_spectrogram.shape[2] ) == expected_spectrogram.count,
142 |                        "incorrect shape[2]! \(output_spectrogram.shape[2]) \(expected_spectrogram.count)" )
143 |         XCTAssertTrue( Int( truncating: output_spectrogram.shape[3] ) == expected_spectrogram[0].count,
144 |                        "incorrect shape[3]! \(output_spectrogram.shape[3]) \(expected_spectrogram[0].count)" )
145 | 
146 |         // compare every element of our spectrogram with those from the JSON file
147 |         for i in 0..<expected_spectrogram.count {
148 |             let spec_row = expected_spectrogram[i] as [NSNumber]
149 | 
150 |             for j in 0..<spec_row.count {
151 |                 let test_idx: [NSNumber] = [ 0, 0, NSNumber(value: i), NSNumber(value: j) ]
152 |                 let val = output_spectrogram[ test_idx ].floatValue
153 |                 XCTAssertLessThan( abs( val - spec_row[j].floatValue ), 5,
154 |                                    "spec vals different at \(i) \(j)! \(val), \(spec_row[j].floatValue)" )
155 |             }
156 |         }
157 |     }
158 | 
159 |     
160 |     func test_convert_spectrogram() throws {
161 |         let spec_converter = SpectrogramConverter()
162 | 
163 |         
164 |         // load our spectrogram from JSON
165 |         
166 |         let bundle = Bundle(for: Pytorch_CoreML_Sound_ClassificationTests.self)
167 |         let path = bundle.path(forResource: "PANN_out.ring_hello", ofType: "json")
168 | 
169 |         var expected_outputs: NSArray?
170 |         if let JSONData = try? Data(contentsOf: URL(fileURLWithPath: path!)) {
171 |             expected_outputs = try! JSONSerialization.jsonObject(with: JSONData, options: .mutableContainers) as? NSArray
172 |         }
173 | 
174 |         let expected_spectrogram: [[NSNumber]] = expected_outputs![2] as! [[NSNumber]]
175 |         print( "expected spec: \(expected_spectrogram.count) \(expected_spectrogram[0].count)")
176 | 
177 | 
178 |         // copy data over to an allocated MLMultiArray
179 |         let spec_cols = expected_spectrogram.count
180 |         let spec_rows = expected_spectrogram[0].count
181 |         let array_shape = [ 1, 1, spec_cols as NSNumber, spec_rows as NSNumber ]
182 |         let spectrogram = try! MLMultiArray(shape: array_shape, dataType: MLMultiArrayDataType.float32 )
183 |         let ptr = UnsafeMutablePointer<Float32>(OpaquePointer(spectrogram.dataPointer))
184 |         for i in 0..<expected_spectrogram.count {
185 |             let spec_row = expected_spectrogram[i] as [NSNumber]
186 | 
187 |             for j in 0..<spec_row.count {
188 |                 ptr[ i*Int(spectrogram.strides[2]) + j] = Float32( spec_row[j] )
189 |             }
190 |         }
191 |         // call convert
192 |         let converted_spec = spec_converter.convertTo2DArray(from: spectrogram )
193 |         
194 |         // check shape, should be the same
195 |         XCTAssertTrue( converted_spec.count == spec_cols, "converted spec shape incorrect!" )
196 |         XCTAssertTrue( converted_spec[0].count == spec_rows, "converted spec shape incorrect!" )
197 | 
198 |         // spot check the converted specs
199 |         XCTAssertTrue( converted_spec[0].min()! >= Float32(0.0), "converted spec min incorrect!" )
200 |         XCTAssertTrue( converted_spec[0].max()! <= Float32(1.0), "converted spec max incorrect!" )
201 | 
202 |     }
203 | 
204 |     func test_inference_time() throws {
205 |         // This is an example of a performance test case.
206 |         let model = PANN()
207 | 
208 |         let array_shape: [NSNumber] = [1, 12800]
209 |         let audioData = try! MLMultiArray(shape: array_shape, dataType: MLMultiArrayDataType.float32 )
210 |         let inputs: [String: Any] = [
211 |             "input.1": audioData,
212 |         ]
213 |         // container for ML Model inputs
214 |         let provider = try! MLDictionaryFeatureProvider(dictionary: inputs)
215 | 
216 |         self.measure {
217 |             // Put the code you want to measure the time of here.
218 |             let N = 10
219 |             let start_time = CACurrentMediaTime()
220 |             let options = MLPredictionOptions()
221 |             // options.usesCPUOnly = true
222 |             for _ in 0..<N {
223 |                 _ = try? model.model.prediction(
224 |                     from: provider,
225 |                     options: options
226 |                 )
227 |             }
228 |             let elapsed = CACurrentMediaTime() - start_time
229 |             print( "avg inference time: \(elapsed/Double(N))")
230 |             /* simulator:
231 |                 N = 10: avg inference time: 0.03517465239856392
232 |              */
233 |         }
234 |     }
235 | }
236 | 


--------------------------------------------------------------------------------
/Pytorch-CoreML-Sound-ClassificationTests/ring_hello.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ml-illustrated/Pytorch-CoreML-Sound-Classification/9c5b268dbc3926494e5c1d6611da316dbefe770a/Pytorch-CoreML-Sound-ClassificationTests/ring_hello.wav


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Pytorch-CoreML-Sound-Classification
 2 | 
 3 | Repo accompanying the blog post 
 4 | ["How to Deploy A State-of-the-art PyTorch Model to iOS via Core ML (Part 3)"](http://www.ml-illustrated.com/2020/06/15/deploy-pytorch-sound-classification-model-via-coreml.html).
 5 | 
 6 | <img src="http://www.ml-illustrated.com/images/2020-06-15/voice_classify.gif" width="300"/>
 7 | 
 8 | Be sure to clone with --recursive:
 9 | ```
10 | git clone --recursive https://github.com/ml-illustrated/Pytorch-CoreML-Sound-Classification
11 | ```
12 | 


--------------------------------------------------------------------------------
/python/export.log:
--------------------------------------------------------------------------------
  1 | $ python3 python/export.py 'python/MobileNetV1_mAP=0.389.pth' /tmp/ring_hello.wav
  2 | waveform:  (1, 12800)
  3 | 1/157: Converting Node Type Unsqueeze
  4 | 2/157: Converting Node Type Unsqueeze
  5 | 3/157: Converting Node Type Pad
  6 | node.name:  177 [0, 0, 512, 0, 0, 0, 512, 0]
  7 | 4/157: Converting Node Type Squeeze
  8 | 5/157: Converting Node Type Conv
  9 | 6/157: Converting Node Type Conv
 10 | 7/157: Converting Node Type Unsqueeze
 11 | 8/157: Converting Node Type Transpose
 12 | 9/157: Converting Node Type Unsqueeze
 13 | 10/157: Converting Node Type Transpose
 14 | 11/157: Converting Node Type Pow
 15 | 12/157: Converting Node Type Pow
 16 | 13/157: Converting Node Type Add
 17 | 14/157: Converting Node Type MatMul
 18 | 15/157: Converting Node Type Clip
 19 | 16/157: Converting Node Type Log
 20 | 17/157: Converting Node Type Mul
 21 | 18/157: Converting Node Type Div
 22 | 19/157: Converting Node Type Transpose
 23 | 20/157: Converting Node Type BatchNormalization
 24 | 21/157: Converting Node Type Transpose
 25 | 22/157: Converting Node Type Conv
 26 | 23/157: Converting Node Type Pad
 27 | node.name:  201 [0, 0, 0, 0, 0, 0, 0, 0]
 28 | 24/157: Converting Node Type AveragePool
 29 | 25/157: Converting Node Type BatchNormalization
 30 | 26/157: Converting Node Type Relu
 31 | 27/157: Converting Node Type Conv
 32 | 28/157: Converting Node Type Pad
 33 | node.name:  206 [0, 0, 0, 0, 0, 0, 0, 0]
 34 | 29/157: Converting Node Type AveragePool
 35 | 30/157: Converting Node Type BatchNormalization
 36 | 31/157: Converting Node Type Relu
 37 | 32/157: Converting Node Type Conv
 38 | 33/157: Converting Node Type BatchNormalization
 39 | 34/157: Converting Node Type Relu
 40 | 35/157: Converting Node Type Conv
 41 | 36/157: Converting Node Type Pad
 42 | node.name:  214 [0, 0, 0, 0, 0, 0, 0, 0]
 43 | 37/157: Converting Node Type AveragePool
 44 | 38/157: Converting Node Type BatchNormalization
 45 | 39/157: Converting Node Type Relu
 46 | 40/157: Converting Node Type Conv
 47 | 41/157: Converting Node Type BatchNormalization
 48 | 42/157: Converting Node Type Relu
 49 | 43/157: Converting Node Type Conv
 50 | 44/157: Converting Node Type Pad
 51 | node.name:  222 [0, 0, 0, 0, 0, 0, 0, 0]
 52 | 45/157: Converting Node Type AveragePool
 53 | 46/157: Converting Node Type BatchNormalization
 54 | 47/157: Converting Node Type Relu
 55 | 48/157: Converting Node Type Conv
 56 | 49/157: Converting Node Type BatchNormalization
 57 | 50/157: Converting Node Type Relu
 58 | 51/157: Converting Node Type Conv
 59 | 52/157: Converting Node Type Pad
 60 | node.name:  230 [0, 0, 0, 0, 0, 0, 0, 0]
 61 | 53/157: Converting Node Type AveragePool
 62 | 54/157: Converting Node Type BatchNormalization
 63 | 55/157: Converting Node Type Relu
 64 | 56/157: Converting Node Type Conv
 65 | 57/157: Converting Node Type BatchNormalization
 66 | 58/157: Converting Node Type Relu
 67 | 59/157: Converting Node Type Conv
 68 | 60/157: Converting Node Type Pad
 69 | node.name:  238 [0, 0, 0, 0, 0, 0, 0, 0]
 70 | 61/157: Converting Node Type AveragePool
 71 | 62/157: Converting Node Type BatchNormalization
 72 | 63/157: Converting Node Type Relu
 73 | 64/157: Converting Node Type Conv
 74 | 65/157: Converting Node Type BatchNormalization
 75 | 66/157: Converting Node Type Relu
 76 | 67/157: Converting Node Type Conv
 77 | 68/157: Converting Node Type Pad
 78 | node.name:  246 [0, 0, 0, 0, 0, 0, 0, 0]
 79 | 69/157: Converting Node Type AveragePool
 80 | 70/157: Converting Node Type BatchNormalization
 81 | 71/157: Converting Node Type Relu
 82 | 72/157: Converting Node Type Conv
 83 | 73/157: Converting Node Type BatchNormalization
 84 | 74/157: Converting Node Type Relu
 85 | 75/157: Converting Node Type Conv
 86 | 76/157: Converting Node Type Pad
 87 | node.name:  254 [0, 0, 0, 0, 0, 0, 0, 0]
 88 | 77/157: Converting Node Type AveragePool
 89 | 78/157: Converting Node Type BatchNormalization
 90 | 79/157: Converting Node Type Relu
 91 | 80/157: Converting Node Type Conv
 92 | 81/157: Converting Node Type BatchNormalization
 93 | 82/157: Converting Node Type Relu
 94 | 83/157: Converting Node Type Conv
 95 | 84/157: Converting Node Type Pad
 96 | node.name:  262 [0, 0, 0, 0, 0, 0, 0, 0]
 97 | 85/157: Converting Node Type AveragePool
 98 | 86/157: Converting Node Type BatchNormalization
 99 | 87/157: Converting Node Type Relu
100 | 88/157: Converting Node Type Conv
101 | 89/157: Converting Node Type BatchNormalization
102 | 90/157: Converting Node Type Relu
103 | 91/157: Converting Node Type Conv
104 | 92/157: Converting Node Type Pad
105 | node.name:  270 [0, 0, 0, 0, 0, 0, 0, 0]
106 | 93/157: Converting Node Type AveragePool
107 | 94/157: Converting Node Type BatchNormalization
108 | 95/157: Converting Node Type Relu
109 | 96/157: Converting Node Type Conv
110 | 97/157: Converting Node Type BatchNormalization
111 | 98/157: Converting Node Type Relu
112 | 99/157: Converting Node Type Conv
113 | 100/157: Converting Node Type Pad
114 | node.name:  278 [0, 0, 0, 0, 0, 0, 0, 0]
115 | 101/157: Converting Node Type AveragePool
116 | 102/157: Converting Node Type BatchNormalization
117 | 103/157: Converting Node Type Relu
118 | 104/157: Converting Node Type Conv
119 | 105/157: Converting Node Type BatchNormalization
120 | 106/157: Converting Node Type Relu
121 | 107/157: Converting Node Type Conv
122 | 108/157: Converting Node Type Pad
123 | node.name:  286 [0, 0, 0, 0, 0, 0, 0, 0]
124 | 109/157: Converting Node Type AveragePool
125 | 110/157: Converting Node Type BatchNormalization
126 | 111/157: Converting Node Type Relu
127 | 112/157: Converting Node Type Conv
128 | 113/157: Converting Node Type BatchNormalization
129 | 114/157: Converting Node Type Relu
130 | 115/157: Converting Node Type Conv
131 | 116/157: Converting Node Type Pad
132 | node.name:  294 [0, 0, 0, 0, 0, 0, 0, 0]
133 | 117/157: Converting Node Type AveragePool
134 | 118/157: Converting Node Type BatchNormalization
135 | 119/157: Converting Node Type Relu
136 | 120/157: Converting Node Type Conv
137 | 121/157: Converting Node Type BatchNormalization
138 | 122/157: Converting Node Type Relu
139 | 123/157: Converting Node Type Conv
140 | 124/157: Converting Node Type Pad
141 | node.name:  302 [0, 0, 0, 0, 0, 0, 0, 0]
142 | 125/157: Converting Node Type AveragePool
143 | 126/157: Converting Node Type BatchNormalization
144 | 127/157: Converting Node Type Relu
145 | 128/157: Converting Node Type Conv
146 | 129/157: Converting Node Type BatchNormalization
147 | 130/157: Converting Node Type Relu
148 | 131/157: Converting Node Type ReduceMean
149 | 132/157: Converting Node Type MaxPool
150 | 133/157: Converting Node Type Pad
151 | node.name:  311 [0, 0, 1, 0, 0, 1]
152 | 134/157: Converting Node Type AveragePool
153 | 135/157: Converting Node Type Add
154 | 136/157: Converting Node Type Transpose
155 | 137/157: Converting Node Type MatMul
156 | 138/157: Converting Node Type Add
157 | 139/157: Converting Node Type Relu
158 | 140/157: Converting Node Type MatMul
159 | 141/157: Converting Node Type Add
160 | 142/157: Converting Node Type Sigmoid
161 | 143/157: Converting Node Type ReduceMax
162 | 144/157: Converting Node Type Shape
163 | 145/157: Converting Node Type Gather
164 | 146/157: Converting Node Type Shape
165 | 147/157: Converting Node Type Gather
166 | 148/157: Converting Node Type Shape
167 | 149/157: Converting Node Type Gather
168 | 150/157: Converting Node Type Unsqueeze
169 | 151/157: Converting Node Type Tile
170 | 152/157: Converting Node Type Mul
171 | 153/157: Converting Node Type Unsqueeze
172 | 154/157: Converting Node Type Unsqueeze
173 | 155/157: Converting Node Type Unsqueeze
174 | 156/157: Converting Node Type Concat
175 | 157/157: Converting Node Type Reshape
176 | Translation to CoreML spec completed. Now compiling the CoreML model.
177 | Model Compilation done.
178 | out:  (1, 527)
179 | out:  (1, 32, 527)
180 | out:  (1, 1, 41, 64)


--------------------------------------------------------------------------------
/python/export.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import torch
  4 | from torch import nn
  5 | import torch.nn.functional as F
  6 | 
  7 | import librosa
  8 | #import onnx
  9 | import onnx_coreml
 10 | #import onnxruntime
 11 | import coremltools
 12 | 
 13 | sys.path.insert(1, os.path.join(sys.path[0], 'audioset_tagging_cnn/pytorch'))
 14 | 
 15 | from audioset_tagging_cnn.pytorch.pytorch_utils import interpolate, pad_framewise_output
 16 | 
 17 | from audioset_tagging_cnn.pytorch.models import MobileNetV1
 18 | 
 19 |         
 20 | class MobileNetV1Export(MobileNetV1):
 21 |     def __init__(self, *args, **kwargs):
 22 |         
 23 |         super(MobileNetV1Export, self).__init__(*args, **kwargs)
 24 |         self.interpolate_ratio = 32
 25 | 
 26 |         self.input_name = 'input.1'
 27 |         self.output_names = ['clip_output', 'frame_output', 'melspec' ]
 28 | 
 29 |         
 30 |     def forward(self, x, mixup_lambda=None):
 31 |         x = self.spectrogram_extractor(x)   # (batch_size, 1, time_steps, freq_bins)
 32 |         x = self.logmel_extractor(x)    # (batch_size, 1, time_steps, mel_bins)
 33 |         melspec = x
 34 | 
 35 |         # frames_num = x.shape[2]
 36 | 
 37 |         x = x.transpose(1, 3)
 38 |         x = self.bn0(x)
 39 |         x = x.transpose(1, 3)
 40 |         
 41 |         x = self.features(x)
 42 | 
 43 |         x = torch.mean(x, dim=3)
 44 | 
 45 |         x1 = F.max_pool1d(x, kernel_size=3, stride=1, padding=1)
 46 |         x2 = F.avg_pool1d(x, kernel_size=3, stride=1, padding=1)
 47 |         x = x1 + x2
 48 | 
 49 |         x = F.dropout(x, p=0.5, training=self.training)
 50 |         x = x.transpose(1, 2)
 51 |         x = F.relu_(self.fc1(x))
 52 |         x = F.dropout(x, p=0.5, training=self.training)
 53 |         segmentwise_output = torch.sigmoid(self.fc_audioset(x))
 54 |         (clipwise_output, _) = torch.max(segmentwise_output, dim=1)
 55 | 
 56 |         # Get framewise output
 57 |         framewise_output = interpolate(segmentwise_output, self.interpolate_ratio)
 58 |         # DISABLED framewise_output = pad_framewise_output(framewise_output, frames_num)
 59 | 
 60 |         return clipwise_output, framewise_output, melspec
 61 | 
 62 |     def gen_torch_output( self, sample_input ):
 63 |         # Forward
 64 |         with torch.no_grad():
 65 |             raw_outputs = self( torch.from_numpy( sample_input ) )
 66 |             torch_outputs = [ item.cpu().detach().numpy() for item in raw_outputs ]
 67 |         
 68 |         for output in torch_outputs:
 69 |             print( 'out: ', output.shape )
 70 | 
 71 |         return torch_outputs
 72 | 
 73 |     def convert_to_onnx( self, filename_onnx, sample_input ):
 74 | 
 75 |         input_names = [ self.input_name ]
 76 |         output_names = self.output_names
 77 |         
 78 |         torch.onnx.export(
 79 |             self,
 80 |             torch.from_numpy( sample_input ),
 81 |             filename_onnx,
 82 |             input_names=input_names,
 83 |             output_names=output_names,
 84 |             verbose=False,
 85 |             # operator_export_type=OperatorExportTypes.ONNX
 86 |         )
 87 | 
 88 |     def gen_onnx_outputs( self, filename_onnx, sample_input ):
 89 |         import onnxruntime
 90 |         
 91 |         session = onnxruntime.InferenceSession( filename_onnx, None)
 92 |         
 93 |         input_name = session.get_inputs()[0].name
 94 | 
 95 |         raw_results = session.run([], {input_name: sample_input})
 96 | 
 97 |         return raw_results[0]
 98 |         
 99 |     def convert_to_coreml( self, fn_mlmodel, sample_input ):
100 | 
101 |         # first convert to ONNX
102 |         filename_onnx = '/tmp/PANN_model.onnx'
103 |         self.convert_to_onnx( filename_onnx, sample_input )
104 | 
105 |         # onnx_outputs = self.gen_onnx_outputs( filename_onnx, sample_input )
106 |         
107 |         # set up for Core ML export
108 |         convert_params = dict(
109 |             predicted_feature_name = [],
110 |             minimum_ios_deployment_target='13',
111 |             custom_conversion_functions={'Pad':_convert_pad},
112 |         )
113 | 
114 |         mlmodel = onnx_coreml.convert(
115 |             model=filename_onnx,
116 |             **convert_params, 
117 |         )
118 | 
119 |         # print(mlmodel._spec.description)
120 | 
121 |         # assert mlmodel != None, 'CoreML Conversion failed'
122 | 
123 |         mlmodel.save( fn_mlmodel )
124 | 
125 |         torch_output = self.gen_torch_output( sample_input )
126 | 
127 |         return torch_output
128 | 
129 |         """
130 |         model_inputs = {
131 |             self.input_name : sample_input
132 |         }
133 |         # do forward pass
134 |         mlmodel_outputs = mlmodel.predict(model_inputs, useCPUOnly=True)
135 | 
136 |         # fetch the spectrogram from output dictionary
137 |         mlmodel_output =  mlmodel_outputs[ self.output_names[0] ]
138 |         # print( 'mlmodel_output: shape %s \nsample %s ' % ( mlmodel_output.shape, mlmodel_output[:,:,:3, :3] ) )
139 |         print( 'mlmodel_output: shape ', ( mlmodel_output.shape ) )
140 |         
141 |         # mlmodel = coremltools.models.MLModel( fn_mlmodel )
142 |         # _ = coremltools.models.MLModel( mlmodel._spec )
143 |         """
144 | 
145 | 
146 | def _convert_pad(builder, node, graph, err):
147 |     from onnx_coreml._operators import _convert_pad as _convert_pad_orig
148 | 
149 |     pads = node.attrs['pads']
150 |     print( 'node.name: ', node.name, pads )
151 |     
152 |     if node.name != '311': # hardcoded..
153 |         _convert_pad_orig( builder, node, graph, err )
154 | 
155 |     else:
156 | 
157 |         params_dict = {}
158 |         params_dict['left'] = pads[2] # padding left
159 |         params_dict['right'] = pads[5] # padding right
160 |         params_dict['top'] = 0
161 |         params_dict['bottom'] = 0
162 |         params_dict['value'] = 0.0
163 |         params_dict['padding_type'] = 'constant'
164 | 
165 |         builder.add_padding(
166 |             name=node.name,
167 |             input_name=node.inputs[0],
168 |             output_name=node.outputs[0],
169 |             **params_dict,
170 |         )
171 |         
172 | 
173 | 
174 | def save_class_label_json( fn_json ):
175 |     import csv, json
176 |     
177 |     with open('python/audioset_tagging_cnn/metadata/class_labels_indices.csv', 'r') as f:
178 |         reader = csv.reader(f, delimiter=',')
179 |         lines = list(reader)
180 | 
181 |         labels = []
182 |         for i1 in range(1, len(lines)):
183 |             id = lines[i1][1]
184 |             label = lines[i1][2]
185 |             labels.append(label)
186 | 
187 |     with open( fn_json, 'w' ) as ofp:
188 |         json.dump( labels, ofp )
189 |         
190 | def export_model( fn_mlmodel, fn_json, fn_label_json, checkpoint_path, audio_path ):
191 | 
192 |     model_args = {
193 |         'sample_rate': 32000,
194 |         'window_size': 1024,
195 |         'hop_size': 320,
196 |         'mel_bins': 64,
197 |         'fmin': 50,
198 |         'fmax': 14000,
199 |         'classes_num': 527
200 |     }
201 |     model = MobileNetV1Export(**model_args)
202 | 
203 |     checkpoint = torch.load(checkpoint_path, map_location='cpu')
204 |     model.load_state_dict(checkpoint['model'])
205 |     model.eval()
206 | 
207 |     # Load audio
208 |     sample_rate = model_args['sample_rate']
209 |     (waveform, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True)
210 |     
211 |     sample_input = waveform[None, :sample_rate]    # (1, audio_length)
212 |     print( 'waveform: ', sample_input.shape ) # waveform:  torch.Size([1, 224000])
213 | 
214 |     model_outputs = model.convert_to_coreml( fn_mlmodel, sample_input )
215 | 
216 |     save_model_output_as_json( fn_json, model_outputs )
217 | 
218 |     save_class_label_json( fn_label_json )
219 | 
220 | def save_model_output_as_json( fn_output, model_outputs ):
221 |     import json
222 |     output_data = [
223 |         model_outputs[0][0,:].tolist(), # clipwise
224 |         model_outputs[1][0,:].tolist(), # framewise
225 |         model_outputs[2][0,0,:].tolist(), # melspec
226 |     ]
227 |     with open( fn_output, 'w' ) as fp:
228 |         json.dump( output_data, fp )
229 | 
230 |     
231 | if __name__ == '__main__':
232 |     import sys
233 |     checkpoint_path = sys.argv[1]
234 |     audio_path = sys.argv[2]
235 | 
236 |     fn_mlmodel = '/tmp/PANN.mlmodel'
237 |     fn_json = '/tmp/PANN_out.ring_hello.json'
238 |     fn_label_json = '/tmp/PANN_labels.json'
239 | 
240 |     export_model( fn_mlmodel, fn_json, fn_label_json, checkpoint_path, audio_path )
241 | 
242 | # with CoreML convert error but still usable
243 | # python3 python/export.py 'python/MobileNetV1_mAP=0.389.pth' python/audioset_tagging_cnn/examples/R9_ZSCveAHg_7s.wav
244 | # without convert error, used in final version
245 | # python3 python/export.py 'python/MobileNetV1_mAP=0.389.pth' /tmp/ring_hello.wav
246 | # xcrun coremlc compile /tmp/PANN.mlmodel  /tmp/mlc_output
247 | 
248 | 
249 | '''
250 | import soundfile as sf
251 | 
252 | fn_wav = 'python/audioset_tagging_cnn/examples/R9_ZSCveAHg_7s.wav'
253 | 
254 | waveform, samplerate = sf.read( fn_wav )
255 | # samplerate is 32000
256 | num_samples = 12800
257 | sample_input = waveform[ samplerate*2:samplerate*2+num_samples ] # sec 2 to 3
258 | 
259 | sf.write( '/tmp/ring_hello.wav', sample_input, samplerate )
260 | '''
261 | 


--------------------------------------------------------------------------------
/python/requirements.txt:
--------------------------------------------------------------------------------
 1 | matplotlib==3.0.3
 2 | soundfile==0.10.3.post1
 3 | librosa==0.6.3
 4 | torch==1.0.1.post2
 5 | # torchlibrosa
 6 | -e git://github.com/ml-illustrated/torchlibrosa.git#egg=torchlibrosa
 7 | onnx==1.5.0
 8 | onnx-coreml==1.3
 9 | onnxruntime==1.3.0
10 | coremltools==3.4
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------