├── LICENSE ├── MetalDetector.xcodeproj ├── project.pbxproj ├── project.xcworkspace │ └── contents.xcworkspacedata └── xcuserdata │ └── krasin.xcuserdatad │ └── xcschemes │ ├── MetalDetector.xcscheme │ └── xcschememanagement.plist ├── MetalDetector ├── AppDelegate.swift ├── Assets.xcassets │ ├── AppIcon.appiconset │ │ └── Contents.json │ ├── Contents.json │ └── cat.imageset │ │ ├── Contents.json │ │ └── cat.png ├── Base.lproj │ ├── LaunchScreen.storyboard │ └── Main.storyboard ├── Engine.metal ├── Engine.swift ├── GoogLeNet.data ├── GoogLeNet.gen.metal ├── GoogLeNet.gen.swift ├── GoogLeNetProfile.swift ├── Info.plist ├── Net.swift ├── ViewController.swift └── synset_words.txt ├── MetalDetectorTests ├── Info.plist └── MetalDetectorTests.swift └── README.md /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Ivan Krasin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 | 10 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /MetalDetector.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 46; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | BB03AB4E1BE0638900FBE9B6 /* GoogLeNet.data in Resources */ = {isa = PBXBuildFile; fileRef = BB03AB4D1BE0638900FBE9B6 /* GoogLeNet.data */; }; 11 | BB03AB501BE0943900FBE9B6 /* synset_words.txt in Resources */ = {isa = PBXBuildFile; fileRef = BB03AB4F1BE0943900FBE9B6 /* synset_words.txt */; }; 12 | BB3D671E1BD0691B00FE349F /* GoogLeNet.gen.metal in Sources */ = {isa = PBXBuildFile; fileRef = BB3D671D1BD0691B00FE349F /* GoogLeNet.gen.metal */; }; 13 | BB3D67201BD4B45100FE349F /* GoogLeNet.gen.swift in Sources */ = {isa = PBXBuildFile; fileRef = BB3D671F1BD4B45100FE349F /* GoogLeNet.gen.swift */; }; 14 | BB7F5B551BE55AA20054592E /* GoogLeNetProfile.swift in Sources */ = {isa = PBXBuildFile; fileRef = BB7F5B541BE55AA20054592E /* GoogLeNetProfile.swift */; }; 15 | BBAE5A6E1BBE46CE00AD54A1 /* MetalDetectorTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = BBAE5A6D1BBE46CE00AD54A1 /* MetalDetectorTests.swift */; }; 16 | BBAE5A761BC6DC4B00AD54A1 /* Net.swift in Sources */ = {isa = PBXBuildFile; fileRef = BBAE5A751BC6DC4B00AD54A1 /* Net.swift */; }; 17 | BBAE5A781BC7794700AD54A1 /* Engine.swift in Sources */ 
= {isa = PBXBuildFile; fileRef = BBAE5A771BC7794700AD54A1 /* Engine.swift */; }; 18 | BBB6212C1BB884CC009620C4 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = BBB6212B1BB884CC009620C4 /* AppDelegate.swift */; }; 19 | BBB6212E1BB884CC009620C4 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = BBB6212D1BB884CC009620C4 /* ViewController.swift */; }; 20 | BBB621311BB884CC009620C4 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = BBB6212F1BB884CC009620C4 /* Main.storyboard */; }; 21 | BBB621331BB884CC009620C4 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = BBB621321BB884CC009620C4 /* Assets.xcassets */; }; 22 | BBB621361BB884CC009620C4 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = BBB621341BB884CC009620C4 /* LaunchScreen.storyboard */; }; 23 | BBFDDBCC1BBCFA94005C21CF /* Engine.metal in Sources */ = {isa = PBXBuildFile; fileRef = BBFDDBCB1BBCFA94005C21CF /* Engine.metal */; }; 24 | /* End PBXBuildFile section */ 25 | 26 | /* Begin PBXContainerItemProxy section */ 27 | BBAE5A701BBE46CE00AD54A1 /* PBXContainerItemProxy */ = { 28 | isa = PBXContainerItemProxy; 29 | containerPortal = BBB621201BB884CC009620C4 /* Project object */; 30 | proxyType = 1; 31 | remoteGlobalIDString = BBB621271BB884CC009620C4; 32 | remoteInfo = MetalDetector; 33 | }; 34 | /* End PBXContainerItemProxy section */ 35 | 36 | /* Begin PBXFileReference section */ 37 | BB03AB4D1BE0638900FBE9B6 /* GoogLeNet.data */ = {isa = PBXFileReference; lastKnownFileType = file; path = GoogLeNet.data; sourceTree = ""; }; 38 | BB03AB4F1BE0943900FBE9B6 /* synset_words.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset_words.txt; sourceTree = ""; }; 39 | BB3D671D1BD0691B00FE349F /* GoogLeNet.gen.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = GoogLeNet.gen.metal; sourceTree = ""; }; 40 | BB3D671F1BD4B45100FE349F /* 
GoogLeNet.gen.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = GoogLeNet.gen.swift; sourceTree = ""; }; 41 | BB7F5B541BE55AA20054592E /* GoogLeNetProfile.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = GoogLeNetProfile.swift; sourceTree = ""; }; 42 | BBAE5A6B1BBE46CE00AD54A1 /* MetalDetectorTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = MetalDetectorTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; 43 | BBAE5A6D1BBE46CE00AD54A1 /* MetalDetectorTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MetalDetectorTests.swift; sourceTree = ""; }; 44 | BBAE5A6F1BBE46CE00AD54A1 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 45 | BBAE5A751BC6DC4B00AD54A1 /* Net.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Net.swift; sourceTree = ""; }; 46 | BBAE5A771BC7794700AD54A1 /* Engine.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Engine.swift; sourceTree = ""; }; 47 | BBB621281BB884CC009620C4 /* MetalDetector.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = MetalDetector.app; sourceTree = BUILT_PRODUCTS_DIR; }; 48 | BBB6212B1BB884CC009620C4 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; 49 | BBB6212D1BB884CC009620C4 /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = ""; }; 50 | BBB621301BB884CC009620C4 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = ""; }; 51 | BBB621321BB884CC009620C4 /* Assets.xcassets */ = 
{isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; 52 | BBB621351BB884CC009620C4 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = ""; }; 53 | BBB621371BB884CC009620C4 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 54 | BBFDDBCB1BBCFA94005C21CF /* Engine.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Engine.metal; sourceTree = ""; }; 55 | /* End PBXFileReference section */ 56 | 57 | /* Begin PBXFrameworksBuildPhase section */ 58 | BBAE5A681BBE46CE00AD54A1 /* Frameworks */ = { 59 | isa = PBXFrameworksBuildPhase; 60 | buildActionMask = 2147483647; 61 | files = ( 62 | ); 63 | runOnlyForDeploymentPostprocessing = 0; 64 | }; 65 | BBB621251BB884CC009620C4 /* Frameworks */ = { 66 | isa = PBXFrameworksBuildPhase; 67 | buildActionMask = 2147483647; 68 | files = ( 69 | ); 70 | runOnlyForDeploymentPostprocessing = 0; 71 | }; 72 | /* End PBXFrameworksBuildPhase section */ 73 | 74 | /* Begin PBXGroup section */ 75 | BBAE5A6C1BBE46CE00AD54A1 /* MetalDetectorTests */ = { 76 | isa = PBXGroup; 77 | children = ( 78 | BBAE5A6D1BBE46CE00AD54A1 /* MetalDetectorTests.swift */, 79 | BBAE5A6F1BBE46CE00AD54A1 /* Info.plist */, 80 | ); 81 | path = MetalDetectorTests; 82 | sourceTree = ""; 83 | }; 84 | BBB6211F1BB884CC009620C4 = { 85 | isa = PBXGroup; 86 | children = ( 87 | BBB6212A1BB884CC009620C4 /* MetalDetector */, 88 | BBAE5A6C1BBE46CE00AD54A1 /* MetalDetectorTests */, 89 | BBB621291BB884CC009620C4 /* Products */, 90 | ); 91 | sourceTree = ""; 92 | }; 93 | BBB621291BB884CC009620C4 /* Products */ = { 94 | isa = PBXGroup; 95 | children = ( 96 | BBB621281BB884CC009620C4 /* MetalDetector.app */, 97 | BBAE5A6B1BBE46CE00AD54A1 /* MetalDetectorTests.xctest */, 98 | ); 99 | name = Products; 100 | sourceTree = ""; 101 | }; 
102 | BBB6212A1BB884CC009620C4 /* MetalDetector */ = { 103 | isa = PBXGroup; 104 | children = ( 105 | BB03AB4F1BE0943900FBE9B6 /* synset_words.txt */, 106 | BB03AB4D1BE0638900FBE9B6 /* GoogLeNet.data */, 107 | BB3D671F1BD4B45100FE349F /* GoogLeNet.gen.swift */, 108 | BB3D671D1BD0691B00FE349F /* GoogLeNet.gen.metal */, 109 | BBB6212B1BB884CC009620C4 /* AppDelegate.swift */, 110 | BBB6212D1BB884CC009620C4 /* ViewController.swift */, 111 | BBB6212F1BB884CC009620C4 /* Main.storyboard */, 112 | BBB621321BB884CC009620C4 /* Assets.xcassets */, 113 | BBB621341BB884CC009620C4 /* LaunchScreen.storyboard */, 114 | BBB621371BB884CC009620C4 /* Info.plist */, 115 | BBFDDBCB1BBCFA94005C21CF /* Engine.metal */, 116 | BBAE5A751BC6DC4B00AD54A1 /* Net.swift */, 117 | BBAE5A771BC7794700AD54A1 /* Engine.swift */, 118 | BB7F5B541BE55AA20054592E /* GoogLeNetProfile.swift */, 119 | ); 120 | path = MetalDetector; 121 | sourceTree = ""; 122 | }; 123 | /* End PBXGroup section */ 124 | 125 | /* Begin PBXNativeTarget section */ 126 | BBAE5A6A1BBE46CE00AD54A1 /* MetalDetectorTests */ = { 127 | isa = PBXNativeTarget; 128 | buildConfigurationList = BBAE5A741BBE46CE00AD54A1 /* Build configuration list for PBXNativeTarget "MetalDetectorTests" */; 129 | buildPhases = ( 130 | BBAE5A671BBE46CE00AD54A1 /* Sources */, 131 | BBAE5A681BBE46CE00AD54A1 /* Frameworks */, 132 | BBAE5A691BBE46CE00AD54A1 /* Resources */, 133 | ); 134 | buildRules = ( 135 | ); 136 | dependencies = ( 137 | BBAE5A711BBE46CE00AD54A1 /* PBXTargetDependency */, 138 | ); 139 | name = MetalDetectorTests; 140 | productName = MetalDetectorTests; 141 | productReference = BBAE5A6B1BBE46CE00AD54A1 /* MetalDetectorTests.xctest */; 142 | productType = "com.apple.product-type.bundle.unit-test"; 143 | }; 144 | BBB621271BB884CC009620C4 /* MetalDetector */ = { 145 | isa = PBXNativeTarget; 146 | buildConfigurationList = BBB6213A1BB884CC009620C4 /* Build configuration list for PBXNativeTarget "MetalDetector" */; 147 | buildPhases = ( 148 | 
BBB621241BB884CC009620C4 /* Sources */, 149 | BBB621251BB884CC009620C4 /* Frameworks */, 150 | BBB621261BB884CC009620C4 /* Resources */, 151 | ); 152 | buildRules = ( 153 | ); 154 | dependencies = ( 155 | ); 156 | name = MetalDetector; 157 | productName = MetalDetector; 158 | productReference = BBB621281BB884CC009620C4 /* MetalDetector.app */; 159 | productType = "com.apple.product-type.application"; 160 | }; 161 | /* End PBXNativeTarget section */ 162 | 163 | /* Begin PBXProject section */ 164 | BBB621201BB884CC009620C4 /* Project object */ = { 165 | isa = PBXProject; 166 | attributes = { 167 | LastUpgradeCheck = 0700; 168 | ORGANIZATIONNAME = "Ivan Krasin"; 169 | TargetAttributes = { 170 | BBAE5A6A1BBE46CE00AD54A1 = { 171 | CreatedOnToolsVersion = 7.0.1; 172 | TestTargetID = BBB621271BB884CC009620C4; 173 | }; 174 | BBB621271BB884CC009620C4 = { 175 | CreatedOnToolsVersion = 7.0; 176 | }; 177 | }; 178 | }; 179 | buildConfigurationList = BBB621231BB884CC009620C4 /* Build configuration list for PBXProject "MetalDetector" */; 180 | compatibilityVersion = "Xcode 3.2"; 181 | developmentRegion = English; 182 | hasScannedForEncodings = 0; 183 | knownRegions = ( 184 | en, 185 | Base, 186 | ); 187 | mainGroup = BBB6211F1BB884CC009620C4; 188 | productRefGroup = BBB621291BB884CC009620C4 /* Products */; 189 | projectDirPath = ""; 190 | projectRoot = ""; 191 | targets = ( 192 | BBB621271BB884CC009620C4 /* MetalDetector */, 193 | BBAE5A6A1BBE46CE00AD54A1 /* MetalDetectorTests */, 194 | ); 195 | }; 196 | /* End PBXProject section */ 197 | 198 | /* Begin PBXResourcesBuildPhase section */ 199 | BBAE5A691BBE46CE00AD54A1 /* Resources */ = { 200 | isa = PBXResourcesBuildPhase; 201 | buildActionMask = 2147483647; 202 | files = ( 203 | ); 204 | runOnlyForDeploymentPostprocessing = 0; 205 | }; 206 | BBB621261BB884CC009620C4 /* Resources */ = { 207 | isa = PBXResourcesBuildPhase; 208 | buildActionMask = 2147483647; 209 | files = ( 210 | BBB621361BB884CC009620C4 /* LaunchScreen.storyboard 
in Resources */, 211 | BB03AB501BE0943900FBE9B6 /* synset_words.txt in Resources */, 212 | BBB621331BB884CC009620C4 /* Assets.xcassets in Resources */, 213 | BB03AB4E1BE0638900FBE9B6 /* GoogLeNet.data in Resources */, 214 | BBB621311BB884CC009620C4 /* Main.storyboard in Resources */, 215 | ); 216 | runOnlyForDeploymentPostprocessing = 0; 217 | }; 218 | /* End PBXResourcesBuildPhase section */ 219 | 220 | /* Begin PBXSourcesBuildPhase section */ 221 | BBAE5A671BBE46CE00AD54A1 /* Sources */ = { 222 | isa = PBXSourcesBuildPhase; 223 | buildActionMask = 2147483647; 224 | files = ( 225 | BBAE5A6E1BBE46CE00AD54A1 /* MetalDetectorTests.swift in Sources */, 226 | ); 227 | runOnlyForDeploymentPostprocessing = 0; 228 | }; 229 | BBB621241BB884CC009620C4 /* Sources */ = { 230 | isa = PBXSourcesBuildPhase; 231 | buildActionMask = 2147483647; 232 | files = ( 233 | BBB6212E1BB884CC009620C4 /* ViewController.swift in Sources */, 234 | BBB6212C1BB884CC009620C4 /* AppDelegate.swift in Sources */, 235 | BBFDDBCC1BBCFA94005C21CF /* Engine.metal in Sources */, 236 | BBAE5A781BC7794700AD54A1 /* Engine.swift in Sources */, 237 | BB3D671E1BD0691B00FE349F /* GoogLeNet.gen.metal in Sources */, 238 | BB3D67201BD4B45100FE349F /* GoogLeNet.gen.swift in Sources */, 239 | BB7F5B551BE55AA20054592E /* GoogLeNetProfile.swift in Sources */, 240 | BBAE5A761BC6DC4B00AD54A1 /* Net.swift in Sources */, 241 | ); 242 | runOnlyForDeploymentPostprocessing = 0; 243 | }; 244 | /* End PBXSourcesBuildPhase section */ 245 | 246 | /* Begin PBXTargetDependency section */ 247 | BBAE5A711BBE46CE00AD54A1 /* PBXTargetDependency */ = { 248 | isa = PBXTargetDependency; 249 | target = BBB621271BB884CC009620C4 /* MetalDetector */; 250 | targetProxy = BBAE5A701BBE46CE00AD54A1 /* PBXContainerItemProxy */; 251 | }; 252 | /* End PBXTargetDependency section */ 253 | 254 | /* Begin PBXVariantGroup section */ 255 | BBB6212F1BB884CC009620C4 /* Main.storyboard */ = { 256 | isa = PBXVariantGroup; 257 | children = ( 258 | 
BBB621301BB884CC009620C4 /* Base */, 259 | ); 260 | name = Main.storyboard; 261 | sourceTree = ""; 262 | }; 263 | BBB621341BB884CC009620C4 /* LaunchScreen.storyboard */ = { 264 | isa = PBXVariantGroup; 265 | children = ( 266 | BBB621351BB884CC009620C4 /* Base */, 267 | ); 268 | name = LaunchScreen.storyboard; 269 | sourceTree = ""; 270 | }; 271 | /* End PBXVariantGroup section */ 272 | 273 | /* Begin XCBuildConfiguration section */ 274 | BBAE5A721BBE46CE00AD54A1 /* Debug */ = { 275 | isa = XCBuildConfiguration; 276 | buildSettings = { 277 | BUNDLE_LOADER = "$(TEST_HOST)"; 278 | INFOPLIST_FILE = MetalDetectorTests/Info.plist; 279 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; 280 | PRODUCT_BUNDLE_IDENTIFIER = samofly.MetalDetectorTests; 281 | PRODUCT_NAME = "$(TARGET_NAME)"; 282 | TEST_HOST = "$(BUILT_PRODUCTS_DIR)/MetalDetector.app/MetalDetector"; 283 | }; 284 | name = Debug; 285 | }; 286 | BBAE5A731BBE46CE00AD54A1 /* Release */ = { 287 | isa = XCBuildConfiguration; 288 | buildSettings = { 289 | BUNDLE_LOADER = "$(TEST_HOST)"; 290 | INFOPLIST_FILE = MetalDetectorTests/Info.plist; 291 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; 292 | PRODUCT_BUNDLE_IDENTIFIER = samofly.MetalDetectorTests; 293 | PRODUCT_NAME = "$(TARGET_NAME)"; 294 | TEST_HOST = "$(BUILT_PRODUCTS_DIR)/MetalDetector.app/MetalDetector"; 295 | }; 296 | name = Release; 297 | }; 298 | BBB621381BB884CC009620C4 /* Debug */ = { 299 | isa = XCBuildConfiguration; 300 | buildSettings = { 301 | ALWAYS_SEARCH_USER_PATHS = NO; 302 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; 303 | CLANG_CXX_LIBRARY = "libc++"; 304 | CLANG_ENABLE_MODULES = YES; 305 | CLANG_ENABLE_OBJC_ARC = YES; 306 | CLANG_WARN_BOOL_CONVERSION = YES; 307 | CLANG_WARN_CONSTANT_CONVERSION = YES; 308 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 309 | CLANG_WARN_EMPTY_BODY = YES; 310 | CLANG_WARN_ENUM_CONVERSION = YES; 311 | CLANG_WARN_INT_CONVERSION 
= YES; 312 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 313 | CLANG_WARN_UNREACHABLE_CODE = YES; 314 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 315 | "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; 316 | COPY_PHASE_STRIP = NO; 317 | DEBUG_INFORMATION_FORMAT = dwarf; 318 | ENABLE_STRICT_OBJC_MSGSEND = YES; 319 | ENABLE_TESTABILITY = YES; 320 | GCC_C_LANGUAGE_STANDARD = gnu99; 321 | GCC_DYNAMIC_NO_PIC = NO; 322 | GCC_NO_COMMON_BLOCKS = YES; 323 | GCC_OPTIMIZATION_LEVEL = 0; 324 | GCC_PREPROCESSOR_DEFINITIONS = ( 325 | "DEBUG=1", 326 | "$(inherited)", 327 | ); 328 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 329 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 330 | GCC_WARN_UNDECLARED_SELECTOR = YES; 331 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 332 | GCC_WARN_UNUSED_FUNCTION = YES; 333 | GCC_WARN_UNUSED_VARIABLE = YES; 334 | IPHONEOS_DEPLOYMENT_TARGET = 9.0; 335 | MTL_ENABLE_DEBUG_INFO = YES; 336 | ONLY_ACTIVE_ARCH = YES; 337 | SDKROOT = iphoneos; 338 | SWIFT_OPTIMIZATION_LEVEL = "-Onone"; 339 | TARGETED_DEVICE_FAMILY = "1,2"; 340 | }; 341 | name = Debug; 342 | }; 343 | BBB621391BB884CC009620C4 /* Release */ = { 344 | isa = XCBuildConfiguration; 345 | buildSettings = { 346 | ALWAYS_SEARCH_USER_PATHS = NO; 347 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; 348 | CLANG_CXX_LIBRARY = "libc++"; 349 | CLANG_ENABLE_MODULES = YES; 350 | CLANG_ENABLE_OBJC_ARC = YES; 351 | CLANG_WARN_BOOL_CONVERSION = YES; 352 | CLANG_WARN_CONSTANT_CONVERSION = YES; 353 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 354 | CLANG_WARN_EMPTY_BODY = YES; 355 | CLANG_WARN_ENUM_CONVERSION = YES; 356 | CLANG_WARN_INT_CONVERSION = YES; 357 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 358 | CLANG_WARN_UNREACHABLE_CODE = YES; 359 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 360 | "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; 361 | COPY_PHASE_STRIP = NO; 362 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 363 | ENABLE_NS_ASSERTIONS = NO; 364 | ENABLE_STRICT_OBJC_MSGSEND = YES; 365 | 
GCC_C_LANGUAGE_STANDARD = gnu99; 366 | GCC_NO_COMMON_BLOCKS = YES; 367 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 368 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 369 | GCC_WARN_UNDECLARED_SELECTOR = YES; 370 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 371 | GCC_WARN_UNUSED_FUNCTION = YES; 372 | GCC_WARN_UNUSED_VARIABLE = YES; 373 | IPHONEOS_DEPLOYMENT_TARGET = 9.0; 374 | MTL_ENABLE_DEBUG_INFO = NO; 375 | SDKROOT = iphoneos; 376 | TARGETED_DEVICE_FAMILY = "1,2"; 377 | VALIDATE_PRODUCT = YES; 378 | }; 379 | name = Release; 380 | }; 381 | BBB6213B1BB884CC009620C4 /* Debug */ = { 382 | isa = XCBuildConfiguration; 383 | buildSettings = { 384 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 385 | DEFINES_MODULE = YES; 386 | INFOPLIST_FILE = MetalDetector/Info.plist; 387 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; 388 | MTL_TREAT_WARNINGS_AS_ERRORS = YES; 389 | PRODUCT_BUNDLE_IDENTIFIER = samofly.MetalDetector; 390 | PRODUCT_NAME = "$(TARGET_NAME)"; 391 | }; 392 | name = Debug; 393 | }; 394 | BBB6213C1BB884CC009620C4 /* Release */ = { 395 | isa = XCBuildConfiguration; 396 | buildSettings = { 397 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 398 | DEFINES_MODULE = YES; 399 | INFOPLIST_FILE = MetalDetector/Info.plist; 400 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; 401 | MTL_TREAT_WARNINGS_AS_ERRORS = YES; 402 | PRODUCT_BUNDLE_IDENTIFIER = samofly.MetalDetector; 403 | PRODUCT_NAME = "$(TARGET_NAME)"; 404 | }; 405 | name = Release; 406 | }; 407 | /* End XCBuildConfiguration section */ 408 | 409 | /* Begin XCConfigurationList section */ 410 | BBAE5A741BBE46CE00AD54A1 /* Build configuration list for PBXNativeTarget "MetalDetectorTests" */ = { 411 | isa = XCConfigurationList; 412 | buildConfigurations = ( 413 | BBAE5A721BBE46CE00AD54A1 /* Debug */, 414 | BBAE5A731BBE46CE00AD54A1 /* Release */, 415 | ); 416 | defaultConfigurationIsVisible = 0; 417 | defaultConfigurationName = Release; 418 | }; 419 | 
BBB621231BB884CC009620C4 /* Build configuration list for PBXProject "MetalDetector" */ = { 420 | isa = XCConfigurationList; 421 | buildConfigurations = ( 422 | BBB621381BB884CC009620C4 /* Debug */, 423 | BBB621391BB884CC009620C4 /* Release */, 424 | ); 425 | defaultConfigurationIsVisible = 0; 426 | defaultConfigurationName = Release; 427 | }; 428 | BBB6213A1BB884CC009620C4 /* Build configuration list for PBXNativeTarget "MetalDetector" */ = { 429 | isa = XCConfigurationList; 430 | buildConfigurations = ( 431 | BBB6213B1BB884CC009620C4 /* Debug */, 432 | BBB6213C1BB884CC009620C4 /* Release */, 433 | ); 434 | defaultConfigurationIsVisible = 0; 435 | defaultConfigurationName = Release; 436 | }; 437 | /* End XCConfigurationList section */ 438 | }; 439 | rootObject = BBB621201BB884CC009620C4 /* Project object */; 440 | } 441 | -------------------------------------------------------------------------------- /MetalDetector.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /MetalDetector.xcodeproj/xcuserdata/krasin.xcuserdatad/xcschemes/MetalDetector.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 30 | 31 | 33 | 39 | 40 | 41 | 42 | 43 | 49 | 50 | 51 | 52 | 56 | 57 | 58 | 59 | 60 | 61 | 71 | 73 | 79 | 80 | 81 | 82 | 86 | 87 | 88 | 89 | 90 | 91 | 97 | 99 | 105 | 106 | 107 | 108 | 110 | 111 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /MetalDetector.xcodeproj/xcuserdata/krasin.xcuserdatad/xcschemes/xcschememanagement.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SchemeUserState 6 | 7 | MetalDetector.xcscheme 8 | 9 | orderHint 10 | 0 11 | 12 | 13 | SuppressBuildableAutocreation 14 
| 15 | BBAE5A6A1BBE46CE00AD54A1 16 | 17 | primary 18 | 19 | 20 | BBB621271BB884CC009620C4 21 | 22 | primary 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /MetalDetector/AppDelegate.swift: -------------------------------------------------------------------------------- 1 | // 2 | // AppDelegate.swift 3 | // MetalDetector 4 | // 5 | // Created by Ivan Krasin on 9/27/15. 6 | // Copyright © 2015 Ivan Krasin. All rights reserved. 7 | // 8 | 9 | import UIKit 10 | 11 | @UIApplicationMain 12 | class AppDelegate: UIResponder, UIApplicationDelegate { 13 | 14 | var window: UIWindow? 15 | 16 | 17 | func application(application: UIApplication, didFinishLaunchingWithOptions launchOptions: [NSObject: AnyObject]?) -> Bool { 18 | // Override point for customization after application launch. 19 | return true 20 | } 21 | 22 | func applicationWillResignActive(application: UIApplication) { 23 | // Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state. 24 | // Use this method to pause ongoing tasks, disable timers, and throttle down OpenGL ES frame rates. Games should use this method to pause the game. 25 | } 26 | 27 | func applicationDidEnterBackground(application: UIApplication) { 28 | // Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later. 29 | // If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits. 
30 | } 31 | 32 | func applicationWillEnterForeground(application: UIApplication) { 33 | // Called as part of the transition from the background to the inactive state; here you can undo many of the changes made on entering the background. 34 | } 35 | 36 | func applicationDidBecomeActive(application: UIApplication) { 37 | // Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface. 38 | } 39 | 40 | func applicationWillTerminate(application: UIApplication) { 41 | // Called when the application is about to terminate. Save data if appropriate. See also applicationDidEnterBackground:. 42 | } 43 | 44 | 45 | } 46 | 47 | -------------------------------------------------------------------------------- /MetalDetector/Assets.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "iphone", 5 | "size" : "29x29", 6 | "scale" : "2x" 7 | }, 8 | { 9 | "idiom" : "iphone", 10 | "size" : "29x29", 11 | "scale" : "3x" 12 | }, 13 | { 14 | "idiom" : "iphone", 15 | "size" : "40x40", 16 | "scale" : "2x" 17 | }, 18 | { 19 | "idiom" : "iphone", 20 | "size" : "40x40", 21 | "scale" : "3x" 22 | }, 23 | { 24 | "idiom" : "iphone", 25 | "size" : "60x60", 26 | "scale" : "2x" 27 | }, 28 | { 29 | "idiom" : "iphone", 30 | "size" : "60x60", 31 | "scale" : "3x" 32 | }, 33 | { 34 | "idiom" : "ipad", 35 | "size" : "29x29", 36 | "scale" : "1x" 37 | }, 38 | { 39 | "idiom" : "ipad", 40 | "size" : "29x29", 41 | "scale" : "2x" 42 | }, 43 | { 44 | "idiom" : "ipad", 45 | "size" : "40x40", 46 | "scale" : "1x" 47 | }, 48 | { 49 | "idiom" : "ipad", 50 | "size" : "40x40", 51 | "scale" : "2x" 52 | }, 53 | { 54 | "idiom" : "ipad", 55 | "size" : "76x76", 56 | "scale" : "1x" 57 | }, 58 | { 59 | "idiom" : "ipad", 60 | "size" : "76x76", 61 | "scale" : "2x" 62 | } 63 | ], 64 | "info" : { 65 
| "version" : 1, 66 | "author" : "xcode" 67 | } 68 | } -------------------------------------------------------------------------------- /MetalDetector/Assets.xcassets/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "info" : { 3 | "version" : 1, 4 | "author" : "xcode" 5 | } 6 | } -------------------------------------------------------------------------------- /MetalDetector/Assets.xcassets/cat.imageset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "universal", 5 | "filename" : "cat.png", 6 | "scale" : "1x" 7 | }, 8 | { 9 | "idiom" : "universal", 10 | "scale" : "2x" 11 | }, 12 | { 13 | "idiom" : "universal", 14 | "scale" : "3x" 15 | } 16 | ], 17 | "info" : { 18 | "version" : 1, 19 | "author" : "xcode" 20 | } 21 | } -------------------------------------------------------------------------------- /MetalDetector/Assets.xcassets/cat.imageset/cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krasin/MetalDetector/2d204912d742ea366b624cdf556ea6297e7ea669/MetalDetector/Assets.xcassets/cat.imageset/cat.png -------------------------------------------------------------------------------- /MetalDetector/Base.lproj/LaunchScreen.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /MetalDetector/Base.lproj/Main.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 
-------------------------------------------------------------------------------- /MetalDetector/Engine.metal: --------------------------------------------------------------------------------
//
//  Engine.metal
//  MetalDetector
//
//  Created by Ivan Krasin on 9/30/15.
//  Copyright © 2015 Ivan Krasin. All rights reserved.
//

#include <metal_stdlib>
using namespace metal;

// NOTE(review): in this copy of the file the texture parameters carry no
// template arguments (element type / access mode). They are reproduced exactly
// as found; confirm them against the project before compiling.

// Side length, in pixels, of the square crop taken by `preprocess`.
constant uint kCropSize = 224;

// Per-channel means subtracted by `preprocess` and added back by `float2BGRA`.
// Both kernels share these values so that one is the exact inverse of the other.
constant float kMeanB = 104.007f;
constant float kMeanG = 116.66947f;
constant float kMeanR = 122.6751f;

// Crops a kCropSize x kCropSize window from the centre of `in`, flips it
// vertically, scales each colour component by 255, subtracts the per-channel
// mean and writes the result to slices 0, 1 and 2 of `out` (one slice per channel).
//
//   in   source image.
//   out  destination texture array; slice 0 receives component 0 of the texel
//        (treated as blue), slice 1 component 1 (green), slice 2 component 2 (red).
//   gid  position of the output texel handled by this thread.
//
// Threads that fall outside `out`, or whose source texel would fall outside `in`
// (including the case where `in` is smaller than the crop), write nothing.
//
// NOTE(review): components 0/1/2 returned by in.read() are interpreted as B/G/R.
// Whether that matches the pixel format of the bound texture is not visible
// here - confirm against the host code.
kernel void preprocess(texture2d in [[texture(0)]],
                       texture2d_array out [[texture(1)]],
                       uint2 gid [[thread_position_in_grid]]) {
    if (gid.x >= out.get_width() || gid.y >= out.get_height()) { return; }

    const uint inWidth = in.get_width();
    const uint inHeight = in.get_height();

    // The offsets below are unsigned: without this check an input smaller than
    // the crop would wrap around to a huge offset and read far out of bounds.
    if (inWidth < kCropSize || inHeight < kCropSize) { return; }

    const uint dx = (inWidth - kCropSize) / 2;
    const uint dy = (inHeight - kCropSize) / 2;
    uint2 coord = uint2(gid.x + dx, gid.y + dy);
    if (coord.x >= inWidth || coord.y >= inHeight) { return; }

    // Flipping vertically. This is needed when the image is loaded from resources.
    // Not so sure about the camera images.
    coord.y = inHeight - coord.y - 1;

    const float4 bgra = in.read(coord);
    const float b = bgra[0];
    const float g = bgra[1];
    const float r = bgra[2];
    out.write(b * 255 - kMeanB, gid, 0);
    out.write(g * 255 - kMeanG, gid, 1);
    out.write(r * 255 - kMeanR, gid, 2);
}


// Inverse of `preprocess` for inspection purposes: reads slices 0, 1, 2 of `in`
// as mean-subtracted blue, green and red, adds the means back, rescales to
// [0, 1] and writes an opaque (r, g, b, 1) texel to `out`.
// Threads outside `out` write nothing. No vertical flip is applied here.
kernel void float2BGRA(texture2d_array in [[texture(0)]],
                       texture2d out [[texture(1)]],
                       uint2 gid [[thread_position_in_grid]]) {
    if (gid.x >= out.get_width() || gid.y >= out.get_height()) { return; }

    const float b = in.read(gid, 0).r;
    const float g = in.read(gid, 1).r;
    const float r = in.read(gid, 2).r;
    // Alpha is 1.0 (fully opaque). The previous value of 255 only happened to
    // work where the destination format clamps writes to [0, 1].
    out.write(half4((r + kMeanR) / 255, (g + kMeanG) / 255, (b + kMeanB) / 255, 1.0), gid);
}

// Per-row L1 partial sum: for row `gid`, adds up |value| of the first component
// over every column and every slice of `in` and stores the total in res[gid].
// Threads with gid >= height do nothing.
kernel void computeL1(texture2d_array in [[texture(0)]],
                      device float* res [[buffer(0)]],
                      uint gid [[thread_position_in_grid]]) {
    if (gid >= in.get_height()) { return; }

    const uint sliceCount = in.get_array_size();
    const uint width = in.get_width();
    float sum = 0.0;
    for (uint slice = 0; slice < sliceCount; slice++) {
        for (uint x = 0; x < width; x++) {
            sum += abs(in.read(uint2(x, gid), slice).r);
        }
    }
    res[gid] = sum;
}

// Per-row sum of squares: for row `gid`, adds up value * value of the first
// component over every column and every slice of `in` and stores the total in
// res[gid]. No square root is taken here.
// Threads with gid >= height do nothing.
kernel void computeL2(texture2d_array in [[texture(0)]],
                      device float* res [[buffer(0)]],
                      uint gid [[thread_position_in_grid]]) {
    if (gid >= in.get_height()) { return; }

    const uint sliceCount = in.get_array_size();
    const uint width = in.get_width();
    float sum = 0.0;
    for (uint slice = 0; slice < sliceCount; slice++) {
        for (uint x = 0; x < width; x++) {
            const float v = in.read(uint2(x, gid), slice).r;
            sum += v * v;
        }
    }
    res[gid] = sum;
}

// Per-row maximum: for row `gid`, finds the largest first-component value over
// every column and every slice of `in` and stores it in res[gid].
// Threads with gid >= height do nothing.
kernel void computeMax(texture2d_array in [[texture(0)]],
                       device float* res [[buffer(0)]],
                       uint gid [[thread_position_in_grid]]) {
    if (gid >= in.get_height()) { return; }

    const uint sliceCount = in.get_array_size();
    const uint width = in.get_width();
    // Seed with the first texel of the row so that all-negative rows are handled.
    float best = in.read(uint2(0, gid), 0).r;
    for (uint slice = 0; slice < sliceCount; slice++) {
        for (uint x = 0; x < width; x++) {
            const float v = in.read(uint2(x, gid), slice).r;
            if (v > best) {
                best = v;
            }
        }
    }
    res[gid] = best;
}

//
Takes a sample 8x8 from the first slice. 89 | kernel void sample8x8(texture2d_array in [[texture(0)]], 90 | device float* res [[buffer(0)]], 91 | uint2 gid [[thread_position_in_grid]]) { 92 | res[gid.y*8+gid.x] = in.read(gid, 0).r; 93 | } 94 | 95 | kernel void loss3_classifier_0(texture2d_array in [[texture(0)]], 96 | texture2d_array out [[texture(1)]], 97 | device half* weights [[buffer(0)]], 98 | uint2 gid [[thread_position_in_grid]]) { 99 | if (gid.x >= 1000) { return; } 100 | float sum = 0.0; 101 | // Skip weights for the previous filters 102 | uint i = gid.x * in.get_array_size() * in.get_height() * in.get_height(); 103 | for (uint c = 0; c < in.get_array_size(); c++) { 104 | for (uint y = 0; y < in.get_height(); y++) { 105 | for (uint x = 0; x < in.get_width(); x++) { 106 | float v = in.read(uint2(x, y), c)[0]; 107 | sum += weights[i] * v; 108 | i++; 109 | } 110 | } 111 | } 112 | out.write(sum, uint2(0,0), gid.x); 113 | } 114 | 115 | kernel void prob_0(texture2d_array in [[texture(0)]], 116 | texture2d_array out [[texture(1)]], 117 | device half* weights [[buffer(0)]], 118 | uint2 gid [[thread_position_in_grid]]) { 119 | if (gid.x >= 1) { return; } 120 | float maxv = in.read(uint2(0,0), 0)[0]; 121 | for (uint x = 0; x < 1000; x++) { 122 | float v = in.read(uint2(0, 0), x)[0]; 123 | maxv = max(maxv, v); 124 | } 125 | 126 | float sum = 0.0; 127 | for (uint x = 0; x < 1000; x++) { 128 | float v = in.read(uint2(0, 0), x)[0]; 129 | sum += exp(v - maxv); 130 | } 131 | 132 | for (uint x = 0; x < 1000; x++) { 133 | float v = in.read(uint2(0, 0), x)[0]; 134 | float tmp = exp(v - maxv); 135 | float res = tmp / sum; 136 | out.write(res, uint2(0, 0), x); 137 | } 138 | } 139 | 140 | // Converts a texture 1x1xarray_length into a buffer. 
kernel void array1x1_to_buffer_0(texture2d_array<float, access::read> in [[texture(0)]],
                                 device float* out [[buffer(0)]],
                                 uint2 gid [[thread_position_in_grid]]) {
    // One thread per slice; copies the first component of pixel (0,0).
    if (gid.x >= in.get_array_size()) {
        return;
    }
    out[gid.x] = in.read(uint2(0, 0), gid.x)[0];
}

// Converts a buffer of floats into a buffer of halves, one element per thread.
// NOTE(review): the element limit is hard-coded (27961088 bytes / 4 bytes per
// float) instead of being derived from the actual buffer. A shorter input
// dispatched with a rounded-up grid can index past the end of both buffers,
// and a longer input is only partially converted. Fixing this needs an extra
// kernel argument, so it is left as is here.
kernel void float2half(device float* in [[buffer(0)]],
                       device half* out [[buffer(1)]],
                       uint gid [[thread_position_in_grid]]) {
    const uint N = 27961088 / 4;
    if (gid >= N) {
        return;
    }
    out[gid] = in[gid];
}
--------------------------------------------------------------------------------
/MetalDetector/Engine.swift:
--------------------------------------------------------------------------------
//
//  Layers.swift
//  MetalDetector
//
//  Created by Ivan Krasin on 10/8/15.
//  Copyright © 2015 Ivan Krasin. All rights reserved.
//

import Foundation
import Metal
import MetalKit
import MetalPerformanceShaders
import UIKit

// Engine owns the Metal device, shader library and command queue, caches
// compute pipeline states by kernel name, and offers helpers that encode
// layer kernels or run small diagnostic kernels and read their results back.
public class Engine {
    public var metalDevice: MTLDevice?
    var metalLib: MTLLibrary?
    public var commandQueue: MTLCommandQueue?
    var textureCache : Unmanaged<CVMetalTextureCache>?

    // Pipeline states already built by LoadKernelState, keyed by kernel name.
    var kernelStates : [String: MTLComputePipelineState?]
    var computeL1State: MTLComputePipelineState?
    var computeL2State: MTLComputePipelineState?
    var computeMaxState: MTLComputePipelineState?
    var sample8x8State : MTLComputePipelineState?

    // Creates the default device, its texture cache, default library and a
    // command queue, then preloads the diagnostic kernels.
    public init() {
        kernelStates = [:]

        metalDevice = MTLCreateSystemDefaultDevice()
        CVMetalTextureCacheCreate(kCFAllocatorDefault, nil, metalDevice!, nil, &textureCache)
        metalLib = metalDevice!.newDefaultLibrary()!
        commandQueue = metalDevice!.newCommandQueue()

        // Load kernels
        computeL1State = LoadKernelState("computeL1")
        computeL2State = LoadKernelState("computeL2")
        computeMaxState = LoadKernelState("computeMax")
        sample8x8State = LoadKernelState("sample8x8")
    }

    // Returns the compute pipeline state for the named kernel, building it on
    // first use and serving it from kernelStates afterwards. Terminates the
    // process if the kernel is missing from the library or cannot be compiled.
    public func LoadKernelState(kernelName: String) -> MTLComputePipelineState {
        if let cached = kernelStates[kernelName], state = cached {
            return state
        }
        guard let function = metalLib!.newFunctionWithName(kernelName) else {
            print("Could not load \(kernelName) from the library")
            exit(1)
        }
        do {
            let state = try metalDevice!.newComputePipelineStateWithFunction(function)
            // Remember the state; previously the cache was read but never filled.
            kernelStates[kernelName] = state
            return state
        } catch let error as NSError {
            print("Could not create pipeline state for \(kernelName): \(error)")
            exit(1)
        }
    }

    // Number of groups of size `groupSize` needed to cover `count` items.
    private func groupsToCover(count: Int, _ groupSize: Int) -> Int {
        return (count + groupSize - 1) / groupSize
    }

    // Encodes one kernel invocation with `input` at texture 0, `output` at
    // texture 1 and, when present, `weights` at buffer 0.
    private func encodeLayer(commandBuffer: MTLCommandBuffer, name: String, weights: MTLBuffer?,
        input: MTLTexture, output: MTLTexture,
        threadsPerThreadgroup: MTLSize, threadgroupsPerGrid: MTLSize) {
        let state = LoadKernelState(name)
        let commandEncoder = commandBuffer.computeCommandEncoder()
        commandEncoder.setComputePipelineState(state)
        commandEncoder.setTexture(input, atIndex: 0)
        commandEncoder.setTexture(output, atIndex: 1)
        if let weights = weights {
            commandEncoder.setBuffer(weights, offset: 0, atIndex: 0)
        }
        commandEncoder.dispatchThreadgroups(threadgroupsPerGrid, threadsPerThreadgroup: threadsPerThreadgroup)
        commandEncoder.endEncoding()
    }

    // Runs `state` synchronously with `texture` at texture 0 and a fresh
    // shared buffer of `count` floats at buffer 0, and returns the buffer
    // contents once the command buffer has completed.
    private func runAndReadFloats(state: MTLComputePipelineState, texture: MTLTexture, count: Int,
        threadsPerThreadgroup: MTLSize, threadgroupsPerGrid: MTLSize) -> [Float] {
        let commandBuffer = commandQueue!.commandBuffer()
        let commandEncoder = commandBuffer.computeCommandEncoder()
        commandEncoder.setComputePipelineState(state)
        commandEncoder.setTexture(texture, atIndex: 0)
        let resBuf = metalDevice!.newBufferWithLength(4 * count, options: .StorageModeShared)
        commandEncoder.setBuffer(resBuf, offset: 0, atIndex: 0)
        commandEncoder.dispatchThreadgroups(threadgroupsPerGrid, threadsPerThreadgroup: threadsPerThreadgroup)
        commandEncoder.endEncoding()
        commandBuffer.commit()
        commandBuffer.waitUntilCompleted()

        let resArr = UnsafeMutablePointer<Float>(resBuf.contents())
        return Array(UnsafeBufferPointer(start: resArr, count: count))
    }

    // Runs a one-thread-per-row kernel over `texture` and returns one float
    // per texture row.
    private func runPerRow(state: MTLComputePipelineState, texture: MTLTexture) -> [Float] {
        let threadsPerThreadgroup = MTLSizeMake(32, 1, 1)
        let threadgroupsPerGrid = MTLSizeMake(
            groupsToCover(texture.height, threadsPerThreadgroup.width), 1, 1)
        return runAndReadFloats(state, texture: texture, count: texture.height,
            threadsPerThreadgroup: threadsPerThreadgroup, threadgroupsPerGrid: threadgroupsPerGrid)
    }

    // Encodes kernel `name` with a 2D grid that covers the width and height
    // of `output`, rounded up to whole threadgroups.
    public func UnaryLayer(commandBuffer : MTLCommandBuffer, name : String, weights : MTLBuffer?,
        input : MTLTexture, output : MTLTexture, threadsPerThreadgroup: MTLSize) {
        let threadgroupsPerGrid = MTLSizeMake(
            groupsToCover(output.width, threadsPerThreadgroup.width),
            groupsToCover(output.height, threadsPerThreadgroup.height), 1)
        encodeLayer(commandBuffer, name: name, weights: weights, input: input, output: output,
            threadsPerThreadgroup: threadsPerThreadgroup, threadgroupsPerGrid: threadgroupsPerGrid)
    }

    // Encodes kernel `name` with a 1D grid of at least `numFilters` threads,
    // in threadgroups of 256.
    public func PerFilterLayer(commandBuffer : MTLCommandBuffer, name : String,
        weights : MTLBuffer?, numFilters : Int, input : MTLTexture, output : MTLTexture) {
        let threadsPerThreadgroup = MTLSizeMake(256, 1, 1)
        let threadgroupsPerGrid = MTLSizeMake(
            groupsToCover(numFilters, threadsPerThreadgroup.width), 1, 1)
        encodeLayer(commandBuffer, name: name, weights: weights, input: input, output: output,
            threadsPerThreadgroup: threadsPerThreadgroup, threadgroupsPerGrid: threadgroupsPerGrid)
    }

    // Encodes the "preprocess" kernel from `input` into `output` using 16x16 threadgroups.
    func Preprocess(commandBuffer : MTLCommandBuffer, input : MTLTexture, output : MTLTexture) {
        UnaryLayer(commandBuffer, name: "preprocess", weights: nil, input: input,
            output: output, threadsPerThreadgroup: MTLSizeMake(16, 16, 1))
    }

    // L1 computes L1 metric. It requires a 2d float texture array as an input.
    // The GPU produces one partial sum per row; they are added up here.
    public func L1(texture : MTLTexture) -> Float {
        return runPerRow(computeL1State!, texture: texture).reduce(Float(0), combine: +)
    }

    // CPU_L1 takes the data from a Float32 texture and computes L1 on CPU
    public func CPU_L1(texture : MTLTexture) -> Float {
        let valuesPerSlice = texture.width * texture.height
        let region = MTLRegionMake2D(0, 0, texture.width, texture.height)
        var sum : Float = 0
        for slice in 0..<texture.arrayLength {
            var buf = [Float](count: valuesPerSlice, repeatedValue: 0)
            texture.getBytes(&buf, bytesPerRow: texture.width * 4, bytesPerImage: valuesPerSlice * 4,
                fromRegion: region, mipmapLevel: 0, slice: slice)
            for value in buf {
                sum += abs(value)
            }
        }
        return sum
    }

    // L2 computes the sum of squares. It requires a 2d float texture array as an input.
    // The GPU produces one partial sum per row; they are added up here.
    public func L2(texture : MTLTexture) -> Float {
        return runPerRow(computeL2State!, texture: texture).reduce(Float(0), combine: +)
    }

    // Max computes the max value in the texture. It requires a 2d float texture array as an input.
    // The GPU produces one maximum per row; the largest of them is returned.
    func Max(texture : MTLTexture) -> Float {
        let rowMaxima = runPerRow(computeMaxState!, texture: texture)
        return rowMaxima.reduce(rowMaxima[0]) { (best, value) in value > best ? value : best }
    }

    // Loads the image resource `named` and turns it into a Metal texture.
    // Returns nil if the image is missing, has no CGImage backing, or the
    // texture loader fails (the failure is printed).
    public func GetResourceAsMetalTexture(named: String) -> MTLTexture? {
        guard let image = UIImage(named: named), cgImage = image.CGImage else {
            return nil
        }
        let textureLoader = MTKTextureLoader(device: metalDevice!)
        do {
            return try textureLoader.newTextureWithCGImage(cgImage, options: nil)
        } catch {
            print("GetResourceAsMetalTexture(\(named)): failed to create a metal texture out of CGImage")
            return nil
        }
    }

    // Returns the 8x8 corner of the first slice of `input` as 64 floats in
    // row-major order.
    public func Sample8x8(input : MTLTexture) -> [Float] {
        return runAndReadFloats(sample8x8State!, texture: input, count: 8 * 8,
            threadsPerThreadgroup: MTLSizeMake(8, 8, 1), threadgroupsPerGrid: MTLSizeMake(1, 1, 1))
    }

    // Copies pixel (0,0) of every slice of `input` into an array with one
    // float per slice.
    public func ExtractResult(input: MTLTexture) -> [Float] {
        let state = LoadKernelState("array1x1_to_buffer_0")
        let threadsPerThreadgroup = MTLSizeMake(32, 1, 1)
        let threadgroupsPerGrid = MTLSizeMake(
            groupsToCover(input.arrayLength, threadsPerThreadgroup.width), 1, 1)
        return runAndReadFloats(state, texture: input, count: input.arrayLength,
            threadsPerThreadgroup: threadsPerThreadgroup, threadgroupsPerGrid: threadgroupsPerGrid)
    }

    // Takes a buffer of float32 values and returns the raw bytes of the same
    // values converted to half precision (input.length / 2 bytes).
    // NOTE(review): the "float2half" kernel bounds its work with a hard-coded
    // element count rather than the length of `input`; inputs of a different
    // size are not handled safely by the kernel.
    public func Float2Half(input: MTLBuffer) -> [UInt8] {
        let commandBuffer = commandQueue!.commandBuffer()
        let commandEncoder = commandBuffer.computeCommandEncoder()
        let state = LoadKernelState("float2half")
        commandEncoder.setComputePipelineState(state)
        commandEncoder.setBuffer(input, offset: 0, atIndex: 0)
        let resBuf = metalDevice!.newBufferWithLength(input.length/2, options: .StorageModeShared)
        commandEncoder.setBuffer(resBuf, offset: 0, atIndex: 1)
        let threadsPerThreadgroup = MTLSizeMake(128, 1, 1)
        // One thread per float in the input.
        let threadgroupsPerGrid = MTLSizeMake(
            groupsToCover(input.length / 4, threadsPerThreadgroup.width), 1, 1)
        commandEncoder.dispatchThreadgroups(threadgroupsPerGrid, threadsPerThreadgroup: threadsPerThreadgroup)
        commandEncoder.endEncoding()
        commandBuffer.commit()
        commandBuffer.waitUntilCompleted()

        let resArr = UnsafeMutablePointer<UInt8>(resBuf.contents())
        return Array(UnsafeBufferPointer(start: resArr, count: resBuf.length))
    }
}

--------------------------------------------------------------------------------
/MetalDetector/GoogLeNet.data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krasin/MetalDetector/2d204912d742ea366b624cdf556ea6297e7ea669/MetalDetector/GoogLeNet.data
--------------------------------------------------------------------------------
/MetalDetector/GoogLeNet.gen.swift:
--------------------------------------------------------------------------------
1 | // This file is generated. Do not edit unless you have no access to the generator.
2 | 3 | import Foundation 4 | import Metal 5 | 6 | public class GoogLeNetConfig : NetConfig { 7 | public init() {} 8 | 9 | public func CreateBlobs(device: MTLDevice) -> [String: MTLTexture] { 10 | var blobs : [String: MTLTexture] = [:] 11 | blobs["data"] = initBlob(device, channels: 3, height: 224, width: 224) 12 | blobs["conv1_7x7_s2"] = initBlob(device, channels: 64, height: 112, width: 112) 13 | blobs["pool1_3x3_s2"] = initBlob(device, channels: 64, height: 56, width: 56) 14 | blobs["pool1_norm1"] = initBlob(device, channels: 64, height: 56, width: 56) 15 | blobs["conv2_3x3_reduce"] = initBlob(device, channels: 64, height: 56, width: 56) 16 | blobs["conv2_3x3"] = initBlob(device, channels: 192, height: 56, width: 56) 17 | blobs["conv2_norm2"] = initBlob(device, channels: 192, height: 56, width: 56) 18 | blobs["pool2_3x3_s2"] = initBlob(device, channels: 192, height: 28, width: 28) 19 | blobs["inception_3a_output"] = initBlob(device, channels: 256, height: 28, width: 28) 20 | blobs["inception_3a_1x1"] = subBlob(blobs["inception_3a_output"]!, from: 0, to: 64) // 64 21 | blobs["inception_3a_3x3_reduce"] = initBlob(device, channels: 96, height: 28, width: 28) 22 | blobs["inception_3a_3x3"] = subBlob(blobs["inception_3a_output"]!, from: 64, to: 192) // 128 23 | blobs["inception_3a_5x5_reduce"] = initBlob(device, channels: 16, height: 28, width: 28) 24 | blobs["inception_3a_5x5"] = subBlob(blobs["inception_3a_output"]!, from: 192, to: 224) // 32 25 | blobs["inception_3a_pool"] = initBlob(device, channels: 192, height: 28, width: 28) 26 | blobs["inception_3a_pool_proj"] = subBlob(blobs["inception_3a_output"]!, from: 224, to: 256) // 32 27 | blobs["inception_3b_output"] = initBlob(device, channels: 480, height: 28, width: 28) 28 | blobs["inception_3b_1x1"] = subBlob(blobs["inception_3b_output"]!, from: 0, to: 128) // 128 29 | blobs["inception_3b_3x3_reduce"] = initBlob(device, channels: 128, height: 28, width: 28) 30 | blobs["inception_3b_3x3"] = 
subBlob(blobs["inception_3b_output"]!, from: 128, to: 320) // 192 31 | blobs["inception_3b_5x5_reduce"] = initBlob(device, channels: 32, height: 28, width: 28) 32 | blobs["inception_3b_5x5"] = subBlob(blobs["inception_3b_output"]!, from: 320, to: 416) // 96 33 | blobs["inception_3b_pool"] = initBlob(device, channels: 256, height: 28, width: 28) 34 | blobs["inception_3b_pool_proj"] = subBlob(blobs["inception_3b_output"]!, from: 416, to: 480) // 64 35 | blobs["pool3_3x3_s2"] = initBlob(device, channels: 480, height: 14, width: 14) 36 | blobs["inception_4a_output"] = initBlob(device, channels: 512, height: 14, width: 14) 37 | blobs["inception_4a_1x1"] = subBlob(blobs["inception_4a_output"]!, from: 0, to: 192) // 192 38 | blobs["inception_4a_3x3_reduce"] = initBlob(device, channels: 96, height: 14, width: 14) 39 | blobs["inception_4a_3x3"] = subBlob(blobs["inception_4a_output"]!, from: 192, to: 400) // 208 40 | blobs["inception_4a_5x5_reduce"] = initBlob(device, channels: 16, height: 14, width: 14) 41 | blobs["inception_4a_5x5"] = subBlob(blobs["inception_4a_output"]!, from: 400, to: 448) // 48 42 | blobs["inception_4a_pool"] = initBlob(device, channels: 480, height: 14, width: 14) 43 | blobs["inception_4a_pool_proj"] = subBlob(blobs["inception_4a_output"]!, from: 448, to: 512) // 64 44 | blobs["inception_4b_output"] = initBlob(device, channels: 512, height: 14, width: 14) 45 | blobs["inception_4b_1x1"] = subBlob(blobs["inception_4b_output"]!, from: 0, to: 160) // 160 46 | blobs["inception_4b_3x3_reduce"] = initBlob(device, channels: 112, height: 14, width: 14) 47 | blobs["inception_4b_3x3"] = subBlob(blobs["inception_4b_output"]!, from: 160, to: 384) // 224 48 | blobs["inception_4b_5x5_reduce"] = initBlob(device, channels: 24, height: 14, width: 14) 49 | blobs["inception_4b_5x5"] = subBlob(blobs["inception_4b_output"]!, from: 384, to: 448) // 64 50 | blobs["inception_4b_pool"] = initBlob(device, channels: 512, height: 14, width: 14) 51 | 
blobs["inception_4b_pool_proj"] = subBlob(blobs["inception_4b_output"]!, from: 448, to: 512) // 64 52 | blobs["inception_4c_output"] = initBlob(device, channels: 512, height: 14, width: 14) 53 | blobs["inception_4c_1x1"] = subBlob(blobs["inception_4c_output"]!, from: 0, to: 128) // 128 54 | blobs["inception_4c_3x3_reduce"] = initBlob(device, channels: 128, height: 14, width: 14) 55 | blobs["inception_4c_3x3"] = subBlob(blobs["inception_4c_output"]!, from: 128, to: 384) // 256 56 | blobs["inception_4c_5x5_reduce"] = initBlob(device, channels: 24, height: 14, width: 14) 57 | blobs["inception_4c_5x5"] = subBlob(blobs["inception_4c_output"]!, from: 384, to: 448) // 64 58 | blobs["inception_4c_pool"] = initBlob(device, channels: 512, height: 14, width: 14) 59 | blobs["inception_4c_pool_proj"] = subBlob(blobs["inception_4c_output"]!, from: 448, to: 512) // 64 60 | blobs["inception_4d_output"] = initBlob(device, channels: 528, height: 14, width: 14) 61 | blobs["inception_4d_1x1"] = subBlob(blobs["inception_4d_output"]!, from: 0, to: 112) // 112 62 | blobs["inception_4d_3x3_reduce"] = initBlob(device, channels: 144, height: 14, width: 14) 63 | blobs["inception_4d_3x3"] = subBlob(blobs["inception_4d_output"]!, from: 112, to: 400) // 288 64 | blobs["inception_4d_5x5_reduce"] = initBlob(device, channels: 32, height: 14, width: 14) 65 | blobs["inception_4d_5x5"] = subBlob(blobs["inception_4d_output"]!, from: 400, to: 464) // 64 66 | blobs["inception_4d_pool"] = initBlob(device, channels: 512, height: 14, width: 14) 67 | blobs["inception_4d_pool_proj"] = subBlob(blobs["inception_4d_output"]!, from: 464, to: 528) // 64 68 | blobs["inception_4e_output"] = initBlob(device, channels: 832, height: 14, width: 14) 69 | blobs["inception_4e_1x1"] = subBlob(blobs["inception_4e_output"]!, from: 0, to: 256) // 256 70 | blobs["inception_4e_3x3_reduce"] = initBlob(device, channels: 160, height: 14, width: 14) 71 | blobs["inception_4e_3x3"] = subBlob(blobs["inception_4e_output"]!, from: 256, 
to: 576) // 320 72 | blobs["inception_4e_5x5_reduce"] = initBlob(device, channels: 32, height: 14, width: 14) 73 | blobs["inception_4e_5x5"] = subBlob(blobs["inception_4e_output"]!, from: 576, to: 704) // 128 74 | blobs["inception_4e_pool"] = initBlob(device, channels: 528, height: 14, width: 14) 75 | blobs["inception_4e_pool_proj"] = subBlob(blobs["inception_4e_output"]!, from: 704, to: 832) // 128 76 | blobs["pool4_3x3_s2"] = initBlob(device, channels: 832, height: 7, width: 7) 77 | blobs["inception_5a_output"] = initBlob(device, channels: 832, height: 7, width: 7) 78 | blobs["inception_5a_1x1"] = subBlob(blobs["inception_5a_output"]!, from: 0, to: 256) // 256 79 | blobs["inception_5a_3x3_reduce"] = initBlob(device, channels: 160, height: 7, width: 7) 80 | blobs["inception_5a_3x3"] = subBlob(blobs["inception_5a_output"]!, from: 256, to: 576) // 320 81 | blobs["inception_5a_5x5_reduce"] = initBlob(device, channels: 32, height: 7, width: 7) 82 | blobs["inception_5a_5x5"] = subBlob(blobs["inception_5a_output"]!, from: 576, to: 704) // 128 83 | blobs["inception_5a_pool"] = initBlob(device, channels: 832, height: 7, width: 7) 84 | blobs["inception_5a_pool_proj"] = subBlob(blobs["inception_5a_output"]!, from: 704, to: 832) // 128 85 | blobs["inception_5b_output"] = initBlob(device, channels: 1024, height: 7, width: 7) 86 | blobs["inception_5b_1x1"] = subBlob(blobs["inception_5b_output"]!, from: 0, to: 384) // 384 87 | blobs["inception_5b_3x3_reduce"] = initBlob(device, channels: 192, height: 7, width: 7) 88 | blobs["inception_5b_3x3"] = subBlob(blobs["inception_5b_output"]!, from: 384, to: 768) // 384 89 | blobs["inception_5b_5x5_reduce"] = initBlob(device, channels: 48, height: 7, width: 7) 90 | blobs["inception_5b_5x5"] = subBlob(blobs["inception_5b_output"]!, from: 768, to: 896) // 128 91 | blobs["inception_5b_pool"] = initBlob(device, channels: 832, height: 7, width: 7) 92 | blobs["inception_5b_pool_proj"] = subBlob(blobs["inception_5b_output"]!, from: 896, to: 
1024) // 128 93 | blobs["pool5_7x7_s1"] = initBlob(device, channels: 1024, height: 1, width: 1) 94 | blobs["loss3_classifier"] = initBlob(device, channels: 1000, height: 1, width: 1) 95 | blobs["prob"] = initBlob(device, channels: 1000, height: 1, width: 1) 96 | return blobs 97 | } 98 | 99 | public func CreateWeights(engine: Engine) -> [String: MTLBuffer] { 100 | let device = engine.metalDevice! 101 | var res: [String: MTLBuffer] = [:] 102 | let data = getHalfArrayFromBundle(engine, named: "GoogLeNet.data") 103 | let ptr = UnsafePointer(data) 104 | res["conv1_7x7_s2"] = device.newBufferWithBytes(ptr.advancedBy(0), length: 18816, options: .StorageModeShared) 105 | res["conv2_3x3_reduce"] = device.newBufferWithBytes(ptr.advancedBy(18816), length: 8192, options: .StorageModeShared) 106 | res["conv2_3x3"] = device.newBufferWithBytes(ptr.advancedBy(27008), length: 221184, options: .StorageModeShared) 107 | res["inception_3a_1x1"] = device.newBufferWithBytes(ptr.advancedBy(248192), length: 24576, options: .StorageModeShared) 108 | res["inception_3a_3x3_reduce"] = device.newBufferWithBytes(ptr.advancedBy(272768), length: 36864, options: .StorageModeShared) 109 | res["inception_3a_3x3"] = device.newBufferWithBytes(ptr.advancedBy(309632), length: 221184, options: .StorageModeShared) 110 | res["inception_3a_5x5_reduce"] = device.newBufferWithBytes(ptr.advancedBy(530816), length: 6144, options: .StorageModeShared) 111 | res["inception_3a_5x5"] = device.newBufferWithBytes(ptr.advancedBy(536960), length: 25600, options: .StorageModeShared) 112 | res["inception_3a_pool_proj"] = device.newBufferWithBytes(ptr.advancedBy(562560), length: 12288, options: .StorageModeShared) 113 | res["inception_3b_1x1"] = device.newBufferWithBytes(ptr.advancedBy(574848), length: 65536, options: .StorageModeShared) 114 | res["inception_3b_3x3_reduce"] = device.newBufferWithBytes(ptr.advancedBy(640384), length: 65536, options: .StorageModeShared) 115 | res["inception_3b_3x3"] = 
device.newBufferWithBytes(ptr.advancedBy(705920), length: 442368, options: .StorageModeShared) 116 | res["inception_3b_5x5_reduce"] = device.newBufferWithBytes(ptr.advancedBy(1148288), length: 16384, options: .StorageModeShared) 117 | res["inception_3b_5x5"] = device.newBufferWithBytes(ptr.advancedBy(1164672), length: 153600, options: .StorageModeShared) 118 | res["inception_3b_pool_proj"] = device.newBufferWithBytes(ptr.advancedBy(1318272), length: 32768, options: .StorageModeShared) 119 | res["inception_4a_1x1"] = device.newBufferWithBytes(ptr.advancedBy(1351040), length: 184320, options: .StorageModeShared) 120 | res["inception_4a_3x3_reduce"] = device.newBufferWithBytes(ptr.advancedBy(1535360), length: 92160, options: .StorageModeShared) 121 | res["inception_4a_3x3"] = device.newBufferWithBytes(ptr.advancedBy(1627520), length: 359424, options: .StorageModeShared) 122 | res["inception_4a_5x5_reduce"] = device.newBufferWithBytes(ptr.advancedBy(1986944), length: 15360, options: .StorageModeShared) 123 | res["inception_4a_5x5"] = device.newBufferWithBytes(ptr.advancedBy(2002304), length: 38400, options: .StorageModeShared) 124 | res["inception_4a_pool_proj"] = device.newBufferWithBytes(ptr.advancedBy(2040704), length: 61440, options: .StorageModeShared) 125 | res["inception_4b_1x1"] = device.newBufferWithBytes(ptr.advancedBy(2102144), length: 163840, options: .StorageModeShared) 126 | res["inception_4b_3x3_reduce"] = device.newBufferWithBytes(ptr.advancedBy(2265984), length: 114688, options: .StorageModeShared) 127 | res["inception_4b_3x3"] = device.newBufferWithBytes(ptr.advancedBy(2380672), length: 451584, options: .StorageModeShared) 128 | res["inception_4b_5x5_reduce"] = device.newBufferWithBytes(ptr.advancedBy(2832256), length: 24576, options: .StorageModeShared) 129 | res["inception_4b_5x5"] = device.newBufferWithBytes(ptr.advancedBy(2856832), length: 76800, options: .StorageModeShared) 130 | res["inception_4b_pool_proj"] = 
device.newBufferWithBytes(ptr.advancedBy(2933632), length: 65536, options: .StorageModeShared) 131 | res["inception_4c_1x1"] = device.newBufferWithBytes(ptr.advancedBy(2999168), length: 131072, options: .StorageModeShared) 132 | res["inception_4c_3x3_reduce"] = device.newBufferWithBytes(ptr.advancedBy(3130240), length: 131072, options: .StorageModeShared) 133 | res["inception_4c_3x3"] = device.newBufferWithBytes(ptr.advancedBy(3261312), length: 589824, options: .StorageModeShared) 134 | res["inception_4c_5x5_reduce"] = device.newBufferWithBytes(ptr.advancedBy(3851136), length: 24576, options: .StorageModeShared) 135 | res["inception_4c_5x5"] = device.newBufferWithBytes(ptr.advancedBy(3875712), length: 76800, options: .StorageModeShared) 136 | res["inception_4c_pool_proj"] = device.newBufferWithBytes(ptr.advancedBy(3952512), length: 65536, options: .StorageModeShared) 137 | res["inception_4d_1x1"] = device.newBufferWithBytes(ptr.advancedBy(4018048), length: 114688, options: .StorageModeShared) 138 | res["inception_4d_3x3_reduce"] = device.newBufferWithBytes(ptr.advancedBy(4132736), length: 147456, options: .StorageModeShared) 139 | res["inception_4d_3x3"] = device.newBufferWithBytes(ptr.advancedBy(4280192), length: 746496, options: .StorageModeShared) 140 | res["inception_4d_5x5_reduce"] = device.newBufferWithBytes(ptr.advancedBy(5026688), length: 32768, options: .StorageModeShared) 141 | res["inception_4d_5x5"] = device.newBufferWithBytes(ptr.advancedBy(5059456), length: 102400, options: .StorageModeShared) 142 | res["inception_4d_pool_proj"] = device.newBufferWithBytes(ptr.advancedBy(5161856), length: 65536, options: .StorageModeShared) 143 | res["inception_4e_1x1"] = device.newBufferWithBytes(ptr.advancedBy(5227392), length: 270336, options: .StorageModeShared) 144 | res["inception_4e_3x3_reduce"] = device.newBufferWithBytes(ptr.advancedBy(5497728), length: 168960, options: .StorageModeShared) 145 | res["inception_4e_3x3"] = 
device.newBufferWithBytes(ptr.advancedBy(5666688), length: 921600, options: .StorageModeShared) 146 | res["inception_4e_5x5_reduce"] = device.newBufferWithBytes(ptr.advancedBy(6588288), length: 33792, options: .StorageModeShared) 147 | res["inception_4e_5x5"] = device.newBufferWithBytes(ptr.advancedBy(6622080), length: 204800, options: .StorageModeShared) 148 | res["inception_4e_pool_proj"] = device.newBufferWithBytes(ptr.advancedBy(6826880), length: 135168, options: .StorageModeShared) 149 | res["inception_5a_1x1"] = device.newBufferWithBytes(ptr.advancedBy(6962048), length: 425984, options: .StorageModeShared) 150 | res["inception_5a_3x3_reduce"] = device.newBufferWithBytes(ptr.advancedBy(7388032), length: 266240, options: .StorageModeShared) 151 | res["inception_5a_3x3"] = device.newBufferWithBytes(ptr.advancedBy(7654272), length: 921600, options: .StorageModeShared) 152 | res["inception_5a_5x5_reduce"] = device.newBufferWithBytes(ptr.advancedBy(8575872), length: 53248, options: .StorageModeShared) 153 | res["inception_5a_5x5"] = device.newBufferWithBytes(ptr.advancedBy(8629120), length: 204800, options: .StorageModeShared) 154 | res["inception_5a_pool_proj"] = device.newBufferWithBytes(ptr.advancedBy(8833920), length: 212992, options: .StorageModeShared) 155 | res["inception_5b_1x1"] = device.newBufferWithBytes(ptr.advancedBy(9046912), length: 638976, options: .StorageModeShared) 156 | res["inception_5b_3x3_reduce"] = device.newBufferWithBytes(ptr.advancedBy(9685888), length: 319488, options: .StorageModeShared) 157 | res["inception_5b_3x3"] = device.newBufferWithBytes(ptr.advancedBy(10005376), length: 1327104, options: .StorageModeShared) 158 | res["inception_5b_5x5_reduce"] = device.newBufferWithBytes(ptr.advancedBy(11332480), length: 79872, options: .StorageModeShared) 159 | res["inception_5b_5x5"] = device.newBufferWithBytes(ptr.advancedBy(11412352), length: 307200, options: .StorageModeShared) 160 | res["inception_5b_pool_proj"] = 
device.newBufferWithBytes(ptr.advancedBy(11719552), length: 212992, options: .StorageModeShared) 161 | res["loss3_classifier"] = device.newBufferWithBytes(ptr.advancedBy(11932544), length: 2048000, options: .StorageModeShared) 162 | 163 | return res 164 | } 165 | 166 |
    /// Returns the layers of the network in the order they are listed here.
    ///
    /// Every layer uses a single shard and writes to a blob named after itself.
    /// Layers that have weights reference them under their own name; the others
    /// carry an empty `weights` string. The `*_output` layers are the only ones
    /// with more than one bottom.
    ///
    /// "loss3_classifier" and "prob" are intentionally left out of the list
    /// (see the commented-out entries at the end).
    public func GetLayers() -> [NetLayer] {
        // Builds one layer whose name, top blob and (optionally) weights key are identical.
        func layer(name: String, _ bottoms: [String], weighted: Bool) -> NetLayer {
            return NetLayer(name: name, weights: weighted ? name : "", shards: 1, top: name, bottoms: bottoms)
        }

        // Builds the eight layers of the inception module `module` (e.g. "3a") fed by blob `input`.
        func inception(module: String, _ input: String) -> [NetLayer] {
            let stem = "inception_\(module)_"
            return [
                layer(stem + "1x1", [input], weighted: true),
                layer(stem + "3x3_reduce", [input], weighted: true),
                layer(stem + "3x3", [stem + "3x3_reduce"], weighted: true),
                layer(stem + "5x5_reduce", [input], weighted: true),
                layer(stem + "5x5", [stem + "5x5_reduce"], weighted: true),
                layer(stem + "pool", [input], weighted: false),
                layer(stem + "pool_proj", [stem + "pool"], weighted: true),
                layer(stem + "output",
                    [stem + "1x1", stem + "3x3", stem + "5x5", stem + "pool_proj"],
                    weighted: false),
            ]
        }

        var layers: [NetLayer] = [
            layer("conv1_7x7_s2", ["data"], weighted: true),
            layer("pool1_3x3_s2", ["conv1_7x7_s2"], weighted: false),
            layer("pool1_norm1", ["pool1_3x3_s2"], weighted: false),
            layer("conv2_3x3_reduce", ["pool1_norm1"], weighted: true),
            layer("conv2_3x3", ["conv2_3x3_reduce"], weighted: true),
            layer("conv2_norm2", ["conv2_3x3"], weighted: false),
            layer("pool2_3x3_s2", ["conv2_norm2"], weighted: false),
        ]

        layers += inception("3a", "pool2_3x3_s2")
        layers += inception("3b", "inception_3a_output")
        layers.append(layer("pool3_3x3_s2", ["inception_3b_output"], weighted: false))

        layers += inception("4a", "pool3_3x3_s2")
        layers += inception("4b", "inception_4a_output")
        layers += inception("4c", "inception_4b_output")
        layers += inception("4d", "inception_4c_output")
        layers += inception("4e", "inception_4d_output")
        layers.append(layer("pool4_3x3_s2", ["inception_4e_output"], weighted: false))

        layers += inception("5a", "pool4_3x3_s2")
        layers += inception("5b", "inception_5a_output")
        layers.append(layer("pool5_7x7_s1", ["inception_5b_output"], weighted: false))

        // NetLayer(name: "loss3_classifier", weights: "loss3_classifier", shards: 1, top: "loss3_classifier", bottoms: ["pool5_7x7_s1"]),
        // NetLayer(name: "prob", weights: "", shards: 1, top: "prob", bottoms: ["loss3_classifier"]),
        return layers
    }
}
255 | -------------------------------------------------------------------------------- /MetalDetector/GoogLeNetProfile.swift: -------------------------------------------------------------------------------- 1 | // 2 | // GoogLeNetProfile.swift 3 | // MetalDetector 4 | // 5 | // Created by Ivan Krasin on 10/31/15. 6 | // Copyright © 2015 Ivan Krasin. All rights reserved.
7 | //

import Foundation
import Metal

/// Profiled compute threadgroup sizes for the GoogLeNet layers.
public class GoogLeNetProfile {
    /// Returns the threadgroup size to use for each layer, keyed by layer name.
    ///
    /// Each trailing comment records the measured time for the chosen size
    /// against the reference time that the profiler output labels "16x16x1".
    public static func GetThreadsPerThreadgroup() -> [String: MTLSize] {
        // (layer name, threadgroup width, threadgroup height); depth is 1 for every entry.
        let profiled: [(String, Int, Int)] = [
            ("conv1_7x7_s2", 4, 1),              // 15.8 ms vs 27.5 ms for 16x16x1
            ("pool1_3x3_s2", 16, 16),            // 2.0 ms vs 2.0 ms for 16x16x1
            ("pool1_norm1", 3, 4),               // 2.2 ms vs 2.3 ms for 16x16x1
            ("conv2_3x3_reduce", 4, 1),          // 5.3 ms vs 5.9 ms for 16x16x1
            ("conv2_3x3", 4, 1),                 // 59.4 ms vs 66.5 ms for 16x16x1
            ("conv2_norm2", 16, 16),             // 2.2 ms vs 2.2 ms for 16x16x1
            ("pool2_3x3_s2", 2, 4),              // 3.2 ms vs 3.6 ms for 16x16x1
            ("inception_3a_1x1", 4, 4),          // 5.6 ms vs 6.1 ms for 16x16x1
            ("inception_3a_3x3_reduce", 4, 4),   // 7.3 ms vs 8.2 ms for 16x16x1
            ("inception_3a_3x3", 3, 1),          // 16.8 ms vs 34.9 ms for 16x16x1
            ("inception_3a_5x5_reduce", 8, 8),   // 1.9 ms vs 2.0 ms for 16x16x1
            ("inception_3a_5x5", 2, 4),          // 4.6 ms vs 6.0 ms for 16x16x1
            ("inception_3a_pool", 2, 4),         // 3.2 ms vs 3.5 ms for 16x16x1
            ("inception_3a_pool_proj", 2, 4),    // 3.2 ms vs 4.2 ms for 16x16x1
            ("inception_3b_1x1", 4, 3),          // 10.3 ms vs 12.4 ms for 16x16x1
            ("inception_3b_3x3_reduce", 4, 2),   // 10.3 ms vs 12.2 ms for 16x16x1
            ("inception_3b_3x3", 4, 8),          // 29.9 ms vs 36.5 ms for 16x16x1
            ("inception_3b_5x5_reduce", 8, 8),   // 2.6 ms vs 2.9 ms for 16x16x1
            ("inception_3b_5x5", 4, 4),          // 15.9 ms vs 30.5 ms for 16x16x1
            ("inception_3b_pool", 16, 16),       // 2.8 ms vs 2.8 ms for 16x16x1
            ("inception_3b_pool_proj", 16, 8),   // 7.2 ms vs 7.8 ms for 16x16x1
            ("pool3_3x3_s2", 1, 2),              // 5.3 ms vs 5.6 ms for 16x16x1
            ("inception_4a_1x1", 1, 1),          // 18.0 ms vs 28.2 ms for 16x16x1
            ("inception_4a_3x3_reduce", 8, 8),   // 11.8 ms vs 12.7 ms for 16x16x1
            ("inception_4a_3x3", 3, 1),          // 25.3 ms vs 34.9 ms for 16x16x1
            ("inception_4a_5x5_reduce", 8, 8),   // 2.2 ms vs 2.5 ms for 16x16x1
            ("inception_4a_5x5", 2, 1),          // 6.2 ms vs 8.0 ms for 16x16x1
            ("inception_4a_pool", 8, 4),         // 4.9 ms vs 5.3 ms for 16x16x1
            ("inception_4a_pool_proj", 2, 1),    // 9.2 ms vs 10.8 ms for 16x16x1
            ("inception_4b_1x1", 1, 1),          // 17.3 ms vs 25.4 ms for 16x16x1
            ("inception_4b_3x3_reduce", 16, 16), // 14.0 ms vs 14.0 ms for 16x16x1
            ("inception_4b_3x3", 4, 8),          // 32.0 ms vs 36.0 ms for 16x16x1
            ("inception_4b_5x5_reduce", 8, 8),   // 2.8 ms vs 3.2 ms for 16x16x1
            ("inception_4b_5x5", 2, 1),          // 10.7 ms vs 13.6 ms for 16x16x1
            ("inception_4b_pool", 16, 16),       // 5.5 ms vs 5.5 ms for 16x16x1
            ("inception_4b_pool_proj", 8, 8),    // 9.6 ms vs 11.5 ms for 16x16x1
            ("inception_4c_1x1", 2, 4),          // 17.4 ms vs 20.5 ms for 16x16x1
            ("inception_4c_3x3_reduce", 8, 4),   // 17.2 ms vs 21.7 ms for 16x16x1
            ("inception_4c_3x3", 3, 1),          // 43.5 ms vs 46.5 ms for 16x16x1
            ("inception_4c_5x5_reduce", 8, 8),   // 2.7 ms vs 3.3 ms for 16x16x1
            ("inception_4c_5x5", 2, 1),          // 10.7 ms vs 14.0 ms for 16x16x1
            ("inception_4c_pool", 16, 16),       // 5.5 ms vs 5.5 ms for 16x16x1
            ("inception_4c_pool_proj", 8, 16),   // 9.6 ms vs 11.5 ms for 16x16x1
            ("inception_4d_1x1", 1, 1),          // 16.0 ms vs 19.1 ms for 16x16x1
            ("inception_4d_3x3_reduce", 8, 16),  // 17.2 ms vs 23.0 ms for 16x16x1
            ("inception_4d_3x3", 8, 8),          // 59.0 ms vs 59.0 ms for 16x16x1
            ("inception_4d_5x5_reduce", 8, 8),   // 2.9 ms vs 4.0 ms for 16x16x1
            ("inception_4d_5x5", 3, 1),          // 13.6 ms vs 17.8 ms for 16x16x1
            ("inception_4d_pool", 8, 8),         // 5.3 ms vs 5.6 ms for 16x16x1
            ("inception_4d_pool_proj", 4, 4),    // 9.7 ms vs 11.4 ms for 16x16x1
            ("inception_4e_1x1", 1, 2),          // 18.4 ms vs 32.0 ms for 16x16x1
            ("inception_4e_3x3_reduce", 16, 16), // 14.7 ms vs 14.7 ms for 16x16x1
            ("inception_4e_3x3", 3, 1),          // 64.5 ms vs 71.5 ms for 16x16x1
            ("inception_4e_5x5_reduce", 8, 8),   // 2.9 ms vs 3.9 ms for 16x16x1
            ("inception_4e_5x5", 2, 2),          // 17.5 ms vs 34.4 ms for 16x16x1
            ("inception_4e_pool", 16, 16),       // 4.1 ms vs 4.1 ms for 16x16x1
            ("inception_4e_pool_proj", 1, 1),    // 17.9 ms vs 21.5 ms for 16x16x1
            ("pool4_3x3_s2", 3, 3),              // 7.0 ms vs 7.8 ms for 16x16x1
            ("inception_5a_1x1", 8, 4),          // 26.9 ms vs 38.6 ms for 16x16x1
            ("inception_5a_3x3_reduce", 16, 16), // 16.3 ms vs 16.3 ms for 16x16x1
            ("inception_5a_3x3", 8, 4),          // 60.5 ms vs 64.1 ms for 16x16x1
            ("inception_5a_5x5_reduce", 16, 16), // 3.5 ms vs 3.5 ms for 16x16x1
            ("inception_5a_5x5", 4, 4),          // 15.2 ms vs 26.4 ms for 16x16x1
            ("inception_5a_pool", 3, 1),         // 6.5 ms vs 7.2 ms for 16x16x1
            ("inception_5a_pool_proj", 3, 3),    // 17.8 ms vs 25.4 ms for 16x16x1
            ("inception_5b_1x1", 8, 8),          // 46.9 ms vs 46.9 ms for 16x16x1
            ("inception_5b_3x3_reduce", 4, 4),   // 18.4 ms vs 20.4 ms for 16x16x1
            ("inception_5b_3x3", 2, 4),          // 90.0 ms vs 96.4 ms for 16x16x1
            ("inception_5b_5x5_reduce", 16, 16), // 5.9 ms vs 5.9 ms for 16x16x1
            ("inception_5b_5x5", 3, 3),          // 17.1 ms vs 31.2 ms for 16x16x1
            ("inception_5b_pool", 16, 16),       // 5.0 ms vs 5.0 ms for 16x16x1
            ("inception_5b_pool_proj", 2, 1),    // 17.5 ms vs 25.2 ms for 16x16x1
            ("pool5_7x7_s1", 4, 4),              // 16.8 ms vs 22.3 ms for 16x16x1
        ]

        var sizes = [String: MTLSize](minimumCapacity: profiled.count)
        for (layerName, width, height) in profiled {
            sizes[layerName] = MTLSizeMake(width, height, 1)
        }
        return sizes
    }
}
91 | -------------------------------------------------------------------------------- /MetalDetector/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | en 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | APPL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleSignature 20 | ???? 21 | CFBundleVersion 22 | 1 23 | LSRequiresIPhoneOS 24 | 25 | UILaunchStoryboardName 26 | LaunchScreen 27 | UIMainStoryboardFile 28 | Main 29 | UIRequiredDeviceCapabilities 30 | 31 | armv7 32 | 33 | UISupportedInterfaceOrientations 34 | 35 | UIInterfaceOrientationPortrait 36 | UIInterfaceOrientationLandscapeLeft 37 | UIInterfaceOrientationLandscapeRight 38 | 39 | UISupportedInterfaceOrientations~ipad 40 | 41 | UIInterfaceOrientationPortrait 42 | UIInterfaceOrientationPortraitUpsideDown 43 | UIInterfaceOrientationLandscapeLeft 44 | UIInterfaceOrientationLandscapeRight 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /MetalDetector/Net.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Net.swift 3 | // MetalDetector 4 | // 5 | // Created by Ivan Krasin on 10/8/15. 6 | // Copyright © 2015 Ivan Krasin. All rights reserved.
7 | //

import Foundation
import Metal

/// Creates a blob: a 2D array texture with one R16Float slice per channel.
///
/// - parameter device: Metal device used to allocate the texture.
/// - parameter channels: number of array slices.
/// - parameter height: height of every slice, in pixels.
/// - parameter width: width of every slice, in pixels.
/// - returns: the newly allocated texture.
func initBlob(device : MTLDevice, channels : Int, height : Int, width : Int) -> MTLTexture {
    let desc = MTLTextureDescriptor()
    desc.textureType = MTLTextureType.Type2DArray
    desc.height = height
    desc.width = width
    desc.pixelFormat = MTLPixelFormat.R16Float
    desc.arrayLength = channels
    return device.newTextureWithDescriptor(desc)
}

/// Copies the bundle resource `named` into a new shared-storage Metal buffer.
/// Terminates the process if the resource cannot be loaded (see `getBytesFromBundle`).
func initBufferFromBundle(device : MTLDevice, named: String) -> MTLBuffer {
    let data = getBytesFromBundle(named)
    return device.newBufferWithBytes(data.bytes, length: data.length, options: .StorageModeShared)
}

/// Returns a texture view over slices `from..<to` of `input` (first mip level only).
func subBlob(input : MTLTexture, from : Int, to : Int) -> MTLTexture {
    return input.newTextureViewWithPixelFormat(MTLPixelFormat.R16Float,
        textureType: MTLTextureType.Type2DArray,
        levels: NSMakeRange(0, 1),
        slices: NSMakeRange(from, to-from))
}

/// Loads the raw contents of the main-bundle resource `named`.
///
/// Prints a diagnostic and terminates the process with exit code 1 when the
/// resource is missing or unreadable.
public func getBytesFromBundle(named: String) -> NSData {
    guard let path = NSBundle.mainBundle().pathForResource(named, ofType: "") else {
        print("Resource \(named) not found in the main bundle")
        exit(1)
    }
    // `path` is unwrapped before interpolation so the log shows the path itself
    // rather than "Optional(...)".
    guard let data = NSData(contentsOfFile: path) else {
        print("Could not read from file \(path)")
        exit(1)
    }
    print("\(data.length) bytes loaded from file \(path)")
    return data
}

/// Loads the bundle resource `named` into a shared Metal buffer and returns the
/// result of `engine.Float2Half` for that buffer.
/// NOTE(review): presumably the resource holds 32-bit floats and the result is
/// their packed half-precision bytes — confirm against `Engine.Float2Half`.
public func getHalfArrayFromBundle(engine: Engine, named: String) -> [UInt8] {
    let data = getBytesFromBundle(named)
    print("getHalfArrayFromBundle(\(named)), data.length: \(data.length)")
    // The bytes are handed to Metal directly; no intermediate typed pointer is needed.
    let buf = engine.metalDevice!.newBufferWithBytes(data.bytes, length: data.length, options: .StorageModeShared)
    return engine.Float2Half(buf)
}

/// Reads the main-bundle text resource `named` (UTF-8) and splits it on "\n".
///
/// A trailing newline in the file yields a final empty element. Prints a
/// diagnostic and terminates the process with exit code 1 on failure.
public func loadLabels(named: String) -> [String] {
    guard let path = NSBundle.mainBundle().pathForResource(named, ofType: "") else {
        print("Resource \(named) not found in the main bundle")
        exit(1)
    }
    do {
        let content = try String(contentsOfFile: path, encoding: NSUTF8StringEncoding)
        return content.componentsSeparatedByString("\n")
    } catch let error as NSError {
        // Report the underlying cause instead of discarding it.
        print("Failed to load labels from file \(path): \(error.localizedDescription)")
        exit(1)
    }
}

/// Description of a single network layer.
public struct NetLayer {
    /// Layer name; kernels are looked up as "<name>_<shard index>".
    public var name: String
    /// Key into the weights dictionary, or "" when the layer has no weights.
    public var weights: String
    /// Number of kernel invocations ("shards") encoded for this layer.
    public var shards: Int
    /// Name of the blob the layer writes to.
    public var top: String
    /// Names of the blobs the layer reads from.
    public var bottoms: [String]
}

/// Supplies the layer list, blobs and weights that make up a network.
public protocol NetConfig {
    func GetLayers() -> [NetLayer]
    func CreateBlobs(device: MTLDevice) -> [String: MTLTexture]
    func CreateWeights(engine: Engine) -> [String: MTLBuffer]
}

/// A network built from a `NetConfig` and executed through an `Engine`.
public class Net {
    var engine : Engine
    public var layers : [NetLayer]
    public var blobs : [String: MTLTexture]
    public var weights : [String: MTLBuffer]
    /// Per-layer threadgroup sizes; layers without an entry use 16x16x1.
    public var threadsPerThreadgroup: [String: MTLSize]
    public var labels : [String]

    // NOTE(review): nothing in this file reads or writes L1/L2 — presumably
    // per-layer norms filled in by tests; confirm against callers.
    public var L1 = [String: Float]()
    public var L2 = [String: Float]()

    /// Builds the layers, blobs and weights from `config` and loads the class
    /// labels from "synset_words.txt" in the main bundle.
    public init(engine: Engine, config: NetConfig, threadsPerThreadgroup: [String: MTLSize]) {
        self.engine = engine
        self.layers = config.GetLayers()
        self.blobs = config.CreateBlobs(engine.metalDevice!)
        self.weights = config.CreateWeights(engine)
        self.threadsPerThreadgroup = threadsPerThreadgroup
        self.labels = loadLabels("synset_words.txt")
        print("Loaded \(self.labels.count) labels")
    }

    /// Returns the weights buffer for `layer`, or nil when the layer has none.
    /// Terminates the process if the layer names weights that were not loaded.
    private func weightsForLayer(layer: NetLayer) -> MTLBuffer? {
        if layer.weights == "" {
            return nil
        }
        guard let w = weights[layer.weights] else {
            print("Weights \(layer.weights) for layer \(layer.name) not found")
            exit(1)
        }
        return w
    }

    /// Runs the network on `input` and blocks until the GPU has finished.
    ///
    /// Only layers with exactly one bottom are encoded from the layer list;
    /// "loss3_classifier" and "prob" are encoded explicitly afterwards.
    ///
    /// - returns: the values extracted from the "prob" blob.
    public func forward(input: MTLTexture) -> [Float] {
        let commandBuffer = engine.commandQueue!.commandBuffer()

        engine.Preprocess(commandBuffer, input: input, output: blobs["data"]!)

        for layer in layers {
            if layer.bottoms.count != 1 {
                continue
            }
            let w = weightsForLayer(layer)
            // Fall back to 16x16x1 when the layer has no profiled size.
            let cell = self.threadsPerThreadgroup[layer.name] ?? MTLSizeMake(16, 16, 1)
            // Half-open range: a layer with zero shards encodes nothing instead of trapping.
            for shard in 0..<layer.shards {
                engine.UnaryLayer(commandBuffer,
                    name: "\(layer.name)_\(shard)",
                    weights: w,
                    input: blobs[layer.bottoms[0]]!,
                    output: blobs[layer.top]!,
                    threadsPerThreadgroup: cell)
            }
        }

        engine.PerFilterLayer(commandBuffer, name: "loss3_classifier_0",
            weights: weights["loss3_classifier"]!, numFilters: 1000,
            input: blobs["pool5_7x7_s1"]!, output: blobs["loss3_classifier"]!)
        engine.PerFilterLayer(commandBuffer, name: "prob_0",
            weights: nil, numFilters: 1,
            input: blobs["loss3_classifier"]!, output: blobs["prob"]!)

        commandBuffer.commit()
        commandBuffer.waitUntilCompleted()

        return engine.ExtractResult(blobs["prob"]!)
    }

    /// Returns the first layer named `layerName`, or nil if there is none.
    func FindLayer(layerName: String) -> NetLayer? {
        for layer in layers where layer.name == layerName {
            return layer
        }
        return nil
    }

    /// Measures the average wall-clock time, in nanoseconds, of running `layer`
    /// with the given threadgroup size.
    ///
    /// Two warm-up runs are discarded and the following ten are averaged.
    ///
    /// - returns: nil when the size is not usable (depth other than 1, more
    ///   threads than the kernel allows, or the command buffer reported an error).
    func tryLayer(layer: NetLayer, w: MTLBuffer?, threadsPerThreadgroup: MTLSize) -> Double? {
        if threadsPerThreadgroup.depth != 1 {
            return nil
        }
        let total = threadsPerThreadgroup.width * threadsPerThreadgroup.height * threadsPerThreadgroup.depth
        let state = engine.LoadKernelState("\(layer.name)_0")
        if total > state.maxTotalThreadsPerThreadgroup {
            return nil
        }
        // Conversion factor from mach_absolute_time ticks to nanoseconds.
        var info = mach_timebase_info(numer: 0, denom: 0)
        mach_timebase_info(&info)
        let time_base = Double(info.numer) / Double(info.denom)

        let warmupRuns = 2
        let timedRuns = 10
        var sumWorkTimeNs: Double = 0
        for run in 0..<(warmupRuns + timedRuns) {
            let commandBuffer = engine.commandQueue!.commandBuffer()
            let startTime = mach_absolute_time()
            // Half-open range: a layer with zero shards encodes nothing instead of trapping.
            for shard in 0..<layer.shards {
                engine.UnaryLayer(commandBuffer,
                    name: "\(layer.name)_\(shard)",
                    weights: w,
                    input: blobs[layer.bottoms[0]]!,
                    output: blobs[layer.top]!,
                    threadsPerThreadgroup: threadsPerThreadgroup)
            }
            commandBuffer.commit()
            commandBuffer.waitUntilCompleted()
            let workTimeNs = Double(mach_absolute_time() - startTime) * time_base
            if commandBuffer.error != nil {
                return nil
            }
            if run >= warmupRuns {
                // TODO: exclude outliers, compute std deviation.
                sumWorkTimeNs += workTimeNs
            }
        }
        return sumWorkTimeNs / Double(timedRuns)
    }

    /// Times `layerName` with a fixed list of candidate threadgroup sizes and
    /// prints the best one as a line ready to paste into a profile dictionary.
    ///
    /// Terminates the process if the layer or its weights cannot be found.
    /// Prints a diagnostic and returns if none of the candidates can be run.
    public func ProfileLayer(layerName: String) {
        guard let layer = FindLayer(layerName) else {
            print("ProfileLayer(\"\(layerName)\"): layer not found")
            exit(1)
        }
        let w = weightsForLayer(layer)
        // Order matters: the first candidate that runs becomes the reference timing.
        let cells: [MTLSize] = [ MTLSizeMake(16, 16, 1), MTLSizeMake(8, 8, 1), MTLSizeMake(16, 8, 1),
            MTLSizeMake(8, 16, 1), MTLSizeMake(32, 16, 1), MTLSizeMake(16, 32, 1), MTLSizeMake(32, 32, 1),
            MTLSizeMake(4, 4, 1), MTLSizeMake(8, 4, 1), MTLSizeMake(4, 8, 1), MTLSizeMake(3, 3, 1),
            MTLSizeMake(4, 3, 1), MTLSizeMake(3, 4, 1), MTLSizeMake(4, 2, 1), MTLSizeMake(2, 4, 1),
            MTLSizeMake(2, 2, 1), MTLSizeMake(1, 1, 1), MTLSizeMake(2, 1, 1), MTLSizeMake(1, 2, 1),
            MTLSizeMake(3, 1, 1), MTLSizeMake(4, 1, 1)]

        var firstTimeNs: Double = 0
        var minTimeNs: Double = 0
        var bestCell: MTLSize?
        for cell in cells {
            guard let layerTimeNs = tryLayer(layer, w: w, threadsPerThreadgroup: cell) else {
                continue
            }
            if bestCell == nil {
                minTimeNs = layerTimeNs
                firstTimeNs = layerTimeNs
                bestCell = cell
                continue
            }
            // We want to be conservative about choosing alternative cell size.
            // There must be at least 5% improvement over the reference timing
            // (16x16x1 whenever that size can run for this kernel).
            if 0.95 * firstTimeNs > layerTimeNs && minTimeNs > layerTimeNs {
                minTimeNs = layerTimeNs
                bestCell = cell
            }
        }
        guard let best = bestCell else {
            // Every candidate was rejected; there is nothing meaningful to report.
            print("ProfileLayer(\"\(layer.name)\"): no candidate threadgroup size could be run")
            return
        }
        let minTimeMsStr = NSString(format: "%.1f", minTimeNs / 1E6)
        let firstTimeMsStr = NSString(format: "%.1f", firstTimeNs / 1E6)
        print("\"\(layer.name)\": MTLSizeMake(\(best.width), \(best.height), \(best.depth)), // \(minTimeMsStr) ms vs \(firstTimeMsStr) ms for 16x16x1")
    }
}
261 | -------------------------------------------------------------------------------- /MetalDetector/ViewController.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ViewController.swift 3 | // MetalDetector 4 | // 5 | // Created by Ivan Krasin on 9/27/15. 6 | // Copyright © 2015 Ivan Krasin. All rights reserved. 7 | // 8 | 9 | import AVFoundation 10 | import Darwin 11 | import Metal 12 | import MetalKit 13 | import MetalPerformanceShaders 14 | import UIKit 15 | 16 | 17 | class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate { 18 | // Size of the input image for the neural net. 19 | let windowSize = 224 20 | 21 | var previewLayer: AVCaptureVideoPreviewLayer? 22 | var frameCount = 0 23 | var running: Bool = false 24 | 25 | var metalDevice: MTLDevice? 26 | var textureCache : Unmanaged? 27 | var input: MTLTexture? 28 | var output : MTLTexture? 29 | var outputBGRA : MTLTexture? 30 | 31 | var engine: Engine? 32 | var net : Net? 33 | 34 | @IBOutlet weak var infoLabel: UILabel!
35 | 36 |
    /// Sets up Metal, the info label, the camera capture session and the preview layer.
    ///
    /// When the SAMOFLY_UNIT_TESTS environment variable is set, Metal
    /// initialization and starting the capture session are both skipped.
    /// Terminates the process if the camera input or the video output cannot be set up.
    override func viewDidLoad() {
        super.viewDidLoad()

        let underUnitTests = NSProcessInfo.processInfo().environment["SAMOFLY_UNIT_TESTS"] != nil
        if !underUnitTests {
            initMetal()
        }

        // Initialize UI
        infoLabel.layer.cornerRadius = 8.0
        infoLabel.clipsToBounds = true

        // Initialize video recorder
        let session = AVCaptureSession()
        session.sessionPreset = AVCaptureSessionPreset352x288

        // Prepare input
        do {
            let camera = AVCaptureDevice.defaultDeviceWithMediaType(AVMediaTypeVideo)
            let input = try AVCaptureDeviceInput(device: camera)
            session.addInput(input)
        } catch {
            print("Can't get the camera input")
            exit(1)
        }

        // Prepare output: BGRA frames delivered to this object on a private serial queue.
        let out = AVCaptureVideoDataOutput()
        out.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: Int(kCVPixelFormatType_32BGRA)]
        out.alwaysDiscardsLateVideoFrames = true
        let queue = dispatch_queue_create("input frames queue", DISPATCH_QUEUE_SERIAL)
        out.setSampleBufferDelegate(self, queue:queue)
        if !session.canAddOutput(out) {
            print("Can't add video preview output")
            exit(1)
        }
        session.addOutput(out)

        // Create and register the video preview layer
        previewLayer = AVCaptureVideoPreviewLayer(session: session)
        previewLayer!.videoGravity = AVLayerVideoGravityResizeAspectFill
        previewLayer!.zPosition = -1
        view.layer.addSublayer(previewLayer!)

        if !underUnitTests {
            session.startRunning()
        }
    }

    /// Resizes the preview layer to the view and keeps its video orientation in
    /// sync with the status bar orientation.
    override func viewDidLayoutSubviews() {
        super.viewDidLayoutSubviews()

        // Nothing to lay out until viewDidLoad has created the preview layer.
        guard let layer = previewLayer else {
            return
        }
        layer.frame = view.bounds

        // Make sure the preview layer is always correctly oriented.
        guard let connection = layer.connection where connection.supportsVideoOrientation else {
            return
        }
        switch UIApplication.sharedApplication().statusBarOrientation {
        case UIInterfaceOrientation.Portrait:
            connection.videoOrientation = AVCaptureVideoOrientation.Portrait
        case UIInterfaceOrientation.PortraitUpsideDown:
            connection.videoOrientation = AVCaptureVideoOrientation.PortraitUpsideDown
        case UIInterfaceOrientation.LandscapeLeft:
            connection.videoOrientation = AVCaptureVideoOrientation.LandscapeLeft
        case UIInterfaceOrientation.LandscapeRight:
            connection.videoOrientation = AVCaptureVideoOrientation.LandscapeRight
        default:
            connection.videoOrientation = AVCaptureVideoOrientation.Portrait
        }
    }

    /// Creates the engine, the network, the Core Video texture cache and the
    /// two window-sized output textures.
    private func initMetal() {
        engine = Engine()
        net = Net(engine: engine!, config: GoogLeNetConfig(),
            threadsPerThreadgroup: GoogLeNetProfile.GetThreadsPerThreadgroup())
        metalDevice = MTLCreateSystemDefaultDevice()
        CVMetalTextureCacheCreate(kCFAllocatorDefault, nil, metalDevice!, nil, &textureCache)

        // init output texture
        let outputDesc = MTLTextureDescriptor()
        outputDesc.textureType = MTLTextureType.Type2DArray
        outputDesc.height = windowSize
        outputDesc.width = windowSize
        outputDesc.pixelFormat = MTLPixelFormat.R16Float
        outputDesc.arrayLength = 3
        output = metalDevice!.newTextureWithDescriptor(outputDesc)

        let outputDescBGRA = MTLTextureDescriptor.texture2DDescriptorWithPixelFormat(
            MTLPixelFormat.BGRA8Unorm, width:windowSize, height:windowSize, mipmapped:false)
        outputBGRA = metalDevice!.newTextureWithDescriptor(outputDescBGRA)
    }

    /// Finds the largest score in `arr` after penalizing unlikely labels.
    ///
    /// A score whose label starts with "-" is divided by 6 before comparison.
    /// Scores without a corresponding label are compared unpenalized.
    ///
    /// - returns: the index of the winning element and its (possibly penalized)
    ///   score; `(0, -Float.infinity)` when `arr` is empty.
    func argMax(arr : [Float], labels: [String]) -> (Int, Float) {
        var maxv : Float = -Float.infinity
        var idx : Int = 0
        for (i, value) in arr.enumerate() {
            var cur = value
            if i < labels.count && labels[i].hasPrefix("-") {
                // Penalize the label, as it's very unlikely
                cur /= 6
            }
            if cur > maxv {
                maxv = cur
                idx = i
            }
        }
        return (idx, maxv)
    }

    /// Marks the current frame as finished so the next one can be processed.
    private func finishFrame() {
        objc_sync_enter(self)
        self.running = false
        objc_sync_exit(self)
    }

    /// Sample-buffer delegate callback: classifies a camera frame and shows the
    /// top two labels.
    ///
    /// Frames are dropped while a previous frame is still being processed, and
    /// only every second remaining frame is classified.
    func captureOutput(captureOutput: AVCaptureOutput!,
        didOutputSampleBuffer sampleBuffer: CMSampleBuffer!,
        fromConnection connection: AVCaptureConnection!) {
        objc_sync_enter(self)
        if self.running {
            objc_sync_exit(self)
            return
        }
        self.frameCount += 1
        if self.frameCount < 2 {
            objc_sync_exit(self)
            return
        }
        self.running = true
        self.frameCount = 0
        objc_sync_exit(self)

        guard let buf = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            // A sample buffer without pixel data cannot be classified; drop it.
            print("Sample buffer has no image buffer")
            finishFrame()
            return
        }
        var texture : Unmanaged<CVMetalTexture>?
        let w = CVPixelBufferGetWidthOfPlane(buf, 0)
        let h = CVPixelBufferGetHeightOfPlane(buf, 0)
        print("w=\(w), h=\(h)")

        CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault,
            textureCache!.takeUnretainedValue(),
            buf, nil, MTLPixelFormat.BGRA8Unorm, w, h, 0, &texture)
        guard let unmanagedTexture = texture else {
            print("Failed to create a texture from image")
            exit(1)
        }
        // Take ownership of the +1 reference returned by the cache. The Core Video
        // texture must stay alive until the GPU has finished reading the Metal
        // texture derived from it, so it is held across the synchronous forward pass.
        let cvTexture = unmanagedTexture.takeRetainedValue()
        input = CVMetalTextureGetTexture(cvTexture)

        // Run Metal shaders on the input and fill the output
        let startTime = NSDate()

        var ans = withExtendedLifetime(cvTexture) {
            self.net!.forward(self.input!)
        }
        var label = ""
        if !ans.isEmpty {
            let (idx, prob) = argMax(ans, labels: net!.labels)
            // Zero out the winner so the second pass finds the runner-up.
            ans[idx] = 0
            let (idx2, prob2) = argMax(ans, labels: net!.labels)
            if prob > 0.1 {
                label = "\(net!.labels[idx]): \(prob*100)%, \(net!.labels[idx2]): \(prob2*100)%"
            }
        }
        let workTime = NSDate().timeIntervalSinceDate(startTime)
        print("net.forward is done within \(workTime) sec")
        finishFrame()

        print("GoogLeNet: \(label)")
        dispatch_async(dispatch_get_main_queue(), {
            self.infoLabel.text = label
        })
    }
}
199 | -------------------------------------------------------------------------------- /MetalDetector/synset_words.txt: -------------------------------------------------------------------------------- 1 | tench, Tinca tinca 2 | -goldfish, Carassius auratus 3 | -great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias 4 | -tiger shark, Galeocerdo cuvieri 5 | -hammerhead, hammerhead shark 6 | -electric ray, crampfish, numbfish, torpedo 7 | -stingray 8 | cock 9 | hen 10 | ostrich 11 | brambling 12 | goldfinch 13 | house finch, linnet 14 | junco, snowbird 15 | -indigo bird 16 | robin, American robin 17 | bulbul 18 | jay 19 | magpie 20 | chickadee 21 | water ouzel, dipper 22 | kite 23 | bald eagle, American eagle 24 | vulture 25 | great gray owl 26 | European fire salamander 27 | common newt 28 | eft 29 | spotted salamander 30 | axolotl, mud puppy 31 | bullfrog, Rana catesbeiana 32 | tree frog, tree-frog 33 | tailed frog, bell toad 34 | loggerhead turtle 35 | leatherback turtle 36 | mud turtle 37 | terrapin 38 | box turtle 39 | banded gecko 40 | common iguana, iguana, Iguana iguana 41 | American chameleon 42 | whiptail, whiptail lizard 43 | agama 44 | frilled lizard 45 | alligator lizard 46 | Gila monster 47 | green lizard 48 | African chameleon 49 | Komodo dragon, Komodo lizard 50 | African crocodile 51 | American alligator 52 |
triceratops 53 | thunder snake, worm snake, Carphophis amoenus 54 | ringneck snake, ring-necked snake, ring snake 55 | hognose snake, sand viper 56 | green snake, grass snake 57 | king snake, kingsnake 58 | garter snake, grass snake 59 | water snake 60 | vine snake 61 | night snake 62 | boa constrictor 63 | rock python, rock snake 64 | Indian cobra, Naja naja 65 | green mamba 66 | sea snake 67 | horned viper 68 | diamondback rattlesnake 69 | horned rattlesnake 70 | trilobite 71 | harvestman, daddy longlegs 72 | scorpion 73 | black and gold garden spider 74 | barn spider 75 | garden spider 76 | black widow 77 | tarantula 78 | wolf spider 79 | tick 80 | centipede 81 | black grouse 82 | ptarmigan 83 | ruffed grouse, partridge, Bonasa umbellus 84 | prairie chicken, prairie grouse, prairie fowl 85 | peacock 86 | quail 87 | partridge 88 | African gray 89 | macaw 90 | sulphur-crested cockatoo 91 | lorikeet 92 | coucal 93 | bee eater 94 | hornbill 95 | hummingbird 96 | jacamar 97 | toucan 98 | drake 99 | red-breasted merganser, Mergus serrator 100 | goose 101 | black swan 102 | tusker 103 | echidna, spiny anteater, anteater 104 | -platypus 105 | wallaby, brush kangaroo 106 | koala bear 107 | wombat 108 | -jellyfish 109 | -sea anemone, anemone 110 | brain coral 111 | -flatworm, platyhelminth 112 | -nematode, nematode worm, roundworm 113 | conch 114 | snail 115 | slug 116 | -sea slug, nudibranch 117 | -chiton, coat-of-mail shell 118 | chambered nautilus, pearly nautilus, nautilus 119 | Dungeness crab, Cancer magister 120 | rock crab, Cancer irroratus 121 | fiddler crab 122 | -king crab, Alaska crab 123 | -American lobster 124 | -langouste, rock lobster 125 | crayfish, crawfish, crawdad, crawdaddy 126 | hermit crab 127 | isopod 128 | white stork, Ciconia ciconia 129 | black stork, Ciconia nigra 130 | spoonbill 131 | flamingo 132 | little blue heron, Egretta caerulea 133 | American egret, great white heron, Egretta albus 134 | bittern 135 | crane 136 | limpkin, Aramus pictus 
137 | -European gallinule 138 | American coot, marsh hen 139 | bustard 140 | ruddy turnstone, Arenaria interpres 141 | red-backed sandpiper, dunlin, Erolia alpina 142 | redshank, Tringa totanus 143 | dowitcher 144 | oystercatcher, oyster catcher 145 | pelican 146 | king penguin 147 | albatross, mollymawk 148 | -gray whale, devilfish 149 | -killer whale, sea wolf 150 | dugong, Dugong dugon 151 | sea lion 152 | Chihuahua 153 | Japanese spaniel 154 | Maltese dog, Maltese terrier, Maltese 155 | Pekinese, Pekingese, Peke 156 | Shih-Tzu 157 | Blenheim spaniel 158 | papillon 159 | toy terrier 160 | Rhodesian ridgeback 161 | Afghan hound, Afghan 162 | basset, basset hound 163 | beagle 164 | bloodhound, sleuthhound 165 | bluetick 166 | black-and-tan coonhound 167 | Walker foxhound 168 | English foxhound 169 | redbone 170 | borzoi, Russian wolfhound 171 | Irish wolfhound 172 | Italian greyhound 173 | whippet 174 | Ibizan hound, Ibizan Podenco 175 | Norwegian elkhound, elkhound 176 | otterhound, otter hound 177 | Saluki, gazelle hound 178 | Scottish deerhound, deerhound 179 | Weimaraner 180 | Staffordshire bullterrier 181 | pit bull terrier 182 | Bedlington terrier 183 | Border terrier 184 | Kerry blue terrier 185 | Irish terrier 186 | Norfolk terrier 187 | Norwich terrier 188 | Yorkshire terrier 189 | wire-haired fox terrier 190 | Lakeland terrier 191 | Sealyham terrier, Sealyham 192 | Airedale, Airedale terrier 193 | cairn, cairn terrier 194 | Australian terrier 195 | Dandie Dinmont terrier 196 | Boston bull, Boston terrier 197 | miniature schnauzer 198 | giant schnauzer 199 | standard schnauzer 200 | Scotch terrier 201 | Tibetan terrier, chrysanthemum dog 202 | silky terrier, Sydney silky 203 | soft-coated wheaten terrier 204 | West Highland white terrier 205 | Lhasa, Lhasa apso 206 | flat-coated retriever 207 | curly-coated retriever 208 | golden retriever 209 | Labrador retriever 210 | Chesapeake Bay retriever 211 | German short-haired pointer 212 | vizsla, Hungarian 
pointer 213 | English setter 214 | Irish setter, red setter 215 | Gordon setter 216 | Brittany spaniel 217 | clumber, clumber spaniel 218 | English springer spaniel 219 | Welsh springer spaniel 220 | English cocker spaniel 221 | Sussex spaniel 222 | Irish water spaniel 223 | kuvasz 224 | schipperke 225 | groenendael 226 | malinois 227 | briard 228 | kelpie 229 | komondor 230 | Old English sheepdog, bobtail 231 | Shetland sheep dog 232 | collie 233 | Border collie 234 | Bouvier des Flandres 235 | Rottweiler 236 | German shepherd dog 237 | Doberman, Doberman pinscher 238 | miniature pinscher 239 | Greater Swiss Mountain dog 240 | Bernese mountain dog 241 | Appenzeller 242 | EntleBucher 243 | boxer 244 | bull mastiff 245 | Tibetan mastiff 246 | French bulldog 247 | Great Dane 248 | Saint Bernard, St Bernard 249 | Eskimo dog, husky 250 | malamute, malemute, Alaskan malamute 251 | Siberian husky 252 | dalmatian, coach dog, carriage dog 253 | affenpinscher, monkey pinscher, monkey dog 254 | basenji 255 | pug, pug-dog 256 | Leonberg 257 | Newfoundland, Newfoundland dog 258 | Great Pyrenees 259 | Samoyed, Samoyede 260 | Pomeranian 261 | chow, chow chow 262 | keeshond 263 | Brabancon griffon 264 | Pembroke, Pembroke Welsh corgi 265 | Cardigan, Cardigan Welsh corgi 266 | toy poodle 267 | miniature poodle 268 | standard poodle 269 | Mexican hairless 270 | gray wolf 271 | white wolf, Arctic wolf 272 | red wolf, maned wolf 273 | coyote, prairie wolf, brush wolf 274 | dingo, warrigal, warragal, Canis dingo 275 | dhole, Cuon alpinus 276 | African hunting dog, hyena dog 277 | hyena, hyaena 278 | red fox, Vulpes vulpes 279 | kit fox, Vulpes macrotis 280 | Arctic fox, white fox 281 | gray fox 282 | tabby, tabby cat 283 | tiger cat 284 | Persian cat 285 | Siamese cat, Siamese 286 | Egyptian cat 287 | cougar, puma 288 | lynx, catamount 289 | leopard, Panthera pardus 290 | snow leopard, ounce 291 | jaguar, panther 292 | lion, king of beasts 293 | tiger, Panthera tigris 294 | cheetah, 
chetah, Acinonyx jubatus 295 | brown bear, bruin 296 | American black bear 297 | ice bear, polar bear 298 | sloth bear 299 | mongoose 300 | meerkat, mierkat 301 | tiger beetle 302 | ladybug 303 | ground beetle, carabid beetle 304 | long-horned beetle, longicorn, longicorn beetle 305 | leaf beetle, chrysomelid 306 | dung beetle 307 | rhinoceros beetle 308 | weevil 309 | fly 310 | bee 311 | ant, emmet, pismire 312 | grasshopper, hopper 313 | cricket 314 | walking stick, walkingstick, stick insect 315 | cockroach, roach 316 | mantis, mantid 317 | cicada, cicala 318 | leafhopper 319 | lacewing, lacewing fly 320 | dragonfly, darning needle, sewing needle 321 | damselfly 322 | admiral 323 | ringlet, ringlet butterfly 324 | monarch butterfly, milkweed butterfly 325 | cabbage butterfly 326 | sulphur butterfly, sulfur butterfly 327 | lycaenid, lycaenid butterfly 328 | starfish, sea star 329 | sea urchin 330 | sea cucumber, holothurian 331 | wood rabbit, cottontail rabbit 332 | hare 333 | Angora, Angora rabbit 334 | hamster 335 | porcupine, hedgehog 336 | eastern fox squirrel 337 | marmot 338 | beaver 339 | guinea pig 340 | sorrel 341 | zebra 342 | hog, pig 343 | wild boar, boar 344 | warthog 345 | hippopotamus 346 | ox 347 | -water buffalo 348 | bison 349 | ram, tup 350 | -bighorn sheep 351 | ibex, Capra ibex 352 | hartebeest 353 | impala, Aepyceros melampus 354 | gazelle 355 | Arabian camel, dromedary 356 | llama 357 | weasel 358 | mink 359 | polecat, fitch 360 | black-footed ferret, ferret 361 | otter 362 | skunk, polecat, wood pussy 363 | badger 364 | armadillo 365 | three-toed sloth, ai 366 | orangutan 367 | gorilla 368 | chimpanzee 369 | gibbon, Hylobates lar 370 | siamang 371 | guenon, guenon monkey 372 | patas, hussar monkey 373 | baboon 374 | macaque 375 | langur 376 | colobus, colobus monkey 377 | proboscis monkey, Nasalis larvatus 378 | marmoset 379 | capuchin, ringtail 380 | howler monkey, howler 381 | titi, titi monkey 382 | spider monkey, Ateles geoffroyi 383 | 
squirrel monkey, Saimiri sciureus 384 | Madagascar cat, ring-tailed lemur, Lemur catta 385 | indri, indris 386 | Indian elephant 387 | African elephant 388 | red panda 389 | Giant panda 390 | barracouta, snoek 391 | eel 392 | salmon, blue jack 393 | rock beauty, Holocanthus tricolor 394 | anemone fish 395 | sturgeon 396 | gar, garfish, garpike, billfish 397 | lionfish 398 | puffer, pufferfish, blowfish, globefish 399 | abacus 400 | abaya 401 | academic robe, judge's robe 402 | accordion, piano accordion 403 | acoustic guitar 404 | -aircraft carrier 405 | airliner 406 | airship, dirigible 407 | altar 408 | ambulance 409 | amphibious vehicle 410 | analog clock 411 | apiary, bee house 412 | apron 413 | trash can 414 | assault rifle, assault gun 415 | backpack 416 | bakery 417 | balance beam, beam 418 | balloon, apple 419 | ballpoint, ballpoint pen, ballpen, Biro 420 | Band Aid 421 | banjo 422 | bannister, handrail 423 | barbell 424 | barber chair 425 | barbershop 426 | barn 427 | barometer 428 | barrel, cask 429 | lawn cart, wheelbarrow 430 | baseball 431 | basketball 432 | bassinet 433 | bassoon 434 | bathing cap 435 | bath towel 436 | bathtub 437 | beach wagon, station wagon, wagon 438 | beacon, lighthouse 439 | beaker 440 | bearskin, busby, shako 441 | beer bottle 442 | beer glass 443 | bell cote, bell cot 444 | bib 445 | tandem bicycle 446 | bikini 447 | binder 448 | binoculars, opera glasses 449 | -birdhouse 450 | -boathouse 451 | bobsled, bobsleigh, bob 452 | bolo tie, bolo, bola tie, bola 453 | bonnet, poke bonnet 454 | bookcase 455 | bookshop, bookstore 456 | bottlecap 457 | bow 458 | bow tie 459 | brass, memorial tablet 460 | brassiere, bra, bandeau 461 | breakwater, groin 462 | breastplate, aegis, egis 463 | broom 464 | bucket, pail 465 | buckle 466 | bulletproof vest 467 | bullet train, bullet 468 | butcher shop 469 | cab, hack, taxi, taxicab 470 | caldron, cauldron 471 | candle, taper, wax light 472 | cannon 473 | canoe 474 | can opener, tin opener 475 | 
cardigan 476 | -car mirror 477 | carousel 478 | carpenter's kit, tool kit 479 | carton 480 | car wheel 481 | cash machine, ATM 482 | cassette 483 | cassette player 484 | castle 485 | catamaran 486 | CD player 487 | cello, violoncello 488 | cellphone 489 | chain 490 | chainlink fence 491 | ring mail, mail, chain armor 492 | chain saw, chainsaw 493 | chest 494 | chiffonier, commode 495 | chime, bell, gong 496 | china cabinet, china closet 497 | Christmas stocking 498 | church, church building 499 | cinema, movie theater 500 | cleaver, meat cleaver, chopper 501 | cliff dwelling 502 | cloak 503 | clog, geta, patten, sabot 504 | cocktail shaker 505 | coffee mug 506 | coffeepot 507 | coil, spiral, helix 508 | combination lock 509 | computer keyboard 510 | candy store 511 | -container ship 512 | convertible 513 | corkscrew, bottle screw 514 | cornet, horn, trumpet, trump 515 | cowboy boot 516 | cowboy hat, ten-gallon hat 517 | cradle 518 | crane 519 | crash helmet 520 | crate 521 | crib, cot 522 | Crock Pot 523 | croquet ball 524 | crutch 525 | cuirass 526 | dam, dike, dyke 527 | desk 528 | desktop computer 529 | dial phone 530 | diaper, nappy, napkin 531 | digital clock 532 | digital watch 533 | dining table, board 534 | dishrag, dishcloth 535 | dishwasher 536 | disk brake 537 | dock, docking facility 538 | -dog sled 539 | dome 540 | doormat, welcome mat 541 | -drilling platform 542 | drum, membranophone, tympan 543 | drumstick 544 | dumbbell 545 | Dutch oven 546 | -electric fan 547 | electric guitar 548 | electric locomotive 549 | entertainment center 550 | envelope 551 | espresso maker 552 | face powder 553 | feather boa, boa 554 | file, file cabinet 555 | fireboat 556 | fire engine, fire truck 557 | fire screen, fireguard 558 | flagpole, flagstaff 559 | flute, transverse flute 560 | folding chair 561 | football helmet 562 | forklift 563 | fountain 564 | fountain pen 565 | four-poster 566 | freight car 567 | French horn, horn 568 | frying pan 569 | fur coat 570 | 
garbage truck, dustcart 571 | respirator 572 | gas pump 573 | goblet 574 | go-kart 575 | golf ball 576 | golf cart 577 | gondola 578 | gong, tam-tam 579 | gown 580 | grand piano 581 | greenhouse, nursery 582 | grille, radiator grille 583 | grocery store, market 584 | guillotine 585 | hair slide 586 | hair spray 587 | half track 588 | hammer 589 | hamper 590 | hand blower, blow dryer 591 | hand-held computer 592 | handkerchief, hankie, hanky, hankey 593 | hard disk, fixed disk 594 | harmonica, mouth organ 595 | harp 596 | harvester, reaper 597 | hatchet 598 | holster 599 | home theater, home theatre 600 | honeycomb 601 | hook, claw 602 | hoopskirt, crinoline 603 | horizontal bar, high bar 604 | horse cart, horse-cart 605 | hourglass 606 | iPod 607 | iron, smoothing iron 608 | jack-o'-lantern 609 | jean, blue jean, denim 610 | jeep, landrover 611 | T-shirt 612 | jigsaw puzzle 613 | jinrikisha, ricksha 614 | joystick 615 | kimono 616 | knee pad 617 | knot 618 | lab coat 619 | ladle 620 | lamp shade 621 | laptop 622 | lawn mower, mower 623 | lens cap, lens cover 624 | letter opener, paper knife 625 | library 626 | lifeboat 627 | lighter, igniter 628 | limousine, limo 629 | liner, ocean liner 630 | lipstick, lip rouge 631 | Loafer 632 | lotion 633 | loudspeaker, speaker 634 | -loupe, jeweler's loupe 635 | lumbermill, sawmill 636 | magnetic compass 637 | mailbag, postbag 638 | mailbox, letter box 639 | maillot 640 | maillot, tank suit 641 | manhole cover 642 | maraca 643 | marimba, xylophone 644 | mask 645 | matchstick 646 | maypole 647 | maze, labyrinth 648 | measuring cup 649 | -medicine chest, medicine cabinet 650 | megalith, megalithic structure 651 | microphone, mike 652 | microwave oven 653 | military uniform 654 | milk can 655 | minibus 656 | miniskirt, mini 657 | minivan 658 | missile 659 | mitten 660 | mixing bowl 661 | mobile home 662 | Model T 663 | modem 664 | monastery 665 | monitor 666 | moped 667 | mortar 668 | mortarboard 669 | mosque 670 | mosquito net 
671 | motor scooter 672 | mountain bike 673 | -mountain tent 674 | computer mouse 675 | mousetrap 676 | moving van 677 | muzzle 678 | nail 679 | neck brace 680 | necklace 681 | nipple 682 | notebook, notebook computer 683 | obelisk 684 | oboe, hautboy, hautbois 685 | ocarina, sweet potato 686 | odometer, hodometer, mileometer, milometer 687 | -oil filter 688 | organ, pipe organ 689 | -oscilloscope 690 | overskirt 691 | oxcart 692 | -oxygen mask 693 | packet 694 | paddle, boat paddle 695 | paddle wheel 696 | padlock 697 | paintbrush 698 | pajama, pyjama, pj's, jammies 699 | palace 700 | panpipe, pandean pipe, syrinx 701 | paper towel 702 | parachute, chute 703 | parallel bars, bars 704 | park bench 705 | parking meter 706 | passenger car, coach, carriage 707 | patio, terrace 708 | pay-phone, pay-station 709 | pedestal, plinth, footstall 710 | pencil box, pencil case 711 | pencil sharpener 712 | perfume, essence 713 | Petri dish 714 | photocopier 715 | -pick, plectrum 716 | pickelhaube 717 | picket fence, paling 718 | pickup, pickup truck 719 | pier 720 | piggy bank, penny bank 721 | pill bottle 722 | pillow 723 | ping-pong ball 724 | -pinwheel 725 | pirate, pirate ship 726 | pitcher, ewer 727 | plane, carpenter's plane 728 | planetarium 729 | plastic bag 730 | plate rack 731 | plow, plough 732 | plunger, plumber's helper 733 | Polaroid camera 734 | pole 735 | police van 736 | poncho 737 | pool table 738 | pop bottle, soda bottle 739 | pot, flowerpot 740 | potter's wheel 741 | power drill 742 | prayer rug, prayer mat 743 | printer 744 | prison, prison house 745 | projectile, missile 746 | projector 747 | puck, hockey puck 748 | punching bag 749 | purse 750 | quill, quill pen 751 | quilt, comforter, comfort, puff 752 | racer, race car, racing car 753 | racket, racquet 754 | radiator 755 | radio, wireless 756 | radio telescope, radio reflector 757 | rain barrel 758 | recreational vehicle, RV, R.V. 
759 | reel 760 | reflex camera 761 | refrigerator, icebox 762 | remote control 763 | restaurant 764 | revolver, six-gun, six-shooter 765 | rifle 766 | rocking chair, rocker 767 | rotisserie 768 | rubber eraser 769 | rugby ball 770 | rule, ruler 771 | running shoe 772 | safe 773 | safety pin 774 | salt shaker 775 | sandal 776 | sarong 777 | sax, saxophone 778 | scabbard 779 | scale, weighing machine 780 | school bus 781 | schooner 782 | scoreboard 783 | screen, CRT screen 784 | screw 785 | screwdriver 786 | -seat belt, seatbelt 787 | sewing machine 788 | shield, buckler 789 | shoe shop 790 | shoji 791 | -shopping basket 792 | shopping cart 793 | shovel 794 | shower cap 795 | -shower curtain 796 | ski 797 | ski mask 798 | sleeping bag 799 | slide rule, slipstick 800 | sliding door 801 | slot, one-armed bandit 802 | snorkel 803 | snowmobile 804 | snowplow, snowplough 805 | soap dispenser 806 | soccer ball 807 | sock 808 | -solar dish, solar collector, solar furnace 809 | sombrero 810 | soup bowl 811 | space bar 812 | space heater 813 | space shuttle 814 | spatula 815 | speedboat 816 | spider web, spider's web 817 | spindle 818 | sports car, sport car 819 | -spotlight, spot 820 | stage 821 | steam locomotive 822 | -steel arch bridge 823 | steel drum 824 | -stethoscope 825 | stole 826 | stone wall 827 | stopwatch, stop watch 828 | stove 829 | strainer 830 | streetcar, tram 831 | stretcher 832 | studio couch, day bed 833 | stupa, tope 834 | submarine, pigboat, sub, U-boat 835 | suit, suit of clothes 836 | sundial 837 | sunglass 838 | sunglasses 839 | sunscreen 840 | suspension bridge 841 | swab, swob, mop 842 | sweatshirt 843 | swimming trunks, bathing trunks 844 | swing 845 | switch, electric switch 846 | syringe 847 | table lamp 848 | army tank 849 | tape player 850 | teapot 851 | teddy, teddy bear 852 | television system 853 | tennis ball 854 | thatch, thatched roof 855 | theater curtain 856 | -thimble 857 | thrasher 858 | throne 859 | tile roof 860 | toaster 861 | 
tobacco shop 862 | -toilet seat 863 | torch 864 | totem pole 865 | tow truck, tow car 866 | toyshop 867 | tractor 868 | trailer truck, tractor trailer 869 | tray 870 | trench coat 871 | tricycle, trike, velocipede 872 | trimaran 873 | tripod 874 | triumphal arch 875 | trolleybus 876 | trombone 877 | tub, vat 878 | turnstile 879 | typewriter keyboard 880 | umbrella 881 | unicycle, monocycle 882 | upright, upright piano 883 | vacuum, vacuum cleaner 884 | vase 885 | vault 886 | -velvet 887 | vending machine 888 | vestment 889 | viaduct 890 | violin, fiddle 891 | volleyball 892 | waffle iron 893 | wall clock 894 | wallet, billfold, notecase, pocketbook 895 | wardrobe, closet, press 896 | warplane, military plane 897 | washbasin, handbasin, washbowl 898 | washer, washing machine 899 | water bottle 900 | water jug 901 | water tower 902 | whiskey jug 903 | whistle 904 | wig 905 | window screen 906 | window shade 907 | -Windsor tie 908 | wine bottle 909 | -wing 910 | wok 911 | wooden spoon 912 | wool, woolen, woollen 913 | worm fence, snake fence 914 | wreck 915 | yawl 916 | yurt 917 | website 918 | comic book 919 | crossword 920 | street sign 921 | traffic light 922 | book jacket, dust cover 923 | menu 924 | plate 925 | guacamole 926 | consomme 927 | hot pot 928 | trifle 929 | icecream 930 | lollipop, popsicle 931 | French loaf 932 | bagel, beigel 933 | pretzel 934 | cheeseburger 935 | hot dog 936 | mashed potato 937 | head cabbage 938 | broccoli 939 | cauliflower 940 | zucchini, courgette 941 | spaghetti squash 942 | acorn squash 943 | butternut squash 944 | cucumber, cuke 945 | artichoke 946 | bell pepper 947 | cardoon 948 | mushroom 949 | Granny Smith 950 | strawberry 951 | orange 952 | lemon 953 | fig 954 | pineapple, ananas 955 | banana 956 | jackfruit, jak, jack 957 | custard apple 958 | pomegranate 959 | hay 960 | carbonara 961 | chocolate sauce 962 | dough 963 | meat loaf, meatloaf 964 | pizza, pizza pie 965 | potpie 966 | burrito 967 | red wine 968 | espresso 969 
| cup 970 | eggnog 971 | alp 972 | bubble 973 | cliff, drop, drop-off 974 | grass, coral reef 975 | geyser 976 | lakeside, lakeshore 977 | promontory, headland, head, foreland 978 | sandbar, sand bar 979 | seashore, coast, seacoast, sea-coast 980 | valley, vale 981 | volcano 982 | ballplayer, baseball player 983 | groom, bridegroom 984 | scuba diver 985 | rapeseed 986 | daisy 987 | yellow lady-slipper 988 | corn 989 | acorn 990 | hip, rose hip, rosehip 991 | buckeye, horse chestnut, conker 992 | -coral fungus 993 | agaric 994 | gyromitra 995 | stinkhorn, carrion fungus 996 | earthstar 997 | hen-of-the-woods 998 | bolete 999 | ear, spike, capitulum 1000 | toilet paper 1001 | -------------------------------------------------------------------------------- /MetalDetectorTests/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | en 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | BNDL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleSignature 20 | ???? 21 | CFBundleVersion 22 | 1 23 | 24 | 25 | -------------------------------------------------------------------------------- /MetalDetectorTests/MetalDetectorTests.swift: --------------------------------------------------------------------------------
//
//  MetalDetectorTests.swift
//  MetalDetectorTests
//
//  Created by Ivan Krasin on 10/1/15.
//  Copyright © 2015 Ivan Krasin. All rights reserved.
//

import XCTest
import Metal
import MetalKit
import MetalPerformanceShaders
import UIKit
import MetalDetector


/// Runs GoogLeNet on the bundled cat image and compares per-blob L1/L2 metrics
/// against recorded reference values.
class MetalDetectorTests: XCTestCase {
    var engine: Engine?
    var net: Net?
    var cat: MTLTexture?

    /// Builds a fresh engine and network and loads "cat.png" before each test.
    override func setUp() {
        super.setUp()
        engine = Engine()
        net = Net(engine: engine!, config: GoogLeNetConfig(),
            threadsPerThreadgroup: GoogLeNetProfile.GetThreadsPerThreadgroup())

        cat = engine!.GetResourceAsMetalTexture("cat.png")
        XCTAssertNotNil(cat)
    }

    /// Drops the fixtures so they do not stay referenced by the test case
    /// instance after the test has finished.
    override func tearDown() {
        cat = nil
        net = nil
        engine = nil
        super.tearDown()
    }

    /// Returns the index and value of the largest element of `arr`
    /// (the lowest index on ties). `arr` must not be empty.
    func argMax(arr : [Float]) -> (Int, Float) {
        precondition(!arr.isEmpty, "argMax requires at least one value")
        var maxv : Float = arr[0]
        var idx : Int = 0
        for i in 1..<arr.count {
            if arr[i] > maxv {
                maxv = arr[i]
                idx = i
            }
        }
        return (idx, maxv)
    }

    /// Asserts that the L1 and L2 metrics the engine reports for blob `name` match
    /// the reference values within the relative tolerances `err1` and `err2`.
    func checkMetrics(name: String, L1: Float, err1: Float, L2: Float, err2: Float) {
        guard let blob = net!.blobs[name] else {
            // Report a normal test failure instead of crashing the whole run.
            XCTFail("The network has no blob named \(name)")
            return
        }
        let realL1 = engine!.L1(blob)
        let realL2 = engine!.L2(blob)
        // Uncomment to regenerate the reference lines in testGoogleNetOnCat:
        // let realErr1 = abs(L1-realL1)/L1
        // let realErr2 = abs(L2-realL2)/L2
        // print("checkMetrics(\"\(name)\", L1: \(L1), err1: \(realErr1*1.1), L2: \(L2), err2: \(realErr2*1.1))")
        XCTAssertEqualWithAccuracy(realL1, L1, accuracy: L1 * err1)
        XCTAssertEqualWithAccuracy(realL2, L2, accuracy: L2 * err2)
    }

    /// Classifies the cat image, prints the five best labels, checks the
    /// intermediate blobs against the reference metrics and measures one
    /// forward pass.
    func testGoogleNetOnCat() {
        var ans = net!.forward(cat!)
        // HACK: find the answer
        for i in 1...5 {
            let (idx, p) = argMax(ans)
            // Zero the winner so the next iteration finds the runner-up.
            ans[idx] = 0
            print("\(i). \(net!.labels[idx]) - \(p)")
        }
        checkMetrics("data", L1: 8.81542e+06, err1: 0.000380708, L2: 6.46363e+08, err2: 0.000457561)
        checkMetrics("conv1_7x7_s2", L1: 2.6894e+07, err1: 0.00239215, L2: 7.38918e+09, err2: 0.00470541)
        checkMetrics("pool1_3x3_s2", L1: 1.57066e+07, err1: 0.00256157, L2: 5.23878e+09, err2: 0.00493635)
        checkMetrics("pool1_norm1", L1: 6.76352e+06, err1: 0.00113765, L2: 4.70019e+08, err2: 0.00197756)
        checkMetrics("conv2_3x3_reduce", L1: 4.60704e+06, err1: 0.0030698, L2: 3.65177e+08, err2: 0.00591738)
        checkMetrics("conv2_3x3", L1: 6.19039e+06, err1: 0.00479172, L2: 6.51582e+08, err2: 0.00959848)
        checkMetrics("conv2_norm2", L1: 4.97939e+06, err1: 0.00369263, L2: 3.50694e+08, err2: 0.00659015)
        checkMetrics("pool2_3x3_s2", L1: 3.98302e+06, err1: 0.00350697, L2: 3.00735e+08, err2: 0.00623133)
        checkMetrics("inception_3a_1x1", L1: 1.3767e+06, err1: 0.00471357, L2: 1.33713e+08, err2: 0.00898961)
        checkMetrics("inception_3a_3x3_reduce", L1: 1.7166e+06, err1: 0.00479199, L2: 1.41277e+08, err2: 0.00907613)
        checkMetrics("inception_3a_3x3", L1: 2.32059e+06, err1: 0.00638276, L2: 2.87686e+08, err2: 0.0125438)
        checkMetrics("inception_3a_5x5_reduce", L1: 418858.0, err1: 0.00444778, L2: 3.75757e+07, err2: 0.00869691)
        checkMetrics("inception_3a_5x5", L1: 697954.0, err1: 0.0063399, L2: 8.09328e+07, err2: 0.0124509)
        checkMetrics("inception_3a_pool", L1: 8.83716e+06, err1: 0.00327816, L2: 7.66142e+08, err2: 0.00578735)
        checkMetrics("inception_3a_pool_proj", L1: 460720.0, err1: 0.00190647, L2: 3.96342e+07, err2: 0.00575425)
        checkMetrics("inception_3a_output", L1: 4.85596e+06, err1: 0.00547751, L2: 5.41964e+08, err2: 0.0111515)
        checkMetrics("inception_3b_output", L1: 2.07071e+06, err1: 0.00836278, L2: 1.73487e+08, err2: 0.0161684)
        checkMetrics("inception_4a_output", L1: 803036.0, err1: 0.0111183, L2: 7.60488e+07, err2: 0.0219395)
        checkMetrics("inception_4b_output", L1: 1.17206e+06, err1: 0.0128661, L2: 7.70931e+07, err2: 0.0259412)
        checkMetrics("inception_4c_output", L1: 907352.0, err1: 0.0146419, L2: 6.21977e+07, err2: 0.0296205)
        checkMetrics("inception_4d_output", L1: 464166.0, err1: 0.0144568, L2: 2.76898e+07, err2: 0.0308716)
        checkMetrics("pool5_7x7_s1", L1: 584.41, err1: 0.00304463, L2: 1351.04, err2: 0.0102279)

        self.measureBlock {
            for _ in 1...1 {
                self.net!.forward(self.cat!)
            }
        }
    }

    /// Profiles every layer that has at least one shard and at most one bottom.
    func testLargeConvolution() {
        for layer in net!.layers {
            if layer.shards == 0 || layer.bottoms.count > 1 {
                continue
            }
            net!.ProfileLayer(layer.name)
        }
    }
}
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MetalDetector 2 | GoogLeNet on iOS demo. 3 | 4 | **Note:** this demo was implemented before TensorFlow, PyTorch or Core ML existed. It's preserved for historical purposes only. 5 | 6 | This is a basic demo of GoogLeNet from Caffe Zoo working on iOS. 7 | It takes the input from the camera and prints the top prediction. 8 | The speed is 1 FPS on iPhone 6S, and 3 seconds per frame on iPhone 6. 9 | 10 | Two known bugs: 11 | 12 | * You need to hold the phone so that the round button is on the right. 13 | Otherwise, the network will get a rotated image, and the classification will likely miss. 14 | 15 | * There's a rounding bug right now (likely, in Convolution layers). While the network gives answers 16 | which are within ~3% of Caffe output, it's slightly worse than a real GoogLeNet taken from the Caffe Zoo. 17 | 18 | Also, please, be aware that ImageNet classes are weird, not so many real world things could be detected (but it knows about 300 breeds of dogs, whee!) 19 | 20 | The code was generated with [Goffe](https://bitbucket.org/krasin/goffe), Caffe-to-{Swift+Metal} generator.
21 | --------------------------------------------------------------------------------