├── .gitignore ├── ActionKitDemo ├── ActionKitDemo.xcodeproj │ ├── project.pbxproj │ └── project.xcworkspace │ │ ├── contents.xcworkspacedata │ │ └── xcshareddata │ │ ├── IDEWorkspaceChecks.plist │ │ └── swiftpm │ │ └── Package.resolved └── ActionKitDemo │ ├── AppDelegate.swift │ ├── Assets.xcassets │ ├── AccentColor.colorset │ │ └── Contents.json │ ├── AppIcon.appiconset │ │ └── Contents.json │ └── Contents.json │ ├── Base.lproj │ └── LaunchScreen.storyboard │ ├── ContentView.swift │ ├── Info.plist │ ├── Preview Content │ └── Preview Assets.xcassets │ │ └── Contents.json │ ├── SceneDelegate.swift │ └── VideoPicker.swift ├── LICENSE ├── Package.swift ├── README.md ├── Sources └── VisualActionKit │ ├── Classifier.swift │ ├── CoreMLHelpers │ ├── Array+Extensions.swift │ ├── CGImage+CVPixelBuffer.swift │ ├── CGImage+RawBytes.swift │ ├── CGImagePropertyOrientation.swift │ ├── CVPixelBuffer+Helpers.swift │ ├── CoreML+Combine.swift │ ├── CoreMLHelpers.h │ ├── MLModel+Images.swift │ ├── MLMultiArray+Helpers.swift │ ├── MLMultiArray+Image.swift │ ├── Math.swift │ ├── MultiArray.swift │ ├── NonMaxSuppression.swift │ ├── Predictions.swift │ ├── UIImage+CVPixelBuffer.swift │ ├── UIImage+Extensions.swift │ └── UIImage+RawBytes.swift │ ├── Kinetics.mlmodel │ └── Kinetics.swift └── Tests ├── LinuxMain.swift └── VisualActionKitTests ├── Test Videos ├── cricketShot.mp4 ├── making tea.mp4 ├── reading book.mp4 ├── rock scissors paper.mp4 ├── stretching arm.mp4 └── writing.mp4 ├── VisualActionKitTests.swift └── XCTestManifests.swift /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /.build 3 | /Packages 4 | /*.xcodeproj 5 | xcuserdata/ 6 | .swiftpm 7 | !*.xcworkspace/contents.xcworkspacedata 8 | -------------------------------------------------------------------------------- /ActionKitDemo/ActionKitDemo.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // 
!$*UTF8*$! 2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 52; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | C32542DA24DA787D000F2475 /* VisualActionKit in Frameworks */ = {isa = PBXBuildFile; productRef = C32542D924DA787D000F2475 /* VisualActionKit */; }; 11 | C34B91A424DA06E400E38993 /* VideoPicker.swift in Sources */ = {isa = PBXBuildFile; fileRef = C34B91A324DA06E400E38993 /* VideoPicker.swift */; }; 12 | C3E8767224D612DC0013AE9A /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3E8767124D612DC0013AE9A /* AppDelegate.swift */; }; 13 | C3E8767424D612DC0013AE9A /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3E8767324D612DC0013AE9A /* SceneDelegate.swift */; }; 14 | C3E8767624D612DC0013AE9A /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3E8767524D612DC0013AE9A /* ContentView.swift */; }; 15 | C3E8767824D612DD0013AE9A /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C3E8767724D612DD0013AE9A /* Assets.xcassets */; }; 16 | C3E8767B24D612DD0013AE9A /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C3E8767A24D612DD0013AE9A /* Preview Assets.xcassets */; }; 17 | C3E8767E24D612DD0013AE9A /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = C3E8767C24D612DD0013AE9A /* LaunchScreen.storyboard */; }; 18 | /* End PBXBuildFile section */ 19 | 20 | /* Begin PBXFileReference section */ 21 | C34B91A324DA06E400E38993 /* VideoPicker.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VideoPicker.swift; sourceTree = ""; }; 22 | C3E8766E24D612DC0013AE9A /* ActionKitDemo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = ActionKitDemo.app; sourceTree = BUILT_PRODUCTS_DIR; }; 23 | C3E8767124D612DC0013AE9A /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; 
}; 24 | C3E8767324D612DC0013AE9A /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = ""; }; 25 | C3E8767524D612DC0013AE9A /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = ""; }; 26 | C3E8767724D612DD0013AE9A /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; 27 | C3E8767A24D612DD0013AE9A /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = ""; }; 28 | C3E8767D24D612DD0013AE9A /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = ""; }; 29 | C3E8767F24D612DD0013AE9A /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 30 | /* End PBXFileReference section */ 31 | 32 | /* Begin PBXFrameworksBuildPhase section */ 33 | C3E8766B24D612DC0013AE9A /* Frameworks */ = { 34 | isa = PBXFrameworksBuildPhase; 35 | buildActionMask = 2147483647; 36 | files = ( 37 | C32542DA24DA787D000F2475 /* VisualActionKit in Frameworks */, 38 | ); 39 | runOnlyForDeploymentPostprocessing = 0; 40 | }; 41 | /* End PBXFrameworksBuildPhase section */ 42 | 43 | /* Begin PBXGroup section */ 44 | C3E8766524D612DC0013AE9A = { 45 | isa = PBXGroup; 46 | children = ( 47 | C3E8767024D612DC0013AE9A /* ActionKitDemo */, 48 | C3E8766F24D612DC0013AE9A /* Products */, 49 | ); 50 | sourceTree = ""; 51 | }; 52 | C3E8766F24D612DC0013AE9A /* Products */ = { 53 | isa = PBXGroup; 54 | children = ( 55 | C3E8766E24D612DC0013AE9A /* ActionKitDemo.app */, 56 | ); 57 | name = Products; 58 | sourceTree = ""; 59 | }; 60 | C3E8767024D612DC0013AE9A /* ActionKitDemo */ = { 61 | isa = PBXGroup; 62 | children = ( 63 | C3E8767124D612DC0013AE9A /* 
AppDelegate.swift */, 64 | C3E8767324D612DC0013AE9A /* SceneDelegate.swift */, 65 | C3E8767524D612DC0013AE9A /* ContentView.swift */, 66 | C34B91A324DA06E400E38993 /* VideoPicker.swift */, 67 | C3E8767724D612DD0013AE9A /* Assets.xcassets */, 68 | C3E8767C24D612DD0013AE9A /* LaunchScreen.storyboard */, 69 | C3E8767F24D612DD0013AE9A /* Info.plist */, 70 | C3E8767924D612DD0013AE9A /* Preview Content */, 71 | ); 72 | path = ActionKitDemo; 73 | sourceTree = ""; 74 | }; 75 | C3E8767924D612DD0013AE9A /* Preview Content */ = { 76 | isa = PBXGroup; 77 | children = ( 78 | C3E8767A24D612DD0013AE9A /* Preview Assets.xcassets */, 79 | ); 80 | path = "Preview Content"; 81 | sourceTree = ""; 82 | }; 83 | /* End PBXGroup section */ 84 | 85 | /* Begin PBXNativeTarget section */ 86 | C3E8766D24D612DC0013AE9A /* ActionKitDemo */ = { 87 | isa = PBXNativeTarget; 88 | buildConfigurationList = C3E8768224D612DD0013AE9A /* Build configuration list for PBXNativeTarget "ActionKitDemo" */; 89 | buildPhases = ( 90 | C3E8766A24D612DC0013AE9A /* Sources */, 91 | C3E8766B24D612DC0013AE9A /* Frameworks */, 92 | C3E8766C24D612DC0013AE9A /* Resources */, 93 | ); 94 | buildRules = ( 95 | ); 96 | dependencies = ( 97 | ); 98 | name = ActionKitDemo; 99 | packageProductDependencies = ( 100 | C32542D924DA787D000F2475 /* VisualActionKit */, 101 | ); 102 | productName = ActionKitDemo; 103 | productReference = C3E8766E24D612DC0013AE9A /* ActionKitDemo.app */; 104 | productType = "com.apple.product-type.application"; 105 | }; 106 | /* End PBXNativeTarget section */ 107 | 108 | /* Begin PBXProject section */ 109 | C3E8766624D612DC0013AE9A /* Project object */ = { 110 | isa = PBXProject; 111 | attributes = { 112 | LastSwiftUpdateCheck = 1200; 113 | LastUpgradeCheck = 1200; 114 | TargetAttributes = { 115 | C3E8766D24D612DC0013AE9A = { 116 | CreatedOnToolsVersion = 12.0; 117 | }; 118 | }; 119 | }; 120 | buildConfigurationList = C3E8766924D612DC0013AE9A /* Build configuration list for PBXProject "ActionKitDemo" 
*/; 121 | compatibilityVersion = "Xcode 9.3"; 122 | developmentRegion = en; 123 | hasScannedForEncodings = 0; 124 | knownRegions = ( 125 | en, 126 | Base, 127 | ); 128 | mainGroup = C3E8766524D612DC0013AE9A; 129 | packageReferences = ( 130 | C32542D824DA787D000F2475 /* XCRemoteSwiftPackageReference "VisualActionKit" */, 131 | ); 132 | productRefGroup = C3E8766F24D612DC0013AE9A /* Products */; 133 | projectDirPath = ""; 134 | projectRoot = ""; 135 | targets = ( 136 | C3E8766D24D612DC0013AE9A /* ActionKitDemo */, 137 | ); 138 | }; 139 | /* End PBXProject section */ 140 | 141 | /* Begin PBXResourcesBuildPhase section */ 142 | C3E8766C24D612DC0013AE9A /* Resources */ = { 143 | isa = PBXResourcesBuildPhase; 144 | buildActionMask = 2147483647; 145 | files = ( 146 | C3E8767E24D612DD0013AE9A /* LaunchScreen.storyboard in Resources */, 147 | C3E8767B24D612DD0013AE9A /* Preview Assets.xcassets in Resources */, 148 | C3E8767824D612DD0013AE9A /* Assets.xcassets in Resources */, 149 | ); 150 | runOnlyForDeploymentPostprocessing = 0; 151 | }; 152 | /* End PBXResourcesBuildPhase section */ 153 | 154 | /* Begin PBXSourcesBuildPhase section */ 155 | C3E8766A24D612DC0013AE9A /* Sources */ = { 156 | isa = PBXSourcesBuildPhase; 157 | buildActionMask = 2147483647; 158 | files = ( 159 | C3E8767224D612DC0013AE9A /* AppDelegate.swift in Sources */, 160 | C34B91A424DA06E400E38993 /* VideoPicker.swift in Sources */, 161 | C3E8767424D612DC0013AE9A /* SceneDelegate.swift in Sources */, 162 | C3E8767624D612DC0013AE9A /* ContentView.swift in Sources */, 163 | ); 164 | runOnlyForDeploymentPostprocessing = 0; 165 | }; 166 | /* End PBXSourcesBuildPhase section */ 167 | 168 | /* Begin PBXVariantGroup section */ 169 | C3E8767C24D612DD0013AE9A /* LaunchScreen.storyboard */ = { 170 | isa = PBXVariantGroup; 171 | children = ( 172 | C3E8767D24D612DD0013AE9A /* Base */, 173 | ); 174 | name = LaunchScreen.storyboard; 175 | sourceTree = ""; 176 | }; 177 | /* End PBXVariantGroup section */ 178 | 179 | /* 
Begin XCBuildConfiguration section */ 180 | C3E8768024D612DD0013AE9A /* Debug */ = { 181 | isa = XCBuildConfiguration; 182 | buildSettings = { 183 | ALWAYS_SEARCH_USER_PATHS = NO; 184 | CLANG_ANALYZER_NONNULL = YES; 185 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 186 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 187 | CLANG_CXX_LIBRARY = "libc++"; 188 | CLANG_ENABLE_MODULES = YES; 189 | CLANG_ENABLE_OBJC_ARC = YES; 190 | CLANG_ENABLE_OBJC_WEAK = YES; 191 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 192 | CLANG_WARN_BOOL_CONVERSION = YES; 193 | CLANG_WARN_COMMA = YES; 194 | CLANG_WARN_CONSTANT_CONVERSION = YES; 195 | CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; 196 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 197 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 198 | CLANG_WARN_EMPTY_BODY = YES; 199 | CLANG_WARN_ENUM_CONVERSION = YES; 200 | CLANG_WARN_INFINITE_RECURSION = YES; 201 | CLANG_WARN_INT_CONVERSION = YES; 202 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; 203 | CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; 204 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; 205 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 206 | CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; 207 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 208 | CLANG_WARN_STRICT_PROTOTYPES = YES; 209 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 210 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 211 | CLANG_WARN_UNREACHABLE_CODE = YES; 212 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 213 | COPY_PHASE_STRIP = NO; 214 | DEBUG_INFORMATION_FORMAT = dwarf; 215 | ENABLE_STRICT_OBJC_MSGSEND = YES; 216 | ENABLE_TESTABILITY = YES; 217 | GCC_C_LANGUAGE_STANDARD = gnu11; 218 | GCC_DYNAMIC_NO_PIC = NO; 219 | GCC_NO_COMMON_BLOCKS = YES; 220 | GCC_OPTIMIZATION_LEVEL = 0; 221 | GCC_PREPROCESSOR_DEFINITIONS = ( 222 | "DEBUG=1", 223 | "$(inherited)", 224 | ); 225 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 226 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 227 | GCC_WARN_UNDECLARED_SELECTOR = YES; 228 | 
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 229 | GCC_WARN_UNUSED_FUNCTION = YES; 230 | GCC_WARN_UNUSED_VARIABLE = YES; 231 | IPHONEOS_DEPLOYMENT_TARGET = 14.0; 232 | MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; 233 | MTL_FAST_MATH = YES; 234 | ONLY_ACTIVE_ARCH = YES; 235 | SDKROOT = iphoneos; 236 | SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; 237 | SWIFT_OPTIMIZATION_LEVEL = "-Onone"; 238 | }; 239 | name = Debug; 240 | }; 241 | C3E8768124D612DD0013AE9A /* Release */ = { 242 | isa = XCBuildConfiguration; 243 | buildSettings = { 244 | ALWAYS_SEARCH_USER_PATHS = NO; 245 | CLANG_ANALYZER_NONNULL = YES; 246 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 247 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 248 | CLANG_CXX_LIBRARY = "libc++"; 249 | CLANG_ENABLE_MODULES = YES; 250 | CLANG_ENABLE_OBJC_ARC = YES; 251 | CLANG_ENABLE_OBJC_WEAK = YES; 252 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 253 | CLANG_WARN_BOOL_CONVERSION = YES; 254 | CLANG_WARN_COMMA = YES; 255 | CLANG_WARN_CONSTANT_CONVERSION = YES; 256 | CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; 257 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 258 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 259 | CLANG_WARN_EMPTY_BODY = YES; 260 | CLANG_WARN_ENUM_CONVERSION = YES; 261 | CLANG_WARN_INFINITE_RECURSION = YES; 262 | CLANG_WARN_INT_CONVERSION = YES; 263 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; 264 | CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; 265 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; 266 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 267 | CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; 268 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 269 | CLANG_WARN_STRICT_PROTOTYPES = YES; 270 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 271 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 272 | CLANG_WARN_UNREACHABLE_CODE = YES; 273 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 274 | COPY_PHASE_STRIP = NO; 275 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 276 | ENABLE_NS_ASSERTIONS = NO; 277 | 
ENABLE_STRICT_OBJC_MSGSEND = YES; 278 | GCC_C_LANGUAGE_STANDARD = gnu11; 279 | GCC_NO_COMMON_BLOCKS = YES; 280 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 281 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 282 | GCC_WARN_UNDECLARED_SELECTOR = YES; 283 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 284 | GCC_WARN_UNUSED_FUNCTION = YES; 285 | GCC_WARN_UNUSED_VARIABLE = YES; 286 | IPHONEOS_DEPLOYMENT_TARGET = 14.0; 287 | MTL_ENABLE_DEBUG_INFO = NO; 288 | MTL_FAST_MATH = YES; 289 | SDKROOT = iphoneos; 290 | SWIFT_COMPILATION_MODE = wholemodule; 291 | SWIFT_OPTIMIZATION_LEVEL = "-O"; 292 | VALIDATE_PRODUCT = YES; 293 | }; 294 | name = Release; 295 | }; 296 | C3E8768324D612DD0013AE9A /* Debug */ = { 297 | isa = XCBuildConfiguration; 298 | buildSettings = { 299 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 300 | ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; 301 | CODE_SIGN_STYLE = Automatic; 302 | DEVELOPMENT_ASSET_PATHS = "\"ActionKitDemo/Preview Content\""; 303 | DEVELOPMENT_TEAM = 79X9YF69H4; 304 | ENABLE_PREVIEWS = YES; 305 | INFOPLIST_FILE = ActionKitDemo/Info.plist; 306 | LD_RUNPATH_SEARCH_PATHS = ( 307 | "$(inherited)", 308 | "@executable_path/Frameworks", 309 | ); 310 | PRODUCT_BUNDLE_IDENTIFIER = com.reikam.ActionKitDemo; 311 | PRODUCT_NAME = "$(TARGET_NAME)"; 312 | SWIFT_VERSION = 5.0; 313 | TARGETED_DEVICE_FAMILY = "1,2"; 314 | }; 315 | name = Debug; 316 | }; 317 | C3E8768424D612DD0013AE9A /* Release */ = { 318 | isa = XCBuildConfiguration; 319 | buildSettings = { 320 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 321 | ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; 322 | CODE_SIGN_STYLE = Automatic; 323 | DEVELOPMENT_ASSET_PATHS = "\"ActionKitDemo/Preview Content\""; 324 | DEVELOPMENT_TEAM = 79X9YF69H4; 325 | ENABLE_PREVIEWS = YES; 326 | INFOPLIST_FILE = ActionKitDemo/Info.plist; 327 | LD_RUNPATH_SEARCH_PATHS = ( 328 | "$(inherited)", 329 | "@executable_path/Frameworks", 330 | ); 331 | PRODUCT_BUNDLE_IDENTIFIER = 
com.reikam.ActionKitDemo; 332 | PRODUCT_NAME = "$(TARGET_NAME)"; 333 | SWIFT_VERSION = 5.0; 334 | TARGETED_DEVICE_FAMILY = "1,2"; 335 | }; 336 | name = Release; 337 | }; 338 | /* End XCBuildConfiguration section */ 339 | 340 | /* Begin XCConfigurationList section */ 341 | C3E8766924D612DC0013AE9A /* Build configuration list for PBXProject "ActionKitDemo" */ = { 342 | isa = XCConfigurationList; 343 | buildConfigurations = ( 344 | C3E8768024D612DD0013AE9A /* Debug */, 345 | C3E8768124D612DD0013AE9A /* Release */, 346 | ); 347 | defaultConfigurationIsVisible = 0; 348 | defaultConfigurationName = Release; 349 | }; 350 | C3E8768224D612DD0013AE9A /* Build configuration list for PBXNativeTarget "ActionKitDemo" */ = { 351 | isa = XCConfigurationList; 352 | buildConfigurations = ( 353 | C3E8768324D612DD0013AE9A /* Debug */, 354 | C3E8768424D612DD0013AE9A /* Release */, 355 | ); 356 | defaultConfigurationIsVisible = 0; 357 | defaultConfigurationName = Release; 358 | }; 359 | /* End XCConfigurationList section */ 360 | 361 | /* Begin XCRemoteSwiftPackageReference section */ 362 | C32542D824DA787D000F2475 /* XCRemoteSwiftPackageReference "VisualActionKit" */ = { 363 | isa = XCRemoteSwiftPackageReference; 364 | repositoryURL = "https://github.com/lukereichold/VisualActionKit.git"; 365 | requirement = { 366 | kind = upToNextMajorVersion; 367 | minimumVersion = 0.1.1; 368 | }; 369 | }; 370 | /* End XCRemoteSwiftPackageReference section */ 371 | 372 | /* Begin XCSwiftPackageProductDependency section */ 373 | C32542D924DA787D000F2475 /* VisualActionKit */ = { 374 | isa = XCSwiftPackageProductDependency; 375 | package = C32542D824DA787D000F2475 /* XCRemoteSwiftPackageReference "VisualActionKit" */; 376 | productName = VisualActionKit; 377 | }; 378 | /* End XCSwiftPackageProductDependency section */ 379 | }; 380 | rootObject = C3E8766624D612DC0013AE9A /* Project object */; 381 | } 382 | -------------------------------------------------------------------------------- 
/ActionKitDemo/ActionKitDemo.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ActionKitDemo/ActionKitDemo.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | IDEDidComputeMac32BitWarning 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /ActionKitDemo/ActionKitDemo.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved: -------------------------------------------------------------------------------- 1 | { 2 | "object": { 3 | "pins": [ 4 | { 5 | "package": "VisualActionKit", 6 | "repositoryURL": "https://github.com/lukereichold/VisualActionKit.git", 7 | "state": { 8 | "branch": null, 9 | "revision": "df505507c32b16d39c18584fad482eebe6e7b3f5", 10 | "version": "0.1.1" 11 | } 12 | } 13 | ] 14 | }, 15 | "version": 1 16 | } 17 | -------------------------------------------------------------------------------- /ActionKitDemo/ActionKitDemo/AppDelegate.swift: -------------------------------------------------------------------------------- 1 | import UIKit 2 | 3 | @main 4 | class AppDelegate: UIResponder, UIApplicationDelegate { 5 | 6 | func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool { 7 | // Override point for customization after application launch. 8 | return true 9 | } 10 | 11 | // MARK: UISceneSession Lifecycle 12 | 13 | func application(_ application: UIApplication, configurationForConnecting connectingSceneSession: UISceneSession, options: UIScene.ConnectionOptions) -> UISceneConfiguration { 14 | // Called when a new scene session is being created. 
15 | // Use this method to select a configuration to create the new scene with. 16 | return UISceneConfiguration(name: "Default Configuration", sessionRole: connectingSceneSession.role) 17 | } 18 | 19 | func application(_ application: UIApplication, didDiscardSceneSessions sceneSessions: Set) { 20 | // Called when the user discards a scene session. 21 | // If any sessions were discarded while the application was not running, this will be called shortly after application:didFinishLaunchingWithOptions. 22 | // Use this method to release any resources that were specific to the discarded scenes, as they will not return. 23 | } 24 | 25 | 26 | } 27 | 28 | -------------------------------------------------------------------------------- /ActionKitDemo/ActionKitDemo/Assets.xcassets/AccentColor.colorset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "colors" : [ 3 | { 4 | "idiom" : "universal" 5 | } 6 | ], 7 | "info" : { 8 | "author" : "xcode", 9 | "version" : 1 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /ActionKitDemo/ActionKitDemo/Assets.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "iphone", 5 | "scale" : "2x", 6 | "size" : "20x20" 7 | }, 8 | { 9 | "idiom" : "iphone", 10 | "scale" : "3x", 11 | "size" : "20x20" 12 | }, 13 | { 14 | "idiom" : "iphone", 15 | "scale" : "2x", 16 | "size" : "29x29" 17 | }, 18 | { 19 | "idiom" : "iphone", 20 | "scale" : "3x", 21 | "size" : "29x29" 22 | }, 23 | { 24 | "idiom" : "iphone", 25 | "scale" : "2x", 26 | "size" : "40x40" 27 | }, 28 | { 29 | "idiom" : "iphone", 30 | "scale" : "3x", 31 | "size" : "40x40" 32 | }, 33 | { 34 | "idiom" : "iphone", 35 | "scale" : "2x", 36 | "size" : "60x60" 37 | }, 38 | { 39 | "idiom" : "iphone", 40 | "scale" : "3x", 41 | "size" : "60x60" 42 | }, 43 | { 44 | "idiom" : "ipad", 45 
| "scale" : "1x", 46 | "size" : "20x20" 47 | }, 48 | { 49 | "idiom" : "ipad", 50 | "scale" : "2x", 51 | "size" : "20x20" 52 | }, 53 | { 54 | "idiom" : "ipad", 55 | "scale" : "1x", 56 | "size" : "29x29" 57 | }, 58 | { 59 | "idiom" : "ipad", 60 | "scale" : "2x", 61 | "size" : "29x29" 62 | }, 63 | { 64 | "idiom" : "ipad", 65 | "scale" : "1x", 66 | "size" : "40x40" 67 | }, 68 | { 69 | "idiom" : "ipad", 70 | "scale" : "2x", 71 | "size" : "40x40" 72 | }, 73 | { 74 | "idiom" : "ipad", 75 | "scale" : "1x", 76 | "size" : "76x76" 77 | }, 78 | { 79 | "idiom" : "ipad", 80 | "scale" : "2x", 81 | "size" : "76x76" 82 | }, 83 | { 84 | "idiom" : "ipad", 85 | "scale" : "2x", 86 | "size" : "83.5x83.5" 87 | }, 88 | { 89 | "idiom" : "ios-marketing", 90 | "scale" : "1x", 91 | "size" : "1024x1024" 92 | } 93 | ], 94 | "info" : { 95 | "author" : "xcode", 96 | "version" : 1 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /ActionKitDemo/ActionKitDemo/Assets.xcassets/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "info" : { 3 | "author" : "xcode", 4 | "version" : 1 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /ActionKitDemo/ActionKitDemo/Base.lproj/LaunchScreen.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /ActionKitDemo/ActionKitDemo/ContentView.swift: -------------------------------------------------------------------------------- 1 | import SwiftUI 2 | import VisualActionKit 3 | import AVKit 4 | 5 | struct Results: Identifiable { 6 | let id = UUID() 7 | let text: String 8 | } 9 | 10 | struct ContentView: View { 11 | @State private var showingFilePicker = false 12 | @State private var 
showSpinner = false 13 | @State private var videoUrl: URL? 14 | @State private var results: Results? 15 | 16 | var body: some View { 17 | NavigationView { 18 | VStack { 19 | Button(action: { 20 | showingFilePicker.toggle() 21 | }) { 22 | Text("Select video...") 23 | } 24 | .padding(.bottom, 100) 25 | .alert(item: $results) { message in 26 | Alert( 27 | title: Text(results?.text ?? ""), 28 | dismissButton: .default(Text("Dismiss")) 29 | ) 30 | } 31 | 32 | if $showSpinner.wrappedValue { 33 | ProgressView("Classifying video...") 34 | } 35 | } 36 | 37 | .navigationBarTitle("Video Classifier Demo") 38 | .sheet(isPresented: $showingFilePicker, 39 | onDismiss: videoSelected) { 40 | VideoPicker(videoUrl: self.$videoUrl) 41 | } 42 | } 43 | } 44 | 45 | func videoSelected() { 46 | guard let url = videoUrl else { return } 47 | let asset = AVAsset(url: url) 48 | 49 | showSpinner.toggle() 50 | defer { showSpinner.toggle() } 51 | 52 | DispatchQueue.global(qos: .default).async { 53 | do { 54 | try Classifier.shared.classify(asset) { predictions in 55 | results = Results(text: predictions.description) 56 | } 57 | } catch { 58 | debugPrint(error) 59 | } 60 | } 61 | } 62 | } 63 | 64 | struct ContentView_Previews: PreviewProvider { 65 | static var previews: some View { 66 | Group { 67 | ContentView() 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /ActionKitDemo/ActionKitDemo/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | $(PRODUCT_BUNDLE_PACKAGE_TYPE) 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | 1 21 | LSRequiresIPhoneOS 22 | 23 | UIApplicationSceneManifest 24 | 25 | 
UIApplicationSupportsMultipleScenes 26 | 27 | UISceneConfigurations 28 | 29 | UIWindowSceneSessionRoleApplication 30 | 31 | 32 | UISceneConfigurationName 33 | Default Configuration 34 | UISceneDelegateClassName 35 | $(PRODUCT_MODULE_NAME).SceneDelegate 36 | 37 | 38 | 39 | 40 | UIApplicationSupportsIndirectInputEvents 41 | 42 | UILaunchStoryboardName 43 | LaunchScreen 44 | UIRequiredDeviceCapabilities 45 | 46 | armv7 47 | 48 | UISupportedInterfaceOrientations 49 | 50 | UIInterfaceOrientationPortrait 51 | UIInterfaceOrientationLandscapeLeft 52 | UIInterfaceOrientationLandscapeRight 53 | 54 | UISupportedInterfaceOrientations~ipad 55 | 56 | UIInterfaceOrientationPortrait 57 | UIInterfaceOrientationPortraitUpsideDown 58 | UIInterfaceOrientationLandscapeLeft 59 | UIInterfaceOrientationLandscapeRight 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /ActionKitDemo/ActionKitDemo/Preview Content/Preview Assets.xcassets/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "info" : { 3 | "author" : "xcode", 4 | "version" : 1 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /ActionKitDemo/ActionKitDemo/SceneDelegate.swift: -------------------------------------------------------------------------------- 1 | import UIKit 2 | import SwiftUI 3 | 4 | class SceneDelegate: UIResponder, UIWindowSceneDelegate { 5 | 6 | var window: UIWindow? 7 | 8 | func scene(_ scene: UIScene, willConnectTo session: UISceneSession, options connectionOptions: UIScene.ConnectionOptions) { 9 | // Use this method to optionally configure and attach the UIWindow `window` to the provided UIWindowScene `scene`. 10 | // If using a storyboard, the `window` property will automatically be initialized and attached to the scene. 
11 | // This delegate does not imply the connecting scene or session are new (see `application:configurationForConnectingSceneSession` instead). 12 | 13 | // Create the SwiftUI view that provides the window contents. 14 | let contentView = ContentView() 15 | 16 | // Use a UIHostingController as window root view controller. 17 | if let windowScene = scene as? UIWindowScene { 18 | let window = UIWindow(windowScene: windowScene) 19 | window.rootViewController = UIHostingController(rootView: contentView) 20 | self.window = window 21 | window.makeKeyAndVisible() 22 | } 23 | } 24 | 25 | func sceneDidDisconnect(_ scene: UIScene) { 26 | // Called as the scene is being released by the system. 27 | // This occurs shortly after the scene enters the background, or when its session is discarded. 28 | // Release any resources associated with this scene that can be re-created the next time the scene connects. 29 | // The scene may re-connect later, as its session was not necessarily discarded (see `application:didDiscardSceneSessions` instead). 30 | } 31 | 32 | func sceneDidBecomeActive(_ scene: UIScene) { 33 | // Called when the scene has moved from an inactive state to an active state. 34 | // Use this method to restart any tasks that were paused (or not yet started) when the scene was inactive. 35 | } 36 | 37 | func sceneWillResignActive(_ scene: UIScene) { 38 | // Called when the scene will move from an active state to an inactive state. 39 | // This may occur due to temporary interruptions (ex. an incoming phone call). 40 | } 41 | 42 | func sceneWillEnterForeground(_ scene: UIScene) { 43 | // Called as the scene transitions from the background to the foreground. 44 | // Use this method to undo the changes made on entering the background. 45 | } 46 | 47 | func sceneDidEnterBackground(_ scene: UIScene) { 48 | // Called as the scene transitions from the foreground to the background. 
49 | // Use this method to save data, release shared resources, and store enough scene-specific state information 50 | // to restore the scene back to its current state. 51 | } 52 | 53 | 54 | } 55 | 56 | -------------------------------------------------------------------------------- /ActionKitDemo/ActionKitDemo/VideoPicker.swift: -------------------------------------------------------------------------------- 1 | import SwiftUI 2 | 3 | struct VideoPicker: UIViewControllerRepresentable { 4 | @Environment(\.presentationMode) var presentationMode 5 | @Binding var videoUrl: URL? 6 | 7 | func makeUIViewController(context: UIViewControllerRepresentableContext) -> UIImagePickerController { 8 | let picker = UIImagePickerController() 9 | picker.delegate = context.coordinator 10 | picker.mediaTypes = ["public.movie"] 11 | return picker 12 | } 13 | 14 | func updateUIViewController(_ uiViewController: UIImagePickerController, context: UIViewControllerRepresentableContext) { } 15 | 16 | func makeCoordinator() -> Coordinator { 17 | Coordinator(self) 18 | } 19 | 20 | class Coordinator: NSObject, UINavigationControllerDelegate, UIImagePickerControllerDelegate { 21 | let parent: VideoPicker 22 | 23 | init(_ parent: VideoPicker) { 24 | self.parent = parent 25 | } 26 | 27 | func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [UIImagePickerController.InfoKey : Any]) { 28 | 29 | parent.videoUrl = info[.mediaURL] as? 
URL 30 | parent.presentationMode.wrappedValue.dismiss() 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Luke Reichold 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version:5.3 2 | // The swift-tools-version declares the minimum version of Swift required to build this package. 
3 | 4 | import PackageDescription 5 | 6 | let package = Package( 7 | name: "VisualActionKit", 8 | platforms: [ 9 | .macOS(.v10_15), 10 | .iOS(.v14), 11 | ], 12 | products: [ 13 | .library( 14 | name: "VisualActionKit", 15 | targets: ["VisualActionKit"]), 16 | ], 17 | dependencies: [], 18 | targets: [ 19 | .target( 20 | name: "VisualActionKit", 21 | dependencies: [], 22 | resources: [ 23 | .process("Kinetics.mlmodel") 24 | ]), 25 | .testTarget( 26 | name: "VisualActionKitTests", 27 | dependencies: ["VisualActionKit"], 28 | resources: [ 29 | .process("Test Videos") 30 | ]) 31 | ] 32 | ) 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VisualActionKit 2 | 3 | ![](https://img.shields.io/badge/Swift-5.3-orange.svg) 4 | [![GitHub license](https://img.shields.io/badge/license-MIT-lightgrey.svg)](https://github.com/lukereichold/VisualActionKit/blob/master/LICENSE) 5 | [![SPM compatible](https://img.shields.io/badge/spm-compatible-brightgreen.svg?style=flat)](https://swift.org/package-manager) 6 | [![Twitter](https://img.shields.io/badge/twitter-@lreichold-blue.svg?style=flat)](https://twitter.com/lreichold) 7 | 8 | Human action classification for video, offline and natively on iOS via Core ML 9 | 10 | Uses the [`kinetics-i3d`]() model to classify videos into one of 400 different action classes defined in [Kinetics 400](https://deepmind.com/research/open-source/kinetics) 11 | 12 | ⚠️ _This project requires Xcode 12._ 13 | 14 | **Reference**: [See accompanying blog post](https://lukereichold.com/blog/posts/video-action-classification-coreml/) 15 | 16 | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/11Cr8w_T53bPLHPvLfZteAxHhtjgBS-LH) 17 | 18 | ## Installation 19 | 20 | To install via [Swift Package Manager](https://swift.org/package-manager), add `VisualActionKit` to your 
import Foundation
import AVKit
import CoreML

/// Video action classifier backed by the Kinetics-400 I3D Core ML model.
public class Classifier {
    public static let shared = Classifier()
    /// Side length (pixels) of the square center crop fed to the model.
    let frameSize = 224
    let model: Kinetics

    private init() {
        // The package ships the raw .mlmodel as a bundle resource and compiles
        // it at first use. `!`/`try!` are deliberate: a missing or broken
        // bundled model is a packaging error that should crash immediately.
        let modelUrl = Bundle.module.url(forResource: "Kinetics", withExtension: "mlmodel")!
        let compiledModelURL = try! MLModel.compileModel(at: modelUrl)
        let mlModel = try! MLModel(contentsOf: compiledModelURL)
        model = Kinetics(model: mlModel)
    }
}

public extension Classifier {

    /// Ranked `(label, probability)` pairs, most probable first.
    typealias Predictions = [(classLabel: String, probability: Double)]

    enum ProcessingError: Error {
        /// The asset has fewer than 25 or more than 300 video frames.
        case unsupportedFrameCount
        /// A frame's shorter side is below the 224-px crop size.
        case videoFrameIsTooSmall
        /// vImage failed to produce the scaled pixel buffer.
        case resizingFailure
    }

    /// Decodes `asset` frame by frame, builds the model's 5-D input tensor and
    /// calls `completion` synchronously with the top-5 predictions.
    ///
    /// - Parameters:
    ///   - asset: A video with 25...300 frames whose shorter side is >= 224 px.
    ///   - completion: Invoked exactly once with the ranked predictions.
    /// - Throws: `ProcessingError`, or any `AVAssetReader` / Core ML error.
    func classify(_ asset: AVAsset, then completion: (Predictions) -> Void) throws {

        let reader = try AVAssetReader(asset: asset)
        let videoTrack = asset.tracks(withMediaType: .video)[0]

        let trackReaderOutput = AVAssetReaderTrackOutput(
            track: videoTrack,
            outputSettings: [String(kCVPixelBufferPixelFormatTypeKey): NSNumber(value: kCVPixelFormatType_32BGRA)])
        let frameCount = asset.frameCount()

        guard 25...300 ~= frameCount else {
            throw ProcessingError.unsupportedFrameCount
        }

        reader.add(trackReaderOutput)
        reader.startReading()

        /// 5D tensor containing RGB data for each pixel in each sequential frame of the video.
        var multi = MultiArray<Float32>(shape: [1, frameCount, frameSize, frameSize, 3])

        var currentFrame = 0
        while let sampleBuffer = trackReaderOutput.copyNextSampleBuffer() {

            // NOTE(review): a sample without an image buffer is skipped here but
            // was still counted by frameCount(), which would leave a zero-filled
            // frame at the tensor's tail — confirm this cannot occur for the
            // supported inputs.
            guard var imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { continue }
            imageBuffer = try resizeIfNecessary(buffer: imageBuffer)

            extractRgbValuesInCenterCrop(from: imageBuffer, to: &multi, for: currentFrame)
            currentFrame += 1
        }

        completion(try performInference(for: multi))
    }
}

private extension Classifier {

    /// Scales a frame down, preserving aspect ratio, so its shorter side
    /// becomes exactly 256 px. Frames whose shorter side is already 256 px or
    /// less (but at least 224 px) are returned unchanged.
    ///
    /// - Throws: `.videoFrameIsTooSmall` below 224 px,
    ///           `.resizingFailure` when vImage cannot scale the buffer.
    func resizeIfNecessary(buffer: CVPixelBuffer) throws -> CVPixelBuffer {
        let width = CVPixelBufferGetWidth(buffer)
        let height = CVPixelBufferGetHeight(buffer)
        let shorterDimension = min(width, height)

        guard shorterDimension >= 224 else { throw ProcessingError.videoFrameIsTooSmall }
        // Fix: `> 256` (was `>= 256`) so a frame already at exactly 256 px
        // skips a redundant identity resize; the resulting pixels are the same.
        guard shorterDimension > 256 else { return buffer }

        /// Aspect ratio is preserved since both width and height dimensions are
        /// scaled down by the same factor, so the shorter side lands on 256.
        let scale = 256.0 / Double(shorterDimension)
        guard let resizedBuffer = resizePixelBuffer(buffer,
                                                    width: Int(scale * Double(width)),
                                                    height: Int(scale * Double(height))) else {
            throw ProcessingError.resizingFailure
        }
        return resizedBuffer
    }

    /// Runs the Kinetics model on the assembled tensor and returns the
    /// top-5 labels with their softmax probabilities.
    func performInference(for tensor: MultiArray<Float32>) throws -> Predictions {
        let input = KineticsInput(Placeholder: tensor.array)
        let output = try model.prediction(input: input)
        return top(5, output.Softmax)
    }

    /// Copies the normalized RGB values of the centered 224x224 crop of
    /// `buffer` into frame `frameIndex` of `tensor`.
    func extractRgbValuesInCenterCrop(from buffer: CVPixelBuffer,
                                      to tensor: inout MultiArray<Float32>,
                                      for frameIndex: Int) {

        let bytesPerRow = CVPixelBufferGetBytesPerRow(buffer)
        let width = CVPixelBufferGetWidth(buffer)
        let height = CVPixelBufferGetHeight(buffer)

        let flags = CVPixelBufferLockFlags(rawValue: 0)
        guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(buffer, flags) else { return }
        // Fix: unlock via `defer` — the original early-returned without
        // unlocking when the base address was unavailable, leaking the lock.
        defer { CVPixelBufferUnlockBaseAddress(buffer, flags) }
        guard let baseAddr = CVPixelBufferGetBaseAddress(buffer) else { return }
        let pixels = baseAddr.assumingMemoryBound(to: UInt8.self)

        let cropOriginX = width / 2 - frameSize / 2
        let cropOriginY = height / 2 - frameSize / 2

        for x in 0 ..< frameSize {
            for y in 0 ..< frameSize {
                let relativeX = cropOriginX + x
                let relativeY = cropOriginY + y

                // 32BGRA layout: bytes are B, G, R, A per pixel.
                let index = relativeX * 4 + relativeY * bytesPerRow
                let b = pixels[index]
                let g = pixels[index + 1]
                let r = pixels[index + 2]

                let color = NormalizedColor(r, g, b)

                // NOTE(review): `x` (horizontal) indexes the tensor's third axis
                // and `y` (vertical) the fourth, i.e. frames are stored transposed
                // relative to the usual (height, width) order — confirm this
                // matches the model's expected input layout before changing.
                tensor[0, frameIndex, x, y, 0] = color.red
                tensor[0, frameIndex, x, y, 1] = color.green
                tensor[0, frameIndex, x, y, 2] = color.blue
            }
        }
    }
}

extension AVAsset {
    /// Counts the video frames by decoding the first video track end to end.
    /// - Note: O(duration); the asset is decoded a second time afterwards by
    ///   `classify(_:then:)`.
    func frameCount() -> Int {
        let reader = try! AVAssetReader(asset: self)
        let videoTrack = tracks(withMediaType: .video)[0]

        let trackReaderOutput = AVAssetReaderTrackOutput(
            track: videoTrack,
            outputSettings: [String(kCVPixelBufferPixelFormatTypeKey): NSNumber(value: kCVPixelFormatType_32BGRA)])

        reader.add(trackReaderOutput)
        reader.startReading()

        var frameCount = 0
        while let _ = trackReaderOutput.copyNextSampleBuffer() {
            frameCount += 1
        }
        return frameCount
    }
}

/// Color with RGB values rescaled from 0...255 to the model's -1...1 range.
struct NormalizedColor: CustomStringConvertible {
    let red: Float32
    let green: Float32
    let blue: Float32

    init(_ r: UInt8, _ g: UInt8, _ b: UInt8) {
        red = Float32(2 * (Double(r) / 255.0) - 1)
        green = Float32(2 * (Double(g) / 255.0) - 1)
        blue = Float32(2 * (Double(b) / 255.0) - 1)
    }

    var description: String {
        "(\(red), \(green), \(blue))"
    }
}
/*
  Copyright (c) 2017-2019 M.I. Hollemans

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to
  deal in the Software without restriction, including without limitation the
  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  sell copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  IN THE SOFTWARE.
*/

import Swift

extension Array where Element: Comparable {
  /// Returns the index and value of the largest element in the array.
  /// On ties the first occurrence wins.
  ///
  /// - Precondition: the array is not empty.
  /// - Note: This method is slow. For faster results, use the standalone
  ///   version of argmax() instead.
  public func argmax() -> (Int, Element) {
    precondition(!isEmpty)
    var bestIndex = 0
    var bestValue = self[0]
    for candidate in 1..<count where self[candidate] > bestValue {
      bestValue = self[candidate]
      bestIndex = candidate
    }
    return (bestIndex, bestValue)
  }

  /// Returns the array's indices ordered so that the elements they reference
  /// satisfy `areInIncreasingOrder`.
  public func argsort(by areInIncreasingOrder: (Element, Element) -> Bool) -> [Array.Index] {
    indices.sorted { areInIncreasingOrder(self[$0], self[$1]) }
  }

  /// Returns a new array containing the elements at the specified indices,
  /// in that order.
  public func gather(indices: [Array.Index]) -> [Element] {
    indices.map { self[$0] }
  }
}
/*
  Copyright (c) 2017-2019 M.I. Hollemans

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to
  deal in the Software without restriction, including without limitation the
  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  sell copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  IN THE SOFTWARE.
*/

import CoreGraphics
import CoreImage
import VideoToolbox

extension CGImage {
  /// Resizes the image to `width` x `height` and converts it to an RGB
  /// (32ARGB, alpha-skipped) CVPixelBuffer.
  public func pixelBuffer(width: Int, height: Int,
                          orientation: CGImagePropertyOrientation) -> CVPixelBuffer? {
    pixelBuffer(width: width, height: height,
                pixelFormatType: kCVPixelFormatType_32ARGB,
                colorSpace: CGColorSpaceCreateDeviceRGB(),
                alphaInfo: .noneSkipFirst,
                orientation: orientation)
  }

  /// Resizes the image to `width` x `height` and converts it to an 8-bit
  /// grayscale CVPixelBuffer.
  public func pixelBufferGray(width: Int, height: Int,
                              orientation: CGImagePropertyOrientation) -> CVPixelBuffer? {
    pixelBuffer(width: width, height: height,
                pixelFormatType: kCVPixelFormatType_OneComponent8,
                colorSpace: CGColorSpaceCreateDeviceGray(),
                alphaInfo: .none,
                orientation: orientation)
  }

  /// Shared implementation: allocates a pixel buffer of the requested format
  /// and draws the image into it (drawing into the full rect performs the resize).
  func pixelBuffer(width: Int, height: Int, pixelFormatType: OSType,
                   colorSpace: CGColorSpace, alphaInfo: CGImageAlphaInfo,
                   orientation: CGImagePropertyOrientation) -> CVPixelBuffer? {

    // TODO: If the orientation is not .up, then rotate the CGImage.
    // See also: https://stackoverflow.com/a/40438893/
    assert(orientation == .up)

    let attributes = [kCVPixelBufferCGImageCompatibilityKey: kCFBooleanTrue,
                      kCVPixelBufferCGBitmapContextCompatibilityKey: kCFBooleanTrue]
    var newBuffer: CVPixelBuffer?
    let status = CVPixelBufferCreate(kCFAllocatorDefault,
                                     width,
                                     height,
                                     pixelFormatType,
                                     attributes as CFDictionary,
                                     &newBuffer)
    guard status == kCVReturnSuccess, let pixelBuffer = newBuffer else {
      return nil
    }

    let lockFlags = CVPixelBufferLockFlags(rawValue: 0)
    guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(pixelBuffer, lockFlags) else {
      return nil
    }
    defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, lockFlags) }

    guard let context = CGContext(data: CVPixelBufferGetBaseAddress(pixelBuffer),
                                  width: width,
                                  height: height,
                                  bitsPerComponent: 8,
                                  bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer),
                                  space: colorSpace,
                                  bitmapInfo: alphaInfo.rawValue)
    else {
      return nil
    }

    context.draw(self, in: CGRect(x: 0, y: 0, width: width, height: height))
    return pixelBuffer
  }
}

extension CGImage {
  /// Creates a new CGImage from a CVPixelBuffer via VideoToolbox.
  ///
  /// - Note: Not all CVPixelBuffer pixel formats support conversion into a
  ///   CGImage-compatible pixel format.
  public static func create(pixelBuffer: CVPixelBuffer) -> CGImage? {
    var cgImage: CGImage?
    VTCreateCGImageFromCVPixelBuffer(pixelBuffer, options: nil, imageOut: &cgImage)
    return cgImage
  }

  /// Creates a new CGImage from a CVPixelBuffer, using Core Image.
  public static func create(pixelBuffer: CVPixelBuffer, context: CIContext) -> CGImage? {
    let ciImage = CIImage(cvPixelBuffer: pixelBuffer)
    let rect = CGRect(x: 0, y: 0,
                      width: CVPixelBufferGetWidth(pixelBuffer),
                      height: CVPixelBufferGetHeight(pixelBuffer))
    return context.createCGImage(ciImage, from: rect)
  }
}
/*
  Copyright (c) 2017-2019 M.I. Hollemans

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to
  deal in the Software without restriction, including without limitation the
  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  sell copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  IN THE SOFTWARE.
*/

import CoreGraphics

extension CGImage {
  /// Converts the image into a row-major array of RGBA bytes (4 per pixel).
  /// On context-creation failure the returned array stays zero-filled,
  /// matching the original best-effort behavior.
  @nonobjc public func toByteArrayRGBA() -> [UInt8] {
    var rawBytes = [UInt8](repeating: 0, count: width * height * 4)
    rawBytes.withUnsafeMutableBytes { buffer in
      guard let space = colorSpace,
            let context = CGContext(data: buffer.baseAddress,
                                    width: width,
                                    height: height,
                                    bitsPerComponent: bitsPerComponent,
                                    bytesPerRow: bytesPerRow,
                                    space: space,
                                    bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue)
      else { return }
      context.draw(self, in: CGRect(x: 0, y: 0, width: width, height: height))
    }
    return rawBytes
  }

  /// Creates a new CGImage from an array of RGBA bytes.
  @nonobjc public class func fromByteArrayRGBA(_ bytes: [UInt8],
                                               width: Int,
                                               height: Int) -> CGImage? {
    fromByteArray(bytes, width: width, height: height,
                  bytesPerRow: width * 4,
                  colorSpace: CGColorSpaceCreateDeviceRGB(),
                  alphaInfo: .premultipliedLast)
  }

  /// Creates a new CGImage from an array of grayscale bytes (1 per pixel).
  @nonobjc public class func fromByteArrayGray(_ bytes: [UInt8],
                                               width: Int,
                                               height: Int) -> CGImage? {
    fromByteArray(bytes, width: width, height: height,
                  bytesPerRow: width,
                  colorSpace: CGColorSpaceCreateDeviceGray(),
                  alphaInfo: .none)
  }

  /// Shared implementation: wraps `bytes` in a bitmap context and snapshots it.
  @nonobjc class func fromByteArray(_ bytes: [UInt8],
                                    width: Int,
                                    height: Int,
                                    bytesPerRow: Int,
                                    colorSpace: CGColorSpace,
                                    alphaInfo: CGImageAlphaInfo) -> CGImage? {
    bytes.withUnsafeBytes { buffer in
      CGContext(data: UnsafeMutableRawPointer(mutating: buffer.baseAddress!),
                width: width,
                height: height,
                bitsPerComponent: 8,
                bytesPerRow: bytesPerRow,
                space: colorSpace,
                bitmapInfo: alphaInfo.rawValue)?.makeImage()
    }
  }
}
/*
  Copyright (c) 2017-2019 M.I. Hollemans

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to
  deal in the Software without restriction, including without limitation the
  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  sell copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  IN THE SOFTWARE.
*/

#if canImport(UIKit)

import UIKit

public extension CGImagePropertyOrientation {
    /// Creates an EXIF orientation from the equivalent `UIImage.Orientation`.
    init(_ orientation: UIImage.Orientation) {
        switch orientation {
        case .up: self = .up
        case .upMirrored: self = .upMirrored
        case .down: self = .down
        case .downMirrored: self = .downMirrored
        case .left: self = .left
        case .leftMirrored: self = .leftMirrored
        case .right: self = .right
        case .rightMirrored: self = .rightMirrored
        @unknown default: self = .up
        }
    }
}

#if !os(tvOS)

public extension CGImagePropertyOrientation {
    /// Maps the physical device orientation to an EXIF orientation.
    init(_ orientation: UIDeviceOrientation) {
        switch orientation {
        case .portraitUpsideDown: self = .left
        case .landscapeLeft: self = .up
        case .landscapeRight: self = .down
        default: self = .right
        }
    }
}

#endif

extension UIImage.Orientation {
    /// Creates a `UIImage.Orientation` from the equivalent EXIF orientation.
    ///
    /// Fix: the parameter was declared as `UIImage.Orientation`, which made
    /// this initializer a pointless identity conversion; the parameter name
    /// `cgOrientation` (and the upstream CoreMLHelpers source) show it is
    /// meant to convert from `CGImagePropertyOrientation`.
    init(_ cgOrientation: CGImagePropertyOrientation) {
        switch cgOrientation {
        case .up: self = .up
        case .upMirrored: self = .upMirrored
        case .down: self = .down
        case .downMirrored: self = .downMirrored
        case .left: self = .left
        case .leftMirrored: self = .leftMirrored
        case .right: self = .right
        case .rightMirrored: self = .rightMirrored
        // Imported C enum: guard against future cases.
        @unknown default: self = .up
        }
    }
}

#endif
/*
  Copyright (c) 2017 M.I. Hollemans

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to
  deal in the Software without restriction, including without limitation the
  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  sell copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  IN THE SOFTWARE.
*/

import Foundation
import Accelerate
import CoreImage

/// Creates a 32BGRA pixel buffer of the specified width and height.
public func createPixelBuffer(width: Int, height: Int) -> CVPixelBuffer? {
  var pixelBuffer: CVPixelBuffer?
  let status = CVPixelBufferCreate(nil, width, height,
                                   kCVPixelFormatType_32BGRA, nil,
                                   &pixelBuffer)
  guard status == kCVReturnSuccess else {
    print("Error: could not create pixel buffer", status)
    return nil
  }
  return pixelBuffer
}

/// First crops the pixel buffer, then resizes it via vImage.
///
/// - Note: The new CVPixelBuffer is not backed by an IOSurface and therefore
///   cannot be turned into a Metal texture.
public func resizePixelBuffer(_ srcPixelBuffer: CVPixelBuffer,
                              cropX: Int,
                              cropY: Int,
                              cropWidth: Int,
                              cropHeight: Int,
                              scaleWidth: Int,
                              scaleHeight: Int) -> CVPixelBuffer? {
  let lockFlags = CVPixelBufferLockFlags(rawValue: 0)
  guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(srcPixelBuffer, lockFlags) else {
    return nil
  }
  defer { CVPixelBufferUnlockBaseAddress(srcPixelBuffer, lockFlags) }

  guard let srcData = CVPixelBufferGetBaseAddress(srcPixelBuffer) else {
    print("Error: could not get pixel buffer base address")
    return nil
  }
  let srcBytesPerRow = CVPixelBufferGetBytesPerRow(srcPixelBuffer)
  // Offset to the first byte of the crop rectangle (4 bytes per pixel).
  let cropOffset = cropY * srcBytesPerRow + cropX * 4
  var srcBuffer = vImage_Buffer(data: srcData.advanced(by: cropOffset),
                                height: vImagePixelCount(cropHeight),
                                width: vImagePixelCount(cropWidth),
                                rowBytes: srcBytesPerRow)

  let destBytesPerRow = scaleWidth * 4
  guard let destData = malloc(scaleHeight * destBytesPerRow) else {
    print("Error: out of memory")
    return nil
  }
  var destBuffer = vImage_Buffer(data: destData,
                                 height: vImagePixelCount(scaleHeight),
                                 width: vImagePixelCount(scaleWidth),
                                 rowBytes: destBytesPerRow)

  let error = vImageScale_ARGB8888(&srcBuffer, &destBuffer, nil, vImage_Flags(0))
  guard error == kvImageNoError else {
    print("Error:", error)
    free(destData)
    return nil
  }

  // The destination pixel buffer takes ownership of `destData` and frees it
  // when released.
  let releaseCallback: CVPixelBufferReleaseBytesCallback = { _, ptr in
    if let ptr = ptr {
      free(UnsafeMutableRawPointer(mutating: ptr))
    }
  }

  let pixelFormat = CVPixelBufferGetPixelFormatType(srcPixelBuffer)
  var dstPixelBuffer: CVPixelBuffer?
  let status = CVPixelBufferCreateWithBytes(nil, scaleWidth, scaleHeight,
                                            pixelFormat, destData,
                                            destBytesPerRow, releaseCallback,
                                            nil, nil, &dstPixelBuffer)
  guard status == kCVReturnSuccess else {
    print("Error: could not create new pixel buffer")
    free(destData)
    return nil
  }
  return dstPixelBuffer
}

/// Resizes a CVPixelBuffer to a new width and height (no cropping).
///
/// - Note: The new CVPixelBuffer is not backed by an IOSurface and therefore
///   cannot be turned into a Metal texture.
public func resizePixelBuffer(_ pixelBuffer: CVPixelBuffer,
                              width: Int, height: Int) -> CVPixelBuffer? {
  resizePixelBuffer(pixelBuffer, cropX: 0, cropY: 0,
                    cropWidth: CVPixelBufferGetWidth(pixelBuffer),
                    cropHeight: CVPixelBufferGetHeight(pixelBuffer),
                    scaleWidth: width, scaleHeight: height)
}

/// Resizes a CVPixelBuffer into `output` using Core Image.
public func resizePixelBuffer(_ pixelBuffer: CVPixelBuffer,
                              width: Int, height: Int,
                              output: CVPixelBuffer, context: CIContext) {
  let image = CIImage(cvPixelBuffer: pixelBuffer)
  let scaleX = CGFloat(width) / CGFloat(CVPixelBufferGetWidth(pixelBuffer))
  let scaleY = CGFloat(height) / CGFloat(CVPixelBufferGetHeight(pixelBuffer))
  let scaled = image.transformed(by: CGAffineTransform(scaleX: scaleX, y: scaleY))
  context.render(scaled, to: output)
}

/// Rotates a CVPixelBuffer by `factor` * 90 degrees counterclockwise.
///
/// - Note: The new CVPixelBuffer is not backed by an IOSurface and therefore
///   cannot be turned into a Metal texture.
public func rotate90PixelBuffer(_ srcPixelBuffer: CVPixelBuffer, factor: UInt8) -> CVPixelBuffer? {
  let lockFlags = CVPixelBufferLockFlags(rawValue: 0)
  guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(srcPixelBuffer, lockFlags) else {
    return nil
  }
  defer { CVPixelBufferUnlockBaseAddress(srcPixelBuffer, lockFlags) }

  guard let srcData = CVPixelBufferGetBaseAddress(srcPixelBuffer) else {
    print("Error: could not get pixel buffer base address")
    return nil
  }
  let sourceWidth = CVPixelBufferGetWidth(srcPixelBuffer)
  let sourceHeight = CVPixelBufferGetHeight(srcPixelBuffer)

  // Odd factors (90 / 270 degrees) swap the output dimensions.
  var destWidth = sourceHeight
  var destHeight = sourceWidth
  if factor % 2 == 0 {
    destWidth = sourceWidth
    destHeight = sourceHeight
  }
  var backgroundColor = UInt8(0)

  let srcBytesPerRow = CVPixelBufferGetBytesPerRow(srcPixelBuffer)
  var srcBuffer = vImage_Buffer(data: srcData,
                                height: vImagePixelCount(sourceHeight),
                                width: vImagePixelCount(sourceWidth),
                                rowBytes: srcBytesPerRow)

  let destBytesPerRow = destWidth * 4
  guard let destData = malloc(destHeight * destBytesPerRow) else {
    print("Error: out of memory")
    return nil
  }
  var destBuffer = vImage_Buffer(data: destData,
                                 height: vImagePixelCount(destHeight),
                                 width: vImagePixelCount(destWidth),
                                 rowBytes: destBytesPerRow)

  let error = vImageRotate90_ARGB8888(&srcBuffer, &destBuffer, factor,
                                      &backgroundColor, vImage_Flags(0))
  guard error == kvImageNoError else {
    print("Error:", error)
    free(destData)
    return nil
  }

  let releaseCallback: CVPixelBufferReleaseBytesCallback = { _, ptr in
    if let ptr = ptr {
      free(UnsafeMutableRawPointer(mutating: ptr))
    }
  }

  let pixelFormat = CVPixelBufferGetPixelFormatType(srcPixelBuffer)
  var dstPixelBuffer: CVPixelBuffer?
  let status = CVPixelBufferCreateWithBytes(nil, destWidth, destHeight,
                                            pixelFormat, destData,
                                            destBytesPerRow, releaseCallback,
                                            nil, nil, &dstPixelBuffer)
  guard status == kCVReturnSuccess else {
    print("Error: could not create new pixel buffer")
    free(destData)
    return nil
  }
  return dstPixelBuffer
}

public extension CVPixelBuffer {
  /// Copies this CVPixelBuffer to a new CVPixelBuffer that is compatible
  /// with Metal.
  ///
  /// - Tip: If CVMetalTextureCacheCreateTextureFromImage is failing, then call
  ///   this method first!
  func copyToMetalCompatible() -> CVPixelBuffer? {
    // Other possible options:
    //   String(kCVPixelBufferOpenGLCompatibilityKey): true,
    //   String(kCVPixelBufferIOSurfacePropertiesKey): [
    //     "IOSurfaceOpenGLESFBOCompatibility": true,
    //     "IOSurfaceOpenGLESTextureCompatibility": true,
    //     "IOSurfaceCoreAnimationCompatibility": true
    //   ]
    deepCopy(withAttributes: [String(kCVPixelBufferMetalCompatibilityKey): true])
  }

  /// Copies this CVPixelBuffer to a new CVPixelBuffer.
  ///
  /// This lets you specify new attributes, such as whether the new
  /// CVPixelBuffer must be IOSurface-backed.
  ///
  /// See: https://developer.apple.com/library/archive/qa/qa1781/_index.html
  func deepCopy(withAttributes attributes: [String: Any] = [:]) -> CVPixelBuffer? {
    let srcPixelBuffer = self
    let srcFlags: CVPixelBufferLockFlags = .readOnly
    guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(srcPixelBuffer, srcFlags) else {
      return nil
    }
    defer { CVPixelBufferUnlockBaseAddress(srcPixelBuffer, srcFlags) }

    // Start from the source's propagatable attachments, then let the caller's
    // attributes override any duplicates.
    var combinedAttributes: [String: Any] = [:]
    if let attachments = CVBufferGetAttachments(srcPixelBuffer, .shouldPropagate) as? [String: Any] {
      for (key, value) in attachments {
        combinedAttributes[key] = value
      }
    }
    combinedAttributes = combinedAttributes.merging(attributes) { $1 }

    var maybePixelBuffer: CVPixelBuffer?
    let status = CVPixelBufferCreate(kCFAllocatorDefault,
                                     CVPixelBufferGetWidth(srcPixelBuffer),
                                     CVPixelBufferGetHeight(srcPixelBuffer),
                                     CVPixelBufferGetPixelFormatType(srcPixelBuffer),
                                     combinedAttributes as CFDictionary,
                                     &maybePixelBuffer)
    guard status == kCVReturnSuccess, let dstPixelBuffer = maybePixelBuffer else {
      return nil
    }

    let dstFlags = CVPixelBufferLockFlags(rawValue: 0)
    guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(dstPixelBuffer, dstFlags) else {
      return nil
    }
    defer { CVPixelBufferUnlockBaseAddress(dstPixelBuffer, dstFlags) }

    // NOTE(review): the source dump is truncated inside this copy loop; the
    // row-by-row copy below is reconstructed from upstream CoreMLHelpers —
    // confirm against the original file.
    for plane in 0...max(0, CVPixelBufferGetPlaneCount(srcPixelBuffer) - 1) {
      if let srcAddr = CVPixelBufferGetBaseAddressOfPlane(srcPixelBuffer, plane),
         let dstAddr = CVPixelBufferGetBaseAddressOfPlane(dstPixelBuffer, plane) {
        let srcBytesPerRow = CVPixelBufferGetBytesPerRowOfPlane(srcPixelBuffer, plane)
        let dstBytesPerRow = CVPixelBufferGetBytesPerRowOfPlane(dstPixelBuffer, plane)

        // Rows may have different padding in src and dst; copy the overlap.
        for h in 0..<CVPixelBufferGetHeightOfPlane(srcPixelBuffer, plane) {
          let srcPtr = srcAddr.advanced(by: h * srcBytesPerRow)
          let dstPtr = dstAddr.advanced(by: h * dstBytesPerRow)
          dstPtr.copyMemory(from: srcPtr, byteCount: min(srcBytesPerRow, dstBytesPerRow))
        }
      }
    }
    return dstPixelBuffer
  }
}
model.prediction(from: input) } 55 | } 56 | 57 | public func prediction(model: MLModel) -> Publishers.TryMap { 58 | tryMap { input in try model.prediction(from: input) } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /Sources/VisualActionKit/CoreMLHelpers/CoreMLHelpers.h: -------------------------------------------------------------------------------- 1 | #import 2 | 3 | //! Project version number for CoreMLHelpers. 4 | FOUNDATION_EXPORT double CoreMLHelpersVersionNumber; 5 | 6 | //! Project version string for CoreMLHelpers. 7 | FOUNDATION_EXPORT const unsigned char CoreMLHelpersVersionString[]; 8 | 9 | // In this header, you should import all the public headers of your framework using statements like #import 10 | 11 | 12 | -------------------------------------------------------------------------------- /Sources/VisualActionKit/CoreMLHelpers/MLModel+Images.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2019 M.I. Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
*/

import CoreML

extension MLModel {
    /// Returns the MLImageConstraint for the given model input, or nil if
    /// that input doesn't exist or is not an image.
    public func imageConstraint(forInput inputName: String) -> MLImageConstraint? {
        modelDescription.inputDescriptionsByName[inputName]?.imageConstraint
    }
}

#if canImport(UIKit)
import UIKit

@available(iOS 13.0, tvOS 13.0, *)
extension MLModel {
    /// Converts a UIImage into an MLFeatureValue, using the image constraint
    /// of the specified model input.
    ///
    /// - Parameters:
    ///   - image: The source image; must have a backing CGImage.
    ///   - inputName: Name of the model input the image is intended for.
    ///   - orientation: Orientation to apply; defaults to `.up`.
    ///   - options: Optional conversion options (cropping/scaling).
    /// - Returns: The feature value, or `nil` if the conversion failed.
    public func featureValue(fromUIImage image: UIImage,
                             forInput inputName: String,
                             orientation: CGImagePropertyOrientation = .up,
                             options: [MLFeatureValue.ImageOption: Any]? = nil) -> MLFeatureValue? {
        guard let cgImage = image.cgImage else {
            print("Error: could not convert UIImage to CGImage")
            return nil
        }
        // Delegate to the CGImage-based overload below.
        return featureValue(fromCGImage: cgImage, forInput: inputName,
                            orientation: orientation, options: options)
    }
}

#endif

@available(iOS 13.0, tvOS 13.0, OSX 10.15, *)
extension MLModel {
    /// Converts a CGImage into an MLFeatureValue, using the image constraint
    /// of the specified model input.
    ///
    /// - Returns: The feature value, or `nil` when the input is not an image
    ///   or the conversion failed.
    public func featureValue(fromCGImage image: CGImage,
                             forInput inputName: String,
                             orientation: CGImagePropertyOrientation = .up,
                             options: [MLFeatureValue.ImageOption: Any]? = nil) -> MLFeatureValue? {
        guard let constraint = imageConstraint(forInput: inputName) else {
            print("Error: could not get image constraint for input named '\(inputName)'")
            return nil
        }
        let value = try? MLFeatureValue(cgImage: image,
                                       orientation: orientation,
                                       constraint: constraint,
                                       options: options)
        if value == nil {
            print("Error: could not get feature value for image \(image)")
        }
        return value
    }

    /// Converts an image file from a URL into an MLFeatureValue, using the
    /// image constraint of the specified model input.
    ///
    /// - Returns: The feature value, or `nil` when the input is not an image
    ///   or the file could not be loaded.
    public func featureValue(fromImageAt url: URL,
                             forInput inputName: String,
                             orientation: CGImagePropertyOrientation = .up,
                             options: [MLFeatureValue.ImageOption: Any]? = nil) -> MLFeatureValue? {
        guard let constraint = imageConstraint(forInput: inputName) else {
            print("Error: could not get image constraint for input named '\(inputName)'")
            return nil
        }
        let value = try? MLFeatureValue(imageAt: url,
                                       orientation: orientation,
                                       constraint: constraint,
                                       options: options)
        if value == nil {
            print("Error: could not get feature value for image at '\(url)'")
        }
        return value
    }
}
Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | import CoreML 24 | 25 | extension MLMultiArray { 26 | /** 27 | Returns a new MLMultiArray with the specified dimensions. 28 | 29 | - Note: This does not copy the data but uses a pointer into the original 30 | multi-array's memory. 
The caller is responsible for keeping the original 31 | object alive, for example using `withExtendedLifetime(originalArray) {...}` 32 | */ 33 | @nonobjc public func reshaped(to dimensions: [Int]) throws -> MLMultiArray { 34 | let newCount = dimensions.reduce(1, *) 35 | precondition(newCount == count, "Cannot reshape \(shape) to \(dimensions)") 36 | 37 | var newStrides = [Int](repeating: 0, count: dimensions.count) 38 | newStrides[dimensions.count - 1] = 1 39 | for i in stride(from: dimensions.count - 1, to: 0, by: -1) { 40 | newStrides[i - 1] = newStrides[i] * dimensions[i] 41 | } 42 | 43 | let newShape_ = dimensions.map { NSNumber(value: $0) } 44 | let newStrides_ = newStrides.map { NSNumber(value: $0) } 45 | 46 | return try MLMultiArray(dataPointer: self.dataPointer, 47 | shape: newShape_, 48 | dataType: self.dataType, 49 | strides: newStrides_) 50 | } 51 | 52 | /** 53 | Returns a transposed version of this MLMultiArray. 54 | 55 | - Note: This copies the data. 56 | 57 | - TODO: Support .float32 and .int32 types too. 58 | */ 59 | @nonobjc public func transposed(to order: [Int]) throws -> MLMultiArray { 60 | let ndim = order.count 61 | 62 | precondition(dataType == .double) 63 | precondition(ndim == strides.count) 64 | 65 | let newShape = shape.indices.map { shape[order[$0]] } 66 | let newArray = try MLMultiArray(shape: newShape, dataType: self.dataType) 67 | 68 | let srcPtr = UnsafeMutablePointer(OpaquePointer(dataPointer)) 69 | let dstPtr = UnsafeMutablePointer(OpaquePointer(newArray.dataPointer)) 70 | 71 | let srcShape = shape.map { $0.intValue } 72 | let dstStride = newArray.strides.map { $0.intValue } 73 | var idx = [Int](repeating: 0, count: ndim) 74 | 75 | for j in 0.. 
0 && idx[i] >= srcShape[i] { 89 | idx[i] = 0 90 | idx[i - 1] += 1 91 | i -= 1 92 | } 93 | } 94 | return newArray 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /Sources/VisualActionKit/CoreMLHelpers/MLMultiArray+Image.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 
*/

import Accelerate
import CoreML

/// Scalar element types an `MLMultiArray` can hold, with the arithmetic and
/// conversion operations the image helpers need.
public protocol MultiArrayType: Comparable {
    static var multiArrayDataType: MLMultiArrayDataType { get }
    static func + (lhs: Self, rhs: Self) -> Self
    static func - (lhs: Self, rhs: Self) -> Self
    static func * (lhs: Self, rhs: Self) -> Self
    static func / (lhs: Self, rhs: Self) -> Self
    init(_: Int)
    var toUInt8: UInt8 { get }
}

extension Double: MultiArrayType {
    public static var multiArrayDataType: MLMultiArrayDataType { .double }
    public var toUInt8: UInt8 { UInt8(self) }
}

extension Float: MultiArrayType {
    public static var multiArrayDataType: MLMultiArrayDataType { .float32 }
    public var toUInt8: UInt8 { UInt8(self) }
}

extension Int32: MultiArrayType {
    public static var multiArrayDataType: MLMultiArrayDataType { .int32 }
    public var toUInt8: UInt8 { UInt8(self) }
}

extension MLMultiArray {
68 | 69 | Use the `min` and `max` parameters to put the values from the array into 70 | the range [0, 255], if not already: 71 | 72 | - `min`: should be the smallest value in the data; this will be mapped to 0. 73 | - `max`: should be the largest value in the data; will be mapped to 255. 74 | 75 | For example, if the range of the data in the multi-array is [-1, 1], use 76 | `min: -1, max: 1`. If the range is already [0, 255], then use the defaults. 77 | */ 78 | public func cgImage(min: Double = 0, 79 | max: Double = 255, 80 | channel: Int? = nil, 81 | axes: (Int, Int, Int)? = nil) -> CGImage? { 82 | switch self.dataType { 83 | case .double: 84 | return _image(min: min, max: max, channel: channel, axes: axes) 85 | case .float32: 86 | return _image(min: Float(min), max: Float(max), channel: channel, axes: axes) 87 | case .int32: 88 | return _image(min: Int32(min), max: Int32(max), channel: channel, axes: axes) 89 | @unknown default: 90 | fatalError("Unsupported data type \(dataType.rawValue)") 91 | } 92 | } 93 | 94 | /** 95 | Helper function that allows us to use generics. The type of `min` and `max` 96 | is also the dataType of the MLMultiArray. 97 | */ 98 | private func _image(min: T, 99 | max: T, 100 | channel: Int?, 101 | axes: (Int, Int, Int)?) -> CGImage? { 102 | if let (b, w, h, c) = toRawBytes(min: min, max: max, channel: channel, axes: axes) { 103 | if c == 1 { 104 | return CGImage.fromByteArrayGray(b, width: w, height: h) 105 | } else { 106 | return CGImage.fromByteArrayRGBA(b, width: w, height: h) 107 | } 108 | } 109 | return nil 110 | } 111 | 112 | /** 113 | Converts the multi-array into an array of RGBA or grayscale pixels. 114 | 115 | - Note: This is not particularly fast, but it is flexible. You can change 116 | the loops to convert the multi-array whichever way you please. 117 | 118 | - Note: The type of `min` and `max` must match the dataType of the 119 | MLMultiArray object. 
120 | 121 | - Returns: tuple containing the RGBA bytes, the dimensions of the image, 122 | and the number of channels in the image (1, 3, or 4). 123 | */ 124 | public func toRawBytes(min: T, 125 | max: T, 126 | channel: Int? = nil, 127 | axes: (Int, Int, Int)? = nil) 128 | -> (bytes: [UInt8], width: Int, height: Int, channels: Int)? { 129 | // MLMultiArray with unsupported shape? 130 | if shape.count < 2 { 131 | print("Cannot convert MLMultiArray of shape \(shape) to image") 132 | return nil 133 | } 134 | 135 | // Figure out which dimensions to use for the channels, height, and width. 136 | let channelAxis: Int 137 | let heightAxis: Int 138 | let widthAxis: Int 139 | if let axes = axes { 140 | channelAxis = axes.0 141 | heightAxis = axes.1 142 | widthAxis = axes.2 143 | guard channelAxis >= 0 && channelAxis < shape.count && 144 | heightAxis >= 0 && heightAxis < shape.count && 145 | widthAxis >= 0 && widthAxis < shape.count else { 146 | print("Invalid axes \(axes) for shape \(shape)") 147 | return nil 148 | } 149 | } else if shape.count == 2 { 150 | // Expected shape for grayscale is (height, width) 151 | heightAxis = 0 152 | widthAxis = 1 153 | channelAxis = -1 // Never be used 154 | } else { 155 | // Expected shape for color is (channels, height, width) 156 | channelAxis = 0 157 | heightAxis = 1 158 | widthAxis = 2 159 | } 160 | 161 | let height = self.shape[heightAxis].intValue 162 | let width = self.shape[widthAxis].intValue 163 | let yStride = self.strides[heightAxis].intValue 164 | let xStride = self.strides[widthAxis].intValue 165 | 166 | let channels: Int 167 | let cStride: Int 168 | let bytesPerPixel: Int 169 | let channelOffset: Int 170 | 171 | // MLMultiArray with just two dimensions is always grayscale. (We ignore 172 | // the value of channelAxis here.) 173 | if shape.count == 2 { 174 | channels = 1 175 | cStride = 0 176 | bytesPerPixel = 1 177 | channelOffset = 0 178 | 179 | // MLMultiArray with more than two dimensions can be color or grayscale. 
180 | } else { 181 | let channelDim = self.shape[channelAxis].intValue 182 | if let channel = channel { 183 | if channel < 0 || channel >= channelDim { 184 | print("Channel must be -1, or between 0 and \(channelDim - 1)") 185 | return nil 186 | } 187 | channels = 1 188 | bytesPerPixel = 1 189 | channelOffset = channel 190 | } else if channelDim == 1 { 191 | channels = 1 192 | bytesPerPixel = 1 193 | channelOffset = 0 194 | } else { 195 | if channelDim != 3 && channelDim != 4 { 196 | print("Expected channel dimension to have 1, 3, or 4 channels, got \(channelDim)") 197 | return nil 198 | } 199 | channels = channelDim 200 | bytesPerPixel = 4 201 | channelOffset = 0 202 | } 203 | cStride = self.strides[channelAxis].intValue 204 | } 205 | 206 | // Allocate storage for the RGBA or grayscale pixels. Set everything to 207 | // 255 so that alpha channel is filled in if only 3 channels. 208 | let count = height * width * bytesPerPixel 209 | var pixels = [UInt8](repeating: 255, count: count) 210 | 211 | // Grab the pointer to MLMultiArray's memory. 212 | var ptr = UnsafeMutablePointer(OpaquePointer(self.dataPointer)) 213 | ptr = ptr.advanced(by: channelOffset * cStride) 214 | 215 | // Loop through all the pixels and all the channels and copy them over. 216 | for c in 0.. CGImage? 
/// Converts a float32 MLMultiArray with 3 dimensions into an RGBA CGImage
/// using vImage. Channel 0 is read as the blue plane, channel 1 as green,
/// and channel 2 as red; values are linearly mapped from [min, max] onto
/// [0, 255].
///
/// - Returns: The image, or `nil` if the vImage conversion failed.
public func createCGImage(fromFloatArray features: MLMultiArray,
                          min: Float = 0,
                          max: Float = 255) -> CGImage? {
    assert(features.dataType == .float32)
    assert(features.shape.count == 3)

    let ptr = UnsafeMutablePointer<Float>(OpaquePointer(features.dataPointer))

    let height = features.shape[1].intValue
    let width = features.shape[2].intValue
    let channelStride = features.strides[0].intValue
    let rowStride = features.strides[1].intValue
    let srcRowBytes = rowStride * MemoryLayout<Float>.stride

    // The three source planes share one backing store, offset by the
    // channel stride.
    var blueBuffer = vImage_Buffer(data: ptr,
                                   height: vImagePixelCount(height),
                                   width: vImagePixelCount(width),
                                   rowBytes: srcRowBytes)
    var greenBuffer = vImage_Buffer(data: ptr.advanced(by: channelStride),
                                    height: vImagePixelCount(height),
                                    width: vImagePixelCount(width),
                                    rowBytes: srcRowBytes)
    var redBuffer = vImage_Buffer(data: ptr.advanced(by: channelStride * 2),
                                  height: vImagePixelCount(height),
                                  width: vImagePixelCount(width),
                                  rowBytes: srcRowBytes)

    let destRowBytes = width * 4
    var pixels = [UInt8](repeating: 0, count: height * destRowBytes)

    // FIX: the original passed `&pixels` straight into vImage_Buffer(data:).
    // That inout-to-pointer conversion is only guaranteed valid for the
    // duration of the initializer call, so vImage could write through a
    // dangling pointer. withUnsafeMutableBufferPointer keeps the pointer
    // valid for the whole conversion.
    let error: vImage_Error = pixels.withUnsafeMutableBufferPointer { buffer in
        var destBuffer = vImage_Buffer(data: buffer.baseAddress,
                                       height: vImagePixelCount(height),
                                       width: vImagePixelCount(width),
                                       rowBytes: destRowBytes)
        return vImageConvert_PlanarFToBGRX8888(&blueBuffer,
                                               &greenBuffer,
                                               &redBuffer,
                                               Pixel_8(255),
                                               &destBuffer,
                                               [max, max, max],
                                               [min, min, min],
                                               vImage_Flags(0))
    }
    guard error == kvImageNoError else { return nil }
    return CGImage.fromByteArrayRGBA(pixels, width: width, height: height)
}
{
        // Convert to CGImage first, then wrap in a UIImage if that succeeded.
        let cgImg = cgImage(min: min, max: max, channel: channel, axes: axes)
        return cgImg.map { UIImage(cgImage: $0) }
    }
}

/// Converts a float32 MLMultiArray into a UIImage by way of
/// `createCGImage(fromFloatArray:min:max:)`.
public func createUIImage(fromFloatArray features: MLMultiArray,
                          min: Float = 0,
                          max: Float = 255) -> UIImage? {
    createCGImage(fromFloatArray: features, min: min, max: max)
        .map { UIImage(cgImage: $0) }
}

#endif
/// Returns the index and value of the largest element in the array.
///
/// - Parameter count: If provided, only look at the first `count` elements
///   of the array, otherwise look at the entire array.
public func argmax(_ array: [Float], count: Int? = nil) -> (Int, Float) {
    var bestValue: Float = 0
    var bestIndex: vDSP_Length = 0
    vDSP_maxvi(array, 1, &bestValue, &bestIndex, vDSP_Length(count ?? array.count))
    return (Int(bestIndex), bestValue)
}

/// Returns the index and value of the largest element in the buffer.
///
/// - Parameters:
///   - ptr: Pointer to the first element in memory.
///   - count: How many elements to look at.
///   - stride: The distance between two elements in memory.
public func argmax(_ ptr: UnsafePointer<Float>, count: Int, stride: Int = 1) -> (Int, Float) {
    var bestValue: Float = 0
    var bestIndex: vDSP_Length = 0
    vDSP_maxvi(ptr, vDSP_Stride(stride), &bestValue, &bestIndex, vDSP_Length(count))
    return (Int(bestIndex), bestValue)
}

/// Returns the index and value of the largest element in the array.
///
/// - Parameter count: If provided, only look at the first `count` elements
///   of the array, otherwise look at the entire array.
public func argmax(_ array: [Double], count: Int? = nil) -> (Int, Double) {
    var bestValue: Double = 0
    var bestIndex: vDSP_Length = 0
    vDSP_maxviD(array, 1, &bestValue, &bestIndex, vDSP_Length(count ?? array.count))
    return (Int(bestIndex), bestValue)
}

/// Returns the index and value of the largest element in the buffer.
///
/// - Parameters:
///   - ptr: Pointer to the first element in memory.
///   - count: How many elements to look at.
///   - stride: The distance between two elements in memory.
public func argmax(_ ptr: UnsafePointer<Double>, count: Int, stride: Int = 1) -> (Int, Double) {
    var bestValue: Double = 0
    var bestIndex: vDSP_Length = 0
    vDSP_maxviD(ptr, vDSP_Stride(stride), &bestValue, &bestIndex, vDSP_Length(count))
    return (Int(bestIndex), bestValue)
}

/// Ensures that `x` is in the range `[min, max]`.
public func clamp<T: Comparable>(_ x: T, min: T, max: T) -> T {
    if x < min { return min }
    if x > max { return max }
    return x
}

/// Logistic sigmoid.
public func sigmoid(_ x: Float) -> Float {
    1 / (1 + exp(-x))
}

/// Logistic sigmoid.
public func sigmoid(_ x: Double) -> Double {
    1 / (1 + exp(-x))
}

/// In-place logistic sigmoid: x = 1 / (1 + exp(-x))
public func sigmoid(_ x: UnsafeMutablePointer<Float>, count: Int) {
    let n = vDSP_Length(count)
    var elementCount = Int32(count)
    var one: Float = 1
    vDSP_vneg(x, 1, x, 1, n)           // x = -x
    vvexpf(x, x, &elementCount)        // x = exp(x)
    vDSP_vsadd(x, 1, &one, x, 1, n)    // x = x + 1
    vvrecf(x, x, &elementCount)        // x = 1 / x
}

/// In-place logistic sigmoid: x = 1 / (1 + exp(-x))
public func sigmoid(_ x: UnsafeMutablePointer<Double>, count: Int) {
    let n = vDSP_Length(count)
    var elementCount = Int32(count)
    var one: Double = 1
    vDSP_vnegD(x, 1, x, 1, n)          // x = -x
    vvexp(x, x, &elementCount)         // x = exp(x)
    vDSP_vsaddD(x, 1, &one, x, 1, n)   // x = x + 1
    vvrec(x, x, &elementCount)         // x = 1 / x
}
/// Computes the "softmax" function over an array.
///
/// Based on code from https://github.com/nikolaypavlov/MLPNeuralNet/
///
/// In numpy pseudocode:
///
///     x -= np.max(x)
///     exp_scores = np.exp(x)
///     softmax = exp_scores / np.sum(exp_scores)
///
/// Shifting by the maximum first keeps the exponentials from blowing up
/// (numerical stability); it does not change the result.
public func softmax(_ x: [Float]) -> [Float] {
    var result = x
    let length = vDSP_Length(result.count)

    // Largest element of the input.
    var peak: Float = 0
    vDSP_maxv(result, 1, &peak, length)

    // Shift every element so the maximum becomes 0.
    peak = -peak
    vDSP_vsadd(result, 1, &peak, &result, 1, length)

    // Exponentiate in place.
    var elementCount = Int32(result.count)
    vvexpf(&result, result, &elementCount)

    // Normalize so the outputs sum to 1.
    var total: Float = 0
    vDSP_sve(result, 1, &total, length)
    vDSP_vsdiv(result, 1, &total, &result, 1, length)

    return result
}
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | import Foundation 24 | import CoreML 25 | import Swift 26 | 27 | /** 28 | Wrapper around MLMultiArray to make it more Swifty. 29 | */ 30 | public struct MultiArray { 31 | public let array: MLMultiArray 32 | public let pointer: UnsafeMutablePointer 33 | 34 | private(set) public var strides: [Int] 35 | private(set) public var shape: [Int] 36 | 37 | /** 38 | Creates a new multi-array filled with all zeros. 39 | */ 40 | public init(shape: [Int]) { 41 | let m = try! MLMultiArray(shape: shape as [NSNumber], dataType: T.multiArrayDataType) 42 | self.init(m) 43 | memset(pointer, 0, MemoryLayout.stride * count) 44 | } 45 | 46 | /** 47 | Creates a new multi-array initialized with the specified value. 48 | */ 49 | public init(shape: [Int], initial: T) { 50 | self.init(shape: shape) 51 | for i in 0..(OpaquePointer(array.dataPointer)) 68 | } 69 | 70 | /** 71 | Returns the number of elements in the entire array. 
72 | */ 73 | public var count: Int { 74 | return shape.reduce(1, *) 75 | } 76 | 77 | public subscript(a: Int) -> T { 78 | get { return pointer[a] } 79 | set { pointer[a] = newValue } 80 | } 81 | 82 | public subscript(a: Int, b: Int) -> T { 83 | get { return pointer[a*strides[0] + b*strides[1]] } 84 | set { pointer[a*strides[0] + b*strides[1]] = newValue } 85 | } 86 | 87 | public subscript(a: Int, b: Int, c: Int) -> T { 88 | get { return pointer[a*strides[0] + b*strides[1] + c*strides[2]] } 89 | set { pointer[a*strides[0] + b*strides[1] + c*strides[2]] = newValue } 90 | } 91 | 92 | public subscript(a: Int, b: Int, c: Int, d: Int) -> T { 93 | get { return pointer[a*strides[0] + b*strides[1] + c*strides[2] + d*strides[3]] } 94 | set { pointer[a*strides[0] + b*strides[1] + c*strides[2] + d*strides[3]] = newValue } 95 | } 96 | 97 | public subscript(a: Int, b: Int, c: Int, d: Int, e: Int) -> T { 98 | get { return pointer[a*strides[0] + b*strides[1] + c*strides[2] + d*strides[3] + e*strides[4]] } 99 | set { pointer[a*strides[0] + b*strides[1] + c*strides[2] + d*strides[3] + e*strides[4]] = newValue } 100 | } 101 | 102 | public subscript(indices: [Int]) -> T { 103 | get { return pointer[offset(for: indices)] } 104 | set { pointer[offset(for: indices)] = newValue } 105 | } 106 | 107 | func offset(for indices: [Int]) -> Int { 108 | var offset = 0 109 | for i in 0.. MultiArray { 120 | precondition(order.count == strides.count) 121 | var newShape = shape 122 | var newStrides = strides 123 | for i in 0.. 
MultiArray { 134 | let newCount = dimensions.reduce(1, *) 135 | precondition(newCount == count, "Cannot reshape \(shape) to \(dimensions)") 136 | 137 | var newStrides = [Int](repeating: 0, count: dimensions.count) 138 | newStrides[dimensions.count - 1] = 1 139 | for i in stride(from: dimensions.count - 1, to: 0, by: -1) { 140 | newStrides[i - 1] = newStrides[i] * dimensions[i] 141 | } 142 | 143 | return MultiArray(array, dimensions, newStrides) 144 | } 145 | } 146 | 147 | extension MultiArray: CustomStringConvertible { 148 | public var description: String { 149 | return description([]) 150 | } 151 | 152 | func description(_ indices: [Int]) -> String { 153 | func indent(_ x: Int) -> String { 154 | return String(repeating: " ", count: x) 155 | } 156 | 157 | // This function is called recursively for every dimension. 158 | // Add an entry for this dimension to the end of the array. 159 | var indices = indices + [0] 160 | 161 | let d = indices.count - 1 // the current dimension 162 | let N = shape[d] // how many elements in this dimension 163 | 164 | var s = "[" 165 | if indices.count < shape.count { // not last dimension yet? 166 | for i in 0.. (bytes: [UInt8], width: Int, height: Int)? { 200 | guard shape.count == 3 else { 201 | print("Expected a multi-array with 3 dimensions, got \(shape)") 202 | return nil 203 | } 204 | guard shape[0] == 3 else { 205 | print("Expected first dimension to have 3 channels, got \(shape[0])") 206 | return nil 207 | } 208 | 209 | let height = shape[1] 210 | let width = shape[2] 211 | var bytes = [UInt8](repeating: 0, count: height * width * 4) 212 | 213 | for h in 0.. (bytes: [UInt8], width: Int, height: Int)? { 237 | guard shape.count == 2 else { 238 | print("Expected a multi-array with 2 dimensions, got \(shape)") 239 | return nil 240 | } 241 | 242 | let height = shape[0] 243 | let width = shape[1] 244 | var bytes = [UInt8](repeating: 0, count: height * width) 245 | 246 | for h in 0.. CGImage? 
{ 269 | if shape.count == 3, let (b, w, h) = toRawBytesRGBA(offset: offset, scale: scale) { 270 | return CGImage.fromByteArrayRGBA(b, width: w, height: h) 271 | } else if shape.count == 2, let (b, w, h) = toRawBytesGray(offset: offset, scale: scale) { 272 | return CGImage.fromByteArrayGray(b, width: w, height: h) 273 | } else { 274 | return nil 275 | } 276 | } 277 | 278 | /** 279 | Converts a single channel from the multi-array to a grayscale CGImage. 280 | 281 | - Note: The multi-array must have shape (channels, height, width). If your 282 | array has a different shape, use `reshape()` or `transpose()` first. 283 | */ 284 | public func cgImage(channel: Int, offset: T, scale: T) -> CGImage? { 285 | guard shape.count == 3 else { 286 | print("Expected a multi-array with 3 dimensions, got \(shape)") 287 | return nil 288 | } 289 | guard channel >= 0 && channel < shape[0] else { 290 | print("Channel must be between 0 and \(shape[0] - 1)") 291 | return nil 292 | } 293 | 294 | let height = shape[1] 295 | let width = shape[2] 296 | var a = MultiArray(shape: [height, width]) 297 | for y in 0.. UIImage? { 322 | let cgImg = cgImage(offset: offset, scale: scale) 323 | return cgImg.map { UIImage(cgImage: $0) } 324 | } 325 | 326 | /** 327 | Converts a single channel from the multi-array to a grayscale UIImage. 328 | 329 | - Note: The multi-array must have shape (channels, height, width). If your 330 | array has a different shape, use `reshape()` or `transpose()` first. 331 | */ 332 | public func image(channel: Int, offset: T, scale: T) -> UIImage? { 333 | let cgImg = cgImage(channel: channel, offset: offset, scale: scale) 334 | return cgImg.map { UIImage(cgImage: $0) } 335 | } 336 | } 337 | 338 | #endif 339 | -------------------------------------------------------------------------------- /Sources/VisualActionKit/CoreMLHelpers/NonMaxSuppression.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. 
Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | import Foundation 24 | import Accelerate 25 | 26 | public struct BoundingBox { 27 | /** Index of the predicted class. */ 28 | public let classIndex: Int 29 | 30 | /** Confidence score. */ 31 | public let score: Float 32 | 33 | /** Normalized coordinates between 0 and 1. */ 34 | public let rect: CGRect 35 | 36 | public init(classIndex: Int, score: Float, rect: CGRect) { 37 | self.classIndex = classIndex 38 | self.score = score 39 | self.rect = rect 40 | } 41 | } 42 | 43 | /** 44 | Computes intersection-over-union overlap between two bounding boxes. 
45 | */ 46 | public func IOU(_ a: CGRect, _ b: CGRect) -> Float { 47 | let areaA = a.width * a.height 48 | if areaA <= 0 { return 0 } 49 | 50 | let areaB = b.width * b.height 51 | if areaB <= 0 { return 0 } 52 | 53 | let intersectionMinX = max(a.minX, b.minX) 54 | let intersectionMinY = max(a.minY, b.minY) 55 | let intersectionMaxX = min(a.maxX, b.maxX) 56 | let intersectionMaxY = min(a.maxY, b.maxY) 57 | let intersectionArea = max(intersectionMaxY - intersectionMinY, 0) * 58 | max(intersectionMaxX - intersectionMinX, 0) 59 | return Float(intersectionArea / (areaA + areaB - intersectionArea)) 60 | } 61 | 62 | /** 63 | Removes bounding boxes that overlap too much with other boxes that have 64 | a higher score. 65 | */ 66 | public func nonMaxSuppression(boundingBoxes: [BoundingBox], 67 | iouThreshold: Float, 68 | maxBoxes: Int) -> [Int] { 69 | return nonMaxSuppression(boundingBoxes: boundingBoxes, 70 | indices: Array(boundingBoxes.indices), 71 | iouThreshold: iouThreshold, 72 | maxBoxes: maxBoxes) 73 | } 74 | 75 | /** 76 | Removes bounding boxes that overlap too much with other boxes that have 77 | a higher score. 78 | 79 | Based on code from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/non_max_suppression_op.cc 80 | 81 | - Note: This version of NMS ignores the class of the bounding boxes. Since it 82 | selects the bounding boxes in a greedy fashion, if a certain class has many 83 | boxes that are selected, then it is possible none of the boxes of the other 84 | classes get selected. 
85 | 86 | - Parameters: 87 | - boundingBoxes: an array of bounding boxes and their scores 88 | - indices: which predictions to look at 89 | - iouThreshold: used to decide whether boxes overlap too much 90 | - maxBoxes: the maximum number of boxes that will be selected 91 | 92 | - Returns: the array indices of the selected bounding boxes 93 | */ 94 | public func nonMaxSuppression(boundingBoxes: [BoundingBox], 95 | indices: [Int], 96 | iouThreshold: Float, 97 | maxBoxes: Int) -> [Int] { 98 | 99 | // Sort the boxes based on their confidence scores, from high to low. 100 | let sortedIndices = indices.sorted { boundingBoxes[$0].score > boundingBoxes[$1].score } 101 | 102 | var selected: [Int] = [] 103 | 104 | // Loop through the bounding boxes, from highest score to lowest score, 105 | // and determine whether or not to keep each box. 106 | for i in 0..= maxBoxes { break } 108 | 109 | var shouldSelect = true 110 | let boxA = boundingBoxes[sortedIndices[i]] 111 | 112 | // Does the current box overlap one of the selected boxes more than the 113 | // given threshold amount? Then it's too similar, so don't keep it. 114 | for j in 0.. iouThreshold { 117 | shouldSelect = false 118 | break 119 | } 120 | } 121 | 122 | // This bounding box did not overlap too much with any previously selected 123 | // bounding box, so we'll keep it. 124 | if shouldSelect { 125 | selected.append(sortedIndices[i]) 126 | } 127 | } 128 | 129 | return selected 130 | } 131 | 132 | /** 133 | Multi-class version of non maximum suppression. 134 | 135 | Where `nonMaxSuppression()` does not look at the class of the predictions at 136 | all, the multi-class version first selects the best bounding boxes for each 137 | class, and then keeps the best ones of those. 138 | 139 | With this method you can usually expect to see at least one bounding box for 140 | each class (unless all the scores for a given class are really low). 
141 | 142 | Based on code from: https://github.com/tensorflow/models/blob/master/object_detection/core/post_processing.py 143 | 144 | - Parameters: 145 | - numClasses: the number of classes 146 | - boundingBoxes: an array of bounding boxes and their scores 147 | - scoreThreshold: used to only keep bounding boxes with a high enough score 148 | - iouThreshold: used to decide whether boxes overlap too much 149 | - maxPerClass: the maximum number of boxes that will be selected per class 150 | - maxTotal: maximum number of boxes that will be selected over all classes 151 | 152 | - Returns: the array indices of the selected bounding boxes 153 | */ 154 | public func nonMaxSuppressionMultiClass(numClasses: Int, 155 | boundingBoxes: [BoundingBox], 156 | scoreThreshold: Float, 157 | iouThreshold: Float, 158 | maxPerClass: Int, 159 | maxTotal: Int) -> [Int] { 160 | var selectedBoxes: [Int] = [] 161 | 162 | // Look at all the classes one-by-one. 163 | for c in 0.. scoreThreshold { 173 | filteredBoxes.append(p) 174 | } 175 | } 176 | } 177 | 178 | // Only keep the best bounding boxes for this class. 179 | let nmsBoxes = nonMaxSuppression(boundingBoxes: boundingBoxes, 180 | indices: filteredBoxes, 181 | iouThreshold: iouThreshold, 182 | maxBoxes: maxPerClass) 183 | 184 | // Add the indices of the surviving boxes to the big list. 185 | selectedBoxes.append(contentsOf: nmsBoxes) 186 | } 187 | 188 | // Sort all the surviving boxes by score and only keep the best ones. 189 | let sortedBoxes = selectedBoxes.sorted { boundingBoxes[$0].score > boundingBoxes[$1].score } 190 | return Array(sortedBoxes.prefix(maxTotal)) 191 | } 192 | -------------------------------------------------------------------------------- /Sources/VisualActionKit/CoreMLHelpers/Predictions.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. 
Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | import Vision 24 | 25 | /** 26 | Returns the top `k` predictions from Core ML classification results as an 27 | array of `(String, Double)` pairs. 28 | */ 29 | public func top(_ k: Int, _ prob: [String: Double]) -> [(String, Double)] { 30 | return Array(prob.map { x in (x.key, x.value) } 31 | .sorted(by: { a, b -> Bool in a.1 > b.1 }) 32 | .prefix(through: min(k, prob.count) - 1)) 33 | } 34 | 35 | /** 36 | Returns the top `k` predictions from Vision classification results as an 37 | array of `(String, Double)` pairs. 38 | */ 39 | public func top(_ k: Int, _ observations: [VNClassificationObservation]) -> [(String, Double)] { 40 | // The Vision observations are sorted by confidence already. 
41 | return observations.prefix(through: min(k, observations.count) - 1) 42 | .map { ($0.identifier, Double($0.confidence)) } 43 | } 44 | -------------------------------------------------------------------------------- /Sources/VisualActionKit/CoreMLHelpers/UIImage+CVPixelBuffer.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | #if canImport(UIKit) 24 | 25 | import UIKit 26 | import VideoToolbox 27 | 28 | extension UIImage { 29 | /** 30 | Resizes the image to width x height and converts it to an RGB CVPixelBuffer. 31 | */ 32 | public func pixelBuffer(width: Int, height: Int) -> CVPixelBuffer? 
{ 33 | return pixelBuffer(width: width, height: height, 34 | pixelFormatType: kCVPixelFormatType_32ARGB, 35 | colorSpace: CGColorSpaceCreateDeviceRGB(), 36 | alphaInfo: .noneSkipFirst) 37 | } 38 | 39 | /** 40 | Resizes the image to width x height and converts it to a grayscale CVPixelBuffer. 41 | */ 42 | public func pixelBufferGray(width: Int, height: Int) -> CVPixelBuffer? { 43 | return pixelBuffer(width: width, height: height, 44 | pixelFormatType: kCVPixelFormatType_OneComponent8, 45 | colorSpace: CGColorSpaceCreateDeviceGray(), 46 | alphaInfo: .none) 47 | } 48 | 49 | func pixelBuffer(width: Int, height: Int, pixelFormatType: OSType, 50 | colorSpace: CGColorSpace, alphaInfo: CGImageAlphaInfo) -> CVPixelBuffer? { 51 | var maybePixelBuffer: CVPixelBuffer? 52 | let attrs = [kCVPixelBufferCGImageCompatibilityKey: kCFBooleanTrue, 53 | kCVPixelBufferCGBitmapContextCompatibilityKey: kCFBooleanTrue] 54 | let status = CVPixelBufferCreate(kCFAllocatorDefault, 55 | width, 56 | height, 57 | pixelFormatType, 58 | attrs as CFDictionary, 59 | &maybePixelBuffer) 60 | 61 | guard status == kCVReturnSuccess, let pixelBuffer = maybePixelBuffer else { 62 | return nil 63 | } 64 | 65 | let flags = CVPixelBufferLockFlags(rawValue: 0) 66 | guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(pixelBuffer, flags) else { 67 | return nil 68 | } 69 | defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, flags) } 70 | 71 | guard let context = CGContext(data: CVPixelBufferGetBaseAddress(pixelBuffer), 72 | width: width, 73 | height: height, 74 | bitsPerComponent: 8, 75 | bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer), 76 | space: colorSpace, 77 | bitmapInfo: alphaInfo.rawValue) 78 | else { 79 | return nil 80 | } 81 | 82 | UIGraphicsPushContext(context) 83 | context.translateBy(x: 0, y: CGFloat(height)) 84 | context.scaleBy(x: 1, y: -1) 85 | self.draw(in: CGRect(x: 0, y: 0, width: width, height: height)) 86 | UIGraphicsPopContext() 87 | 88 | return pixelBuffer 89 | } 90 | } 91 | 92 | 
extension UIImage { 93 | /** 94 | Creates a new UIImage from a CVPixelBuffer. 95 | 96 | - Note: Not all CVPixelBuffer pixel formats support conversion into a 97 | CGImage-compatible pixel format. 98 | */ 99 | public convenience init?(pixelBuffer: CVPixelBuffer) { 100 | if let cgImage = CGImage.create(pixelBuffer: pixelBuffer) { 101 | self.init(cgImage: cgImage) 102 | } else { 103 | return nil 104 | } 105 | } 106 | 107 | /* 108 | // Alternative implementation: 109 | public convenience init?(pixelBuffer: CVPixelBuffer) { 110 | // This converts the image to a CIImage first and then to a UIImage. 111 | // Does not appear to work on the simulator but is OK on the device. 112 | self.init(ciImage: CIImage(cvPixelBuffer: pixelBuffer)) 113 | } 114 | */ 115 | 116 | /** 117 | Creates a new UIImage from a CVPixelBuffer, using a Core Image context. 118 | */ 119 | public convenience init?(pixelBuffer: CVPixelBuffer, context: CIContext) { 120 | if let cgImage = CGImage.create(pixelBuffer: pixelBuffer, context: context) { 121 | self.init(cgImage: cgImage) 122 | } else { 123 | return nil 124 | } 125 | } 126 | } 127 | 128 | #endif 129 | -------------------------------------------------------------------------------- /Sources/VisualActionKit/CoreMLHelpers/UIImage+Extensions.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. 
Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | #if canImport(UIKit) 24 | 25 | import UIKit 26 | 27 | extension UIImage { 28 | /** 29 | Resizes the image. 30 | 31 | - Parameters: 32 | - scale: If this is 1, `newSize` is the size in pixels. 33 | */ 34 | @nonobjc public func resized(to newSize: CGSize, scale: CGFloat = 1) -> UIImage { 35 | let format = UIGraphicsImageRendererFormat.default() 36 | format.scale = scale 37 | let renderer = UIGraphicsImageRenderer(size: newSize, format: format) 38 | let image = renderer.image { _ in 39 | draw(in: CGRect(origin: .zero, size: newSize)) 40 | } 41 | return image 42 | } 43 | } 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /Sources/VisualActionKit/CoreMLHelpers/UIImage+RawBytes.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. 
Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | #if canImport(UIKit) 24 | 25 | import UIKit 26 | 27 | extension UIImage { 28 | /** 29 | Converts the image into an array of RGBA bytes. 30 | */ 31 | @nonobjc public func toByteArrayRGBA() -> [UInt8]? { 32 | return cgImage?.toByteArrayRGBA() 33 | } 34 | 35 | /** 36 | Creates a new UIImage from an array of RGBA bytes. 37 | */ 38 | @nonobjc public class func fromByteArrayRGBA(_ bytes: [UInt8], 39 | width: Int, 40 | height: Int, 41 | scale: CGFloat = 0, 42 | orientation: UIImage.Orientation = .up) -> UIImage? { 43 | if let cgImage = CGImage.fromByteArrayRGBA(bytes, width: width, height: height) { 44 | return UIImage(cgImage: cgImage, scale: scale, orientation: orientation) 45 | } else { 46 | return nil 47 | } 48 | } 49 | 50 | /** 51 | Creates a new UIImage from an array of grayscale bytes. 
52 | */ 53 | @nonobjc public class func fromByteArrayGray(_ bytes: [UInt8], 54 | width: Int, 55 | height: Int, 56 | scale: CGFloat = 0, 57 | orientation: UIImage.Orientation = .up) -> UIImage? { 58 | if let cgImage = CGImage.fromByteArrayGray(bytes, width: width, height: height) { 59 | return UIImage(cgImage: cgImage, scale: scale, orientation: orientation) 60 | } else { 61 | return nil 62 | } 63 | } 64 | } 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /Sources/VisualActionKit/Kinetics.mlmodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lukereichold/VisualActionKit/22ba695054f115ce9ca111714f710f6c5a2ad34f/Sources/VisualActionKit/Kinetics.mlmodel -------------------------------------------------------------------------------- /Sources/VisualActionKit/Kinetics.swift: -------------------------------------------------------------------------------- 1 | import CoreML 2 | 3 | @available(macOS 10.16, iOS 14.0, tvOS 14.0, watchOS 7.0, *) 4 | class KineticsInput : MLFeatureProvider { 5 | var Placeholder: MLMultiArray 6 | 7 | var featureNames: Set { 8 | get { 9 | return ["Placeholder"] 10 | } 11 | } 12 | 13 | func featureValue(for featureName: String) -> MLFeatureValue? { 14 | if (featureName == "Placeholder") { 15 | return MLFeatureValue(multiArray: Placeholder) 16 | } 17 | return nil 18 | } 19 | 20 | init(Placeholder: MLMultiArray) { 21 | self.Placeholder = Placeholder 22 | } 23 | } 24 | 25 | @available(macOS 10.16, iOS 14.0, tvOS 14.0, watchOS 7.0, *) 26 | class KineticsOutput : MLFeatureProvider { 27 | 28 | private let provider : MLFeatureProvider 29 | 30 | lazy var Softmax: [String : Double] = { 31 | [unowned self] in return self.provider.featureValue(for: "Softmax")!.dictionaryValue as! 
[String : Double] 32 | }() 33 | lazy var classLabel: String = { 34 | [unowned self] in return self.provider.featureValue(for: "classLabel")!.stringValue 35 | }() 36 | 37 | var featureNames: Set { 38 | return self.provider.featureNames 39 | } 40 | 41 | func featureValue(for featureName: String) -> MLFeatureValue? { 42 | return self.provider.featureValue(for: featureName) 43 | } 44 | 45 | init(Softmax: [String : Double], classLabel: String) { 46 | self.provider = try! MLDictionaryFeatureProvider(dictionary: ["Softmax" : MLFeatureValue(dictionary: Softmax as [AnyHashable : NSNumber]), "classLabel" : MLFeatureValue(string: classLabel)]) 47 | } 48 | 49 | init(features: MLFeatureProvider) { 50 | self.provider = features 51 | } 52 | } 53 | 54 | @available(macOS 10.16, iOS 14.0, tvOS 14.0, watchOS 7.0, *) 55 | class Kinetics { 56 | let model: MLModel 57 | class var urlOfModelInThisBundle : URL { 58 | let bundle = Bundle(for: self) 59 | return bundle.url(forResource: "Kinetics", withExtension:"mlmodelc")! 60 | } 61 | init(model: MLModel) { 62 | self.model = model 63 | } 64 | @available(*, deprecated, message: "Use init(configuration:) instead and handle errors appropriately.") 65 | convenience init() { 66 | try! 
self.init(contentsOf: type(of:self).urlOfModelInThisBundle) 67 | } 68 | convenience init(configuration: MLModelConfiguration) throws { 69 | try self.init(contentsOf: type(of:self).urlOfModelInThisBundle, configuration: configuration) 70 | } 71 | convenience init(contentsOf modelURL: URL) throws { 72 | try self.init(model: MLModel(contentsOf: modelURL)) 73 | } 74 | convenience init(contentsOf modelURL: URL, configuration: MLModelConfiguration) throws { 75 | try self.init(model: MLModel(contentsOf: modelURL, configuration: configuration)) 76 | } 77 | class func load(configuration: MLModelConfiguration = MLModelConfiguration(), completionHandler handler: @escaping (Result) -> Void) { 78 | return self.load(contentsOf: self.urlOfModelInThisBundle, configuration: configuration, completionHandler: handler) 79 | } 80 | class func load(contentsOf modelURL: URL, configuration: MLModelConfiguration = MLModelConfiguration(), completionHandler handler: @escaping (Result) -> Void) { 81 | MLModel.__loadContents(of: modelURL, configuration: configuration) { (model, error) in 82 | if let error = error { 83 | handler(.failure(error)) 84 | } else if let model = model { 85 | handler(.success(Kinetics(model: model))) 86 | } else { 87 | fatalError("SPI failure: -[MLModel loadContentsOfURL:configuration::completionHandler:] vends nil for both model and error.") 88 | } 89 | } 90 | } 91 | func prediction(input: KineticsInput) throws -> KineticsOutput { 92 | return try self.prediction(input: input, options: MLPredictionOptions()) 93 | } 94 | func prediction(input: KineticsInput, options: MLPredictionOptions) throws -> KineticsOutput { 95 | let outFeatures = try model.prediction(from: input, options:options) 96 | return KineticsOutput(features: outFeatures) 97 | } 98 | func prediction(Placeholder: MLMultiArray) throws -> KineticsOutput { 99 | let input_ = KineticsInput(Placeholder: Placeholder) 100 | return try self.prediction(input: input_) 101 | } 102 | func predictions(inputs: 
[KineticsInput], options: MLPredictionOptions = MLPredictionOptions()) throws -> [KineticsOutput] { 103 | let batchIn = MLArrayBatchProvider(array: inputs) 104 | let batchOut = try model.predictions(from: batchIn, options: options) 105 | var results : [KineticsOutput] = [] 106 | results.reserveCapacity(inputs.count) 107 | for i in 0.. [XCTestCaseEntry] { 5 | return [ 6 | testCase(VisualActionKitTests.allTests), 7 | ] 8 | } 9 | #endif 10 | --------------------------------------------------------------------------------