├── .gitignore ├── README.md ├── SwiftMetalProcessing.xcodeproj ├── project.pbxproj └── project.xcworkspace │ └── contents.xcworkspacedata ├── SwiftMetalProcessing ├── AppDelegate.swift ├── Base.lproj │ ├── LaunchScreen.xib │ └── Main.storyboard ├── Images.xcassets │ └── AppIcon.appiconset │ │ └── Contents.json ├── Info.plist ├── Shaders.metal └── ViewController.swift ├── SwiftMetalProcessingTests ├── Info.plist └── SwiftMetalProcessingTests.swift └── simplebenchmark ├── accelerate_metal_cpu_benchmark_ipadair.tsv ├── accelerate_metal_cpu_benchmark_ipadair.txt ├── accelerate_metal_cpu_benchmark_ipadminia7retina.tsv ├── accelerate_metal_cpu_benchmark_ipadminia7retina.txt ├── accelerate_metal_cpu_benchmark_iphone5s.tsv ├── accelerate_metal_cpu_benchmark_iphone5s.txt ├── accelerate_metal_cpu_benchmark_iphone6.tsv ├── accelerate_metal_cpu_benchmark_iphone6.txt ├── accelerate_metal_cpu_benchmark_iphone6_only_metal_withoutloading.tsv ├── accelerate_metal_cpu_benchmark_iphone6_only_metal_withoutloading.txt ├── analyze_with_pandas.py ├── benchmark.txt ├── loop_benchmark.txt ├── numpy_similar_benchmark.py └── process.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Xcode 2 | # 3 | build/ 4 | *.pbxuser 5 | !default.pbxuser 6 | *.mode1v3 7 | !default.mode1v3 8 | *.mode2v3 9 | !default.mode2v3 10 | *.perspectivev3 11 | !default.perspectivev3 12 | xcuserdata 13 | *.xccheckout 14 | *.moved-aside 15 | DerivedData 16 | *.hmap 17 | *.ipa 18 | *.xcuserstate 19 | 20 | # CocoaPods 21 | # 22 | # We recommend against adding the Pods directory to your .gitignore. 
However 23 | # you should judge for yourself, the pros and cons are mentioned at: 24 | # http://guides.cocoapods.org/using/using-cocoapods.html#should-i-ignore-the-pods-directory-in-source-control 25 | # 26 | # Pods/ 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | SwiftMetalGPUParallelProcessing 2 | =============================== 3 | 4 | Data Parallel Processing with Swift and Metal on GPU for iOS8 (and beyond) 5 | 6 | Metal is an alternative to OpenGL for graphics processing, and for general data-parallel programming on GPUs it is an alternative to OpenCL and CUDA. This (simple) example shows how to use Metal with Swift to calculate the sigmoid function (the sigmoid function occurs frequently in machine learning, e.g. in Deep Learning and Kernel Methods/Support Vector Machines). 7 | 8 | If you want to read up on Metal I recommend having a look at https://developer.apple.com/metal/ (Metal Programming Guide, Metal Shading Language and Metal Framework Reference). 9 | 10 | See http://memkite.com/blog/2014/12/15/data-parallel-programming-with-metal-and-swift-for-iphoneipad-gpu/ for a blog post describing this code. 11 | 12 | The code is in ViewController.swift and Shaders.metal - direct links: 13 | 14 | https://github.com/atveit/SwiftMetalGPUParallelProcessing/blob/master/SwiftMetalProcessing/ViewController.swift 15 | 16 | https://github.com/atveit/SwiftMetalGPUParallelProcessing/blob/master/SwiftMetalProcessing/Shaders.metal 17 | -------------------------------------------------------------------------------- /SwiftMetalProcessing.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 
2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 46; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 6704E3681A3EDD7200E14BD6 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6704E3671A3EDD7200E14BD6 /* AppDelegate.swift */; }; 11 | 6704E36A1A3EDD7200E14BD6 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6704E3691A3EDD7200E14BD6 /* ViewController.swift */; }; 12 | 6704E36D1A3EDD7200E14BD6 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6704E36B1A3EDD7200E14BD6 /* Main.storyboard */; }; 13 | 6704E36F1A3EDD7200E14BD6 /* Images.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 6704E36E1A3EDD7200E14BD6 /* Images.xcassets */; }; 14 | 6704E3721A3EDD7200E14BD6 /* LaunchScreen.xib in Resources */ = {isa = PBXBuildFile; fileRef = 6704E3701A3EDD7200E14BD6 /* LaunchScreen.xib */; }; 15 | 6704E37E1A3EDD7200E14BD6 /* SwiftMetalProcessingTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6704E37D1A3EDD7200E14BD6 /* SwiftMetalProcessingTests.swift */; }; 16 | 6704E3881A3EDDBA00E14BD6 /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 6704E3871A3EDDBA00E14BD6 /* Metal.framework */; }; 17 | 6704E38A1A3EDE4600E14BD6 /* UIKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 6704E3891A3EDE4600E14BD6 /* UIKit.framework */; }; 18 | 6704E38C1A3EDE4C00E14BD6 /* QuartzCore.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 6704E38B1A3EDE4C00E14BD6 /* QuartzCore.framework */; }; 19 | 6704E38E1A3EDEA100E14BD6 /* Shaders.metal in Sources */ = {isa = PBXBuildFile; fileRef = 6704E38D1A3EDEA100E14BD6 /* Shaders.metal */; }; 20 | 6704E38F1A3EDEA100E14BD6 /* Shaders.metal in Sources */ = {isa = PBXBuildFile; fileRef = 6704E38D1A3EDEA100E14BD6 /* Shaders.metal */; }; 21 | /* End PBXBuildFile section */ 22 | 23 | /* Begin PBXContainerItemProxy section */ 24 | 6704E3781A3EDD7200E14BD6 /* PBXContainerItemProxy */ = { 25 | isa = PBXContainerItemProxy; 
26 | containerPortal = 6704E35A1A3EDD7200E14BD6 /* Project object */; 27 | proxyType = 1; 28 | remoteGlobalIDString = 6704E3611A3EDD7200E14BD6; 29 | remoteInfo = SwiftMetalProcessing; 30 | }; 31 | /* End PBXContainerItemProxy section */ 32 | 33 | /* Begin PBXFileReference section */ 34 | 6704E3621A3EDD7200E14BD6 /* SwiftMetalProcessing.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = SwiftMetalProcessing.app; sourceTree = BUILT_PRODUCTS_DIR; }; 35 | 6704E3661A3EDD7200E14BD6 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 36 | 6704E3671A3EDD7200E14BD6 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; 37 | 6704E3691A3EDD7200E14BD6 /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = ""; }; 38 | 6704E36C1A3EDD7200E14BD6 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = ""; }; 39 | 6704E36E1A3EDD7200E14BD6 /* Images.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Images.xcassets; sourceTree = ""; }; 40 | 6704E3711A3EDD7200E14BD6 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/LaunchScreen.xib; sourceTree = ""; }; 41 | 6704E3771A3EDD7200E14BD6 /* SwiftMetalProcessingTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = SwiftMetalProcessingTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; 42 | 6704E37C1A3EDD7200E14BD6 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 43 | 6704E37D1A3EDD7200E14BD6 /* SwiftMetalProcessingTests.swift */ = {isa = PBXFileReference; lastKnownFileType = 
sourcecode.swift; path = SwiftMetalProcessingTests.swift; sourceTree = ""; }; 44 | 6704E3871A3EDDBA00E14BD6 /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; }; 45 | 6704E3891A3EDE4600E14BD6 /* UIKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = UIKit.framework; path = System/Library/Frameworks/UIKit.framework; sourceTree = SDKROOT; }; 46 | 6704E38B1A3EDE4C00E14BD6 /* QuartzCore.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = QuartzCore.framework; path = System/Library/Frameworks/QuartzCore.framework; sourceTree = SDKROOT; }; 47 | 6704E38D1A3EDEA100E14BD6 /* Shaders.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Shaders.metal; sourceTree = ""; }; 48 | /* End PBXFileReference section */ 49 | 50 | /* Begin PBXFrameworksBuildPhase section */ 51 | 6704E35F1A3EDD7200E14BD6 /* Frameworks */ = { 52 | isa = PBXFrameworksBuildPhase; 53 | buildActionMask = 2147483647; 54 | files = ( 55 | 6704E38C1A3EDE4C00E14BD6 /* QuartzCore.framework in Frameworks */, 56 | 6704E38A1A3EDE4600E14BD6 /* UIKit.framework in Frameworks */, 57 | 6704E3881A3EDDBA00E14BD6 /* Metal.framework in Frameworks */, 58 | ); 59 | runOnlyForDeploymentPostprocessing = 0; 60 | }; 61 | 6704E3741A3EDD7200E14BD6 /* Frameworks */ = { 62 | isa = PBXFrameworksBuildPhase; 63 | buildActionMask = 2147483647; 64 | files = ( 65 | ); 66 | runOnlyForDeploymentPostprocessing = 0; 67 | }; 68 | /* End PBXFrameworksBuildPhase section */ 69 | 70 | /* Begin PBXGroup section */ 71 | 6704E3591A3EDD7200E14BD6 = { 72 | isa = PBXGroup; 73 | children = ( 74 | 6704E38B1A3EDE4C00E14BD6 /* QuartzCore.framework */, 75 | 6704E3891A3EDE4600E14BD6 /* UIKit.framework */, 76 | 6704E3871A3EDDBA00E14BD6 /* Metal.framework */, 77 | 6704E3641A3EDD7200E14BD6 /* SwiftMetalProcessing */, 78 
| 6704E37A1A3EDD7200E14BD6 /* SwiftMetalProcessingTests */, 79 | 6704E3631A3EDD7200E14BD6 /* Products */, 80 | ); 81 | sourceTree = ""; 82 | }; 83 | 6704E3631A3EDD7200E14BD6 /* Products */ = { 84 | isa = PBXGroup; 85 | children = ( 86 | 6704E3621A3EDD7200E14BD6 /* SwiftMetalProcessing.app */, 87 | 6704E3771A3EDD7200E14BD6 /* SwiftMetalProcessingTests.xctest */, 88 | ); 89 | name = Products; 90 | sourceTree = ""; 91 | }; 92 | 6704E3641A3EDD7200E14BD6 /* SwiftMetalProcessing */ = { 93 | isa = PBXGroup; 94 | children = ( 95 | 6704E3671A3EDD7200E14BD6 /* AppDelegate.swift */, 96 | 6704E3691A3EDD7200E14BD6 /* ViewController.swift */, 97 | 6704E36B1A3EDD7200E14BD6 /* Main.storyboard */, 98 | 6704E36E1A3EDD7200E14BD6 /* Images.xcassets */, 99 | 6704E3701A3EDD7200E14BD6 /* LaunchScreen.xib */, 100 | 6704E3651A3EDD7200E14BD6 /* Supporting Files */, 101 | 6704E38D1A3EDEA100E14BD6 /* Shaders.metal */, 102 | ); 103 | path = SwiftMetalProcessing; 104 | sourceTree = ""; 105 | }; 106 | 6704E3651A3EDD7200E14BD6 /* Supporting Files */ = { 107 | isa = PBXGroup; 108 | children = ( 109 | 6704E3661A3EDD7200E14BD6 /* Info.plist */, 110 | ); 111 | name = "Supporting Files"; 112 | sourceTree = ""; 113 | }; 114 | 6704E37A1A3EDD7200E14BD6 /* SwiftMetalProcessingTests */ = { 115 | isa = PBXGroup; 116 | children = ( 117 | 6704E37D1A3EDD7200E14BD6 /* SwiftMetalProcessingTests.swift */, 118 | 6704E37B1A3EDD7200E14BD6 /* Supporting Files */, 119 | ); 120 | path = SwiftMetalProcessingTests; 121 | sourceTree = ""; 122 | }; 123 | 6704E37B1A3EDD7200E14BD6 /* Supporting Files */ = { 124 | isa = PBXGroup; 125 | children = ( 126 | 6704E37C1A3EDD7200E14BD6 /* Info.plist */, 127 | ); 128 | name = "Supporting Files"; 129 | sourceTree = ""; 130 | }; 131 | /* End PBXGroup section */ 132 | 133 | /* Begin PBXNativeTarget section */ 134 | 6704E3611A3EDD7200E14BD6 /* SwiftMetalProcessing */ = { 135 | isa = PBXNativeTarget; 136 | buildConfigurationList = 6704E3811A3EDD7200E14BD6 /* Build configuration list for 
PBXNativeTarget "SwiftMetalProcessing" */; 137 | buildPhases = ( 138 | 6704E35E1A3EDD7200E14BD6 /* Sources */, 139 | 6704E35F1A3EDD7200E14BD6 /* Frameworks */, 140 | 6704E3601A3EDD7200E14BD6 /* Resources */, 141 | ); 142 | buildRules = ( 143 | ); 144 | dependencies = ( 145 | ); 146 | name = SwiftMetalProcessing; 147 | productName = SwiftMetalProcessing; 148 | productReference = 6704E3621A3EDD7200E14BD6 /* SwiftMetalProcessing.app */; 149 | productType = "com.apple.product-type.application"; 150 | }; 151 | 6704E3761A3EDD7200E14BD6 /* SwiftMetalProcessingTests */ = { 152 | isa = PBXNativeTarget; 153 | buildConfigurationList = 6704E3841A3EDD7200E14BD6 /* Build configuration list for PBXNativeTarget "SwiftMetalProcessingTests" */; 154 | buildPhases = ( 155 | 6704E3731A3EDD7200E14BD6 /* Sources */, 156 | 6704E3741A3EDD7200E14BD6 /* Frameworks */, 157 | 6704E3751A3EDD7200E14BD6 /* Resources */, 158 | ); 159 | buildRules = ( 160 | ); 161 | dependencies = ( 162 | 6704E3791A3EDD7200E14BD6 /* PBXTargetDependency */, 163 | ); 164 | name = SwiftMetalProcessingTests; 165 | productName = SwiftMetalProcessingTests; 166 | productReference = 6704E3771A3EDD7200E14BD6 /* SwiftMetalProcessingTests.xctest */; 167 | productType = "com.apple.product-type.bundle.unit-test"; 168 | }; 169 | /* End PBXNativeTarget section */ 170 | 171 | /* Begin PBXProject section */ 172 | 6704E35A1A3EDD7200E14BD6 /* Project object */ = { 173 | isa = PBXProject; 174 | attributes = { 175 | LastUpgradeCheck = 0610; 176 | ORGANIZATIONNAME = "Amund Tveit"; 177 | TargetAttributes = { 178 | 6704E3611A3EDD7200E14BD6 = { 179 | CreatedOnToolsVersion = 6.1.1; 180 | DevelopmentTeam = 5L92A5ZTU9; 181 | }; 182 | 6704E3761A3EDD7200E14BD6 = { 183 | CreatedOnToolsVersion = 6.1.1; 184 | TestTargetID = 6704E3611A3EDD7200E14BD6; 185 | }; 186 | }; 187 | }; 188 | buildConfigurationList = 6704E35D1A3EDD7200E14BD6 /* Build configuration list for PBXProject "SwiftMetalProcessing" */; 189 | compatibilityVersion = "Xcode 3.2"; 190 | 
developmentRegion = English; 191 | hasScannedForEncodings = 0; 192 | knownRegions = ( 193 | en, 194 | Base, 195 | ); 196 | mainGroup = 6704E3591A3EDD7200E14BD6; 197 | productRefGroup = 6704E3631A3EDD7200E14BD6 /* Products */; 198 | projectDirPath = ""; 199 | projectRoot = ""; 200 | targets = ( 201 | 6704E3611A3EDD7200E14BD6 /* SwiftMetalProcessing */, 202 | 6704E3761A3EDD7200E14BD6 /* SwiftMetalProcessingTests */, 203 | ); 204 | }; 205 | /* End PBXProject section */ 206 | 207 | /* Begin PBXResourcesBuildPhase section */ 208 | 6704E3601A3EDD7200E14BD6 /* Resources */ = { 209 | isa = PBXResourcesBuildPhase; 210 | buildActionMask = 2147483647; 211 | files = ( 212 | 6704E36D1A3EDD7200E14BD6 /* Main.storyboard in Resources */, 213 | 6704E3721A3EDD7200E14BD6 /* LaunchScreen.xib in Resources */, 214 | 6704E36F1A3EDD7200E14BD6 /* Images.xcassets in Resources */, 215 | ); 216 | runOnlyForDeploymentPostprocessing = 0; 217 | }; 218 | 6704E3751A3EDD7200E14BD6 /* Resources */ = { 219 | isa = PBXResourcesBuildPhase; 220 | buildActionMask = 2147483647; 221 | files = ( 222 | ); 223 | runOnlyForDeploymentPostprocessing = 0; 224 | }; 225 | /* End PBXResourcesBuildPhase section */ 226 | 227 | /* Begin PBXSourcesBuildPhase section */ 228 | 6704E35E1A3EDD7200E14BD6 /* Sources */ = { 229 | isa = PBXSourcesBuildPhase; 230 | buildActionMask = 2147483647; 231 | files = ( 232 | 6704E38E1A3EDEA100E14BD6 /* Shaders.metal in Sources */, 233 | 6704E36A1A3EDD7200E14BD6 /* ViewController.swift in Sources */, 234 | 6704E3681A3EDD7200E14BD6 /* AppDelegate.swift in Sources */, 235 | ); 236 | runOnlyForDeploymentPostprocessing = 0; 237 | }; 238 | 6704E3731A3EDD7200E14BD6 /* Sources */ = { 239 | isa = PBXSourcesBuildPhase; 240 | buildActionMask = 2147483647; 241 | files = ( 242 | 6704E38F1A3EDEA100E14BD6 /* Shaders.metal in Sources */, 243 | 6704E37E1A3EDD7200E14BD6 /* SwiftMetalProcessingTests.swift in Sources */, 244 | ); 245 | runOnlyForDeploymentPostprocessing = 0; 246 | }; 247 | /* End 
PBXSourcesBuildPhase section */ 248 | 249 | /* Begin PBXTargetDependency section */ 250 | 6704E3791A3EDD7200E14BD6 /* PBXTargetDependency */ = { 251 | isa = PBXTargetDependency; 252 | target = 6704E3611A3EDD7200E14BD6 /* SwiftMetalProcessing */; 253 | targetProxy = 6704E3781A3EDD7200E14BD6 /* PBXContainerItemProxy */; 254 | }; 255 | /* End PBXTargetDependency section */ 256 | 257 | /* Begin PBXVariantGroup section */ 258 | 6704E36B1A3EDD7200E14BD6 /* Main.storyboard */ = { 259 | isa = PBXVariantGroup; 260 | children = ( 261 | 6704E36C1A3EDD7200E14BD6 /* Base */, 262 | ); 263 | name = Main.storyboard; 264 | sourceTree = ""; 265 | }; 266 | 6704E3701A3EDD7200E14BD6 /* LaunchScreen.xib */ = { 267 | isa = PBXVariantGroup; 268 | children = ( 269 | 6704E3711A3EDD7200E14BD6 /* Base */, 270 | ); 271 | name = LaunchScreen.xib; 272 | sourceTree = ""; 273 | }; 274 | /* End PBXVariantGroup section */ 275 | 276 | /* Begin XCBuildConfiguration section */ 277 | 6704E37F1A3EDD7200E14BD6 /* Debug */ = { 278 | isa = XCBuildConfiguration; 279 | buildSettings = { 280 | ALWAYS_SEARCH_USER_PATHS = NO; 281 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; 282 | CLANG_CXX_LIBRARY = "libc++"; 283 | CLANG_ENABLE_MODULES = YES; 284 | CLANG_ENABLE_OBJC_ARC = YES; 285 | CLANG_WARN_BOOL_CONVERSION = YES; 286 | CLANG_WARN_CONSTANT_CONVERSION = YES; 287 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 288 | CLANG_WARN_EMPTY_BODY = YES; 289 | CLANG_WARN_ENUM_CONVERSION = YES; 290 | CLANG_WARN_INT_CONVERSION = YES; 291 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 292 | CLANG_WARN_UNREACHABLE_CODE = YES; 293 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 294 | "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; 295 | COPY_PHASE_STRIP = NO; 296 | ENABLE_STRICT_OBJC_MSGSEND = YES; 297 | GCC_C_LANGUAGE_STANDARD = gnu99; 298 | GCC_DYNAMIC_NO_PIC = NO; 299 | GCC_OPTIMIZATION_LEVEL = 0; 300 | GCC_PREPROCESSOR_DEFINITIONS = ( 301 | "DEBUG=1", 302 | "$(inherited)", 303 | ); 304 | GCC_SYMBOLS_PRIVATE_EXTERN = NO; 305 
| GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 306 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 307 | GCC_WARN_UNDECLARED_SELECTOR = YES; 308 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 309 | GCC_WARN_UNUSED_FUNCTION = YES; 310 | GCC_WARN_UNUSED_VARIABLE = YES; 311 | IPHONEOS_DEPLOYMENT_TARGET = 8.1; 312 | MTL_ENABLE_DEBUG_INFO = YES; 313 | ONLY_ACTIVE_ARCH = YES; 314 | SDKROOT = iphoneos; 315 | SWIFT_OPTIMIZATION_LEVEL = "-Onone"; 316 | TARGETED_DEVICE_FAMILY = "1,2"; 317 | }; 318 | name = Debug; 319 | }; 320 | 6704E3801A3EDD7200E14BD6 /* Release */ = { 321 | isa = XCBuildConfiguration; 322 | buildSettings = { 323 | ALWAYS_SEARCH_USER_PATHS = NO; 324 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; 325 | CLANG_CXX_LIBRARY = "libc++"; 326 | CLANG_ENABLE_MODULES = YES; 327 | CLANG_ENABLE_OBJC_ARC = YES; 328 | CLANG_WARN_BOOL_CONVERSION = YES; 329 | CLANG_WARN_CONSTANT_CONVERSION = YES; 330 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 331 | CLANG_WARN_EMPTY_BODY = YES; 332 | CLANG_WARN_ENUM_CONVERSION = YES; 333 | CLANG_WARN_INT_CONVERSION = YES; 334 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 335 | CLANG_WARN_UNREACHABLE_CODE = YES; 336 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 337 | "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; 338 | COPY_PHASE_STRIP = YES; 339 | ENABLE_NS_ASSERTIONS = NO; 340 | ENABLE_STRICT_OBJC_MSGSEND = YES; 341 | GCC_C_LANGUAGE_STANDARD = gnu99; 342 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 343 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 344 | GCC_WARN_UNDECLARED_SELECTOR = YES; 345 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 346 | GCC_WARN_UNUSED_FUNCTION = YES; 347 | GCC_WARN_UNUSED_VARIABLE = YES; 348 | IPHONEOS_DEPLOYMENT_TARGET = 8.1; 349 | MTL_ENABLE_DEBUG_INFO = NO; 350 | SDKROOT = iphoneos; 351 | TARGETED_DEVICE_FAMILY = "1,2"; 352 | VALIDATE_PRODUCT = YES; 353 | }; 354 | name = Release; 355 | }; 356 | 6704E3821A3EDD7200E14BD6 /* Debug */ = { 357 | isa = XCBuildConfiguration; 358 | buildSettings = { 359 | 
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 360 | CODE_SIGN_IDENTITY = "iPhone Developer"; 361 | "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; 362 | INFOPLIST_FILE = SwiftMetalProcessing/Info.plist; 363 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; 364 | PRODUCT_NAME = "$(TARGET_NAME)"; 365 | PROVISIONING_PROFILE = ""; 366 | }; 367 | name = Debug; 368 | }; 369 | 6704E3831A3EDD7200E14BD6 /* Release */ = { 370 | isa = XCBuildConfiguration; 371 | buildSettings = { 372 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 373 | CODE_SIGN_IDENTITY = "iPhone Developer"; 374 | "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; 375 | INFOPLIST_FILE = SwiftMetalProcessing/Info.plist; 376 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; 377 | PRODUCT_NAME = "$(TARGET_NAME)"; 378 | PROVISIONING_PROFILE = ""; 379 | }; 380 | name = Release; 381 | }; 382 | 6704E3851A3EDD7200E14BD6 /* Debug */ = { 383 | isa = XCBuildConfiguration; 384 | buildSettings = { 385 | BUNDLE_LOADER = "$(TEST_HOST)"; 386 | FRAMEWORK_SEARCH_PATHS = ( 387 | "$(SDKROOT)/Developer/Library/Frameworks", 388 | "$(inherited)", 389 | ); 390 | GCC_PREPROCESSOR_DEFINITIONS = ( 391 | "DEBUG=1", 392 | "$(inherited)", 393 | ); 394 | INFOPLIST_FILE = SwiftMetalProcessingTests/Info.plist; 395 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; 396 | PRODUCT_NAME = "$(TARGET_NAME)"; 397 | TEST_HOST = "$(BUILT_PRODUCTS_DIR)/SwiftMetalProcessing.app/SwiftMetalProcessing"; 398 | }; 399 | name = Debug; 400 | }; 401 | 6704E3861A3EDD7200E14BD6 /* Release */ = { 402 | isa = XCBuildConfiguration; 403 | buildSettings = { 404 | BUNDLE_LOADER = "$(TEST_HOST)"; 405 | FRAMEWORK_SEARCH_PATHS = ( 406 | "$(SDKROOT)/Developer/Library/Frameworks", 407 | "$(inherited)", 408 | ); 409 | INFOPLIST_FILE = SwiftMetalProcessingTests/Info.plist; 410 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks 
@loader_path/Frameworks"; 411 | PRODUCT_NAME = "$(TARGET_NAME)"; 412 | TEST_HOST = "$(BUILT_PRODUCTS_DIR)/SwiftMetalProcessing.app/SwiftMetalProcessing"; 413 | }; 414 | name = Release; 415 | }; 416 | /* End XCBuildConfiguration section */ 417 | 418 | /* Begin XCConfigurationList section */ 419 | 6704E35D1A3EDD7200E14BD6 /* Build configuration list for PBXProject "SwiftMetalProcessing" */ = { 420 | isa = XCConfigurationList; 421 | buildConfigurations = ( 422 | 6704E37F1A3EDD7200E14BD6 /* Debug */, 423 | 6704E3801A3EDD7200E14BD6 /* Release */, 424 | ); 425 | defaultConfigurationIsVisible = 0; 426 | defaultConfigurationName = Release; 427 | }; 428 | 6704E3811A3EDD7200E14BD6 /* Build configuration list for PBXNativeTarget "SwiftMetalProcessing" */ = { 429 | isa = XCConfigurationList; 430 | buildConfigurations = ( 431 | 6704E3821A3EDD7200E14BD6 /* Debug */, 432 | 6704E3831A3EDD7200E14BD6 /* Release */, 433 | ); 434 | defaultConfigurationIsVisible = 0; 435 | defaultConfigurationName = Release; 436 | }; 437 | 6704E3841A3EDD7200E14BD6 /* Build configuration list for PBXNativeTarget "SwiftMetalProcessingTests" */ = { 438 | isa = XCConfigurationList; 439 | buildConfigurations = ( 440 | 6704E3851A3EDD7200E14BD6 /* Debug */, 441 | 6704E3861A3EDD7200E14BD6 /* Release */, 442 | ); 443 | defaultConfigurationIsVisible = 0; 444 | defaultConfigurationName = Release; 445 | }; 446 | /* End XCConfigurationList section */ 447 | }; 448 | rootObject = 6704E35A1A3EDD7200E14BD6 /* Project object */; 449 | } 450 | -------------------------------------------------------------------------------- /SwiftMetalProcessing.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /SwiftMetalProcessing/AppDelegate.swift: -------------------------------------------------------------------------------- 1 | // 2 | // 
AppDelegate.swift 3 | // SwiftMetalProcessing 4 | // 5 | // Created by Amund Tveit on 15/12/14. 6 | // Copyright (c) 2014 Amund Tveit. All rights reserved. 7 | // 8 | 9 | import UIKit 10 | 11 | @UIApplicationMain 12 | class AppDelegate: UIResponder, UIApplicationDelegate { 13 | 14 | var window: UIWindow? 15 | 16 | func application(application: UIApplication, didFinishLaunchingWithOptions launchOptions: [NSObject: AnyObject]?) -> Bool { 17 | return true 18 | } 19 | 20 | } 21 | 22 | -------------------------------------------------------------------------------- /SwiftMetalProcessing/Base.lproj/LaunchScreen.xib: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 20 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /SwiftMetalProcessing/Base.lproj/Main.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /SwiftMetalProcessing/Images.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "iphone", 5 | "size" : "29x29", 6 | "scale" : "2x" 7 | }, 8 | { 9 | "idiom" : "iphone", 10 | "size" : "29x29", 11 | "scale" : "3x" 12 | }, 13 | { 14 | "idiom" : "iphone", 15 | "size" : "40x40", 16 | "scale" : "2x" 17 | }, 18 | { 19 | "idiom" : "iphone", 20 | "size" : "40x40", 21 | "scale" : "3x" 22 | }, 23 | { 24 | "idiom" : "iphone", 25 | "size" : "60x60", 26 | "scale" : "2x" 27 | }, 28 | { 29 | "idiom" : "iphone", 30 | "size" : "60x60", 31 | "scale" : "3x" 32 | }, 33 | { 34 | "idiom" : "ipad", 35 | "size" : "29x29", 36 | "scale" : 
"1x" 37 | }, 38 | { 39 | "idiom" : "ipad", 40 | "size" : "29x29", 41 | "scale" : "2x" 42 | }, 43 | { 44 | "idiom" : "ipad", 45 | "size" : "40x40", 46 | "scale" : "1x" 47 | }, 48 | { 49 | "idiom" : "ipad", 50 | "size" : "40x40", 51 | "scale" : "2x" 52 | }, 53 | { 54 | "idiom" : "ipad", 55 | "size" : "76x76", 56 | "scale" : "1x" 57 | }, 58 | { 59 | "idiom" : "ipad", 60 | "size" : "76x76", 61 | "scale" : "2x" 62 | } 63 | ], 64 | "info" : { 65 | "version" : 1, 66 | "author" : "xcode" 67 | } 68 | } -------------------------------------------------------------------------------- /SwiftMetalProcessing/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | en 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | com.memkite.$(PRODUCT_NAME:rfc1034identifier) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | APPL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleSignature 20 | ???? 21 | CFBundleVersion 22 | 1 23 | LSRequiresIPhoneOS 24 | 25 | UILaunchStoryboardName 26 | LaunchScreen 27 | UIMainStoryboardFile 28 | Main 29 | UIRequiredDeviceCapabilities 30 | 31 | armv7 32 | 33 | UISupportedInterfaceOrientations 34 | 35 | UIInterfaceOrientationPortrait 36 | UIInterfaceOrientationLandscapeLeft 37 | UIInterfaceOrientationLandscapeRight 38 | 39 | UISupportedInterfaceOrientations~ipad 40 | 41 | UIInterfaceOrientationPortrait 42 | UIInterfaceOrientationPortraitUpsideDown 43 | UIInterfaceOrientationLandscapeLeft 44 | UIInterfaceOrientationLandscapeRight 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /SwiftMetalProcessing/Shaders.metal: -------------------------------------------------------------------------------- 1 | // 2 | // Shaders.metal 3 | // SwiftMetalProcessing 4 | // 5 | // Created by Amund Tveit on 15/12/14. 6 | // Copyright (c) 2014 Amund Tveit. 
All rights reserved. 7 | 8 | #include 9 | using namespace metal; 10 | 11 | kernel void sigmoid(const device float *inVector [[ buffer(0) ]], 12 | device float *outVector [[ buffer(1) ]], 13 | uint id [[ thread_position_in_grid ]]) { 14 | // This calculates sigmoid for _one_ position (=id) in a vector per call on the GPU 15 | outVector[id] = 1.0 / (1.0 + exp(-inVector[id])); 16 | } 17 | -------------------------------------------------------------------------------- /SwiftMetalProcessing/ViewController.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ViewController.Swift 3 | // SwiftMetalProcessing 4 | // 5 | // Created by Amund Tveit on 15/12/14. 6 | // Copyright (c) 2014 Amund Tveit. All rights reserved. 7 | // 8 | import UIKit 9 | import Metal 10 | import QuartzCore 11 | import Darwin 12 | import Accelerate 13 | 14 | class ViewController: UIViewController { 15 | 16 | 17 | override func viewDidLoad() { 18 | super.viewDidLoad() 19 | 20 | 21 | for(var i = 25; i<26; ++i) { 22 | 23 | let start0 = CACurrentMediaTime() 24 | 25 | let maxcount = Int(pow(2.0,Float(i))) 26 | println("#############################################") 27 | println("==> count = \(maxcount) - 2^\(i)") 28 | 29 | // NEW APPROACH FOR ALLOCATION 30 | 31 | // new way of prepping data 32 | var memory:UnsafeMutablePointer = nil 33 | //var memory:AutoreleasingUnsafeMutablePointer = nil 34 | var alignment:UInt = 0x4000 // 16K aligned 35 | var size:UInt = UInt(maxcount)*UInt(sizeof(Float)) 36 | posix_memalign(&memory, alignment, size) 37 | 38 | var outmemory:UnsafeMutablePointer = nil 39 | posix_memalign(&outmemory, alignment, size) 40 | 41 | // var myvectorPtr = unsafeBitCast(memory, UnsafePointer.self) 42 | var pptr = COpaquePointer(memory) 43 | var nps = UnsafeMutablePointer(pptr) 44 | nps.memory = 1234.47 45 | var yoda = UnsafeMutableBufferPointer(start: nps, count: maxcount) 46 | 47 | for index in yoda.startIndex..(memptr) 159 | var yoda2 = 
UnsafeMutableBufferPointer(start: memptrfloat, count: maxcount) 160 | 161 | println(memptrfloat.memory) 162 | // var r = Array(yoda2) 163 | // 164 | // println("r = ") 165 | // println(r) 166 | 167 | 168 | // let stop = CACurrentMediaTime() 169 | // let deltaMicroseconds = (stop-start) * (1.0*10e6) 170 | // println("cold GPU: runtime in microsecs : \(deltaMicroseconds)") 171 | 172 | /* 173 | // a. Get GPU data 174 | // outVectorBuffer.contents() returns UnsafeMutablePointer roughly equivalent to char* in C 175 | var data = NSData(bytesNoCopy: outVectorBuffer.contents(), 176 | length: myvector.count*sizeof(Float), freeWhenDone: false) 177 | // b. prepare Swift array large enough to receive data from GPU 178 | var finalResultArray = [Float](count: myvector.count, repeatedValue: 0) 179 | 180 | // c. get data from GPU into Swift array 181 | data.getBytes(&finalResultArray, length:myvector.count * sizeof(Float)) 182 | assert(finalResultVector[0] == 0.5) 183 | 184 | */ 185 | 186 | // STOP BENCHMARK 187 | 188 | let deltaMicroseconds = (stop-start) * (1.0*10e6) 189 | println("cold GPU: runtime in microsecs : \(deltaMicroseconds)") 190 | 191 | let start3 = CACurrentMediaTime() 192 | 193 | // timing without 194 | /* 195 | for (index, value) in enumerate(myvector) { 196 | finalResultArray[index] = 1.0 / (1.0 + exp(-myvector[index])) 197 | } 198 | 199 | var fra = NSMutableArray(capacity: myvector.count) 200 | let ccount = myvector.count 201 | for j in 0.. (MTLDevice, MTLCommandQueue, MTLLibrary, MTLCommandBuffer, 225 | MTLComputeCommandEncoder){ 226 | // Get access to iPhone or iPad GPU 227 | var device = MTLCreateSystemDefaultDevice() 228 | 229 | // Queue to handle an ordered list of command buffers 230 | var commandQueue = device.newCommandQueue() 231 | 232 | // Access to Metal functions that are stored in Shaders.metal file, e.g. 
sigmoid() 233 | var defaultLibrary = device.newDefaultLibrary() 234 | 235 | // Buffer for storing encoded commands that are sent to GPU 236 | var commandBuffer = commandQueue.commandBuffer() 237 | 238 | // Encoder for GPU commands 239 | var computeCommandEncoder = commandBuffer.computeCommandEncoder() 240 | 241 | return (device, commandQueue, defaultLibrary!, commandBuffer, computeCommandEncoder) 242 | } 243 | 244 | override func didReceiveMemoryWarning() { 245 | super.didReceiveMemoryWarning() 246 | // Dispose of any resources that can be recreated. 247 | if(self.isViewLoaded() && self.view.window == nil) { 248 | self.view = nil 249 | } 250 | } 251 | 252 | 253 | } 254 | 255 | -------------------------------------------------------------------------------- /SwiftMetalProcessingTests/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | en 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | com.memkite.$(PRODUCT_NAME:rfc1034identifier) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | BNDL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleSignature 20 | ???? 21 | CFBundleVersion 22 | 1 23 | 24 | 25 | -------------------------------------------------------------------------------- /SwiftMetalProcessingTests/SwiftMetalProcessingTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // SwiftMetalProcessingTests.swift 3 | // SwiftMetalProcessingTests 4 | // 5 | // Created by Amund Tveit on 15/12/14. 6 | // Copyright (c) 2014 Amund Tveit. All rights reserved. 7 | // 8 | 9 | import UIKit 10 | import XCTest 11 | 12 | class SwiftMetalProcessingTests: XCTestCase { 13 | 14 | override func setUp() { 15 | super.setUp() 16 | // Put setup code here. This method is called before the invocation of each test method in the class. 
17 | } 18 | 19 | override func tearDown() { 20 | // Put teardown code here. This method is called after the invocation of each test method in the class. 21 | super.tearDown() 22 | } 23 | 24 | func testExample() { 25 | // This is an example of a functional test case. 26 | XCTAssert(true, "Pass") 27 | } 28 | 29 | func testPerformanceExample() { 30 | // This is an example of a performance test case. 31 | self.measureBlock() { 32 | // Put the code you want to measure the time of here. 33 | } 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /simplebenchmark/accelerate_metal_cpu_benchmark_ipadair.tsv: -------------------------------------------------------------------------------- 1 | vectorsize as 2^x array filltime (microsec) accelerate time (microsec) metal gpu time (microsec) cpu time (microsec) metal relative to cpu metal relative to accelerate 2 | 2^5 11517.3333333587 11517.3333333587 5164044.1666666 10437.50000008 0.00202118720584403 0.002230293344062 3 | 2^6 2466.62500001094 2466.62500001094 35487.9166661703 7705.4166661128 0.217127895632661 0.0695060525308971 4 | 2^7 4282.58333329268 4282.58333329268 32060.4166665817 14025.8333328802 0.437481317811447 0.133578530118002 5 | 2^8 7893.20833325746 7893.20833325746 31516.2499998678 26939.1666665797 0.854770687080243 0.250448842526969 6 | 2^9 15722.9166667321 15722.9166667321 31873.7500003863 53249.5833328994 1.67064067868557 0.493287318453007 7 | 2^10 30727.9583333866 30727.9583333866 39157.0833335209 109837.083333559 2.8050374027611 0.784735626799905 8 | 2^11 63234.8333332402 63234.8333332402 44922.4999999842 206147.500000498 4.58895876232556 1.40764279221465 9 | 2^12 117344.291666654 117344.291666654 52408.7500002679 410010.416667319 7.82331989725424 2.23902099680024 10 | 2^13 234020.708333333 234020.708333333 61772.0833326985 850657.083333317 13.770898396801 3.78845419658132 11 | 2^14 467565.374999936 467565.374999936 88402.4999993471 1668573.7499995 
18.874734877541 5.28905149745074 12 | 2^15 934810.958333287 934810.958333287 136247.083332819 3257175.83333358 23.906389433505 6.8611447340107 13 | 2^16 1881085.33333332 1881085.33333332 220562.083333107 6522988.33333248 29.5743866523107 8.52859795712205 14 | 2^17 3736484.0416667 3736484.0416667 397627.499999089 13044074.5833334 32.8047596893156 9.39694573860022 15 | 2^18 7483013.8333333 7483013.8333333 662667.50000068 26136895.4166664 39.4419454955006 11.2922601958382 16 | 2^19 14943724.1666667 14943724.1666667 906277.91666634 53448797.916667 58.9761671709639 16.4891187260038 17 | 2^20 29896009.25 29896009.25 1826748.33333408 108519722.916667 59.4059515130925 16.3656967434783 18 | 2^21 59825244.0 59825244.0 3182813.33333289 217788127.5 68.426296075599 18.79634076352 19 | 2^22 119685028.708333 119685028.708333 6795023.33333176 437891037.916668 64.4429042309057 17.6136302757402 20 | -------------------------------------------------------------------------------- /simplebenchmark/accelerate_metal_cpu_benchmark_ipadair.txt: -------------------------------------------------------------------------------- 1 | ############################################# 2 | ==> count = 32 - 2^5 3 | filling array took 11517.3333333587 microseconds 4 | Accelerate approach took 11517.3333333587 microseconds 5 | 2014-12-17 10:26:58.412 SwiftMetalProcessing[203:6251] Metal GPU Frame Capture Enabled 6 | 2014-12-17 10:26:58.412 SwiftMetalProcessing[203:6251] Metal API Validation Enabled 7 | cold GPU: runtime in microsecs : 5164044.1666666 8 | CPU: runtime in microsecs : 10437.50000008 9 | Metal was 0.00202118720584403 times faster than CPU 10 | Metal was 0.002230293344062 times faster than Accelerate Framework 11 | ############################################# 12 | ==> count = 64 - 2^6 13 | filling array took 2466.62500001094 microseconds 14 | Accelerate approach took 2466.62500001094 microseconds 15 | cold GPU: runtime in microsecs : 35487.9166661703 16 | CPU: runtime in microsecs : 
7705.4166661128 17 | Metal was 0.217127895632661 times faster than CPU 18 | Metal was 0.0695060525308971 times faster than Accelerate Framework 19 | ############################################# 20 | ==> count = 128 - 2^7 21 | filling array took 4282.58333329268 microseconds 22 | Accelerate approach took 4282.58333329268 microseconds 23 | cold GPU: runtime in microsecs : 32060.4166665817 24 | CPU: runtime in microsecs : 14025.8333328802 25 | Metal was 0.437481317811447 times faster than CPU 26 | Metal was 0.133578530118002 times faster than Accelerate Framework 27 | ############################################# 28 | ==> count = 256 - 2^8 29 | filling array took 7893.20833325746 microseconds 30 | Accelerate approach took 7893.20833325746 microseconds 31 | cold GPU: runtime in microsecs : 31516.2499998678 32 | CPU: runtime in microsecs : 26939.1666665797 33 | Metal was 0.854770687080243 times faster than CPU 34 | Metal was 0.250448842526969 times faster than Accelerate Framework 35 | ############################################# 36 | ==> count = 512 - 2^9 37 | filling array took 15722.9166667321 microseconds 38 | Accelerate approach took 15722.9166667321 microseconds 39 | cold GPU: runtime in microsecs : 31873.7500003863 40 | CPU: runtime in microsecs : 53249.5833328994 41 | Metal was 1.67064067868557 times faster than CPU 42 | Metal was 0.493287318453007 times faster than Accelerate Framework 43 | ############################################# 44 | ==> count = 1024 - 2^10 45 | filling array took 30727.9583333866 microseconds 46 | Accelerate approach took 30727.9583333866 microseconds 47 | cold GPU: runtime in microsecs : 39157.0833335209 48 | CPU: runtime in microsecs : 109837.083333559 49 | Metal was 2.8050374027611 times faster than CPU 50 | Metal was 0.784735626799905 times faster than Accelerate Framework 51 | ############################################# 52 | ==> count = 2048 - 2^11 53 | filling array took 63234.8333332402 microseconds 54 | Accelerate approach 
took 63234.8333332402 microseconds 55 | cold GPU: runtime in microsecs : 44922.4999999842 56 | CPU: runtime in microsecs : 206147.500000498 57 | Metal was 4.58895876232556 times faster than CPU 58 | Metal was 1.40764279221465 times faster than Accelerate Framework 59 | ############################################# 60 | ==> count = 4096 - 2^12 61 | filling array took 117344.291666654 microseconds 62 | Accelerate approach took 117344.291666654 microseconds 63 | cold GPU: runtime in microsecs : 52408.7500002679 64 | CPU: runtime in microsecs : 410010.416667319 65 | Metal was 7.82331989725424 times faster than CPU 66 | Metal was 2.23902099680024 times faster than Accelerate Framework 67 | ############################################# 68 | ==> count = 8192 - 2^13 69 | filling array took 234020.708333333 microseconds 70 | Accelerate approach took 234020.708333333 microseconds 71 | cold GPU: runtime in microsecs : 61772.0833326985 72 | CPU: runtime in microsecs : 850657.083333317 73 | Metal was 13.770898396801 times faster than CPU 74 | Metal was 3.78845419658132 times faster than Accelerate Framework 75 | ############################################# 76 | ==> count = 16384 - 2^14 77 | filling array took 467565.374999936 microseconds 78 | Accelerate approach took 467565.374999936 microseconds 79 | cold GPU: runtime in microsecs : 88402.4999993471 80 | CPU: runtime in microsecs : 1668573.7499995 81 | Metal was 18.874734877541 times faster than CPU 82 | Metal was 5.28905149745074 times faster than Accelerate Framework 83 | ############################################# 84 | ==> count = 32768 - 2^15 85 | filling array took 934810.958333287 microseconds 86 | Accelerate approach took 934810.958333287 microseconds 87 | cold GPU: runtime in microsecs : 136247.083332819 88 | CPU: runtime in microsecs : 3257175.83333358 89 | Metal was 23.906389433505 times faster than CPU 90 | Metal was 6.8611447340107 times faster than Accelerate Framework 91 | 
############################################# 92 | ==> count = 65536 - 2^16 93 | filling array took 1881085.33333332 microseconds 94 | Accelerate approach took 1881085.33333332 microseconds 95 | cold GPU: runtime in microsecs : 220562.083333107 96 | CPU: runtime in microsecs : 6522988.33333248 97 | Metal was 29.5743866523107 times faster than CPU 98 | Metal was 8.52859795712205 times faster than Accelerate Framework 99 | ############################################# 100 | ==> count = 131072 - 2^17 101 | filling array took 3736484.0416667 microseconds 102 | Accelerate approach took 3736484.0416667 microseconds 103 | cold GPU: runtime in microsecs : 397627.499999089 104 | CPU: runtime in microsecs : 13044074.5833334 105 | Metal was 32.8047596893156 times faster than CPU 106 | Metal was 9.39694573860022 times faster than Accelerate Framework 107 | ############################################# 108 | ==> count = 262144 - 2^18 109 | filling array took 7483013.8333333 microseconds 110 | Accelerate approach took 7483013.8333333 microseconds 111 | cold GPU: runtime in microsecs : 662667.50000068 112 | CPU: runtime in microsecs : 26136895.4166664 113 | Metal was 39.4419454955006 times faster than CPU 114 | Metal was 11.2922601958382 times faster than Accelerate Framework 115 | ############################################# 116 | ==> count = 524288 - 2^19 117 | filling array took 14943724.1666667 microseconds 118 | Accelerate approach took 14943724.1666667 microseconds 119 | cold GPU: runtime in microsecs : 906277.91666634 120 | CPU: runtime in microsecs : 53448797.916667 121 | Metal was 58.9761671709639 times faster than CPU 122 | Metal was 16.4891187260038 times faster than Accelerate Framework 123 | ############################################# 124 | ==> count = 1048576 - 2^20 125 | filling array took 29896009.25 microseconds 126 | Accelerate approach took 29896009.25 microseconds 127 | cold GPU: runtime in microsecs : 1826748.33333408 128 | CPU: runtime in microsecs : 
108519722.916667 129 | Metal was 59.4059515130925 times faster than CPU 130 | Metal was 16.3656967434783 times faster than Accelerate Framework 131 | ############################################# 132 | ==> count = 2097152 - 2^21 133 | filling array took 59825244.0 microseconds 134 | Accelerate approach took 59825244.0 microseconds 135 | cold GPU: runtime in microsecs : 3182813.33333289 136 | CPU: runtime in microsecs : 217788127.5 137 | Metal was 68.426296075599 times faster than CPU 138 | Metal was 18.79634076352 times faster than Accelerate Framework 139 | ############################################# 140 | ==> count = 4194304 - 2^22 141 | filling array took 119685028.708333 microseconds 142 | Accelerate approach took 119685028.708333 microseconds 143 | cold GPU: runtime in microsecs : 6795023.33333176 144 | CPU: runtime in microsecs : 437891037.916668 145 | Metal was 64.4429042309057 times faster than CPU 146 | Metal was 17.6136302757402 times faster than Accelerate Framework 147 | -------------------------------------------------------------------------------- /simplebenchmark/accelerate_metal_cpu_benchmark_ipadminia7retina.tsv: -------------------------------------------------------------------------------- 1 | vectorsize as 2^x array filltime (microsec) accelerate time (microsec) metal gpu time (microsec) cpu time (microsec) metal relative to cpu metal relative to accelerate 2 | 2^5 11923.6666650977 11923.6666650977 5289747.08333408 13542.9166766698 0.00256022007542445 0.00225410902964803 3 | 2^6 2506.08333590208 2506.08333590208 37993.3333169902 7911.66668932419 0.208238288104776 0.0659611336281827 4 | 2^7 4605.95833283151 4605.95833283151 32925.4166717874 15000.4166789586 0.455587755456164 0.139890661938933 5 | 2^8 8444.79166698875 8444.79166698875 33567.916652828 28184.583352413 0.839628614546103 0.251573302994283 6 | 2^9 16129.0416690463 16129.0416690463 32028.3333348925 57543.3333142428 1.79663839240594 0.503586668104108 7 | 2^10 33857.7500006068 
33857.7500006068 40827.0833577262 128797.916695476 3.15471755763081 0.829296320384822 8 | 2^11 71993.2499996503 71993.2499996503 44299.5833509485 223063.7500179 5.03534645576128 1.62514508159835 9 | 2^12 124552.208333625 124552.208333625 52890.8333217259 433811.666662223 8.20201988543875 2.35489215259657 10 | 2^13 250088.333334133 250088.333334133 86791.2499961676 866945.000016131 9.98885256352929 2.8814924700955 11 | 2^14 506779.54166531 506779.54166531 88932.5000025565 1744323.74999014 19.6140190587243 5.69847402975 12 | 2^15 1036662.04166802 1036662.04166802 144628.333328001 3483881.66665245 24.0885142384335 7.16776594055738 13 | 2^16 1988050.54166587 1988050.54166587 232781.25001525 6959352.91667411 29.896535550944 8.54042385946302 14 | 2^17 3980010.04166872 3980010.04166872 445706.666650949 14054216.250006 31.5324344497912 8.92966235298793 15 | 2^18 7949285.49999895 7949285.49999895 697174.583328888 27891639.5833585 40.0066787434802 11.4021447282867 16 | 2^19 15920954.1666642 15920954.1666642 1067902.91665675 56795661.6666561 53.1842930483461 14.9086156787617 17 | 2^20 31840600.1666663 31840600.1666663 1856681.25002849 114604203.750023 61.7252981621183 17.1492011168733 18 | 2^21 63625027.4999984 63625027.4999984 3925342.08334837 231971756.249986 59.0959338891838 16.2087854125889 19 | 2^22 127350503.624999 127350503.624999 6785403.3333424 463321627.916666 68.2821057430701 18.7683026886874 20 | -------------------------------------------------------------------------------- /simplebenchmark/accelerate_metal_cpu_benchmark_ipadminia7retina.txt: -------------------------------------------------------------------------------- 1 | ############################################# 2 | ==> count = 32 - 2^5 3 | filling array took 11923.6666650977 microseconds 4 | Accelerate approach took 11923.6666650977 microseconds 5 | 2014-12-17 10:38:58.210 SwiftMetalProcessing[506:90552] Metal GPU Frame Capture Enabled 6 | 2014-12-17 10:38:58.211 SwiftMetalProcessing[506:90552] Metal 
API Validation Enabled 7 | cold GPU: runtime in microsecs : 5289747.08333408 8 | CPU: runtime in microsecs : 13542.9166766698 9 | Metal was 0.00256022007542445 times faster than CPU 10 | Metal was 0.00225410902964803 times faster than Accelerate Framework 11 | ############################################# 12 | ==> count = 64 - 2^6 13 | filling array took 2506.08333590208 microseconds 14 | Accelerate approach took 2506.08333590208 microseconds 15 | cold GPU: runtime in microsecs : 37993.3333169902 16 | CPU: runtime in microsecs : 7911.66668932419 17 | Metal was 0.208238288104776 times faster than CPU 18 | Metal was 0.0659611336281827 times faster than Accelerate Framework 19 | ############################################# 20 | ==> count = 128 - 2^7 21 | filling array took 4605.95833283151 microseconds 22 | Accelerate approach took 4605.95833283151 microseconds 23 | cold GPU: runtime in microsecs : 32925.4166717874 24 | CPU: runtime in microsecs : 15000.4166789586 25 | Metal was 0.455587755456164 times faster than CPU 26 | Metal was 0.139890661938933 times faster than Accelerate Framework 27 | ############################################# 28 | ==> count = 256 - 2^8 29 | filling array took 8444.79166698875 microseconds 30 | Accelerate approach took 8444.79166698875 microseconds 31 | cold GPU: runtime in microsecs : 33567.916652828 32 | CPU: runtime in microsecs : 28184.583352413 33 | Metal was 0.839628614546103 times faster than CPU 34 | Metal was 0.251573302994283 times faster than Accelerate Framework 35 | ############################################# 36 | ==> count = 512 - 2^9 37 | filling array took 16129.0416690463 microseconds 38 | Accelerate approach took 16129.0416690463 microseconds 39 | cold GPU: runtime in microsecs : 32028.3333348925 40 | CPU: runtime in microsecs : 57543.3333142428 41 | Metal was 1.79663839240594 times faster than CPU 42 | Metal was 0.503586668104108 times faster than Accelerate Framework 43 | ############################################# 
44 | ==> count = 1024 - 2^10 45 | filling array took 33857.7500006068 microseconds 46 | Accelerate approach took 33857.7500006068 microseconds 47 | cold GPU: runtime in microsecs : 40827.0833577262 48 | CPU: runtime in microsecs : 128797.916695476 49 | Metal was 3.15471755763081 times faster than CPU 50 | Metal was 0.829296320384822 times faster than Accelerate Framework 51 | ############################################# 52 | ==> count = 2048 - 2^11 53 | filling array took 71993.2499996503 microseconds 54 | Accelerate approach took 71993.2499996503 microseconds 55 | cold GPU: runtime in microsecs : 44299.5833509485 56 | CPU: runtime in microsecs : 223063.7500179 57 | Metal was 5.03534645576128 times faster than CPU 58 | Metal was 1.62514508159835 times faster than Accelerate Framework 59 | ############################################# 60 | ==> count = 4096 - 2^12 61 | filling array took 124552.208333625 microseconds 62 | Accelerate approach took 124552.208333625 microseconds 63 | cold GPU: runtime in microsecs : 52890.8333217259 64 | CPU: runtime in microsecs : 433811.666662223 65 | Metal was 8.20201988543875 times faster than CPU 66 | Metal was 2.35489215259657 times faster than Accelerate Framework 67 | ############################################# 68 | ==> count = 8192 - 2^13 69 | filling array took 250088.333334133 microseconds 70 | Accelerate approach took 250088.333334133 microseconds 71 | cold GPU: runtime in microsecs : 86791.2499961676 72 | CPU: runtime in microsecs : 866945.000016131 73 | Metal was 9.98885256352929 times faster than CPU 74 | Metal was 2.8814924700955 times faster than Accelerate Framework 75 | ############################################# 76 | ==> count = 16384 - 2^14 77 | filling array took 506779.54166531 microseconds 78 | Accelerate approach took 506779.54166531 microseconds 79 | cold GPU: runtime in microsecs : 88932.5000025565 80 | CPU: runtime in microsecs : 1744323.74999014 81 | Metal was 19.6140190587243 times faster than CPU 82 | 
Metal was 5.69847402975 times faster than Accelerate Framework 83 | ############################################# 84 | ==> count = 32768 - 2^15 85 | filling array took 1036662.04166802 microseconds 86 | Accelerate approach took 1036662.04166802 microseconds 87 | cold GPU: runtime in microsecs : 144628.333328001 88 | CPU: runtime in microsecs : 3483881.66665245 89 | Metal was 24.0885142384335 times faster than CPU 90 | Metal was 7.16776594055738 times faster than Accelerate Framework 91 | ############################################# 92 | ==> count = 65536 - 2^16 93 | filling array took 1988050.54166587 microseconds 94 | Accelerate approach took 1988050.54166587 microseconds 95 | cold GPU: runtime in microsecs : 232781.25001525 96 | CPU: runtime in microsecs : 6959352.91667411 97 | Metal was 29.896535550944 times faster than CPU 98 | Metal was 8.54042385946302 times faster than Accelerate Framework 99 | ############################################# 100 | ==> count = 131072 - 2^17 101 | filling array took 3980010.04166872 microseconds 102 | Accelerate approach took 3980010.04166872 microseconds 103 | cold GPU: runtime in microsecs : 445706.666650949 104 | CPU: runtime in microsecs : 14054216.250006 105 | Metal was 31.5324344497912 times faster than CPU 106 | Metal was 8.92966235298793 times faster than Accelerate Framework 107 | ############################################# 108 | ==> count = 262144 - 2^18 109 | filling array took 7949285.49999895 microseconds 110 | Accelerate approach took 7949285.49999895 microseconds 111 | cold GPU: runtime in microsecs : 697174.583328888 112 | CPU: runtime in microsecs : 27891639.5833585 113 | Metal was 40.0066787434802 times faster than CPU 114 | Metal was 11.4021447282867 times faster than Accelerate Framework 115 | ############################################# 116 | ==> count = 524288 - 2^19 117 | filling array took 15920954.1666642 microseconds 118 | Accelerate approach took 15920954.1666642 microseconds 119 | cold GPU: 
runtime in microsecs : 1067902.91665675 120 | CPU: runtime in microsecs : 56795661.6666561 121 | Metal was 53.1842930483461 times faster than CPU 122 | Metal was 14.9086156787617 times faster than Accelerate Framework 123 | ############################################# 124 | ==> count = 1048576 - 2^20 125 | filling array took 31840600.1666663 microseconds 126 | Accelerate approach took 31840600.1666663 microseconds 127 | cold GPU: runtime in microsecs : 1856681.25002849 128 | CPU: runtime in microsecs : 114604203.750023 129 | Metal was 61.7252981621183 times faster than CPU 130 | Metal was 17.1492011168733 times faster than Accelerate Framework 131 | ############################################# 132 | ==> count = 2097152 - 2^21 133 | filling array took 63625027.4999984 microseconds 134 | Accelerate approach took 63625027.4999984 microseconds 135 | cold GPU: runtime in microsecs : 3925342.08334837 136 | CPU: runtime in microsecs : 231971756.249986 137 | Metal was 59.0959338891838 times faster than CPU 138 | Metal was 16.2087854125889 times faster than Accelerate Framework 139 | ############################################# 140 | ==> count = 4194304 - 2^22 141 | filling array took 127350503.624999 microseconds 142 | Accelerate approach took 127350503.624999 microseconds 143 | cold GPU: runtime in microsecs : 6785403.3333424 144 | CPU: runtime in microsecs : 463321627.916666 145 | Metal was 68.2821057430701 times faster than CPU 146 | Metal was 18.7683026886874 times faster than Accelerate Framework 147 | -------------------------------------------------------------------------------- /simplebenchmark/accelerate_metal_cpu_benchmark_iphone5s.tsv: -------------------------------------------------------------------------------- 1 | vectorsize as 2^x array filltime (microsec) accelerate time (microsec) metal gpu time (microsec) cpu time (microsec) metal relative to cpu metal relative to accelerate 2 | 2^5 13550.1666809432 13550.1666809432 3536931.66689482 21856.6667172126 
0.00617955583416778 0.00383105130578882 3 | 2^6 2996.79167801514 2996.79167801514 41439.1668164171 8325.83318697289 0.200917002599444 0.0723178555035014 4 | 2^7 4736.54165398329 4736.54165398329 33519.5835214108 15182.5000648387 0.452944173818293 0.141306697649086 5 | 2^8 8523.58335396275 8523.58335396275 36756.2499013729 31315.4165633023 0.851975287123417 0.231894803654721 6 | 2^9 16697.1249855123 16697.1249855123 39031.6665288992 55970.4165789299 1.43397455339216 0.427784065360102 7 | 2^10 32786.5833241958 32786.5833241958 45149.1665444337 112370.416463818 2.48887022871682 0.726183578426167 8 | 2^11 64416.1249801982 64416.1249801982 45887.500164099 220804.583223071 4.81186777299805 1.4037837047091 9 | 2^12 125048.66666859 125048.66666859 52858.7499866262 439140.416565351 8.30780933481132 2.36571365573777 10 | 2^13 250193.708343431 250193.708343431 68176.6666821204 872196.666896343 12.7931843743995 3.66978499418256 11 | 2^14 500049.624999519 500049.624999519 90087.4999933876 1745251.66657986 19.3728504699094 5.55071042082667 12 | 2^15 1005838.62498752 1005838.62498752 116369.583120104 3494683.33326513 30.0309001679442 8.64348395877127 13 | 2^16 1997907.37501462 1997907.37501462 235795.833577868 6966099.16660236 29.5429272896882 8.47303934382218 14 | 2^17 3995306.41667661 3995306.41667661 423624.583345372 13980710.4167994 33.0025946709548 9.4312430716971 15 | 2^18 7995071.99999061 7995071.99999061 592910.83336575 28068359.1665002 47.3399330674493 13.4844424322716 16 | 2^19 16004321.7083148 16004321.7083148 1100254.58328892 57159315.0001718 51.9509901329467 14.5460168504586 17 | 2^20 31964094.5833526 31964094.5833526 1919497.50005733 117858820.833499 61.4008722751545 16.6523241537943 18 | 2^21 63943138.4583295 63943138.4583295 3291106.24996247 233642984.166509 70.9922337418225 19.4290714433965 19 | 2^22 128008848.416677 128008848.416677 6850174.58337825 467631814.999913 68.265678386447 18.6869468593234 20 | 
-------------------------------------------------------------------------------- /simplebenchmark/accelerate_metal_cpu_benchmark_iphone5s.txt: -------------------------------------------------------------------------------- 1 | ############################################# 2 | ==> count = 32 - 2^5 3 | filling array took 13550.1666809432 microseconds 4 | Accelerate approach took 13550.1666809432 microseconds 5 | 2014-12-17 09:57:56.348 SwiftMetalProcessing[2321:687860] Metal GPU Frame Capture Enabled 6 | 2014-12-17 09:57:56.349 SwiftMetalProcessing[2321:687860] Metal API Validation Enabled 7 | cold GPU: runtime in microsecs : 3536931.66689482 8 | CPU: runtime in microsecs : 21856.6667172126 9 | Metal was 0.00617955583416778 times faster than CPU 10 | Metal was 0.00383105130578882 times faster than Accelerate Framework 11 | ############################################# 12 | ==> count = 64 - 2^6 13 | filling array took 2996.79167801514 microseconds 14 | Accelerate approach took 2996.79167801514 microseconds 15 | cold GPU: runtime in microsecs : 41439.1668164171 16 | CPU: runtime in microsecs : 8325.83318697289 17 | Metal was 0.200917002599444 times faster than CPU 18 | Metal was 0.0723178555035014 times faster than Accelerate Framework 19 | ############################################# 20 | ==> count = 128 - 2^7 21 | filling array took 4736.54165398329 microseconds 22 | Accelerate approach took 4736.54165398329 microseconds 23 | cold GPU: runtime in microsecs : 33519.5835214108 24 | CPU: runtime in microsecs : 15182.5000648387 25 | Metal was 0.452944173818293 times faster than CPU 26 | Metal was 0.141306697649086 times faster than Accelerate Framework 27 | ############################################# 28 | ==> count = 256 - 2^8 29 | filling array took 8523.58335396275 microseconds 30 | Accelerate approach took 8523.58335396275 microseconds 31 | cold GPU: runtime in microsecs : 36756.2499013729 32 | CPU: runtime in microsecs : 31315.4165633023 33 | Metal was 
0.851975287123417 times faster than CPU 34 | Metal was 0.231894803654721 times faster than Accelerate Framework 35 | ############################################# 36 | ==> count = 512 - 2^9 37 | filling array took 16697.1249855123 microseconds 38 | Accelerate approach took 16697.1249855123 microseconds 39 | cold GPU: runtime in microsecs : 39031.6665288992 40 | CPU: runtime in microsecs : 55970.4165789299 41 | Metal was 1.43397455339216 times faster than CPU 42 | Metal was 0.427784065360102 times faster than Accelerate Framework 43 | ############################################# 44 | ==> count = 1024 - 2^10 45 | filling array took 32786.5833241958 microseconds 46 | Accelerate approach took 32786.5833241958 microseconds 47 | cold GPU: runtime in microsecs : 45149.1665444337 48 | CPU: runtime in microsecs : 112370.416463818 49 | Metal was 2.48887022871682 times faster than CPU 50 | Metal was 0.726183578426167 times faster than Accelerate Framework 51 | ############################################# 52 | ==> count = 2048 - 2^11 53 | filling array took 64416.1249801982 microseconds 54 | Accelerate approach took 64416.1249801982 microseconds 55 | cold GPU: runtime in microsecs : 45887.500164099 56 | CPU: runtime in microsecs : 220804.583223071 57 | Metal was 4.81186777299805 times faster than CPU 58 | Metal was 1.4037837047091 times faster than Accelerate Framework 59 | ############################################# 60 | ==> count = 4096 - 2^12 61 | filling array took 125048.66666859 microseconds 62 | Accelerate approach took 125048.66666859 microseconds 63 | cold GPU: runtime in microsecs : 52858.7499866262 64 | CPU: runtime in microsecs : 439140.416565351 65 | Metal was 8.30780933481132 times faster than CPU 66 | Metal was 2.36571365573777 times faster than Accelerate Framework 67 | ############################################# 68 | ==> count = 8192 - 2^13 69 | filling array took 250193.708343431 microseconds 70 | Accelerate approach took 250193.708343431 microseconds 
71 | cold GPU: runtime in microsecs : 68176.6666821204 72 | CPU: runtime in microsecs : 872196.666896343 73 | Metal was 12.7931843743995 times faster than CPU 74 | Metal was 3.66978499418256 times faster than Accelerate Framework 75 | ############################################# 76 | ==> count = 16384 - 2^14 77 | filling array took 500049.624999519 microseconds 78 | Accelerate approach took 500049.624999519 microseconds 79 | cold GPU: runtime in microsecs : 90087.4999933876 80 | CPU: runtime in microsecs : 1745251.66657986 81 | Metal was 19.3728504699094 times faster than CPU 82 | Metal was 5.55071042082667 times faster than Accelerate Framework 83 | ############################################# 84 | ==> count = 32768 - 2^15 85 | filling array took 1005838.62498752 microseconds 86 | Accelerate approach took 1005838.62498752 microseconds 87 | cold GPU: runtime in microsecs : 116369.583120104 88 | CPU: runtime in microsecs : 3494683.33326513 89 | Metal was 30.0309001679442 times faster than CPU 90 | Metal was 8.64348395877127 times faster than Accelerate Framework 91 | ############################################# 92 | ==> count = 65536 - 2^16 93 | filling array took 1997907.37501462 microseconds 94 | Accelerate approach took 1997907.37501462 microseconds 95 | cold GPU: runtime in microsecs : 235795.833577868 96 | CPU: runtime in microsecs : 6966099.16660236 97 | Metal was 29.5429272896882 times faster than CPU 98 | Metal was 8.47303934382218 times faster than Accelerate Framework 99 | ############################################# 100 | ==> count = 131072 - 2^17 101 | filling array took 3995306.41667661 microseconds 102 | Accelerate approach took 3995306.41667661 microseconds 103 | cold GPU: runtime in microsecs : 423624.583345372 104 | CPU: runtime in microsecs : 13980710.4167994 105 | Metal was 33.0025946709548 times faster than CPU 106 | Metal was 9.4312430716971 times faster than Accelerate Framework 107 | ############################################# 108 | ==> 
count = 262144 - 2^18 109 | filling array took 7995071.99999061 microseconds 110 | Accelerate approach took 7995071.99999061 microseconds 111 | cold GPU: runtime in microsecs : 592910.83336575 112 | CPU: runtime in microsecs : 28068359.1665002 113 | Metal was 47.3399330674493 times faster than CPU 114 | Metal was 13.4844424322716 times faster than Accelerate Framework 115 | ############################################# 116 | ==> count = 524288 - 2^19 117 | filling array took 16004321.7083148 microseconds 118 | Accelerate approach took 16004321.7083148 microseconds 119 | cold GPU: runtime in microsecs : 1100254.58328892 120 | CPU: runtime in microsecs : 57159315.0001718 121 | Metal was 51.9509901329467 times faster than CPU 122 | Metal was 14.5460168504586 times faster than Accelerate Framework 123 | ############################################# 124 | ==> count = 1048576 - 2^20 125 | filling array took 31964094.5833526 microseconds 126 | Accelerate approach took 31964094.5833526 microseconds 127 | cold GPU: runtime in microsecs : 1919497.50005733 128 | CPU: runtime in microsecs : 117858820.833499 129 | Metal was 61.4008722751545 times faster than CPU 130 | Metal was 16.6523241537943 times faster than Accelerate Framework 131 | ############################################# 132 | ==> count = 2097152 - 2^21 133 | filling array took 63943138.4583295 microseconds 134 | Accelerate approach took 63943138.4583295 microseconds 135 | cold GPU: runtime in microsecs : 3291106.24996247 136 | CPU: runtime in microsecs : 233642984.166509 137 | Metal was 70.9922337418225 times faster than CPU 138 | Metal was 19.4290714433965 times faster than Accelerate Framework 139 | ############################################# 140 | ==> count = 4194304 - 2^22 141 | filling array took 128008848.416677 microseconds 142 | Accelerate approach took 128008848.416677 microseconds 143 | cold GPU: runtime in microsecs : 6850174.58337825 144 | CPU: runtime in microsecs : 467631814.999913 145 | Metal was 
68.265678386447 times faster than CPU 146 | Metal was 18.6869468593234 times faster than Accelerate Framework 147 | 148 | -------------------------------------------------------------------------------- /simplebenchmark/accelerate_metal_cpu_benchmark_iphone6.tsv: -------------------------------------------------------------------------------- 1 | vectorsize as 2^x array filltime (microsec) accelerate time (microsec) metal gpu time (microsec) cpu time (microsec) metal relative to cpu metal relative to accelerate 2 | 2^5 1652.62500013341 1652.62500013341 3308845.00002867 3933.33335523494 0.00118873303379302 0.000499456759116578 3 | 2^6 1621.41666805837 1621.41666805837 33446.6666754452 6184.1666320106 0.184896351317146 0.0484776759308196 4 | 2^7 3115.0416652963 3115.0416652963 29509.5833280357 11594.5833385922 0.392909083456178 0.105560340539843 5 | 2^8 6057.0416644623 6057.0416644623 28414.9999788497 17078.3333669533 0.60103232024161 0.213163528733795 6 | 2^9 9223.50000109873 9223.50000109873 27047.5000070292 35475.4166983184 1.3115968828579 0.341011183980098 7 | 2^10 18212.4583334371 18212.4583334371 35182.0833384409 66058.7499805843 1.87762473714587 0.517662872838963 8 | 2^11 35260.7916684065 35260.7916684065 33529.1666488047 133403.333347815 3.97872499323124 1.05164533427685 9 | 2^12 71741.3333331933 71741.3333331933 39214.5833393442 265704.166668002 6.77564681406213 1.82945545315063 10 | 2^13 139972.999997553 139972.999997553 47535.8333278564 515853.333345149 10.8518836682065 2.94457865148076 11 | 2^14 271856.208335521 271856.208335521 57735.8333248412 1036336.24999929 17.9496196784503 4.70862188488675 12 | 2^15 543090.583334561 543090.583334561 84529.1666337289 2065017.49999006 24.4296446093919 6.42488983344426 13 | 2^16 1088027.37500082 1088027.37500082 131122.499988123 4135187.08333868 31.5368230754694 8.29779309500178 14 | 2^17 2201753.54166713 2201753.54166713 232880.416660919 8269347.08333283 35.5089844045292 9.45443834752732 15 | 2^18 4349729.74999982 
4349729.74999982 430693.333328236 16555786.6666495 38.4398489261782 10.0993663319252 16 | 2^19 8710635.45833022 8710635.45833022 622455.000011541 33678772.9166826 54.1063577544693 13.9940003023009 17 | 2^20 17406665.3750015 17406665.3750015 1030270.41666792 67975189.166682 65.978007392007 16.8952394375234 18 | 2^21 34811557.9583318 34811557.9583318 1869345.83332913 137288455.833332 73.4419781431419 18.6223208876956 19 | 2^22 70102324.4583339 70102324.4583339 3652484.16666873 277283521.249992 75.9164197836593 19.193053620345 20 | -------------------------------------------------------------------------------- /simplebenchmark/accelerate_metal_cpu_benchmark_iphone6.txt: -------------------------------------------------------------------------------- 1 | ############################################# 2 | ==> count = 32 - 2^5 3 | filling array took 1652.62500013341 microseconds 4 | Accelerate approach took 1652.62500013341 microseconds 5 | 2014-12-17 13:34:00.846 SwiftMetalProcessing[486:243333] Metal GPU Frame Capture Enabled 6 | 2014-12-17 13:34:00.847 SwiftMetalProcessing[486:243333] Metal API Validation Enabled 7 | cold GPU: runtime in microsecs : 3308845.00002867 8 | CPU: runtime in microsecs : 3933.33335523494 9 | Metal was 0.00118873303379302 times faster than CPU 10 | Metal was 0.000499456759116578 times faster than Accelerate Framework 11 | ############################################# 12 | ==> count = 64 - 2^6 13 | filling array took 1621.41666805837 microseconds 14 | Accelerate approach took 1621.41666805837 microseconds 15 | cold GPU: runtime in microsecs : 33446.6666754452 16 | CPU: runtime in microsecs : 6184.1666320106 17 | Metal was 0.184896351317146 times faster than CPU 18 | Metal was 0.0484776759308196 times faster than Accelerate Framework 19 | ############################################# 20 | ==> count = 128 - 2^7 21 | filling array took 3115.0416652963 microseconds 22 | Accelerate approach took 3115.0416652963 microseconds 23 | cold GPU: runtime 
in microsecs : 29509.5833280357 24 | CPU: runtime in microsecs : 11594.5833385922 25 | Metal was 0.392909083456178 times faster than CPU 26 | Metal was 0.105560340539843 times faster than Accelerate Framework 27 | ############################################# 28 | ==> count = 256 - 2^8 29 | filling array took 6057.0416644623 microseconds 30 | Accelerate approach took 6057.0416644623 microseconds 31 | cold GPU: runtime in microsecs : 28414.9999788497 32 | CPU: runtime in microsecs : 17078.3333669533 33 | Metal was 0.60103232024161 times faster than CPU 34 | Metal was 0.213163528733795 times faster than Accelerate Framework 35 | ############################################# 36 | ==> count = 512 - 2^9 37 | filling array took 9223.50000109873 microseconds 38 | Accelerate approach took 9223.50000109873 microseconds 39 | cold GPU: runtime in microsecs : 27047.5000070292 40 | CPU: runtime in microsecs : 35475.4166983184 41 | Metal was 1.3115968828579 times faster than CPU 42 | Metal was 0.341011183980098 times faster than Accelerate Framework 43 | ############################################# 44 | ==> count = 1024 - 2^10 45 | filling array took 18212.4583334371 microseconds 46 | Accelerate approach took 18212.4583334371 microseconds 47 | cold GPU: runtime in microsecs : 35182.0833384409 48 | CPU: runtime in microsecs : 66058.7499805843 49 | Metal was 1.87762473714587 times faster than CPU 50 | Metal was 0.517662872838963 times faster than Accelerate Framework 51 | ############################################# 52 | ==> count = 2048 - 2^11 53 | filling array took 35260.7916684065 microseconds 54 | Accelerate approach took 35260.7916684065 microseconds 55 | cold GPU: runtime in microsecs : 33529.1666488047 56 | CPU: runtime in microsecs : 133403.333347815 57 | Metal was 3.97872499323124 times faster than CPU 58 | Metal was 1.05164533427685 times faster than Accelerate Framework 59 | ############################################# 60 | ==> count = 4096 - 2^12 61 | filling array 
took 71741.3333331933 microseconds 62 | Accelerate approach took 71741.3333331933 microseconds 63 | cold GPU: runtime in microsecs : 39214.5833393442 64 | CPU: runtime in microsecs : 265704.166668002 65 | Metal was 6.77564681406213 times faster than CPU 66 | Metal was 1.82945545315063 times faster than Accelerate Framework 67 | ############################################# 68 | ==> count = 8192 - 2^13 69 | filling array took 139972.999997553 microseconds 70 | Accelerate approach took 139972.999997553 microseconds 71 | cold GPU: runtime in microsecs : 47535.8333278564 72 | CPU: runtime in microsecs : 515853.333345149 73 | Metal was 10.8518836682065 times faster than CPU 74 | Metal was 2.94457865148076 times faster than Accelerate Framework 75 | ############################################# 76 | ==> count = 16384 - 2^14 77 | filling array took 271856.208335521 microseconds 78 | Accelerate approach took 271856.208335521 microseconds 79 | cold GPU: runtime in microsecs : 57735.8333248412 80 | CPU: runtime in microsecs : 1036336.24999929 81 | Metal was 17.9496196784503 times faster than CPU 82 | Metal was 4.70862188488675 times faster than Accelerate Framework 83 | ############################################# 84 | ==> count = 32768 - 2^15 85 | filling array took 543090.583334561 microseconds 86 | Accelerate approach took 543090.583334561 microseconds 87 | cold GPU: runtime in microsecs : 84529.1666337289 88 | CPU: runtime in microsecs : 2065017.49999006 89 | Metal was 24.4296446093919 times faster than CPU 90 | Metal was 6.42488983344426 times faster than Accelerate Framework 91 | ############################################# 92 | ==> count = 65536 - 2^16 93 | filling array took 1088027.37500082 microseconds 94 | Accelerate approach took 1088027.37500082 microseconds 95 | cold GPU: runtime in microsecs : 131122.499988123 96 | CPU: runtime in microsecs : 4135187.08333868 97 | Metal was 31.5368230754694 times faster than CPU 98 | Metal was 8.29779309500178 times faster 
than Accelerate Framework 99 | ############################################# 100 | ==> count = 131072 - 2^17 101 | filling array took 2201753.54166713 microseconds 102 | Accelerate approach took 2201753.54166713 microseconds 103 | cold GPU: runtime in microsecs : 232880.416660919 104 | CPU: runtime in microsecs : 8269347.08333283 105 | Metal was 35.5089844045292 times faster than CPU 106 | Metal was 9.45443834752732 times faster than Accelerate Framework 107 | ############################################# 108 | ==> count = 262144 - 2^18 109 | filling array took 4349729.74999982 microseconds 110 | Accelerate approach took 4349729.74999982 microseconds 111 | cold GPU: runtime in microsecs : 430693.333328236 112 | CPU: runtime in microsecs : 16555786.6666495 113 | Metal was 38.4398489261782 times faster than CPU 114 | Metal was 10.0993663319252 times faster than Accelerate Framework 115 | ############################################# 116 | ==> count = 524288 - 2^19 117 | filling array took 8710635.45833022 microseconds 118 | Accelerate approach took 8710635.45833022 microseconds 119 | cold GPU: runtime in microsecs : 622455.000011541 120 | CPU: runtime in microsecs : 33678772.9166826 121 | Metal was 54.1063577544693 times faster than CPU 122 | Metal was 13.9940003023009 times faster than Accelerate Framework 123 | ############################################# 124 | ==> count = 1048576 - 2^20 125 | filling array took 17406665.3750015 microseconds 126 | Accelerate approach took 17406665.3750015 microseconds 127 | cold GPU: runtime in microsecs : 1030270.41666792 128 | CPU: runtime in microsecs : 67975189.166682 129 | Metal was 65.978007392007 times faster than CPU 130 | Metal was 16.8952394375234 times faster than Accelerate Framework 131 | ############################################# 132 | ==> count = 2097152 - 2^21 133 | filling array took 34811557.9583318 microseconds 134 | Accelerate approach took 34811557.9583318 microseconds 135 | cold GPU: runtime in microsecs : 
1869345.83332913 136 | CPU: runtime in microsecs : 137288455.833332 137 | Metal was 73.4419781431419 times faster than CPU 138 | Metal was 18.6223208876956 times faster than Accelerate Framework 139 | ############################################# 140 | ==> count = 4194304 - 2^22 141 | filling array took 70102324.4583339 microseconds 142 | Accelerate approach took 70102324.4583339 microseconds 143 | cold GPU: runtime in microsecs : 3652484.16666873 144 | CPU: runtime in microsecs : 277283521.249992 145 | Metal was 75.9164197836593 times faster than CPU 146 | Metal was 19.193053620345 times faster than Accelerate Framework 147 | -------------------------------------------------------------------------------- /simplebenchmark/accelerate_metal_cpu_benchmark_iphone6_only_metal_withoutloading.tsv: -------------------------------------------------------------------------------- 1 | vectorsize as 2^x array filltime (microsec) accelerate time (microsec) metal gpu time (microsec) cpu time (microsec) metal relative to cpu metal relative to accelerate 2 | 2^5 9679.37500172411 9679.37500172411 47747.0833357074 9575.00000367872 0.200535809409705 0.202721806768152 3 | 2^6 2140.91666566674 2140.91666566674 9937.49999906868 7852.08332672482 0.790146749933153 0.215438155055837 4 | 2^7 3249.83333484852 3249.83333484852 9840.83333605668 11547.4999984144 1.17342704668156 0.330239647788889 5 | 2^8 6147.41666504415 6147.41666504415 9977.91667032288 26245.4166659154 2.63035035599933 0.616102225360158 6 | 2^9 13342.5833300862 13342.5833300862 11144.1666376777 50528.3333041007 4.5340611771963 1.19727062272883 7 | 2^10 20514.1250007728 20514.1250007728 16191.6666911566 71195.833334059 4.3970663855713 1.26695573668009 8 | 2^11 39458.7500013586 39458.7500013586 15717.5000276766 146840.416673513 9.34247917384729 2.5104978483777 9 | 2^12 78131.4166670199 78131.4166670199 17480.8333395049 296393.333337619 16.955332024578 4.46954759819436 10 | 2^13 160230.083332863 160230.083332863 
22657.9166701413 560247.083303693 24.726328173057 7.07170414939405 11 | 2^14 296013.291666895 296013.291666895 29184.9999848637 1153362.08335066 39.5190023624749 10.1426517670179 12 | 2^15 599794.70833423 599794.70833423 47040.4166844673 2233408.33334078 47.4784980822299 12.7506249010817 13 | 2^16 1157516.79166715 1157516.79166715 79556.6666783998 4597166.24998691 57.7848022287123 14.5495888653342 14 | 2^17 2401103.24999841 2401103.24999841 148487.083351938 9105070.41669916 61.3189390697283 16.1704519733034 15 | 2^18 4727311.45833313 4727311.45833313 80742.4999948125 18266935.4166865 226.2369312055 58.5479946575453 16 | 2^19 9405677.75000091 9405677.75000091 281003.333329863 36045133.3333185 128.27297422485 33.4717657564574 17 | 2^20 18823093.7916669 18823093.7916669 149227.083347796 69225646.2500154 463.894654354898 126.137249146637 18 | 2^21 35216173.6666676 35216173.6666676 349229.583334818 139922095.000002 400.659341811412 100.839606228046 19 | 2^22 70353440.750001 70353440.750001 437294.16669521 281912080.000002 644.673772189813 160.88355644368 20 | -------------------------------------------------------------------------------- /simplebenchmark/accelerate_metal_cpu_benchmark_iphone6_only_metal_withoutloading.txt: -------------------------------------------------------------------------------- 1 | ############################################# 2 | ==> count = 32 - 2^5 3 | filling array took 9679.37500172411 microseconds 4 | Accelerate approach took 9679.37500172411 microseconds 5 | 2014-12-17 13:51:40.694 SwiftMetalProcessing[520:249237] Metal GPU Frame Capture Enabled 6 | 2014-12-17 13:51:40.695 SwiftMetalProcessing[520:249237] Metal API Validation Enabled 7 | cold GPU: runtime in microsecs : 47747.0833357074 8 | CPU: runtime in microsecs : 9575.00000367872 9 | Metal was 0.200535809409705 times faster than CPU 10 | Metal was 0.202721806768152 times faster than Accelerate Framework 11 | ############################################# 12 | ==> count = 64 - 2^6 13 
| filling array took 2140.91666566674 microseconds 14 | Accelerate approach took 2140.91666566674 microseconds 15 | cold GPU: runtime in microsecs : 9937.49999906868 16 | CPU: runtime in microsecs : 7852.08332672482 17 | Metal was 0.790146749933153 times faster than CPU 18 | Metal was 0.215438155055837 times faster than Accelerate Framework 19 | ############################################# 20 | ==> count = 128 - 2^7 21 | filling array took 3249.83333484852 microseconds 22 | Accelerate approach took 3249.83333484852 microseconds 23 | cold GPU: runtime in microsecs : 9840.83333605668 24 | CPU: runtime in microsecs : 11547.4999984144 25 | Metal was 1.17342704668156 times faster than CPU 26 | Metal was 0.330239647788889 times faster than Accelerate Framework 27 | ############################################# 28 | ==> count = 256 - 2^8 29 | filling array took 6147.41666504415 microseconds 30 | Accelerate approach took 6147.41666504415 microseconds 31 | cold GPU: runtime in microsecs : 9977.91667032288 32 | CPU: runtime in microsecs : 26245.4166659154 33 | Metal was 2.63035035599933 times faster than CPU 34 | Metal was 0.616102225360158 times faster than Accelerate Framework 35 | ############################################# 36 | ==> count = 512 - 2^9 37 | filling array took 13342.5833300862 microseconds 38 | Accelerate approach took 13342.5833300862 microseconds 39 | cold GPU: runtime in microsecs : 11144.1666376777 40 | CPU: runtime in microsecs : 50528.3333041007 41 | Metal was 4.5340611771963 times faster than CPU 42 | Metal was 1.19727062272883 times faster than Accelerate Framework 43 | ############################################# 44 | ==> count = 1024 - 2^10 45 | filling array took 20514.1250007728 microseconds 46 | Accelerate approach took 20514.1250007728 microseconds 47 | cold GPU: runtime in microsecs : 16191.6666911566 48 | CPU: runtime in microsecs : 71195.833334059 49 | Metal was 4.3970663855713 times faster than CPU 50 | Metal was 1.26695573668009 times 
faster than Accelerate Framework 51 | ############################################# 52 | ==> count = 2048 - 2^11 53 | filling array took 39458.7500013586 microseconds 54 | Accelerate approach took 39458.7500013586 microseconds 55 | cold GPU: runtime in microsecs : 15717.5000276766 56 | CPU: runtime in microsecs : 146840.416673513 57 | Metal was 9.34247917384729 times faster than CPU 58 | Metal was 2.5104978483777 times faster than Accelerate Framework 59 | ############################################# 60 | ==> count = 4096 - 2^12 61 | filling array took 78131.4166670199 microseconds 62 | Accelerate approach took 78131.4166670199 microseconds 63 | cold GPU: runtime in microsecs : 17480.8333395049 64 | CPU: runtime in microsecs : 296393.333337619 65 | Metal was 16.955332024578 times faster than CPU 66 | Metal was 4.46954759819436 times faster than Accelerate Framework 67 | ############################################# 68 | ==> count = 8192 - 2^13 69 | filling array took 160230.083332863 microseconds 70 | Accelerate approach took 160230.083332863 microseconds 71 | cold GPU: runtime in microsecs : 22657.9166701413 72 | CPU: runtime in microsecs : 560247.083303693 73 | Metal was 24.726328173057 times faster than CPU 74 | Metal was 7.07170414939405 times faster than Accelerate Framework 75 | ############################################# 76 | ==> count = 16384 - 2^14 77 | filling array took 296013.291666895 microseconds 78 | Accelerate approach took 296013.291666895 microseconds 79 | cold GPU: runtime in microsecs : 29184.9999848637 80 | CPU: runtime in microsecs : 1153362.08335066 81 | Metal was 39.5190023624749 times faster than CPU 82 | Metal was 10.1426517670179 times faster than Accelerate Framework 83 | ############################################# 84 | ==> count = 32768 - 2^15 85 | filling array took 599794.70833423 microseconds 86 | Accelerate approach took 599794.70833423 microseconds 87 | cold GPU: runtime in microsecs : 47040.4166844673 88 | CPU: runtime in 
microsecs : 2233408.33334078 89 | Metal was 47.4784980822299 times faster than CPU 90 | Metal was 12.7506249010817 times faster than Accelerate Framework 91 | ############################################# 92 | ==> count = 65536 - 2^16 93 | filling array took 1157516.79166715 microseconds 94 | Accelerate approach took 1157516.79166715 microseconds 95 | cold GPU: runtime in microsecs : 79556.6666783998 96 | CPU: runtime in microsecs : 4597166.24998691 97 | Metal was 57.7848022287123 times faster than CPU 98 | Metal was 14.5495888653342 times faster than Accelerate Framework 99 | ############################################# 100 | ==> count = 131072 - 2^17 101 | filling array took 2401103.24999841 microseconds 102 | Accelerate approach took 2401103.24999841 microseconds 103 | cold GPU: runtime in microsecs : 148487.083351938 104 | CPU: runtime in microsecs : 9105070.41669916 105 | Metal was 61.3189390697283 times faster than CPU 106 | Metal was 16.1704519733034 times faster than Accelerate Framework 107 | ############################################# 108 | ==> count = 262144 - 2^18 109 | filling array took 4727311.45833313 microseconds 110 | Accelerate approach took 4727311.45833313 microseconds 111 | cold GPU: runtime in microsecs : 80742.4999948125 112 | CPU: runtime in microsecs : 18266935.4166865 113 | Metal was 226.2369312055 times faster than CPU 114 | Metal was 58.5479946575453 times faster than Accelerate Framework 115 | ############################################# 116 | ==> count = 524288 - 2^19 117 | filling array took 9405677.75000091 microseconds 118 | Accelerate approach took 9405677.75000091 microseconds 119 | cold GPU: runtime in microsecs : 281003.333329863 120 | CPU: runtime in microsecs : 36045133.3333185 121 | Metal was 128.27297422485 times faster than CPU 122 | Metal was 33.4717657564574 times faster than Accelerate Framework 123 | ############################################# 124 | ==> count = 1048576 - 2^20 125 | filling array took 
"""Plot the Metal-vs-Accelerate speedup from a benchmark TSV file.

Usage: python analyze_with_pandas.py <benchmark.tsv>

The input is a tab-separated table with one row per vector size and (at
least) the three columns named by the constants below.
"""
import sys

import pandas

SIZE_COLUMN = 'vectorsize as 2^x'
GPU_TIME_COLUMN = 'metal gpu time (microsec)'
SPEEDUP_COLUMN = 'metal relative to accelerate'


def speedup_series(d):
    """Return the speedup column as a Series indexed by vector-size label.

    Args:
        d: DataFrame holding SIZE_COLUMN and SPEEDUP_COLUMN.

    Returns:
        A pandas Series of speedup factors whose index is the size labels.
    """
    # Use the raw values: handing pandas the column itself together with a
    # new index re-aligns on the old integer index and produces only NaN.
    return pandas.Series(d[SPEEDUP_COLUMN].values,
                         index=d[SIZE_COLUMN].values,
                         name=SPEEDUP_COLUMN)


def plot_speedup(series, output="yoda.png"):
    """Draw the speedup curve, save it to *output*, then display it.

    Args:
        series: Series as returned by speedup_series().
        output: path of the image file to write.
    """
    # Imported lazily so the table helpers above work without a plotting
    # backend; call matplotlib.use('Agg') before this import to run without
    # a display.
    import matplotlib.pyplot as plt

    positions = list(range(len(series)))

    plt.ioff()  # interactive mode off
    plt.figure()
    plt.grid(True)
    # plot() takes its data positionally; x=/y= keywords draw nothing.
    plt.plot(positions, series.values)
    plt.xticks(positions, list(series.index), rotation=90)
    # Save before show(): once the window is closed the figure may be gone
    # and savefig() would write an empty canvas.
    plt.savefig(output)
    plt.show()
    plt.close()


def main(argv):
    """Print summary statistics for the table named in argv[1] and plot it."""
    if len(argv) < 2:
        sys.stderr.write("usage: %s <benchmark.tsv>\n" % argv[0])
        return 2

    d = pandas.read_table(argv[1], sep="\t")

    # descriptive statistics
    print(d.describe())
    print(d.keys())
    print(type(d[SIZE_COLUMN]))
    print(d[GPU_TIME_COLUMN])

    series = speedup_series(d)
    print(series)

    plot_speedup(series)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
in microsecs : 5653783.33325498 27 | relativespeed = 80.7716989827619 28 | 29 | D) number of elements: 1234 30 | filling array took 45910.2916793199 microseconds 31 | 2014-12-16 13:01:11.848 SwiftMetalProcessing[1690:506887] Metal GPU Frame Capture Enabled 32 | 2014-12-16 13:01:11.849 SwiftMetalProcessing[1690:506887] Metal API Validation Enabled 33 | cold GPU: runtime in microsecs : 51162.0834004134 34 | 0.5 35 | CPU: runtime in microsecs : 533953.750127694 36 | relativespeed = 10.4365130315115 37 | 38 | E) number of elements: 123 39 | filling array took 10645.3750049695 microseconds 40 | 2014-12-16 13:01:33.749 SwiftMetalProcessing[1697:507169] Metal GPU Frame Capture Enabled 41 | 2014-12-16 13:01:33.749 SwiftMetalProcessing[1697:507169] Metal API Validation Enabled 42 | cold GPU: runtime in microsecs : 43723.7499863841 43 | 0.5 44 | CPU: runtime in microsecs : 49662.0833291672 45 | relativespeed = 1.13581482248509 46 | 47 | F) number of elements: 12 48 | filling array took 6889.83334111981 microseconds 49 | 2014-12-16 13:01:56.817 SwiftMetalProcessing[1704:507447] Metal GPU Frame Capture Enabled 50 | 2014-12-16 13:01:56.818 SwiftMetalProcessing[1704:507447] Metal API Validation Enabled 51 | cold GPU: runtime in microsecs : 46259.1667019296 52 | 0.5 53 | CPU: runtime in microsecs : 5571.25007617287 54 | relativespeed = 0.1204355908975 55 | 56 | G) number of elements: 1 57 | filling array took 6692.37499823794 microseconds 58 | 2014-12-16 13:02:30.052 SwiftMetalProcessing[1711:507785] Metal GPU Frame Capture Enabled 59 | 2014-12-16 13:02:30.052 SwiftMetalProcessing[1711:507785] Metal API Validation Enabled 60 | cold GPU: runtime in microsecs : 45577.9167532455 61 | 0.5 62 | CPU: runtime in microsecs : 900.000013643876 63 | relativespeed = 0.0197464052276981 64 | 65 | H) number of elements: 10000000 (10 million) 66 | filling array took 305784797.541666 microseconds 67 | 2014-12-16 13:10:26.122 SwiftMetalProcessing[1727:508964] Metal GPU Frame Capture Enabled 68 | 
2014-12-16 13:10:26.123 SwiftMetalProcessing[1727:508964] Metal API Validation Enabled 69 | cold GPU: runtime in microsecs : 3179862.08334332 70 | 0.5 71 | CPU: runtime in microsecs : 3908226572.91676 72 | relativespeed = 1229.0553711083 73 | 74 | I) number of elements: 1000000 (1 million) 75 | filling array took 30580217.2916738 microseconds 76 | 2014-12-16 13:03:42.253 SwiftMetalProcessing[1719:508194] Metal GPU Frame Capture Enabled 77 | 2014-12-16 13:03:42.254 SwiftMetalProcessing[1719:508194] Metal API Validation Enabled 78 | cold GPU: runtime in microsecs : 495616.249972954 79 | 0.5 80 | CPU: runtime in microsecs : 390826717.500022 81 | relativespeed = 788.567197950731 82 | 83 | J) number of elements: 100000 (hundred thousand) 84 | filling array took 3021321.2499948 microseconds 85 | 2014-12-16 13:19:05.970 SwiftMetalProcessing[1735:511510] Metal GPU Frame Capture Enabled 86 | 2014-12-16 13:19:05.970 SwiftMetalProcessing[1735:511510] Metal API Validation Enabled 87 | cold GPU: runtime in microsecs : 169442.083279137 88 | 0.5 89 | CPU: runtime in microsecs : 39302068.3333452 90 | relativespeed = 231.949864949427 91 | 92 | K) number of elements: 10000 (ten thousand) 93 | filling array took 318652.583329822 microseconds 94 | 2014-12-16 13:19:57.680 SwiftMetalProcessing[1742:511960] Metal GPU Frame Capture Enabled 95 | 2014-12-16 13:19:57.681 SwiftMetalProcessing[1742:511960] Metal API Validation Enabled 96 | cold GPU: runtime in microsecs : 61870.0000632089 97 | 0.5 98 | CPU: runtime in microsecs : 4451359.16656 99 | relativespeed = 71.9469720706693 100 | ++++++++++++ 101 | 102 | new metrics, very conservative, from before configuring gpu to after receiving data from it 103 | 104 | N = 100000 (hundred thousand) 105 | filling array took 3066978.62500732 microseconds 106 | 2014-12-16 13:34:18.780 SwiftMetalProcessing[1792:516455] Metal GPU Frame Capture Enabled 107 | 2014-12-16 13:34:18.781 SwiftMetalProcessing[1792:516455] Metal API Validation Enabled 108 | cold 
GPU: runtime in microsecs : 3985880.83335198 109 | 0.5 110 | CPU: runtime in microsecs : 39848030.4166151 111 | relativespeed = 9.99729597613291 112 | 113 | N=1000000 (1 million) 114 | filling array took 30289064.9999899 microseconds 115 | 2014-12-16 13:36:10.360 SwiftMetalProcessing[1800:517039] Metal GPU Frame Capture Enabled 116 | 2014-12-16 13:36:10.361 SwiftMetalProcessing[1800:517039] Metal API Validation Enabled 117 | cold GPU: runtime in microsecs : 5309011.25006494 118 | 0.5 119 | CPU: runtime in microsecs : 388455886.666634 120 | relativespeed = 73.1691586944522 121 | 122 | N=10000000 (10 million = 10^7) 123 | filling array took 305068795.083338 microseconds 124 | 2014-12-16 13:42:40.528 SwiftMetalProcessing[1807:517617] Metal GPU Frame Capture Enabled 125 | 2014-12-16 13:42:40.529 SwiftMetalProcessing[1807:517617] Metal API Validation Enabled 126 | cold GPU: runtime in microsecs : 20883388.3333136 127 | 0.5 128 | CPU: runtime in microsecs : 3922125540.83337 129 | relativespeed = 187.810784257491 130 | -------------------------------------------------------------------------------- /simplebenchmark/loop_benchmark.txt: -------------------------------------------------------------------------------- 1 | ############################################# 2 | ==> count = 4096 - 2^12 3 | filling array took 134470.000004512 microseconds 4 | 2014-12-16 14:49:54.307 SwiftMetalProcessing[1866:530742] Metal GPU Frame Capture Enabled 5 | 2014-12-16 14:49:54.307 SwiftMetalProcessing[1866:530742] Metal API Validation Enabled 6 | cold GPU: runtime in microsecs : 2994147.08337281 7 | 0.5 8 | CPU: runtime in microsecs : 520309.999992605 9 | relativespeed = 0.173775698222044 10 | ############################################# 11 | ==> count = 8192 - 2^13 12 | filling array took 252750.62500441 microseconds 13 | cold GPU: runtime in microsecs : 62489.9999820627 14 | 0.5 15 | CPU: runtime in microsecs : 871565.83336764 16 | relativespeed = 13.9472849034696 17 | 
############################################# 18 | ==> count = 16384 - 2^14 19 | filling array took 499872.416665312 microseconds 20 | cold GPU: runtime in microsecs : 80839.5832427777 21 | 0.5 22 | CPU: runtime in microsecs : 1754669.58331526 23 | relativespeed = 21.7055743353554 24 | ############################################# 25 | ==> count = 32768 - 2^15 26 | filling array took 997217.166674091 microseconds 27 | cold GPU: runtime in microsecs : 126074.583386071 28 | 0.5 29 | CPU: runtime in microsecs : 3539578.33329332 30 | relativespeed = 28.0752728918744 31 | ############################################# 32 | ==> count = 65536 - 2^16 33 | filling array took 1999642.37499807 microseconds 34 | cold GPU: runtime in microsecs : 191510.833392385 35 | 0.5 36 | CPU: runtime in microsecs : 7018772.50001417 37 | relativespeed = 36.6494802183511 38 | ############################################# 39 | ==> count = 131072 - 2^17 40 | filling array took 4027364.74999983 microseconds 41 | cold GPU: runtime in microsecs : 321503.750019474 42 | 0.5 43 | CPU: runtime in microsecs : 14014958.7500491 44 | relativespeed = 43.5918982257601 45 | ############################################# 46 | ==> count = 262144 - 2^18 47 | filling array took 8001094.87499867 microseconds 48 | cold GPU: runtime in microsecs : 631818.333349656 49 | 0.5 50 | CPU: runtime in microsecs : 28017094.166571 51 | relativespeed = 44.3435916429257 52 | ############################################# 53 | ==> count = 524288 - 2^19 54 | filling array took 16034208.6250021 microseconds 55 | cold GPU: runtime in microsecs : 997512.500034645 56 | 0.5 57 | CPU: runtime in microsecs : 57490045.8334014 58 | relativespeed = 57.6334089361333 59 | ############################################# 60 | ==> count = 1048576 - 2^20 61 | filling array took 31968784.4166765 microseconds 62 | cold GPU: runtime in microsecs : 1710485.83331867 63 | 0.5 64 | CPU: runtime in microsecs : 115896939.166705 65 | relativespeed = 
"""Time a NumPy sigmoid over 2^22 elements, for comparison with the Metal runs."""
import timeit

import numpy

# Same vector length as the largest run in the device benchmarks (2^22).
ELEMENT_COUNT = 2 ** 22


def sigmoid(values):
    """Return the element-wise logistic sigmoid 1 / (1 + exp(-values))."""
    return 1.0 / (1 + numpy.exp(-values))


def time_sigmoid(count=ELEMENT_COUNT):
    """Return the time in microseconds to apply sigmoid to 0..count-1.

    Only the sigmoid evaluation is timed; building the input array is not.
    """
    d = numpy.arange(count)
    # default_timer picks the most precise wall clock for the platform.
    t0 = timeit.default_timer()
    sigmoid(d)
    t1 = timeit.default_timer()
    return (t1 - t0) * 1000000.0


if __name__ == "__main__":
    # Report the real element count; the old message said "1 million" while
    # the array actually holds 2^22 = 4194304 elements.
    print("sigmoid on %d elements took %10.2f microseconds"
          % (ELEMENT_COUNT, time_sigmoid()))
"""Convert a benchmark log read from stdin into a tab-separated table on stdout.

Each record in the log starts with a line containing the vector size as a
power expression (for example "==> count = 32 - 2^5") and is followed by one
measurement per line. The first number on every measurement line becomes a
column of the output row.
"""
import re
import sys

HEADER = ["vectorsize as 2^x", "array filltime (microsec)", "accelerate time (microsec)",
          "metal gpu time (microsec)", "cpu time (microsec)", "metal relative to cpu",
          "metal relative to accelerate"]

# Timestamped log noise, e.g. "2014-12-17 13:34:00.846 SwiftMetalProcessing[...]".
LOG_LINE_RE = re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')
# Record header marker such as "2^5".
POWER_RE = re.compile(r'\d+\^\d+')
# A measurement: digits with an optional fraction and optional exponent.
# The dot is escaped on purpose; a bare "." would match any character.
NUMBER_RE = re.compile(r'\d+(?:\.\d+)?(?:[eE][-+]?\d+)?')


def _checked(record):
    """Return *record* unchanged, or raise ValueError if its width is wrong."""
    if len(record) != len(HEADER):
        raise ValueError("expected %d fields, got %d: %r"
                         % (len(HEADER), len(record), record))
    return record


def parse_records(lines):
    """Yield one list of column strings per benchmark record found in *lines*.

    Args:
        lines: iterable of text lines from the benchmark log.

    Yields:
        Lists of len(HEADER) strings: the power expression followed by the
        first number of each measurement line of that record.

    Raises:
        ValueError: if a record does not have exactly len(HEADER) fields.
    """
    record = []
    for line in lines:
        if LOG_LINE_RE.match(line):  # skip noise
            continue

        # Look for the header marker first so a record boundary never depends
        # on how the number pattern happens to treat "^".
        power = POWER_RE.search(line)
        if power:
            if record:
                yield _checked(record)
            record = [power.group(0)]
            continue

        number = NUMBER_RE.search(line)
        if number:
            record.append(number.group(0))

    if record:
        yield _checked(record)


def main(lines=None):
    """Print the header row and one tab-separated row per parsed record."""
    if lines is None:
        lines = sys.stdin
    print("\t".join(HEADER))
    for record in parse_records(lines):
        print("\t".join(record))


if __name__ == "__main__":
    main()