├── README.txt ├── SpacePruner.xcodeproj ├── project.pbxproj ├── project.xcworkspace │ └── contents.xcworkspacedata └── xcuserdata │ └── derek.xcuserdatad │ └── xcschemes │ └── xcschememanagement.plist └── SpacePruner ├── AppDelegate.h ├── AppDelegate.m ├── Assets.xcassets └── AppIcon.appiconset │ └── Contents.json ├── Base.lproj ├── LaunchScreen.storyboard └── Main.storyboard ├── Info.plist ├── ViewController.h ├── ViewController.m ├── bigtable.h ├── despacebenchmark.c ├── despacebenchmark.h ├── despacer.h ├── interleaved_despacer.c ├── interleaved_despacer.h ├── main.m ├── unzipping_despacer.c └── unzipping_despacer.h /README.txt: -------------------------------------------------------------------------------- 1 | This project tests various solutions to the problem of removing whitespace from a string of characters, using an ARM processor. It is based on the blog posts by Daniel Lemire. I added the function neon_interleaved_despace and a simple UI for iOS. 2 | 3 | http://lemire.me/blog/2017/07/03/pruning-spaces-from-strings-quickly-on-arm-processors/ 4 | http://lemire.me/blog/2017/07/10/pruning-spaces-faster-on-arm-processors-with-vector-table-lookups/ 5 | https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/tree/master/2017/07/10 6 | 7 | Built with Xcode 8, and runs on iOS 10.3 or later. 8 | 9 | This project and all of its code are in the public domain. 10 | 11 | – Derek Ledbetter 12 | -------------------------------------------------------------------------------- /SpacePruner.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 
2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 48; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 652BA0661F0F199A00A692A9 /* despacebenchmark.c in Sources */ = {isa = PBXBuildFile; fileRef = 652BA0641F0F11D000A692A9 /* despacebenchmark.c */; }; 11 | 653A6ED31F1D6BE80072A1E1 /* unzipping_despacer.c in Sources */ = {isa = PBXBuildFile; fileRef = 653A6ED11F1D6BE80072A1E1 /* unzipping_despacer.c */; }; 12 | 657FB1BB1F0E177400452EA8 /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 657FB1BA1F0E177400452EA8 /* AppDelegate.m */; }; 13 | 657FB1BE1F0E177400452EA8 /* ViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = 657FB1BD1F0E177400452EA8 /* ViewController.m */; }; 14 | 657FB1C11F0E177400452EA8 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 657FB1BF1F0E177400452EA8 /* Main.storyboard */; }; 15 | 657FB1C31F0E177400452EA8 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 657FB1C21F0E177400452EA8 /* Assets.xcassets */; }; 16 | 657FB1C61F0E177400452EA8 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 657FB1C41F0E177400452EA8 /* LaunchScreen.storyboard */; }; 17 | 657FB1C91F0E177400452EA8 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 657FB1C81F0E177400452EA8 /* main.m */; }; 18 | 65F28EAC1F17150200F80F65 /* README.txt in Resources */ = {isa = PBXBuildFile; fileRef = 65F28EAB1F17150200F80F65 /* README.txt */; }; 19 | 65F8A5EF1F103A7900D3D221 /* interleaved_despacer.c in Sources */ = {isa = PBXBuildFile; fileRef = 65F8A5ED1F103A7900D3D221 /* interleaved_despacer.c */; }; 20 | /* End PBXBuildFile section */ 21 | 22 | /* Begin PBXFileReference section */ 23 | 652BA0631F0F11D000A692A9 /* despacer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = despacer.h; sourceTree = ""; }; 24 | 652BA0641F0F11D000A692A9 /* despacebenchmark.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = 
despacebenchmark.c; sourceTree = ""; }; 25 | 652BA0651F0F18BD00A692A9 /* despacebenchmark.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = despacebenchmark.h; sourceTree = ""; }; 26 | 653A6ED11F1D6BE80072A1E1 /* unzipping_despacer.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = unzipping_despacer.c; sourceTree = ""; }; 27 | 653A6ED21F1D6BE80072A1E1 /* unzipping_despacer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = unzipping_despacer.h; sourceTree = ""; }; 28 | 657FB1B61F0E177400452EA8 /* SpacePruner.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = SpacePruner.app; sourceTree = BUILT_PRODUCTS_DIR; }; 29 | 657FB1B91F0E177400452EA8 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = ""; }; 30 | 657FB1BA1F0E177400452EA8 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = ""; }; 31 | 657FB1BC1F0E177400452EA8 /* ViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ViewController.h; sourceTree = ""; }; 32 | 657FB1BD1F0E177400452EA8 /* ViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = ViewController.m; sourceTree = ""; }; 33 | 657FB1C01F0E177400452EA8 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = ""; }; 34 | 657FB1C21F0E177400452EA8 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; 35 | 657FB1C51F0E177400452EA8 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = ""; }; 36 | 657FB1C71F0E177400452EA8 /* Info.plist */ = {isa = 
PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 37 | 657FB1C81F0E177400452EA8 /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = ""; }; 38 | 65F28EAB1F17150200F80F65 /* README.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = README.txt; sourceTree = ""; }; 39 | 65F28EAD1F1823A500F80F65 /* bigtable.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bigtable.h; sourceTree = ""; }; 40 | 65F8A5ED1F103A7900D3D221 /* interleaved_despacer.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = interleaved_despacer.c; sourceTree = ""; }; 41 | 65F8A5EE1F103A7900D3D221 /* interleaved_despacer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = interleaved_despacer.h; sourceTree = ""; }; 42 | /* End PBXFileReference section */ 43 | 44 | /* Begin PBXFrameworksBuildPhase section */ 45 | 657FB1B31F0E177400452EA8 /* Frameworks */ = { 46 | isa = PBXFrameworksBuildPhase; 47 | buildActionMask = 2147483647; 48 | files = ( 49 | ); 50 | runOnlyForDeploymentPostprocessing = 0; 51 | }; 52 | /* End PBXFrameworksBuildPhase section */ 53 | 54 | /* Begin PBXGroup section */ 55 | 657FB1AD1F0E177300452EA8 = { 56 | isa = PBXGroup; 57 | children = ( 58 | 65F28EAB1F17150200F80F65 /* README.txt */, 59 | 657FB1B81F0E177400452EA8 /* SpacePruner */, 60 | 657FB1B71F0E177400452EA8 /* Products */, 61 | ); 62 | sourceTree = ""; 63 | }; 64 | 657FB1B71F0E177400452EA8 /* Products */ = { 65 | isa = PBXGroup; 66 | children = ( 67 | 657FB1B61F0E177400452EA8 /* SpacePruner.app */, 68 | ); 69 | name = Products; 70 | sourceTree = ""; 71 | }; 72 | 657FB1B81F0E177400452EA8 /* SpacePruner */ = { 73 | isa = PBXGroup; 74 | children = ( 75 | 653A6ED21F1D6BE80072A1E1 /* unzipping_despacer.h */, 76 | 653A6ED11F1D6BE80072A1E1 /* unzipping_despacer.c */, 77 | 65F8A5EE1F103A7900D3D221 /* 
interleaved_despacer.h */, 78 | 65F8A5ED1F103A7900D3D221 /* interleaved_despacer.c */, 79 | 65F28EAD1F1823A500F80F65 /* bigtable.h */, 80 | 652BA0631F0F11D000A692A9 /* despacer.h */, 81 | 652BA0651F0F18BD00A692A9 /* despacebenchmark.h */, 82 | 652BA0641F0F11D000A692A9 /* despacebenchmark.c */, 83 | 657FB1B91F0E177400452EA8 /* AppDelegate.h */, 84 | 657FB1BA1F0E177400452EA8 /* AppDelegate.m */, 85 | 657FB1BC1F0E177400452EA8 /* ViewController.h */, 86 | 657FB1BD1F0E177400452EA8 /* ViewController.m */, 87 | 657FB1BF1F0E177400452EA8 /* Main.storyboard */, 88 | 657FB1C21F0E177400452EA8 /* Assets.xcassets */, 89 | 657FB1C41F0E177400452EA8 /* LaunchScreen.storyboard */, 90 | 657FB1C71F0E177400452EA8 /* Info.plist */, 91 | 657FB1C81F0E177400452EA8 /* main.m */, 92 | ); 93 | path = SpacePruner; 94 | sourceTree = ""; 95 | }; 96 | /* End PBXGroup section */ 97 | 98 | /* Begin PBXNativeTarget section */ 99 | 657FB1B51F0E177400452EA8 /* SpacePruner */ = { 100 | isa = PBXNativeTarget; 101 | buildConfigurationList = 657FB1CC1F0E177400452EA8 /* Build configuration list for PBXNativeTarget "SpacePruner" */; 102 | buildPhases = ( 103 | 657FB1B21F0E177400452EA8 /* Sources */, 104 | 657FB1B31F0E177400452EA8 /* Frameworks */, 105 | 657FB1B41F0E177400452EA8 /* Resources */, 106 | ); 107 | buildRules = ( 108 | ); 109 | dependencies = ( 110 | ); 111 | name = SpacePruner; 112 | productName = SpacePruner; 113 | productReference = 657FB1B61F0E177400452EA8 /* SpacePruner.app */; 114 | productType = "com.apple.product-type.application"; 115 | }; 116 | /* End PBXNativeTarget section */ 117 | 118 | /* Begin PBXProject section */ 119 | 657FB1AE1F0E177300452EA8 /* Project object */ = { 120 | isa = PBXProject; 121 | attributes = { 122 | LastUpgradeCheck = 0900; 123 | ORGANIZATIONNAME = "Derek Ledbetter"; 124 | TargetAttributes = { 125 | 657FB1B51F0E177400452EA8 = { 126 | CreatedOnToolsVersion = 9.0; 127 | }; 128 | }; 129 | }; 130 | buildConfigurationList = 657FB1B11F0E177300452EA8 /* Build 
configuration list for PBXProject "SpacePruner" */; 131 | compatibilityVersion = "Xcode 8.0"; 132 | developmentRegion = en; 133 | hasScannedForEncodings = 0; 134 | knownRegions = ( 135 | en, 136 | Base, 137 | ); 138 | mainGroup = 657FB1AD1F0E177300452EA8; 139 | productRefGroup = 657FB1B71F0E177400452EA8 /* Products */; 140 | projectDirPath = ""; 141 | projectRoot = ""; 142 | targets = ( 143 | 657FB1B51F0E177400452EA8 /* SpacePruner */, 144 | ); 145 | }; 146 | /* End PBXProject section */ 147 | 148 | /* Begin PBXResourcesBuildPhase section */ 149 | 657FB1B41F0E177400452EA8 /* Resources */ = { 150 | isa = PBXResourcesBuildPhase; 151 | buildActionMask = 2147483647; 152 | files = ( 153 | 657FB1C61F0E177400452EA8 /* LaunchScreen.storyboard in Resources */, 154 | 657FB1C31F0E177400452EA8 /* Assets.xcassets in Resources */, 155 | 65F28EAC1F17150200F80F65 /* README.txt in Resources */, 156 | 657FB1C11F0E177400452EA8 /* Main.storyboard in Resources */, 157 | ); 158 | runOnlyForDeploymentPostprocessing = 0; 159 | }; 160 | /* End PBXResourcesBuildPhase section */ 161 | 162 | /* Begin PBXSourcesBuildPhase section */ 163 | 657FB1B21F0E177400452EA8 /* Sources */ = { 164 | isa = PBXSourcesBuildPhase; 165 | buildActionMask = 2147483647; 166 | files = ( 167 | 657FB1BE1F0E177400452EA8 /* ViewController.m in Sources */, 168 | 657FB1C91F0E177400452EA8 /* main.m in Sources */, 169 | 657FB1BB1F0E177400452EA8 /* AppDelegate.m in Sources */, 170 | 65F8A5EF1F103A7900D3D221 /* interleaved_despacer.c in Sources */, 171 | 652BA0661F0F199A00A692A9 /* despacebenchmark.c in Sources */, 172 | 653A6ED31F1D6BE80072A1E1 /* unzipping_despacer.c in Sources */, 173 | ); 174 | runOnlyForDeploymentPostprocessing = 0; 175 | }; 176 | /* End PBXSourcesBuildPhase section */ 177 | 178 | /* Begin PBXVariantGroup section */ 179 | 657FB1BF1F0E177400452EA8 /* Main.storyboard */ = { 180 | isa = PBXVariantGroup; 181 | children = ( 182 | 657FB1C01F0E177400452EA8 /* Base */, 183 | ); 184 | name = Main.storyboard; 185 
| sourceTree = ""; 186 | }; 187 | 657FB1C41F0E177400452EA8 /* LaunchScreen.storyboard */ = { 188 | isa = PBXVariantGroup; 189 | children = ( 190 | 657FB1C51F0E177400452EA8 /* Base */, 191 | ); 192 | name = LaunchScreen.storyboard; 193 | sourceTree = ""; 194 | }; 195 | /* End PBXVariantGroup section */ 196 | 197 | /* Begin XCBuildConfiguration section */ 198 | 657FB1CA1F0E177400452EA8 /* Debug */ = { 199 | isa = XCBuildConfiguration; 200 | buildSettings = { 201 | ALWAYS_SEARCH_USER_PATHS = NO; 202 | CLANG_ANALYZER_NONNULL = YES; 203 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 204 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 205 | CLANG_CXX_LIBRARY = "libc++"; 206 | CLANG_ENABLE_MODULES = YES; 207 | CLANG_ENABLE_OBJC_ARC = YES; 208 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 209 | CLANG_WARN_BOOL_CONVERSION = YES; 210 | CLANG_WARN_COMMA = YES; 211 | CLANG_WARN_CONSTANT_CONVERSION = YES; 212 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 213 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 214 | CLANG_WARN_EMPTY_BODY = YES; 215 | CLANG_WARN_ENUM_CONVERSION = YES; 216 | CLANG_WARN_INFINITE_RECURSION = YES; 217 | CLANG_WARN_INT_CONVERSION = YES; 218 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 219 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 220 | CLANG_WARN_STRICT_PROTOTYPES = YES; 221 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 222 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 223 | CLANG_WARN_UNREACHABLE_CODE = YES; 224 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 225 | CODE_SIGN_IDENTITY = "iPhone Developer"; 226 | COPY_PHASE_STRIP = NO; 227 | DEBUG_INFORMATION_FORMAT = dwarf; 228 | ENABLE_STRICT_OBJC_MSGSEND = YES; 229 | ENABLE_TESTABILITY = YES; 230 | GCC_C_LANGUAGE_STANDARD = gnu11; 231 | GCC_DYNAMIC_NO_PIC = NO; 232 | GCC_NO_COMMON_BLOCKS = YES; 233 | GCC_OPTIMIZATION_LEVEL = 0; 234 | GCC_PREPROCESSOR_DEFINITIONS = ( 235 | "DEBUG=1", 236 | "$(inherited)", 237 | ); 238 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 239 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 240 
| GCC_WARN_UNDECLARED_SELECTOR = YES; 241 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 242 | GCC_WARN_UNUSED_FUNCTION = YES; 243 | GCC_WARN_UNUSED_VARIABLE = YES; 244 | IPHONEOS_DEPLOYMENT_TARGET = 11.0; 245 | LLVM_LTO = YES; 246 | MTL_ENABLE_DEBUG_INFO = YES; 247 | ONLY_ACTIVE_ARCH = YES; 248 | SDKROOT = iphoneos10.3; 249 | }; 250 | name = Debug; 251 | }; 252 | 657FB1CB1F0E177400452EA8 /* Release */ = { 253 | isa = XCBuildConfiguration; 254 | buildSettings = { 255 | ALWAYS_SEARCH_USER_PATHS = NO; 256 | CLANG_ANALYZER_NONNULL = YES; 257 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 258 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 259 | CLANG_CXX_LIBRARY = "libc++"; 260 | CLANG_ENABLE_MODULES = YES; 261 | CLANG_ENABLE_OBJC_ARC = YES; 262 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 263 | CLANG_WARN_BOOL_CONVERSION = YES; 264 | CLANG_WARN_COMMA = YES; 265 | CLANG_WARN_CONSTANT_CONVERSION = YES; 266 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 267 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 268 | CLANG_WARN_EMPTY_BODY = YES; 269 | CLANG_WARN_ENUM_CONVERSION = YES; 270 | CLANG_WARN_INFINITE_RECURSION = YES; 271 | CLANG_WARN_INT_CONVERSION = YES; 272 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 273 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 274 | CLANG_WARN_STRICT_PROTOTYPES = YES; 275 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 276 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 277 | CLANG_WARN_UNREACHABLE_CODE = YES; 278 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 279 | CODE_SIGN_IDENTITY = "iPhone Developer"; 280 | COPY_PHASE_STRIP = NO; 281 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 282 | ENABLE_NS_ASSERTIONS = NO; 283 | ENABLE_STRICT_OBJC_MSGSEND = YES; 284 | GCC_C_LANGUAGE_STANDARD = gnu11; 285 | GCC_NO_COMMON_BLOCKS = YES; 286 | GCC_OPTIMIZATION_LEVEL = fast; 287 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 288 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 289 | GCC_WARN_UNDECLARED_SELECTOR = YES; 290 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 
291 | GCC_WARN_UNUSED_FUNCTION = YES; 292 | GCC_WARN_UNUSED_VARIABLE = YES; 293 | IPHONEOS_DEPLOYMENT_TARGET = 11.0; 294 | LLVM_LTO = YES; 295 | MTL_ENABLE_DEBUG_INFO = NO; 296 | SDKROOT = iphoneos10.3; 297 | VALIDATE_PRODUCT = YES; 298 | }; 299 | name = Release; 300 | }; 301 | 657FB1CD1F0E177400452EA8 /* Debug */ = { 302 | isa = XCBuildConfiguration; 303 | buildSettings = { 304 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 305 | DEVELOPMENT_TEAM = 6NC7NZMXLP; 306 | GCC_OPTIMIZATION_LEVEL = fast; 307 | INFOPLIST_FILE = SpacePruner/Info.plist; 308 | IPHONEOS_DEPLOYMENT_TARGET = 10.3; 309 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; 310 | LLVM_LTO = YES; 311 | PRODUCT_BUNDLE_IDENTIFIER = net.derekledbetter.SpacePruner; 312 | PRODUCT_NAME = "$(TARGET_NAME)"; 313 | TARGETED_DEVICE_FAMILY = "1,2"; 314 | }; 315 | name = Debug; 316 | }; 317 | 657FB1CE1F0E177400452EA8 /* Release */ = { 318 | isa = XCBuildConfiguration; 319 | buildSettings = { 320 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 321 | DEVELOPMENT_TEAM = 6NC7NZMXLP; 322 | GCC_OPTIMIZATION_LEVEL = fast; 323 | INFOPLIST_FILE = SpacePruner/Info.plist; 324 | IPHONEOS_DEPLOYMENT_TARGET = 10.3; 325 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; 326 | LLVM_LTO = YES; 327 | PRODUCT_BUNDLE_IDENTIFIER = net.derekledbetter.SpacePruner; 328 | PRODUCT_NAME = "$(TARGET_NAME)"; 329 | TARGETED_DEVICE_FAMILY = "1,2"; 330 | }; 331 | name = Release; 332 | }; 333 | /* End XCBuildConfiguration section */ 334 | 335 | /* Begin XCConfigurationList section */ 336 | 657FB1B11F0E177300452EA8 /* Build configuration list for PBXProject "SpacePruner" */ = { 337 | isa = XCConfigurationList; 338 | buildConfigurations = ( 339 | 657FB1CA1F0E177400452EA8 /* Debug */, 340 | 657FB1CB1F0E177400452EA8 /* Release */, 341 | ); 342 | defaultConfigurationIsVisible = 0; 343 | defaultConfigurationName = Release; 344 | }; 345 | 657FB1CC1F0E177400452EA8 /* Build configuration list for PBXNativeTarget 
"SpacePruner" */ = { 346 | isa = XCConfigurationList; 347 | buildConfigurations = ( 348 | 657FB1CD1F0E177400452EA8 /* Debug */, 349 | 657FB1CE1F0E177400452EA8 /* Release */, 350 | ); 351 | defaultConfigurationIsVisible = 0; 352 | defaultConfigurationName = Release; 353 | }; 354 | /* End XCConfigurationList section */ 355 | }; 356 | rootObject = 657FB1AE1F0E177300452EA8 /* Project object */; 357 | } 358 | -------------------------------------------------------------------------------- /SpacePruner.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /SpacePruner.xcodeproj/xcuserdata/derek.xcuserdatad/xcschemes/xcschememanagement.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SchemeUserState 6 | 7 | SpacePruner.xcscheme 8 | 9 | orderHint 10 | 0 11 | 12 | 13 | SuppressBuildableAutocreation 14 | 15 | 657FB1B51F0E177400452EA8 16 | 17 | primary 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /SpacePruner/AppDelegate.h: -------------------------------------------------------------------------------- 1 | // 2 | // AppDelegate.h 3 | // SpacePruner 4 | // 5 | // Created by Derek Ledbetter on 2017-07-06. 6 | // 7 | 8 | #import 9 | 10 | @interface AppDelegate : UIResponder 11 | 12 | @property (strong, nonatomic) UIWindow *window; 13 | 14 | 15 | @end 16 | 17 | -------------------------------------------------------------------------------- /SpacePruner/AppDelegate.m: -------------------------------------------------------------------------------- 1 | // 2 | // AppDelegate.m 3 | // SpacePruner 4 | // 5 | // Created by Derek Ledbetter on 2017-07-06. 
6 | // 7 | 8 | #import "AppDelegate.h" 9 | 10 | @interface AppDelegate () 11 | 12 | @end 13 | 14 | @implementation AppDelegate 15 | 16 | 17 | - (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { 18 | // Override point for customization after application launch. 19 | return YES; 20 | } 21 | 22 | 23 | - (void)applicationWillResignActive:(UIApplication *)application { 24 | // Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state. 25 | // Use this method to pause ongoing tasks, disable timers, and invalidate graphics rendering callbacks. Games should use this method to pause the game. 26 | } 27 | 28 | 29 | - (void)applicationDidEnterBackground:(UIApplication *)application { 30 | // Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later. 31 | // If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits. 32 | } 33 | 34 | 35 | - (void)applicationWillEnterForeground:(UIApplication *)application { 36 | // Called as part of the transition from the background to the active state; here you can undo many of the changes made on entering the background. 37 | } 38 | 39 | 40 | - (void)applicationDidBecomeActive:(UIApplication *)application { 41 | // Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface. 42 | } 43 | 44 | 45 | - (void)applicationWillTerminate:(UIApplication *)application { 46 | // Called when the application is about to terminate. 
Save data if appropriate. See also applicationDidEnterBackground:. 47 | } 48 | 49 | 50 | @end 51 | -------------------------------------------------------------------------------- /SpacePruner/Assets.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "iphone", 5 | "size" : "20x20", 6 | "scale" : "2x" 7 | }, 8 | { 9 | "idiom" : "iphone", 10 | "size" : "20x20", 11 | "scale" : "3x" 12 | }, 13 | { 14 | "idiom" : "iphone", 15 | "size" : "29x29", 16 | "scale" : "2x" 17 | }, 18 | { 19 | "idiom" : "iphone", 20 | "size" : "29x29", 21 | "scale" : "3x" 22 | }, 23 | { 24 | "idiom" : "iphone", 25 | "size" : "40x40", 26 | "scale" : "2x" 27 | }, 28 | { 29 | "idiom" : "iphone", 30 | "size" : "40x40", 31 | "scale" : "3x" 32 | }, 33 | { 34 | "idiom" : "iphone", 35 | "size" : "60x60", 36 | "scale" : "2x" 37 | }, 38 | { 39 | "idiom" : "iphone", 40 | "size" : "60x60", 41 | "scale" : "3x" 42 | }, 43 | { 44 | "idiom" : "ipad", 45 | "size" : "20x20", 46 | "scale" : "1x" 47 | }, 48 | { 49 | "idiom" : "ipad", 50 | "size" : "20x20", 51 | "scale" : "2x" 52 | }, 53 | { 54 | "idiom" : "ipad", 55 | "size" : "29x29", 56 | "scale" : "1x" 57 | }, 58 | { 59 | "idiom" : "ipad", 60 | "size" : "29x29", 61 | "scale" : "2x" 62 | }, 63 | { 64 | "idiom" : "ipad", 65 | "size" : "40x40", 66 | "scale" : "1x" 67 | }, 68 | { 69 | "idiom" : "ipad", 70 | "size" : "40x40", 71 | "scale" : "2x" 72 | }, 73 | { 74 | "idiom" : "ipad", 75 | "size" : "76x76", 76 | "scale" : "1x" 77 | }, 78 | { 79 | "idiom" : "ipad", 80 | "size" : "76x76", 81 | "scale" : "2x" 82 | }, 83 | { 84 | "idiom" : "ipad", 85 | "size" : "83.5x83.5", 86 | "scale" : "2x" 87 | } 88 | ], 89 | "info" : { 90 | "version" : 1, 91 | "author" : "xcode" 92 | } 93 | } -------------------------------------------------------------------------------- /SpacePruner/Base.lproj/LaunchScreen.storyboard: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /SpacePruner/Base.lproj/Main.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | CourierNewPSMT 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 36 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /SpacePruner/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | APPL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | 1 21 | LSRequiresIPhoneOS 22 | 23 | UILaunchStoryboardName 24 | LaunchScreen 25 | UIMainStoryboardFile 26 | Main 27 | UIRequiredDeviceCapabilities 28 | 29 | armv7 30 | 31 | UISupportedInterfaceOrientations 32 | 33 | UIInterfaceOrientationPortrait 34 | UIInterfaceOrientationLandscapeLeft 35 | UIInterfaceOrientationLandscapeRight 36 | 37 | UISupportedInterfaceOrientations~ipad 38 | 39 | UIInterfaceOrientationPortrait 40 | UIInterfaceOrientationPortraitUpsideDown 41 | UIInterfaceOrientationLandscapeLeft 42 | UIInterfaceOrientationLandscapeRight 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /SpacePruner/ViewController.h: -------------------------------------------------------------------------------- 1 | 
//
//  ViewController.h
//  SpacePruner
//
//  Created by Derek Ledbetter on 2017-07-06.
//

#import <UIKit/UIKit.h>

/// Single-screen controller: a button starts the despace benchmark and a label
/// shows the text the benchmark writes.
@interface ViewController : UIViewController

/// Starts one benchmark run on a background queue.
/// Does nothing if a run is already in progress.
- (IBAction)runTest:(id)sender;

@end

--------------------------------------------------------------------------------
/SpacePruner/ViewController.m:
--------------------------------------------------------------------------------
//
//  ViewController.m
//  SpacePruner
//
//  Created by Derek Ledbetter on 2017-07-06.
//

#import "ViewController.h"

#include "despacebenchmark.h"
#include <stdio.h>

@interface ViewController ()

/// YES while a benchmark run is in flight. Read and written on the main queue only.
@property (nonatomic) BOOL currentlyRunning;

/// Everything the current run has printed so far. Mutated on the main queue only.
@property (nonatomic, strong) NSMutableString *logText;

@property (nonatomic, weak) IBOutlet UIButton *button;
@property (nonatomic, weak) IBOutlet UILabel *label;

/// Appends `length` bytes of ASCII text to the on-screen log.
/// May be called from any thread; the UI update is forwarded to the main queue.
- (void)addToLogFromBytes:(const char *)bytes length:(int)length;

@end

/// Write callback installed with fwopen() in -runTest:.
///
/// `cookie` is the ViewController that -runTest: handed to fwopen(). The
/// dispatch block in -runTest: holds a strong reference to that controller
/// until fclose() has returned, so the unretained bridge cast below never sees
/// a dangling pointer.
///
/// Returns `nbyte` to tell stdio that the whole buffer was consumed.
int streamWriteFunction(void *cookie, const char *buf, int nbyte) {
    ViewController *controller = (__bridge ViewController *)cookie;
    [controller addToLogFromBytes:buf length:nbyte];
    return nbyte;
}

@implementation ViewController

#pragma mark - Lifecycle

- (instancetype)initWithNibName:(NSString *)nibNameOrNil bundle:(NSBundle *)nibBundleOrNil {
    self = [super initWithNibName:nibNameOrNil bundle:nibBundleOrNil];
    if (self) {
        [self setUp];
    }
    return self;
}

- (nullable instancetype)initWithCoder:(NSCoder *)aDecoder {
    self = [super initWithCoder:aDecoder];
    if (self) {
        [self setUp];
    }
    return self;
}

/// Shared initialisation for both designated initialisers.
/// Uses the ivar directly because it runs during init.
- (void)setUp {
    _logText = [NSMutableString string];
}

- (void)viewDidLoad {
    [super viewDidLoad];
    [self updateRunButton];
}

#pragma mark - Actions

/// The run button is only tappable while no benchmark is running.
- (void)updateRunButton {
    self.button.enabled = !self.currentlyRunning;
}

- (IBAction)runTest:(id)sender {
    if (self.currentlyRunning) {
        return;
    }

    self.currentlyRunning = YES;
    [self updateRunButton];
    [self.logText setString:@""];

    // `self` is captured strongly on purpose: the raw cookie pointer given to
    // fwopen() carries no ownership, so this block is what keeps the controller
    // alive while the benchmark writes to the stream. The block is released as
    // soon as the run finishes, so no retain cycle is created.
    dispatch_async(dispatch_get_global_queue(QOS_CLASS_USER_INITIATED, 0), ^{
        FILE *stream = fwopen((__bridge void *)self, streamWriteFunction);
        if (stream != NULL) {
            despace_benchmark(stream);
            fclose(stream);
        } else {
            // fwopen() can fail (e.g. out of memory); report it instead of
            // passing a NULL stream to the benchmark.
            static const char message[] = "Could not open the log stream.\n";
            [self addToLogFromBytes:message length:(int)(sizeof(message) - 1)];
        }

        // Queued after every log append above, so the button is re-enabled only
        // once all output has reached the label.
        dispatch_async(dispatch_get_main_queue(), ^{
            self.currentlyRunning = NO;
            [self updateRunButton];
        });
    });
}

#pragma mark - Logging

- (void)addToLogFromBytes:(const char *)bytes length:(int)length {
    if (bytes == NULL || length <= 0) {
        return;
    }

    // The string is built on the calling thread because `bytes` belongs to
    // stdio and is only valid for the duration of this call.
    NSString *newString = [[NSString alloc] initWithBytes:bytes
                                                   length:(NSUInteger)length
                                                 encoding:NSASCIIStringEncoding];
    if (newString == nil) {
        // Bytes not valid for the encoding; -appendString: would raise on nil.
        return;
    }

    dispatch_async(dispatch_get_main_queue(), ^{
        [self.logText appendString:newString];
        self.label.text = self.logText;
    });
}

@end

--------------------------------------------------------------------------------
/SpacePruner/despacebenchmark.c:
--------------------------------------------------------------------------------
// gcc -std=c99 -O3 -o despacebenchmark despacebenchmark.c
// Originally written by Daniel Lemire.
3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "despacer.h" 12 | #include "interleaved_despacer.h" 13 | #include "unzipping_despacer.h" 14 | 15 | static inline uint64_t time_in_ns() { 16 | struct timeval tv; 17 | gettimeofday(&tv, NULL); 18 | return (uint64_t)tv.tv_sec * 1000000000 + (uint32_t)tv.tv_usec * 1000; 19 | } 20 | 21 | static const int functionNameLength = 30; 22 | 23 | #define BEST_TIME(test) \ 24 | do { \ 25 | uint64_t min_diff = (uint64_t)-1; \ 26 | for (int i = 0; i < repeat; i++) { \ 27 | fillwithtext(buffer, N); \ 28 | \ 29 | __asm volatile("" ::: /* pretend to clobber */ "memory"); \ 30 | const uint64_t cycles_start = time_in_ns(); \ 31 | test(buffer, N); \ 32 | const uint64_t cycles_final = time_in_ns(); \ 33 | \ 34 | const uint64_t cycles_diff = (cycles_final - cycles_start); \ 35 | if (cycles_diff < min_diff) \ 36 | min_diff = cycles_diff; \ 37 | } \ 38 | float cycle_per_op = (float)min_diff / (float)N; \ 39 | fprintf(stream, "%-*s: %.2f\n", functionNameLength, #test, cycle_per_op); \ 40 | fflush(stream); \ 41 | } while (0) 42 | 43 | 44 | // let us estimate that we have a 1% proba of hitting a white space 45 | size_t fillwithtext(char *buffer, size_t size) { 46 | size_t howmany = 0; 47 | for (size_t i = 0; i < size; ++i) { 48 | double r = ((double)rand() / (RAND_MAX)); 49 | if (r < 0.01) { 50 | buffer[i] = ' '; 51 | howmany++; 52 | } else if (r < 0.02) { 53 | buffer[i] = '\n'; 54 | howmany++; 55 | } else if (r < 0.03) { 56 | buffer[i] = '\r'; 57 | howmany++; 58 | } else { 59 | do { 60 | buffer[i] = (char)rand(); 61 | } while (buffer[i] <= 32); 62 | } 63 | } 64 | return howmany; 65 | } 66 | 67 | typedef size_t (*despace_function_ptr)(char *bytes, size_t howmany); 68 | 69 | #define FUNCTION_AND_NAME(func) { &func, #func } 70 | 71 | struct FunctionAndName { 72 | despace_function_ptr ptr; 73 | const char* name; 74 | }; 75 | 76 | const struct FunctionAndName functionsToTest[] = { 77 | 
FUNCTION_AND_NAME(despace), 78 | #if __ARM_NEON 79 | //FUNCTION_AND_NAME(neon_despace), 80 | //FUNCTION_AND_NAME(neon_despace_branchless), 81 | #if defined(__aarch64__) 82 | FUNCTION_AND_NAME(neontbl_despace), 83 | #endif 84 | FUNCTION_AND_NAME(neon_interleaved_despace), 85 | FUNCTION_AND_NAME(neon_unzipping_despace), 86 | #endif 87 | }; 88 | const size_t functionsToTestCount = sizeof(functionsToTest) / sizeof(functionsToTest[0]); 89 | 90 | void despace_benchmark(FILE* stream) { 91 | const int N = 1024 * 32; 92 | const int repeat = 100; 93 | const int alignoffset = 0; 94 | 95 | // Add one in case we want to null-terminate. 96 | char *origbuffer = malloc(N + alignoffset + 1); 97 | char *origtmpbuffer = malloc(N + alignoffset + 1); 98 | char *buffer = origbuffer + alignoffset; 99 | char *tmpbuffer = origtmpbuffer + alignoffset; 100 | char *correctbuffer = malloc(N + 1); 101 | fprintf(stream, "pointer alignment = %d bytes\n", 1 << __builtin_ctzll((uintptr_t)(const void *)(buffer))); 102 | 103 | static const size_t testSizes[] = { 0, 1, 2, 3, 4, 7, 8, 9, 13, 16, 17, 61, 64, 67, 104 | 100, 123, 1000, 10000, N }; 105 | const size_t testSizesCount = sizeof(testSizes) / sizeof(testSizes[0]); 106 | bool failedTests[functionsToTestCount] = {}; 107 | 108 | for (size_t i = 0; i != testSizesCount; ++i) { 109 | const size_t sourceCount = testSizes[i]; 110 | assert(sourceCount <= N); 111 | 112 | const size_t howmanywhite = fillwithtext(buffer, sourceCount); 113 | const size_t correctResultSize = sourceCount - howmanywhite; 114 | 115 | int j = 0; 116 | for (int i = 0; i < sourceCount; ++i) { 117 | char c = buffer[i]; 118 | if (c > 32) { 119 | correctbuffer[j++] = c; 120 | } 121 | } 122 | assert(j == correctResultSize); 123 | 124 | for (size_t t = 0; t != functionsToTestCount; ++t) { 125 | if (failedTests[t]) { 126 | continue; 127 | } 128 | 129 | memcpy(tmpbuffer, buffer, sourceCount); 130 | size_t resultSize = (*functionsToTest[t].ptr)(tmpbuffer, sourceCount); 131 | 132 | if 
(resultSize != correctResultSize 133 | || memcmp(tmpbuffer, correctbuffer, resultSize) != 0) { 134 | failedTests[t] = true; 135 | } 136 | } 137 | } 138 | 139 | for (size_t t = 0; t != functionsToTestCount; ++t) { 140 | fprintf(stream, "%-*s: %s\n", functionNameLength, functionsToTest[t].name, failedTests[t] ? "FAILURE" : "OK"); 141 | } 142 | fflush(stream); 143 | 144 | fprintf(stream, "\nns per operation:\n"); 145 | BEST_TIME(despace); 146 | #if __ARM_NEON 147 | //BEST_TIME(neon_despace); 148 | //BEST_TIME(neon_despace_branchless); 149 | #if defined(__aarch64__) 150 | BEST_TIME(neontbl_despace); 151 | #endif 152 | BEST_TIME(neon_interleaved_despace); 153 | BEST_TIME(neon_unzipping_despace); 154 | #endif // __ARM_NEON 155 | fprintf(stream, "\n"); 156 | 157 | free(correctbuffer); 158 | free(origbuffer); 159 | free(origtmpbuffer); 160 | } 161 | 162 | -------------------------------------------------------------------------------- /SpacePruner/despacebenchmark.h: -------------------------------------------------------------------------------- 1 | // 2 | // despacebenchmark.h 3 | // SpacePruner 4 | // 5 | // Created by Derek Ledbetter on 2017-07-06. 6 | // 7 | 8 | #ifndef despacebenchmark_h 9 | #define despacebenchmark_h 10 | 11 | #include 12 | 13 | void despace_benchmark(FILE* stream); 14 | 15 | #endif /* despacebenchmark_h */ 16 | -------------------------------------------------------------------------------- /SpacePruner/despacer.h: -------------------------------------------------------------------------------- 1 | // Originally written by Daniel Lemire. 2 | 3 | #ifndef DESPACER_H 4 | #define DESPACER_H 5 | #include 6 | #include 7 | #include 8 | 9 | static inline size_t despace(char *bytes, size_t howmany) { 10 | size_t i = 0, pos = 0; 11 | while (i < howmany) { 12 | const char c = bytes[i++]; 13 | bytes[pos] = c; 14 | pos += (c > 32 ? 
1 : 0); 15 | } 16 | return pos; 17 | } 18 | 19 | #if __ARM_NEON 20 | // let us go neon 21 | #include 22 | 23 | 24 | static inline uint8x16_t is_white(uint8x16_t data) { 25 | const uint8x16_t wchar = vdupq_n_u8(' '); 26 | uint8x16_t isw = vcleq_u8(data, wchar); 27 | return isw; 28 | } 29 | 30 | 31 | 32 | //////////// 33 | // see https://stackoverflow.com/a/41406502/73007 34 | // compiles to something like 35 | // uqxtn v0.2s, v0.2d 36 | // fmov x0, d0 37 | ////////// 38 | static inline uint64_t is_not_zero(uint8x16_t v) { 39 | uint64x2_t v64 = vreinterpretq_u64_u8(v); 40 | uint32x2_t v32 = vqmovn_u64(v64); 41 | uint64x1_t result = vreinterpret_u64_u32(v32); 42 | return vget_lane_u64(result, 0); 43 | } 44 | 45 | /* 46 | * could also have 47 | * 48 | static inline uint16_t is_not_zero(uint8x16_t v) { 49 | return vaddlvq_u8(v); 50 | } 51 | * but experimentally, it appears slower. 52 | * credit: Cyril Lashkevich 53 | */ 54 | 55 | 56 | static inline size_t neon_despace(char *bytes, size_t howmany) { 57 | size_t i = 0, pos = 0; 58 | const size_t chunk_size = 16 * 4 * 1; 59 | uint8x16_t justone = vdupq_n_u8(1); 60 | for (; i + chunk_size <= howmany; /*i += chunk_size*/) { 61 | uint8x16_t vecbytes0 = vld1q_u8((uint8_t *)bytes + i); 62 | uint8x16_t vecbytes1 = vld1q_u8((uint8_t *)bytes + i + 16); 63 | uint8x16_t vecbytes2 = vld1q_u8((uint8_t *)bytes + i + 32); 64 | uint8x16_t vecbytes3 = vld1q_u8((uint8_t *)bytes + i + 48); 65 | uint8x16_t w0 = is_white(vecbytes0); 66 | uint64_t haswhite0 = is_not_zero(w0); 67 | w0 = vaddq_u8(justone, w0); 68 | uint8x16_t w1 = is_white(vecbytes1); 69 | uint64_t haswhite1 = is_not_zero(w1); 70 | w1 = vaddq_u8(justone, w1); 71 | uint8x16_t w2 = is_white(vecbytes2); 72 | uint64_t haswhite2 = is_not_zero(w2); 73 | w2 = vaddq_u8(justone, w2); 74 | uint8x16_t w3 = is_white(vecbytes3); 75 | uint64_t haswhite3 = is_not_zero(w3); 76 | w3 = vaddq_u8(justone, w3); 77 | if(!haswhite0) { 78 | vst1q_u8((uint8_t *)bytes + pos,vecbytes0); 79 | pos += 16; 80 
| i += 16; 81 | } else { 82 | for (int k = 0; k < 16; k++) { 83 | bytes[pos] = bytes[i++]; 84 | pos += w0[k]; 85 | } 86 | } 87 | if(!haswhite1) { 88 | vst1q_u8((uint8_t *)bytes + pos,vecbytes1); 89 | pos += 16; 90 | i += 16; 91 | } else { 92 | for (int k = 0; k < 16; k++) { 93 | bytes[pos] = bytes[i++]; 94 | pos += w1[k]; 95 | } 96 | } 97 | if(!haswhite2) { 98 | vst1q_u8((uint8_t *)bytes + pos,vecbytes2); 99 | pos += 16; 100 | i += 16; 101 | } else { 102 | for (int k = 0; k < 16; k++) { 103 | bytes[pos] = bytes[i++]; 104 | pos += w2[k]; 105 | } 106 | } 107 | if(!haswhite3) { 108 | vst1q_u8((uint8_t *)bytes + pos,vecbytes3); 109 | pos += 16; 110 | i += 16; 111 | } else { 112 | for (int k = 0; k < 16; k++) { 113 | bytes[pos] = bytes[i++]; 114 | pos += w3[k]; 115 | } 116 | } 117 | } 118 | while (i < howmany) { 119 | char c = bytes[i++]; 120 | if (c == '\r' || c == '\n' || c == ' ') { 121 | continue; 122 | } 123 | bytes[pos++] = c; 124 | } 125 | return pos; 126 | } 127 | 128 | 129 | 130 | 131 | 132 | static const uint8_t __attribute__((aligned(16))) mask_shuffle[256*8] = { 133 | 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 1,0,0,0,0,0,0,0, 0,1,0,0,0,0,0,0, 134 | 2,0,0,0,0,0,0,0, 0,2,0,0,0,0,0,0, 1,2,0,0,0,0,0,0, 0,1,2,0,0,0,0,0, 135 | 3,0,0,0,0,0,0,0, 0,3,0,0,0,0,0,0, 1,3,0,0,0,0,0,0, 0,1,3,0,0,0,0,0, 136 | 2,3,0,0,0,0,0,0, 0,2,3,0,0,0,0,0, 1,2,3,0,0,0,0,0, 0,1,2,3,0,0,0,0, 137 | 4,0,0,0,0,0,0,0, 0,4,0,0,0,0,0,0, 1,4,0,0,0,0,0,0, 0,1,4,0,0,0,0,0, 138 | 2,4,0,0,0,0,0,0, 0,2,4,0,0,0,0,0, 1,2,4,0,0,0,0,0, 0,1,2,4,0,0,0,0, 139 | 3,4,0,0,0,0,0,0, 0,3,4,0,0,0,0,0, 1,3,4,0,0,0,0,0, 0,1,3,4,0,0,0,0, 140 | 2,3,4,0,0,0,0,0, 0,2,3,4,0,0,0,0, 1,2,3,4,0,0,0,0, 0,1,2,3,4,0,0,0, 141 | 5,0,0,0,0,0,0,0, 0,5,0,0,0,0,0,0, 1,5,0,0,0,0,0,0, 0,1,5,0,0,0,0,0, 142 | 2,5,0,0,0,0,0,0, 0,2,5,0,0,0,0,0, 1,2,5,0,0,0,0,0, 0,1,2,5,0,0,0,0, 143 | 3,5,0,0,0,0,0,0, 0,3,5,0,0,0,0,0, 1,3,5,0,0,0,0,0, 0,1,3,5,0,0,0,0, 144 | 2,3,5,0,0,0,0,0, 0,2,3,5,0,0,0,0, 1,2,3,5,0,0,0,0, 0,1,2,3,5,0,0,0, 145 | 4,5,0,0,0,0,0,0, 
0,4,5,0,0,0,0,0, 1,4,5,0,0,0,0,0, 0,1,4,5,0,0,0,0, 146 | 2,4,5,0,0,0,0,0, 0,2,4,5,0,0,0,0, 1,2,4,5,0,0,0,0, 0,1,2,4,5,0,0,0, 147 | 3,4,5,0,0,0,0,0, 0,3,4,5,0,0,0,0, 1,3,4,5,0,0,0,0, 0,1,3,4,5,0,0,0, 148 | 2,3,4,5,0,0,0,0, 0,2,3,4,5,0,0,0, 1,2,3,4,5,0,0,0, 0,1,2,3,4,5,0,0, 149 | 6,0,0,0,0,0,0,0, 0,6,0,0,0,0,0,0, 1,6,0,0,0,0,0,0, 0,1,6,0,0,0,0,0, 150 | 2,6,0,0,0,0,0,0, 0,2,6,0,0,0,0,0, 1,2,6,0,0,0,0,0, 0,1,2,6,0,0,0,0, 151 | 3,6,0,0,0,0,0,0, 0,3,6,0,0,0,0,0, 1,3,6,0,0,0,0,0, 0,1,3,6,0,0,0,0, 152 | 2,3,6,0,0,0,0,0, 0,2,3,6,0,0,0,0, 1,2,3,6,0,0,0,0, 0,1,2,3,6,0,0,0, 153 | 4,6,0,0,0,0,0,0, 0,4,6,0,0,0,0,0, 1,4,6,0,0,0,0,0, 0,1,4,6,0,0,0,0, 154 | 2,4,6,0,0,0,0,0, 0,2,4,6,0,0,0,0, 1,2,4,6,0,0,0,0, 0,1,2,4,6,0,0,0, 155 | 3,4,6,0,0,0,0,0, 0,3,4,6,0,0,0,0, 1,3,4,6,0,0,0,0, 0,1,3,4,6,0,0,0, 156 | 2,3,4,6,0,0,0,0, 0,2,3,4,6,0,0,0, 1,2,3,4,6,0,0,0, 0,1,2,3,4,6,0,0, 157 | 5,6,0,0,0,0,0,0, 0,5,6,0,0,0,0,0, 1,5,6,0,0,0,0,0, 0,1,5,6,0,0,0,0, 158 | 2,5,6,0,0,0,0,0, 0,2,5,6,0,0,0,0, 1,2,5,6,0,0,0,0, 0,1,2,5,6,0,0,0, 159 | 3,5,6,0,0,0,0,0, 0,3,5,6,0,0,0,0, 1,3,5,6,0,0,0,0, 0,1,3,5,6,0,0,0, 160 | 2,3,5,6,0,0,0,0, 0,2,3,5,6,0,0,0, 1,2,3,5,6,0,0,0, 0,1,2,3,5,6,0,0, 161 | 4,5,6,0,0,0,0,0, 0,4,5,6,0,0,0,0, 1,4,5,6,0,0,0,0, 0,1,4,5,6,0,0,0, 162 | 2,4,5,6,0,0,0,0, 0,2,4,5,6,0,0,0, 1,2,4,5,6,0,0,0, 0,1,2,4,5,6,0,0, 163 | 3,4,5,6,0,0,0,0, 0,3,4,5,6,0,0,0, 1,3,4,5,6,0,0,0, 0,1,3,4,5,6,0,0, 164 | 2,3,4,5,6,0,0,0, 0,2,3,4,5,6,0,0, 1,2,3,4,5,6,0,0, 0,1,2,3,4,5,6,0, 165 | 7,0,0,0,0,0,0,0, 0,7,0,0,0,0,0,0, 1,7,0,0,0,0,0,0, 0,1,7,0,0,0,0,0, 166 | 2,7,0,0,0,0,0,0, 0,2,7,0,0,0,0,0, 1,2,7,0,0,0,0,0, 0,1,2,7,0,0,0,0, 167 | 3,7,0,0,0,0,0,0, 0,3,7,0,0,0,0,0, 1,3,7,0,0,0,0,0, 0,1,3,7,0,0,0,0, 168 | 2,3,7,0,0,0,0,0, 0,2,3,7,0,0,0,0, 1,2,3,7,0,0,0,0, 0,1,2,3,7,0,0,0, 169 | 4,7,0,0,0,0,0,0, 0,4,7,0,0,0,0,0, 1,4,7,0,0,0,0,0, 0,1,4,7,0,0,0,0, 170 | 2,4,7,0,0,0,0,0, 0,2,4,7,0,0,0,0, 1,2,4,7,0,0,0,0, 0,1,2,4,7,0,0,0, 171 | 3,4,7,0,0,0,0,0, 0,3,4,7,0,0,0,0, 1,3,4,7,0,0,0,0, 0,1,3,4,7,0,0,0, 172 | 2,3,4,7,0,0,0,0, 
0,2,3,4,7,0,0,0, 1,2,3,4,7,0,0,0, 0,1,2,3,4,7,0,0, 173 | 5,7,0,0,0,0,0,0, 0,5,7,0,0,0,0,0, 1,5,7,0,0,0,0,0, 0,1,5,7,0,0,0,0, 174 | 2,5,7,0,0,0,0,0, 0,2,5,7,0,0,0,0, 1,2,5,7,0,0,0,0, 0,1,2,5,7,0,0,0, 175 | 3,5,7,0,0,0,0,0, 0,3,5,7,0,0,0,0, 1,3,5,7,0,0,0,0, 0,1,3,5,7,0,0,0, 176 | 2,3,5,7,0,0,0,0, 0,2,3,5,7,0,0,0, 1,2,3,5,7,0,0,0, 0,1,2,3,5,7,0,0, 177 | 4,5,7,0,0,0,0,0, 0,4,5,7,0,0,0,0, 1,4,5,7,0,0,0,0, 0,1,4,5,7,0,0,0, 178 | 2,4,5,7,0,0,0,0, 0,2,4,5,7,0,0,0, 1,2,4,5,7,0,0,0, 0,1,2,4,5,7,0,0, 179 | 3,4,5,7,0,0,0,0, 0,3,4,5,7,0,0,0, 1,3,4,5,7,0,0,0, 0,1,3,4,5,7,0,0, 180 | 2,3,4,5,7,0,0,0, 0,2,3,4,5,7,0,0, 1,2,3,4,5,7,0,0, 0,1,2,3,4,5,7,0, 181 | 6,7,0,0,0,0,0,0, 0,6,7,0,0,0,0,0, 1,6,7,0,0,0,0,0, 0,1,6,7,0,0,0,0, 182 | 2,6,7,0,0,0,0,0, 0,2,6,7,0,0,0,0, 1,2,6,7,0,0,0,0, 0,1,2,6,7,0,0,0, 183 | 3,6,7,0,0,0,0,0, 0,3,6,7,0,0,0,0, 1,3,6,7,0,0,0,0, 0,1,3,6,7,0,0,0, 184 | 2,3,6,7,0,0,0,0, 0,2,3,6,7,0,0,0, 1,2,3,6,7,0,0,0, 0,1,2,3,6,7,0,0, 185 | 4,6,7,0,0,0,0,0, 0,4,6,7,0,0,0,0, 1,4,6,7,0,0,0,0, 0,1,4,6,7,0,0,0, 186 | 2,4,6,7,0,0,0,0, 0,2,4,6,7,0,0,0, 1,2,4,6,7,0,0,0, 0,1,2,4,6,7,0,0, 187 | 3,4,6,7,0,0,0,0, 0,3,4,6,7,0,0,0, 1,3,4,6,7,0,0,0, 0,1,3,4,6,7,0,0, 188 | 2,3,4,6,7,0,0,0, 0,2,3,4,6,7,0,0, 1,2,3,4,6,7,0,0, 0,1,2,3,4,6,7,0, 189 | 5,6,7,0,0,0,0,0, 0,5,6,7,0,0,0,0, 1,5,6,7,0,0,0,0, 0,1,5,6,7,0,0,0, 190 | 2,5,6,7,0,0,0,0, 0,2,5,6,7,0,0,0, 1,2,5,6,7,0,0,0, 0,1,2,5,6,7,0,0, 191 | 3,5,6,7,0,0,0,0, 0,3,5,6,7,0,0,0, 1,3,5,6,7,0,0,0, 0,1,3,5,6,7,0,0, 192 | 2,3,5,6,7,0,0,0, 0,2,3,5,6,7,0,0, 1,2,3,5,6,7,0,0, 0,1,2,3,5,6,7,0, 193 | 4,5,6,7,0,0,0,0, 0,4,5,6,7,0,0,0, 1,4,5,6,7,0,0,0, 0,1,4,5,6,7,0,0, 194 | 2,4,5,6,7,0,0,0, 0,2,4,5,6,7,0,0, 1,2,4,5,6,7,0,0, 0,1,2,4,5,6,7,0, 195 | 3,4,5,6,7,0,0,0, 0,3,4,5,6,7,0,0, 1,3,4,5,6,7,0,0, 0,1,3,4,5,6,7,0, 196 | 2,3,4,5,6,7,0,0, 0,2,3,4,5,6,7,0, 1,2,3,4,5,6,7,0, 0,1,2,3,4,5,6,7, 197 | }; 198 | 199 | // credit: Martins Mozeiko 200 | static inline size_t neon_despace_branchless(char *bytes, size_t howmany) { 201 | size_t i = 0, pos = 0; 202 | const 
size_t chunk_size = 16; 203 | uint8x16_t bitmask = { 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128 }; 204 | uint8x16_t space = vdupq_n_u8(' '); 205 | for (; i + chunk_size <= howmany; i += chunk_size) { 206 | uint8x16_t vec = vld1q_u8((uint8_t*)bytes + i); 207 | 208 | uint8x16_t cmp = vcgtq_u8(vec, space); 209 | uint64x2_t mask = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(cmp, bitmask)))); 210 | 211 | uint8_t mlow = vgetq_lane_u8(vreinterpretq_u8_u64(mask), 0); 212 | uint8_t mhigh = vgetq_lane_u8(vreinterpretq_u8_u64(mask), 8); 213 | 214 | uint8x8_t low = vtbl1_u8(vget_low_u8(vec), vld1_u8(mask_shuffle + mlow*8)); 215 | uint8x8_t high = vtbl1_u8(vget_high_u8(vec), vld1_u8(mask_shuffle + mhigh*8)); 216 | 217 | vst1_u8((uint8_t*)bytes + pos, low); 218 | pos += __builtin_popcount(mlow); 219 | 220 | vst1_u8((uint8_t*)bytes + pos, high); 221 | pos += __builtin_popcount(mhigh); 222 | } 223 | while (i < howmany) { 224 | char c = bytes[i++]; 225 | if (c == '\r' || c == '\n' || c == ' ') { 226 | continue; 227 | } 228 | bytes[pos++] = c; 229 | } 230 | return pos; 231 | } 232 | 233 | #if defined(__aarch64__) 234 | 235 | static inline uint8x16_t is_nonwhite(uint8x16_t data) { 236 | const uint8x16_t wchar = vdupq_n_u8(' '+1); 237 | uint8x16_t isw = vcgeq_u8(data, wchar); 238 | return isw; 239 | } 240 | 241 | static inline uint16_t neonmovemask_addv(uint8x16_t input8) { 242 | uint16x8_t input = vreinterpretq_u16_u8(input8); 243 | const uint16x8_t bitmask = { 0x0101 , 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080 }; 244 | uint16x8_t minput = vandq_u16(input, bitmask); 245 | return vaddvq_u16(minput); 246 | } 247 | 248 | static inline uint8_t bytepopcount(uint8x16_t v) { 249 | return vaddvq_u8(vshrq_n_u8(v,7)); 250 | } 251 | 252 | #include "bigtable.h" 253 | 254 | 255 | static inline size_t neontbl_despace(char *bytes, size_t howmany) { 256 | size_t i = 0, pos = 0; 257 | const size_t chunk_size = 16 * 4 * 1; 258 | for (; i + chunk_size <= howmany; i += 
chunk_size) { 259 | uint8x16_t vecbytes0 = vld1q_u8((uint8_t *)bytes + i); 260 | uint8x16_t vecbytes1 = vld1q_u8((uint8_t *)bytes + i + 16); 261 | uint8x16_t vecbytes2 = vld1q_u8((uint8_t *)bytes + i + 32); 262 | uint8x16_t vecbytes3 = vld1q_u8((uint8_t *)bytes + i + 48); 263 | // as early as possible, we compute the population counts 264 | uint8x16_t w0 = is_nonwhite(vecbytes0); 265 | uint8_t numberofkeptchars0 = bytepopcount(w0); 266 | uint8x16_t shuf0 = vld1q_u8(shufmask + 16 * neonmovemask_addv(w0)); 267 | uint8x16_t reshuf0 = vqtbl1q_u8(vecbytes0,shuf0); 268 | uint8x16_t w1 = is_nonwhite(vecbytes1); 269 | uint8_t numberofkeptchars1 = bytepopcount(w1); 270 | uint8x16_t shuf1 = vld1q_u8(shufmask + 16 * neonmovemask_addv(w1)); 271 | uint8x16_t reshuf1 = vqtbl1q_u8(vecbytes1,shuf1); 272 | uint8x16_t w2 = is_nonwhite(vecbytes2); 273 | uint8_t numberofkeptchars2 = bytepopcount(w2); 274 | uint8x16_t shuf2 = vld1q_u8(shufmask + 16 * neonmovemask_addv(w2)); 275 | uint8x16_t reshuf2 = vqtbl1q_u8(vecbytes2,shuf2); 276 | uint8x16_t w3 = is_nonwhite(vecbytes3); 277 | uint8_t numberofkeptchars3 = bytepopcount(w3); 278 | uint8x16_t shuf3 = vld1q_u8(shufmask + 16 * neonmovemask_addv(w3)); 279 | uint8x16_t reshuf3 = vqtbl1q_u8(vecbytes3,shuf3); 280 | 281 | vst1q_u8((uint8_t *)bytes + pos,reshuf0); 282 | pos += numberofkeptchars0; 283 | 284 | vst1q_u8((uint8_t *)bytes + pos,reshuf1); 285 | pos += numberofkeptchars1; 286 | 287 | vst1q_u8((uint8_t *)bytes + pos,reshuf2); 288 | pos += numberofkeptchars2; 289 | 290 | vst1q_u8((uint8_t *)bytes + pos,reshuf3); 291 | pos += numberofkeptchars3; 292 | 293 | } 294 | while (i < howmany) { 295 | const unsigned char c = bytes[i++]; 296 | bytes[pos] = c; 297 | pos += (c > 32) ? 
1 : 0; 298 | } 299 | return pos; 300 | } 301 | #endif // defined(__aarch64__) 302 | 303 | 304 | #endif // __ARM_NEON 305 | 306 | #endif // end of file 307 | -------------------------------------------------------------------------------- /SpacePruner/interleaved_despacer.c: -------------------------------------------------------------------------------- 1 | // 2 | // interleaved_despacer.c 3 | // SpacePruner 4 | // 5 | // Created by Derek Ledbetter on 2017-07-07. 6 | // 7 | 8 | #include "interleaved_despacer.h" 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #if __ARM_NEON 16 | #include 17 | 18 | #define PRINT_8x8(var) ((void)printf("%s = %02X %02X %02X %02X %02X %02X %02X %02X\n", #var, var[0], var[1], var[2], var[3], var[4], var[5], var[6], var[7])) 19 | 20 | size_t neon_interleaved_despace(char *bytes, size_t howmany) { 21 | const size_t blockSize = 8 * 16; 22 | const uint8_t space = 32; 23 | 24 | uint8_t* dest = (uint8_t*)bytes; 25 | const uint8_t* source = (uint8_t*)bytes; 26 | const uint8_t* sourceEnd = source + howmany; 27 | 28 | while (sourceEnd - source >= blockSize) { 29 | /* 30 | Represent indices in octal. 
31 | 32 | vld4q_u8 gives 4 uint8x16_t 33 | [ 00 04 10 14 … ] 34 | [ 01 05 11 15 … ] 35 | [ 02 06 12 16 … ] 36 | [ 03 07 13 17 … ] 37 | 38 | Test > 32, then AND with these masks, repeated eight times: 39 | [ b0000_0001 b0001_0000 … ] 40 | [ b0000_0010 b0010_0000 … ] 41 | [ b0000_0100 b0100_0000 … ] 42 | [ b0000_1000 b1000_0000 … ] 43 | 44 | Then OR these together to get a single uint8x16_t: 45 | [ 0000 03–00, 07–04 0000, 0000 13–10, 17–14 0000, … ] 46 | 47 | Unzip and OR together: 48 | [ 00–07, 10–17, 20–27, 30–37, 40–47, 50–57, 60–67, 70–77 ] 49 | */ 50 | 51 | uint8x16_t goodBitsZipped[2]; 52 | for (int i = 0; i != 2; ++i) { 53 | const uint8x16_t mask0 = vreinterpretq_u8_u16(vdupq_n_u16(0x1001)); 54 | const uint8x16x4_t masks = { 55 | mask0, // 0x01 0x10 56 | vshlq_n_u8(mask0, 1), // 0x02 0x20 57 | vshlq_n_u8(mask0, 2), // 0x04 0x40 58 | vshlq_n_u8(mask0, 3), // 0x08 0x80 59 | }; 60 | 61 | uint8x16x4_t characters = vld4q_u8(source + 8 * 8 * i); 62 | 63 | uint8x16_t goodBits_i = vdupq_n_u8(0); 64 | for (int j = 0; j != 4; ++j) { 65 | uint8x16_t good = vcgtq_u8(characters.val[j], vdupq_n_u8(space)); 66 | goodBits_i = vorrq_u8(goodBits_i, vandq_u8(masks.val[j], good)); 67 | } 68 | 69 | goodBitsZipped[i] = goodBits_i; 70 | } 71 | 72 | uint8x16x2_t unzipped = vuzpq_u8(goodBitsZipped[0], goodBitsZipped[1]); 73 | uint8x16_t goodBits = vorrq_u8(unzipped.val[0], unzipped.val[1]); 74 | 75 | uint8x16_t goodCount = vcntq_u8(goodBits); 76 | 77 | uint8x16_t unzippedIndices[8]; 78 | _Pragma("unroll") for (int i = 0; i != 8; ++i) { 79 | /* 80 | Suppose that goodBits[i] ends in a 1 followed by k 0's. 81 | Then subtracting one will change those to a 0 followed by k 1's, leaving the top bits the same. 82 | ANDing will clear the lowest set bit. 83 | ANDing minus one with the complemented original will give a number that ends in k 1's, 84 | and zero elsewhere. 
85 | */ 86 | uint8x16_t minusOne = vsubq_u8(goodBits, vdupq_n_u8(1)); 87 | unzippedIndices[i] = vcntq_u8(vbicq_u8(minusOne, goodBits)); 88 | goodBits = vandq_u8(goodBits, minusOne); 89 | } 90 | 91 | #define ZIP_BY_1(v0, v1) \ 92 | vzipq_u8(v0, v1).val[0], \ 93 | vzipq_u8(v0, v1).val[1], 94 | #define ZIP_BY_2(v0, v1) \ 95 | vreinterpretq_u8_u16(vzipq_u16(vreinterpretq_u16_u8(v0), vreinterpretq_u16_u8(v1)).val[0]), \ 96 | vreinterpretq_u8_u16(vzipq_u16(vreinterpretq_u16_u8(v0), vreinterpretq_u16_u8(v1)).val[1]), 97 | #define ZIP_BY_4(v0, v1) \ 98 | vreinterpretq_u8_u32(vzipq_u32(vreinterpretq_u32_u8(v0), vreinterpretq_u32_u8(v1)).val[0]), \ 99 | vreinterpretq_u8_u32(vzipq_u32(vreinterpretq_u32_u8(v0), vreinterpretq_u32_u8(v1)).val[1]), 100 | 101 | const uint8x16_t indices_01[2] = { 102 | ZIP_BY_1(unzippedIndices[0], unzippedIndices[1]) 103 | }; 104 | const uint8x16_t indices_23[2] = { 105 | ZIP_BY_1(unzippedIndices[2], unzippedIndices[3]) 106 | }; 107 | const uint8x16_t indices_45[2] = { 108 | ZIP_BY_1(unzippedIndices[4], unzippedIndices[5]) 109 | }; 110 | const uint8x16_t indices_67[2] = { 111 | ZIP_BY_1(unzippedIndices[6], unzippedIndices[7]) 112 | }; 113 | const uint8x16_t indices_0123[4] = { 114 | ZIP_BY_2(indices_01[0], indices_23[0]) 115 | ZIP_BY_2(indices_01[1], indices_23[1]) 116 | }; 117 | const uint8x16_t indices_4567[4] = { 118 | ZIP_BY_2(indices_45[0], indices_67[0]) 119 | ZIP_BY_2(indices_45[1], indices_67[1]) 120 | }; 121 | const uint8x16_t indices[8] = { 122 | ZIP_BY_4(indices_0123[0], indices_4567[0]) 123 | ZIP_BY_4(indices_0123[1], indices_4567[1]) 124 | ZIP_BY_4(indices_0123[2], indices_4567[2]) 125 | ZIP_BY_4(indices_0123[3], indices_4567[3]) 126 | }; 127 | 128 | _Pragma("unroll") for (int i = 0; i != 8; ++i) { 129 | const uint8x16_t originalCharacters = vld1q_u8(source + 16 * i); 130 | 131 | #if defined(__aarch64__) 132 | const uint8x16_t offset = vextq_u8(vdupq_n_u8(0), vdupq_n_u8(8), 8); 133 | const uint8x16_t pickedCharacters = 
vqtbl1q_u8(originalCharacters, vaddq_u8(indices[i], offset)); 134 | const uint8x8_t pickedCharacters1 = vget_low_u8(pickedCharacters); 135 | const uint8x8_t pickedCharacters2 = vget_high_u8(pickedCharacters); 136 | #else 137 | const uint8x8_t pickedCharacters1 = vtbl1_u8(vget_low_u8(originalCharacters), vget_low_u8(indices[i])); 138 | const uint8x8_t pickedCharacters2 = vtbl1_u8(vget_high_u8(originalCharacters), vget_high_u8(indices[i])); 139 | #endif 140 | 141 | vst1_u8(dest, pickedCharacters1); 142 | dest += vgetq_lane_u8(goodCount, 0); 143 | vst1_u8(dest, pickedCharacters2); 144 | dest += vgetq_lane_u8(goodCount, 1); 145 | goodCount = vextq_u8(goodCount, goodCount, 2); 146 | } 147 | 148 | source += blockSize; 149 | } 150 | while (source < sourceEnd) { 151 | const char c = *source++; 152 | if (c > space) { 153 | *dest++ = c; 154 | } 155 | } 156 | return (char*)dest - bytes; 157 | } 158 | 159 | #endif 160 | -------------------------------------------------------------------------------- /SpacePruner/interleaved_despacer.h: -------------------------------------------------------------------------------- 1 | // 2 | // interleaved_despacer.h 3 | // SpacePruner 4 | // 5 | // Created by Derek Ledbetter on 2017-07-07. 6 | // 7 | 8 | #ifndef interleaved_despacer_h 9 | #define interleaved_despacer_h 10 | 11 | #include 12 | #include 13 | 14 | #if __ARM_NEON 15 | size_t neon_interleaved_despace(char *bytes, size_t howmany); 16 | #endif 17 | 18 | #endif /* interleaved_despacer_h */ 19 | -------------------------------------------------------------------------------- /SpacePruner/main.m: -------------------------------------------------------------------------------- 1 | // 2 | // main.m 3 | // SpacePruner 4 | // 5 | // Created by Derek Ledbetter on 2017-07-06. 
6 | // 7 | 8 | #import 9 | #import "AppDelegate.h" 10 | 11 | int main(int argc, char * argv[]) { 12 | @autoreleasepool { 13 | return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class])); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /SpacePruner/unzipping_despacer.c: -------------------------------------------------------------------------------- 1 | // 2 | // unzipping_despacer.c 3 | // SpacePruner 4 | // 5 | // Created by Derek Ledbetter on 2017-07-17. 6 | // 7 | 8 | #include "unzipping_despacer.h" 9 | 10 | #ifdef __ARM_NEON 11 | #include 12 | 13 | size_t neon_unzipping_despace(char *bytes, size_t howmany) { 14 | const size_t blockCount = 2; 15 | const size_t blockSize = 8 * 16; 16 | const uint8_t space = 32; 17 | 18 | uint8_t* dest = (uint8_t*)bytes; 19 | const uint8_t* source = (uint8_t*)bytes; 20 | const uint8_t* sourceEnd = source + howmany; 21 | 22 | while (sourceEnd - source >= blockCount * blockSize) { 23 | /* 24 | Represent indices in octal. 
25 | 26 | vld4q_u8 gives 4 uint8x16_t 27 | [ 00 04 10 14 … ] 28 | [ 01 05 11 15 … ] 29 | [ 02 06 12 16 … ] 30 | [ 03 07 13 17 … ] 31 | 32 | Test > 32, then AND with these masks, repeated eight times: 33 | [ b0000_0001 b0001_0000 … ] 34 | [ b0000_0010 b0010_0000 … ] 35 | [ b0000_0100 b0100_0000 … ] 36 | [ b0000_1000 b1000_0000 … ] 37 | 38 | Then OR these together to get a single uint8x16_t: 39 | [ 0000 03–00, 07–04 0000, 0000 13–10, 17–14 0000, … ] 40 | 41 | Unzip and OR together: 42 | [ 00–07, 10–17, 20–27, 30–37, 40–47, 50–57, 60–67, 70–77 ] 43 | */ 44 | const uint8x16_t mask0 = vreinterpretq_u8_u16(vdupq_n_u16(0x1001)); 45 | const uint8x16x4_t masks = { 46 | mask0, // 0x01 0x10 47 | vshlq_n_u8(mask0, 1), // 0x02 0x20 48 | vshlq_n_u8(mask0, 2), // 0x04 0x40 49 | vshlq_n_u8(mask0, 3), // 0x08 0x80 50 | }; 51 | 52 | uint8x16_t goodBits[blockCount]; 53 | uint8x16_t goodCount[blockCount]; 54 | for (int j = 0; j != blockCount; ++j) { 55 | uint8x16_t goodBitsZipped[2]; 56 | for (int i = 0; i != 2; ++i) { 57 | uint8x16x4_t characters = vld4q_u8(source + 8 * 8 * (2 * j + i)); 58 | 59 | uint8x16_t goodBits_ij = vdupq_n_u8(0); 60 | for (int j = 0; j != 4; ++j) { 61 | uint8x16_t good = vcgtq_u8(characters.val[j], vdupq_n_u8(space)); 62 | goodBits_ij = vorrq_u8(goodBits_ij, vandq_u8(masks.val[j], good)); 63 | } 64 | 65 | goodBitsZipped[i] = goodBits_ij; 66 | } 67 | 68 | const uint8x16x2_t unzipped = vuzpq_u8(goodBitsZipped[0], goodBitsZipped[1]); 69 | goodBits[j] = vorrq_u8(unzipped.val[0], unzipped.val[1]); 70 | goodCount[j] = vcntq_u8(goodBits[j]); 71 | } 72 | 73 | /* 74 | If we do polynomial multiplication of a sequence of bits by a sequence of all ones, 75 | then each bit in the result is the XOR of the corresponding bit in the original and all of the 76 | bits to the right. We can then pick out alternating set bits by ANDing the original with 77 | both the product and the complement of the product. 
78 | 79 | Examples: 80 | 00011110 1111111 81 | 82 | 00100110 00011110 11111111 83 | × 11111111 × 11111111 × 11111111 84 | __________ __________ __________ 85 | 00100110 00011110 11111111 86 | 00100110 00011110 11111111 87 | 00100110 00011110 11111111 88 | 00100110 00011110 11111111 89 | 00100110 00011110 11111111 90 | 00100110 00011110 11111111 91 | 00100110 00011110 11111111 92 | 00100110 00011110 11111111 93 | ________________ ________________ ________________ 94 | 0001110111100010 0000101000001010 0101010101010101 95 | 96 | Truncate: 97 | 11100010 00001010 01010101 98 | AND with original: 99 | 00100010 00001010 01010101 100 | AND complement with original: 101 | 00000100 00010100 10101010 102 | 103 | We can then do twice more to get eight bytes with the set bits in consecutive order, 104 | followed by zeroes once the set bits are exhausted. 105 | */ 106 | const poly8x16_t allOnesPoly = vdupq_n_u8(~0); 107 | 108 | uint8x16_t level1[2 * blockCount]; 109 | _Pragma("unroll") for (int i = 0; i != blockCount; ++i) { 110 | const uint8x16_t source = goodBits[i]; 111 | const uint8x16_t product = vreinterpretq_u8_p8(vmulq_p8(allOnesPoly, source)); 112 | const uint8x16_t evens = vandq_u8(source, product); 113 | const uint8x16_t odds = vbicq_u8(source, product); 114 | level1[2 * i + 0] = vzipq_u8(evens, odds).val[0]; 115 | level1[2 * i + 1] = vzipq_u8(evens, odds).val[1]; 116 | } 117 | 118 | uint8x16_t level2[4 * blockCount]; 119 | _Pragma("unroll") for (int i = 0; i != 2 * blockCount; ++i) { 120 | const uint8x16_t source = level1[i]; 121 | const uint8x16_t product = vreinterpretq_u8_p8(vmulq_p8(allOnesPoly, source)); 122 | const uint8x16_t evens = vandq_u8(source, product); 123 | const uint8x16_t odds = vbicq_u8(source, product); 124 | level2[2*i + 0] = vreinterpretq_u8_u16(vzipq_u16(vreinterpretq_u16_u8(evens), vreinterpretq_u16_u8(odds)).val[0]); 125 | level2[2*i + 1] = vreinterpretq_u8_u16(vzipq_u16(vreinterpretq_u16_u8(evens), vreinterpretq_u16_u8(odds)).val[1]); 126 | 
} 127 | 128 | uint8x16_t indices[8 * blockCount]; 129 | _Pragma("unroll") for (int i = 0; i != 4 * blockCount; ++i) { 130 | // There's at most 2 set bits left, so just read their positions directly. 131 | const uint8x16_t source = level2[i]; 132 | const uint8x16_t low = vcntq_u8(vbicq_u8(vsubq_u8(source, vdupq_n_u8(1)), source)); 133 | const uint8x16_t high = veorq_u8(vdupq_n_u8(7), vclzq_u8(source)); 134 | indices[2*i + 0] = vreinterpretq_u8_u32(vzipq_u32(vreinterpretq_u32_u8(low), vreinterpretq_u32_u8(high)).val[0]); 135 | indices[2*i + 1] = vreinterpretq_u8_u32(vzipq_u32(vreinterpretq_u32_u8(low), vreinterpretq_u32_u8(high)).val[1]); 136 | } 137 | 138 | _Pragma("unroll") for (int j = 0; j != blockCount; ++j) { 139 | uint8x16_t goodCount_j = goodCount[j]; 140 | 141 | _Pragma("unroll") for (int i = 0; i != 8; ++i) { 142 | const uint8x16_t originalCharacters = vld1q_u8(source + 16 * (8 * j + i)); 143 | const uint8x8_t pickedCharacters1 = vtbl1_u8(vget_low_u8(originalCharacters), vget_low_u8(indices[8 * j + i])); 144 | const uint8x8_t pickedCharacters2 = vtbl1_u8(vget_high_u8(originalCharacters), vget_high_u8(indices[8 * j + i])); 145 | 146 | vst1_u8(dest, pickedCharacters1); 147 | dest += vgetq_lane_u8(goodCount_j, 0); 148 | vst1_u8(dest, pickedCharacters2); 149 | dest += vgetq_lane_u8(goodCount_j, 1); 150 | goodCount_j = vextq_u8(goodCount_j, goodCount_j, 2); 151 | } 152 | } 153 | 154 | source += blockCount * blockSize; 155 | } 156 | while (source < sourceEnd) { 157 | const char c = *source++; 158 | if (c > space) { 159 | *dest++ = c; 160 | } 161 | } 162 | return (char*)dest - bytes; 163 | } 164 | 165 | #endif // __ARM_NEON 166 | -------------------------------------------------------------------------------- /SpacePruner/unzipping_despacer.h: -------------------------------------------------------------------------------- 1 | // 2 | // unzipping_despacer.h 3 | // SpacePruner 4 | // 5 | // Created by Derek Ledbetter on 2017-07-17. 
6 | // 7 | 8 | #ifndef unzipping_despacer_h 9 | #define unzipping_despacer_h 10 | 11 | #include 12 | #include 13 | 14 | #ifdef __ARM_NEON 15 | size_t neon_unzipping_despace(char *bytes, size_t howmany); 16 | #endif 17 | 18 | #endif /* unzipping_despacer_h */ 19 | --------------------------------------------------------------------------------