├── .gitignore ├── LICENSE.txt ├── README.md ├── Tokenizing.xcodeproj ├── project.pbxproj └── project.xcworkspace │ └── contents.xcworkspacedata ├── Tokenizing ├── CharacterBasedTokenizer.swift ├── Measuring.swift ├── SampleData.swift ├── Token.swift ├── Tokenizing.swift ├── TokenizingError.swift ├── UnicodeScalarBasedTokenizer.swift └── main.swift └── emit_sil.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Xcode 2 | # 3 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 4 | 5 | ## Build generated 6 | build/ 7 | DerivedData/ 8 | 9 | ## Various settings 10 | *.pbxuser 11 | !default.pbxuser 12 | *.mode1v3 13 | !default.mode1v3 14 | *.mode2v3 15 | !default.mode2v3 16 | *.perspectivev3 17 | !default.perspectivev3 18 | xcuserdata/ 19 | 20 | ## Other 21 | *.moved-aside 22 | *.xcuserstate 23 | 24 | ## Obj-C/Swift specific 25 | *.hmap 26 | *.ipa 27 | *.dSYM.zip 28 | *.dSYM 29 | 30 | ## Playgrounds 31 | timeline.xctimeline 32 | playground.xcworkspace 33 | 34 | # Swift Package Manager 35 | # 36 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. 37 | # Packages/ 38 | .build/ 39 | 40 | # CocoaPods 41 | # 42 | # We recommend against adding the Pods directory to your .gitignore. However 43 | # you should judge for yourself, the pros and cons are mentioned at: 44 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 45 | # 46 | # Pods/ 47 | 48 | # Carthage 49 | # 50 | # Add this line if you want to avoid checking in source code from Carthage dependencies. 51 | # Carthage/Checkouts 52 | 53 | Carthage/Build 54 | 55 | # fastlane 56 | # 57 | # It is recommended to not store the screenshots in the git repo. Instead, use fastlane to re-generate the 58 | # screenshots whenever they are needed. 
59 | # For more information about the recommended setup visit: 60 | # https://github.com/fastlane/fastlane/blob/master/fastlane/docs/Gitignore.md 61 | 62 | fastlane/report.xml 63 | fastlane/Preview.html 64 | fastlane/screenshots 65 | fastlane/test_output 66 | 67 | # Emitted SIL 68 | *.sil.txt 69 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Strings, characters, and performance in Swift—a deep dive 2 | 3 | This is the companion code for the article that can be found here: 4 | 5 | https://medium.com/@tonyallevato/strings-characters-and-performance-in-swift-a-deep-dive-b7b5bde58d53 6 | 7 | This code was written for Swift 3.0.1 and the performance implications 8 | described in the article apply to that version of the language. 9 | -------------------------------------------------------------------------------- /Tokenizing.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 
2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 46; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | AA34E5E31DF120D5006CE7AE /* main.swift in Sources */ = {isa = PBXBuildFile; fileRef = AA34E5E21DF120D5006CE7AE /* main.swift */; }; 11 | AAACB1861E0D99F2005FE286 /* Token.swift in Sources */ = {isa = PBXBuildFile; fileRef = AAACB1851E0D99F2005FE286 /* Token.swift */; }; 12 | AAACB1881E0D99FB005FE286 /* TokenizingError.swift in Sources */ = {isa = PBXBuildFile; fileRef = AAACB1871E0D99FB005FE286 /* TokenizingError.swift */; }; 13 | AAACB18A1E0D9A05005FE286 /* Tokenizing.swift in Sources */ = {isa = PBXBuildFile; fileRef = AAACB1891E0D9A05005FE286 /* Tokenizing.swift */; }; 14 | AAACB18C1E0D9A12005FE286 /* CharacterBasedTokenizer.swift in Sources */ = {isa = PBXBuildFile; fileRef = AAACB18B1E0D9A12005FE286 /* CharacterBasedTokenizer.swift */; }; 15 | AAACB18E1E0D9B39005FE286 /* SampleData.swift in Sources */ = {isa = PBXBuildFile; fileRef = AAACB18D1E0D9B39005FE286 /* SampleData.swift */; }; 16 | AAACB1901E0D9BC7005FE286 /* Measuring.swift in Sources */ = {isa = PBXBuildFile; fileRef = AAACB18F1E0D9BC7005FE286 /* Measuring.swift */; }; 17 | AAACB1941E124099005FE286 /* UnicodeScalarBasedTokenizer.swift in Sources */ = {isa = PBXBuildFile; fileRef = AAACB1931E124099005FE286 /* UnicodeScalarBasedTokenizer.swift */; }; 18 | /* End PBXBuildFile section */ 19 | 20 | /* Begin PBXCopyFilesBuildPhase section */ 21 | AA34E5DD1DF120D5006CE7AE /* CopyFiles */ = { 22 | isa = PBXCopyFilesBuildPhase; 23 | buildActionMask = 2147483647; 24 | dstPath = /usr/share/man/man1/; 25 | dstSubfolderSpec = 0; 26 | files = ( 27 | ); 28 | runOnlyForDeploymentPostprocessing = 1; 29 | }; 30 | /* End PBXCopyFilesBuildPhase section */ 31 | 32 | /* Begin PBXFileReference section */ 33 | AA34E5DF1DF120D5006CE7AE /* Tokenizing */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = Tokenizing; sourceTree = 
BUILT_PRODUCTS_DIR; }; 34 | AA34E5E21DF120D5006CE7AE /* main.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = main.swift; sourceTree = ""; }; 35 | AAACB1851E0D99F2005FE286 /* Token.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Token.swift; sourceTree = ""; }; 36 | AAACB1871E0D99FB005FE286 /* TokenizingError.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TokenizingError.swift; sourceTree = ""; }; 37 | AAACB1891E0D9A05005FE286 /* Tokenizing.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Tokenizing.swift; sourceTree = ""; }; 38 | AAACB18B1E0D9A12005FE286 /* CharacterBasedTokenizer.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CharacterBasedTokenizer.swift; sourceTree = ""; }; 39 | AAACB18D1E0D9B39005FE286 /* SampleData.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SampleData.swift; sourceTree = ""; }; 40 | AAACB18F1E0D9BC7005FE286 /* Measuring.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; path = Measuring.swift; sourceTree = ""; }; 41 | AAACB1931E124099005FE286 /* UnicodeScalarBasedTokenizer.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = UnicodeScalarBasedTokenizer.swift; sourceTree = ""; }; 42 | /* End PBXFileReference section */ 43 | 44 | /* Begin PBXFrameworksBuildPhase section */ 45 | AA34E5DC1DF120D5006CE7AE /* Frameworks */ = { 46 | isa = PBXFrameworksBuildPhase; 47 | buildActionMask = 2147483647; 48 | files = ( 49 | ); 50 | runOnlyForDeploymentPostprocessing = 0; 51 | }; 52 | /* End PBXFrameworksBuildPhase section */ 53 | 54 | /* Begin PBXGroup section */ 55 | AA34E5D61DF120D5006CE7AE = { 56 | isa = PBXGroup; 57 | children = ( 58 | AA34E5E11DF120D5006CE7AE /* 
Tokenizing */, 59 | AA34E5E01DF120D5006CE7AE /* Products */, 60 | ); 61 | sourceTree = ""; 62 | }; 63 | AA34E5E01DF120D5006CE7AE /* Products */ = { 64 | isa = PBXGroup; 65 | children = ( 66 | AA34E5DF1DF120D5006CE7AE /* Tokenizing */, 67 | ); 68 | name = Products; 69 | sourceTree = ""; 70 | }; 71 | AA34E5E11DF120D5006CE7AE /* Tokenizing */ = { 72 | isa = PBXGroup; 73 | children = ( 74 | AAACB18B1E0D9A12005FE286 /* CharacterBasedTokenizer.swift */, 75 | AA34E5E21DF120D5006CE7AE /* main.swift */, 76 | AAACB18F1E0D9BC7005FE286 /* Measuring.swift */, 77 | AAACB18D1E0D9B39005FE286 /* SampleData.swift */, 78 | AAACB1851E0D99F2005FE286 /* Token.swift */, 79 | AAACB1891E0D9A05005FE286 /* Tokenizing.swift */, 80 | AAACB1871E0D99FB005FE286 /* TokenizingError.swift */, 81 | AAACB1931E124099005FE286 /* UnicodeScalarBasedTokenizer.swift */, 82 | ); 83 | path = Tokenizing; 84 | sourceTree = ""; 85 | }; 86 | /* End PBXGroup section */ 87 | 88 | /* Begin PBXNativeTarget section */ 89 | AA34E5DE1DF120D5006CE7AE /* Tokenizing */ = { 90 | isa = PBXNativeTarget; 91 | buildConfigurationList = AA34E5E61DF120D5006CE7AE /* Build configuration list for PBXNativeTarget "Tokenizing" */; 92 | buildPhases = ( 93 | AA34E5DB1DF120D5006CE7AE /* Sources */, 94 | AA34E5DC1DF120D5006CE7AE /* Frameworks */, 95 | AA34E5DD1DF120D5006CE7AE /* CopyFiles */, 96 | ); 97 | buildRules = ( 98 | ); 99 | dependencies = ( 100 | ); 101 | name = Tokenizing; 102 | productName = Tokenizing; 103 | productReference = AA34E5DF1DF120D5006CE7AE /* Tokenizing */; 104 | productType = "com.apple.product-type.tool"; 105 | }; 106 | /* End PBXNativeTarget section */ 107 | 108 | /* Begin PBXProject section */ 109 | AA34E5D71DF120D5006CE7AE /* Project object */ = { 110 | isa = PBXProject; 111 | attributes = { 112 | LastSwiftUpdateCheck = 0810; 113 | LastUpgradeCheck = 0810; 114 | ORGANIZATIONNAME = "Tony Allevato"; 115 | TargetAttributes = { 116 | AA34E5DE1DF120D5006CE7AE = { 117 | CreatedOnToolsVersion = 8.1; 118 | 
DevelopmentTeam = BM6BV7V52K; 119 | ProvisioningStyle = Automatic; 120 | }; 121 | }; 122 | }; 123 | buildConfigurationList = AA34E5DA1DF120D5006CE7AE /* Build configuration list for PBXProject "Tokenizing" */; 124 | compatibilityVersion = "Xcode 3.2"; 125 | developmentRegion = English; 126 | hasScannedForEncodings = 0; 127 | knownRegions = ( 128 | en, 129 | ); 130 | mainGroup = AA34E5D61DF120D5006CE7AE; 131 | productRefGroup = AA34E5E01DF120D5006CE7AE /* Products */; 132 | projectDirPath = ""; 133 | projectRoot = ""; 134 | targets = ( 135 | AA34E5DE1DF120D5006CE7AE /* Tokenizing */, 136 | ); 137 | }; 138 | /* End PBXProject section */ 139 | 140 | /* Begin PBXSourcesBuildPhase section */ 141 | AA34E5DB1DF120D5006CE7AE /* Sources */ = { 142 | isa = PBXSourcesBuildPhase; 143 | buildActionMask = 2147483647; 144 | files = ( 145 | AAACB18C1E0D9A12005FE286 /* CharacterBasedTokenizer.swift in Sources */, 146 | AAACB18E1E0D9B39005FE286 /* SampleData.swift in Sources */, 147 | AA34E5E31DF120D5006CE7AE /* main.swift in Sources */, 148 | AAACB18A1E0D9A05005FE286 /* Tokenizing.swift in Sources */, 149 | AAACB1941E124099005FE286 /* UnicodeScalarBasedTokenizer.swift in Sources */, 150 | AAACB1861E0D99F2005FE286 /* Token.swift in Sources */, 151 | AAACB1901E0D9BC7005FE286 /* Measuring.swift in Sources */, 152 | AAACB1881E0D99FB005FE286 /* TokenizingError.swift in Sources */, 153 | ); 154 | runOnlyForDeploymentPostprocessing = 0; 155 | }; 156 | /* End PBXSourcesBuildPhase section */ 157 | 158 | /* Begin XCBuildConfiguration section */ 159 | AA34E5E41DF120D5006CE7AE /* Debug */ = { 160 | isa = XCBuildConfiguration; 161 | buildSettings = { 162 | ALWAYS_SEARCH_USER_PATHS = NO; 163 | CLANG_ANALYZER_NONNULL = YES; 164 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; 165 | CLANG_CXX_LIBRARY = "libc++"; 166 | CLANG_ENABLE_MODULES = YES; 167 | CLANG_ENABLE_OBJC_ARC = YES; 168 | CLANG_WARN_BOOL_CONVERSION = YES; 169 | CLANG_WARN_CONSTANT_CONVERSION = YES; 170 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = 
YES_ERROR; 171 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 172 | CLANG_WARN_EMPTY_BODY = YES; 173 | CLANG_WARN_ENUM_CONVERSION = YES; 174 | CLANG_WARN_INFINITE_RECURSION = YES; 175 | CLANG_WARN_INT_CONVERSION = YES; 176 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 177 | CLANG_WARN_SUSPICIOUS_MOVES = YES; 178 | CLANG_WARN_UNREACHABLE_CODE = YES; 179 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 180 | CODE_SIGN_IDENTITY = "-"; 181 | COPY_PHASE_STRIP = NO; 182 | DEBUG_INFORMATION_FORMAT = dwarf; 183 | ENABLE_STRICT_OBJC_MSGSEND = YES; 184 | ENABLE_TESTABILITY = YES; 185 | GCC_C_LANGUAGE_STANDARD = gnu99; 186 | GCC_DYNAMIC_NO_PIC = NO; 187 | GCC_NO_COMMON_BLOCKS = YES; 188 | GCC_OPTIMIZATION_LEVEL = 0; 189 | GCC_PREPROCESSOR_DEFINITIONS = ( 190 | "DEBUG=1", 191 | "$(inherited)", 192 | ); 193 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 194 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 195 | GCC_WARN_UNDECLARED_SELECTOR = YES; 196 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 197 | GCC_WARN_UNUSED_FUNCTION = YES; 198 | GCC_WARN_UNUSED_VARIABLE = YES; 199 | MACOSX_DEPLOYMENT_TARGET = 10.12; 200 | MTL_ENABLE_DEBUG_INFO = YES; 201 | ONLY_ACTIVE_ARCH = YES; 202 | SDKROOT = macosx; 203 | SWIFT_OPTIMIZATION_LEVEL = "-Onone"; 204 | }; 205 | name = Debug; 206 | }; 207 | AA34E5E51DF120D5006CE7AE /* Release */ = { 208 | isa = XCBuildConfiguration; 209 | buildSettings = { 210 | ALWAYS_SEARCH_USER_PATHS = NO; 211 | CLANG_ANALYZER_NONNULL = YES; 212 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; 213 | CLANG_CXX_LIBRARY = "libc++"; 214 | CLANG_ENABLE_MODULES = YES; 215 | CLANG_ENABLE_OBJC_ARC = YES; 216 | CLANG_WARN_BOOL_CONVERSION = YES; 217 | CLANG_WARN_CONSTANT_CONVERSION = YES; 218 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 219 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 220 | CLANG_WARN_EMPTY_BODY = YES; 221 | CLANG_WARN_ENUM_CONVERSION = YES; 222 | CLANG_WARN_INFINITE_RECURSION = YES; 223 | CLANG_WARN_INT_CONVERSION = YES; 224 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 225 | 
CLANG_WARN_SUSPICIOUS_MOVES = YES; 226 | CLANG_WARN_UNREACHABLE_CODE = YES; 227 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 228 | CODE_SIGN_IDENTITY = "-"; 229 | COPY_PHASE_STRIP = NO; 230 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 231 | ENABLE_NS_ASSERTIONS = NO; 232 | ENABLE_STRICT_OBJC_MSGSEND = YES; 233 | GCC_C_LANGUAGE_STANDARD = gnu99; 234 | GCC_NO_COMMON_BLOCKS = YES; 235 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 236 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 237 | GCC_WARN_UNDECLARED_SELECTOR = YES; 238 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 239 | GCC_WARN_UNUSED_FUNCTION = YES; 240 | GCC_WARN_UNUSED_VARIABLE = YES; 241 | MACOSX_DEPLOYMENT_TARGET = 10.12; 242 | MTL_ENABLE_DEBUG_INFO = NO; 243 | SDKROOT = macosx; 244 | }; 245 | name = Release; 246 | }; 247 | AA34E5E71DF120D5006CE7AE /* Debug */ = { 248 | isa = XCBuildConfiguration; 249 | buildSettings = { 250 | DEVELOPMENT_TEAM = BM6BV7V52K; 251 | PRODUCT_NAME = "$(TARGET_NAME)"; 252 | SWIFT_OPTIMIZATION_LEVEL = "-O"; 253 | SWIFT_VERSION = 3.0; 254 | }; 255 | name = Debug; 256 | }; 257 | AA34E5E81DF120D5006CE7AE /* Release */ = { 258 | isa = XCBuildConfiguration; 259 | buildSettings = { 260 | DEVELOPMENT_TEAM = BM6BV7V52K; 261 | PRODUCT_NAME = "$(TARGET_NAME)"; 262 | SWIFT_VERSION = 3.0; 263 | }; 264 | name = Release; 265 | }; 266 | /* End XCBuildConfiguration section */ 267 | 268 | /* Begin XCConfigurationList section */ 269 | AA34E5DA1DF120D5006CE7AE /* Build configuration list for PBXProject "Tokenizing" */ = { 270 | isa = XCConfigurationList; 271 | buildConfigurations = ( 272 | AA34E5E41DF120D5006CE7AE /* Debug */, 273 | AA34E5E51DF120D5006CE7AE /* Release */, 274 | ); 275 | defaultConfigurationIsVisible = 0; 276 | defaultConfigurationName = Release; 277 | }; 278 | AA34E5E61DF120D5006CE7AE /* Build configuration list for PBXNativeTarget "Tokenizing" */ = { 279 | isa = XCConfigurationList; 280 | buildConfigurations = ( 281 | AA34E5E71DF120D5006CE7AE /* Debug */, 282 | 
AA34E5E81DF120D5006CE7AE /* Release */, 283 | ); 284 | defaultConfigurationIsVisible = 0; 285 | defaultConfigurationName = Release; 286 | }; 287 | /* End XCConfigurationList section */ 288 | }; 289 | rootObject = AA34E5D71DF120D5006CE7AE /* Project object */; 290 | } 291 | -------------------------------------------------------------------------------- /Tokenizing.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /Tokenizing/CharacterBasedTokenizer.swift: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Tony Allevato 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | 16 | /// A tokenizer that works with the character views of strings. 17 | struct CharacterBasedTokenizer: Tokenizing { 18 | 19 | /// Used during scanning to return each character in the string. 20 | private var iterator: String.CharacterView.Iterator 21 | 22 | /// If non-nil, this character will be returned by the next call to 23 | /// `nextCharacter`. This allows the tokenizer to detect the end of a token 24 | /// based on characters that are not part of that token, and then push the 25 | /// character back so that it can be read as the first character of the next 26 | /// token. 
27 | private var pushedBackCharacter: Character? 28 | 29 | /// Creates a new character-based tokenizer that scans the given string. 30 | /// 31 | /// - Parameter text: The text to tokenize. 32 | init(text: String) { 33 | iterator = text.characters.makeIterator() 34 | } 35 | 36 | /// Returns the next token from the input string. 37 | /// 38 | /// - Returns: The next token from the input string, or nil if the end of the 39 | /// string has been reached. 40 | /// - Throws: A `TokenizingError` if an unexpected error occurred during 41 | /// tokenization, like a malformed integer or unrecognized character. 42 | mutating func nextToken() throws -> Token? { 43 | while let ch = nextCharacter() { 44 | switch ch { 45 | case " ", "\n", "\r", "\t": 46 | // Ignore whitespace. 47 | continue 48 | case ",": 49 | return .comma 50 | case ";": 51 | return .semicolon 52 | case "0"..."9": 53 | return try integerToken(startingWith: ch) 54 | case "\"": 55 | return try stringToken() 56 | default: 57 | throw TokenizingError.unrecognizedCharacter 58 | } 59 | } 60 | return nil 61 | } 62 | 63 | /// Returns the next character to process from the input string. 64 | /// 65 | /// If `pushedBackCharacter` is non-nil, that character will be returned and 66 | /// then that property is cleared. Otherwise, the iterator's next character is 67 | /// returned. 68 | /// 69 | /// - Returns: The next character to process, or nil if the end of the string 70 | /// has been reached. 71 | private mutating func nextCharacter() -> Character? { 72 | if let next = pushedBackCharacter { 73 | pushedBackCharacter = nil 74 | return next 75 | } 76 | return iterator.next() 77 | } 78 | 79 | /// Scans the remainder of an integer token and returns it. 80 | /// 81 | /// - Parameter first: The first character of the integer that has already 82 | /// been scanned. 83 | /// - Returns: A `Token.integer` whose associated value is the integer that 84 | /// was scanned. 
85 | /// - Throws: `TokenizingError.malformedInteger` if the scanned token text could 86 | /// not be converted to an integer (for example, if it was too large). 87 | private mutating func integerToken( 88 | startingWith first: Character 89 | ) throws -> Token { 90 | var tokenText = String(first) 91 | 92 | loop: while let ch = nextCharacter() { 93 | switch ch { 94 | case "0"..."9": 95 | tokenText.append(ch) 96 | default: 97 | pushedBackCharacter = ch 98 | break loop 99 | } 100 | } 101 | 102 | guard let value = Int(tokenText) else { 103 | throw TokenizingError.malformedInteger 104 | } 105 | return .integer(value) 106 | } 107 | 108 | /// Scans the remainder of a quoted string token and returns it. 109 | /// 110 | /// - Returns: A `Token.string` whose associated value is the string that was 111 | /// quoted (without the surrounding quotes). 112 | /// - Throws: `TokenizingError.unterminatedString` if the end of the input was 113 | /// reached without seeing a terminating quote. 114 | private mutating func stringToken() throws -> Token { 115 | var tokenText = String() 116 | 117 | while let ch = nextCharacter() { 118 | switch ch { 119 | case "\"": 120 | return .string(tokenText) 121 | default: 122 | tokenText.append(ch) 123 | } 124 | } 125 | 126 | throw TokenizingError.unterminatedString 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /Tokenizing/Measuring.swift: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Tony Allevato 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | 16 | import Darwin 17 | 18 | /// The number of measurements that should be taken in each call to `measure`. 19 | private let numberOfMeasurements = 5 20 | 21 | /// The number of nanoseconds in one millisecond (10^6). 22 | private let nanosPerMilli = 1_000_000 23 | 24 | /// Executes a block a number of times, measuring its execution time, and then 25 | /// prints a summary of the results (its mean and standard deviation). 26 | /// 27 | /// - Parameter description: The description of the block being measured, which 28 | /// is printed in the summary output. 29 | /// - Parameter block: The block that should be executed and measured. 30 | /// - Throws: Any error that the measured block may throw. 31 | func measure(_ description: String, block: () throws -> Void) rethrows { 32 | var timebase = mach_timebase_info_data_t() 33 | guard mach_timebase_info(&timebase) == KERN_SUCCESS else { 34 | fatalError("mach_timebase_info failed; not much we can do about that.") 35 | } 36 | 37 | print("\(description):") 38 | 39 | // Run the block a couple times to "prime" the benchmark; this avoids the 40 | // first runs being outliers due to data that might be loaded lazily. 41 | for _ in 0..<2 { 42 | _ = try block() 43 | } 44 | 45 | var timings = [Double]() 46 | 47 | for _ in 0.. 
(mean: Double, stddev: Double) { 72 | var sum: Double = 0 73 | var sqsum: Double = 0 74 | for timing in timings { 75 | sum += timing 76 | sqsum += timing * timing 77 | } 78 | let n = Double(timings.count) 79 | let mean = sum / n 80 | let variance = sqsum / n - mean * mean 81 | return (mean: mean, stddev: sqrt(variance)) 82 | } 83 | -------------------------------------------------------------------------------- /Tokenizing/SampleData.swift: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Tony Allevato 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | 16 | /// Contrived sample data for our tokenizing measurements. 
17 | let stateDataString = 18 | "\"California\", 39250017, \"Sacramento\";\n" + 19 | "\"Texas\", 27862596, \"Austin\";\n" + 20 | "\"Florida\", 20612439, \"Tallahassee\";\n" + 21 | "\"New York\", 19745289, \"Albany\";\n" + 22 | "\"Illinois\", 12801539, \"Springfield\";\n" + 23 | "\"Pennsylvania\", 12784227, \"Harrisburg\";\n" + 24 | "\"Ohio\", 11614373, \"Columbus\";\n" + 25 | "\"Georgia\", 10310371, \"Atlanta\";\n" + 26 | "\"North Carolina\", 10146788, \"Raleigh\";\n" + 27 | "\"Michigan\", 9928301, \"Lansing\";\n" + 28 | "\"New Jersey\", 8944469, \"Trenton\";\n" + 29 | "\"Virginia\", 8411808, \"Richmond\";\n" + 30 | "\"Washington\", 7288000, \"Olympia\";\n" + 31 | "\"Arizona\", 6931071, \"Phoenix\";\n" + 32 | "\"Massachusetts\", 6811779, \"Boston\";\n" + 33 | "\"Tennessee\", 6651194, \"Nashville\";\n" + 34 | "\"Indiana\", 6633053, \"Indianapolis\";\n" + 35 | "\"Missouri\", 6093000, \"Jefferson City\";\n" + 36 | "\"Maryland\", 6016447, \"Annapolis\";\n" + 37 | "\"Wisconsin\", 5778708, \"Madison\";\n" + 38 | "\"Colorado\", 5540545, \"Denver\";\n" + 39 | "\"Minnesota\", 5519952, \"Saint Paul\";\n" + 40 | "\"South Carolina\", 4961119, \"Columbia\";\n" + 41 | "\"Alabama\", 4863300, \"Montgomery\";\n" + 42 | "\"Louisiana\", 4681666, \"Baton Rouge\";\n" + 43 | "\"Kentucky\", 4436974, \"Frankfort\";\n" + 44 | "\"Oregon\", 4093465, \"Salem\";\n" + 45 | "\"Oklahoma\", 3923561, \"Oklahoma City\";\n" + 46 | "\"Connecticut\", 3576452, \"Hartford\";\n" + 47 | "\"Iowa\", 3134693, \"Des Moines\";\n" + 48 | "\"Utah\", 3051217, \"Salt Lake City\";\n" + 49 | "\"Mississippi\", 2988726, \"Jackson\";\n" + 50 | "\"Arkansas\", 2988248, \"Little Rock\";\n" + 51 | "\"Nevada\", 2940058, \"Carson City\";\n" + 52 | "\"Kansas\", 2907289, \"Topeka\";\n" + 53 | "\"New Mexico\", 2081015, \"Santa Fe\";\n" + 54 | "\"Nebraska\", 1907116, \"Lincoln\";\n" + 55 | "\"West Virginia\", 1831102, \"Charleston\";\n" + 56 | "\"Idaho\", 1683140, \"Boise\";\n" + 57 | "\"Hawaii\", 1428557, \"Honolulu\";\n" + 58 | 
"\"New Hampshire\", 1334795, \"Concord\";\n" + 59 | "\"Maine\", 1331479, \"Augusta\";\n" + 60 | "\"Rhode Island\", 1056426, \"Providence\";\n" + 61 | "\"Montana\", 1042520, \"Helena\";\n" + 62 | "\"Delaware\", 945934, \"Dover\";\n" + 63 | "\"South Dakota\", 865454, \"Pierre\";\n" + 64 | "\"North Dakota\", 757952, \"Bismarck\";\n" + 65 | "\"Alaska\", 738432, \"Juneau\";\n" + 66 | "\"Vermont\", 624594, \"Montpelier\";\n" + 67 | "\"Wyoming\", 585501, \"Cheyenne\";\n" 68 | -------------------------------------------------------------------------------- /Tokenizing/Token.swift: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Tony Allevato 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | 16 | /// Kinds of tokens recognized and returned by the tokenizer. 17 | enum Token { 18 | 19 | /// A quoted string. The string associated with this value does *not* contain 20 | /// the surrounding quotes from the original text. 21 | case string(String) 22 | 23 | /// An integer. 24 | case integer(Int) 25 | 26 | /// A comma. 27 | case comma 28 | 29 | /// A semicolon. 
30 | case semicolon 31 | } 32 | -------------------------------------------------------------------------------- /Tokenizing/Tokenizing.swift: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Tony Allevato 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | 16 | /// The common interface provided by all tokenizers. 17 | protocol Tokenizing { 18 | 19 | /// Creates a new tokenizer that scans the given string. 20 | /// 21 | /// - Parameter text: The text to tokenize. 22 | init(text: String) 23 | 24 | /// Returns the next token from the input string. 25 | /// 26 | /// - Returns: The next token from the input string, or nil if the end of the 27 | /// string has been reached. 28 | /// - Throws: A `TokenizingError` if an unexpected error occurred during 29 | /// tokenization, like a malformed integer or unrecognized character. 30 | mutating func nextToken() throws -> Token? 31 | } 32 | -------------------------------------------------------------------------------- /Tokenizing/TokenizingError.swift: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Tony Allevato 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | 16 | /// Errors thrown when a problem is encountered while tokenizing a string. 17 | enum TokenizingError: Error { 18 | 19 | /// A token that looked like an integer could not be converted to `Int`. 20 | case malformedInteger 21 | 22 | /// A character was found in the input string that is not recognized by the 23 | /// tokenizer. 24 | case unrecognizedCharacter 25 | 26 | /// The end of the input string was reached while scanning a quoted string. 27 | case unterminatedString 28 | } 29 | -------------------------------------------------------------------------------- /Tokenizing/UnicodeScalarBasedTokenizer.swift: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Tony Allevato 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | 16 | /// A tokenizer that works with the Unicode scalar views of strings. 
17 | struct UnicodeScalarBasedTokenizer: Tokenizing { 18 | 19 | /// Used during scanning to return each Unicode scalar in the string. 20 | private var iterator: String.UnicodeScalarView.Iterator 21 | 22 | /// If non-nil, this scalar will be returned by the next call to `nextScalar`. 23 | /// This allows the tokenizer to detect the end of a token based on scalars 24 | /// that are not part of that token, and then push the scalar back so that it 25 | /// can be read as the first scalar of the next token. 26 | private var pushedBackScalar: UnicodeScalar? 27 | 28 | /// Creates a new `UnicodeScalar`-based tokenizer that scans the given string. 29 | /// 30 | /// - Parameter text: The text to tokenize. 31 | init(text: String) { 32 | iterator = text.unicodeScalars.makeIterator() 33 | } 34 | 35 | /// Returns the next token from the input string. 36 | /// 37 | /// - Returns: The next token from the input string, or nil if the end of the 38 | /// string has been reached. 39 | /// - Throws: A `TokenizingError` if an unexpected error occurred during 40 | /// tokenization, like a malformed integer or unrecognized character. 41 | mutating func nextToken() throws -> Token? { 42 | while let ch = nextScalar() { 43 | switch ch { 44 | case " ", "\n", "\r", "\t": 45 | // Ignore whitespace. 46 | continue 47 | case ",": 48 | return .comma 49 | case ";": 50 | return .semicolon 51 | case "0"..."9": 52 | return try integerToken(startingWith: ch) 53 | case "\"": 54 | return try stringToken() 55 | default: 56 | throw TokenizingError.unrecognizedCharacter 57 | } 58 | } 59 | return nil 60 | } 61 | 62 | /// Returns the next scalar to process from the input string. 63 | /// 64 | /// If `pushedBackScalar` is non-nil, that scalar will be returned and then 65 | /// that property is cleared. Otherwise, the iterator's next scalar is 66 | /// returned. 67 | /// 68 | /// - Returns: The next scalar to process, or nil if the end of the string has 69 | /// been reached. 
70 | private mutating func nextScalar() -> UnicodeScalar? { 71 | if let next = pushedBackScalar { 72 | pushedBackScalar = nil 73 | return next 74 | } 75 | return iterator.next() 76 | } 77 | 78 | /// Scans the remainder of an integer token and returns it. 79 | /// 80 | /// - Parameter first: The first scalar of the integer that has already been 81 | /// scanned. 82 | /// - Returns: A `Token.integer` whose associated value is the integer that 83 | /// was scanned. 84 | /// - Throws: `TokenizingError.malformedInteger` if the scanned token text could 85 | /// not be converted to an integer (for example, if it was too large). 86 | private mutating func integerToken( 87 | startingWith first: UnicodeScalar 88 | ) throws -> Token { 89 | var tokenText = String(first) 90 | 91 | loop: while let ch = nextScalar() { 92 | switch ch { 93 | case "0"..."9": 94 | tokenText.unicodeScalars.append(ch) 95 | default: 96 | pushedBackScalar = ch 97 | break loop 98 | } 99 | } 100 | 101 | guard let value = Int(tokenText) else { 102 | throw TokenizingError.malformedInteger 103 | } 104 | return .integer(value) 105 | } 106 | 107 | /// Scans the remainder of a quoted string token and returns it. 108 | /// 109 | /// - Returns: A `Token.string` whose associated value is the string that was 110 | /// quoted (without the surrounding quotes). 111 | /// - Throws: `TokenizingError.unterminatedString` if the end of the input was 112 | /// reached without seeing a terminating quote. 
113 | private mutating func stringToken() throws -> Token { 114 | var tokenText = String() 115 | 116 | while let ch = nextScalar() { 117 | switch ch { 118 | case "\"": 119 | return .string(tokenText) 120 | default: 121 | tokenText.unicodeScalars.append(ch) 122 | } 123 | } 124 | 125 | throw TokenizingError.unterminatedString 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /Tokenizing/main.swift: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Tony Allevato 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | 16 | do { 17 | // Measure the Character-based tokenizer. 18 | try measure("CharacterBasedTokenizer") { 19 | for _ in 0..<1000 { 20 | var tokenizer = CharacterBasedTokenizer(text: stateDataString) 21 | while let token = try tokenizer.nextToken() {} 22 | } 23 | } 24 | 25 | // Measure the UnicodeScalar-based tokenizer. 
26 | try measure("UnicodeScalarBasedTokenizer") { 27 | for _ in 0..<1000 { 28 | var tokenizer = UnicodeScalarBasedTokenizer(text: stateDataString) 29 | while let token = try tokenizer.nextToken() {} 30 | } 31 | } 32 | } catch let e { 33 | fatalError("Error was thrown: \(e)") 34 | } 35 | -------------------------------------------------------------------------------- /emit_sil.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2016 Tony Allevato. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ---- 18 | # Emits and demangles the canonical SIL for Tokenizing. 19 | 20 | set -eu 21 | readonly scriptdir="$(dirname "$0")" 22 | 23 | xcrun swiftc -O -emit-sil \ 24 | -module-name Tokenizing "$scriptdir"/Tokenizing/*.swift 2>&1 | \ 25 | xcrun swift-demangle > Tokenizing.sil.txt 26 | --------------------------------------------------------------------------------