├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.md │ └── feature-request.md └── workflows │ ├── Test.yml │ └── wiki.yml ├── .gitignore ├── .swiftformat ├── .swiftpm └── xcode │ ├── package.xcworkspace │ ├── contents.xcworkspacedata │ └── xcshareddata │ │ └── WorkspaceSettings.xcsettings │ └── xcshareddata │ ├── xcbaselines │ └── PerformanceTests.xcbaseline │ │ ├── 1868159C-3A7D-4BC9-A194-7BBFC5CE3264.plist │ │ ├── 51ACA040-1EE9-4847-BC78-B1AE6F337194.plist │ │ └── Info.plist │ └── xcschemes │ ├── Debug PerformanceTests.xcscheme │ ├── LongTests.xcscheme │ ├── Patterns-Package.xcscheme │ ├── Patterns.xcscheme │ ├── PerformanceTests.xcscheme │ ├── Test all.xcscheme │ └── unicode_properties.xcscheme ├── LICENSE ├── Package.resolved ├── Package.swift ├── README.md ├── Sources ├── Patterns │ ├── Atomic Patterns │ │ ├── Line.swift │ │ ├── Literal.swift │ │ ├── OneOf.swift │ │ └── Word.swift │ ├── Decoder.swift │ ├── General │ │ ├── General.swift │ │ └── Group.swift │ ├── Grammar.swift │ ├── Operations on Patterns │ │ ├── And.swift │ │ ├── AnyPattern.swift │ │ ├── Capture.swift │ │ ├── Choice.swift │ │ ├── Concatenation.swift │ │ ├── Not.swift │ │ ├── Repetition.swift │ │ └── Skip.swift │ ├── Optimise Instructions.swift │ ├── Parser.swift │ ├── Pattern And Instruction.swift │ ├── Regex.swift │ └── VMBacktrack.swift └── unicode_properties │ ├── Scripts.txt │ ├── WordBreakProperty.txt │ └── main.swift └── Tests ├── LinuxMain.swift ├── LongTests └── LongTests.swift ├── PatternsTests ├── ConcatenationTests.swift ├── GeneralTests.swift ├── GrammarTests.swift ├── PatternTests.swift ├── SkipTests.swift └── TestHelpers.swift └── PerformanceTests ├── Long.txt ├── Multi-language-short.txt ├── StringTests.swift └── UTF8Tests.swift /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a bug report 4 | title: '' 5 | labels: 'bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 
Patterns Environment 11 | 12 | - Patterns version: 13 | - macOS version: 14 | - Xcode version: 15 | - Dependency manager (Swift Package Manager, Manually): 16 | 17 | ## What did you do? 18 | 19 | > ℹ Please replace this with what you did. 20 | 21 | ## What did you expect to happen? 22 | 23 | > ℹ Please replace this with what you expected to happen. 24 | 25 | ## What happened instead? 26 | 27 | > ℹ Please replace this with what happened instead. 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for Patterns 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # Motivation 11 | > ℹ Please replace this with your motivation. For example if your feature request is related to a problem. 12 | 13 | # Solution 14 | > ℹ Please replace this with your proposed solution. 15 | 16 | # Additional context 17 | > ℹ Please replace this with any other context or screenshots about your feature request (optional). 
18 | -------------------------------------------------------------------------------- /.github/workflows/Test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | paths: 9 | - '**.swift' 10 | - '.swift-version' 11 | - 'Package.resolved' 12 | - '.github/workflows/Test.yml' 13 | - 'Tests/**' 14 | 15 | jobs: 16 | test: 17 | strategy: 18 | matrix: 19 | os: [macos-latest, ubuntu-latest] 20 | name: Test on ${{ matrix.os }} 21 | runs-on: ${{ matrix.os }} 22 | 23 | steps: 24 | - name: Checkout 25 | uses: actions/checkout@v3 26 | - name: Build 27 | run: | 28 | swift --version 29 | swift build 30 | - name: Test 31 | run: swift test --enable-test-discovery --parallel 32 | -------------------------------------------------------------------------------- /.github/workflows/wiki.yml: -------------------------------------------------------------------------------- 1 | name: Wiki 2 | 3 | on: 4 | workflow_dispatch: 5 | release: 6 | types: [published] 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v1 14 | - name: Generate Documentation 15 | uses: kareman/swift-doc@support-kareman-Patterns 16 | with: 17 | inputs: "Sources/Patterns" 18 | module-name: Patterns 19 | output: "Documentation" 20 | - name: Upload Documentation to Wiki 21 | uses: SwiftDocOrg/github-wiki-publish-action@v1 22 | with: 23 | path: "Documentation" 24 | env: 25 | GH_PERSONAL_ACCESS_TOKEN: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/macos,xcode,carthage,cocoapods,fastlane,swift 3 | # Edit at https://www.gitignore.io/?templates=macos,xcode,carthage,cocoapods,fastlane,swift 4 | 5 | ### Carthage ### 6 | # 
Carthage 7 | # 8 | # Add this line if you want to avoid checking in source code from Carthage dependencies. 9 | Carthage/Checkouts 10 | Carthage/Build 11 | 12 | ### CocoaPods ### 13 | ## CocoaPods GitIgnore Template 14 | 15 | # CocoaPods - Only use to conserve bandwidth / Save time on Pushing 16 | # - Also handy if you have a large number of dependant pods 17 | # - AS PER https://guides.cocoapods.org/using/using-cocoapods.html NEVER IGNORE THE LOCK FILE 18 | Pods/ 19 | 20 | ### fastlane ### 21 | # fastlane - A streamlined workflow tool for Cocoa deployment 22 | # It is recommended to not store the screenshots in the git repo. Instead, use fastlane to re-generate the 23 | # screenshots whenever they are needed. 24 | # For more information about the recommended setup visit: 25 | # https://docs.fastlane.tools/best-practices/source-control/#source-control 26 | 27 | # fastlane specific 28 | fastlane/report.xml 29 | 30 | # deliver temporary files 31 | fastlane/Preview.html 32 | 33 | # snapshot generated screenshots 34 | fastlane/screenshots/**/*.png 35 | fastlane/screenshots/screenshots.html 36 | 37 | # scan temporary files 38 | fastlane/test_output 39 | 40 | ### macOS ### 41 | # General 42 | .DS_Store 43 | .AppleDouble 44 | .LSOverride 45 | 46 | # Icon must end with two \r 47 | Icon 48 | 49 | # Thumbnails 50 | ._* 51 | 52 | # Files that might appear in the root of a volume 53 | .DocumentRevisions-V100 54 | .fseventsd 55 | .Spotlight-V100 56 | .TemporaryItems 57 | .Trashes 58 | .VolumeIcon.icns 59 | .com.apple.timemachine.donotpresent 60 | 61 | # Directories potentially created on remote AFP share 62 | .AppleDB 63 | .AppleDesktop 64 | Network Trash Folder 65 | Temporary Items 66 | .apdisk 67 | 68 | ### Swift ### 69 | # Xcode 70 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 71 | 72 | ## Build generated 73 | build/ 74 | DerivedData/ 75 | 76 | ## Various settings 77 | *.pbxuser 78 | !default.pbxuser 79 | 
*.mode1v3 80 | !default.mode1v3 81 | *.mode2v3 82 | !default.mode2v3 83 | *.perspectivev3 84 | !default.perspectivev3 85 | xcuserdata/ 86 | 87 | ## Other 88 | *.moved-aside 89 | *.xccheckout 90 | *.xcscmblueprint 91 | 92 | ## Obj-C/Swift specific 93 | *.hmap 94 | *.ipa 95 | *.dSYM.zip 96 | *.dSYM 97 | 98 | ## Playgrounds 99 | timeline.xctimeline 100 | playground.xcworkspace 101 | 102 | # Swift Package Manager 103 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. 104 | # Packages/ 105 | # Package.pins 106 | # Package.resolved 107 | .build/ 108 | 109 | # fastlane 110 | # It is recommended to not store the screenshots in the git repo. Instead, use fastlane to re-generate the 111 | # screenshots whenever they are needed. 112 | # For more information about the recommended setup visit: 113 | # https://docs.fastlane.tools/best-practices/source-control/#source-control 114 | 115 | 116 | # Code Injection 117 | # After new code Injection tools there's a generated folder /iOSInjectionProject 118 | # https://github.com/johnno1962/injectionforxcode 119 | 120 | iOSInjectionProject/ 121 | 122 | # End of https://www.gitignore.io/api/macos,xcode,carthage,cocoapods,fastlane,swift 123 | 124 | default.profraw 125 | *.xcodeproj 126 | #Playground/MyPlayground.playground/ 127 | 128 | .swift-version 129 | -------------------------------------------------------------------------------- /.swiftformat: -------------------------------------------------------------------------------- 1 | --allman false 2 | --binarygrouping 4,8 3 | --closingparen same-line 4 | --commas always 5 | --conflictmarkers reject 6 | --decimalgrouping 3,6 7 | --elseposition same-line 8 | --empty void 9 | --exponentcase lowercase 10 | --exponentgrouping disabled 11 | --fractiongrouping disabled 12 | --fragment false 13 | --header ignore 14 | --hexgrouping 4,8 15 | --hexliteralcase uppercase 16 | --ifdef no-indent 17 | --importgrouping alphabetized 18 | --indent tab 19 
| --indentcase false 20 | --linebreaks lf 21 | --octalgrouping 4,8 22 | --operatorfunc spaced 23 | --patternlet hoist 24 | --ranges spaced 25 | --selfrequired 26 | --semicolons inline 27 | --stripunusedargs always 28 | --trailingclosures 29 | --trimwhitespace always 30 | --wraparguments preserve 31 | --wrapcollections preserve 32 | --disable redundantSelf,spaceInsideComments,typeSugar,unusedArguments 33 | --enable isEmpty 34 | -------------------------------------------------------------------------------- /.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.swiftpm/xcode/package.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | IDEWorkspaceSharedSettings_AutocreateContextsIfNeeded 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /.swiftpm/xcode/xcshareddata/xcbaselines/PerformanceTests.xcbaseline/1868159C-3A7D-4BC9-A194-7BBFC5CE3264.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | classNames 6 | 7 | PerformanceTests 8 | 9 | testAnyNumeral() 10 | 11 | com.apple.XCTPerformanceMetric_WallClockTime 12 | 13 | baselineAverage 14 | 0.61405 15 | baselineIntegrationDisplayName 16 | 7 May 2020 at 21:25:02 17 | 18 | 19 | testContainsClosure() 20 | 21 | com.apple.XCTPerformanceMetric_WallClockTime 22 | 23 | baselineAverage 24 | 0.50567 25 | baselineIntegrationDisplayName 26 | 7 May 2020 at 21:25:02 27 | 28 | 29 | testLine() 30 | 31 | com.apple.XCTPerformanceMetric_WallClockTime 32 | 33 | baselineAverage 34 | 0.26298 35 | baselineIntegrationDisplayName 36 | 7 May 2020 at 21:25:02 37 | 38 | 39 | testLiteralSearch() 40 | 41 | com.apple.XCTPerformanceMetric_WallClockTime 42 | 
43 | baselineAverage 44 | 0.65413 45 | baselineIntegrationDisplayName 46 | 7 May 2020 at 21:25:02 47 | 48 | 49 | testNonExistentLiteralSearch() 50 | 51 | com.apple.XCTPerformanceMetric_WallClockTime 52 | 53 | baselineAverage 54 | 0.67721 55 | baselineIntegrationDisplayName 56 | 7 May 2020 at 21:25:02 57 | 58 | 59 | testNotNewLine() 60 | 61 | com.apple.XCTPerformanceMetric_WallClockTime 62 | 63 | baselineAverage 64 | 0.67932 65 | baselineIntegrationDisplayName 66 | 7 May 2020 at 21:25:02 67 | 68 | com.apple.dt.XCTMetric_CPU.instructions_retired 69 | 70 | baselineAverage 71 | 3.9872e+06 72 | baselineIntegrationDisplayName 73 | Local Baseline 74 | 75 | 76 | testOneOrMore() 77 | 78 | com.apple.XCTPerformanceMetric_WallClockTime 79 | 80 | baselineAverage 81 | 0.43377 82 | baselineIntegrationDisplayName 83 | 7 May 2020 at 21:25:02 84 | 85 | 86 | testOptionalStringFollowedByNonOptionalString() 87 | 88 | com.apple.XCTPerformanceMetric_WallClockTime 89 | 90 | baselineAverage 91 | 0.70984 92 | baselineIntegrationDisplayName 93 | 7 May 2020 at 21:25:02 94 | 95 | 96 | testSkipping1() 97 | 98 | com.apple.XCTPerformanceMetric_WallClockTime 99 | 100 | baselineAverage 101 | 0.26616 102 | baselineIntegrationDisplayName 103 | 7 May 2020 at 21:25:02 104 | 105 | 106 | testUppercaseWord() 107 | 108 | com.apple.XCTPerformanceMetric_WallClockTime 109 | 110 | baselineAverage 111 | 0.43782 112 | baselineIntegrationDisplayName 113 | 7 May 2020 at 21:25:02 114 | 115 | 116 | testWordBoundary() 117 | 118 | com.apple.XCTPerformanceMetric_WallClockTime 119 | 120 | baselineAverage 121 | 0.42154 122 | baselineIntegrationDisplayName 123 | 7 May 2020 at 21:25:02 124 | 125 | 126 | testWordBoundaryManyLanguages() 127 | 128 | com.apple.XCTPerformanceMetric_WallClockTime 129 | 130 | baselineAverage 131 | 0.30829 132 | baselineIntegrationDisplayName 133 | 7 May 2020 at 21:25:02 134 | 135 | 136 | 137 | StringTests 138 | 139 | testAnyNumeral() 140 | 141 | com.apple.dt.XCTMetric_CPU.instructions_retired 142 
| 143 | baselineAverage 144 | 1.1767e+06 145 | baselineIntegrationDisplayName 146 | 4 Sep 2020 at 18:29:36 147 | 148 | 149 | testContainsClosure() 150 | 151 | com.apple.dt.XCTMetric_CPU.instructions_retired 152 | 153 | baselineAverage 154 | 1.0476e+06 155 | baselineIntegrationDisplayName 156 | 4 Sep 2020 at 18:29:36 157 | 158 | 159 | testGrammarLiteralSearch() 160 | 161 | com.apple.dt.XCTMetric_CPU.instructions_retired 162 | 163 | baselineAverage 164 | 5.4628e+05 165 | baselineIntegrationDisplayName 166 | 4 Sep 2020 at 18:29:36 167 | 168 | 169 | testLine() 170 | 171 | com.apple.dt.XCTMetric_CPU.instructions_retired 172 | 173 | baselineAverage 174 | 1.7351e+06 175 | baselineIntegrationDisplayName 176 | 4 Sep 2020 at 18:29:36 177 | 178 | 179 | testLiteralSearch() 180 | 181 | com.apple.dt.XCTMetric_CPU.instructions_retired 182 | 183 | baselineAverage 184 | 6.3384e+06 185 | baselineIntegrationDisplayName 186 | 4 Sep 2020 at 18:29:36 187 | 188 | 189 | testNonExistentLiteralSearch() 190 | 191 | com.apple.dt.XCTMetric_CPU.instructions_retired 192 | 193 | baselineAverage 194 | 3.4736e+06 195 | baselineIntegrationDisplayName 196 | 4 Sep 2020 at 18:29:36 197 | 198 | 199 | testNotNewLine() 200 | 201 | com.apple.dt.XCTMetric_CPU.instructions_retired 202 | 203 | baselineAverage 204 | 1.5674e+06 205 | baselineIntegrationDisplayName 206 | 4 Sep 2020 at 18:29:36 207 | 208 | 209 | testOneOrMore() 210 | 211 | com.apple.dt.XCTMetric_CPU.instructions_retired 212 | 213 | baselineAverage 214 | 8.66e+05 215 | baselineIntegrationDisplayName 216 | 4 Sep 2020 at 18:29:36 217 | 218 | 219 | testOptionalStringFollowedByNonOptionalString() 220 | 221 | com.apple.dt.XCTMetric_CPU.instructions_retired 222 | 223 | baselineAverage 224 | 9.5629e+05 225 | baselineIntegrationDisplayName 226 | 4 Sep 2020 at 18:29:36 227 | 228 | 229 | testSkipping1() 230 | 231 | com.apple.dt.XCTMetric_CPU.instructions_retired 232 | 233 | baselineAverage 234 | 1.2135e+06 235 | baselineIntegrationDisplayName 236 | 4 Sep 
2020 at 18:29:36 237 | 238 | 239 | testUppercaseWord() 240 | 241 | com.apple.dt.XCTMetric_CPU.instructions_retired 242 | 243 | baselineAverage 244 | 2.6215e+06 245 | baselineIntegrationDisplayName 246 | 4 Sep 2020 at 18:29:36 247 | 248 | 249 | testWordBoundary() 250 | 251 | com.apple.dt.XCTMetric_CPU.instructions_retired 252 | 253 | baselineAverage 254 | 1.7362e+06 255 | baselineIntegrationDisplayName 256 | 4 Sep 2020 at 18:29:36 257 | 258 | 259 | testWordBoundaryManyLanguages() 260 | 261 | com.apple.dt.XCTMetric_CPU.instructions_retired 262 | 263 | baselineAverage 264 | 3.257e+06 265 | baselineIntegrationDisplayName 266 | 4 Sep 2020 at 18:29:36 267 | 268 | 269 | 270 | UTF8Tests 271 | 272 | testGrammarLiteralSearch() 273 | 274 | com.apple.dt.XCTMetric_CPU.instructions_retired 275 | 276 | baselineAverage 277 | 3.7555e+05 278 | baselineIntegrationDisplayName 279 | 4 Sep 2020 at 18:29:36 280 | 281 | 282 | testLine() 283 | 284 | com.apple.dt.XCTMetric_CPU.instructions_retired 285 | 286 | baselineAverage 287 | 9.8235e+05 288 | baselineIntegrationDisplayName 289 | 4 Sep 2020 at 18:29:36 290 | 291 | 292 | testLiteralSearch() 293 | 294 | com.apple.dt.XCTMetric_CPU.instructions_retired 295 | 296 | baselineAverage 297 | 8.3797e+05 298 | baselineIntegrationDisplayName 299 | 4 Sep 2020 at 18:29:36 300 | 301 | 302 | testNonExistentLiteralSearch() 303 | 304 | com.apple.dt.XCTMetric_CPU.instructions_retired 305 | 306 | baselineAverage 307 | 6.0681e+05 308 | baselineIntegrationDisplayName 309 | 4 Sep 2020 at 18:29:36 310 | 311 | 312 | testNotNewLine() 313 | 314 | com.apple.dt.XCTMetric_CPU.instructions_retired 315 | 316 | baselineAverage 317 | 7.6005e+05 318 | baselineIntegrationDisplayName 319 | 4 Sep 2020 at 18:29:36 320 | 321 | 322 | testOptionalStringFollowedByNonOptionalString() 323 | 324 | com.apple.dt.XCTMetric_CPU.instructions_retired 325 | 326 | baselineAverage 327 | 7.4862e+05 328 | baselineIntegrationDisplayName 329 | 4 Sep 2020 at 18:29:36 330 | 331 | 332 | 
testSkipping1() 333 | 334 | com.apple.dt.XCTMetric_CPU.instructions_retired 335 | 336 | baselineAverage 337 | 2.4327e+05 338 | baselineIntegrationDisplayName 339 | 4 Sep 2020 at 18:29:36 340 | 341 | 342 | 343 | 344 | 345 | 346 | -------------------------------------------------------------------------------- /.swiftpm/xcode/xcshareddata/xcbaselines/PerformanceTests.xcbaseline/51ACA040-1EE9-4847-BC78-B1AE6F337194.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | classNames 6 | 7 | PerformanceTests 8 | 9 | testUppercaseWord() 10 | 11 | com.apple.XCTPerformanceMetric_WallClockTime 12 | 13 | baselineAverage 14 | 0.42167 15 | baselineIntegrationDisplayName 16 | Local Baseline 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /.swiftpm/xcode/xcshareddata/xcbaselines/PerformanceTests.xcbaseline/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | runDestinationsByUUID 6 | 7 | 1868159C-3A7D-4BC9-A194-7BBFC5CE3264 8 | 9 | localComputer 10 | 11 | busSpeedInMHz 12 | 400 13 | cpuCount 14 | 1 15 | cpuKind 16 | 6-Core Intel Core i7 17 | cpuSpeedInMHz 18 | 3200 19 | logicalCPUCoresPerPackage 20 | 12 21 | modelCode 22 | Macmini8,1 23 | physicalCPUCoresPerPackage 24 | 6 25 | platformIdentifier 26 | com.apple.platform.macosx 27 | 28 | targetArchitecture 29 | x86_64 30 | 31 | 51ACA040-1EE9-4847-BC78-B1AE6F337194 32 | 33 | localComputer 34 | 35 | busSpeedInMHz 36 | 400 37 | cpuCount 38 | 1 39 | cpuKind 40 | 6-Core Intel Core i7 41 | cpuSpeedInMHz 42 | 3200 43 | logicalCPUCoresPerPackage 44 | 12 45 | modelCode 46 | Macmini8,1 47 | physicalCPUCoresPerPackage 48 | 6 49 | platformIdentifier 50 | com.apple.platform.macosx 51 | 52 | targetArchitecture 53 | x86_64h 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- 
/.swiftpm/xcode/xcshareddata/xcschemes/Debug PerformanceTests.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 30 | 31 | 34 | 40 | 41 | 42 | 43 | 44 | 54 | 55 | 61 | 62 | 64 | 65 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /.swiftpm/xcode/xcshareddata/xcschemes/LongTests.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 16 | 18 | 24 | 25 | 26 | 27 | 28 | 38 | 39 | 45 | 46 | 48 | 49 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /.swiftpm/xcode/xcshareddata/xcschemes/Patterns-Package.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 29 | 35 | 36 | 37 | 43 | 49 | 50 | 51 | 52 | 53 | 58 | 59 | 61 | 67 | 68 | 69 | 71 | 77 | 78 | 79 | 81 | 87 | 88 | 89 | 90 | 91 | 101 | 102 | 108 | 109 | 110 | 111 | 117 | 118 | 124 | 125 | 126 | 127 | 129 | 130 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /.swiftpm/xcode/xcshareddata/xcschemes/Patterns.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 32 | 33 | 35 | 41 | 42 | 43 | 46 | 52 | 53 | 54 | 55 | 56 | 66 | 67 | 73 | 74 | 80 | 81 | 82 | 83 | 85 | 86 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /.swiftpm/xcode/xcshareddata/xcschemes/PerformanceTests.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 29 | 35 | 36 | 37 | 38 | 39 | 45 | 46 | 48 | 54 | 55 | 56 | 57 | 58 | 68 | 69 | 75 | 76 | 82 | 83 | 84 | 85 | 87 | 88 | 91 | 92 | 93 | -------------------------------------------------------------------------------- 
/.swiftpm/xcode/xcshareddata/xcschemes/Test all.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 30 | 31 | 34 | 40 | 41 | 42 | 45 | 51 | 52 | 53 | 56 | 62 | 63 | 64 | 65 | 66 | 76 | 77 | 83 | 84 | 90 | 91 | 92 | 93 | 95 | 96 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /.swiftpm/xcode/xcshareddata/xcschemes/unicode_properties.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 30 | 31 | 33 | 39 | 40 | 41 | 43 | 49 | 50 | 51 | 53 | 59 | 60 | 61 | 62 | 63 | 74 | 76 | 82 | 83 | 84 | 85 | 88 | 89 | 92 | 93 | 96 | 97 | 100 | 101 | 104 | 105 | 106 | 107 | 113 | 115 | 121 | 122 | 123 | 124 | 126 | 127 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2019 NotTooBad Software 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /Package.resolved: -------------------------------------------------------------------------------- 1 | { 2 | "object": { 3 | "pins": [ 4 | { 5 | "package": "swift-argument-parser", 6 | "repositoryURL": "https://github.com/apple/swift-argument-parser", 7 | "state": { 8 | "branch": null, 9 | "revision": "15351c1cd009eba0b6e438bfef55ea9847a8dc4a", 10 | "version": "0.3.0" 11 | } 12 | }, 13 | { 14 | "package": "swift-se0270-range-set", 15 | "repositoryURL": "https://github.com/apple/swift-se0270-range-set", 16 | "state": { 17 | "branch": null, 18 | "revision": "7d6d7531d40e95f4e1e26f6565265be6df228911", 19 | "version": "1.0.1" 20 | } 21 | } 22 | ] 23 | }, 24 | "version": 1 25 | } 26 | -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version:5.0 2 | // The swift-tools-version declares the minimum version of Swift required to build this package. 
3 | 4 | import PackageDescription 5 | 6 | let package = Package( 7 | name: "Patterns", 8 | products: [ 9 | .library( 10 | name: "Patterns", 11 | targets: ["Patterns"]), 12 | .executable( 13 | name: "unicode_properties", 14 | targets: ["unicode_properties", "Patterns"]), 15 | ], 16 | dependencies: [ 17 | .package(url: "https://github.com/apple/swift-argument-parser", from: "0.0.1"), 18 | .package(url: "https://github.com/apple/swift-se0270-range-set", from: "1.0.1"), 19 | ], 20 | targets: [ 21 | .target( 22 | name: "Patterns", 23 | dependencies: ["SE0270_RangeSet"], 24 | swiftSettings: [ 25 | .define("DEBUG", .when(configuration: .debug)), 26 | ]), 27 | .testTarget( 28 | name: "PatternsTests", 29 | dependencies: ["Patterns"], 30 | swiftSettings: [ // Move code that takes too long to build into 'LongTests'. 31 | .unsafeFlags(["-Xfrontend", "-warn-long-expression-type-checking=200"]), 32 | ]), 33 | .testTarget( 34 | name: "PerformanceTests", 35 | dependencies: ["Patterns"], 36 | swiftSettings: [ 37 | .define("DEBUG", .when(configuration: .debug)), 38 | ]), 39 | .testTarget( // For code that takes a long time to build or run. Try to keep "PatternsTests" snappy. 40 | name: "LongTests", 41 | dependencies: ["Patterns"]), 42 | .target( 43 | name: "unicode_properties", 44 | dependencies: ["Patterns", "ArgumentParser"]), 45 | ], 46 | swiftLanguageVersions: [.v5]) 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | SPM 4 | 5 | Linux 6 |

7 | 8 | 9 | # Patterns 10 | 11 | Patterns is a Swift library for Parsing Expression Grammars (PEGs). It can be used to create expressions similar to regular expressions (like regex’es) and grammars (for parsers). 12 | 13 | For general information about PEGs, see [the original paper](https://dl.acm.org/doi/10.1145/982962.964011) or [Wikipedia](https://en.wikipedia.org/wiki/Parsing_expression_grammar). 14 | 15 | ## Example 16 | 17 | ```swift 18 | let text = "This is a point: (43,7), so is (0, 5). But my final point is (3,-1)." 19 | 20 | let number = ("+" / "-" / "") • digit+ 21 | let point = "(" • Capture(name: "x", number) 22 | • "," • " "¿ • Capture(name: "y", number) • ")" 23 | 24 | struct Point: Codable { 25 | let x, y: Int 26 | } 27 | 28 | let points = try Parser(search: point).decode([Point].self, from: text) 29 | // points == [Point(x: 43, y: 7), Point(x: 0, y: 5), Point(x: 3, y: -1)] 30 | ``` 31 | 32 | See also: 33 | - [Parsing Unicode property data files](https://nottoobadsoftware.com/blog/textpicker/patterns/parsing_unicode_property_data_files/) 34 | 35 | ## Usage 36 | 37 | Patterns are defined directly in code, instead of in a text string. 38 | 39 | **Note**: Long patterns can give the Swift type checker a lot to think about, especially long series of `a / b / c / etc...`. To improve build times, try to split a long pattern into multiple shorter ones. 40 | 41 | ### Standard PEG 42 | 43 | ##### `"text"` 44 | 45 | Text within double quotes matches that exact text, no need to escape special letters with `\`. If you want to turn a string variable `s` into a pattern, use `Literal(s)`. 46 | 47 | ##### `OneOf(...)` 48 | 49 | This is like character classes (`[...]`) from regular expressions, and matches 1 character. `OneOf("aeiouAEIOU")` matches any single character in that string, and `OneOf("a"..."e")` matches any of "abcde". They can also be combined, like `OneOf("aeiou", punctuation, "x"..."z")`. To match any character _except_ ..., use `OneOf(not: ...)`. 
50 | 51 | 52 | You can also implement one yourself: 53 | 54 | ```swift 55 | OneOf(description: "ten") { character in 56 | character.wholeNumberValue == 10 57 | } 58 | ``` 59 | 60 | It takes a closure `@escaping (Character) -> Bool` and matches any character for which the closure returns `true`. The description parameter is only used when creating a textual representation of the pattern. 61 | 62 | ##### `a • b • c` 63 | 64 | The • operator (Option-8 on U.S. keyboards, Option-Q on Norwegian ones) first matches `a`, then `b` and then `c`. It is used to create a pattern from a sequence of other patterns. 65 | 66 | ##### `a*` 67 | 68 | matches 0 or more, as many as it can (it is greedy, like the regex `a*+`). So a pattern like `a* • a` will never match anything because the `a*` pattern will always match all it can, leaving nothing left for the last `a`. 69 | 70 | ##### `a+` 71 | 72 | matches 1 or more, also as many as it can (like the regex `a++`). 73 | 74 | ##### `a¿` 75 | 76 | makes `a` optional, but it always matches if it can (the `¿` character is Option-Shift-TheKeyWith?OnIt on most keyboards). 77 | 78 | ##### `a / b` 79 | 80 | This first tries the pattern on the left. If that fails it tries the pattern on the right. This is _ordered choice_, once `a` has matched it will never go back and try `b` if a later part of the expression fails. This is the main difference between PEGs and most other grammars and regex'es. 81 | 82 | ##### `&&a • b` 83 | 84 | The "and predicate" first verifies that `a` matches, then moves the position in the input back to where `a` began and continues with `b`. In other words it verifies that both `a` and `b` match from the same position. So to match one ASCII letter you can use `&&ascii • letter`. 85 | 86 | ##### `!a • b` 87 | 88 | The "not predicate" verifies that `a` does _not_ match, then just like above it moves the position in the input back to where `a` began and continues with `b`. You can read it like "b and not a". 
89 | 90 | #### Grammars 91 | 92 | The main advantage of PEGs over regular expressions is that they support recursive expressions. These expressions can contain themselves, or other expressions that in turn contain them. Here is how you can parse simple arithmetic expressions: 93 | 94 | ```swift 95 | let arithmetic = Grammar { g in 96 | g.all <- g.expr • !any 97 | g.expr <- g.sum 98 | g.sum <- g.product • (("+" / "-") • g.product)* 99 | g.product <- g.power • (("*" / "/") • g.power)* 100 | g.power <- g.value • ("^" • g.power)¿ 101 | g.value <- digit+ / "(" • g.expr • ")" 102 | } 103 | ``` 104 | 105 | This will parse expressions like "1+2-3^(4*3)/2". 106 | 107 | The top expression is called first. `• !any` means it must match the entire string, because only at the end of the string are there no characters left. If you want to match multiple arithmetic expressions in a string, comment out the first expression. Grammars use dynamic properties so there is no auto-completion for the expression names. 108 | 109 | ### Additions 110 | 111 | There are predefined OneOf patterns for all the boolean `is...` properties of Swift's `Character`: `letter`, `lowercase`, `uppercase`, `punctuation`, `whitespace`, `newline`, `hexDigit`, `digit`, `ascii`, `symbol`, `mathSymbol`, `currencySymbol`. 112 | 113 | They all have the same name as the last part of the property, except for `wholeNumber`, which is renamed to `digit` because `wholeNumber` sounds more like an entire number than a single digit. 114 | 115 | There is also `alphanumeric`, which is a `letter` or a `digit`. 116 | 117 | ##### `any` 118 | 119 | Matches any character. `!any` matches only the end of the text. 120 | 121 | ##### `Line()` 122 | 123 | Matches a single line, not including the newline characters. So `Line() • Line()` will never match anything, but `Line() • "\n" • Line()` matches 2 lines. 124 | 125 | `Line.Start()` matches at the beginning of the text, and after any newline characters. 
`Line.End()` matches at the end of the text, and right before any newline characters. They both have a length of 0, which means the next pattern will start at the same position in the text. 126 | 127 | ##### `Word.Boundary()` 128 | 129 | Matches the position right before or right after a word. Like `Line.Start()` and `Line.End()` it also has a length of 0. 130 | 131 | ##### `a.repeat(...)` 132 | 133 | `a.repeat(2)` matches 2 of that pattern in a row. `a.repeat(...2)` matches 0, 1 or 2, `a.repeat(2...)` matches 2 or more and `a.repeat(3...6)` between 3 and 6. 134 | 135 | ##### `Skip() • a • b` 136 | 137 | Finds the first match of `a • b` from the current position. 138 | 139 | ### Parsing 140 | 141 | To actually use a pattern, pass it to a Parser: 142 | 143 | ```swift 144 | let parser = try Parser(search: a) 145 | for match in parser.matches(in: text) { 146 | // ... 147 | } 148 | ``` 149 | 150 | `Parser(search: a)` searches for the first match for `a`. It is the same as `Parser(Skip() • a)`. 151 | 152 | The `.matches(in: String)` method returns a lazy sequence of `Match` instances. 153 | 154 | Often we are only interested in parts of a pattern. You can use the `Capture` pattern to assign a name to those parts: 155 | 156 | ```swift 157 | let text = "This is a point: (43,7), so is (0, 5). But my final point is (3,-1)." 
158 | 159 | let number = ("+" / "-" / "") • digit+ 160 | let point = "(" • Capture(name: "x", number) 161 | • "," • " "¿ • Capture(name: "y", number) • ")" 162 | 163 | struct Point: Codable { 164 | let x, y: Int 165 | } 166 | 167 | let parser = try Parser(search: point) 168 | let points = try parser.decode([Point].self, from: text) 169 | ``` 170 | 171 | Or you can use subscripting: 172 | 173 | ```swift 174 | let pointsAsSubstrings = parser.matches(in: text).map { match in 175 | (text[match[one: "x"]!], text[match[one: "y"]!]) 176 | } 177 | ``` 178 | 179 | You can also use `match[multiple: name]` to get an array if captures with that name may be matched multiple times. `match[one: name]` only returns the first capture of that name. 180 | 181 | ### Inputs 182 | 183 | By default, patterns have `String` as their input type. But you can use any `BidirectionalCollection` with `Hashable` elements for input. Just explicitly specify the input type of the first pattern, and the rest should get it automatically: 184 | 185 | ```swift 186 | let text = "This is a point: (43,7), so is (0, 5). But my final point is (3,-1).".utf8 187 | 188 | let digit = OneOf(UInt8(ascii: "0")...UInt8(ascii: "9")) 189 | let number = ("+" / "-" / "") • digit+ 190 | let point = "(" • Capture(name: "x", number) 191 | • "," • " "¿ • Capture(name: "y", number) • ")" 192 | 193 | struct Point: Codable { 194 | let x, y: Int 195 | } 196 | 197 | let parser = try Parser(search: point) 198 | let pointsAsSubstrings = parser.matches(in: text).map { match in 199 | (text[match[one: "x"]!], text[match[one: "y"]!]) 200 | } 201 | ``` 202 | 203 | `Parser.decode` can (currently) only take String as input, but `.matches` handles all types. 
204 | 205 | ## Setup 206 | 207 | ### [Swift Package Manager](https://swift.org/package-manager/) 208 | 209 | Add this to your `Package.swift` file: 210 | 211 | ```swift 212 | dependencies: [ 213 | .package(url: "https://github.com/kareman/Patterns.git", from: "0.1.0"), 214 | ] 215 | ``` 216 | 217 | or choose “Add Package Dependency” from within Xcode. 218 | 219 | ## Implementation 220 | 221 | Patterns is implemented using a virtual parsing machine, similar to how [LPEG](http://www.inf.puc-rio.br/~roberto/lpeg/) is [implemented](http://www.inf.puc-rio.br/~roberto/docs/peg.pdf), and the `backtrackingvm` function described [here](https://swtch.com/~rsc/regexp/regexp2.html). 222 | 223 | ## Contributing 224 | 225 | Contributions are most welcome 🙌. 226 | 227 | ## License 228 | 229 | MIT 230 | 231 | ```text 232 | Patterns 233 | Copyright © 2019 234 | 235 | Permission is hereby granted, free of charge, to any person obtaining a copy 236 | of this software and associated documentation files (the "Software"), to deal 237 | in the Software without restriction, including without limitation the rights 238 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 239 | copies of the Software, and to permit persons to whom the Software is 240 | furnished to do so, subject to the following conditions: 241 | 242 | The above copyright notice and this permission notice shall be included in 243 | all copies or substantial portions of the Software. 244 | 245 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 246 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 247 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 248 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 249 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 250 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 251 | THE SOFTWARE. 
252 | ``` 253 | -------------------------------------------------------------------------------- /Sources/Patterns/Atomic Patterns/Line.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Line.swift 3 | // 4 | // 5 | // Created by Kåre Morstøl on 25/05/2020. 6 | // 7 | 8 | public protocol CharacterLike: Hashable { 9 | @inlinable 10 | var isNewline: Bool { get } 11 | } 12 | 13 | extension Character: CharacterLike {} 14 | extension String.UTF8View.Element: CharacterLike { 15 | @inlinable 16 | public var isNewline: Bool { 17 | // “\n” (U+000A): LINE FEED (LF), U+000B: LINE TABULATION (VT), U+000C: FORM FEED (FF), “\r” (U+000D): CARRIAGE RETURN (CR) 18 | self < 14 && self > 9 19 | } 20 | } 21 | 22 | // U+0085: NEXT LINE (NEL), U+2028: LINE SEPARATOR, U+2029: PARAGRAPH SEPARATOR 23 | @usableFromInline 24 | let newlines = Set([0x000A as UInt16, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029].map { Unicode.Scalar($0)! }) 25 | 26 | extension String.UnicodeScalarView.Element: CharacterLike { 27 | @inlinable 28 | public var isNewline: Bool { 29 | newlines.contains(self) 30 | } 31 | } 32 | 33 | extension String.UTF16View.Element: CharacterLike { 34 | @inlinable 35 | public var isNewline: Bool { 36 | Unicode.Scalar(self).map(newlines.contains(_:)) ?? false 37 | } 38 | } 39 | 40 | /// Matches one line, not including newline characters. 41 | public struct Line: Pattern 42 | where Input.Element: CharacterLike, Input.Index == String.Index { 43 | @inlinable 44 | public init() {} 45 | @inlinable 46 | public init() where Input == String {} 47 | 48 | public var description: String { "Line()" } 49 | 50 | @inlinable 51 | public func createInstructions(_ instructions: inout ContiguousArray>) throws { 52 | try (Start() • Skip() • End()).createInstructions(&instructions) 53 | } 54 | 55 | /// Matches the start of a line, including the start of input. 
56 | public struct Start: Pattern { 57 | @inlinable 58 | public init() {} 59 | @inlinable 60 | public init() where Input == String {} 61 | 62 | public var description: String { "Line.Start()" } 63 | 64 | @inlinable 65 | func parse(_ input: Input, at index: Input.Index) -> Bool { 66 | (index == input.startIndex) || input[input.index(before: index)].isNewline 67 | } 68 | 69 | @inlinable 70 | public func createInstructions(_ instructions: inout ContiguousArray>) { 71 | instructions.append(.checkIndex(self.parse(_:at:))) 72 | } 73 | } 74 | 75 | /// Matches the end of a line, including the end of input. 76 | public struct End: Pattern { 77 | @inlinable 78 | public init() {} 79 | @inlinable 80 | public init() where Input == String {} 81 | 82 | public var description: String { "Line.End()" } 83 | 84 | @inlinable 85 | func parse(_ input: Input, at index: Input.Index) -> Bool { 86 | index == input.endIndex || input[index].isNewline 87 | } 88 | 89 | @inlinable 90 | public func createInstructions(_ instructions: inout ContiguousArray>) { 91 | instructions.append(.checkIndex(self.parse(_:at:))) 92 | } 93 | } 94 | } 95 | 96 | extension Line where Input == String { 97 | @available(*, deprecated, renamed: "Start()") 98 | public static let start = Start() 99 | 100 | @available(*, deprecated, renamed: "End()") 101 | public static let end = End() 102 | } 103 | -------------------------------------------------------------------------------- /Sources/Patterns/Atomic Patterns/Literal.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Literal.swift 3 | // 4 | // 5 | // Created by Kåre Morstøl on 25/05/2020. 6 | // 7 | 8 | import Foundation 9 | 10 | /// Matches a sequence of elements. 11 | /// 12 | /// If empty, it will always succeed without consuming any input. 
13 | public struct Literal: Pattern where Input.Element: Hashable { 14 | public let elements: Input 15 | 16 | public var description: String { 17 | #""\#(String(describing: elements).replacingOccurrences(of: "\n", with: "\\n"))""# 18 | } 19 | 20 | @inlinable 21 | public init(_ input: Input) { 22 | elements = input 23 | } 24 | 25 | /// Matches `sequence`. 26 | @inlinable 27 | public init(_ sequence: S) where S.Element == Input.Element, Input == String { 28 | self.elements = Input(sequence) 29 | } 30 | 31 | @inlinable 32 | public func createInstructions(_ instructions: inout ContiguousArray>) { 33 | instructions.append(contentsOf: elements.map(Instruction.elementEquals)) 34 | } 35 | } 36 | 37 | extension Literal where Input == String { 38 | /// Matches this character. 39 | @inlinable 40 | public init(_ character: Character) { 41 | self.init(String(character)) 42 | } 43 | } 44 | 45 | // MARK: Create from string literal. 46 | 47 | extension Literal: ExpressibleByUnicodeScalarLiteral where Input: LosslessStringConvertible { 48 | @inlinable 49 | public init(unicodeScalarLiteral value: StaticString) { 50 | elements = Input(String(describing: value))! 51 | } 52 | } 53 | 54 | extension Literal: ExpressibleByExtendedGraphemeClusterLiteral where Input: LosslessStringConvertible { 55 | public typealias ExtendedGraphemeClusterLiteralType = StaticString 56 | } 57 | 58 | extension Literal: ExpressibleByStringLiteral where Input: LosslessStringConvertible { 59 | @inlinable 60 | public init(stringLiteral value: StaticString) { 61 | elements = Input(String(describing: value))! 
62 | } 63 | } 64 | 65 | extension String.UTF8View: LosslessStringConvertible { 66 | @inlinable 67 | public init?(_ description: String) { 68 | self = description.utf8 69 | } 70 | } 71 | 72 | extension String.UTF16View: LosslessStringConvertible { 73 | @inlinable 74 | public init?(_ description: String) { 75 | self = description.utf16 76 | } 77 | } 78 | 79 | extension String.UnicodeScalarView: LosslessStringConvertible { 80 | @inlinable 81 | public init?(_ description: String) { 82 | self = description.unicodeScalars 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /Sources/Patterns/Atomic Patterns/OneOf.swift: -------------------------------------------------------------------------------- 1 | // 2 | // OneOf.swift 3 | // 4 | // 5 | // Created by Kåre Morstøl on 25/05/2020. 6 | // 7 | 8 | import Foundation 9 | 10 | /// Matches and consumes a single element. 11 | public struct OneOf: Pattern /*, RegexConvertible*/ where Input.Element: Hashable & Comparable { 12 | @usableFromInline 13 | let group: Group 14 | public let description: String 15 | 16 | @usableFromInline 17 | let _regex: String? 18 | public var regex: String { 19 | _regex ?? fatalError("Regex not provided for '\(description)'") 20 | } 21 | 22 | @usableFromInline 23 | init(description: String, regex: String? = nil, group: Group) { 24 | self.group = group 25 | self.description = description 26 | self._regex = regex 27 | } 28 | 29 | /// Matches any element for which `contains` returns `true`. 30 | /// - Parameters: 31 | /// - description: A descriptive identifier for textual representation of the pattern. 32 | /// - regex: An optional regex matching the same elements. 33 | /// - contains: A closure returning true for any element that matches. 34 | @inlinable 35 | public init(description: String, regex: String? 
= nil, contains: @escaping (Input.Element) -> Bool) { 36 | self.init(description: description, regex: regex, group: Group(contains: contains)) 37 | } 38 | 39 | /// Matches any element for which `contains` returns `true`. 40 | /// - Parameters: 41 | /// - description: A descriptive identifier for textual representation of the pattern. 42 | /// - regex: An optional regex matching the same elements. 43 | /// - contains: A closure returning true for any element that matches. 44 | @inlinable 45 | public init(description: String, regex: String? = nil, contains: @escaping (Input.Element) -> Bool) where Input == String { 46 | self.init(description: description, regex: regex, group: Group(contains: contains)) 47 | } 48 | 49 | /// Matches any elements in `elements`. 50 | /// - Parameter elements: A sequence of elements to match. 51 | @inlinable 52 | public init(_ elements: Input) { 53 | group = Group(contentsOf: elements) 54 | description = "[\(String(describing: elements))]" 55 | _regex = "[\(NSRegularExpression.escapedPattern(for: elements.map(String.init(describing:)).joined()))]" 56 | } 57 | 58 | /// Matches any elements _not_ in `elements`. 59 | /// - Parameter elements: A sequence of elements _not_ to match. 60 | @inlinable 61 | public init(not elements: Input) { 62 | group = Group(contentsOf: elements).inverted() 63 | description = "[^\(String(describing: elements))]" 64 | _regex = "[^\(NSRegularExpression.escapedPattern(for: elements.map(String.init(describing:)).joined()))]" 65 | } 66 | 67 | @inlinable 68 | public func createInstructions(_ instructions: inout ContiguousArray>) { 69 | instructions.append(.checkElement(group.contains)) 70 | } 71 | 72 | public static func == (lhs: OneOf, rhs: OneOf) -> Bool { 73 | lhs.description == rhs.description 74 | } 75 | } 76 | 77 | // MARK: OneOfConvertible 78 | 79 | // Allows for e.g. `OneOf("a" ..< "e", "g", uppercase)` and `OneOf(not: "a" ..< "e", "gåopr", uppercase)` 80 | 81 | /// A type that `OneOf` can use. 
82 | public protocol OneOfConvertible { 83 | associatedtype Element: Hashable & Comparable 84 | @inlinable 85 | func contains(_: Element) -> Bool 86 | } 87 | 88 | extension OneOf: OneOfConvertible { 89 | @inlinable 90 | public func contains(_ char: Input.Element) -> Bool { group.contains(char) } 91 | } 92 | 93 | extension Character: OneOfConvertible { 94 | @inlinable 95 | public func contains(_ char: Character) -> Bool { char == self } 96 | } 97 | 98 | /* Should have been 99 | extension Collection: OneOfConvertible where Element: Hashable { } 100 | but "Extension of protocol 'Collection' cannot have an inheritance clause". 101 | */ 102 | extension String: OneOfConvertible {} 103 | extension Substring: OneOfConvertible {} 104 | extension String.UTF8View: OneOfConvertible {} 105 | extension Substring.UTF8View: OneOfConvertible {} 106 | extension String.UTF16View: OneOfConvertible {} 107 | extension Substring.UTF16View: OneOfConvertible {} 108 | extension String.UnicodeScalarView: OneOfConvertible {} 109 | extension Substring.UnicodeScalarView: OneOfConvertible {} 110 | 111 | @inlinable 112 | public func ... (lhs: Character, rhs: Character) -> ClosedRange { 113 | precondition(lhs <= rhs, "The left side of the '...' operator must be less than or equal to the right side.") 114 | return ClosedRange(uncheckedBounds: (lower: lhs, upper: rhs)) 115 | } 116 | 117 | extension ClosedRange: OneOfConvertible where Bound: Hashable {} 118 | 119 | @inlinable 120 | public func ..< (lhs: Character, rhs: Character) -> Range { 121 | precondition(lhs <= rhs, "The left side of the '..<' operator must be less than or equal to the right side.") 122 | return Range(uncheckedBounds: (lower: lhs, upper: rhs)) 123 | } 124 | 125 | extension Range: OneOfConvertible where Bound: Hashable {} 126 | 127 | extension OneOf { 128 | /* It will be a glorious day when all this can be replaced by two methods using variadic generics. 
*/ 129 | 130 | @usableFromInline 131 | internal init(closures: [(Input.Element) -> Bool], description: String, isNegated: Bool = false) { 132 | group = Group(contains: isNegated 133 | ? { element in !closures.contains(where: { $0(element) }) } 134 | : { element in closures.contains(where: { $0(element) }) }) 135 | self.description = description 136 | _regex = nil 137 | } 138 | 139 | /// Matches any of the provided elements. 140 | @inlinable 141 | public init(_ o1: O1) 142 | where Input.Element == O1.Element { 143 | let closures = [o1.contains(_:)] 144 | self.init(closures: closures, description: "[\(o1)]") 145 | } 146 | 147 | /// Matches any of the provided elements. 148 | @inlinable 149 | public init(_ o1: O1, _ o2: O2) 150 | where Input.Element == O1.Element, O1.Element == O2.Element { 151 | let closures = [o1.contains(_:), o2.contains(_:)] 152 | self.init(closures: closures, description: "[\(o1), \(o2)]") 153 | } 154 | 155 | /// Matches any of the provided elements. 156 | @inlinable 157 | public init(_ o1: O1, _ o2: O2, _ o3: O3) 158 | where Input.Element == O1.Element, O1.Element == O2.Element, O2.Element == O3.Element { 159 | let closures = [o1.contains(_:), o2.contains(_:), o3.contains(_:)] 160 | self.init(closures: closures, description: "[\(o1), \(o2), \(o3)]") 161 | } 162 | 163 | /// Matches any of the provided elements. 164 | @inlinable 165 | public init 166 | (_ o1: O1, _ o2: O2, _ o3: O3, _ o4: O4) 167 | where Input.Element == O1.Element, O1.Element == O2.Element, O2.Element == O3.Element, O3.Element == O4.Element { 168 | let closures = [o1.contains(_:), o2.contains(_:), o3.contains(_:), o4.contains(_:)] 169 | self.init(closures: closures, description: "[\(o1), \(o2), \(o3), \(o4)]") 170 | } 171 | 172 | // Not 173 | 174 | /// Matches any _but_ the provided elements. 
175 | @inlinable 176 | public init(not o1: O1) 177 | where Input.Element == O1.Element { 178 | let closures = [o1.contains(_:)] 179 | self.init(closures: closures, description: "[^\(o1)]", isNegated: true) 180 | } 181 | 182 | /// Matches any _but_ the provided elements. 183 | @inlinable 184 | public init(not o1: O1, _ o2: O2) 185 | where Input.Element == O1.Element, O1.Element == O2.Element { 186 | let closures = [o1.contains(_:), o2.contains(_:)] 187 | self.init(closures: closures, description: "[^\(o1), \(o2)]", isNegated: true) 188 | } 189 | 190 | /// Matches any _but_ the provided elements. 191 | @inlinable 192 | public init(not o1: O1, _ o2: O2, _ o3: O3) 193 | where Input.Element == O1.Element, O1.Element == O2.Element, O2.Element == O3.Element { 194 | let closures = [o1.contains(_:), o2.contains(_:), o3.contains(_:)] 195 | self.init(closures: closures, description: "[^\(o1), \(o2), \(o3)]", isNegated: true) 196 | } 197 | 198 | /// Matches any _but_ the provided elements. 199 | @inlinable 200 | public init 201 | (not o1: O1, _ o2: O2, _ o3: O3, _ o4: O4) 202 | where Input.Element == O1.Element, O1.Element == O2.Element, O2.Element == O3.Element, O3.Element == O4.Element { 203 | let closures = [o1.contains(_:), o2.contains(_:), o3.contains(_:), o4.contains(_:)] 204 | self.init(closures: closures, description: "[^\(o1), \(o2), \(o3), \(o4)]", isNegated: true) 205 | } 206 | } 207 | 208 | // MARK: Join `&&OneOf • OneOf` into one. 209 | 210 | @inlinable 211 | public func • (lhs: AndPattern>, rhs: OneOf) -> OneOf { 212 | OneOf(description: "\(lhs) \(rhs)", group: lhs.wrapped.group.intersection(rhs.group)) 213 | } 214 | 215 | @inlinable 216 | public func • (lhs: Concat>>, rhs: OneOf) -> Concat> { 217 | lhs.first • (lhs.second • rhs) 218 | } 219 | 220 | // MARK: Join `!OneOf • OneOf` into one. 
221 | 222 | @inlinable 223 | public func • (lhs: NotPattern>, rhs: OneOf) -> OneOf { 224 | OneOf(description: "\(lhs) \(rhs)", group: rhs.group.subtracting(lhs.wrapped.group)) 225 | } 226 | 227 | @inlinable 228 | public func • (lhs: Concat>>, rhs: OneOf) -> Concat> { 229 | lhs.first • (lhs.second • rhs) 230 | } 231 | 232 | // MARK: Join `OneOf / OneOf` into one. 233 | 234 | @inlinable 235 | public func / (lhs: OneOf, rhs: OneOf) -> OneOf { 236 | OneOf(description: "\(lhs) / \(rhs)", group: lhs.group.union(rhs.group)) 237 | } 238 | 239 | @inlinable 240 | public func / (lhs: OrPattern>, rhs: OneOf) -> OrPattern> { 241 | lhs.first / (lhs.second / rhs) 242 | } 243 | 244 | // MARK: Common patterns. 245 | 246 | /// Succeeds anywhere except for at the end of input, and consumes 1 element. 247 | public let any = OneOf(description: "any", regex: #"[.\p{Zl}]"#, 248 | contains: { _ in true }) 249 | /// Matches one character representing a letter, i.e. where `Character.isLetter` is `true`. 250 | public let letter = OneOf(description: "letter", regex: #"\p{Alphabetic}"#, 251 | contains: { $0.isLetter }) 252 | /// Matches one character representing a lowercase character, i.e. where `Character.isLowercase` is `true`. 253 | public let lowercase = OneOf(description: "lowercase", regex: #"\p{Ll}"#, 254 | contains: { $0.isLowercase }) 255 | /// Matches one character representing an uppercase character, i.e. where `Character.isUppercase` is `true`. 256 | public let uppercase = OneOf(description: "uppercase", regex: #"\p{Lu}"#, 257 | contains: { $0.isUppercase }) 258 | /// Matches one character representing a whole number, i.e. where `Character.isWholeNumber` is `true`. 259 | public let digit = OneOf(description: "digit", regex: #"\p{Nd}"#, 260 | contains: { $0.isWholeNumber }) 261 | /// Matches one letter or one digit. 
262 | public let alphanumeric = OneOf(description: "alphanumeric", regex: #"(?:\p{Alphabetic}|\p{Nd})"#, 263 | contains: { $0.isWholeNumber || $0.isLetter }) 264 | /// Matches one character representing a newline, i.e. where `Character.isNewline` is `true`. 265 | public let newline = OneOf(description: "newline", regex: #"\p{Zl}"#, 266 | contains: { $0.isNewline }) 267 | /// Matches one character representing whitespace (including newlines), i.e. where `Character.isWhitespace` is `true`. 268 | public let whitespace = OneOf(description: "whitespace", regex: #"\p{White_Space}"#, 269 | contains: { $0.isWhitespace }) 270 | /// Matches one character representing punctuation, i.e. where `Character.isPunctuation` is `true`. 271 | public let punctuation = OneOf(description: "punctuation", regex: #"\p{P}"#, 272 | contains: { $0.isPunctuation }) 273 | /// Matches one character representing a symbol, i.e. where `Character.isSymbol` is `true`. 274 | public let symbol = OneOf(description: "symbol", regex: #"\p{S}"#, 275 | contains: { $0.isSymbol }) 276 | /// Matches one character representing a hexadecimal digit, i.e. where `Character.isHexDigit` is `true`. 277 | public let hexDigit = OneOf(description: "hexDigit", regex: #"\p{Hex_Digit}"#, 278 | contains: { $0.isHexDigit }) 279 | /// Matches one ASCII character, i.e. where `Character.isASCII` is `true`. 280 | public let ascii = OneOf(description: "ascii", regex: #"[[:ascii:]]"#, 281 | contains: { $0.isASCII }) // regex might also be [ -~] or [\x00-\x7F] 282 | /// Matches one character representing a mathematical symbol, i.e. where `Character.isMathSymbol` is `true`. 283 | public let mathSymbol = OneOf(description: "mathSymbol", regex: #"\p{Sm}"#, 284 | contains: { $0.isMathSymbol }) 285 | /// Matches one character representing a currency symbol, i.e. where `Character.isCurrencySymbol` is `true`. 
286 | public let currencySymbol = OneOf(description: "currencySymbol", regex: #"\p{Sc}"#, 287 | contains: { $0.isCurrencySymbol }) 288 | 289 | extension OneOf where Input == String { 290 | /// Predefined OneOf patterns. 291 | public static var patterns: [OneOf] { 292 | [alphanumeric, letter, lowercase, uppercase, punctuation, whitespace, newline, hexDigit, digit, 293 | ascii, symbol, mathSymbol, currencySymbol] 294 | } 295 | 296 | /// All the predefined OneOf patterns that match `element`. 297 | public static func patterns(for element: Input.Element) -> [OneOf] { 298 | OneOf.patterns.filter { $0.group.contains(element) } 299 | } 300 | 301 | /// The predefined OneOf patterns that match _all_ the elements in `sequence`. 302 | public static func patterns(for sequence: S) -> [OneOf] where S.Element == Input.Element { 303 | let sequence = ContiguousArray(sequence) 304 | return OneOf.patterns.filter { $0.group.contains(contentsOf: sequence) } 305 | } 306 | } 307 | -------------------------------------------------------------------------------- /Sources/Patterns/Decoder.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Decoder.swift 3 | // Patterns 4 | // 5 | // Created by Kåre Morstøl on 14/08/2019. 6 | // 7 | 8 | extension Parser where Input == String { 9 | /// Decodes all matches found in `string` into an array of `T`. 10 | @inlinable 11 | public func decode(_ type: [T].Type, from string: String) throws -> [T] where T: Decodable { 12 | try matches(in: string).map { try $0.decode(type.Element.self, from: string) } 13 | } 14 | 15 | /// Decodes the first match found in `string` into a value of type `type`. 16 | @inlinable 17 | public func decodeFirst(_ type: T.Type, from string: String) throws -> T? 
where T: Decodable { 18 | try match(in: string, at: string.startIndex).map { try $0.decode(type.self, from: string) } 19 | } 20 | } 21 | 22 | extension Parser.Match where Input == String { 23 | /// Decodes this match found in `string` into a value of type `type`. 24 | @inlinable 25 | public func decode(_ type: T.Type, from string: String) throws -> T where T: Decodable { 26 | try type.init(from: MatchDecoder(match: self, string: string)) 27 | } 28 | 29 | public struct MatchDecoder: Decoder { 30 | @usableFromInline 31 | let match: Parser.Match 32 | @usableFromInline 33 | let string: String 34 | 35 | public let codingPath: [CodingKey] 36 | public var userInfo: [CodingUserInfoKey: Any] { [:] } 37 | 38 | @inlinable 39 | init(match: Parser.Match, string: String, codingPath: [CodingKey] = []) { 40 | let namePrefix = codingPath.first.map { $0.stringValue } 41 | let captures = namePrefix.map { namePrefix in 42 | match.captures.flatMap { name, range in 43 | name?.hasPrefix(namePrefix) ?? false ? [(String(name!.dropFirst(namePrefix.count)), range)] : [] 44 | } 45 | } ?? 
match.captures 46 | 47 | self.match = Parser.Match(endIndex: match.endIndex, captures: captures) 48 | self.string = string 49 | self.codingPath = codingPath 50 | } 51 | 52 | @inlinable 53 | public func container(keyedBy _: Key.Type) throws -> KeyedDecodingContainer where Key: CodingKey { 54 | KeyedDecodingContainer(KDC(codingPath: codingPath, matchDecoder: self)) 55 | } 56 | 57 | @inlinable 58 | public func unkeyedContainer() throws -> UnkeyedDecodingContainer { 59 | UDC(codingPath: codingPath, values: match.captures.map { $0.range }, string: string) 60 | } 61 | 62 | @inlinable 63 | public func singleValueContainer() throws -> SingleValueDecodingContainer { 64 | guard match.captures.count < 2 else { 65 | let property = codingPath.map { "\($0.stringValue)" }.joined(separator: ".") 66 | throw DecodingError.dataCorrupted(DecodingError.Context(codingPath: codingPath, debugDescription: 67 | "Property '\(property)' needs a single value, but multiple captures exists.")) 68 | } 69 | let range = match.captures.first?.range ?? match.endIndex ..< match.endIndex 70 | return StringDecoder(string: String(string[range]), codingPath: codingPath) 71 | } 72 | 73 | @usableFromInline 74 | struct UDC: UnkeyedDecodingContainer { 75 | @usableFromInline 76 | var codingPath: [CodingKey] 77 | @usableFromInline 78 | let values: [Range] 79 | @usableFromInline 80 | let string: String 81 | 82 | @usableFromInline 83 | init(codingPath: [CodingKey], values: [Range], string: String) { 84 | self.codingPath = codingPath 85 | self.values = values 86 | self.string = string 87 | } 88 | 89 | @usableFromInline 90 | var count: Int? 
{ values.count } 91 | @usableFromInline 92 | var isAtEnd: Bool { currentIndex >= values.endIndex } 93 | @usableFromInline 94 | var currentIndex: Int = 0 95 | 96 | @usableFromInline 97 | mutating func decodeNil() throws -> Bool { false } 98 | 99 | @usableFromInline 100 | mutating func nestedContainer(keyedBy type: NestedKey.Type) 101 | throws -> KeyedDecodingContainer where NestedKey: CodingKey { 102 | fatalError("Not implemented yet. If you want to help with that, go to https://github.com/kareman/Patterns") 103 | } 104 | 105 | @usableFromInline 106 | mutating func nestedUnkeyedContainer() throws -> UnkeyedDecodingContainer { 107 | fatalError("Not implemented yet. If you want to help with that, go to https://github.com/kareman/Patterns") 108 | } 109 | 110 | @usableFromInline 111 | mutating func superDecoder() throws -> Decoder { 112 | fatalError("Not implemented yet. If you want to help with that, go to https://github.com/kareman/Patterns") 113 | } 114 | 115 | @usableFromInline 116 | mutating func decode(_ type: T.Type) throws -> T where T: Decodable { 117 | defer { currentIndex += 1 } 118 | let text = String(string[values[currentIndex]]) 119 | return try type.init(from: StringDecoder(string: text, codingPath: codingPath)) 120 | } 121 | 122 | @usableFromInline 123 | mutating func decode(_ type: T.Type) throws -> T where T: Decodable & LosslessStringConvertible { 124 | guard let value = type.init(String(string[values[currentIndex]])) else { 125 | throw DecodingError.typeMismatch(type, DecodingError.Context(codingPath: codingPath, debugDescription: "")) 126 | } 127 | currentIndex += 1 128 | return value 129 | } 130 | } 131 | 132 | @usableFromInline 133 | struct KDC: KeyedDecodingContainerProtocol { 134 | @usableFromInline 135 | var codingPath: [CodingKey] = [] 136 | @usableFromInline 137 | var allKeys: [Key] { 138 | matchDecoder.match.captureNames.compactMap(Key.init(stringValue:)) 139 | } 140 | 141 | @usableFromInline 142 | let matchDecoder: MatchDecoder 143 | 144 | 
@usableFromInline 145 | init(codingPath: [CodingKey] = [], matchDecoder: MatchDecoder) { 146 | self.codingPath = codingPath 147 | self.matchDecoder = matchDecoder 148 | } 149 | /// Returns the text captured under the name `key.stringValue`. Throws `DecodingError.keyNotFound` if the match has no such capture. 150 | @usableFromInline 151 | func capture(for key: CodingKey) throws -> String { 152 | guard let range = matchDecoder.match[one: key.stringValue] else { 153 | throw DecodingError.keyNotFound(key, DecodingError.Context(codingPath: codingPath, debugDescription: "")) 154 | } 155 | return String(matchDecoder.string[range]) 156 | } 157 | /// Returns true if the match has a capture named `key.stringValue`. 158 | @usableFromInline 159 | func contains(_ key: Key) -> Bool { 160 | matchDecoder.match[one: key.stringValue] != nil 161 | } 162 | /// Returns true if there is no capture for `key`, i.e. there is nothing to decode for it. 163 | @usableFromInline 164 | func decodeNil(forKey key: Key) throws -> Bool { 165 | !contains(key) 166 | } 167 | /// Decodes a nested Decodable from the same match, with `key` appended to the coding path. 168 | @usableFromInline 169 | func decode(_ type: T.Type, forKey key: Key) throws -> T where T: Decodable { 170 | return try type.init(from: 171 | MatchDecoder(match: matchDecoder.match, string: matchDecoder.string, codingPath: codingPath + [key])) 172 | } 173 | /// Decodes a value directly from the text captured under `key`. Throws `DecodingError.typeMismatch` if the text cannot be converted to `type`. 174 | @usableFromInline 175 | func decode(_ type: T.Type, forKey key: Key) throws -> T where T: Decodable & LosslessStringConvertible { 176 | guard let value = type.init(try capture(for: key)) else { 177 | throw DecodingError.typeMismatch(type, DecodingError.Context(codingPath: codingPath + [key], debugDescription: "")) 178 | } 179 | return value 180 | } 181 | 182 | @usableFromInline 183 | func nestedContainer(keyedBy _: NestedKey.Type, forKey _: Key) 184 | throws -> KeyedDecodingContainer where NestedKey: CodingKey { 185 | fatalError("Not implemented yet. If you want to help with that, go to https://github.com/kareman/Patterns") 186 | } 187 | 188 | @usableFromInline 189 | func nestedUnkeyedContainer(forKey _: Key) throws -> UnkeyedDecodingContainer { 190 | fatalError("Not implemented yet. 
If you want to help with that, go to https://github.com/kareman/Patterns") 191 | } 192 | 193 | @usableFromInline 194 | func superDecoder() throws -> Decoder { 195 | fatalError("Not implemented yet. If you want to help with that, go to https://github.com/kareman/Patterns") 196 | } 197 | 198 | @usableFromInline 199 | func superDecoder(forKey _: Key) throws -> Decoder { 200 | fatalError("Not implemented yet. If you want to help with that, go to https://github.com/kareman/Patterns") 201 | } 202 | } 203 | } 204 | } 205 | 206 | @usableFromInline 207 | struct StringDecoder: Decoder, SingleValueDecodingContainer { 208 | @usableFromInline 209 | let string: String 210 | @usableFromInline 211 | let codingPath: [CodingKey] 212 | @usableFromInline 213 | var userInfo: [CodingUserInfoKey: Any] = [:] 214 | 215 | @usableFromInline 216 | init(string: String, codingPath: [CodingKey], userInfo: [CodingUserInfoKey: Any] = [:]) { 217 | self.string = string 218 | self.codingPath = codingPath 219 | self.userInfo = userInfo 220 | } 221 | 222 | @usableFromInline 223 | func container(keyedBy type: Key.Type) throws -> KeyedDecodingContainer where Key: CodingKey { 224 | fatalError("Not implemented yet. If you want to help with that, go to https://github.com/kareman/Patterns") 225 | } 226 | 227 | @usableFromInline 228 | func unkeyedContainer() throws -> UnkeyedDecodingContainer { 229 | fatalError("Not implemented yet. If you want to help with that, go to https://github.com/kareman/Patterns") 230 | } 231 | 232 | @usableFromInline 233 | func singleValueContainer() throws -> SingleValueDecodingContainer { self } 234 | 235 | @usableFromInline 236 | func decodeNil() -> Bool { false } 237 | 238 | @usableFromInline 239 | func decode(_ type: T.Type) throws -> T where T: Decodable { 240 | fatalError("Not implemented yet. 
If you want to help with that, go to https://github.com/kareman/Patterns") 241 | } 242 | 243 | @usableFromInline 244 | func decode(_ type: T.Type) throws -> T where T: Decodable & LosslessStringConvertible { 245 | guard let value = type.init(string) else { 246 | throw DecodingError.typeMismatch(type, DecodingError.Context(codingPath: codingPath, debugDescription: "")) 247 | } 248 | return value 249 | } 250 | } 251 | -------------------------------------------------------------------------------- /Sources/Patterns/General/General.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Collections.swift 3 | // Patterns 4 | // 5 | // Created by Kåre Morstøl on 24/09/16. 6 | // 7 | 8 | @usableFromInline 9 | struct SearchCache { 10 | @usableFromInline 11 | let skipTable: [Element: Int] 12 | @usableFromInline 13 | let target: [Element] 14 | 15 | @usableFromInline 16 | init(_ target: Target) where Target.Element == Element { 17 | let newtarget = Array(target) 18 | var skipTable = [Element: Int](minimumCapacity: newtarget.count) 19 | for (i, c) in newtarget[...].dropLast().enumerated() { 20 | skipTable[c] = newtarget.count - i - 1 21 | } 22 | self.skipTable = skipTable 23 | self.target = newtarget 24 | } 25 | } 26 | 27 | extension BidirectionalCollection where Element: Hashable { 28 | /// Finds the next occurrence of `target` in this collection. 29 | /// - Parameters: 30 | /// - target: The sequence of elements to search for. 31 | /// - start: Where to start the search from. 32 | /// - Returns: The range where `target` was found, or nil if not found. 33 | @inlinable 34 | func range(of target: Target, from start: Index? = nil) -> Range? 35 | where Target.Element == Element { 36 | self.range(of: SearchCache(target), from: start) 37 | } 38 | 39 | /// Finds the next occurrence of `cache.target` in this collection, using the pre-created `cache`. 
40 | /// - Parameters: 41 | /// - cache: When searching for the same sequence multiple times, use a SearchCache for improved performance. 42 | /// - start: Where to start the search from. 43 | /// - Returns: The range where `target` was found, or nil if not found. 44 | @inlinable 45 | func range(of cache: SearchCache, from start: Index? = nil) -> Range? { 46 | // https://en.wikipedia.org/wiki/Boyer–Moore–Horspool_algorithm 47 | let target = cache.target 48 | guard !target.isEmpty else { return nil } 49 | 50 | var pos = self.index(start ?? self.startIndex, offsetBy: target.count - 1, limitedBy: endIndex) ?? endIndex 51 | 52 | while pos < endIndex { 53 | var i = pos 54 | var p = target.index(before: target.endIndex) 55 | 56 | while self[i] == target[p] { 57 | if p == target.startIndex { 58 | return i ..< index(after: pos) 59 | } else { 60 | self.formIndex(before: &i) 61 | target.formIndex(before: &p) 62 | } 63 | } 64 | 65 | let advance = cache.skipTable[self[pos]] ?? target.count 66 | pos = self.index(pos, offsetBy: advance, limitedBy: endIndex) ?? endIndex 67 | } 68 | 69 | return nil 70 | } 71 | } 72 | 73 | extension Collection { 74 | /// Returns the results of passing leading elements to `transform` until it returns nil. 75 | /// - Parameter transform: transforms each element, returns nil when it wants to stop. 76 | /// - Throws: Whatever `transform` throws. 77 | /// - Returns: An array of the transformed elements, not including the first `nil`. 78 | @inlinable 79 | func mapPrefix(transform: (Element) throws -> T?) rethrows -> [T] { 80 | var result = [T]() 81 | for e in self { 82 | guard let transformed = try transform(e) else { 83 | return result 84 | } 85 | result.append(transformed) 86 | } 87 | return result 88 | } 89 | } 90 | 91 | extension Sequence { 92 | /// Returns an array containing the entire sequence. 
93 | func array() -> [Element] { Array(self) } 94 | 95 | /// Returns the result of combining the elements using the given closure, if there are no nil elements. 96 | /// - Parameters: 97 | /// - initialResult: The value to use as the initial accumulating value. 98 | /// - updateAccumulatingResult: A closure that updates the accumulating value with an element of the sequence. 99 | /// - partialResult: The accumulating value. 100 | /// - unwrappedElement: An unwrapped element. 101 | /// - Returns: The final accumulated value, or nil if there were any nil elements. 102 | /// If the sequence has no elements, the result is initialResult. 103 | @inlinable 104 | func reduceIfNoNils( 105 | into initialResult: Result, 106 | _ updateAccumulatingResult: (_ partialResult: inout Result, _ unwrappedElement: T) throws -> Void) 107 | rethrows -> Result? where Element == Optional { 108 | var accumulator = initialResult 109 | for element in self { 110 | guard let element = element else { return nil } 111 | try updateAccumulatingResult(&accumulator, element) 112 | } 113 | return accumulator 114 | } 115 | } 116 | 117 | /// Used like e.g. `let a = optional ?? fatalError("Message")`. 118 | @inlinable 119 | func ?? (b: T?, a: @autoclosure () -> Never) -> T { 120 | if let b = b { 121 | return b 122 | } 123 | a() 124 | } 125 | 126 | /// Used like e.g. `let a = try optional ?? AnError()`. 127 | @inlinable 128 | func ?? (b: T?, a: @autoclosure () -> (E)) throws -> T { 129 | if let b = b { 130 | return b 131 | } else { 132 | throw a() 133 | } 134 | } 135 | 136 | extension BidirectionalCollection { 137 | /// Returns an index that is the specified distance from the given index, or nil if that index would be invalid. 138 | /// Never returns `endIndex`. 139 | @inlinable 140 | func validIndex(_ i: Index, offsetBy distance: Int) -> Index? 
{ 141 | if distance < 0 { 142 | return index(i, offsetBy: distance, limitedBy: startIndex) 143 | } 144 | let newI = index(i, offsetBy: distance, limitedBy: endIndex) 145 | return newI == endIndex ? nil : newI 146 | } 147 | 148 | /// Offsets the given index by the specified distance, limited by `startIndex...endIndex`. 149 | /// - Returns: true if `index` has been offset by exactly `distance` steps; otherwise, false. When the return value is false, `index` is either `startIndex` or `endIndex`. 150 | @inlinable 151 | func formIndexSafely(_ index: inout Index, offsetBy distance: Int) -> Bool { 152 | if distance > 0 { 153 | return formIndex(&index, offsetBy: distance, limitedBy: endIndex) 154 | } 155 | return formIndex(&index, offsetBy: distance, limitedBy: startIndex) 156 | } 157 | } 158 | 159 | extension RangeReplaceableCollection where SubSequence == Self, Self: BidirectionalCollection { 160 | /// Removes the trailing range of elements for which `predicate` returns true. 161 | /// Stops as soon as `predicate` returns false. 162 | @inlinable 163 | mutating func removeSuffix(where predicate: (Element) -> Bool) { 164 | guard !isEmpty else { return } 165 | var i = index(before: endIndex) 166 | guard predicate(self[i]) else { return } 167 | while i > startIndex { 168 | formIndex(before: &i) 169 | if !predicate(self[i]) { 170 | self = self[...i] 171 | return 172 | } 173 | } 174 | removeAll() 175 | } 176 | 177 | @inlinable 178 | mutating func removeSuffix(from index: Index) { 179 | guard index < endIndex else { return } 180 | removeLast(distance(from: index, to: endIndex)) 181 | } 182 | } 183 | 184 | extension RangeReplaceableCollection { 185 | /// Shortcut for creating a RangeReplaceableCollection. 186 | /// 187 | /// Example: 188 | /// ``` 189 | /// let longIdentifier = Array { 190 | /// $0.append(...) 191 | /// $0.append(contentsOf:...) 
192 | /// } 193 | /// ``` 194 | @inlinable 195 | init(compose: (inout Self) throws -> Void) rethrows { 196 | self.init() 197 | try compose(&self) 198 | } 199 | 200 | /// Shortcut for appending to a RangeReplaceableCollection. 201 | /// 202 | /// Example: 203 | /// ``` 204 | /// longIdentifier.append { 205 | /// $0.append(...) 206 | /// $0.append(contentsOf:...) 207 | /// } 208 | /// ``` 209 | @inlinable 210 | mutating func append(compose: (inout Self) throws -> Void) rethrows { 211 | try compose(&self) 212 | } 213 | } 214 | -------------------------------------------------------------------------------- /Sources/Patterns/General/Group.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Group.swift 3 | // Patterns 4 | // 5 | // Created by Kåre Morstøl on 23/04/2019. 6 | // 7 | 8 | /// Works like a set, except it cannot list its contents. 9 | /// It can only tell whether or not it contains a specific element. 10 | @usableFromInline 11 | struct Group { 12 | /// Returns true if this group contains `element`. 13 | @usableFromInline 14 | let contains: (Element) -> Bool 15 | 16 | /// A new group containing only elements for which `contains` returns true. 17 | @usableFromInline 18 | init(contains: @escaping (Element) -> Bool) { 19 | self.contains = contains 20 | } 21 | 22 | /// Returns true if this group contains all the elements in `sequence`. 23 | @usableFromInline 24 | func contains(contentsOf sequence: S) -> Bool where S.Element == Element { 25 | sequence.allSatisfy(contains) 26 | } 27 | } 28 | 29 | extension Group { 30 | /// Returns a group which contains all the elements of `self` and `other`. 31 | @usableFromInline 32 | func union(_ other: Group) -> Group { 33 | Group { self.contains($0) || other.contains($0) } 34 | } 35 | 36 | @usableFromInline 37 | static func || (a: Group, b: Group) -> Group { 38 | a.union(b) 39 | } 40 | 41 | /// Returns a group containing only elements that are both in `self` and `other`. 
42 | @usableFromInline 43 | func intersection(_ other: Group) -> Group { 44 | Group { self.contains($0) && other.contains($0) } 45 | } 46 | 47 | /// Returns a group containing only elements that are in `self` but not `other`. 48 | @usableFromInline 49 | func subtracting(_ other: Group) -> Group { 50 | Group { self.contains($0) && !other.contains($0) } 51 | } 52 | 53 | /// Returns a group containing only elements that are _not_ in `self`. 54 | @usableFromInline 55 | func inverted() -> Group { 56 | Group { !self.contains($0) } 57 | } 58 | } 59 | 60 | extension Group where Element: Hashable { 61 | /// A new group containing only elements that are in `set`. 62 | @usableFromInline 63 | init(contentsOf set: Set) { 64 | contains = set.contains 65 | } 66 | 67 | /// A new group containing only elements that are in `sequence`. 68 | @usableFromInline 69 | init(contentsOf sequence: S) where S: Sequence, Element == S.Element { 70 | self.init(contentsOf: Set(sequence)) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /Sources/Patterns/Grammar.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Grammar.swift 3 | // 4 | // 5 | // Created by Kåre Morstøl on 27/05/2020. 6 | // 7 | 8 | /// Allows for recursive patterns, also indirectly. 9 | /// 10 | /// Define subpatterns using `<-`, like this arithmetic pattern: 11 | /// ``` 12 | /// let g = Grammar { g in 13 | /// g.all <- g.expr • !any 14 | /// g.expr <- g.sum 15 | /// g.sum <- g.product • (("+" / "-") • g.product)* 16 | /// g.product <- g.power • (("*" / "/") • g.power)* 17 | /// g.power <- g.value • ("^" • g.power)¿ 18 | /// g.value <- digit+ / "(" • g.expr • ")" 19 | /// } 20 | /// ``` 21 | /// This recognises e.g. "1+2-3*(4+3)" 22 | /// 23 | /// - warning: Does not support left recursion: 24 | /// ``` 25 | /// g.a <- g.a • g.b 26 | /// ``` 27 | /// will lead to infinite recursion. 
28 | @dynamicMemberLookup 29 | public class Grammar: Pattern where Input.Element: Hashable { 30 | /// Calls another subpattern in a grammar. 31 | public struct CallPattern: Pattern { 32 | /// The grammar that contains the subpattern being called. 33 | public let grammar: Grammar 34 | /// The name of the subpattern being called. 35 | public let name: String 36 | public var description: String { "<\(name)>" } 37 | 38 | @inlinable 39 | init(grammar: Grammar, name: String) { 40 | self.grammar = grammar 41 | self.name = name 42 | } 43 | 44 | @inlinable 45 | public func createInstructions(_ instructions: inout ContiguousArray>) { 46 | instructions.append(.openCall(name: name)) 47 | } 48 | } 49 | 50 | public var description: String { "Grammar" } // TODO: 51 | 52 | /// All the subpatterns and their names. 53 | public internal(set) var patterns: [(name: String, pattern: AnyPattern)] = [] 54 | 55 | /// The main subpattern, which will be called when this Grammar is being used. 56 | public var firstPattern: String? { patterns.first?.name } 57 | 58 | @inlinable 59 | public init() {} 60 | 61 | @inlinable 62 | public init() where Input == String {} 63 | 64 | @inlinable 65 | public convenience init(_ closure: (Grammar) -> Void) { 66 | self.init() 67 | closure(self) 68 | } 69 | 70 | @inlinable 71 | public convenience init(_ closure: (Grammar) -> Void) where Input == String { 72 | self.init() 73 | closure(self) 74 | } 75 | 76 | @inlinable 77 | /// Allows the use of e.g. `g.a` to refer to subpatterns. 78 | public subscript(dynamicMember name: String) -> CallPattern { 79 | CallPattern(grammar: self, name: name) 80 | } 81 | 82 | @inlinable 83 | public func createInstructions(_ instructions: inout ContiguousArray>) throws { 84 | // We begin with a call to the first subpattern, followed by a jump to the end. 85 | // This enables this grammar to be used inside other patterns (including other grammars). 
86 | 87 | let startIndex = instructions.endIndex 88 | instructions.append( 89 | .openCall(name: try firstPattern ?? Parser.PatternError.message("Grammar is empty."))) 90 | instructions.append(.jump(offset: .max)) // replaced later 91 | var callTable = [String: Range]() 92 | 93 | // Create instructions for all subpatterns. Store their positions in `callTable`. 94 | for (name, pattern) in patterns { 95 | let startIndex = instructions.endIndex 96 | try pattern.createInstructions(&instructions) 97 | instructions.append(.return) 98 | guard (startIndex ..< instructions.endIndex).count > 1 else { 99 | throw Parser.PatternError.message("Pattern '\(name) <- \(pattern)' was empty.") 100 | } 101 | callTable[name] = startIndex ..< instructions.endIndex 102 | } 103 | 104 | // Replace all `.openCall` with `.call(offset)` and the correct offsets. 105 | for i in instructions.indices[startIndex...] { 106 | if case let .openCall(name) = instructions[i] { 107 | guard let subpatternRange = callTable[name] else { 108 | throw Parser.PatternError.message("Pattern '\(name)' was never defined with ´<-´ operator.") 109 | } 110 | // If the last non-dummy (i.e. not .choiceEnd) instruction in a subpattern is a call to itself we 111 | // perform a tail call optimisation by jumping directly instead. 112 | // The very last instruction is a .return, so skip that. 
113 | if subpatternRange.upperBound - 2 == i 114 | || (subpatternRange.upperBound - 3 == i && instructions[i + 1].doesNotDoAnything) { 115 | instructions[i] = .jump(offset: subpatternRange.lowerBound - i) 116 | } else { 117 | instructions[i] = .call(offset: subpatternRange.lowerBound - i) 118 | } 119 | } 120 | } 121 | 122 | instructions[startIndex + 1] = .jump(offset: instructions.endIndex - startIndex - 1) 123 | } 124 | 125 | public static func == (lhs: Grammar, rhs: Grammar) -> Bool { 126 | lhs.patterns.elementsEqual(rhs.patterns, by: { $0.name == $1.name && $0.pattern == $1.pattern }) 127 | } 128 | } 129 | 130 | infix operator <-: AssignmentPrecedence 131 | 132 | /// Used by grammars to define subpatterns with `g.a <- ...`. 133 | public func <- (call: Grammar.CallPattern, pattern: P) { 134 | call.grammar.patterns.append((call.name, AnyPattern(pattern))) 135 | } 136 | 137 | /// In case of `g.name <- Capture(...)`, names the nameless Capture "name". 138 | public func <- (call: Grammar.CallPattern, capture: Capture

) { 139 | let newPattern = capture.name == nil 140 | ? Capture(name: call.name, capture.wrapped) 141 | : capture 142 | call.grammar.patterns.append((call.name, AnyPattern(newPattern))) 143 | } 144 | -------------------------------------------------------------------------------- /Sources/Patterns/Operations on Patterns/And.swift: -------------------------------------------------------------------------------- 1 | // 2 | // And.swift 3 | // 4 | // 5 | // Created by Kåre Morstøl on 04/06/2020. 6 | // 7 | 8 | /// A pattern which matches the `wrapped` pattern, without consuming any input. 9 | public struct AndPattern: Pattern { 10 | public typealias Input = Wrapped.Input 11 | public let wrapped: Wrapped 12 | public var description: String { "&\(wrapped)" } 13 | 14 | @usableFromInline 15 | init(_ wrapped: Wrapped) { 16 | self.wrapped = wrapped 17 | } 18 | 19 | @inlinable 20 | public func createInstructions(_ instructions: inout ContiguousArray>) throws { 21 | let wrappedInstructions = try wrapped.createInstructions() 22 | if let indexMovedBy = wrappedInstructions.movesIndexBy() { 23 | instructions.append(contentsOf: wrappedInstructions) 24 | instructions.append(.moveIndex(offset: -indexMovedBy)) 25 | } else { 26 | instructions.append { // TODO: test. And keep any captures. 27 | $0.append(.choice(offset: wrappedInstructions.count + 4)) 28 | $0.append(.choice(offset: wrappedInstructions.count + 1)) 29 | $0.append(contentsOf: wrappedInstructions) 30 | $0.append(.commit) 31 | $0.append(.fail) 32 | } 33 | } 34 | } 35 | } 36 | 37 | prefix operator && 38 | 39 | extension Pattern { 40 | /// Matches the following pattern without consuming any input. 41 | /// 42 | /// - note: in standard PEG this operator is `&`, but that is not allowed in Swift. 43 | @inlinable 44 | public static prefix func && (me: Self) -> AndPattern { 45 | AndPattern(me) 46 | } 47 | } 48 | 49 | extension Literal { 50 | /// Matches the following pattern without consuming any input. 
51 | /// 52 | /// - note: in standard PEG this operator is `&`, but that is not allowed in Swift. 53 | @inlinable 54 | public static prefix func && (me: Literal) -> AndPattern { 55 | AndPattern(me) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /Sources/Patterns/Operations on Patterns/AnyPattern.swift: -------------------------------------------------------------------------------- 1 | // 2 | // StringInterpolation.swift 3 | // Patterns 4 | // 5 | // Created by Kåre Morstøl on 11/08/2019. 6 | // 7 | 8 | /// A type erased wrapper around a pattern. 9 | /// Can be used to store patterns in arrays and non-generic variables. 10 | public struct AnyPattern: Pattern where Input.Element: Hashable { 11 | @usableFromInline 12 | let _instructions: (inout ContiguousArray>) throws -> Void 13 | 14 | @inlinable 15 | public func createInstructions(_ instructions: inout ContiguousArray>) throws { 16 | try _instructions(&instructions) 17 | } 18 | 19 | private let _description: () -> String 20 | public var description: String { _description() } 21 | 22 | /// The wrapped pattern. If you know the exact type you can unwrap it again. 
23 | public let wrapped: Any 24 | 25 | public init(_ p: P) where Input == P.Input { 26 | _instructions = p.createInstructions 27 | _description = { p.description } 28 | wrapped = p 29 | } 30 | 31 | @inlinable 32 | public init(_ p: AnyPattern) { 33 | self = p 34 | } 35 | 36 | public init(_ p: Literal) { 37 | _instructions = p.createInstructions 38 | _description = { p.description } 39 | wrapped = p 40 | } 41 | 42 | public static func == (lhs: AnyPattern, rhs: AnyPattern) -> Bool { 43 | lhs.description == rhs.description 44 | } 45 | } 46 | 47 | extension AnyPattern: ExpressibleByUnicodeScalarLiteral where Input == String { 48 | @inlinable 49 | public init(unicodeScalarLiteral value: String) { 50 | self.init(stringLiteral: String(describing: value)) 51 | } 52 | } 53 | 54 | extension AnyPattern: ExpressibleByExtendedGraphemeClusterLiteral where Input == String { 55 | public typealias ExtendedGraphemeClusterLiteralType = String 56 | } 57 | 58 | extension AnyPattern: ExpressibleByStringLiteral where Input == String { 59 | public typealias StringLiteralType = String 60 | } 61 | 62 | /// Allows AnyPattern to be defined by a string with patterns in interpolations. 63 | /// 64 | /// `let p: AnyPattern = "hi\(whitespace)there"` 65 | /// is the same as `"hi" • whitespace • "there"`. 
66 | extension AnyPattern: ExpressibleByStringInterpolation where Input == String { 67 | public struct StringInterpolation: StringInterpolationProtocol { 68 | @usableFromInline 69 | var pattern = AnyPattern("") 70 | 71 | @inlinable 72 | public init(literalCapacity: Int, interpolationCount: Int) {} 73 | 74 | @inlinable 75 | public mutating func appendLiteral(_ literal: String) { 76 | if !literal.isEmpty { 77 | pattern = AnyPattern(pattern • Literal(literal)) 78 | } 79 | } 80 | 81 | @inlinable 82 | public mutating func appendInterpolation(_ newpattern: P) where P.Input == Input { 83 | pattern = AnyPattern(pattern • newpattern) 84 | } 85 | } 86 | 87 | @inlinable 88 | public init(stringLiteral value: String) { 89 | self.init(Literal(value)) 90 | } 91 | 92 | @inlinable 93 | public init(stringInterpolation: StringInterpolation) { 94 | self.init(stringInterpolation.pattern) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /Sources/Patterns/Operations on Patterns/Capture.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Capture.swift 3 | // 4 | // 5 | // Created by Kåre Morstøl on 25/05/2020. 6 | // 7 | 8 | /// Captures the current position as a range. 9 | /// 10 | /// It can be retrieved in `Parser.Match.captures` or used for decoding into Decodables. 11 | public struct Capture: Pattern { 12 | public typealias Input = Wrapped.Input 13 | public var description: String { 14 | let result: String 15 | switch (name, wrapped) { 16 | case (nil, is NoPattern): 17 | result = "" 18 | case let (name?, is NoPattern): 19 | result = "name: \(name)" 20 | case let (name?, wrapped): 21 | result = "name: \(name), \(wrapped)" 22 | case let (nil, wrapped): 23 | result = wrapped.description 24 | } 25 | return "Capture(\(result))" 26 | } 27 | 28 | public let name: String? 29 | public let wrapped: Wrapped 30 | 31 | /// Captures the position of `wrapped` as a range. 
32 | /// - Parameters: 33 | /// - name: optional name 34 | @inlinable 35 | public init(name: String? = nil, _ wrapped: Wrapped) { 36 | self.name = name 37 | self.wrapped = wrapped 38 | } 39 | 40 | @inlinable 41 | public func createInstructions(_ instructions: inout ContiguousArray>) throws { 42 | instructions.append(.captureStart(name: name)) 43 | try wrapped.createInstructions(&instructions) 44 | instructions.append(.captureEnd) 45 | } 46 | } 47 | 48 | extension Capture { 49 | /// Captures the current input position as an empty range. 50 | /// - Parameter name: optional name 51 | @inlinable 52 | public init(name: String? = nil) where Wrapped == NoPattern { 53 | self.wrapped = NoPattern() 54 | self.name = name 55 | } 56 | 57 | /// Captures the current input position as an empty range. 58 | /// - Parameter name: optional name 59 | @inlinable 60 | public init(name: String? = nil) where Wrapped == NoPattern { 61 | self.wrapped = NoPattern() 62 | self.name = name 63 | } 64 | 65 | /// Captures the position of `wrapped` as a range. 66 | /// - Parameter name: optional name 67 | @inlinable 68 | public init(name: String? = nil, _ wrapped: Literal) where Wrapped == Literal { 69 | self.wrapped = wrapped 70 | self.name = name 71 | } 72 | } 73 | 74 | /// A pattern that does absolutely nothing. 75 | public struct NoPattern: Pattern where Input.Element: Hashable { 76 | public var description: String { "" } 77 | 78 | @inlinable 79 | public init() {} 80 | 81 | @inlinable 82 | public func createInstructions(_ instructions: inout ContiguousArray>) throws {} 83 | } 84 | -------------------------------------------------------------------------------- /Sources/Patterns/Operations on Patterns/Choice.swift: -------------------------------------------------------------------------------- 1 | // 2 | // SwiftPattern.swift 3 | // Patterns 4 | // 5 | // Created by Kåre Morstøl on 20/03/2017. 
6 | // 7 | // 8 | 9 | import Foundation 10 | 11 | /// A pattern which first tries the `first` pattern, 12 | /// if that fails it tries the `second` pattern from the same position. 13 | public struct OrPattern: Pattern where First.Input == Second.Input { 14 | public typealias Input = First.Input 15 | public let first: First 16 | public let second: Second 17 | 18 | @inlinable 19 | init(_ first: First, or second: Second) { 20 | self.first = first 21 | self.second = second 22 | } 23 | 24 | public var description: String { 25 | "(\(first) / \(second))" 26 | } 27 | 28 | @inlinable 29 | public func createInstructions(_ instructions: inout ContiguousArray>) throws { 30 | let inst1 = try first.createInstructions() 31 | let inst2 = try second.createInstructions() 32 | instructions.append(.choice(offset: inst1.count + 3)) 33 | instructions.append(contentsOf: inst1) 34 | instructions.append(.commit) 35 | instructions.append(.jump(offset: inst2.count + 2)) 36 | instructions.append(contentsOf: inst2) 37 | instructions.append(.choiceEnd) 38 | } 39 | } 40 | 41 | /// First tries the pattern to the left, 42 | /// if that fails it tries the pattern to the right from the same position. 43 | @inlinable 44 | public func / (p1: First, p2: Second) -> OrPattern { 45 | OrPattern(p1, or: p2) 46 | } 47 | 48 | /// First tries the pattern to the left, 49 | /// if that fails it tries the pattern to the right from the same position. 50 | @inlinable 51 | public func / (p1: Literal, p2: Second) -> OrPattern, Second> { 52 | OrPattern(p1, or: p2) 53 | } 54 | 55 | /// First tries the pattern to the left, 56 | /// if that fails it tries the pattern to the right from the same position. 57 | @inlinable 58 | public func / (p1: First, p2: Literal) -> OrPattern> { 59 | OrPattern(p1, or: p2) 60 | } 61 | 62 | /// First tries the pattern to the left, 63 | /// if that fails it tries the pattern to the right from the same position. 
64 | @inlinable 65 | public func / (p1: Literal, p2: Literal) -> OrPattern, Literal> { 66 | OrPattern(p1, or: p2) 67 | } 68 | 69 | /// First tries the pattern to the left, 70 | /// if that fails it tries the pattern to the right from the same position. 71 | @inlinable 72 | public func / (p1: Literal, p2: Literal) -> OrPattern, Literal> { 73 | OrPattern(p1, or: p2) 74 | } 75 | -------------------------------------------------------------------------------- /Sources/Patterns/Operations on Patterns/Concatenation.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Concatenation.swift 3 | // 4 | // 5 | // Created by Kåre Morstøl on 21/05/2020. 6 | // 7 | 8 | precedencegroup PatternConcatenationPrecedence { 9 | associativity: left 10 | higherThan: MultiplicationPrecedence // `/` has this 11 | } 12 | 13 | infix operator •: PatternConcatenationPrecedence 14 | 15 | /// A pattern which first tries the `first` pattern, 16 | /// if that succeeds it continues with the `second` pattern. 17 | public struct Concat: Pattern where First.Input == Second.Input { 18 | public typealias Input = First.Input 19 | public let first: First 20 | public let second: Second 21 | public var description: String { "\(first) \(second)" } 22 | 23 | @inlinable 24 | init(_ first: First, _ second: Second) { 25 | self.first = first 26 | self.second = second 27 | } 28 | 29 | @inlinable 30 | public func createInstructions(_ instructions: inout ContiguousArray>) throws { 31 | try first.createInstructions(&instructions) 32 | try second.createInstructions(&instructions) 33 | } 34 | } 35 | 36 | /// First tries the pattern to the left, if that succeeds it tries the pattern to the right. 37 | @inlinable 38 | public func • (lhs: Left, rhs: Right) -> Concat where Left.Input == Right.Input { 39 | Concat(lhs, rhs) 40 | } 41 | 42 | /// First tries the pattern to the left, if that succeeds it tries the pattern to the right. 
43 | @inlinable 44 | public func • (lhs: Literal, rhs: Right) -> Concat, Right> { 45 | Concat(lhs, rhs) 46 | } 47 | 48 | /// First tries the pattern to the left, if that succeeds it tries the pattern to the right. 49 | @inlinable 50 | public func • (lhs: Left, rhs: Literal) -> Concat> { 51 | Concat(lhs, rhs) 52 | } 53 | 54 | /// First tries the pattern to the left, if that succeeds it tries the pattern to the right. 55 | @inlinable 56 | public func • (lhs: Literal, rhs: Literal) -> Concat, Literal> { 57 | Concat(lhs, rhs) 58 | } 59 | -------------------------------------------------------------------------------- /Sources/Patterns/Operations on Patterns/Not.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Negation.swift 3 | // 4 | // 5 | // Created by Kåre Morstøl on 25/05/2020. 6 | // 7 | 8 | /// A pattern which only succeeds if the `wrapped` pattern fails. 9 | /// The next pattern will continue from where `wrapped` started. 10 | public struct NotPattern: Pattern { 11 | public typealias Input = Wrapped.Input 12 | public let wrapped: Wrapped 13 | public var description: String { "!\(wrapped)" } 14 | 15 | @inlinable 16 | init(_ wrapped: Wrapped) { 17 | self.wrapped = wrapped 18 | } 19 | 20 | @inlinable 21 | public func createInstructions(_ instructions: inout ContiguousArray>) throws { 22 | let wrappedInstructions = try wrapped.createInstructions() 23 | instructions.append(.choice(offset: wrappedInstructions.count + 3)) 24 | instructions.append(contentsOf: wrappedInstructions) 25 | instructions.append(.commit) 26 | instructions.append(.fail) 27 | } 28 | } 29 | 30 | /// Will only succeed if the following pattern fails. Does not consume any input. 31 | @inlinable 32 | public prefix func ! (pattern: P) -> NotPattern

{ 33 | NotPattern(pattern) 34 | } 35 | 36 | /// Will only succeed if the following pattern fails. Does not consume any input. 37 | @inlinable 38 | public prefix func ! (pattern: Literal) -> NotPattern> { 39 | NotPattern(pattern) 40 | } 41 | -------------------------------------------------------------------------------- /Sources/Patterns/Operations on Patterns/Repetition.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Repetition.swift 3 | // 4 | // 5 | // Created by Kåre Morstøl on 25/05/2020. 6 | // 7 | 8 | /// Repeats the `wrapped` pattern `min` times, then repeats it optionally `max-min` times. 9 | /// Or an unlimited number of times if max is nil. 10 | /// 11 | /// Used by operators `*+¿`. 12 | public struct RepeatPattern: Pattern { 13 | public typealias Input = Wrapped.Input 14 | public let wrapped: Wrapped 15 | public let min: Int 16 | public let max: Int? 17 | 18 | @inlinable 19 | init(_ wrapped: Wrapped, range: R) where R.Bound == Int { 20 | let actualRange = range.relative(to: Int.zero ..< Int.max) 21 | self.wrapped = wrapped 22 | self.min = actualRange.lowerBound 23 | self.max = actualRange.upperBound == Int.max ? nil : actualRange.upperBound - 1 24 | } 25 | 26 | public var description: String { 27 | "\(wrapped){\(min)...\(max.map(String.init) ?? 
"")}" 28 | } 29 | 30 | @inlinable 31 | public func createInstructions(_ instructions: inout ContiguousArray>) throws { 32 | let repeatedInstructions = try wrapped.createInstructions() 33 | for _ in 0 ..< min { instructions.append(contentsOf: repeatedInstructions) } 34 | if let max = max { 35 | let optionalRepeatedInstructions = ContiguousArray> { 36 | $0.append(.choice(offset: repeatedInstructions.count + 2)) 37 | $0.append(contentsOf: repeatedInstructions) 38 | $0.append(.commit) 39 | } 40 | instructions.append(contentsOf: repeatElement(optionalRepeatedInstructions, count: max - min).lazy.flatMap { $0 }) 41 | } else { 42 | instructions.append { 43 | $0.append(.choice(offset: repeatedInstructions.count + 3)) 44 | $0.append(contentsOf: repeatedInstructions) 45 | $0.append(.commit) 46 | $0.append(.jump(offset: -repeatedInstructions.count - 2)) 47 | } 48 | } 49 | } 50 | } 51 | 52 | extension Pattern { 53 | /// Repeats this pattern from `range.lowerBound` to `range.upperBound` times. 54 | @inlinable 55 | public func `repeat`(_ range: R) -> RepeatPattern where R.Bound == Int { 56 | return RepeatPattern(self, range: range) 57 | } 58 | 59 | /// Repeats this pattern `count` times. 60 | @inlinable 61 | public func `repeat`(_ count: Int) -> RepeatPattern { 62 | RepeatPattern(self, range: count ... count) 63 | } 64 | } 65 | 66 | postfix operator * 67 | 68 | /// Repeats the preceding pattern 0 or more times. 69 | @inlinable 70 | public postfix func * (me: P) -> RepeatPattern

{ 71 | me.repeat(0...) 72 | } 73 | 74 | /// Repeats the preceding pattern 0 or more times. 75 | @inlinable 76 | public postfix func * (me: Literal) -> RepeatPattern> { 77 | me.repeat(0...) 78 | } 79 | 80 | postfix operator + 81 | 82 | /// Repeats the preceding pattern 1 or more times. 83 | @inlinable 84 | public postfix func + (me: P) -> RepeatPattern

{ 85 | me.repeat(1...) 86 | } 87 | 88 | /// Repeats the preceding pattern 1 or more times. 89 | @inlinable 90 | public postfix func + (me: Literal) -> RepeatPattern> { 91 | me.repeat(1...) 92 | } 93 | 94 | postfix operator ¿ 95 | 96 | /// Tries the preceding pattern, and continues even if it fails. 97 | /// 98 | /// - note: in standard PEG this operator is `?`, but that is not allowed in Swift. 99 | @inlinable 100 | public postfix func ¿ (me: P) -> RepeatPattern

{ 101 | me.repeat(0 ... 1) 102 | } 103 | 104 | /// Tries the preceding pattern, and continues even if it fails. 105 | /// 106 | /// - note: in standard PEG this operator is `?`, but that is not allowed in Swift. 107 | @inlinable 108 | public postfix func ¿ (me: Literal) -> RepeatPattern> { 109 | me.repeat(0 ... 1) 110 | } 111 | -------------------------------------------------------------------------------- /Sources/Patterns/Operations on Patterns/Skip.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Skip.swift 3 | // 4 | // 5 | // Created by Kåre Morstøl on 25/05/2020. 6 | // 7 | 8 | /// Skips 0 or more elements until a match for the next pattern is found. 9 | /// 10 | /// ```swift 11 | /// let s = Skip() • a 12 | /// ``` 13 | /// is the same as `|S <- A / . <S>|` in standard PEG. 14 | /// 15 | /// - note: 16 | /// If `Skip` is at the end of a pattern, it just succeeds without consuming input. So it will be pointless. 17 | /// 18 | /// But this works: 19 | /// ```swift 20 | /// let s = Skip() 21 | /// let p = s • " " 22 | /// ``` 23 | /// because here the `s` pattern is "inlined". 24 | /// 25 | /// This, however, does not work: 26 | /// ```swift 27 | /// let g = Grammar { g in 28 | /// g.nextSpace <- g.skip • " " 29 | /// g.skip <- Skip() 30 | /// } 31 | /// ``` 32 | /// because in grammars the subexpressions are _called_, like functions, not "_inlined_", like Swift variables. 33 | /// So the `Skip()` in `g.skip` can't tell what will come after it.
34 | public struct Skip: Pattern where Input.Element: Hashable { 35 | public var description: String { "Skip()" } 36 | 37 | @inlinable 38 | public init() {} 39 | 40 | @inlinable 41 | public init() where Input == String {} 42 | 43 | @inlinable 44 | public func createInstructions(_ instructions: inout ContiguousArray>) throws { 45 | instructions.append(.skip) 46 | } 47 | } 48 | 49 | import SE0270_RangeSet 50 | 51 | extension ContiguousArray { 52 | /// Replaces all placeholder `.skip` instructions. 53 | @_specialize(where Input == String, Element == Instruction) // doesn't happen automatically (swiftlang-1200.0.28.1). 54 | @_specialize(where Input == String.UTF8View, Element == Instruction) 55 | @usableFromInline 56 | mutating func replaceSkips() where Element == Instruction { 57 | // `setupSkip(at: i)` adds 1 new instruction somewhere after `i`, so we can't loop over self.indices directly. 58 | var i = self.startIndex 59 | while i < self.endIndex { 60 | switch self[i] { 61 | case .skip: 62 | self.setupSkip(at: i) 63 | default: break 64 | } 65 | self.formIndex(after: &i) 66 | } 67 | } 68 | 69 | /// Replaces the dummy `.skip` instruction at `skipIndex` with one that will search using the instructions 70 | /// right after `skipIndex`. 71 | /// 72 | /// In other words we look at the instructions right after the .skip and see if they can be searched for 73 | /// efficiently. 74 | /// 75 | /// Also places a .choice right after the search instruction replacing the .skip, and a corresponding .commit 76 | /// somewhere after that again. So if the search succeeds, but a later instruction fails, we can start a new 77 | /// search one step ahead from where the previous search succeeded. 78 | /// In the sub-pattern `Skip() • "abc" • letter • Skip() • "xyz"`, if "abc" succeeds, but there is no 79 | /// letter afterwards, we search for "abc" again from the "b".
But if there is "abc" and another letter, 80 | /// we don't search for "abc" again because the next instruction is another .skip, and if we can't find "xyz" 81 | /// further on there's no point in searching for "abc" again. 82 | /// 83 | /// See `placeSkipCommit` for more. 84 | @usableFromInline 85 | mutating func setupSkip(at skipIndex: Index) where Element == Instruction { 86 | let afterSkip = skipIndex + 1 87 | switch self[afterSkip] { 88 | case let .checkIndex(function, atIndexOffset: 0): 89 | self[skipIndex] = .search { input, index in 90 | input[index...].indices.first(where: { function(input, $0) }) 91 | ?? (function(input, input.endIndex) ? input.endIndex : nil) 92 | } 93 | self[afterSkip] = .choice(offset: -1, atIndexOffset: +1) 94 | case .checkIndex(_, atIndexOffset: _): 95 | // A `.checkIndex` will only have a non-zero offset if it has been moved by `moveMovablesForward`, 96 | // and that will never move anything beyond a `.skip`. 97 | fatalError("A `.checkIndex` with a non-zero offset can't be located right after a `.skip` instruction.") 98 | case let .checkElement(test): 99 | self[skipIndex] = .search { input, index in 100 | input[index...].firstIndex(where: test) 101 | .map(input.index(after:)) 102 | } 103 | self[afterSkip] = .choice(offset: -1, atIndexOffset: 0) 104 | case .elementEquals: 105 | let elements: [Input.Element] = self[afterSkip...] 106 | .mapPrefix { 107 | switch $0 { 108 | case let .elementEquals(element): 109 | return element 110 | default: 111 | return nil 112 | } 113 | } 114 | if elements.count == 1 { 115 | self[skipIndex] = .search { input, index in 116 | input[index...].firstIndex(of: elements[0]) 117 | .map(input.index(after:)) 118 | } 119 | self[afterSkip] = .choice(offset: -1, atIndexOffset: 0) 120 | } else { 121 | // More than one literal, use Boyer–Moore–Horspool search. 
122 | let cache = SearchCache(elements) 123 | self[skipIndex] = .search { input, index in 124 | input.range(of: cache, from: index)?.upperBound 125 | } 126 | self[afterSkip] = .choice(offset: -1, atIndexOffset: (-elements.count) + 1) 127 | self[afterSkip + 1] = .jump(offset: elements.count - 1) 128 | } 129 | default: 130 | // Could not find instructions to search for efficiently, 131 | // so we just try them and if they fail we move one step forward and try again. 132 | self[skipIndex] = .choice(offset: 0, atIndexOffset: +1) 133 | self.placeSkipCommit(startSearchFrom: skipIndex + 1) 134 | return 135 | } 136 | self.placeSkipCommit(startSearchFrom: skipIndex + 2) 137 | } 138 | 139 | /// Places a .commit after replacing a .skip . 140 | /// 141 | /// Any instruction replacing a .skip will have a .choice right after it. 142 | /// We place the corresponding .commit as far after it as possible. 143 | /// As always we have to make sure that no pairs of corresponding .choice (or other instruction) and .commit 144 | /// intersect with any other pair. 145 | /// 146 | /// So we have to jump over any optional repetition (`¿+*` and `.repeat(range)`) and any `/` choice patterns. 147 | /// All of them use the `.choice` instruction. 148 | /// If we are inside any of these we put the .commit at the end of our part of the pattern. 149 | @usableFromInline 150 | mutating func placeSkipCommit(startSearchFrom: Index) where Element == Instruction { 151 | var i = startSearchFrom 152 | loop: while true { 153 | switch self[i] { 154 | case let .choice(_, indexOffset) where indexOffset < 0: 155 | fatalError("Not implemented.") 156 | case let .choice(offset, _): 157 | // We jump over this entire sub-pattern. 158 | // If one step back there is a jump forwards, then it's a '/' pattern. So follow that jump too. 
159 | if case let .jump(jumpOffset) = self[i + offset - 1], jumpOffset > 0 { 160 | i += offset - 1 + jumpOffset 161 | } else { 162 | i += offset 163 | } 164 | case let .jump(offset) where offset > 0: // If we jump backwards we are likely to enter an infinite loop. 165 | i += offset 166 | case .elementEquals, .checkElement, .checkIndex, .moveIndex, .captureStart, .captureEnd, .call, .jump: 167 | i += 1 168 | case .commit, .choiceEnd, .return, .match, .skip, .search, .fail: 169 | // This is as far as we can go. 170 | insertInstructions(.commit, at: i) 171 | return 172 | case .openCall: 173 | fatalError("`.openCall` instruction should have been replaced.") 174 | } 175 | } 176 | } 177 | 178 | /// Inserts `newInstructions` at `location`. Adjusts the offsets of the other instructions accordingly. 179 | /// 180 | /// Since all offsets are relative to the positions of their instructions, 181 | /// if `location` lies between an instruction with an offset and where that offset leads to, 182 | /// the offset needs to be increased by the length of `newInstructions`. 183 | @usableFromInline 184 | mutating func insertInstructions(_ newInstructions: Element..., at location: Index) 185 | where Element == Instruction { 186 | insert(contentsOf: newInstructions, at: location) 187 | let insertedRange = location ..< (location + newInstructions.count + 1) 188 | // instruction ... location ... offsetTarget 189 | for i in startIndex ..< insertedRange.lowerBound { 190 | switch self[i] { 191 | case let .call(offset) where offset > (location - i): 192 | self[i] = .call(offset: offset + newInstructions.count) 193 | case let .jump(offset) where offset > (location - i): 194 | self[i] = .jump(offset: offset + newInstructions.count) 195 | case let .choice(offset, atIndexOffset) where offset > (location - i): 196 | self[i] = .choice(offset: offset + newInstructions.count, atIndexOffset: atIndexOffset) 197 | default: 198 | break 199 | } 200 | } 201 | // offsetTarget ... location ... 
instruction 202 | for i in insertedRange.upperBound ..< endIndex { 203 | switch self[i] { 204 | case let .call(offset) where offset < (location - i): 205 | self[i] = .call(offset: offset - newInstructions.count) 206 | case let .jump(offset) where offset < (location - i): 207 | self[i] = .jump(offset: offset - newInstructions.count) 208 | case let .choice(offset, atIndexOffset) where offset < (location - i): 209 | self[i] = .choice(offset: offset - newInstructions.count, atIndexOffset: atIndexOffset) 210 | default: 211 | break 212 | } 213 | } 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /Sources/Patterns/Optimise Instructions.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Optimise Instructions.swift 3 | // 4 | // Created by Kåre Morstøl on 17/06/2020. 5 | // 6 | 7 | private extension Instruction { 8 | /// Can this instruction be moved by `moveMovablesForward`? 9 | var isMovable: Bool { 10 | switch self { 11 | case .checkIndex, .captureStart, .captureEnd: 12 | return true 13 | default: 14 | return false 15 | } 16 | } 17 | 18 | /// Does this instruction prohibit `moveMovablesForward` from moving anything past it? 19 | var stopsMovables: Bool { 20 | switch self { 21 | case .elementEquals, .checkElement: 22 | return false 23 | default: 24 | return true 25 | } 26 | } 27 | } 28 | 29 | import SE0270_RangeSet 30 | 31 | extension MutableCollection where Self: RandomAccessCollection, Index == Int { 32 | /// Moves any `.checkIndex`, `.captureStart`, `.captureEnd` past any `.elementEquals`, `.checkElement`. 33 | /// 34 | /// Improves performance noticeably. 
35 | @usableFromInline 36 | mutating func moveMovablesForward() where Element == Instruction { 37 | var movables = ContiguousArray() 38 | for i in indices { 39 | if self[i].isMovable { 40 | movables.append(i) 41 | } else if !movables.isEmpty, self[i].stopsMovables { 42 | let moved = moveSubranges(RangeSet(movables, within: self), to: i) 43 | var checkIndexIndices = RangeSet() 44 | for (movedIndex, oldPosition) in zip(moved, movables) { 45 | let distanceMoved = (movedIndex - oldPosition) 46 | switch self[movedIndex] { 47 | case let .captureStart(name, offset): 48 | self[movedIndex] = .captureStart(name: name, atIndexOffset: offset - distanceMoved) 49 | case let .captureEnd(offset): 50 | self[movedIndex] = .captureEnd(atIndexOffset: offset - distanceMoved) 51 | case let .checkIndex(test, offset): 52 | self[movedIndex] = .checkIndex(test, atIndexOffset: offset - distanceMoved) 53 | checkIndexIndices.insert(movedIndex, within: self) 54 | default: 55 | fatalError("'\(self[movedIndex])' is not a 'movable'.") 56 | } 57 | } 58 | movables.removeAll() 59 | 60 | // All `.checkIndex` should be first. If they fail there is no point in capturing anything. 61 | moveSubranges(checkIndexIndices, to: moved.lowerBound) 62 | } 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /Sources/Patterns/Parser.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Parser.swift 3 | // Patterns 4 | // 5 | // Created by Kåre Morstøl on 23/10/2018. 6 | // 7 | 8 | /// Takes a pattern, optimises it and tries to match it over an input. 9 | public struct Parser where Input.Element: Hashable { 10 | /// Indicates a problem with a malformed pattern. 11 | public enum PatternError: Error, CustomStringConvertible { 12 | /// The error message from the parser. 
13 | case message(String) 14 | 15 | public var description: String { 16 | switch self { 17 | case let .message(string): 18 | return string 19 | } 20 | } 21 | } 22 | 23 | @usableFromInline 24 | let matcher: VMEngine 25 | 26 | /// A parser which matches `pattern` _at_ a given position. 27 | @inlinable 28 | public init(_ pattern: P) throws where P.Input == Input { 29 | self.matcher = try VMEngine(pattern) 30 | } 31 | 32 | /// A parser which searches for `pattern` _from_ a given position. 33 | /// 34 | /// Is the same as `Parser(Skip() • pattern)`. 35 | @inlinable 36 | public init(search pattern: P) throws where P.Input == Input { 37 | try self.init(Skip() • pattern) 38 | } 39 | 40 | /// Contains information about a patterns successfully completed match. 41 | public struct Match: Equatable { 42 | /// The position in the input when the pattern completed. 43 | /// 44 | /// - note: If the last part of the pattern is a `!` or `&&`, 45 | /// `endIndex` is the position when that last part _started_. 46 | public let endIndex: Input.Index 47 | 48 | /// The names and ranges of all captures. 49 | public let captures: [(name: String?, range: Range)] 50 | 51 | @inlinable 52 | init(endIndex: Input.Index, captures: [(name: String?, range: Range)]) { 53 | self.endIndex = endIndex 54 | self.captures = captures 55 | } 56 | 57 | @inlinable 58 | public static func == (lhs: Parser.Match, rhs: Parser.Match) -> Bool { 59 | lhs.endIndex == rhs.endIndex 60 | && lhs.captures.elementsEqual(rhs.captures, by: { left, right in 61 | left.range == right.range && left.name == right.name 62 | }) 63 | } 64 | 65 | /// The range from the beginning of the first capture to the end of the last one. 66 | /// If there are no captures, the empty range at the `endIndex` of this Match. 67 | @inlinable 68 | public var range: Range { 69 | // TODO: Is `captures.last!.range.upperBound` always the highest captured index? 70 | // What if there is one large range and a smaller inside that? 71 | captures.isEmpty 72 | ? 
endIndex ..< endIndex 73 | : captures.first!.range.lowerBound ..< captures.last!.range.upperBound 74 | } 75 | 76 | public func description(using input: Input) -> String { 77 | """ 78 | endIndex: "\(endIndex == input.endIndex ? "EOF" : String(describing: input[endIndex]))" 79 | \(captures.map { "\($0.name.map { $0 + ":" } ?? "") \(input[$0.range])" }.joined(separator: "\n")) 80 | 81 | """ 82 | } 83 | 84 | /// Returns the first capture named `name`. 85 | @inlinable 86 | public subscript(one name: String) -> Range? { 87 | captures.first(where: { $0.name == name })?.range 88 | } 89 | 90 | /// Returns all captures named `name`. 91 | @inlinable 92 | public subscript(multiple name: String) -> [Range] { 93 | captures.filter { $0.name == name }.map { $0.range } 94 | } 95 | 96 | /// The names of all the captures. 97 | @inlinable 98 | public var captureNames: Set { Set(captures.compactMap { $0.name }) } 99 | } 100 | 101 | /// Tries to match the pattern in `input` at `index`. 102 | /// - Parameters: 103 | /// - index: The position to match at, if not provided the beginning of input will be used. 104 | @inlinable 105 | public func match(in input: Input, at index: Input.Index? = nil) -> Match? { 106 | matcher.match(in: input, at: index ?? input.startIndex) 107 | } 108 | 109 | /// A lazily generated sequence of consecutive matches of the pattern in `input`. 110 | /// 111 | /// Each match attempt starts at the `.range.upperBound` of the previous match, 112 | /// so the matches can be overlapping. 113 | /// 114 | /// You can dictate where the next match should start by where you place the last capture. 115 | /// 116 | /// - Parameters: 117 | /// - startindex: The position to match from, if not provided the beginning of input will be used. 118 | @inlinable 119 | public func matches(in input: Input, from startindex: Input.Index? = nil) 120 | -> UnfoldSequence { 121 | var stop = false 122 | var lastMatch: Match? 123 | return sequence(state: startindex ?? 
input.startIndex, next: { (index: inout Input.Index) in 124 | guard var match = self.match(in: input, at: index), !stop else { return nil } 125 | if match == lastMatch { 126 | guard index != input.endIndex else { return nil } 127 | input.formIndex(after: &index) 128 | guard let newMatch = self.match(in: input, at: index) else { return nil } 129 | match = newMatch 130 | } 131 | lastMatch = match 132 | let matchEnd = match.range.upperBound 133 | if matchEnd == index { 134 | guard matchEnd != input.endIndex else { 135 | stop = true 136 | return match 137 | } 138 | input.formIndex(after: &index) 139 | } else { 140 | index = matchEnd 141 | } 142 | return match 143 | }) 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /Sources/Patterns/Pattern And Instruction.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Pattern And Instruction.swift 3 | // 4 | // 5 | // Created by Kåre Morstøl on 06/06/2020. 6 | // 7 | 8 | /// Something that can create Instructions for the Parser. 9 | public protocol Pattern: CustomStringConvertible { 10 | associatedtype Input: BidirectionalCollection where Input.Element: Hashable 11 | typealias ParsedRange = Range 12 | // Ideally this should be used by all implementors, but that sometimes causes a compiler crash (Swift 5.3 beta) 13 | typealias Instructions = ContiguousArray> 14 | 15 | /// Appends Instructions for the Parser to `instructions`. 16 | @inlinable 17 | func createInstructions(_ instructions: inout Instructions) throws 18 | /// Returns Instructions for the Parser. 19 | @inlinable 20 | func createInstructions() throws -> Instructions 21 | } 22 | 23 | extension Pattern { 24 | /// Returns Instructions for the Parser. 
25 | @inlinable 26 | public func createInstructions() throws -> Instructions { 27 | var instructions = Instructions() 28 | try self.createInstructions(&instructions) 29 | return instructions 30 | } 31 | } 32 | 33 | /// The instructions used by patterns in `createInstructions`. 34 | /// 35 | /// Unless otherwise noted, each instruction moves on to the next instruction after it has finished. 36 | public enum Instruction where Input.Element: Hashable { 37 | public typealias Distance = Int 38 | 39 | /// Succeeds if the current element equals this element. Advances index to the next element. 40 | case elementEquals(Input.Element) 41 | /// Succeeds if the closure returns true when passed the current element. Advances index to the next element. 42 | case checkElement((Input.Element) -> Bool) 43 | /// Succeeds if the closure returns true when passed the input and the input index + `atIndexOffset`. 44 | case checkIndex((Input, Input.Index) -> Bool, atIndexOffset: Int) 45 | 46 | /// Moves the input index by `offset`. 47 | case moveIndex(offset: Distance) 48 | /// Continues with the instruction at `offset` relative to this instruction. 49 | case jump(offset: Distance) 50 | 51 | /// Sets the input index to the output from the closure. 52 | /// If the output is nil, the instruction fails. 53 | case search((Input, Input.Index) -> Input.Index?) 54 | 55 | /// Stores (current input index + `atIndexOffset`) as the beginning of capture `name`. 56 | case captureStart(name: String?, atIndexOffset: Int) 57 | /// Stores (current input index + `atIndexOffset`) as the end of the most recently started capture. 58 | case captureEnd(atIndexOffset: Int) 59 | 60 | /// Stores a snapshot of the current state, with input index set to (current + `atIndexOffset`). 61 | /// 62 | /// If there is a future failure the snapshot will be restored 63 | /// and the instruction at `offset` (relative to this instruction) will be called.
64 | case choice(offset: Distance, atIndexOffset: Int) 65 | /// Signals the end of a choice. Doesn't do anything else. 66 | /// Used as a barrier across which instructions cannot be moved. 67 | case choiceEnd 68 | /// Discards the state saved by previous `.choice`, because the instructions since then have completed 69 | /// successfully and the alternative instructions at the previous `.choice` are no longer needed. 70 | case commit 71 | 72 | /// Will be replaced by .call in preprocessing. Is never executed. 73 | case openCall(name: String) 74 | /// Goes to the subpattern at `offset` relative to this instruction. 75 | /// When the subpattern finishes we move on to the instruction after this. 76 | case call(offset: Distance) 77 | /// Returns from this subpattern to the instruction after where this was called from. 78 | case `return` 79 | 80 | /// Signals a failure. 81 | /// 82 | /// The snapshot from the previous `.choice` is restored, if there aren't any left we stop matching altogether. 83 | case fail 84 | /// A match has been successfully completed! 85 | /// 86 | /// Will not continue with further instructions. 87 | case match 88 | 89 | /// Will be replaced in preprocessing. Is never executed. 90 | case skip 91 | 92 | /// Succeeds anywhere except at the end of the input. 93 | @inlinable 94 | public static var any: Self { Self.checkElement { _ in true } } // TODO: make its own instruction 95 | 96 | /// Stores the current input index as the beginning of capture `name` 97 | @inlinable 98 | public static func captureStart(name: String?) -> Self { 99 | .captureStart(name: name, atIndexOffset: 0) 100 | } 101 | 102 | /// Stores the current input index as the end of the most recently started capture. 103 | @inlinable 104 | public static var captureEnd: Self { 105 | .captureEnd(atIndexOffset: 0) 106 | } 107 | 108 | /// Succeeds if the closure returns true when passed the input and the input index. 
109 | @inlinable 110 | public static func checkIndex(_ test: @escaping (Input, Input.Index) -> Bool) -> Self { 111 | .checkIndex(test, atIndexOffset: 0) 112 | } 113 | 114 | /// Stores a snapshot of the current state. 115 | /// 116 | /// If there is a future failure the snapshot will be restored 117 | /// and the instruction at `offset` (relative to this instruction) will be called. 118 | @inlinable 119 | public static func choice(offset: Int) -> Instruction { 120 | .choice(offset: offset, atIndexOffset: 0) 121 | } 122 | 123 | /// The offset by which this instruction will move the input index. 124 | @usableFromInline 125 | var movesIndexBy: Int? { 126 | switch self { 127 | case .checkIndex, .captureStart, .captureEnd, .commit, .match, .choiceEnd: 128 | return 0 129 | case .elementEquals, .checkElement: 130 | return 1 131 | case let .moveIndex(offset): 132 | return offset 133 | case .search, .choice, .jump, .openCall, .call, .return, .fail, .skip: 134 | return nil 135 | } 136 | } 137 | 138 | /// Returns true only if this instruction has no effect (`.choiceEnd`, or a jump to the very next instruction). 139 | @usableFromInline 140 | var doesNotDoAnything: Bool { 141 | switch self { 142 | case .choiceEnd, .jump(+1): 143 | return true 144 | default: 145 | return false 146 | } 147 | } 148 | } 149 | 150 | extension Sequence { 151 | /// The offset by which these instructions will move the input index. 152 | @usableFromInline 153 | func movesIndexBy

() -> Int? where Element == Instruction

{ 154 | lazy .map { $0.movesIndexBy }.reduceIfNoNils(into: 0) { result, offset in result += offset } 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /Sources/Patterns/Regex.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Regex.swift 3 | // 4 | // 5 | // Created by Kåre Morstøl on 18/04/2020. 6 | // 7 | 8 | import Foundation 9 | 10 | /// A pattern that can be converted to regex. 11 | public protocol RegexConvertible: Pattern { 12 | /// The equivalent regex for this pattern. 13 | var regex: String { get } 14 | } 15 | 16 | extension Literal: RegexConvertible where Input: StringProtocol { 17 | public var regex: String { NSRegularExpression.escapedPattern(for: String(self.elements)) } 18 | } 19 | 20 | extension Line: RegexConvertible { 21 | public var regex: String { "^.*$" } 22 | } 23 | 24 | extension Line.Start: RegexConvertible { 25 | public var regex: String { "^" } 26 | } 27 | 28 | extension Line.End: RegexConvertible { 29 | public var regex: String { "$" } 30 | } 31 | 32 | extension Word.Boundary: RegexConvertible { 33 | public var regex: String { #"\b"# } 34 | } 35 | 36 | extension Capture: RegexConvertible where Wrapped: RegexConvertible { 37 | public var regex: String { 38 | let capturedRegex = wrapped.regex 39 | return name.map { "(?<\($0)>\(capturedRegex))" } ?? "(\(capturedRegex))" 40 | } 41 | } 42 | 43 | extension Concat: RegexConvertible where First: RegexConvertible, Second: RegexConvertible { 44 | public var regex: String { first.regex + second.regex } 45 | } 46 | 47 | extension OrPattern: RegexConvertible where First: RegexConvertible, Second: RegexConvertible { 48 | public var regex: String { first.regex + "|" + second.regex } 49 | } 50 | 51 | extension RepeatPattern: RegexConvertible where Wrapped: RegexConvertible { 52 | public var regex: String { // The non-capturing group is closed before the quantifier, so `{min,max}` repeats the whole wrapped regex. 53 | "(?:\(wrapped.regex)){\(min),\(max.map(String.init(describing:)) ??
"")}" 54 | } 55 | } 56 | 57 | extension Skip: RegexConvertible { 58 | public var regex: String { ".*?" } 59 | } 60 | 61 | extension NoPattern: RegexConvertible { 62 | public var regex: String { "" } 63 | } 64 | 65 | // For `OneOf` to be convertible the regex has to be provided manually when it is created. See OneOf.swift. 66 | -------------------------------------------------------------------------------- /Sources/Patterns/VMBacktrack.swift: -------------------------------------------------------------------------------- 1 | // 2 | // VMBacktrack.swift 3 | // 4 | // 5 | // Created by Kåre Morstøl on 18/04/2020. 6 | // 7 | 8 | @usableFromInline 9 | struct VMEngine where Input.Element: Hashable { 10 | @usableFromInline 11 | typealias Instructions = ContiguousArray> 12 | @usableFromInline 13 | typealias Captures = ContiguousArray<(index: Input.Index, 14 | instruction: VMEngine.Instructions.Index)>.SubSequence 15 | @usableFromInline 16 | let instructions: Instructions 17 | 18 | @usableFromInline 19 | init(_ pattern: P) throws where Input == P.Input { 20 | var instructions = try Instructions { 21 | $0.append(.fail) // dummy instruction used by '.choice'. 22 | try pattern.createInstructions(&$0) 23 | $0.append(.match) 24 | } 25 | instructions.moveMovablesForward() 26 | instructions.replaceSkips() 27 | self.instructions = instructions 28 | } 29 | 30 | @_specialize(where Input == String) // doesn't happen automatically (swiftlang-1200.0.28.1). 31 | @_specialize(where Input == String.UTF8View) 32 | @usableFromInline 33 | func match(in input: Input, at startIndex: Input.Index) -> Parser.Match? 
{ 34 | launch(input: input, startIndex: startIndex) 35 | } 36 | } 37 | 38 |
// NOTE(review): the generic argument lists in this section (`VMEngine<Input>`, `Parser<Input>`,
// `Range<Input.Index>`, `ContiguousArray<Thread>`) were missing from the listing and have been
// reconstructed from how the values are used below. Confirm against the repository.
extension Parser.Match {
	/// Builds the match result for a thread that has reached `.match`.
	///
	/// `captures` is the chronological list of capture events recorded by the VM. Every entry
	/// points at the `.captureStart` or `.captureEnd` instruction that produced it. Each end
	/// event is paired with the most recent start event that is still open, so nested
	/// captures are closed innermost first.
	///
	/// - Parameters:
	///   - thread: The thread that matched; its `inputIndex` becomes `endIndex`.
	///   - instructions: The program the thread was running, used to look up each capture event.
	///   - captures: The capture events recorded while the thread ran.
	@usableFromInline
	init(_ thread: VMEngine<Input>.Thread,
	     instructions: VMEngine<Input>.Instructions,
	     captures: VMEngine<Input>.Captures) {
		var newCaptures = [(name: String?, range: Range<Input.Index>)]()
		// One finished capture per start/end pair.
		newCaptures.reserveCapacity(captures.count / 2)
		// Starts that have not yet seen their matching end; used as a stack.
		var captureBeginnings = [(name: String?, start: Input.Index)]()
		captureBeginnings.reserveCapacity(captures.capacity)
		for capture in captures {
			switch instructions[capture.instruction] {
			case let .captureStart(name, _):
				captureBeginnings.append((name, capture.index))
			case .captureEnd:
				let beginning = captureBeginnings.removeLast()
				newCaptures.append((name: beginning.name, range: beginning.start ..< capture.index))
			default:
				fatalError("Captured wrong instructions.")
			}
		}
		// Every start must have been closed by the time the pattern matched.
		assert(captureBeginnings.isEmpty)
		self.endIndex = thread.inputIndex
		self.captures = newCaptures
	}
}

extension VMEngine {
	/// One entry of the VM's backtracking stack, and the state of the running thread.
	///
	/// An entry is either an alternative to resume after a failure (pushed by `.choice`),
	/// or a return address (pushed by `.call`, with `isReturnAddress == true`).
	@usableFromInline
	struct Thread {
		/// Index of the next instruction to execute.
		@usableFromInline
		var instructionIndex: Instructions.Index
		/// Current position in the input.
		@usableFromInline
		var inputIndex: Input.Index
		/// The capture list is cut back to this index when the thread is resumed from the stack.
		@usableFromInline
		var capturesEndIndex: Captures.Index
		/// `true` for entries pushed by `.call`; these are popped by `.return`.
		@usableFromInline
		var isReturnAddress: Bool = false

		/// Creates a thread at `instructionIndex` that copies the input position and capture
		/// end index of `other`. `isReturnAddress` is not copied and stays `false`.
		@usableFromInline
		init(startAt instructionIndex: Int, withDataFrom other: Thread) {
			self.instructionIndex = instructionIndex
			self.inputIndex = other.inputIndex
			self.capturesEndIndex = other.capturesEndIndex
		}

		/// Creates a fresh thread with no recorded captures (`capturesEndIndex` is 0).
		@usableFromInline
		init(instructionIndex: Instructions.Index, inputIndex: Input.Index) {
			self.instructionIndex = instructionIndex
			self.inputIndex = inputIndex
			self.capturesEndIndex = 0
		}
	}

	/// Runs the instructions against `input` and returns the first match found, if any.
	///
	/// Execution is a backtracking loop. The running thread executes instructions until one
	/// fails (`break loop`). The most recently pushed alternative is then popped from the
	/// stack and resumed. When the stack is empty, there is no match.
	///
	/// - Parameters:
	///   - input: The collection to match against.
	///   - startIndex: Where to start matching; defaults to `input.startIndex`.
	/// - Returns: The match produced when a thread reaches `.match`, or `nil` if every
	///   alternative failed.
	@usableFromInline
	func launch(input: Input, startIndex: Input.Index? = nil) -> Parser<Input>.Match? {
		// Skip the first instruction, which is always '.fail'.
		var stack = ContiguousArray<Thread>()[...]
		stack.append(
			Thread(instructionIndex: instructions.startIndex + 1, inputIndex: startIndex ?? input.startIndex))
		var captures = Captures()

		while var thread = stack.popLast() {
			assert(!thread.isReturnAddress, "Stack unexpectedly contains .returnAddress after fail")
			// Forget the captures recorded after this alternative was pushed.
			// (`removeSuffix(from:)` is a project helper; presumably it truncates from that index.)
			captures.removeSuffix(from: thread.capturesEndIndex)
			defer { // Fail, when `break loop` is called.
				// Return addresses belonging to calls made by the failed thread are no longer valid.
				stack.removeSuffix(where: { $0.isReturnAddress })
			}

			loop: while true {
				switch instructions[thread.instructionIndex] {
				case let .elementEquals(char):
					guard thread.inputIndex != input.endIndex, input[thread.inputIndex] == char else { break loop }
					input.formIndex(after: &thread.inputIndex)
					thread.instructionIndex += 1
				case let .checkElement(test):
					guard thread.inputIndex != input.endIndex, test(input[thread.inputIndex]) else { break loop }
					input.formIndex(after: &thread.inputIndex)
					thread.instructionIndex += 1
				case let .checkIndex(test, offset):
					// Tests a position relative to the current one without moving the thread.
					let index = input.index(thread.inputIndex, offsetBy: offset)
					guard test(input, index) else { break loop }
					thread.instructionIndex += 1
				case let .moveIndex(distance):
					// `formIndexSafely` is a project helper; a `false` result is treated as failure.
					guard input.formIndexSafely(&thread.inputIndex, offsetBy: distance) else { break loop }
					thread.instructionIndex += 1
				case let .search(closure):
					guard let index = closure(input, thread.inputIndex) else { break loop }
					thread.inputIndex = index
					thread.instructionIndex += 1
				case let .jump(distance):
					thread.instructionIndex += distance
				case let .captureStart(_, offset),
				     let .captureEnd(offset):
					// Record the event only; start/end pairs are resolved in `Parser.Match.init`.
					let index = input.index(thread.inputIndex, offsetBy: offset)
					captures.append((index: index, instruction: thread.instructionIndex))
					thread.instructionIndex += 1
				case let .choice(offset, atIndex):
					// Push the alternative to try if the current path fails.
					var newThread = Thread(startAt: thread.instructionIndex + offset, withDataFrom: thread)
					if atIndex != 0, !input.formIndexSafely(&newThread.inputIndex, offsetBy: atIndex) {
						// we must always add to the stack here, so send it to an instruction that is always `.fail`
						newThread.instructionIndex = instructions.startIndex
					}
					newThread.capturesEndIndex = captures.endIndex
					stack.append(newThread)
					thread.instructionIndex += 1
				case .choiceEnd:
					thread.instructionIndex += 1
				case .commit:
					// The current path succeeded, so discard the alternative pushed by `.choice`.
					#if DEBUG
					let entry = stack.popLast()
					assert(entry != nil, "Empty stack during .commit")
					assert(entry.map { !$0.isReturnAddress } ?? true, "Missing thread during .commit")
					#else
					stack.removeLast()
					#endif
					thread.instructionIndex += 1
				case let .call(offset):
					// Remember where to continue after the callee executes `.return`.
					var returnAddress = thread
					returnAddress.instructionIndex += 1
					returnAddress.isReturnAddress = true
					stack.append(returnAddress)
					thread.instructionIndex += offset
				case .return:
					guard let entry = stack.popLast() else { fatalError("Missing return address upon .return.") }
					assert(entry.isReturnAddress, "Unexpected uncommited thread in stack.")
					thread.instructionIndex = entry.instructionIndex
				case .fail:
					break loop
				case .match:
					return Parser<Input>.Match(thread, instructions: instructions, captures: captures)
				case .openCall:
					fatalError("`.openCall` should be removed by Grammar.")
				case .skip:
					fatalError("`.skip` should be removed by Parser in preprocessing.")
				}
			}
		}
		return nil
	}
}
178 | -------------------------------------------------------------------------------- /Sources/unicode_properties/main.swift: -------------------------------------------------------------------------------- 1 | /// Converts Unicode property data files to Swift code.
import ArgumentParser
import Foundation
import Patterns

/// One entry per parsed data line: the code point range and the name of the property it has.
typealias RangesAndProperties = [(range: ClosedRange<UInt32>, property: Substring)]

/// Extracts every "code point(s) ; property" line from the text of a Unicode property data file.
///
/// A line holds either a single hexadecimal code point or a `first..last` pair. A single
/// code point is returned as a range whose lower and upper bounds are equal.
///
/// - Parameter text: The complete contents of the data file.
/// - Returns: The ranges in the order they are matched, each with a slice of `text` naming
///   its property.
func unicodeProperty(fromDataFile text: String) -> RangesAndProperties {
	let hexNumber = Capture(name: "hexNumber", hexDigit+)
	// `/` binds looser than `•`, so this reads as (hexNumber • ".." • hexNumber) / hexNumber.
	let hexRange = hexNumber • ".." • hexNumber / hexNumber
	// NOTE(review): the tests in this repository spell the line anchor `Line.Start()`;
	// confirm that `Line.start` is still available.
	let rangeAndProperty = Line.start • hexRange • Skip() • "; " • Capture(name: "property", Skip()) • " "

	// The force-unwraps rely on the pattern itself: a match always has a "property" capture
	// and one or two "hexNumber" captures made up of hex digits only.
	return try! Parser(search: rangeAndProperty).matches(in: text).map { match in
		let propertyName = text[match[one: "property"]!]
		let oneOrTwoNumbers = match[multiple: "hexNumber"].map { UInt32(text[$0], radix: 16)! }
		let range = oneOrTwoNumbers.first! ... oneOrTwoNumbers.last!
		return (range, propertyName)
	}
}

extension Sequence {
	/// Passes the 2 first elements to the `transform` closure. Then passes the last element returned from `transform`,
	/// together with the next element in the source sequence, to `transform` again. And so on.
	///
	/// Everything `transform` returns except its last element goes straight into the result.
	/// If `transform` returns an empty array, the following source element (if any) becomes
	/// the new "current" element. An empty sequence gives an empty result, and a sequence
	/// with one element is returned unchanged.
	///
	/// - Parameter transform: Combines the current element with the next one.
	/// - Returns: The collected elements, ending with the final "current" element.
	func flatMapPairs(_ transform: (Element, Element) -> [Element]) -> [Element] {
		var result = ContiguousArray<Element>()
		result.reserveCapacity(underestimatedCount)
		var iterator = makeIterator()
		guard var current = iterator.next() else { return [] }

		while let next = iterator.next() {
			let transformation = transform(current, next)
			result.append(contentsOf: transformation.dropLast())
			// Nothing returned: continue with the next source element, or stop if there is none.
			guard let last = transformation.last ?? iterator.next() else { return Array(result) }
			current = last
		}
		result.append(current)
		return Array(result)
	}
}

/// Turns string into a proper Swift enum case name.
///
/// Removes all underscores. Unless string is all caps, lowercases the first letter.
func caseName(_ string: Substring) -> String {
	var caseName = string.replacingOccurrences(of: "_", with: "")
	// An all-caps name (which includes the empty string) is kept as it is.
	let firstLetter = caseName.allSatisfy { $0.isUppercase } ? "" : caseName.removeFirst().lowercased()
	return firstLetter + caseName
}

/// Generates Swift source for an enum with one case per property, and a dictionary
/// from those cases to their code point ranges.
///
/// Entries are emitted in ascending order of property name, so the generated source
/// is identical from run to run.
///
/// - Parameter properties: The ranges belonging to each property name.
/// - Returns: The generated Swift source.
func generateEnumAndDictionary(_ properties: [Substring: [ClosedRange<UInt32>]]) -> String {
	// Dictionary iteration order is not stable between runs, so sort before generating.
	let sortedProperties = properties.sorted { $0.key < $1.key }
	let cases = sortedProperties
		.map { entry in #"\#(caseName(entry.key)) = "\#(entry.key)""# }
		.joined(separator: ", ")
	let propertyRanges = sortedProperties.map { entry in
		"\t.\(caseName(entry.key)): [\(entry.value.map { "\($0)" }.joined(separator: ", "))],"
	}

	// NOTE(review): the dictionary's value type was garbled in the listing (`ContiguousArray>`);
	// `ContiguousArray<ClosedRange<UInt32>>` is reconstructed from the range type used here.
	return """
	enum UnicodeProperty: String {
		case \(cases)
	}

	let propertyRanges: [UnicodeProperty : ContiguousArray<ClosedRange<UInt32>>] = [
	\(propertyRanges.joined(separator: "\n"))
	]
	"""
}

/// Generates Swift source with one constant per property, each an array of code point ranges.
///
/// Constants are emitted in ascending order of property name, so the generated source
/// is identical from run to run.
///
/// - Parameter properties: The ranges belonging to each property name.
/// - Returns: The generated Swift source, one `let` per line.
func generateConstants(_ properties: [Substring: [ClosedRange<UInt32>]]) -> String {
	properties
		.sorted { $0.key < $1.key }
		.map { entry in
			"let \(caseName(entry.key)) = [ \(entry.value.map { "\($0)" }.joined(separator: ", ")) ]"
		}
		.joined(separator: "\n")
}

/// Command line interface: reads a Unicode property data file and prints generated Swift code.
struct Arguments: ParsableCommand {
	@Flag(name: .customLong("enumAndDictionary"), help: "Outputs an enum containing all the property names, and a dictionary with the enum as keys and arrays of ranges as values. Is the default.")
	var enumAndDictionary: Bool = false

	@Flag(help: "Outputs the property names as constants with arrays of ranges as values.")
	var constants: Bool = false

	// The transform replaces the path given on the command line with the contents of that file.
	@Argument(help: "The path to the Unicode property data file.", transform: {
		try String(contentsOfFile: $0)
	})
	var unicodeData: String

	/// Groups the parsed ranges by property, merges adjacent ranges, and prints the
	/// requested output formats.
	func run() throws {
		// With no flags given, fall back to the enum-and-dictionary output.
		let outputEnumAndDictionary = enumAndDictionary || !constants

		let properties: [Substring: [ClosedRange<UInt32>]] =
			Dictionary(grouping: unicodeProperty(fromDataFile: unicodeData), by: \.property)
				.mapValues { (ranges: RangesAndProperties) -> [ClosedRange<UInt32>] in
					let sortedRanges = ranges.map { $0.range }
						.sorted { $0.lowerBound < $1.lowerBound }

					// compact the list of ranges by joining together adjacent ranges
					return sortedRanges.flatMapPairs { a, b in
						a.upperBound + 1 == b.lowerBound ? [a.lowerBound ... b.upperBound] : [a, b]
					}
				}

		if outputEnumAndDictionary {
			print(generateEnumAndDictionary(properties))
			print()
		}
		if constants {
			print(generateConstants(properties))
			print()
		}
	}
}

Arguments.main()
116 | -------------------------------------------------------------------------------- /Tests/LinuxMain.swift: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Tests/LongTests/LongTests.swift: -------------------------------------------------------------------------------- 1 | 2 | import Patterns 3 | import XCTest 4 | 5 | class LongTests: XCTestCase { 6 | func testOr() { 7 | let char = letter / ascii / punctuation 8 | XCTAssert(type(of: "a" / char / "b") 9 | == OrPattern, OneOf>, Literal>.self, 10 | "'/' operator isn't optimizing OneOf's properly.") 11 | } 12 | 13 | func testNot() { 14 |
XCTAssert( 15 | type(of: "a" • !letter • ascii • "b") == Concat, OneOf>, Literal>.self, 16 | "'•' operator isn't optimizing OneOf's properly.") 17 | } 18 | 19 | func testAnd() throws { 20 | XCTAssert( 21 | type(of: "a" • &&letter • ascii • "b") == Concat, OneOf>, Literal>.self, 22 | "'•' operator isn't optimizing OneOf's properly.") 23 | } 24 | 25 | func testOperatorPrecedence() throws { 26 | let p1 = "a" • Skip() • letter • !alphanumeric • "b"+ 27 | XCTAssert(type(of: p1.first.first.first.second) == Skip.self) 28 | XCTAssert(type(of: Literal("a") • "b" / "c" • "d") 29 | == OrPattern, Literal>, Concat, Literal>>.self, 30 | #"`/` should have lower precedence than `•`"#) 31 | } 32 | 33 | func testPlaygroundExample() throws { 34 | let text = #""" 35 | 0 0.0 0.01 36 | -0 +0 -0.0 +0.0 37 | -123.456e+00 -123.456E+00 -123.456e-00 -123.456E-00 38 | +123.456e+00 +123.456E+00 +123.456e-00 +123.456E-00 39 | 0 0.0 0.01 40 | -123e+12 -123e-12 41 | 123.456e+00 123.456E+00 42 | 0x123E 0x123e 43 | 0x0123456789abcdef 44 | 0b0 0b1 0b0000 0b0001 0b11110000 0b0000_1111 0b1010_00_11 45 | """# 46 | 47 | let unsigned = digit+ 48 | let sign = "-" / "+" 49 | let integer = Capture(name: "integer", sign¿ • unsigned) 50 | let hexa = Capture(name: "hexa", "0x" • hexDigit+) 51 | let binary = Capture(name: "binary", "0b" • OneOf("01") • OneOf("01_")*) 52 | let floating = Capture(name: "floating", integer • "." 
• unsigned) 53 | let scientific = floating • (("e" / "E") • integer)¿ 54 | let number = hexa / binary / floating / integer / unsigned / scientific 55 | 56 | let parser = try Parser(search: number) 57 | 58 | XCTAssertEqual(Array(parser.matches(in: text)).count, 44) 59 | } 60 | 61 | // from http://www.inf.puc-rio.br/~roberto/docs/peg.pdf, page 2 and 5 62 | static let pegGrammar = Grammar { g in 63 | //g.all <- g.pattern • !any 64 | g.pattern <- g.grammar / g.simplepatt 65 | g.grammar <- (g.nonterminal • "<-" • g.sp • g.simplepatt)+ 66 | g.simplepatt <- g.alternative • ("/" • g.sp • g.alternative)* 67 | g.alternative <- (OneOf("!&")¿ • g.sp • g.suffix)+ 68 | g.suffix <- g.primary • (OneOf("*+?") • g.sp)* 69 | let primaryPart1 = "(" • g.sp • g.pattern • ")" • g.sp / "." • g.sp / g.literal 70 | g.primary <- primaryPart1 / g.charclass / g.nonterminal • !"<-" 71 | g.literal <- "’" • (!"’" • any)* • "’" • g.sp 72 | g.charclass <- "[" • (!"]" • (any • "-" • any / any))* • "]" • g.sp 73 | g.nonterminal <- OneOf("a" ... "z", "A" ... "Z")+ • g.sp 74 | g.sp <- OneOf(" \t\n")* 75 | } 76 | 77 | static let pegGrammarParser = { try! Parser(pegGrammar) }() 78 | 79 | func testPEGGrammar() throws { 80 | // page 5 81 | let grammar1Text = """ 82 | pattern <- grammar / simplepatt 83 | grammar <- (nonterminal ’<-’ sp simplepatt)+ 84 | simplepatt <- alternative (’/’ sp alternative)* 85 | alternative <- ([!&]? sp suffix)+ 86 | suffix <- primary ([*+?] sp)* 87 | primary <- ’(’ sp pattern ’)’ sp / ’.’ sp / literal / charclass / nonterminal !’<-’ 88 | literal <- [’] (![’] .)* [’] sp 89 | charclass <- ’[’ (!’]’ (. ’-’ . / . ))* ’]’ sp 90 | nonterminal <- [a-zA-Z]+ sp 91 | sp <- [ \t\n]* 92 | """ 93 | XCTAssertEqual(Self.pegGrammarParser.match(in: grammar1Text)?.endIndex, grammar1Text.endIndex) 94 | 95 | // page 2 96 | let grammar2Text = """ 97 | grammar <- (nonterminal ’<-’ sp pattern)+ 98 | pattern <- alternative (’/’ sp alternative)* 99 | alternative <- ([!&]? 
sp suffix)+ 100 | suffix <- primary ([*+?] sp)* 101 | primary <- ’(’ sp pattern ’)’ sp / ’.’ sp / literal / charclass / nonterminal !’<-’ 102 | literal <- [’] (![’] .)* [’] sp 103 | charclass <- ’[’ (!’]’ (. ’-’ . / . ))* ’]’ sp 104 | nonterminal <- [a-zA-Z]+ sp 105 | sp <- [ \t\n]* 106 | """ 107 | XCTAssertEqual(Self.pegGrammarParser.match(in: grammar2Text)?.endIndex, grammar2Text.endIndex) 108 | } 109 | 110 | func testOriginalPEGGrammar() throws { 111 | try XCTSkipIf(true, "pegGrammar does not support escaping characters.") 112 | 113 | // https://bford.info/pub/lang/peg.pdf Page 2, Figure 1. 114 | let origPEGGrammarText = """ 115 | # Hierarchical syntax 116 | Grammar <- Spacing Definition+ EndOfFile 117 | Definition <- Identifier LEFTARROW Expression 118 | Expression <- Sequence (SLASH Sequence)* 119 | Sequence <- Prefix* 120 | Prefix <- (AND / NOT)? Suffix 121 | Suffix <- Primary (QUESTION / STAR / PLUS)? 122 | Primary <- Identifier !LEFTARROW / OPEN Expression CLOSE / Literal / Class / DOT 123 | 124 | # Lexical syntax 125 | Identifier <- IdentStart IdentCont* Spacing 126 | IdentStart <- [a-zA-Z_] 127 | IdentCont <- IdentStart / [0-9] 128 | Literal <- [’] (![’] Char)* [’] Spacing / ["] (!["] Char)* ["] Spacing 129 | Class <- ’[’ (!’]’ Range)* ’]’ Spacing 130 | Range <- Char ’-’ Char / Char 131 | Char <- ’\\’ [nrt’"[]\\] / ’\\’ [0-2][0-7][0-7] / ’\\’ [0-7][0-7]? / !’\\’ . 132 | LEFTARROW <- ’<-’ Spacing 133 | SLASH <- ’/’ Spacing 134 | AND <- ’&’ Spacing 135 | NOT <- ’!’ Spacing 136 | QUESTION <- ’?’ Spacing 137 | STAR <- ’*’ Spacing 138 | PLUS <- ’+’ Spacing 139 | OPEN <- ’(’ Spacing 140 | CLOSE <- ’)’ Spacing 141 | DOT <- ’.’ Spacing 142 | Spacing <- (Space / Comment)* 143 | Comment <- ’#’ (!EndOfLine .)* EndOfLine 144 | Space <- ’ ’ / ’\t’ / EndOfLine 145 | EndOfLine <- ’\r\n’ / ’\n’ / ’\r’ 146 | EndOfFile <- !. 
147 | """ 148 | XCTAssertEqual(Self.pegGrammarParser.match(in: origPEGGrammarText)?.endIndex, origPEGGrammarText.endIndex) 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /Tests/PatternsTests/ConcatenationTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ConcatenationTests 3 | // 4 | // Created by Kåre Morstøl on 18/05/2018. 5 | // 6 | 7 | import Patterns 8 | import XCTest 9 | 10 | class ConcatenationTests: XCTestCase { 11 | func testSimple() throws { 12 | assertParseAll( 13 | Capture(Literal("a")¿ • "b"), 14 | input: "ibiiiiabiii", result: ["b", "ab"]) 15 | assertParseAll( 16 | Capture(Literal("a")¿ • Literal("b")), 17 | input: "ibiiaiiababiibi", result: ["b", "ab", "ab", "b"]) 18 | assertParseAll( 19 | Capture("b" • Literal("a")¿), 20 | input: "ibiiiibaiii", result: ["b", "ba"]) 21 | 22 | let p = Capture("ab" • digit • ".") 23 | assertParseAll(p, input: "$#%/ab8.lsgj", result: "ab8.", count: 1) 24 | assertParseAll(p, input: "$ab#%/ab8.lsgab3.j", result: ["ab8.", "ab3."]) 25 | assertParseAll(p, input: "$#%/ab8lsgj", count: 0) 26 | } 27 | 28 | func testRepeat() throws { 29 | let text = "This is 4 6 a test 123 text." 30 | assertParseAll( 31 | Capture(" " • digit* • " "), 32 | input: text, result: [" 4 ", " 123 "]) 33 | assertParseAll( 34 | " " • Capture(digit*) • " ", 35 | input: text, result: ["4", "6", "123"]) 36 | assertParseAll( 37 | Capture(digit • letter.repeat(0 ... 
2)), 38 | input: "2a 35abz2", 39 | result: ["2a", "3", "5ab", "2"]) 40 | } 41 | 42 | func testCapture() throws { 43 | assertParseAll( 44 | Capture() • "a", 45 | input: "xaa xa", result: "", count: 3) 46 | assertParseAll( 47 | "x" • Capture() • "a", 48 | input: "xaxa xa", result: "", count: 3) 49 | assertParseAll( 50 | Capture() • "a", 51 | input: "xaa xa".utf8, result: "".utf8, count: 3) 52 | assertParseAll( 53 | "x" • Capture() • "a", 54 | input: "xaxa xa".unicodeScalars, result: "".unicodeScalars, count: 3) 55 | 56 | let text = "This is a test text." 57 | assertParseAll( 58 | " " • Capture(letter+) • " ", 59 | input: text, result: ["is", "a", "test"]) 60 | assertParseAll( 61 | Capture(letter+), 62 | input: text, result: ["This", "is", "a", "test", "text"]) 63 | assertParseAll( 64 | letter • Capture() • " ", 65 | input: text, result: "", count: 4) 66 | assertParseAll( 67 | " " • Capture("te"), 68 | input: text, result: "te", count: 2) 69 | 70 | XCTAssert(type(of: Capture()).Input == String.self) 71 | XCTAssert(type(of: "q" • Capture()).Input == String.self) 72 | XCTAssert(type(of: Literal("q".utf8) • Capture()).Input == String.UTF8View.self) 73 | } 74 | 75 | func testRepeatOrThenEndOfLine() throws { 76 | assertParseAll( 77 | Capture((alphanumeric / OneOf(" "))+ • Line.End()), 78 | input: "FMA026712 TECNOAUTOMOTRIZ ATLACOMULCO S", 79 | result: ["FMA026712 TECNOAUTOMOTRIZ ATLACOMULCO S"]) 80 | } 81 | 82 | func testMatchFullRange() throws { 83 | let text = """ 84 | line 1 85 | 86 | line 3 87 | line 4 88 | 89 | """ 90 | 91 | assertParseAll(Capture(Line()), input: text, 92 | result: ["line 1", "", "line 3", "line 4", ""]) 93 | } 94 | 95 | func testMatchBeginningOfLines() throws { 96 | let text = """ 97 | airs 98 | blip 99 | cera user 100 | dilled10 io 101 | """ 102 | let pattern = try Parser(search: Line.Start() • Capture()) 103 | 104 | let m = Array(pattern.matches(in: text)) 105 | XCTAssertEqual(m.map { text[$0.captures[0].range.lowerBound] }, ["a", "b", "c", 
"d"].map(Character.init)) 106 | 107 | XCTAssertEqual(pattern.matches(in: "\n\n").map { $0.captures[0] }.count, 3) 108 | } 109 | 110 | func testMatchEndOfLines() throws { 111 | let text = """ 112 | airs 113 | blip 114 | cera user 115 | dilled10 io 116 | 117 | """ 118 | 119 | var pattern = try Parser(search: Line.End() • Capture()) 120 | var m = pattern.matches(in: text) 121 | XCTAssertEqual(m.dropLast().map { text[$0.captures[0].range.lowerBound] }, 122 | Array(repeating: Character("\n"), count: 4)) 123 | 124 | pattern = try Parser(search: Capture() • Line.End()) 125 | m = pattern.matches(in: text) 126 | XCTAssertEqual(m.dropLast().map { text[$0.captures[0].range.lowerBound] }, 127 | Array(repeating: Character("\n"), count: 4)) 128 | } 129 | 130 | func testMultipleCaptures() throws { 131 | let text = """ 132 | There was a young woman named Bright, 133 | Whose speed was much faster than light. 134 | She set out one day, 135 | In a relative way, 136 | And returned on the previous night. 137 | """ 138 | 139 | let twoFirstWords = [["There", "was"], ["Whose", "speed"], ["She", "set"], ["In", "a"], ["And", "returned"]] 140 | let pattern = 141 | Line.Start() • Capture(name: "word", letter+) 142 | • " " • Capture(name: "word", letter+) 143 | 144 | assertCaptures(pattern, input: text, result: twoFirstWords) 145 | 146 | let matches = Array(try Parser(search: pattern).matches(in: text)) 147 | XCTAssertEqual(matches.map { text[$0[one: "word"]!] }, ["There", "Whose", "She", "In", "And"]) 148 | XCTAssertEqual(matches.map { $0[multiple: "word"].map { String(text[$0]) } }, twoFirstWords) 149 | XCTAssertNil(matches.first![one: "not a name"]) 150 | } 151 | 152 | let text = """ 153 | # ================================================ 154 | 155 | 0005..0010 ; Common # Cc [32] .. 
156 | 002F ; Common # Zs SPACE 157 | """ 158 | 159 | lazy var rangeAndProperty: Parser = { 160 | let hexNumber = Capture(name: "codePoint", hexDigit+) 161 | let hexRange = AnyPattern("\(hexNumber)..\(hexNumber)") / hexNumber 162 | return try! Parser(search: AnyPattern("\n\(hexRange • Skip()); \(Capture(name: "property", Skip())) ")) 163 | }() 164 | 165 | func testStringInterpolation() throws { 166 | assertCaptures(rangeAndProperty, input: text, result: [["0005", "0010", "Common"], ["002F", "Common"]]) 167 | } 168 | 169 | func testAnyPattern() throws { 170 | let text = """ 171 | : Test Case '-[PerformanceTests.PerformanceTests testAnyNumeral]' measured [CPU Instructions Retired, kI] average: 6071231.970, relative standard deviation: 0.300%, values: [6125777.558000, 6066280.613000, 6064787.491000, 6063915.538000, 6066853.091000, 6063079.064000, 6068140.744000, 6064901.279000, 6064321.893000, 6064262.431000], performanceMetricID:com.apple.dt.XCTMetric_CPU.instructions_retired, baselineName: "", baselineAverage: , maxPercentRegression: 10.000%, maxPercentRelativeStandardDeviation: 10.000%, maxRegression: 0.000, maxStandardDeviation: 0.000 172 | 173 | """ 174 | let skip = Skip() 175 | let measurementPattern = AnyPattern(""" 176 | : Test Case '-[\(skip).\(Capture(name: "name", skip))]' measured [\(Capture(name: "measurementName", skip)), \(Capture(name: "measurementUnit", skip))] average: \(Capture(name: "average", skip)), relative standard deviation: \(Capture(name: "standardDeviation", skip))%\(skip), performanceMetricID:\(Capture(name: "measurementID", skip)),\(skip) 177 | 178 | """) 179 | assertParseAll(measurementPattern, input: text, count: 1) 180 | } 181 | 182 | func testMatchDecoding() throws { 183 | struct Property: Decodable, Equatable { 184 | let codePoint: [Int] 185 | let property: String 186 | let notCaptured: String? 
187 | } 188 | 189 | let matches = Array(rangeAndProperty.matches(in: text)) 190 | let property = try matches.first!.decode(Property.self, from: text) 191 | XCTAssertEqual(property, Property(codePoint: [5, 10], property: "Common", notCaptured: nil)) 192 | 193 | XCTAssertThrowsError(try matches.last!.decode(Property.self, from: text)) 194 | } 195 | 196 | func testParserDecoding() { 197 | struct Property: Decodable, Equatable { 198 | let codePoint: [String] 199 | let property: String 200 | } 201 | 202 | XCTAssertEqual(try rangeAndProperty.decode([Property].self, from: text), 203 | [Property(codePoint: ["0005", "0010"], property: "Common"), 204 | Property(codePoint: ["002F"], property: "Common")]) 205 | XCTAssertEqual(try rangeAndProperty.decodeFirst(Property.self, from: text), 206 | Property(codePoint: ["0005", "0010"], property: "Common")) 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /Tests/PatternsTests/GeneralTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // GeneralTests.swift 3 | // PatternsTests 4 | // 5 | // Created by Kåre Morstøl on 31/05/2019. 
6 | // 7 | 8 | @testable import Patterns 9 | import XCTest 10 | 11 | class GeneralTests: XCTestCase { 12 | func testRangeOf() { 13 | let c = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 14 | 15 | XCTAssertEqual(c.range(of: [4, 5, 6]), 4 ..< 7) 16 | XCTAssertEqual(c.range(of: [4, 5, 5]), nil) 17 | XCTAssertEqual(c.range(of: [0, 0, 0]), nil) 18 | XCTAssertEqual(c.range(of: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), nil) 19 | XCTAssertEqual(c.range(of: []), nil) 20 | XCTAssertEqual(c.range(of: [8, 9]), 8 ..< 10) 21 | XCTAssertEqual(c.range(of: [0]), 0 ..< 1) 22 | XCTAssertEqual(c[c.range(of: [0])!], [0]) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /Tests/PatternsTests/GrammarTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // GrammarTests.swift 3 | // 4 | // 5 | // Created by Kåre Morstøl on 27/05/2020. 6 | // 7 | 8 | @testable import Patterns 9 | import XCTest 10 | 11 | class GrammarTests: XCTestCase { 12 | let grammar1: Grammar = { 13 | let g = Grammar() 14 | g.letter <- Capture(letter) 15 | g.space <- whitespace 16 | return g 17 | }() 18 | 19 | func testNamesAnonymousCaptures() { 20 | XCTAssertEqual((grammar1.patterns.first?.pattern.wrapped as? 
Capture>)?.name, "letter") 21 | } 22 | 23 | func testSetsFirstPattern() { 24 | XCTAssertEqual(grammar1.firstPattern, "letter") 25 | } 26 | 27 | func testDirectRecursion1() throws { 28 | let g = Grammar() 29 | g.a <- "a" / any • g.a 30 | let parser = try Parser(g) 31 | assertParseAll(parser, input: " aba", count: 2) 32 | } 33 | 34 | func testDirectRecursion2() throws { 35 | let g = Grammar() 36 | g.balancedParentheses <- "(" • (!OneOf("()") • any / g.balancedParentheses)* • ")" 37 | let parser = try Parser(g) 38 | assertParseAll(parser, input: "( )", count: 1) 39 | assertParseAll(parser, input: "((( )( )))", count: 1) 40 | assertParseAll(parser, input: "(( )", count: 0) 41 | } 42 | 43 | func testArithmetic() throws { 44 | let g = Grammar { g in 45 | g.all <- g.expr • !any 46 | g.expr <- g.sum 47 | g.sum <- g.product • (("+" / "-") • g.product)* 48 | g.product <- g.power • (("*" / "/") • g.power)* 49 | g.power <- g.value • ("^" • g.power)¿ 50 | g.value <- digit+ / "(" • g.expr • ")" 51 | } 52 | 53 | let p = try Parser(g) 54 | assertParseMarkers(p, input: "1+2-3*(4+3)|") 55 | assertParseAll(p, input: "1+2(", count: 0) 56 | } 57 | 58 | func testOptimisesTailCall() throws { 59 | let g = Grammar { g in 60 | g.a <- " " / Skip() • g.a 61 | } 62 | 63 | func isCall(_ inst: Instruction) -> Bool { 64 | switch inst { 65 | case .call: return true 66 | default: return false 67 | } 68 | } 69 | 70 | XCTAssertEqual(try Parser(g).matcher.instructions.filter(isCall(_:)).count, 1) 71 | XCTAssertEqual(try Parser(search: g).matcher.instructions.filter(isCall(_:)).count, 1) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /Tests/PatternsTests/PatternTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PatternTests.swift 3 | // Patterns 4 | // 5 | // Created by Kåre Morstøl on 20/03/2017. 
6 | // 7 | // 8 | 9 | import Patterns 10 | import XCTest 11 | 12 | let asciiDigit = OneOf(UInt8(ascii: "0") ... UInt8(ascii: "9")) 13 | let asciiLowercase = OneOf(UInt8(ascii: "a") ... UInt8(ascii: "z")) 14 | let asciiUppercase = OneOf(UInt8(ascii: "A") ... UInt8(ascii: "Z")) 15 | let asciiLetter = OneOf(asciiLowercase, asciiUppercase) 16 | 17 | class PatternTests: XCTestCase { 18 | func testLiteral() { 19 | assertParseAll(Capture("a"), input: "abcd", result: "a", count: 1) 20 | assertParseAll(Capture("b"), input: "abcdb", result: "b", count: 2) 21 | assertParseAll(Capture("ab"), input: "abcaba", result: "ab", count: 2) 22 | } 23 | 24 | func testLiteralUTF8() { 25 | assertParseAll(Capture(Literal("a".utf8)), input: "abcd".utf8, result: "a".utf8, count: 1) 26 | assertParseAll(Capture(Literal("b".utf8)), input: "abcdb".utf8, result: "b".utf8, count: 2) 27 | assertParseAll(Capture(Literal("ab".utf8)), input: "abcaba".utf8, result: "ab".utf8, count: 2) 28 | } 29 | 30 | func testOneOf() { 31 | let vowels = OneOf("aeiouAEIOU") 32 | assertParseAll(Capture(vowels), input: "I am, you are", result: ["I", "a", "o", "u", "a", "e"]) 33 | let notVowels = OneOf(not: "aeiouAEIOU") 34 | assertParseAll(Capture(notVowels), input: "I am, you are", result: [" ", "m", ",", " ", "y", " ", "r"]) 35 | 36 | let lowercaseASCII = OneOf(description: "lowercaseASCII") { character in 37 | character.isASCII && character.isLowercase 38 | } 39 | assertParseAll(Capture(lowercaseASCII), input: "aTæøåk☀️", result: ["a", "k"]) 40 | 41 | assertParseAll(digit, input: "ab12c3,d4", count: 4) 42 | 43 | assertParseAll(Capture(OneOf("a" ... 
"e")), 44 | input: "abgkxeryza", result: ["a", "b", "e", "a"]) 45 | assertParseAll(Capture(OneOf(not: "a" ..< "f")), 46 | input: "abgkxeryza", result: ["g", "k", "x", "r", "y", "z"]) 47 | } 48 | 49 | func testOneOfUTF8() { 50 | let vowels = OneOf("aeiouAEIOU".utf8) 51 | assertParseAll(Capture(vowels), input: "I am, you are".utf8, result: ["I", "a", "o", "u", "a", "e"].map { $0.utf8 }) 52 | let notVowels = OneOf(not: "aeiouAEIOU".utf8) 53 | assertParseAll(Capture(notVowels), input: "I am, you are".utf8, result: [" ", "m", ",", " ", "y", " ", "r"].map { $0.utf8 }) 54 | 55 | let lowercaseASCII = OneOf(description: "lowercaseASCII") { character in 56 | (UInt8(ascii: "a") ... UInt8(ascii: "z")).contains(character) 57 | } 58 | assertParseAll(Capture(lowercaseASCII), input: "aTæøåk☀️".utf8, result: ["a", "k"].map { $0.utf8 }) 59 | 60 | assertParseAll(Capture(OneOf(UInt8(ascii: "a") ... UInt8(ascii: "e"))), 61 | input: "abgkxeryza".utf8, result: ["a", "b", "e", "a"].map { $0.utf8 }) 62 | assertParseAll(Capture(OneOf(not: UInt8(ascii: "a") ..< UInt8(ascii: "f"))), 63 | input: "abgkxeryza".utf8, result: ["g", "k", "x", "r", "y", "z"].map { $0.utf8 }) 64 | 65 | // requires String.UTF8View to be ExpressibleByStringLiteral 66 | // assertParseAll(OneOf(".,"), input: "., ,".utf8, result: [".", ",", ","].map{$0.utf8}) 67 | } 68 | 69 | func testOneOfsMultiple() { 70 | assertParseAll(Capture(OneOf("a" ... "e", "xyz")), 71 | input: "abegkxryz", result: ["a", "b", "e", "x", "y", "z"]) 72 | assertParseAll(Capture(OneOf("a" ..< "e", "g", uppercase)), 73 | input: "aBcdefgh", result: ["a", "B", "c", "d", "g"]) 74 | 75 | assertParseAll(Capture(OneOf(not: "a" ... 
"e", "xyz")), 76 | input: "abegkxryz", result: ["g", "k", "r"]) 77 | assertParseAll(Capture(OneOf(not: "a" ..< "e", "g", uppercase)), 78 | input: "aBcdefgh", result: ["e", "f", "h"]) 79 | } 80 | 81 | func testOptional() throws { 82 | assertParseAll(letter • digit*, input: "123abc123d", count: 4) 83 | assertParseAll(Capture(digit¿ • letter), 84 | input: "123abc", result: ["3a", "b", "c"]) 85 | 86 | assertParseAll(asciiLetter • asciiDigit*, input: "123abc123d".utf8, count: 4) 87 | assertParseAll(Capture(asciiDigit¿ • asciiLetter), 88 | input: "123abc".utf8, result: ["3a", "b", "c"].map { $0.utf8 }) 89 | } 90 | 91 | func testRepeat() throws { 92 | assertParseAll(digit.repeat(2...), input: "12a1bc123", count: 2) 93 | assertParseAll(Capture(digit+), input: "123abc", result: "123", count: 1) 94 | assertParseAll(Capture(digit.repeat(3...)), input: "123abc", result: "123", count: 1) 95 | assertParseAll(digit.repeat(4...), input: "123abc", count: 0) 96 | 97 | assertParseAll(Capture(digit+), input: "a123abc123d", result: "123", count: 2) 98 | assertParseAll(digit+, input: "123abc09d4 8", count: 4) 99 | assertParseAll(Capture(digit.repeat(...2) • letter), 100 | input: "123abc09d4 8", result: ["23a", "b", "c", "09d"]) 101 | 102 | assertParseAll(Capture(digit.repeat(1 ... 
2)), input: "123abc09d48", result: ["12", "3", "09", "48"]) 103 | 104 | assertParseAll(Capture(digit.repeat(2)), input: "1234 5 6 78", result: ["12", "34", "78"]) 105 | 106 | assertParseAll(Capture("a"* • "b"), input: "b aabb ab", result: ["b", "aab", "b", "ab"]) 107 | assertParseAll(Capture("a"*), input: "b aabb ab", result: ["", "", "aa", "", "", "", "a", "", ""]) 108 | 109 | assertParseAll( 110 | Capture((!newline • ascii)+), 111 | input: "123\n4567\n89", result: ["123", "4567", "89"]) 112 | 113 | XCTAssertEqual(digit+.description, "digit{1...}") 114 | } 115 | 116 | func testRepeatLiterals() throws { 117 | assertParseAll(Capture("a"+), input: "a aa aa", result: ["a", "aa", "aa"]) 118 | assertParseAll(Capture("a"+), input: "a aa aa".utf8, result: ["a", "aa", "aa"].map { $0.utf8 }) 119 | assertParseAll(Capture("a" • "a"*), input: "a aaa aa".utf16, result: ["a", "aaa", "aa"].map { $0.utf16 }) 120 | assertParseAll(Capture("a" • "a"¿), input: "a aa aa".unicodeScalars, result: ["a", "aa", "aa"].map { $0.unicodeScalars }) 121 | } 122 | 123 | func testOr() { 124 | assertParseAll(Capture("a" / "b"), input: "bcbd".utf16, result: "b".utf16, count: 2) 125 | let pattern = Capture("a" / "b") 126 | assertParseAll(pattern, input: "acdaa", result: "a", count: 3) 127 | assertParseAll(pattern, input: "abcdb", count: 3) 128 | } 129 | 130 | func testOrWithCapture() throws { 131 | let text = """ 132 | # Total code points: 88 133 | 134 | # ================================================ 135 | 136 | 0780..07A5 ; Thaana # Lo [38] THAANA LETTER HAA..THAANA LETTER WAAVU 137 | 07B1 ; Thaana # Lo THAANA LETTER NAA 138 | 139 | """ 140 | 141 | let hexNumber = Capture(hexDigit+) 142 | let hexRange = (hexNumber • ".." 
• hexNumber) / hexNumber 143 | let rangeAndProperty = Line.Start() • hexRange • Skip() • "; " • Capture(Skip()) • " " 144 | 145 | assertCaptures(rangeAndProperty, input: text, 146 | result: [["0780", "07A5", "Thaana"], ["07B1", "Thaana"]]) 147 | } 148 | 149 | func testLineStart() throws { 150 | let text = """ 151 | line 1 152 | line 2 153 | line 3 154 | line 4 155 | """ 156 | let pattern = Line.Start() 157 | assertParseAll(pattern, input: "", result: "", count: 1) 158 | assertParseAll(pattern, input: "\n", count: 2) 159 | assertParseAll(pattern, input: text, result: "", count: 4) 160 | assertParseAll( 161 | Line.Start() • Capture(Skip()) • " ", 162 | input: text, result: "line", count: 4) 163 | assertParseAll( 164 | Capture(Line.Start() • "line"), 165 | input: text, result: "line", count: 4) 166 | assertParseAll( 167 | Capture(digit • Skip() • Line.Start() • "l"), 168 | input: text, result: ["1\nl", "2\nl", "3\nl"]) 169 | 170 | /* TODO: Implement? 171 | XCTAssertThrowsError(Line.start • Line.start) 172 | XCTAssertThrowsError(Line.start • Capture(Line.start)) 173 | */ 174 | XCTAssertNoThrow(Line.Start() • Skip() • Line.Start()) 175 | } 176 | 177 | func testLineEnd() throws { 178 | let pattern = Line.End() 179 | assertParseAll(pattern, input: "", result: "", count: 1) 180 | assertParseAll(pattern, input: "\n", count: 2) 181 | assertParseAll(pattern, input: "\n\n", count: 3) 182 | 183 | let text = """ 184 | line 1 185 | line 2 186 | line 3 187 | line 4 188 | """ 189 | assertParseAll(pattern, input: text, count: 4) 190 | assertParseAll( 191 | " " • Capture(Skip()) • Line.End(), 192 | input: text, result: ["1", "2", "3", "4"]) 193 | assertParseAll( 194 | Capture(digit • Line.End()), 195 | input: text, result: ["1", "2", "3", "4"]) 196 | assertParseAll( 197 | Capture(digit • Line.End() • Skip() • "l"), 198 | input: text, result: ["1\nl", "2\nl", "3\nl"]) 199 | 200 | // TODO: Implement? 
201 | // XCTAssertThrowsError(Line.end • Line.end) 202 | // XCTAssertThrowsError(Line.end • Capture(Line.end)) 203 | 204 | XCTAssertNoThrow(try Parser(search: Line.End() • Skip() • Line.End())) 205 | 206 | assertParseAll(Line.End(), input: "\n", count: 2) 207 | } 208 | 209 | func testLineEndUTF8_16_UnicodeScalars() throws { 210 | let pattern = Line.End() 211 | assertParseAll(pattern, input: "".utf16, result: "".utf16, count: 1) 212 | assertParseAll(pattern, input: "\n".utf16, count: 2) 213 | assertParseAll(pattern, input: "\n\n".utf16, count: 3) 214 | 215 | let text = """ 216 | line 1 217 | line 2 218 | line 3 219 | line 4 220 | """.utf8 221 | assertParseAll(Line.End(), input: text, count: 4) 222 | assertParseAll( 223 | " " • Capture(Skip()) • Line.End(), 224 | input: text, result: ["1", "2", "3", "4"].map { $0.utf8 }) 225 | assertParseAll( 226 | Capture(asciiDigit • Line.End()), 227 | input: text, result: ["1", "2", "3", "4"].map { $0.utf8 }) 228 | assertParseAll( 229 | Capture(asciiDigit • Line.End() • Skip() • "l"), 230 | input: text, result: ["1\nl", "2\nl", "3\nl"].map { $0.utf8 }) 231 | 232 | assertParseAll(Line.End(), input: "\n".unicodeScalars, count: 2) 233 | } 234 | 235 | func testLine() throws { 236 | let text = """ 237 | line 1 238 | 239 | line 3 240 | line 4 241 | 242 | """ 243 | 244 | assertParseAll(Capture(Line()), input: text, result: ["line 1", "", "line 3", "line 4", ""]) 245 | assertParseAll(Capture(Line()), input: text.utf8, result: ["line 1", "", "line 3", "line 4", ""].map { $0.utf8 }) 246 | } 247 | 248 | func testWordBoundary() throws { 249 | let pattern = Word.Boundary() 250 | assertParseMarkers(pattern, input: #"|I| |said| |"|hello|"|"#) 251 | assertParseMarkers(pattern, input: "|this| |I| |-|3,875.08| |can't|,| |you| |letter|-|like|.| |And|?| |then|") 252 | } 253 | 254 | func testNot() throws { 255 | assertParseMarkers(!alphanumeric, input: #"I| said|,| 3|"#) 256 | assertParseAll( 257 | Capture(Word.Boundary() • !digit • alphanumeric+), 
258 | input: "123 abc 1ab a32b", 259 | result: ["abc", "a32b"]) 260 | assertParseAll( 261 | Word.Boundary() • Capture(!digit • alphanumeric+), 262 | input: "123 abc 1ab a32b", 263 | result: ["abc", "a32b"]) 264 | assertParseAll( 265 | Capture(!"abc" • letter+), 266 | input: "ab abc abcd efg", 267 | result: ["ab", "bc", "bcd", "efg"]) 268 | 269 | func any() -> OneOf { OneOf(description: "any", contains: { _ in true }) } 270 | 271 | assertParseAll( 272 | Capture(!"abc" • !" " • any()), 273 | input: "ab abc abcd ".utf8, 274 | result: ["a", "b", "b", "c", "b", "c", "d"].map { $0.utf8 }) 275 | 276 | assertParseAll( 277 | Capture(" " • (!OneOf(" ")).repeat(2) • "d"), // repeat a parser of length 0. 278 | input: " d cd", result: [" d"]) 279 | 280 | assertParseMarkers(!any(), input: " |") // EOF 281 | assertParseMarkers(try Parser(!any()), input: "|") 282 | } 283 | 284 | func testAnd() throws { 285 | assertParseAll(Capture(&&letter • ascii), input: "1abøcæ", result: ["a", "b", "c"]) 286 | assertParseAll(Capture(&&Line.Start() • "a"), input: "abø\ncæa\na".utf8, result: "a".utf8, count: 2) 287 | 288 | // find last occurrence of "xuxu", even if it overlaps with itself. 289 | assertParseMarkers(try Parser(Grammar { g in g.last <- &&"xuxu" • any / any • g.last }+ • any.repeat(3)), 290 | input: "xuxuxuxu|i") 291 | } 292 | 293 | func testReadmeExample() throws { 294 | let text = "This is a point: (43,7), so is (0, 5). But my final point is (3,-1)."
295 | 296 | let number = ("+" / "-" / "") • digit+ 297 | let point = "(" • Capture(name: "x", number) 298 | • "," • " "¿ • Capture(name: "y", number) • ")" 299 | 300 | struct Point: Codable, Equatable { 301 | let x, y: Int 302 | } 303 | 304 | let points = try Parser(search: point).decode([Point].self, from: text) 305 | XCTAssertEqual(points, [Point(x: 43, y: 7), Point(x: 0, y: 5), Point(x: 3, y: -1)]) 306 | 307 | assertCaptures(point, input: text, result: [["43", "7"], ["0", "5"], ["3", "-1"]]) 308 | } 309 | 310 | func testReadme1() throws { 311 | let l = OneOf(description: "ten") { character in 312 | character.wholeNumberValue == 10 313 | } 314 | 315 | let arithmetic = Grammar { g in 316 | g.all <- g.expr • !any 317 | g.expr <- g.sum 318 | g.sum <- g.product • (("+" / "-") • g.product)* 319 | g.product <- g.power • (("*" / "/") • g.power)* 320 | g.power <- g.value • ("^" • g.power)¿ 321 | g.value <- digit+ / "(" • g.expr • ")" 322 | } 323 | 324 | let parser = try Parser(search: l) 325 | for match in parser.matches(in: "text") { 326 | _ = match 327 | // ... 328 | } 329 | 330 | _ = arithmetic 331 | } 332 | 333 | func testReadme2() throws { 334 | let text = "This is a point: (43,7), so is (0, 5). But my final point is (3,-1)." 335 | 336 | let number = ("+" / "-" / "") • digit+ 337 | let point = "(" • Capture(name: "x", number) 338 | • "," • " "¿ • Capture(name: "y", number) • ")" 339 | 340 | struct Point: Codable { 341 | let x, y: Int 342 | } 343 | 344 | let parser = try Parser(search: point) 345 | let points = try parser.decode([Point].self, from: text) 346 | 347 | let pointsAsSubstrings = parser.matches(in: text).map { match in 348 | (text[match[one: "x"]!], text[match[one: "y"]!]) 349 | } 350 | 351 | _ = (points, pointsAsSubstrings) 352 | } 353 | 354 | func testReadme3() throws { 355 | let text = "This is a point: (43,7), so is (0, 5). But my final point is (3,-1).".utf8 356 | 357 | let digit = OneOf(UInt8(ascii: "0") ... 
UInt8(ascii: "9")) 358 | let number = ("+" / "-" / "") • digit+ 359 | let point = "(" • Capture(name: "x", number) 360 | • "," • " "¿ • Capture(name: "y", number) • ")" 361 | 362 | struct Point: Codable { 363 | let x, y: Int 364 | } 365 | 366 | let parser = try Parser(search: point) 367 | let pointsAsSubstrings = parser.matches(in: text).map { match in 368 | (text[match[one: "x"]!], text[match[one: "y"]!]) 369 | } 370 | 371 | _ = pointsAsSubstrings 372 | } 373 | } 374 | -------------------------------------------------------------------------------- /Tests/PatternsTests/SkipTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // SkipTests.swift 3 | // 4 | // 5 | // Created by Kåre Morstøl on 26/06/2020. 6 | // 7 | 8 | import Patterns 9 | import XCTest 10 | 11 | class SkipTests: XCTestCase { 12 | func testSimple() throws { 13 | let text = "This is a test text." 14 | assertParseAll( 15 | Capture(" " • Skip() • " "), 16 | input: text, result: [" is ", " test "]) 17 | 18 | assertParseAll( 19 | Capture(" " • Skip() • "d"), 20 | input: " ad d", result: [" ad", " d"]) 21 | } 22 | 23 | func testWithCapture() throws { 24 | let text = "This is a test text." 
25 | assertParseAll( 26 | " " • Capture(letter • Skip()) • " ", 27 | input: text, result: ["is", "a", "test"]) 28 | assertParseAll( 29 | " " • Capture(Skip() • letter+) • " ", 30 | input: text, result: ["is", "a", "test"]) 31 | let p = " " • Capture(Skip()) • " " 32 | assertParseAll( 33 | p, 34 | input: text, result: ["is", "a", "test"]) 35 | assertParseAll( 36 | " " • Capture(Skip()) • " ", 37 | input: text.utf8, result: ["is", "a", "test"].map { $0.utf8 }) 38 | 39 | let lines = """ 40 | 1 41 | 2 42 | 43 | 3 44 | """ 45 | assertParseAll(Line.Start() • Capture(Skip()) • Line.End(), 46 | input: lines, result: ["1", "2", "", "3"]) 47 | assertParseAll(Capture(Line.Start() • Skip() • Line.End()), 48 | input: lines, result: ["1", "2", "", "3"]) 49 | } 50 | 51 | func testInsideOptional() throws { 52 | assertParseMarkers((Skip() • " ")¿, input: "This |is |a |test |t|e|x|t|.|") 53 | assertParseMarkers((Skip() • " ")+, input: "This is a test |text.") 54 | assertParseMarkers(Skip() • " ", input: "This |is |a |test |text.") 55 | 56 | assertParseMarkers((" " • Skip())¿ • letter, input: "T|h|i|s| i|s|") 57 | assertParseMarkers((" " • Skip())+ • letter, input: "This i|s a| t|est t|ext.") 58 | assertParseMarkers(" " • Skip() • letter, input: "This i|s a| t|est t|ext.") 59 | } 60 | 61 | func testInsideChoice() { 62 | assertParseMarkers((Skip() • " ") / letter, input: "This |is |a |test |t|e|x|t|.") 63 | assertParseMarkers((" " • Skip()) / letter, input: "T|h|i|s| |i|s|") 64 | assertParseMarkers(letter / (" " • Skip()), input: "T|h|i|s| |i|s|") 65 | assertParseMarkers(letter / (Skip() • " "), input: "T|h|i|s|, |i|s| |") 66 | } 67 | 68 | func testDoubleSkip() throws { 69 | assertParseMarkers(try Parser(Skip() • Skip() • " "), input: "This |is") 70 | } 71 | 72 | func testAtTheEnd() throws { 73 | assertParseMarkers(" " • Skip(), input: "a |bee") 74 | assertParseAll(" " • Capture(Skip()), input: "a bee", result: [""]) 75 | 76 | // used in documentation for Skip. 
77 | 78 | let s = Skip() 79 | assertParseMarkers(try Parser(s • " "), input: "jfd | |jlj |") 80 | 81 | let g = Grammar { g in 82 | g.nextSpace <- g.skip • " " 83 | g.skip <- Skip() // Does not work. 84 | } 85 | assertParseMarkers(try Parser(g), input: "sdf ksj") 86 | } 87 | 88 | func testBeforeGrammarTailCall() throws { 89 | let recursive = Grammar { g in 90 | g.a <- " " • Skip() • g.a 91 | } 92 | assertParseAll(recursive, input: "This is a test text.", count: 0) 93 | 94 | let callAnother = Grammar { g in 95 | g.a <- " " • Skip() • g.b 96 | g.b <- letter 97 | } 98 | assertParseMarkers(callAnother, input: "This i|s a| t|est t|ext.") 99 | assertParseMarkers(" " • Skip() • letter, input: "This i|s a| t|est t|ext.") 100 | } 101 | 102 | func testBeforeGrammarCallInChoice() { 103 | let g = Grammar { g in 104 | g.a <- " " • (Skip() • g.a / letter) 105 | } 106 | assertParseMarkers(g, input: "This is a test t|ext.") 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /Tests/PatternsTests/TestHelpers.swift: -------------------------------------------------------------------------------- 1 | // 2 | // File.swift 3 | // Patterns 4 | // 5 | // Created by Kåre Morstøl on 29.07.2018. 6 | // 7 | 8 | import Foundation 9 | @testable import Patterns 10 | import XCTest 11 | 12 | extension Array where Element: Hashable { 13 | func difference(from other: [Element]) -> [Element] { 14 | let thisSet = Set(self) 15 | let otherSet = Set(other) 16 | return Array(thisSet.symmetricDifference(otherSet)) 17 | } 18 | } 19 | 20 | extension Parser { 21 | func ranges(in input: Input, from startindex: Input.Index? 
= nil) 22 | -> AnySequence> { 23 | AnySequence(matches(in: input, from: startindex).lazy.map { $0.range }) 24 | } 25 | } 26 | 27 | extension XCTestCase { /** Asserts that two sequences of sequences are equal, comparing the inner sequences element by element. A mismatch, or an error thrown while evaluating either argument, is recorded as a failure at the caller's `file` and `line`. */ 28 | func XCTAssertEqualElements 29 | (_ seq1: @autoclosure () throws -> S1, 30 | _ seq2: @autoclosure () throws -> S2, 31 | _ message: @autoclosure () -> String = "", 32 | file: StaticString = #filePath, line: UInt = #line) 33 | where S1.Element: Sequence, S2.Element: Sequence, S1.Element.Element: Equatable, 34 | S1.Element.Element == S2.Element.Element { 35 | do { 36 | let seq1 = try seq1() 37 | let seq2 = try seq2() 38 | if !seq1.elementsEqual(seq2, by: { $0.elementsEqual($1) }) { 39 | XCTFail("'\(seq1)' and '\(seq2)' are not equal.", file: file, line: line) /* forward the location so the failing test is highlighted, not this helper */ 40 | } 41 | } catch { 42 | XCTFail(String(describing: error), file: file, line: line) 43 | } 44 | } 45 | 46 | func assertParseAll 47 | (_ parser: Parser, input: Input, result: [Input], file: StaticString = #filePath, line: UInt = #line) 48 | where Input.Element: Hashable { 49 | let parsed = parser.ranges(in: input).map { input[$0] } 50 | XCTAssertEqualElements(parsed, result, file: file, line: line) 51 | } 52 | 53 | func assertParseAll 54 | (_ pattern: P, input: P.Input, result: [P.Input], file: StaticString = #filePath, line: UInt = #line) { 55 | do { 56 | let parser = try Parser(search: pattern) 57 | assertParseAll(parser, input: input, result: result, file: file, line: line) 58 | } catch { 59 | XCTFail("\(error)", file: file, line: line) 60 | } 61 | } 62 | 63 | func assertParseAll 64 | (_ parser: Parser, input: Input, result: Input?
= nil, count: Int, file: StaticString = #filePath, line: UInt = #line) 65 | where Input.Element: Hashable { 66 | if let result = result { 67 | assertParseAll(parser, input: input, result: Array(repeating: result, count: count), file: file, line: line) 68 | return 69 | } else { 70 | let parsedCount = parser.matches(in: input).reduce(into: 0) { count, _ in count += 1 } 71 | XCTAssertEqual(parsedCount, count, "Incorrect count.", file: file, line: line) 72 | } 73 | } 74 | 75 | func assertParseAll(_ pattern: P, input: P.Input, result: P.Input? = nil, count: Int, 76 | file: StaticString = #filePath, line: UInt = #line) { 77 | do { 78 | let parser = try Parser(search: pattern) 79 | assertParseAll(parser, input: input, result: result, count: count, file: file, line: line) 80 | } catch { 81 | XCTFail("\(error)", file: file, line: line) 82 | } 83 | } 84 | 85 | fileprivate func processMarkers(_ string: String, marker: Character = "|") -> (String, [String.Index]) { 86 | var indices = [String.Index]() 87 | var string = string 88 | 89 | while var i = string.firstIndex(of: marker) { 90 | string.remove(preservingIndex: &i) 91 | indices.append(i) 92 | } 93 | return (string, indices) 94 | } 95 | /** Builds a search parser from `pattern` and checks it against the marker positions in `input`. A pattern that cannot be turned into a parser is recorded as a test failure at the caller's location instead of crashing the test run, matching the `assertParseAll` helpers. */ 96 | func assertParseMarkers(_ pattern: P, input: String, 97 | file: StaticString = #filePath, line: UInt = #line) where P.Input == String { 98 | do { assertParseMarkers(try
Parser(search: pattern), input: input, file: file, line: line) } catch { XCTFail("\(error)", file: file, line: line) } 99 | } 100 | 101 | func assertParseMarkers(_ pattern: Parser, input: String, 102 | file: StaticString = #filePath, line: UInt = #line) { 103 | let (string, correct) = processMarkers(input) 104 | let parsedRanges = Array(pattern.ranges(in: string)) 105 | XCTAssert(parsedRanges.allSatisfy { $0.isEmpty }, "Not all results are empty ranges", 106 | file: file, line: line) 107 | let parsed = parsedRanges.map { $0.lowerBound } 108 | let notParsed = Set(correct).subtracting(parsed).sorted() 109 | if !notParsed.isEmpty { 110 | XCTFail("\nThese positions were not parsed:\n" + string.underlineIndices(notParsed), 111 | file: file, line: line) 112 | } 113 | let incorrectlyParsed = Set(parsed).subtracting(correct).sorted() 114 | if !incorrectlyParsed.isEmpty { 115 | XCTFail("\nThese positions were incorrectly parsed:\n" + string.underlineIndices(incorrectlyParsed), 116 | file: file, line: line) 117 | } 118 | } 119 | /** Builds a search parser from `pattern` and compares the captured substrings of every match in `input` with `result`. A pattern that cannot be turned into a parser is recorded as a test failure at the caller's location instead of crashing the test run. */ 120 | func assertCaptures(_ pattern: P, input: String, result: [[String]], 121 | file: StaticString = #filePath, line: UInt = #line) where P.Input == String { 122 | do { assertCaptures(try
Parser(search: pattern), input: input, result: result, file: file, line: line) } catch { XCTFail("\(error)", file: file, line: line) } 123 | } 124 | 125 | func assertCaptures(_ pattern: Parser, input: String, result: [[String]], 126 | file: StaticString = #filePath, line: UInt = #line) { 127 | let matches = Array(pattern.matches(in: input)) 128 | let output = matches.map { match in match.captures.map { String(input[$0.range]) } } 129 | XCTAssertEqual(output, result, file: file, line: line) 130 | } 131 | } 132 | 133 | extension RangeReplaceableCollection where Self: BidirectionalCollection { 134 | @discardableResult mutating func remove(preservingIndex i: inout Self.Index) -> Self.Element { 135 | guard i != startIndex else { 136 | defer { i = startIndex } 137 | return remove(at: i) 138 | } 139 | let before = self.index(before: i) 140 | defer { i = index(after: before) } 141 | return remove(at: i) 142 | } 143 | } 144 | 145 | func getLocalURL(for path: String, file: String = #filePath) -> URL { 146 | URL(fileURLWithPath: file) 147 | .deletingLastPathComponent().appendingPathComponent(path) 148 | } 149 | 150 | extension StringProtocol where Index == String.Index { /** Returns a copy of the string in which the character at each of `indices` is followed by U+0332 (combining low line). An index equal to `endIndex` is shown as an underlined space appended to the end. Indices are processed back to front so that insertions do not shift positions still to be marked. */ 151 | func underlineIndices(_ indices: [Index]) -> String { 152 | let marker: Character = "\u{0332}" 153 | var result = String(self) 154 | for index in indices.reversed() { 155 | if index == endIndex { 156 | result.append(" \(marker)") 157 | continue /* there is no character after endIndex to insert behind, but the remaining indices must still be underlined */ 158 | } 159 | result.insert(marker, at: result.index(after: index)) 160 | } 161 | return result 162 | } 163 | } 164 | 165 | func debugVM(_ instructions: C, thread: VMEngine.Thread, input: Input) 166 | where C.Index == Int, Input: StringProtocol, Input: RangeReplaceableCollection { 167 | for i in instructions.indices { 168 | print(i == thread.instructionIndex ?
">" : " ", terminator: "") 169 | print("\(i)".padding(toLength: 2, withPad: " ", startingAt: 0), instructions[i]) 170 | } 171 | print(input.showIndex(thread.inputIndex)) 172 | print() 173 | } 174 | 175 | extension StringProtocol where Self: RangeReplaceableCollection { 176 | func showIndex(_ index: Index) -> Self { 177 | var result = self 178 | result.insert("_", at: index) 179 | return result 180 | } 181 | } 182 | -------------------------------------------------------------------------------- /Tests/PerformanceTests/StringTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PerformanceTests.swift 3 | // PatternsTests 4 | // 5 | // Created by Kåre Morstøl on 31/05/2019. 6 | // 7 | 8 | import Foundation 9 | import Patterns 10 | import XCTest 11 | 12 | // Note: the hits parameter to speedTest doesn't necessarily mean the _correct_ number of hits. 13 | // It's just there to notify us when the number of hits changes. 14 | 15 | class StringTests: XCTestCase { 16 | func speedTest(_ pattern: Parser, testFile: String = "Long.txt", textFraction: Int = 1, hits: Int, 17 | file: StaticString = #filePath, line: UInt = #line) throws { 18 | let fulltext = try String(contentsOf: getLocalURL(for: testFile)) 19 | let text = String(fulltext.prefix(fulltext.count / textFraction)) 20 | var result = 0 21 | let block = { 22 | result = pattern.matches(in: text).reduce(into: 0) { c, _ in c += 1 } 23 | } 24 | #if DEBUG 25 | block() 26 | #else 27 | if #available(OSX 10.15, *) { 28 | let options = XCTMeasureOptions() 29 | options.iterationCount = 10 30 | self.measure(metrics: [XCTCPUMetric(limitingToCurrentThread: true)], options: options, block: block) 31 | } else { 32 | self.measure(block) 33 | } 34 | #endif 35 | XCTAssertEqual(result, hits, file: file, line: line) 36 | } 37 | 38 | func testWordBoundary() throws { 39 | let pattern = try Parser(search: Word.Boundary()) 40 | try speedTest(pattern, textFraction: 16, hits: 79081) 41 | } 42 
| 43 | func testWordBoundaryManyLanguages() throws { 44 | let pattern = try Parser(search: Word.Boundary()) 45 | try speedTest(pattern, testFile: "Multi-language-short.txt", hits: 49801) 46 | } 47 | 48 | func testUppercaseWord() throws { 49 | let pattern = try Parser(search: Word.Boundary() • uppercase+ • Word.Boundary()) 50 | try speedTest(pattern, textFraction: 2, hits: 3275) 51 | } 52 | 53 | func testLine() throws { 54 | let pattern = try Parser(search: Line.Start() • Capture(Skip()) • Line.End()) 55 | try speedTest(pattern, textFraction: 2, hits: 7260) 56 | } 57 | 58 | func testNotNewLine() throws { 59 | let pattern = try Parser(search: "," • Capture(Skip()) • Line.End()) 60 | try speedTest(pattern, textFraction: 2, hits: 4933) 61 | } 62 | 63 | func testLiteralSearch() throws { 64 | let pattern = try Parser(search: Literal("Prince")) 65 | try speedTest(pattern, textFraction: 1, hits: 2168) 66 | } 67 | 68 | func testGrammarLiteralSearch() throws { 69 | func any() -> OneOf { OneOf(description: "any", contains: { _ in true }) } 70 | 71 | let g = Grammar() 72 | g.a <- Capture("Prince") / any() • g.a 73 | let pattern = try Parser(g) 74 | try speedTest(pattern, textFraction: 13, hits: 260) 75 | } 76 | 77 | func testNonExistentLiteralSearch() throws { 78 | let pattern = try Parser(search: "\n" • Skip() • "DOESN'T EXIST") 79 | try speedTest(pattern, textFraction: 1, hits: 0) 80 | } 81 | 82 | func testOptionalStringFollowedByNonOptionalString() throws { 83 | let pattern = try Parser(search: "\""¿ • "I") 84 | try speedTest(pattern, textFraction: 12, hits: 814) 85 | } 86 | 87 | func testOneOrMore() throws { 88 | let pattern = try Parser(search: Capture(ascii+)) 89 | try speedTest(pattern, textFraction: 8, hits: 6041) 90 | } 91 | 92 | func testSkipping1() throws { 93 | // [ word.boundary ] * " " * ":" * " " * " " * " " * "{" * Line.end 94 | let pattern = try Parser(search: "." 
• Skip() • " " • Skip() • " ") 95 | try speedTest(pattern, textFraction: 2, hits: 13939) 96 | } 97 | 98 | func testAnyNumeral() throws { 99 | /* An advanced regular expression that matches any numeral: 100 | [+-]? 101 | (\d+(\.\d+)?) 102 | | 103 | (\.\d+) 104 | ([eE][+-]?\d+)? 105 | */ 106 | 107 | let digits = digit+ 108 | let pattern = try Parser(search: 109 | OneOf("+-")¿ 110 | • (digits • ("." • digits)¿) 111 | / 112 | ("." • digits) 113 | • (OneOf("eE") • OneOf("+-")¿ • digits)¿) 114 | try speedTest(pattern, textFraction: 16, hits: 11) 115 | } 116 | 117 | func testContainsClosure() throws { 118 | let pattern = try Parser(search: Word.Boundary() • (alphanumeric / symbol)) 119 | try speedTest(pattern, textFraction: 16, hits: 35643) 120 | } 121 | } 122 | 123 | func getLocalURL(for path: String, file: String = #filePath) -> URL { 124 | URL(fileURLWithPath: file) 125 | .deletingLastPathComponent().appendingPathComponent(path) 126 | } 127 | -------------------------------------------------------------------------------- /Tests/PerformanceTests/UTF8Tests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // File.swift 3 | // 4 | // 5 | // Created by Kåre Morstøl on 19/08/2020. 6 | // 7 | 8 | import Foundation 9 | import Patterns 10 | import XCTest 11 | 12 | // Note: the hits parameter to speedTest doesn't necessarily mean the _correct_ number of hits. 13 | // It's just there to notify us when the number of hits changes. 
14 | /** Performance tests that run patterns over the UTF-8 view of the bundled test texts. */ 15 | class UTF8Tests: XCTestCase { /** Loads `testFile` through `getLocalURL(for:)`, keeps the first `count / textFraction` characters, and counts the matches of `pattern` in the UTF-8 view of that text. In DEBUG builds the search runs once; otherwise it runs inside XCTest's `measure` (CPU metric with 10 iterations when OSX 10.15 is available). Finally asserts that the match count equals `hits`, which per the note at the top of this file is a change detector rather than a verified correct count. */ 16 | func speedTest(_ pattern: Parser, testFile: String = "Long.txt", textFraction: Int = 1, hits: Int, 17 | file: StaticString = #filePath, line: UInt = #line) throws { 18 | let fulltext = try String(contentsOf: getLocalURL(for: testFile)) 19 | let text = String(fulltext.prefix(fulltext.count / textFraction)).utf8 20 | var result = 0 21 | let block = { 22 | result = pattern.matches(in: text).reduce(into: 0) { c, _ in c += 1 } 23 | } 24 | #if DEBUG 25 | block() 26 | #else 27 | if #available(OSX 10.15, *) { 28 | let options = XCTMeasureOptions() 29 | options.iterationCount = 10 30 | self.measure(metrics: [XCTCPUMetric(limitingToCurrentThread: true)], options: options, block: block) 31 | } else { 32 | self.measure(block) 33 | } 34 | #endif 35 | XCTAssertEqual(result, hits, file: file, line: line) 36 | } 37 | 38 | func testLine() throws { 39 | let pattern = try Parser(search: Line.Start() • Capture(Skip()) • Line.End()) 40 | try speedTest(pattern, textFraction: 2, hits: 7260) 41 | } 42 | 43 | func testNotNewLine() throws { 44 | let pattern = try Parser(search: "," • Capture(Skip()) • Line.End()) 45 | try speedTest(pattern, textFraction: 2, hits: 4933) 46 | } 47 | 48 | func testLiteralSearch() throws { 49 | let pattern = try Parser(search: Literal("Prince")) 50 | try speedTest(pattern, textFraction: 1, hits: 2168) 51 | } 52 | 53 | func testGrammarLiteralSearch() throws { 54 | func any() -> OneOf { OneOf(description: "any", contains: { _ in true }) } 55 | 56 | let g = Grammar() 57 | g.a <- Capture("Prince") / any() • g.a 58 | let pattern = try Parser(g) 59 | try speedTest(pattern, textFraction: 13, hits: 260) 60 | } 61 | 62 | func testNonExistentLiteralSearch() throws { 63 | let pattern = try Parser(search: "\n" • Skip() • "DOESN'T EXIST") 64 | try speedTest(pattern, textFraction: 1, hits: 0) 65 | } 66 | 67 | func testOptionalStringFollowedByNonOptionalString() throws { 68 | let pattern = try Parser(search: Literal("\"")¿ • "I")
69 | try speedTest(pattern, textFraction: 12, hits: 814) 70 | } 71 | 72 | func testSkipping1() throws { 73 | // [ word.boundary ] * " " * ":" * " " * " " * " " * "{" * Line.end 74 | let pattern = try Parser(search: "." • Skip() • " " • Skip() • " ") 75 | try speedTest(pattern, textFraction: 2, hits: 13939) 76 | } 77 | } 78 | --------------------------------------------------------------------------------