├── .editorconfig ├── .github ├── FUNDING.yml └── workflows │ └── ci.yml ├── .gitignore ├── .gitmodules ├── .swift-format ├── .swiftpm └── xcode │ └── xcshareddata │ └── xcschemes │ └── Flexer.xcscheme ├── CODE_OF_CONDUCT.md ├── LICENSE ├── Package.swift ├── README.md ├── Sources └── Flexer │ ├── BasicTextCharacterLexer.swift │ ├── CharacterSet+Character.swift │ ├── LookAheadIteratorProtocol.swift │ ├── LookAheadSequence.swift │ ├── LookAheadSequenceReference.swift │ └── TokenProtocol.swift └── Tests └── FlexerTests ├── BasicTextCharacterLexerTests.swift ├── ExampleLexerTests.swift └── LookAheadSequenceReferenceTests.swift /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = tab 5 | end_of_line = lf 6 | charset = utf-8 7 | trim_trailing_whitespace = true 8 | insert_final_newline = true 9 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [mattmassicotte] 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths-ignore: 8 | - 'README.md' 9 | - 'CODE_OF_CONDUCT.md' 10 | - '.editorconfig' 11 | - '.spi.yml' 12 | pull_request: 13 | branches: 14 | - main 15 | 16 | concurrency: 17 | group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} 18 | cancel-in-progress: true 19 | 20 | jobs: 21 | test: 22 | name: Test 23 | runs-on: macOS-15 24 | timeout-minutes: 30 25 | env: 26 | DEVELOPER_DIR: /Applications/Xcode_16.3.app 27 | strategy: 28 | matrix: 29 | destination: 30 | - "platform=macOS" 31 | - "platform=macOS,variant=Mac Catalyst" 32 | - "platform=iOS Simulator,name=iPhone 16" 33 | - "platform=tvOS Simulator,name=Apple TV" 34 | - 
"platform=watchOS Simulator,name=Apple Watch Series 10 (42mm)" 35 | - "platform=visionOS Simulator,name=Apple Vision Pro" 36 | steps: 37 | - name: Checkout 38 | uses: actions/checkout@v4 39 | - name: Test platform ${{ matrix.destination }} 40 | run: set -o pipefail && xcodebuild -scheme Flexer -destination "${{ matrix.destination }}" test | xcbeautify 41 | 42 | linux_test: 43 | name: Test Linux 44 | runs-on: ubuntu-latest 45 | timeout-minutes: 30 46 | strategy: 47 | matrix: 48 | swift-version: 49 | - 6.0.3 50 | - 6.1 51 | steps: 52 | - name: Checkout 53 | uses: actions/checkout@v4 54 | - name: Swiftly 55 | uses: vapor/swiftly-action@v0.2.0 56 | with: 57 | toolchain: ${{ matrix.swift-version }} 58 | - name: Test 59 | run: swift test 60 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /.build 3 | /Packages 4 | /*.xcodeproj 5 | xcuserdata/ 6 | DerivedData/ 7 | /Carthage 8 | .swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata 9 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChimeHQ/Flexer/b8fb79efe3ea65cbb2f7e864042ebce47fada488/.gitmodules -------------------------------------------------------------------------------- /.swift-format: -------------------------------------------------------------------------------- 1 | { 2 | "fileScopedDeclarationPrivacy" : { 3 | "accessLevel" : "private" 4 | }, 5 | "indentConditionalCompilationBlocks" : true, 6 | "indentSwitchCaseLabels" : false, 7 | "indentation" : { 8 | "tabs" : 1 9 | }, 10 | "lineBreakAroundMultilineExpressionChainComponents" : false, 11 | "lineBreakBeforeControlFlowKeywords" : false, 12 | "lineBreakBeforeEachArgument" : false, 13 | "lineBreakBeforeEachGenericRequirement" : false, 14 | 
"lineBreakBetweenDeclarationAttributes" : false, 15 | "lineLength" : 100, 16 | "maximumBlankLines" : 1, 17 | "multiElementCollectionTrailingCommas" : true, 18 | "noAssignmentInExpressions" : { 19 | "allowedFunctions" : [ 20 | "XCTAssertNoThrow" 21 | ] 22 | }, 23 | "prioritizeKeepingFunctionOutputTogether" : false, 24 | "reflowMultilineStringLiterals" : { 25 | "never" : { 26 | } 27 | }, 28 | "respectsExistingLineBreaks" : true, 29 | "rules" : { 30 | "AllPublicDeclarationsHaveDocumentation" : false, 31 | "AlwaysUseLiteralForEmptyCollectionInit" : false, 32 | "AlwaysUseLowerCamelCase" : true, 33 | "AmbiguousTrailingClosureOverload" : true, 34 | "AvoidRetroactiveConformances" : true, 35 | "BeginDocumentationCommentWithOneLineSummary" : false, 36 | "DoNotUseSemicolons" : true, 37 | "DontRepeatTypeInStaticProperties" : true, 38 | "FileScopedDeclarationPrivacy" : true, 39 | "FullyIndirectEnum" : true, 40 | "GroupNumericLiterals" : true, 41 | "IdentifiersMustBeASCII" : true, 42 | "NeverForceUnwrap" : false, 43 | "NeverUseForceTry" : false, 44 | "NeverUseImplicitlyUnwrappedOptionals" : false, 45 | "NoAccessLevelOnExtensionDeclaration" : true, 46 | "NoAssignmentInExpressions" : true, 47 | "NoBlockComments" : true, 48 | "NoCasesWithOnlyFallthrough" : true, 49 | "NoEmptyLinesOpeningClosingBraces" : false, 50 | "NoEmptyTrailingClosureParentheses" : true, 51 | "NoLabelsInCasePatterns" : true, 52 | "NoLeadingUnderscores" : false, 53 | "NoParensAroundConditions" : true, 54 | "NoPlaygroundLiterals" : true, 55 | "NoVoidReturnOnFunctionSignature" : true, 56 | "OmitExplicitReturns" : false, 57 | "OneCasePerLine" : true, 58 | "OneVariableDeclarationPerLine" : true, 59 | "OnlyOneTrailingClosureArgument" : true, 60 | "OrderedImports" : true, 61 | "ReplaceForEachWithForLoop" : true, 62 | "ReturnVoidInsteadOfEmptyTuple" : true, 63 | "TypeNamesShouldBeCapitalized" : true, 64 | "UseEarlyExits" : false, 65 | "UseExplicitNilCheckInConditions" : true, 66 | "UseLetInEveryBoundCaseVariable" : 
true, 67 | "UseShorthandTypeNames" : true, 68 | "UseSingleLinePropertyGetter" : true, 69 | "UseSynthesizedInitializer" : true, 70 | "UseTripleSlashForDocumentationComments" : true, 71 | "UseWhereClausesInForLoops" : false, 72 | "ValidateDocumentationComments" : false 73 | }, 74 | "spacesAroundRangeFormationOperators" : false, 75 | "spacesBeforeEndOfLineComments" : 2, 76 | "tabWidth" : 4, 77 | "version" : 1 78 | } 79 | -------------------------------------------------------------------------------- /.swiftpm/xcode/xcshareddata/xcschemes/Flexer.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 29 | 35 | 36 | 37 | 38 | 39 | 46 | 47 | 53 | 54 | 55 | 56 | 58 | 64 | 65 | 66 | 67 | 68 | 78 | 79 | 85 | 86 | 92 | 93 | 94 | 95 | 97 | 98 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socio-economic status, 10 | nationality, personal appearance, race, caste, color, religion, or sexual 11 | identity and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, 14 | diverse, inclusive, and healthy community. 
15 | 16 | ## Our Standards 17 | 18 | Examples of behavior that contributes to a positive environment for our 19 | community include: 20 | 21 | * Demonstrating empathy and kindness toward other people 22 | * Being respectful of differing opinions, viewpoints, and experiences 23 | * Giving and gracefully accepting constructive feedback 24 | * Accepting responsibility and apologizing to those affected by our mistakes, 25 | and learning from the experience 26 | * Focusing on what is best not just for us as individuals, but for the overall 27 | community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | * The use of sexualized language or imagery, and sexual attention or advances of 32 | any kind 33 | * Trolling, insulting or derogatory comments, and personal or political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email address, 36 | without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of 43 | acceptable behavior and will take appropriate and fair corrective action in 44 | response to any behavior that they deem inappropriate, threatening, offensive, 45 | or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or reject 48 | comments, commits, code, wiki edits, issues, and other contributions that are 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation 50 | decisions when appropriate. 51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also applies when 55 | an individual is officially representing the community in public spaces. 
56 | Examples of representing our community include using an official e-mail address, 57 | posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 63 | reported to the community leaders responsible for enforcement at 64 | support@chimehq.com. 65 | All complaints will be reviewed and investigated promptly and fairly. 66 | 67 | All community leaders are obligated to respect the privacy and security of the 68 | reporter of any incident. 69 | 70 | ## Enforcement Guidelines 71 | 72 | Community leaders will follow these Community Impact Guidelines in determining 73 | the consequences for any action they deem in violation of this Code of Conduct: 74 | 75 | ### 1. Correction 76 | 77 | **Community Impact**: Use of inappropriate language or other behavior deemed 78 | unprofessional or unwelcome in the community. 79 | 80 | **Consequence**: A private, written warning from community leaders, providing 81 | clarity around the nature of the violation and an explanation of why the 82 | behavior was inappropriate. A public apology may be requested. 83 | 84 | ### 2. Warning 85 | 86 | **Community Impact**: A violation through a single incident or series of 87 | actions. 88 | 89 | **Consequence**: A warning with consequences for continued behavior. No 90 | interaction with the people involved, including unsolicited interaction with 91 | those enforcing the Code of Conduct, for a specified period of time. This 92 | includes avoiding interactions in community spaces as well as external channels 93 | like social media. Violating these terms may lead to a temporary or permanent 94 | ban. 95 | 96 | ### 3. Temporary Ban 97 | 98 | **Community Impact**: A serious violation of community standards, including 99 | sustained inappropriate behavior. 
100 | 101 | **Consequence**: A temporary ban from any sort of interaction or public 102 | communication with the community for a specified period of time. No public or 103 | private interaction with the people involved, including unsolicited interaction 104 | with those enforcing the Code of Conduct, is allowed during this period. 105 | Violating these terms may lead to a permanent ban. 106 | 107 | ### 4. Permanent Ban 108 | 109 | **Community Impact**: Demonstrating a pattern of violation of community 110 | standards, including sustained inappropriate behavior, harassment of an 111 | individual, or aggression toward or disparagement of classes of individuals. 112 | 113 | **Consequence**: A permanent ban from any sort of public interaction within the 114 | community. 115 | 116 | ## Attribution 117 | 118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 119 | version 2.1, available at 120 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 121 | 122 | Community Impact Guidelines were inspired by 123 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 124 | 125 | For answers to common questions about this code of conduct, see the FAQ at 126 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 127 | [https://www.contributor-covenant.org/translations][translations]. 128 | 129 | [homepage]: https://www.contributor-covenant.org 130 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 131 | [Mozilla CoC]: https://github.com/mozilla/diversity 132 | [FAQ]: https://www.contributor-covenant.org/faq 133 | [translations]: https://www.contributor-covenant.org/translations 134 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Chime Systems Inc. 4 | All rights reserved. 
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version: 5.9 2 | 3 | import PackageDescription 4 | 5 | let package = Package( 6 | name: "Flexer", 7 | platforms: [ 8 | .macOS(.v10_15), 9 | .iOS(.v13), 10 | .tvOS(.v13), 11 | .watchOS(.v6), 12 | .macCatalyst(.v13) 13 | ], 14 | products: [ 15 | .library(name: "Flexer", targets: ["Flexer"]), 16 | ], 17 | dependencies: [], 18 | targets: [ 19 | .target(name: "Flexer", dependencies: []), 20 | .testTarget(name: "FlexerTests", dependencies: ["Flexer"]), 21 | ] 22 | ) 23 | 24 | let swiftSettings: [SwiftSetting] = [ 25 | .enableExperimentalFeature("StrictConcurrency") 26 | ] 27 | 28 | for target in package.targets { 29 | var settings = target.swiftSettings ?? [] 30 | settings.append(contentsOf: swiftSettings) 31 | target.swiftSettings = settings 32 | } 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | [![Build Status][build status badge]][build status] 4 | [![Platforms][platforms badge]][platforms] 5 | [![Matrix][matrix badge]][matrix] 6 | 7 |
8 | 9 | # Flexer 10 | 11 | Flexer is a small library for building lexers in Swift. It is compatible with all Apple platforms. 12 | 13 | - API tailored for hand-written parsing 14 | - Fully Swift `String`-compatible 15 | - Based around `Sequence` and `IteratorProtocol` protocols 16 | 17 | It turns out that Swift's `Sequence` and `Iterator` concepts work pretty well for processing tokens. They make for a familiar API that also offers a surprising amount of power. Flexer builds on these concepts with some new protocols that are made specifically for lexing, but are generally applicable to all `Sequence` types. 18 | 19 | ## Integration 20 | 21 | ```swift 22 | dependencies: [ 23 | .package(url: "https://github.com/ChimeHQ/Flexer") 24 | ] 25 | ``` 26 | 27 | ## Look-Ahead 28 | 29 | Core to lexing is the ability to look ahead at future tokens without advancing. Flexer implements look-ahead with a protocol called `LookAheadIteratorProtocol`. The whole implementation is inspired by the `lazy` property of `Sequence`, and works very similarly. 30 | 31 | ```swift 32 | var lookAheadSequence = anySequence.lookAhead 33 | 34 | let next = lookAheadSequence.peek() 35 | ``` 36 | 37 | The main work of building your lexer is then defining a Sequence type of tokens. All of the lexing facilities you might need can then be exposed with a `typealias`. 38 | 39 | ```swift 40 | typealias MyLexer = LookAheadSequence 41 | 42 | var lexer = MyLexer(string: myString) 43 | 44 | let nextToken = lexer.next() 45 | let futureToken = lexer.peek() 46 | let tabToken = lexer.nextUntil({ $0.kind == .tab }) 47 | ``` 48 | 49 | ## Token Sequences 50 | 51 | Your custom token sequence can be built by creating a struct that conforms to `Sequence`. To make this easier, Flexer includes a type that can be used as a foundation for creating more complex token streams, called `BasicTextCharacterSequence`. It is a sequence of `BasicTextCharacter` elements.
It breaks up a string into commonly-needed tokens, categorized by kind and range within the source string. This approach uses the `Token` type, which stores a kind and a range within the source string. 52 | 53 | It is usually much easier to build up more complex lexing functionality with the convenience of Swift switch pattern matching, instead of having to worry about the underlying characters and ranges themselves. You can do this by wrapping up a `BasicTextCharacterSequence` in your own custom sequence. 54 | 55 | Here's a fully-functioning example that produces four different token types. It shows off some of the scanning and look-ahead facilities that can be handy both for constructing and also using your lexer. 56 | 57 | ```swift 58 | enum ExampleTokenKind { 59 | case word 60 | case number 61 | case symbol 62 | case whitespace 63 | } 64 | 65 | typealias ExampleToken = Flexer.Token 66 | 67 | struct ExampleTokenSequence: Sequence, IteratorProtocol, StringInitializable { 68 | public typealias Element = ExampleToken 69 | 70 | private var lexer: BasicTextCharacterLexer 71 | 72 | public init(string: String) { 73 | self.lexer = BasicTextCharacterLexer(string: string) 74 | } 75 | 76 | public mutating func next() -> Element? { 77 | guard let token = lexer.peek() else { 78 | return nil 79 | } 80 | 81 | switch token.kind { 82 | case .lowercaseLetter, .uppercaseLetter, .underscore: 83 | guard let endingToken = lexer.nextUntil(notIn: [.lowercaseLetter, .uppercaseLetter, .underscore, .digit]) else { 84 | return nil 85 | } 86 | 87 | return ExampleToken(kind: .word, range: token.startIndex.. 113 | ``` 114 | 115 | ## Contributing and Collaboration 116 | 117 | I would love to hear from you! Issues or pull requests work great. Both a [Matrix space][matrix] and [Discord][discord] are available for live help, but I have a strong bias towards answering in the form of documentation. You can also find me on [mastodon](https://mastodon.social/@mattiem).
118 | 119 | I prefer collaboration, and would love to find ways to work together if you have a similar project. 120 | 121 | I prefer indentation with tabs for improved accessibility. But, I'd rather you use the system you want and make a PR than hesitate because of whitespace. 122 | 123 | By participating in this project you agree to abide by the [Contributor Code of Conduct](CODE_OF_CONDUCT.md). 124 | 125 | [build status]: https://github.com/ChimeHQ/Flexer/actions 126 | [build status badge]: https://github.com/ChimeHQ/Flexer/workflows/CI/badge.svg 127 | [platforms]: https://swiftpackageindex.com/ChimeHQ/Flexer 128 | [platforms badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com%2Fapi%2Fpackages%2FChimeHQ%2FFlexer%2Fbadge%3Ftype%3Dplatforms 129 | [matrix]: https://matrix.to/#/%23chimehq%3Amatrix.org 130 | [matrix badge]: https://img.shields.io/matrix/chimehq%3Amatrix.org?label=Matrix 131 | [discord]: https://discord.gg/esFpX6sErJ 132 | -------------------------------------------------------------------------------- /Sources/Flexer/BasicTextCharacterLexer.swift: -------------------------------------------------------------------------------- 1 | import Foundation 2 | 3 | public protocol StringInitializable { 4 | var string: String { get } 5 | 6 | init(string: String) 7 | } 8 | 9 | public extension StringInitializable { 10 | func substring(with range: Range) -> Substring { 11 | return string[range] 12 | } 13 | } 14 | 15 | public enum BasicTextCharacterKind: Hashable, Sendable { 16 | case tab 17 | case space 18 | case newline 19 | 20 | case lowercaseLetter 21 | case uppercaseLetter 22 | case digit 23 | case otherCharacter 24 | 25 | case singleQuote 26 | case doubleQuote 27 | case backtick 28 | 29 | case openBrace 30 | case closeBrace 31 | case openBracket 32 | case closeBracket 33 | case openParen 34 | case closeParen 35 | case lessThan 36 | case greaterThan 37 | 38 | case tilde 39 | case exclamation 40 | case question 41 | case at 42 | 
case percent 43 | case caret 44 | case ampersand 45 | case dollar 46 | case star 47 | case slash 48 | case numberSign 49 | case pipe 50 | case backslash 51 | case dash 52 | case plus 53 | case equals 54 | 55 | case period 56 | case comma 57 | case colon 58 | case semicolon 59 | case underscore 60 | 61 | @available(*, deprecated, message: "Please use numberSign") 62 | public static let pound = BasicTextCharacterKind.numberSign 63 | } 64 | 65 | public typealias BasicTextCharacter = Token 66 | 67 | struct CharacterRangePairIterator: IteratorProtocol { 68 | struct CharacterRangePair { 69 | public var character: Character 70 | public var range: Range 71 | } 72 | 73 | let string: String 74 | var currentIndex: String.Index 75 | 76 | init(string: String) { 77 | self.string = string 78 | self.currentIndex = string.startIndex 79 | } 80 | 81 | mutating func next() -> CharacterRangePair? { 82 | if currentIndex >= string.endIndex { 83 | return nil 84 | } 85 | 86 | let idx = currentIndex 87 | 88 | currentIndex = string.index(after: currentIndex) 89 | 90 | let char = string[idx] 91 | let range = idx.. Element? 
{ 115 | guard let pair = characterIterator.next() else { 116 | return nil 117 | } 118 | 119 | let range = pair.range 120 | let char = pair.character 121 | 122 | switch char { 123 | case "\t": return BasicTextCharacter(kind: .tab, range: range) 124 | case " ": return BasicTextCharacter(kind: .space, range: range) 125 | case "'": return BasicTextCharacter(kind: .singleQuote, range: range) 126 | case "\"": return BasicTextCharacter(kind: .doubleQuote, range: range) 127 | case "`": return BasicTextCharacter(kind: .backtick, range: range) 128 | case "{": return BasicTextCharacter(kind: .openBrace, range: range) 129 | case "}": return BasicTextCharacter(kind: .closeBrace, range: range) 130 | case "[": return BasicTextCharacter(kind: .openBracket, range: range) 131 | case "]": return BasicTextCharacter(kind: .closeBracket, range: range) 132 | case "(": return BasicTextCharacter(kind: .openParen, range: range) 133 | case ")": return BasicTextCharacter(kind: .closeParen, range: range) 134 | case "<": return BasicTextCharacter(kind: .lessThan, range: range) 135 | case ">": return BasicTextCharacter(kind: .greaterThan, range: range) 136 | case "~": return BasicTextCharacter(kind: .tilde, range: range) 137 | case "!": return BasicTextCharacter(kind: .exclamation, range: range) 138 | case "?": return BasicTextCharacter(kind: .question, range: range) 139 | case "@": return BasicTextCharacter(kind: .at, range: range) 140 | case "%": return BasicTextCharacter(kind: .percent, range: range) 141 | case "^": return BasicTextCharacter(kind: .caret, range: range) 142 | case "&": return BasicTextCharacter(kind: .ampersand, range: range) 143 | case "$": return BasicTextCharacter(kind: .dollar, range: range) 144 | case "*": return BasicTextCharacter(kind: .star, range: range) 145 | case "/": return BasicTextCharacter(kind: .slash, range: range) 146 | case "#": return BasicTextCharacter(kind: .numberSign, range: range) 147 | case "|": return BasicTextCharacter(kind: .pipe, range: range) 148 
| case "\\": return BasicTextCharacter(kind: .backslash, range: range) 149 | case "-": return BasicTextCharacter(kind: .dash, range: range) 150 | case "+": return BasicTextCharacter(kind: .plus, range: range) 151 | case "=": return BasicTextCharacter(kind: .equals, range: range) 152 | case ".": return BasicTextCharacter(kind: .period, range: range) 153 | case ",": return BasicTextCharacter(kind: .comma, range: range) 154 | case "_": return BasicTextCharacter(kind: .underscore, range: range) 155 | case ";": return BasicTextCharacter(kind: .semicolon, range: range) 156 | case ":": return BasicTextCharacter(kind: .colon, range: range) 157 | default: 158 | break 159 | } 160 | 161 | if newlineSet.contains(char) { 162 | return BasicTextCharacter(kind: .newline, range: range) 163 | } 164 | 165 | if digitSet.contains(char) { 166 | return BasicTextCharacter(kind: .digit, range: range) 167 | } 168 | 169 | if lowercaseSet.contains(char) { 170 | return BasicTextCharacter(kind: .lowercaseLetter, range: range) 171 | } 172 | 173 | if uppercaseSet.contains(char) { 174 | return BasicTextCharacter(kind: .uppercaseLetter, range: range) 175 | } 176 | 177 | return BasicTextCharacter(kind: .otherCharacter, range: range) 178 | } 179 | } 180 | 181 | public typealias BasicTextCharacterLexer = LookAheadSequence 182 | -------------------------------------------------------------------------------- /Sources/Flexer/CharacterSet+Character.swift: -------------------------------------------------------------------------------- 1 | // 2 | // CharacterSet+Character.swift 3 | // Flexer 4 | // 5 | // Created by Matt Massicotte on 2020-04-24. 6 | // Copyright © 2020 Chime Systems Inc. All rights reserved. 
//

import Foundation

public extension CharacterSet {
	/// Returns `true` when every Unicode scalar that makes up `character` is a member of this set.
	func contains(_ character: Character) -> Bool {
		return character.unicodeScalars.allSatisfy({ self.contains($0) })
	}
}

// --------------------------------------------------------------------------------
// /Sources/Flexer/LookAheadIteratorProtocol.swift:
// --------------------------------------------------------------------------------
//
//  LookAheadIteratorProtocol.swift
//  Flexer
//
//  Created by Matt Massicotte on 2020-04-25.
//  Copyright © 2020 Chime Systems Inc. All rights reserved.
//

import Foundation

/// An iterator that can inspect upcoming elements without consuming them.
public protocol LookAheadIteratorProtocol: IteratorProtocol {
	/// Returns the element `distance` positions ahead without consuming it.
	///
	/// A distance of 1 refers to the element the next call to `next()` would return.
	mutating func peek(distance: Int) -> Element?
}

public extension LookAheadIteratorProtocol {
	/// Returns the element the next call to `next()` would return, without consuming it.
	mutating func peek() -> Element? {
		return peek(distance: 1)
	}

	/// Looks ahead, without consuming anything, for an element that satisfies `predicate`.
	///
	/// - Parameters:
	///   - predicate: Test applied to each upcoming element in order.
	///   - limit: Maximum number of upcoming elements to examine.
	/// - Returns: `true` if a matching element is found within `limit` elements,
	///   `false` if the elements run out or the limit is reached first.
	mutating func peekUntil(_ predicate: (Element) -> Bool, limit: Int = Int.max) -> Bool {
		guard limit > 0 else {
			return false
		}

		// Each pass must look one element further ahead; peeking at a fixed
		// distance would test the same element over and over.
		for distance in 1 ... limit {
			guard let elem = peek(distance: distance) else {
				return false
			}

			if predicate(elem) {
				return true
			}
		}

		return false
	}
}

public extension LookAheadIteratorProtocol {
	/// Consumes and returns the next element only if it satisfies `predicate`.
	///
	/// - Returns: The consumed element, or `nil` if there is no next element or it does not match.
	mutating func nextIf(_ predicate: (Element) throws -> Bool) rethrows -> Element? {
		guard let t = peek() else {
			return nil
		}

		if try predicate(t) {
			return next()
		}

		return nil
	}

	/// Skips the next element if predicate is true
	///
	/// - Returns: `true` if an element was skipped.
	@discardableResult
	mutating func skipIf(_ predicate: (Element) throws -> Bool) rethrows -> Bool {
		return try nextIf(predicate) != nil
	}

	/// Consumes elements until `predicate` is true for the upcoming element, the
	/// elements run out, or `limit` elements have been consumed.
	///
	/// The element that satisfies `predicate` is not consumed.
	///
	/// - Returns: The last element consumed, or `nil` if nothing was consumed.
	mutating func nextUntil(_ predicate: (Element) throws -> Bool, limit: Int = Int.max) rethrows -> Element? {
		var last: Element?

		// clamp so a negative limit consumes nothing instead of forming an invalid range
		for _ in 0 ..< max(limit, 0) {
			guard let elem = peek() else {
				return last
			}

			if try predicate(elem) {
				// on the first iteration, last will always be nil
				return last
			}

			last = elem

			_ = next()
		}

		return last
	}

	/// Skips elements until predicate is true, consuming at most `limit` of them
	///
	/// - Returns: `true` if at least one element was skipped.
	@discardableResult
	mutating func skipUntil(_ predicate: (Element) throws -> Bool, limit: Int = Int.max) rethrows -> Bool {
		return try nextUntil(predicate, limit: limit) != nil
	}
}

// --------------------------------------------------------------------------------
// /Sources/Flexer/LookAheadSequence.swift:
// --------------------------------------------------------------------------------
//
//  LookAheadSequence.swift
//  Flexer
//
//  Created by Matt Massicotte on 2020-04-24.
//  Copyright © 2020 Chime Systems Inc. All rights reserved.
//

import Foundation

/// Wraps a base sequence and buffers elements pulled from its iterator so that
/// upcoming elements can be peeked at before they are consumed.
public struct LookAheadSequence<Base>: Sequence, LookAheadIteratorProtocol where Base : Sequence {
	public typealias Element = Base.Element

	let baseSequence: Base
	// elements already pulled from `iterator` but not yet returned by `next()`
	private var buffer: [Base.Element]
	private var iterator: Base.Iterator

	public init(_ baseSequence: Base) {
		self.baseSequence = baseSequence
		self.iterator = baseSequence.makeIterator()
		self.buffer = []
	}

	/// Returns the next element, draining previously peeked elements first.
	public mutating func next() -> Element? {
		if buffer.isEmpty {
			return iterator.next()
		}

		return buffer.removeFirst()
	}

	/// Returns the element `distance` positions ahead without consuming it.
	///
	/// - Parameter distance: 1 is the element `next()` would return. 0 returns the
	///   first already-buffered element, if any, without reading from the base
	///   iterator. Negative values return `nil`.
	/// - Returns: The element, or `nil` if the base sequence ends before `distance`.
	public mutating func peek(distance: Int = 1) -> Element? {
		if distance == 0 {
			return buffer.first
		}

		// a negative distance would otherwise index the buffer out of bounds
		guard distance > 0 else {
			return nil
		}

		let delta = distance - buffer.count
		let index = distance - 1

		// fill buffer as needed
		if delta > 0 {
			for _ in 0..<delta {
				guard let elem = iterator.next() else {
					break
				}

				buffer.append(elem)
			}
		}

		if index >= buffer.endIndex {
			return nil
		}

		return buffer[index]
	}
}

public extension Sequence {
	/// A look-ahead wrapper around this sequence.
	var lookAhead: LookAheadSequence<Self> {
		return LookAheadSequence(self)
	}
}

extension LookAheadSequence: StringInitializable where Base: StringInitializable {
	public init(string: String) {
		self.init(Base(string: string))
	}

	public var string: String {
		return baseSequence.string
	}
}

public extension LookAheadSequence where Base: StringInitializable, Base.Element: TokenProtocol, Base.Element.Index == String.Index {
	/// Returns the part of the source string covered by `token`'s range.
	func substring(for token: Base.Element) -> Substring {
		return substring(with: token.range)
	}
}

// --------------------------------------------------------------------------------
// /Sources/Flexer/LookAheadSequenceReference.swift:
// --------------------------------------------------------------------------------
import Foundation

/// LookAheadSequence with reference semantics
///
/// This is a class wrapper around LookAheadSequence. Useful if you need to pass
/// around and operate on a single shared instance.
public class LookAheadSequenceReference<Base>: Sequence, LookAheadIteratorProtocol where Base : Sequence {
	public typealias Element = Base.Element
	var internalSequence: LookAheadSequence<Base>

	public init(_ sequence: LookAheadSequence<Base>) {
		self.internalSequence = sequence
	}

	public func next() -> Element? {
		return internalSequence.next()
	}

	public func peek(distance: Int = 1) -> Element? {
		return internalSequence.peek(distance: distance)
	}
}

// This junk is needed to get around the "mutating" modifier
public extension LookAheadSequenceReference {
	/// Forwards to the wrapped sequence's `peekUntil`, including `limit`.
	func peekUntil(_ predicate: (Element) -> Bool, limit: Int = Int.max) -> Bool {
		return internalSequence.peekUntil(predicate, limit: limit)
	}

	/// Forwards to the wrapped sequence's `nextIf`.
	func nextIf(_ predicate: (Element) throws -> Bool) rethrows -> Element? {
		return try internalSequence.nextIf(predicate)
	}

	/// Forwards to the wrapped sequence's `skipIf`.
	@discardableResult
	func skipIf(_ predicate: (Element) throws -> Bool) rethrows -> Bool {
		return try internalSequence.skipIf(predicate)
	}

	/// Forwards to the wrapped sequence's `nextUntil`.
	func nextUntil(_ predicate: (Element) throws -> Bool, limit: Int = Int.max) rethrows -> Element? {
		return try internalSequence.nextUntil(predicate, limit: limit)
	}

	/// Forwards to the wrapped sequence's `skipUntil`.
	@discardableResult
	func skipUntil(_ predicate: (Element) throws -> Bool, limit: Int = Int.max) rethrows -> Bool {
		return try internalSequence.skipUntil(predicate, limit: limit)
	}
}

extension LookAheadSequenceReference where Base : StringInitializable {
	public var string: String {
		return internalSequence.string
	}
}

public extension LookAheadSequenceReference where Base: StringInitializable, Base.Element: TokenProtocol, Base.Element.Index == String.Index {
	/// Returns the part of the source string covered by `token`'s range.
	func substring(for token: Base.Element) -> Substring {
		return internalSequence.substring(for: token)
	}
}

public extension LookAheadSequence {
	/// LookAheadSequence wrapper with reference semantics
	var reference: LookAheadSequenceReference<Base> {
		return LookAheadSequenceReference(self)
	}
}

// --------------------------------------------------------------------------------
// /Sources/Flexer/TokenProtocol.swift:
// --------------------------------------------------------------------------------
//
//  TokenProtocol.swift
//  Flexer
//
//  Created by Matt Massicotte on 2020-04-27.
//  Copyright © 2020 Chime Systems Inc.
//  All rights reserved.
//

import Foundation

/// A lexed token: a `kind` plus the half-open `range` it covers.
public protocol TokenProtocol: Comparable, CustomStringConvertible {
	associatedtype Kind: Hashable
	associatedtype Index: Comparable

	var range: Range<Index> { get }
	var kind: Kind { get }

	init(kind: Kind, range: Range<Index>)
}

extension TokenProtocol where Index == String.Index {
	/// Creates a token from an `NSRange`, failing if the range cannot be
	/// converted to a `Range<String.Index>` within `string`.
	public init?(kind: Kind, range: NSRange, in string: String) {
		guard let stringRange = Range(range, in: string) else {
			return nil
		}

		self.init(kind: kind, range: stringRange)
	}

	/// Creates a token spanning from the start of `start` to the end of `end`.
	/// Returns `nil` when `end` is `nil`.
	public init?(kind: Kind, start: BasicTextCharacter, end: BasicTextCharacter?) {
		guard let end = end else { return nil }

		self.init(kind: kind, range: start.startIndex..<end.endIndex)
	}

	// NOTE(review): the end of the initializer above and this method's signature were
	// garbled in the extracted source (only `-> NSRange` and the body survived); the
	// name `nsRange(in:)` is a reconstruction — confirm against the repository.
	/// Converts the token's range to an `NSRange` relative to `string`.
	public func nsRange(in string: String) -> NSRange {
		return NSRange(range, in: string)
	}
}

extension TokenProtocol {
	/// Lower bound of `range`.
	public var startIndex: Index {
		return range.lowerBound
	}

	/// Upper bound of `range`.
	public var endIndex: Index {
		return range.upperBound
	}
}

extension TokenProtocol {
	/// Orders tokens by where they start; `kind` and the range's end are not considered.
	public static func < (lhs: Self, rhs: Self) -> Bool {
		return lhs.startIndex < rhs.startIndex
	}
}

extension TokenProtocol {
	public var description: String {
		return "<\(type(of: self)) \(String(describing: kind)) \(String(describing: range))>"
	}
}

extension LookAheadSequence where Base.Element: TokenProtocol {
	/// Advances while upcoming tokens have a kind contained in `set`.
	public mutating func nextUntil(notIn set: Set<Base.Element.Kind>) -> Base.Element? {
		return nextUntil({ set.contains($0.kind) == false })
	}

	/// Advances until the upcoming token has a kind contained in `set`.
	public mutating func nextUntil(in set: Set<Base.Element.Kind>) -> Base.Element? {
		return nextUntil({ set.contains($0.kind) })
	}
}

extension LookAheadSequence where Base.Element: TokenProtocol {
	/// The wrapped base sequence.
	public var tokens: Base {
		return baseSequence
	}
}

/// A concrete `TokenProtocol` implementation indexed by `String.Index`.
public struct Token<TokenKind: Hashable>: TokenProtocol {
	public typealias Kind = TokenKind
	public typealias Index = String.Index

	public var range: Range<Index>
	public var kind: Kind

	public init(kind: TokenKind, range: Range<Index>) {
		self.kind = kind
		self.range = range
	}
}

--------------------------------------------------------------------------------
/Tests/FlexerTests/BasicTextCharacterLexerTests.swift:
--------------------------------------------------------------------------------
import XCTest

import Flexer

final class BasicTextCharacterLexerTests: XCTestCase {
	func testPeekTwiceReturnsSameValue() {
		let string = "\t"
		var lexer = BasicTextCharacterLexer(string: string)

		let token = BasicTextCharacter(kind: .tab, range: NSRange(0 ..< 1), in: string)

		XCTAssertEqual(lexer.peek(), token)
		XCTAssertEqual(lexer.peek(), token)
	}

	func testNextReturnsDifferentValue() {
		let string = "\t"
		var lexer = BasicTextCharacterLexer(string: string)

		XCTAssertNotNil(lexer.peek())
		XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .tab, range: NSRange(0 ..< 1), in: string))
		XCTAssertNil(lexer.peek())
	}

	func testPeekFurtherAndThenCloser() {
		let string = "a1"
		var lexer = BasicTextCharacterLexer(string: string)

		XCTAssertEqual(lexer.peek(distance: 2), BasicTextCharacter(kind: .digit, range: NSRange(1 ..< 2), in: string))
		XCTAssertEqual(lexer.peek(distance: 1), BasicTextCharacter(kind: .lowercaseLetter, range: NSRange(0 ..< 1), in: string))
		XCTAssertEqual(lexer.peek(distance: 2), BasicTextCharacter(kind: .digit, range: NSRange(1 ..< 2), in: string))
	}

	func testNewlineSequences() {
		let string = "\n \r \r\n"
		var lexer = BasicTextCharacterLexer(string: string)

		XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .newline, range: NSRange(0..<1), in: string))
		XCTAssertNotNil(lexer.next())
		XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .newline, range: NSRange(2..<3), in: string))
		XCTAssertNotNil(lexer.next())
		XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .newline, range: NSRange(4..<6), in: string))
		XCTAssertNil(lexer.next())
	}
}

extension BasicTextCharacterLexerTests {
	func testPeekNextPerformance() {
		var string = ""

		for _ in 0 ..< 10000 {
			string += "abc 123 ;[];^^%\n"
		}

		measure {
			var lexer = BasicTextCharacterLexer(string: string)

			while lexer.peek() != nil {
				_ = lexer.next()
			}
		}
	}
}

extension BasicTextCharacterLexerTests {
	func testSingleLowercaseCharacter() {
		let string = "a"
		var lexer = BasicTextCharacterLexer(string: string)

		XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .lowercaseLetter, range: NSRange(0 ..< 1), in: string))
		XCTAssertNil(lexer.next())
	}

	func testMultiCharacterUppercaseRun() {
		let string = "ABC"
		var lexer = BasicTextCharacterLexer(string: string)

		XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .uppercaseLetter, range: NSRange(0 ..< 1), in: string))
		XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .uppercaseLetter, range: NSRange(1 ..< 2), in: string))
		XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .uppercaseLetter, range: NSRange(2 ..< 3), in: string))
		XCTAssertNil(lexer.next())
	}

	func testDigitRun() {
		let string = "123"
		var lexer = BasicTextCharacterLexer(string: string)

		XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .digit, range: NSRange(0 ..< 1), in: string))
		XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .digit, range: NSRange(1 ..< 2), in: string))
		XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .digit, range: NSRange(2 ..< 3), in: string))
		XCTAssertNil(lexer.next())
	}

	func testNextUntilWithNoMatch() {
		let string = " ab "
		var lexer = BasicTextCharacterLexer(string: string)

		XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .space, range: NSRange(0 ..< 1), in: string))
		XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .lowercaseLetter, range: NSRange(1 ..< 2), in: string))

		// The lexer is spooled to the "a" position.
		let bToken = lexer.nextUntil(notIn: [.lowercaseLetter])
		// So the return token must be the last one matching e.g. "b".
		XCTAssertEqual(bToken, BasicTextCharacter(kind: .lowercaseLetter, range: NSRange(2 ..< 3), in: string))

		// The lexer is already spooled to the "b" position.
		// next() would be " ".
		let token = lexer.nextUntil(notIn: [.lowercaseLetter])

		// But our stop condition is notIn: [.lowercaseLetter].
		// So we need to immediately stop.
		XCTAssertNil(token)
	}
}

--------------------------------------------------------------------------------
/Tests/FlexerTests/ExampleLexerTests.swift:
--------------------------------------------------------------------------------
//
//  ExampleLexerTests.swift
//  FlexerTests
//
//  Created by Matt Massicotte on 2020-04-27.
//  Copyright © 2020 Chime Systems Inc. All rights reserved.
//

import XCTest
@testable import Flexer

/// Token kinds produced by the example lexer used in these tests.
enum ExampleTokenKind {
	case word
	case number
	case symbol
	case whitespace
}

typealias ExampleToken = Flexer.Token<ExampleTokenKind>

/// Groups runs of `BasicTextCharacter`s into word / number / whitespace / symbol tokens.
struct ExampleTokenSequence: Sequence, IteratorProtocol, StringInitializable {
	public typealias Element = ExampleToken

	private var lexer: BasicTextCharacterLexer

	public init(string: String) {
		self.lexer = BasicTextCharacterLexer(string: string)
	}

	public var string: String {
		return lexer.string
	}

	public mutating func next() -> Element? {
		guard let token = lexer.peek() else {
			return nil
		}

		switch token.kind {
		case .lowercaseLetter, .uppercaseLetter, .underscore:
			guard let endingToken = lexer.nextUntil(notIn: [.lowercaseLetter, .uppercaseLetter, .underscore, .digit]) else {
				return nil
			}

			return ExampleToken(kind: .word, range: token.startIndex..<endingToken.endIndex)
		// NOTE(review): everything from here to the `ExampleTokenLexer` typealias was lost
		// in the extracted source. It has been reconstructed to mirror the `.word` branch
		// above and to satisfy the expectations in `testTokens` — confirm against the
		// repository before relying on it.
		case .digit:
			guard let endingToken = lexer.nextUntil(notIn: [.digit]) else {
				return nil
			}

			return ExampleToken(kind: .number, range: token.startIndex..<endingToken.endIndex)
		case .newline, .tab, .space:
			guard let endingToken = lexer.nextUntil(notIn: [.newline, .tab, .space]) else {
				return nil
			}

			return ExampleToken(kind: .whitespace, range: token.startIndex..<endingToken.endIndex)
		default:
			break
		}

		// Anything else is a symbol run, which ends at the first character that
		// would start a word, number, or whitespace token.
		guard let endingToken = lexer.nextUntil(in: [.newline, .tab, .space, .lowercaseLetter, .uppercaseLetter, .underscore, .digit]) else {
			return nil
		}

		return ExampleToken(kind: .symbol, range: token.startIndex..<endingToken.endIndex)
	}
}

typealias ExampleTokenLexer = LookAheadSequence<ExampleTokenSequence>

class ExampleLexerTests: XCTestCase {
	func testTokens() {
		let string = "abc d_eF\t\t\tGhi123 JKL 123 **&\nz"
		var lexer = ExampleTokenLexer(string: string)

		XCTAssertEqual(lexer.next(), ExampleToken(kind: .word, range: NSRange(0..<3), in: string))
		XCTAssertEqual(lexer.next(), ExampleToken(kind: .whitespace, range: NSRange(3..<4), in: string))
		XCTAssertEqual(lexer.next(), ExampleToken(kind: .word, range: NSRange(4..<8), in: string))
		XCTAssertEqual(lexer.next(), ExampleToken(kind: .whitespace, range: NSRange(8..<11), in: string))
		XCTAssertEqual(lexer.next(), ExampleToken(kind: .word, range: NSRange(11..<17), in: string))
		XCTAssertEqual(lexer.next(), ExampleToken(kind: .whitespace, range: NSRange(17..<18), in: string))
		XCTAssertEqual(lexer.next(), ExampleToken(kind: .word, range: NSRange(18..<21), in: string))
		XCTAssertEqual(lexer.next(), ExampleToken(kind: .whitespace, range: NSRange(21..<22), in: string))
		XCTAssertEqual(lexer.next(), ExampleToken(kind: .number, range: NSRange(22..<25), in: string))
		XCTAssertEqual(lexer.next(), ExampleToken(kind: .whitespace, range: NSRange(25..<26), in: string))
		XCTAssertEqual(lexer.next(), ExampleToken(kind: .symbol, range: NSRange(26..<29), in: string))
		XCTAssertEqual(lexer.next(), ExampleToken(kind: .whitespace, range: NSRange(29..<30), in: string))
		XCTAssertEqual(lexer.next(), ExampleToken(kind: .word, range: NSRange(30..<31), in: string))
		XCTAssertNil(lexer.next())
	}
}

--------------------------------------------------------------------------------
/Tests/FlexerTests/LookAheadSequenceReferenceTests.swift:
--------------------------------------------------------------------------------
import XCTest
@testable import Flexer

final class LookAheadSequenceReferenceTests: XCTestCase {
	/// The reference wrapper can be advanced through a `let` binding.
	func testMutatingMethods() {
		let string = "a1"
		let lexer = BasicTextCharacterLexer(string: string).reference

		XCTAssertNotNil(lexer.next())
		XCTAssertNotNil(lexer.next())
		XCTAssertNil(lexer.next())
	}

	func testGetSubstsring() throws {
		let string = "a1"
		let lexer = BasicTextCharacterLexer(string: string).reference
		let token = try XCTUnwrap(lexer.peek())

		XCTAssertEqual(lexer.substring(for: token), "a")
	}
}
--------------------------------------------------------------------------------