├── .editorconfig
├── .github
├── FUNDING.yml
└── workflows
│ └── ci.yml
├── .gitignore
├── .gitmodules
├── .swift-format
├── .swiftpm
└── xcode
│ └── xcshareddata
│ └── xcschemes
│ └── Flexer.xcscheme
├── CODE_OF_CONDUCT.md
├── LICENSE
├── Package.swift
├── README.md
├── Sources
└── Flexer
│ ├── BasicTextCharacterLexer.swift
│ ├── CharacterSet+Character.swift
│ ├── LookAheadIteratorProtocol.swift
│ ├── LookAheadSequence.swift
│ ├── LookAheadSequenceReference.swift
│ └── TokenProtocol.swift
└── Tests
└── FlexerTests
├── BasicTextCharacterLexerTests.swift
├── ExampleLexerTests.swift
└── LookAheadSequenceReferenceTests.swift
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | indent_style = tab
5 | end_of_line = lf
6 | charset = utf-8
7 | trim_trailing_whitespace = true
8 | insert_final_newline = true
9 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [mattmassicotte]
2 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | paths-ignore:
8 | - 'README.md'
9 | - 'CODE_OF_CONDUCT.md'
10 | - '.editorconfig'
11 | - '.spi.yml'
12 | pull_request:
13 | branches:
14 | - main
15 |
16 | concurrency:
17 | group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
18 | cancel-in-progress: true
19 |
20 | jobs:
21 | test:
22 | name: Test
23 | runs-on: macOS-15
24 | timeout-minutes: 30
25 | env:
26 | DEVELOPER_DIR: /Applications/Xcode_16.3.app
27 | strategy:
28 | matrix:
29 | destination:
30 | - "platform=macOS"
31 | - "platform=macOS,variant=Mac Catalyst"
32 | - "platform=iOS Simulator,name=iPhone 16"
33 | - "platform=tvOS Simulator,name=Apple TV"
34 | - "platform=watchOS Simulator,name=Apple Watch Series 10 (42mm)"
35 | - "platform=visionOS Simulator,name=Apple Vision Pro"
36 | steps:
37 | - name: Checkout
38 | uses: actions/checkout@v4
39 | - name: Test platform ${{ matrix.destination }}
40 | run: set -o pipefail && xcodebuild -scheme Flexer -destination "${{ matrix.destination }}" test | xcbeautify
41 |
42 | linux_test:
43 | name: Test Linux
44 | runs-on: ubuntu-latest
45 | timeout-minutes: 30
46 | strategy:
47 | matrix:
48 | swift-version:
49 | - 6.0.3
50 | - 6.1
51 | steps:
52 | - name: Checkout
53 | uses: actions/checkout@v4
54 | - name: Swiftly
55 | uses: vapor/swiftly-action@v0.2.0
56 | with:
57 | toolchain: ${{ matrix.swift-version }}
58 | - name: Test
59 | run: swift test
60 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | /.build
3 | /Packages
4 | /*.xcodeproj
5 | xcuserdata/
6 | DerivedData/
7 | /Carthage
8 | .swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata
9 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChimeHQ/Flexer/b8fb79efe3ea65cbb2f7e864042ebce47fada488/.gitmodules
--------------------------------------------------------------------------------
/.swift-format:
--------------------------------------------------------------------------------
1 | {
2 | "fileScopedDeclarationPrivacy" : {
3 | "accessLevel" : "private"
4 | },
5 | "indentConditionalCompilationBlocks" : true,
6 | "indentSwitchCaseLabels" : false,
7 | "indentation" : {
8 | "tabs" : 1
9 | },
10 | "lineBreakAroundMultilineExpressionChainComponents" : false,
11 | "lineBreakBeforeControlFlowKeywords" : false,
12 | "lineBreakBeforeEachArgument" : false,
13 | "lineBreakBeforeEachGenericRequirement" : false,
14 | "lineBreakBetweenDeclarationAttributes" : false,
15 | "lineLength" : 100,
16 | "maximumBlankLines" : 1,
17 | "multiElementCollectionTrailingCommas" : true,
18 | "noAssignmentInExpressions" : {
19 | "allowedFunctions" : [
20 | "XCTAssertNoThrow"
21 | ]
22 | },
23 | "prioritizeKeepingFunctionOutputTogether" : false,
24 | "reflowMultilineStringLiterals" : {
25 | "never" : {
26 | }
27 | },
28 | "respectsExistingLineBreaks" : true,
29 | "rules" : {
30 | "AllPublicDeclarationsHaveDocumentation" : false,
31 | "AlwaysUseLiteralForEmptyCollectionInit" : false,
32 | "AlwaysUseLowerCamelCase" : true,
33 | "AmbiguousTrailingClosureOverload" : true,
34 | "AvoidRetroactiveConformances" : true,
35 | "BeginDocumentationCommentWithOneLineSummary" : false,
36 | "DoNotUseSemicolons" : true,
37 | "DontRepeatTypeInStaticProperties" : true,
38 | "FileScopedDeclarationPrivacy" : true,
39 | "FullyIndirectEnum" : true,
40 | "GroupNumericLiterals" : true,
41 | "IdentifiersMustBeASCII" : true,
42 | "NeverForceUnwrap" : false,
43 | "NeverUseForceTry" : false,
44 | "NeverUseImplicitlyUnwrappedOptionals" : false,
45 | "NoAccessLevelOnExtensionDeclaration" : true,
46 | "NoAssignmentInExpressions" : true,
47 | "NoBlockComments" : true,
48 | "NoCasesWithOnlyFallthrough" : true,
49 | "NoEmptyLinesOpeningClosingBraces" : false,
50 | "NoEmptyTrailingClosureParentheses" : true,
51 | "NoLabelsInCasePatterns" : true,
52 | "NoLeadingUnderscores" : false,
53 | "NoParensAroundConditions" : true,
54 | "NoPlaygroundLiterals" : true,
55 | "NoVoidReturnOnFunctionSignature" : true,
56 | "OmitExplicitReturns" : false,
57 | "OneCasePerLine" : true,
58 | "OneVariableDeclarationPerLine" : true,
59 | "OnlyOneTrailingClosureArgument" : true,
60 | "OrderedImports" : true,
61 | "ReplaceForEachWithForLoop" : true,
62 | "ReturnVoidInsteadOfEmptyTuple" : true,
63 | "TypeNamesShouldBeCapitalized" : true,
64 | "UseEarlyExits" : false,
65 | "UseExplicitNilCheckInConditions" : true,
66 | "UseLetInEveryBoundCaseVariable" : true,
67 | "UseShorthandTypeNames" : true,
68 | "UseSingleLinePropertyGetter" : true,
69 | "UseSynthesizedInitializer" : true,
70 | "UseTripleSlashForDocumentationComments" : true,
71 | "UseWhereClausesInForLoops" : false,
72 | "ValidateDocumentationComments" : false
73 | },
74 | "spacesAroundRangeFormationOperators" : false,
75 | "spacesBeforeEndOfLineComments" : 2,
76 | "tabWidth" : 4,
77 | "version" : 1
78 | }
79 |
--------------------------------------------------------------------------------
/.swiftpm/xcode/xcshareddata/xcschemes/Flexer.xcscheme:
--------------------------------------------------------------------------------
1 |
2 |
5 |
8 |
9 |
15 |
21 |
22 |
23 |
29 |
35 |
36 |
37 |
38 |
39 |
46 |
47 |
53 |
54 |
55 |
56 |
58 |
64 |
65 |
66 |
67 |
68 |
78 |
79 |
85 |
86 |
92 |
93 |
94 |
95 |
97 |
98 |
101 |
102 |
103 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 |
2 | # Contributor Covenant Code of Conduct
3 |
4 | ## Our Pledge
5 |
6 | We as members, contributors, and leaders pledge to make participation in our
7 | community a harassment-free experience for everyone, regardless of age, body
8 | size, visible or invisible disability, ethnicity, sex characteristics, gender
9 | identity and expression, level of experience, education, socio-economic status,
10 | nationality, personal appearance, race, caste, color, religion, or sexual
11 | identity and orientation.
12 |
13 | We pledge to act and interact in ways that contribute to an open, welcoming,
14 | diverse, inclusive, and healthy community.
15 |
16 | ## Our Standards
17 |
18 | Examples of behavior that contributes to a positive environment for our
19 | community include:
20 |
21 | * Demonstrating empathy and kindness toward other people
22 | * Being respectful of differing opinions, viewpoints, and experiences
23 | * Giving and gracefully accepting constructive feedback
24 | * Accepting responsibility and apologizing to those affected by our mistakes,
25 | and learning from the experience
26 | * Focusing on what is best not just for us as individuals, but for the overall
27 | community
28 |
29 | Examples of unacceptable behavior include:
30 |
31 | * The use of sexualized language or imagery, and sexual attention or advances of
32 | any kind
33 | * Trolling, insulting or derogatory comments, and personal or political attacks
34 | * Public or private harassment
35 | * Publishing others' private information, such as a physical or email address,
36 | without their explicit permission
37 | * Other conduct which could reasonably be considered inappropriate in a
38 | professional setting
39 |
40 | ## Enforcement Responsibilities
41 |
42 | Community leaders are responsible for clarifying and enforcing our standards of
43 | acceptable behavior and will take appropriate and fair corrective action in
44 | response to any behavior that they deem inappropriate, threatening, offensive,
45 | or harmful.
46 |
47 | Community leaders have the right and responsibility to remove, edit, or reject
48 | comments, commits, code, wiki edits, issues, and other contributions that are
49 | not aligned to this Code of Conduct, and will communicate reasons for moderation
50 | decisions when appropriate.
51 |
52 | ## Scope
53 |
54 | This Code of Conduct applies within all community spaces, and also applies when
55 | an individual is officially representing the community in public spaces.
56 | Examples of representing our community include using an official e-mail address,
57 | posting via an official social media account, or acting as an appointed
58 | representative at an online or offline event.
59 |
60 | ## Enforcement
61 |
62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
63 | reported to the community leaders responsible for enforcement at
64 | support@chimehq.com.
65 | All complaints will be reviewed and investigated promptly and fairly.
66 |
67 | All community leaders are obligated to respect the privacy and security of the
68 | reporter of any incident.
69 |
70 | ## Enforcement Guidelines
71 |
72 | Community leaders will follow these Community Impact Guidelines in determining
73 | the consequences for any action they deem in violation of this Code of Conduct:
74 |
75 | ### 1. Correction
76 |
77 | **Community Impact**: Use of inappropriate language or other behavior deemed
78 | unprofessional or unwelcome in the community.
79 |
80 | **Consequence**: A private, written warning from community leaders, providing
81 | clarity around the nature of the violation and an explanation of why the
82 | behavior was inappropriate. A public apology may be requested.
83 |
84 | ### 2. Warning
85 |
86 | **Community Impact**: A violation through a single incident or series of
87 | actions.
88 |
89 | **Consequence**: A warning with consequences for continued behavior. No
90 | interaction with the people involved, including unsolicited interaction with
91 | those enforcing the Code of Conduct, for a specified period of time. This
92 | includes avoiding interactions in community spaces as well as external channels
93 | like social media. Violating these terms may lead to a temporary or permanent
94 | ban.
95 |
96 | ### 3. Temporary Ban
97 |
98 | **Community Impact**: A serious violation of community standards, including
99 | sustained inappropriate behavior.
100 |
101 | **Consequence**: A temporary ban from any sort of interaction or public
102 | communication with the community for a specified period of time. No public or
103 | private interaction with the people involved, including unsolicited interaction
104 | with those enforcing the Code of Conduct, is allowed during this period.
105 | Violating these terms may lead to a permanent ban.
106 |
107 | ### 4. Permanent Ban
108 |
109 | **Community Impact**: Demonstrating a pattern of violation of community
110 | standards, including sustained inappropriate behavior, harassment of an
111 | individual, or aggression toward or disparagement of classes of individuals.
112 |
113 | **Consequence**: A permanent ban from any sort of public interaction within the
114 | community.
115 |
116 | ## Attribution
117 |
118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
119 | version 2.1, available at
120 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
121 |
122 | Community Impact Guidelines were inspired by
123 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
124 |
125 | For answers to common questions about this code of conduct, see the FAQ at
126 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
127 | [https://www.contributor-covenant.org/translations][translations].
128 |
129 | [homepage]: https://www.contributor-covenant.org
130 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
131 | [Mozilla CoC]: https://github.com/mozilla/diversity
132 | [FAQ]: https://www.contributor-covenant.org/faq
133 | [translations]: https://www.contributor-covenant.org/translations
134 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2019, Chime Systems Inc.
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | * Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/Package.swift:
--------------------------------------------------------------------------------
1 | // swift-tools-version: 5.9
2 |
3 | import PackageDescription
4 |
// Manifest for the Flexer library: a single library product plus its unit-test target.
let package = Package(
	name: "Flexer",
	platforms: [.macOS(.v10_15), .iOS(.v13), .tvOS(.v13), .watchOS(.v6), .macCatalyst(.v13)],
	products: [.library(name: "Flexer", targets: ["Flexer"])],
	dependencies: [],
	targets: [
		.target(name: "Flexer", dependencies: []),
		.testTarget(name: "FlexerTests", dependencies: ["Flexer"]),
	]
)

// Compiler settings appended to every target declared above.
let swiftSettings: [SwiftSetting] = [
	.enableExperimentalFeature("StrictConcurrency"),
]

for target in package.targets {
	// Keep whatever a target already specifies and add the shared settings after it.
	target.swiftSettings = (target.swiftSettings ?? []) + swiftSettings
}
33 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | [![Build Status][build status badge]][build status]
4 | [![Platforms][platforms badge]][platforms]
5 | [![Matrix][matrix badge]][matrix]
6 |
7 |
8 |
9 | # Flexer
10 |
11 | Flexer is a small library for building lexers in Swift. It is compatible with all Apple platforms.
12 |
13 | - API tailored for hand-written parsing
14 | - Fully Swift `String`-compatible
15 | - Based around `Sequence` and `IteratorProtocol` protocols
16 |
17 | It turns out that Swift's `Sequence` and `Iterator` concepts work pretty well for processing tokens. They make for a familiar API that also offers a surprising amount of power. Flexer builds on these concepts with some new protocols that are made specifically for lexing, but are generally applicable to all `Sequence` types.
18 |
19 | ## Integration
20 |
21 | ```swift
22 | dependencies: [
23 | .package(url: "https://github.com/ChimeHQ/Flexer")
24 | ]
25 | ```
26 |
27 | ## Look-Ahead
28 |
29 | Core to lexing is the ability to look ahead at future tokens without advancing. Flexer implements look-ahead with a protocol called `LookAheadIteratorProtocol`. The whole implementation is inspired by the `lazy` property of `Sequence`, and works very similarly.
30 |
31 | ```swift
32 | var lookAheadSequence = anySequence.lookAhead
33 |
34 | let next = lookAheadSequence.peek()
35 | ```
36 |
37 | The main work of building your lexer is then defining a Sequence type of tokens. All of the lexing facilities you might need can then be exposed with a `typealias`.
38 |
39 | ```swift
40 | typealias MyLexer = LookAheadSequence
41 |
42 | var lexer = MyLexer(string: myString)
43 |
44 | let nextToken = lexer.next()
45 | let futureToken = lexer.peek()
46 | let tabToken = lexer.nextUntil({ $0.kind == .tab })
47 | ```
48 |
49 | ## Token Sequences
50 |
51 | Your custom token sequence can be built by creating a struct that conforms to `Sequence`. To make this easier, Flexer includes a type that can be used as a foundation for creating more complex token streams, called `BasicTextCharacterSequence`. It is a sequence of `BasicTextCharacter` elements. It breaks up a string into commonly-needed tokens, categorized by kind and range within the source string. This approach uses the `Token` type, which stores a kind and a range within the source string.
52 |
53 | It is usually much easier to build up more complex lexing functionality with the convenience of Swift switch pattern matching, instead of having to worry about the underlying characters and ranges themselves. You can do this by wrapping up a `BasicTextCharacterSequence` in your own custom sequence.
54 |
55 | Here's a fully-functioning example that produces four different token types. It shows off some of the scanning and look-ahead facilities that can be handy both for constructing and also using your lexer.
56 |
57 | ```swift
58 | enum ExampleTokenKind {
59 | case word
60 | case number
61 | case symbol
62 | case whitespace
63 | }
64 |
65 | typealias ExampleToken = Flexer.Token
66 |
67 | struct ExampleTokenSequence: Sequence, IteratorProtocol, StringInitializable {
68 | public typealias Element = ExampleToken
69 |
70 | private var lexer: BasicTextCharacterLexer
71 |
72 | public init(string: String) {
73 | self.lexer = BasicTextCharacterLexer(string: string)
74 | }
75 |
76 | public mutating func next() -> Element? {
77 | guard let token = lexer.peek() else {
78 | return nil
79 | }
80 |
81 | switch token.kind {
82 | case .lowercaseLetter, .uppercaseLetter, .underscore:
83 | guard let endingToken = lexer.nextUntil(notIn: [.lowercaseLetter, .uppercaseLetter, .underscore, .digit]) else {
84 | return nil
85 | }
86 |
87 | return ExampleToken(kind: .word, range: token.startIndex..
113 | ```
114 |
115 | ## Contributing and Collaboration
116 |
117 | I would love to hear from you! Issues or pull requests work great. Both a [Matrix space][matrix] and [Discord][discord] are available for live help, but I have a strong bias towards answering in the form of documentation. You can also find me on [Mastodon](https://mastodon.social/@mattiem).
118 |
119 | I prefer collaboration, and would love to find ways to work together if you have a similar project.
120 |
121 | I prefer indentation with tabs for improved accessibility. But, I'd rather you use the system you want and make a PR than hesitate because of whitespace.
122 |
123 | By participating in this project you agree to abide by the [Contributor Code of Conduct](CODE_OF_CONDUCT.md).
124 |
125 | [build status]: https://github.com/ChimeHQ/Flexer/actions
126 | [build status badge]: https://github.com/ChimeHQ/Flexer/workflows/CI/badge.svg
127 | [platforms]: https://swiftpackageindex.com/ChimeHQ/Flexer
128 | [platforms badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com%2Fapi%2Fpackages%2FChimeHQ%2FFlexer%2Fbadge%3Ftype%3Dplatforms
129 | [matrix]: https://matrix.to/#/%23chimehq%3Amatrix.org
130 | [matrix badge]: https://img.shields.io/matrix/chimehq%3Amatrix.org?label=Matrix
131 | [discord]: https://discord.gg/esFpX6sErJ
132 |
--------------------------------------------------------------------------------
/Sources/Flexer/BasicTextCharacterLexer.swift:
--------------------------------------------------------------------------------
1 | import Foundation
2 |
/// A type that can be created from a string and exposes a string in return.
public protocol StringInitializable {
	/// The string exposed by the conforming type.
	///
	/// Presumably the same string that was passed to `init(string:)`; conforming types decide.
	var string: String { get }

	/// Creates a value from `string`.
	init(string: String)
}

public extension StringInitializable {
	/// Returns the slice of `string` covered by `range`.
	///
	/// - Parameter range: A range of indices into `string`.
	/// - Returns: The matching `Substring` of `string`.
	func substring(with range: Range<String.Index>) -> Substring {
		return string[range]
	}
}
14 |
/// The categories a single character of text can be classified into.
public enum BasicTextCharacterKind: Hashable, Sendable {
	// Whitespace
	case tab, space, newline

	// Letters, digits, and everything that has no dedicated case
	case lowercaseLetter, uppercaseLetter, digit, otherCharacter

	// Quotes
	case singleQuote, doubleQuote, backtick

	// Paired delimiters
	case openBrace, closeBrace
	case openBracket, closeBracket
	case openParen, closeParen
	case lessThan, greaterThan

	// Symbols
	case tilde, exclamation, question, at, percent, caret, ampersand, dollar
	case star, slash, numberSign, pipe, backslash, dash, plus, equals

	// Punctuation
	case period, comma, colon, semicolon, underscore

	/// Former name of `numberSign`, kept so existing callers continue to compile.
	@available(*, deprecated, message: "Please use numberSign")
	public static let pound: BasicTextCharacterKind = .numberSign
}
64 |
65 | public typealias BasicTextCharacter = Token
66 |
67 | struct CharacterRangePairIterator: IteratorProtocol {
68 | struct CharacterRangePair {
69 | public var character: Character
70 | public var range: Range
71 | }
72 |
73 | let string: String
74 | var currentIndex: String.Index
75 |
76 | init(string: String) {
77 | self.string = string
78 | self.currentIndex = string.startIndex
79 | }
80 |
81 | mutating func next() -> CharacterRangePair? {
82 | if currentIndex >= string.endIndex {
83 | return nil
84 | }
85 |
86 | let idx = currentIndex
87 |
88 | currentIndex = string.index(after: currentIndex)
89 |
90 | let char = string[idx]
91 | let range = idx.. Element? {
115 | guard let pair = characterIterator.next() else {
116 | return nil
117 | }
118 |
119 | let range = pair.range
120 | let char = pair.character
121 |
122 | switch char {
123 | case "\t": return BasicTextCharacter(kind: .tab, range: range)
124 | case " ": return BasicTextCharacter(kind: .space, range: range)
125 | case "'": return BasicTextCharacter(kind: .singleQuote, range: range)
126 | case "\"": return BasicTextCharacter(kind: .doubleQuote, range: range)
127 | case "`": return BasicTextCharacter(kind: .backtick, range: range)
128 | case "{": return BasicTextCharacter(kind: .openBrace, range: range)
129 | case "}": return BasicTextCharacter(kind: .closeBrace, range: range)
130 | case "[": return BasicTextCharacter(kind: .openBracket, range: range)
131 | case "]": return BasicTextCharacter(kind: .closeBracket, range: range)
132 | case "(": return BasicTextCharacter(kind: .openParen, range: range)
133 | case ")": return BasicTextCharacter(kind: .closeParen, range: range)
134 | case "<": return BasicTextCharacter(kind: .lessThan, range: range)
135 | case ">": return BasicTextCharacter(kind: .greaterThan, range: range)
136 | case "~": return BasicTextCharacter(kind: .tilde, range: range)
137 | case "!": return BasicTextCharacter(kind: .exclamation, range: range)
138 | case "?": return BasicTextCharacter(kind: .question, range: range)
139 | case "@": return BasicTextCharacter(kind: .at, range: range)
140 | case "%": return BasicTextCharacter(kind: .percent, range: range)
141 | case "^": return BasicTextCharacter(kind: .caret, range: range)
142 | case "&": return BasicTextCharacter(kind: .ampersand, range: range)
143 | case "$": return BasicTextCharacter(kind: .dollar, range: range)
144 | case "*": return BasicTextCharacter(kind: .star, range: range)
145 | case "/": return BasicTextCharacter(kind: .slash, range: range)
146 | case "#": return BasicTextCharacter(kind: .numberSign, range: range)
147 | case "|": return BasicTextCharacter(kind: .pipe, range: range)
148 | case "\\": return BasicTextCharacter(kind: .backslash, range: range)
149 | case "-": return BasicTextCharacter(kind: .dash, range: range)
150 | case "+": return BasicTextCharacter(kind: .plus, range: range)
151 | case "=": return BasicTextCharacter(kind: .equals, range: range)
152 | case ".": return BasicTextCharacter(kind: .period, range: range)
153 | case ",": return BasicTextCharacter(kind: .comma, range: range)
154 | case "_": return BasicTextCharacter(kind: .underscore, range: range)
155 | case ";": return BasicTextCharacter(kind: .semicolon, range: range)
156 | case ":": return BasicTextCharacter(kind: .colon, range: range)
157 | default:
158 | break
159 | }
160 |
161 | if newlineSet.contains(char) {
162 | return BasicTextCharacter(kind: .newline, range: range)
163 | }
164 |
165 | if digitSet.contains(char) {
166 | return BasicTextCharacter(kind: .digit, range: range)
167 | }
168 |
169 | if lowercaseSet.contains(char) {
170 | return BasicTextCharacter(kind: .lowercaseLetter, range: range)
171 | }
172 |
173 | if uppercaseSet.contains(char) {
174 | return BasicTextCharacter(kind: .uppercaseLetter, range: range)
175 | }
176 |
177 | return BasicTextCharacter(kind: .otherCharacter, range: range)
178 | }
179 | }
180 |
181 | public typealias BasicTextCharacterLexer = LookAheadSequence
182 |
--------------------------------------------------------------------------------
/Sources/Flexer/CharacterSet+Character.swift:
--------------------------------------------------------------------------------
1 | //
2 | // CharacterSet+Character.swift
3 | // Flexer
4 | //
5 | // Created by Matt Massicotte on 2020-04-24.
6 | // Copyright © 2020 Chime Systems Inc. All rights reserved.
7 | //
8 |
9 | import Foundation
10 |
public extension CharacterSet {
	/// Reports whether every Unicode scalar that makes up `character` is a member of this set.
	///
	/// - Parameter character: The character to test.
	/// - Returns: `false` as soon as one scalar of `character` is not in the set, `true` otherwise.
	func contains(_ character: Character) -> Bool {
		for scalar in character.unicodeScalars where !self.contains(scalar) {
			return false
		}

		return true
	}
}
16 |
--------------------------------------------------------------------------------
/Sources/Flexer/LookAheadIteratorProtocol.swift:
--------------------------------------------------------------------------------
1 | //
2 | // LookAheadIteratorProtocol.swift
3 | // Flexer
4 | //
5 | // Created by Matt Massicotte on 2020-04-25.
6 | // Copyright © 2020 Chime Systems Inc. All rights reserved.
7 | //
8 |
9 | import Foundation
10 |
/// An iterator that can inspect upcoming elements without consuming them.
public protocol LookAheadIteratorProtocol: IteratorProtocol {
	/// Returns the element `distance` positions ahead without consuming it.
	///
	/// The helpers below treat a distance of 1 as the element the next call to `next()`
	/// would return. Returns `nil` when no element is available at that distance.
	mutating func peek(distance: Int) -> Element?
}

public extension LookAheadIteratorProtocol {
	/// Returns the element the next call to `next()` would produce, without consuming it.
	mutating func peek() -> Element? {
		return peek(distance: 1)
	}

	/// Looks ahead, without consuming anything, for an element that satisfies `predicate`.
	///
	/// - Parameters:
	///   - predicate: The test applied to each upcoming element in order.
	///   - limit: The maximum number of upcoming elements to inspect.
	/// - Returns: `true` if a matching element is found within `limit` elements, `false` if the
	///   elements run out or the limit is reached first.
	mutating func peekUntil(_ predicate: (Element) -> Bool, limit: Int = Int.max) -> Bool {
		// Look one position further on every pass. Peeking at a fixed distance would test the
		// same element again and again.
		for offset in 0 ..< limit {
			guard let elem = peek(distance: offset + 1) else {
				return false
			}

			if predicate(elem) {
				return true
			}
		}

		return false
	}
}

public extension LookAheadIteratorProtocol {
	/// Consumes and returns the next element, but only if it satisfies `predicate`.
	///
	/// - Returns: The consumed element, or `nil` if there is no next element or it does not
	///   satisfy `predicate` (in which case nothing is consumed).
	mutating func nextIf(_ predicate: (Element) throws -> Bool) rethrows -> Element? {
		guard let t = peek() else {
			return nil
		}

		if try predicate(t) {
			return next()
		}

		return nil
	}

	/// Skips the next token if predicate is true
	///
	/// - Returns: `true` if an element was consumed.
	@discardableResult
	mutating func skipIf(_ predicate: (Element) throws -> Bool) rethrows -> Bool {
		return try nextIf(predicate) != nil
	}

	/// Consumes elements until the upcoming element satisfies `predicate`.
	///
	/// Consumption also stops when the elements run out or after `limit` elements have been
	/// consumed. The element that satisfies `predicate` is left unconsumed.
	///
	/// - Returns: The last element consumed, or `nil` if nothing was consumed.
	mutating func nextUntil(_ predicate: (Element) throws -> Bool, limit: Int = Int.max) rethrows -> Element? {
		var last: Element?

		for _ in 0 ..< limit {
			guard let elem = peek() else {
				return last
			}

			if try predicate(elem) {
				// on the first iteration, last will always be nil
				return last
			}

			last = elem

			_ = next()
		}

		return last
	}

	/// Skips tokens until predicate is true
	///
	/// - Parameters:
	///   - predicate: The test applied to each upcoming element.
	///   - limit: The maximum number of elements to skip.
	/// - Returns: `true` if at least one element was consumed.
	@discardableResult
	mutating func skipUntil(_ predicate: (Element) throws -> Bool, limit: Int = Int.max) rethrows -> Bool {
		// Forward the limit; without it the caller's bound would be silently ignored.
		return try nextUntil(predicate, limit: limit) != nil
	}
}
82 |
--------------------------------------------------------------------------------
/Sources/Flexer/LookAheadSequence.swift:
--------------------------------------------------------------------------------
1 | //
2 | // LookAheadSequence.swift
3 | // Flexer
4 | //
5 | // Created by Matt Massicotte on 2020-04-24.
6 | // Copyright © 2020 Chime Systems Inc. All rights reserved.
7 | //
8 |
9 | import Foundation
10 |
/// A wrapper that adds look-ahead to any sequence.
///
/// Elements that have been peeked at but not yet consumed are held in an internal buffer,
/// and `next()` drains that buffer before pulling from the underlying iterator again.
public struct LookAheadSequence<Base>: Sequence, LookAheadIteratorProtocol where Base : Sequence {
	public typealias Element = Base.Element

	let baseSequence: Base
	private var buffer: [Base.Element]
	private var iterator: Base.Iterator

	/// Wraps `baseSequence`, starting iteration from its beginning.
	public init(_ baseSequence: Base) {
		self.baseSequence = baseSequence
		self.iterator = baseSequence.makeIterator()
		self.buffer = []
	}

	/// Consumes and returns the next element, or `nil` when none remain.
	public mutating func next() -> Element? {
		if buffer.isEmpty {
			return iterator.next()
		}

		return buffer.removeFirst()
	}

	/// Returns the element `distance` positions ahead without consuming it.
	///
	/// - Parameter distance: How far to look ahead; 1 is the element `next()` would return.
	///   A distance of 0 returns the first already-buffered element, if any.
	/// - Returns: The element at that distance, or `nil` if the sequence ends before it or
	///   `distance` is negative.
	public mutating func peek(distance: Int = 1) -> Element? {
		if distance == 0 {
			return buffer.first
		}

		// A negative distance would otherwise index the buffer with a negative value and trap.
		guard distance > 0 else {
			return nil
		}

		let delta = distance - buffer.count
		let index = distance - 1

		// fill buffer as needed
		if delta > 0 {
			for _ in 0..<delta {
				guard let element = iterator.next() else {
					break
				}

				buffer.append(element)
			}
		}

		if index >= buffer.endIndex {
			return nil
		}

		return buffer[index]
	}
}

public extension Sequence {
	/// This sequence wrapped in a `LookAheadSequence`.
	var lookAhead: LookAheadSequence<Self> {
		return LookAheadSequence(self)
	}
}
62 |
extension LookAheadSequence: StringInitializable where Base: StringInitializable {
	/// Creates the base sequence from `string` and wraps it.
	public init(string: String) {
		let base = Base(string: string)

		self.init(base)
	}

	/// The string reported by the wrapped base sequence.
	public var string: String {
		baseSequence.string
	}
}
72 |
public extension LookAheadSequence where Base: StringInitializable, Base.Element: TokenProtocol, Base.Element.Index == String.Index {
	/// Returns the part of `string` covered by `token`'s range.
	func substring(for token: Base.Element) -> Substring {
		let tokenRange = token.range

		return substring(with: tokenRange)
	}
}
78 |
--------------------------------------------------------------------------------
/Sources/Flexer/LookAheadSequenceReference.swift:
--------------------------------------------------------------------------------
1 | import Foundation
2 |
3 | /// LookAheadSequence with reference semantics
4 | ///
5 | /// This is a class wrapper around LookAheadSequence. Useful if you need to pass
6 | /// around and operate on a single shared instance.
public class LookAheadSequenceReference<Base>: Sequence, LookAheadIteratorProtocol where Base : Sequence {
	public typealias Element = Base.Element

	// The wrapped value-type sequence; every call on this class is forwarded to it.
	var internalSequence: LookAheadSequence<Base>

	/// Wraps `sequence` so that it can be shared and mutated through a reference.
	public init(_ sequence: LookAheadSequence<Base>) {
		self.internalSequence = sequence
	}

	/// Consumes and returns the next element of the wrapped sequence.
	public func next() -> Element? {
		return internalSequence.next()
	}

	/// Returns the element `distance` positions ahead in the wrapped sequence without consuming it.
	public func peek(distance: Int = 1) -> Element? {
		return internalSequence.peek(distance: distance)
	}
}
23 |
24 | // This junk is needed to get around the "mutating" modifier
25 | public extension LookAheadSequenceReference {
26 | 	func peekUntil(_ predicate: (Element) -> Bool, limit: Int = Int.max) -> Bool {
27 | 		return internalSequence.peekUntil(predicate, limit: limit) // pass the caller's limit through, as nextUntil/skipUntil do
28 | 	}
29 |
30 | 	func nextIf(_ predicate: (Element) throws -> Bool) rethrows -> Element? {
31 | 		return try internalSequence.nextIf(predicate)
32 | 	}
33 |
34 | 	func skipIf(_ predicate: (Element) throws -> Bool) rethrows -> Bool {
35 | 		return try internalSequence.skipIf(predicate)
36 | 	}
37 |
38 | 	func nextUntil(_ predicate: (Element) throws -> Bool, limit: Int = Int.max) rethrows -> Element? {
39 | 		return try internalSequence.nextUntil(predicate, limit: limit)
40 | 	}
41 |
42 | 	func skipUntil(_ predicate: (Element) throws -> Bool, limit: Int = Int.max) rethrows -> Bool {
43 | 		return try internalSequence.skipUntil(predicate, limit: limit)
44 | 	}
45 | }
46 |
47 | extension LookAheadSequenceReference where Base : StringInitializable {
48 | 	/// Forwards to the `string` property of the wrapped
49 | 	/// `LookAheadSequence`.
50 | 	public var string: String { internalSequence.string }
51 | }
52 |
53 | public extension LookAheadSequenceReference where Base: StringInitializable, Base.Element: TokenProtocol, Base.Element.Index == String.Index {
54 | 	/// Forwards to `substring(for:)` on the wrapped
55 | 	/// `LookAheadSequence`.
56 | 	func substring(for token: Base.Element) -> Substring { internalSequence.substring(for: token) }
57 | }
58 |
59 | public extension LookAheadSequence {
60 | 	/// LookAheadSequence wrapper with reference semantics
61 | 	var reference: LookAheadSequenceReference<Base> {
62 | 		return LookAheadSequenceReference(self)
63 | 	}
64 | }
65 |
--------------------------------------------------------------------------------
/Sources/Flexer/TokenProtocol.swift:
--------------------------------------------------------------------------------
1 | //
2 | // TokenProtocol.swift
3 | // Flexer
4 | //
5 | // Created by Matt Massicotte on 2020-04-27.
6 | // Copyright © 2020 Chime Systems Inc. All rights reserved.
7 | //
8 |
9 | import Foundation
10 |
11 | public protocol TokenProtocol: Comparable, CustomStringConvertible {
12 | 	associatedtype Kind: Hashable // the category a token belongs to
13 | 	associatedtype Index: Comparable // the position type that bounds `range`
14 |
15 | 	var range: Range<Index> { get }
16 | 	var kind: Kind { get }
17 |
18 | 	init(kind: Kind, range: Range<Index>)
19 | }
20 |
21 | extension TokenProtocol where Index == String.Index {
22 | public init?(kind: Kind, range: NSRange, in string: String) {
23 | guard let stringRange = Range(range, in: string) else {
24 | return nil
25 | }
26 |
27 | self.init(kind: kind, range: stringRange)
28 | }
29 |
30 | public init?(kind: Kind, start: BasicTextCharacter, end: BasicTextCharacter?) {
31 | guard let end = end else { return nil }
32 |
33 | self.init(kind: kind, range: start.startIndex.. NSRange {
37 | return NSRange(range, in: string)
38 | }
39 | }
40 |
41 | extension TokenProtocol {
42 | 	/// Where the token begins: the lower bound
43 | 	/// of `range`.
44 | 	public var startIndex: Index { range.lowerBound }
45 |
46 | 	/// Where the token ends: the upper bound
47 | 	/// of `range`.
48 | 	public var endIndex: Index { range.upperBound }
49 | }
50 |
51 | extension TokenProtocol {
52 | 	/// Orders tokens by `startIndex` alone; `kind` and the
53 | 	/// upper bound of `range` take no part in the comparison.
54 | 	public static func < (lhs: Self, rhs: Self) -> Bool { lhs.startIndex < rhs.startIndex }
55 | }
56 |
57 | extension TokenProtocol {
58 | 	/// Angle-bracketed summary made of the concrete type name,
59 | 	/// the `kind` and the `range`, separated by single spaces.
60 | 	public var description: String { "<\(type(of: self)) \(String(describing: kind)) \(String(describing: range))>" }
61 | }
62 |
63 | extension LookAheadSequence where Base.Element: TokenProtocol {
64 | 	public mutating func nextUntil(notIn set: Set<Base.Element.Kind>) -> Base.Element? {
65 | 		return nextUntil({ !set.contains($0.kind) }) // predicate is true for a token whose kind is outside `set`
66 | 	}
67 |
68 | 	public mutating func nextUntil(in set: Set<Base.Element.Kind>) -> Base.Element? {
69 | 		return nextUntil({ set.contains($0.kind) }) // predicate is true for a token whose kind is inside `set`
70 | 	}
71 | }
72 |
73 | extension LookAheadSequence where Base.Element: TokenProtocol {
74 | 	/// The base sequence this look-ahead wrapper was created
75 | 	/// with, exposed under a token-oriented name.
76 | 	public var tokens: Base { baseSequence }
77 | }
78 |
79 | public struct Token<TokenKind: Hashable>: TokenProtocol {
80 | 	public typealias Kind = TokenKind
81 | 	public typealias Index = String.Index
82 |
83 | 	public var range: Range<Index> // bounds expressed as String.Index values
84 | 	public var kind: Kind
85 |
86 | 	public init(kind: TokenKind, range: Range<Index>) {
87 | 		self.kind = kind
88 | 		self.range = range
89 | 	}
90 | }
91 |
--------------------------------------------------------------------------------
/Tests/FlexerTests/BasicTextCharacterLexerTests.swift:
--------------------------------------------------------------------------------
1 | import XCTest
2 |
3 | import Flexer
4 |
5 | final class BasicTextCharacterLexerTests: XCTestCase {
6 | func testPeekTwiceReturnsSameValue() {
7 | let string = "\t"
8 | var lexer = BasicTextCharacterLexer(string: string)
9 |
10 | let token = BasicTextCharacter(kind: .tab, range: NSRange(0 ..< 1), in: string)
11 |
12 | XCTAssertEqual(lexer.peek(), token)
13 | XCTAssertEqual(lexer.peek(), token) // a second peek must yield the same token as the first
14 | }
15 |
16 | func testNextReturnsDifferentValue() {
17 | let string = "\t"
18 | var lexer = BasicTextCharacterLexer(string: string)
19 |
20 | XCTAssertNotNil(lexer.peek())
21 | XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .tab, range: NSRange(0 ..< 1), in: string))
22 | XCTAssertNil(lexer.peek()) // the only character was taken by next(), so nothing is left to peek
23 | }
24 |
25 | func testPeekFurtherAndThenCloser() {
26 | let string = "a1"
27 | var lexer = BasicTextCharacterLexer(string: string)
28 |
29 | XCTAssertEqual(lexer.peek(distance: 2), BasicTextCharacter(kind: .digit, range: NSRange(1 ..< 2), in: string)) // look two ahead first
30 | XCTAssertEqual(lexer.peek(distance: 1), BasicTextCharacter(kind: .lowercaseLetter, range: NSRange(0 ..< 1), in: string)) // then one ahead
31 | XCTAssertEqual(lexer.peek(distance: 2), BasicTextCharacter(kind: .digit, range: NSRange(1 ..< 2), in: string)) // and two ahead again: same answer
32 | }
33 |
34 | func testNewlineSequences() {
35 | let string = "\n \r \r\n"
36 | var lexer = BasicTextCharacterLexer(string: string)
37 |
38 | XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .newline, range: NSRange(0..<1), in: string)) // "\n"
39 | XCTAssertNotNil(lexer.next()) // the space separator
40 | XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .newline, range: NSRange(2..<3), in: string)) // "\r"
41 | XCTAssertNotNil(lexer.next()) // the space separator
42 | XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .newline, range: NSRange(4..<6), in: string)) // "\r\n" is expected as one newline token of NSRange length 2
43 | XCTAssertNil(lexer.next())
44 | }
45 | }
46 |
47 | extension BasicTextCharacterLexerTests {
48 | func testPeekNextPerformance() {
49 | var string = ""
50 |
51 | for _ in 0 ..< 10000 { // build the input from 10,000 copies of the same line
52 | string += "abc 123 ;[];^^%\n"
53 | }
54 |
55 | measure {
56 | var lexer = BasicTextCharacterLexer(string: string)
57 |
58 | while lexer.peek() != nil { // alternate peek() and next() until the input is exhausted
59 | _ = lexer.next()
60 | }
61 | }
62 | }
63 | }
64 |
65 | extension BasicTextCharacterLexerTests {
66 | func testSingleLowercaseCharacter() {
67 | let string = "a"
68 | var lexer = BasicTextCharacterLexer(string: string)
69 |
70 | XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .lowercaseLetter, range: NSRange(0 ..< 1), in: string))
71 | XCTAssertNil(lexer.next()) // input exhausted after the single character
72 | }
73 |
74 | func testMultiCharacterUppercaseRun() {
75 | let string = "ABC"
76 | var lexer = BasicTextCharacterLexer(string: string)
77 |
78 | XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .uppercaseLetter, range: NSRange(0 ..< 1), in: string)) // each letter is expected as its own token
79 | XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .uppercaseLetter, range: NSRange(1 ..< 2), in: string))
80 | XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .uppercaseLetter, range: NSRange(2 ..< 3), in: string))
81 | XCTAssertNil(lexer.next())
82 | }
83 |
84 | func testDigitRun() {
85 | let string = "123"
86 | var lexer = BasicTextCharacterLexer(string: string)
87 |
88 | XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .digit, range: NSRange(0 ..< 1), in: string)) // each digit is expected as its own token
89 | XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .digit, range: NSRange(1 ..< 2), in: string))
90 | XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .digit, range: NSRange(2 ..< 3), in: string))
91 | XCTAssertNil(lexer.next())
92 | }
93 |
94 | func testNextUntilWithNoMatch() {
95 | let string = " ab "
96 | var lexer = BasicTextCharacterLexer(string: string)
97 |
98 | XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .space, range: NSRange(0 ..< 1), in: string))
99 | XCTAssertEqual(lexer.next(), BasicTextCharacter(kind: .lowercaseLetter, range: NSRange(1 ..< 2), in: string))
100 |
101 | // The lexer is spooled to the "a" position.
102 | let bToken = lexer.nextUntil(notIn: [.lowercaseLetter])
103 | // So the return token must be the last one matching e.g. "b".
104 | XCTAssertEqual(bToken, BasicTextCharacter(kind: .lowercaseLetter, range: NSRange(2 ..< 3), in: string))
105 |
106 | // The lexer is already spooled to the "b" position.
107 | // next() would be " ".
108 | let token = lexer.nextUntil(notIn: [.lowercaseLetter])
109 |
110 | // But our stop condition is notIn: [.lowercaseLetter].
111 | // So we need to immediately stop.
112 | XCTAssertNil(token)
113 | }
114 | }
115 |
--------------------------------------------------------------------------------
/Tests/FlexerTests/ExampleLexerTests.swift:
--------------------------------------------------------------------------------
1 | //
2 | // ExampleLexerTests.swift
3 | // FlexerTests
4 | //
5 | // Created by Matt Massicotte on 2020-04-27.
6 | // Copyright © 2020 Chime Systems Inc. All rights reserved.
7 | //
8 |
9 | import XCTest
10 | @testable import Flexer
11 |
12 | enum ExampleTokenKind {
13 | 	// The four token categories that the example lexer
14 | 	// tests in this file assert against.
15 | 	case word, number
16 | 	case symbol, whitespace
17 | }
18 |
19 | typealias ExampleToken = Flexer.Token<ExampleTokenKind>
20 |
21 | struct ExampleTokenSequence: Sequence, IteratorProtocol, StringInitializable {
22 | public typealias Element = ExampleToken
23 |
24 | private var lexer: BasicTextCharacterLexer
25 |
26 | public init(string: String) {
27 | self.lexer = BasicTextCharacterLexer(string: string)
28 | }
29 |
30 | public var string: String {
31 | return lexer.string
32 | }
33 |
34 | public mutating func next() -> Element? {
35 | guard let token = lexer.peek() else {
36 | return nil
37 | }
38 |
39 | switch token.kind {
40 | case .lowercaseLetter, .uppercaseLetter, .underscore:
41 | guard let endingToken = lexer.nextUntil(notIn: [.lowercaseLetter, .uppercaseLetter, .underscore, .digit]) else {
42 | return nil
43 | }
44 |
45 | return ExampleToken(kind: .word, range: token.startIndex..
71 |
72 | class ExampleLexerTests: XCTestCase {
73 | func testTokens() {
74 | let string = "abc d_eF\t\t\tGhi123 JKL 123 **&\nz"
75 | var lexer = ExampleTokenLexer(string: string)
76 |
77 | XCTAssertEqual(lexer.next(), ExampleToken(kind: .word, range: NSRange(0..<3), in: string)) // "abc"
78 | XCTAssertEqual(lexer.next(), ExampleToken(kind: .whitespace, range: NSRange(3..<4), in: string))
79 | XCTAssertEqual(lexer.next(), ExampleToken(kind: .word, range: NSRange(4..<8), in: string)) // "d_eF": an underscore is expected inside a word
80 | XCTAssertEqual(lexer.next(), ExampleToken(kind: .whitespace, range: NSRange(8..<11), in: string)) // three tabs are expected as one whitespace token
81 | XCTAssertEqual(lexer.next(), ExampleToken(kind: .word, range: NSRange(11..<17), in: string)) // "Ghi123": trailing digits are expected inside a word
82 | XCTAssertEqual(lexer.next(), ExampleToken(kind: .whitespace, range: NSRange(17..<18), in: string))
83 | XCTAssertEqual(lexer.next(), ExampleToken(kind: .word, range: NSRange(18..<21), in: string)) // "JKL"
84 | XCTAssertEqual(lexer.next(), ExampleToken(kind: .whitespace, range: NSRange(21..<22), in: string))
85 | XCTAssertEqual(lexer.next(), ExampleToken(kind: .number, range: NSRange(22..<25), in: string)) // "123"
86 | XCTAssertEqual(lexer.next(), ExampleToken(kind: .whitespace, range: NSRange(25..<26), in: string))
87 | XCTAssertEqual(lexer.next(), ExampleToken(kind: .symbol, range: NSRange(26..<29), in: string)) // "**&"
88 | XCTAssertEqual(lexer.next(), ExampleToken(kind: .whitespace, range: NSRange(29..<30), in: string)) // the "\n" is expected as whitespace
89 | XCTAssertEqual(lexer.next(), ExampleToken(kind: .word, range: NSRange(30..<31), in: string)) // "z"
90 | XCTAssertNil(lexer.next())
91 | }
92 | }
93 |
94 |
--------------------------------------------------------------------------------
/Tests/FlexerTests/LookAheadSequenceReferenceTests.swift:
--------------------------------------------------------------------------------
1 | import XCTest
2 | @testable import Flexer
3 |
4 | final class LookAheadSequenceReferenceTests: XCTestCase {
5 | func testMutatingMethods() {
6 | let string = "a1"
7 | let lexer = BasicTextCharacterLexer(string: string).reference // declared with `let`: the reference wrapper's next() is not `mutating`
8 |
9 | XCTAssertNotNil(lexer.next())
10 | XCTAssertNotNil(lexer.next())
11 | XCTAssertNil(lexer.next()) // both characters were consumed through the shared reference
12 | }
13 |
14 | func testGetSubstsring() throws {
15 | let string = "a1"
16 | let lexer = BasicTextCharacterLexer(string: string).reference
17 | let token = try XCTUnwrap(lexer.peek()) // fail the test, rather than crash, if nothing can be peeked
18 |
19 | XCTAssertEqual(lexer.substring(for: token), "a")
20 | }
21 | }
22 |
--------------------------------------------------------------------------------