├── .gitignore
├── .travis.yml
├── LICENSE.md
├── Package.swift
├── README.md
├── Sources
└── Regex
│ ├── Automaton.swift
│ ├── Compiler
│ └── Compiler.swift
│ ├── Lexer
│ ├── Lexer.swift
│ └── Token.swift
│ ├── Optimizer
│ └── Optimizer.swift
│ ├── Parser
│ ├── Parser.swift
│ └── Symbol.swift
│ ├── Regex.swift
│ ├── RegexError.swift
│ └── RegexMatch.swift
└── Tests
├── LinuxMain.swift
└── RegexTests
├── CompilerTests.swift
├── LexerTests.swift
├── OptimizerTests.swift
├── ParserTests.swift
└── RegexTests.swift
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | /.build
3 | /Packages
4 | /*.xcodeproj
5 | Package.resolved
6 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | os:
2 | - osx
3 | - linux
4 |
5 | language: generic
6 |
7 | sudo: required
8 | dist: trusty
9 |
10 | osx_image: xcode9.3
11 |
12 | script:
13 | - eval "$(curl -sL https://swift.skrundz.ca/ci)"
14 | - eval "$(curl -sL https://swift.skrundz.ca/codecov)"
15 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/Package.swift:
--------------------------------------------------------------------------------
1 | // swift-tools-version:4.1
2 | //
3 | // Package.swift
4 | // Regex
5 | //
6 |
7 | import PackageDescription
8 |
// Name shared by the package, its only library target and the default product.
let moduleName = "Regex"

// Manifest for the Regex library: one target exposed through three library
// products (default, static and dynamic linkage) plus its test target.
let package = Package(
    name: moduleName,
    products: [
        // Linkage left to the build system.
        .library(name: moduleName, targets: [moduleName]),
        // Same target, forced static.
        .library(name: "sRegex", type: .static, targets: [moduleName]),
        // Same target, forced dynamic.
        .library(name: "dRegex", type: .dynamic, targets: [moduleName])
    ],
    dependencies: [
        .package(
            url: "https://github.com/DavidSkrundz/Collections.git",
            .upToNextMinor(from: "1.1.0")
        )
    ],
    targets: [
        // NOTE(review): "Generator" is presumably a product of the Collections
        // package declared above — confirm.
        .target(name: moduleName, dependencies: ["Generator"]),
        .testTarget(name: "RegexTests", dependencies: [moduleName])
    ]
)
37 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Regex (V2 WIP) [](https://swift.org/download/#releases) [](https://swift.org/download/#releases) [](https://travis-ci.org/DavidSkrundz/Regex) [](https://codebeat.co/projects/github-com-davidskrundz-regex) [](https://codecov.io/gh/DavidSkrundz/Regex)
2 |
3 | A pure Swift implementation of a Regular Expression Engine
4 |
5 | **Trying again with V2 using DFAs instead of NFAs to get grep-like performance**
6 |
7 |
8 | ## Usage
9 |
10 | To avoid compiling overhead it is possible to create a `Regex` instance
11 |
12 | ```Swift
13 | // Compile the expression
14 | let regex = try! Regex(pattern: "[a-zA-Z]+")
15 |
16 | let string = "RegEx is tough, but useful."
17 |
18 | // Search for matches
19 | let words = regex.match(string)
20 |
21 | /*
22 | words = [
23 | RegexMatch(match: "RegEx", groups: []),
24 | RegexMatch(match: "is", groups: []),
25 | RegexMatch(match: "tough", groups: []),
26 | RegexMatch(match: "but", groups: []),
27 | RegexMatch(match: "useful", groups: []),
28 | ]
29 | */
30 | ```
31 |
32 | If compiling overhead is not an issue it is possible to use the `=~` operator to match a string
33 |
34 | ```Swift
35 | let fourLetterWords = "drink beer, it's very nice!" =~ "\\b\\w{4}\\b" ?? []
36 |
37 | /*
38 | fourLetterWords = [
39 | RegexMatch(match: "beer", groups: []),
40 | RegexMatch(match: "very", groups: []),
41 | RegexMatch(match: "nice", groups: []),
42 | ]
43 | */
44 | ```
45 |
46 | By default the `Global` flag is active. To change which flags are active, add a `/` at the start of the pattern and a `/` followed by the flags at the end. The available flags are:
47 |
48 | - `g` `Global` - Allows multiple matches
49 | - `i` `Case Insensitive` - Case insensitive matching
50 | - `m` `Multiline` - `^` and `$` also match the beginning and end of a line
51 |
52 | ```Swift
53 | // Global and Case Insensitive search
54 | let regex = try! Regex(pattern: "/\\w+/ig")
55 | ```
56 |
57 |
58 | ## Supported Operations
59 |
60 | ### Character Classes
61 | | Pattern | Description | Supported |
62 | |---------|------------|-----------|
63 | | `.` | `[^\n\r]` | |
64 | | `[^]` | `[\s\S]` | |
65 | | `\w` | `[A-Za-z0-9_]` | |
66 | | `\W` | `[^A-Za-z0-9_]` | |
67 | | `\d` | `[0-9]` | |
68 | | `\D` | `[^0-9]` | |
69 | | `\s` | `[\ \r\n\t\v\f]` | |
70 | | `\S` | `[^\ \r\n\t\v\f]` | |
71 | | `[ABC]` | Any in the set | |
72 | | `[^ABC]` | Any not in the set | |
73 | | `[A-Z]` | Any in the range inclusively | |
74 |
75 | ### Anchors (Match positions not characters)
76 | | Pattern | Description | Supported |
77 | |---------|------------|-----------|
78 | | `^` | Beginning of string | |
79 | | `$` | End of string | |
80 | | `\b` | Word boundary | |
81 | | `\B` | Not word boundary | |
82 |
83 | ### Escaped Characters
84 | | Pattern | Description | Supported |
85 | |---------|------------|-----------|
86 | | `\0` | Octal escaped character | |
87 | | `\00` | Octal escaped character | |
88 | | `\000` | Octal escaped character | |
89 | | `\xFF` | Hex escaped character | |
90 | | `\uFFFF` | Unicode escaped character | |
91 | | `\cA` | Control character | |
92 | | `\t` | Tab | |
93 | | `\n` | Newline | |
94 | | `\v` | Vertical tab | |
95 | | `\f` | Form feed | |
96 | | `\r` | Carriage return | |
97 | | `\0` | Null | |
98 | | `\.` | `.` | |
99 | | `\\` | `\` | |
100 | | `\+` | `+` | |
101 | | `\*` | `*` | |
102 | | `\?` | `?` | |
103 | | `\^` | `^` | |
104 | | `\$` | `$` | |
105 | | `\{` | `{` | |
106 | | `\}` | `}` | |
107 | | `\[` | `[` | |
108 | | `\]` | `]` | |
109 | | `\(` | `(` | |
110 | | `\)` | `)` | |
111 | | `\/` | `/` | |
112 | | `\|` | `|` | |
113 |
114 | ### Groups and Lookaround
115 | | Pattern | Description | Supported |
116 | |---------|------------|-----------|
117 | | `(ABC)` | Capture group | |
118 | | `(<name>ABC)` | Named capture group | |
119 | | `\1` | Back reference | |
120 | | `\'name'` | Named back reference | |
121 | | `(?:ABC)` | Non-capturing group | |
122 | | `(?=ABC)` | Positive lookahead | |
123 | | `(?!ABC)` | Negative lookahead | |
124 | | `(?<=ABC)` | Positive lookbehind | |
125 | | `(?<!ABC)` | Negative lookbehind | |
126 |
127 | ### Greedy Quantifiers
128 | | Pattern | Description | Supported |
129 | |---------|------------|-----------|
130 | | `+` | One or more | |
131 | | `*` | Zero or more | |
132 | | `?` | Optional | |
133 | | `{n}` | n | |
134 | | `{,}` | Same as `*` | |
135 | | `{,n}` | n or less | |
136 | | `{n,}` | n or more | |
137 | | `{n,m}` | n to m | |
138 |
139 | ### Lazy Quantifiers
140 | | Pattern | Description | Supported |
141 | |---------|------------|-----------|
142 | | `+?` | One or more | |
143 | | `*?` | Zero or more | |
144 | | `??` | Optional | |
145 | | `{n}?` | n | |
146 | | `{,n}?` | n or less | |
147 | | `{n,}?` | n or more | |
148 | | `{n,m}?` | n to m | |
149 |
150 | ### Alternation
151 | | Pattern | Description | Supported |
152 | |---------|------------|-----------|
153 | | `\|` | Everything before or everything after | |
154 |
155 | ### Flags
156 | | Pattern | Description | Supported |
157 | |---------|------------|-----------|
158 | | `i` | Case insensitive | |
159 | | `g` | Global | |
160 | | `m` | Multiline | |
161 |
162 |
163 | ## Inner Workings
164 |
165 | (Similar to before)
166 |
167 | - Lexer (String input to Tokens)
168 | - Parser (Tokens to NFA)
169 | - Compiler (NFA to DFA)
170 | - Optimizer (Simplify DFA (eg. `char(a), char(b)` -> `string(ab)`) for better performance)
171 | - Engine (Matches an input String using the DFA)
172 |
173 |
174 | ---
175 |
176 |
177 | # Note
178 |
179 | Swift treats `\r\n` as a single `Character`. Use `\n\r` to have both.
180 |
181 |
182 |
183 | # Resources
184 |
185 | - [regexr.com](http://www.regexr.com) - Regex testing
186 | - [swtch.com](https://swtch.com/~rsc/regexp/) - Implementing Regular Expressions
187 | - [Powerset construction](https://en.wikipedia.org/wiki/Powerset_construction) - NFA to DFA
188 | - [Minimization](https://en.wikipedia.org/wiki/DFA_minimization)
189 |
--------------------------------------------------------------------------------
/Sources/Regex/Automaton.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Automaton.swift
3 | // Regex
4 | //
5 |
6 | /// Meant to be used as an opaque type
7 | internal typealias State = Int
8 |
/// A finite automaton over `Symbol`-labelled transitions.
///
/// Transitions are stored as `from -> symbol -> {to}`, so one `(state, symbol)`
/// pair may lead to several targets. Transitions labelled `.None` are the ones
/// followed by the ε-closure methods.
internal class Automaton {
    /// States the automaton may start in.
    internal var initialStates = Set<State>()
    /// States in which the automaton accepts.
    internal var acceptingStates = Set<State>()

    /// `from -> symbol -> targets`. Only `addTransition` writes to it.
    private(set) internal var transitions = [State : [Symbol : Set<State>]]()

    internal init() {}

    /// Number of states handed out so far; equal to the most recent state.
    private var stateCount = 0

    /// Allocates a fresh state.
    ///
    /// - Returns: A state not returned before by this automaton. Numbering
    ///   starts at 1.
    internal func newState() -> State {
        self.stateCount += 1
        return self.stateCount
    }

    /// Records a transition from `from` to `to` on `symbol`. Adding the same
    /// transition twice has no further effect (targets are kept in a set).
    internal func addTransition(from: State, to: State, symbol: Symbol) {
        self.transitions[from, default: [:]][symbol, default: []].insert(to)
    }

    /// Computes the ε-closure of a single state.
    ///
    /// - Parameter state: The state to start from.
    /// - Returns: `state` itself plus every state reachable from it by
    ///   following only `.None` transitions.
    internal func ε_closure(of state: State) -> Set<State> {
        var pending = [state]
        var closure = Set<State>()
        while let current = pending.popLast() {
            // Expand every state at most once. Without this check an ε-cycle
            // (a -ε-> b -ε-> a) would re-queue its members forever.
            guard closure.insert(current).inserted else { continue }
            if let targets = self.transitions[current]?[.None] {
                pending.append(contentsOf: targets)
            }
        }
        return closure
    }

    /// Computes the ε-closure of a set of states.
    ///
    /// - Parameter states: The states to start from.
    /// - Returns: The union of the ε-closures of every member of `states`;
    ///   empty when `states` is empty.
    internal func ε_closure(of states: Set<State>) -> Set<State> {
        return states.reduce(into: Set<State>()) { closure, state in
            closure.formUnion(self.ε_closure(of: state))
        }
    }
}
46 |
--------------------------------------------------------------------------------
/Sources/Regex/Compiler/Compiler.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Compiler.swift
3 | // Regex
4 | //
5 |
/// Converts an NFA into a DFA using the powerset (subset) construction.
///
/// Not instantiable from outside; use `Compiler.compile(_:)`.
internal struct Compiler {
    /// The automaton being built.
    private var dfa = Automaton()

    private init() {}

    /// Builds a new automaton in which every state stands for a set of `nfa`
    /// states and no transition is labelled `.None`.
    ///
    /// - Parameter nfa: The automaton to convert. It is only read.
    /// - Returns: The freshly built automaton.
    internal static func compile(_ nfa: Automaton) -> Automaton {
        var compiler = Compiler()
        compiler.compile(nfa)
        return compiler.dfa
    }

    /// Runs the construction, filling `self.dfa`.
    private mutating func compile(_ nfa: Automaton) {
        /// DFA state -> the NFA states it stands for
        var subsetOf = [State : Set<State>]()
        /// Set of NFA states -> the DFA state standing for it
        var stateFor = [Set<State> : State]()
        /// Records the equivalent states in both maps
        func remember(_ dfaState: State, _ subset: Set<State>) {
            subsetOf[dfaState] = subset
            stateFor[subset] = dfaState
        }

        // The start state stands for the ε-closure of all NFA start states.
        let start = self.dfa.newState()
        self.dfa.initialStates = [start]
        let startSubset = nfa.ε_closure(of: nfa.initialStates)
        remember(start, startSubset)
        if !startSubset.intersection(nfa.acceptingStates).isEmpty {
            self.dfa.acceptingStates.insert(start)
        }

        // States are processed in discovery order; `cursor` marks the next one.
        var worklist = [start]
        var cursor = 0
        while cursor < worklist.count {
            let current = worklist[cursor]
            cursor += 1
            let subset = subsetOf[current]!

            // For every non-ε symbol leaving any member of the subset, collect
            // the ε-closure of all its targets.
            var moves = [Symbol : Set<State>]()
            for member in subset {
                guard let outgoing = nfa.transitions[member] else { continue }
                for (symbol, targets) in outgoing where symbol != .None {
                    moves[symbol, default: []]
                        .formUnion(nfa.ε_closure(of: targets))
                }
            }

            for (symbol, targetSubset) in moves {
                let target: State
                if let known = stateFor[targetSubset] {
                    target = known
                } else {
                    // First time this subset is seen: allocate and queue it.
                    target = self.dfa.newState()
                    worklist.append(target)
                    remember(target, targetSubset)
                    if !targetSubset.intersection(nfa.acceptingStates).isEmpty {
                        self.dfa.acceptingStates.insert(target)
                    }
                }
                self.dfa.addTransition(from: current, to: target,
                                       symbol: symbol)
            }
        }
    }
}
65 |
--------------------------------------------------------------------------------
/Sources/Regex/Lexer/Lexer.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Lexer.swift
3 | // Regex
4 | //
5 |
6 | import Generator
7 |
/// Turns a pattern string into a flat list of `Token`s.
///
/// Not instantiable from outside; use `Lexer.lex(_:)`.
internal struct Lexer {
    /// Tokens produced so far, in pattern order.
    private var tokens = [Token]()

    private init() {}

    /// Lexes `pattern` one `Character` at a time.
    ///
    /// - Parameter pattern: The regular expression source text.
    /// - Returns: The tokens produced, in pattern order.
    /// - Throws: Any error raised by the per-character step; the code visible
    ///   here raises none.
    internal static func lex(_ pattern: String) throws -> [Token] {
        var lexer = Lexer()
        // At most one token is appended per character.
        lexer.tokens.reserveCapacity(pattern.count)
        try pattern.generator().forEach { character in
            try lexer.lex(character)
        }
        return lexer.tokens
    }

    /// Appends the token for a single character. Every character currently
    /// becomes a literal `Token.Character`.
    private mutating func lex(_ character: Character) throws {
        switch character {
        default:
            self.tokens.append(Token.Character(character))
        }
    }
}
27 |
--------------------------------------------------------------------------------
/Sources/Regex/Lexer/Token.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Token.swift
3 | // Regex
4 | //
5 |
/// A lexical unit of a pattern.
internal enum Token: Equatable {
    /// A literal character taken from the pattern.
    case Character(Swift.Character)
}
9 |
--------------------------------------------------------------------------------
/Sources/Regex/Optimizer/Optimizer.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Optimizer.swift
3 | // Regex
4 | //
5 |
/// Post-processing pass over a compiled automaton.
///
/// Not instantiable; use `Optimizer.optimize(_:)`.
internal struct Optimizer {
    private init() {}

    /// Returns `input` unchanged (the very same instance); no optimization is
    /// performed by the code here.
    ///
    /// - Parameter input: The automaton to optimize.
    /// - Returns: `input`.
    internal static func optimize(_ input: Automaton) -> Automaton {
        let optimized = input
        return optimized
    }
}
13 |
--------------------------------------------------------------------------------
/Sources/Regex/Parser/Parser.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Parser.swift
3 | // Regex
4 | //
5 |
/// Builds an automaton from a token list by chaining one new state per token.
///
/// Not instantiable from outside; use `Parser.parse(_:)`.
internal struct Parser {
    /// The automaton under construction. A reference type, so it can be
    /// mutated through this constant.
    private let automaton: Automaton
    /// The state the next transition starts from.
    private var lastState: State

    /// Creates an automaton whose single initial state is also `lastState`.
    private init() {
        // Built from locals so that `lastState` needs no implicitly-unwrapped
        // optional to get through initialization.
        let automaton = Automaton()
        let start = automaton.newState()
        automaton.initialStates = [start]
        self.automaton = automaton
        self.lastState = start
    }

    /// Parses `tokens` into an automaton.
    ///
    /// - Parameter tokens: The tokens to consume, in order.
    /// - Returns: An automaton with one initial state, one transition per
    ///   token, and the last state reached as its only accepting state. For an
    ///   empty token list the initial state is the accepting state.
    /// - Throws: Any error raised by the per-token step; the code visible here
    ///   raises none.
    internal static func parse(_ tokens: [Token]) throws -> Automaton {
        var parser = Parser()
        try tokens.generator().forEach { try parser.parse($0) }
        parser.automaton.acceptingStates = [parser.lastState]
        return parser.automaton
    }

    /// Consumes one token, extending the chain by one state.
    private mutating func parse(_ token: Token) throws {
        switch token {
        case .Character(let character):
            let newState = self.automaton.newState()
            self.automaton.addTransition(from: self.lastState, to: newState,
                                         symbol: .Character(character))
            self.lastState = newState
        }
    }
}
32 |
--------------------------------------------------------------------------------
/Sources/Regex/Parser/Symbol.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Symbol.swift
3 | // Regex
4 | //
5 |
/// The label carried by an automaton transition.
internal enum Symbol: Equatable, Hashable {
    /// No input is consumed (an ε-transition).
    case None

    /// Exactly this character is consumed.
    case Character(Swift.Character)
}
11 |
--------------------------------------------------------------------------------
/Sources/Regex/Regex.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Regex.swift
3 | // Regex
4 | //
5 |
/// A regular expression created from a pattern string.
///
/// - Note: Work in progress. As written here the initializer accepts any
///   pattern without compiling it, and `match(_:)` always returns an empty
///   array. The sample below shows the intended result.
///
/// Usage sample:
///
///     let regex = try! Regex("[a-zA-Z]+")
///     let string = "RegEx is tough, but useful."
///
///     let words = regex.match(string)
///
///     /*
///     words = [
///         RegexMatch(match: "RegEx", groups: []),
///         RegexMatch(match: "is", groups: []),
///         RegexMatch(match: "tough", groups: []),
///         RegexMatch(match: "but", groups: []),
///         RegexMatch(match: "useful", groups: []),
///     ]
///     */
///
/// - Author: David Skrundz
public struct Regex {
    /// Compiles the pattern for later use.
    ///
    /// - Parameter pattern: The regular expression source text.
    /// - Throws: Nothing yet; the body is a placeholder.
    public init(_ pattern: String) throws {
    }

    /// Finds matches in `string`.
    ///
    /// - Parameter string: The text to search.
    /// - Returns: A `[RegexMatch]` containing every found match. Currently
    ///   always empty.
    public func match(_ string: String) -> [RegexMatch] {
        return []
    }
}
38 |
--------------------------------------------------------------------------------
/Sources/Regex/RegexError.swift:
--------------------------------------------------------------------------------
1 | //
2 | // RegexError.swift
3 | // Regex
4 | //
5 |
/// Errors thrown by the Regex library. No cases are defined yet, so no value
/// of this type can currently be created.
public enum RegexError: Error {}
8 |
--------------------------------------------------------------------------------
/Sources/Regex/RegexMatch.swift:
--------------------------------------------------------------------------------
1 | //
2 | // RegexMatch.swift
3 | // Regex
4 | //
5 |
/// A single match found by the Regex engine. It carries no data yet.
public struct RegexMatch {}
9 |
--------------------------------------------------------------------------------
/Tests/LinuxMain.swift:
--------------------------------------------------------------------------------
1 | //
2 | // LinuxMain.swift
3 | // Regex
4 | //
5 |
6 | @testable import RegexTests
7 | import XCTest
8 |
// Every test suite to run on Linux, in execution order: the pipeline stages
// first, then the public API.
let allSuites: [XCTestCaseEntry] = [
    testCase(LexerTests.allTests),
    testCase(ParserTests.allTests),
    testCase(CompilerTests.allTests),
    testCase(OptimizerTests.allTests),

    testCase(RegexTests.allTests),
]

XCTMain(allSuites)
17 |
--------------------------------------------------------------------------------
/Tests/RegexTests/CompilerTests.swift:
--------------------------------------------------------------------------------
1 | //
2 | // CompilerTests.swift
3 | // RegexTests
4 | //
5 |
6 | @testable import Regex
7 | import XCTest
8 |
/// Checks the NFA -> DFA conversion against two hand-computed automata.
///
/// The compiler numbers its states in an unspecified order, so each test first
/// identifies the states by walking known transitions from the initial state
/// and only then compares the accepting set and the transition table.
class CompilerTests: XCTestCase {
    /// Follows the `character` transition out of `state` in `dfa`.
    ///
    /// - Returns: A target of that transition, or `nil` after recording a test
    ///   failure with `message` when there is no such transition.
    private func successor(of state: State, on character: Character,
                           in dfa: Automaton, orFail message: String,
                           file: StaticString = #file,
                           line: UInt = #line) -> State? {
        guard let next = dfa.transitions[state]?[.Character(character)]?.first else {
            XCTFail(message, file: file, line: line)
            return nil
        }
        return next
    }

    /// An NFA with ε-transitions that converts to a four-state DFA in which
    /// every state accepts.
    func testConversion1() {
        let nfa = Automaton()

        let s1 = nfa.newState()
        let s2 = nfa.newState()
        let s3 = nfa.newState()
        let s4 = nfa.newState()

        nfa.initialStates = [s1]
        nfa.acceptingStates = [s3, s4]

        let edges: [(from: State, to: State, symbol: Symbol)] = [
            (s1, s2, .Character("0")),
            (s2, s2, .Character("1")),
            (s2, s4, .Character("1")),
            (s3, s4, .Character("0")),
            (s4, s3, .Character("0")),

            (s1, s3, .None),
            (s3, s2, .None),
        ]
        for edge in edges {
            nfa.addTransition(from: edge.from, to: edge.to, symbol: edge.symbol)
        }

        let dfa = Compiler.compile(nfa)

        // Identify the DFA states by walking "0" three times from the start.
        guard let s_1 = dfa.initialStates.first else {
            XCTFail("No s_1")
            return
        }
        guard
            let s_2 = successor(of: s_1, on: "0", in: dfa, orFail: "No s_2"),
            let s_3 = successor(of: s_2, on: "0", in: dfa, orFail: "No s_3"),
            let s_4 = successor(of: s_3, on: "0", in: dfa, orFail: "No s_4")
        else {
            return
        }

        let expectedTransitions: [State : [Symbol : Set<State>]] = [
            s_1 : [
                .Character("0") : [s_2],
                .Character("1") : [s_2]
            ],
            s_2 : [
                .Character("1") : [s_2],
                .Character("0") : [s_3]
            ],
            s_3 : [
                .Character("1") : [s_2],
                .Character("0") : [s_4]
            ],
            s_4 : [
                .Character("0") : [s_3]
            ]
        ]

        XCTAssertEqual(dfa.acceptingStates, [s_1, s_2, s_3, s_4])
        XCTAssertEqual(dfa.transitions, expectedTransitions)
    }

    /// An ε-free NFA over {0, 1} that converts to a sixteen-state DFA.
    func testConversion2() {
        let nfa = Automaton()

        let s1 = nfa.newState()
        let s2 = nfa.newState()
        let s3 = nfa.newState()
        let s4 = nfa.newState()
        let s5 = nfa.newState()

        nfa.initialStates = [s1]
        nfa.acceptingStates = [s5]

        let edges: [(from: State, to: State, symbol: Symbol)] = [
            (s1, s1, .Character("0")),
            (s1, s1, .Character("1")),
            (s1, s2, .Character("1")),
            (s2, s3, .Character("0")),
            (s2, s3, .Character("1")),
            (s3, s4, .Character("0")),
            (s3, s4, .Character("1")),
            (s4, s5, .Character("0")),
            (s4, s5, .Character("1")),
        ]
        for edge in edges {
            nfa.addTransition(from: edge.from, to: edge.to, symbol: edge.symbol)
        }

        let dfa = Compiler.compile(nfa)

        // Identify all sixteen DFA states by walking from the start state.
        guard let s_5 = dfa.initialStates.first else {
            XCTFail("No s_5")
            return
        }
        guard
            let s_4 = successor(of: s_5, on: "1", in: dfa, orFail: "No s_4"),
            let s_1 = successor(of: s_4, on: "0", in: dfa, orFail: "No s_1"),
            let s_2 = successor(of: s_1, on: "0", in: dfa, orFail: "No s_2"),
            let s_3 = successor(of: s_2, on: "1", in: dfa, orFail: "No s_3"),
            let s_6 = successor(of: s_2, on: "0", in: dfa, orFail: "No s_6"),
            let s_10 = successor(of: s_4, on: "1", in: dfa, orFail: "No s_10"),
            let s_16 = successor(of: s_10, on: "1", in: dfa, orFail: "No s_16"),
            let s_15 = successor(of: s_16, on: "1", in: dfa, orFail: "No s_15"),
            let s_14 = successor(of: s_15, on: "0", in: dfa, orFail: "No s_14"),
            let s_13 = successor(of: s_14, on: "1", in: dfa, orFail: "No s_13"),
            let s_12 = successor(of: s_13, on: "1", in: dfa, orFail: "No s_12"),
            let s_11 = successor(of: s_12, on: "0", in: dfa, orFail: "No s_11"),
            let s_9 = successor(of: s_11, on: "0", in: dfa, orFail: "No s_9"),
            let s_8 = successor(of: s_1, on: "1", in: dfa, orFail: "No s_8"),
            let s_7 = successor(of: s_8, on: "0", in: dfa, orFail: "No s_7")
        else {
            return
        }

        /// One table row: the targets on "0" and on "1".
        func row(_ onZero: State, _ onOne: State) -> [Symbol : Set<State>] {
            return [
                .Character("0") : [onZero],
                .Character("1") : [onOne]
            ]
        }

        let expectedTransitions: [State : [Symbol : Set<State>]] = [
            s_1 : row(s_2, s_8),
            s_2 : row(s_6, s_3),
            s_3 : row(s_1, s_10),
            s_4 : row(s_1, s_10),
            s_5 : row(s_5, s_4),
            s_6 : row(s_5, s_4),
            s_7 : row(s_2, s_8),
            s_8 : row(s_7, s_12),
            s_9 : row(s_6, s_3),
            s_10 : row(s_11, s_16),
            s_11 : row(s_9, s_13),
            s_12 : row(s_11, s_16),
            s_13 : row(s_7, s_12),
            s_14 : row(s_9, s_13),
            s_15 : row(s_14, s_15),
            s_16 : row(s_14, s_15)
        ]

        XCTAssertEqual(dfa.acceptingStates, [s_3, s_6, s_7, s_9, s_12, s_13, s_14, s_15])
        XCTAssertEqual(dfa.transitions, expectedTransitions)
    }

    static var allTests = [
        ("testConversion1", testConversion1),
        ("testConversion2", testConversion2),
    ]
}
254 |
--------------------------------------------------------------------------------
/Tests/RegexTests/LexerTests.swift:
--------------------------------------------------------------------------------
1 | //
2 | // LexerTests.swift
3 | // RegexTests
4 | //
5 |
6 | @testable import Regex
7 | import XCTest
8 |
/// Tests for `Lexer`.
class LexerTests: XCTestCase {
    /// Plain characters become one `.Character` token each, in order.
    ///
    /// Declared `throws` so that a lexing error is reported as a test failure
    /// instead of crashing the whole test run through `try!`.
    func testCharacters() throws {
        let tokens = try Lexer.lex("abcd")
        XCTAssertEqual(tokens, [
            .Character("a"),
            .Character("b"),
            .Character("c"),
            .Character("d"),
        ])
    }

    static var allTests = [
        ("testCharacters", testCharacters),
    ]
}
24 |
--------------------------------------------------------------------------------
/Tests/RegexTests/OptimizerTests.swift:
--------------------------------------------------------------------------------
1 | //
2 | // OptimizerTests.swift
3 | // Regex
4 | //
5 |
6 | @testable import Regex
7 | import XCTest
8 |
/// Tests for `Optimizer`.
class OptimizerTests: XCTestCase {
    /// Optimizing an empty automaton yields an automaton that is still empty.
    func testNOOP() {
        let optimized = Optimizer.optimize(Automaton())

        XCTAssertEqual(optimized.initialStates, [])
        XCTAssertEqual(optimized.acceptingStates, [])
        XCTAssertEqual(optimized.transitions, [:])
    }

    static var allTests = [
        ("testNOOP", testNOOP),
    ]
}
22 |
--------------------------------------------------------------------------------
/Tests/RegexTests/ParserTests.swift:
--------------------------------------------------------------------------------
1 | //
2 | // ParserTests.swift
3 | // RegexTests
4 | //
5 |
6 | @testable import Regex
7 | import XCTest
8 |
/// Tests for `Parser`.
class ParserTests: XCTestCase {
    /// Four character tokens produce a five-state chain: state 1 is initial,
    /// state 5 is accepting, and each token labels one transition.
    ///
    /// Declared `throws` so that a parsing error is reported as a test failure
    /// instead of crashing the whole test run through `try!`.
    func testCharacters() throws {
        let tokens: [Token] = [
            .Character("a"),
            .Character("b"),
            .Character("c"),
            .Character("d"),
        ]
        let automata = try Parser.parse(tokens)

        XCTAssertEqual(automata.initialStates, [1])
        XCTAssertEqual(automata.acceptingStates, [5])

        XCTAssertEqual(automata.transitions, [
            1 : [.Character("a") : [2]],
            2 : [.Character("b") : [3]],
            3 : [.Character("c") : [4]],
            4 : [.Character("d") : [5]],
        ])
    }

    static var allTests = [
        ("testCharacters", testCharacters),
    ]
}
34 |
--------------------------------------------------------------------------------
/Tests/RegexTests/RegexTests.swift:
--------------------------------------------------------------------------------
1 | //
2 | // RegexTests.swift
3 | // Regex
4 | //
5 |
6 | @testable import Regex
7 | import XCTest
8 |
/// Tests for the public `Regex` API.
class RegexTests: XCTestCase {
    /// Placeholder that asserts nothing; it keeps the suite registered.
    func testTest() {}

    static var allTests = [
        ("testTest", testTest),
    ]
}
17 |
--------------------------------------------------------------------------------