├── .gitignore ├── .travis.yml ├── LICENSE.md ├── Package.swift ├── README.md ├── Sources └── Regex │ ├── Automaton.swift │ ├── Compiler │ └── Compiler.swift │ ├── Lexer │ ├── Lexer.swift │ └── Token.swift │ ├── Optimizer │ └── Optimizer.swift │ ├── Parser │ ├── Parser.swift │ └── Symbol.swift │ ├── Regex.swift │ ├── RegexError.swift │ └── RegexMatch.swift └── Tests ├── LinuxMain.swift └── RegexTests ├── CompilerTests.swift ├── LexerTests.swift ├── OptimizerTests.swift ├── ParserTests.swift └── RegexTests.swift /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /.build 3 | /Packages 4 | /*.xcodeproj 5 | Package.resolved 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | os: 2 | - osx 3 | - linux 4 | 5 | language: generic 6 | 7 | sudo: required 8 | dist: trusty 9 | 10 | osx_image: xcode9.3 11 | 12 | script: 13 | - eval "$(curl -sL https://swift.skrundz.ca/ci)" 14 | - eval "$(curl -sL https://swift.skrundz.ca/codecov)" 15 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 
18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 
48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 
90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 
160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version:4.1 2 | // 3 | // Package.swift 4 | // Regex 5 | // 6 | 7 | import PackageDescription 8 | 9 | let package = Package( 10 | name: "Regex", 11 | products: [ 12 | .library( 13 | name: "Regex", 14 | targets: ["Regex"]), 15 | .library( 16 | name: "sRegex", 17 | type: .static, 18 | targets: ["Regex"]), 19 | .library( 20 | name: "dRegex", 21 | type: .dynamic, 22 | targets: ["Regex"]) 23 | ], 24 | dependencies: [ 25 | .package(url: "https://github.com/DavidSkrundz/Collections.git", 26 | .upToNextMinor(from: "1.1.0")) 27 | ], 28 | targets: [ 29 | .target( 30 | name: "Regex", 31 | dependencies: ["Generator"]), 32 | .testTarget( 33 | name: "RegexTests", 34 | dependencies: ["Regex"]) 35 | ] 36 | ) 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Regex (V2 WIP) [![Swift Version](https://img.shields.io/badge/Swift-4.1-orange.svg)](https://swift.org/download/#releases) [![Platforms](https://img.shields.io/badge/Platforms-macOS%20|%20Linux-lightgray.svg)](https://swift.org/download/#releases) [![Build Status](https://travis-ci.org/DavidSkrundz/Regex.svg?branch=master)](https://travis-ci.org/DavidSkrundz/Regex) [![Codebeat Status](https://codebeat.co/badges/d3bc5b39-aa73-47f5-94e9-3c019368341e)](https://codebeat.co/projects/github-com-davidskrundz-regex) 
[![Codecov](https://codecov.io/gh/DavidSkrundz/Regex/branch/master/graph/badge.svg)](https://codecov.io/gh/DavidSkrundz/Regex) 2 | 3 | A pure Swift implementation of a Regular Expression Engine 4 | 5 | **Trying again with V2 using DFAs instead of NFAs to get grep-like performance** 6 | 7 | 8 | ## Usage 9 | 10 | To avoid compiling overhead it is possible to create a `Regex` instance 11 | 12 | ```Swift 13 | // Compile the expression 14 | let regex = try! Regex(pattern: "[a-zA-Z]+") 15 | 16 | let string = "RegEx is tough, but useful." 17 | 18 | // Search for matches 19 | let words = regex.match(string) 20 | 21 | /* 22 | words = [ 23 | RegexMatch(match: "RegEx", groups: []), 24 | RegexMatch(match: "is", groups: []), 25 | RegexMatch(match: "tough", groups: []), 26 | RegexMatch(match: "but", groups: []), 27 | RegexMatch(match: "useful", groups: []), 28 | ] 29 | */ 30 | ``` 31 | 32 | If compiling overhead is not an issue it is possible to use the `=~` operator to match a string 33 | 34 | ```Swift 35 | let fourLetterWords = "drink beer, it's very nice!" =~ "\\b\\w{4}\\b" ?? [] 36 | 37 | /* 38 | fourLetterWords = [ 39 | RegexMatch(match: "beer", groups: []), 40 | RegexMatch(match: "very", groups: []), 41 | RegexMatch(match: "nice", groups: []), 42 | ] 43 | */ 44 | ``` 45 | 46 | By default the `Global` flag is active. To change which flags are active, add a `/` at the start of the pattern, and add a `/` followed by the flags at the end. The available flags are: 47 | 48 | - `g` `Global` - Allows multiple matches 49 | - `i` `Case Insensitive` - Case insensitive matching 50 | - `m` `Multiline` - `^` and `$` also match the beginning and end of a line 51 | 52 | ```Swift 53 | // Global and Case Insensitive search 54 | let regex = try! 
Regex(pattern: "/\\w+/ig") 55 | ``` 56 | 57 | 58 | ## Supported Operations 59 | 60 | ### Character Classes 61 | | Pattern | Description | Supported | 62 | |---------|------------|-----------| 63 | | `.` | `[^\n\r]` | | 64 | | `[^]` | `[\s\S]` | | 65 | | `\w` | `[A-Za-z0-9_]` | | 66 | | `\W` | `[^A-Za-z0-9_]` | | 67 | | `\d` | `[0-9]` | | 68 | | `\D` | `[^0-9]` | | 69 | | `\s` | `[\ \r\n\t\v\f]` | | 70 | | `\S` | `[^\ \r\n\t\v\f]` | | 71 | | `[ABC]` | Any in the set | | 72 | | `[^ABC]` | Any not in the set | | 73 | | `[A-Z]` | Any in the range inclusively | | 74 | 75 | ### Anchors (Match positions not characters) 76 | | Pattern | Description | Supported | 77 | |---------|------------|-----------| 78 | | `^` | Beginning of string | | 79 | | `$` | End of string | | 80 | | `\b` | Word boundary | | 81 | | `\B` | Not word boundary | | 82 | 83 | ### Escaped Characters 84 | | Pattern | Description | Supported | 85 | |---------|------------|-----------| 86 | | `\0` | Octal escaped character | | 87 | | `\00` | Octal escaped character | | 88 | | `\000` | Octal escaped character | | 89 | | `\xFF` | Hex escaped character | | 90 | | `\uFFFF` | Unicode escaped character | | 91 | | `\cA` | Control character | | 92 | | `\t` | Tab | | 93 | | `\n` | Newline | | 94 | | `\v` | Vertical tab | | 95 | | `\f` | Form feed | | 96 | | `\r` | Carriage return | | 97 | | `\0` | Null | | 98 | | `\.` | `.` | | 99 | | `\\` | `\` | | 100 | | `\+` | `+` | | 101 | | `\*` | `*` | | 102 | | `\?` | `?` | | 103 | | `\^` | `^` | | 104 | | `\$` | `$` | | 105 | | `\{` | `{` | | 106 | | `\}` | `}` | | 107 | | `\[` | `[` | | 108 | | `\]` | `]` | | 109 | | `\(` | `(` | | 110 | | `\)` | `)` | | 111 | | `\/` | `/` | | 112 | | `\|` | `|` | | 113 | 114 | ### Groups and Lookaround 115 | | Pattern | Description | Supported | 116 | |---------|------------|-----------| 117 | | `(ABC)` | Capture group | | 118 | | `(<name>ABC)` | Named capture group |
  • [ ]
| 119 | | `\1` | Back reference |
  • [ ]
| 120 | | `\'name'` | Named back reference |
  • [ ]
| 121 | | `(?:ABC)` | Non-capturing group |
  • [ ]
| 122 | | `(?=ABC)` | Positive lookahead |
  • [ ]
| 123 | | `(?!ABC)` | Negative lookahead |
  • [ ]
| 124 | | `(?<=ABC)` | Positive lookbehind |
  • [ ]
| 125 | | `(?<!ABC)` | Negative lookbehind |
  • [ ]
  • | 126 | 127 | ### Greedy Quantifiers 128 | | Pattern | Description | Supported | 129 | |---------|------------|-----------| 130 | | `+` | One or more |
    • [ ]
    | 131 | | `*` | Zero or more |
    • [ ]
    | 132 | | `?` | Optional |
    • [ ]
    | 133 | | `{n}` | n |
    • [ ]
    | 134 | | `{,}` | Same as `*` |
    • [ ]
    | 135 | | `{,n}` | n or less |
    • [ ]
    | 136 | | `{n,}` | n or more |
    • [ ]
    | 137 | | `{n,m}` | n to m |
    • [ ]
    | 138 | 139 | ### Lazy Quantifiers 140 | | Pattern | Description | Supported | 141 | |---------|------------|-----------| 142 | | `+?` | One or more |
    • [ ]
    | 143 | | `*?` | Zero or more |
    • [ ]
    | 144 | | `??` | Optional |
    • [ ]
    | 145 | | `{n}?` | n |
    • [ ]
    | 146 | | `{,n}?` | n or less |
    • [ ]
    | 147 | | `{n,}?` | n or more |
    • [ ]
    | 148 | | `{n,m}?` | n to m |
    • [ ]
    | 149 | 150 | ### Alternation 151 | | Pattern | Description | Supported | 152 | |---------|------------|-----------| 153 | | `\|` | Everything before or everything after |
    • [ ]
    | 154 | 155 | ### Flags 156 | | Pattern | Description | Supported | 157 | |---------|------------|-----------| 158 | | `i` | Case insensitive |
    • [ ]
    | 159 | | `g` | Global |
    • [ ]
    | 160 | | `m` | Multiline |
    • [ ]
    | 161 | 162 | 163 | ## Inner Workings 164 | 165 | (Similar to before) 166 | 167 | - Lexer (String input to Tokens) 168 | - Parser (Tokens to NFA) 169 | - Compiler (NFA to DFA) 170 | - Optimizer (Simplify DFA (eg. `char(a), char(b)` -> `string(ab)`) for better performance) 171 | - Engine (Matches an input String using the DFA) 172 | 173 | 174 | --- 175 | 176 | 177 | # Note 178 | 179 | Swift treats `\r\n` as a single `Character`. Use `\n\r` to have both. 180 | 181 | 182 | 183 | # Resources 184 | 185 | - [regexr.com](http://www.regexr.com) - Regex testing 186 | - [swtch.com](https://swtch.com/~rsc/regexp/) - Implementing Regular Expressions 187 | - [Powerset construction](https://en.wikipedia.org/wiki/Powerset_construction) - NFA to DFA 188 | - [Minimization](https://en.wikipedia.org/wiki/DFA_minimization) 189 | -------------------------------------------------------------------------------- /Sources/Regex/Automaton.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Automaton.swift 3 | // Regex 4 | // 5 | 6 | /// Meant to be used as an opaque type 7 | internal typealias State = Int 8 | 9 | internal class Automaton { 10 | internal var initialStates = Set() 11 | internal var acceptingStates = Set() 12 | 13 | private(set) internal var transitions = [State : [Symbol : Set]]() 14 | 15 | internal init() {} 16 | 17 | private var stateCount = 0; 18 | internal func newState() -> State { 19 | self.stateCount += 1 20 | return stateCount 21 | } 22 | 23 | internal func addTransition(from: State, to: State, symbol: Symbol) { 24 | self.transitions[from, default: [:]][symbol, default: []].insert(to) 25 | } 26 | 27 | internal func ε_closure(of state: State) -> Set { 28 | var stack = [state] 29 | var closure = Set() 30 | while !stack.isEmpty { 31 | let s = stack.removeFirst() 32 | closure.insert(s) 33 | if let newStates = self.transitions[s]?[.None] { 34 | stack.append(contentsOf: newStates) 35 | } 36 | } 37 | return closure 38 | } 
39 | 40 | internal func ε_closure(of states: Set) -> Set { 41 | return states 42 | .map { self.ε_closure(of: $0) } 43 | .reduce(Set()) { $0.union($1) } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /Sources/Regex/Compiler/Compiler.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Compiler.swift 3 | // Regex 4 | // 5 | 6 | internal struct Compiler { 7 | private var dfa = Automaton() 8 | 9 | private init() {} 10 | 11 | internal static func compile(_ nfa: Automaton) -> Automaton { 12 | var compiler = Compiler() 13 | compiler.compile(nfa) 14 | return compiler.dfa 15 | } 16 | 17 | private mutating func compile(_ nfa: Automaton) { 18 | /// `NewState -> [OldState]` 19 | var newStateMap = [State : Set]() 20 | /// `[OldState] -> NewState` 21 | var oldStateMap = [Set : State]() 22 | /// Register the equivalent states in the state maps 23 | func register(_ newState: State, _ oldState: Set) { 24 | newStateMap[newState] = oldState 25 | oldStateMap[oldState] = newState 26 | } 27 | 28 | let initialState = self.dfa.newState() 29 | self.dfa.initialStates = [initialState] 30 | let oldInitialState = nfa.ε_closure(of: nfa.initialStates) 31 | register(initialState, oldInitialState) 32 | if !oldInitialState.intersection(nfa.acceptingStates).isEmpty { 33 | dfa.acceptingStates.insert(initialState) 34 | } 35 | 36 | var stateQueue = [initialState] 37 | while !stateQueue.isEmpty { 38 | let state = stateQueue.removeFirst() 39 | let oldStates = newStateMap[state]! 40 | 41 | let transitionList = oldStates 42 | .compactMap { nfa.transitions[$0] } 43 | .flatMap { $0.map { $0 } } 44 | .filter { $0.key != .None } 45 | let transitions = transitionList 46 | .reduce(into: [Symbol : Set]()) { dict, pair in 47 | dict[pair.key, default: []] 48 | .formUnion(nfa.ε_closure(of: pair.value)) 49 | } 50 | for transition in transitions { 51 | let newState = oldStateMap[transition.value] ?? 
self.dfa.newState() 52 | if oldStateMap[transition.value] == nil { 53 | stateQueue.append(newState) 54 | register(newState, transition.value) 55 | if !newStateMap[newState]!.intersection(nfa.acceptingStates).isEmpty { 56 | dfa.acceptingStates.insert(newState) 57 | } 58 | } 59 | self.dfa.addTransition(from: state, to: newState, 60 | symbol: transition.key) 61 | } 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /Sources/Regex/Lexer/Lexer.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Lexer.swift 3 | // Regex 4 | // 5 | 6 | import Generator 7 | 8 | internal struct Lexer { 9 | private var tokens = [Token]() 10 | 11 | private init() {} 12 | 13 | internal static func lex(_ pattern: String) throws -> [Token] { 14 | var lexer = Lexer() 15 | lexer.tokens.reserveCapacity(pattern.count) 16 | try pattern.generator().forEach { try lexer.lex($0) } 17 | return lexer.tokens 18 | } 19 | 20 | private mutating func lex(_ character: Character) throws { 21 | switch character { 22 | default: 23 | self.tokens.append(.Character(character)) 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /Sources/Regex/Lexer/Token.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Token.swift 3 | // Regex 4 | // 5 | 6 | internal enum Token: Equatable { 7 | case Character(Swift.Character) 8 | } 9 | -------------------------------------------------------------------------------- /Sources/Regex/Optimizer/Optimizer.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Optimizer.swift 3 | // Regex 4 | // 5 | 6 | internal struct Optimizer { 7 | private init() {} 8 | 9 | internal static func optimize(_ input: Automaton) -> Automaton { 10 | return input 11 | } 12 | } 13 | 
-------------------------------------------------------------------------------- /Sources/Regex/Parser/Parser.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Parser.swift 3 | // Regex 4 | // 5 | 6 | internal struct Parser { 7 | private var automaton = Automaton() 8 | private var lastState: State! 9 | 10 | private init() { 11 | self.lastState = self.automaton.newState() 12 | self.automaton.initialStates = [self.lastState] 13 | } 14 | 15 | internal static func parse(_ tokens: [Token]) throws -> Automaton { 16 | var parser = Parser() 17 | try tokens.generator().forEach { try parser.parse($0) } 18 | parser.automaton.acceptingStates = [parser.lastState] 19 | return parser.automaton 20 | } 21 | 22 | private mutating func parse(_ token: Token) throws { 23 | switch token { 24 | case .Character(let character): 25 | let newState = self.automaton.newState() 26 | self.automaton.addTransition(from: lastState, to: newState, 27 | symbol: .Character(character)) 28 | self.lastState = newState 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /Sources/Regex/Parser/Symbol.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Symbol.swift 3 | // Regex 4 | // 5 | 6 | internal enum Symbol: Equatable, Hashable { 7 | case None 8 | 9 | case Character(Swift.Character) 10 | } 11 | -------------------------------------------------------------------------------- /Sources/Regex/Regex.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Regex.swift 3 | // Regex 4 | // 5 | 6 | /// Manages an `Engine` and compiles the `pattern` into a runnable format. 7 | /// 8 | /// Usage sample: 9 | /// 10 | /// let regex = try! Regex(pattern: "[a-zA-Z]+") 11 | /// let string = "RegEx is tough, but useful." 
12 | /// 13 | /// let words = regex.match(string) 14 | /// 15 | /// /* 16 | /// words = [ 17 | /// RegexMatch(match: "RegEx", groups: []), 18 | /// RegexMatch(match: "is", groups: []), 19 | /// RegexMatch(match: "tough", groups: []), 20 | /// RegexMatch(match: "but", groups: []), 21 | /// RegexMatch(match: "useful", groups: []), 22 | /// ] 23 | /// */ 24 | /// 25 | /// - Author: David Skrundz 26 | public struct Regex { 27 | /// Compiles the pattern for later use 28 | public init(_ pattern: String) throws { 29 | } 30 | 31 | /// Find matches in `string` 32 | /// 33 | /// - Returns: A `[RegexMatch]` containing every found match 34 | public func match(_ string: String) -> [RegexMatch] { 35 | return [] 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /Sources/Regex/RegexError.swift: -------------------------------------------------------------------------------- 1 | // 2 | // RegexError.swift 3 | // Regex 4 | // 5 | 6 | public enum RegexError: Error { 7 | } 8 | -------------------------------------------------------------------------------- /Sources/Regex/RegexMatch.swift: -------------------------------------------------------------------------------- 1 | // 2 | // RegexMatch.swift 3 | // Regex 4 | // 5 | 6 | /// Represents a match found by the Regex engine 7 | public struct RegexMatch { 8 | } 9 | -------------------------------------------------------------------------------- /Tests/LinuxMain.swift: -------------------------------------------------------------------------------- 1 | // 2 | // LinuxMain.swift 3 | // Regex 4 | // 5 | 6 | @testable import RegexTests 7 | import XCTest 8 | 9 | XCTMain([ 10 | testCase(LexerTests.allTests), 11 | testCase(ParserTests.allTests), 12 | testCase(CompilerTests.allTests), 13 | testCase(OptimizerTests.allTests), 14 | 15 | testCase(RegexTests.allTests), 16 | ]) 17 | -------------------------------------------------------------------------------- 
/Tests/RegexTests/CompilerTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // CompilerTests.swift 3 | // RegexTests 4 | // 5 | 6 | @testable import Regex 7 | import XCTest 8 | 9 | class CompilerTests: XCTestCase { 10 | func testConversion1() { 11 | let nfa = Automaton() 12 | 13 | let s1 = nfa.newState() 14 | let s2 = nfa.newState() 15 | let s3 = nfa.newState() 16 | let s4 = nfa.newState() 17 | 18 | nfa.initialStates = [s1] 19 | nfa.acceptingStates = [s3, s4] 20 | 21 | nfa.addTransition(from: s1, to: s2, symbol: .Character("0")) 22 | nfa.addTransition(from: s2, to: s2, symbol: .Character("1")) 23 | nfa.addTransition(from: s2, to: s4, symbol: .Character("1")) 24 | nfa.addTransition(from: s3, to: s4, symbol: .Character("0")) 25 | nfa.addTransition(from: s4, to: s3, symbol: .Character("0")) 26 | 27 | nfa.addTransition(from: s1, to: s3, symbol: .None) 28 | nfa.addTransition(from: s3, to: s2, symbol: .None) 29 | 30 | 31 | let dfa = Compiler.compile(nfa) 32 | 33 | if dfa.initialStates.isEmpty { 34 | XCTFail("No s_1") 35 | return 36 | } 37 | let s_1 = dfa.initialStates.first! 38 | if dfa.transitions[s_1]?[.Character("0")]?.isEmpty ?? true { 39 | XCTFail("No s_2") 40 | return 41 | } 42 | let s_2 = dfa.transitions[s_1]![.Character("0")]!.first! 43 | if dfa.transitions[s_2]?[.Character("0")]?.isEmpty ?? true { 44 | XCTFail("No s_3") 45 | return 46 | } 47 | let s_3 = dfa.transitions[s_2]![.Character("0")]!.first! 48 | if dfa.transitions[s_3]?[.Character("0")]?.isEmpty ?? true { 49 | XCTFail("No s_4") 50 | return 51 | } 52 | let s_4 = dfa.transitions[s_3]![.Character("0")]!.first! 
53 | 54 | XCTAssertEqual(dfa.acceptingStates, [s_1, s_2, s_3, s_4]) 55 | XCTAssertEqual(dfa.transitions, [ 56 | s_1 : [ 57 | .Character("0") : [s_2], 58 | .Character("1") : [s_2] 59 | ], 60 | s_2 : [ 61 | .Character("1") : [s_2], 62 | .Character("0") : [s_3] 63 | ], 64 | s_3 : [ 65 | .Character("1") : [s_2], 66 | .Character("0") : [s_4] 67 | ], 68 | s_4 : [ 69 | .Character("0") : [s_3] 70 | ] 71 | ]) 72 | } 73 | 74 | func testConversion2() { 75 | let nfa = Automaton() 76 | 77 | let s1 = nfa.newState() 78 | let s2 = nfa.newState() 79 | let s3 = nfa.newState() 80 | let s4 = nfa.newState() 81 | let s5 = nfa.newState() 82 | 83 | nfa.initialStates = [s1] 84 | nfa.acceptingStates = [s5] 85 | 86 | nfa.addTransition(from: s1, to: s1, symbol: .Character("0")) 87 | nfa.addTransition(from: s1, to: s1, symbol: .Character("1")) 88 | nfa.addTransition(from: s1, to: s2, symbol: .Character("1")) 89 | nfa.addTransition(from: s2, to: s3, symbol: .Character("0")) 90 | nfa.addTransition(from: s2, to: s3, symbol: .Character("1")) 91 | nfa.addTransition(from: s3, to: s4, symbol: .Character("0")) 92 | nfa.addTransition(from: s3, to: s4, symbol: .Character("1")) 93 | nfa.addTransition(from: s4, to: s5, symbol: .Character("0")) 94 | nfa.addTransition(from: s4, to: s5, symbol: .Character("1")) 95 | 96 | 97 | let dfa = Compiler.compile(nfa) 98 | 99 | if dfa.initialStates.isEmpty { 100 | XCTFail("No s_5") 101 | return 102 | } 103 | let s_5 = dfa.initialStates.first! 104 | if dfa.transitions[s_5]?[.Character("1")]?.isEmpty ?? true { 105 | XCTFail("No s_4") 106 | return 107 | } 108 | let s_4 = dfa.transitions[s_5]![.Character("1")]!.first! 109 | if dfa.transitions[s_4]?[.Character("0")]?.isEmpty ?? true { 110 | XCTFail("No s_1") 111 | return 112 | } 113 | let s_1 = dfa.transitions[s_4]![.Character("0")]!.first! 114 | if dfa.transitions[s_1]?[.Character("0")]?.isEmpty ?? true { 115 | XCTFail("No s_2") 116 | return 117 | } 118 | let s_2 = dfa.transitions[s_1]![.Character("0")]!.first! 
119 | if dfa.transitions[s_2]?[.Character("1")]?.isEmpty ?? true { 120 | XCTFail("No s_3") 121 | return 122 | } 123 | let s_3 = dfa.transitions[s_2]![.Character("1")]!.first! 124 | if dfa.transitions[s_2]?[.Character("0")]?.isEmpty ?? true { 125 | XCTFail("No s_6") 126 | return 127 | } 128 | let s_6 = dfa.transitions[s_2]![.Character("0")]!.first! 129 | if dfa.transitions[s_4]?[.Character("1")]?.isEmpty ?? true { 130 | XCTFail("No s_10") 131 | return 132 | } 133 | let s_10 = dfa.transitions[s_4]![.Character("1")]!.first! 134 | if dfa.transitions[s_10]?[.Character("1")]?.isEmpty ?? true { 135 | XCTFail("No s_16") 136 | return 137 | } 138 | let s_16 = dfa.transitions[s_10]![.Character("1")]!.first! 139 | if dfa.transitions[s_16]?[.Character("1")]?.isEmpty ?? true { 140 | XCTFail("No s_15") 141 | return 142 | } 143 | let s_15 = dfa.transitions[s_16]![.Character("1")]!.first! 144 | if dfa.transitions[s_15]?[.Character("0")]?.isEmpty ?? true { 145 | XCTFail("No s_14") 146 | return 147 | } 148 | let s_14 = dfa.transitions[s_15]![.Character("0")]!.first! 149 | if dfa.transitions[s_14]?[.Character("1")]?.isEmpty ?? true { 150 | XCTFail("No s_13") 151 | return 152 | } 153 | let s_13 = dfa.transitions[s_14]![.Character("1")]!.first! 154 | if dfa.transitions[s_13]?[.Character("1")]?.isEmpty ?? true { 155 | XCTFail("No s_12") 156 | return 157 | } 158 | let s_12 = dfa.transitions[s_13]![.Character("1")]!.first! 159 | if dfa.transitions[s_12]?[.Character("0")]?.isEmpty ?? true { 160 | XCTFail("No s_11") 161 | return 162 | } 163 | let s_11 = dfa.transitions[s_12]![.Character("0")]!.first! 164 | if dfa.transitions[s_11]?[.Character("0")]?.isEmpty ?? true { 165 | XCTFail("No s_9") 166 | return 167 | } 168 | let s_9 = dfa.transitions[s_11]![.Character("0")]!.first! 169 | if dfa.transitions[s_1]?[.Character("1")]?.isEmpty ?? true { 170 | XCTFail("No s_8") 171 | return 172 | } 173 | let s_8 = dfa.transitions[s_1]![.Character("1")]!.first! 
174 | if dfa.transitions[s_8]?[.Character("0")]?.isEmpty ?? true { 175 | XCTFail("No s_7") 176 | return 177 | } 178 | let s_7 = dfa.transitions[s_8]![.Character("0")]!.first! 179 | 180 | XCTAssertEqual(dfa.acceptingStates, [s_3, s_6, s_7, s_9, s_12, s_13, s_14, s_15]) 181 | XCTAssertEqual(dfa.transitions, [ 182 | s_1 : [ 183 | .Character("0") : [s_2], 184 | .Character("1") : [s_8] 185 | ], 186 | s_2 : [ 187 | .Character("0") : [s_6], 188 | .Character("1") : [s_3] 189 | ], 190 | s_3 : [ 191 | .Character("0") : [s_1], 192 | .Character("1") : [s_10] 193 | ], 194 | s_4 : [ 195 | .Character("0") : [s_1], 196 | .Character("1") : [s_10] 197 | ], 198 | s_5 : [ 199 | .Character("0") : [s_5], 200 | .Character("1") : [s_4] 201 | ], 202 | s_6 : [ 203 | .Character("0") : [s_5], 204 | .Character("1") : [s_4] 205 | ], 206 | s_7 : [ 207 | .Character("0") : [s_2], 208 | .Character("1") : [s_8] 209 | ], 210 | s_8 : [ 211 | .Character("0") : [s_7], 212 | .Character("1") : [s_12] 213 | ], 214 | s_9 : [ 215 | .Character("0") : [s_6], 216 | .Character("1") : [s_3] 217 | ], 218 | s_10 : [ 219 | .Character("0") : [s_11], 220 | .Character("1") : [s_16] 221 | ], 222 | s_11 : [ 223 | .Character("0") : [s_9], 224 | .Character("1") : [s_13] 225 | ], 226 | s_12 : [ 227 | .Character("0") : [s_11], 228 | .Character("1") : [s_16] 229 | ], 230 | s_13 : [ 231 | .Character("0") : [s_7], 232 | .Character("1") : [s_12] 233 | ], 234 | s_14 : [ 235 | .Character("0") : [s_9], 236 | .Character("1") : [s_13] 237 | ], 238 | s_15 : [ 239 | .Character("0") : [s_14], 240 | .Character("1") : [s_15] 241 | ], 242 | s_16 : [ 243 | .Character("0") : [s_14], 244 | .Character("1") : [s_15] 245 | ] 246 | ] as [State : [Symbol : Set]]) 247 | } 248 | 249 | static var allTests = [ 250 | ("testConversion1", testConversion1), 251 | ("testConversion2", testConversion2), 252 | ] 253 | } 254 | -------------------------------------------------------------------------------- /Tests/RegexTests/LexerTests.swift: 
--------------------------------------------------------------------------------
//
//  LexerTests.swift
//  RegexTests
//

@testable import Regex
import XCTest

/// Tests for `Lexer.lex`.
class LexerTests: XCTestCase {
    /// Lexing `"abcd"` must yield the tokens `.Character("a")` through
    /// `.Character("d")`, in input order.
    func testCharacters() {
        do {
            let tokens = try Lexer.lex("abcd")
            XCTAssertEqual(tokens, [
                .Character("a"),
                .Character("b"),
                .Character("c"),
                .Character("d"),
            ])
        } catch {
            // Report a lexer error as an ordinary test failure; `try!` would
            // abort the whole test process and hide every other result.
            XCTFail("Lexer.lex(\"abcd\") threw unexpectedly: \(error)")
        }
    }

    /// Name/method pairs for this test case.
    /// NOTE(review): presumably consumed by Tests/LinuxMain.swift - confirm.
    static var allTests = [
        ("testCharacters", testCharacters),
    ]
}

--------------------------------------------------------------------------------
/Tests/RegexTests/OptimizerTests.swift:
--------------------------------------------------------------------------------
//
//  OptimizerTests.swift
//  Regex
//

@testable import Regex
import XCTest

/// Tests for `Optimizer.optimize`.
class OptimizerTests: XCTestCase {
    /// Optimizing a default-constructed `Automaton` must produce an automaton
    /// with no initial states, no accepting states and no transitions.
    func testNOOP() {
        let input = Automaton()
        let output = Optimizer.optimize(input)
        XCTAssertEqual(output.initialStates, [])
        XCTAssertEqual(output.acceptingStates, [])
        XCTAssertEqual(output.transitions, [:])
    }

    /// Name/method pairs for this test case.
    /// NOTE(review): presumably consumed by Tests/LinuxMain.swift - confirm.
    static var allTests = [
        ("testNOOP", testNOOP),
    ]
}

--------------------------------------------------------------------------------
/Tests/RegexTests/ParserTests.swift:
--------------------------------------------------------------------------------
//
//  ParserTests.swift
//  RegexTests
//

@testable import Regex
import XCTest

/// Tests for `Parser.parse`.
class ParserTests: XCTestCase {
    /// Parsing the token sequence a, b, c, d must yield an automaton that
    /// starts in state 1, accepts in state 5, and has exactly one transition
    /// per token linking states 1 -> 2 -> 3 -> 4 -> 5.
    func testCharacters() {
        let tokens: [Token] = [
            .Character("a"),
            .Character("b"),
            .Character("c"),
            .Character("d"),
        ]

        do {
            let automata = try Parser.parse(tokens)

            XCTAssertEqual(automata.initialStates, [1])
            XCTAssertEqual(automata.acceptingStates, [5])

            XCTAssertEqual(automata.transitions, [
                1 : [.Character("a") : [2]],
                2 : [.Character("b") : [3]],
                3 : [.Character("c") : [4]],
                4 : [.Character("d") : [5]],
            ])
        } catch {
            // Report a parser error as an ordinary test failure; `try!` would
            // abort the whole test process and hide every other result.
            XCTFail("Parser.parse threw unexpectedly: \(error)")
        }
    }

    /// Name/method pairs for this test case.
    /// NOTE(review): presumably consumed by Tests/LinuxMain.swift - confirm.
    static var allTests = [
        ("testCharacters", testCharacters),
    ]
}

--------------------------------------------------------------------------------
/Tests/RegexTests/RegexTests.swift:
--------------------------------------------------------------------------------
//
//  RegexTests.swift
//  Regex
//

@testable import Regex
import XCTest

/// Test case for the top-level regex API; currently holds only a placeholder.
class RegexTests: XCTestCase {
    /// Placeholder test: intentionally empty, makes no assertions.
    func testTest() {
    }

    /// Name/method pairs for this test case.
    /// NOTE(review): presumably consumed by Tests/LinuxMain.swift - confirm.
    static var allTests = [
        ("testTest", testTest),
    ]
}

--------------------------------------------------------------------------------