├── .gitignore ├── .travis.yml ├── LICENSE ├── Package.swift ├── README.md ├── Sources └── CompilerKit │ ├── DFA.swift │ ├── Grammar.swift │ ├── Helpers.swift │ ├── LALRParser.swift │ ├── LLParser.swift │ ├── LRParser.swift │ ├── Matcher.swift │ ├── NFA.swift │ ├── RegularExpression.swift │ ├── SLRParser.swift │ ├── ScalarClass.swift │ └── Tokenizer.swift └── Tests ├── CompilerKitTests ├── FiniteStateTests.swift └── GrammarTests.swift └── LinuxMain.swift /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /.build 3 | /Packages 4 | /*.xcodeproj 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | os: osx 2 | osx_image: xcode9.3 3 | install: true 4 | script: swift test -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Ahmad Alhashemi 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version:4.0 2 | // The swift-tools-version declares the minimum version of Swift required to build this package. 3 | 4 | import PackageDescription 5 | 6 | let package = Package( 7 | name: "CompilerKit", 8 | products: [ 9 | .library( 10 | name: "CompilerKit", 11 | targets: ["CompilerKit"]), 12 | ], 13 | dependencies: [], 14 | targets: [ 15 | .target( 16 | name: "CompilerKit", 17 | dependencies: []), 18 | .testTarget( 19 | name: "CompilerKitTests", 20 | dependencies: ["CompilerKit"]), 21 | ] 22 | ) 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CompilerKit 2 | 3 | [![Build Status](https://travis-ci.org/hashemi/CompilerKit.svg?branch=master)](https://travis-ci.org/hashemi/CompilerKit) 4 | 5 | The goal of this project is to create a library of data structures and algorithms that can be used to build a compiler in Swift. 6 | 7 | ## Features 8 | 9 | Since this project is under active development, it's very likely that the following lists are incomplete. 10 | 11 | ### Data Structures 12 | 13 | - Classes of unicode scalars (`ScalarClass`). 14 | - Regular expression (`RegularExpression`). 15 | - Nondeterministic finite automata (`NFA`). 16 | - Deterministic finite automata (`DFA`). 17 | - Tokenizer (`Tokenizer`). 18 | - Grammar (`Grammar`). 19 | - LL parser (`LLParser`). 20 | - SLR parser (`LRParser`). 21 | - LALR parser (`LALRParser`). 
22 | 23 | ### Functions/Algorithms 24 | 25 | - Matching a unicode scalar against a `ScalarClass`. 26 | - Derive an `NFA` from a `RegularExpression`. 27 | - Derive a `DFA` from an `NFA`. 28 | - Minimize a `DFA`. 29 | - Match a string against an `NFA` or `DFA` (i.e., execute finite state machine). 30 | - Create a matcher that takes pairs of `RegularExpression`s and tokens and returns the correct token for a string based on the match. 31 | - Create a tokenizer from pairs of `RegularExpression`s and tokens as well as a `RegularExpression` representing trivia between tokens that then takes a string and breaks it into individual tokens, skipping the trivia in between them. 32 | - Eliminate left recursion from a grammar. 33 | - Perform left refactoring to eliminate backtracking. 34 | - Check if a grammar is backtracking-free. 35 | - Generate a table-driven LL(1) parser from a backtracking-free grammar, which reports whether an input was accepted or rejected. 36 | - Generate a DFA-backed SLR parser from a grammar, which reports whether an input was accepted or rejected. 37 | - Construct a DFA-backed LALR parser from a grammar using the DeRemer and Pennello algorithm, which reports whether an input was accepted or rejected. 38 | 39 | ## Example 40 | 41 | ```swift 42 | enum Token { 43 | case integer 44 | case decimal 45 | case identifier 46 | case unknown 47 | } 48 | 49 | let scanner: [(RegularExpression, Token)] = [ 50 | (.digit + .digit*, .integer), 51 | (.digit + .digit* + "." + .digit + .digit*, .decimal), 52 | (.alpha + .alphanum*, .identifier), 53 | ] 54 | 55 | let nfa = NFA(scanner: scanner, nonAcceptingValue: .unknown) 56 | let dfa = nfa.dfa 57 | let minimizedDfa = dfa.minimized 58 | 59 | 60 | minimizedDfa.match("134") // .integer 61 | minimizedDfa.match("61.613") // .decimal 62 | minimizedDfa.match("x1") // .identifier 63 | minimizedDfa.match("1xy") // .unknown 64 | ``` 65 | 66 | See the test suite for more usage examples. 
67 | 68 | ## See Also 69 | 70 | ### Resources Used 71 | 72 | 1. [Engineering a Compiler](https://www.cs.rice.edu/~keith/Errata.html) 2nd ed by Keith Cooper and Linda Torczon. 73 | 74 | 2. [Algorithms](https://algs4.cs.princeton.edu/home/) 4th ed by Robert Sedgewick and Kevin Wayne. 75 | 76 | 3. [Stanford's Compilers Course](https://lagunita.stanford.edu/courses/Engineering/Compilers/Fall2014/about) by Alex Aiken. 77 | 78 | 4. [Compilers: Principles, Techniques, and Tools](https://en.wikipedia.org/wiki/Compilers:_Principles,_Techniques,_and_Tools) by Alfred V. Aho, Monica S. Lam, Ravi Sethi, and Jeffrey D. Ullman. 79 | 80 | 5. [Efficient Computation of LALR(1) Look-Ahead Sets](https://dl.acm.org/citation.cfm?id=357187) by Frank DeRemer and Thomas Pennello. 81 | 82 | 6. [Modern Compiler Implementation in C](https://www.cs.princeton.edu/~appel/modern/c/) by Maia Ginsburg and Andrew W. Appel. 83 | 84 | ### My other projects, leading up to this 85 | 86 | 1. [slox](https://github.com/hashemi/slox) - Hand written scanner, recursive descent parser, and a tree-walking interpreter in Swift. See it for a demonstration of using Swift's algebraic data types (`enum`s and `struct`s) to represent and render code. Implements the [lox programming language](http://www.craftinginterpreters.com). Ported from Java. 87 | 88 | 2. [bslox](https://github.com/hashemi/bslox) - Very early work-in-progress of what will eventually be a bytecode compiler and virtual machine of lox. Will be porting this from C. 89 | 90 | 3. [FlyingMonkey](https://github.com/hashemi/FlyingMonkey) - Hand written scanner and Pratt parser of the [monkey programming language](https://interpreterbook.com). Ported from Go. 91 | 92 | 4. [Sift](https://github.com/hashemi/Sift) - Hand written scanner and parser of a [subset of Scheme](https://en.wikibooks.org/wiki/Write_Yourself_a_Scheme_in_48_Hours). Ported from Haskell. 93 | 94 | 5. 
[sparrow](https://github.com/hashemi/sparrow/blob/master/sparrow/Lexer.swift) - Hand written scanner of the Swift scanner from the official Swift compiler. Ported from the C++ to Swift. See for an example of a complex scanner/lexer with support for rewinding to arbitrary points in the input. 95 | 96 | ## License 97 | MIT -------------------------------------------------------------------------------- /Sources/CompilerKit/DFA.swift: -------------------------------------------------------------------------------- 1 | struct DFA { 2 | typealias Element = M.Element 3 | 4 | var alphabet: Set { 5 | return Set(transitions.keys) 6 | } 7 | 8 | let states: Int 9 | let transitions: [M: [(Int, Int)]] 10 | let initial: Int 11 | let accepting: [Int: Output] 12 | let nonAcceptingValue: Output 13 | 14 | func match(_ elements: S) -> Output where S.Element == Element { 15 | var state = initial 16 | for element in elements { 17 | guard let matcher = alphabet.first(where: { $0 ~= element }) else { 18 | return nonAcceptingValue 19 | } 20 | 21 | guard let newState = transitions[matcher]?.first(where: { $0.0 == state })?.1 else { 22 | return nonAcceptingValue 23 | } 24 | state = newState 25 | } 26 | return accepting[state] ?? 
nonAcceptingValue 27 | } 28 | 29 | func prefixMatch(_ elements: C) -> (Output, C.SubSequence) where C.Element == Element { 30 | var state = initial 31 | var result = (nonAcceptingValue, elements.prefix(upTo: elements.startIndex)) 32 | 33 | for idx in elements.indices { 34 | let element = elements[idx] 35 | guard let matcher = alphabet.first(where: { $0 ~= element }) else { 36 | break 37 | } 38 | 39 | guard let newState = transitions[matcher]?.first(where: { $0.0 == state })?.1 else { 40 | break 41 | } 42 | state = newState 43 | if let newOutput = accepting[state] { 44 | result = (newOutput, elements.prefix(through: idx)) 45 | } 46 | } 47 | 48 | return result 49 | } 50 | } 51 | 52 | extension DFA { 53 | init(_ nfa: NFA) where Output == Set { 54 | // precompute and cache epsilon closures 55 | let epsilonClosures = nfa.epsilonClosures 56 | 57 | func epsilonClosure(from states: Set) -> Set { 58 | var all = Set() 59 | for v in states { 60 | all.formUnion(epsilonClosures[v]) 61 | } 62 | return all 63 | } 64 | 65 | let alphabet = nfa.alphabet 66 | let q0 = epsilonClosures[nfa.initial] 67 | var Q: [Set] = [q0] 68 | var worklist = [(0, q0)] 69 | var transitions: [M: [(Int, Int)]] = [:] 70 | var accepting: [Int: Set] = [0: Set(q0.compactMap { nfa.accepting[$0] })] 71 | while let (qpos, q) = worklist.popLast() { 72 | for matcher in alphabet { 73 | let t = nfa.epsilonClosure(from: nfa.reachable(from: q, via: matcher)) 74 | if t.isEmpty { continue } 75 | let position = Q.index(of: t) ?? 
Q.count 76 | if position == Q.count { 77 | Q.append(t) 78 | worklist.append((position, t)) 79 | accepting[Q.count - 1] = Set(t.compactMap({ nfa.accepting[$0] })) 80 | } 81 | transitions[matcher, default: []].append((qpos, position)) 82 | } 83 | } 84 | 85 | self.init( 86 | states: Q.count, 87 | transitions: transitions, 88 | initial: 0, // this is always zero since q0 is always the first item in Q 89 | accepting: accepting, 90 | nonAcceptingValue: Set() 91 | ) 92 | } 93 | 94 | init?(consistent nfa: NFA, nonAcceptingValue: Output) { 95 | let dfa = DFA, M>(nfa) 96 | 97 | var accepting: [Int: Output] = [:] 98 | for (k,v) in dfa.accepting { 99 | switch v.count { 100 | case 0: break 101 | case 1: accepting[k] = v.first! 102 | default: return nil 103 | } 104 | } 105 | 106 | self.states = dfa.states 107 | self.transitions = dfa.transitions 108 | self.initial = dfa.initial 109 | self.accepting = accepting 110 | self.nonAcceptingValue = nonAcceptingValue 111 | } 112 | } 113 | 114 | // minimal dfa (Hopcroft's Algorithm) 115 | extension DFA { 116 | var minimized: DFA { 117 | // create a canonical partition per unique accepting value 118 | let acceptingPartition = Dictionary(uniqueKeysWithValues: 119 | Set(self.accepting.values) 120 | .enumerated() 121 | .map { ($0.element, $0.offset + 1) } 122 | ) 123 | 124 | // 0 = non-accepting states, otherwise location is determined by acceptingPartition 125 | var partition = (0.. Int in 126 | guard let acceptingValue = self.accepting[s] else { return 0 } 127 | return acceptingPartition[acceptingValue]! 128 | } 129 | 130 | var partitionCount = acceptingPartition.count + 1 131 | 132 | let alphabet = self.alphabet 133 | func split() { 134 | for matcher in alphabet { 135 | // -1: not set yet, -2: no path exists from this partition for this scalar 136 | var partitionTarget = Array(repeating: -1, count: partitionCount) 137 | var newPartition = Array(repeating: -1, count: partitionCount) 138 | for x in 0.. 
{ 2 | enum Node: Hashable { 3 | case nt(Int) 4 | case t(T) 5 | } 6 | 7 | var productions: [[[Node]]] 8 | var start: Int 9 | 10 | var augmented: Grammar { 11 | var new = self 12 | new.productions.append([[.nt(new.start)]]) 13 | new.start = new.productions.count - 1 14 | return new 15 | } 16 | 17 | mutating func eliminateLeftRecursion() { 18 | for i in 0.. 0 { 20 | // find productions starting with a preceeding NT 21 | // as they could lead to indirect left recursion 22 | for j in 0.. 1 { 83 | // save common prefix 84 | let commonPrefix = productions[s][matchingProductions.first!].prefix(upTo: prefixLength) 85 | 86 | // save matching productions with their common prefix removed 87 | let matchingProductionsWithoutCommonPrefix = matchingProductions.map { 88 | Array(productions[s][$0][prefixLength...]) 89 | } 90 | 91 | // create a new NT for the common factor 92 | let newNt = productions.count 93 | productions.append(matchingProductionsWithoutCommonPrefix) 94 | 95 | productions[s] = productions[s] 96 | .enumerated() 97 | .filter { !matchingProductions.contains($0.offset) } 98 | .map { $0.element } 99 | + [commonPrefix + [.nt(newNt)]] 100 | 101 | break 102 | } 103 | } 104 | } 105 | if productions == lastProductions { break } 106 | } 107 | } 108 | 109 | func nullable() -> [Set] { 110 | var nullable: [Set] = Array(repeating: Set(), count: productions.count) 111 | 112 | func nodeIsNullabe(_ n: Node) -> Bool { 113 | switch n { 114 | case .t(_): return false 115 | case let .nt(nt): return !nullable[nt].isEmpty 116 | } 117 | } 118 | 119 | while true { 120 | let lastValue = nullable 121 | for s in 0..]) -> [[T: Set]] { 136 | precondition(nullable.count == productions.count) 137 | var first: [[T: Set]] = Array(repeating: [:], count: productions.count) 138 | 139 | func firstByNode(_ n: Node) -> Set { 140 | switch n { 141 | case let .t(t): return Set([t]) 142 | case let .nt(nt): return Set(first[nt].keys) 143 | } 144 | } 145 | 146 | while true { 147 | let lastValue = first 148 | 
for s in 0.. = firstByNode(p.first!) 153 | 154 | for node in p { 155 | if case let .nt(nt) = node, !nullable[nt].isEmpty { 156 | // accumulate first sets of nonterminal nodes with nullable productions... 157 | rhs.formUnion(firstByNode(node)) 158 | } else { 159 | // ...until we hit the first terminal or non-nullable 160 | break 161 | } 162 | } 163 | 164 | for t in rhs { 165 | first[s][t, default: []].insert(pIdx) 166 | } 167 | } 168 | } 169 | if first == lastValue { break } 170 | } 171 | 172 | return first 173 | } 174 | 175 | func follow(nullable: [Set], first: [[T: Set]]) -> [Set] { 176 | precondition(nullable.count == productions.count) 177 | precondition(first.count == productions.count) 178 | var follow = Array(repeating: Set(), count: productions.count) 179 | 180 | while true { 181 | let lastValue = follow 182 | for s in 0..], first: [[T: Set]], follow: [Set]) -> Bool { 208 | precondition(nullable.count == productions.count) 209 | precondition(first.count == productions.count) 210 | precondition(follow.count == productions.count) 211 | 212 | for s in 0.. 
1 }) { 215 | return false 216 | } 217 | 218 | // we can only have production that can be empty 219 | if nullable[s].count > 1 { return false } 220 | 221 | // if we do have one empty production, we need to make sure that 222 | // non of the terminals that can follow this term is also part of 223 | // the first set of one of its productions 224 | if nullable[s].count == 1 { 225 | if !follow[s].isDisjoint(with: first[s].keys) { 226 | return false 227 | } 228 | } 229 | } 230 | 231 | return true 232 | } 233 | } 234 | -------------------------------------------------------------------------------- /Sources/CompilerKit/Helpers.swift: -------------------------------------------------------------------------------- 1 | extension Dictionary { 2 | init(_ keys: Set, _ value: (Key) -> Value) { 3 | self.init(uniqueKeysWithValues: keys.map { ($0, value($0)) }) 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /Sources/CompilerKit/LALRParser.swift: -------------------------------------------------------------------------------- 1 | private extension Grammar { 2 | subscript(_ item: LRParser.Item) -> Node? 
{ 3 | let prod = productions[item.term][item.production] 4 | guard item.position < prod.count else { return nil } 5 | return prod[item.position] 6 | } 7 | } 8 | 9 | extension LRParser { 10 | init(lalr g: Grammar) { 11 | let grammar = g.augmented 12 | 13 | let startItem = Item(term: grammar.productions.count - 1, production: 0, position: 0) 14 | let allNodes = Set(grammar.productions.flatMap { $0.flatMap { $0 } }) 15 | let nullable = grammar.nullable() 16 | let itemSets = LRParser.itemSets(grammar, startItem, allNodes) 17 | let allTransitions = LRParser.allTransitions(grammar, itemSets) 18 | 19 | let directRead = Dictionary(allTransitions) { LRParser.directRead(grammar, $0) } 20 | 21 | let transitionReads = Dictionary(allTransitions) { LRParser.reads(grammar, nullable, $0) } 22 | 23 | let reads = LRParser.digraph(allTransitions, transitionReads, directRead) 24 | 25 | let transitionIncludes = Dictionary(allTransitions) { LRParser.includes(grammar, nullable, $0, allTransitions) } 26 | 27 | let follow = LRParser.digraph(allTransitions, transitionIncludes, reads) 28 | 29 | // make a list of all possible reduction items: [A -> w.] 30 | var reductions: [(Set, Item)] = [] 31 | let prods = grammar.productions 32 | for term in 0..: [Item: Set]] = [:] 42 | for (state, reduction) in reductions { 43 | lookbacks[state, default: [:]][reduction, default: []] = LRParser.lookback(grammar, state, reduction, allTransitions) 44 | } 45 | 46 | var lookaheads: [Set: [Item: Set]] = [:] 47 | for (state, reduction) in reductions { 48 | lookaheads[state] = [reduction: []] 49 | for transition in lookbacks[state]![reduction]! { 50 | lookaheads[state]![reduction]!.formUnion(follow[transition]!) 51 | } 52 | } 53 | 54 | // now we (very inefficiently) build a DFA out of that 55 | let orderedItemSets = Array(itemSets) 56 | func state(for itemSet: Set) -> Int { 57 | return orderedItemSets.index(of: itemSet)! 
58 | } 59 | 60 | let startState = state(for: LRParser.closure(grammar, [startItem])) 61 | let finalState = state(for: [Item(term: grammar.productions.count - 1, production: 0, position: 1)]) 62 | 63 | var transitions: [Node: [(Int, Int)]] = [:] 64 | for from in itemSets { 65 | for x in allNodes { 66 | let to = LRParser.goto(grammar, from, x) 67 | if !to.isEmpty { 68 | transitions[x, default: []].append((state(for: from), state(for: to))) 69 | } 70 | } 71 | } 72 | 73 | var accepting: [Int: Set] = [:] 74 | for itemSet in itemSets { 75 | let s = state(for: itemSet) 76 | 77 | // if this is a final state, accept, cannot do anything else 78 | if s == finalState { 79 | accepting[s] = [.accept] 80 | continue 81 | } 82 | 83 | if let possibleReductions = lookaheads[itemSet] { 84 | for (reduction, allowedLookaheads) in possibleReductions { 85 | accepting[s, default: []].insert(.reduce(reduction.term, reduction.position, allowedLookaheads)) 86 | } 87 | 88 | // the item set also includes non-reduce items, so it can also shift 89 | if itemSet.count > possibleReductions.count { 90 | accepting[s, default: []].insert(.shift) 91 | } 92 | } else { 93 | // no reductions, so the only possible action here is to shift 94 | accepting[s] = [.shift] 95 | } 96 | } 97 | 98 | // "we have a parser." 
99 | dfa = DFA( 100 | states: itemSets.count, 101 | transitions: transitions, 102 | initial: startState, 103 | accepting: accepting, 104 | nonAcceptingValue: [Action.error] 105 | ).minimized 106 | } 107 | 108 | static func closure(_ grammar: Grammar, _ I: Set) -> Set { 109 | var J = I 110 | var lastCount: Int 111 | repeat { 112 | lastCount = J.count 113 | for j in J { 114 | if let node = grammar[j] { 115 | if case let .nt(nt) = node { 116 | for x in 0.., _ I: Set, _ X: Node) -> Set { 127 | var G: Set = [] 128 | for i in I { 129 | if let node = grammar[i], node == X { 130 | G.insert(i.next) 131 | } 132 | } 133 | 134 | return closure(grammar, G) 135 | } 136 | 137 | static func goto(_ grammar: Grammar, _ t: Transition) -> Set { 138 | return goto(grammar, t.state, .nt(t.nt)) 139 | } 140 | 141 | static func itemSets(_ grammar: Grammar, _ startItem: Item, _ allNodes: Set) -> Set> { 142 | var C: Set> = [closure(grammar, [startItem])] 143 | 144 | var lastCount = 0 145 | while lastCount != C.count { 146 | lastCount = C.count 147 | for I in C { 148 | for x in allNodes { 149 | let g = goto(grammar, I, x) 150 | if !g.isEmpty { C.insert(g) } 151 | } 152 | } 153 | } 154 | 155 | return C 156 | } 157 | 158 | static func allTransitions(_ grammar: Grammar, _ itemSets: Set>) -> Set { 159 | var transitions: Set = [] 160 | 161 | for itemSet in itemSets { 162 | for i in itemSet { 163 | if case let .nt(nt)? = grammar[i] { 164 | transitions.insert(Transition(state: itemSet, nt: nt)) 165 | } 166 | } 167 | } 168 | 169 | return transitions 170 | } 171 | 172 | static func directRead(_ grammar: Grammar, _ t: Transition) -> Set { 173 | var terminals: Set = [] 174 | 175 | let G = goto(grammar, t) 176 | for i in G { 177 | if case let .t(terminal)? 
= grammar[i] { 178 | terminals.insert(terminal) 179 | } 180 | } 181 | 182 | return terminals 183 | } 184 | 185 | static func reads(_ grammar: Grammar, _ nullable: [Set], _ t: Transition) -> Set { 186 | var relations: Set = [] 187 | 188 | let g = goto(grammar, t) 189 | for i in g { 190 | guard case let .nt(nt)? = grammar[i.next] else { continue } 191 | 192 | if !nullable[nt].isEmpty { 193 | relations.insert(Transition(state: g, nt: nt)) 194 | } 195 | } 196 | 197 | return relations 198 | } 199 | 200 | // 't' is (p, A) in DeRemer & Pennello's description of includes 201 | static func includes(_ grammar: Grammar, _ nullable: [Set], _ t: Transition, _ allTransitions: Set) -> Set { 202 | var includes: Set = [] 203 | 204 | func tailNullable(_ i: Item) -> Bool { 205 | let prod = grammar.productions[i.term][i.production] 206 | 207 | // if item is last in a production, the tail is empty 208 | // and therefore is nullable 209 | guard i.position < prod.count else { return true } 210 | 211 | let nodes = prod[i.position.. β A ɣ] 226 | for initialItem in pre.state where initialItem.term == pre.nt { 227 | // check all possible (q, C) transitions we can take from this item 228 | // is our 't' one of them? 
229 | var item = initialItem 230 | var q = pre.state 231 | while let node = grammar[item] { 232 | if case let .nt(nt) = node { 233 | if Transition(state: q, nt: nt) == t { 234 | // we just got to (p, A) from 'pre' 235 | // this means that this item is [B -> β .A ɣ] 236 | // if ɣ is nullable, the (p, A) includes (p', B) 237 | // i.e., 't' includes 'pre' 238 | if tailNullable(item.next) { 239 | includes.insert(pre) 240 | } 241 | } 242 | } 243 | 244 | q = goto(grammar, q, node) 245 | item = item.next 246 | } 247 | 248 | } 249 | } 250 | 251 | return includes 252 | } 253 | 254 | static func lookback(_ grammar: Grammar, _ q: Set, _ reduction: Item, _ allTransitions: Set) -> Set { 255 | let w = grammar.productions[reduction.term][reduction.production] 256 | // a reduction is represented by an item with the dot in the far right 257 | // [A -> w.] 258 | precondition(reduction.position == w.count) 259 | precondition(q.contains(reduction)) 260 | 261 | var lookback: Set = [] 262 | 263 | // check every transition (p, A) where A is the reductions lhs 264 | for t in allTransitions where t.nt == reduction.term { 265 | // check if we can spell a path from t.state (p) to (q) using w 266 | var g = t.state 267 | for n in w { 268 | g = goto(grammar, g, n) 269 | } 270 | 271 | // if this was a valid path, we will find ourselves at q 272 | if g == q { 273 | lookback.insert(t) 274 | } 275 | } 276 | 277 | return lookback 278 | } 279 | 280 | static func digraph( 281 | _ input: Set, 282 | _ relation: [Input: Set], 283 | _ fp: [Input: Set]) -> [Input: Set] { 284 | 285 | var stack: [Input] = [] 286 | var result: [Input: Set] = [:] 287 | var n = Dictionary(input) { _ in 0 } 288 | 289 | func traverse(_ x: Input) { 290 | stack.append(x) 291 | let d = stack.count 292 | n[x] = d 293 | result[x] = fp[x]! 294 | for y in relation[x]! { 295 | if n[y] == 0 { traverse(y) } 296 | n[x] = min(n[x]!, n[y]!) 297 | result[x]!.formUnion(result[y]!) 298 | } 299 | if n[x] == d { 300 | repeat { 301 | n[stack.last!] 
= Int.max 302 | result[stack.last!] = result[x] 303 | } while stack.popLast() != x 304 | } 305 | } 306 | 307 | for x in input where n[x] == 0 { 308 | traverse(x) 309 | } 310 | 311 | return result 312 | } 313 | } 314 | -------------------------------------------------------------------------------- /Sources/CompilerKit/LLParser.swift: -------------------------------------------------------------------------------- 1 | struct LLParser { 2 | let grammar: Grammar 3 | let nullable: [Set] 4 | let first: [[T: Set]] 5 | let follow: [Set] 6 | let table: [[T: Int]] 7 | 8 | init(_ g: Grammar) { 9 | var g = g 10 | 11 | // get the grammar ready for LL parsing 12 | g.eliminateLeftRecursion() 13 | g.leftRefactor() 14 | 15 | nullable = g.nullable() 16 | first = g.first(nullable: nullable) 17 | follow = g.follow(nullable: nullable, first: first) 18 | 19 | let isBacktrackFree = g.isBacktrackFree(nullable: nullable, first: first, follow: follow) 20 | precondition(isBacktrackFree, 21 | "Cannot initialize an LL parser for a non-backtrack free grammar") 22 | 23 | var table: [[T: Int]] = Array(repeating: [:], count: g.productions.count) 24 | 25 | for nt in 0.. Bool { 43 | var current = 0 44 | 45 | func advance() { current += 1 } 46 | 47 | func peek() -> T? 
{ 48 | guard current < words.count else { return nil } 49 | return words[current] 50 | } 51 | 52 | var stack: [Grammar.Node] = [.nt(self.grammar.start)] 53 | 54 | while let focus = stack.popLast() { 55 | guard let word = peek() else { 56 | // unexpected end of input 57 | return false 58 | } 59 | switch focus { 60 | case let .t(t): 61 | guard t == word else { 62 | // unexpected word 63 | return false 64 | } 65 | advance() 66 | 67 | case let .nt(nt): 68 | guard let p = table[nt][word] else { 69 | // unexpected word 70 | return false 71 | } 72 | 73 | stack.append(contentsOf: grammar.productions[nt][p].reversed()) 74 | } 75 | } 76 | 77 | if peek() != nil { 78 | // input contains unconsumed words at the end 79 | return false 80 | } 81 | 82 | return true 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /Sources/CompilerKit/LRParser.swift: -------------------------------------------------------------------------------- 1 | extension Grammar.Node: Matcher { 2 | typealias Element = Grammar.Node 3 | 4 | static func ~=(pattern: Element, value: Element) -> Bool { 5 | return pattern == value 6 | } 7 | } 8 | 9 | struct LRParser { 10 | typealias Node = Grammar.Node 11 | 12 | struct Item: Hashable { 13 | let term: Int 14 | let production: Int 15 | let position: Int 16 | 17 | var next: Item { 18 | return Item(term: term, production: production, position: position + 1) 19 | } 20 | } 21 | 22 | // (p, A) where p is state and A is nt 23 | struct Transition: Hashable { 24 | let state: Set 25 | let nt: Int 26 | } 27 | 28 | enum Action: Hashable { 29 | case shift 30 | case reduce(Int, Int, Set) 31 | case accept 32 | case error 33 | } 34 | 35 | let dfa: DFA, Node> 36 | 37 | func parse(_ elements: S) -> Bool where S.Element == T { 38 | var stack: [Node] = [] 39 | var it = elements.makeIterator() 40 | 41 | var lookahead = it.next() 42 | func advance() -> T? 
{ 43 | let current = lookahead 44 | lookahead = it.next() 45 | return current 46 | } 47 | 48 | func perform(_ action: Action) -> Bool { 49 | switch action { 50 | case .shift: 51 | guard let t = advance() else { return false } 52 | stack.append(.t(t)) 53 | case let .reduce(nt, size, _): 54 | stack.removeLast(size) 55 | stack.append(.nt(nt)) 56 | case .accept: 57 | guard lookahead == nil else { return false } 58 | case .error: 59 | return false 60 | } 61 | 62 | return true 63 | } 64 | 65 | while true { 66 | let actions = dfa.match(stack) 67 | let action: Action 68 | 69 | switch actions.count { 70 | case 0: action = .error 71 | case 1: action = actions.first! 72 | default: 73 | // we have a reduce/reduce or shift/reduce conflict 74 | // is there any viable reduce among the possible actions? 75 | let viableReduce = actions.first { action in 76 | if case let .reduce(_, _, la) = action { 77 | if let lookahead = lookahead { 78 | return la.contains(lookahead) 79 | } 80 | return true 81 | } 82 | return false 83 | } 84 | 85 | if let reduce = viableReduce { 86 | action = reduce 87 | } else if actions.contains(.shift) { 88 | action = .shift 89 | } else { 90 | action = .error 91 | } 92 | } 93 | 94 | if perform(action) { 95 | if action == .accept { return true } 96 | } else { 97 | return false 98 | } 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /Sources/CompilerKit/Matcher.swift: -------------------------------------------------------------------------------- 1 | protocol Matcher { 2 | associatedtype Element 3 | 4 | static func ~=(pattern: Self, value: Element) -> Bool 5 | } 6 | -------------------------------------------------------------------------------- /Sources/CompilerKit/NFA.swift: -------------------------------------------------------------------------------- 1 | struct NFA { 2 | typealias Element = M.Element 3 | 4 | let states: Int 5 | let transitions: [M: [(Int, Int)]] 6 | let epsilonTransitions: [Int: 
[Int]] 7 | let initial: Int 8 | let accepting: [Int: Output] 9 | 10 | var epsilonClosures: [Set] { 11 | var epsilonClosures: [Set] = [] 12 | 13 | for v in 0..() 15 | 16 | func dfs(_ s: Int) { 17 | marked.insert(s) 18 | for w in epsilonTransitions[s, default: []] { 19 | if !marked.contains(w) { dfs(w) } 20 | } 21 | } 22 | 23 | dfs(v) 24 | 25 | epsilonClosures.append(marked) 26 | } 27 | 28 | return epsilonClosures 29 | } 30 | 31 | var alphabet: Dictionary.Keys { 32 | return transitions.keys 33 | } 34 | 35 | func epsilonClosure(from states: Set) -> Set { 36 | var marked = Set() 37 | 38 | func dfs(_ s: Int) { 39 | marked.insert(s) 40 | for w in epsilonTransitions[s, default: []] { 41 | if !marked.contains(w) { dfs(w) } 42 | } 43 | } 44 | 45 | for s in states { 46 | if !marked.contains(s) { dfs(s) } 47 | } 48 | 49 | return marked 50 | } 51 | 52 | func reachable(from states: Set, via matcher: M) -> Set { 53 | var set = Set() 54 | for (from, to) in transitions[matcher, default: []] { 55 | if states.contains(from) { 56 | set.insert(to) 57 | } 58 | } 59 | return set 60 | } 61 | 62 | func match(_ elements: S) -> Set where S.Element == Element { 63 | var states = Set() 64 | states.insert(initial) 65 | for element in elements { 66 | // add all states reachable by epsilon transitions 67 | states = epsilonClosure(from: states) 68 | 69 | guard let matcher = alphabet.first(where: { $0 ~= element }) else { 70 | return [] 71 | } 72 | 73 | // new set of states as allowed by current element in string 74 | states = reachable(from: states, via: matcher) 75 | 76 | if states.isEmpty { return [] } 77 | } 78 | return Set(states.compactMap { self.accepting[$0] }) 79 | } 80 | 81 | func offset(by offset: Int) -> NFA { 82 | return NFA( 83 | states: states + offset, 84 | transitions: transitions.mapValues { $0.map { from, to in (from + offset, to + offset) } }, 85 | epsilonTransitions: Dictionary(uniqueKeysWithValues: epsilonTransitions.map { ($0.key + offset, $0.value.map { $0 + offset }) }), 
// MARK: - Building NFAs over scalar classes

extension NFA where M == ScalarClass {
    /// Combines several NFAs into one that accepts any of the alternatives.
    ///
    /// A fresh common initial state (state 0) is connected by an epsilon edge
    /// to each alternative's initial state; every alternative is first offset
    /// past the state numbers already in use so the state spaces don't collide.
    init(alternatives: [NFA]) {
        let commonInitial = 0
        var states = 1
        var transitions: [ScalarClass: [(Int, Int)]] = [:]
        var epsilonTransitions: [Int: [Int]] = [:]
        var accepting: [Int: Output] = [:]

        for nfa in alternatives {
            let offset = nfa.offset(by: states)
            transitions.merge(offset.transitions, uniquingKeysWith: { $0 + $1 })
            // offset state numbers are disjoint, so colliding keys cannot occur;
            // keeping the first value is therefore safe
            epsilonTransitions.merge(offset.epsilonTransitions, uniquingKeysWith: { first, _ in first })
            epsilonTransitions[commonInitial, default: []].append(offset.initial)
            accepting.merge(offset.accepting, uniquingKeysWith: { first, _ in first })
            states = offset.states
        }

        self.init(
            states: states,
            transitions: transitions,
            epsilonTransitions: epsilonTransitions,
            initial: commonInitial,
            accepting: accepting
        )
    }

    /// Builds a scanner NFA from (regular expression, output) pairs: the
    /// result accepts each pattern with its associated output value.
    init(scanner: [(RegularExpression, Output)]) {
        let alternatives = scanner.map { NFA(re: $0.0, acceptingValue: $0.1) }
        self.init(alternatives: alternatives)
    }
}

// DFA from NFA (subset construction)
extension NFA {
    // Each DFA state stands for a set of NFA states, so it accepts the *set*
    // of outputs of the NFA states it contains.
    // NOTE(review): the generic arguments were stripped by the dump that
    // produced this file; `DFA<Set<Output>, M>` is reconstructed from the
    // subset construction — confirm against DFA.swift.
    var dfa: DFA<Set<Output>, M> { return DFA(self) }
}

// Initialize NFA from RE (Thompson's construction)
extension NFA where M == ScalarClass {
    init(re: RegularExpression, acceptingValue: Output) {
        switch re {
        case .scalarClass(let scalarClass):
            // single edge: initial (0) --scalarClass--> accepting (1)
            self.init(
                states: 2,
                transitions: [scalarClass: [(0, 1)]],
                epsilonTransitions: [:],
                initial: 0,
                accepting: [1: acceptingValue]
            )

        case .concatenation(let re1, let re2):
            let nfa1 = NFA(re: re1, acceptingValue: acceptingValue)
            let nfa2 = NFA(re: re2, acceptingValue: acceptingValue)

            // nfa1 followed by nfa2, with epsilon transitions from nfa1's
            // accepting states into nfa2's initial state
            let nfa2offset = nfa2.offset(by: nfa1.states)
            let transitions = nfa1.transitions
                .merging(nfa2offset.transitions, uniquingKeysWith: { $0 + $1 })
            let epsilonTransitions = nfa1.epsilonTransitions
                .merging(nfa2offset.epsilonTransitions, uniquingKeysWith: { $0 + $1 })
                .merging(
                    nfa1.accepting.keys.map { ($0, [nfa2offset.initial]) },
                    uniquingKeysWith: { $0 + $1 })

            self.init(
                states: nfa2offset.states,
                transitions: transitions,
                epsilonTransitions: epsilonTransitions,
                initial: nfa1.initial,
                accepting: nfa2offset.accepting
            )

        case .alternation(let re1, let re2):
            let nfa1 = NFA(re: re1, acceptingValue: acceptingValue)
            let nfa2 = NFA(re: re2, acceptingValue: acceptingValue)

            // create a common initial state that points to each nfa's initial
            // with an epsilon edge and a combined accepting dictionary
            let nfa1offset = nfa1.offset(by: 1)
            let nfa2offset = nfa2.offset(by: nfa1.states + 1)

            let states = nfa2offset.states
            let initial = 0

            let transitions = nfa1offset.transitions
                .merging(nfa2offset.transitions, uniquingKeysWith: { $0 + $1 })

            let epsilonTransitions = nfa1offset.epsilonTransitions
                .merging(nfa2offset.epsilonTransitions, uniquingKeysWith: { $0 + $1 })
                .merging([(0, [nfa1offset.initial, nfa2offset.initial])], uniquingKeysWith: { $0 + $1 })

            let accepting = nfa1offset.accepting.merging(nfa2offset.accepting, uniquingKeysWith: { first, _ in first })

            self.init(
                states: states,
                transitions: transitions,
                epsilonTransitions: epsilonTransitions,
                initial: initial,
                accepting: accepting
            )

        case .closure(let re):
            let nfa = NFA(re: re, acceptingValue: acceptingValue)

            // turn nfa into a closure by:
            // - making the initial state accepting, to allow skipping the NFA (zero occurrences)
            // - looping over the NFA many times by connecting its accepting states to its initial state
            let accepting = nfa.accepting.merging([nfa.initial: acceptingValue], uniquingKeysWith: { first, _ in first })
            let epsilonTransitions = nfa.epsilonTransitions
                .merging(
                    nfa.accepting.keys.map { ($0, [nfa.initial]) }, uniquingKeysWith: { $0 + $1 })

            self.init(
                states: nfa.states,
                transitions: nfa.transitions,
                epsilonTransitions: epsilonTransitions,
                initial: nfa.initial,
                accepting: accepting
            )
        }
    }
}

/// A regular expression over classes of unicode scalars.
indirect enum RegularExpression {
    case scalarClass(ScalarClass)
    case alternation(RegularExpression, RegularExpression)
    case concatenation(RegularExpression, RegularExpression)
    case closure(RegularExpression)
}

// A more convenient way for building a regular expression in Swift code
postfix operator *

extension RegularExpression: ExpressibleByUnicodeScalarLiteral {
    init(unicodeScalarLiteral scalar: UnicodeScalar) {
        self = .scalarClass(.single(scalar))
    }

    static func +(lhs: RegularExpression, rhs: RegularExpression) -> RegularExpression {
        return .concatenation(lhs, rhs)
    }

    static func |(lhs: RegularExpression, rhs: RegularExpression) -> RegularExpression {
        return .alternation(lhs, rhs)
    }

    static postfix func *(re: RegularExpression) -> RegularExpression {
        return .closure(re)
    }

    static let digit: RegularExpression = .scalarClass(.range("0", "9"))

    static let lowercase: RegularExpression = .scalarClass(.range("a", "z"))

    static let uppercase: RegularExpression = .scalarClass(.range("A", "Z"))

    static let alpha: RegularExpression = .lowercase | .uppercase

    static let alphanum: RegularExpression = .alpha | .digit
}

// Derive an NFA from a regular expression (Thompson's Construction)
extension RegularExpression {
    // NOTE(review): generic arguments reconstructed from the dump —
    // `acceptingValue: true` fixes Output == Bool, and the tests consume the
    // result with `.match(...).contains(true)`.
    var nfa: NFA<Bool, ScalarClass> {
        return NFA(re: self, acceptingValue: true)
    }
}
Bool { 8 | switch pattern { 9 | case let .single(scalar): 10 | return value == scalar 11 | 12 | case let .range(from, to): 13 | return from <= value && value <= to 14 | } 15 | } 16 | } 17 | 18 | -------------------------------------------------------------------------------- /Sources/CompilerKit/Tokenizer.swift: -------------------------------------------------------------------------------- 1 | struct Tokenizer where Output: Hashable { 2 | let dfa: DFA 3 | let trivia: DFA 4 | let unknown: Output 5 | 6 | init?(tokens: [(RegularExpression, Output)], trivia: RegularExpression, unknown: Output) { 7 | let nfa = NFA(scanner: tokens) 8 | guard let dfa = DFA(consistent: nfa, nonAcceptingValue: unknown) 9 | else { return nil } 10 | self.dfa = dfa.minimized 11 | 12 | guard let triviaDFA = DFA(consistent: trivia.nfa, nonAcceptingValue: false) 13 | else { return nil } 14 | self.trivia = triviaDFA.minimized 15 | 16 | self.unknown = unknown 17 | } 18 | 19 | func tokenize(_ source: String.UnicodeScalarView) -> [(Output, Substring.UnicodeScalarView.SubSequence)] { 20 | var tokens: [(Output, Substring.UnicodeScalarView.SubSequence)] = [] 21 | var offset = source.startIndex 22 | var unknownStart: String.UnicodeScalarView.Index? 
= nil 23 | 24 | func processUnknown() { 25 | if unknownStart != nil { 26 | tokens.append((unknown, source[unknownStart!..( 8 | states: 4, 9 | transitions: [ 10 | .single("a"): [(0, 0), (1, 2)], 11 | .single("b"): [(2, 3)] 12 | ], 13 | epsilonTransitions: [0: [1]], 14 | initial: 0, 15 | accepting: [3: true] 16 | ) 17 | XCTAssertTrue(nfa.match("aaab".unicodeScalars).contains(true)) 18 | XCTAssertFalse(nfa.match("aaa".unicodeScalars).contains(true)) 19 | XCTAssertTrue(nfa.match("ab".unicodeScalars).contains(true)) 20 | XCTAssertFalse(nfa.match("b".unicodeScalars).contains(true)) 21 | XCTAssertFalse(nfa.match("bbbbab".unicodeScalars).contains(true)) 22 | } 23 | 24 | 25 | func testRegularExpression() { 26 | // a*ab - should match ab, aab, aaab, etc 27 | let re: RegularExpression = "a"* + ("a" + "b") 28 | let derivedNfa = re.nfa 29 | XCTAssertTrue(derivedNfa.match("aaab".unicodeScalars).contains(true)) 30 | XCTAssertFalse(derivedNfa.match("aaa".unicodeScalars).contains(true)) 31 | XCTAssertTrue(derivedNfa.match("ab".unicodeScalars).contains(true)) 32 | XCTAssertFalse(derivedNfa.match("b".unicodeScalars).contains(true)) 33 | XCTAssertFalse(derivedNfa.match("bbbbab".unicodeScalars).contains(true)) 34 | } 35 | 36 | func testDFA() { 37 | // a(b|c)* - should match a, ab, ac, abc, abbbb, acccc, abbccbcbbc, etc 38 | let dfa = DFA( 39 | states: 2, 40 | transitions: [ 41 | ScalarClass.single("a"): [(0, 1)], 42 | ScalarClass.single("b"): [(1, 1)], 43 | ScalarClass.single("c"): [(1, 1)], 44 | ], 45 | initial: 0, 46 | accepting: [1: true], 47 | nonAcceptingValue: false 48 | ) 49 | 50 | XCTAssertTrue(dfa.match("a".unicodeScalars)) 51 | XCTAssertTrue(dfa.match("ab".unicodeScalars)) 52 | XCTAssertTrue(dfa.match("ac".unicodeScalars)) 53 | XCTAssertTrue(dfa.match("abc".unicodeScalars)) 54 | XCTAssertTrue(dfa.match("acb".unicodeScalars)) 55 | XCTAssertTrue(dfa.match("abbbb".unicodeScalars)) 56 | XCTAssertTrue(dfa.match("acccc".unicodeScalars)) 57 | 
XCTAssertTrue(dfa.match("abbccbbccbc".unicodeScalars)) 58 | 59 | XCTAssertFalse(dfa.match("aa".unicodeScalars)) 60 | XCTAssertFalse(dfa.match("aba".unicodeScalars)) 61 | XCTAssertFalse(dfa.match("abac".unicodeScalars)) 62 | XCTAssertFalse(dfa.match("abbccbbccbca".unicodeScalars)) 63 | } 64 | 65 | func testRegularExpressionToDFAMatch() { 66 | // a(b|c)* - should match a, ab, ac, abc, abbbb, acccc, abbccbcbbc, etc 67 | let re: RegularExpression = "a" + ("b" | "c")* 68 | let dfa = DFA(consistent: re.nfa, nonAcceptingValue: false)! 69 | 70 | XCTAssertTrue(dfa.match("a".unicodeScalars)) 71 | XCTAssertTrue(dfa.match("ab".unicodeScalars)) 72 | XCTAssertTrue(dfa.match("ac".unicodeScalars)) 73 | XCTAssertTrue(dfa.match("abc".unicodeScalars)) 74 | XCTAssertTrue(dfa.match("acb".unicodeScalars)) 75 | XCTAssertTrue(dfa.match("abbbb".unicodeScalars)) 76 | XCTAssertTrue(dfa.match("acccc".unicodeScalars)) 77 | XCTAssertTrue(dfa.match("abbccbbccbc".unicodeScalars)) 78 | 79 | XCTAssertFalse(dfa.match("aa".unicodeScalars)) 80 | XCTAssertFalse(dfa.match("aba".unicodeScalars)) 81 | XCTAssertFalse(dfa.match("abac".unicodeScalars)) 82 | XCTAssertFalse(dfa.match("abbccbbccbca".unicodeScalars)) 83 | XCTAssertFalse(dfa.match("cbcab".unicodeScalars)) 84 | } 85 | 86 | func testRegularExpressionToMinimizedDFAMatch() { 87 | // a(b|c)* - should match a, ab, ac, abc, abbbb, acccc, abbccbcbbc, etc 88 | let re: RegularExpression = "a" + ("b" | "c")* 89 | let dfa = DFA(consistent: re.nfa, nonAcceptingValue: false)!.minimized 90 | 91 | XCTAssertTrue(dfa.match("a".unicodeScalars)) 92 | XCTAssertTrue(dfa.match("ab".unicodeScalars)) 93 | XCTAssertTrue(dfa.match("ac".unicodeScalars)) 94 | XCTAssertTrue(dfa.match("abc".unicodeScalars)) 95 | XCTAssertTrue(dfa.match("acb".unicodeScalars)) 96 | XCTAssertTrue(dfa.match("abbbb".unicodeScalars)) 97 | XCTAssertTrue(dfa.match("acccc".unicodeScalars)) 98 | XCTAssertTrue(dfa.match("abbccbbccbc".unicodeScalars)) 99 | 100 | 
XCTAssertFalse(dfa.match("aa".unicodeScalars)) 101 | XCTAssertFalse(dfa.match("aba".unicodeScalars)) 102 | XCTAssertFalse(dfa.match("abac".unicodeScalars)) 103 | XCTAssertFalse(dfa.match("abbccbbccbca".unicodeScalars)) 104 | XCTAssertFalse(dfa.match("cbcab".unicodeScalars)) 105 | } 106 | 107 | func testMultiAcceptingStatesDFA() { 108 | enum Token { case aa, ab, ac, unknown } 109 | 110 | let dfa = DFA( 111 | states: 5, 112 | transitions: [ 113 | ScalarClass.single("a"): [(0, 1), (1, 2)], 114 | ScalarClass.single("b"): [(1, 3)], 115 | ScalarClass.single("c"): [(1, 4)], 116 | ], 117 | initial: 0, 118 | accepting: [2: .aa, 3: .ab, 4: .ac], 119 | nonAcceptingValue: .unknown 120 | ) 121 | 122 | XCTAssertEqual(dfa.match("aa".unicodeScalars), .aa) 123 | XCTAssertEqual(dfa.match("ab".unicodeScalars), .ab) 124 | XCTAssertEqual(dfa.match("ac".unicodeScalars), .ac) 125 | XCTAssertEqual(dfa.match("bb".unicodeScalars), .unknown) 126 | } 127 | 128 | func testScanner() { 129 | enum Token { 130 | case integer 131 | case decimal 132 | case identifier 133 | } 134 | 135 | let scanner: [(RegularExpression, Token)] = [ 136 | (.digit + .digit*, .integer), 137 | (.digit + .digit* + "." 
+ .digit + .digit*, .decimal), 138 | (.alpha + .alphanum*, .identifier), 139 | ] 140 | 141 | measure { 142 | let dfa = NFA(scanner: scanner) 143 | .dfa.minimized 144 | 145 | XCTAssertEqual(dfa.match("134".unicodeScalars), [.integer]) 146 | XCTAssertEqual(dfa.match("61.613".unicodeScalars), [.decimal]) 147 | XCTAssertEqual(dfa.match("x1".unicodeScalars), [.identifier]) 148 | XCTAssertEqual(dfa.match("1xy".unicodeScalars), []) 149 | } 150 | 151 | let dfa = NFA(scanner: scanner).dfa.minimized 152 | let source = "134 x3".unicodeScalars 153 | var offset = source.startIndex 154 | 155 | while offset < source.endIndex { 156 | let (token, match) = dfa.prefixMatch(source[offset...]) 157 | 158 | if token.isEmpty { 159 | // no match, skip over character 160 | print("Skipping: '\(source[offset])'") 161 | offset = source.index(after: offset) 162 | } else { 163 | offset = match.endIndex 164 | print(token.first!, String(match)) 165 | } 166 | } 167 | } 168 | 169 | func testTokenizer() { 170 | enum Token { 171 | case integer 172 | case decimal 173 | case identifier 174 | case unknown 175 | } 176 | 177 | let scanner: [(RegularExpression, Token)] = [ 178 | (.digit + .digit*, .integer), 179 | (.digit + .digit* + "." + .digit + .digit*, .decimal), 180 | (.alpha + .alphanum*, .identifier), 181 | ] 182 | 183 | let trivia: RegularExpression = " " | "\t" | "\r" | "\n" 184 | 185 | let tokenizer = Tokenizer(tokens: scanner, trivia: trivia, unknown: .unknown)! 
186 | let tokens = tokenizer.tokenize("134 x3 !4x 41.4 ?ab".unicodeScalars) 187 | 188 | for (t, s) in tokens { 189 | print("'\(String(s))' - \(t)") 190 | } 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /Tests/CompilerKitTests/GrammarTests.swift: -------------------------------------------------------------------------------- 1 | import XCTest 2 | @testable import CompilerKit 3 | 4 | final class GrammarTests: XCTestCase { 5 | enum Token: CustomStringConvertible { 6 | case plus, minus, multiply, divide 7 | case leftBracket, rightBracket 8 | case num, name 9 | case eof 10 | 11 | var description: String { 12 | func q(_ s: String) -> String { return "'\(s)'" } 13 | switch self { 14 | case .plus: return q("+") 15 | case .minus: return q("-") 16 | case .multiply: return q("*") 17 | case .divide: return q("/") 18 | case .leftBracket: return q("(") 19 | case .rightBracket: return q(")") 20 | case .name: return "name" 21 | case .num: return "num" 22 | case .eof: return "eof" 23 | } 24 | } 25 | } 26 | 27 | static let grammar = Grammar( 28 | productions: [ 29 | // (0) Goal -> Expr 30 | [[.nt(1), .t(.eof)]], 31 | 32 | // (1) Expr -> Expr + Term 33 | // | Expr - Term 34 | // | Term 35 | [[.nt(1), .t(.plus), .nt(2)], 36 | [.nt(1), .t(.minus), .nt(2)], 37 | [.nt(2)]], 38 | 39 | // (2) Term -> Term x Factor 40 | // | Term / Factor 41 | // | Factor 42 | [[.nt(2), .t(.multiply), .nt(3)], 43 | [.nt(2), .t(.divide), .nt(3)], 44 | [.nt(3)]], 45 | 46 | // (3) Factor -> ( Expr ) 47 | // | num 48 | // | name 49 | [[.t(.leftBracket), .nt(1), .t(.rightBracket)], 50 | [.t(.num)], 51 | [.t(.name)]] 52 | ], 53 | start: 0 54 | ) 55 | 56 | static let valid: [[Token]] = [ 57 | [.num, .eof], 58 | [.num, .plus, .name, .eof], 59 | [.leftBracket, .num, .plus, .num, .rightBracket, .eof], 60 | ] 61 | 62 | static let invalid: [[Token]] = [ 63 | // missing eof 64 | [.num], 65 | // unbalanced brackets 66 | [.leftBracket, .leftBracket, .rightBracket, 
.num, .rightBracket, .eof], 67 | // name followed by num 68 | [.name, .num, .eof], 69 | ] 70 | 71 | func testGrammar() { 72 | var g = GrammarTests.grammar 73 | 74 | g.eliminateLeftRecursion() 75 | XCTAssertEqual(g.productions.count, 6) 76 | 77 | let nullable = g.nullable() 78 | XCTAssertEqual(nullable, [[], [], [], [], [0], [0]]) 79 | 80 | let first = g.first(nullable: nullable) 81 | XCTAssertEqual(first, 82 | [ 83 | [.num: [0], .leftBracket: [0], .name: [0]], 84 | [.num: [0], .leftBracket: [0], .name: [0]], 85 | [.num: [0], .leftBracket: [0], .name: [0]], 86 | [.num: [1], .leftBracket: [0], .name: [2]], 87 | [.plus: [1], .minus: [2]], 88 | [.multiply: [1], .divide: [2]], 89 | ]) 90 | 91 | let follow = g.follow(nullable: nullable, first: first) 92 | XCTAssertEqual(follow, [ 93 | Set([]), 94 | Set([.eof, .rightBracket]), 95 | Set([.eof, .rightBracket, .plus, .minus]), 96 | Set([.eof, .rightBracket, .plus, .minus, .multiply, .divide]), 97 | Set([.eof, .rightBracket]), 98 | Set([.eof, .rightBracket, .plus, .minus]), 99 | ]) 100 | 101 | XCTAssert(g.isBacktrackFree(nullable: nullable, first: first, follow: follow)) 102 | } 103 | 104 | func testLLParserConstruction() { 105 | let g = GrammarTests.grammar 106 | 107 | _ = LLParser(g) 108 | 109 | let parser = LLParser(g) 110 | XCTAssertEqual(parser.table, 111 | [ 112 | [.num: 0, .leftBracket: 0, .name: 0], 113 | [.num: 0, .leftBracket: 0, .name: 0], 114 | [.num: 0, .leftBracket: 0, .name: 0], 115 | [.num: 1, .leftBracket: 0, .name: 2], 116 | [.rightBracket: 0, .plus: 1, .minus: 2, .eof: 0], 117 | [.rightBracket: 0, .minus: 0, .multiply: 1, .divide: 2, .plus: 0, .eof: 0] 118 | ]) 119 | } 120 | 121 | func testLLParserCorrectness() { 122 | let g = GrammarTests.grammar 123 | let parser = LLParser(g) 124 | 125 | for s in GrammarTests.valid { 126 | XCTAssert(parser.parse(s)) 127 | } 128 | 129 | for s in GrammarTests.invalid { 130 | XCTAssertFalse(parser.parse(s)) 131 | } 132 | } 133 | 134 | func testLRConstruction() { 135 | let g 
= GrammarTests.grammar 136 | _ = LRParser(slr: g) 137 | } 138 | 139 | func testLRParserCorrectness() { 140 | let g = GrammarTests.grammar 141 | let parser = LRParser(slr: g) 142 | 143 | for s in GrammarTests.valid { 144 | XCTAssert(parser.parse(s)) 145 | } 146 | 147 | for s in GrammarTests.invalid { 148 | XCTAssertFalse(parser.parse(s)) 149 | } 150 | } 151 | 152 | func testLALRParserCorrectness() { 153 | let g = GrammarTests.grammar 154 | let parser = LRParser(lalr: g) 155 | 156 | for s in GrammarTests.valid { 157 | XCTAssert(parser.parse(s)) 158 | } 159 | 160 | for s in GrammarTests.invalid { 161 | XCTAssertFalse(parser.parse(s)) 162 | } 163 | } 164 | 165 | func testBacktrackingGrammar() { 166 | var g = Grammar(productions: 167 | [ 168 | // (0) Goal -> Expr 169 | [ 170 | [.nt(1)], 171 | ], 172 | 173 | // (1) Expr -> Expr + Term 174 | // | Expr - Term 175 | // | Term 176 | [ 177 | [.nt(1), .t(.plus), .nt(2)], 178 | [.nt(1), .t(.minus), .nt(2)], 179 | [.nt(2)], 180 | ], 181 | 182 | // (2) Term -> Term x Factor 183 | // | Term / Factor 184 | // | Factor 185 | [ 186 | [.nt(2), .t(.multiply), .nt(3)], 187 | [.nt(2), .t(.divide), .nt(3)], 188 | [.nt(3)], 189 | ], 190 | 191 | // (3) Factor -> ( Expr ) 192 | // | num 193 | // | name 194 | [ 195 | [.t(.leftBracket), .nt(1), .t(.rightBracket)], 196 | [.t(.num)], 197 | [.t(.name)], 198 | [.t(.name), .t(.leftBracket), .nt(4), .t(.rightBracket)], 199 | ], 200 | // (4) ArgList -> Expr 201 | [ 202 | [.nt(1)] 203 | ], 204 | ], 205 | start: 0 206 | ) 207 | 208 | g.eliminateLeftRecursion() 209 | XCTAssertEqual(g.productions.count, 7) 210 | 211 | // there are two productions of Factor starting with .name 212 | let nullable = g.nullable() 213 | let first = g.first(nullable: nullable) 214 | let follow = g.follow(nullable: nullable, first: first) 215 | 216 | XCTAssertEqual(first[3][.name]?.count, 2) 217 | 218 | // ... 
which means that the grammar is NOT backtrack free 219 | XCTAssert(!g.isBacktrackFree(nullable: nullable, first: first, follow: follow)) 220 | 221 | g.leftRefactor() 222 | let newNullable = g.nullable() 223 | let newFirst = g.first(nullable: newNullable) 224 | let newFollow = g.follow(nullable: newNullable, first: newFirst) 225 | XCTAssert(g.isBacktrackFree(nullable: newNullable, first: newFirst, follow: newFollow)) 226 | } 227 | 228 | func testLALR() { 229 | enum Token: String, Hashable { 230 | case lb, rb, id, plus, mult 231 | } 232 | 233 | func constructItemSet(_ s: [(Int, Int, Int)]) -> Set.Item> { 234 | return Set(s.map(LRParser.Item.init)) 235 | } 236 | 237 | func constructItemSets(_ s: [[(Int, Int, Int)]]) -> Set.Item>> { 238 | return Set(s.map(constructItemSet)) 239 | } 240 | 241 | func constructTransition(_ s: Set.Item>, _ nt: Int) -> LRParser.Transition { 242 | return LRParser.Transition(state: s, nt: nt) 243 | } 244 | 245 | func constructTransitionSet(_ s: [(Set.Item>, Int)]) -> Set.Transition> { 246 | return Set(s.map(constructTransition)) 247 | } 248 | 249 | // This is Grammar 4.19 from the Dragon book 250 | // 0,0 E -> E + T 251 | // 0,1 E -> T 252 | // 1,0 T -> T * F 253 | // 1,1 T -> F 254 | // 2,0 F -> (E) 255 | // 2,1 F -> id 256 | // 3,0 E' -> E 257 | let g = Grammar(productions: [ 258 | // E -> E + T | T 259 | [[.nt(0), .t(.plus), .nt(1)], [.nt(1)]], 260 | // T -> T * F | F 261 | [[.nt(1), .t(.mult), .nt(2)], [.nt(2)]], 262 | // F -> (E) | id 263 | [[.t(.lb), .nt(0), .t(.rb)], [.t(.id)]], 264 | ], 265 | start: 0) 266 | let grammar = g.augmented 267 | 268 | // Item sets in an ordered array in the same order as the Dragon book 269 | // See Fig 4.35 in Dragon book for list of items (I0 to I11) 270 | let I = [ 271 | /* I0 */ [(1, 0, 0), (0, 1, 0), (0, 0, 0), (2, 0, 0), (1, 1, 0), (2, 1, 0), (3, 0, 0)], 272 | /* I1 */ [(0, 0, 1), (3, 0, 1)], 273 | /* I2 */ [(0, 1, 1), (1, 0, 1)], 274 | /* I3 */ [(1, 1, 1)], 275 | /* I4 */ [(1, 0, 0), (2, 0, 1), (0, 
1, 0), (0, 0, 0), (2, 0, 0), (1, 1, 0), (2, 1, 0)], 276 | /* I5 */ [(2, 1, 1)], 277 | /* I6 */ [(1, 0, 0), (2, 0, 0), (1, 1, 0), (2, 1, 0), (0, 0, 2)], 278 | /* I7 */ [(1, 0, 2), (2, 0, 0), (2, 1, 0)], 279 | /* I8 */ [(0, 0, 1), (2, 0, 2)], 280 | /* I9 */ [(1, 0, 1), (0, 0, 3)], 281 | /* I10 */ [(1, 0, 3)], 282 | /* I11 */ [(2, 0, 3)], 283 | ].map(constructItemSet) 284 | 285 | let allNodes = Set(grammar.productions.flatMap { $0.flatMap { $0 } }) 286 | let nullable = grammar.nullable() 287 | 288 | // The LR(0) item sets or "canonical set of LR(0) items" 289 | let startItem = LRParser.Item(term: grammar.productions.count - 1, production: 0, position: 0) 290 | let itemSets = LRParser.itemSets(grammar, startItem, allNodes) 291 | let expectedItemSets = Set(I) 292 | XCTAssertEqual(itemSets, expectedItemSets) 293 | 294 | // goto from state I1 {[E' -> E.], [E -> E. + T]} by token '+'... 295 | let gotoSet = LRParser.goto(grammar, I[1], .t(.plus)) 296 | 297 | // ...and expect to land in state I6 298 | XCTAssertEqual(gotoSet, I[6]) 299 | 300 | let allTransitions = LRParser.allTransitions(grammar, itemSets) 301 | let expectedTransitions = constructTransitionSet([ 302 | (I[0], 0), (I[0], 1), (I[0], 2), 303 | (I[4], 0), (I[4], 1), (I[4], 2), 304 | (I[6], 1), (I[6], 2), 305 | (I[7], 2), 306 | ]) 307 | 308 | XCTAssertEqual(allTransitions, expectedTransitions) 309 | 310 | // In the conventions of the paper by DeRemer & Pennello (1982), 311 | // this is a transition (I4, E) - with state I4, nonterminal E. 
312 | // This transition lands us in state I8 {[F -> ( E .)], [E -> E .+ T]} 313 | let t = constructTransition(I[4], 0) 314 | let drTerminals = LRParser.directRead(grammar, t) 315 | XCTAssertEqual(drTerminals, [.plus, .rb]) 316 | 317 | let reads = Dictionary(allTransitions) { LRParser.reads(grammar, nullable, $0) } 318 | let directRead = Dictionary(allTransitions) { LRParser.directRead(grammar, $0) } 319 | let indirectReads = LRParser.digraph(allTransitions, reads, directRead) 320 | 321 | // Without nullable terms, the 'reads' relationship is identical to direct read 322 | // TODO: test this with a grammar that has nullable rules 323 | XCTAssertEqual(directRead, indirectReads) 324 | 325 | let expectedFollowSets: [LRParser.Transition: Set] = [ 326 | constructTransition(I[0], 0): [.plus], 327 | constructTransition(I[0], 1): [.mult, .plus], 328 | constructTransition(I[0], 2): [.mult, .plus], 329 | constructTransition(I[4], 0): [.plus, .rb], 330 | constructTransition(I[4], 1): [.mult, .plus, .rb], 331 | constructTransition(I[4], 2): [.mult, .plus, .rb], 332 | constructTransition(I[6], 1): [.mult, .plus, .rb], 333 | constructTransition(I[6], 2): [.mult, .plus, .rb], 334 | constructTransition(I[7], 2): [.mult, .plus, .rb], 335 | ] 336 | let includes = Dictionary(allTransitions) { LRParser.includes(grammar, nullable, $0, allTransitions) } 337 | let followSets = LRParser.digraph(allTransitions, includes, indirectReads) 338 | XCTAssertEqual(expectedFollowSets, followSets) 339 | 340 | // make a list of all possible reduction items: [A -> w.] 
341 | var reductions: [(Set.Item>, LRParser.Item)] = [] 342 | let prods = grammar.productions 343 | for term in 0...Item(term: term, production: production, position: prods[term][production].count) 346 | for state in itemSets where state.contains(r) { 347 | reductions.append((state, r)) 348 | } 349 | } 350 | } 351 | 352 | let lookbacks = reductions.map { LRParser.lookback(grammar, $0.0, $0.1, allTransitions) } 353 | let expectedLookbacks: [Set.Transition>] = [ 354 | constructTransitionSet([(I[4], 0), (I[0], 0)]), 355 | constructTransitionSet([(I[4], 0), (I[0], 0)]), 356 | constructTransitionSet([(I[6], 1), (I[4], 1), (I[0], 1)]), 357 | constructTransitionSet([(I[6], 1), (I[4], 1), (I[0], 1)]), 358 | constructTransitionSet([(I[6], 2), (I[0], 2), (I[7], 2), (I[4], 2)]), 359 | constructTransitionSet([(I[6], 2), (I[0], 2), (I[7], 2), (I[4], 2)]), 360 | [], 361 | ] 362 | 363 | XCTAssertEqual(lookbacks, expectedLookbacks) 364 | 365 | let lookaheads: [Set] = reductions.map { state, reduction in 366 | var la: Set = [] 367 | for transition in LRParser.lookback(grammar, state, reduction, allTransitions) { 368 | la.formUnion(followSets[transition]!) 
@testable import CompilerKitTests
import XCTest

// Linux test discovery: XCTMain only runs tests listed in `allTests`.
// Keep each list in sync with the test methods in its XCTestCase subclass.

extension FiniteStateTests {
    static var allTests: [(String, (FiniteStateTests) -> () throws -> Void)] = [
        ("testNFA", testNFA),
        ("testRegularExpression", testRegularExpression),
        ("testDFA", testDFA),
        ("testRegularExpressionToDFAMatch", testRegularExpressionToDFAMatch),
        ("testRegularExpressionToMinimizedDFAMatch", testRegularExpressionToMinimizedDFAMatch),
        ("testMultiAcceptingStatesDFA", testMultiAcceptingStatesDFA),
        ("testScanner", testScanner),
        // Fix: testTokenizer exists in FiniteStateTests but was missing here,
        // so it never ran under XCTMain on Linux.
        ("testTokenizer", testTokenizer),
    ]
}

extension GrammarTests {
    static var allTests: [(String, (GrammarTests) -> () throws -> Void)] = [
        ("testGrammar", testGrammar),
        ("testLLParserConstruction", testLLParserConstruction),
        ("testLLParserCorrectness", testLLParserCorrectness),
        ("testLRConstruction", testLRConstruction),
        ("testLRParserCorrectness", testLRParserCorrectness),
        ("testLALRParserCorrectness", testLALRParserCorrectness),
        ("testBacktrackingGrammar", testBacktrackingGrammar),
        ("testLALR", testLALR),
    ]
}

XCTMain([
    testCase(FiniteStateTests.allTests),
    testCase(GrammarTests.allTests),
])