├── .gitignore ├── .travis.yml ├── LICENSE ├── Package.swift ├── README.md ├── Sources └── CompilerKit │ ├── DFA.swift │ ├── Grammar.swift │ ├── Helpers.swift │ ├── LALRParser.swift │ ├── LLParser.swift │ ├── LRParser.swift │ ├── Matcher.swift │ ├── NFA.swift │ ├── RegularExpression.swift │ ├── SLRParser.swift │ ├── ScalarClass.swift │ └── Tokenizer.swift └── Tests ├── CompilerKitTests ├── FiniteStateTests.swift └── GrammarTests.swift └── LinuxMain.swift /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /.build 3 | /Packages 4 | /*.xcodeproj 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | os: osx 2 | osx_image: xcode9.3 3 | install: true 4 | script: swift test -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Ahmad Alhashemi 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version:4.0 2 | // The swift-tools-version declares the minimum version of Swift required to build this package. 3 | 4 | import PackageDescription 5 | 6 | let package = Package( 7 | name: "CompilerKit", 8 | products: [ 9 | .library( 10 | name: "CompilerKit", 11 | targets: ["CompilerKit"]), 12 | ], 13 | dependencies: [], 14 | targets: [ 15 | .target( 16 | name: "CompilerKit", 17 | dependencies: []), 18 | .testTarget( 19 | name: "CompilerKitTests", 20 | dependencies: ["CompilerKit"]), 21 | ] 22 | ) 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CompilerKit 2 | 3 | [![Build Status](https://travis-ci.org/hashemi/CompilerKit.svg?branch=master)](https://travis-ci.org/hashemi/CompilerKit) 4 | 5 | The goal of this project is to create a library of data structures and algorithms that can be used to build a compiler in Swift. 6 | 7 | ## Features 8 | 9 | Since this project is under active development, it's very likely that the following lists are incomplete. 10 | 11 | ### Data Structures 12 | 13 | - Classes of unicode scalars (`ScalarClass`). 14 | - Regular expression (`RegularExpression`). 15 | - Nondeterministic finite automata (`NFA`). 16 | - Deterministic finite automata (`DFA`). 17 | - Tokenizer (`Tokenizer`). 18 | - Grammar (`Grammar`). 19 | - LL parser (`LLParser`). 20 | - SLR parser (`LRParser`). 21 | - LALR parser (`LALRParser`). 
22 | 23 | ### Functions/Algorithms 24 | 25 | - Matching a unicode scalar against a `ScalarClass`. 26 | - Derive an `NFA` from a `RegularExpression`. 27 | - Derive a `DFA` from an `NFA`. 28 | - Minimize a `DFA`. 29 | - Match a string against an `NFA` or `DFA` (i.e., execute finite state machine). 30 | - Create a matcher that takes pairs of `RegularExpression`s and tokens and returns the correct token for a string based on the match. 31 | - Create a tokenizer from pairs of `RegularExpression`s and tokens as well as a `RegularExpression` representing trivia between tokens that then takes a string and breaks it into individual tokens, skipping the trivia in between them. 32 | - Eliminate left recursion from a grammar. 33 | - Perform left refactoring to eliminate backtracking. 34 | - Check if a grammar is backtracking-free. 35 | - Generate a table-driven LL(1) parser from a backtracking-free grammar, which reports whether an input was accepted or rejected. 36 | - Generate a DFA-backed SLR parser from a grammar, which reports whether an input was accepted or rejected. 37 | - Construct a DFA-backed LALR parser from a grammar using the DeRemer and Pennello algorithm, which reports whether an input was accepted or rejected. 38 | 39 | ## Example 40 | 41 | ```swift 42 | enum Token { 43 | case integer 44 | case decimal 45 | case identifier 46 | case unknown 47 | } 48 | 49 | let scanner: [(RegularExpression, Token)] = [ 50 | (.digit + .digit*, .integer), 51 | (.digit + .digit* + "." + .digit + .digit*, .decimal), 52 | (.alpha + .alphanum*, .identifier), 53 | ] 54 | 55 | let nfa = NFA(scanner: scanner, nonAcceptingValue: .unknown) 56 | let dfa = nfa.dfa 57 | let minimizedDfa = dfa.minimized 58 | 59 | 60 | minimizedDfa.match("134") // .integer 61 | minimizedDfa.match("61.613") // .decimal 62 | minimizedDfa.match("x1") // .identifier 63 | minimizedDfa.match("1xy") // .unknown 64 | ``` 65 | 66 | See the test suite for more usage examples. 
67 | 68 | ## See Also 69 | 70 | ### Resources Used 71 | 72 | 1. [Engineering a Compiler](https://www.cs.rice.edu/~keith/Errata.html) 2nd ed by Keith Cooper and Linda Torczon. 73 | 74 | 2. [Algorithms](https://algs4.cs.princeton.edu/home/) 4th ed by Robert Sedgewick and Kevin Wayne. 75 | 76 | 3. [Stanford's Compilers Course](https://lagunita.stanford.edu/courses/Engineering/Compilers/Fall2014/about) by Alex Aiken. 77 | 78 | 4. [Compilers: Principles, Techniques, and Tools](https://en.wikipedia.org/wiki/Compilers:_Principles,_Techniques,_and_Tools) by Alfred V. Aho, Monica S. Lam, Ravi Sethi, and Jeffrey D. Ullman. 79 | 80 | 5. [Efficient Computation of LALR(1) Look-Ahead Sets](https://dl.acm.org/citation.cfm?id=357187) by Frank DeRemer and Thomas Pennello. 81 | 82 | 6. [Modern Compiler Implementation in C](https://www.cs.princeton.edu/~appel/modern/c/) by Maia Ginsburg and Andrew W. Appel. 83 | 84 | ### My other projects, leading up to this 85 | 86 | 1. [slox](https://github.com/hashemi/slox) - Hand written scanner, recursive descent parser, and a tree-walking interpreter in Swift. See it for a demonstration of using Swift's algebraic data types (`enum`s and `struct`s) to represent and render code. Implements the [lox programming language](http://www.craftinginterpreters.com). Ported from Java. 87 | 88 | 2. [bslox](https://github.com/hashemi/bslox) - Very early work-in-progress of what will eventually be a bytecode compiler and virtual machine of lox. Will be porting this from C. 89 | 90 | 3. [FlyingMonkey](https://github.com/hashemi/FlyingMonkey) - Hand written scanner and Pratt parser of the [monkey programming language](https://interpreterbook.com). Ported from Go. 91 | 92 | 4. [Sift](https://github.com/hashemi/Sift) - Hand written scanner and parser of a [subset of Scheme](https://en.wikibooks.org/wiki/Write_Yourself_a_Scheme_in_48_Hours). Ported from Haskell. 93 | 94 | 5. 
[sparrow](https://github.com/hashemi/sparrow/blob/master/sparrow/Lexer.swift) - Hand written scanner of the Swift scanner from the official Swift compiler. Ported from the C++ to Swift. See for an example of a complex scanner/lexer with support for rewinding to arbitrary points in the input. 95 | 96 | ## License 97 | MIT -------------------------------------------------------------------------------- /Sources/CompilerKit/DFA.swift: -------------------------------------------------------------------------------- 1 | struct DFA { 2 | typealias Element = M.Element 3 | 4 | var alphabet: Set { 5 | return Set(transitions.keys) 6 | } 7 | 8 | let states: Int 9 | let transitions: [M: [(Int, Int)]] 10 | let initial: Int 11 | let accepting: [Int: Output] 12 | let nonAcceptingValue: Output 13 | 14 | func match(_ elements: S) -> Output where S.Element == Element { 15 | var state = initial 16 | for element in elements { 17 | guard let matcher = alphabet.first(where: { $0 ~= element }) else { 18 | return nonAcceptingValue 19 | } 20 | 21 | guard let newState = transitions[matcher]?.first(where: { $0.0 == state })?.1 else { 22 | return nonAcceptingValue 23 | } 24 | state = newState 25 | } 26 | return accepting[state] ?? 
nonAcceptingValue 27 | } 28 | 29 | func prefixMatch(_ elements: C) -> (Output, C.SubSequence) where C.Element == Element { 30 | var state = initial 31 | var result = (nonAcceptingValue, elements.prefix(upTo: elements.startIndex)) 32 | 33 | for idx in elements.indices { 34 | let element = elements[idx] 35 | guard let matcher = alphabet.first(where: { $0 ~= element }) else { 36 | break 37 | } 38 | 39 | guard let newState = transitions[matcher]?.first(where: { $0.0 == state })?.1 else { 40 | break 41 | } 42 | state = newState 43 | if let newOutput = accepting[state] { 44 | result = (newOutput, elements.prefix(through: idx)) 45 | } 46 | } 47 | 48 | return result 49 | } 50 | } 51 | 52 | extension DFA { 53 | init(_ nfa: NFA) where Output == Set { 54 | // precompute and cache epsilon closures 55 | let epsilonClosures = nfa.epsilonClosures 56 | 57 | func epsilonClosure(from states: Set) -> Set { 58 | var all = Set() 59 | for v in states { 60 | all.formUnion(epsilonClosures[v]) 61 | } 62 | return all 63 | } 64 | 65 | let alphabet = nfa.alphabet 66 | let q0 = epsilonClosures[nfa.initial] 67 | var Q: [Set] = [q0] 68 | var worklist = [(0, q0)] 69 | var transitions: [M: [(Int, Int)]] = [:] 70 | var accepting: [Int: Set] = [0: Set(q0.compactMap { nfa.accepting[$0] })] 71 | while let (qpos, q) = worklist.popLast() { 72 | for matcher in alphabet { 73 | let t = nfa.epsilonClosure(from: nfa.reachable(from: q, via: matcher)) 74 | if t.isEmpty { continue } 75 | let position = Q.index(of: t) ?? 
Q.count 76 | if position == Q.count { 77 | Q.append(t) 78 | worklist.append((position, t)) 79 | accepting[Q.count - 1] = Set(t.compactMap({ nfa.accepting[$0] })) 80 | } 81 | transitions[matcher, default: []].append((qpos, position)) 82 | } 83 | } 84 | 85 | self.init( 86 | states: Q.count, 87 | transitions: transitions, 88 | initial: 0, // this is always zero since q0 is always the first item in Q 89 | accepting: accepting, 90 | nonAcceptingValue: Set() 91 | ) 92 | } 93 | 94 | init?(consistent nfa: NFA, nonAcceptingValue: Output) { 95 | let dfa = DFA, M>(nfa) 96 | 97 | var accepting: [Int: Output] = [:] 98 | for (k,v) in dfa.accepting { 99 | switch v.count { 100 | case 0: break 101 | case 1: accepting[k] = v.first! 102 | default: return nil 103 | } 104 | } 105 | 106 | self.states = dfa.states 107 | self.transitions = dfa.transitions 108 | self.initial = dfa.initial 109 | self.accepting = accepting 110 | self.nonAcceptingValue = nonAcceptingValue 111 | } 112 | } 113 | 114 | // minimal dfa (Hopcroft's Algorithm) 115 | extension DFA { 116 | var minimized: DFA { 117 | // create a canonical partition per unique accepting value 118 | let acceptingPartition = Dictionary(uniqueKeysWithValues: 119 | Set(self.accepting.values) 120 | .enumerated() 121 | .map { ($0.element, $0.offset + 1) } 122 | ) 123 | 124 | // 0 = non-accepting states, otherwise location is determined by acceptingPartition 125 | var partition = (0.. Int in 126 | guard let acceptingValue = self.accepting[s] else { return 0 } 127 | return acceptingPartition[acceptingValue]! 128 | } 129 | 130 | var partitionCount = acceptingPartition.count + 1 131 | 132 | let alphabet = self.alphabet 133 | func split() { 134 | for matcher in alphabet { 135 | // -1: not set yet, -2: no path exists from this partition for this scalar 136 | var partitionTarget = Array(repeating: -1, count: partitionCount) 137 | var newPartition = Array(repeating: -1, count: partitionCount) 138 | for x in 0.. 
{ 2 | enum Node: Hashable { 3 | case nt(Int) 4 | case t(T) 5 | } 6 | 7 | var productions: [[[Node]]] 8 | var start: Int 9 | 10 | var augmented: Grammar { 11 | var new = self 12 | new.productions.append([[.nt(new.start)]]) 13 | new.start = new.productions.count - 1 14 | return new 15 | } 16 | 17 | mutating func eliminateLeftRecursion() { 18 | for i in 0.. 0 { 20 | // find productions starting with a preceeding NT 21 | // as they could lead to indirect left recursion 22 | for j in 0.. 1 { 83 | // save common prefix 84 | let commonPrefix = productions[s][matchingProductions.first!].prefix(upTo: prefixLength) 85 | 86 | // save matching productions with their common prefix removed 87 | let matchingProductionsWithoutCommonPrefix = matchingProductions.map { 88 | Array(productions[s][$0][prefixLength...]) 89 | } 90 | 91 | // create a new NT for the common factor 92 | let newNt = productions.count 93 | productions.append(matchingProductionsWithoutCommonPrefix) 94 | 95 | productions[s] = productions[s] 96 | .enumerated() 97 | .filter { !matchingProductions.contains($0.offset) } 98 | .map { $0.element } 99 | + [commonPrefix + [.nt(newNt)]] 100 | 101 | break 102 | } 103 | } 104 | } 105 | if productions == lastProductions { break } 106 | } 107 | } 108 | 109 | func nullable() -> [Set] { 110 | var nullable: [Set] = Array(repeating: Set(), count: productions.count) 111 | 112 | func nodeIsNullabe(_ n: Node) -> Bool { 113 | switch n { 114 | case .t(_): return false 115 | case let .nt(nt): return !nullable[nt].isEmpty 116 | } 117 | } 118 | 119 | while true { 120 | let lastValue = nullable 121 | for s in 0..]) -> [[T: Set]] { 136 | precondition(nullable.count == productions.count) 137 | var first: [[T: Set]] = Array(repeating: [:], count: productions.count) 138 | 139 | func firstByNode(_ n: Node) -> Set { 140 | switch n { 141 | case let .t(t): return Set([t]) 142 | case let .nt(nt): return Set(first[nt].keys) 143 | } 144 | } 145 | 146 | while true { 147 | let lastValue = first 148 | 
for s in 0.. = firstByNode(p.first!) 153 | 154 | for node in p { 155 | if case let .nt(nt) = node, !nullable[nt].isEmpty { 156 | // accumulate first sets of nonterminal nodes with nullable productions... 157 | rhs.formUnion(firstByNode(node)) 158 | } else { 159 | // ...until we hit the first terminal or non-nullable 160 | break 161 | } 162 | } 163 | 164 | for t in rhs { 165 | first[s][t, default: []].insert(pIdx) 166 | } 167 | } 168 | } 169 | if first == lastValue { break } 170 | } 171 | 172 | return first 173 | } 174 | 175 | func follow(nullable: [Set], first: [[T: Set]]) -> [Set] { 176 | precondition(nullable.count == productions.count) 177 | precondition(first.count == productions.count) 178 | var follow = Array(repeating: Set(), count: productions.count) 179 | 180 | while true { 181 | let lastValue = follow 182 | for s in 0..], first: [[T: Set]], follow: [Set]) -> Bool { 208 | precondition(nullable.count == productions.count) 209 | precondition(first.count == productions.count) 210 | precondition(follow.count == productions.count) 211 | 212 | for s in 0.. 
1 }) { 215 | return false 216 | } 217 | 218 | // we can only have production that can be empty 219 | if nullable[s].count > 1 { return false } 220 | 221 | // if we do have one empty production, we need to make sure that 222 | // non of the terminals that can follow this term is also part of 223 | // the first set of one of its productions 224 | if nullable[s].count == 1 { 225 | if !follow[s].isDisjoint(with: first[s].keys) { 226 | return false 227 | } 228 | } 229 | } 230 | 231 | return true 232 | } 233 | } 234 | -------------------------------------------------------------------------------- /Sources/CompilerKit/Helpers.swift: -------------------------------------------------------------------------------- 1 | extension Dictionary { 2 | init(_ keys: Set, _ value: (Key) -> Value) { 3 | self.init(uniqueKeysWithValues: keys.map { ($0, value($0)) }) 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /Sources/CompilerKit/LALRParser.swift: -------------------------------------------------------------------------------- 1 | private extension Grammar { 2 | subscript(_ item: LRParser.Item) -> Node? 
{ 3 | let prod = productions[item.term][item.production] 4 | guard item.position < prod.count else { return nil } 5 | return prod[item.position] 6 | } 7 | } 8 | 9 | extension LRParser { 10 | init(lalr g: Grammar) { 11 | let grammar = g.augmented 12 | 13 | let startItem = Item(term: grammar.productions.count - 1, production: 0, position: 0) 14 | let allNodes = Set(grammar.productions.flatMap { $0.flatMap { $0 } }) 15 | let nullable = grammar.nullable() 16 | let itemSets = LRParser.itemSets(grammar, startItem, allNodes) 17 | let allTransitions = LRParser.allTransitions(grammar, itemSets) 18 | 19 | let directRead = Dictionary(allTransitions) { LRParser.directRead(grammar, $0) } 20 | 21 | let transitionReads = Dictionary(allTransitions) { LRParser.reads(grammar, nullable, $0) } 22 | 23 | let reads = LRParser.digraph(allTransitions, transitionReads, directRead) 24 | 25 | let transitionIncludes = Dictionary(allTransitions) { LRParser.includes(grammar, nullable, $0, allTransitions) } 26 | 27 | let follow = LRParser.digraph(allTransitions, transitionIncludes, reads) 28 | 29 | // make a list of all possible reduction items: [A -> w.] 30 | var reductions: [(Set, Item)] = [] 31 | let prods = grammar.productions 32 | for term in 0..: [Item: Set]] = [:] 42 | for (state, reduction) in reductions { 43 | lookbacks[state, default: [:]][reduction, default: []] = LRParser.lookback(grammar, state, reduction, allTransitions) 44 | } 45 | 46 | var lookaheads: [Set: [Item: Set]] = [:] 47 | for (state, reduction) in reductions { 48 | lookaheads[state] = [reduction: []] 49 | for transition in lookbacks[state]![reduction]! { 50 | lookaheads[state]![reduction]!.formUnion(follow[transition]!) 51 | } 52 | } 53 | 54 | // now we (very inefficiently) build a DFA out of that 55 | let orderedItemSets = Array(itemSets) 56 | func state(for itemSet: Set) -> Int { 57 | return orderedItemSets.index(of: itemSet)! 
58 | } 59 | 60 | let startState = state(for: LRParser.closure(grammar, [startItem])) 61 | let finalState = state(for: [Item(term: grammar.productions.count - 1, production: 0, position: 1)]) 62 | 63 | var transitions: [Node: [(Int, Int)]] = [:] 64 | for from in itemSets { 65 | for x in allNodes { 66 | let to = LRParser.goto(grammar, from, x) 67 | if !to.isEmpty { 68 | transitions[x, default: []].append((state(for: from), state(for: to))) 69 | } 70 | } 71 | } 72 | 73 | var accepting: [Int: Set] = [:] 74 | for itemSet in itemSets { 75 | let s = state(for: itemSet) 76 | 77 | // if this is a final state, accept, cannot do anything else 78 | if s == finalState { 79 | accepting[s] = [.accept] 80 | continue 81 | } 82 | 83 | if let possibleReductions = lookaheads[itemSet] { 84 | for (reduction, allowedLookaheads) in possibleReductions { 85 | accepting[s, default: []].insert(.reduce(reduction.term, reduction.position, allowedLookaheads)) 86 | } 87 | 88 | // the item set also includes non-reduce items, so it can also shift 89 | if itemSet.count > possibleReductions.count { 90 | accepting[s, default: []].insert(.shift) 91 | } 92 | } else { 93 | // no reductions, so the only possible action here is to shift 94 | accepting[s] = [.shift] 95 | } 96 | } 97 | 98 | // "we have a parser." 
99 | dfa = DFA( 100 | states: itemSets.count, 101 | transitions: transitions, 102 | initial: startState, 103 | accepting: accepting, 104 | nonAcceptingValue: [Action.error] 105 | ).minimized 106 | } 107 | 108 | static func closure(_ grammar: Grammar, _ I: Set) -> Set { 109 | var J = I 110 | var lastCount: Int 111 | repeat { 112 | lastCount = J.count 113 | for j in J { 114 | if let node = grammar[j] { 115 | if case let .nt(nt) = node { 116 | for x in 0.., _ I: Set, _ X: Node) -> Set { 127 | var G: Set = [] 128 | for i in I { 129 | if let node = grammar[i], node == X { 130 | G.insert(i.next) 131 | } 132 | } 133 | 134 | return closure(grammar, G) 135 | } 136 | 137 | static func goto(_ grammar: Grammar, _ t: Transition) -> Set { 138 | return goto(grammar, t.state, .nt(t.nt)) 139 | } 140 | 141 | static func itemSets(_ grammar: Grammar, _ startItem: Item, _ allNodes: Set) -> Set> { 142 | var C: Set> = [closure(grammar, [startItem])] 143 | 144 | var lastCount = 0 145 | while lastCount != C.count { 146 | lastCount = C.count 147 | for I in C { 148 | for x in allNodes { 149 | let g = goto(grammar, I, x) 150 | if !g.isEmpty { C.insert(g) } 151 | } 152 | } 153 | } 154 | 155 | return C 156 | } 157 | 158 | static func allTransitions(_ grammar: Grammar, _ itemSets: Set>) -> Set { 159 | var transitions: Set = [] 160 | 161 | for itemSet in itemSets { 162 | for i in itemSet { 163 | if case let .nt(nt)? = grammar[i] { 164 | transitions.insert(Transition(state: itemSet, nt: nt)) 165 | } 166 | } 167 | } 168 | 169 | return transitions 170 | } 171 | 172 | static func directRead(_ grammar: Grammar, _ t: Transition) -> Set { 173 | var terminals: Set = [] 174 | 175 | let G = goto(grammar, t) 176 | for i in G { 177 | if case let .t(terminal)? 
= grammar[i] { 178 | terminals.insert(terminal) 179 | } 180 | } 181 | 182 | return terminals 183 | } 184 | 185 | static func reads(_ grammar: Grammar, _ nullable: [Set], _ t: Transition) -> Set { 186 | var relations: Set = [] 187 | 188 | let g = goto(grammar, t) 189 | for i in g { 190 | guard case let .nt(nt)? = grammar[i.next] else { continue } 191 | 192 | if !nullable[nt].isEmpty { 193 | relations.insert(Transition(state: g, nt: nt)) 194 | } 195 | } 196 | 197 | return relations 198 | } 199 | 200 | // 't' is (p, A) in DeRemer & Pennello's description of includes 201 | static func includes(_ grammar: Grammar, _ nullable: [Set], _ t: Transition, _ allTransitions: Set) -> Set { 202 | var includes: Set = [] 203 | 204 | func tailNullable(_ i: Item) -> Bool { 205 | let prod = grammar.productions[i.term][i.production] 206 | 207 | // if item is last in a production, the tail is empty 208 | // and therefore is nullable 209 | guard i.position < prod.count else { return true } 210 | 211 | let nodes = prod[i.position.. β A ɣ] 226 | for initialItem in pre.state where initialItem.term == pre.nt { 227 | // check all possible (q, C) transitions we can take from this item 228 | // is our 't' one of them? 
229 | var item = initialItem 230 | var q = pre.state 231 | while let node = grammar[item] { 232 | if case let .nt(nt) = node { 233 | if Transition(state: q, nt: nt) == t { 234 | // we just got to (p, A) from 'pre' 235 | // this means that this item is [B -> β .A ɣ] 236 | // if ɣ is nullable, the (p, A) includes (p', B) 237 | // i.e., 't' includes 'pre' 238 | if tailNullable(item.next) { 239 | includes.insert(pre) 240 | } 241 | } 242 | } 243 | 244 | q = goto(grammar, q, node) 245 | item = item.next 246 | } 247 | 248 | } 249 | } 250 | 251 | return includes 252 | } 253 | 254 | static func lookback(_ grammar: Grammar, _ q: Set, _ reduction: Item, _ allTransitions: Set) -> Set { 255 | let w = grammar.productions[reduction.term][reduction.production] 256 | // a reduction is represented by an item with the dot in the far right 257 | // [A -> w.] 258 | precondition(reduction.position == w.count) 259 | precondition(q.contains(reduction)) 260 | 261 | var lookback: Set = [] 262 | 263 | // check every transition (p, A) where A is the reductions lhs 264 | for t in allTransitions where t.nt == reduction.term { 265 | // check if we can spell a path from t.state (p) to (q) using w 266 | var g = t.state 267 | for n in w { 268 | g = goto(grammar, g, n) 269 | } 270 | 271 | // if this was a valid path, we will find ourselves at q 272 | if g == q { 273 | lookback.insert(t) 274 | } 275 | } 276 | 277 | return lookback 278 | } 279 | 280 | static func digraph( 281 | _ input: Set, 282 | _ relation: [Input: Set], 283 | _ fp: [Input: Set]) -> [Input: Set] { 284 | 285 | var stack: [Input] = [] 286 | var result: [Input: Set] = [:] 287 | var n = Dictionary(input) { _ in 0 } 288 | 289 | func traverse(_ x: Input) { 290 | stack.append(x) 291 | let d = stack.count 292 | n[x] = d 293 | result[x] = fp[x]! 294 | for y in relation[x]! { 295 | if n[y] == 0 { traverse(y) } 296 | n[x] = min(n[x]!, n[y]!) 297 | result[x]!.formUnion(result[y]!) 298 | } 299 | if n[x] == d { 300 | repeat { 301 | n[stack.last!] 
= Int.max 302 | result[stack.last!] = result[x] 303 | } while stack.popLast() != x 304 | } 305 | } 306 | 307 | for x in input where n[x] == 0 { 308 | traverse(x) 309 | } 310 | 311 | return result 312 | } 313 | } 314 | -------------------------------------------------------------------------------- /Sources/CompilerKit/LLParser.swift: -------------------------------------------------------------------------------- 1 | struct LLParser { 2 | let grammar: Grammar 3 | let nullable: [Set] 4 | let first: [[T: Set]] 5 | let follow: [Set] 6 | let table: [[T: Int]] 7 | 8 | init(_ g: Grammar) { 9 | var g = g 10 | 11 | // get the grammar ready for LL parsing 12 | g.eliminateLeftRecursion() 13 | g.leftRefactor() 14 | 15 | nullable = g.nullable() 16 | first = g.first(nullable: nullable) 17 | follow = g.follow(nullable: nullable, first: first) 18 | 19 | let isBacktrackFree = g.isBacktrackFree(nullable: nullable, first: first, follow: follow) 20 | precondition(isBacktrackFree, 21 | "Cannot initialize an LL parser for a non-backtrack free grammar") 22 | 23 | var table: [[T: Int]] = Array(repeating: [:], count: g.productions.count) 24 | 25 | for nt in 0.. Bool { 43 | var current = 0 44 | 45 | func advance() { current += 1 } 46 | 47 | func peek() -> T? 
{ 48 | guard current < words.count else { return nil } 49 | return words[current] 50 | } 51 | 52 | var stack: [Grammar.Node] = [.nt(self.grammar.start)] 53 | 54 | while let focus = stack.popLast() { 55 | guard let word = peek() else { 56 | // unexpected end of input 57 | return false 58 | } 59 | switch focus { 60 | case let .t(t): 61 | guard t == word else { 62 | // unexpected word 63 | return false 64 | } 65 | advance() 66 | 67 | case let .nt(nt): 68 | guard let p = table[nt][word] else { 69 | // unexpected word 70 | return false 71 | } 72 | 73 | stack.append(contentsOf: grammar.productions[nt][p].reversed()) 74 | } 75 | } 76 | 77 | if peek() != nil { 78 | // input contains unconsumed words at the end 79 | return false 80 | } 81 | 82 | return true 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /Sources/CompilerKit/LRParser.swift: -------------------------------------------------------------------------------- 1 | extension Grammar.Node: Matcher { 2 | typealias Element = Grammar.Node 3 | 4 | static func ~=(pattern: Element, value: Element) -> Bool { 5 | return pattern == value 6 | } 7 | } 8 | 9 | struct LRParser { 10 | typealias Node = Grammar.Node 11 | 12 | struct Item: Hashable { 13 | let term: Int 14 | let production: Int 15 | let position: Int 16 | 17 | var next: Item { 18 | return Item(term: term, production: production, position: position + 1) 19 | } 20 | } 21 | 22 | // (p, A) where p is state and A is nt 23 | struct Transition: Hashable { 24 | let state: Set 25 | let nt: Int 26 | } 27 | 28 | enum Action: Hashable { 29 | case shift 30 | case reduce(Int, Int, Set) 31 | case accept 32 | case error 33 | } 34 | 35 | let dfa: DFA, Node> 36 | 37 | func parse(_ elements: S) -> Bool where S.Element == T { 38 | var stack: [Node] = [] 39 | var it = elements.makeIterator() 40 | 41 | var lookahead = it.next() 42 | func advance() -> T? 
{ 43 | let current = lookahead 44 | lookahead = it.next() 45 | return current 46 | } 47 | 48 | func perform(_ action: Action) -> Bool { 49 | switch action { 50 | case .shift: 51 | guard let t = advance() else { return false } 52 | stack.append(.t(t)) 53 | case let .reduce(nt, size, _): 54 | stack.removeLast(size) 55 | stack.append(.nt(nt)) 56 | case .accept: 57 | guard lookahead == nil else { return false } 58 | case .error: 59 | return false 60 | } 61 | 62 | return true 63 | } 64 | 65 | while true { 66 | let actions = dfa.match(stack) 67 | let action: Action 68 | 69 | switch actions.count { 70 | case 0: action = .error 71 | case 1: action = actions.first! 72 | default: 73 | // we have a reduce/reduce or shift/reduce conflict 74 | // is there any viable reduce among the possible actions? 75 | let viableReduce = actions.first { action in 76 | if case let .reduce(_, _, la) = action { 77 | if let lookahead = lookahead { 78 | return la.contains(lookahead) 79 | } 80 | return true 81 | } 82 | return false 83 | } 84 | 85 | if let reduce = viableReduce { 86 | action = reduce 87 | } else if actions.contains(.shift) { 88 | action = .shift 89 | } else { 90 | action = .error 91 | } 92 | } 93 | 94 | if perform(action) { 95 | if action == .accept { return true } 96 | } else { 97 | return false 98 | } 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /Sources/CompilerKit/Matcher.swift: -------------------------------------------------------------------------------- 1 | protocol Matcher { 2 | associatedtype Element 3 | 4 | static func ~=(pattern: Self, value: Element) -> Bool 5 | } 6 | -------------------------------------------------------------------------------- /Sources/CompilerKit/NFA.swift: -------------------------------------------------------------------------------- 1 | struct NFA { 2 | typealias Element = M.Element 3 | 4 | let states: Int 5 | let transitions: [M: [(Int, Int)]] 6 | let epsilonTransitions: [Int: 
[Int]] 7 | let initial: Int 8 | let accepting: [Int: Output] 9 | 10 | var epsilonClosures: [Set] { 11 | var epsilonClosures: [Set] = [] 12 | 13 | for v in 0..() 15 | 16 | func dfs(_ s: Int) { 17 | marked.insert(s) 18 | for w in epsilonTransitions[s, default: []] { 19 | if !marked.contains(w) { dfs(w) } 20 | } 21 | } 22 | 23 | dfs(v) 24 | 25 | epsilonClosures.append(marked) 26 | } 27 | 28 | return epsilonClosures 29 | } 30 | 31 | var alphabet: Dictionary.Keys { 32 | return transitions.keys 33 | } 34 | 35 | func epsilonClosure(from states: Set) -> Set { 36 | var marked = Set() 37 | 38 | func dfs(_ s: Int) { 39 | marked.insert(s) 40 | for w in epsilonTransitions[s, default: []] { 41 | if !marked.contains(w) { dfs(w) } 42 | } 43 | } 44 | 45 | for s in states { 46 | if !marked.contains(s) { dfs(s) } 47 | } 48 | 49 | return marked 50 | } 51 | 52 | func reachable(from states: Set, via matcher: M) -> Set { 53 | var set = Set() 54 | for (from, to) in transitions[matcher, default: []] { 55 | if states.contains(from) { 56 | set.insert(to) 57 | } 58 | } 59 | return set 60 | } 61 | 62 | func match(_ elements: S) -> Set where S.Element == Element { 63 | var states = Set() 64 | states.insert(initial) 65 | for element in elements { 66 | // add all states reachable by epsilon transitions 67 | states = epsilonClosure(from: states) 68 | 69 | guard let matcher = alphabet.first(where: { $0 ~= element }) else { 70 | return [] 71 | } 72 | 73 | // new set of states as allowed by current element in string 74 | states = reachable(from: states, via: matcher) 75 | 76 | if states.isEmpty { return [] } 77 | } 78 | return Set(states.compactMap { self.accepting[$0] }) 79 | } 80 | 81 | func offset(by offset: Int) -> NFA { 82 | return NFA( 83 | states: states + offset, 84 | transitions: transitions.mapValues { $0.map { from, to in (from + offset, to + offset) } }, 85 | epsilonTransitions: Dictionary(uniqueKeysWithValues: epsilonTransitions.map { ($0.key + offset, $0.value.map { $0 + offset }) }), 
// MARK: - Building NFAs over scalar classes

extension NFA where M == ScalarClass {
    /// Combines several NFAs into one that accepts any of the alternatives.
    ///
    /// A fresh common initial state (state 0) is connected by an epsilon edge
    /// to each alternative's initial state; every alternative is first offset
    /// past the state numbers already in use so the state spaces don't collide.
    init(alternatives: [NFA]) {
        let commonInitial = 0
        var states = 1
        var transitions: [ScalarClass: [(Int, Int)]] = [:]
        var epsilonTransitions: [Int: [Int]] = [:]
        var accepting: [Int: Output] = [:]

        for nfa in alternatives {
            let offset = nfa.offset(by: states)
            transitions.merge(offset.transitions, uniquingKeysWith: { $0 + $1 })
            // offset state numbers are disjoint, so colliding keys cannot occur;
            // keeping the first value is therefore safe
            epsilonTransitions.merge(offset.epsilonTransitions, uniquingKeysWith: { first, _ in first })
            epsilonTransitions[commonInitial, default: []].append(offset.initial)
            accepting.merge(offset.accepting, uniquingKeysWith: { first, _ in first })
            states = offset.states
        }

        self.init(
            states: states,
            transitions: transitions,
            epsilonTransitions: epsilonTransitions,
            initial: commonInitial,
            accepting: accepting
        )
    }

    /// Builds a scanner NFA from (regular expression, output) pairs: the
    /// result accepts each pattern with its associated output value.
    init(scanner: [(RegularExpression, Output)]) {
        let alternatives = scanner.map { NFA(re: $0.0, acceptingValue: $0.1) }
        self.init(alternatives: alternatives)
    }
}

// DFA from NFA (subset construction)
extension NFA {
    // Each DFA state stands for a set of NFA states, so it accepts the *set*
    // of outputs of the NFA states it contains.
    // NOTE(review): the generic arguments were stripped by the dump that
    // produced this file; `DFA<Set<Output>, M>` is reconstructed from the
    // subset construction — confirm against DFA.swift.
    var dfa: DFA<Set<Output>, M> { return DFA(self) }
}

// Initialize NFA from RE (Thompson's construction)
extension NFA where M == ScalarClass {
    init(re: RegularExpression, acceptingValue: Output) {
        switch re {
        case .scalarClass(let scalarClass):
            // single edge: initial (0) --scalarClass--> accepting (1)
            self.init(
                states: 2,
                transitions: [scalarClass: [(0, 1)]],
                epsilonTransitions: [:],
                initial: 0,
                accepting: [1: acceptingValue]
            )

        case .concatenation(let re1, let re2):
            let nfa1 = NFA(re: re1, acceptingValue: acceptingValue)
            let nfa2 = NFA(re: re2, acceptingValue: acceptingValue)

            // nfa1 followed by nfa2, with epsilon transitions from nfa1's
            // accepting states into nfa2's initial state
            let nfa2offset = nfa2.offset(by: nfa1.states)
            let transitions = nfa1.transitions
                .merging(nfa2offset.transitions, uniquingKeysWith: { $0 + $1 })
            let epsilonTransitions = nfa1.epsilonTransitions
                .merging(nfa2offset.epsilonTransitions, uniquingKeysWith: { $0 + $1 })
                .merging(
                    nfa1.accepting.keys.map { ($0, [nfa2offset.initial]) },
                    uniquingKeysWith: { $0 + $1 })

            self.init(
                states: nfa2offset.states,
                transitions: transitions,
                epsilonTransitions: epsilonTransitions,
                initial: nfa1.initial,
                accepting: nfa2offset.accepting
            )

        case .alternation(let re1, let re2):
            let nfa1 = NFA(re: re1, acceptingValue: acceptingValue)
            let nfa2 = NFA(re: re2, acceptingValue: acceptingValue)

            // create a common initial state that points to each nfa's initial
            // with an epsilon edge and a combined accepting dictionary
            let nfa1offset = nfa1.offset(by: 1)
            let nfa2offset = nfa2.offset(by: nfa1.states + 1)

            let states = nfa2offset.states
            let initial = 0

            let transitions = nfa1offset.transitions
                .merging(nfa2offset.transitions, uniquingKeysWith: { $0 + $1 })

            let epsilonTransitions = nfa1offset.epsilonTransitions
                .merging(nfa2offset.epsilonTransitions, uniquingKeysWith: { $0 + $1 })
                .merging([(0, [nfa1offset.initial, nfa2offset.initial])], uniquingKeysWith: { $0 + $1 })

            let accepting = nfa1offset.accepting.merging(nfa2offset.accepting, uniquingKeysWith: { first, _ in first })

            self.init(
                states: states,
                transitions: transitions,
                epsilonTransitions: epsilonTransitions,
                initial: initial,
                accepting: accepting
            )

        case .closure(let re):
            let nfa = NFA(re: re, acceptingValue: acceptingValue)

            // turn nfa into a closure by:
            // - making the initial state accepting, to allow skipping the NFA (zero occurrences)
            // - looping over the NFA many times by connecting its accepting states to its initial state
            let accepting = nfa.accepting.merging([nfa.initial: acceptingValue], uniquingKeysWith: { first, _ in first })
            let epsilonTransitions = nfa.epsilonTransitions
                .merging(
                    nfa.accepting.keys.map { ($0, [nfa.initial]) }, uniquingKeysWith: { $0 + $1 })

            self.init(
                states: nfa.states,
                transitions: nfa.transitions,
                epsilonTransitions: epsilonTransitions,
                initial: nfa.initial,
                accepting: accepting
            )
        }
    }
}

/// A regular expression over classes of unicode scalars.
indirect enum RegularExpression {
    case scalarClass(ScalarClass)
    case alternation(RegularExpression, RegularExpression)
    case concatenation(RegularExpression, RegularExpression)
    case closure(RegularExpression)
}

// A more convenient way for building a regular expression in Swift code
postfix operator *

extension RegularExpression: ExpressibleByUnicodeScalarLiteral {
    init(unicodeScalarLiteral scalar: UnicodeScalar) {
        self = .scalarClass(.single(scalar))
    }

    static func +(lhs: RegularExpression, rhs: RegularExpression) -> RegularExpression {
        return .concatenation(lhs, rhs)
    }

    static func |(lhs: RegularExpression, rhs: RegularExpression) -> RegularExpression {
        return .alternation(lhs, rhs)
    }

    static postfix func *(re: RegularExpression) -> RegularExpression {
        return .closure(re)
    }

    static let digit: RegularExpression = .scalarClass(.range("0", "9"))

    static let lowercase: RegularExpression = .scalarClass(.range("a", "z"))

    static let uppercase: RegularExpression = .scalarClass(.range("A", "Z"))

    static let alpha: RegularExpression = .lowercase | .uppercase

    static let alphanum: RegularExpression = .alpha | .digit
}

// Derive an NFA from a regular expression (Thompson's Construction)
extension RegularExpression {
    // NOTE(review): generic arguments reconstructed from the dump —
    // `acceptingValue: true` fixes Output == Bool, and the tests consume the
    // result with `.match(...).contains(true)`.
    var nfa: NFA<Bool, ScalarClass> {
        return NFA(re: self, acceptingValue: true)
    }
}
Bool { 8 | switch pattern { 9 | case let .single(scalar): 10 | return value == scalar 11 | 12 | case let .range(from, to): 13 | return from <= value && value <= to 14 | } 15 | } 16 | } 17 | 18 | -------------------------------------------------------------------------------- /Sources/CompilerKit/Tokenizer.swift: -------------------------------------------------------------------------------- 1 | struct Tokenizer where Output: Hashable { 2 | let dfa: DFA 3 | let trivia: DFA 4 | let unknown: Output 5 | 6 | init?(tokens: [(RegularExpression, Output)], trivia: RegularExpression, unknown: Output) { 7 | let nfa = NFA(scanner: tokens) 8 | guard let dfa = DFA(consistent: nfa, nonAcceptingValue: unknown) 9 | else { return nil } 10 | self.dfa = dfa.minimized 11 | 12 | guard let triviaDFA = DFA(consistent: trivia.nfa, nonAcceptingValue: false) 13 | else { return nil } 14 | self.trivia = triviaDFA.minimized 15 | 16 | self.unknown = unknown 17 | } 18 | 19 | func tokenize(_ source: String.UnicodeScalarView) -> [(Output, Substring.UnicodeScalarView.SubSequence)] { 20 | var tokens: [(Output, Substring.UnicodeScalarView.SubSequence)] = [] 21 | var offset = source.startIndex 22 | var unknownStart: String.UnicodeScalarView.Index? 
= nil 23 | 24 | func processUnknown() { 25 | if unknownStart != nil { 26 | tokens.append((unknown, source[unknownStart!..( 8 | states: 4, 9 | transitions: [ 10 | .single("a"): [(0, 0), (1, 2)], 11 | .single("b"): [(2, 3)] 12 | ], 13 | epsilonTransitions: [0: [1]], 14 | initial: 0, 15 | accepting: [3: true] 16 | ) 17 | XCTAssertTrue(nfa.match("aaab".unicodeScalars).contains(true)) 18 | XCTAssertFalse(nfa.match("aaa".unicodeScalars).contains(true)) 19 | XCTAssertTrue(nfa.match("ab".unicodeScalars).contains(true)) 20 | XCTAssertFalse(nfa.match("b".unicodeScalars).contains(true)) 21 | XCTAssertFalse(nfa.match("bbbbab".unicodeScalars).contains(true)) 22 | } 23 | 24 | 25 | func testRegularExpression() { 26 | // a*ab - should match ab, aab, aaab, etc 27 | let re: RegularExpression = "a"* + ("a" + "b") 28 | let derivedNfa = re.nfa 29 | XCTAssertTrue(derivedNfa.match("aaab".unicodeScalars).contains(true)) 30 | XCTAssertFalse(derivedNfa.match("aaa".unicodeScalars).contains(true)) 31 | XCTAssertTrue(derivedNfa.match("ab".unicodeScalars).contains(true)) 32 | XCTAssertFalse(derivedNfa.match("b".unicodeScalars).contains(true)) 33 | XCTAssertFalse(derivedNfa.match("bbbbab".unicodeScalars).contains(true)) 34 | } 35 | 36 | func testDFA() { 37 | // a(b|c)* - should match a, ab, ac, abc, abbbb, acccc, abbccbcbbc, etc 38 | let dfa = DFA( 39 | states: 2, 40 | transitions: [ 41 | ScalarClass.single("a"): [(0, 1)], 42 | ScalarClass.single("b"): [(1, 1)], 43 | ScalarClass.single("c"): [(1, 1)], 44 | ], 45 | initial: 0, 46 | accepting: [1: true], 47 | nonAcceptingValue: false 48 | ) 49 | 50 | XCTAssertTrue(dfa.match("a".unicodeScalars)) 51 | XCTAssertTrue(dfa.match("ab".unicodeScalars)) 52 | XCTAssertTrue(dfa.match("ac".unicodeScalars)) 53 | XCTAssertTrue(dfa.match("abc".unicodeScalars)) 54 | XCTAssertTrue(dfa.match("acb".unicodeScalars)) 55 | XCTAssertTrue(dfa.match("abbbb".unicodeScalars)) 56 | XCTAssertTrue(dfa.match("acccc".unicodeScalars)) 57 | 
XCTAssertTrue(dfa.match("abbccbbccbc".unicodeScalars)) 58 | 59 | XCTAssertFalse(dfa.match("aa".unicodeScalars)) 60 | XCTAssertFalse(dfa.match("aba".unicodeScalars)) 61 | XCTAssertFalse(dfa.match("abac".unicodeScalars)) 62 | XCTAssertFalse(dfa.match("abbccbbccbca".unicodeScalars)) 63 | } 64 | 65 | func testRegularExpressionToDFAMatch() { 66 | // a(b|c)* - should match a, ab, ac, abc, abbbb, acccc, abbccbcbbc, etc 67 | let re: RegularExpression = "a" + ("b" | "c")* 68 | let dfa = DFA(consistent: re.nfa, nonAcceptingValue: false)! 69 | 70 | XCTAssertTrue(dfa.match("a".unicodeScalars)) 71 | XCTAssertTrue(dfa.match("ab".unicodeScalars)) 72 | XCTAssertTrue(dfa.match("ac".unicodeScalars)) 73 | XCTAssertTrue(dfa.match("abc".unicodeScalars)) 74 | XCTAssertTrue(dfa.match("acb".unicodeScalars)) 75 | XCTAssertTrue(dfa.match("abbbb".unicodeScalars)) 76 | XCTAssertTrue(dfa.match("acccc".unicodeScalars)) 77 | XCTAssertTrue(dfa.match("abbccbbccbc".unicodeScalars)) 78 | 79 | XCTAssertFalse(dfa.match("aa".unicodeScalars)) 80 | XCTAssertFalse(dfa.match("aba".unicodeScalars)) 81 | XCTAssertFalse(dfa.match("abac".unicodeScalars)) 82 | XCTAssertFalse(dfa.match("abbccbbccbca".unicodeScalars)) 83 | XCTAssertFalse(dfa.match("cbcab".unicodeScalars)) 84 | } 85 | 86 | func testRegularExpressionToMinimizedDFAMatch() { 87 | // a(b|c)* - should match a, ab, ac, abc, abbbb, acccc, abbccbcbbc, etc 88 | let re: RegularExpression = "a" + ("b" | "c")* 89 | let dfa = DFA(consistent: re.nfa, nonAcceptingValue: false)!.minimized 90 | 91 | XCTAssertTrue(dfa.match("a".unicodeScalars)) 92 | XCTAssertTrue(dfa.match("ab".unicodeScalars)) 93 | XCTAssertTrue(dfa.match("ac".unicodeScalars)) 94 | XCTAssertTrue(dfa.match("abc".unicodeScalars)) 95 | XCTAssertTrue(dfa.match("acb".unicodeScalars)) 96 | XCTAssertTrue(dfa.match("abbbb".unicodeScalars)) 97 | XCTAssertTrue(dfa.match("acccc".unicodeScalars)) 98 | XCTAssertTrue(dfa.match("abbccbbccbc".unicodeScalars)) 99 | 100 | 
XCTAssertFalse(dfa.match("aa".unicodeScalars)) 101 | XCTAssertFalse(dfa.match("aba".unicodeScalars)) 102 | XCTAssertFalse(dfa.match("abac".unicodeScalars)) 103 | XCTAssertFalse(dfa.match("abbccbbccbca".unicodeScalars)) 104 | XCTAssertFalse(dfa.match("cbcab".unicodeScalars)) 105 | } 106 | 107 | func testMultiAcceptingStatesDFA() { 108 | enum Token { case aa, ab, ac, unknown } 109 | 110 | let dfa = DFA( 111 | states: 5, 112 | transitions: [ 113 | ScalarClass.single("a"): [(0, 1), (1, 2)], 114 | ScalarClass.single("b"): [(1, 3)], 115 | ScalarClass.single("c"): [(1, 4)], 116 | ], 117 | initial: 0, 118 | accepting: [2: .aa, 3: .ab, 4: .ac], 119 | nonAcceptingValue: .unknown 120 | ) 121 | 122 | XCTAssertEqual(dfa.match("aa".unicodeScalars), .aa) 123 | XCTAssertEqual(dfa.match("ab".unicodeScalars), .ab) 124 | XCTAssertEqual(dfa.match("ac".unicodeScalars), .ac) 125 | XCTAssertEqual(dfa.match("bb".unicodeScalars), .unknown) 126 | } 127 | 128 | func testScanner() { 129 | enum Token { 130 | case integer 131 | case decimal 132 | case identifier 133 | } 134 | 135 | let scanner: [(RegularExpression, Token)] = [ 136 | (.digit + .digit*, .integer), 137 | (.digit + .digit* + "." 
+ .digit + .digit*, .decimal), 138 | (.alpha + .alphanum*, .identifier), 139 | ] 140 | 141 | measure { 142 | let dfa = NFA(scanner: scanner) 143 | .dfa.minimized 144 | 145 | XCTAssertEqual(dfa.match("134".unicodeScalars), [.integer]) 146 | XCTAssertEqual(dfa.match("61.613".unicodeScalars), [.decimal]) 147 | XCTAssertEqual(dfa.match("x1".unicodeScalars), [.identifier]) 148 | XCTAssertEqual(dfa.match("1xy".unicodeScalars), []) 149 | } 150 | 151 | let dfa = NFA(scanner: scanner).dfa.minimized 152 | let source = "134 x3".unicodeScalars 153 | var offset = source.startIndex 154 | 155 | while offset < source.endIndex { 156 | let (token, match) = dfa.prefixMatch(source[offset...]) 157 | 158 | if token.isEmpty { 159 | // no match, skip over character 160 | print("Skipping: '\(source[offset])'") 161 | offset = source.index(after: offset) 162 | } else { 163 | offset = match.endIndex 164 | print(token.first!, String(match)) 165 | } 166 | } 167 | } 168 | 169 | func testTokenizer() { 170 | enum Token { 171 | case integer 172 | case decimal 173 | case identifier 174 | case unknown 175 | } 176 | 177 | let scanner: [(RegularExpression, Token)] = [ 178 | (.digit + .digit*, .integer), 179 | (.digit + .digit* + "." + .digit + .digit*, .decimal), 180 | (.alpha + .alphanum*, .identifier), 181 | ] 182 | 183 | let trivia: RegularExpression = " " | "\t" | "\r" | "\n" 184 | 185 | let tokenizer = Tokenizer(tokens: scanner, trivia: trivia, unknown: .unknown)! 
186 | let tokens = tokenizer.tokenize("134 x3 !4x 41.4 ?ab".unicodeScalars) 187 | 188 | for (t, s) in tokens { 189 | print("'\(String(s))' - \(t)") 190 | } 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /Tests/CompilerKitTests/GrammarTests.swift: -------------------------------------------------------------------------------- 1 | import XCTest 2 | @testable import CompilerKit 3 | 4 | final class GrammarTests: XCTestCase { 5 | enum Token: CustomStringConvertible { 6 | case plus, minus, multiply, divide 7 | case leftBracket, rightBracket 8 | case num, name 9 | case eof 10 | 11 | var description: String { 12 | func q(_ s: String) -> String { return "'\(s)'" } 13 | switch self { 14 | case .plus: return q("+") 15 | case .minus: return q("-") 16 | case .multiply: return q("*") 17 | case .divide: return q("/") 18 | case .leftBracket: return q("(") 19 | case .rightBracket: return q(")") 20 | case .name: return "name" 21 | case .num: return "num" 22 | case .eof: return "eof" 23 | } 24 | } 25 | } 26 | 27 | static let grammar = Grammar( 28 | productions: [ 29 | // (0) Goal -> Expr 30 | [[.nt(1), .t(.eof)]], 31 | 32 | // (1) Expr -> Expr + Term 33 | // | Expr - Term 34 | // | Term 35 | [[.nt(1), .t(.plus), .nt(2)], 36 | [.nt(1), .t(.minus), .nt(2)], 37 | [.nt(2)]], 38 | 39 | // (2) Term -> Term x Factor 40 | // | Term / Factor 41 | // | Factor 42 | [[.nt(2), .t(.multiply), .nt(3)], 43 | [.nt(2), .t(.divide), .nt(3)], 44 | [.nt(3)]], 45 | 46 | // (3) Factor -> ( Expr ) 47 | // | num 48 | // | name 49 | [[.t(.leftBracket), .nt(1), .t(.rightBracket)], 50 | [.t(.num)], 51 | [.t(.name)]] 52 | ], 53 | start: 0 54 | ) 55 | 56 | static let valid: [[Token]] = [ 57 | [.num, .eof], 58 | [.num, .plus, .name, .eof], 59 | [.leftBracket, .num, .plus, .num, .rightBracket, .eof], 60 | ] 61 | 62 | static let invalid: [[Token]] = [ 63 | // missing eof 64 | [.num], 65 | // unbalanced brackets 66 | [.leftBracket, .leftBracket, .rightBracket, 
.num, .rightBracket, .eof], 67 | // name followed by num 68 | [.name, .num, .eof], 69 | ] 70 | 71 | func testGrammar() { 72 | var g = GrammarTests.grammar 73 | 74 | g.eliminateLeftRecursion() 75 | XCTAssertEqual(g.productions.count, 6) 76 | 77 | let nullable = g.nullable() 78 | XCTAssertEqual(nullable, [[], [], [], [], [0], [0]]) 79 | 80 | let first = g.first(nullable: nullable) 81 | XCTAssertEqual(first, 82 | [ 83 | [.num: [0], .leftBracket: [0], .name: [0]], 84 | [.num: [0], .leftBracket: [0], .name: [0]], 85 | [.num: [0], .leftBracket: [0], .name: [0]], 86 | [.num: [1], .leftBracket: [0], .name: [2]], 87 | [.plus: [1], .minus: [2]], 88 | [.multiply: [1], .divide: [2]], 89 | ]) 90 | 91 | let follow = g.follow(nullable: nullable, first: first) 92 | XCTAssertEqual(follow, [ 93 | Set([]), 94 | Set([.eof, .rightBracket]), 95 | Set([.eof, .rightBracket, .plus, .minus]), 96 | Set([.eof, .rightBracket, .plus, .minus, .multiply, .divide]), 97 | Set([.eof, .rightBracket]), 98 | Set([.eof, .rightBracket, .plus, .minus]), 99 | ]) 100 | 101 | XCTAssert(g.isBacktrackFree(nullable: nullable, first: first, follow: follow)) 102 | } 103 | 104 | func testLLParserConstruction() { 105 | let g = GrammarTests.grammar 106 | 107 | _ = LLParser(g) 108 | 109 | let parser = LLParser(g) 110 | XCTAssertEqual(parser.table, 111 | [ 112 | [.num: 0, .leftBracket: 0, .name: 0], 113 | [.num: 0, .leftBracket: 0, .name: 0], 114 | [.num: 0, .leftBracket: 0, .name: 0], 115 | [.num: 1, .leftBracket: 0, .name: 2], 116 | [.rightBracket: 0, .plus: 1, .minus: 2, .eof: 0], 117 | [.rightBracket: 0, .minus: 0, .multiply: 1, .divide: 2, .plus: 0, .eof: 0] 118 | ]) 119 | } 120 | 121 | func testLLParserCorrectness() { 122 | let g = GrammarTests.grammar 123 | let parser = LLParser(g) 124 | 125 | for s in GrammarTests.valid { 126 | XCTAssert(parser.parse(s)) 127 | } 128 | 129 | for s in GrammarTests.invalid { 130 | XCTAssertFalse(parser.parse(s)) 131 | } 132 | } 133 | 134 | func testLRConstruction() { 135 | let g 
= GrammarTests.grammar 136 | _ = LRParser(slr: g) 137 | } 138 | 139 | func testLRParserCorrectness() { 140 | let g = GrammarTests.grammar 141 | let parser = LRParser(slr: g) 142 | 143 | for s in GrammarTests.valid { 144 | XCTAssert(parser.parse(s)) 145 | } 146 | 147 | for s in GrammarTests.invalid { 148 | XCTAssertFalse(parser.parse(s)) 149 | } 150 | } 151 | 152 | func testLALRParserCorrectness() { 153 | let g = GrammarTests.grammar 154 | let parser = LRParser(lalr: g) 155 | 156 | for s in GrammarTests.valid { 157 | XCTAssert(parser.parse(s)) 158 | } 159 | 160 | for s in GrammarTests.invalid { 161 | XCTAssertFalse(parser.parse(s)) 162 | } 163 | } 164 | 165 | func testBacktrackingGrammar() { 166 | var g = Grammar(productions: 167 | [ 168 | // (0) Goal -> Expr 169 | [ 170 | [.nt(1)], 171 | ], 172 | 173 | // (1) Expr -> Expr + Term 174 | // | Expr - Term 175 | // | Term 176 | [ 177 | [.nt(1), .t(.plus), .nt(2)], 178 | [.nt(1), .t(.minus), .nt(2)], 179 | [.nt(2)], 180 | ], 181 | 182 | // (2) Term -> Term x Factor 183 | // | Term / Factor 184 | // | Factor 185 | [ 186 | [.nt(2), .t(.multiply), .nt(3)], 187 | [.nt(2), .t(.divide), .nt(3)], 188 | [.nt(3)], 189 | ], 190 | 191 | // (3) Factor -> ( Expr ) 192 | // | num 193 | // | name 194 | [ 195 | [.t(.leftBracket), .nt(1), .t(.rightBracket)], 196 | [.t(.num)], 197 | [.t(.name)], 198 | [.t(.name), .t(.leftBracket), .nt(4), .t(.rightBracket)], 199 | ], 200 | // (4) ArgList -> Expr 201 | [ 202 | [.nt(1)] 203 | ], 204 | ], 205 | start: 0 206 | ) 207 | 208 | g.eliminateLeftRecursion() 209 | XCTAssertEqual(g.productions.count, 7) 210 | 211 | // there are two productions of Factor starting with .name 212 | let nullable = g.nullable() 213 | let first = g.first(nullable: nullable) 214 | let follow = g.follow(nullable: nullable, first: first) 215 | 216 | XCTAssertEqual(first[3][.name]?.count, 2) 217 | 218 | // ... 
which means that the grammar is NOT backtrack free 219 | XCTAssert(!g.isBacktrackFree(nullable: nullable, first: first, follow: follow)) 220 | 221 | g.leftRefactor() 222 | let newNullable = g.nullable() 223 | let newFirst = g.first(nullable: newNullable) 224 | let newFollow = g.follow(nullable: newNullable, first: newFirst) 225 | XCTAssert(g.isBacktrackFree(nullable: newNullable, first: newFirst, follow: newFollow)) 226 | } 227 | 228 | func testLALR() { 229 | enum Token: String, Hashable { 230 | case lb, rb, id, plus, mult 231 | } 232 | 233 | func constructItemSet(_ s: [(Int, Int, Int)]) -> Set.Item> { 234 | return Set(s.map(LRParser.Item.init)) 235 | } 236 | 237 | func constructItemSets(_ s: [[(Int, Int, Int)]]) -> Set.Item>> { 238 | return Set(s.map(constructItemSet)) 239 | } 240 | 241 | func constructTransition(_ s: Set.Item>, _ nt: Int) -> LRParser.Transition { 242 | return LRParser.Transition(state: s, nt: nt) 243 | } 244 | 245 | func constructTransitionSet(_ s: [(Set.Item>, Int)]) -> Set.Transition> { 246 | return Set(s.map(constructTransition)) 247 | } 248 | 249 | // This is Grammar 4.19 from the Dragon book 250 | // 0,0 E -> E + T 251 | // 0,1 E -> T 252 | // 1,0 T -> T * F 253 | // 1,1 T -> F 254 | // 2,0 F -> (E) 255 | // 2,1 F -> id 256 | // 3,0 E' -> E 257 | let g = Grammar(productions: [ 258 | // E -> E + T | T 259 | [[.nt(0), .t(.plus), .nt(1)], [.nt(1)]], 260 | // T -> T * F | F 261 | [[.nt(1), .t(.mult), .nt(2)], [.nt(2)]], 262 | // F -> (E) | id 263 | [[.t(.lb), .nt(0), .t(.rb)], [.t(.id)]], 264 | ], 265 | start: 0) 266 | let grammar = g.augmented 267 | 268 | // Item sets in an ordered array in the same order as the Dragon book 269 | // See Fig 4.35 in Dragon book for list of items (I0 to I11) 270 | let I = [ 271 | /* I0 */ [(1, 0, 0), (0, 1, 0), (0, 0, 0), (2, 0, 0), (1, 1, 0), (2, 1, 0), (3, 0, 0)], 272 | /* I1 */ [(0, 0, 1), (3, 0, 1)], 273 | /* I2 */ [(0, 1, 1), (1, 0, 1)], 274 | /* I3 */ [(1, 1, 1)], 275 | /* I4 */ [(1, 0, 0), (2, 0, 1), (0, 
1, 0), (0, 0, 0), (2, 0, 0), (1, 1, 0), (2, 1, 0)], 276 | /* I5 */ [(2, 1, 1)], 277 | /* I6 */ [(1, 0, 0), (2, 0, 0), (1, 1, 0), (2, 1, 0), (0, 0, 2)], 278 | /* I7 */ [(1, 0, 2), (2, 0, 0), (2, 1, 0)], 279 | /* I8 */ [(0, 0, 1), (2, 0, 2)], 280 | /* I9 */ [(1, 0, 1), (0, 0, 3)], 281 | /* I10 */ [(1, 0, 3)], 282 | /* I11 */ [(2, 0, 3)], 283 | ].map(constructItemSet) 284 | 285 | let allNodes = Set(grammar.productions.flatMap { $0.flatMap { $0 } }) 286 | let nullable = grammar.nullable() 287 | 288 | // The LR(0) item sets or "canonical set of LR(0) items" 289 | let startItem = LRParser.Item(term: grammar.productions.count - 1, production: 0, position: 0) 290 | let itemSets = LRParser.itemSets(grammar, startItem, allNodes) 291 | let expectedItemSets = Set(I) 292 | XCTAssertEqual(itemSets, expectedItemSets) 293 | 294 | // goto from state I1 {[E' -> E.], [E -> E. + T]} by token '+'... 295 | let gotoSet = LRParser.goto(grammar, I[1], .t(.plus)) 296 | 297 | // ...and expect to land in state I6 298 | XCTAssertEqual(gotoSet, I[6]) 299 | 300 | let allTransitions = LRParser.allTransitions(grammar, itemSets) 301 | let expectedTransitions = constructTransitionSet([ 302 | (I[0], 0), (I[0], 1), (I[0], 2), 303 | (I[4], 0), (I[4], 1), (I[4], 2), 304 | (I[6], 1), (I[6], 2), 305 | (I[7], 2), 306 | ]) 307 | 308 | XCTAssertEqual(allTransitions, expectedTransitions) 309 | 310 | // In the conventions of the paper by DeRemer & Pennello (1982), 311 | // this is a transition (I4, E) - with state I4, nonterminal E. 
312 | // This transition lands us in state I8 {[F -> ( E .)], [E -> E .+ T]} 313 | let t = constructTransition(I[4], 0) 314 | let drTerminals = LRParser.directRead(grammar, t) 315 | XCTAssertEqual(drTerminals, [.plus, .rb]) 316 | 317 | let reads = Dictionary(allTransitions) { LRParser.reads(grammar, nullable, $0) } 318 | let directRead = Dictionary(allTransitions) { LRParser.directRead(grammar, $0) } 319 | let indirectReads = LRParser.digraph(allTransitions, reads, directRead) 320 | 321 | // Without nullable terms, the 'reads' relationship is identical to direct read 322 | // TODO: test this with a grammar that has nullable rules 323 | XCTAssertEqual(directRead, indirectReads) 324 | 325 | let expectedFollowSets: [LRParser.Transition: Set] = [ 326 | constructTransition(I[0], 0): [.plus], 327 | constructTransition(I[0], 1): [.mult, .plus], 328 | constructTransition(I[0], 2): [.mult, .plus], 329 | constructTransition(I[4], 0): [.plus, .rb], 330 | constructTransition(I[4], 1): [.mult, .plus, .rb], 331 | constructTransition(I[4], 2): [.mult, .plus, .rb], 332 | constructTransition(I[6], 1): [.mult, .plus, .rb], 333 | constructTransition(I[6], 2): [.mult, .plus, .rb], 334 | constructTransition(I[7], 2): [.mult, .plus, .rb], 335 | ] 336 | let includes = Dictionary(allTransitions) { LRParser.includes(grammar, nullable, $0, allTransitions) } 337 | let followSets = LRParser.digraph(allTransitions, includes, indirectReads) 338 | XCTAssertEqual(expectedFollowSets, followSets) 339 | 340 | // make a list of all possible reduction items: [A -> w.] 
341 | var reductions: [(Set.Item>, LRParser.Item)] = [] 342 | let prods = grammar.productions 343 | for term in 0...Item(term: term, production: production, position: prods[term][production].count) 346 | for state in itemSets where state.contains(r) { 347 | reductions.append((state, r)) 348 | } 349 | } 350 | } 351 | 352 | let lookbacks = reductions.map { LRParser.lookback(grammar, $0.0, $0.1, allTransitions) } 353 | let expectedLookbacks: [Set.Transition>] = [ 354 | constructTransitionSet([(I[4], 0), (I[0], 0)]), 355 | constructTransitionSet([(I[4], 0), (I[0], 0)]), 356 | constructTransitionSet([(I[6], 1), (I[4], 1), (I[0], 1)]), 357 | constructTransitionSet([(I[6], 1), (I[4], 1), (I[0], 1)]), 358 | constructTransitionSet([(I[6], 2), (I[0], 2), (I[7], 2), (I[4], 2)]), 359 | constructTransitionSet([(I[6], 2), (I[0], 2), (I[7], 2), (I[4], 2)]), 360 | [], 361 | ] 362 | 363 | XCTAssertEqual(lookbacks, expectedLookbacks) 364 | 365 | let lookaheads: [Set] = reductions.map { state, reduction in 366 | var la: Set = [] 367 | for transition in LRParser.lookback(grammar, state, reduction, allTransitions) { 368 | la.formUnion(followSets[transition]!) 
@testable import CompilerKitTests
import XCTest

// Linux test discovery: XCTMain only runs tests listed in `allTests`.
// Keep each list in sync with the test methods in its XCTestCase subclass.

extension FiniteStateTests {
    static var allTests: [(String, (FiniteStateTests) -> () throws -> Void)] = [
        ("testNFA", testNFA),
        ("testRegularExpression", testRegularExpression),
        ("testDFA", testDFA),
        ("testRegularExpressionToDFAMatch", testRegularExpressionToDFAMatch),
        ("testRegularExpressionToMinimizedDFAMatch", testRegularExpressionToMinimizedDFAMatch),
        ("testMultiAcceptingStatesDFA", testMultiAcceptingStatesDFA),
        ("testScanner", testScanner),
        // Fix: testTokenizer exists in FiniteStateTests but was missing here,
        // so it never ran under XCTMain on Linux.
        ("testTokenizer", testTokenizer),
    ]
}

extension GrammarTests {
    static var allTests: [(String, (GrammarTests) -> () throws -> Void)] = [
        ("testGrammar", testGrammar),
        ("testLLParserConstruction", testLLParserConstruction),
        ("testLLParserCorrectness", testLLParserCorrectness),
        ("testLRConstruction", testLRConstruction),
        ("testLRParserCorrectness", testLRParserCorrectness),
        ("testLALRParserCorrectness", testLALRParserCorrectness),
        ("testBacktrackingGrammar", testBacktrackingGrammar),
        ("testLALR", testLALR),
    ]
}

XCTMain([
    testCase(FiniteStateTests.allTests),
    testCase(GrammarTests.allTests),
])