├── .gitignore
├── 01
└── TryRegex
│ ├── .swiftpm
│ └── xcode
│ │ └── package.xcworkspace
│ │ └── contents.xcworkspacedata
│ ├── Package.swift
│ ├── Sources
│ ├── RegexParser
│ │ └── Regex
│ │ │ ├── AST
│ │ │ ├── AST.swift
│ │ │ ├── Atom.swift
│ │ │ ├── Group.swift
│ │ │ └── Quantification.swift
│ │ │ └── Parse
│ │ │ ├── LexicalAnalysis.swift
│ │ │ ├── Parse.swift
│ │ │ └── Source.swift
│ └── StringProcessing
│ │ ├── ByteCodeGen.swift
│ │ ├── Compiler.swift
│ │ ├── Engine
│ │ ├── Backtracking.swift
│ │ ├── InstPayload.swift
│ │ ├── Instruction.swift
│ │ ├── MEBuilder.swift
│ │ ├── MEBuiltins.swift
│ │ ├── MEProgram.swift
│ │ ├── Processor.swift
│ │ └── Registers.swift
│ │ ├── Executor.swift
│ │ └── Regex
│ │ ├── ASTConversion.swift
│ │ ├── Core.swift
│ │ ├── DSLTree.swift
│ │ └── Match.swift
│ └── Tests
│ └── RegexTests
│ ├── MatchTests.swift
│ └── PerformanceTests.swift
├── 02
└── TryRegex
│ ├── .swiftpm
│ └── xcode
│ │ └── package.xcworkspace
│ │ └── contents.xcworkspacedata
│ ├── Package.swift
│ ├── Sources
│ ├── RegexParser
│ │ └── Regex
│ │ │ ├── AST
│ │ │ ├── AST.swift
│ │ │ ├── Atom.swift
│ │ │ ├── Group.swift
│ │ │ └── Quantification.swift
│ │ │ └── Parse
│ │ │ ├── LexicalAnalysis.swift
│ │ │ ├── Parse.swift
│ │ │ └── Source.swift
│ └── StringProcessing
│ │ ├── ByteCodeGen.swift
│ │ ├── Compiler.swift
│ │ ├── Engine
│ │ ├── Backtracking.swift
│ │ ├── InstPayload.swift
│ │ ├── Instruction.swift
│ │ ├── MEBuilder.swift
│ │ ├── MEBuiltins.swift
│ │ ├── MEProgram.swift
│ │ ├── Processor.swift
│ │ └── Registers.swift
│ │ ├── Executor.swift
│ │ └── Regex
│ │ ├── ASTConversion.swift
│ │ ├── Core.swift
│ │ ├── DSLTree.swift
│ │ └── Match.swift
│ └── Tests
│ └── RegexTests
│ ├── MatchTests.swift
│ └── PerformanceTests.swift
├── LICENSE
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | ### Generated by gibo (https://github.com/simonwhitaker/gibo)
2 | ### https://raw.github.com/github/gitignore/76e40b7cecb059211e360822247bf0b6e585d1eb/Global/macOS.gitignore
3 |
4 | # General
5 | .DS_Store
6 | .AppleDouble
7 | .LSOverride
8 |
9 | # Icon must end with two \r
10 | Icon
11 |
12 | # Thumbnails
13 | ._*
14 |
15 | # Files that might appear in the root of a volume
16 | .DocumentRevisions-V100
17 | .fseventsd
18 | .Spotlight-V100
19 | .TemporaryItems
20 | .Trashes
21 | .VolumeIcon.icns
22 | .com.apple.timemachine.donotpresent
23 |
24 | # Directories potentially created on remote AFP share
25 | .AppleDB
26 | .AppleDesktop
27 | Network Trash Folder
28 | Temporary Items
29 | .apdisk
30 | ### Generated by gibo (https://github.com/simonwhitaker/gibo)
31 | ### https://raw.github.com/github/gitignore/76e40b7cecb059211e360822247bf0b6e585d1eb/Swift.gitignore
32 |
33 | # Xcode
34 | #
35 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
36 |
37 | ## User settings
38 | xcuserdata/
39 |
40 | ## Obj-C/Swift specific
41 | *.hmap
42 |
43 | ## App packaging
44 | *.ipa
45 | *.dSYM.zip
46 | *.dSYM
47 |
48 | ## Playgrounds
49 | timeline.xctimeline
50 | playground.xcworkspace
51 |
52 | # Swift Package Manager
53 | #
54 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies.
55 | # Packages/
56 | # Package.pins
57 | # Package.resolved
58 | # *.xcodeproj
59 | #
60 | # Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata
61 | # hence it is not needed unless you have added a package configuration file to your project
62 | # .swiftpm
63 |
64 | .build/
65 |
66 | # CocoaPods
67 | #
68 | # We recommend against adding the Pods directory to your .gitignore. However
69 | # you should judge for yourself, the pros and cons are mentioned at:
70 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
71 | #
72 | # Pods/
73 | #
74 | # Add this line if you want to avoid checking in source code from the Xcode workspace
75 | # *.xcworkspace
76 |
77 | # Carthage
78 | #
79 | # Add this line if you want to avoid checking in source code from Carthage dependencies.
80 | # Carthage/Checkouts
81 |
82 | Carthage/Build/
83 |
84 | # fastlane
85 | #
86 | # It is recommended to not store the screenshots in the git repo.
87 | # Instead, use fastlane to re-generate the screenshots whenever they are needed.
88 | # For more information about the recommended setup visit:
89 | # https://docs.fastlane.tools/best-practices/source-control/#source-control
90 |
91 | fastlane/report.xml
92 | fastlane/Preview.html
93 | fastlane/screenshots/**/*.png
94 | fastlane/test_output
95 |
--------------------------------------------------------------------------------
/01/TryRegex/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/01/TryRegex/Package.swift:
--------------------------------------------------------------------------------
1 | // swift-tools-version: 5.5
2 | // The swift-tools-version declares the minimum version of Swift required to build this package.
3 |
4 | import PackageDescription
5 |
// Package layout: `RegexParser` has no dependencies, `StringProcessing` builds
// on `RegexParser`, and the `RegexTests` test target depends on
// `StringProcessing`. Both non-test targets are exported as libraries.
let package = Package(
  name: "TryRegex",
  products: [
    .library(name: "StringProcessing", targets: ["StringProcessing"]),
    .library(name: "RegexParser", targets: ["RegexParser"]),
  ],
  targets: [
    .target(name: "RegexParser", dependencies: []),
    .target(name: "StringProcessing", dependencies: ["RegexParser"]),
    .testTarget(name: "RegexTests", dependencies: ["StringProcessing"]),
  ]
)
37 |
--------------------------------------------------------------------------------
/01/TryRegex/Sources/RegexParser/Regex/AST/AST.swift:
--------------------------------------------------------------------------------
/// A syntax tree, held as its single root node.
public struct AST {
  /// The top-level node of the tree.
  public var root: AST.Node

  /// Wraps `root` as a complete tree.
  public init(_ root: AST.Node) { self.root = root }
}
8 |
extension AST {
  /// One node of the syntax tree.
  ///
  /// The enum is `indirect` because several payloads (`Alternation`,
  /// `Concatenation`, `Group`, `Quantification`) store further nodes.
  public indirect enum Node {
    /// A list of alternative sub-trees.
    case alternation(Alternation)

    /// A list of sub-trees that follow one another.
    case concatenation(Concatenation)

    /// A group wrapping a single child.
    case group(Group)

    /// A child together with a repetition amount.
    case quantification(Quantification)

    /// A leaf atom.
    case atom(Atom)

    /// A node with no content.
    case empty(Empty)
  }
}
19 |
extension AST {
  /// Payload of `Node.alternation`: the alternative sub-trees, in order.
  public struct Alternation {
    public let children: [AST.Node]

    /// Stores `nodes` as the alternatives.
    public init(_ nodes: [AST.Node]) { children = nodes }
  }

  /// Payload of `Node.concatenation`: the sub-trees, in order.
  public struct Concatenation {
    public let children: [AST.Node]

    /// Stores `nodes` as the concatenated parts.
    public init(_ nodes: [AST.Node]) { children = nodes }
  }

  /// Payload of `Node.empty`; carries no data.
  public struct Empty {}
}
39 |
--------------------------------------------------------------------------------
/01/TryRegex/Sources/RegexParser/Regex/AST/Atom.swift:
--------------------------------------------------------------------------------
extension AST {
  /// A leaf of the syntax tree.
  public struct Atom {
    /// The kinds of atom this tree can hold.
    public enum Kind {
      /// A single literal character.
      case char(Character)
    }

    /// What this atom is.
    public let kind: Kind

    /// Creates an atom of the given kind.
    public init(_ kind: Kind) { self.kind = kind }
  }
}
14 |
--------------------------------------------------------------------------------
/01/TryRegex/Sources/RegexParser/Regex/AST/Group.swift:
--------------------------------------------------------------------------------
extension AST {
  /// A group node: a kind plus the single child it wraps.
  public struct Group {
    /// The kinds of group this tree can hold.
    public enum Kind {
      case capture
    }

    public let kind: Kind
    public let child: AST.Node

    /// Creates a group of `kind` around `child`.
    public init(_ kind: Kind, _ child: AST.Node) {
      (self.kind, self.child) = (kind, child)
    }
  }
}
16 |
--------------------------------------------------------------------------------
/01/TryRegex/Sources/RegexParser/Regex/AST/Quantification.swift:
--------------------------------------------------------------------------------
extension AST {
  /// A quantified node: a repetition amount applied to one child.
  public struct Quantification {
    /// The supported repetition amounts.
    public enum Amount: Hashable {
      /// Written `*` in the pattern.
      case zeroOrMore
    }

    public let amount: Amount
    public let child: AST.Node

    /// Creates a quantification applying `amount` to `child`.
    public init(_ amount: Amount, _ child: AST.Node) {
      (self.amount, self.child) = (amount, child)
    }
  }
}
16 |
extension AST.Quantification.Amount {
  /// The lower and upper repetition counts for this amount.
  ///
  /// `.zeroOrMore` yields a lower bound of `0` and a `nil` upper bound.
  public var bounds: (atLeast: Int?, atMost: Int?) {
    let range: (atLeast: Int?, atMost: Int?)
    switch self {
    case .zeroOrMore:
      range = (0, nil)
    }
    return range
  }
}
24 |
--------------------------------------------------------------------------------
/01/TryRegex/Sources/RegexParser/Regex/Parse/LexicalAnalysis.swift:
--------------------------------------------------------------------------------
extension Parser {
  /// The character type produced by the underlying `Source`.
  typealias Char = Source.Char
}

extension Parser {
  /// Runs `body` against the parser and returns its result.
  ///
  /// If `body` returns `nil`, the parser is restored to the state it had
  /// before the call, so a failed attempt never leaves input half-consumed.
  ///
  /// - Parameter body: A lexing attempt; it may advance the parser it is given.
  /// - Returns: The value produced by `body`, or `nil` when the attempt failed.
  mutating func tryEating<T>(
    _ body: (inout Self) -> T?
  ) -> T? {
    // `Parser` is a value type, so this copy is a complete checkpoint.
    let checkpoint = self
    guard let result = body(&self) else {
      self = checkpoint
      return nil
    }
    return result
  }
}
15 |
extension Parser {
  typealias Quant = AST.Quantification

  /// Consumes `c` if it is the next character.
  ///
  /// - Returns: `true` when `c` was consumed, `false` otherwise. The result
  ///   may be ignored by the caller.
  @discardableResult
  mutating func expect(_ c: Character) -> Bool {
    tryEat(c)
  }

  /// Returns the next character without consuming it, if there is one.
  func peek() -> Char? {
    return src.peek()
  }

  /// Moves forward by `n` characters; traps if the source cannot advance
  /// that far.
  mutating func advance(_ n: Int = 1) {
    if !src.tryAdvance(n) {
      fatalError("Advancing beyond end!")
    }
  }

  /// Consumes and returns the next character, or returns `nil` when `peek()`
  /// has nothing to offer.
  mutating func tryEat() -> Char? {
    let next = peek()
    if next != nil {
      advance()
    }
    return next
  }

  /// Consumes the next character only if it equals `c`.
  mutating func tryEat(_ c: Char) -> Bool {
    if peek() != c {
      return false
    }
    advance()
    return true
  }
}
47 |
extension Parser {
  /// Lexes a quantifier. Only `*` is recognised, producing `.zeroOrMore`.
  mutating func lexQuantifier() -> Quant.Amount? {
    tryEating { parser in
      parser.tryEat("*") ? .zeroOrMore : nil
    }
  }

  /// Lexes the opening `(` of a group, producing `.capture`.
  mutating func lexGroupStart() -> AST.Group.Kind? {
    tryEating { parser in
      parser.tryEat("(") ? .capture : nil
    }
  }

  /// Lexes a single-character atom.
  ///
  /// Returns `nil` without consuming anything when the source is empty or the
  /// next character is `)` or `|`.
  mutating func lexAtom() -> AST.Atom? {
    guard !src.isEmpty, peek() != ")", peek() != "|" else {
      return nil
    }

    guard let char = tryEat() else {
      fatalError("Unexpected end of input")
    }
    return AST.Atom(.char(char))
  }
}
79 |
--------------------------------------------------------------------------------
/01/TryRegex/Sources/RegexParser/Regex/Parse/Parse.swift:
--------------------------------------------------------------------------------
/// Parser state: the source it reads from.
struct Parser {
  /// The input being consumed.
  var src: Source

  /// Creates a parser reading from `source`.
  init(_ source: Source) { src = source }
}
8 |
extension Parser {
  /// Parses the whole source into an `AST`.
  ///
  /// Traps if input remains after the top-level node has been parsed: an
  /// unmatched `)` gets a dedicated message, anything else a generic one.
  mutating func parse() -> AST {
    let ast = parseNode()

    if !src.isEmpty {
      if tryEat(")") {
        fatalError("closing ')' does not balance any groups openings")
      } else {
        fatalError("Unhandled termination condition")
      }
    }

    return .init(ast)
  }

  /// Parses one or more `|`-separated concatenations.
  ///
  /// Returns `.empty` for an empty source, the concatenation itself when
  /// there is only one, and an `.alternation` otherwise.
  mutating func parseNode() -> AST.Node {
    guard !src.isEmpty else {
      return .empty(.init())
    }

    var alternatives = [parseConcatenation()]
    while tryEat("|") {
      alternatives.append(parseConcatenation())
    }

    return alternatives.count == 1
      ? alternatives[0]
      : .alternation(.init(alternatives))
  }

  /// Parses operands (each optionally followed by a quantifier) until the
  /// source ends or a `|` or `)` is next.
  ///
  /// Returns `.empty` for zero operands, the operand itself for exactly one,
  /// and a `.concatenation` otherwise.
  mutating func parseConcatenation() -> AST.Node {
    var result = [AST.Node]()

    while !src.isEmpty, peek() != "|", peek() != ")" {
      guard let operand = parseQuantifierOperand() else {
        fatalError("Should have parsed at least an atom")
      }
      if let amt = lexQuantifier() {
        result.append(.quantification(.init(amt, operand)))
      } else {
        result.append(operand)
      }
    }

    switch result.count {
    case 0: return .empty(.init())
    case 1: return result[0]
    default: return .concatenation(.init(result))
    }
  }

  /// Parses the body of a group whose opening `(` was already consumed, then
  /// requires the closing `)`.
  ///
  /// Traps when the `)` is missing, mirroring how `parse()` treats an
  /// unmatched `)`; previously an unclosed group was accepted silently.
  mutating func parseGroupBody(
    _ kind: AST.Group.Kind
  ) -> AST.Group {
    let child = parseNode()
    guard expect(")") else {
      fatalError("expected ')' to close the group")
    }
    return .init(kind, child)
  }

  /// Parses something a quantifier can apply to: a group or a single atom.
  /// Returns `nil` when neither can be lexed.
  mutating func parseQuantifierOperand() -> AST.Node? {
    if let kind = lexGroupStart() {
      return .group(parseGroupBody(kind))
    }

    if let atom = lexAtom() {
      return .atom(atom)
    }

    return nil
  }
}
97 |
/// Parses `regex` into an `AST`.
///
/// - Parameter regex: The pattern text; it is copied into a `String` before
///   parsing.
/// - Returns: The tree produced by `Parser.parse()`.
public func parse<S: StringProtocol>(
  _ regex: S
) -> AST where S.SubSequence == Substring {
  let source = Source(String(regex))
  var parser = Parser(source)
  return parser.parse()
}
103 |
--------------------------------------------------------------------------------
/01/TryRegex/Sources/RegexParser/Regex/Parse/Source.swift:
--------------------------------------------------------------------------------
1 | public struct Source {
2 | var input: Input
3 | var bounds: Range
4 |
5 | init(_ str: Input) {
6 | self.input = str
7 | self.bounds = str.startIndex.. Char? { _slice.first }
22 |
23 | @discardableResult
24 | mutating func tryAdvance(_ n: Int = 1) -> Bool {
25 | guard n > 0, let newLower = _slice.index(
26 | bounds.lowerBound, offsetBy: n, limitedBy: bounds.upperBound
27 | )
28 | else {
29 | return false
30 | }
31 | self.bounds = newLower.. MEProgram {
11 | try emitNode(root)
12 | builder.buildAccept()
13 |
14 | return try builder.assemble()
15 | }
16 | }
17 |
fileprivate extension Compiler.ByteCodeGen {
  /// Emits code for a single atom.
  mutating func emitAtom(_ a: DSLTree.Atom) throws {
    switch a {
    case let .char(c):
      emitCharacter(c)
    }
  }

  /// Emits a `match` instruction for `c`.
  mutating func emitCharacter(_ c: Character) {
    builder.buildMatch(c)
  }

  /// Emits an ordered choice over `elements`, using `body` to emit each one.
  ///
  /// Every element except the last is laid out as
  /// `save next; <element>; branch done; next:`. The last element is emitted
  /// without a save point and is followed by the `done` label.
  ///
  /// - Parameters:
  ///   - elements: The alternatives, in priority order. Must not be empty.
  ///   - withBacktracking: Not consulted by this implementation.
  ///   - body: Emits the code for one alternative.
  mutating func emitAlternationGen<C: BidirectionalCollection>(
    _ elements: C,
    withBacktracking: Bool,
    _ body: (inout Compiler.ByteCodeGen, C.Element) throws -> Void
  ) rethrows {
    // An empty choice has nothing to emit; fail with a message instead of a
    // bare force-unwrap trap.
    guard let lastElement = elements.last else {
      fatalError("Alternation must have at least one element")
    }

    let done = builder.makeAddress()
    for element in elements.dropLast() {
      let next = builder.makeAddress()
      builder.buildSave(next)
      try body(&self, element)
      builder.buildBranch(to: done)
      builder.label(next)
    }
    try body(&self, lastElement)
    builder.label(done)
  }

  /// Emits an ordered choice over `children`.
  mutating func emitAlternation(
    _ children: [DSLTree.Node]
  ) throws {
    try emitAlternationGen(children, withBacktracking: true) {
      try $0.emitNode($1)
    }
  }

  /// Emits one component of a concatenation.
  mutating func emitConcatenationComponent(
    _ node: DSLTree.Node
  ) throws {
    try emitNode(node)
  }

  /// Emits a loop around `child` for the given repetition `amount`.
  ///
  /// Layout:
  ///
  ///     minTripsControl: branch exitPolicy      (only when the lower bound is 0)
  ///     loopBody:        <child>
  ///     exitPolicy:      splitSaving loopBody, exit
  ///     exit:
  mutating func emitQuantification(
    _ amount: AST.Quantification.Amount,
    _ child: DSLTree.Node
  ) throws {
    guard let minTrips = amount.bounds.atLeast else {
      fatalError("Must have a lower bound")
    }

    let minTripsControl = builder.makeAddress()
    let loopBody = builder.makeAddress()
    let exitPolicy = builder.makeAddress()
    let exit = builder.makeAddress()

    builder.label(minTripsControl)
    if minTrips == 0 {
      // Zero trips are allowed, so skip straight to the exit decision.
      builder.buildBranch(to: exitPolicy)
    }

    builder.label(loopBody)
    try emitNode(child)

    builder.label(exitPolicy)
    builder.buildSplit(to: loopBody, saving: exit)
    builder.label(exit)
  }

  /// Emits each child in order.
  mutating func emitConcatenation(_ children: [DSLTree.Node]) throws {
    for child in children {
      try emitConcatenationComponent(child)
    }
  }

  /// Dispatches on the node kind and emits the corresponding code.
  /// A capture emits only its child; an empty node emits nothing.
  mutating func emitNode(_ node: DSLTree.Node) throws {
    switch node {
    case let .orderedChoice(children):
      try emitAlternation(children)

    case let .concatenation(children):
      try emitConcatenation(children)

    case .capture(let child):
      try emitNode(child)

    case let .quantification(amt, child):
      try emitQuantification(amt.ast, child)

    case let .atom(a):
      try emitAtom(a)

    case .empty:
      return
    }
  }
}
120 |
--------------------------------------------------------------------------------
/01/TryRegex/Sources/StringProcessing/Compiler.swift:
--------------------------------------------------------------------------------
1 | internal import RegexParser
2 |
/// Turns a `DSLTree` into an `MEProgram`.
class Compiler {
  /// The tree to compile.
  let tree: DSLTree

  /// Creates a compiler for an already-built tree.
  init(tree: DSLTree) {
    self.tree = tree
  }

  /// Creates a compiler for `ast` by way of its `dslTree`.
  init(ast: AST) {
    self.tree = ast.dslTree
  }

  /// Runs byte-code generation over the tree's root and returns the program.
  __consuming func emit() throws -> MEProgram {
    var generator = ByteCodeGen()
    let program = try generator.emitRoot(tree.root)
    return program
  }
}
19 |
--------------------------------------------------------------------------------
/01/TryRegex/Sources/StringProcessing/Engine/Backtracking.swift:
--------------------------------------------------------------------------------
extension Processor {
  /// A resumption point: an instruction address plus an optional input
  /// position.
  struct SavePoint {
    var pc: InstructionAddress
    var pos: Position?

    /// The save point's fields as a labelled tuple.
    var destructure: (
      pc: InstructionAddress,
      pos: Position?
    ) {
      (pc: pc, pos: pos)
    }
  }

  /// Builds a save point that resumes at `pc` from the processor's current
  /// position.
  func makeSavePoint(
    resumingAt pc: InstructionAddress
  ) -> SavePoint {
    let position: Position? = currentPosition
    return SavePoint(pc: pc, pos: position)
  }
}
23 |
--------------------------------------------------------------------------------
/01/TryRegex/Sources/StringProcessing/Engine/InstPayload.swift:
--------------------------------------------------------------------------------
extension Instruction {
  /// The operand data attached to an instruction.
  enum Payload {
    /// No operand.
    case none

    /// A single integer.
    case int(Int)

    /// A single character.
    case element(Character)

    /// A single instruction address.
    case addr(InstructionAddress)

    /// An immediate value paired with an integer.
    case pairedImmediateInt(UInt64, Int)

    /// An instruction address paired with an integer.
    case pairedAddrInt(InstructionAddress, Int)

    /// Two instruction addresses.
    case pairedAddrAddr(InstructionAddress, InstructionAddress)
  }
}
12 |
// Each payload shape has an initializer that wraps the value(s) and an
// accessor that unwraps them. An accessor traps when the payload holds a
// different case.
extension Instruction.Payload {
  /// Wraps an integer operand.
  init(int: Int) {
    self = .int(int)
  }
  /// The integer operand; traps unless the payload is `.int`.
  var int: Int {
    guard case .int(let value) = self else {
      fatalError("Payload is not an operand.")
    }
    return value
  }

  /// Wraps a character operand.
  init(element: Character) {
    self = .element(element)
  }
  /// The character operand; traps unless the payload is `.element`.
  var elementPayload: Character {
    guard case .element(let element) = self else {
      fatalError("Payload is not an operand.")
    }
    return element
  }

  /// Wraps an instruction address.
  init(addr: InstructionAddress) {
    self = .addr(addr)
  }
  /// The instruction address; traps unless the payload is `.addr`.
  var addr: InstructionAddress {
    guard case .addr(let addr) = self else {
      fatalError("Payload is not an address.")
    }
    return addr
  }

  /// Wraps an immediate value together with an integer register.
  init(immediate: UInt64, int: IntRegister) {
    self = .pairedImmediateInt(immediate, int)
  }
  /// The immediate/register pair; traps unless the payload is
  /// `.pairedImmediateInt`.
  var pairedImmediateInt: (UInt64, IntRegister) {
    guard case .pairedImmediateInt(let i, let r) = self else {
      // The message previously claimed an "address pair" was expected.
      fatalError("Payload is not an immediate and int pair.")
    }
    return (i, r)
  }

  /// Wraps an instruction address together with an integer.
  init(addr: InstructionAddress, int: Int) {
    self = .pairedAddrInt(addr, int)
  }
  /// The address/integer pair; traps unless the payload is `.pairedAddrInt`.
  var pairedAddrInt: (InstructionAddress, Int) {
    guard case .pairedAddrInt(let addr, let i) = self else {
      // The message previously claimed an "address pair" was expected.
      fatalError("Payload is not an address and int pair.")
    }
    return (addr, i)
  }

  /// Wraps two instruction addresses.
  init(addr: InstructionAddress, addr2: InstructionAddress) {
    self = .pairedAddrAddr(addr, addr2)
  }
  /// The two addresses; traps unless the payload is `.pairedAddrAddr`.
  var pairedAddrAddr: (InstructionAddress, InstructionAddress) {
    guard case .pairedAddrAddr(let a1, let a2) = self else {
      fatalError("Payload is not an address pair.")
    }
    return (a1, a2)
  }
}
74 |
--------------------------------------------------------------------------------
/01/TryRegex/Sources/StringProcessing/Engine/Instruction.swift:
--------------------------------------------------------------------------------
/// One instruction: an opcode and its payload.
struct Instruction {
  let opcode: Instruction.OpCode
  let payload: Instruction.Payload

  /// Creates an instruction with an explicit payload.
  init(_ opcode: Instruction.OpCode, _ payload: Instruction.Payload) {
    (self.opcode, self.payload) = (opcode, payload)
  }

  /// Creates an instruction whose payload is `.none`.
  init(_ opcode: Instruction.OpCode) {
    self.init(opcode, .none)
  }
}
14 |
extension Instruction {
  /// The operations an instruction can perform.
  ///
  /// Raw values are written out explicitly; they are the same values the
  /// compiler would assign by counting up from `invalid = 0`.
  enum OpCode: UInt64 {
    case invalid = 0
    case branch = 1
    case match = 2
    case save = 3
    case splitSaving = 4
    case accept = 5
  }
}
30 |
extension Instruction {
  /// The instruction viewed as an `(opcode, payload)` tuple.
  ///
  /// Assigning a tuple replaces the whole instruction. The setter previously
  /// rebuilt `self` from its own fields, so assignments had no effect.
  var destructure: (opcode: OpCode, payload: Payload) {
    get { (opcode, payload) }
    set { self = Self(newValue.opcode, newValue.payload) }
  }
}
37 |
/// Execution status: still running, failed, or accepted.
enum State {
  case inProgress, fail, accept
}
43 |
--------------------------------------------------------------------------------
/01/TryRegex/Sources/StringProcessing/Engine/MEBuilder.swift:
--------------------------------------------------------------------------------
1 | internal import RegexParser // For errors
2 |
3 | typealias InstructionAddress = Int
4 |
extension MEProgram {
  /// Accumulates instructions and address bookkeeping for a program.
  struct Builder {
    /// Instructions emitted so far, in program order.
    var instructions: [Instruction] = []

    // NOTE(review): the element type of this array was lost from the source
    // text. `Character` is assumed because the builder's match payloads are
    // characters — confirm against the original file.
    var elements: [Character] = []

    /// One slot per address token; `nil` until the token is labelled.
    var addressTokens: [InstructionAddress?] = []

    /// Instructions whose payloads still need their address tokens resolved.
    var addressFixups: [(InstructionAddress, AddressFixup)] = []

    /// Passed to `Processor.Registers(numInts:)` when the program is assembled.
    var nextIntRegister = IntRegister(0)
  }
}
17 |
extension MEProgram.Builder {
  /// The address token(s) an instruction is waiting on: always one,
  /// optionally a second.
  struct AddressFixup {
    var first: AddressToken
    var second: AddressToken? = nil

    /// A fixup that needs a single address.
    init(_ token: AddressToken) {
      first = token
    }

    /// A fixup that needs two addresses.
    init(_ token: AddressToken, _ other: AddressToken) {
      first = token
      second = other
    }
  }
}
30 |
extension MEProgram.Builder {
  /// Appends a `branch` and records that it targets `t`.
  mutating func buildBranch(to t: AddressToken) {
    instructions.append(Instruction(.branch))
    fixup(to: t)
  }

  /// Appends a `save` and records that it targets `t`.
  mutating func buildSave(_ t: AddressToken) {
    instructions.append(Instruction(.save))
    fixup(to: t)
  }

  /// Appends a `splitSaving` and records both of its targets.
  mutating func buildSplit(
    to: AddressToken, saving: AddressToken
  ) {
    instructions.append(Instruction(.splitSaving))
    fixup(to: (to, saving))
  }

  /// Appends a `match` carrying `e` as its payload.
  mutating func buildMatch(_ e: Character) {
    let payload = Instruction.Payload(element: e)
    instructions.append(Instruction(.match, payload))
  }

  /// Appends an `accept`.
  mutating func buildAccept() {
    instructions.append(Instruction(.accept))
  }

  /// Resolves every recorded fixup into a concrete address payload and
  /// returns the finished program.
  ///
  /// Traps if a referenced token was never labelled, if a `splitSaving`
  /// fixup lacks its second token, or if a fixup points at an opcode other
  /// than `branch`, `save` or `splitSaving`.
  mutating func assemble() throws -> MEProgram {
    var resolved = instructions

    for (instAddr, tok) in addressFixups {
      let opcode = resolved[instAddr].opcode
      let target = addressTokens[tok.first]!

      switch opcode {
      case .branch, .save:
        resolved[instAddr] = Instruction(
          opcode, Instruction.Payload(addr: target))

      case .splitSaving:
        guard let secondToken = tok.second else {
          fatalError()
        }
        let saving = addressTokens[secondToken]!
        resolved[instAddr] = Instruction(
          opcode, Instruction.Payload(addr: target, addr2: saving))

      default:
        fatalError()
      }
    }

    return MEProgram(
      instructions: resolved,
      registers: Processor.Registers(numInts: nextIntRegister)
    )
  }
}
91 |
extension MEProgram.Builder {
  enum _AddressToken {}
  typealias AddressToken = Int

  /// Reserves a new, not-yet-labelled address token.
  mutating func makeAddress() -> AddressToken {
    let token = AddressToken(addressTokens.count)
    addressTokens.append(nil)
    return token
  }

  /// Binds `t` to the address of the next instruction to be appended.
  mutating func label(_ t: AddressToken) {
    let here = InstructionAddress(instructions.count)
    addressTokens[t] = here
  }

  /// Records that the most recently appended instruction needs `t` resolved.
  mutating func fixup(to t: AddressToken) {
    assert(!instructions.isEmpty)
    let lastInstruction = InstructionAddress(instructions.endIndex - 1)
    addressFixups.append((lastInstruction, AddressFixup(t)))
  }

  /// Records that the most recently appended instruction needs both tokens
  /// in `ts` resolved.
  mutating func fixup(to ts: (AddressToken, AddressToken)) {
    assert(!instructions.isEmpty)
    let lastInstruction = InstructionAddress(instructions.endIndex - 1)
    addressFixups.append((lastInstruction, AddressFixup(ts.0, ts.1)))
  }
}
118 |
--------------------------------------------------------------------------------
/01/TryRegex/Sources/StringProcessing/Engine/MEBuiltins.swift:
--------------------------------------------------------------------------------
1 | extension String {
2 | func characterAndEnd(
3 | at pos: String.Index, limitedBy end: String.Index
4 | ) -> (Character, String.Index)? {
5 | guard pos < end else { return nil }
6 | let next = index(after: pos)
7 | if next <= end {
8 | return (self[pos], next)
9 | }
10 |
11 | let substr = self[pos..
17 | let instructions: [Instruction]
18 |
19 | var searchBounds: Range
20 | var currentPosition: Position
21 | var controller: Controller
22 | var registers: Registers
23 | var savePoints: [SavePoint] = []
24 | var state: State = .inProgress
25 | }
26 |
extension Processor {
  /// A position in the input.
  typealias Position = Input.Index

  /// Lower bound of the search range.
  var start: Position {
    return searchBounds.lowerBound
  }

  /// Upper bound of the search range.
  var end: Position {
    return searchBounds.upperBound
  }
}
33 |
extension Processor {
  /// Creates a processor for `program` over `input`, positioned at the start
  /// of `searchBounds` with the program counter at 0.
  ///
  /// - Parameters:
  ///   - program: Supplies the instructions and the initial registers.
  ///   - input: The string to run against.
  ///   - subjectBounds: Stored as given.
  ///     NOTE(review): the generic argument was lost from the source text;
  ///     `Range<Position>` is assumed to match the stored property — confirm.
  ///   - searchBounds: The range that matching starts in and is limited by.
  init(
    program: MEProgram,
    input: Input,
    subjectBounds: Range<Position>,
    searchBounds: Range<Position>
  ) {
    self.controller = Controller(pc: 0)
    self.instructions = program.instructions
    self.input = input
    self.subjectBounds = subjectBounds
    self.searchBounds = searchBounds

    self.currentPosition = searchBounds.lowerBound

    self.registers = program.registers
  }

  /// Rewinds the processor for another run: takes the new position and
  /// search bounds, sets the program counter back to 0, drops all save
  /// points, and returns the state to `.inProgress`.
  mutating func reset(
    currentPosition: Position,
    searchBounds: Range<Position>
  ) {
    self.currentPosition = currentPosition
    self.searchBounds = searchBounds

    self.controller = Controller(pc: 0)

    // Keep the allocation so later runs can reuse it.
    if !self.savePoints.isEmpty {
      self.savePoints.removeAll(keepingCapacity: true)
    }

    self.state = .inProgress
  }
}
68 |
extension Processor {
  /// Returns the opcode and payload of the instruction at the current
  /// program counter.
  func fetch() -> (Instruction.OpCode, Instruction.Payload) {
    let current = instructions[controller.pc]
    return current.destructure
  }

  /// Tries to match `e` at the current position, limited by `end`.
  ///
  /// On success the position moves past the match and `true` is returned.
  /// On failure `signalFailure()` runs and `false` is returned.
  mutating func match(_ e: Element) -> Bool {
    if let next = input.match(e, at: currentPosition, limitedBy: end) {
      currentPosition = next
      return true
    }

    signalFailure()
    return false
  }

  /// Resumes from the most recent save point, or sets the state to `.fail`
  /// when there is none.
  ///
  /// A save point without a position leaves the current position unchanged.
  /// `preservingCaptures` is not consulted by this implementation.
  mutating func signalFailure(preservingCaptures: Bool = false) {
    guard let savePoint = savePoints.popLast() else {
      state = .fail
      return
    }

    let (pc, pos) = savePoint.destructure
    controller.pc = pc
    if let pos = pos {
      currentPosition = pos
    }
  }

  /// Sets the state to `.accept`.
  mutating func tryAccept() {
    state = .accept
  }

  /// Fetches and executes the instruction at the current program counter.
  mutating func cycle() {
    let (opcode, payload) = fetch()

    switch opcode {
    case .invalid:
      fatalError("Invalid program")

    case .accept:
      tryAccept()

    case .branch:
      controller.pc = payload.addr

    case .match:
      // On a failed match, `match` has already handled the failure, so the
      // program counter is stepped only on success.
      let element = payload.elementPayload
      if match(element) {
        controller.step()
      }

    case .save:
      savePoints.append(makeSavePoint(resumingAt: payload.addr))
      controller.step()

    case .splitSaving:
      let (nextPC, resumeAddr) = payload.pairedAddrAddr
      savePoints.append(makeSavePoint(resumingAt: resumeAddr))
      controller.pc = nextPC
    }
  }
}
136 |
extension String {
  /// Returns the index just past the character at `pos` when that character
  /// equals `char`.
  ///
  /// Returns `nil` when `characterAndEnd(at:limitedBy:)` yields nothing or
  /// the character differs.
  func match(
    _ char: Character,
    at pos: Index,
    limitedBy end: String.Index
  ) -> Index? {
    guard let found = characterAndEnd(at: pos, limitedBy: end) else {
      return nil
    }
    return found.0 == char ? found.1 : nil
  }
}
149 |
--------------------------------------------------------------------------------
/01/TryRegex/Sources/StringProcessing/Engine/Registers.swift:
--------------------------------------------------------------------------------
1 | internal import RegexParser
2 |
3 | typealias IntRegister = Int
4 |
extension Processor {
  /// Integer register storage with a dirty flag.
  struct Registers {
    /// Set once a register has been written through the subscript.
    var isDirty = false

    /// The register values.
    var ints: [Int]

    /// Creates `numInts` registers, all holding zero.
    init(
      isDirty: Bool = false,
      numInts: Int
    ) {
      self.isDirty = isDirty
      self.ints = [Int](repeating: 0, count: numInts)
    }
  }
}
19 |
extension Processor.Registers {
  typealias Input = String

  /// Reads or writes integer register `i`. Every write marks the registers
  /// dirty.
  subscript(_ i: IntRegister) -> Int {
    get {
      return ints[i]
    }
    set(value) {
      isDirty = true
      ints[i] = value
    }
  }
}
31 |
extension Processor.Registers {
  /// Zeroes every integer register if any was written since the last reset.
  ///
  /// Clears `isDirty` afterwards so that later resets can take the early-out
  /// again; previously the flag stayed set after the first write.
  mutating func reset() {
    guard isDirty else {
      return
    }
    self.ints._setAll(to: 0)
    isDirty = false
  }
}
40 |
extension MutableCollection {
  /// Overwrites every element of the collection with `e`.
  mutating func _setAll(to e: Element) {
    var cursor = startIndex
    while cursor != endIndex {
      self[cursor] = e
      formIndex(after: &cursor)
    }
  }
}
48 |
--------------------------------------------------------------------------------
/01/TryRegex/Sources/StringProcessing/Executor.swift:
--------------------------------------------------------------------------------
1 | internal import RegexParser
2 |
3 | enum Executor