├── README.md └── WriteYourLanguage.playground ├── Pages ├── Conclusion.xcplaygroundpage │ ├── Contents.swift │ ├── Resources │ │ └── complete-flow.png │ └── Sources │ │ └── Mu.swift ├── Interpreter.xcplaygroundpage │ ├── Contents.swift │ ├── Resources │ │ └── simple-ast.png │ └── Sources │ │ ├── Lexer.swift │ │ └── Parser.swift ├── Intro.xcplaygroundpage │ ├── Contents.swift │ └── Resources │ │ └── flow.png ├── Lexer.xcplaygroundpage │ ├── Contents.swift │ └── Resources │ │ └── lexer.png └── Parser.xcplaygroundpage │ ├── Contents.swift │ ├── Resources │ └── parser.png │ └── Sources │ ├── Lexer.swift │ └── Util.swift ├── Sources └── Support.swift ├── contents.xcplayground └── playground.xcworkspace ├── contents.xcworkspacedata └── xcuserdata └── MarcioK.xcuserdatad └── UserInterfaceState.xcuserstate /README.md: -------------------------------------------------------------------------------- 1 | # Mu 2 | It's a playground explaining how to create a tiny programming language (Mu). 3 | 4 | You can download the playground [here](https://github.com/marciok/Mu/releases/download/1.0/WriteYourLanguage.playground.zip) 5 | or check the source code live [here](http://swiftlang.ng.bluemix.net/#/repl/5825fba6dee52b5745935831) 6 | 7 | Or follow the tutorial below. 8 | 9 | --- 10 | 11 | # Writing Your Own Programming Language 12 | 13 | You don't need a CS degree to write a programming language; you just need to understand 3 basic steps. 14 | 15 | ## The Language: **Mu(μ)** 16 | Mu is a minimal language that consists of a prefix operator, a binary operation, and one-digit numbers. 17 | 18 | ### Examples: 19 | `(s 2 4)` or `(s (s 4 5) 4)` or `(s (s 4 5) (s 3 2))`... 
20 | 21 | ## The Steps: 22 | * [Lexer](#lexer) 23 | * [Parser](#parser) 24 | * [Interpreter](#interpreter) 25 | 26 | ![Alt text](https://raw.githubusercontent.com/marciok/Mu/master/WriteYourLanguage.playground/Pages/Intro.xcplaygroundpage/Resources/flow.png) 27 | 28 | --- 29 | 30 | # Lexer 31 | 32 | *"In computer science, lexical analysis is the process of converting a sequence of characters into a sequence of tokens (strings with an identified "meaning"). A program that performs lexical analysis may be called a lexer, tokenizer,[1] or scanner (though "scanner" is also used to refer to the first stage of a lexer). Such a lexer is generally combined with a parser, which together analyze the syntax of programming languages..."* *-Wikipedia* 33 | 34 | The idea is to transform an array of characters into an array of tokens (strings with an identified "meaning"). 35 | 36 | ## Example: 37 | ![Alt text](https://raw.githubusercontent.com/marciok/Mu/master/WriteYourLanguage.playground/Pages/Lexer.xcplaygroundpage/Resources/lexer.png) 38 | 39 | Because `Mu` is so small--only one character operator and numbers--you can simply iterate over the input and check each character. 40 | 41 | ```swift 42 | enum Token { 43 | case parensOpen 44 | case op(String) 45 | case number(Int) 46 | case parensClose 47 | } 48 | 49 | struct Lexer { 50 | 51 | static func tokenize(_ input: String) -> [Token] { 52 | return input.characters.flatMap { 53 | switch $0 { 54 | case "(": return Token.parensOpen 55 | case ")": return Token.parensClose 56 | case "s": return Token.op(String($0)) 57 | default: 58 | if "0"..."9" ~= $0 { 59 | return Token.number(Int(String($0))!) 
60 | } 61 | } 62 | 63 | return nil 64 | } 65 | } 66 | } 67 | 68 | let input = "(s (s 4 5) 4)" 69 | let tokens = Lexer.tokenize(input) 70 | 71 | ``` 72 | --- 73 | # Parser 74 | 75 | *Parsing or syntactic analysis is the process of analysing a string of symbols, either in natural language or in computer languages, conforming to the rules of a formal grammar...* *-Wikipedia* 76 | 77 | ## Grammar: 78 | 79 | `expression: parensOpen operator primaryExpression primaryExpression parensClose` 80 | 81 | `primaryExpression: expression | number` 82 | 83 | `parensOpen: "("` 84 | 85 | `parensClose: ")"` 86 | 87 | `operator: "s"` 88 | 89 | `number: [0-9]` 90 | 91 | `Mu`'s grammar is a context-free grammar, that means it describes all possible strings in the language. 92 | The parser will start from the top (root of the generated tree) and it will go until the lowest node. 93 | 94 | **Tip: the code should be a direct representation of the grammar** 95 | ~~~ 96 | func parseExpression() -> ExpressionNode { 97 | ... 98 | firstPrimaryExpression = parsePrimaryExpression() 99 | secondPrimaryExpression = parsePrimaryExpression() 100 | ... 
101 | } 102 | 103 | func parsePrimaryExpression() -> PrimaryExpressionNode { 104 | return parseExpression() || parseNumber() 105 | } 106 | ~~~ 107 | 108 | ![Alt text](https://raw.githubusercontent.com/marciok/Mu/master/WriteYourLanguage.playground/Pages/Parser.xcplaygroundpage/Resources/parser.png) 109 | 110 | ```swift 111 | indirect enum PrimaryExpressionNode { 112 | case number(Int) 113 | case expression(ExpressionNode) 114 | } 115 | 116 | struct ExpressionNode { 117 | var op: String 118 | var firstExpression: PrimaryExpressionNode 119 | var secondExpression: PrimaryExpressionNode 120 | } 121 | 122 | struct Parser { 123 | 124 | var index = 0 125 | let tokens: [Token] 126 | init(tokens: [Token]) { 127 | self.tokens = tokens 128 | } 129 | 130 | mutating func popToken() -> Token { 131 | let token = tokens[index] 132 | index += 1 133 | 134 | return token 135 | } 136 | 137 | mutating func peekToken() -> Token { 138 | return tokens[index] 139 | } 140 | 141 | mutating func parse() throws -> ExpressionNode { 142 | return try parseExpression() 143 | } 144 | 145 | mutating func parseExpression() throws -> ExpressionNode { 146 | guard case .parensOpen = popToken() else { 147 | throw ParsingError.unexpectedToken 148 | } 149 | guard case let Token.op(_operator) = popToken() else { 150 | throw ParsingError.unexpectedToken 151 | } 152 | 153 | let firstExpression = try parsePrimaryExpression() 154 | let secondExpression = try parsePrimaryExpression() 155 | 156 | guard case .parensClose = popToken() else { 157 | throw ParsingError.unexpectedToken 158 | } 159 | 160 | return ExpressionNode(op: _operator, firstExpression: firstExpression, secondExpression: secondExpression) 161 | } 162 | 163 | mutating func parsePrimaryExpression() throws -> PrimaryExpressionNode { 164 | switch peekToken() { 165 | case .number: 166 | return try parseNumber() 167 | case .parensOpen: 168 | let expressionNode = try parseExpression() 169 | 170 | return PrimaryExpressionNode.expression(expressionNode) 
171 | default: 172 | throw ParsingError.unexpectedToken 173 | } 174 | } 175 | 176 | mutating func parseNumber() throws -> PrimaryExpressionNode { 177 | guard case let Token.number(n) = popToken() else { throw ParsingError.unexpectedToken } 178 | 179 | return PrimaryExpressionNode.number(n) 180 | } 181 | 182 | } 183 | 184 | //MARK: Utils 185 | 186 | extension ExpressionNode: CustomStringConvertible { 187 | public var description: String { 188 | return "\(op) -> [\(firstExpression), \(secondExpression)]" 189 | } 190 | } 191 | extension PrimaryExpressionNode: CustomStringConvertible { 192 | public var description: String { 193 | switch self { 194 | case .number(let n): return n.description 195 | case .expression(let exp): return exp.description 196 | } 197 | } 198 | } 199 | 200 | 201 | let input = "(s 2 (s 3 5))" 202 | let tokens = Lexer.tokenize(input) 203 | var parser = Parser(tokens: tokens) 204 | var ast = try! parser.parse() 205 | 206 | ``` 207 | --- 208 | 209 | # Interpreter 210 | 211 | *"In computer science, an interpreter is a computer program that directly executes, i.e. performs, instructions written in a programming or scripting language, without previously compiling them into a machine language program."* *-Wikipedia* 212 | 213 | 214 | ## Example: 215 | `Mu`'s interpreter will walk through its A.S.T and compute a value by applying an operator to the children nodes. 
216 | 217 | ![Alt text](https://raw.githubusercontent.com/marciok/Mu/master/WriteYourLanguage.playground/Pages/Interpreter.xcplaygroundpage/Resources/simple-ast.png) 218 | 219 | ```swift 220 | enum InterpreterError: Error { 221 | case unknownOperator 222 | } 223 | 224 | struct Interpreter { 225 | static func eval(_ expression: ExpressionNode) throws -> Int { 226 | let firstEval = try eval(expression.first) 227 | let secEval = try eval(expression.second) 228 | 229 | if expression.op == "s" { 230 | return firstEval + secEval 231 | } 232 | 233 | throw InterpreterError.unknownOperator 234 | } 235 | 236 | static func eval(_ prim: PrimaryExpressionNode) throws -> Int { 237 | switch prim { 238 | case .expression(let exp): 239 | return try eval(exp) 240 | case .number(let n): 241 | return Int(n) 242 | } 243 | } 244 | 245 | } 246 | 247 | let input = "(s (s 5 2) 4)" 248 | let tokens = Lexer.tokenize(input) 249 | var parser = Parser(tokens: tokens) 250 | 251 | let ast = try! parser.parse() 252 | try! Interpreter.eval(ast) 253 | ``` 254 | --- 255 | 256 | # Conclusion 257 | ![Alt text](https://raw.githubusercontent.com/marciok/Mu/master/WriteYourLanguage.playground/Pages/Conclusion.xcplaygroundpage/Resources/complete-flow.png) 258 | - Given an input 259 | `let input = "(s (s 4 5) 4)"` 260 | - Extract an array of tokens (Lexing) 261 | `let tokens = Lexer.tokenize(input)` 262 | - Parse the given tokens into a tree (Parsing) 263 | ~~~ 264 | var parser = Parser(tokens: tokens) 265 | let ast = try! parser.parse() 266 | ~~~ 267 | - And walk through this tree, and compute the values contained inside a node (Interpreting) 268 | `let result = try! 
Interpreter.eval(ast)` 269 | 270 | ### Resources 271 | 272 | - https://ruslanspivak.com/lsbasi-part1/ 273 | - https://www.amazon.com/Compilers-Principles-Techniques-Tools-2nd/dp/0321486811 274 | - http://llvm.org/docs/tutorial/ 275 | 276 | 277 | 278 | -------------------------------------------------------------------------------- /WriteYourLanguage.playground/Pages/Conclusion.xcplaygroundpage/Contents.swift: -------------------------------------------------------------------------------- 1 | //: [Previous](@previous) 2 | /*: 3 | # Conclusion 4 | 5 | ![Alt text](complete-flow.png) 6 | 7 | - Given an input 8 | */ 9 | 10 | let input = "(s (s 4 5) 4)" 11 | 12 | /*: 13 | - Extract an array of tokens (Lexing); 14 | */ 15 | 16 | let tokens = Lexer.tokenize(input) 17 | 18 | /*: 19 | - Parse the given tokens into a tree (Parsing); 20 | */ 21 | 22 | var parser = Parser(tokens: tokens) 23 | let ast = try! parser.parse() 24 | 25 | /*: 26 | - And walk through this tree, and compute the values contained inside a node (Interpreting); 27 | */ 28 | let result = try! 
// Mu.swift — the complete Mu pipeline: Lexer -> Parser -> Interpreter.

// MARK: - Lexer

/// A lexical token of the Mu language.
public enum Token {
    /// An opening parenthesis, `(`.
    case parensOpen
    /// An operator name; the lexer only ever produces `"s"`.
    case op(String)
    /// A one-digit number literal.
    case number(Int)
    /// A closing parenthesis, `)`.
    case parensClose
}

/// Converts Mu source text into a flat list of tokens.
public struct Lexer {
    /// Tokenizes `input` one character at a time.
    ///
    /// Characters that are not `(`, `)`, `s` or an ASCII digit (whitespace,
    /// for example) are skipped.
    ///
    /// - Parameter input: The Mu source text.
    /// - Returns: The recognized tokens, in source order.
    public static func tokenize(_ input: String) -> [Token] {
        return input.compactMap { character -> Token? in
            switch character {
            case "(":
                return .parensOpen
            case ")":
                return .parensClose
            case "s":
                return .op(String(character))
            default:
                // `Int(_:)` only succeeds for a plain ASCII digit here. A digit fused
                // with a combining mark (which still sorts between "0" and "9") yields
                // `nil` and is skipped instead of trapping on a force unwrap.
                return Int(String(character)).map(Token.number)
            }
        }
    }
}

// MARK: - Parser

/// An operand of an expression: either a number or a nested expression.
public indirect enum PrimaryExpressionNode {
    case number(Int)
    case expression(ExpressionNode)
}

/// A parenthesized binary operation, e.g. `(s 2 4)`.
public struct ExpressionNode {
    public var op: String
    public var first: PrimaryExpressionNode
    public var second: PrimaryExpressionNode
}

/// Errors thrown while parsing a token list.
public enum ParsingError: Error {
    /// A token did not match the grammar, or the input ended too early.
    case unexpectedToken
}

/// Recursive-descent parser for the grammar
/// `expression: "(" operator primaryExpression primaryExpression ")"`,
/// `primaryExpression: expression | number`.
public struct Parser {

    var index = 0
    let tokens: [Token]

    public init(tokens: [Token]) {
        self.tokens = tokens
    }

    /// Returns the current token without consuming it.
    ///
    /// - Throws: `ParsingError.unexpectedToken` when every token has been consumed.
    func peekToken() throws -> Token {
        guard tokens.indices.contains(index) else {
            throw ParsingError.unexpectedToken
        }
        return tokens[index]
    }

    /// Returns the current token and advances past it.
    ///
    /// - Throws: `ParsingError.unexpectedToken` when every token has been consumed.
    mutating func popToken() throws -> Token {
        let token = try peekToken()
        index += 1
        return token
    }

    /// Parses `primaryExpression: expression | number`.
    mutating func parsePrimaryExpression() throws -> PrimaryExpressionNode {
        switch try peekToken() {
        case .number(let value):
            _ = try popToken() // consume the number that was just peeked
            return .number(value)
        case .parensOpen:
            return .expression(try parseExpression())
        case .op, .parensClose:
            throw ParsingError.unexpectedToken
        }
    }

    /// Parses `expression: "(" operator primaryExpression primaryExpression ")"`.
    mutating func parseExpression() throws -> ExpressionNode {
        guard case .parensOpen = try popToken() else {
            throw ParsingError.unexpectedToken
        }
        guard case let .op(operatorName) = try popToken() else {
            throw ParsingError.unexpectedToken
        }

        let firstOperand = try parsePrimaryExpression()
        let secondOperand = try parsePrimaryExpression()

        guard case .parensClose = try popToken() else {
            throw ParsingError.unexpectedToken
        }

        return ExpressionNode(op: operatorName, first: firstOperand, second: secondOperand)
    }

    /// Parses one expression starting at the current position.
    ///
    /// Tokens that follow the first complete expression are left unconsumed.
    ///
    /// - Returns: The root node of the parsed tree.
    /// - Throws: `ParsingError.unexpectedToken` on malformed or truncated input.
    public mutating func parse() throws -> ExpressionNode {
        return try parseExpression()
    }
}

// MARK: - Interpreter

/// Errors thrown while evaluating a tree.
public enum InterpreterError: Error {
    /// The expression's operator is not `"s"`.
    case unknownOperator
}

/// Walks the tree produced by `Parser` and computes its value.
public struct Interpreter {
    /// Evaluates both operands, then applies the operator (`"s"` adds them).
    ///
    /// - Parameter expression: The node to evaluate.
    /// - Returns: The computed value.
    /// - Throws: `InterpreterError.unknownOperator` for any operator other than `"s"`.
    public static func eval(_ expression: ExpressionNode) throws -> Int {
        let firstValue = try eval(expression.first)
        let secondValue = try eval(expression.second)

        guard expression.op == "s" else {
            throw InterpreterError.unknownOperator
        }
        return firstValue + secondValue
    }

    /// Evaluates an operand: a number is its own value, a nested expression is
    /// evaluated recursively.
    static func eval(_ prim: PrimaryExpressionNode) throws -> Int {
        switch prim {
        case .expression(let nested):
            return try eval(nested)
        case .number(let value):
            return value
        }
    }
}
10 | 11 | 12 | ![Alt text](simple-ast.png) 13 | 14 | */ 15 | enum InterpreterError: Error { 16 | case unknownOperator 17 | } 18 | 19 | struct Interpreter { 20 | static func eval(_ expression: ExpressionNode) throws -> Int { 21 | let firstEval = try eval(expression.first) 22 | let secEval = try eval(expression.second) 23 | 24 | if expression.op == "s" { 25 | return firstEval + secEval 26 | } 27 | 28 | throw InterpreterError.unknownOperator 29 | } 30 | 31 | static func eval(_ prim: PrimaryExpressionNode) throws -> Int { 32 | switch prim { 33 | case .expression(let exp): 34 | return try eval(exp) 35 | case .number(let n): 36 | return Int(n) 37 | } 38 | } 39 | 40 | } 41 | 42 | let input = "(s (s 5 2) 4)" 43 | let tokens = Lexer.tokenize(input) 44 | var parser = Parser(tokens: tokens) 45 | 46 | let ast = try! parser.parse() 47 | try! Interpreter.eval(ast) 48 | 49 | 50 | //: [Next](@next) 51 | -------------------------------------------------------------------------------- /WriteYourLanguage.playground/Pages/Interpreter.xcplaygroundpage/Resources/simple-ast.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marciok/Mu/ba14938eac5fecc2f2ec12eade3188e7946090a8/WriteYourLanguage.playground/Pages/Interpreter.xcplaygroundpage/Resources/simple-ast.png -------------------------------------------------------------------------------- /WriteYourLanguage.playground/Pages/Interpreter.xcplaygroundpage/Sources/Lexer.swift: -------------------------------------------------------------------------------- 1 | 2 | public enum Token { 3 | case parensOpen 4 | case op(String) 5 | case number(Int) 6 | case parensClose 7 | } 8 | 9 | public struct Lexer { 10 | public static func tokenize(_ input: String) -> [Token] { 11 | return input.characters.flatMap { 12 | switch $0 { 13 | case "(": return Token.parensOpen 14 | case ")": return Token.parensClose 15 | case "s": return Token.op(String($0)) 16 | default: 17 | if "0"..."9" ~= 
$0 { 18 | return Token.number(Int(String($0))!) 19 | } 20 | } 21 | 22 | return nil 23 | } 24 | } 25 | } 26 | 27 | -------------------------------------------------------------------------------- /WriteYourLanguage.playground/Pages/Interpreter.xcplaygroundpage/Sources/Parser.swift: -------------------------------------------------------------------------------- 1 | public indirect enum PrimaryExpressionNode { 2 | case number(Int) 3 | case expression(ExpressionNode) 4 | } 5 | 6 | public struct ExpressionNode { 7 | public var op: String 8 | public var first: PrimaryExpressionNode 9 | public var second: PrimaryExpressionNode 10 | } 11 | 12 | public enum ParsingError: Error { 13 | case unexpectedToken 14 | } 15 | public struct Parser { 16 | 17 | var index = 0 18 | let tokens: [Token] 19 | 20 | public init(tokens: [Token]) { 21 | self.tokens = tokens 22 | } 23 | 24 | mutating func popToken() -> Token { 25 | let token = tokens[index] 26 | index += 1 27 | 28 | return token 29 | } 30 | 31 | mutating func peekToken() -> Token { 32 | return tokens[index] 33 | } 34 | 35 | 36 | mutating func parsePrimaryExpression() throws -> PrimaryExpressionNode { 37 | switch peekToken() { 38 | case .number(let n): 39 | _ = popToken() // Removing number 40 | return PrimaryExpressionNode.number(n) 41 | case .parensOpen: 42 | let expressionNode = try parseExpression() 43 | 44 | return PrimaryExpressionNode.expression(expressionNode) 45 | default: 46 | throw ParsingError.unexpectedToken 47 | } 48 | } 49 | 50 | mutating func parseExpression() throws -> ExpressionNode { 51 | guard case .parensOpen = popToken() else { 52 | throw ParsingError.unexpectedToken 53 | } 54 | guard case let .op(_operator) = popToken() else { 55 | throw ParsingError.unexpectedToken 56 | } 57 | 58 | let firstExpression = try parsePrimaryExpression() 59 | let secondExpression = try parsePrimaryExpression() 60 | 61 | guard case .parensClose = popToken() else { 62 | throw ParsingError.unexpectedToken 63 | } 64 | 65 | return 
ExpressionNode(op: _operator, first: firstExpression, second: secondExpression) 66 | } 67 | 68 | public mutating func parse() throws -> ExpressionNode { 69 | return try parseExpression() 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /WriteYourLanguage.playground/Pages/Intro.xcplaygroundpage/Contents.swift: -------------------------------------------------------------------------------- 1 | /*: 2 | # Writing Your Own Programming Language 3 | 4 | You don't need a CS degree to write a programming language; you just need to understand 3 basic steps. 5 | 6 | ## The Language: **Mu(μ)** 7 | Mu is a minimal language that consists of a prefix operator, a binary operation, and one-digit numbers. 8 | 9 | ### Examples: 10 | `(s 2 4)` or `(s (s 4 5) 4)` or `(s (s 4 5) (s 3 2))`... 11 | 12 | ## The Steps: 13 | * Lexer 14 | * Parser 15 | * Interpreter 16 | 17 | ![Alt text](flow.png) 18 | */ 19 | 20 | let input = "(s (s 6 6) 6)" // Should return 18 21 | 22 | //: [Lexer ->](@next) 23 | -------------------------------------------------------------------------------- /WriteYourLanguage.playground/Pages/Intro.xcplaygroundpage/Resources/flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marciok/Mu/ba14938eac5fecc2f2ec12eade3188e7946090a8/WriteYourLanguage.playground/Pages/Intro.xcplaygroundpage/Resources/flow.png -------------------------------------------------------------------------------- /WriteYourLanguage.playground/Pages/Lexer.xcplaygroundpage/Contents.swift: -------------------------------------------------------------------------------- 1 | //: [Previous](@previous) 2 | /*: 3 | 4 | # Lexer 5 | 6 | *"In computer science, lexical analysis is the process of converting a sequence of characters into a sequence of tokens (strings with an identified "meaning"). 
A program that performs lexical analysis may be called a lexer, tokenizer,[1] or scanner (though "scanner" is also used to refer to the first stage of a lexer). Such a lexer is generally combined with a parser, which together analyze the syntax of programming languages..."* *-Wikipedia* 7 | 8 | The idea is to transform an array of characters into an array of tokens (strings with an identified "meaning"). 9 | 10 | ## Example: 11 | ![Alt text](lexer.png) 12 | 13 | Because `Mu` is so small--only one character operator and numbers--you can simply iterate over the input and check one character at a time. 14 | 15 | */ 16 | 17 | enum Token { 18 | case parensOpen 19 | case op(String) 20 | case number(Int) 21 | case parensClose 22 | } 23 | 24 | struct Lexer { 25 | 26 | static func tokenize(_ input: String) -> [Token] { 27 | return input.characters.flatMap { 28 | switch $0 { 29 | case "(": return Token.parensOpen 30 | case ")": return Token.parensClose 31 | case "s": return Token.op(String($0)) 32 | default: 33 | if "0"..."9" ~= $0 { 34 | return Token.number(Int(String($0))!) 
35 | } 36 | } 37 | 38 | return nil 39 | } 40 | } 41 | } 42 | 43 | let input = "(s (s 4 5) 4)" 44 | let tokens = Lexer.tokenize(input) 45 | 46 | //: [Next](@next) 47 | -------------------------------------------------------------------------------- /WriteYourLanguage.playground/Pages/Lexer.xcplaygroundpage/Resources/lexer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marciok/Mu/ba14938eac5fecc2f2ec12eade3188e7946090a8/WriteYourLanguage.playground/Pages/Lexer.xcplaygroundpage/Resources/lexer.png -------------------------------------------------------------------------------- /WriteYourLanguage.playground/Pages/Parser.xcplaygroundpage/Contents.swift: -------------------------------------------------------------------------------- 1 | //: [Previous](@previous) 2 | /*: 3 | # Parser 4 | 5 | *Parsing or syntactic analysis is the process of analysing a string of symbols, either in natural language or in computer languages, conforming to the rules of a formal grammar...* *-Wikipedia* 6 | 7 | ## Grammar: 8 | 9 | `expression: parensOpen operator primaryExpression primaryExpression parensClose` 10 | 11 | `primaryExpression: expression | number` 12 | 13 | `parensOpen: "("` 14 | 15 | `parensClose: ")"` 16 | 17 | `operator: "s"` 18 | 19 | `number: [0-9]` 20 | 21 | `Mu`'s grammar is a context-free grammar, that means it describes all possible strings in the language. 22 | The parser will start from the top (root of the generated tree) and it will go until the lowest node. 23 | 24 | **Tip: the code should be a direct representation of the grammar** 25 | ~~~ 26 | func parseExpression() -> ExpressionNode { 27 | ... 28 | firstPrimaryExpression = parsePrimaryExpression() 29 | secondPrimaryExpression = parsePrimaryExpression() 30 | ... 
31 | } 32 | 33 | func parsePrimaryExpression() -> PrimaryExpressionNode { 34 | return parseExpression() || parseNumber() 35 | } 36 | ~~~ 37 | 38 | ![Alt text](parser.png) 39 | 40 | */ 41 | 42 | indirect enum PrimaryExpressionNode { 43 | case number(Int) 44 | case expression(ExpressionNode) 45 | } 46 | 47 | struct ExpressionNode { 48 | var op: String 49 | var firstExpression: PrimaryExpressionNode 50 | var secondExpression: PrimaryExpressionNode 51 | } 52 | 53 | struct Parser { 54 | 55 | var index = 0 56 | let tokens: [Token] 57 | init(tokens: [Token]) { 58 | self.tokens = tokens 59 | } 60 | 61 | mutating func popToken() -> Token { 62 | let token = tokens[index] 63 | index += 1 64 | 65 | return token 66 | } 67 | 68 | mutating func peekToken() -> Token { 69 | return tokens[index] 70 | } 71 | 72 | mutating func parse() throws -> ExpressionNode { 73 | return try parseExpression() 74 | } 75 | 76 | mutating func parseExpression() throws -> ExpressionNode { 77 | guard case .parensOpen = popToken() else { 78 | throw ParsingError.unexpectedToken 79 | } 80 | guard case let Token.op(_operator) = popToken() else { 81 | throw ParsingError.unexpectedToken 82 | } 83 | 84 | let firstExpression = try parsePrimaryExpression() 85 | let secondExpression = try parsePrimaryExpression() 86 | 87 | guard case .parensClose = popToken() else { 88 | throw ParsingError.unexpectedToken 89 | } 90 | 91 | return ExpressionNode(op: _operator, firstExpression: firstExpression, secondExpression: secondExpression) 92 | } 93 | 94 | mutating func parsePrimaryExpression() throws -> PrimaryExpressionNode { 95 | switch peekToken() { 96 | case .number: 97 | return try parseNumber() 98 | case .parensOpen: 99 | let expressionNode = try parseExpression() 100 | 101 | return PrimaryExpressionNode.expression(expressionNode) 102 | default: 103 | throw ParsingError.unexpectedToken 104 | } 105 | } 106 | 107 | mutating func parseNumber() throws -> PrimaryExpressionNode { 108 | guard case let Token.number(n) = popToken() 
else { throw ParsingError.unexpectedToken } 109 | 110 | return PrimaryExpressionNode.number(n) 111 | } 112 | 113 | } 114 | 115 | //MARK: Utils 116 | 117 | extension ExpressionNode: CustomStringConvertible { 118 | public var description: String { 119 | return "\(op) -> [\(firstExpression), \(secondExpression)]" 120 | } 121 | } 122 | extension PrimaryExpressionNode: CustomStringConvertible { 123 | public var description: String { 124 | switch self { 125 | case .number(let n): return n.description 126 | case .expression(let exp): return exp.description 127 | } 128 | } 129 | } 130 | 131 | 132 | let input = "(s 2 (s 3 5))" 133 | let tokens = Lexer.tokenize(input) 134 | var parser = Parser(tokens: tokens) 135 | var ast = try! parser.parse() 136 | //: [Next](@next) 137 | -------------------------------------------------------------------------------- /WriteYourLanguage.playground/Pages/Parser.xcplaygroundpage/Resources/parser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marciok/Mu/ba14938eac5fecc2f2ec12eade3188e7946090a8/WriteYourLanguage.playground/Pages/Parser.xcplaygroundpage/Resources/parser.png -------------------------------------------------------------------------------- /WriteYourLanguage.playground/Pages/Parser.xcplaygroundpage/Sources/Lexer.swift: -------------------------------------------------------------------------------- 1 | 2 | public enum Token { 3 | case parensOpen 4 | case op(String) 5 | case number(Int) 6 | case parensClose 7 | } 8 | 9 | public struct Lexer { 10 | public static func tokenize(_ input: String) -> [Token] { 11 | return input.characters.flatMap { 12 | switch $0 { 13 | case "(": return Token.parensOpen 14 | case ")": return Token.parensClose 15 | case "s": return Token.op(String($0)) 16 | default: 17 | if "0"..."9" ~= $0 { 18 | return Token.number(Int(String($0))!) 
19 | } 20 | } 21 | 22 | return nil 23 | } 24 | } 25 | } 26 | 27 | -------------------------------------------------------------------------------- /WriteYourLanguage.playground/Pages/Parser.xcplaygroundpage/Sources/Util.swift: -------------------------------------------------------------------------------- 1 | public enum ParsingError: Error { 2 | case unexpectedToken 3 | } 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /WriteYourLanguage.playground/Sources/Support.swift: -------------------------------------------------------------------------------- 1 | import Foundation 2 | 3 | -------------------------------------------------------------------------------- /WriteYourLanguage.playground/contents.xcplayground: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /WriteYourLanguage.playground/playground.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /WriteYourLanguage.playground/playground.xcworkspace/xcuserdata/MarcioK.xcuserdatad/UserInterfaceState.xcuserstate: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marciok/Mu/ba14938eac5fecc2f2ec12eade3188e7946090a8/WriteYourLanguage.playground/playground.xcworkspace/xcuserdata/MarcioK.xcuserdatad/UserInterfaceState.xcuserstate --------------------------------------------------------------------------------