├── .editorconfig ├── .gitignore ├── Makefile ├── docs ├── 1.md ├── 2-1.md ├── 2-2.md ├── 3-1.md ├── 3-2.md ├── 3-3.md ├── 4.md ├── 5.md ├── 6-1.md ├── 6-2.md ├── 7.md ├── README.md ├── SUMMARY.md ├── book.json └── images │ ├── 2-1-1.png │ ├── 2-1-2.png │ ├── 2-1-3.png │ ├── 3-1-1.png │ ├── 3-1-2.png │ ├── 3-1-3.png │ ├── 3-1-4.png │ ├── 3-1-5.gif │ ├── 3-2-1.png │ ├── 3-2-2.png │ ├── 3-3-1.png │ ├── 3-3-10.png │ ├── 3-3-11.png │ ├── 3-3-2.png │ ├── 3-3-3.png │ ├── 3-3-4.png │ ├── 3-3-5.png │ ├── 3-3-6.png │ ├── 3-3-7.png │ ├── 3-3-8.png │ ├── 3-3-9.png │ └── 5-1.png ├── examples ├── 2-1-1.html ├── 2-1-2.html ├── 2-2-1.html ├── 2-2-2.html ├── 3-1-1.html ├── 3-1-2.html ├── 3-1-3.html ├── 3-2-1.html ├── 3-2-2.html ├── 4-1.html ├── 4-2.html ├── 4-3.html ├── 4-4.html ├── 4-5.html ├── 7-1.html ├── 7-2.html ├── 7-3.html └── step.html ├── package.json ├── src ├── Errors.js ├── Nodes │ ├── ExpressionBlockNode.js │ ├── IntNode.js │ ├── Node.js │ ├── PrintNode.js │ └── VariableNode.js ├── Parser.js ├── Reader.js ├── Scanner.js ├── Token.js ├── example.ws └── index.js └── yarn.lock /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*.js] 4 | charset = utf-8 5 | end_of_line = lf 6 | indent_size = 2 7 | indent_style = space 8 | trim_trailing_whitespace = true 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _book/ 2 | build/ 3 | public/ 4 | node_modules/ -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: serve 2 | 3 | serve: 4 | docker run -d --rm -v ${PWD}:/gitbook -p 4000:4000 jaceju/gitbook serve docs 5 | 6 | build: 7 | docker run --rm -v ${PWD}:/gitbook jaceju/gitbook install docs 8 | docker run --rm -v ${PWD}:/gitbook jaceju/gitbook build docs ./build 9 | cp -R build/* public/ 10 | rm -rf build 11 | cd public/ && git add . && git commit -m "Update" && git push && cd ../ 12 | -------------------------------------------------------------------------------- /docs/1.md: -------------------------------------------------------------------------------- 1 | # 一、簡介 2 | 3 | 相信每個 programmer 都跟西杰一樣想過設計一種自己的編程語言,最近西杰就有機會要寫一個編譯器了。雖然在大學時已經讀過如何編寫一個編譯器,但要認真寫起上來還真的不容易,而且網上教寫編譯器的教材不多(尤其中文的),所以就把這次經驗記下來,疏理一下自己在開發過程中所學到的東西,也同時為互聯網增加一些有關編譯器這方面的中文資源吧。 4 | 5 | 西杰在開發過程中經常參考 Actionscript 編譯器的 source code(用 Java 寫的),大家有興趣可以看看這裡(在 `/trunk/modules/asc` 裡), 是 open source 的。 6 | 7 | 在這個教程中,西杰將會使用 JavaScript 來開發,原因有二。第一,JS 是我最喜愛的編程語言之一,語法簡潔易明,亦較多人認識。第二,可以讓大家在瀏覽器直接運行 Demo,大家不用浪費時間下載本文所舉的例子再執行。 8 | 9 | 整個教程將會分為七個主要單元,除了這篇簡介外,還包括以下六個單元。 10 | 11 | 二、詞法分析(Lexical analysis):把字元合併成為詞語 12 | 13 | 三、語法分析(Syntactic analysis):把詞語組合成一句有意思的句子 14 | 15 | 四、語意分析(Semantic analysis):把句子組成有上文下理的段落,成為有意思的故事。西杰認為這個單元和第三個單元最難,大家要有心理準備 16 | 17 | 五、虛擬機(Virtual Machine):用來運行編譯好的程式 18 | 19 | 六、生成代碼(Code Generation):把你閱讀完的故事寫出來給虛擬機看 20 | 21 | 七、優化器(Optimizer):可以把故事說得簡單一點 22 | 23 | 好了,那麼我們開始吧,先來看看我們即將開發的語言 ﹣ Wescript (音類似 Westkit,不過要翹舌)的特徵: 24 | 25 | * 兩種變數類型(variable type): `bool`, `int` 26 | * 兩種控制結構(Control structure): `if/else`, `while` 27 | * 註釋(Comment): `// 單行`, `/* 多行 */` 28 | * 運算符(Operator): `+`, `-`, `*`, `/`, `%`, `(`, `)`, `&&`, `||`, `!`, `==`, `!=`, `=`, `+=`, `++`, `-=`, `–` 29 | * Static scoping , `bool` 不能與 `int` 比較,忽略空白符號。 30 | 31 | 例子: 32 | 33 | ```js 34 | /* 35 | Wescript 36 | */ 37 | var a:int = 1; 38 | var b:int = 2; 39 | var c:bool = true; 40 | if (c){ 41 | print a; 42 | }else{ 43 | print b; 44 | } 45 | var i:int = 0; 46 | while (i < 10){ 47 | print i; 48 | i++; 49 | } 50 | //WoW 51 | ``` 52 | 53 | 就是這樣了,下一章就會開始做 Scanner 。 54 | -------------------------------------------------------------------------------- /docs/2-1.md: -------------------------------------------------------------------------------- 1 | # 二、掃瞄器(Scanner)﹣詞法分析(Lexical analysis)(上) 2 | 3 | 寫 Compiler 第一步通常都是先寫 Scanner,什麼是 Scanner 呢?這裡只給你初步概念,詳細解釋在維基看吧。試想像有一句英文句子(例子: `”The quick brown fox jumps over the lazy dog” is an English-language pangram.`),人類看英文的方法就是逐個逐個詞語地看,電腦怎樣才能知道要跳過 `”` 雙引號才能讀取第一個詞語呢?那就是要靠 Scanner 來分析了, Scanner 會逐個逐個字元讀進來並且在 “適當時候” 把字元合成一組詞語供後邊的 Parser 做其他處理工作。 4 | 5 | ![](./images/2-1-1.png) 6 | ![](./images/2-1-2.png) 7 | 8 | 單字元的 Token 9 | 10 | 先來處理比較簡單的單字元 Token 吧,在這裡要先界定一下什麼是單字元 Token(這只是西傑的定義),單字元 Token 的意思是這個 Token 只有一個字元而且不會因後面的字元而有任何歧義,例如 `“:”` 或者 `“;”` 就是了。 `”+”` 是不是單字元 Token 呢?不是,因為 `“+”` 是會有歧義的,它可能是代表 `1 + 1` 中的相加意思,亦可能代表 `i ++` 中加 1 的意思,所以它不是單字元 Token ,而多字元 Token 會在下一節才處理。 11 | 12 | 現在我們要列出所有單字元 Token 。 13 | 14 | ``` 15 | : COLON_TOKEN 16 | 17 | ; SEMICOLON_TOKEN 18 | 19 | ( LEFTPAREN_TOKEN 20 | 21 | ) RIGHTPAREN_TOKEN 22 | 23 | { LEFTBRACE_TOKEN 24 | 25 | } RIGHTBRACE_TOKEN 26 | 27 | % MOD_TOKEN 28 | ``` 29 | 30 | 就這七款了嗎?其實還有一個是 `EOS_TOKEN` ,代表 `end of stream` ,即已經沒有東西可以讀了,用來終止 Scanner 再讀。 31 | 32 | ## Reader 33 | 34 | 現在要開始寫一個 Reader , Reader 的工作主要是用來逐個逐個字元讀進來,但亦可以退回一個字元下次再讀(這個功能在讀取多字元 Token 會有用),看看 code 吧。 35 | 36 | ```js 37 | //Reader class 38 | //str is the data to be read 39 | 40 | function Reader(str){ 41 | this.data = str; 42 | this.currPos = 0; 43 | this.dataLength = str.length; 44 | 45 | } 46 | 47 | Reader.prototype.nextChar = function (){ 48 | if (this.currPos >= this.dataLength){ 49 | return -1; //end of stream 50 | } 51 | 52 | return this.data[this.currPos++]; 53 | } 54 | 55 | //n is the number of characters to be retracted 56 | Reader.prototype.retract = function (n){ 57 | 58 | if (n == undefined){ 59 | n = 1; 60 | } 61 | 62 | this.currPos -= n; 63 | 64 | if (this.currPos < 0){ 65 | this.currPos = 0; 66 | } 67 | } 68 | ``` 69 | 70 | 就三個 function , 一個 constructor ,把要 compile 的字串傳入去,用 `nextChar ()` 來讀取下一個字元,用 `retract ()` 來退回。現在運行一下我們的 tester ,看看 Reader 是否運作正常。 71 | 72 | ```js 73 | function log(str){ 74 | $("#log").append(str + "
"); 75 | } 76 | $(function (){ 77 | //we stored our wescript in 23 | 24 | 25 | 28 | 29 | 30 | 58 | 59 | 60 | 88 | 89 | 90 | 91 |
92 |

93 |     
94 | 95 |
96 | 97 | 98 | -------------------------------------------------------------------------------- /examples/2-1-2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Scanner - section 2 5 | 6 | 16 | 17 | 18 | 23 | 24 | 25 | 28 | 29 | 30 | 58 | 84 | 144 | 145 | 146 | 170 | 171 | 172 | 173 |
174 |

175 |     
176 | 177 |
178 | 179 | 180 | -------------------------------------------------------------------------------- /examples/2-2-1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Scanner - section 3 5 | 6 | 16 | 17 | 18 | 23 | 24 | 25 | 34 | 35 | 36 | 64 | 99 | 212 | 213 | 214 | 244 | 245 | 246 | 247 |
248 |

249 |     
250 | 251 |
252 | 253 | 254 | -------------------------------------------------------------------------------- /examples/2-2-2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Scanner - section 4 5 | 6 | 22 | 23 | 24 | 29 | 30 | 31 | 38 | 39 | 40 | 59 | 87 | 145 | 424 | 425 | 426 | 471 | 472 | 473 | 474 |
475 |

476 |     
477 | 478 |
479 | 480 |
481 | 482 | 483 | -------------------------------------------------------------------------------- /examples/3-1-1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Parser - section 1 5 | 6 | 22 | 23 | 24 | 29 | 30 | 31 | 37 | 38 | 39 | 58 | 86 | 144 | 423 | 479 | 480 | 481 | 529 | 530 | 531 | 532 |
533 |

534 |     
535 | 536 |
537 | 538 |
539 | 540 | 541 | -------------------------------------------------------------------------------- /examples/3-1-2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Parser - section 2 5 | 6 | 22 | 23 | 24 | 29 | 30 | 31 | 36 | 37 | 48 | 49 | 50 | 69 | 97 | 155 | 434 | 469 | 568 | 569 | 570 | 605 | 606 | 607 | 608 |
609 |

610 |     
611 | 612 |
613 | 614 |
615 | 616 | 617 | -------------------------------------------------------------------------------- /examples/3-1-3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Parser - section 3 5 | 6 | 22 | 23 | 24 | 29 | 30 | 31 | 36 | 37 | 48 | 49 | 50 | 69 | 97 | 155 | 434 | 469 | 575 | 576 | 577 | 612 | 613 | 614 | 615 |
616 |

617 |     
618 | 619 |
620 | 621 |
622 | 623 | 624 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "repository": "git@github.com:jaceju/simple-compiler.git", 3 | "author": "jaceju ", 4 | "private": true, 5 | "license": "MIT", 6 | "devDependencies": { 7 | "prettier": "1.18.2" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/Errors.js: -------------------------------------------------------------------------------- 1 | let errors = []; 2 | 3 | class Errors { 4 | static push(error) { 5 | errors.push(error); 6 | } 7 | 8 | static print() { 9 | console.dir(errors); 10 | } 11 | 12 | static each(cb) { 13 | for (let i = 0, l = errors.length; i < l; i++) { 14 | cb(errors[i], i); 15 | } 16 | } 17 | } 18 | 19 | Errors.SYNTAX_ERROR = 0; 20 | Errors.type = ["Syntax error"]; 21 | 22 | module.exports = Errors; 23 | -------------------------------------------------------------------------------- /src/Nodes/ExpressionBlockNode.js: -------------------------------------------------------------------------------- 1 | const Node = require("./Node"); 2 | 3 | class ExpressionBlockNode extends Node { 4 | constructor() { 5 | super(); 6 | this.expressions = []; 7 | } 8 | 9 | push(node) { 10 | this.expressions.push(node); 11 | } 12 | 13 | iterate(cb) { 14 | for (let i = 0, l = this.expressions.length; i < l; i++) { 15 | let expression = this.expressions[i]; 16 | cb(expression, i); 17 | } 18 | } 19 | } 20 | 21 | module.exports = ExpressionBlockNode; 22 | -------------------------------------------------------------------------------- /src/Nodes/IntNode.js: -------------------------------------------------------------------------------- 1 | const Node = require("./Node"); 2 | 3 | class IntNode extends Node { 4 | constructor(data) { 5 | super(); 6 | this.data = data; 7 | } 8 | } 9 | 10 | module.exports = IntNode; 11 | -------------------------------------------------------------------------------- /src/Nodes/Node.js: -------------------------------------------------------------------------------- 1 | class Node { 2 | constructor(params) {} 3 | } 4 | 5 | module.exports = Node; 6 | -------------------------------------------------------------------------------- /src/Nodes/PrintNode.js: -------------------------------------------------------------------------------- 1 | const Node = require("./Node"); 2 | 3 | class PrintNode extends Node { 4 | constructor(expressionNode) { 5 | super(); 6 | this.expressionNode = expressionNode; 7 | } 8 | } 9 | 10 | module.exports = PrintNode; 11 | -------------------------------------------------------------------------------- /src/Nodes/VariableNode.js: -------------------------------------------------------------------------------- 1 | const Node = require("./Node"); 2 | 3 | class VariableNode extends Node { 4 | constructor(varName, type, initExpressionNode) { 5 | super(); 6 | this.varName = varName; 7 | this.type = type; 8 | this.initExpressionNode = initExpressionNode; 9 | } 10 | } 11 | 12 | module.exports = VariableNode; 13 | -------------------------------------------------------------------------------- /src/Parser.js: -------------------------------------------------------------------------------- 1 | const Errors = require("./Errors"); 2 | const Token = require("./Token"); 3 | const ExpressionBlockNode = require("./Nodes/ExpressionBlockNode"); 4 | const PrintNode = require("./Nodes/PrintNode"); 5 | const IntNode = require("./Nodes/IntNode"); 6 | const VariableNode = require("./Nodes/VariableNode"); 7 | 8 | // Parser class 9 | class Parser { 10 | constructor(scanner) { 11 | this.scanner = scanner; 12 | this.currentToken = new Token(); 13 | this.lookaheadToken = new Token(); 14 | this.lookaheadToken.consumed = true; 15 | } 16 | 17 | nextToken() { 18 | let token; 19 | if (this.lookaheadToken.consumed) { 20 | token = this.scanner.nextToken(); 21 | // skip comments 22 | while ( 23 | token === Token.tokens.LINECOMMENT_TOKEN || 24 | token === Token.tokens.BLOCKCOMMENT_TOKEN 25 | ) { 26 | token = this.scanner.nextToken(); 27 | } 28 | this.currentToken.type = token; 29 | this.currentToken.text = this.scanner.currentToken.text; 30 | return token; 31 | } else { 32 | this.currentToken.type = this.lookaheadToken.type; 33 | this.currentToken.text = this.lookaheadToken.text; 34 | this.lookaheadToken.consumed = true; 35 | return this.currentToken.type; 36 | } 37 | } 38 | 39 | lookahead() { 40 | if (this.lookaheadToken.consumed) { 41 | let token = this.scanner.nextToken(); 42 | // skip comments 43 | while ( 44 | token === Token.tokens.LINECOMMENT_TOKEN || 45 | token === Token.tokens.BLOCKCOMMENT_TOKEN 46 | ) { 47 | token = this.scanner.nextToken(); 48 | } 49 | this.lookaheadToken.type = token; 50 | this.lookaheadToken.text = this.scanner.currentToken.text; 51 | this.lookaheadToken.consumed = false; 52 | return token; 53 | } else { 54 | return this.lookaheadToken.type; 55 | } 56 | } 57 | 58 | // the entry point of our parser 59 | parse() { 60 | let rootBlock = new ExpressionBlockNode(); 61 | this.parseExpressions(rootBlock); 62 | return rootBlock; 63 | } 64 | 65 | // to parse a list of expressions 66 | parseExpressions(expressionBlockNode) { 67 | while ( 68 | this.lookahead() !== Token.tokens.RIGHTBRACE_TOKEN && 69 | this.lookahead() !== Token.tokens.EOS_TOKEN 70 | ) { 71 | let expressionNode = this.parseExpression(); 72 | if (expressionNode) { 73 | expressionBlockNode.push(expressionNode); 74 | } 75 | 76 | // consume the semicolon 77 | if (this.lookahead() === Token.tokens.SEMICOLON_TOKEN) { 78 | this.nextToken(); 79 | } else { 80 | // syntax error 81 | Errors.push({ 82 | type: Errors.SYNTAX_ERROR, 83 | msg: "Expecting a semicolon at the end of expression", 84 | line: this.scanner.currLine 85 | }); 86 | } 87 | } 88 | } 89 | 90 | // to parse an expression 91 | parseExpression() { 92 | switch (this.lookahead()) { 93 | case Token.tokens.PRINT_TOKEN: 94 | let printToken = this.nextToken(); 95 | let expressionNode = this.parseExpression(); 96 | if (expressionNode === undefined) { 97 | Errors.push({ 98 | type: Errors.SYNTAX_ERROR, 99 | msg: 'Missing an expression after "print"', 100 | line: this.scanner.currLine 101 | }); 102 | } 103 | return new PrintNode(expressionNode); 104 | case Token.tokens.INTLITERAL_TOKEN: 105 | let intToken = this.nextToken(); 106 | return new IntNode(this.currentToken.text); 107 | case Token.tokens.VAR_TOKEN: 108 | return this.parseVarExpression(); 109 | default: 110 | // unexpected, consume it 111 | this.nextToken(); 112 | } 113 | } 114 | 115 | parseVarExpression() { 116 | // consume "var" 117 | this.nextToken(); 118 | 119 | // expecting an identifier 120 | if (this.lookahead() === Token.tokens.IDENTIFIER_TOKEN) { 121 | this.nextToken(); 122 | let varName = this.currentToken.text; 123 | 124 | // consume a colon 125 | if (this.nextToken() !== Token.tokens.COLON_TOKEN) { 126 | this.skipError(); 127 | return; 128 | } 129 | 130 | // type token 131 | if (this.lookahead() !== Token.tokens.TYPE_TOKEN) { 132 | this.skipError(); 133 | return; 134 | } 135 | 136 | this.nextToken(); 137 | let typeName = this.currentToken.text; 138 | 139 | let initNode; 140 | // check if it has initialization expression 141 | if (this.lookahead() === Token.tokens.ASSIGN_TOKEN) { 142 | initNode = this.parseSimpleAssignmentExpression(); 143 | } 144 | return new VariableNode(varName, typeName, initNode); 145 | } 146 | 147 | this.skipError(); 148 | } 149 | 150 | parseSimpleAssignmentExpression() { 151 | // consume the "=" sign 152 | this.nextToken(); 153 | 154 | return this.parseExpression(); 155 | } 156 | 157 | // a naive implementation for skipping error 158 | skipError() { 159 | this.scanner.skipNewLine = false; 160 | 161 | while ( 162 | this.lookahead() !== Token.tokens.NEWLINE_TOKEN && 163 | this.lookahead() !== Token.tokens.EOS_TOKEN 164 | ) { 165 | this.nextToken(); 166 | } 167 | 168 | this.scanner.skipNewLine = true; 169 | } 170 | // 171 | // matchSemicolon() { 172 | // // consume the semicolon 173 | // if (this.lookahead() === Token.tokens.SEMICOLON_TOKEN) { 174 | // this.nextToken(); 175 | // } else { 176 | // // syntax error 177 | // Errors.push({ 178 | // type: Errors.SYNTAX_ERROR, 179 | // msg: "Expecting a semicolon at the end of expression", 180 | // line: this.scanner.currLine 181 | // }); 182 | // } 183 | // } 184 | } 185 | 186 | module.exports = Parser; 187 | -------------------------------------------------------------------------------- /src/Reader.js: -------------------------------------------------------------------------------- 1 | // Reader class 2 | // str is the data to be read 3 | class Reader { 4 | constructor(str) { 5 | this.data = str; 6 | this.currPos = 0; 7 | this.dataLength = str.length; 8 | } 9 | 10 | nextChar() { 11 | if (this.currPos >= this.dataLength) { 12 | return -1; // end of stream 13 | } 14 | 15 | return this.data[this.currPos++]; 16 | } 17 | 18 | retract(n) { 19 | if (n === undefined) { 20 | n = 1; 21 | } 22 | 23 | this.currPos -= n; 24 | 25 | if (this.currPos < 0) { 26 | this.currPos = 0; 27 | } 28 | } 29 | } 30 | 31 | module.exports = Reader; 32 | -------------------------------------------------------------------------------- /src/Scanner.js: -------------------------------------------------------------------------------- 1 | // Scanner class 2 | // reader: the reader used to read in characters 3 | const Token = require("./Token"); 4 | const Errors = require("./Errors"); 5 | 6 | class Scanner { 7 | constructor(reader) { 8 | this.reader = reader; 9 | this.currentToken = new Token(); // storing the current analysed token 10 | this.currLine = 0; // the line number of the current line being read 11 | this.state = Scanner.START_STATE; 12 | } 13 | 14 | makeToken(type, text) { 15 | this.currentToken.type = type; 16 | this.currentToken.text = text; 17 | return type; 18 | } 19 | 20 | nextToken() { 21 | let bufferStr = "", 22 | c = "", 23 | d = ""; 24 | while (true) { 25 | switch (this.state) { 26 | case Scanner.START_STATE: 27 | c = this.reader.nextChar(); 28 | if ((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")) { 29 | this.state = Scanner.IDENTIFIER_STATE; 30 | // we need to remember what the token's text is 31 | bufferStr = c; 32 | } else if (c >= "0" && c <= "9") { 33 | bufferStr = c; 34 | let d; 35 | while (true) { 36 | d = this.reader.nextChar(); 37 | if (d >= "0" && d <= "9") { 38 | bufferStr += d; 39 | } else { 40 | this.reader.retract(); 41 | return this.makeToken(Token.tokens.INTLITERAL_TOKEN, bufferStr); 42 | } 43 | } 44 | } else { 45 | switch (c) { 46 | case ":": 47 | return this.makeToken(Token.tokens.COLON_TOKEN); 48 | case ";": 49 | return this.makeToken(Token.tokens.SEMICOLON_TOKEN); 50 | case "(": 51 | return this.makeToken(Token.tokens.LEFTPAREN_TOKEN); 52 | case ")": 53 | return this.makeToken(Token.tokens.RIGHTPAREN_TOKEN); 54 | case "{": 55 | return this.makeToken(Token.tokens.LEFTBRACE_TOKEN); 56 | case "}": 57 | return this.makeToken(Token.tokens.RIGHTBRACE_TOKEN); 58 | case "%": 59 | return this.makeToken(Token.tokens.MOD_TOKEN); 60 | case "!": 61 | if (this.reader.nextChar() === "=") { 62 | return this.makeToken(Token.tokens.NOTEQUAL_TOKEN); 63 | } else { 64 | // we have consumed one more char in if-condition 65 | this.reader.retract(); 66 | return this.makeToken(Token.tokens.NOT_TOKEN); 67 | } 68 | case "+": 69 | d = this.reader.nextChar(); 70 | if (d === "=") { 71 | return this.makeToken(Token.tokens.PLUSASSIGN_TOKEN); 72 | } else if (d === "+") { 73 | return this.makeToken(Token.tokens.PLUSPLUS_TOKEN); 74 | } else { 75 | this.reader.retract(); 76 | return this.makeToken(Token.tokens.PLUS_TOKEN); 77 | } 78 | case "-": 79 | d = this.reader.nextChar(); 80 | if (d === "=") { 81 | return this.makeToken(Token.tokens.MINUSASSIGN_TOKEN); 82 | } else if (d === "-") { 83 | return this.makeToken(Token.tokens.MINUSMINUS_TOKEN); 84 | } else { 85 | this.reader.retract(); 86 | return this.makeToken(Token.tokens.MINUS_TOKEN); 87 | } 88 | case "*": 89 | return this.makeToken(Token.tokens.MULT_TOKEN); 90 | case "=": 91 | if (this.reader.nextChar() === "=") { 92 | return this.makeToken(Token.tokens.EQUAL_TOKEN); 93 | } else { 94 | this.reader.retract(); 95 | return this.makeToken(Token.tokens.ASSIGN_TOKEN); 96 | } 97 | case ">": 98 | if (this.reader.nextChar() === "=") { 99 | return this.makeToken(Token.tokens.GREATEREQUAL_TOKEN); 100 | } else { 101 | this.reader.retract(); 102 | return this.makeToken(Token.tokens.GREATER_TOKEN); 103 | } 104 | case "<": 105 | if (this.reader.nextChar() === "=") { 106 | return this.makeToken(Token.tokens.LESSEQUAL_TOKEN); 107 | } else { 108 | this.reader.retract(); 109 | return this.makeToken(Token.tokens.LESS_TOKEN); 110 | } 111 | case "/": 112 | this.state = Scanner.SLASH_STATE; 113 | break; 114 | case "&": 115 | if (this.reader.nextChar() === "&") { 116 | return this.makeToken(Token.tokens.AND_TOKEN); 117 | } else { 118 | this.reader.retract(); 119 | Errors.push({ 120 | type: Errors.SYNTAX_ERROR, 121 | msg: "You have only one &", 122 | line: this.currLine 123 | }); 124 | } 125 | break; 126 | case "|": 127 | if (this.reader.nextChar() === "|") { 128 | return this.makeToken(Token.tokens.OR_TOKEN); 129 | } else { 130 | this.reader.retract(); 131 | Errors.push({ 132 | type: Errors.SYNTAX_ERROR, 133 | msg: "You have only one |", 134 | line: this.currLine 135 | }); 136 | } 137 | break; 138 | case -1: 139 | return this.makeToken(Token.tokens.EOS_TOKEN); 140 | case "\r": 141 | case "\n": 142 | this.currLine++; 143 | break; 144 | default: 145 | // ignore them 146 | } 147 | } 148 | break; 149 | case Scanner.IDENTIFIER_STATE: 150 | c = this.reader.nextChar(); 151 | if ((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")) { 152 | bufferStr += c; 153 | } else if (c === -1) { 154 | return this.makeToken(Token.tokens.EOS_TOKEN); 155 | } else { 156 | // stop reading it since it is not a letter anymore 157 | // retract the last character we read because it does not belong to this identfier 158 | this.reader.retract(); 159 | // change back the state to read the next token 160 | this.state = Scanner.START_STATE; 161 | switch (bufferStr) { 162 | case "var": 163 | return this.makeToken(Token.tokens.VAR_TOKEN); 164 | case "int": 165 | case "bool": 166 | //need to pass bufferStr as well to distinguish which type it is 167 | return this.makeToken(Token.tokens.TYPE_TOKEN, bufferStr); 168 | case "true": 169 | case "false": 170 | case "TRUE": 171 | case "FALSE": 172 | return this.makeToken( 173 | Token.tokens.BOOLLITERAL_TOKEN, 174 | bufferStr 175 | ); 176 | case "if": 177 | return this.makeToken(Token.tokens.IF_TOKEN); 178 | case "else": 179 | return this.makeToken(Token.tokens.ELSE_TOKEN); 180 | case "while": 181 | return this.makeToken(Token.tokens.WHILE_TOKEN); 182 | case "print": 183 | return this.makeToken(Token.tokens.PRINT_TOKEN); 184 | default: 185 | return this.makeToken(Token.tokens.IDENTIFIER_TOKEN, bufferStr); 186 | } 187 | } 188 | break; 189 | case Scanner.SLASH_STATE: 190 | d = this.reader.nextChar(); 191 | if (d === "/") { 192 | // line comment 193 | bufferStr = ""; 194 | // reading 1 more char here can prevent the case that a // is followed by a line break char immediately 195 | d = this.reader.nextChar(); 196 | if (d !== "\r" && d !== "\n") { 197 | while (d !== "\r" && d !== "\n") { 198 | bufferStr += d; 199 | d = this.reader.nextChar(); 200 | if (d === -1) { 201 | break; 202 | } 203 | } 204 | // to retract the line break char 205 | this.reader.retract(); 206 | } 207 | this.state = Scanner.START_STATE; 208 | return this.makeToken(Token.tokens.LINECOMMENT_TOKEN, bufferStr); 209 | } else if (d === "*") { 210 | // block comment 211 | bufferStr = ""; 212 | let end = false; 213 | while (!end) { 214 | d = this.reader.nextChar(); 215 | if (d !== -1) { 216 | if (d === "\r" || d === "\n") { 217 | this.currLine++; 218 | } 219 | if (d === "*") { 220 | let e = this.reader.nextChar(); 221 | if (e === "/") { 222 | // meet */ 223 | end = true; 224 | } else { 225 | bufferStr += "*" + e; 226 | } 227 | } else { 228 | bufferStr += d; 229 | } 230 | } else { 231 | end = true; 232 | } 233 | } 234 | this.state = Scanner.START_STATE; 235 | return this.makeToken(Token.tokens.BLOCKCOMMENT_TOKEN, bufferStr); 236 | } else { 237 | this.state = Scanner.START_STATE; 238 | this.reader.retract(); 239 | return this.makeToken(Token.tokens.DIV_TOKEN); 240 | } 241 | } 242 | } 243 | } 244 | } 245 | 246 | Scanner.START_STATE = 1; // every FSM should have a start state 247 | Scanner.IDENTIFIER_STATE = Scanner.START_STATE + 1; 248 | Scanner.SLASH_STATE = Scanner.IDENTIFIER_STATE + 1; 249 | 250 | module.exports = Scanner; 251 | -------------------------------------------------------------------------------- /src/Token.js: -------------------------------------------------------------------------------- 1 | // Token class 2 | // type: Token's type 3 | // text: the actual text that makes this token, may be null if it is not important 4 | 5 | class Token { 6 | constructor(type, text) { 7 | this.type = type; 8 | this.text = text; 9 | this.consumed = false; 10 | } 11 | } 12 | 13 | Token.tokens = {}; 14 | Token.tokens.EOS_TOKEN = 1; // end of stream 15 | // using + 1 allows adding a new token easily later 16 | Token.tokens.COLON_TOKEN = Token.tokens.EOS_TOKEN + 1; 17 | Token.tokens.SEMICOLON_TOKEN = Token.tokens.COLON_TOKEN + 1; 18 | Token.tokens.LEFTPAREN_TOKEN = Token.tokens.SEMICOLON_TOKEN + 1; 19 | Token.tokens.RIGHTPAREN_TOKEN = Token.tokens.LEFTPAREN_TOKEN + 1; 20 | Token.tokens.LEFTBRACE_TOKEN = Token.tokens.RIGHTPAREN_TOKEN + 1; 21 | Token.tokens.RIGHTBRACE_TOKEN = Token.tokens.LEFTBRACE_TOKEN + 1; 22 | Token.tokens.MOD_TOKEN = Token.tokens.RIGHTBRACE_TOKEN + 1; 23 | 24 | Token.tokens.VAR_TOKEN = Token.tokens.MOD_TOKEN + 1; 25 | Token.tokens.TYPE_TOKEN = Token.tokens.VAR_TOKEN + 1; 26 | Token.tokens.BOOLLITERAL_TOKEN = Token.tokens.TYPE_TOKEN + 1; 27 | Token.tokens.INTLITERAL_TOKEN = Token.tokens.BOOLLITERAL_TOKEN + 1; 28 | Token.tokens.IF_TOKEN = Token.tokens.INTLITERAL_TOKEN + 1; 29 | Token.tokens.ELSE_TOKEN = Token.tokens.IF_TOKEN + 1; 30 | Token.tokens.WHILE_TOKEN = Token.tokens.ELSE_TOKEN + 1; 31 | Token.tokens.PRINT_TOKEN = Token.tokens.WHILE_TOKEN + 1; 32 | Token.tokens.IDENTIFIER_TOKEN = Token.tokens.PRINT_TOKEN + 1; 33 | 34 | Token.tokens.PLUS_TOKEN = Token.tokens.IDENTIFIER_TOKEN + 1; 35 | Token.tokens.PLUSPLUS_TOKEN = Token.tokens.PLUS_TOKEN + 1; 36 | Token.tokens.PLUSASSIGN_TOKEN = Token.tokens.PLUSPLUS_TOKEN + 1; 37 | Token.tokens.MINUS_TOKEN = Token.tokens.PLUSASSIGN_TOKEN + 1; 38 | Token.tokens.MINUSMINUS_TOKEN = Token.tokens.MINUS_TOKEN + 1; 39 | Token.tokens.MINUSASSIGN_TOKEN = Token.tokens.MINUSMINUS_TOKEN + 1; 40 | Token.tokens.MULT_TOKEN = Token.tokens.MINUSASSIGN_TOKEN + 1; 41 | Token.tokens.DIV_TOKEN = Token.tokens.MULT_TOKEN + 1; 42 | Token.tokens.ASSIGN_TOKEN = Token.tokens.DIV_TOKEN + 1; 43 | Token.tokens.EQUAL_TOKEN = Token.tokens.ASSIGN_TOKEN + 1; 44 | Token.tokens.NOTEQUAL_TOKEN = Token.tokens.EQUAL_TOKEN + 1; 45 | Token.tokens.GREATER_TOKEN = Token.tokens.NOTEQUAL_TOKEN + 1; 46 | Token.tokens.GREATEREQUAL_TOKEN = Token.tokens.GREATER_TOKEN + 1; 47 | Token.tokens.LESS_TOKEN = Token.tokens.GREATEREQUAL_TOKEN + 1; 48 | Token.tokens.LESSEQUAL_TOKEN = Token.tokens.LESS_TOKEN + 1; 49 | Token.tokens.AND_TOKEN = Token.tokens.LESSEQUAL_TOKEN + 1; 50 | Token.tokens.OR_TOKEN = Token.tokens.AND_TOKEN + 1; 51 | Token.tokens.NOT_TOKEN = Token.tokens.OR_TOKEN + 1; 52 | 53 | Token.tokens.LINECOMMENT_TOKEN = Token.tokens.NOT_TOKEN + 1; 54 | Token.tokens.BLOCKCOMMENT_TOKEN = Token.tokens.LINECOMMENT_TOKEN + 1; 55 | Token.tokens.NEWLINE_TOKEN = Token.tokens.BLOCKCOMMENT_TOKEN + 1; 56 | 57 | Token.backwardMap = {}; // for inverse look-up 58 | 59 | for (let x in Token.tokens) { 60 | if (Token.tokens.hasOwnProperty(x)) { 61 | Token.backwardMap[Token.tokens[x]] = x; 62 | } 63 | } 64 | 65 | module.exports = Token; 66 | -------------------------------------------------------------------------------- /src/example.ws: -------------------------------------------------------------------------------- 1 | //program start 2 | var a:int = 89 error; 3 | var b:bool recovery; 4 | var c:int = 64; 5 | var d:bool; -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | const fs = require("fs"); 2 | const Reader = require("./Reader"); 3 | const Scanner = require("./Scanner"); 4 | const Token = require("./Token"); 5 | const Parser = require("./Parser"); 6 | const Errors = require("./Errors"); 7 | 8 | function log(str) { 9 | console.log(str); 10 | } 11 | 12 | function errorLog(str) { 13 | console.error(str); 14 | } 15 | 16 | let dataToBeCompiled = fs.readFileSync("src/example.ws", "utf8"); 17 | let reader = new Reader(dataToBeCompiled); 18 | let scanner = new Scanner(reader); 19 | let parser = new Parser(scanner); 20 | 21 | expressionBlockNode = parser.parse(); 22 | 23 | console.log(expressionBlockNode); 24 | 25 | Errors.each(function(error, i) { 26 | errorLog( 27 | "Line " + error.line + ": (" + Errors.type[error.type] + ") " + error.msg 28 | ); 29 | }); 30 | -------------------------------------------------------------------------------- /yarn.lock: -------------------------------------------------------------------------------- 1 | # THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. 2 | # yarn lockfile v1 3 | 4 | 5 | prettier@1.18.2: 6 | version "1.18.2" 7 | resolved "https://registry.yarnpkg.com/prettier/-/prettier-1.18.2.tgz#6823e7c5900017b4bd3acf46fe9ac4b4d7bda9ea" 8 | integrity sha512-OeHeMc0JhFE9idD4ZdtNibzY0+TPHSpSSb9h8FqtP+YnoZZ1sl8Vc9b1sasjfymH3SonAF4QcA2+mzHPhMvIiw== 9 | --------------------------------------------------------------------------------