├── .editorconfig
├── .gitignore
├── Makefile
├── docs
    ├── 1.md
    ├── 2-1.md
    ├── 2-2.md
    ├── 3-1.md
    ├── 3-2.md
    ├── 3-3.md
    ├── 4.md
    ├── 5.md
    ├── 6-1.md
    ├── 6-2.md
    ├── 7.md
    ├── README.md
    ├── SUMMARY.md
    ├── book.json
    └── images
    │   ├── 2-1-1.png
    │   ├── 2-1-2.png
    │   ├── 2-1-3.png
    │   ├── 3-1-1.png
    │   ├── 3-1-2.png
    │   ├── 3-1-3.png
    │   ├── 3-1-4.png
    │   ├── 3-1-5.gif
    │   ├── 3-2-1.png
    │   ├── 3-2-2.png
    │   ├── 3-3-1.png
    │   ├── 3-3-10.png
    │   ├── 3-3-11.png
    │   ├── 3-3-2.png
    │   ├── 3-3-3.png
    │   ├── 3-3-4.png
    │   ├── 3-3-5.png
    │   ├── 3-3-6.png
    │   ├── 3-3-7.png
    │   ├── 3-3-8.png
    │   ├── 3-3-9.png
    │   └── 5-1.png
├── examples
    ├── 2-1-1.html
    ├── 2-1-2.html
    ├── 2-2-1.html
    ├── 2-2-2.html
    ├── 3-1-1.html
    ├── 3-1-2.html
    ├── 3-1-3.html
    ├── 3-2-1.html
    ├── 3-2-2.html
    ├── 4-1.html
    ├── 4-2.html
    ├── 4-3.html
    ├── 4-4.html
    ├── 4-5.html
    ├── 7-1.html
    ├── 7-2.html
    ├── 7-3.html
    └── step.html
├── package.json
├── src
    ├── Errors.js
    ├── Nodes
    │   ├── ExpressionBlockNode.js
    │   ├── IntNode.js
    │   ├── Node.js
    │   ├── PrintNode.js
    │   └── VariableNode.js
    ├── Parser.js
    ├── Reader.js
    ├── Scanner.js
    ├── Token.js
    ├── example.ws
    └── index.js
└── yarn.lock


/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 | 
3 | [*.js]
4 | charset = utf-8
5 | end_of_line = lf
6 | indent_size = 2
7 | indent_style = space
8 | trim_trailing_whitespace = true
9 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | _book/
2 | build/
3 | public/
4 | node_modules/


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: serve build
 2 | # Both targets are commands, not files. "build" must be phony: the recipe creates ./build, and a leftover ./build directory would make the target look up to date.
 3 | serve: # run "gitbook serve docs" in a detached container, publishing container port 4000 on host port 4000
 4 | 	docker run -d --rm -v ${PWD}:/gitbook -p 4000:4000 jaceju/gitbook serve docs
 5 | # build: run "gitbook install" and "gitbook build" into ./build, copy the output into public/, remove ./build, then commit and push from inside public/
 6 | build:
 7 | 	docker run --rm -v ${PWD}:/gitbook jaceju/gitbook install docs
 8 | 	docker run --rm -v ${PWD}:/gitbook jaceju/gitbook build docs ./build
 9 | 	cp -R build/* public/
10 | 	rm -rf build
11 | 	cd public/ && git add . && git commit -m "Update" && git push && cd ../
12 | 


--------------------------------------------------------------------------------
/docs/1.md:
--------------------------------------------------------------------------------
 1 | # 一、簡介 
 2 | 
 3 | 相信每個 programmer 都跟西杰一樣想過設計一種自己的編程語言，最近西杰就有機會要寫一個編譯器了。雖然在大學時已經讀過如何編寫一個編譯器，但要認真寫起上來還真的不容易，而且網上教寫編譯器的教材不多（尤其中文的），所以就把這次經驗記下來，疏理一下自己在開發過程中所學到的東西，也同時為互聯網增加一些有關編譯器這方面的中文資源吧。
 4 | 
 5 | 西杰在開發過程中經常參考 Actionscript 編譯器的 source code（用 Java 寫的），大家有興趣可以看看這裡（在 `/trunk/modules/asc` 裡）， 是 open source 的。
 6 | 
 7 | 在這個教程中，西杰將會使用 JavaScript 來開發，原因有二。第一，JS 是我最喜愛的編程語言之一，語法簡潔易明，亦較多人認識。第二，可以讓大家在瀏覽器直接運行 Demo，大家不用浪費時間下載本文所舉的例子再執行。
 8 | 
 9 | 整個教程將會分為七個主要單元，除了這篇簡介外，還包括以下六個單元。
10 | 
11 | 二、詞法分析（Lexical analysis）：把字元合併成為詞語
12 | 
13 | 三、語法分析（Syntactic analysis）：把詞語組合成一句有意思的句子
14 | 
15 | 四、語意分析（Semantic analysis）：把句子組成有上文下理的段落，成為有意思的故事。西杰認為這個單元和第三個單元最難，大家要有心理準備
16 | 
17 | 五、虛擬機（Virtual Machine）：用來運行編譯好的程式
18 | 
19 | 六、生成代碼（Code Generation）：把你閱讀完的故事寫出來給虛擬機看
20 | 
21 | 七、優化器（Optimizer）：可以把故事說得簡單一點
22 |  
23 | 好了，那麼我們開始吧，先來看看我們即將開發的語言 ﹣ Wescript （音類似 Westkit，不過要翹舌）的特徵：
24 | 
25 | * 兩種變數類型（variable type）： `bool`, `int`
26 | * 兩種控制結構（Control structure）： `if/else`, `while`
27 | * 註釋（Comment）： `// 單行`, `/* 多行 */`
28 | * 運算符（Operator）： `+`, `-`, `*`, `/`, `%`, `(`, `)`, `&&`, `||`, `!`, `==`, `!=`, `<`, `<=`, `>`, `>=`, `=`, `+=`, `++`, `-=`, `--`
29 | * Static scoping ， `bool` 不能與 `int` 比較，忽略空白符號。
30 | 
31 | 例子：
32 | 
33 | ```js
34 | /*
35 | Wescript
36 | */
37 | var a:int = 1;
38 | var b:int = 2;
39 | var c:bool = true;
40 | if (c){
41 |     print a;
42 | }else{
43 |     print b;
44 | }
45 | var i:int = 0;
46 | while (i < 10){
47 |     print i;
48 |     i++;
49 | }
50 | //WoW
51 | ```
52 | 
53 | 就是這樣了，下一章就會開始做 Scanner 。
54 | 


--------------------------------------------------------------------------------
/docs/2-1.md:
--------------------------------------------------------------------------------
  1 | # 二、掃瞄器（Scanner）﹣詞法分析（Lexical analysis）（上）
  2 | 
  3 | 寫 Compiler 第一步通常都是先寫 Scanner，什麼是 Scanner 呢？這裡只給你初步概念，詳細解釋在維基看吧。試想像有一句英文句子（例子： `“The quick brown fox jumps over the lazy dog” is an English-language pangram.`），人類看英文的方法就是逐個逐個詞語地看，電腦怎樣才能知道要跳過 `“` 雙引號才能讀取第一個詞語呢？那就是要靠 Scanner 來分析了， Scanner 會逐個逐個字元讀進來並且在 “適當時候” 把字元合成一組詞語供後邊的 Parser 做其他處理工作。
  4 | 
  5 | ![](./images/2-1-1.png)
  6 | ![](./images/2-1-2.png)
  7 | 
  8 | 單字元的 Token
  9 | 
 10 | 先來處理比較簡單的單字元 Token 吧，在這裡要先界定一下什麼是單字元 Token（這只是西傑的定義），單字元 Token 的意思是這個 Token 只有一個字元而且不會因後面的字元而有任何歧義，例如 `“:”` 或者 `“;”` 就是了。 `“+”` 是不是單字元 Token 呢？不是，因為 `“+”` 是會有歧義的，它可能是代表 `1 + 1` 中的相加意思，亦可能代表 `i ++` 中加 1 的意思，所以它不是單字元 Token ，而多字元 Token 會在下一節才處理。
 11 | 
 12 | 現在我們要列出所有單字元 Token 。
 13 | 
 14 | ```
 15 | : COLON_TOKEN
 16 | 
 17 | ; SEMICOLON_TOKEN
 18 | 
 19 | ( LEFTPAREN_TOKEN
 20 | 
 21 | ) RIGHTPAREN_TOKEN
 22 | 
 23 | { LEFTBRACE_TOKEN
 24 | 
 25 | } RIGHTBRACE_TOKEN
 26 | 
 27 | % MOD_TOKEN
 28 | ```
 29 | 
 30 | 就這七款了嗎？其實還有一個是 `EOS_TOKEN` ，代表 `end of stream` ，即已經沒有東西可以讀了，用來終止 Scanner 再讀。
 31 | 
 32 | ## Reader
 33 | 
 34 | 現在要開始寫一個 Reader ， Reader 的工作主要是用來逐個逐個字元讀進來，但亦可以退回一個字元下次再讀（這個功能在讀取多字元 Token 會有用），看看 code 吧。
 35 | 
 36 | ```js
 37 | //Reader class
 38 | //str is the data to be read
 39 | 
 40 | function Reader(str){
 41 |     this.data = str;
 42 |     this.currPos = 0;
 43 |     this.dataLength = str.length;
 44 | 
 45 | }
 46 | 
 47 | Reader.prototype.nextChar = function (){
 48 |     if (this.currPos >= this.dataLength){
 49 |         return -1; //end of stream
 50 |     }
 51 | 
 52 |     return this.data[this.currPos++];
 53 | }
 54 | 
 55 | //n is the number of characters to be retracted
 56 | Reader.prototype.retract = function (n){
 57 | 
 58 |     if (n == undefined){
 59 |         n = 1;
 60 |     }
 61 | 
 62 |     this.currPos -= n;
 63 | 
 64 |     if (this.currPos < 0){
 65 |         this.currPos = 0;
 66 |     }
 67 | }
 68 | ```
 69 |  
 70 | 就三個 function ， 一個 constructor ，把要 compile 的字串傳入去，用 `nextChar ()` 來讀取下一個字元，用 `retract ()` 來退回。現在運行一下我們的 tester ，看看 Reader 是否運作正常。
 71 | 
 72 | ```js
 73 | function log(str){
 74 |     $("#log").append(str + "<br />");
 75 | }
 76 | $(function (){
 77 |     //we stored our wescript in <script id="wescript">
 78 |     var dataToBeCompiled = $("#wescript").text();
 79 |     var reader = new Reader(dataToBeCompiled);
 80 |     var retracted = false;
 81 |     while (true){
 82 |         var nextChar = reader.nextChar();
 83 |         if (nextChar == -1){
 84 |             break;
 85 |         }
 86 |         //if it meets !, it will retract once
 87 |         if (nextChar == "!" && !retracted){
 88 |             reader.retract();
 89 |             retracted = true;
 90 |         }
 91 |         log("char: " + nextChar);
 92 |     }
 93 | });
 94 | ```
 95 |  
 96 | 運行結果（想看完整的 source code 就按右鍵看吧，iframe 來的）：
 97 | 
 98 | Reader 就是這麼簡單了，下一步我們要定義一些常數來識別不同的 Token ，而且要定義一個叫做 Token 的 class 來記下讀取了的 Token 。
 99 | 
100 | ```js
101 | //Token class
102 | //type: Token's type
103 | //text: the actual text that makes this token, may be null if it is not important
104 | function Token(type, text){
105 |     this.type = type;
106 |     this.text = text;
107 | }
108 | 
109 | Token.tokens = {};
110 | Token.tokens.EOS_TOKEN = 1; //end of stream
111 | // using + 1 allows adding a new token easily later
112 | Token.tokens.COLON_TOKEN = Token.tokens.EOS_TOKEN + 1;
113 | Token.tokens.SEMICOLON_TOKEN = Token.tokens.COLON_TOKEN + 1;
114 | Token.tokens.LEFTPAREN_TOKEN = Token.tokens.SEMICOLON_TOKEN + 1;
115 | Token.tokens.RIGHTPAREN_TOKEN = Token.tokens.LEFTPAREN_TOKEN + 1;
116 | Token.tokens.LEFTBRACE_TOKEN = Token.tokens.RIGHTPAREN_TOKEN + 1;
117 | Token.tokens.RIGHTBRACE_TOKEN = Token.tokens.LEFTBRACE_TOKEN + 1;
118 | Token.tokens.MOD_TOKEN = Token.tokens.RIGHTBRACE_TOKEN + 1;
119 | Token.backwardMap = {}; //for inverse look-up
120 | 
121 | for (var x in Token.tokens){
122 |     Token.backwardMap[Token.tokens[x]] = x;
123 | }
124 | ```
125 |  
126 | 大家可以看到定義 Token 常數的方法是不斷的 +1 ，這可以方便大家日後想在中間插入新的 Token，如果大家用了 1,2,3,4,5 等數字來定義的話日後要插入一兩個就要把那些數字重新排列……
127 | 
128 | ## Finite-state machine
129 | 
130 | 現在到戲肉了，開始寫 Scanner。呀，開始寫 Scanner 之前，還要先瞭解一樣東西，就是 Finite-state machine ，詳細的大家還是看維基吧，這裡只輕輕解說一下。 FSM 在軟件開發中算是一種模式吧，是指一台機器（我們的 Scanner）經過一些變動（例如我們的 Scanner 會讀取字元）之後在數個有限的狀態徘徊，維基裡有一張圖很淺白地解釋了這個概念。
131 | 
132 | ![](./images/2-1-3.png)
133 | 
134 | 開門關門 FSM
135 | 
136 | FSM 跟 Scanner 有什麼關係呢？我們做 Scanner 時就會用到 FSM 這種模式了，現在我們只做單字元分析未必會用到，但當我們做多字元分析時，我們就經常需要根據我們上一個讀取到的字元來判斷我們下一步要做什麼，這個時候 FSM 就大派用場了，詳細如何使用 FSM 在下一節用到時再說吧，現在真的開始寫 Scanner 了。
137 | 
138 | ## Scanner
139 | 
140 | ```js
141 | //Scanner class
142 | //reader: the reader used to read in characters
143 | function Scanner(reader){
144 |     this.reader = reader;
145 |     this.currentToken = new Token(); //storing the current analysed token
146 |     this.currLine = 0; //the line number of the current line being read
147 |     this.state = Scanner.START_STATE;
148 | }
149 | 
150 | Scanner.START_STATE = 1; //every FSM should have a start state
151 | ```
152 | 
153 | 首先定義一個 constructor，負責初始化我們的 Scanner object 。我們的 Scanner object 有四種東西要記著的：
154 | 
155 | 第一是 reader ，用來讀取字元的。
156 | 
157 | 第二是現在被讀取的 Token ，我們不會每個 Token 都建立新的 object，不必要之餘亦很浪費 memory。
158 | 
159 | 第三是我們現在讀取那一行的行數，做錯誤訊息時用的。
160 | 
161 | 最後是 Scanner 這個 FSM 的狀態。
162 | 
163 | 定義好 constructor 之後就要定義 Scanner 這個 FSM 有哪些狀態了，由於我們這個 Scanner 暫時比較簡單，只需要有一個開始狀態就足夠了。
164 | 
165 | ```js
166 | Scanner.prototype.makeToken = function (type, text){
167 |     this.currentToken.type = type;
168 |     this.currentToken.text = text;
169 |     return type;
170 | }
171 | ```
172 | 
173 | 這個 method 是用來 “製造” 下一個 Token 的，當然你也可以看得出，並不是真正的製造，只是把現在的 Token 換一下數值而已。
174 | 
175 | 為什麼我們不需要建立新的 Token object 呢？
176 | 因為我們不會把整個檔案一次過讀進來並一次過返回所有 Token ，而是當 Parser （下一章才做）需要時才 call 一下 `nextToken ()` 這個 method，這樣可以節省不少 memory 啊！
177 | 
178 | ```js 
179 | Scanner.prototype.nextToken = function(){
180 |     while (true){
181 |         switch (this.state){
182 |             case Scanner.START_STATE:
183 |                 var c = this.reader.nextChar();
184 |                 switch (c){
185 |                     case ":":
186 |                         return this.makeToken(Token.tokens.COLON_TOKEN);
187 |                     break;
188 |                     case ";":
189 |                         return this.makeToken(Token.tokens.SEMICOLON_TOKEN);
190 |                     break;
191 |                     case "(":
192 |                         return this.makeToken(Token.tokens.LEFTPAREN_TOKEN);
193 |                     break;
194 |                     case ")":
195 |                         return this.makeToken(Token.tokens.RIGHTPAREN_TOKEN);
196 |                     break;
197 |                     case "{":
198 |                         return this.makeToken(Token.tokens.LEFTBRACE_TOKEN);
199 |                     break;
200 |                     case "}":
201 |                         return this.makeToken(Token.tokens.RIGHTBRACE_TOKEN);
202 |                     break;
203 |                     case "%":
204 |                         return this.makeToken(Token.tokens.MOD_TOKEN);
205 |                     break;
206 |                     case -1:
207 |                         return this.makeToken(Token.tokens.EOS_TOKEN);
208 |                     break;
209 |                     case "\r": case "\n":
210 |                         this.currLine++;
211 |                     default:
212 |                         //ignore them
213 |                 }
214 |             break;
215 |         }
216 |     }
217 | }
218 | ```
219 | 
220 | 這就是我們的 FSM 了，我們永遠只會處於開始狀態，遇到某個字元時，我們就會看看它是不是我們要的東西，是的話就返回一個新的 Token，不是的話就不理會它，再讀取下一個字元（換行字元我們會用來計算行數，然後再忽略）。我們的簡易 Scanner 就完成了第一步了，現在測試一下吧。
221 | 
222 | ```js
223 | $(function () {
224 |     //we stored our wescript in <script id="wescript">
225 |     var dataToBeCompiled = $("#wescript").text();
226 |     var reader = new Reader(dataToBeCompiled);
227 |     var scanner = new Scanner(reader);
228 |     while (true) {
229 |         var token = scanner.nextToken();
230 |         if (token == Token.tokens.EOS_TOKEN){
231 |             break;
232 |         }
233 |         log("Read token: " + Token.backwardMap[token]);
234 |     }
235 | });
236 | ```
237 | 
238 | 這就是把我們讀到的 Token 都列印出來（當然只會讀取到我們想處理的那七個 Token），看看結果吧。
239 | 
240 | 這一節就到此為止了，明天會出下一節，教大家寫一個能處理多字元 Token 的 Scanner 。


--------------------------------------------------------------------------------
/docs/2-2.md:
--------------------------------------------------------------------------------
  1 | # 二、掃瞄器（Scanner）﹣詞法分析（Lexical analysis）（下）
  2 | 
  3 | 繼續上一節未完成的 Scanner 吧，上一節我們寫好了一個 Reader ，可以逐個逐個字元讀取，有需要時又可以退回 n 個字元之後再讀（本節將會使用這個功能）。另外，上一節亦寫好了一個簡單的 Scanner ，可以讀取七款單字元 Token ，並忽略其他字元。本節將會教大家如何建立多字元 Token ，過程將會利用 FSM 來分析字元，忘記了什麼是 FSM 的朋友請到上一節回顧一下嚕。
  4 | 
  5 | ## 多字元的 Token
  6 | 
  7 | 首先我們要讀取含有英文字的 Token，含有英文字的 Token 如下：
  8 | 
  9 | ```
 10 | var VAR_TOKEN
 11 | 
 12 | int TYPE_TOKEN
 13 | 
 14 | bool TYPE_TOKEN
 15 | 
 16 | true, false, TRUE, FALSE BOOLLITERAL_TOKEN
 17 | 
 18 | if IF_TOKEN
 19 | 
 20 | else ELSE_TOKEN
 21 | 
 22 | while WHILE_TOKEN
 23 | 
 24 | print PRINT_TOKEN
 25 | 
 26 | 其他英文字 IDENTIFIER_TOKEN
 27 | ```
 28 | 
 29 | 在程式中先定義一下這些 Token 吧，承接著之前的 Token 定義，增加以下的定義。
 30 | 
 31 | ```js
 32 | Token.tokens.VAR_TOKEN = Token.tokens.MOD_TOKEN + 1;
 33 | Token.tokens.TYPE_TOKEN = Token.tokens.VAR_TOKEN + 1;
 34 | Token.tokens.BOOLLITERAL_TOKEN = Token.tokens.TYPE_TOKEN + 1;
 35 | Token.tokens.IF_TOKEN = Token.tokens.BOOLLITERAL_TOKEN + 1;
 36 | Token.tokens.ELSE_TOKEN = Token.tokens.IF_TOKEN + 1;
 37 | Token.tokens.WHILE_TOKEN = Token.tokens.ELSE_TOKEN + 1;
 38 | Token.tokens.PRINT_TOKEN = Token.tokens.WHILE_TOKEN + 1;
 39 | Token.tokens.IDENTIFIER_TOKEN = Token.tokens.PRINT_TOKEN + 1;
 40 |  ```
 41 | 
 42 | 現在要修改我們的 FSM，遇見有英文字的時候我們就要轉換一下 FSM 的狀態，轉到讀取整個英文詞的狀態，我稱它為 `IDENTIFIER_STATE` 吧。
 43 | 
 44 | ```js
 45 | Scanner.IDENTIFIER_STATE = Scanner.START_STATE + 1;
 46 |  
 47 | case Scanner.START_STATE:
 48 |     var c = this.reader.nextChar();
 49 |     if ((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")) {
 50 |         this.state = Scanner.IDENTIFIER_STATE;
 51 |         //we need to remember what the token's text is
 52 |         bufferStr = c;
 53 |     } else {
 54 |         switch (c) {
 55 |             case ":":
 56 |                 return this.makeToken(Token.tokens.COLON_TOKEN);
 57 |             break;
 58 |             //...and more written in the previous section
 59 |         }
 60 |     }
 61 |     break;
 62 | ```
 63 | 
 64 | 這裡我們修改了先前寫的 `START_STATE` ，現在只要一遇到英文字， FSM 的狀態就會改變為 `IDENTIFIER_STATE` 。除了改變了狀態之外，我們還要記下剛讀進來的字元到 `bufferStr` 中，因為後面我們可能需要用它來分辨那個 Token 真正的意思，例如 `true` `false` ，我們只會有一個 Token 叫做 `BOOLLITERAL_TOKEN` ，所以我們需要用 `“true”` 或者 `“false”` 來記住這個 Token 真正的意思了。
 65 | 
 66 | 為什麼不創造兩個 Token ，一個叫做 `TRUE_TOKEN` ，一個叫做 `FALSE_TOKEN` 呢？
 67 | 
 68 | 其實分別不大，只在乎你想把工作留到 Parser 才處理還是現在就分好，我個人比較偏好把相同類型（即後面處理方法大同小異的）的字合併成一個 Token 。
 69 | 
 70 | 好了，現在要寫一寫 `IDENTIFIER_STATE` 了。
 71 | 
 72 | ```js
 73 | case Scanner.IDENTIFIER_STATE:
 74 |     var c = this.reader.nextChar();
 75 |     if ((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")) {
 76 |         bufferStr += c;
 77 |     } else {
 78 |         //stop reading it since it is not a letter anymore
 79 |         //retract the last character we read because it does not belong to this identfier
 80 |         this.reader.retract();
 81 |         //change back the state to read the next token
 82 |         this.state = Scanner.START_STATE;
 83 |         switch (bufferStr) {
 84 |             case "var":
 85 |                 return this.makeToken(Token.tokens.VAR_TOKEN);
 86 |             case "int": case "bool":
 87 |                 //need to pass bufferStr as well to distinguish which type it is
 88 |                 return this.makeToken(Token.tokens.TYPE_TOKEN, bufferStr);
 89 |             case "true": case "false":
 90 |             case "TRUE": case "FALSE":
 91 |                 return this.makeToken(Token.tokens.BOOLLITERAL_TOKEN, bufferStr);
 92 |             case "if":
 93 |                 return this.makeToken(Token.tokens.IF_TOKEN);
 94 |             case "else":
 95 |                 return this.makeToken(Token.tokens.ELSE_TOKEN);
 96 |             case "while":
 97 |                 return this.makeToken(Token.tokens.WHILE_TOKEN);
 98 |             case "print":
 99 |                 return this.makeToken(Token.tokens.PRINT_TOKEN);
100 |             default:
101 |                 return this.makeToken(Token.tokens.IDENTIFIER_TOKEN, bufferStr);
102 |         }
103 |     }
104 |     break;
105 | ```
106 | 
107 | 在 `IDENTIFIER_STATE` 中要處理幾件事，第一讀取下一個字元，如果這個字元仍然是英文字的話就把它加到 `bufferStr` 中，不用改變 FSM 狀態，繼續讀取下一個字元。當讀進來的字元不是英文字的時候，我們就可以改變 FSM 狀態，把它轉變為 `START_STATE` 以讀取下一個 Token 。
108 | 
109 | 切記要把最後一個讀進來的字元退回，因為這個字元並不屬於這個 Token 的，不能鳩占人家的鵲巢。
110 | 
111 | 又，判斷一下 `bufferStr` 中的字是不是關鍵字（Reserved word），如果是的話就返回相對應的 Token ，不然就把它統稱為 `IDENTIFIER_TOKEN` ，留給 Parser 做語法分析時再判斷如何處理它。
112 | 
113 | 現在執行一下我們的 Scanner ，看它是否運作正常。
114 | 
115 | 注意， `var a:bool = true;` 的那個 `“=”` 沒有被建立為一個 Token，因為我們根本還未處理。
116 | 
117 | 現在就把餘下的 Token 都定義過來吧。
118 | 
119 | ```js
120 | Token.tokens.PLUS_TOKEN = Token.tokens.IDENTIFIER_TOKEN + 1;
121 | Token.tokens.PLUSPLUS_TOKEN = Token.tokens.PLUS_TOKEN + 1;
122 | Token.tokens.PLUSASSIGN_TOKEN = Token.tokens.PLUSPLUS_TOKEN + 1;
123 | Token.tokens.MINUS_TOKEN = Token.tokens.PLUSASSIGN_TOKEN + 1;
124 | Token.tokens.MINUSMINUS_TOKEN = Token.tokens.MINUS_TOKEN + 1;
125 | Token.tokens.MINUSASSIGN_TOKEN = Token.tokens.MINUSMINUS_TOKEN + 1;
126 | Token.tokens.MULT_TOKEN = Token.tokens.MINUSASSIGN_TOKEN + 1;
127 | Token.tokens.DIV_TOKEN = Token.tokens.MULT_TOKEN + 1;
128 | Token.tokens.ASSIGN_TOKEN = Token.tokens.DIV_TOKEN + 1;
129 | Token.tokens.EQUAL_TOKEN = Token.tokens.ASSIGN_TOKEN + 1;
130 | Token.tokens.NOTEQUAL_TOKEN = Token.tokens.EQUAL_TOKEN + 1;
131 | Token.tokens.GREATER_TOKEN = Token.tokens.NOTEQUAL_TOKEN + 1;
132 | Token.tokens.GREATEREQUAL_TOKEN = Token.tokens.GREATER_TOKEN + 1;
133 | Token.tokens.LESS_TOKEN = Token.tokens.GREATEREQUAL_TOKEN + 1;
134 | Token.tokens.LESSEQUAL_TOKEN = Token.tokens.LESS_TOKEN + 1;
135 | Token.tokens.AND_TOKEN = Token.tokens.LESSEQUAL_TOKEN + 1;
136 | Token.tokens.OR_TOKEN = Token.tokens.AND_TOKEN + 1;
137 | Token.tokens.NOT_TOKEN = Token.tokens.OR_TOKEN + 1;
138 | Token.tokens.LINECOMMENT_TOKEN = Token.tokens.NOT_TOKEN + 1;
139 | Token.tokens.BLOCKCOMMENT_TOKEN = Token.tokens.LINECOMMENT_TOKEN + 1;
140 | ```
141 | 
142 | 接著就是在 `START_STATE` 裡增加一段辨認以上字元的 logic 了。
143 | 
144 | ```js
145 | case "!":
146 |     if (this.reader.nextChar() == "=") {
147 |         return this.makeToken(Token.tokens.NOTEQUAL_TOKEN);
148 |     } else {
149 |         this.reader.retract();
150 |         return this.makeToken(Token.tokens.NOT_TOKEN);
151 |     }
152 |     break;
153 | case "+":
154 |     var d = this.reader.nextChar();
155 |     if (d == "=") {
156 |         return this.makeToken(Token.tokens.PLUSASSIGN_TOKEN);
157 |     } else if (d == "+") {
158 |         return this.makeToken(Token.tokens.PLUSPLUS_TOKEN);
159 |     } else {
160 |         this.reader.retract();
161 |         return this.makeToken(Token.tokens.PLUS_TOKEN);
162 |     }
163 |     break;
164 | case "-":
165 |     var d = this.reader.nextChar();
166 |     if (d == "=") {
167 |         return this.makeToken(Token.tokens.MINUSASSIGN_TOKEN);
168 |     } else if (d == "-") {
169 |         return this.makeToken(Token.tokens.MINUSMINUS_TOKEN);
170 |     } else {
171 |         this.reader.retract();
172 |         return this.makeToken(Token.tokens.MINUS_TOKEN);
173 |     }
174 |     break;
175 | case "*":
176 |     return this.makeToken(Token.tokens.MULT_TOKEN);
177 |     break;
178 | case "=":
179 |     if (this.reader.nextChar() == "=") {
180 |         return this.makeToken(Token.tokens.EQUAL_TOKEN);
181 |     } else {
182 |         this.reader.retract();
183 |         return this.makeToken(Token.tokens.ASSIGN_TOKEN);
184 |     }
185 |     break;
186 | case ">":
187 |     if (this.reader.nextChar() == "=") {
188 |         return this.makeToken(Token.tokens.GREATEREQUAL_TOKEN);
189 |     } else {
190 |         this.reader.retract();
191 |         return this.makeToken(Token.tokens.GREATER_TOKEN);
192 |     }
193 |     break;
194 | case "<":
195 |     if (this.reader.nextChar() == "=") {
196 |         return this.makeToken(Token.tokens.LESSEQUAL_TOKEN);
197 |     } else {
198 |         this.reader.retract();
199 |         return this.makeToken(Token.tokens.LESS_TOKEN);
200 |     }
201 |     break;
202 | ```
203 | 
204 | 這裡有幾個字元還未被處理，因為它們比較特別，待會再談。以上一段程式有很多個 case ，但它們做的大致上都差不多，就是當遇到某個字元（例如 `“+”`）時，就多讀一個字元，如果這兩個字元連在一起是有特別意思的話就先返回這個 “詞”（例如 `“++”`），否則就退回多讀的那個字元，只返回自己成為單字元 Token 。現在再看看如何處理 `“/”` 這個特別字元。
205 | 
206 | ## SLASH_STATE
207 | 
208 | 第一步是要增加一個叫做 `SLASH_STATE` 的狀態。
209 | 
210 | ```js
211 | Scanner.SLASH_STATE = Scanner.IDENTIFIER_STATE + 1;
212 | ```
213 | 
214 | 然後在 `START_STATE` 增加一個 case ，遇到 `“/”` 時就要轉到 `SLASH_STATE` 。
215 | 
216 | ```js
217 | case "/":
218 |     this.state = Scanner.SLASH_STATE;
219 |     break;
220 | ```
221 | 
222 | 最後在 `SLASH_STATE` 處理三個情況， line comment 、 block comment 及除號。
223 | 
224 | ```js
225 | case Scanner.SLASH_STATE:
226 |     var d = this.reader.nextChar();
227 |     if (d == "/") {
228 |         //line comment
229 |         bufferStr = "";
230 |         //reading 1 more char here can prevent the case that a // is followed by a line break char immediately
231 |         d = this.reader.nextChar();
232 |         if (d != "\r" && d != "\n") {
233 |             while (d != "\r" && d != "\n") {
234 |                 bufferStr += d;
235 |                 d = this.reader.nextChar();
236 |             }
237 |             //to retract the line break char
238 |             this.reader.retract();
239 |         }
240 |         this.state = Scanner.START_STATE;
241 |         return this.makeToken(Token.tokens.LINECOMMENT_TOKEN, bufferStr);
242 |     } else if (d == "*") {
243 |         //block comment
244 |         bufferStr = "";
245 |         var end = false;
246 |         while (! end) {
247 |             d = this.reader.nextChar();
248 |             if (d != -1) {
249 |                 if (d == "\r" || d == "\n") {
250 |                     this.currLine++;
251 |                 }
252 |                 if (d == "*") {
253 |                     var e = this.reader.nextChar();
254 |                     if (e == "/") {
255 |                         //meet */
256 |                         end = true;
257 |                     } else {
258 |                         bufferStr += "*" + e;
259 |                     }
260 |                 } else {
261 |                     bufferStr += d;
262 |                 }
263 |             } else {
264 |                 end = true;
265 |             }
266 |         }
267 |         this.state = Scanner.START_STATE;
268 |         return this.makeToken(Token.tokens.BLOCKCOMMENT_TOKEN, bufferStr);
269 |     } else {
270 |         this.state = Scanner.START_STATE;
271 |         this.reader.retract();
272 |         return this.makeToken(Token.tokens.DIV_TOKEN);
273 |     }
274 |     break;
275 | ```
276 | 
277 | 大家可以研究一下 `SLASH_STATE` 的 source code ，但其實當中的 logic 都不太困難，如果下一個字元是 `“*”` 或者 `“/”` 的話就代表它是 comment ，那就一直讀到 “完畢” 為止，否則就代表它只是除號 `“/”` ， retract 之後就可以返回了。
278 | 
279 | 為什麼處理 `“/”` 時要用一個新的狀態來處理，其他如 `“+”` 又不用呢？
280 | 
281 | 其實用不用另一個狀態來處理都可以，沒有一個客觀的標準，西傑只能說兩個 “可能” 需要另開狀態的例子。第一個是當 logic 比較長的時候，就要考慮使用新的狀態，以避免代碼太過混亂或者太多縮排（indentation）。第二個情況是，如果開一個新狀態可以減少代碼重複的話就要開了，本教程沒有這種情況，大家想知更多的話看看 Actionscript compiler 的 Scanner 吧，當中處理 exponent 就會被整數和小數 literal 重用。
282 | 
283 | ## 錯誤匯報
284 | 
285 | 做完了沒有？細心閱讀的話就會發現還欠了 “&” 和 “|” 的處理，因為它們都有別於以上情況。看看以下代碼：
286 | 
287 | ```js
288 | case "&":
289 |     if (this.reader.nextChar() == "&") {
290 |         return this.makeToken(Token.tokens.AND_TOKEN);
291 |     } else {
292 |         this.reader.retract();
293 |     }
294 |     break;
295 | case "|":
296 |     if (this.reader.nextChar() == "|") {
297 |         return this.makeToken(Token.tokens.OR_TOKEN);
298 |     } else {
299 |         this.reader.retract();
300 |     }
301 |     break;
302 | ```
303 | 
304 | 那就好了嗎？不！如果遇上一個 `“&”` 或者一個 `“|”` 的話，在 Wescript 來說是 syntax error ！那有 syntax error 要怎麼辦？當然是告訴用家啦。
305 | 
306 | ```js
307 | case "&":
308 |     if (this.reader.nextChar() == "&") {
309 |         return this.makeToken(Token.tokens.AND_TOKEN);
310 |     } else {
311 |         this.reader.retract();
312 |         Errors.push({
313 |             type: Errors.SYNTAX_ERROR,
314 |             msg: "You have only one &",
315 |             line: this.currLine
316 |         });
317 |     }
318 |     break;
319 | case "|":
320 |     if (this.reader.nextChar() == "|") {
321 |         return this.makeToken(Token.tokens.OR_TOKEN);
322 |     } else {
323 |         this.reader.retract();
324 |         Errors.push({
325 |             type: Errors.SYNTAX_ERROR,
326 |             msg: "You have only one |",
327 |             line: this.currLine
328 |         });
329 |     }
330 |     break;
331 | ```
332 | 
333 | 在最後就是在 Tester 中 iterate 一下所有錯誤並告訴用家了。現在看看運行結果。
334 | 
335 | 這裡沒有提及數字處理部份，因為數字的處理方法沒有什麼特別之處，只要一直讀進來讀到沒有數字就可以了。
336 | 
337 | 修正：網友指出如果程式末端不是空白的話會出現無限循環，原因是當程式語法在程式的末端出錯時， retract 會把 reader 退到上一個字元，永遠停不了！
338 | 
339 | 解決方法有二：一、每次編譯前都在程式前後加一個空白字元；二、在 retract 時檢查程式到了末端沒有，到了就不能再 retract ，現提供修正檔案！感謝網友 ”KJlmfe” 指出問題。
340 | 
341 | 大功告成！ Scanner 終於寫好了，感受到 FSM 帶來的好處了沒有？使用 FSM 模式來開發，代碼簡單易明，亦很 scalable ，要隨時加多兩種 Token 都可以。現在大家也可以試試自己開發一個 Scanner ，創造一種屬於你自己的語言啦（其實還有很長的一段路……）！下週會開始寫 Parser，大家記得先讀熟這章的 Scanner 啊！
342 | 
343 | 下周同樣時間，再見！


--------------------------------------------------------------------------------
/docs/3-1.md:
--------------------------------------------------------------------------------
  1 | # 三、語法分析器（Parser）﹣語法分析（Syntactic analysis）（上）
  2 | 
  3 | 現在我們有了 Scanner 幫我們把字元合併成 Token 了，那下一步要做什麼呢？就是要把 Token 組合成有意思的 “句子” 了，這一步我們稱為語法分析（Syntactic analysis），而負責做這項工作的程式我們稱之為語法分析器（Parser）。
  4 | 
  5 | ![](./images/3-1-1.png)
  6 | 
  7 | ## Parse tree
  8 | 
  9 | Parser 要做的工作就是，讀取 Scanner 分析出來的 Token，然後建立並返回一棵 Parse tree 給後面做語意分析（下一章再談談）。什麼是 Parse tree 呢？維基上的解釋是：
 10 | 
 11 | > an ordered, rooted tree that represents the syntactic structure of a string according to some formal grammar
 12 | 
 13 | 意思就是一棵可以形容到你寫的句子的樹了，例如你的句子是 `if a then b else c` ，那你的樹就可能會是這樣了：
 14 | 
 15 | ![](./images/3-1-2.png)
 16 | 
 17 | 為什麼不用一些現成的工具來自動生成 Parser 呢？
 18 | 
 19 | 其實市面上有一些很好的工具可以根據大家寫下的規則來生成 Scanner 及 Parser，例如 [ANTLR](https://www.antlr.org/) ， [Flex](https://github.com/westes/flex)（生成 Scanner） 及 [Bison](https://www.gnu.org/software/bison/)（生成 Parser），不用它們的原因有三：第一、用了它們的話西傑就沒工作嚕。第二、它們大部份的 documentation 做得很差，而且網上缺乏教學，要學習的話要花很多時間，然而比起自己寫 Scanner 及 Parser 又快不了多少。第三，自己寫的通常會更加 user-friendly ，可以輸出更多種錯誤訊息給開發人員 debug 。更多討論可以在[這裡](https://softwareengineering.stackexchange.com/questions/17824/should-i-use-a-parser-generator-or-should-i-roll-my-own-custom-lexer-and-parser)看看。
 20 | 
 21 | ## Top-down parser
 22 | 
 23 | 我們可以根據 Parser 建立 Parse tree 的方法來分類，一種是 Top-down parsing，另一種是 Bottom-up parsing。Top-down parsing 的意思就是先由 root 開始，一步一步地把 descendant 加到 root 之下， Bottom-up parsing 就是剛好相反，由 leaf 開始建立到 root，還是那句吧，想知得更詳細就看[維基](http://en.wikipedia.org/wiki/Parsing#Types_of_parser)吧。
 24 | 
 25 | 我們這裡會做的是 Recursive descent parser ，是其中一種 Top-down parser ，它是由多個 mutually recursive functions 組成的，什麼是 mutually recursive functions ？即是各個 function 都可能會互相利用來做 recursion ，看看代碼你就會大概知道是什麼意思了。
 26 | 
 27 | ![](./images/3-1-3.png)
 28 | 
 29 | 取自維基
 30 | 
 31 | 除此之外，我們的 Recursive descent parser 還會使用 lookahead 的技巧，lookahead 在我們 Parser 裡的意思就是多看一個 Token 再決定走向，類似我們之前做 Scanner 時的 nextChar-retract 技巧。
 32 | 
 33 | 廢話說完了，正式開始寫 code，第一步我們要為我們的 Parser 建立讀取 Token 的 function 以及 lookahead 的 function 。
 34 | 
 35 | ```js
 36 | //Parser class
 37 | function Parser(scanner) {
 38 |     this.scanner = scanner;
 39 |     this.currentToken = new Token();
 40 |     this.lookaheadToken = new Token();
 41 |     this.lookaheadToken.consumed = true;
 42 | }
 43 | Parser.prototype.nextToken = function () {
 44 |     if (this.lookaheadToken.consumed) {
 45 |         var token = this.scanner.nextToken();
 46 |         //skip comments
 47 |         while (token == Token.tokens.LINECOMMENT_TOKEN || token == Token.tokens.BLOCKCOMMENT_TOKEN) {
 48 |             token = this.scanner.nextToken();
 49 |         }
 50 |         this.currentToken.type = token;
 51 |         this.currentToken.text = this.scanner.currentToken.text;
 52 |         return token;
 53 |     } else {
 54 |         this.currentToken.type = this.lookaheadToken.type;
 55 |         this.currentToken.text = this.lookaheadToken.text;
 56 |         this.lookaheadToken.consumed = true;
 57 |         return this.currentToken.type;
 58 |     }
 59 | }
 60 | Parser.prototype.lookahead = function () {
 61 |     if (this.lookaheadToken.consumed) {
 62 |         var token = this.scanner.nextToken();
 63 |         //skip comments
 64 |         while (token == Token.tokens.LINECOMMENT_TOKEN || token == Token.tokens.BLOCKCOMMENT_TOKEN) {
 65 |             token = this.scanner.nextToken();
 66 |         }
 67 |         this.lookaheadToken.type = token;
 68 |         this.lookaheadToken.text = this.scanner.currentToken.text;
 69 |         this.lookaheadToken.consumed = false;
 70 |         return token;
 71 |     } else {
 72 |         return this.lookaheadToken.type;
 73 |     }
 74 | }
 75 | ```
 76 | 
 77 | `nextToken ()` 要處理的事情有兩項，第一是看看 lookahead 的 Token 被使用了沒有，使用了的話就由 Scanner 再讀取下一個 Token ，否則只需使用 lookahead 的 Token 。第二是忽略所有 comment ，當然如果在你的語言中 comment 是有特別作用的話（如 PHP 中有些 library 會用 comment 來做 Annotation）你也可以保留。
 78 | 
 79 | `lookahead ()` 的作用就是 lookahead 嚕，不過如果已經有一個 lookahead 了的 Token 就不要再讀下一個了（直至它被使用了），一般來說能夠 lookahead 一個 Token 已經足夠，如不足夠的話，大概是因為你設計的語言很複雜……
 80 | 
 81 | 現在寫一個 Tester 程式來試試這些功能吧。
 82 | 
 83 | ```js
 84 | var dataToBeCompiled = $("#wescript").text();
 85 | var reader = new Reader(dataToBeCompiled);
 86 | var scanner = new Scanner(reader);
 87 | var parser = new Parser(scanner);
 88 | while (true) {
 89 |     if (parser.lookahead() == Token.tokens.PLUSPLUS_TOKEN) {
 90 |         log("lookahead: PLUSPLUS_TOKEN");
 91 |     }
 92 |     if (parser.lookahead() == Token.tokens.PLUSPLUS_TOKEN) {
 93 |         log("lookahead again: PLUSPLUS_TOKEN");
 94 |     }
 95 |     var token = parser.nextToken();
 96 |     log("Token: " + Token.backwardMap[token]);
 97 |     if (token == Token.tokens.EOS_TOKEN) {
 98 |         break;
 99 |     }
100 | }
101 | ```
102 | 
103 | 如果 lookahead 遇到 `++` 的話就會輸出兩次 `“lookahead: …”` 那句，這可以用來試試是否只能 lookahead 一個 Token ，亦可以試試 `nextToken ()` 是否能先讀取 lookahead Token 然後才讀下一個 Token ，再者也要試試是否真的忽略了 comment 。
104 | 
105 | 運作正常！現在可以開始讀 Wescript 的 syntax 了，首先我們要準備一個入口給人調用這個 Parser。
106 | 
107 | ```js
108 | //the entry point of our parser
109 | Parser.prototype.parse = function () {
110 |     var rootBlock = new ExpressionBlockNode();
111 |     this.parseExpressions(rootBlock);
112 |     return rootBlock;
113 | }
114 | ```
115 | 
116 | Wescript 是由 N 句 expression 組成的，那入口當然是要 `parseExpressions` 啦， `parseExpressions` 要做的東西就是分析 Wescript 並且把程式分為 N 句 expression，將其寫到 `ExpressionBlockNode` 裡。
117 | 
118 | ```js
119 | //to parse a list of expressions
120 | Parser.prototype.parseExpressions = function (expressionBlockNode) {
121 |     while (this.lookahead() != Token.tokens.RIGHTBRACE_TOKEN &&
122 |             this.lookahead() != Token.tokens.EOS_TOKEN) {
123 |         var expressionNode = this.parseExpression();
124 |         if (expressionNode) {
125 |             expressionBlockNode.push(expressionNode);
126 |         }
127 |     }
128 | }
129 | ```
130 | 
131 | 就是一直 lookahead ，如果未遇到 `“}”` `RIGHTBRACE_TOKEN` 或者未到結尾的話就一直 `parseExpression` 下去。
132 | 
133 | 為什麼是用 `lookahead` 而不是用 `nextToken` 呢？
134 | 
135 | 因為 `parseExpressions` 不能夠亦不應該 “吃” 掉屬於 expression 的 Token（而不是屬於 expressions 的），不然當你 `parseExpression` 的時候那句 expression 就會少了一個 Token 。
136 | 
137 | ```js
138 | //to parse an expression
139 | Parser.prototype.parseExpression = function () {
140 |     switch (this.lookahead()) {
141 |         case Token.tokens.PRINT_TOKEN:
142 |             var printToken = this.nextToken();
143 |             var expressionNode = this.parseExpression();
144 |             return new PrintNode(expressionNode);
145 |             break;
146 |         case Token.tokens.INTLITERAL_TOKEN:
147 |             var intToken = this.nextToken();
148 |             return new IntNode(this.currentToken.text);
149 |             break;
150 |         default:
151 |             //unexpected, consume it
152 |             this.nextToken();
153 |     }
154 | }
155 | ```
156 | 
157 | `parseExpression` 做什麼呢？暫時只處理兩種 expression ，一種是 `“print”` ，一種是數字，當我們遇到 `PRINT_TOKEN` 或者 `INTLITERAL_TOKEN` 時就會建立並返回相應的 Node ，如果是 `PRINT_TOKEN` 的話更會再 `parseExpression` 一下，因為 `print` 後面是要配上一個 expression 才可以的嘛（看到嗎？這就是 syntactic level 的規則了）。現在修改一下 Tester 程式再試試運行一下吧，今次會用 `console.log ()` 來查看一下建立出來的樹是否正確。
158 | 
159 | ![](./images/3-1-4.png)
160 | 
161 | 看看 console。
162 | 
163 | `root` 就是 `ExpressionBlockNode` 了，下面是由 N（這裡是 2）句 expression 組成，其中一句就是 `print` 了，所以建立了 `PrintNode` 。
164 | 
165 | ## 錯誤匯報
166 | 
167 | 注意到 `expressions [1]` 那個 `PrintNode` 的 `expressionNode` 是 `undefined` 嗎？因為那句 Wescript 是 `print;` ，所以分析不到下一句 expression 了，如果我想 `print` 後面一定要有 expression 的話，那這個情況就算是 syntax error 了，我們可以怎樣告訴開發人員這個錯誤呢？那就要修改一下 `PRINT_TOKEN` 那個 case 了。
168 | 
169 | ```js
170 | case Token.tokens.PRINT_TOKEN:
171 |     var printToken = this.nextToken();
172 |     var expressionNode = this.parseExpression();
173 |     if (expressionNode == undefined) {
174 |         Errors.push({
175 |             type: Errors.SYNTAX_ERROR,
176 |             msg: "Missing an expression after \"print\"",
177 |             line: this.scanner.currLine
178 |         });
179 |     }
180 |     return new PrintNode(expressionNode);
181 |     break;
182 | ```
183 | 
184 | 加多一個 `if` 去驗證一下 `expressionNode` 是不是一個正常的 Node ，不是的話就匯報錯誤。再看一看運行結果。
185 | 
186 | 錯誤訊息出來了。 ![](./images/3-1-5.gif)
187 | 
188 | 這一節就到此為止，大家先理清以上數個概念吧，下一節會再教大家寫一些更複雜的 expression 。


--------------------------------------------------------------------------------
/docs/3-2.md:
--------------------------------------------------------------------------------
  1 | # 三、語法分析器（Parser）﹣語法分析（Syntactic analysis）（中）
  2 | 
  3 | 上回提到，我們要寫一個 Recursive descent parser ，從 Scanner 一直讀 Token 進來，並建立一棵 Parse tree 。在建立途中，我們還用了 lookahead 的技巧，就是偷看下一個 Token 但是不會佔用它，原因是我們需要用它來判斷下一步怎麼走，但是我們又不能用掉它，否則到下一步時我們就用不到了。現在我們就來完成餘下的部份吧！
  4 | 
  5 | 上回我們只處理了 `print` 和數字，現在我們來處理一下 `var` 吧。 `var` 主要有兩種寫法，第一種是純粹定義一個變數，即：
  6 | 
  7 | ```js
  8 | var a:bool;
  9 | ```
 10 | 
 11 | 第二種是除了定義變數之外，還會 initialise 一下那個變數，即：
 12 | 
 13 | ```js
 14 | var a:int = 1;
 15 | ```
 16 | 
 17 | 那我們先來定義一個新的 Node 吧。
 18 | 
 19 | ```js
 20 | function VariableNode(varName, type, initExpressionNode) {
 21 |     this.varName = varName;
 22 |     this.type = type;
 23 |     this.initExpressionNode = initExpressionNode;
 24 | }
 25 | extend(VariableNode, Node);
 26 | ```
 27 | 
 28 | `VariableNode` 要記著三樣資料，第一是變數的名稱，第二是變數的類型，第三是 initialise 的 expression（這個可以是 `null`）。然後我們要加多一個 case 到 `parseExpression` 中
 29 | 
 30 | ```js
 31 | case Token.tokens.VAR_TOKEN:
 32 |     return this.parseVarExpression();
 33 |     break;
 34 | ```
 35 | 
 36 | 遇到 `var` 時就要啟動 `parseVarExpression` 程序：
 37 | 
 38 | ```js
 39 | Parser.prototype.parseVarExpression = function () {
 40 |     //consume "var"
 41 |     this.nextToken();
 42 |     //expecting an identifier
 43 |     if (this.lookahead() == Token.tokens.IDENTIFIER_TOKEN) {
 44 |         this.nextToken();
 45 |         var varName = this.currentToken.text;
 46 |         //consume a colon
 47 |         if (this.nextToken() != Token.tokens.COLON_TOKEN) {
 48 |             this.skipError();
 49 |             return;
 50 |         }
 51 |         //type token
 52 |         if (this.lookahead() != Token.tokens.TYPE_TOKEN) {
 53 |             this.skipError();
 54 |             return;
 55 |         }
 56 |         this.nextToken();
 57 |         var typeName = this.currentToken.text;
 58 |         var initNode;
 59 |         //check if it has initialization expression
 60 |         if (this.lookahead() == Token.tokens.ASSIGN_TOKEN) {
 61 |             initNode = this.parseSimpleAssignmentExpression();
 62 |         }
 63 |         return new VariableNode(varName, typeName, initNode);
 64 |     }
 65 |     this.skipError();
 66 | }
 67 | Parser.prototype.parseSimpleAssignmentExpression = function () {
 68 |     //consume the "=" sign
 69 |     this.nextToken();
 70 |     var expressionNode = this.parseExpression();
 71 |     return expressionNode;
 72 | }
 73 | ```
 74 | 
 75 | Parse `“var”` 的方法其實就是用 `lookahead` 同 `nextToken` 來讀取 Token，然後再檢查一下這個 Token 是不是我們想要的 Token，如果不是的話就 call 一下 `skipError` 來跳過接下來的 Token，到最後就檢查一下 `“=”` 有沒有出現，出現了的話就再 parse 接下來的 initialisation expression 。現在看看 `skipError` 的做法：
 76 | 
 77 | ```js
 78 | //a naive implementation for skipping error
 79 | Parser.prototype.skipError = function () {
 80 |     this.scanner.skipNewLine = false;
 81 |     while (this.lookahead() != Token.tokens.NEWLINE_TOKEN &&this.lookahead() != Token.tokens.EOS_TOKEN) {
 82 |         this.nextToken();
 83 |     }
 84 |     this.scanner.skipNewLine = true;
 85 | }
 86 | ```
 87 | 
 88 | 方法很簡單，其實就是一直跳過接下來的 Token ，直至讀到新一行或者再沒有 Token 為止。
 89 | 
 90 | 為什麼我們要跳過某些 Token 呢？
 91 | 
 92 | 一個好的 Compiler 不應該一遇到錯誤就停止，而是要儘量把整個程式中的錯誤告知開發人員以便 debug 。
 93 | 
 94 | 試行一下，看看它有沒有做 Error recovery 以及能否 parse `“var”` expression 。
 95 | 
 96 | ![](./images/3-2-1.png)
 97 | 
 98 | 很好，不但成功建立到正確的 Parse tree ，亦成功地跳過了錯誤。
 99 | 
100 | ## Expression block
101 | 
102 | 接著下來我們要處理 `if` 和 `while` ，它們跟我們之前處理過的 expression 有分別，分別在於它們的結構不是一句 expression 接著一個分號就成為一個 expression node，而是先有一個 condition 再配一／兩個 expression block，最後還沒有分號，所以我們要改寫一下 `parseExpressions` ，把讀分號的程式移到每一句期待分號的 expression 中。
103 | 
104 | ```js
105 | Parser.prototype.matchSemicolon = function () {
106 |     //consume the semicolon
107 |     if (this.lookahead() == Token.tokens.SEMICOLON_TOKEN) {
108 |         this.nextToken();
109 |     } else {
110 |         //syntax error
111 |         Errors.push({
112 |             type: Errors.SYNTAX_ERROR,
113 |             msg: "Expecting a semicolon at the end of expression",
114 |             line: this.scanner.currLine
115 |         });
116 |     }
117 | }
118 | ```
119 | 
120 | 把 match 分號的程式抽出來，叫 `matchSemicolon` ，方便後面使用。
121 | 
122 | ```js
123 | case Token.tokens.IF_TOKEN:
124 |     return this.parseIfExpression();
125 |     break;
126 | case Token.tokens.WHILE_TOKEN:
127 |     return this.parseWhileExpression();
128 |     break;
129 | ```
130 | 
131 | 新增兩個 case 到 `parseExpression` 中，遇到 `if` 或者 `while` Token 都會另外處理。
132 | 
133 | ```js
134 | Parser.prototype.parseIfExpression = function () {
135 |     //consume "if"
136 |     this.nextToken();
137 |     var condition = this.parseParenExpression();
138 |     var expressions = this.parseExpressionBlock();
139 |     var elseExpressions;
140 |     if (this.lookahead() == Token.tokens.ELSE_TOKEN) {
141 |         //consume "else"
142 |         this.nextToken();
143 |         elseExpressions = this.parseExpressionBlock();
144 |     }
145 |     return new IfNode(condition, expressions, elseExpressions);
146 | }
147 | ```
148 | 
149 | 遇到 `if` 的時候要做什麼呢？首先當然是讀取 condition，就是那句 `parseParenExpression` ，然後就要讀取一個 expression block ，即被 `“{”` `“}”` 包住的 expression list ，接著還要判斷一下有沒有 `else` ，有的話還要讀取整個 else block 進來。 `while` 的做法跟 if 差不多，這裡就不多討論了。
150 | 
151 | ![](./images/3-2-2.png)
152 | 
153 | 還有個 boss 要打，就是那堆運算符了，由於這部份比較複雜，所以西傑決定開多一節詳談當中的細節，下星期四再見吧！


--------------------------------------------------------------------------------
/docs/3-3.md:
--------------------------------------------------------------------------------
  1 | # 三、語法分析器（Parser）﹣語法分析（Syntactic analysis）（下）
  2 | 
  3 | 在上一節中，我們處理了 `“var”` `“if”` `“while”` ，大家應該學會了如何處理 expression block 了，這一節我們將會學習如何處理運算符。處理運算符有兩個基本概念要掌握，第一是運算符的運算次序，第二是運算表達式的表示法。
  4 | 
  5 | ## 運算次序（Order of operations）
  6 | 
  7 | 在數學算式裡，我們有個法則叫做 “先乘除，後加減”，意思就是乘除的運算次序比加減高，同樣地在程式語言中，我們也需要定義一下運算符的運算次序。在 Wescript 中，我們會用常用的運算符次序，即如下圖所示：
  8 | 
  9 | ![](./images/3-3-1.png)
 10 | 
 11 | 取自維基
 12 | 
 13 | 西傑不會處理這裡所列的所有運算符，只會處理第一章所列的運算符，其他就留給讀者們當練習吧。
 14 | 
 15 | ## 表示法
 16 | 
 17 | 西傑相信大部份人都會使用 infix notation（可能用完也不知道它的名稱原來就是 infix notation ），但其實這世界上還有兩種運算式表示法， prefix notation 和 postfix notation ，看看例子大家就會明白這是什麼概念。
 18 | 
 19 | ```
 20 | Infix notation: 1 + 2 * 3
 21 | 
 22 | Prefix notation: + 1 * 2 3
 23 | 
 24 | Postfix notation: 1 2 3 * +
 25 | ```
 26 | 
 27 | 我們日常寫程式時使用的都是 infix notation，但是對電腦來說，postfix notation 比較容易處理（通常會用一個 stack 來處理），因此我們的 Parser 將會讀進 Infix notation，到 compile 時就會轉為 postfix notation。
 28 | 
 29 | ## 分項（或稱運算元 operand ）
 30 | 
 31 | 開始寫程式了，第一件事情是要分項，什麼是分項呢？就是把一句 expression 斬成多個項目，怎樣才算是一個項目呢？看看下圖：
 32 | 
 33 | ![](./images/3-3-2.png)
 34 | 
 35 | 項目包括以上所列的幾種：數值（數字和布林值），負數值，包著一堆子項目的括號，變數。根據這個分類法，我們可以改寫出 parse 項目的 function 。
 36 | 
 37 | ```js
 38 | Parser.prototype.parseOperand = function () {
 39 |     var token = this.nextToken();
 40 |     var operandNode;
 41 |     switch (token) {
 42 |         case Token.tokens.INTLITERAL_TOKEN:
 43 |             operandNode = new IntNode(this.currentToken.text);
 44 |             break;
 45 |         case Token.tokens.BOOLLITERAL_TOKEN:
 46 |             operandNode = new BoolNode(this.currentToken.text);
 47 |             break;
 48 |         case Token.tokens.IDENTIFIER_TOKEN:
 49 |             operandNode = new IdentifierNode(this.currentToken.text);
 50 |             break;
 51 |         case Token.tokens.LEFTPAREN_TOKEN:
 52 |             operandNode = new ParenNode(this.parseCompoundExpression(0));
 53 |             //consume the right paren )
 54 |             if (this.lookahead() == Token.tokens.RIGHTPAREN_TOKEN) {
 55 |                 this.nextToken();
 56 |             } else {
 57 |                 Errors.push({
 58 |                     type: Errors.SYNTAX_ERROR,
 59 |                     msg: "Missing right paren \")\"",
 60 |                     line: this.scanner.currLine
 61 |                 });
 62 |             }
 63 |             break;
 64 |         case Token.tokens.MINUS_TOKEN:
 65 |             operandNode = new NegateNode(this.parseOperand());
 66 |             break;
 67 |         default:
 68 |             //not valid
 69 |             Errors.push({
 70 |                 type: Errors.SYNTAX_ERROR,
 71 |                 msg: "Unexpected token",
 72 |                 line: this.scanner.currLine
 73 |             });
 74 |             return null;
 75 |     }
 76 |     return operandNode;
 77 | }
 78 | ```
 79 | 
 80 | 頭三款分別為數字，布林值和 identifier （即變數），只是很簡單的讀進一個 token 再建立一個 node 。遇到左括號時就有點特別了，這時我們要 `parseCompoundExpression` ，意思就是要處理更複雜的 expression （例如： `1 + 2 * 3` ），我們待會兒就會寫這個 function 。接著當我們遇到 `“-”` 時，我們就要再讀多一個項目，因為這個 `“-”` 代表負數值，我們要建立一個 `NegateNode` 。最後如果都不是以上幾款的話就算是 syntax error 了。
 81 | 
 82 | 之前我們是在 `parseExpression` 中處理數值的，現在我們要從中抽起，放到 `parseOperand` 當中！
 83 | 
 84 | `parseCompoundExpression` 的 algorithm 如下，先大約看一看，下面再解釋。
 85 | 
 86 | ```js
 87 | Parser.prototype.getBindingPower = function (token) {
 88 |     switch (token) {
 89 |         case Token.tokens.PLUS_TOKEN:
 90 |         case Token.tokens.MINUS_TOKEN:
 91 |             return 120;
 92 |         case Token.tokens.MULT_TOKEN:
 93 |         case Token.tokens.DIV_TOKEN:
 94 |             return 130;
 95 |     }
 96 |     return -1;
 97 | }
 98 | Parser.prototype.createOperatorNode = function (operatorToken) {
 99 |     switch (operatorToken) {
100 |         case Token.tokens.PLUS_TOKEN:
101 |             return new OperatorPlusNode();
102 |             break;
103 |         case Token.tokens.MINUS_TOKEN:
104 |             return new OperatorMinusNode();
105 |             break;
106 |         case Token.tokens.MULT_TOKEN:
107 |             return new OperatorMultNode();
108 |             break;
109 |         case Token.tokens.DIV_TOKEN:
110 |             return new OperatorDivNode();
111 |             break;
112 |     }
113 | }
114 | Parser.prototype.parseCompoundExpression = function (rightBindingPower) {
115 |     var operandNode = this.parseOperand();
116 |     if (operandNode == null) {
117 |         return operandNode;
118 |     }
119 |     var compoundExpressionNode = new CompoundNode();
120 |     compoundExpressionNode.push(operandNode);
121 |     var operator = this.lookahead();
122 |     var leftBindingPower = this.getBindingPower(operator);
123 |     if (leftBindingPower == -1) {
124 |         //not an operator
125 |         return compoundExpressionNode;
126 |     }
127 |     while (rightBindingPower < leftBindingPower) {
128 |         operator = this.nextToken();
129 |         compoundExpressionNode.push(this.createOperatorNode(operator));
130 |         var node = this.parseCompoundExpression(leftBindingPower);
131 |         compoundExpressionNode.push(node);
132 |         var oper = this.lookahead();
133 |         leftBindingPower = this.getBindingPower(oper);
134 |         if (leftBindingPower == -1) {
135 |             //not an operator
136 |             return compoundExpressionNode;
137 |         }
138 |     }
139 |     return compoundExpressionNode;
140 | }
141 | ```
142 | 
143 | 這個 algorithm 是取自 Vaughan Pratt 的（好似係），但我是在 [Eli Bendersky](http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/) 那裡學的。看起來有點複雜了吧，放心，我慢慢地來 。
144 | 
145 | ## Binding Power
146 | 
147 | 由於運算符運算時有先後次序之分，我們的 compiler 也需要處理，而 Wescript compiler 處理的方法就是賦予每一個運算符一個 binding power ，Binding Power 高的運算符可以佔有其相鄰的項目，如 Eli 的例子所示：
148 | 
149 | ![](./images/3-3-3.png)
150 | 
151 | B 比 A 強，所以佔有 E （唉，連電腦的世界裡也是弱肉強食……）。因此，我們需要先設定運算符的 binding power ，這就是 `getBindingPower` 的工作了。到戲肉了，這裡我會用靜態動畫（靜態的動畫）來描述這個 algorithm 的工作。
152 | 
153 | ![](./images/3-3-4.png)
154 | 
155 | 開始時是這樣的，注意括號的 binding power 是 0 ，因為我把它當成項目，而不是普通的運算符。
156 | 
157 | ![](./images/3-3-5.png)
158 | 
159 | 現在開始執行，一開始的 `rightBindingPower` 是 0 ，讀取了一個項目（以圓形表示），由於 `“+”` 的 binding power 較強，所以這句判斷成立：
160 | 
161 | ```js
162 | while (rightBindingPower < leftBindingPower) { //0 < 120
163 | ```
164 | 
165 | 注意不要搞錯左和右，這裡的左是指運算符的左面，而不是項目的左面。
166 | 接下來就是要 recursive 地再運行 `parseCompoundExpression` 。
167 | 
168 | ![](./images/3-3-6.png)
169 | 
170 | 同樣地，讀取一個項目，檢查一下右面的運算符的 `leftBindingPower` ，發現 `rightBindingPower (120)`  比 `leftBindingPower (130)`  小，於是就再運行一個新的 recursion 。
171 | 
172 | ![](./images/3-3-7.png)
173 | 
174 | 接下來的 `3 - 4` 的處理方法跟上面相似，但有些分別，括號是當成一個項目來處理，所以 binding power 由 0 開始，當讀到 4 的時候，再讀一個運算符時，我們會讀到 “)” 右括號，當我們嘗試拿取它的 binding power 時就會拿到 -1，於是是次 recursion 直接終止。
175 | 
176 | ![](./images/3-3-8.png)
177 | 
178 | 然後括號也處理好了，也可以 return 了。
179 | 
180 | ![](./images/3-3-9.png)
181 | 
182 | 由於下一個運算符 `“+”` 的 binding power `(120)` 比現在的 `（130）` 小，所以也可以 return 了。
183 | 
184 | 可不可以用 `rightBindingPower <= leftBindingPower` 而不是 `rightBindingPower < leftBindingPower` 呢？
185 | 不可以，想想這個例子， `1 - 2 + 3` ，由於 `“-”` 和 `“+”` 的 binding power 一樣，如果用小於或等於的話， parse tree 將會當成 `1 - (2 + 3)` 。
186 | 
187 | ![](./images/3-3-10.png)
188 | 
189 | 現在連 `2 * (3 - 4)` 也可以 return 了，因為 `1 + 2` 的 `“+”` 的 binding power 並不比 `+ 5` 的 `“+”` 小。返回之後， binding power 為 0 的那個 function 將會繼續讀取運算式直至讀取所有運算式為止。
190 | 
191 | ![](./images/3-3-11.png)
192 | 
193 | 就是這樣了，很簡單吧？不明白的多看幾次以上的例子吧，再不明白的話就留個言，西傑很快會跟進的了。現在看看這段程式是不是運作正常。
194 | 
195 | 看看 `console.log` ，程式運作正常。現在把餘下的運算符都編寫下來！
196 | 
197 | 這段程式主要編寫了邏輯運算符，看看 `console.log` ，還是運作正常。那我們現在寫餘下的 assignment 運算符吧。
198 | 
199 | `“=”` `“+=”` `“-=”` 其實沒有什麼特別處理方法，跟之前的運算符差不多，這裡就不詳述了。但是 `“++”` 和 `“--”` 就有點特別了，它們是 unary 運算符，同時又可以作 pre-in/decrement 或 post-in/decrement ，這樣要如何處理呢？西傑就把它們當成 operand 一樣來處理，而且一定要配合 identifier 使用，看看代碼。
200 | 
201 | ```js
202 | case Token.tokens.IDENTIFIER_TOKEN:
203 |     operandNode = new IdentifierNode(this.currentToken.text);
204 |     if (this.lookahead() == Token.tokens.MINUSMINUS_TOKEN) {
205 |         //post decrement
206 |         this.nextToken();
207 |         operandNode = new PostDecrementNode(operandNode);
208 |     } else if (this.lookahead() == Token.tokens.PLUSPLUS_TOKEN) {
209 |         //post increment
210 |         this.nextToken();
211 |         operandNode = new PostIncrementNode(operandNode);
212 |     }
213 | break;
214 | ```
215 | 
216 | 首先處理 post-in/decrement ，方法很簡單，就是當我們遇到 identifier 時，我們就會 lookahead 一下，看看有沒有 `“++”` 或者 `“--”` ，有的話就用 `PostIn/DecrementNode` 來取代原本的 `operandNode` ，沒有就自然不用理會啦。
217 | 
218 | ```js
219 | case Token.tokens.PLUSPLUS_TOKEN:
220 |     if (this.lookahead() == Token.tokens.IDENTIFIER_TOKEN) {
221 |         this.nextToken();
222 |         operandNode = new PreIncrementNode(new IdentifierNode(this.currentToken.text));
223 |     } else {
224 |         Errors.push({
225 |             type: Errors.SYNTAX_ERROR,
226 |             msg: "Expecting an identifier for pre-increment expression",
227 |             line: this.scanner.currLine
228 |         });
229 |         return null;
230 |     }
231 |     break;
232 | ```
233 | 
234 | 之後要增加 `PreIn/Decrement` 的處理，就是增加一個 case 到 `parseOperand` 中，遇到 `“++”` 或 `“--”` 就會 lookahead 一下看看有沒有 identifier ，有的話就是 `PreIn/Decrement` ，沒有的話應該算是 syntax error 了吧。
235 | 
236 | 看看 `console.log` ，還是運作正常，那就大功告成啦！！！
237 | 
238 | ## Put it together
239 | 
240 | 現在看看完整的 Parse tree（console.log）
241 | 
242 | ## 總結
243 | 
244 | 兩個星期的 Syntactic analysis 課程終於完結了，不知道大家吸收了多少，其實編寫 Parser 說難不難，說易不易，基本來說就是一堆 mutual recursion，把大問題斬件成為多個小問題，逐點擊破就可以了，只要大家思緒清晰的話，其實不難（當然要思緒清晰也確實不易）。
245 | 
246 | 大家可以放心，最難捱的時刻已經過了，下一章會比這章容易一點，大家如果仍然未搞清這章的思路的話不妨留下你的問題，西傑一定會儘量解答，拜拜，下星期再見。
247 | 


--------------------------------------------------------------------------------
/docs/4.md:
--------------------------------------------------------------------------------
  1 | # 四、語意分析（Semantic analysis）
  2 | 
  3 | 大家好，又見到西傑了。在上兩章我們探討了如何編寫 Scanner 和 Parser ，能夠把一份程式文件轉變成一棵 Parse tree ，如果文件有 syntax error 的話亦能夠被偵測出來並且告訴開發人員，現在要進行最後一步的分析了。今天要說的是語意分析，即 Semantic analysis ，這是什麼來的？Semantic analysis 要做的工作就是分析語意啦！哈哈。同學們或許你們會問，當我們建立了一棵 Parse tree 之後，不就可以 compile 了嗎？其實不然，你現在有的只是 N 句句子，但這還不是一個完整故事，還要分析一下上文下理電腦才知道你說的是什麼故事， Semantic analysis 就是做這樣的工作了。給你一個例子：
  4 | 
  5 | ```js
  6 | var a:bool = 1 + true;
  7 | ```
  8 | 
  9 | 一個數字跟一個布林值相加是一個怎樣的概念？別說電腦看不懂，連西傑也看不明白，Semantic analysis 要做的就是把這個問題抽出來。
 10 | 
 11 | 由於 Wescript 比較簡單，我們只會做以下幾款分析：
 12 | 
 13 | 一、不可重複定義變數
 14 | 
 15 | 二、自動初始化變數 
 16 | 
 17 | 三、變數要先定義後使用
 18 | 
 19 | 四、變數類型檢查
 20 | 
 21 | ## 不可重複定義變數
 22 | 
 23 | 開始吧，第一步要做的事是要修改一下 Parser ，因為我們做 semantic analysis 是會把一整棵 Parse tree 讀進來，所以我們需要在建立 Parse tree 時記下那些 node 是哪一行建立出來的，那樣我們匯報錯誤時才可以告訴開發人員哪一行出錯，因此就有了以下的修改：
 24 | 
 25 | ```js
 26 | function Node() {
 27 |     this.line = 0;
 28 | }
 29 | Node.prototype.setLine = function (line) {
 30 |     this.line = line;
 31 |     return this;
 32 | }
 33 | ```
 34 | 
 35 | 在 base class `“Node”` 添加一個 method 來記錄行數。之後在每次建立新 node 時都要加上一句
 36 | 
 37 | ```js
 38 | .setLine(this.scanner.currLine)
 39 | ```
 40 | 
 41 | 現在可以開始寫 Analyser 了，先看代碼後解說。
 42 | 
 43 | ```js
 44 | //Analyser class
 45 | function Analyser() {
 46 |     this.vars = {};
 47 | }
 48 | Analyser.prototype.evaluateExpressionBlockNode = function (node) {
 49 |     for (var i = 0, l = node.expressions.length; i < l; i++) {
 50 |         var expressionNode = node.expressions[i];
 51 |         this.evaluateExpressionNode(expressionNode);
 52 |     }
 53 | }
 54 | Analyser.prototype.evaluateExpressionNode = function (node) {
 55 |     if (node instanceof VariableNode) {
 56 |         this.evaluateVariableNode(node);
 57 |     }
 58 | }
 59 | Analyser.prototype.evaluateVariableNode = function (node) {
 60 |     if (this.vars[node.varName]) {
 61 |         //this variable has been declared before
 62 |         //since we can find it in our variable table
 63 |         Errors.push({
 64 |             type: Errors.SEMANTIC_ERROR,
 65 |             msg: "The variable \"" + node.varName + "\" has been declared already",
 66 |             line: node.line
 67 |         });
 68 |     } else {
 69 |         this.vars[node.varName] = node;
 70 |         //if we do not use "else", this variable declaration will replace the previous one
 71 |         //This may result in wrong data type checking later on
 72 |     }
 73 |     this.evaluateExpressionNode(node.initExpressionNode);
 74 | }
 75 | ```
 76 | 
 77 | Analyser 暫時有三個 method，第一個是 `evaluateExpressionBlockNode` ，它要做的工作很簡單，只是純粹把它其下的所有 expression 都遍歷一次，並執行 `evaluateExpressionNode` 。第二個是 `evaluateExpressionNode` ，它負責判斷 node 的類型並分配到不同的 evaluator 。
 78 | 
 79 | 第三個是 `evaluateVariableNode` ，它就是做第一項語意分析的核心程式了，每當我們遇到 `VariableNode` 的時候，就是要定義新變數的時候，所以我們就在這個時候檢查一下是否已經定義了變數。檢查的方法就是先建立一個變數 hash map ，然後每次 evaluate 時都檢查一下這個變數是否已經在 hash map 當中，是的話就算是重複定義變數了，這個時候就要匯報錯誤了。如果還沒有定義的話，我們要把變數放到 hash map 中以便下次檢查。
 80 | 
 81 | 注意，我們不可以用以下這種寫法，因為這會造成變數類型不斷改變的情況，後面做變數類型檢查時有機會出錯。
 82 | 
 83 | ```js
 84 | Analyser.prototype.evaluateVariableNode = function (node) {
 85 |     if (this.vars[node.varName]) {
 86 |         //this variable has been declared before
 87 |         //since we can find it in our variable table
 88 |         Errors.push({
 89 |             type: Errors.SEMANTIC_ERROR,
 90 |             msg: "The variable \"" + node.varName + "\" has been declared already",
 91 |             line: node.line
 92 |         });
 93 |     }
 94 |     this.vars[node.varName] = node;
 95 |     this.evaluateExpressionNode(node.initExpressionNode);
 96 | }
 97 | ```
 98 | 
 99 | 運行一下，看看程式是否正常運作。
100 | 
101 | Analyser 的基本結構就是這樣子了，看起來有點熟悉吧？其實跟 Parser 差不多，都是一堆 mutual recursion 而已，把大問題斬成多個小問題，那就沒問題了。
102 | 
103 | ## 自動初始化變數
104 | 
105 | 下一步是自動初始化變數，這一步應該放在 Parser 中做還是在 semantic analysis 才做，其實都可以，不過西傑認為這應該屬於 Wescript 的特性之一，所以就放在 semantic analysis 才做。
106 | 
107 | ```js
108 | Analyser.prototype.evaluateVariableNode = function (node) {
109 |     if (this.vars[node.varName]) {
110 |         //this variable has been declared before
111 |         //since we can find it in our variable table
112 |         Errors.push({
113 |             type: Errors.SEMANTIC_ERROR,
114 |             msg: "The variable \"" + node.varName + "\" has been declared already",
115 |             line: node.line
116 |         });
117 |     } else {
118 |         this.vars[node.varName] = node;
119 |         //if we do not use "else", this variable declaration will replace the previous one
120 |         //This may result in wrong data type checking later on
121 |     }
122 |     if (node.initExpressionNode) {
123 |         this.evaluateExpressionNode(node.initExpressionNode);
124 |     } else {
125 |         if (node.type == "bool") {
126 |             node.initExpressionNode = new BoolNode("false");
127 |         } else if (node.type == "int") {
128 |             node.initExpressionNode = new IntNode("0");
129 |         }
130 |     }
131 | }
132 | ```
133 | 
134 | 其實就是改寫了一下最底的那一小段，如果有 `initExpressionNode` 的話就 evaluate 一下，否則我們就要自行建立相應的預設數值 node 了（當然，這些是 compiler 建立的就不用再 evaluate 了）。
135 | 
136 | ## 變數要先定義後使用
137 | 
138 | 接下來我們就要檢查一下是不是所有變數都先被定義過然後才被使用。
139 | 
140 | ```js
141 | Analyser.prototype.evaluateIdentifierNode = function (node) {
142 |     if (! this.vars[node.identifier]) {
143 |         Errors.push({
144 |             type: Errors.SEMANTIC_ERROR,
145 |             msg: "Variable \"" + node.identifier + "\" must been declared before using",
146 |             line: node.line
147 |         });
148 |     }
149 | }
150 | ```
151 | 
152 | 技巧照舊，同樣是使用 hash map 來檢查就可以了，沒有難度吧。看看運行結果：
153 | 
154 | 注意，有數個 method 上面沒有解說的，因為它們的做法都很普通，跟之前沒有太大分別，所以就不在此著墨了。
155 | 
156 | ## 變數類型檢查
157 | 
158 | 最後是檢查變數類型，這個比較複雜，我們一步一步來吧。首先要定義兩個常數用來分辨變數的類型：
159 | 
160 | ```js
161 | Analyser.TYPE_BOOL = 1;
162 | Analyser.TYPE_INT = 2;
163 | ```
164 | 
165 | 然後我們就要在 evaluate 時為某些 node 賦予類型了，首先是 `IntNode` 和 `BoolNode` ，很明顯地它們分別是 `integer` 和 `boolean` 類型了。
166 | 
167 | ```js
168 | Analyser.prototype.evaluateBoolNode = function (node) {
169 |     node.valueType = Analyser.TYPE_BOOL;
170 | }
171 | Analyser.prototype.evaluateIntNode = function (node) {
172 |     node.valueType = Analyser.TYPE_INT;
173 | }
174 | ```
175 | 
176 | 另外是 compound node，現在先做一個較簡單的版本，並未處理有運算符的情況：
177 | 
178 | ```js
179 | Analyser.prototype.evaluateCompoundNode = function (node) {
180 |     var type = null;
181 |     for (var i = 0, l = node.nodes.length; i < l; i++) {
182 |         var subNode = node.nodes[i];
183 |         this.evaluateExpressionNode(subNode);
184 |         if (type == null) {
185 |             type = subNode.valueType;
186 |         }
187 |     }
188 |     node.valueType = type;
189 | }
190 | ```
191 | 
192 | 這裡假設 compound node 裡沒有運算符，並只有一個數值，我們就可以得出以上代碼。
193 | 
194 | 還有 identifier node ，我們也要為它們賦予類型，所以要改寫一下原本的 `evaluateIdentifierNode` 。
195 | 
196 | ```js
197 | Analyser.prototype.evaluateIdentifierNode = function (node) {
198 |     if (! this.vars[node.identifier]) {
199 |         Errors.push({
200 |             type: Errors.SEMANTIC_ERROR,
201 |             msg: "Variable \"" + node.identifier + "\" must been declared before using",
202 |             line: node.line
203 |         });
204 |     } else {
205 |         node.valueType = this.vars[node.identifier].valueType;
206 |     }
207 | }
208 | ```
209 | 
210 | 最後我們要改寫 `evaluateVariableNode` ，加入變數類型檢查。
211 | 
212 | ```js
213 | Analyser.prototype.evaluateVariableNode = function (node) {
214 |     if (this.vars[node.varName]) {
215 |         //this variable has been declared before
216 |         //since we can find it in our variable table
217 |         Errors.push({
218 |             type: Errors.SEMANTIC_ERROR,
219 |             msg: "The variable \"" + node.varName + "\" has been declared already",
220 |             line: node.line
221 |         });
222 |     } else {
223 |         this.vars[node.varName] = node;
224 |         //if we do not use "else", this variable declaration will replace the previous one
225 |         //This may result in wrong data type checking later on
226 |     }
227 |     if (node.initExpressionNode) {
228 |         this.evaluateExpressionNode(node.initExpressionNode);
229 |         if (node.type == "bool" &&
230 |             node.initExpressionNode.valueType != Analyser.TYPE_BOOL) {
231 |             Errors.push({
232 |                 type: Errors.SEMANTIC_ERROR,
233 |                 msg: "The variable \"" + node.varName + "\" is Boolean type but the assignment value is not Boolean",
234 |                 line: node.line
235 |             });
236 |         } else if (node.type == "int" &&
237 |             node.initExpressionNode.valueType != Analyser.TYPE_INT) {
238 |             Errors.push({
239 |                 type: Errors.SEMANTIC_ERROR,
240 |                 msg: "The variable \"" + node.varName + "\" is Integer type but the assignment value is not Integer",
241 |                 line: node.line
242 |             });
243 |         }
244 |     } else {
245 |         if (node.type == "bool") {
246 |             node.initExpressionNode = new BoolNode("false");
247 |         } else if (node.type == "int") {
248 |             node.initExpressionNode = new IntNode("0");
249 |         }
250 |     }
251 |     node.valueType = (node.type == "bool" ? Analyser.TYPE_BOOL :Analyser.TYPE_INT);
252 | }
253 | ```
254 | 
255 | 在 initialise 時，我們會檢查一下變數的類型和初始化類型是否匹配，不是的話就需要報錯。現在試試運行一下：
256 | 
257 | 現在再改寫一下 `evaluateCompoundNode` ，因為 compound node 通常都不只有一個數值，而是一堆數值加一堆運算符。（記住一點，compound node 裡的運算是排列好的，可以由左至右直接讀！）做法是先記下左面的數值類型，以及運算符類型，當遇到右面的數值才看看有沒有語意錯誤，於是得出以下代碼。
258 | 
259 | ```js
260 | Analyser.prototype.evaluateCompoundNode = function (node) {
261 |     var type = null;
262 |     var operator = null;
263 |     for (var i = 0, l = node.nodes.length; i < l; i++) {
264 |         var subNode = node.nodes[i];
265 |         this.evaluateExpressionNode(subNode);
266 |         if (type == null) {
267 |             type = subNode.valueType;
268 |         } else {
269 |             if (subNode instanceof OperatorNode) {
270 |                 operator = subNode;
271 |             } else {
272 |                 if (operator instanceof OperatorPlusNode) {
273 |                     if (type != Analyser.TYPE_INT || subNode.valueType !=Analyser.TYPE_INT) {
274 |                         Errors.push({
275 |                             type: Errors.SEMANTIC_ERROR,
276 |                             msg: "Require Integers on both sides of \"+\"",
277 |                             line: operator.line
278 |                         });
279 |                     }
280 |                     type = Analyser.TYPE_INT;
281 |                     operator = null;
282 |                 } else if (operator instanceof OperatorEqualNode) {
283 |                     if ((type == Analyser.TYPE_BOOL && subNode.valueType!= Analyser.TYPE_BOOL) ||
284 |                         (type == Analyser.TYPE_INT && subNode.valueType !=Analyser.TYPE_INT)) {
285 |                         Errors.push({
286 |                             type: Errors.SEMANTIC_ERROR,
287 |                             msg: "Require the type on both sides of \"==\" to be the same",
288 |                             line: operator.line
289 |                         });
290 |                     }
291 |                     type = Analyser.TYPE_BOOL;
292 |                     operator = null;
293 |                 }
294 |             }
295 |         }
296 |     }
297 |     node.valueType = type;
298 | }
299 | ```
300 | 
301 | 又運行一下看看結果吧：
302 | 
303 | 現在把餘下可能出現在 compound node 中的 node 都編寫下來！
304 | 
305 | ```js
306 | Analyser.prototype.evaluateCompoundNode = function (node) {
307 |     var type = null;
308 |     var operator = null;
309 |     for (var i = 0, l = node.nodes.length; i < l; i++) {
310 |         var subNode = node.nodes[i];
311 |         this.evaluateExpressionNode(subNode);
312 |         if (type == null) {
313 |             type = subNode.valueType;
314 |         } else {
315 |             if (subNode instanceof OperatorNode) {
316 |                 operator = subNode;
317 |             } else {
318 |                 if (operator instanceof OperatorPlusNode ||
319 |                     operator instanceof OperatorMinusNode ||
320 |                     operator instanceof OperatorMultNode ||
321 |                     operator instanceof OperatorDivNode ||
322 |                     operator instanceof OperatorModNode) {
323 |                     if (type != Analyser.TYPE_INT || subNode.valueType !=Analyser.TYPE_INT) {
324 |                         Errors.push({
325 |                             type: Errors.SEMANTIC_ERROR,
326 |                             msg: "Require Integers on both sides of arithmetic operator",
327 |                             line: operator.line
328 |                         });
329 |                     }
330 |                     type = Analyser.TYPE_INT;
331 |                     operator = null;
332 |                 } else if (operator instanceof OperatorAndNode ||
333 |                             operator instanceof OperatorOrNode) {
334 |                     if (type != Analyser.TYPE_BOOL || subNode.valueType !=Analyser.TYPE_BOOL) {
335 |                         Errors.push({
336 |                             type: Errors.SEMANTIC_ERROR,
337 |                             msg: "Require Booleans on both sides of logical operator",
338 |                             line: operator.line
339 |                         });
340 |                     }
341 |                     type = Analyser.TYPE_BOOL;
342 |                     operator = null;
343 |                 } else if (operator instanceof OperatorEqualNode ||
344 |                             operator instanceof OperatorNotEqualNode) {
345 |                     if ((type == Analyser.TYPE_BOOL && subNode.valueType!= Analyser.TYPE_BOOL) ||
346 |                         (type == Analyser.TYPE_INT && subNode.valueType !=Analyser.TYPE_INT)) {
347 |                         Errors.push({
348 |                             type: Errors.SEMANTIC_ERROR,
349 |                             msg: "Require the type on both sides of comparison operator to be the same",
350 |                             line: operator.line
351 |                         });
352 |                     }
353 |                     type = Analyser.TYPE_BOOL;
354 |                     operator = null;
355 |                 } else if (operator instanceof OperatorAssignNode) {
356 |                     if ((type == Analyser.TYPE_BOOL && subNode.valueType!= Analyser.TYPE_BOOL) ||
357 |                         (type == Analyser.TYPE_INT && subNode.valueType !=Analyser.TYPE_INT)) {
358 |                         Errors.push({
359 |                             type: Errors.SEMANTIC_ERROR,
360 |                             msg: "Require the type on both sides of assignment operator to be the same",
361 |                             line: operator.line
362 |                         });
363 |                     }
364 |                 } else if (operator instanceof OperatorPlusAssignNode ||
365 |                             operator instanceof OperatorMinusAssignNode) {
366 |                     if (type != Analyser.TYPE_INT || subNode.valueType !=Analyser.TYPE_INT) {
367 |                         Errors.push({
368 |                             type: Errors.SEMANTIC_ERROR,
369 |                             msg: "Require the type on both sides of plus/minus assignment operator to be Integer",
370 |                             line: operator.line
371 |                         });
372 |                     }
373 |                 }
374 |             }
375 |         }
376 |     }
377 |     node.valueType = type;
378 | }
379 | ```
380 | 
381 | 其實工作跟之前的都差不多，只要用多幾個 if 就可以了。另外，這次亦處理了 unary 運算符，做法是先 evaluate 一下那些運算符下的 node ，之後再設定自己類型。
382 | 
383 | ```js
384 | Analyser.prototype.evaluateNegateNode = function (node) {
385 |     this.evaluateExpressionNode(node.node);
386 |     node.valueType = node.node.valueType;
387 | }
388 | Analyser.prototype.evaluateNotNode = function (node) {
389 |     this.evaluateExpressionNode(node.node);
390 |     node.valueType = node.node.valueType;
391 | }
392 | Analyser.prototype.evaluateParenNode = function (node) {
393 |     this.evaluateExpressionNode(node.node);
394 |     node.valueType = node.node.valueType;
395 | }
396 | Analyser.prototype.evaluatePostIncrementNode = function (node) {
397 |     this.evaluateExpressionNode(node.node);
398 |     node.valueType = node.node.valueType;
399 | }
400 | Analyser.prototype.evaluatePreIncrementNode = function (node) {
401 |     this.evaluateExpressionNode(node.node);
402 |     node.valueType = node.node.valueType;
403 | }
404 | Analyser.prototype.evaluatePostDecrementNode = function (node) {
405 |     this.evaluateExpressionNode(node.node);
406 |     node.valueType = node.node.valueType;
407 | }
408 | Analyser.prototype.evaluatePreDecrementNode = function (node) {
409 |     this.evaluateExpressionNode(node.node);
410 |     node.valueType = node.node.valueType;
411 | }
412 | ```
413 | 
414 | 現在還有什麼 node 未被處理呢？就是 `while` 跟 `if` 。它們的處理方法都不難，只是使用之前編寫了的 recursion 就可以了（這就是 recursion 的威力嚕）。
415 | 
416 | ```js
417 | Analyser.prototype.evaluateIfNode = function (node) {
418 |     this.evaluateExpressionNode(node.conditionExpression);
419 |     if (node.conditionExpression.valueType != Analyser.TYPE_BOOL) {
420 |         Errors.push({
421 |             type: Errors.SEMANTIC_ERROR,
422 |             msg: "The condition must be of Boolean type",
423 |             line: node.conditionExpression.line
424 |         });
425 |     }
426 |     this.evaluateExpressionBlockNode(node.expressions);
427 |     this.evaluateExpressionBlockNode(node.elseExpressions);
428 | }
429 | Analyser.prototype.evaluateWhileNode = function (node) {
430 |     this.evaluateExpressionNode(node.conditionExpression);
431 |     if (node.conditionExpression.valueType != Analyser.TYPE_BOOL) {
432 |         Errors.push({
433 |             type: Errors.SEMANTIC_ERROR,
434 |             msg: "The condition must be of Boolean type",
435 |             line: node.conditionExpression.line
436 |         });
437 |     }
438 |     this.evaluateExpressionBlockNode(node.expressions);
439 | }
440 | ```
441 | 
442 | 最後運行一下：
443 | 
444 | ## 總結
445 | 
446 | 語意分析（ Semantic analysis ）到此算是完了，大家學到了什麼呢？其實這一章的編寫風格跟上一章寫 Parser 真的差不多，都是一堆 mutual recursion 湊合起來，組合成一個強大的分析器。現在大家可算是過了最艱難的時刻了，接下來的幾章都應該比較簡單（或者可能是因為大家都認識了這種編寫方法了吧），大家請緊記熟讀這前幾章，打好基礎，咱們下禮拜再來。
447 | 


--------------------------------------------------------------------------------
/docs/5.md:
--------------------------------------------------------------------------------
  1 | # 五、虛擬機（Virtual Machine）
  2 | 
  3 | 為什麼我們需要虛擬機呢？因為我們要運行我們編譯好的程式。那為什麼我們不直接編譯到 native binary code 呢？這是因為真正的電腦資源十分有限，我們編寫起上來會比用虛擬機的做法難很多，那就嚴重超出了本教程的範圍了（其實是因為西傑還不太認識這個課題，要偷懶一下）。當然，虛擬機的做法和 native code 的也有幾分相似，這裡就給讀者們一個初步的概念，大家真的想再接觸多一點底層的東西就要自己摸索一下了……
  4 | 
  5 | 接著下來，我們就稱我們的虛擬機為 Wemachine 吧。
  6 | 
  7 | 遊戲開始之前，我們當然要先定義一下遊戲規則﹣ Instruction set architecture（ISA），Wemachine 會支持以下的 Instruction set：
  8 | 
  9 | | Instruction | Example | Meaning | Comments |
 10 | | --- | --- | --- | --- |
 11 | | Arithmetic |
 12 | | add | add $1,$2,$3 | $1 = $2 + $3 |   |
 13 | | subtract | sub $1,$2,$3 | $1 = $2 - $3 |   |
 14 | | mult | mult $1,$2,$3 | $1 = $2 * $3 |   |
 15 | | div | div $1,$2,$3 | $1 = $2 / $3 |   |
 16 | | modulo | mod $1,$2,$3 | $1 = $2 % $3 |   |
 17 | | add immediate | addi $1,$2,100 | $1 = $2 + 100 |   |
 18 | | subtract immediate | subi $1,$2,100 | $1 = $2 - 100 |   |
 19 | | multiply immediate | multi $1,$2,100 | $1 = $2 * 100 |   |
 20 | | divide immediate | divi $1,$2,100 | $1 = $2 / 100 |   |
 21 | | modulo immediate | modi $1,$2,100 | $1 = $2 % 100 |   |
 22 | | Logical |
 23 | | and | and $1,$2,$3 | $1 = $2 & $3 | Logical AND |
 24 | | or | or $1,$2,$3 | $1 = $2 \| $3 | Logical OR |
 25 | | Data transfer |
 26 | | move data from register to another register | move $1,$2 | $1 = $2 |   |
 27 | | load data | lwi $1,100 | $1 = 100 |   |
 28 | | load upper immediate | lui $1,100 | $1 = 100 * 2^16 |   |
 29 | | Conditional branch |
 30 | | define LABEL | label LABEL | LABEL: |   |
 31 | | branch on equal | beq $1,$2,LABEL | if ($1 == $2) goto LABEL |   |
 32 | | branch on not equal | bne $1,$2,LABEL | if ($1 != $2) goto LABEL |   |
 33 | | branch on less than | bl $1,$2,LABEL | if ($1 < $2) goto LABEL |   |
 34 | | branch on greater than | bg $1,$2,LABEL | if ($1 > $2) goto LABEL |   |
 35 | | branch on less than or equal | ble $1,$2,LABEL | if ($1 <= $2) goto LABEL |   |
 36 | | branch on greater than or equal | bge $1,$2,LABEL | if ($1 >= $2) goto LABEL |   |
 37 | | Unconditional jump |
 38 | | jump | j LABEL | goto LABEL |   |
 39 | | Others |
 40 | | print | print $1 | print $1 |   |
 41 | 
 42 | 而且西傑將會假設我們的虛擬機有無限個 register ，以便我們開發。
 43 | 
 44 | 開始寫程式了，第一步我們要做的是讓 Wemachine 學會讀那堆指令，為了方便讀取，我們要限定指令的格式，格式如下：
 45 | 
 46 | ```
 47 | opcode1 operand1, operand2, operand3;
 48 | 
 49 | opcode2 operand1, operand2;
 50 | ```
 51 | 
 52 | 每個指令由分號分隔開，在第一個空格號前的為指令的 opcode ，接下來是一至三個 operand ，以逗號分隔開，就是這麼簡單了。現在先開始寫一個 Parser 來分析以此格式寫的程式，西傑相信這一步應該很簡單，現在看看運行結果。
 53 | 
 54 | ![](./images/5-1.png)
 55 | 
 56 | 現在要開始實現功能了，首先要有 register 以記下數據，這個西傑會用一個 array 來做，$0 指向 array 第 0 個元素，$1 指向第 1 個元素，如此類推。然後我們要寫兩個 method 來 get set register 的數據，如果那個 register 未被使用過的話，它的數據就會是 0。於是我們就有了以下的程式：
 57 | 
 58 | ```js
 59 | Wemachine.prototype.resolveRegister = function (operand) {
 60 |     if (typeof operand == "string" && operand.length > 0) {
 61 |         if (operand[0] == "$") {
 62 |             return parseInt(operand.substr(1));
 63 |         }
 64 |     }
 65 |     Errors.push({
 66 |         type: Errors.RUNTIME_ERROR,
 67 |         msg: "Fail to resolve register",
 68 |         line: 0
 69 |     });
 70 |     return -1;
 71 | }
 72 | Wemachine.prototype.getRegisterContent = function (operand) {
 73 |     operand = this.resolveRegister(operand);
 74 |     if (operand != -1) {
 75 |         if (this.registers.length > operand) {
 76 |             return this.registers[operand];
 77 |         }
 78 |     }
 79 |     return 0;
 80 | }
 81 | Wemachine.prototype.setRegisterContent = function (operand, value) {
 82 |     operand = this.resolveRegister(operand);
 83 |     if (operand != -1) {
 84 |         this.registers[operand] = value;
 85 |     }
 86 | }
 87 | ```
 88 | 
 89 | 現在開始要編寫執行功能，執行功能很簡單，只是把每一句指令都執行一次就可以了。
 90 | 
 91 | ```js
 92 | Wemachine.prototype.run = function () {
 93 |     for (var i = 0, l = this.instructions.length; i < l; i++) {
 94 |         var instruction = this.instructions[i];
 95 |         this[instruction.opcode].apply(this, instruction.operands);
 96 |     }
 97 | }
 98 | ```
 99 | 
100 | 最後就是要編寫指令的實際功能了，lwi 要做的就是直接把一個數值寫到 register 中，所以我們只需 call 一下 `setRegisterContent` 就可以了，而 print 要做的就是把一個 register 中的數值寫到 output 去，所以就有以下的代碼了：
101 | 
102 | ```js
103 | Wemachine.prototype.lwi = function (operand1, operand2) {
104 |     this.setRegisterContent(operand1, parseInt(operand2));
105 | }
106 | Wemachine.prototype.print = function (operand1) {
107 |     var val = this.getRegisterContent(operand1);
108 |     log(val);
109 | }
110 | ```
111 | 
112 | 放在一起，我們運行一下程式，看看結果。
113 | 
114 | 很好，現在就把餘下的 Data transfer 功能都編寫下來吧。
115 | 
116 | ```js
117 | Wemachine.prototype.move = function (operand1, operand2) {
118 |     this.setRegisterContent(operand1, this.getRegisterContent(operand2));
119 | }
120 | Wemachine.prototype.lwi = function (operand1, operand2) {
121 |     this.setRegisterContent(operand1, parseInt(operand2));
122 | }
123 | Wemachine.prototype.lui = function (operand1, operand2) {
124 |     this.setRegisterContent(operand1, parseInt(operand2) << 16);
125 | }
126 | ```
127 | 
128 | 很直觀，沒什麼特別之處，這裡就不多解釋了。 Arithmetic 的處理其實都很直觀，唯一需要提醒讀者的地方是，做除法運算時有可能會出現 Division by zero 的情況，所以我們要發出 runtime error 。
129 | 
130 | Logical 跟 Arithmetic 的處理方法很相似，這裡就不著墨了。現在到最後要寫 `branch` 和 `jump` 了，要實現這個功能，我們要改變一下程式執行的方法，記得我們的 `run` method 嗎？我們的 `run` method 是用 `i` 來做 loop counter 的，現在要改變一下了，要用 program counter 取代 i，這樣我們才可以在其他方法中改變運行次序。
131 | 
132 | 在 `label ()` 中，我們要把想定義的 label 和 program counter 的數值放到 map 裡，這樣我們才可以在後面的程式設定要跳到哪一個位置。現在看看我們的 bne ：
133 | 
134 | ```js
135 | Wemachine.prototype.bne = function (operand1, operand2, operand3) {
136 |     var nextPC = this.labelMap[operand3];
137 |     if (nextPC == null) {
138 |         Errors.push({
139 |             type: Errors.RUNTIME_ERROR,
140 |             msg: "Label not found",
141 |             line: 0
142 |         });
143 |     } else {
144 |         var val1 = this.getRegisterContent(operand1);
145 |         var val2 = this.getRegisterContent(operand2);
146 |         if (val1 != val2) {
147 |             this.pc = nextPC;
148 |         }
149 |     }
150 | }
151 | ```
152 | 
153 | 首先要在 map 中找找有沒有相關的 label ，沒有的話就要發出錯誤，有的話就要看看條件是否成立，是的話就要把 program counter 設定為要跳到的位置，這樣程式在下次 loop 的時候才可以跳到 label 那處。現在看看程式運行結果吧！
154 | 
155 | 現在把剩下來的 `branch` 都寫下來。
156 | 
157 | 大功告成，怎麼樣，如西傑之前所說，這部份不是很難吧，如果大家曾經學過 assembly language 的話應該更容易上手！現在有了這個簡單的虛擬機，下一步我們就可以把之前建立的 parse tree 變成可以在這個 Wemachine 運行的代碼了，下個星期再見吧。
158 | 


--------------------------------------------------------------------------------
/docs/6-1.md:
--------------------------------------------------------------------------------
  1 | # 六、編譯器（Compiler）﹣生成代碼（Code Generation）（上）
  2 | 
  3 | 終於來到最重要一步了，我們要把之前建立好的 parse tree 變成可以被 Wemachine 運行的代碼，有了編譯器才是真正的 compiler！在這一步，我們還是要用那個老技巧，即 mutual recursion，來遍歷我們的 parse tree，並輸出相應的代碼。事不宜遲，現在就開始了。
  4 | 
  5 | 首先， Copy and paste 一下，把 Analyser 的 method 複製一次，並且把所有屬於 Analyser 做的工作都移除，只留下 `evaluate*` 的語句，以保留 mutual recursion 來遍歷我們的 parse tree。然後我們需要建立一個安排 register 的機制，由於我們的 Wemachine 有無限個 register 可以用，所以我們只需要一直取新的 register 來用就可以，而不用重用已被使用的 register（實際的情況當然不會有無限的 register，這需要利用一些技巧才可以）。
  6 | 
  7 | ```js
  8 | function Compiler() {
  9 |     this.lineBreak = "\n";
 10 |     this.register = 0;
 11 | }
 12 | Compiler.prototype.getMachineCode = function (expressionBlockNode) {
 13 |     this.code = "";
 14 |     this.evaluateExpressionBlockNode(expressionBlockNode);
 15 |     return this.code;
 16 | }
 17 | Compiler.prototype.getNextRegister = function () {
 18 |     return "$" + this.register++;
 19 | }
 20 | Compiler.prototype.writeln = function (code) {
 21 |     this.code += code + this.lineBreak;
 22 | }
 23 | ```
 24 | 
 25 | 這就是我們 compiler 的基本功能了。現在先來處理最簡單的語句吧，第一樣要處理的是純數字，遇到一個數字的時候，我們只需簡單地把數字放到 register 中，並把 register 名稱 return 出去就可以了。
 26 | 
 27 | ```js
 28 | Compiler.prototype.evaluateIntNode = function (node) {
 29 |     var register = this.getNextRegister();
 30 |     this.writeln("lwi " + register + ", " + node.data + ";");
 31 |     return register;
 32 | }
 33 | ```
 34 | 
 35 | 為什麼要返回 register 名稱呢？因為我們後面會用到它來運算，例如加減乘除等等的運算，我們需要兩個 register 的名稱才能生成對應的指令。
 36 | 處理完數字之後，我們要處理其中一個最複雜的東西，就是 compound node 了。其實說它複雜也不是真的很複雜，跟我們的 Analyser 做的東西差不多，只是今次要用數個 register 來做。來，看看代碼：
 37 | 
 38 | ```js
 39 | Compiler.prototype.evaluateCompoundNode = function (node) {
 40 |     var operator = null;
 41 |     var resultRegister;
 42 |     for (var i = 0, l = node.nodes.length; i < l; i++) {
 43 |         var subNode = node.nodes[i];
 44 |         if (subNode instanceof OperatorNode) {
 45 |             operator = subNode;
 46 |         } else {
 47 |             if (resultRegister == null) {
 48 |                 resultRegister = this.getNextRegister();
 49 |                 this.writeln("move " + resultRegister + "," +this.evaluateExpressionNode(subNode) + ";");
 50 |             } else {
 51 |                 var currRegister = this.evaluateExpressionNode(subNode);
 52 |                 if (operator instanceof OperatorPlusNode) {
 53 |                     this.writeln("add " + resultRegister + "," +resultRegister + "," + currRegister + ";");
 54 |                 }
 55 |             }
 56 |         }
 57 |     }
 58 |     return resultRegister;
 59 | }
 60 | ```
 61 | 
 62 | 這段程式在做什麼呢？這段程式有一個 for loop ， loop 裡面做的跟 Analyser 差不多，就是讀一個 node 進來，是運算元的話就 evaluate 一下，並把結果放到另一個 register 中，然後再讀一個運算符 node ，把兩邊的數值進行一下運算，再返回結果，這就完成了 compound node 的基本流程了。
 63 | 
 64 | 最後再寫一下 print，以便我們之後 debug ， print 要做的事很簡單，就是直接輸出 print 指令，以列印出 register 裡的內容。
 65 | 
 66 | ```js
 67 | Compiler.prototype.evaluatePrintNode = function (node) {
 68 |     var register = this.evaluateExpressionNode(node.expressionNode);
 69 |     this.writeln("print " + register + ";");
 70 | }
 71 | ```
 72 | 
 73 | 現在看看運行結果吧：
 74 | 
 75 | 第一步成功了，現在就把剩餘的運算符都編寫出來吧。加減乘除及取餘數的做法都差不多，但相等的比較就很不同了，要記住， instruction set 提供的指令通常都很有限，很多時我們都要模擬一些指令，例如 `“==”` ，我們的 instruction set 裡只提供 beq ，那麼我們只能靠它來模擬這個比較。又，我們的 Wemachine 不能跳到未曾運行到的 label ，所以我們在 compile 的時候也要考慮目標機器運行的特性。現在看看 `“==”` 是如何模擬的：
 76 | 
 77 | ```js
 78 | var doneLbl = this.getNextLabel();
 79 | var falseLbl = this.getNextLabel();
 80 | var trueLbl = this.getNextLabel();
 81 | var doneRegister = this.getNextRegister();
 82 | var tempResultRegister = this.getNextRegister();
 83 | var aRegister = this.getNextRegister();
 84 | var bRegister = this.getNextRegister();
 85 | this.writeln("move " + aRegister + "," + resultRegister + ";");
 86 | this.writeln("move " + bRegister + "," + currRegister + ";");
 87 | this.writeln("lwi " + doneRegister + ",0;");
 88 | this.writeln("label " + doneLbl + ";");
 89 | this.writeln("label " + trueLbl + ";");
 90 | this.writeln("add " + aRegister + "," + aRegister + "," + doneRegister +";");
 91 | this.writeln("lwi " + tempResultRegister + ",1;");
 92 | this.writeln("beq " + doneRegister + "," + this.trueRegister + "," +doneLbl + ";");
 93 | this.writeln("label " + falseLbl + ";");
 94 | this.writeln("lwi " + tempResultRegister + ",0;");
 95 | this.writeln("beq " + doneRegister + "," + this.trueRegister + "," +doneLbl + ";");
 96 | this.writeln("label " + doneLbl + ";");
 97 | this.writeln("lwi " + doneRegister + ",1;");
 98 | this.writeln("beq " + aRegister + "," + bRegister + "," + trueLbl + ";");
 99 | this.writeln("move " + resultRegister + "," + tempResultRegister + ";");
100 | ```
101 | 
102 | 比你想像中的要複雜得多吧？在有限的資源下，我們只能這樣做。這段代碼在做什麼呢？首先我們要把準備比較的項目抄到兩個新的 register 中，以防止改變了原本的數值。然後，我們設定一個 `doneRegister` 來表示我們的比較到底做完了沒有，然後是設定兩個 label ，一個叫 `doneLbl` ，這並不是真的用來跳轉的 label，而只是防止 runtime error 而事先定義的 label，稍後我們仍然會再重新定義它的位置。
103 | 
104 | 在 `trueLbl` 下要做三件事，第一是把 `aRegister` 的數值加上 `doneRegister` 的數值，第一次執行 `doneRegister` 的數值是 0 ，所以不會改變 `aRegister` ，下一次執行 `doneRegister` 的數值就是 1 ，這就會改變 `aRegister` ，這有助我們離開這段比較程式。然後就是真正的設定結果為 1（亦即 true），再接下來的是看看 `doneRegister` 是不是 1 ，是的話就直接跳出設定 `true/false` 的程式，否則繼續初始化比較程式。 `falseLbl` 下做的事都差不多，不詳述。
105 | 
106 | 到 `doneLbl` 了，它要做的事是設定 `doneRegister` 的數值為 1 ，並比較 `aRegister` 和 `bRegister` 的數值，相等就跳到 `trueLbl` 以設定結果為 1 ，而這次執行 `trueLbl` 就和初始化時有所不同了，因為這時 `doneRegister` 的數值是 1 ，所以會令 `aRegister` 的數值有所改變，那麼下次再比較 `aRegister` 和 `bRegister` 時結果就會不同，這就可以避免 infinite loop。最後就是把 `tempResultRegister` 中的東西抄到 `resultRegister` 中，那麼整個比較過程就完了。
107 | 
108 | 編寫不等比較和 `and/or` 都跟之前使用的技巧很相似，這裡就不詳述了，現在就欠 assign node 未做，但做這個之前我們先要處理變數。變數的處理不難，只要有個 hash map 記下變數的數值儲在哪個 register 中就可以了，以後使用這個變數就用同一個 register 。
109 | 
110 | ```js
111 | Compiler.prototype.evaluateVariableNode = function (node) {
112 |     var init = null;
113 |     if (node.initExpressionNode) {
114 |         init = this.evaluateExpressionNode(node.initExpressionNode);
115 |     }
116 |     var reg = this.getNextRegister();
117 |     this.varMap[node.varName] = reg;
118 |     this.writeln("move " + reg + "," + init + ";");
119 | }
120 | Compiler.prototype.evaluateIdentifierNode = function (node) {
121 |     var reg = this.varMap[node.identifier];
122 |     return reg;
123 | }
124 | ```
125 | 
126 | 當然，如果變數有 initialise block 的話我們也要把數值抄到變數的 register 中。
127 | 
128 | 好了，一切正常。把 assign node 也處理掉：
129 | 
130 | ```js
131 | if (operator instanceof OperatorAssignNode) {
132 |     this.writeln("move " + this.evaluateIdentifierNode(operand) + "," +currRegister + ";");
133 | } else if (operator instanceof OperatorPlusAssignNode) {
134 |     var reg = this.evaluateIdentifierNode(operand);
135 |     this.writeln("add " + reg + "," + reg + "," + currRegister + ";");
136 | } else if (operator instanceof OperatorMinusAssignNode) {
137 |     var reg = this.evaluateIdentifierNode(operand);
138 |     this.writeln("sub " + reg + "," + reg + "," + currRegister + ";");
139 | }
140 | ```
141 | 
142 | 這部份應該沒有什麼懸念，只是新加了一個叫做 `operand` 的變數，是用來儲存要 assign 到的變數，以便要真正 assign 時可以用來 resolve 正確的 register。再看一下結果：
143 | 
144 | 接下來要編寫的是一堆 unary operator ，先看看代碼：
145 | 
146 | ```js
147 | Compiler.prototype.evaluateNegateNode = function (node) {
148 |     var reg = this.evaluateExpressionNode(node.node);
149 |     this.writeln("multi " + reg + "," + reg + ",-1;");
150 |     return reg;
151 | }
152 | Compiler.prototype.evaluateNotNode = function (node) {
153 |     var reg = this.evaluateExpressionNode(node.node);
154 |     this.writeln("addi " + reg + "," + reg + ",1;");
155 |     this.writeln("modi " + reg + "," + reg + ",2;");
156 |     return reg;
157 | }
158 | Compiler.prototype.evaluateParenNode = function (node) {
159 |     return this.evaluateExpressionNode(node.node);
160 | }
161 | Compiler.prototype.evaluatePostIncrementNode = function (node) {
162 |     var reg = this.evaluateExpressionNode(node.node);
163 |     this.writelnToBuffer("addi " + reg + "," + reg + ",1;");
164 |     return reg;
165 | }
166 | Compiler.prototype.evaluatePreIncrementNode = function (node) {
167 |     var reg = this.evaluateExpressionNode(node.node);
168 |     this.writeln("addi " + reg + "," + reg + ",1;");
169 |     return reg;
170 | }
171 | Compiler.prototype.evaluatePostDecrementNode = function (node) {
172 |     var reg = this.evaluateExpressionNode(node.node);
173 |     this.writelnToBuffer("subi " + reg + "," + reg + ",1;");
174 |     return reg;
175 | }
176 | Compiler.prototype.evaluatePreDecrementNode = function (node) {
177 |     var reg = this.evaluateExpressionNode(node.node);
178 |     this.writeln("subi " + reg + "," + reg + ",1;");
179 |     return reg;
180 | }
181 | ```
182 | 
183 | 大抵上都很直觀，這裡只抽兩個比較特別的來說。第一個是 `not` ， `not` 的意思就是 `true` 變 `false` ， `false` 變 `true` ，由於 Wemachine 沒有提供指令，我們只能模擬，就是一條很簡單的數學： `a = (a + 1) % 2` ，那就可以 1 變 0 ， 0 變 1 了。第二個是 post increment ，post increment 的意思是先執行整句 expression ，執行完就把變數 + 1 ，這個跟我們之前的做法都不一樣，我們需要把指令寫到 buffer 中，等到 expression 執行完才把 buffer 的東西抄到 output stream 去。
184 | 
185 | 最後看一看結果：
186 | 
187 | 現在只剩下 `if` 和 `while` 未編譯，這兩個是比較特別的，因為這需要改動 Wemachine 的 label 機制才可以完美編譯，所以留待下一節才討論如何編譯。大家現在先摸熟以上的教學吧！


--------------------------------------------------------------------------------
/docs/6-2.md:
--------------------------------------------------------------------------------
  1 | # 六、編譯器（Compiler）﹣生成代碼（Code Generation）（下）
  2 | 
  3 | 上回提要：我們開始著手編寫編譯部份，從那棵 Parse tree 生成代碼，做法跟之前的 Analyser 差不多，都是用 mutual recursion 來遍歷 Parse tree 。上回我們已經寫好了大部份的編譯，只剩下兩款 expression 未寫好，即 `if` 和 `while` ，本節就是要把這兩款 expression 都寫出來。
  4 | 
  5 | 還記得我們 Wemachine 是如何做 label 的嗎？我們的 Wemachine 只能跳到程式上面已經定義好的 label ，如果要跳轉到的 label 之前還未被定義的話就會當是 runtime error，但問題是，我們要做 `if-else` 時，如果條件式不成立的話就要跳到 else block ，那可能是十丈遠，不可以跟之前做「相等比較」一樣先執行一次所有程式碼再判斷，因此，我們必須要有一個支援跳到在後面定義的 label 的 Wemachine ，所以第一步我們就要改寫 Wemachine 了。
  6 | 
  7 | 西傑將會新增一個指令，叫做 `vlabel` ，定義方法跟 `label` 一樣，只是跳轉的時候，我們要在 `label` 前面加上 `“_”` ，以標示要跳轉的是 `vlabel` 而不是 `label` 。另外，跟 `label` 一樣，我們要定義一個 hash map 以記下 program counter ，但這次不同的是，我們要在程式開始之前就建立好這個 hash map ，否則我們就不能跳轉到未被定義的 label 了。因此，我們最好就是在一開始建立 instruction 列表時就順便建立這個 hash map 。
  8 | 
  9 | ```js
 10 | function Wemachine(code) {
 11 |     //simple parser
 12 |     this.vlabelMap = {};
 13 |     var instructions = code.split(";");
 14 |     var processedInstructions = [];
 15 |     for (var i = 0, l = instructions.length; i < l; i++) {
 16 |         var instruction = trim(instructions[i]);
 17 |         if (instruction == "") {
 18 |             continue;
 19 |         }
 20 |         var insObj = {};
 21 |         var opcode = "";
 22 |         for (var j = 0, k = instruction.length; j < k; j++) {
 23 |             if (instruction[j] == " ") {
 24 |                 instruction = instruction.substr(j);
 25 |                 break;
 26 |             } else {
 27 |                 opcode += instruction[j];
 28 |             }
 29 |         }
 30 |         insObj.opcode = opcode;
 31 |         insObj.operands = [];
 32 |         var operands = instruction.split(",");
 33 |         for (var j = 0, k = operands.length; j < k; j++) {
 34 |             insObj.operands.push(trim(operands[j]));
 35 |         }
 36 |         if (insObj.opcode == "vlabel") {
 37 |             this.vlabelMap[insObj.operands[0]] = i;
 38 |         }
 39 |         processedInstructions.push(insObj);
 40 |     }
 41 |     this.instructions = processedInstructions;
 42 |     this.registers = [];
 43 |     this.pc = 0;
 44 |     this.labelMap = {};
 45 | }
 46 | ```
 47 | 
 48 | 看看新增了的那段代碼，如果我們讀到了 `vlabel` 的指令，我們就會記下 program counter ，就是這樣簡單了。另外，現在跳轉的方法有所改變，即要加上 `“_”` 來跳轉到 vlabel，所以我們最好也把跳轉的代碼抽出來。
 49 | 
 50 | ```js
 51 | Wemachine.prototype.easyJump = function (lbl) {
 52 |     var nextPC;// = this.labelMap[lbl];
 53 |     if (lbl.substr(0, 1) == "_") {
 54 |         //it is vlabel
 55 |         var realLbl = lbl.substr(1);
 56 |         nextPC = this.vlabelMap[realLbl];
 57 |     } else {
 58 |         nextPC = this.labelMap[lbl];
 59 |     }
 60 |     if (nextPC == null) {
 61 |         Errors.push({
 62 |             type: Errors.RUNTIME_ERROR,
 63 |             msg: "Label not found",
 64 |             line: 0
 65 |         });
 66 |     } else {
 67 |         this.pc = nextPC;
 68 |     }
 69 | }
 70 | ```
 71 | 
 72 | 最後要改寫一下那些跳轉程式的寫法，把它們改成使用 `easyJump` 來跳轉，這裡就用 `beq` 來做例子。
 73 | 
 74 | ```js
 75 | Wemachine.prototype.beq = function (operand1, operand2, operand3) {
 76 |     var val1 = this.getRegisterContent(operand1);
 77 |     var val2 = this.getRegisterContent(operand2);
 78 |     if (val1 == val2) {
 79 |         this.easyJump(operand3);
 80 |     }
 81 | }
 82 | ```
 83 |  
 84 | 比之前簡潔了很多吧，其他跳轉的做法也差不多，這裡不詳述了。
 85 | 
 86 | 好了，改寫了 Wemachine，我們改寫了未來，現在要做的事就是要生成 `if` 和 `while` 的 Wemachine code 。
 87 | 
 88 | ## if
 89 | 
 90 | ```js
 91 | Compiler.prototype.evaluateIfNode = function (node) {
 92 |     var condReg = this.evaluateExpressionNode(node.conditionExpression);
 93 |     var elseLbl = this.getNextLabel();
 94 |     var endLbl = this.getNextLabel();
 95 |     this.writeln("beq " + condReg + "," + this.falseRegister + "," + "_" + elseLbl + ";");
 96 |     this.evaluateExpressionBlockNode(node.expressions);
 97 |     this.writeln("j _" + endLbl + ";");
 98 |     this.writeln("vlabel " + elseLbl + ";");
 99 |     this.evaluateExpressionBlockNode(node.elseExpressions);
100 |     this.writeln("vlabel " + endLbl + ";");
101 | }
102 | ```
103 |  
104 | `if` 要做什麼呢？首先就是要做一個判斷，看看條件式是否不成立，不成立的話就要跳到 `else` 的位置，成立的話只需直接執行落去就可以了。但要緊記，不能一直執行下去，當完成執行時就要跳到 `else` block 之後，不然條件式成立與否 `else` block 都會運行。 `if` 就是這樣了，不太難吧，不瞞你說，其實 `while` 更容易！
105 | 
106 | ## while
107 | 
108 | ```js
109 | Compiler.prototype.evaluateWhileNode = function (node) {
110 |     var whileLbl = this.getNextLabel();
111 |     var endLbl = this.getNextLabel();
112 |     this.writeln("vlabel " + whileLbl + ";");
113 |     var condReg = this.evaluateExpressionNode(node.conditionExpression);
114 |     this.writeln("beq " + condReg + "," + this.falseRegister + "," + "_" + endLbl + ";");
115 |     this.evaluateExpressionBlockNode(node.expressions);
116 |     this.writeln("j _" + whileLbl + ";");
117 |     this.writeln("vlabel " + endLbl + ";");
118 | }
119 | ```
120 |  
121 | 又是判斷條件式，如果不成立的話就直接跳到 `end` label 結束 `while` 迴圈，不然就繼續執行，並且每次執行完都跳到最頂再做一次條件式判斷。很容易吧！運行一下看看運作是否正常。
122 | 
123 | 運作正常，那就大功告成啦！
124 | 
125 | 本章就此完結了， Wescript compiler 也可以算是完成了，從讀取 Wescript 到建立 parse tree，再到本章生成代碼，一路走來用的技巧都是 mutual recursion ，可見其重要性，大家要好好掌握這個技巧，那麼大家編寫自己的 compiler 時也能得心應手～
126 | 


--------------------------------------------------------------------------------
/docs/7.md:
--------------------------------------------------------------------------------
  1 | # 七、優化器（Optimizer）﹣還可以更好
  2 | 
  3 | 大家好，又見到西傑了。我們之前已經做好了一個簡單的編譯器，可以把 Wescript 編譯成 Wemachine 讀得到的 Wemachine code ，理論上編譯器教程也可以算完成了，但世事並不是這麼簡單的，我們雖然已經編譯到一個可以運行的程式，但在這個時間就是金錢的世界中，我們必須爭取每一分每一秒，把程式縮到最精簡，這就是我們這一章要做的工作了。
  4 | 
  5 | 要做優化，我們可以從兩個層面著手，即代碼層面及指令碼層面。例如，我們可以移除一些沒有用過的變數，以減少記憶體的使用，這就屬於代碼層面的優化。又或者我們可以研究程式使用過的 register 並把沒有用的都移除或減少使用，以減少程式的代碼量及加快運行速度，這就屬於指令碼層面的優化。
  6 | 
  7 | 在實際應用環境中，有數之不盡那麼多種不同的優化技巧，所以在這章教學中，我們會挑選兩種來討論，第一種是移除沒有用過的變數，第二種是 Loop inversion 。現在就由第一種開始吧！
  8 | 
  9 | ## 移除沒用的變數
 10 | 
 11 | 首先我們要改寫一下我們的 Analyser，我們要記下哪些變數曾經被使用，那麼我們才可以在生成代碼時避免生成那些未曾使用的變數。
 12 | 
 13 | ```js
 14 | function Analyser() {
 15 |     this.vars = {};
 16 |     //added for optimization use
 17 |     this.unusedVars = {};
 18 | }
 19 | ```
 20 | 
 21 | 另外，在 evaluateVariableNode 時，我們需要記下這個變數未被使用，然後在以後的代碼中當這個變數被使用之時我們就要把它從未被使用的 hash map 中剔除掉。
 22 | 
 23 | ```js
 24 | Analyser.prototype.evaluateVariableNode = function (node) {
 25 |     if (this.vars[node.varName]) {
 26 |         //this variable has been declared before
 27 |         //since we can find it in our variable table
 28 |         Errors.push({
 29 |             type: Errors.SEMANTIC_ERROR,
 30 |             msg: "The variable \"" + node.varName + "\" has been declared already",
 31 |             line: node.line
 32 |         });
 33 |     } else {
 34 |         this.vars[node.varName] = node;
 35 |         //if we do not use "else", this variable declaration will replace the previous one
 36 |         //This may result in wrong data type checking later on
 37 |  
 38 |         //it is not used at the moment of declaration
 39 |         this.unusedVars[node.varName] = true;
 40 |     }
 41 |  
 42 |     ...
 43 | }
 44 | ```
 45 | 
 46 | 在使用變數時把它剔出未被使用的名單：
 47 | 
 48 | ```js
 49 | Analyser.prototype.evaluateIdentifierNode = function (node) {
 50 |     if (! this.vars[node.identifier]) {
 51 |         Errors.push({
 52 |             type: Errors.SEMANTIC_ERROR,
 53 |             msg: "Variable \"" + node.identifier + "\" must be declared before using",
 54 |             line: node.line
 55 |         });
 56 |     } else {
 57 |         this.unusedVars[node.identifier] = false;
 58 |         node.valueType = this.vars[node.identifier].valueType;
 59 |     }
 60 | }
 61 | ```
 62 | 
 63 | 最後就得出這個樣子了。
 64 | 
 65 | ```js
 66 | var a:int = 3;
 67 | var b:bool = false;
 68 | 		
 69 | print a;
 70 | ```
 71 | 
 72 | ```js
 73 | lwi $0,1;
 74 | lwi $1,0;
 75 | lwi $3,3;
 76 | move $2,$3;
 77 | move $4,$2;
 78 | lwi $6,0;
 79 | move $5,$6;
 80 | move $7,$4;
 81 | print $7;
 82 | 3
 83 | ```
 84 | 
 85 | ## Loop inversion
 86 | 
 87 | 什麼是 Loop inversion 呢？就是把一個 while-loop 轉換成為一個 if 加一個 do-while-loop ，為什麼要這樣做呢？ branch 和 jump 一般來說都是消費很大（即用很多時間）的指令，而這個做法就可以幫助我們節省一個 branch ，那就可以幫助我們加快程式運行速度了。
 88 | 
 89 | 先看看這個 while-loop ：
 90 | 
 91 | ```js
 92 | var i:int = 3;
 93 |  
 94 | while (i != 0) {
 95 |     i--;
 96 | }
 97 | ```
 98 | 
 99 | 我們要把它改寫成類似這樣的代碼：
100 | 
101 | ```js
102 | var i:int = 3;
103 |  
104 | if (i != 0) {
105 |     do {
106 |         i--;
107 |     } while (i != 0);
108 | }
109 | ```
110 | 
111 | 這個真的能夠幫助我們節省使用 branch 嗎？待會看看你就會明白了，現在先看一下我們如何改寫現在 while loop 生成代碼的寫法。為了更好地模擬現實的機器，我們第一步要做的是在跳轉的時候增加一些睡眠時間 ，因為在現實世界中的機器處理跳轉的時間都比較長，這亦是為什麼這個優化的技巧有用武之地。
112 | 
113 | 在 easyJump 之中加入以下的代碼：
114 | 
115 | ```js
116 | //sleep for 10 ms
117 | var t = + new Date;
118 | while ((+new Date) - t < 10) {}
119 | ```
120 | 
121 | 讓程式在跳轉時睡 10 毫秒（ms）。
122 | 
123 | 先看看現在 while-loop 生成的代碼執行三十次要用多少時間：
124 | 
125 | ```js
126 | var i:int = 3;
127 | 
128 | while (i != 0) {
129 | 	i--;
130 | }
131 | ```
132 | 
133 | ```js
134 | lwi $0,1;
135 | lwi $1,0;
136 | lwi $3,3;
137 | move $2,$3;
138 | move $4,$2;
139 | vlabel lbl0;
140 | move $5,$4;
141 | lwi $7,0;
142 | move $6,$7;
143 | move $10,$5;
144 | move $11,$6;
145 | lwi $8,0;
146 | label lbl2;
147 | label lbl4;
148 | add $10,$10,$8;
149 | lwi $9,0;
150 | beq $8,$0,lbl2;
151 | label lbl3;
152 | lwi $9,1;
153 | beq $8,$0,lbl2;
154 | label lbl2;
155 | lwi $8,1;
156 | beq $10,$11,lbl4;
157 | move $5,$9;
158 | beq $5,$1,_lbl1;
159 | move $12,$4;
160 | subi $4,$4,1;
161 | j _lbl0;
162 | vlabel lbl1;
163 | ```
164 | 
165 | 接著我們就要改寫 while-loop 生成的代碼了。我們一開始要先檢查一下條件是否成立，是的話才執行 do-while loop 的內部程式，在執行完內部程式碼之後就要檢查一下條件式是否成立，是的話就跳到內部程式碼的頂端再執行一次，直至條件式不成立為止。
166 | 
167 | ```js
168 | Compiler.prototype.evaluateWhileNode = function (node) {
169 |     var whileLbl = this.getNextLabel();
170 |     var endLbl = this.getNextLabel();
171 |  
172 |     var condReg = this.evaluateExpressionNode(node.conditionExpression);
173 |     this.writeln("beq " + condReg + "," + this.falseRegister + "," + "_" + endLbl + ";");
174 |  
175 |     this.writeln("vlabel " + whileLbl + ";");
176 |     this.evaluateExpressionBlockNode(node.expressions);
177 |  
178 |     condReg = this.evaluateExpressionNode(node.conditionExpression);
179 |     this.writeln("beq " + condReg + "," + this.trueRegister + "," + "_" + whileLbl + ";");
180 |     this.writeln("vlabel " + endLbl + ";");
181 | }
182 | ```
183 | 
184 | ```js
185 | var i:int = 3;
186 | 
187 | while (i != 0) {
188 |     i--;
189 | }
190 | ```
191 | 
192 | ```js
193 | lwi $0,1;
194 | lwi $1,0;
195 | lwi $3,3;
196 | move $2,$3;
197 | move $4,$2;
198 | move $5,$4;
199 | lwi $7,0;
200 | move $6,$7;
201 | move $10,$5;
202 | move $11,$6;
203 | lwi $8,0;
204 | label lbl2;
205 | label lbl4;
206 | add $10,$10,$8;
207 | lwi $9,0;
208 | beq $8,$0,lbl2;
209 | label lbl3;
210 | lwi $9,1;
211 | beq $8,$0,lbl2;
212 | label lbl2;
213 | lwi $8,1;
214 | beq $10,$11,lbl4;
215 | move $5,$9;
216 | beq $5,$1,_lbl1;
217 | vlabel lbl0;
218 | move $12,$4;
219 | subi $4,$4,1;
220 | move $13,$4;
221 | lwi $15,0;
222 | move $14,$15;
223 | move $18,$13;
224 | move $19,$14;
225 | lwi $16,0;
226 | label lbl5;
227 | label lbl7;
228 | add $18,$18,$16;
229 | lwi $17,0;
230 | beq $16,$0,lbl5;
231 | label lbl6;
232 | lwi $17,1;
233 | beq $16,$0,lbl5;
234 | label lbl5;
235 | lwi $16,1;
236 | beq $18,$19,lbl7;
237 | move $13,$17;
238 | beq $13,$0,_lbl0;
239 | vlabel lbl1;
240 | ```
241 | 
242 | 這一章說的優化技巧其實只是很皮毛而已，如果你還想更深入地探討其他技巧，可以到維基看看，那裏列出了很多種不同的優化技巧，相信要研究都要花一段很長的時間了。
243 | 
244 | 編譯器的教學也來到尾聲了，大家在當中學到了多少東西呢？現在就到你們出手了，把你們一直想做的編譯器做出來吧！
245 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | # 深入淺出教你寫編譯器（Compiler）
 2 | 
 3 | (by Dukeland)
 4 | 
 5 | 本書內容取材自 Dukeland，是一位香港 IT 人西杰所撰寫的；然而因為網路上版本眾多，且原網站已經關閉，以下連結皆已遺失（目前改為連結 [web.archive.org](https://web.archive.org/) 的留存版本），故重新製作本書以留存。
 6 | 
 7 | * [http://dukeland.hk/2012/03/26/a-simple-tutorial-of-writing-a-compiler-part-1-introduction/](https://web.archive.org/web/20140720055336/http://dukeland.hk/2012/03/26/a-simple-tutorial-of-writing-a-compiler-part-1-introduction/)
 8 | * [http://dukeland.hk/2012/03/29/a-simple-tutorial-of-writing-a-compiler-part-2-scanner-lexical-analysis-section-1/](https://web.archive.org/web/20140205044843/http://dukeland.hk/2012/03/29/a-simple-tutorial-of-writing-a-compiler-part-2-scanner-lexical-analysis-section-1/)
 9 | * [http://dukeland.hk/2012/03/30/a-simple-tutorial-of-writing-a-compiler-part-2-scanner-lexical-analysis-section-2/](https://web.archive.org/web/20131002175643/http://dukeland.hk/2012/03/30/a-simple-tutorial-of-writing-a-compiler-part-2-scanner-lexical-analysis-section-2/)
10 | * [http://dukeland.hk/2012/04/05/a-simple-tutorial-of-writing-a-compiler-part-3-parser-syntactic-analysis-section-1/](https://web.archive.org/web/20140720055341/http://dukeland.hk/2012/04/05/a-simple-tutorial-of-writing-a-compiler-part-3-parser-syntactic-analysis-section-1/)
11 | * [http://dukeland.hk/2012/04/06/a-simple-tutorial-of-writing-a-compiler-part-3-parser-syntactic-analysis-section-2/](https://web.archive.org/web/20140222100924/http://dukeland.hk/2012/04/06/a-simple-tutorial-of-writing-a-compiler-part-3-parser-syntactic-analysis-section-2/)
12 | * [http://dukeland.hk/2012/04/12/a-simple-tutorial-of-writing-a-compiler-part-3-parser-syntactic-analysis-section-3/](https://web.archive.org/web/20131002175653/http://dukeland.hk/2012/04/12/a-simple-tutorial-of-writing-a-compiler-part-3-parser-syntactic-analysis-section-3/)
13 | * [http://dukeland.hk/2012/04/19/a-simple-tutorial-of-writing-a-compiler-part-4-semantic-analysis/](https://web.archive.org/web/20140228223242/http://dukeland.hk/2012/04/19/a-simple-tutorial-of-writing-a-compiler-part-4-semantic-analysis/)
14 | * [http://dukeland.hk/2012/04/26/a-simple-tutorial-of-writing-a-compiler-part-5-virtual-machine/](https://web.archive.org/web/20131002175658/http://dukeland.hk/2012/04/26/a-simple-tutorial-of-writing-a-compiler-part-5-virtual-machine/)
15 | * [http://dukeland.hk/2012/05/03/a-simple-tutorial-of-writing-a-compiler-part-6-compiler-code-generation-section-1/](https://web.archive.org/web/20131002165607/http://dukeland.hk/2012/05/03/a-simple-tutorial-of-writing-a-compiler-part-6-compiler-code-generation-section-1/)
16 | * [http://dukeland.hk/2012/05/10/a-simple-tutorial-of-writing-a-compiler-part-6-compiler-code-generation-section-2/](https://web.archive.org/web/20131002175703/http://dukeland.hk/2012/05/10/a-simple-tutorial-of-writing-a-compiler-part-6-compiler-code-generation-section-2/)
17 | * [http://dukeland.hk/2012/05/17/a-simple-tutorial-of-writing-a-compiler-part-7-optimizer-optimization/](https://web.archive.org/web/20131002175717/http://dukeland.hk/2012/05/17/a-simple-tutorial-of-writing-a-compiler-part-7-optimizer-optimization/)
18 | 


--------------------------------------------------------------------------------
/docs/SUMMARY.md:
--------------------------------------------------------------------------------
 1 | # 深入淺出教你寫編譯器（Compiler）
 2 | 
 3 | * [前言](README.md)
 4 | * [簡介](1.md)
 5 | * [掃瞄器（Scanner）﹣詞法分析（Lexical analysis）（上）](2-1.md)
 6 | * [掃瞄器（Scanner）﹣詞法分析（Lexical analysis）（下）](2-2.md)
 7 | * [語法分析器（Parser）﹣語法分析（Syntactic analysis）（上）](3-1.md)
 8 | * [語法分析器（Parser）﹣語法分析（Syntactic analysis）（中）](3-2.md)
 9 | * [語法分析器（Parser）﹣語法分析（Syntactic analysis）（下）](3-3.md)
10 | * [語意分析（Semantic analysis）](4.md)
11 | * [虛擬機（Virtual Machine）](5.md)
12 | * [編譯器（Compiler）﹣生成代碼（Code Generation）（上）](6-1.md)
13 | * [編譯器（Compiler）﹣生成代碼（Code Generation）（下）](6-2.md)
14 | * [優化器（Optimizer）﹣還可以更好](7.md)
15 | 


--------------------------------------------------------------------------------
/docs/book.json:
--------------------------------------------------------------------------------
1 | {
2 |     "title": "深入淺出教你寫編譯器（Compiler）"
3 | }
4 | 


--------------------------------------------------------------------------------
/docs/images/2-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/2-1-1.png


--------------------------------------------------------------------------------
/docs/images/2-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/2-1-2.png


--------------------------------------------------------------------------------
/docs/images/2-1-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/2-1-3.png


--------------------------------------------------------------------------------
/docs/images/3-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-1-1.png


--------------------------------------------------------------------------------
/docs/images/3-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-1-2.png


--------------------------------------------------------------------------------
/docs/images/3-1-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-1-3.png


--------------------------------------------------------------------------------
/docs/images/3-1-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-1-4.png


--------------------------------------------------------------------------------
/docs/images/3-1-5.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-1-5.gif


--------------------------------------------------------------------------------
/docs/images/3-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-2-1.png


--------------------------------------------------------------------------------
/docs/images/3-2-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-2-2.png


--------------------------------------------------------------------------------
/docs/images/3-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-3-1.png


--------------------------------------------------------------------------------
/docs/images/3-3-10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-3-10.png


--------------------------------------------------------------------------------
/docs/images/3-3-11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-3-11.png


--------------------------------------------------------------------------------
/docs/images/3-3-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-3-2.png


--------------------------------------------------------------------------------
/docs/images/3-3-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-3-3.png


--------------------------------------------------------------------------------
/docs/images/3-3-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-3-4.png


--------------------------------------------------------------------------------
/docs/images/3-3-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-3-5.png


--------------------------------------------------------------------------------
/docs/images/3-3-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-3-6.png


--------------------------------------------------------------------------------
/docs/images/3-3-7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-3-7.png


--------------------------------------------------------------------------------
/docs/images/3-3-8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-3-8.png


--------------------------------------------------------------------------------
/docs/images/3-3-9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/3-3-9.png


--------------------------------------------------------------------------------
/docs/images/5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaceju/simple-compiler/40968086ec777afa8c211a456c478ce7b4ed4956/docs/images/5-1.png


--------------------------------------------------------------------------------
/examples/2-1-1.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 |   <head>
 4 |     <title>Scanner - section 1</title>
 5 | 
 6 |     <style type="text/css">
 7 |       #wescriptSrc {
 8 |         margin: 10px 0;
 9 |       }
10 | 
11 |       #wescriptSrc .content {
12 |         border: 1px solid orange;
13 |         padding: 10px;
14 |       }
15 |     </style>
16 | 
17 |     <!-- of course we need jQuery -->
18 |     <script
19 |       src="https://code.jquery.com/jquery-3.4.1.slim.min.js"
20 |       integrity="sha256-pasqAKBDmFT4eHoN2ndd6lN370kFiGUFyTiUHWhU7k8="
21 |       crossorigin="anonymous"
22 |     ></script>
23 | 
24 |     <!-- the wescript to be compiled will be stored here -->
25 |     <script id="wescript" type="text/wescript">
26 |       *&! hello
27 |     </script>
28 | 
29 |     <!-- our compiler -->
30 |     <script type="text/javascript">
31 |       //Reader class
32 | 
33 |       //str is the data to be read
34 |       function Reader(str) {
35 |         this.data = str;
36 |         this.currPos = 0;
37 |         this.dataLength = str.length;
38 |       }
39 | 
40 |       Reader.prototype.nextChar = function() {
41 |         if (this.currPos >= this.dataLength) {
42 |           return -1; //end of stream
43 |         }
44 |         return this.data[this.currPos++];
45 |       };
46 | 
47 |       //n is the number of characters to be retracted
48 |       Reader.prototype.retract = function(n) {
49 |         if (n == undefined) {
50 |           n = 1;
51 |         }
52 |         this.currPos -= n;
53 |         if (this.currPos < 0) {
54 |           this.currPos = 0;
55 |         }
56 |       };
57 |     </script>
58 | 
59 |     <!-- our tester -->
60 |     <script type="text/javascript">
61 |       function log(str) {
62 |         $("#log").append(str + "<br />");
63 |       }
64 | 
65 |       $(function() {
66 |         $("#wescriptSrc .content").text($("#wescript").text());
67 |       });
68 | 
69 |       $(function() {
70 |         //we stored our wescript in <script id="wescript">
71 |         var dataToBeCompiled = $("#wescript").text();
72 |         var reader = new Reader(dataToBeCompiled);
73 |         var retracted = false;
74 |         while (true) {
75 |           var nextChar = reader.nextChar();
76 |           if (nextChar == -1) {
77 |             break;
78 |           }
79 |           //if it meets !, it will retract once
80 |           if (nextChar == "!" && !retracted) {
81 |             reader.retract();
82 |             retracted = true;
83 |           }
84 |           log("char: " + nextChar);
85 |         }
86 |       });
87 |     </script>
88 |   </head>
89 | 
90 |   <body>
91 |     <div id="wescriptSrc">
92 |       <pre class="content"></pre>
93 |     </div>
94 | 
95 |     <div id="log"></div>
96 |   </body>
97 | </html>
98 | 


--------------------------------------------------------------------------------
/examples/2-1-2.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 |   <head>
  4 |     <title>Scanner - section 2</title>
  5 | 
  6 |     <style type="text/css">
  7 |       #wescriptSrc {
  8 |         margin: 10px 0;
  9 |       }
 10 | 
 11 |       #wescriptSrc .content {
 12 |         border: 1px solid orange;
 13 |         padding: 10px;
 14 |       }
 15 |     </style>
 16 | 
 17 |     <!-- of course we need jQuery -->
 18 |     <script
 19 |       src="https://code.jquery.com/jquery-3.4.1.slim.min.js"
 20 |       integrity="sha256-pasqAKBDmFT4eHoN2ndd6lN370kFiGUFyTiUHWhU7k8="
 21 |       crossorigin="anonymous"
 22 |     ></script>
 23 | 
 24 |     <!-- the wescript to be compiled will be stored here -->
 25 |     <script id="wescript" type="text/wescript">
 26 |       ;: !@# / %
 27 |     </script>
 28 | 
 29 |     <!-- our compiler -->
 30 |     <script type="text/javascript">
 31 |       //Reader class
 32 | 
 33 |       //str is the data to be read
 34 |       function Reader(str) {
 35 |         this.data = str;
 36 |         this.currPos = 0;
 37 |         this.dataLength = str.length;
 38 |       }
 39 | 
 40 |       Reader.prototype.nextChar = function() {
 41 |         if (this.currPos >= this.dataLength) {
 42 |           return -1; //end of stream
 43 |         }
 44 |         return this.data[this.currPos++];
 45 |       };
 46 | 
 47 |       //n is the number of characters to be retracted
 48 |       Reader.prototype.retract = function(n) {
 49 |         if (n == undefined) {
 50 |           n = 1;
 51 |         }
 52 |         this.currPos -= n;
 53 |         if (this.currPos < 0) {
 54 |           this.currPos = 0;
 55 |         }
 56 |       };
 57 |     </script>
 58 |     <script type="text/javascript">
 59 |       //Token class
 60 | 
 61 |       //type: Token's type
 62 |       //text: the actual text that makes this token, may be null if it is not important
 63 |       function Token(type, text) {
 64 |         this.type = type;
 65 |         this.text = text;
 66 |       }
 67 | 
 68 |       Token.tokens = {};
 69 |       Token.tokens.EOS_TOKEN = 1; //end of stream
 70 |       // using + 1 allows adding a new token easily later
 71 |       Token.tokens.COLON_TOKEN = Token.tokens.EOS_TOKEN + 1;
 72 |       Token.tokens.SEMICOLON_TOKEN = Token.tokens.COLON_TOKEN + 1;
 73 |       Token.tokens.LEFTPAREN_TOKEN = Token.tokens.SEMICOLON_TOKEN + 1;
 74 |       Token.tokens.RIGHTPAREN_TOKEN = Token.tokens.LEFTPAREN_TOKEN + 1;
 75 |       Token.tokens.LEFTBRACE_TOKEN = Token.tokens.RIGHTPAREN_TOKEN + 1;
 76 |       Token.tokens.RIGHTBRACE_TOKEN = Token.tokens.LEFTBRACE_TOKEN + 1;
 77 |       Token.tokens.MOD_TOKEN = Token.tokens.RIGHTBRACE_TOKEN + 1;
 78 | 
 79 |       Token.backwardMap = {}; //for inverse look-up
 80 |       for (var x in Token.tokens) {
 81 |         Token.backwardMap[Token.tokens[x]] = x;
 82 |       }
 83 |     </script>
 84 |     <script type="text/javascript">
 85 |       //Scanner class
 86 | 
 87 |       //reader: the reader used to read in characters
 88 |       function Scanner(reader) {
 89 |         this.reader = reader;
 90 |         this.currentToken = new Token(); //storing the current analysed token
 91 |         this.currLine = 0; //the line number of the current line being read
 92 |         this.state = Scanner.START_STATE;
 93 |       }
 94 | 
 95 |       Scanner.START_STATE = 1; //every FSM should have a start state
 96 | 
 97 |       Scanner.prototype.makeToken = function(type, text) {
 98 |         this.currentToken.type = type;
 99 |         this.currentToken.text = text;
100 |         return type;
101 |       };
102 | 
103 |       Scanner.prototype.nextToken = function() {
104 |         while (true) {
105 |           switch (this.state) {
106 |             case Scanner.START_STATE:
107 |               var c = this.reader.nextChar();
108 |               switch (c) {
109 |                 case ":":
110 |                   return this.makeToken(Token.tokens.COLON_TOKEN);
111 |                   break;
112 |                 case ";":
113 |                   return this.makeToken(Token.tokens.SEMICOLON_TOKEN);
114 |                   break;
115 |                 case "(":
116 |                   return this.makeToken(Token.tokens.LEFTPAREN_TOKEN);
117 |                   break;
118 |                 case ")":
119 |                   return this.makeToken(Token.tokens.RIGHTPAREN_TOKEN);
120 |                   break;
121 |                 case "{":
122 |                   return this.makeToken(Token.tokens.LEFTBRACE_TOKEN);
123 |                   break;
124 |                 case "}":
125 |                   return this.makeToken(Token.tokens.RIGHTBRACE_TOKEN);
126 |                   break;
127 |                 case "%":
128 |                   return this.makeToken(Token.tokens.MOD_TOKEN);
129 |                   break;
130 |                 case -1:
131 |                   return this.makeToken(Token.tokens.EOS_TOKEN);
132 |                   break;
133 |                 case "\r":
134 |                 case "\n":
135 |                   this.currLine++;
136 |                 default:
137 |                 //ignore them
138 |               }
139 |               break;
140 |           }
141 |         }
142 |       };
143 |     </script>
144 | 
145 |     <!-- our tester -->
146 |     <script type="text/javascript">
147 |       function log(str) {
148 |         $("#log").append(str + "<br />");
149 |       }
150 | 
151 |       $(function() {
152 |         $("#wescriptSrc .content").text($("#wescript").text());
153 |       });
154 | 
155 |       $(function() {
156 |         //we stored our wescript in <script id="wescript">
157 |         var dataToBeCompiled = $("#wescript").text();
158 |         var reader = new Reader(dataToBeCompiled);
159 |         var scanner = new Scanner(reader);
160 |         while (true) {
161 |           var token = scanner.nextToken();
162 |           if (token == Token.tokens.EOS_TOKEN) {
163 |             break;
164 |           }
165 | 
166 |           log("Read token: " + Token.backwardMap[token]);
167 |         }
168 |       });
169 |     </script>
170 |   </head>
171 | 
172 |   <body>
173 |     <div id="wescriptSrc">
174 |       <pre class="content"></pre>
175 |     </div>
176 | 
177 |     <div id="log"></div>
178 |   </body>
179 | </html>
180 | 


--------------------------------------------------------------------------------
/examples/2-2-1.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 |   <head>
  4 |     <title>Scanner - section 3</title>
  5 | 
  6 |     <style type="text/css">
  7 |       #wescriptSrc {
  8 |         margin: 10px 0;
  9 |       }
 10 | 
 11 |       #wescriptSrc .content {
 12 |         border: 1px solid orange;
 13 |         padding: 10px;
 14 |       }
 15 |     </style>
 16 | 
 17 |     <!-- of course we need jQuery -->
 18 |     <script
 19 |       src="https://code.jquery.com/jquery-3.4.1.slim.min.js"
 20 |       integrity="sha256-pasqAKBDmFT4eHoN2ndd6lN370kFiGUFyTiUHWhU7k8="
 21 |       crossorigin="anonymous"
 22 |     ></script>
 23 | 
 24 |     <!-- the wescript to be compiled will be stored here -->
 25 |     <script id="wescript" type="text/wescript">
 26 |       var a:bool = true;
 27 | 
 28 |       if (a){
 29 |       	print a;
 30 |       }else{
 31 |       	print false;
 32 |       }
 33 |     </script>
 34 | 
 35 |     <!-- our compiler -->
 36 |     <script type="text/javascript">
 37 |       //Reader class
 38 | 
 39 |       //str is the data to be read
 40 |       function Reader(str) {
 41 |         this.data = str;
 42 |         this.currPos = 0;
 43 |         this.dataLength = str.length;
 44 |       }
 45 | 
 46 |       Reader.prototype.nextChar = function() {
 47 |         if (this.currPos >= this.dataLength) {
 48 |           return -1; //end of stream
 49 |         }
 50 |         return this.data[this.currPos++];
 51 |       };
 52 | 
 53 |       //n is the number of characters to be retracted
 54 |       Reader.prototype.retract = function(n) {
 55 |         if (n == undefined) {
 56 |           n = 1;
 57 |         }
 58 |         this.currPos -= n;
 59 |         if (this.currPos < 0) {
 60 |           this.currPos = 0;
 61 |         }
 62 |       };
 63 |     </script>
 64 |     <script type="text/javascript">
 65 |       //Token class
 66 | 
 67 |       //type: Token's type
 68 |       //text: the actual text that makes this token, may be null if it is not important
 69 |       function Token(type, text) {
 70 |         this.type = type;
 71 |         this.text = text;
 72 |       }
 73 | 
 74 |       Token.tokens = {};
 75 |       Token.tokens.EOS_TOKEN = 1; //end of stream
 76 |       // using + 1 allows adding a new token easily later
 77 |       Token.tokens.COLON_TOKEN = Token.tokens.EOS_TOKEN + 1;
 78 |       Token.tokens.SEMICOLON_TOKEN = Token.tokens.COLON_TOKEN + 1;
 79 |       Token.tokens.LEFTPAREN_TOKEN = Token.tokens.SEMICOLON_TOKEN + 1;
 80 |       Token.tokens.RIGHTPAREN_TOKEN = Token.tokens.LEFTPAREN_TOKEN + 1;
 81 |       Token.tokens.LEFTBRACE_TOKEN = Token.tokens.RIGHTPAREN_TOKEN + 1;
 82 |       Token.tokens.RIGHTBRACE_TOKEN = Token.tokens.LEFTBRACE_TOKEN + 1;
 83 |       Token.tokens.MOD_TOKEN = Token.tokens.RIGHTBRACE_TOKEN + 1;
 84 | 
 85 |       Token.tokens.VAR_TOKEN = Token.tokens.MOD_TOKEN + 1;
 86 |       Token.tokens.TYPE_TOKEN = Token.tokens.VAR_TOKEN + 1;
 87 |       Token.tokens.BOOLLITERAL_TOKEN = Token.tokens.TYPE_TOKEN + 1;
 88 |       Token.tokens.IF_TOKEN = Token.tokens.BOOLLITERAL_TOKEN + 1;
 89 |       Token.tokens.ELSE_TOKEN = Token.tokens.IF_TOKEN + 1;
 90 |       Token.tokens.WHILE_TOKEN = Token.tokens.ELSE_TOKEN + 1;
 91 |       Token.tokens.PRINT_TOKEN = Token.tokens.WHILE_TOKEN + 1;
 92 |       Token.tokens.IDENTIFIER_TOKEN = Token.tokens.PRINT_TOKEN + 1;
 93 | 
 94 |       Token.backwardMap = {}; //for inverse look-up
 95 |       for (var x in Token.tokens) {
 96 |         Token.backwardMap[Token.tokens[x]] = x;
 97 |       }
 98 |     </script>
 99 |     <script type="text/javascript">
100 |       //Scanner class
101 | 
102 |       //reader: the reader used to read in characters
103 |       function Scanner(reader) {
104 |         this.reader = reader;
105 |         this.currentToken = new Token(); //storing the current analysed token
106 |         this.currLine = 0; //the line number of the current line being read
107 |         this.state = Scanner.START_STATE;
108 |       }
109 | 
110 |       Scanner.START_STATE = 1; //every FSM should have a start state
111 |       Scanner.IDENTIFIER_STATE = Scanner.START_STATE + 1;
112 | 
113 |       Scanner.prototype.makeToken = function(type, text) {
114 |         this.currentToken.type = type;
115 |         this.currentToken.text = text;
116 |         return type;
117 |       };
118 | 
119 |       Scanner.prototype.nextToken = function() {
120 |         var bufferStr = "";
121 |         while (true) {
122 |           switch (this.state) {
123 |             case Scanner.START_STATE:
124 |               var c = this.reader.nextChar();
125 | 
126 |               if ((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")) {
127 |                 this.state = Scanner.IDENTIFIER_STATE;
128 |                 //we need to remember what the token's text is
129 |                 bufferStr = c;
130 |               } else {
131 |                 switch (c) {
132 |                   case ":":
133 |                     return this.makeToken(Token.tokens.COLON_TOKEN);
134 |                     break;
135 |                   case ";":
136 |                     return this.makeToken(Token.tokens.SEMICOLON_TOKEN);
137 |                     break;
138 |                   case "(":
139 |                     return this.makeToken(Token.tokens.LEFTPAREN_TOKEN);
140 |                     break;
141 |                   case ")":
142 |                     return this.makeToken(Token.tokens.RIGHTPAREN_TOKEN);
143 |                     break;
144 |                   case "{":
145 |                     return this.makeToken(Token.tokens.LEFTBRACE_TOKEN);
146 |                     break;
147 |                   case "}":
148 |                     return this.makeToken(Token.tokens.RIGHTBRACE_TOKEN);
149 |                     break;
150 |                   case "%":
151 |                     return this.makeToken(Token.tokens.MOD_TOKEN);
152 |                     break;
153 |                   case -1:
154 |                     return this.makeToken(Token.tokens.EOS_TOKEN);
155 |                     break;
156 |                   case "\r":
157 |                   case "\n":
158 |                     this.currLine++;
159 |                   default:
160 |                   //ignore them
161 |                 }
162 |               }
163 |               break;
164 |             case Scanner.IDENTIFIER_STATE:
165 |               var c = this.reader.nextChar();
166 | 
167 |               if ((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")) {
168 |                 bufferStr += c;
169 |               } else {
170 |                 //stop reading it since it is not a letter anymore
171 |                 //retract the last character we read because it does not belong to this identfier
172 |                 this.reader.retract();
173 | 
174 |                 //change back the state to read the next token
175 |                 this.state = Scanner.START_STATE;
176 | 
177 |                 switch (bufferStr) {
178 |                   case "var":
179 |                     return this.makeToken(Token.tokens.VAR_TOKEN);
180 |                   case "int":
181 |                   case "bool":
182 |                     //need to pass bufferStr as well to distinguish which type it is
183 |                     return this.makeToken(Token.tokens.TYPE_TOKEN, bufferStr);
184 |                   case "true":
185 |                   case "false":
186 |                   case "TRUE":
187 |                   case "FALSE":
188 |                     return this.makeToken(
189 |                       Token.tokens.BOOLLITERAL_TOKEN,
190 |                       bufferStr
191 |                     );
192 |                   case "if":
193 |                     return this.makeToken(Token.tokens.IF_TOKEN);
194 |                   case "else":
195 |                     return this.makeToken(Token.tokens.ELSE_TOKEN);
196 |                   case "while":
197 |                     return this.makeToken(Token.tokens.WHILE_TOKEN);
198 |                   case "print":
199 |                     return this.makeToken(Token.tokens.PRINT_TOKEN);
200 |                   default:
201 |                     return this.makeToken(
202 |                       Token.tokens.IDENTIFIER_TOKEN,
203 |                       bufferStr
204 |                     );
205 |                 }
206 |               }
207 |               break;
208 |           }
209 |         }
210 |       };
211 |     </script>
212 | 
213 |     <!-- our tester -->
214 |     <script type="text/javascript">
215 |       function log(str) {
216 |         $("#log").append(str + "<br />");
217 |       }
218 | 
219 |       $(function() {
220 |         $("#wescriptSrc .content").text($("#wescript").text());
221 |       });
222 | 
223 |       $(function() {
224 |         //we stored our wescript in <script id="wescript">
225 |         var dataToBeCompiled = $("#wescript").text();
226 |         var reader = new Reader(dataToBeCompiled);
227 |         var scanner = new Scanner(reader);
228 |         while (true) {
229 |           var token = scanner.nextToken();
230 |           if (token == Token.tokens.EOS_TOKEN) {
231 |             break;
232 |           }
233 | 
234 |           log(
235 |             "Read token: " +
236 |               Token.backwardMap[token] +
237 |               (scanner.currentToken.text == null
238 |                 ? ""
239 |                 : "(" + scanner.currentToken.text + ")")
240 |           );
241 |         }
242 |       });
243 |     </script>
244 |   </head>
245 | 
246 |   <body>
247 |     <div id="wescriptSrc">
248 |       <pre class="content"></pre>
249 |     </div>
250 | 
251 |     <div id="log"></div>
252 |   </body>
253 | </html>
254 | 


--------------------------------------------------------------------------------
/examples/2-2-2.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 |   <head>
  4 |     <title>Scanner - section 4</title>
  5 | 
  6 |     <style type="text/css">
  7 |       #wescriptSrc {
  8 |         margin: 10px 0;
  9 |       }
 10 | 
 11 |       #wescriptSrc .content {
 12 |         border: 1px solid orange;
 13 |         padding: 10px;
 14 |       }
 15 | 
 16 |       #error {
 17 |         color: red;
 18 |         line-height: 1.2;
 19 |         margin: 5px 0;
 20 |       }
 21 |     </style>
 22 | 
 23 |     <!-- of course we need jQuery -->
 24 |     <script
 25 |       src="https://code.jquery.com/jquery-3.4.1.slim.min.js"
 26 |       integrity="sha256-pasqAKBDmFT4eHoN2ndd6lN370kFiGUFyTiUHWhU7k8="
 27 |       crossorigin="anonymous"
 28 |     ></script>
 29 | 
 30 |     <!-- the wescript to be compiled will be stored here -->
 31 |     <script id="wescript" type="text/wescript">
 32 |       //line comment
 33 |       /* block comment
 34 |       */
 35 |       + - * / ++ += -- -= % = == != ! && ||
 36 |       & |
 37 |     </script>
 38 | 
 39 |     <!-- our compiler -->
 40 |     <script type="text/javascript">
 41 |       function Errors() {}
 42 | 
 43 |       Errors.errors = [];
 44 | 
 45 |       Errors.push = function(obj) {
 46 |         Errors.errors.push(obj);
 47 |       };
 48 | 
 49 |       Errors.each = function(func) {
 50 |         for (var i = 0, l = Errors.errors.length; i < l; i++) {
 51 |           func(Errors.errors[i], i);
 52 |         }
 53 |       };
 54 | 
 55 |       Errors.SYNTAX_ERROR = 0;
 56 | 
 57 |       Errors.type = ["Syntax error"];
 58 |     </script>
 59 |     <script type="text/javascript">
 60 |       //Reader class
 61 | 
 62 |       //str is the data to be read
 63 |       function Reader(str) {
 64 |         this.data = str;
 65 |         this.currPos = 0;
 66 |         this.dataLength = str.length;
 67 |       }
 68 | 
 69 |       Reader.prototype.nextChar = function() {
 70 |         if (this.currPos >= this.dataLength) {
 71 |           return -1; //end of stream
 72 |         }
 73 |         return this.data[this.currPos++];
 74 |       };
 75 | 
 76 |       //n is the number of characters to be retracted
 77 |       Reader.prototype.retract = function(n) {
 78 |         if (n == undefined) {
 79 |           n = 1;
 80 |         }
 81 |         this.currPos -= n;
 82 |         if (this.currPos < 0) {
 83 |           this.currPos = 0;
 84 |         }
 85 |       };
 86 |     </script>
 87 |     <script type="text/javascript">
 88 |       //Token class
 89 | 
 90 |       //type: Token's type
 91 |       //text: the actual text that makes this token, may be null if it is not important
 92 |       function Token(type, text) {
 93 |         this.type = type;
 94 |         this.text = text;
 95 |       }
 96 | 
 97 |       Token.tokens = {};
 98 |       Token.tokens.EOS_TOKEN = 1; //end of stream
 99 |       // using + 1 allows adding a new token easily later
100 |       Token.tokens.COLON_TOKEN = Token.tokens.EOS_TOKEN + 1;
101 |       Token.tokens.SEMICOLON_TOKEN = Token.tokens.COLON_TOKEN + 1;
102 |       Token.tokens.LEFTPAREN_TOKEN = Token.tokens.SEMICOLON_TOKEN + 1;
103 |       Token.tokens.RIGHTPAREN_TOKEN = Token.tokens.LEFTPAREN_TOKEN + 1;
104 |       Token.tokens.LEFTBRACE_TOKEN = Token.tokens.RIGHTPAREN_TOKEN + 1;
105 |       Token.tokens.RIGHTBRACE_TOKEN = Token.tokens.LEFTBRACE_TOKEN + 1;
106 |       Token.tokens.MOD_TOKEN = Token.tokens.RIGHTBRACE_TOKEN + 1;
107 | 
108 |       Token.tokens.VAR_TOKEN = Token.tokens.MOD_TOKEN + 1;
109 |       Token.tokens.TYPE_TOKEN = Token.tokens.VAR_TOKEN + 1;
110 |       Token.tokens.BOOLLITERAL_TOKEN = Token.tokens.TYPE_TOKEN + 1;
111 |       Token.tokens.INTLITERAL_TOKEN = Token.tokens.BOOLLITERAL_TOKEN + 1;
112 |       Token.tokens.IF_TOKEN = Token.tokens.INTLITERAL_TOKEN + 1;
113 |       Token.tokens.ELSE_TOKEN = Token.tokens.IF_TOKEN + 1;
114 |       Token.tokens.WHILE_TOKEN = Token.tokens.ELSE_TOKEN + 1;
115 |       Token.tokens.PRINT_TOKEN = Token.tokens.WHILE_TOKEN + 1;
116 |       Token.tokens.IDENTIFIER_TOKEN = Token.tokens.PRINT_TOKEN + 1;
117 | 
118 |       Token.tokens.PLUS_TOKEN = Token.tokens.IDENTIFIER_TOKEN + 1;
119 |       Token.tokens.PLUSPLUS_TOKEN = Token.tokens.PLUS_TOKEN + 1;
120 |       Token.tokens.PLUSASSIGN_TOKEN = Token.tokens.PLUSPLUS_TOKEN + 1;
121 |       Token.tokens.MINUS_TOKEN = Token.tokens.PLUSASSIGN_TOKEN + 1;
122 |       Token.tokens.MINUSMINUS_TOKEN = Token.tokens.MINUS_TOKEN + 1;
123 |       Token.tokens.MINUSASSIGN_TOKEN = Token.tokens.MINUSMINUS_TOKEN + 1;
124 |       Token.tokens.MULT_TOKEN = Token.tokens.MINUSASSIGN_TOKEN + 1;
125 |       Token.tokens.DIV_TOKEN = Token.tokens.MULT_TOKEN + 1;
126 |       Token.tokens.ASSIGN_TOKEN = Token.tokens.DIV_TOKEN + 1;
127 |       Token.tokens.EQUAL_TOKEN = Token.tokens.ASSIGN_TOKEN + 1;
128 |       Token.tokens.NOTEQUAL_TOKEN = Token.tokens.EQUAL_TOKEN + 1;
129 |       Token.tokens.GREATER_TOKEN = Token.tokens.NOTEQUAL_TOKEN + 1;
130 |       Token.tokens.GREATEREQUAL_TOKEN = Token.tokens.GREATER_TOKEN + 1;
131 |       Token.tokens.LESS_TOKEN = Token.tokens.GREATEREQUAL_TOKEN + 1;
132 |       Token.tokens.LESSEQUAL_TOKEN = Token.tokens.LESS_TOKEN + 1;
133 |       Token.tokens.AND_TOKEN = Token.tokens.LESSEQUAL_TOKEN + 1;
134 |       Token.tokens.OR_TOKEN = Token.tokens.AND_TOKEN + 1;
135 |       Token.tokens.NOT_TOKEN = Token.tokens.OR_TOKEN + 1;
136 | 
137 |       Token.tokens.LINECOMMENT_TOKEN = Token.tokens.NOT_TOKEN + 1;
138 |       Token.tokens.BLOCKCOMMENT_TOKEN = Token.tokens.LINECOMMENT_TOKEN + 1;
139 | 
140 |       Token.backwardMap = {}; //for inverse look-up
141 |       for (var x in Token.tokens) {
142 |         Token.backwardMap[Token.tokens[x]] = x;
143 |       }
144 |     </script>
145 |     <script type="text/javascript">
146 |       //Scanner class
147 | 
148 |       //reader: the reader used to read in characters
149 |       function Scanner(reader) {
150 |         this.reader = reader;
151 |         this.currentToken = new Token(); //storing the current analysed token
152 |         this.currLine = 0; //the line number of the current line being read
153 |         this.state = Scanner.START_STATE;
154 |       }
155 | 
156 |       Scanner.START_STATE = 1; //every FSM should have a start state
157 |       Scanner.IDENTIFIER_STATE = Scanner.START_STATE + 1;
158 |       Scanner.SLASH_STATE = Scanner.IDENTIFIER_STATE + 1;
159 | 
160 |       Scanner.prototype.makeToken = function(type, text) {
161 |         this.currentToken.type = type;
162 |         this.currentToken.text = text;
163 |         return type;
164 |       };
165 | 
166 |       Scanner.prototype.nextToken = function() {
167 |         var bufferStr = "";
168 |         while (true) {
169 |           switch (this.state) {
170 |             case Scanner.START_STATE:
171 |               var c = this.reader.nextChar();
172 | 
173 |               if ((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")) {
174 |                 this.state = Scanner.IDENTIFIER_STATE;
175 |                 //we need to remember what the token's text is
176 |                 bufferStr = c;
177 |               } else if (c >= "0" && c <= "9") {
178 |                 bufferStr = c;
179 |                 var d;
180 |                 while (true) {
181 |                   d = this.reader.nextChar();
182 |                   if (d >= "0" && d <= "9") {
183 |                     bufferStr += d;
184 |                   } else {
185 |                     this.reader.retract();
186 |                     return this.makeToken(
187 |                       Token.tokens.INTLITERAL_TOKEN,
188 |                       bufferStr
189 |                     );
190 |                   }
191 |                 }
192 |               } else {
193 |                 switch (c) {
194 |                   case ":":
195 |                     return this.makeToken(Token.tokens.COLON_TOKEN);
196 |                     break;
197 |                   case ";":
198 |                     return this.makeToken(Token.tokens.SEMICOLON_TOKEN);
199 |                     break;
200 |                   case "(":
201 |                     return this.makeToken(Token.tokens.LEFTPAREN_TOKEN);
202 |                     break;
203 |                   case ")":
204 |                     return this.makeToken(Token.tokens.RIGHTPAREN_TOKEN);
205 |                     break;
206 |                   case "{":
207 |                     return this.makeToken(Token.tokens.LEFTBRACE_TOKEN);
208 |                     break;
209 |                   case "}":
210 |                     return this.makeToken(Token.tokens.RIGHTBRACE_TOKEN);
211 |                     break;
212 |                   case "%":
213 |                     return this.makeToken(Token.tokens.MOD_TOKEN);
214 |                     break;
215 | 
216 |                   case "!":
217 |                     if (this.reader.nextChar() == "=") {
218 |                       return this.makeToken(Token.tokens.NOTEQUAL_TOKEN);
219 |                     } else {
220 |                       //we have consumed one more char in if-condition
221 |                       this.reader.retract();
222 |                       return this.makeToken(Token.tokens.NOT_TOKEN);
223 |                     }
224 |                     break;
225 |                   case "+":
226 |                     var d = this.reader.nextChar();
227 |                     if (d == "=") {
228 |                       return this.makeToken(Token.tokens.PLUSASSIGN_TOKEN);
229 |                     } else if (d == "+") {
230 |                       return this.makeToken(Token.tokens.PLUSPLUS_TOKEN);
231 |                     } else {
232 |                       this.reader.retract();
233 |                       return this.makeToken(Token.tokens.PLUS_TOKEN);
234 |                     }
235 |                     break;
236 |                   case "-":
237 |                     var d = this.reader.nextChar();
238 |                     if (d == "=") {
239 |                       return this.makeToken(Token.tokens.MINUSASSIGN_TOKEN);
240 |                     } else if (d == "-") {
241 |                       return this.makeToken(Token.tokens.MINUSMINUS_TOKEN);
242 |                     } else {
243 |                       this.reader.retract();
244 |                       return this.makeToken(Token.tokens.MINUS_TOKEN);
245 |                     }
246 |                     break;
247 |                   case "*":
248 |                     return this.makeToken(Token.tokens.MULT_TOKEN);
249 |                     break;
250 |                   case "=":
251 |                     if (this.reader.nextChar() == "=") {
252 |                       return this.makeToken(Token.tokens.EQUAL_TOKEN);
253 |                     } else {
254 |                       this.reader.retract();
255 |                       return this.makeToken(Token.tokens.ASSIGN_TOKEN);
256 |                     }
257 |                     break;
258 |                   case ">":
259 |                     if (this.reader.nextChar() == "=") {
260 |                       return this.makeToken(Token.tokens.GREATEREQUAL_TOKEN);
261 |                     } else {
262 |                       this.reader.retract();
263 |                       return this.makeToken(Token.tokens.GREATER_TOKEN);
264 |                     }
265 |                     break;
266 |                   case "<":
267 |                     if (this.reader.nextChar() == "=") {
268 |                       return this.makeToken(Token.tokens.LESSEQUAL_TOKEN);
269 |                     } else {
270 |                       this.reader.retract();
271 |                       return this.makeToken(Token.tokens.LESS_TOKEN);
272 |                     }
273 |                     break;
274 | 
275 |                   case "/":
276 |                     this.state = Scanner.SLASH_STATE;
277 |                     break;
278 | 
279 |                   case "&":
280 |                     if (this.reader.nextChar() == "&") {
281 |                       return this.makeToken(Token.tokens.AND_TOKEN);
282 |                     } else {
283 |                       this.reader.retract();
284 |                       Errors.push({
285 |                         type: Errors.SYNTAX_ERROR,
286 |                         msg: "You have only one &",
287 |                         line: this.currLine
288 |                       });
289 |                     }
290 |                     break;
291 |                   case "|":
292 |                     if (this.reader.nextChar() == "|") {
293 |                       return this.makeToken(Token.tokens.OR_TOKEN);
294 |                     } else {
295 |                       this.reader.retract();
296 |                       Errors.push({
297 |                         type: Errors.SYNTAX_ERROR,
298 |                         msg: "You have only one |",
299 |                         line: this.currLine
300 |                       });
301 |                     }
302 |                     break;
303 | 
304 |                   case -1:
305 |                     return this.makeToken(Token.tokens.EOS_TOKEN);
306 |                     break;
307 |                   case "\r":
308 |                   case "\n":
309 |                     this.currLine++;
310 |                   default:
311 |                   //ignore them
312 |                 }
313 |               }
314 |               break;
315 |             case Scanner.IDENTIFIER_STATE:
316 |               var c = this.reader.nextChar();
317 | 
318 |               if ((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")) {
319 |                 bufferStr += c;
320 |               } else {
321 |                 //stop reading it since it is not a letter anymore
322 |                 //retract the last character we read because it does not belong to this identfier
323 |                 this.reader.retract();
324 | 
325 |                 //change back the state to read the next token
326 |                 this.state = Scanner.START_STATE;
327 | 
328 |                 switch (bufferStr) {
329 |                   case "var":
330 |                     return this.makeToken(Token.tokens.VAR_TOKEN);
331 |                   case "int":
332 |                   case "bool":
333 |                     //need to pass bufferStr as well to distinguish which type it is
334 |                     return this.makeToken(Token.tokens.TYPE_TOKEN, bufferStr);
335 |                   case "true":
336 |                   case "false":
337 |                   case "TRUE":
338 |                   case "FALSE":
339 |                     return this.makeToken(
340 |                       Token.tokens.BOOLLITERAL_TOKEN,
341 |                       bufferStr
342 |                     );
343 |                   case "if":
344 |                     return this.makeToken(Token.tokens.IF_TOKEN);
345 |                   case "else":
346 |                     return this.makeToken(Token.tokens.ELSE_TOKEN);
347 |                   case "while":
348 |                     return this.makeToken(Token.tokens.WHILE_TOKEN);
349 |                   case "print":
350 |                     return this.makeToken(Token.tokens.PRINT_TOKEN);
351 |                   default:
352 |                     return this.makeToken(
353 |                       Token.tokens.IDENTIFIER_TOKEN,
354 |                       bufferStr
355 |                     );
356 |                 }
357 |               }
358 |               break;
359 |             case Scanner.SLASH_STATE:
360 |               var d = this.reader.nextChar();
361 |               if (d == "/") {
362 |                 //line comment
363 |                 bufferStr = "";
364 |                 //reading 1 more char here can prevent the case that a // is followed by a line break char immediately
365 |                 d = this.reader.nextChar();
366 |                 if (d != "\r" && d != "\n") {
367 |                   while (d != "\r" && d != "\n") {
368 |                     bufferStr += d;
369 |                     d = this.reader.nextChar();
370 |                   }
371 | 
372 |                   //to retract the line break char
373 |                   this.reader.retract();
374 |                 }
375 | 
376 |                 this.state = Scanner.START_STATE;
377 | 
378 |                 return this.makeToken(
379 |                   Token.tokens.LINECOMMENT_TOKEN,
380 |                   bufferStr
381 |                 );
382 |               } else if (d == "*") {
383 |                 //block comment
384 |                 bufferStr = "";
385 |                 var end = false;
386 |                 while (!end) {
387 |                   d = this.reader.nextChar();
388 |                   if (d != -1) {
389 |                     if (d == "\r" || d == "\n") {
390 |                       this.currLine++;
391 |                     }
392 |                     if (d == "*") {
393 |                       var e = this.reader.nextChar();
394 |                       if (e == "/") {
395 |                         //meet */
396 |                         end = true;
397 |                       } else {
398 |                         bufferStr += "*" + e;
399 |                       }
400 |                     } else {
401 |                       bufferStr += d;
402 |                     }
403 |                   } else {
404 |                     end = true;
405 |                   }
406 |                 }
407 | 
408 |                 this.state = Scanner.START_STATE;
409 | 
410 |                 return this.makeToken(
411 |                   Token.tokens.BLOCKCOMMENT_TOKEN,
412 |                   bufferStr
413 |                 );
414 |               } else {
415 |                 this.state = Scanner.START_STATE;
416 |                 this.reader.retract();
417 |                 return this.makeToken(Token.tokens.DIV_TOKEN);
418 |               }
419 |               break;
420 |           }
421 |         }
422 |       };
423 |     </script>
424 | 
425 |     <!-- our tester -->
426 |     <script type="text/javascript">
427 |       function log(str) {
428 |         $("#log").append(str + "<br />");
429 |       }
430 | 
431 |       function errorLog(str) {
432 |         $("#error").append(str + "<br />");
433 |       }
434 | 
435 |       $(function() {
436 |         $("#wescriptSrc .content").text($("#wescript").text());
437 |       });
438 | 
439 |       $(function() {
440 |         //we stored our wescript in <script id="wescript">
441 |         var dataToBeCompiled = $("#wescript").text();
442 |         var reader = new Reader(dataToBeCompiled);
443 |         var scanner = new Scanner(reader);
444 |         while (true) {
445 |           var token = scanner.nextToken();
446 |           if (token == Token.tokens.EOS_TOKEN) {
447 |             break;
448 |           }
449 | 
450 |           log(
451 |             "Read token: " +
452 |               Token.backwardMap[token] +
453 |               (scanner.currentToken.text == null
454 |                 ? ""
455 |                 : "(" + scanner.currentToken.text + ")")
456 |           );
457 |         }
458 | 
459 |         Errors.each(function(error, i) {
460 |           errorLog(
461 |             "Line " +
462 |               error.line +
463 |               ": (" +
464 |               Errors.type[error.type] +
465 |               ") " +
466 |               error.msg
467 |           );
468 |         });
469 |       });
470 |     </script>
471 |   </head>
472 | 
473 |   <body>
474 |     <div id="wescriptSrc">
475 |       <pre class="content"></pre>
476 |     </div>
477 | 
478 |     <div id="error"></div>
479 | 
480 |     <div id="log"></div>
481 |   </body>
482 | </html>
483 | 


--------------------------------------------------------------------------------
/examples/3-1-1.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 |   <head>
  4 |     <title>Parser - section 1</title>
  5 | 
  6 |     <style type="text/css">
  7 |       #wescriptSrc {
  8 |         margin: 10px 0;
  9 |       }
 10 | 
 11 |       #wescriptSrc .content {
 12 |         border: 1px solid orange;
 13 |         padding: 10px;
 14 |       }
 15 | 
 16 |       #error {
 17 |         color: red;
 18 |         line-height: 1.2;
 19 |         margin: 5px 0;
 20 |       }
 21 |     </style>
 22 | 
 23 |     <!-- of course we need jQuery -->
 24 |     <script
 25 |       src="https://code.jquery.com/jquery-3.4.1.slim.min.js"
 26 |       integrity="sha256-pasqAKBDmFT4eHoN2ndd6lN370kFiGUFyTiUHWhU7k8="
 27 |       crossorigin="anonymous"
 28 |     ></script>
 29 | 
 30 |     <!-- the wescript to be compiled will be stored here -->
 31 |     <script id="wescript" type="text/wescript">
 32 |       //program start
 33 |       test + - *
 34 |       /* some comment */
 35 |       / ++ -- ! 1
 36 |     </script>
 37 | 
 38 |     <!-- our compiler -->
 39 |     <script type="text/javascript">
 40 |       function Errors() {}
 41 | 
 42 |       Errors.errors = [];
 43 | 
 44 |       Errors.push = function(obj) {
 45 |         Errors.errors.push(obj);
 46 |       };
 47 | 
 48 |       Errors.each = function(func) {
 49 |         for (var i = 0, l = Errors.errors.length; i < l; i++) {
 50 |           func(Errors.errors[i], i);
 51 |         }
 52 |       };
 53 | 
 54 |       Errors.SYNTAX_ERROR = 0;
 55 | 
 56 |       Errors.type = ["Syntax error"];
 57 |     </script>
 58 |     <script type="text/javascript">
 59 |       //Reader class
 60 | 
 61 |       //str is the data to be read
 62 |       function Reader(str) {
 63 |         this.data = str;
 64 |         this.currPos = 0;
 65 |         this.dataLength = str.length;
 66 |       }
 67 | 
 68 |       Reader.prototype.nextChar = function() {
 69 |         if (this.currPos >= this.dataLength) {
 70 |           return -1; //end of stream
 71 |         }
 72 |         return this.data[this.currPos++];
 73 |       };
 74 | 
 75 |       //n is the number of characters to be retracted
 76 |       Reader.prototype.retract = function(n) {
 77 |         if (n == undefined) {
 78 |           n = 1;
 79 |         }
 80 |         this.currPos -= n;
 81 |         if (this.currPos < 0) {
 82 |           this.currPos = 0;
 83 |         }
 84 |       };
 85 |     </script>
 86 |     <script type="text/javascript">
 87 |       //Token class
 88 | 
 89 |       //type: Token's type
 90 |       //text: the actual text that makes this token, may be null if it is not important
 91 |       function Token(type, text) {
 92 |         this.type = type;
 93 |         this.text = text;
 94 |       }
 95 | 
 96 |       Token.tokens = {};
 97 |       Token.tokens.EOS_TOKEN = 1; //end of stream
 98 |       // using + 1 allows adding a new token easily later
 99 |       Token.tokens.COLON_TOKEN = Token.tokens.EOS_TOKEN + 1;
100 |       Token.tokens.SEMICOLON_TOKEN = Token.tokens.COLON_TOKEN + 1;
101 |       Token.tokens.LEFTPAREN_TOKEN = Token.tokens.SEMICOLON_TOKEN + 1;
102 |       Token.tokens.RIGHTPAREN_TOKEN = Token.tokens.LEFTPAREN_TOKEN + 1;
103 |       Token.tokens.LEFTBRACE_TOKEN = Token.tokens.RIGHTPAREN_TOKEN + 1;
104 |       Token.tokens.RIGHTBRACE_TOKEN = Token.tokens.LEFTBRACE_TOKEN + 1;
105 |       Token.tokens.MOD_TOKEN = Token.tokens.RIGHTBRACE_TOKEN + 1;
106 | 
107 |       Token.tokens.VAR_TOKEN = Token.tokens.MOD_TOKEN + 1;
108 |       Token.tokens.TYPE_TOKEN = Token.tokens.VAR_TOKEN + 1;
109 |       Token.tokens.BOOLLITERAL_TOKEN = Token.tokens.TYPE_TOKEN + 1;
110 |       Token.tokens.INTLITERAL_TOKEN = Token.tokens.BOOLLITERAL_TOKEN + 1;
111 |       Token.tokens.IF_TOKEN = Token.tokens.INTLITERAL_TOKEN + 1;
112 |       Token.tokens.ELSE_TOKEN = Token.tokens.IF_TOKEN + 1;
113 |       Token.tokens.WHILE_TOKEN = Token.tokens.ELSE_TOKEN + 1;
114 |       Token.tokens.PRINT_TOKEN = Token.tokens.WHILE_TOKEN + 1;
115 |       Token.tokens.IDENTIFIER_TOKEN = Token.tokens.PRINT_TOKEN + 1;
116 | 
117 |       Token.tokens.PLUS_TOKEN = Token.tokens.IDENTIFIER_TOKEN + 1;
118 |       Token.tokens.PLUSPLUS_TOKEN = Token.tokens.PLUS_TOKEN + 1;
119 |       Token.tokens.PLUSASSIGN_TOKEN = Token.tokens.PLUSPLUS_TOKEN + 1;
120 |       Token.tokens.MINUS_TOKEN = Token.tokens.PLUSASSIGN_TOKEN + 1;
121 |       Token.tokens.MINUSMINUS_TOKEN = Token.tokens.MINUS_TOKEN + 1;
122 |       Token.tokens.MINUSASSIGN_TOKEN = Token.tokens.MINUSMINUS_TOKEN + 1;
123 |       Token.tokens.MULT_TOKEN = Token.tokens.MINUSASSIGN_TOKEN + 1;
124 |       Token.tokens.DIV_TOKEN = Token.tokens.MULT_TOKEN + 1;
125 |       Token.tokens.ASSIGN_TOKEN = Token.tokens.DIV_TOKEN + 1;
126 |       Token.tokens.EQUAL_TOKEN = Token.tokens.ASSIGN_TOKEN + 1;
127 |       Token.tokens.NOTEQUAL_TOKEN = Token.tokens.EQUAL_TOKEN + 1;
128 |       Token.tokens.GREATER_TOKEN = Token.tokens.NOTEQUAL_TOKEN + 1;
129 |       Token.tokens.GREATEREQUAL_TOKEN = Token.tokens.GREATER_TOKEN + 1;
130 |       Token.tokens.LESS_TOKEN = Token.tokens.GREATEREQUAL_TOKEN + 1;
131 |       Token.tokens.LESSEQUAL_TOKEN = Token.tokens.LESS_TOKEN + 1;
132 |       Token.tokens.AND_TOKEN = Token.tokens.LESSEQUAL_TOKEN + 1;
133 |       Token.tokens.OR_TOKEN = Token.tokens.AND_TOKEN + 1;
134 |       Token.tokens.NOT_TOKEN = Token.tokens.OR_TOKEN + 1;
135 | 
136 |       Token.tokens.LINECOMMENT_TOKEN = Token.tokens.NOT_TOKEN + 1;
137 |       Token.tokens.BLOCKCOMMENT_TOKEN = Token.tokens.LINECOMMENT_TOKEN + 1;
138 | 
139 |       Token.backwardMap = {}; //for inverse look-up
140 |       for (var x in Token.tokens) {
141 |         Token.backwardMap[Token.tokens[x]] = x;
142 |       }
143 |     </script>
144 |     <script type="text/javascript">
145 |       //Scanner class
146 | 
147 |       //reader: the reader used to read in characters
148 |       function Scanner(reader) {
149 |         this.reader = reader;
150 |         this.currentToken = new Token(); //storing the current analysed token
151 |         this.currLine = 0; //the line number of the current line being read
152 |         this.state = Scanner.START_STATE;
153 |       }
154 | 
155 |       Scanner.START_STATE = 1; //every FSM should have a start state
156 |       Scanner.IDENTIFIER_STATE = Scanner.START_STATE + 1;
157 |       Scanner.SLASH_STATE = Scanner.IDENTIFIER_STATE + 1;
158 | 
159 |       Scanner.prototype.makeToken = function(type, text) {
160 |         this.currentToken.type = type;
161 |         this.currentToken.text = text;
162 |         return type;
163 |       };
164 | 
165 |       Scanner.prototype.nextToken = function() {
166 |         var bufferStr = "";
167 |         while (true) {
168 |           switch (this.state) {
169 |             case Scanner.START_STATE:
170 |               var c = this.reader.nextChar();
171 | 
172 |               if ((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")) {
173 |                 this.state = Scanner.IDENTIFIER_STATE;
174 |                 //we need to remember what the token's text is
175 |                 bufferStr = c;
176 |               } else if (c >= "0" && c <= "9") {
177 |                 bufferStr = c;
178 |                 var d;
179 |                 while (true) {
180 |                   d = this.reader.nextChar();
181 |                   if (d >= "0" && d <= "9") {
182 |                     bufferStr += d;
183 |                   } else {
184 |                     this.reader.retract();
185 |                     return this.makeToken(
186 |                       Token.tokens.INTLITERAL_TOKEN,
187 |                       bufferStr
188 |                     );
189 |                   }
190 |                 }
191 |               } else {
192 |                 switch (c) {
193 |                   case ":":
194 |                     return this.makeToken(Token.tokens.COLON_TOKEN);
195 |                     break;
196 |                   case ";":
197 |                     return this.makeToken(Token.tokens.SEMICOLON_TOKEN);
198 |                     break;
199 |                   case "(":
200 |                     return this.makeToken(Token.tokens.LEFTPAREN_TOKEN);
201 |                     break;
202 |                   case ")":
203 |                     return this.makeToken(Token.tokens.RIGHTPAREN_TOKEN);
204 |                     break;
205 |                   case "{":
206 |                     return this.makeToken(Token.tokens.LEFTBRACE_TOKEN);
207 |                     break;
208 |                   case "}":
209 |                     return this.makeToken(Token.tokens.RIGHTBRACE_TOKEN);
210 |                     break;
211 |                   case "%":
212 |                     return this.makeToken(Token.tokens.MOD_TOKEN);
213 |                     break;
214 | 
215 |                   case "!":
216 |                     if (this.reader.nextChar() == "=") {
217 |                       return this.makeToken(Token.tokens.NOTEQUAL_TOKEN);
218 |                     } else {
219 |                       //we have consumed one more char in if-condition
220 |                       this.reader.retract();
221 |                       return this.makeToken(Token.tokens.NOT_TOKEN);
222 |                     }
223 |                     break;
224 |                   case "+":
225 |                     var d = this.reader.nextChar();
226 |                     if (d == "=") {
227 |                       return this.makeToken(Token.tokens.PLUSASSIGN_TOKEN);
228 |                     } else if (d == "+") {
229 |                       return this.makeToken(Token.tokens.PLUSPLUS_TOKEN);
230 |                     } else {
231 |                       this.reader.retract();
232 |                       return this.makeToken(Token.tokens.PLUS_TOKEN);
233 |                     }
234 |                     break;
235 |                   case "-":
236 |                     var d = this.reader.nextChar();
237 |                     if (d == "=") {
238 |                       return this.makeToken(Token.tokens.MINUSASSIGN_TOKEN);
239 |                     } else if (d == "-") {
240 |                       return this.makeToken(Token.tokens.MINUSMINUS_TOKEN);
241 |                     } else {
242 |                       this.reader.retract();
243 |                       return this.makeToken(Token.tokens.MINUS_TOKEN);
244 |                     }
245 |                     break;
246 |                   case "*":
247 |                     return this.makeToken(Token.tokens.MULT_TOKEN);
248 |                     break;
249 |                   case "=":
250 |                     if (this.reader.nextChar() == "=") {
251 |                       return this.makeToken(Token.tokens.EQUAL_TOKEN);
252 |                     } else {
253 |                       this.reader.retract();
254 |                       return this.makeToken(Token.tokens.ASSIGN_TOKEN);
255 |                     }
256 |                     break;
257 |                   case ">":
258 |                     if (this.reader.nextChar() == "=") {
259 |                       return this.makeToken(Token.tokens.GREATEREQUAL_TOKEN);
260 |                     } else {
261 |                       this.reader.retract();
262 |                       return this.makeToken(Token.tokens.GREATER_TOKEN);
263 |                     }
264 |                     break;
265 |                   case "<":
266 |                     if (this.reader.nextChar() == "=") {
267 |                       return this.makeToken(Token.tokens.LESSEQUAL_TOKEN);
268 |                     } else {
269 |                       this.reader.retract();
270 |                       return this.makeToken(Token.tokens.LESS_TOKEN);
271 |                     }
272 |                     break;
273 | 
274 |                   case "/":
275 |                     this.state = Scanner.SLASH_STATE;
276 |                     break;
277 | 
278 |                   case "&":
279 |                     if (this.reader.nextChar() == "&") {
280 |                       return this.makeToken(Token.tokens.AND_TOKEN);
281 |                     } else {
282 |                       this.reader.retract();
283 |                       Errors.push({
284 |                         type: Errors.SYNTAX_ERROR,
285 |                         msg: "You have only one &",
286 |                         line: this.currLine
287 |                       });
288 |                     }
289 |                     break;
290 |                   case "|":
291 |                     if (this.reader.nextChar() == "|") {
292 |                       return this.makeToken(Token.tokens.OR_TOKEN);
293 |                     } else {
294 |                       this.reader.retract();
295 |                       Errors.push({
296 |                         type: Errors.SYNTAX_ERROR,
297 |                         msg: "You have only one |",
298 |                         line: this.currLine
299 |                       });
300 |                     }
301 |                     break;
302 | 
303 |                   case -1:
304 |                     return this.makeToken(Token.tokens.EOS_TOKEN);
305 |                     break;
306 |                   case "\r":
307 |                   case "\n":
308 |                     this.currLine++;
309 |                   default:
310 |                   //ignore them
311 |                 }
312 |               }
313 |               break;
314 |             case Scanner.IDENTIFIER_STATE:
315 |               var c = this.reader.nextChar();
316 | 
317 |               if ((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")) {
318 |                 bufferStr += c;
319 |               } else {
320 |                 //stop reading it since it is not a letter anymore
321 |                 //retract the last character we read because it does not belong to this identifier
322 |                 this.reader.retract();
323 | 
324 |                 //change back the state to read the next token
325 |                 this.state = Scanner.START_STATE;
326 | 
327 |                 switch (bufferStr) {
328 |                   case "var":
329 |                     return this.makeToken(Token.tokens.VAR_TOKEN);
330 |                   case "int":
331 |                   case "bool":
332 |                     //need to pass bufferStr as well to distinguish which type it is
333 |                     return this.makeToken(Token.tokens.TYPE_TOKEN, bufferStr);
334 |                   case "true":
335 |                   case "false":
336 |                   case "TRUE":
337 |                   case "FALSE":
338 |                     return this.makeToken(
339 |                       Token.tokens.BOOLLITERAL_TOKEN,
340 |                       bufferStr
341 |                     );
342 |                   case "if":
343 |                     return this.makeToken(Token.tokens.IF_TOKEN);
344 |                   case "else":
345 |                     return this.makeToken(Token.tokens.ELSE_TOKEN);
346 |                   case "while":
347 |                     return this.makeToken(Token.tokens.WHILE_TOKEN);
348 |                   case "print":
349 |                     return this.makeToken(Token.tokens.PRINT_TOKEN);
350 |                   default:
351 |                     return this.makeToken(
352 |                       Token.tokens.IDENTIFIER_TOKEN,
353 |                       bufferStr
354 |                     );
355 |                 }
356 |               }
357 |               break;
358 |             case Scanner.SLASH_STATE:
359 |               var d = this.reader.nextChar();
360 |               if (d == "/") {
361 |                 //line comment
362 |                 bufferStr = "";
363 |                 //reading 1 more char here can prevent the case that a // is followed by a line break char immediately
364 |                 d = this.reader.nextChar();
365 |                 if (d != "\r" && d != "\n") {
366 |                   while (d != "\r" && d != "\n") {
367 |                     bufferStr += d;
368 |                     d = this.reader.nextChar();
369 |                   }
370 | 
371 |                   //to retract the line break char
372 |                   this.reader.retract();
373 |                 }
374 | 
375 |                 this.state = Scanner.START_STATE;
376 | 
377 |                 return this.makeToken(
378 |                   Token.tokens.LINECOMMENT_TOKEN,
379 |                   bufferStr
380 |                 );
381 |               } else if (d == "*") {
382 |                 //block comment
383 |                 bufferStr = "";
384 |                 var end = false;
385 |                 while (!end) {
386 |                   d = this.reader.nextChar();
387 |                   if (d != -1) {
388 |                     if (d == "\r" || d == "\n") {
389 |                       this.currLine++;
390 |                     }
391 |                     if (d == "*") {
392 |                       var e = this.reader.nextChar();
393 |                       if (e == "/") {
394 |                         //meet */
395 |                         end = true;
396 |                       } else {
397 |                         bufferStr += "*" + e;
398 |                       }
399 |                     } else {
400 |                       bufferStr += d;
401 |                     }
402 |                   } else {
403 |                     end = true;
404 |                   }
405 |                 }
406 | 
407 |                 this.state = Scanner.START_STATE;
408 | 
409 |                 return this.makeToken(
410 |                   Token.tokens.BLOCKCOMMENT_TOKEN,
411 |                   bufferStr
412 |                 );
413 |               } else {
414 |                 this.state = Scanner.START_STATE;
415 |                 this.reader.retract();
416 |                 return this.makeToken(Token.tokens.DIV_TOKEN);
417 |               }
418 |               break;
419 |           }
420 |         }
421 |       };
422 |     </script>
423 |     <script type="text/javascript">
424 |       //Parser class
425 | 
426 |       function Parser(scanner) {
427 |         this.scanner = scanner;
428 |         this.currentToken = new Token();
429 |         this.lookaheadToken = new Token();
430 |         this.lookaheadToken.consumed = true;
431 |       }
432 | 
433 |       Parser.prototype.nextToken = function() {
434 |         if (this.lookaheadToken.consumed) {
435 |           var token = this.scanner.nextToken();
436 | 
437 |           //skip comments
438 |           while (
439 |             token == Token.tokens.LINECOMMENT_TOKEN ||
440 |             token == Token.tokens.BLOCKCOMMENT_TOKEN
441 |           ) {
442 |             token = this.scanner.nextToken();
443 |           }
444 | 
445 |           this.currentToken.type = token;
446 |           this.currentToken.text = this.scanner.currentToken.text;
447 | 
448 |           return token;
449 |         } else {
450 |           this.currentToken.type = this.lookaheadToken.type;
451 |           this.currentToken.text = this.lookaheadToken.text;
452 |           this.lookaheadToken.consumed = true;
453 |           return this.currentToken.type;
454 |         }
455 |       };
456 | 
457 |       Parser.prototype.lookahead = function() {
458 |         if (this.lookaheadToken.consumed) {
459 |           var token = this.scanner.nextToken();
460 | 
461 |           //skip comments
462 |           while (
463 |             token == Token.tokens.LINECOMMENT_TOKEN ||
464 |             token == Token.tokens.BLOCKCOMMENT_TOKEN
465 |           ) {
466 |             token = this.scanner.nextToken();
467 |           }
468 | 
469 |           this.lookaheadToken.type = token;
470 |           this.lookaheadToken.text = this.scanner.currentToken.text;
471 |           this.lookaheadToken.consumed = false;
472 | 
473 |           return token;
474 |         } else {
475 |           return this.lookaheadToken.type;
476 |         }
477 |       };
478 |     </script>
479 | 
480 |     <!-- our tester -->
481 |     <script type="text/javascript">
482 |       function log(str) { //adds str, followed by a line break, to the #log element
483 |         $("#log").append(str + "<br />"); //the string is appended as markup, not as plain text
484 |       }
485 |       //same as log(), but writes to the #error element
486 |       function errorLog(str) {
487 |         $("#error").append(str + "<br />");
488 |       }
489 | 
490 |       $(function() {
491 |         $("#wescriptSrc .content").text($("#wescript").text());
492 |       });
493 | 
494 |       $(function() {
495 |         //we stored our wescript in <script id="wescript">
496 |         var dataToBeCompiled = $("#wescript").text();
497 |         var reader = new Reader(dataToBeCompiled);
498 |         var scanner = new Scanner(reader);
499 |         var parser = new Parser(scanner);
500 |         while (true) {
501 |           if (parser.lookahead() == Token.tokens.PLUSPLUS_TOKEN) {
502 |             log("lookahead: PLUSPLUS_TOKEN");
503 |           }
504 | 
505 |           if (parser.lookahead() == Token.tokens.PLUSPLUS_TOKEN) {
506 |             log("lookahead again: PLUSPLUS_TOKEN");
507 |           }
508 | 
509 |           var token = parser.nextToken();
510 |           log("Token: " + Token.backwardMap[token]);
511 | 
512 |           if (token == Token.tokens.EOS_TOKEN) {
513 |             break;
514 |           }
515 |         }
516 | 
517 |         Errors.each(function(error, i) {
518 |           errorLog(
519 |             "Line " +
520 |               error.line +
521 |               ": (" +
522 |               Errors.type[error.type] +
523 |               ") " +
524 |               error.msg
525 |           );
526 |         });
527 |       });
528 |     </script>
529 |   </head>
530 | 
531 |   <body>
532 |     <div id="wescriptSrc">
533 |       <pre class="content"></pre>
534 |     </div>
535 | 
536 |     <div id="error"></div>
537 | 
538 |     <div id="log"></div>
539 |   </body>
540 | </html>
541 | 


--------------------------------------------------------------------------------
/examples/3-1-2.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 |   <head>
  4 |     <title>Parser - section 2</title>
  5 | 
  6 |     <style type="text/css">
  7 |       #wescriptSrc {
  8 |         margin: 10px 0;
  9 |       }
 10 | 
 11 |       #wescriptSrc .content {
 12 |         border: 1px solid orange;
 13 |         padding: 10px;
 14 |       }
 15 | 
 16 |       #error {
 17 |         color: red;
 18 |         line-height: 1.2;
 19 |         margin: 5px 0;
 20 |       }
 21 |     </style>
 22 | 
 23 |     <!-- of course we need jQuery -->
 24 |     <script
 25 |       src="https://code.jquery.com/jquery-3.4.1.slim.min.js"
 26 |       integrity="sha256-pasqAKBDmFT4eHoN2ndd6lN370kFiGUFyTiUHWhU7k8="
 27 |       crossorigin="anonymous"
 28 |     ></script>
 29 | 
 30 |     <!-- the wescript to be compiled will be stored here -->
 31 |     <script id="wescript" type="text/wescript">
 32 |       //program start
 33 |       print 64;
 34 |       print;
 35 |     </script>
 36 | 
 37 |     <script type="text/javascript">
 38 |       function extend(subClass, baseClass) {
 39 |         function inheritance() {}
 40 |         inheritance.prototype = baseClass.prototype;
 41 | 
 42 |         subClass.prototype = new inheritance();
 43 |         subClass.prototype.constructor = subClass;
 44 |         subClass.baseConstructor = baseClass;
 45 |         subClass.superClass = baseClass.prototype;
 46 |       }
 47 |     </script>
 48 | 
 49 |     <!-- our compiler -->
 50 |     <script type="text/javascript">
 51 |       function Errors() {}
 52 | 
 53 |       Errors.errors = [];
 54 | 
 55 |       Errors.push = function(obj) {
 56 |         Errors.errors.push(obj);
 57 |       };
 58 | 
 59 |       Errors.each = function(func) {
 60 |         for (var i = 0, l = Errors.errors.length; i < l; i++) {
 61 |           func(Errors.errors[i], i);
 62 |         }
 63 |       };
 64 | 
 65 |       Errors.SYNTAX_ERROR = 0;
 66 | 
 67 |       Errors.type = ["Syntax error"];
 68 |     </script>
 69 |     <script type="text/javascript">
 70 |       //Reader class
 71 | 
 72 |       //str is the data to be read
 73 |       function Reader(str) {
 74 |         this.data = str;
 75 |         this.currPos = 0;
 76 |         this.dataLength = str.length;
 77 |       }
 78 | 
 79 |       Reader.prototype.nextChar = function() {
 80 |         if (this.currPos >= this.dataLength) {
 81 |           return -1; //end of stream
 82 |         }
 83 |         return this.data[this.currPos++];
 84 |       };
 85 | 
 86 |       //n is the number of characters to be retracted
 87 |       Reader.prototype.retract = function(n) {
 88 |         if (n == undefined) {
 89 |           n = 1;
 90 |         }
 91 |         this.currPos -= n;
 92 |         if (this.currPos < 0) {
 93 |           this.currPos = 0;
 94 |         }
 95 |       };
 96 |     </script>
 97 |     <script type="text/javascript">
 98 |       //Token class
 99 | 
100 |       //type: Token's type
101 |       //text: the actual text that makes this token, may be null if it is not important
102 |       function Token(type, text) {
103 |         this.type = type;
104 |         this.text = text;
105 |       }
106 |       //token type ids: consecutive integers, each defined as the previous one + 1
107 |       Token.tokens = {};
108 |       Token.tokens.EOS_TOKEN = 1; //end of stream
109 |       // using + 1 allows adding a new token easily later
110 |       Token.tokens.COLON_TOKEN = Token.tokens.EOS_TOKEN + 1;
111 |       Token.tokens.SEMICOLON_TOKEN = Token.tokens.COLON_TOKEN + 1;
112 |       Token.tokens.LEFTPAREN_TOKEN = Token.tokens.SEMICOLON_TOKEN + 1;
113 |       Token.tokens.RIGHTPAREN_TOKEN = Token.tokens.LEFTPAREN_TOKEN + 1;
114 |       Token.tokens.LEFTBRACE_TOKEN = Token.tokens.RIGHTPAREN_TOKEN + 1;
115 |       Token.tokens.RIGHTBRACE_TOKEN = Token.tokens.LEFTBRACE_TOKEN + 1;
116 |       Token.tokens.MOD_TOKEN = Token.tokens.RIGHTBRACE_TOKEN + 1;
117 |       //keywords, literals and identifiers
118 |       Token.tokens.VAR_TOKEN = Token.tokens.MOD_TOKEN + 1;
119 |       Token.tokens.TYPE_TOKEN = Token.tokens.VAR_TOKEN + 1;
120 |       Token.tokens.BOOLLITERAL_TOKEN = Token.tokens.TYPE_TOKEN + 1;
121 |       Token.tokens.INTLITERAL_TOKEN = Token.tokens.BOOLLITERAL_TOKEN + 1;
122 |       Token.tokens.IF_TOKEN = Token.tokens.INTLITERAL_TOKEN + 1;
123 |       Token.tokens.ELSE_TOKEN = Token.tokens.IF_TOKEN + 1;
124 |       Token.tokens.WHILE_TOKEN = Token.tokens.ELSE_TOKEN + 1;
125 |       Token.tokens.PRINT_TOKEN = Token.tokens.WHILE_TOKEN + 1;
126 |       Token.tokens.IDENTIFIER_TOKEN = Token.tokens.PRINT_TOKEN + 1;
127 |       //operators
128 |       Token.tokens.PLUS_TOKEN = Token.tokens.IDENTIFIER_TOKEN + 1;
129 |       Token.tokens.PLUSPLUS_TOKEN = Token.tokens.PLUS_TOKEN + 1;
130 |       Token.tokens.PLUSASSIGN_TOKEN = Token.tokens.PLUSPLUS_TOKEN + 1;
131 |       Token.tokens.MINUS_TOKEN = Token.tokens.PLUSASSIGN_TOKEN + 1;
132 |       Token.tokens.MINUSMINUS_TOKEN = Token.tokens.MINUS_TOKEN + 1;
133 |       Token.tokens.MINUSASSIGN_TOKEN = Token.tokens.MINUSMINUS_TOKEN + 1;
134 |       Token.tokens.MULT_TOKEN = Token.tokens.MINUSASSIGN_TOKEN + 1;
135 |       Token.tokens.DIV_TOKEN = Token.tokens.MULT_TOKEN + 1;
136 |       Token.tokens.ASSIGN_TOKEN = Token.tokens.DIV_TOKEN + 1;
137 |       Token.tokens.EQUAL_TOKEN = Token.tokens.ASSIGN_TOKEN + 1;
138 |       Token.tokens.NOTEQUAL_TOKEN = Token.tokens.EQUAL_TOKEN + 1;
139 |       Token.tokens.GREATER_TOKEN = Token.tokens.NOTEQUAL_TOKEN + 1;
140 |       Token.tokens.GREATEREQUAL_TOKEN = Token.tokens.GREATER_TOKEN + 1;
141 |       Token.tokens.LESS_TOKEN = Token.tokens.GREATEREQUAL_TOKEN + 1;
142 |       Token.tokens.LESSEQUAL_TOKEN = Token.tokens.LESS_TOKEN + 1;
143 |       Token.tokens.AND_TOKEN = Token.tokens.LESSEQUAL_TOKEN + 1;
144 |       Token.tokens.OR_TOKEN = Token.tokens.AND_TOKEN + 1;
145 |       Token.tokens.NOT_TOKEN = Token.tokens.OR_TOKEN + 1;
146 |       //comments (the scanner returns them as tokens)
147 |       Token.tokens.LINECOMMENT_TOKEN = Token.tokens.NOT_TOKEN + 1;
148 |       Token.tokens.BLOCKCOMMENT_TOKEN = Token.tokens.LINECOMMENT_TOKEN + 1;
149 |       //backwardMap maps a numeric token type back to its name, e.g. 1 -> "EOS_TOKEN"
150 |       Token.backwardMap = {}; //for inverse look-up
151 |       for (var x in Token.tokens) {
152 |         Token.backwardMap[Token.tokens[x]] = x;
153 |       }
154 |     </script>
155 |     <script type="text/javascript">
156 |       //Scanner class
157 | 
158 |       //reader: the reader used to read in characters
159 |       function Scanner(reader) {
160 |         this.reader = reader;
161 |         this.currentToken = new Token(); //storing the current analysed token
162 |         this.currLine = 0; //the line number of the current line being read
163 |         this.state = Scanner.START_STATE;
164 |       }
165 | 
166 |       Scanner.START_STATE = 1; //every FSM should have a start state
167 |       Scanner.IDENTIFIER_STATE = Scanner.START_STATE + 1;
168 |       Scanner.SLASH_STATE = Scanner.IDENTIFIER_STATE + 1;
169 | 
170 |       Scanner.prototype.makeToken = function(type, text) {
171 |         this.currentToken.type = type;
172 |         this.currentToken.text = text;
173 |         return type;
174 |       };
175 | 
176 |       Scanner.prototype.nextToken = function() {
177 |         var bufferStr = "";
178 |         while (true) {
179 |           switch (this.state) {
180 |             case Scanner.START_STATE:
181 |               var c = this.reader.nextChar();
182 | 
183 |               if ((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")) {
184 |                 this.state = Scanner.IDENTIFIER_STATE;
185 |                 //we need to remember what the token's text is
186 |                 bufferStr = c;
187 |               } else if (c >= "0" && c <= "9") {
188 |                 bufferStr = c;
189 |                 var d;
190 |                 while (true) {
191 |                   d = this.reader.nextChar();
192 |                   if (d >= "0" && d <= "9") {
193 |                     bufferStr += d;
194 |                   } else {
195 |                     this.reader.retract();
196 |                     return this.makeToken(
197 |                       Token.tokens.INTLITERAL_TOKEN,
198 |                       bufferStr
199 |                     );
200 |                   }
201 |                 }
202 |               } else {
203 |                 switch (c) {
204 |                   case ":":
205 |                     return this.makeToken(Token.tokens.COLON_TOKEN);
206 |                     break;
207 |                   case ";":
208 |                     return this.makeToken(Token.tokens.SEMICOLON_TOKEN);
209 |                     break;
210 |                   case "(":
211 |                     return this.makeToken(Token.tokens.LEFTPAREN_TOKEN);
212 |                     break;
213 |                   case ")":
214 |                     return this.makeToken(Token.tokens.RIGHTPAREN_TOKEN);
215 |                     break;
216 |                   case "{":
217 |                     return this.makeToken(Token.tokens.LEFTBRACE_TOKEN);
218 |                     break;
219 |                   case "}":
220 |                     return this.makeToken(Token.tokens.RIGHTBRACE_TOKEN);
221 |                     break;
222 |                   case "%":
223 |                     return this.makeToken(Token.tokens.MOD_TOKEN);
224 |                     break;
225 | 
226 |                   case "!":
227 |                     if (this.reader.nextChar() == "=") {
228 |                       return this.makeToken(Token.tokens.NOTEQUAL_TOKEN);
229 |                     } else {
230 |                       //we have consumed one more char in if-condition
231 |                       this.reader.retract();
232 |                       return this.makeToken(Token.tokens.NOT_TOKEN);
233 |                     }
234 |                     break;
235 |                   case "+":
236 |                     var d = this.reader.nextChar();
237 |                     if (d == "=") {
238 |                       return this.makeToken(Token.tokens.PLUSASSIGN_TOKEN);
239 |                     } else if (d == "+") {
240 |                       return this.makeToken(Token.tokens.PLUSPLUS_TOKEN);
241 |                     } else {
242 |                       this.reader.retract();
243 |                       return this.makeToken(Token.tokens.PLUS_TOKEN);
244 |                     }
245 |                     break;
246 |                   case "-":
247 |                     var d = this.reader.nextChar();
248 |                     if (d == "=") {
249 |                       return this.makeToken(Token.tokens.MINUSASSIGN_TOKEN);
250 |                     } else if (d == "-") {
251 |                       return this.makeToken(Token.tokens.MINUSMINUS_TOKEN);
252 |                     } else {
253 |                       this.reader.retract();
254 |                       return this.makeToken(Token.tokens.MINUS_TOKEN);
255 |                     }
256 |                     break;
257 |                   case "*":
258 |                     return this.makeToken(Token.tokens.MULT_TOKEN);
259 |                     break;
260 |                   case "=":
261 |                     if (this.reader.nextChar() == "=") {
262 |                       return this.makeToken(Token.tokens.EQUAL_TOKEN);
263 |                     } else {
264 |                       this.reader.retract();
265 |                       return this.makeToken(Token.tokens.ASSIGN_TOKEN);
266 |                     }
267 |                     break;
268 |                   case ">":
269 |                     if (this.reader.nextChar() == "=") {
270 |                       return this.makeToken(Token.tokens.GREATEREQUAL_TOKEN);
271 |                     } else {
272 |                       this.reader.retract();
273 |                       return this.makeToken(Token.tokens.GREATER_TOKEN);
274 |                     }
275 |                     break;
276 |                   case "<":
277 |                     if (this.reader.nextChar() == "=") {
278 |                       return this.makeToken(Token.tokens.LESSEQUAL_TOKEN);
279 |                     } else {
280 |                       this.reader.retract();
281 |                       return this.makeToken(Token.tokens.LESS_TOKEN);
282 |                     }
283 |                     break;
284 | 
285 |                   case "/":
286 |                     this.state = Scanner.SLASH_STATE;
287 |                     break;
288 | 
289 |                   case "&":
290 |                     if (this.reader.nextChar() == "&") {
291 |                       return this.makeToken(Token.tokens.AND_TOKEN);
292 |                     } else {
293 |                       this.reader.retract();
294 |                       Errors.push({
295 |                         type: Errors.SYNTAX_ERROR,
296 |                         msg: "You have only one &",
297 |                         line: this.currLine
298 |                       });
299 |                     }
300 |                     break;
301 |                   case "|":
302 |                     if (this.reader.nextChar() == "|") {
303 |                       return this.makeToken(Token.tokens.OR_TOKEN);
304 |                     } else {
305 |                       this.reader.retract();
306 |                       Errors.push({
307 |                         type: Errors.SYNTAX_ERROR,
308 |                         msg: "You have only one |",
309 |                         line: this.currLine
310 |                       });
311 |                     }
312 |                     break;
313 | 
314 |                   case -1:
315 |                     return this.makeToken(Token.tokens.EOS_TOKEN);
316 |                     break;
317 |                   case "\r":
318 |                   case "\n":
319 |                     this.currLine++;
320 |                   default:
321 |                   //ignore them
322 |                 }
323 |               }
324 |               break;
325 |             case Scanner.IDENTIFIER_STATE:
326 |               var c = this.reader.nextChar();
327 | 
328 |               if ((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")) {
329 |                 bufferStr += c;
330 |               } else {
331 |                 //stop reading it since it is not a letter anymore
332 |                 //retract the last character we read because it does not belong to this identfier
333 |                 this.reader.retract();
334 | 
335 |                 //change back the state to read the next token
336 |                 this.state = Scanner.START_STATE;
337 | 
338 |                 switch (bufferStr) {
339 |                   case "var":
340 |                     return this.makeToken(Token.tokens.VAR_TOKEN);
341 |                   case "int":
342 |                   case "bool":
343 |                     //need to pass bufferStr as well to distinguish which type it is
344 |                     return this.makeToken(Token.tokens.TYPE_TOKEN, bufferStr);
345 |                   case "true":
346 |                   case "false":
347 |                   case "TRUE":
348 |                   case "FALSE":
349 |                     return this.makeToken(
350 |                       Token.tokens.BOOLLITERAL_TOKEN,
351 |                       bufferStr
352 |                     );
353 |                   case "if":
354 |                     return this.makeToken(Token.tokens.IF_TOKEN);
355 |                   case "else":
356 |                     return this.makeToken(Token.tokens.ELSE_TOKEN);
357 |                   case "while":
358 |                     return this.makeToken(Token.tokens.WHILE_TOKEN);
359 |                   case "print":
360 |                     return this.makeToken(Token.tokens.PRINT_TOKEN);
361 |                   default:
362 |                     return this.makeToken(
363 |                       Token.tokens.IDENTIFIER_TOKEN,
364 |                       bufferStr
365 |                     );
366 |                 }
367 |               }
368 |               break;
369 |             case Scanner.SLASH_STATE:
370 |               var d = this.reader.nextChar();
371 |               if (d == "/") {
372 |                 //line comment
373 |                 bufferStr = "";
374 |                 //reading 1 more char here can prevent the case that a // is followed by a line break char immediately
375 |                 d = this.reader.nextChar();
376 |                 if (d != "\r" && d != "\n") {
377 |                   while (d != "\r" && d != "\n") {
378 |                     bufferStr += d;
379 |                     d = this.reader.nextChar();
380 |                   }
381 | 
382 |                   //to retract the line break char
383 |                   this.reader.retract();
384 |                 }
385 | 
386 |                 this.state = Scanner.START_STATE;
387 | 
388 |                 return this.makeToken(
389 |                   Token.tokens.LINECOMMENT_TOKEN,
390 |                   bufferStr
391 |                 );
392 |               } else if (d == "*") {
393 |                 //block comment
394 |                 bufferStr = "";
395 |                 var end = false;
396 |                 while (!end) {
397 |                   d = this.reader.nextChar();
398 |                   if (d != -1) {
399 |                     if (d == "\r" || d == "\n") {
400 |                       this.currLine++;
401 |                     }
402 |                     if (d == "*") {
403 |                       var e = this.reader.nextChar();
404 |                       if (e == "/") {
405 |                         //meet */
406 |                         end = true;
407 |                       } else {
408 |                         bufferStr += "*" + e;
409 |                       }
410 |                     } else {
411 |                       bufferStr += d;
412 |                     }
413 |                   } else {
414 |                     end = true;
415 |                   }
416 |                 }
417 | 
418 |                 this.state = Scanner.START_STATE;
419 | 
420 |                 return this.makeToken(
421 |                   Token.tokens.BLOCKCOMMENT_TOKEN,
422 |                   bufferStr
423 |                 );
424 |               } else {
425 |                 this.state = Scanner.START_STATE;
426 |                 this.reader.retract();
427 |                 return this.makeToken(Token.tokens.DIV_TOKEN);
428 |               }
429 |               break;
430 |           }
431 |         }
432 |       };
433 |     </script>
434 |     <script type="text/javascript">
435 |       //Nodes
436 | 
437 |       function Node(param) {}
438 | 
439 |       function ExpressionBlockNode() {
440 |         ExpressionBlockNode.baseConstructor.call(this, "test");
441 |         this.expressions = [];
442 |       }
443 | 
444 |       extend(ExpressionBlockNode, Node);
445 | 
446 |       ExpressionBlockNode.prototype.push = function(expression) {
447 |         this.expressions.push(expression);
448 |       };
449 | 
450 |       ExpressionBlockNode.prototype.iterate = function(func) {
451 |         for (var i = 0, l = this.expressions.length; i < l; i++) {
452 |           var expression = this.expressions[i];
453 |           func(expression, i);
454 |         }
455 |       };
456 | 
457 |       function PrintNode(expressionNode) {
458 |         this.expressionNode = expressionNode;
459 |       }
460 | 
461 |       extend(PrintNode, Node);
462 | 
463 |       function IntNode(data) {
464 |         this.data = data;
465 |       }
466 | 
467 |       extend(IntNode, Node);
468 |     </script>
469 |     <script type="text/javascript">
470 |       //Parser class
471 | 
472 |       function Parser(scanner) {
473 |         this.scanner = scanner;
474 |         this.currentToken = new Token();
475 |         this.lookaheadToken = new Token();
476 |         this.lookaheadToken.consumed = true;
477 |       }
478 | 
479 |       Parser.prototype.nextToken = function() {
480 |         if (this.lookaheadToken.consumed) {
481 |           var token = this.scanner.nextToken();
482 | 
483 |           //skip comments
484 |           while (
485 |             token == Token.tokens.LINECOMMENT_TOKEN ||
486 |             token == Token.tokens.BLOCKCOMMENT_TOKEN
487 |           ) {
488 |             token = this.scanner.nextToken();
489 |           }
490 | 
491 |           this.currentToken.type = token;
492 |           this.currentToken.text = this.scanner.currentToken.text;
493 | 
494 |           return token;
495 |         } else {
496 |           this.currentToken.type = this.lookaheadToken.type;
497 |           this.currentToken.text = this.lookaheadToken.text;
498 |           this.lookaheadToken.consumed = true;
499 |           return this.currentToken.type;
500 |         }
501 |       };
502 | 
503 |       Parser.prototype.lookahead = function() {
504 |         if (this.lookaheadToken.consumed) {
505 |           var token = this.scanner.nextToken();
506 | 
507 |           //skip comments
508 |           while (
509 |             token == Token.tokens.LINECOMMENT_TOKEN ||
510 |             token == Token.tokens.BLOCKCOMMENT_TOKEN
511 |           ) {
512 |             token = this.scanner.nextToken();
513 |           }
514 | 
515 |           this.lookaheadToken.type = token;
516 |           this.lookaheadToken.text = this.scanner.currentToken.text;
517 |           this.lookaheadToken.consumed = false;
518 | 
519 |           return token;
520 |         } else {
521 |           return this.lookaheadToken.type;
522 |         }
523 |       };
524 | 
525 |       //the entry point of our parser
526 |       Parser.prototype.parse = function() {
527 |         var rootBlock = new ExpressionBlockNode();
528 | 
529 |         this.parseExpressions(rootBlock);
530 | 
531 |         return rootBlock;
532 |       };
533 | 
534 |       //to parse a list of expressions
535 |       Parser.prototype.parseExpressions = function(expressionBlockNode) {
536 |         while (
537 |           this.lookahead() != Token.tokens.RIGHTBRACE_TOKEN &&
538 |           this.lookahead() != Token.tokens.EOS_TOKEN
539 |         ) {
540 |           var expressionNode = this.parseExpression();
541 | 
542 |           if (expressionNode) {
543 |             expressionBlockNode.push(expressionNode);
544 |           }
545 |         }
546 |       };
547 | 
548 |       //to parse an expression
549 |       Parser.prototype.parseExpression = function() {
550 |         switch (this.lookahead()) {
551 |           case Token.tokens.PRINT_TOKEN:
552 |             var printToken = this.nextToken();
553 |             var expressionNode = this.parseExpression();
554 |             return new PrintNode(expressionNode);
555 |             break;
556 | 
557 |           case Token.tokens.INTLITERAL_TOKEN:
558 |             var intToken = this.nextToken();
559 |             return new IntNode(this.currentToken.text);
560 |             break;
561 | 
562 |           default:
563 |             //unexpected, consume it
564 |             this.nextToken();
565 |         }
566 |       };
567 |     </script>
568 | 
569 |     <!-- our tester -->
570 |     <script type="text/javascript">
571 |       function log(str) { //adds str, followed by a line break, to the #log element
572 |         $("#log").append(str + "<br />"); //the string is appended as markup, not as plain text
573 |       }
574 |       //same as log(), but writes to the #error element
575 |       function errorLog(str) {
576 |         $("#error").append(str + "<br />");
577 |       }
578 | 
579 |       $(function() {
580 |         $("#wescriptSrc .content").text($("#wescript").text());
581 |       });
582 | 
583 |       $(function() {
584 |         //we stored our wescript in <script id="wescript">
585 |         var dataToBeCompiled = $("#wescript").text();
586 |         var reader = new Reader(dataToBeCompiled);
587 |         var scanner = new Scanner(reader);
588 |         var parser = new Parser(scanner);
589 |         var expressionBlockNode = parser.parse();
590 | 
591 |         console.log(expressionBlockNode);
592 | 
593 |         Errors.each(function(error, i) {
594 |           errorLog(
595 |             "Line " +
596 |               error.line +
597 |               ": (" +
598 |               Errors.type[error.type] +
599 |               ") " +
600 |               error.msg
601 |           );
602 |         });
603 |       });
604 |     </script>
605 |   </head>
606 | 
607 |   <body>
608 |     <div id="wescriptSrc">
609 |       <pre class="content"></pre>
610 |     </div>
611 | 
612 |     <div id="error"></div>
613 | 
614 |     <div id="log"></div>
615 |   </body>
616 | </html>
617 | 


--------------------------------------------------------------------------------
/examples/3-1-3.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 |   <head>
  4 |     <title>Parser - section 3</title>
  5 | 
  6 |     <style type="text/css">
  7 |       #wescriptSrc {
  8 |         margin: 10px 0;
  9 |       }
 10 | 
 11 |       #wescriptSrc .content {
 12 |         border: 1px solid orange;
 13 |         padding: 10px;
 14 |       }
 15 | 
 16 |       #error {
 17 |         color: red;
 18 |         line-height: 1.2;
 19 |         margin: 5px 0;
 20 |       }
 21 |     </style>
 22 | 
 23 |     <!-- of course we need jQuery -->
 24 |     <script
 25 |       src="https://code.jquery.com/jquery-3.4.1.slim.min.js"
 26 |       integrity="sha256-pasqAKBDmFT4eHoN2ndd6lN370kFiGUFyTiUHWhU7k8="
 27 |       crossorigin="anonymous"
 28 |     ></script>
 29 | 
 30 |     <!-- the wescript to be compiled will be stored here -->
 31 |     <script id="wescript" type="text/wescript">
 32 |       //program start
 33 |       print 64;
 34 |       print;
 35 |     </script>
 36 | 
 37 |     <script type="text/javascript">
 38 |       function extend(subClass, baseClass) {
 39 |         function inheritance() {}
 40 |         inheritance.prototype = baseClass.prototype;
 41 | 
 42 |         subClass.prototype = new inheritance();
 43 |         subClass.prototype.constructor = subClass;
 44 |         subClass.baseConstructor = baseClass;
 45 |         subClass.superClass = baseClass.prototype;
 46 |       }
 47 |     </script>
 48 | 
 49 |     <!-- our compiler -->
 50 |     <script type="text/javascript">
 51 |       function Errors() {}
 52 | 
 53 |       Errors.errors = [];
 54 | 
 55 |       Errors.push = function(obj) {
 56 |         Errors.errors.push(obj);
 57 |       };
 58 | 
 59 |       Errors.each = function(func) {
 60 |         for (var i = 0, l = Errors.errors.length; i < l; i++) {
 61 |           func(Errors.errors[i], i);
 62 |         }
 63 |       };
 64 | 
 65 |       Errors.SYNTAX_ERROR = 0;
 66 | 
 67 |       Errors.type = ["Syntax error"];
 68 |     </script>
 69 |     <script type="text/javascript">
 70 |       //Reader class
 71 | 
 72 |       //str is the data to be read
 73 |       function Reader(str) {
 74 |         this.data = str;
 75 |         this.currPos = 0;
 76 |         this.dataLength = str.length;
 77 |       }
 78 | 
 79 |       Reader.prototype.nextChar = function() {
 80 |         if (this.currPos >= this.dataLength) {
 81 |           return -1; //end of stream
 82 |         }
 83 |         return this.data[this.currPos++];
 84 |       };
 85 | 
 86 |       //n is the number of characters to be retracted
 87 |       Reader.prototype.retract = function(n) {
 88 |         if (n == undefined) {
 89 |           n = 1;
 90 |         }
 91 |         this.currPos -= n;
 92 |         if (this.currPos < 0) {
 93 |           this.currPos = 0;
 94 |         }
 95 |       };
 96 |     </script>
 97 |     <script type="text/javascript">
 98 |       //Token class
 99 | 
100 |       //type: the token's type, normally one of the Token.tokens values
101 |       //text: the actual text that makes this token, may be null if it is not important
102 |       function Token(type, text) {
103 |         this.type = type;
104 |         this.text = text;
105 |       }
106 | 
107 |       Token.tokens = {};
108 |       Token.tokens.EOS_TOKEN = 1; //end of stream
109 |       // each value is the previous one + 1, so a new token can be added easily later
110 |       Token.tokens.COLON_TOKEN = Token.tokens.EOS_TOKEN + 1;
111 |       Token.tokens.SEMICOLON_TOKEN = Token.tokens.COLON_TOKEN + 1;
112 |       Token.tokens.LEFTPAREN_TOKEN = Token.tokens.SEMICOLON_TOKEN + 1;
113 |       Token.tokens.RIGHTPAREN_TOKEN = Token.tokens.LEFTPAREN_TOKEN + 1;
114 |       Token.tokens.LEFTBRACE_TOKEN = Token.tokens.RIGHTPAREN_TOKEN + 1;
115 |       Token.tokens.RIGHTBRACE_TOKEN = Token.tokens.LEFTBRACE_TOKEN + 1;
116 |       Token.tokens.MOD_TOKEN = Token.tokens.RIGHTBRACE_TOKEN + 1;
117 |       //keywords, literals and identifiers
118 |       Token.tokens.VAR_TOKEN = Token.tokens.MOD_TOKEN + 1;
119 |       Token.tokens.TYPE_TOKEN = Token.tokens.VAR_TOKEN + 1;
120 |       Token.tokens.BOOLLITERAL_TOKEN = Token.tokens.TYPE_TOKEN + 1;
121 |       Token.tokens.INTLITERAL_TOKEN = Token.tokens.BOOLLITERAL_TOKEN + 1;
122 |       Token.tokens.IF_TOKEN = Token.tokens.INTLITERAL_TOKEN + 1;
123 |       Token.tokens.ELSE_TOKEN = Token.tokens.IF_TOKEN + 1;
124 |       Token.tokens.WHILE_TOKEN = Token.tokens.ELSE_TOKEN + 1;
125 |       Token.tokens.PRINT_TOKEN = Token.tokens.WHILE_TOKEN + 1;
126 |       Token.tokens.IDENTIFIER_TOKEN = Token.tokens.PRINT_TOKEN + 1;
127 |       //operators
128 |       Token.tokens.PLUS_TOKEN = Token.tokens.IDENTIFIER_TOKEN + 1;
129 |       Token.tokens.PLUSPLUS_TOKEN = Token.tokens.PLUS_TOKEN + 1;
130 |       Token.tokens.PLUSASSIGN_TOKEN = Token.tokens.PLUSPLUS_TOKEN + 1;
131 |       Token.tokens.MINUS_TOKEN = Token.tokens.PLUSASSIGN_TOKEN + 1;
132 |       Token.tokens.MINUSMINUS_TOKEN = Token.tokens.MINUS_TOKEN + 1;
133 |       Token.tokens.MINUSASSIGN_TOKEN = Token.tokens.MINUSMINUS_TOKEN + 1;
134 |       Token.tokens.MULT_TOKEN = Token.tokens.MINUSASSIGN_TOKEN + 1;
135 |       Token.tokens.DIV_TOKEN = Token.tokens.MULT_TOKEN + 1;
136 |       Token.tokens.ASSIGN_TOKEN = Token.tokens.DIV_TOKEN + 1;
137 |       Token.tokens.EQUAL_TOKEN = Token.tokens.ASSIGN_TOKEN + 1;
138 |       Token.tokens.NOTEQUAL_TOKEN = Token.tokens.EQUAL_TOKEN + 1;
139 |       Token.tokens.GREATER_TOKEN = Token.tokens.NOTEQUAL_TOKEN + 1;
140 |       Token.tokens.GREATEREQUAL_TOKEN = Token.tokens.GREATER_TOKEN + 1;
141 |       Token.tokens.LESS_TOKEN = Token.tokens.GREATEREQUAL_TOKEN + 1;
142 |       Token.tokens.LESSEQUAL_TOKEN = Token.tokens.LESS_TOKEN + 1;
143 |       Token.tokens.AND_TOKEN = Token.tokens.LESSEQUAL_TOKEN + 1;
144 |       Token.tokens.OR_TOKEN = Token.tokens.AND_TOKEN + 1;
145 |       Token.tokens.NOT_TOKEN = Token.tokens.OR_TOKEN + 1;
146 |       //comments
147 |       Token.tokens.LINECOMMENT_TOKEN = Token.tokens.NOT_TOKEN + 1;
148 |       Token.tokens.BLOCKCOMMENT_TOKEN = Token.tokens.LINECOMMENT_TOKEN + 1;
149 | 
150 |       Token.backwardMap = {}; //for inverse look-up: numeric value -> token name
151 |       for (var x in Token.tokens) {
152 |         Token.backwardMap[Token.tokens[x]] = x;
153 |       }
154 |     </script>
155 |     <script type="text/javascript">
156 |       //Scanner class
157 | 
158 |       //reader: the reader used to read in characters
159 |       function Scanner(reader) {
160 |         this.reader = reader;
161 |         this.currentToken = new Token(); //storing the current analysed token
162 |         this.currLine = 0; //number of line break characters seen so far (starts at 0)
163 |         this.state = Scanner.START_STATE;
164 |       }
165 | 
166 |       Scanner.START_STATE = 1; //every FSM should have a start state
167 |       Scanner.IDENTIFIER_STATE = Scanner.START_STATE + 1; //reading the letters of an identifier or keyword
168 |       Scanner.SLASH_STATE = Scanner.IDENTIFIER_STATE + 1; //a "/" was read: comment or division
169 |       //stores type and text in this.currentToken and returns the type
170 |       Scanner.prototype.makeToken = function(type, text) {
171 |         this.currentToken.type = type;
172 |         this.currentToken.text = text;
173 |         return type;
174 |       };
175 | 
176 |       Scanner.prototype.nextToken = function() {
177 |         var bufferStr = "";
178 |         while (true) {
179 |           switch (this.state) {
180 |             case Scanner.START_STATE:
181 |               var c = this.reader.nextChar();
182 | 
183 |               if ((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")) {
184 |                 this.state = Scanner.IDENTIFIER_STATE;
185 |                 //we need to remember what the token's text is
186 |                 bufferStr = c;
187 |               } else if (c >= "0" && c <= "9") {
188 |                 bufferStr = c;
189 |                 var d;
190 |                 while (true) {
191 |                   d = this.reader.nextChar();
192 |                   if (d >= "0" && d <= "9") {
193 |                     bufferStr += d;
194 |                   } else {
195 |                     this.reader.retract();
196 |                     return this.makeToken(
197 |                       Token.tokens.INTLITERAL_TOKEN,
198 |                       bufferStr
199 |                     );
200 |                   }
201 |                 }
202 |               } else {
203 |                 switch (c) {
204 |                   case ":":
205 |                     return this.makeToken(Token.tokens.COLON_TOKEN);
206 |                     break;
207 |                   case ";":
208 |                     return this.makeToken(Token.tokens.SEMICOLON_TOKEN);
209 |                     break;
210 |                   case "(":
211 |                     return this.makeToken(Token.tokens.LEFTPAREN_TOKEN);
212 |                     break;
213 |                   case ")":
214 |                     return this.makeToken(Token.tokens.RIGHTPAREN_TOKEN);
215 |                     break;
216 |                   case "{":
217 |                     return this.makeToken(Token.tokens.LEFTBRACE_TOKEN);
218 |                     break;
219 |                   case "}":
220 |                     return this.makeToken(Token.tokens.RIGHTBRACE_TOKEN);
221 |                     break;
222 |                   case "%":
223 |                     return this.makeToken(Token.tokens.MOD_TOKEN);
224 |                     break;
225 | 
226 |                   case "!":
227 |                     if (this.reader.nextChar() == "=") {
228 |                       return this.makeToken(Token.tokens.NOTEQUAL_TOKEN);
229 |                     } else {
230 |                       //we have consumed one more char in if-condition
231 |                       this.reader.retract();
232 |                       return this.makeToken(Token.tokens.NOT_TOKEN);
233 |                     }
234 |                     break;
235 |                   case "+":
236 |                     var d = this.reader.nextChar();
237 |                     if (d == "=") {
238 |                       return this.makeToken(Token.tokens.PLUSASSIGN_TOKEN);
239 |                     } else if (d == "+") {
240 |                       return this.makeToken(Token.tokens.PLUSPLUS_TOKEN);
241 |                     } else {
242 |                       this.reader.retract();
243 |                       return this.makeToken(Token.tokens.PLUS_TOKEN);
244 |                     }
245 |                     break;
246 |                   case "-":
247 |                     var d = this.reader.nextChar();
248 |                     if (d == "=") {
249 |                       return this.makeToken(Token.tokens.MINUSASSIGN_TOKEN);
250 |                     } else if (d == "-") {
251 |                       return this.makeToken(Token.tokens.MINUSMINUS_TOKEN);
252 |                     } else {
253 |                       this.reader.retract();
254 |                       return this.makeToken(Token.tokens.MINUS_TOKEN);
255 |                     }
256 |                     break;
257 |                   case "*":
258 |                     return this.makeToken(Token.tokens.MULT_TOKEN);
259 |                     break;
260 |                   case "=":
261 |                     if (this.reader.nextChar() == "=") {
262 |                       return this.makeToken(Token.tokens.EQUAL_TOKEN);
263 |                     } else {
264 |                       this.reader.retract();
265 |                       return this.makeToken(Token.tokens.ASSIGN_TOKEN);
266 |                     }
267 |                     break;
268 |                   case ">":
269 |                     if (this.reader.nextChar() == "=") {
270 |                       return this.makeToken(Token.tokens.GREATEREQUAL_TOKEN);
271 |                     } else {
272 |                       this.reader.retract();
273 |                       return this.makeToken(Token.tokens.GREATER_TOKEN);
274 |                     }
275 |                     break;
276 |                   case "<":
277 |                     if (this.reader.nextChar() == "=") {
278 |                       return this.makeToken(Token.tokens.LESSEQUAL_TOKEN);
279 |                     } else {
280 |                       this.reader.retract();
281 |                       return this.makeToken(Token.tokens.LESS_TOKEN);
282 |                     }
283 |                     break;
284 | 
285 |                   case "/":
286 |                     this.state = Scanner.SLASH_STATE;
287 |                     break;
288 | 
289 |                   case "&":
290 |                     if (this.reader.nextChar() == "&") {
291 |                       return this.makeToken(Token.tokens.AND_TOKEN);
292 |                     } else {
293 |                       this.reader.retract();
294 |                       Errors.push({
295 |                         type: Errors.SYNTAX_ERROR,
296 |                         msg: "You have only one &",
297 |                         line: this.currLine
298 |                       });
299 |                     }
300 |                     break;
301 |                   case "|":
302 |                     if (this.reader.nextChar() == "|") {
303 |                       return this.makeToken(Token.tokens.OR_TOKEN);
304 |                     } else {
305 |                       this.reader.retract();
306 |                       Errors.push({
307 |                         type: Errors.SYNTAX_ERROR,
308 |                         msg: "You have only one |",
309 |                         line: this.currLine
310 |                       });
311 |                     }
312 |                     break;
313 | 
314 |                   case -1:
315 |                     return this.makeToken(Token.tokens.EOS_TOKEN);
316 |                     break;
317 |                   case "\r":
318 |                   case "\n":
319 |                     this.currLine++;
320 |                   default:
321 |                   //ignore them
322 |                 }
323 |               }
324 |               break;
325 |             case Scanner.IDENTIFIER_STATE:
326 |               var c = this.reader.nextChar();
327 | 
328 |               if ((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")) {
329 |                 bufferStr += c;
330 |               } else {
331 |                 //stop reading it since it is not a letter anymore
332 |                 //retract the last character we read because it does not belong to this identfier
333 |                 this.reader.retract();
334 | 
335 |                 //change back the state to read the next token
336 |                 this.state = Scanner.START_STATE;
337 | 
338 |                 switch (bufferStr) {
339 |                   case "var":
340 |                     return this.makeToken(Token.tokens.VAR_TOKEN);
341 |                   case "int":
342 |                   case "bool":
343 |                     //need to pass bufferStr as well to distinguish which type it is
344 |                     return this.makeToken(Token.tokens.TYPE_TOKEN, bufferStr);
345 |                   case "true":
346 |                   case "false":
347 |                   case "TRUE":
348 |                   case "FALSE":
349 |                     return this.makeToken(
350 |                       Token.tokens.BOOLLITERAL_TOKEN,
351 |                       bufferStr
352 |                     );
353 |                   case "if":
354 |                     return this.makeToken(Token.tokens.IF_TOKEN);
355 |                   case "else":
356 |                     return this.makeToken(Token.tokens.ELSE_TOKEN);
357 |                   case "while":
358 |                     return this.makeToken(Token.tokens.WHILE_TOKEN);
359 |                   case "print":
360 |                     return this.makeToken(Token.tokens.PRINT_TOKEN);
361 |                   default:
362 |                     return this.makeToken(
363 |                       Token.tokens.IDENTIFIER_TOKEN,
364 |                       bufferStr
365 |                     );
366 |                 }
367 |               }
368 |               break;
369 |             case Scanner.SLASH_STATE:
370 |               var d = this.reader.nextChar();
371 |               if (d == "/") {
372 |                 //line comment
373 |                 bufferStr = "";
374 |                 //reading 1 more char here can prevent the case that a // is followed by a line break char immediately
375 |                 d = this.reader.nextChar();
376 |                 if (d != "\r" && d != "\n") {
377 |                   while (d != "\r" && d != "\n") {
378 |                     bufferStr += d;
379 |                     d = this.reader.nextChar();
380 |                   }
381 | 
382 |                   //to retract the line break char
383 |                   this.reader.retract();
384 |                 }
385 | 
386 |                 this.state = Scanner.START_STATE;
387 | 
388 |                 return this.makeToken(
389 |                   Token.tokens.LINECOMMENT_TOKEN,
390 |                   bufferStr
391 |                 );
392 |               } else if (d == "*") {
393 |                 //block comment
394 |                 bufferStr = "";
395 |                 var end = false;
396 |                 while (!end) {
397 |                   d = this.reader.nextChar();
398 |                   if (d != -1) {
399 |                     if (d == "\r" || d == "\n") {
400 |                       this.currLine++;
401 |                     }
402 |                     if (d == "*") {
403 |                       var e = this.reader.nextChar();
404 |                       if (e == "/") {
405 |                         //meet */
406 |                         end = true;
407 |                       } else {
408 |                         bufferStr += "*" + e;
409 |                       }
410 |                     } else {
411 |                       bufferStr += d;
412 |                     }
413 |                   } else {
414 |                     end = true;
415 |                   }
416 |                 }
417 | 
418 |                 this.state = Scanner.START_STATE;
419 | 
420 |                 return this.makeToken(
421 |                   Token.tokens.BLOCKCOMMENT_TOKEN,
422 |                   bufferStr
423 |                 );
424 |               } else {
425 |                 this.state = Scanner.START_STATE;
426 |                 this.reader.retract();
427 |                 return this.makeToken(Token.tokens.DIV_TOKEN);
428 |               }
429 |               break;
430 |           }
431 |         }
432 |       };
433 |     </script>
434 |     <script type="text/javascript">
435 |       //Nodes
436 | 
437 |       function Node(param) {}
438 | 
439 |       function ExpressionBlockNode() {
440 |         ExpressionBlockNode.baseConstructor.call(this, "test");
441 |         this.expressions = [];
442 |       }
443 | 
444 |       extend(ExpressionBlockNode, Node);
445 | 
446 |       ExpressionBlockNode.prototype.push = function(expression) {
447 |         this.expressions.push(expression);
448 |       };
449 | 
450 |       ExpressionBlockNode.prototype.iterate = function(func) {
451 |         for (var i = 0, l = this.expressions.length; i < l; i++) {
452 |           var expression = this.expressions[i];
453 |           func(expression, i);
454 |         }
455 |       };
456 | 
457 |       function PrintNode(expressionNode) {
458 |         this.expressionNode = expressionNode;
459 |       }
460 | 
461 |       extend(PrintNode, Node);
462 | 
463 |       function IntNode(data) {
464 |         this.data = data;
465 |       }
466 | 
467 |       extend(IntNode, Node);
468 |     </script>
469 |     <script type="text/javascript">
470 |       //Parser class
471 | 
472 |       function Parser(scanner) {
473 |         this.scanner = scanner;
474 |         this.currentToken = new Token();
475 |         this.lookaheadToken = new Token();
476 |         this.lookaheadToken.consumed = true;
477 |       }
478 | 
479 |       Parser.prototype.nextToken = function() {
480 |         if (this.lookaheadToken.consumed) {
481 |           var token = this.scanner.nextToken();
482 | 
483 |           //skip comments
484 |           while (
485 |             token == Token.tokens.LINECOMMENT_TOKEN ||
486 |             token == Token.tokens.BLOCKCOMMENT_TOKEN
487 |           ) {
488 |             token = this.scanner.nextToken();
489 |           }
490 | 
491 |           this.currentToken.type = token;
492 |           this.currentToken.text = this.scanner.currentToken.text;
493 | 
494 |           return token;
495 |         } else {
496 |           this.currentToken.type = this.lookaheadToken.type;
497 |           this.currentToken.text = this.lookaheadToken.text;
498 |           this.lookaheadToken.consumed = true;
499 |           return this.currentToken.type;
500 |         }
501 |       };
502 | 
503 |       Parser.prototype.lookahead = function() {
504 |         if (this.lookaheadToken.consumed) {
505 |           var token = this.scanner.nextToken();
506 | 
507 |           //skip comments
508 |           while (
509 |             token == Token.tokens.LINECOMMENT_TOKEN ||
510 |             token == Token.tokens.BLOCKCOMMENT_TOKEN
511 |           ) {
512 |             token = this.scanner.nextToken();
513 |           }
514 | 
515 |           this.lookaheadToken.type = token;
516 |           this.lookaheadToken.text = this.scanner.currentToken.text;
517 |           this.lookaheadToken.consumed = false;
518 | 
519 |           return token;
520 |         } else {
521 |           return this.lookaheadToken.type;
522 |         }
523 |       };
524 | 
525 |       //the entry point of our parser
526 |       Parser.prototype.parse = function() {
527 |         var rootBlock = new ExpressionBlockNode();
528 | 
529 |         this.parseExpressions(rootBlock);
530 | 
531 |         return rootBlock;
532 |       };
533 | 
534 |       //to parse a list of expressions
535 |       Parser.prototype.parseExpressions = function(expressionBlockNode) {
536 |         while (
537 |           this.lookahead() != Token.tokens.RIGHTBRACE_TOKEN &&
538 |           this.lookahead() != Token.tokens.EOS_TOKEN
539 |         ) {
540 |           var expressionNode = this.parseExpression();
541 | 
542 |           if (expressionNode) {
543 |             expressionBlockNode.push(expressionNode);
544 |           }
545 |         }
546 |       };
547 | 
548 |       //to parse an expression
549 |       Parser.prototype.parseExpression = function() {
550 |         switch (this.lookahead()) {
551 |           case Token.tokens.PRINT_TOKEN:
552 |             var printToken = this.nextToken();
553 |             var expressionNode = this.parseExpression();
554 |             if (expressionNode == undefined) {
555 |               Errors.push({
556 |                 type: Errors.SYNTAX_ERROR,
557 |                 msg: 'Missing an expression after "print"',
558 |                 line: this.scanner.currLine
559 |               });
560 |             }
561 |             return new PrintNode(expressionNode);
562 |             break;
563 | 
564 |           case Token.tokens.INTLITERAL_TOKEN:
565 |             var intToken = this.nextToken();
566 |             return new IntNode(this.currentToken.text);
567 |             break;
568 | 
569 |           default:
570 |             //unexpected, consume it
571 |             this.nextToken();
572 |         }
573 |       };
574 |     </script>
575 | 
576 |     <!-- our tester -->
577 |     <script type="text/javascript">
578 |       function log(str) {
579 |         $("#log").append(str + "<br />");
580 |       }
581 | 
582 |       function errorLog(str) {
583 |         $("#error").append(str + "<br />");
584 |       }
585 | 
586 |       $(function() {
587 |         $("#wescriptSrc .content").text($("#wescript").text());
588 |       });
589 | 
590 |       $(function() {
591 |         //we stored our wescript in <script id="wescript">
592 |         var dataToBeCompiled = $("#wescript").text();
593 |         var reader = new Reader(dataToBeCompiled);
594 |         var scanner = new Scanner(reader);
595 |         var parser = new Parser(scanner);
596 |         var expressionBlockNode = parser.parse();
597 | 
598 |         console.log(expressionBlockNode);
599 | 
600 |         Errors.each(function(error, i) {
601 |           errorLog(
602 |             "Line " +
603 |               error.line +
604 |               ": (" +
605 |               Errors.type[error.type] +
606 |               ") " +
607 |               error.msg
608 |           );
609 |         });
610 |       });
611 |     </script>
612 |   </head>
613 | 
614 |   <body>
615 |     <div id="wescriptSrc">
616 |       <pre class="content"></pre>
617 |     </div>
618 | 
619 |     <div id="error"></div>
620 | 
621 |     <div id="log"></div>
622 |   </body>
623 | </html>
624 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "repository": "git@github.com:jaceju/simple-compiler.git",
 3 |   "author": "jaceju <jaceju@kkbox.com>",
 4 |   "private": true,
 5 |   "license": "MIT",
 6 |   "devDependencies": {
 7 |     "prettier": "1.18.2"
 8 |   }
 9 | }
10 | 


--------------------------------------------------------------------------------
/src/Errors.js:
--------------------------------------------------------------------------------
 1 | let errors = [];
 2 | 
 3 | class Errors {
 4 |   static push(error) {
 5 |     errors.push(error);
 6 |   }
 7 | 
 8 |   static print() {
 9 |     console.dir(errors);
10 |   }
11 | 
12 |   static each(cb) {
13 |     for (let i = 0, l = errors.length; i < l; i++) {
14 |       cb(errors[i], i);
15 |     }
16 |   }
17 | }
18 | 
19 | Errors.SYNTAX_ERROR = 0;
20 | Errors.type = ["Syntax error"];
21 | 
22 | module.exports = Errors;
23 | 


--------------------------------------------------------------------------------
/src/Nodes/ExpressionBlockNode.js:
--------------------------------------------------------------------------------
 1 | const Node = require("./Node");
 2 | 
 3 | class ExpressionBlockNode extends Node {
 4 |   constructor() {
 5 |     super();
 6 |     this.expressions = [];
 7 |   }
 8 | 
 9 |   push(node) {
10 |     this.expressions.push(node);
11 |   }
12 | 
13 |   iterate(cb) {
14 |     for (let i = 0, l = this.expressions.length; i < l; i++) {
15 |       let expression = this.expressions[i];
16 |       cb(expression, i);
17 |     }
18 |   }
19 | }
20 | 
21 | module.exports = ExpressionBlockNode;
22 | 


--------------------------------------------------------------------------------
/src/Nodes/IntNode.js:
--------------------------------------------------------------------------------
 1 | const Node = require("./Node");
 2 | // Syntax tree node for an integer literal; data holds the literal's text.
 3 | class IntNode extends Node {
 4 |   constructor(data) {
 5 |     super();
 6 |     this.data = data;
 7 |   }
 8 | }
 9 | 
10 | module.exports = IntNode;
11 | 


--------------------------------------------------------------------------------
/src/Nodes/Node.js:
--------------------------------------------------------------------------------
1 | class Node { // base class extended by the node classes in src/Nodes
2 |   constructor(params) {} // params is not used
3 | }
4 | 
5 | module.exports = Node;
6 | 


--------------------------------------------------------------------------------
/src/Nodes/PrintNode.js:
--------------------------------------------------------------------------------
 1 | const Node = require("./Node");
 2 | // Syntax tree node for "print"; expressionNode is the parsed operand (undefined after a syntax error).
 3 | class PrintNode extends Node {
 4 |   constructor(expressionNode) {
 5 |     super();
 6 |     this.expressionNode = expressionNode;
 7 |   }
 8 | }
 9 | 
10 | module.exports = PrintNode;
11 | 


--------------------------------------------------------------------------------
/src/Nodes/VariableNode.js:
--------------------------------------------------------------------------------
 1 | const Node = require("./Node");
 2 | // Syntax tree node holding a variable name, a type and an initial expression node (presumably built by Parser.parseVarExpression; verify there).
 3 | class VariableNode extends Node {
 4 |   constructor(varName, type, initExpressionNode) {
 5 |     super();
 6 |     this.varName = varName;
 7 |     this.type = type;
 8 |     this.initExpressionNode = initExpressionNode;
 9 |   }
10 | }
11 | 
12 | module.exports = VariableNode;
13 | 


--------------------------------------------------------------------------------
/src/Parser.js:
--------------------------------------------------------------------------------
  1 | const Errors = require("./Errors");
  2 | const Token = require("./Token");
  3 | const ExpressionBlockNode = require("./Nodes/ExpressionBlockNode");
  4 | const PrintNode = require("./Nodes/PrintNode");
  5 | const IntNode = require("./Nodes/IntNode");
  6 | const VariableNode = require("./Nodes/VariableNode");
  7 | 
  8 | // Parser class
  9 | class Parser {
 10 |   constructor(scanner) {
 11 |     this.scanner = scanner;
 12 |     this.currentToken = new Token();
 13 |     this.lookaheadToken = new Token();
 14 |     this.lookaheadToken.consumed = true;
 15 |   }
 16 | 
 17 |   nextToken() {
 18 |     let token;
 19 |     if (this.lookaheadToken.consumed) {
 20 |       token = this.scanner.nextToken();
 21 |       // skip comments
 22 |       while (
 23 |         token === Token.tokens.LINECOMMENT_TOKEN ||
 24 |         token === Token.tokens.BLOCKCOMMENT_TOKEN
 25 |       ) {
 26 |         token = this.scanner.nextToken();
 27 |       }
 28 |       this.currentToken.type = token;
 29 |       this.currentToken.text = this.scanner.currentToken.text;
 30 |       return token;
 31 |     } else {
 32 |       this.currentToken.type = this.lookaheadToken.type;
 33 |       this.currentToken.text = this.lookaheadToken.text;
 34 |       this.lookaheadToken.consumed = true;
 35 |       return this.currentToken.type;
 36 |     }
 37 |   }
 38 | 
 39 |   lookahead() {
 40 |     if (this.lookaheadToken.consumed) {
 41 |       let token = this.scanner.nextToken();
 42 |       // skip comments
 43 |       while (
 44 |         token === Token.tokens.LINECOMMENT_TOKEN ||
 45 |         token === Token.tokens.BLOCKCOMMENT_TOKEN
 46 |       ) {
 47 |         token = this.scanner.nextToken();
 48 |       }
 49 |       this.lookaheadToken.type = token;
 50 |       this.lookaheadToken.text = this.scanner.currentToken.text;
 51 |       this.lookaheadToken.consumed = false;
 52 |       return token;
 53 |     } else {
 54 |       return this.lookaheadToken.type;
 55 |     }
 56 |   }
 57 | 
 58 |   // the entry point of our parser
 59 |   parse() {
 60 |     let rootBlock = new ExpressionBlockNode();
 61 |     this.parseExpressions(rootBlock);
 62 |     return rootBlock;
 63 |   }
 64 | 
 65 |   // to parse a list of expressions
 66 |   parseExpressions(expressionBlockNode) {
 67 |     while (
 68 |       this.lookahead() !== Token.tokens.RIGHTBRACE_TOKEN &&
 69 |       this.lookahead() !== Token.tokens.EOS_TOKEN
 70 |     ) {
 71 |       let expressionNode = this.parseExpression();
 72 |       if (expressionNode) {
 73 |         expressionBlockNode.push(expressionNode);
 74 |       }
 75 | 
 76 |       // consume the semicolon
 77 |       if (this.lookahead() === Token.tokens.SEMICOLON_TOKEN) {
 78 |         this.nextToken();
 79 |       } else {
 80 |         // syntax error
 81 |         Errors.push({
 82 |           type: Errors.SYNTAX_ERROR,
 83 |           msg: "Expecting a semicolon at the end of expression",
 84 |           line: this.scanner.currLine
 85 |         });
 86 |       }
 87 |     }
 88 |   }
 89 | 
 90 |   // to parse an expression
 91 |   parseExpression() {
 92 |     switch (this.lookahead()) {
 93 |       case Token.tokens.PRINT_TOKEN:
 94 |         let printToken = this.nextToken();
 95 |         let expressionNode = this.parseExpression();
 96 |         if (expressionNode === undefined) {
 97 |           Errors.push({
 98 |             type: Errors.SYNTAX_ERROR,
 99 |             msg: 'Missing an expression after "print"',
100 |             line: this.scanner.currLine
101 |           });
102 |         }
103 |         return new PrintNode(expressionNode);
104 |       case Token.tokens.INTLITERAL_TOKEN:
105 |         let intToken = this.nextToken();
106 |         return new IntNode(this.currentToken.text);
107 |       case Token.tokens.VAR_TOKEN:
108 |         return this.parseVarExpression();
109 |       default:
110 |         // unexpected, consume it
111 |         this.nextToken();
112 |     }
113 |   }
114 | 
115 |   parseVarExpression() {
116 |     // consume "var"
117 |     this.nextToken();
118 | 
119 |     // expecting an identifier
120 |     if (this.lookahead() === Token.tokens.IDENTIFIER_TOKEN) {
121 |       this.nextToken();
122 |       let varName = this.currentToken.text;
123 | 
124 |       // consume a colon
125 |       if (this.nextToken() !== Token.tokens.COLON_TOKEN) {
126 |         this.skipError();
127 |         return;
128 |       }
129 | 
130 |       // type token
131 |       if (this.lookahead() !== Token.tokens.TYPE_TOKEN) {
132 |         this.skipError();
133 |         return;
134 |       }
135 | 
136 |       this.nextToken();
137 |       let typeName = this.currentToken.text;
138 | 
139 |       let initNode;
140 |       // check if it has initialization expression
141 |       if (this.lookahead() === Token.tokens.ASSIGN_TOKEN) {
142 |         initNode = this.parseSimpleAssignmentExpression();
143 |       }
144 |       return new VariableNode(varName, typeName, initNode);
145 |     }
146 | 
147 |     this.skipError();
148 |   }
149 | 
150 |   parseSimpleAssignmentExpression() {
151 |     // consume the "=" sign
152 |     this.nextToken();
153 | 
154 |     return this.parseExpression();
155 |   }
156 | 
157 |   // a naive implementation for skipping error
158 |   skipError() {
159 |     this.scanner.skipNewLine = false;
160 | 
161 |     while (
162 |       this.lookahead() !== Token.tokens.NEWLINE_TOKEN &&
163 |       this.lookahead() !== Token.tokens.EOS_TOKEN
164 |     ) {
165 |       this.nextToken();
166 |     }
167 | 
168 |     this.scanner.skipNewLine = true;
169 |   }
170 |   //
171 |   // matchSemicolon() {
172 |   //   // consume the semicolon
173 |   //   if (this.lookahead() === Token.tokens.SEMICOLON_TOKEN) {
174 |   //     this.nextToken();
175 |   //   } else {
176 |   //     // syntax error
177 |   //     Errors.push({
178 |   //       type: Errors.SYNTAX_ERROR,
179 |   //       msg: "Expecting a semicolon at the end of expression",
180 |   //       line: this.scanner.currLine
181 |   //     });
182 |   //   }
183 |   // }
184 | }
185 | 
186 | module.exports = Parser;
187 | 


--------------------------------------------------------------------------------
/src/Reader.js:
--------------------------------------------------------------------------------
 1 | // Reader class
 2 | // str is the data to be read
 3 | class Reader {
 4 |   constructor(str) {
 5 |     this.data = str;
 6 |     this.currPos = 0;
 7 |     this.dataLength = str.length;
 8 |   }
 9 | 
10 |   nextChar() {
11 |     if (this.currPos >= this.dataLength) {
12 |       return -1; // end of stream
13 |     }
14 | 
15 |     return this.data[this.currPos++];
16 |   }
17 | 
18 |   retract(n) {
19 |     if (n === undefined) {
20 |       n = 1;
21 |     }
22 | 
23 |     this.currPos -= n;
24 | 
25 |     if (this.currPos < 0) {
26 |       this.currPos = 0;
27 |     }
28 |   }
29 | }
30 | 
31 | module.exports = Reader;
32 | 


--------------------------------------------------------------------------------
/src/Scanner.js:
--------------------------------------------------------------------------------
  1 | // Scanner class
  2 | // reader: the reader used to read in characters
  3 | const Token = require("./Token");
  4 | const Errors = require("./Errors");
  5 | 
  6 | class Scanner {
  7 |   constructor(reader) {
  8 |     this.reader = reader;
  9 |     this.currentToken = new Token(); // storing the current analysed token
 10 |     this.currLine = 0; // the line number of the current line being read
 11 |     this.state = Scanner.START_STATE;
 12 |   }
 13 | 
 14 |   makeToken(type, text) {
 15 |     this.currentToken.type = type;
 16 |     this.currentToken.text = text;
 17 |     return type;
 18 |   }
 19 | 
 20 |   nextToken() {
 21 |     let bufferStr = "",
 22 |       c = "",
 23 |       d = "";
 24 |     while (true) {
 25 |       switch (this.state) {
 26 |         case Scanner.START_STATE:
 27 |           c = this.reader.nextChar();
 28 |           if ((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")) {
 29 |             this.state = Scanner.IDENTIFIER_STATE;
 30 |             // we need to remember what the token's text is
 31 |             bufferStr = c;
 32 |           } else if (c >= "0" && c <= "9") {
 33 |             bufferStr = c;
 34 |             let d;
 35 |             while (true) {
 36 |               d = this.reader.nextChar();
 37 |               if (d >= "0" && d <= "9") {
 38 |                 bufferStr += d;
 39 |               } else {
 40 |                 this.reader.retract();
 41 |                 return this.makeToken(Token.tokens.INTLITERAL_TOKEN, bufferStr);
 42 |               }
 43 |             }
 44 |           } else {
 45 |             switch (c) {
 46 |               case ":":
 47 |                 return this.makeToken(Token.tokens.COLON_TOKEN);
 48 |               case ";":
 49 |                 return this.makeToken(Token.tokens.SEMICOLON_TOKEN);
 50 |               case "(":
 51 |                 return this.makeToken(Token.tokens.LEFTPAREN_TOKEN);
 52 |               case ")":
 53 |                 return this.makeToken(Token.tokens.RIGHTPAREN_TOKEN);
 54 |               case "{":
 55 |                 return this.makeToken(Token.tokens.LEFTBRACE_TOKEN);
 56 |               case "}":
 57 |                 return this.makeToken(Token.tokens.RIGHTBRACE_TOKEN);
 58 |               case "%":
 59 |                 return this.makeToken(Token.tokens.MOD_TOKEN);
 60 |               case "!":
 61 |                 if (this.reader.nextChar() === "=") {
 62 |                   return this.makeToken(Token.tokens.NOTEQUAL_TOKEN);
 63 |                 } else {
 64 |                   // we have consumed one more char in if-condition
 65 |                   this.reader.retract();
 66 |                   return this.makeToken(Token.tokens.NOT_TOKEN);
 67 |                 }
 68 |               case "+":
 69 |                 d = this.reader.nextChar();
 70 |                 if (d === "=") {
 71 |                   return this.makeToken(Token.tokens.PLUSASSIGN_TOKEN);
 72 |                 } else if (d === "+") {
 73 |                   return this.makeToken(Token.tokens.PLUSPLUS_TOKEN);
 74 |                 } else {
 75 |                   this.reader.retract();
 76 |                   return this.makeToken(Token.tokens.PLUS_TOKEN);
 77 |                 }
 78 |               case "-":
 79 |                 d = this.reader.nextChar();
 80 |                 if (d === "=") {
 81 |                   return this.makeToken(Token.tokens.MINUSASSIGN_TOKEN);
 82 |                 } else if (d === "-") {
 83 |                   return this.makeToken(Token.tokens.MINUSMINUS_TOKEN);
 84 |                 } else {
 85 |                   this.reader.retract();
 86 |                   return this.makeToken(Token.tokens.MINUS_TOKEN);
 87 |                 }
 88 |               case "*":
 89 |                 return this.makeToken(Token.tokens.MULT_TOKEN);
 90 |               case "=":
 91 |                 if (this.reader.nextChar() === "=") {
 92 |                   return this.makeToken(Token.tokens.EQUAL_TOKEN);
 93 |                 } else {
 94 |                   this.reader.retract();
 95 |                   return this.makeToken(Token.tokens.ASSIGN_TOKEN);
 96 |                 }
 97 |               case ">":
 98 |                 if (this.reader.nextChar() === "=") {
 99 |                   return this.makeToken(Token.tokens.GREATEREQUAL_TOKEN);
100 |                 } else {
101 |                   this.reader.retract();
102 |                   return this.makeToken(Token.tokens.GREATER_TOKEN);
103 |                 }
104 |               case "<":
105 |                 if (this.reader.nextChar() === "=") {
106 |                   return this.makeToken(Token.tokens.LESSEQUAL_TOKEN);
107 |                 } else {
108 |                   this.reader.retract();
109 |                   return this.makeToken(Token.tokens.LESS_TOKEN);
110 |                 }
111 |               case "/":
112 |                 this.state = Scanner.SLASH_STATE;
113 |                 break;
114 |               case "&":
115 |                 if (this.reader.nextChar() === "&") {
116 |                   return this.makeToken(Token.tokens.AND_TOKEN);
117 |                 } else {
118 |                   this.reader.retract();
119 |                   Errors.push({
120 |                     type: Errors.SYNTAX_ERROR,
121 |                     msg: "You have only one &",
122 |                     line: this.currLine
123 |                   });
124 |                 }
125 |                 break;
126 |               case "|":
127 |                 if (this.reader.nextChar() === "|") {
128 |                   return this.makeToken(Token.tokens.OR_TOKEN);
129 |                 } else {
130 |                   this.reader.retract();
131 |                   Errors.push({
132 |                     type: Errors.SYNTAX_ERROR,
133 |                     msg: "You have only one |",
134 |                     line: this.currLine
135 |                   });
136 |                 }
137 |                 break;
138 |               case -1:
139 |                 return this.makeToken(Token.tokens.EOS_TOKEN);
140 |               case "\r":
141 |               case "\n":
142 |                 this.currLine++;
143 |                 break;
144 |               default:
145 |               // ignore them
146 |             }
147 |           }
148 |           break;
149 |         case Scanner.IDENTIFIER_STATE:
150 |           c = this.reader.nextChar();
151 |           if ((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")) {
152 |             bufferStr += c;
153 |           } else if (c === -1) {
154 |             return this.makeToken(Token.tokens.EOS_TOKEN);
155 |           } else {
156 |             // stop reading it since it is not a letter anymore
157 |             // retract the last character we read because it does not belong to this identfier
158 |             this.reader.retract();
159 |             // change back the state to read the next token
160 |             this.state = Scanner.START_STATE;
161 |             switch (bufferStr) {
162 |               case "var":
163 |                 return this.makeToken(Token.tokens.VAR_TOKEN);
164 |               case "int":
165 |               case "bool":
166 |                 //need to pass bufferStr as well to distinguish which type it is
167 |                 return this.makeToken(Token.tokens.TYPE_TOKEN, bufferStr);
168 |               case "true":
169 |               case "false":
170 |               case "TRUE":
171 |               case "FALSE":
172 |                 return this.makeToken(
173 |                   Token.tokens.BOOLLITERAL_TOKEN,
174 |                   bufferStr
175 |                 );
176 |               case "if":
177 |                 return this.makeToken(Token.tokens.IF_TOKEN);
178 |               case "else":
179 |                 return this.makeToken(Token.tokens.ELSE_TOKEN);
180 |               case "while":
181 |                 return this.makeToken(Token.tokens.WHILE_TOKEN);
182 |               case "print":
183 |                 return this.makeToken(Token.tokens.PRINT_TOKEN);
184 |               default:
185 |                 return this.makeToken(Token.tokens.IDENTIFIER_TOKEN, bufferStr);
186 |             }
187 |           }
188 |           break;
189 |         case Scanner.SLASH_STATE:
190 |           d = this.reader.nextChar();
191 |           if (d === "/") {
192 |             // line comment
193 |             bufferStr = "";
194 |             // reading 1 more char here can prevent the case that a // is followed by a line break char immediately
195 |             d = this.reader.nextChar();
196 |             if (d !== "\r" && d !== "\n") {
197 |               while (d !== "\r" && d !== "\n") {
198 |                 bufferStr += d;
199 |                 d = this.reader.nextChar();
200 |                 if (d === -1) {
201 |                   break;
202 |                 }
203 |               }
204 |               // to retract the line break char
205 |               this.reader.retract();
206 |             }
207 |             this.state = Scanner.START_STATE;
208 |             return this.makeToken(Token.tokens.LINECOMMENT_TOKEN, bufferStr);
209 |           } else if (d === "*") {
210 |             // block comment
211 |             bufferStr = "";
212 |             let end = false;
213 |             while (!end) {
214 |               d = this.reader.nextChar();
215 |               if (d !== -1) {
216 |                 if (d === "\r" || d === "\n") {
217 |                   this.currLine++;
218 |                 }
219 |                 if (d === "*") {
220 |                   let e = this.reader.nextChar();
221 |                   if (e === "/") {
222 |                     // meet */
223 |                     end = true;
224 |                   } else {
225 |                     bufferStr += "*" + e;
226 |                   }
227 |                 } else {
228 |                   bufferStr += d;
229 |                 }
230 |               } else {
231 |                 end = true;
232 |               }
233 |             }
234 |             this.state = Scanner.START_STATE;
235 |             return this.makeToken(Token.tokens.BLOCKCOMMENT_TOKEN, bufferStr);
236 |           } else {
237 |             this.state = Scanner.START_STATE;
238 |             this.reader.retract();
239 |             return this.makeToken(Token.tokens.DIV_TOKEN);
240 |           }
241 |       }
242 |     }
243 |   }
244 | }
245 | 
246 | Scanner.START_STATE = 1; // every FSM should have a start state
247 | Scanner.IDENTIFIER_STATE = Scanner.START_STATE + 1;
248 | Scanner.SLASH_STATE = Scanner.IDENTIFIER_STATE + 1;
249 | 
250 | module.exports = Scanner;
251 | 


--------------------------------------------------------------------------------
/src/Token.js:
--------------------------------------------------------------------------------
 1 | // Token class
 2 | // type: Token's type
 3 | // text: the actual text that makes this token, may be null if it is not important
 4 | 
 5 | class Token {
 6 |   constructor(type, text) {
 7 |     this.type = type;
 8 |     this.text = text;
 9 |     this.consumed = false;
10 |   }
11 | }
12 | 
13 | Token.tokens = {};
14 | Token.tokens.EOS_TOKEN = 1; // end of stream
15 | // using + 1 allows adding a new token easily later
16 | Token.tokens.COLON_TOKEN = Token.tokens.EOS_TOKEN + 1;
17 | Token.tokens.SEMICOLON_TOKEN = Token.tokens.COLON_TOKEN + 1;
18 | Token.tokens.LEFTPAREN_TOKEN = Token.tokens.SEMICOLON_TOKEN + 1;
19 | Token.tokens.RIGHTPAREN_TOKEN = Token.tokens.LEFTPAREN_TOKEN + 1;
20 | Token.tokens.LEFTBRACE_TOKEN = Token.tokens.RIGHTPAREN_TOKEN + 1;
21 | Token.tokens.RIGHTBRACE_TOKEN = Token.tokens.LEFTBRACE_TOKEN + 1;
22 | Token.tokens.MOD_TOKEN = Token.tokens.RIGHTBRACE_TOKEN + 1;
23 | 
24 | Token.tokens.VAR_TOKEN = Token.tokens.MOD_TOKEN + 1;
25 | Token.tokens.TYPE_TOKEN = Token.tokens.VAR_TOKEN + 1;
26 | Token.tokens.BOOLLITERAL_TOKEN = Token.tokens.TYPE_TOKEN + 1;
27 | Token.tokens.INTLITERAL_TOKEN = Token.tokens.BOOLLITERAL_TOKEN + 1;
28 | Token.tokens.IF_TOKEN = Token.tokens.INTLITERAL_TOKEN + 1;
29 | Token.tokens.ELSE_TOKEN = Token.tokens.IF_TOKEN + 1;
30 | Token.tokens.WHILE_TOKEN = Token.tokens.ELSE_TOKEN + 1;
31 | Token.tokens.PRINT_TOKEN = Token.tokens.WHILE_TOKEN + 1;
32 | Token.tokens.IDENTIFIER_TOKEN = Token.tokens.PRINT_TOKEN + 1;
33 | 
34 | Token.tokens.PLUS_TOKEN = Token.tokens.IDENTIFIER_TOKEN + 1;
35 | Token.tokens.PLUSPLUS_TOKEN = Token.tokens.PLUS_TOKEN + 1;
36 | Token.tokens.PLUSASSIGN_TOKEN = Token.tokens.PLUSPLUS_TOKEN + 1;
37 | Token.tokens.MINUS_TOKEN = Token.tokens.PLUSASSIGN_TOKEN + 1;
38 | Token.tokens.MINUSMINUS_TOKEN = Token.tokens.MINUS_TOKEN + 1;
39 | Token.tokens.MINUSASSIGN_TOKEN = Token.tokens.MINUSMINUS_TOKEN + 1;
40 | Token.tokens.MULT_TOKEN = Token.tokens.MINUSASSIGN_TOKEN + 1;
41 | Token.tokens.DIV_TOKEN = Token.tokens.MULT_TOKEN + 1;
42 | Token.tokens.ASSIGN_TOKEN = Token.tokens.DIV_TOKEN + 1;
43 | Token.tokens.EQUAL_TOKEN = Token.tokens.ASSIGN_TOKEN + 1;
44 | Token.tokens.NOTEQUAL_TOKEN = Token.tokens.EQUAL_TOKEN + 1;
45 | Token.tokens.GREATER_TOKEN = Token.tokens.NOTEQUAL_TOKEN + 1;
46 | Token.tokens.GREATEREQUAL_TOKEN = Token.tokens.GREATER_TOKEN + 1;
47 | Token.tokens.LESS_TOKEN = Token.tokens.GREATEREQUAL_TOKEN + 1;
48 | Token.tokens.LESSEQUAL_TOKEN = Token.tokens.LESS_TOKEN + 1;
49 | Token.tokens.AND_TOKEN = Token.tokens.LESSEQUAL_TOKEN + 1;
50 | Token.tokens.OR_TOKEN = Token.tokens.AND_TOKEN + 1;
51 | Token.tokens.NOT_TOKEN = Token.tokens.OR_TOKEN + 1;
52 | 
53 | Token.tokens.LINECOMMENT_TOKEN = Token.tokens.NOT_TOKEN + 1;
54 | Token.tokens.BLOCKCOMMENT_TOKEN = Token.tokens.LINECOMMENT_TOKEN + 1;
55 | Token.tokens.NEWLINE_TOKEN = Token.tokens.BLOCKCOMMENT_TOKEN + 1;
56 | 
57 | Token.backwardMap = {}; // for inverse look-up
58 | 
59 | for (let x in Token.tokens) {
60 |   if (Token.tokens.hasOwnProperty(x)) {
61 |     Token.backwardMap[Token.tokens[x]] = x;
62 |   }
63 | }
64 | 
65 | module.exports = Token;
66 | 


--------------------------------------------------------------------------------
/src/example.ws:
--------------------------------------------------------------------------------
1 | //program start
2 | var a:int = 89 error;
3 | var b:bool recovery;
4 | var c:int = 64;
5 | var d:bool;


--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
 1 | const fs = require("fs");
 2 | const Reader = require("./Reader");
 3 | const Scanner = require("./Scanner");
 4 | const Token = require("./Token");
 5 | const Parser = require("./Parser");
 6 | const Errors = require("./Errors");
 7 | 
 8 | function log(str) {
 9 |   console.log(str);
10 | }
11 | 
12 | function errorLog(str) {
13 |   console.error(str);
14 | }
15 | 
16 | let dataToBeCompiled = fs.readFileSync("src/example.ws", "utf8");
17 | let reader = new Reader(dataToBeCompiled);
18 | let scanner = new Scanner(reader);
19 | let parser = new Parser(scanner);
20 | 
21 | expressionBlockNode = parser.parse();
22 | 
23 | console.log(expressionBlockNode);
24 | 
25 | Errors.each(function(error, i) {
26 |   errorLog(
27 |     "Line " + error.line + ": (" + Errors.type[error.type] + ") " + error.msg
28 |   );
29 | });
30 | 


--------------------------------------------------------------------------------
/yarn.lock:
--------------------------------------------------------------------------------
1 | # THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
2 | # yarn lockfile v1
3 | 
4 | 
5 | prettier@1.18.2:
6 |   version "1.18.2"
7 |   resolved "https://registry.yarnpkg.com/prettier/-/prettier-1.18.2.tgz#6823e7c5900017b4bd3acf46fe9ac4b4d7bda9ea"
8 |   integrity sha512-OeHeMc0JhFE9idD4ZdtNibzY0+TPHSpSSb9h8FqtP+YnoZZ1sl8Vc9b1sasjfymH3SonAF4QcA2+mzHPhMvIiw==
9 | 


--------------------------------------------------------------------------------