├── LICENSE
├── README.md
├── super-tiny-compiler-chinese.js
├── super-tiny-compiler.js
└── test.js


/LICENSE:
--------------------------------------------------------------------------------
  1 | Creative Commons Attribution 4.0 International
  2 | 
  3 | =======================================================================
  4 | 
  5 | Creative Commons Corporation ("Creative Commons") is not a law firm and
  6 | does not provide legal services or legal advice. Distribution of
  7 | Creative Commons public licenses does not create a lawyer-client or
  8 | other relationship. Creative Commons makes its licenses and related
  9 | information available on an "as-is" basis. Creative Commons gives no
 10 | warranties regarding its licenses, any material licensed under their
 11 | terms and conditions, or any related information. Creative Commons
 12 | disclaims all liability for damages resulting from their use to the
 13 | fullest extent possible.
 14 | 
 15 | Using Creative Commons Public Licenses
 16 | 
 17 | Creative Commons public licenses provide a standard set of terms and
 18 | conditions that creators and other rights holders may use to share
 19 | original works of authorship and other material subject to copyright
 20 | and certain other rights specified in the public license below. The
 21 | following considerations are for informational purposes only, are not
 22 | exhaustive, and do not form part of our licenses.
 23 | 
 24 |      Considerations for licensors: Our public licenses are
 25 |      intended for use by those authorized to give the public
 26 |      permission to use material in ways otherwise restricted by
 27 |      copyright and certain other rights. Our licenses are
 28 |      irrevocable. Licensors should read and understand the terms
 29 |      and conditions of the license they choose before applying it.
 30 |      Licensors should also secure all rights necessary before
 31 |      applying our licenses so that the public can reuse the
 32 |      material as expected. Licensors should clearly mark any
 33 |      material not subject to the license. This includes other CC-
 34 |      licensed material, or material used under an exception or
 35 |      limitation to copyright. More considerations for licensors:
 36 |   wiki.creativecommons.org/Considerations_for_licensors
 37 | 
 38 |      Considerations for the public: By using one of our public
 39 |      licenses, a licensor grants the public permission to use the
 40 |      licensed material under specified terms and conditions. If
 41 |      the licensor's permission is not necessary for any reason--for
 42 |      example, because of any applicable exception or limitation to
 43 |      copyright--then that use is not regulated by the license. Our
 44 |      licenses grant only permissions under copyright and certain
 45 |      other rights that a licensor has authority to grant. Use of
 46 |      the licensed material may still be restricted for other
 47 |      reasons, including because others have copyright or other
 48 |      rights in the material. A licensor may make special requests,
 49 |      such as asking that all changes be marked or described.
 50 |      Although not required by our licenses, you are encouraged to
 51 |      respect those requests where reasonable. More_considerations
 52 |      for the public:
 53 |   wiki.creativecommons.org/Considerations_for_licensees
 54 | 
 55 | =======================================================================
 56 | 
 57 | Creative Commons Attribution 4.0 International Public License
 58 | 
 59 | By exercising the Licensed Rights (defined below), You accept and agree
 60 | to be bound by the terms and conditions of this Creative Commons
 61 | Attribution 4.0 International Public License ("Public License"). To the
 62 | extent this Public License may be interpreted as a contract, You are
 63 | granted the Licensed Rights in consideration of Your acceptance of
 64 | these terms and conditions, and the Licensor grants You such rights in
 65 | consideration of benefits the Licensor receives from making the
 66 | Licensed Material available under these terms and conditions.
 67 | 
 68 | 
 69 | Section 1 -- Definitions.
 70 | 
 71 |   a. Adapted Material means material subject to Copyright and Similar
 72 |      Rights that is derived from or based upon the Licensed Material
 73 |      and in which the Licensed Material is translated, altered,
 74 |      arranged, transformed, or otherwise modified in a manner requiring
 75 |      permission under the Copyright and Similar Rights held by the
 76 |      Licensor. For purposes of this Public License, where the Licensed
 77 |      Material is a musical work, performance, or sound recording,
 78 |      Adapted Material is always produced where the Licensed Material is
 79 |      synched in timed relation with a moving image.
 80 | 
 81 |   b. Adapter's License means the license You apply to Your Copyright
 82 |      and Similar Rights in Your contributions to Adapted Material in
 83 |      accordance with the terms and conditions of this Public License.
 84 | 
 85 |   c. Copyright and Similar Rights means copyright and/or similar rights
 86 |      closely related to copyright including, without limitation,
 87 |      performance, broadcast, sound recording, and Sui Generis Database
 88 |      Rights, without regard to how the rights are labeled or
 89 |      categorized. For purposes of this Public License, the rights
 90 |      specified in Section 2(b)(1)-(2) are not Copyright and Similar
 91 |      Rights.
 92 | 
 93 |   d. Effective Technological Measures means those measures that, in the
 94 |      absence of proper authority, may not be circumvented under laws
 95 |      fulfilling obligations under Article 11 of the WIPO Copyright
 96 |      Treaty adopted on December 20, 1996, and/or similar international
 97 |      agreements.
 98 | 
 99 |   e. Exceptions and Limitations means fair use, fair dealing, and/or
100 |      any other exception or limitation to Copyright and Similar Rights
101 |      that applies to Your use of the Licensed Material.
102 | 
103 |   f. Licensed Material means the artistic or literary work, database,
104 |      or other material to which the Licensor applied this Public
105 |      License.
106 | 
107 |   g. Licensed Rights means the rights granted to You subject to the
108 |      terms and conditions of this Public License, which are limited to
109 |      all Copyright and Similar Rights that apply to Your use of the
110 |      Licensed Material and that the Licensor has authority to license.
111 | 
112 |   h. Licensor means the individual(s) or entity(ies) granting rights
113 |      under this Public License.
114 | 
115 |   i. Share means to provide material to the public by any means or
116 |      process that requires permission under the Licensed Rights, such
117 |      as reproduction, public display, public performance, distribution,
118 |      dissemination, communication, or importation, and to make material
119 |      available to the public including in ways that members of the
120 |      public may access the material from a place and at a time
121 |      individually chosen by them.
122 | 
123 |   j. Sui Generis Database Rights means rights other than copyright
124 |      resulting from Directive 96/9/EC of the European Parliament and of
125 |      the Council of 11 March 1996 on the legal protection of databases,
126 |      as amended and/or succeeded, as well as other essentially
127 |      equivalent rights anywhere in the world.
128 | 
129 |   k. You means the individual or entity exercising the Licensed Rights
130 |      under this Public License. Your has a corresponding meaning.
131 | 
132 | 
133 | Section 2 -- Scope.
134 | 
135 |   a. License grant.
136 | 
137 |        1. Subject to the terms and conditions of this Public License,
138 |           the Licensor hereby grants You a worldwide, royalty-free,
139 |           non-sublicensable, non-exclusive, irrevocable license to
140 |           exercise the Licensed Rights in the Licensed Material to:
141 | 
142 |             a. reproduce and Share the Licensed Material, in whole or
143 |                in part; and
144 | 
145 |             b. produce, reproduce, and Share Adapted Material.
146 | 
147 |        2. Exceptions and Limitations. For the avoidance of doubt, where
148 |           Exceptions and Limitations apply to Your use, this Public
149 |           License does not apply, and You do not need to comply with
150 |           its terms and conditions.
151 | 
152 |        3. Term. The term of this Public License is specified in Section
153 |           6(a).
154 | 
155 |        4. Media and formats; technical modifications allowed. The
156 |           Licensor authorizes You to exercise the Licensed Rights in
157 |           all media and formats whether now known or hereafter created,
158 |           and to make technical modifications necessary to do so. The
159 |           Licensor waives and/or agrees not to assert any right or
160 |           authority to forbid You from making technical modifications
161 |           necessary to exercise the Licensed Rights, including
162 |           technical modifications necessary to circumvent Effective
163 |           Technological Measures. For purposes of this Public License,
164 |           simply making modifications authorized by this Section 2(a)
165 |           (4) never produces Adapted Material.
166 | 
167 |        5. Downstream recipients.
168 | 
169 |             a. Offer from the Licensor -- Licensed Material. Every
170 |                recipient of the Licensed Material automatically
171 |                receives an offer from the Licensor to exercise the
172 |                Licensed Rights under the terms and conditions of this
173 |                Public License.
174 | 
175 |             b. No downstream restrictions. You may not offer or impose
176 |                any additional or different terms or conditions on, or
177 |                apply any Effective Technological Measures to, the
178 |                Licensed Material if doing so restricts exercise of the
179 |                Licensed Rights by any recipient of the Licensed
180 |                Material.
181 | 
182 |        6. No endorsement. Nothing in this Public License constitutes or
183 |           may be construed as permission to assert or imply that You
184 |           are, or that Your use of the Licensed Material is, connected
185 |           with, or sponsored, endorsed, or granted official status by,
186 |           the Licensor or others designated to receive attribution as
187 |           provided in Section 3(a)(1)(A)(i).
188 | 
189 |   b. Other rights.
190 | 
191 |        1. Moral rights, such as the right of integrity, are not
192 |           licensed under this Public License, nor are publicity,
193 |           privacy, and/or other similar personality rights; however, to
194 |           the extent possible, the Licensor waives and/or agrees not to
195 |           assert any such rights held by the Licensor to the limited
196 |           extent necessary to allow You to exercise the Licensed
197 |           Rights, but not otherwise.
198 | 
199 |        2. Patent and trademark rights are not licensed under this
200 |           Public License.
201 | 
202 |        3. To the extent possible, the Licensor waives any right to
203 |           collect royalties from You for the exercise of the Licensed
204 |           Rights, whether directly or through a collecting society
205 |           under any voluntary or waivable statutory or compulsory
206 |           licensing scheme. In all other cases the Licensor expressly
207 |           reserves any right to collect such royalties.
208 | 
209 | 
210 | Section 3 -- License Conditions.
211 | 
212 | Your exercise of the Licensed Rights is expressly made subject to the
213 | following conditions.
214 | 
215 |   a. Attribution.
216 | 
217 |        1. If You Share the Licensed Material (including in modified
218 |           form), You must:
219 | 
220 |             a. retain the following if it is supplied by the Licensor
221 |                with the Licensed Material:
222 | 
223 |                  i. identification of the creator(s) of the Licensed
224 |                     Material and any others designated to receive
225 |                     attribution, in any reasonable manner requested by
226 |                     the Licensor (including by pseudonym if
227 |                     designated);
228 | 
229 |                 ii. a copyright notice;
230 | 
231 |                iii. a notice that refers to this Public License;
232 | 
233 |                 iv. a notice that refers to the disclaimer of
234 |                     warranties;
235 | 
236 |                  v. a URI or hyperlink to the Licensed Material to the
237 |                     extent reasonably practicable;
238 | 
239 |             b. indicate if You modified the Licensed Material and
240 |                retain an indication of any previous modifications; and
241 | 
242 |             c. indicate the Licensed Material is licensed under this
243 |                Public License, and include the text of, or the URI or
244 |                hyperlink to, this Public License.
245 | 
246 |        2. You may satisfy the conditions in Section 3(a)(1) in any
247 |           reasonable manner based on the medium, means, and context in
248 |           which You Share the Licensed Material. For example, it may be
249 |           reasonable to satisfy the conditions by providing a URI or
250 |           hyperlink to a resource that includes the required
251 |           information.
252 | 
253 |        3. If requested by the Licensor, You must remove any of the
254 |           information required by Section 3(a)(1)(A) to the extent
255 |           reasonably practicable.
256 | 
257 |        4. If You Share Adapted Material You produce, the Adapter's
258 |           License You apply must not prevent recipients of the Adapted
259 |           Material from complying with this Public License.
260 | 
261 | 
262 | Section 4 -- Sui Generis Database Rights.
263 | 
264 | Where the Licensed Rights include Sui Generis Database Rights that
265 | apply to Your use of the Licensed Material:
266 | 
267 |   a. for the avoidance of doubt, Section 2(a)(1) grants You the right
268 |      to extract, reuse, reproduce, and Share all or a substantial
269 |      portion of the contents of the database;
270 | 
271 |   b. if You include all or a substantial portion of the database
272 |      contents in a database in which You have Sui Generis Database
273 |      Rights, then the database in which You have Sui Generis Database
274 |      Rights (but not its individual contents) is Adapted Material; and
275 | 
276 |   c. You must comply with the conditions in Section 3(a) if You Share
277 |      all or a substantial portion of the contents of the database.
278 | 
279 | For the avoidance of doubt, this Section 4 supplements and does not
280 | replace Your obligations under this Public License where the Licensed
281 | Rights include other Copyright and Similar Rights.
282 | 
283 | 
284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
285 | 
286 |   a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
287 |      EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
288 |      AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
289 |      ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
290 |      IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
291 |      WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
292 |      PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
293 |      ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
294 |      KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
295 |      ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
296 | 
297 |   b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
298 |      TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
299 |      NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
300 |      INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
301 |      COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
302 |      USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
303 |      ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
304 |      DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
305 |      IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
306 | 
307 |   c. The disclaimer of warranties and limitation of liability provided
308 |      above shall be interpreted in a manner that, to the extent
309 |      possible, most closely approximates an absolute disclaimer and
310 |      waiver of all liability.
311 | 
312 | 
313 | Section 6 -- Term and Termination.
314 | 
315 |   a. This Public License applies for the term of the Copyright and
316 |      Similar Rights licensed here. However, if You fail to comply with
317 |      this Public License, then Your rights under this Public License
318 |      terminate automatically.
319 | 
320 |   b. Where Your right to use the Licensed Material has terminated under
321 |      Section 6(a), it reinstates:
322 | 
323 |        1. automatically as of the date the violation is cured, provided
324 |           it is cured within 30 days of Your discovery of the
325 |           violation; or
326 | 
327 |        2. upon express reinstatement by the Licensor.
328 | 
329 |      For the avoidance of doubt, this Section 6(b) does not affect any
330 |      right the Licensor may have to seek remedies for Your violations
331 |      of this Public License.
332 | 
333 |   c. For the avoidance of doubt, the Licensor may also offer the
334 |      Licensed Material under separate terms or conditions or stop
335 |      distributing the Licensed Material at any time; however, doing so
336 |      will not terminate this Public License.
337 | 
338 |   d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
339 |      License.
340 | 
341 | 
342 | Section 7 -- Other Terms and Conditions.
343 | 
344 |   a. The Licensor shall not be bound by any additional or different
345 |      terms or conditions communicated by You unless expressly agreed.
346 | 
347 |   b. Any arrangements, understandings, or agreements regarding the
348 |      Licensed Material not stated herein are separate from and
349 |      independent of the terms and conditions of this Public License.
350 | 
351 | 
352 | Section 8 -- Interpretation.
353 | 
354 |   a. For the avoidance of doubt, this Public License does not, and
355 |      shall not be interpreted to, reduce, limit, restrict, or impose
356 |      conditions on any use of the Licensed Material that could lawfully
357 |      be made without permission under this Public License.
358 | 
359 |   b. To the extent possible, if any provision of this Public License is
360 |      deemed unenforceable, it shall be automatically reformed to the
361 |      minimum extent necessary to make it enforceable. If the provision
362 |      cannot be reformed, it shall be severed from this Public License
363 |      without affecting the enforceability of the remaining terms and
364 |      conditions.
365 | 
366 |   c. No term or condition of this Public License will be waived and no
367 |      failure to comply consented to unless expressly agreed to by the
368 |      Licensor.
369 | 
370 |   d. Nothing in this Public License constitutes or may be interpreted
371 |      as a limitation upon, or waiver of, any privileges and immunities
372 |      that apply to the Licensor or You, including from the legal
373 |      processes of any jurisdiction or authority.
374 | 
375 | 
376 | =======================================================================
377 | 
378 | Creative Commons is not a party to its public licenses.
379 | Notwithstanding, Creative Commons may elect to apply one of its public
380 | licenses to material it publishes and in those instances will be
381 | considered the "Licensor." Except for the limited purpose of indicating
382 | that material is shared under a Creative Commons public license or as
383 | otherwise permitted by the Creative Commons policies published at
384 | creativecommons.org/policies, Creative Commons does not authorize the
385 | use of the trademark "Creative Commons" or any other trademark or logo
386 | of Creative Commons without its prior written consent including,
387 | without limitation, in connection with any unauthorized modifications
388 | to any of its public licenses or any other arrangements,
389 | understandings, or agreements concerning use of licensed material. For
390 | the avoidance of doubt, this paragraph does not form part of the public
391 | licenses.
392 | 
393 | Creative Commons may be contacted at creativecommons.org.
394 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <a href="super-tiny-compiler.js"><img width="731" alt="THE SUPER TINY COMPILER" src="https://cloud.githubusercontent.com/assets/952783/14413766/134c4068-ff39-11e5-996e-9452973299c2.png"/></a>
 2 | 
 3 | ***Welcome to The Super Tiny Compiler!***
 4 | 
 5 | 这是一个超级简单的编译器的例子，包含了现代编译器的几个主要部分，用简单易读的 JavaScript 编写。
 6 | 
 7 | 把这个读完将会有助于你了解*大多数*编译器从前端到后端是如何工作的。
 8 | 
 9 | ### [想直接看代码？点这里](super-tiny-compiler-chinese.js)
10 | 
11 | ### 或者... [看看演讲](https://www.youtube.com/watch?v=Tar4WgAfMr4)
12 | 
13 | ---
14 | 
15 | ### 为啥我要关心这个？
16 | 
17 | 确实，大多数人在日常工作中没有必要了解编译器都是如何工作的。但是，编译器无处不在，你使用的很多
18 | 工具的底层原理都是从编译器那儿来的。
19 | 
20 | ### 但是编译器太高大上了！
21 | 
22 | 额，确实。但这是我们（写编译器的人）的过错，我们把一些本应该很通俗易懂的事情弄得太可怕了，
23 | 让很多人都认为编译器这种东西是可望而不可即的，甚至只有最书呆子的书呆子才能理解。
24 | 
25 | ### 好吧，所以我该从哪儿开始？
26 | 
27 | 太棒了！直接去看 [super-tiny-compiler-chinese.js](super-tiny-compiler-chinese.js) 这个文件吧！
28 | 
29 | ### Tests
30 | 
31 | 直接运行 `node test.js`
32 | 
33 | ---
34 | 
35 | [![cc-by-4.0](https://licensebuttons.net/l/by/4.0/80x15.png)](http://creativecommons.org/licenses/by/4.0/)
36 | 


--------------------------------------------------------------------------------
/super-tiny-compiler-chinese.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH     HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
  3 |  * T:::::::::::::::::::::TH:::::::H     H:::::::HE::::::::::::::::::::E
  4 |  * T:::::::::::::::::::::TH:::::::H     H:::::::HE::::::::::::::::::::E
  5 |  * T:::::TT:::::::TT:::::THH::::::H     H::::::HHEE::::::EEEEEEEEE::::E
  6 |  * TTTTTT  T:::::T  TTTTTT  H:::::H     H:::::H    E:::::E       EEEEEE
  7 |  *         T:::::T          H:::::H     H:::::H    E:::::E
  8 |  *         T:::::T          H::::::HHHHH::::::H    E::::::EEEEEEEEEE
  9 |  *         T:::::T          H:::::::::::::::::H    E:::::::::::::::E
 10 |  *         T:::::T          H:::::::::::::::::H    E:::::::::::::::E
 11 |  *         T:::::T          H::::::HHHHH::::::H    E::::::EEEEEEEEEE
 12 |  *         T:::::T          H:::::H     H:::::H    E:::::E
 13 |  *         T:::::T          H:::::H     H:::::H    E:::::E       EEEEEE
 14 |  *       TT:::::::TT      HH::::::H     H::::::HHEE::::::EEEEEEEE:::::E
 15 |  *       T:::::::::T      H:::::::H     H:::::::HE::::::::::::::::::::E
 16 |  *       T:::::::::T      H:::::::H     H:::::::HE::::::::::::::::::::E
 17 |  *       TTTTTTTTTTT      HHHHHHHHH     HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
 18 |  *
 19 |  *    SSSSSSSSSSSSSSS UUUUUUUU     UUUUUUUUPPPPPPPPPPPPPPPPP   EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR
 20 |  *  SS:::::::::::::::SU::::::U     U::::::UP::::::::::::::::P  E::::::::::::::::::::ER::::::::::::::::R
 21 |  * S:::::SSSSSS::::::SU::::::U     U::::::UP::::::PPPPPP:::::P E::::::::::::::::::::ER::::::RRRRRR:::::R
 22 |  * S:::::S     SSSSSSSUU:::::U     U:::::UUPP:::::P     P:::::PEE::::::EEEEEEEEE::::ERR:::::R     R:::::R
 23 |  * S:::::S             U:::::U     U:::::U   P::::P     P:::::P  E:::::E       EEEEEE  R::::R     R:::::R
 24 |  * S:::::S             U:::::U     U:::::U   P::::P     P:::::P  E:::::E               R::::R     R:::::R
 25 |  *  S::::SSSS          U:::::U     U:::::U   P::::PPPPPP:::::P   E::::::EEEEEEEEEE     R::::RRRRRR:::::R
 26 |  *   SS::::::SSSSS     U:::::U     U:::::U   P:::::::::::::PP    E:::::::::::::::E     R:::::::::::::RR
 27 |  *     SSS::::::::SS   U:::::U     U:::::U   P::::PPPPPPPPP      E:::::::::::::::E     R::::RRRRRR:::::R
 28 |  *        SSSSSS::::S  U:::::U     U:::::U   P::::P              E::::::EEEEEEEEEE     R::::R     R:::::R
 29 |  *             S:::::S U:::::U     U:::::U   P::::P              E:::::E               R::::R     R:::::R
 30 |  *             S:::::S U::::::U   U::::::U   P::::P              E:::::E       EEEEEE  R::::R     R:::::R
 31 |  * SSSSSSS     S:::::S U:::::::UUU:::::::U PP::::::PP          EE::::::EEEEEEEE:::::ERR:::::R     R:::::R
 32 |  * S::::::SSSSSS:::::S  UU:::::::::::::UU  P::::::::P          E::::::::::::::::::::ER::::::R     R:::::R
 33 |  * S:::::::::::::::SS     UU:::::::::UU    P::::::::P          E::::::::::::::::::::ER::::::R     R:::::R
 34 |  *  SSSSSSSSSSSSSSS         UUUUUUUUU      PPPPPPPPPP          EEEEEEEEEEEEEEEEEEEEEERRRRRRRR     RRRRRRR
 35 |  *
 36 |  * TTTTTTTTTTTTTTTTTTTTTTTIIIIIIIIIINNNNNNNN        NNNNNNNNYYYYYYY       YYYYYYY
 37 |  * T:::::::::::::::::::::TI::::::::IN:::::::N       N::::::NY:::::Y       Y:::::Y
 38 |  * T:::::::::::::::::::::TI::::::::IN::::::::N      N::::::NY:::::Y       Y:::::Y
 39 |  * T:::::TT:::::::TT:::::TII::::::IIN:::::::::N     N::::::NY::::::Y     Y::::::Y
 40 |  * TTTTTT  T:::::T  TTTTTT  I::::I  N::::::::::N    N::::::NYYY:::::Y   Y:::::YYY
 41 |  *         T:::::T          I::::I  N:::::::::::N   N::::::N   Y:::::Y Y:::::Y
 42 |  *         T:::::T          I::::I  N:::::::N::::N  N::::::N    Y:::::Y:::::Y
 43 |  *         T:::::T          I::::I  N::::::N N::::N N::::::N     Y:::::::::Y
 44 |  *         T:::::T          I::::I  N::::::N  N::::N:::::::N      Y:::::::Y
 45 |  *         T:::::T          I::::I  N::::::N   N:::::::::::N       Y:::::Y
 46 |  *         T:::::T          I::::I  N::::::N    N::::::::::N       Y:::::Y
 47 |  *         T:::::T          I::::I  N::::::N     N:::::::::N       Y:::::Y
 48 |  *       TT:::::::TT      II::::::IIN::::::N      N::::::::N       Y:::::Y
 49 |  *       T:::::::::T      I::::::::IN::::::N       N:::::::N    YYYY:::::YYYY
 50 |  *       T:::::::::T      I::::::::IN::::::N        N::::::N    Y:::::::::::Y
 51 |  *       TTTTTTTTTTT      IIIIIIIIIINNNNNNNN         NNNNNNN    YYYYYYYYYYYYY
 52 |  *
 53 |  *         CCCCCCCCCCCCC     OOOOOOOOO     MMMMMMMM               MMMMMMMMPPPPPPPPPPPPPPPPP   IIIIIIIIIILLLLLLLLLLL             EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR
 54 |  *      CCC::::::::::::C   OO:::::::::OO   M:::::::M             M:::::::MP::::::::::::::::P  I::::::::IL:::::::::L             E::::::::::::::::::::ER::::::::::::::::R
 55 |  *    CC:::::::::::::::C OO:::::::::::::OO M::::::::M           M::::::::MP::::::PPPPPP:::::P I::::::::IL:::::::::L             E::::::::::::::::::::ER::::::RRRRRR:::::R
 56 |  *   C:::::CCCCCCCC::::CO:::::::OOO:::::::OM:::::::::M         M:::::::::MPP:::::P     P:::::PII::::::IILL:::::::LL             EE::::::EEEEEEEEE::::ERR:::::R     R:::::R
 57 |  *  C:::::C       CCCCCCO::::::O   O::::::OM::::::::::M       M::::::::::M  P::::P     P:::::P  I::::I    L:::::L                 E:::::E       EEEEEE  R::::R     R:::::R
 58 |  * C:::::C              O:::::O     O:::::OM:::::::::::M     M:::::::::::M  P::::P     P:::::P  I::::I    L:::::L                 E:::::E               R::::R     R:::::R
 59 |  * C:::::C              O:::::O     O:::::OM:::::::M::::M   M::::M:::::::M  P::::PPPPPP:::::P   I::::I    L:::::L                 E::::::EEEEEEEEEE     R::::RRRRRR:::::R
 60 |  * C:::::C              O:::::O     O:::::OM::::::M M::::M M::::M M::::::M  P:::::::::::::PP    I::::I    L:::::L                 E:::::::::::::::E     R:::::::::::::RR
 61 |  * C:::::C              O:::::O     O:::::OM::::::M  M::::M::::M  M::::::M  P::::PPPPPPPPP      I::::I    L:::::L                 E:::::::::::::::E     R::::RRRRRR:::::R
 62 |  * C:::::C              O:::::O     O:::::OM::::::M   M:::::::M   M::::::M  P::::P              I::::I    L:::::L                 E::::::EEEEEEEEEE     R::::R     R:::::R
 63 |  * C:::::C              O:::::O     O:::::OM::::::M    M:::::M    M::::::M  P::::P              I::::I    L:::::L                 E:::::E               R::::R     R:::::R
 64 |  *  C:::::C       CCCCCCO::::::O   O::::::OM::::::M     MMMMM     M::::::M  P::::P              I::::I    L:::::L         LLLLLL  E:::::E       EEEEEE  R::::R     R:::::R
 65 |  *   C:::::CCCCCCCC::::CO:::::::OOO:::::::OM::::::M               M::::::MPP::::::PP          II::::::IILL:::::::LLLLLLLLL:::::LEE::::::EEEEEEEE:::::ERR:::::R     R:::::R
 66 |  *    CC:::::::::::::::C OO:::::::::::::OO M::::::M               M::::::MP::::::::P          I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R     R:::::R
 67 |  *      CCC::::::::::::C   OO:::::::::OO   M::::::M               M::::::MP::::::::P          I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R     R:::::R
 68 |  *         CCCCCCCCCCCCC     OOOOOOOOO     MMMMMMMM               MMMMMMMMPPPPPPPPPP          IIIIIIIIIILLLLLLLLLLLLLLLLLLLLLLLLEEEEEEEEEEEEEEEEEEEEEERRRRRRRR     RRRRRRR
 69 |  *
 70 |  * =======================================================================================================================================================================
 71 |  * =======================================================================================================================================================================
 72 |  * =======================================================================================================================================================================
 73 |  * =======================================================================================================================================================================
 74 |  */
 75 | 
 76 | /**
 77 |  * 今天让我们来写一个编译器，一个超级无敌小的编译器！它小到如果把所有注释删去的话，大概只剩
 78 |  * 200行左右的代码。
 79 |  * 
 80 |  * 我们将会用它将 lisp 风格的函数调用转换为 C 风格。
 81 |  *
 82 |  * 如果你对这两种风格不是很熟悉，下面是一个简单的介绍。
 83 |  *
 84 |  * 假设我们有两个函数，`add` 和 `subtract`，那么它们的写法将会是下面这样：
 85 |  * 
 86 |  *                  LISP                      C
 87 |  *
 88 |  *   2 + 2          (add 2 2)                 add(2, 2)
 89 |  *   4 - 2          (subtract 4 2)            subtract(4, 2)
 90 |  *   2 + (4 - 2)    (add 2 (subtract 4 2))    add(2, subtract(4, 2))
 91 |  *
 92 |  * 很简单对吧？
 93 |  *
 94 |  * 这个转换就是我们将要做的事情。虽然这并不包含 LISP 或者 C 的全部语法，但它足以向我们
 95 |  * 展示现代编译器很多要点。
 96 |  * 
 97 |  */
 98 | 
 99 | /**
100 |  * 大多数编译器可以分成三个阶段：解析（Parsing），转换（Transformation）以及代码
101 |  * 生成（Code Generation）。
102 |  *
103 |  * 1. *解析*是将最初原始的代码转换为一种更加抽象的表示（译者注：即AST）。
104 |  *
105 |  * 2. *转换*将对这个抽象的表示做一些处理，让它能做到编译器期望
106 |  *    它做到的事情。
107 |  *
108 |  * 3. *代码生成*接收处理之后的代码表示，然后把它转换成新的代码。
109 |  */
110 | 
111 | /**
112 |  * 解析（Parsing）
113 |  * -------
114 |  *
115 |  * 解析一般来说会分成两个阶段：词法分析（Lexical Analysis）和语法分析（Syntactic Analysis）。
116 |  *
117 |  * 1. *词法分析*接收原始代码，然后把它分割成一些被称为 Token 的东西，这个过程是在词法分析
118 |  *    器（Tokenizer或者Lexer）中完成的。
119 |  *
120 |  *    所有 Token 组成一个数组，其中每个 Token 是一个描述一小段独立语法片段的对象。它们可以是数字、
121 |  *    标签、标点符号、运算符，或者其它任何东西。
122 |  *
123 |  * 2. *语法分析* 接收之前生成的 Token，把它们转换成一种抽象的表示，这种抽象的表示描述了代
124 |  *    码语句中的每一个片段以及它们之间的关系。这被称为中间表示（intermediate representation）
125 |  *    或抽象语法树（Abstract Syntax Tree，缩写为 AST）。
126 |  *
127 |  *    抽象语法树是一个嵌套程度很深的对象，用一种更容易处理的方式代表了代码本身，也能给我们
128 |  *    更多信息。
129 |  *
130 |  * 比如说对于下面这一行代码语句：
131 |  *
132 |  *   (add 2 (subtract 4 2))
133 |  *
134 |  * 它产生的 Token 看起来或许是这样的：
135 |  *
136 |  *   [
137 |  *     { type: 'paren',  value: '('        },
138 |  *     { type: 'name',   value: 'add'      },
139 |  *     { type: 'number', value: '2'        },
140 |  *     { type: 'paren',  value: '('        },
141 |  *     { type: 'name',   value: 'subtract' },
142 |  *     { type: 'number', value: '4'        },
143 |  *     { type: 'number', value: '2'        },
144 |  *     { type: 'paren',  value: ')'        },
145 |  *     { type: 'paren',  value: ')'        }
146 |  *   ]
147 |  *
148 |  * 它的抽象语法树（AST）看起来或许是这样的：
149 |  *
150 |  *   {
151 |  *     type: 'Program',
152 |  *     body: [{
153 |  *       type: 'CallExpression',
154 |  *       name: 'add',
155 |  *       params: [{
156 |  *         type: 'NumberLiteral',
157 |  *         value: '2'
158 |  *       }, {
159 |  *         type: 'CallExpression',
160 |  *         name: 'subtract',
161 |  *         params: [{
162 |  *           type: 'NumberLiteral',
163 |  *           value: '4'
164 |  *         }, {
165 |  *           type: 'NumberLiteral',
166 |  *           value: '2'
167 |  *         }]
168 |  *       }]
169 |  *     }]
170 |  *   }
171 |  */
172 | 
173 | /**
174 |  * 转换（Transformation）
175 |  * --------------
176 |  *
177 |  * 编译器的下一步就是转换。它只是把 AST 拿过来然后对它做一些修改。它可以在同种语言下操
178 |  * 作 AST，也可以把 AST 翻译成全新的语言。
179 |  *
180 |  * 下面我们来看看该如何转换 AST。
181 |  *
182 |  * 你或许注意到了我们的 AST 中有很多相似的元素，这些元素都有 type 属性，它们被称为 AST
183 |  * 结点。这些结点含有若干属性，可以用于描述 AST 的部分信息。
184 |  *
185 |  * 比如下面是一个“NumberLiteral”结点：
186 |  *
187 |  *   {
188 |  *     type: 'NumberLiteral',
189 |  *     value: '2'
190 |  *   }
191 |  *
192 |  * 又比如下面是一个“CallExpression”结点：
193 |  *
194 |  *   {
195 |  *     type: 'CallExpression',
196 |  *     name: 'subtract',
197 |  *     params: [...nested nodes go here...]
198 |  *   }
199 |  *
200 |  * 当转换 AST 的时候我们可以添加、移动、替代这些结点，也可以根据现有的 AST 生成一个全新
201 |  * 的 AST
202 |  *
203 |  * 既然我们编译器的目标是把输入的代码转换为一种新的语言，所以我们将会着重于产生一个针对
204 |  * 新语言的全新的 AST。
205 |  * 
206 |  *
207 |  * 遍历（Traversal）
208 |  * ---------
209 |  *
210 |  * 为了能处理所有的结点，我们需要遍历它们，使用的是深度优先遍历。
211 |  *
212 |  *   {
213 |  *     type: 'Program',
214 |  *     body: [{
215 |  *       type: 'CallExpression',
216 |  *       name: 'add',
217 |  *       params: [{
218 |  *         type: 'NumberLiteral',
219 |  *         value: '2'
220 |  *       }, {
221 |  *         type: 'CallExpression',
222 |  *         name: 'subtract',
223 |  *         params: [{
224 |  *           type: 'NumberLiteral',
225 |  *           value: '4'
226 |  *         }, {
227 |  *           type: 'NumberLiteral',
228 |  *           value: '2'
229 |  *         }]
230 |  *       }]
231 |  *     }]
232 |  *   }
233 |  *
234 |  * 对于上面的 AST 的遍历流程是这样的：
235 |  *
236 |  *   1. Program - 从 AST 的顶部结点开始
237 |  *   2. CallExpression (add) - Program 的第一个子元素
238 |  *   3. NumberLiteral (2) - CallExpression (add) 的第一个子元素
239 |  *   4. CallExpression (subtract) - CallExpression (add) 的第二个子元素
240 |  *   5. NumberLiteral (4) - CallExpression (subtract) 的第一个子元素
241 |  *   6. NumberLiteral (2) - CallExpression (subtract) 的第二个子元素
242 |  *
243 |  * 如果我们直接在 AST 内部操作，而不是产生一个新的 AST，那么就要在这里介绍所有种类的抽象，
244 |  * 但是目前访问（visiting）所有结点的方法已经足够了。
245 |  *
246 |  * 之所以使用“访问（visiting）”这个词，是因为这是一种模式，代表在对象结构内对元素进行操作。
247 |  *
248 |  * 访问者（Visitors）
249 |  * --------
250 |  *
251 |  * 我们最基础的想法是创建一个“访问者（visitor）”对象，这个对象中包含一些方法，可以接收不
252 |  * 同的结点。
253 |  *
254 |  *   var visitor = {
255 |  *     NumberLiteral() {},
256 |  *     CallExpression() {}
257 |  *   };
258 |  *
259 |  * 当我们遍历 AST 的时候，如果遇到了匹配 type 的结点，我们可以调用 visitor 中的方法。
260 |  *
261 |  * 一般情况下为了让这些方法可用性更好，我们会把父结点也作为参数传入。
262 |  */
263 | 
264 | /**
265 |  * 代码生成（Code Generation）
266 |  * ---------------
267 |  *
268 |  * 编译器的最后一个阶段是代码生成，这个阶段做的事情有时候会和转换（transformation）重叠，
269 |  * 但是代码生成最主要的部分还是根据 AST 来输出代码。
270 |  *
271 |  * 代码生成有几种不同的工作方式，有些编译器将会重用之前生成的 token，有些会创建独立的代码
272 |  * 表示，以便于线性地输出代码。但是接下来我们还是着重于使用之前生成好的 AST。
273 |  *
274 |  * 我们的代码生成器需要知道如何“打印”AST 中所有类型的结点，然后它会递归地调用自身，直到所
275 |  * 有代码都被打印到一个很长的字符串中。
276 |  * 
277 |  */
278 | 
279 | /**
280 |  * 好了！这就是编译器中所有的部分了。
281 |  *
282 |  * 当然不是说所有的编译器都像我说的这样。不同的编译器有不同的目的，所以也可能需要不同的步骤。
283 |  *
284 |  * 但你现在应该对编译器到底是个什么东西有个大概的认识了。
285 |  *
286 |  * 既然我全都解释一遍了，你应该能写一个属于自己的编译器了吧？
287 |  *
288 |  * 哈哈开个玩笑，接下来才是重点 :P
289 |  *
290 |  * 所以我们开始吧...
291 |  */
292 | 
293 | /**
294 |  * ============================================================================
295 |  *                                   (/^▽^)/
296 |  *                            词法分析器（Tokenizer）!
297 |  * ============================================================================
298 |  */
299 | 
300 | /**
301 |  * 我们从第一个阶段开始，即词法分析，使用的是词法分析器（Tokenizer）。
302 |  *
303 |  * 我们只是接收代码组成的字符串，然后把它们分割成 token 组成的数组。
304 |  *
305 |  *   (add 2 (subtract 4 2))   =>   [{ type: 'paren', value: '(' }, ...]
306 |  */
307 | 
/**
 * Tokenizer: the lexical-analysis stage of the compiler.
 *
 * Takes the raw source string and splits it into a flat array of tokens:
 *
 *   (add 2 (subtract 4 2))   =>   [{ type: 'paren', value: '(' }, ...]
 *
 * Three token types are produced: `paren` (a single `(` or `)`), `number`
 * (a run of digits 0-9) and `name` (a run of ASCII letters, upper or lower
 * case). Whitespace separates tokens and is otherwise discarded.
 *
 * @param {string} input - Source code to tokenize.
 * @returns {Array<{type: string, value: string}>} The tokens, in source order.
 * @throws {TypeError} When a character matches none of the rules above.
 */
function tokenizer(input) {

  // The character classes never change, so create them once rather than on
  // every pass through the loop.
  var WHITESPACE = /\s/;
  var NUMBERS = /[0-9]/;
  var LETTERS = /[a-z]/i;

  // `current` works like a cursor: it tracks our position in the source string.
  var current = 0;

  // `tokens` collects the tokens as we find them.
  var tokens = [];

  // A `while` loop is used because a single pass may advance `current` by more
  // than one character: tokens can be of any length.
  while (current < input.length) {

    // The character under the cursor.
    var char = input[current];

    // Parentheses: each one is a token of its own. They matter later, when the
    // parser builds `CallExpression` nodes; here we only record the character.
    if (char === '(' || char === ')') {
      tokens.push({
        type: 'paren',
        value: char
      });
      current++;
      continue;
    }

    // Whitespace only exists to separate tokens, so skip it without emitting
    // anything.
    if (WHITESPACE.test(char)) {
      current++;
      continue;
    }

    // Numbers can span several characters, but the whole run must become a
    // single token:
    //
    //   (add 123 456)
    //        ^^^ ^^^
    //        Only two separate tokens
    //
    if (NUMBERS.test(char)) {
      var digits = '';

      // Consume digits until a non-digit or the end of the input. Past the
      // last character `input[current]` is `undefined`, which `test` would
      // coerce to the string "undefined", hence the explicit bounds check.
      while (current < input.length && NUMBERS.test(char)) {
        digits += char;
        char = input[++current];
      }

      tokens.push({
        type: 'number',
        value: digits
      });
      continue;
    }

    // Names are runs of letters; in our lisp syntax they are function names.
    //
    //   (add 2 4)
    //    ^^^
    //    Name token
    //
    if (LETTERS.test(char)) {
      var letters = '';

      // The bounds check is essential here: "undefined" consists of letters,
      // so without it a name at the very end of the input would never stop
      // matching and the loop would not terminate.
      while (current < input.length && LETTERS.test(char)) {
        letters += char;
        char = input[++current];
      }

      tokens.push({
        type: 'name',
        value: letters
      });
      continue;
    }

    // Nothing matched: we do not know this character.
    throw new TypeError('I dont know what this character is: ' + char);
  }

  // Hand the finished token list back to the caller.
  return tokens;
}
431 | 
432 | /**
433 |  * ============================================================================
434 |  *                                 ヽ/❀o ل͜ o\ﾉ
435 |  *                             语法分析器（Parser）!!!
436 |  * ============================================================================
437 |  */
438 | 
439 | /**
440 |  *  语法分析器接受 token 数组，然后把它转化为 AST
441 |  *
442 |  *   [{ type: 'paren', value: '(' }, ...]   =>   { type: 'Program', body: [...] }
443 |  */
444 | 
/**
 * Parser: the syntactic-analysis stage of the compiler.
 *
 * Turns the token array produced by `tokenizer` into an AST:
 *
 *   [{ type: 'paren', value: '(' }, ...]   =>   { type: 'Program', body: [...] }
 *
 * A `number` token becomes a `NumberLiteral` node. An opening parenthesis
 * starts a `CallExpression` node: the token right after it supplies the
 * `name`, and everything up to the matching closing parenthesis is parsed
 * recursively into `params`.
 *
 * @param {Array<{type: string, value: string}>} tokens - Tokens to parse.
 * @returns {{type: string, body: Array<Object>}} The root `Program` node.
 * @throws {TypeError} With the token type as message when a token cannot
 *   start an expression, or with an "Unexpected end of input" message when
 *   the tokens run out in the middle of a call expression.
 */
function parser(tokens) {

  // Cursor into `tokens`, shared by every recursive `walk` call.
  var current = 0;

  // Returns the token under the cursor. Running off the end of the array means
  // a call expression was never closed, so report that explicitly rather than
  // failing later on a property access of `undefined`.
  function peek() {
    if (current >= tokens.length) {
      throw new TypeError('Unexpected end of input: missing closing parenthesis');
    }
    return tokens[current];
  }

  // Parses exactly one expression starting at `current` and leaves the cursor
  // just past it. Recursion (rather than a loop) is used because call
  // expressions can nest to any depth.
  function walk() {
    var token = peek();

    // A number is a leaf: consume it and emit a `NumberLiteral`.
    if (token.type === 'number') {
      current++;

      return {
        type: 'NumberLiteral',
        value: token.value
      };
    }

    // An opening parenthesis starts a `CallExpression`.
    if (token.type === 'paren' && token.value === '(') {

      // Skip the parenthesis itself; it carries no meaning in the AST.
      current++;
      token = peek();

      // The token right after `(` is taken as the name of the called function.
      var node = {
        type: 'CallExpression',
        name: token.value,
        params: []
      };

      // Move past the name token.
      current++;
      token = peek();

      // Everything up to the closing parenthesis is a parameter. Each one is
      // parsed by a nested `walk`, which also advances `current`, so nested
      // calls such as
      //
      //   (add 2 (subtract 4 2))
      //
      // consume their own closing parenthesis before control returns here.
      while (token.type !== 'paren' || token.value !== ')') {
        node.params.push(walk());
        token = peek();
      }

      // Skip the closing parenthesis of this call.
      current++;

      return node;
    }

    // Any other token cannot start an expression.
    throw new TypeError(token.type);
  }

  // The root of the AST is a `Program` node.
  var ast = {
    type: 'Program',
    body: []
  };

  // A program may hold several top-level expressions one after another
  // instead of nested, so keep parsing until the tokens are used up:
  //
  //   (add 2 2)
  //   (subtract 4 2)
  //
  while (current < tokens.length) {
    ast.body.push(walk());
  }

  return ast;
}
563 | 
564 | /**
565 |  * ============================================================================
566 |  *                                 ⌒(❀>◞౪◟<❀)⌒
567 |  *                                   遍历器!!!
568 |  * ============================================================================
569 |  */
570 | 
571 | /**
572 |  * 现在我们有了 AST，我们需要一个 visitor 去遍历所有的结点。当遇到某个类型的结点时，我们
573 |  * 需要调用 visitor 中对应类型的处理函数。
574 |  *
575 |  *   traverse(ast, {
576 |  *     Program(node, parent) {
577 |  *       // ...
578 |  *     },
579 |  *
580 |  *     CallExpression(node, parent) {
581 |  *       // ...
582 |  *     },
583 |  *
584 |  *     NumberLiteral(node, parent) {
585 |  *       // ...
586 |  *     }
587 |  *   });
588 |  */
589 | 
/**
 * Traverser: walks an AST depth-first and reports every node to a visitor.
 *
 * For each node, the visitor property whose key equals the node's `type` is
 * looked up; if it is present it is called as `handler(node, parent)` before
 * the node's children are visited.
 *
 *   traverser(ast, {
 *     Program(node, parent) {},
 *     CallExpression(node, parent) {},
 *     NumberLiteral(node, parent) {}
 *   });
 *
 * @param {Object} ast - Root node of the tree to walk.
 * @param {Object} visitor - Handlers keyed by node type; all are optional.
 * @throws {TypeError} With the node type as message when a node is not a
 *   `Program`, `CallExpression` or `NumberLiteral`.
 */
function traverser(ast, visitor) {

  // Visits one node and then descends into the child list its type carries.
  function visit(node, parent) {

    // Notify the visitor first, if it has a handler for this kind of node.
    var handler = visitor[node.type];
    if (handler) {
      handler(node, parent);
    }

    var type = node.type;

    // A `Program` keeps its children in `body`.
    if (type === 'Program') {
      visitEach(node.body, node);
      return;
    }

    // A `CallExpression` keeps its children in `params`.
    if (type === 'CallExpression') {
      visitEach(node.params, node);
      return;
    }

    // A `NumberLiteral` is a leaf: nothing to descend into.
    if (type === 'NumberLiteral') {
      return;
    }

    // Anything else is a node type we do not know how to walk.
    throw new TypeError(type);
  }

  // Visits every node of `children`, passing `parent` along. Because `visit`
  // calls back into this helper, the whole tree is covered recursively.
  function visitEach(children, parent) {
    children.forEach(function(child) {
      visit(child, parent);
    });
  }

  // Start at the root, which has no parent.
  visit(ast, null);
}
641 | 
642 | /**
643 |  * ============================================================================
644 |  *                                   ⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽
645 |  *                                   转换器!!!
646 |  * ============================================================================
647 |  */
648 | 
649 | /**
650 |  * 下面是转换器。转换器接收我们在之前构建好的 AST，然后把它和 visitor 传递进入我们的遍历
651 |  * 器中 ，最后得到一个新的 AST。
652 |  *
653 |  * ----------------------------------------------------------------------------
654 |  *            原始的 AST               |               转换后的 AST
655 |  * ----------------------------------------------------------------------------
656 |  *   {                                |   {
657 |  *     type: 'Program',               |     type: 'Program',
658 |  *     body: [{                       |     body: [{
659 |  *       type: 'CallExpression',      |       type: 'ExpressionStatement',
660 |  *       name: 'add',                 |       expression: {
661 |  *       params: [{                   |         type: 'CallExpression',
662 |  *         type: 'NumberLiteral',     |         callee: {
663 |  *         value: '2'                 |           type: 'Identifier',
664 |  *       }, {                         |           name: 'add'
665 |  *         type: 'CallExpression',    |         },
666 |  *         name: 'subtract',          |         arguments: [{
667 |  *         params: [{                 |           type: 'NumberLiteral',
668 |  *           type: 'NumberLiteral',   |           value: '2'
669 |  *           value: '4'               |         }, {
670 |  *         }, {                       |           type: 'CallExpression',
671 |  *           type: 'NumberLiteral',   |           callee: {
672 |  *           value: '2'               |             type: 'Identifier',
673 |  *         }]                         |             name: 'subtract'
674 |  *       }]                           |           },
675 |  *     }]                             |           arguments: [{
676 |  *   }                                |             type: 'NumberLiteral',
677 |  *                                    |             value: '4'
678 |  * ---------------------------------- |           }, {
679 |  *                                    |             type: 'NumberLiteral',
680 |  *                                    |             value: '2'
681 |  *                                    |           }]
682 |  *         (那一边比较长/w\)           |         }]
683 |  *                                    |       }
684 |  *                                    |     }]
685 |  *                                    |   }
686 |  * ----------------------------------------------------------------------------
687 |  */
688 | 
/**
 * Transformer: builds a new, C-style AST from the lisp-style AST.
 *
 * The input tree is walked with `traverser`. Every `NumberLiteral` is copied
 * and every `CallExpression` is rebuilt as
 * `{ type: 'CallExpression', callee: { type: 'Identifier', name }, arguments }`.
 * A call whose parent is not itself a `CallExpression` is additionally wrapped
 * in an `ExpressionStatement`.
 *
 * Side effect: a `_context` property is written onto `ast` and onto every
 * `CallExpression` node inside it. It points at the array of the new tree
 * that should receive that node's children.
 *
 * @param {Object} ast - Root `Program` node produced by `parser`.
 * @returns {{type: string, body: Array<Object>}} Root of the new AST.
 */
function transformer(ast) {

  // The new tree also starts from a `Program` root.
  var newAst = { type: 'Program', body: [] };

  // A small hack keeps this simple: each parent node of the old tree carries a
  // `_context` reference to the matching child list of the new tree, so a
  // visitor can push the converted node straight into the right place.
  ast._context = newAst.body;

  var visitor = {

    // Numbers are copied as-is into the parent's context.
    NumberLiteral: function(node, parent) {
      var literal = { type: 'NumberLiteral', value: node.value };
      parent._context.push(literal);
    },

    // Calls are rebuilt with a nested `Identifier` callee.
    CallExpression: function(node, parent) {
      var call = {
        type: 'CallExpression',
        callee: { type: 'Identifier', name: node.name },
        arguments: []
      };

      // Children of the old node will be pushed into the new node's arguments.
      node._context = call.arguments;

      // A call that is not an argument of another call stands on its own as a
      // statement in JavaScript, so it gets wrapped in an
      // `ExpressionStatement`; nested calls are used directly.
      var isArgument = parent.type === 'CallExpression';
      var converted = isArgument ? call : {
        type: 'ExpressionStatement',
        expression: call
      };

      parent._context.push(converted);
    }
  };

  // Walk the old tree; the visitor fills in the new one as it goes.
  traverser(ast, visitor);

  return newAst;
}
758 | 
759 | /**
760 |  * ============================================================================
761 |  *                               ヾ（〃＾∇＾）ﾉ♪
762 |  *                                代码生成器!!!!
763 |  * ============================================================================
764 |  */
765 | 
766 | /**
767 |  * 现在只剩最后一步啦：代码生成器。
768 |  *
769 |  * 我们的代码生成器会递归地调用它自己，把 AST 中的每个结点打印到一个很大的字符串中。
770 |  */
771 | 
/**
 * Code generator: prints a transformed AST node as a string of code.
 *
 * The function calls itself for every child node, so passing the root
 * `Program` yields the complete output.
 *
 * @param {Object} node - A node of the transformed AST.
 * @returns {string} The code for `node` and everything below it. For
 *   `Identifier` and `NumberLiteral` this is the node's `name` / `value`
 *   returned unchanged.
 * @throws {TypeError} With the node type as message for an unknown node type.
 */
function codeGenerator(node) {
  var type = node.type;

  // `Program`: print each statement in `body` on a line of its own.
  if (type === 'Program') {
    var lines = node.body.map(codeGenerator);
    return lines.join('\n');
  }

  // `ExpressionStatement`: print the wrapped expression followed by a
  // semicolon.
  if (type === 'ExpressionStatement') {
    return codeGenerator(node.expression) + ';';
  }

  // `CallExpression`: callee, then the comma-separated arguments in
  // parentheses.
  if (type === 'CallExpression') {
    var callee = codeGenerator(node.callee);
    var args = node.arguments.map(codeGenerator);
    return callee + '(' + args.join(', ') + ')';
  }

  // `Identifier`: just its name.
  if (type === 'Identifier') {
    return node.name;
  }

  // `NumberLiteral`: just its value.
  if (type === 'NumberLiteral') {
    return node.value;
  }

  // Unknown node type.
  throw new TypeError(type);
}
815 | 
816 | /**
817 |  * ============================================================================
818 |  *                                  (۶* ‘ヮ’)۶”
819 |  *                         !!!!!!!!!!!!编译器!!!!!!!!!!!
820 |  * ============================================================================
821 |  */
822 | 
823 | /**
824 |  * 最后！我们创建 `compiler` 函数，它只是把上面说到的那些函数连接到一起。
825 |  *
826 |  *   1. input  => tokenizer   => tokens
827 |  *   2. tokens => parser      => ast
828 |  *   3. ast    => transformer => newAst
829 |  *   4. newAst => generator   => output
830 |  */
831 | 
/**
 * Compiler: chains the four stages together.
 *
 *   1. input  => tokenizer     => tokens
 *   2. tokens => parser        => ast
 *   3. ast    => transformer   => newAst
 *   4. newAst => codeGenerator => output
 *
 * @param {string} input - Lisp-style source code.
 * @returns {string} The generated C-style code.
 */
function compiler(input) {
  // Read inside-out: each stage consumes the result of the one it wraps.
  return codeGenerator(transformer(parser(tokenizer(input))));
}
841 | 
842 | /**
843 |  * ============================================================================
844 |  *                                   (๑˃̵ᴗ˂̵)و
845 |  * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!你做到了!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
846 |  * ============================================================================
847 |  */
848 | 
// Public interface of the module: every individual stage plus the `compiler`
// function that runs all of them in sequence.
var api = {};

api.tokenizer = tokenizer;
api.parser = parser;
api.transformer = transformer;
api.codeGenerator = codeGenerator;
api.compiler = compiler;

module.exports = api;
857 | 


--------------------------------------------------------------------------------
/super-tiny-compiler.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH     HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
  3 |  * T:::::::::::::::::::::TH:::::::H     H:::::::HE::::::::::::::::::::E
  4 |  * T:::::::::::::::::::::TH:::::::H     H:::::::HE::::::::::::::::::::E
  5 |  * T:::::TT:::::::TT:::::THH::::::H     H::::::HHEE::::::EEEEEEEEE::::E
  6 |  * TTTTTT  T:::::T  TTTTTT  H:::::H     H:::::H    E:::::E       EEEEEE
  7 |  *         T:::::T          H:::::H     H:::::H    E:::::E
  8 |  *         T:::::T          H::::::HHHHH::::::H    E::::::EEEEEEEEEE
  9 |  *         T:::::T          H:::::::::::::::::H    E:::::::::::::::E
 10 |  *         T:::::T          H:::::::::::::::::H    E:::::::::::::::E
 11 |  *         T:::::T          H::::::HHHHH::::::H    E::::::EEEEEEEEEE
 12 |  *         T:::::T          H:::::H     H:::::H    E:::::E
 13 |  *         T:::::T          H:::::H     H:::::H    E:::::E       EEEEEE
 14 |  *       TT:::::::TT      HH::::::H     H::::::HHEE::::::EEEEEEEE:::::E
 15 |  *       T:::::::::T      H:::::::H     H:::::::HE::::::::::::::::::::E
 16 |  *       T:::::::::T      H:::::::H     H:::::::HE::::::::::::::::::::E
 17 |  *       TTTTTTTTTTT      HHHHHHHHH     HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
 18 |  *
 19 |  *    SSSSSSSSSSSSSSS UUUUUUUU     UUUUUUUUPPPPPPPPPPPPPPPPP   EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR
 20 |  *  SS:::::::::::::::SU::::::U     U::::::UP::::::::::::::::P  E::::::::::::::::::::ER::::::::::::::::R
 21 |  * S:::::SSSSSS::::::SU::::::U     U::::::UP::::::PPPPPP:::::P E::::::::::::::::::::ER::::::RRRRRR:::::R
 22 |  * S:::::S     SSSSSSSUU:::::U     U:::::UUPP:::::P     P:::::PEE::::::EEEEEEEEE::::ERR:::::R     R:::::R
 23 |  * S:::::S             U:::::U     U:::::U   P::::P     P:::::P  E:::::E       EEEEEE  R::::R     R:::::R
 24 |  * S:::::S             U:::::U     U:::::U   P::::P     P:::::P  E:::::E               R::::R     R:::::R
 25 |  *  S::::SSSS          U:::::U     U:::::U   P::::PPPPPP:::::P   E::::::EEEEEEEEEE     R::::RRRRRR:::::R
 26 |  *   SS::::::SSSSS     U:::::U     U:::::U   P:::::::::::::PP    E:::::::::::::::E     R:::::::::::::RR
 27 |  *     SSS::::::::SS   U:::::U     U:::::U   P::::PPPPPPPPP      E:::::::::::::::E     R::::RRRRRR:::::R
 28 |  *        SSSSSS::::S  U:::::U     U:::::U   P::::P              E::::::EEEEEEEEEE     R::::R     R:::::R
 29 |  *             S:::::S U:::::U     U:::::U   P::::P              E:::::E               R::::R     R:::::R
 30 |  *             S:::::S U::::::U   U::::::U   P::::P              E:::::E       EEEEEE  R::::R     R:::::R
 31 |  * SSSSSSS     S:::::S U:::::::UUU:::::::U PP::::::PP          EE::::::EEEEEEEE:::::ERR:::::R     R:::::R
 32 |  * S::::::SSSSSS:::::S  UU:::::::::::::UU  P::::::::P          E::::::::::::::::::::ER::::::R     R:::::R
 33 |  * S:::::::::::::::SS     UU:::::::::UU    P::::::::P          E::::::::::::::::::::ER::::::R     R:::::R
 34 |  *  SSSSSSSSSSSSSSS         UUUUUUUUU      PPPPPPPPPP          EEEEEEEEEEEEEEEEEEEEEERRRRRRRR     RRRRRRR
 35 |  *
 36 |  * TTTTTTTTTTTTTTTTTTTTTTTIIIIIIIIIINNNNNNNN        NNNNNNNNYYYYYYY       YYYYYYY
 37 |  * T:::::::::::::::::::::TI::::::::IN:::::::N       N::::::NY:::::Y       Y:::::Y
 38 |  * T:::::::::::::::::::::TI::::::::IN::::::::N      N::::::NY:::::Y       Y:::::Y
 39 |  * T:::::TT:::::::TT:::::TII::::::IIN:::::::::N     N::::::NY::::::Y     Y::::::Y
 40 |  * TTTTTT  T:::::T  TTTTTT  I::::I  N::::::::::N    N::::::NYYY:::::Y   Y:::::YYY
 41 |  *         T:::::T          I::::I  N:::::::::::N   N::::::N   Y:::::Y Y:::::Y
 42 |  *         T:::::T          I::::I  N:::::::N::::N  N::::::N    Y:::::Y:::::Y
 43 |  *         T:::::T          I::::I  N::::::N N::::N N::::::N     Y:::::::::Y
 44 |  *         T:::::T          I::::I  N::::::N  N::::N:::::::N      Y:::::::Y
 45 |  *         T:::::T          I::::I  N::::::N   N:::::::::::N       Y:::::Y
 46 |  *         T:::::T          I::::I  N::::::N    N::::::::::N       Y:::::Y
 47 |  *         T:::::T          I::::I  N::::::N     N:::::::::N       Y:::::Y
 48 |  *       TT:::::::TT      II::::::IIN::::::N      N::::::::N       Y:::::Y
 49 |  *       T:::::::::T      I::::::::IN::::::N       N:::::::N    YYYY:::::YYYY
 50 |  *       T:::::::::T      I::::::::IN::::::N        N::::::N    Y:::::::::::Y
 51 |  *       TTTTTTTTTTT      IIIIIIIIIINNNNNNNN         NNNNNNN    YYYYYYYYYYYYY
 52 |  *
 53 |  *         CCCCCCCCCCCCC     OOOOOOOOO     MMMMMMMM               MMMMMMMMPPPPPPPPPPPPPPPPP   IIIIIIIIIILLLLLLLLLLL             EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR
 54 |  *      CCC::::::::::::C   OO:::::::::OO   M:::::::M             M:::::::MP::::::::::::::::P  I::::::::IL:::::::::L             E::::::::::::::::::::ER::::::::::::::::R
 55 |  *    CC:::::::::::::::C OO:::::::::::::OO M::::::::M           M::::::::MP::::::PPPPPP:::::P I::::::::IL:::::::::L             E::::::::::::::::::::ER::::::RRRRRR:::::R
 56 |  *   C:::::CCCCCCCC::::CO:::::::OOO:::::::OM:::::::::M         M:::::::::MPP:::::P     P:::::PII::::::IILL:::::::LL             EE::::::EEEEEEEEE::::ERR:::::R     R:::::R
 57 |  *  C:::::C       CCCCCCO::::::O   O::::::OM::::::::::M       M::::::::::M  P::::P     P:::::P  I::::I    L:::::L                 E:::::E       EEEEEE  R::::R     R:::::R
 58 |  * C:::::C              O:::::O     O:::::OM:::::::::::M     M:::::::::::M  P::::P     P:::::P  I::::I    L:::::L                 E:::::E               R::::R     R:::::R
 59 |  * C:::::C              O:::::O     O:::::OM:::::::M::::M   M::::M:::::::M  P::::PPPPPP:::::P   I::::I    L:::::L                 E::::::EEEEEEEEEE     R::::RRRRRR:::::R
 60 |  * C:::::C              O:::::O     O:::::OM::::::M M::::M M::::M M::::::M  P:::::::::::::PP    I::::I    L:::::L                 E:::::::::::::::E     R:::::::::::::RR
 61 |  * C:::::C              O:::::O     O:::::OM::::::M  M::::M::::M  M::::::M  P::::PPPPPPPPP      I::::I    L:::::L                 E:::::::::::::::E     R::::RRRRRR:::::R
 62 |  * C:::::C              O:::::O     O:::::OM::::::M   M:::::::M   M::::::M  P::::P              I::::I    L:::::L                 E::::::EEEEEEEEEE     R::::R     R:::::R
 63 |  * C:::::C              O:::::O     O:::::OM::::::M    M:::::M    M::::::M  P::::P              I::::I    L:::::L                 E:::::E               R::::R     R:::::R
 64 |  *  C:::::C       CCCCCCO::::::O   O::::::OM::::::M     MMMMM     M::::::M  P::::P              I::::I    L:::::L         LLLLLL  E:::::E       EEEEEE  R::::R     R:::::R
 65 |  *   C:::::CCCCCCCC::::CO:::::::OOO:::::::OM::::::M               M::::::MPP::::::PP          II::::::IILL:::::::LLLLLLLLL:::::LEE::::::EEEEEEEE:::::ERR:::::R     R:::::R
 66 |  *    CC:::::::::::::::C OO:::::::::::::OO M::::::M               M::::::MP::::::::P          I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R     R:::::R
 67 |  *      CCC::::::::::::C   OO:::::::::OO   M::::::M               M::::::MP::::::::P          I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R     R:::::R
 68 |  *         CCCCCCCCCCCCC     OOOOOOOOO     MMMMMMMM               MMMMMMMMPPPPPPPPPP          IIIIIIIIIILLLLLLLLLLLLLLLLLLLLLLLLEEEEEEEEEEEEEEEEEEEEEERRRRRRRR     RRRRRRR
 69 |  *
 70 |  * =======================================================================================================================================================================
 71 |  * =======================================================================================================================================================================
 72 |  * =======================================================================================================================================================================
 73 |  * =======================================================================================================================================================================
 74 |  */
 75 | 
 76 | /**
 77 |  * Today we're going to write a compiler together. But not just any compiler... A
 78 |  * super duper teeny tiny compiler! A compiler that is so small that if you
 79 |  * remove all the comments this file would only be ~200 lines of actual code.
 80 |  *
 81 |  * We're going to compile some lisp-like function calls into some C-like
 82 |  * function calls.
 83 |  *
 84 |  * If you are not familiar with one or the other, I'll just give you a quick intro.
 85 |  *
 86 |  * If we had two functions `add` and `subtract` they would be written like this:
 87 |  *
 88 |  *                  LISP                      C
 89 |  *
 90 |  *   2 + 2          (add 2 2)                 add(2, 2)
 91 |  *   4 - 2          (subtract 4 2)            subtract(4, 2)
 92 |  *   2 + (4 - 2)    (add 2 (subtract 4 2))    add(2, subtract(4, 2))
 93 |  *
 94 |  * Easy peezy right?
 95 |  *
 96 |  * Well good, because this is exactly what we are going to compile. While this
 97 |  * is neither a complete LISP nor C syntax, it will be enough of the syntax to
 98 |  * demonstrate many of the major pieces of a modern compiler.
 99 |  */
100 | 
101 | /**
102 |  * Most compilers break down into three primary stages: Parsing, Transformation,
103 |  * and Code Generation
104 |  *
105 |  * 1. *Parsing* is taking raw code and turning it into a more abstract
106 |  *    representation of the code.
107 |  *
 * 2. *Transformation* takes this abstract representation and manipulates it to
 *    do whatever the compiler wants it to.
110 |  *
111 |  * 3. *Code Generation* takes the transformed representation of the code and
112 |  *    turns it into new code.
113 |  */
114 | 
115 | /**
116 |  * Parsing
117 |  * -------
118 |  *
119 |  * Parsing typically gets broken down into two phases: Lexical Analysis and
120 |  * Syntactic Analysis.
121 |  *
122 |  * 1. *Lexical Analysis* takes the raw code and splits it apart into these things
123 |  *    called tokens by a thing called a tokenizer (or lexer).
124 |  *
125 |  *    Tokens are an array of tiny little objects that describe an isolated piece
126 |  *    of the syntax. They could be numbers, labels, punctuation, operators,
127 |  *    whatever.
128 |  *
129 |  * 2. *Syntactic Analysis* takes the tokens and reformats them into a
130 |  *    representation that describes each part of the syntax and their relation
131 |  *    to one another. This is known as an intermediate representation or
132 |  *    Abstract Syntax Tree.
133 |  *
134 |  *    An Abstract Syntax Tree, or AST for short, is a deeply nested object that
135 |  *    represents code in a way that is both easy to work with and tells us a lot
136 |  *    of information.
137 |  *
138 |  * For the following syntax:
139 |  *
140 |  *   (add 2 (subtract 4 2))
141 |  *
142 |  * Tokens might look something like this:
143 |  *
144 |  *   [
145 |  *     { type: 'paren',  value: '('        },
146 |  *     { type: 'name',   value: 'add'      },
147 |  *     { type: 'number', value: '2'        },
148 |  *     { type: 'paren',  value: '('        },
149 |  *     { type: 'name',   value: 'subtract' },
150 |  *     { type: 'number', value: '4'        },
151 |  *     { type: 'number', value: '2'        },
152 |  *     { type: 'paren',  value: ')'        },
153 |  *     { type: 'paren',  value: ')'        }
154 |  *   ]
155 |  *
156 |  * And an Abstract Syntax Tree (AST) might look like this:
157 |  *
158 |  *   {
159 |  *     type: 'Program',
160 |  *     body: [{
161 |  *       type: 'CallExpression',
162 |  *       name: 'add',
163 |  *       params: [{
164 |  *         type: 'NumberLiteral',
165 |  *         value: '2'
166 |  *       }, {
167 |  *         type: 'CallExpression',
168 |  *         name: 'subtract',
169 |  *         params: [{
170 |  *           type: 'NumberLiteral',
171 |  *           value: '4'
172 |  *         }, {
173 |  *           type: 'NumberLiteral',
174 |  *           value: '2'
175 |  *         }]
176 |  *       }]
177 |  *     }]
178 |  *   }
179 |  */
180 | 
181 | /**
182 |  * Transformation
183 |  * --------------
184 |  *
185 |  * The next type of stage for a compiler is transformation. Again, this just
186 |  * takes the AST from the last step and makes changes to it. It can manipulate
187 |  * the AST in the same language or it can translate it into an entirely new
188 |  * language.
189 |  *
190 |  * Let’s look at how we would transform an AST.
191 |  *
192 |  * You might notice that our AST has elements within it that look very similar.
 * There are these objects with a type property. Each of these is known as an
194 |  * AST Node. These nodes have defined properties on them that describe one
195 |  * isolated part of the tree.
196 |  *
197 |  * We can have a node for a "NumberLiteral":
198 |  *
199 |  *   {
200 |  *     type: 'NumberLiteral',
201 |  *     value: '2'
202 |  *   }
203 |  *
204 |  * Or maybe a node for a "CallExpression":
205 |  *
206 |  *   {
207 |  *     type: 'CallExpression',
208 |  *     name: 'subtract',
209 |  *     params: [...nested nodes go here...]
210 |  *   }
211 |  *
212 |  * When transforming the AST we can manipulate nodes by
213 |  * adding/removing/replacing properties, we can add new nodes, remove nodes, or
214 |  * we could leave the existing AST alone and create an entirely new one based
215 |  * on it.
216 |  *
217 |  * Since we’re targeting a new language, we’re going to focus on creating an
218 |  * entirely new AST that is specific to the target language.
219 |  *
220 |  * Traversal
221 |  * ---------
222 |  *
223 |  * In order to navigate through all of these nodes, we need to be able to
224 |  * traverse through them. This traversal process goes to each node in the AST
225 |  * depth-first.
226 |  *
227 |  *   {
228 |  *     type: 'Program',
229 |  *     body: [{
230 |  *       type: 'CallExpression',
231 |  *       name: 'add',
232 |  *       params: [{
233 |  *         type: 'NumberLiteral',
234 |  *         value: '2'
235 |  *       }, {
236 |  *         type: 'CallExpression',
237 |  *         name: 'subtract',
238 |  *         params: [{
239 |  *           type: 'NumberLiteral',
240 |  *           value: '4'
241 |  *         }, {
242 |  *           type: 'NumberLiteral',
243 |  *           value: '2'
244 |  *         }]
245 |  *       }]
246 |  *     }]
247 |  *   }
248 |  *
249 |  * So for the above AST we would go:
250 |  *
251 |  *   1. Program - Starting at the top level of the AST
252 |  *   2. CallExpression (add) - Moving to the first element of the Program's body
253 |  *   3. NumberLiteral (2) - Moving to the first element of CallExpression's params
254 |  *   4. CallExpression (subtract) - Moving to the second element of CallExpression's params
255 |  *   5. NumberLiteral (4) - Moving to the first element of CallExpression's params
256 |  *   6. NumberLiteral (2) - Moving to the second element of CallExpression's params
257 |  *
258 |  * If we were manipulating this AST directly, instead of creating a separate AST,
259 |  * we would likely introduce all sorts of abstractions here. But just visiting
260 |  * each node in the tree is enough.
261 |  *
262 |  * The reason I use the word “visiting” is because there is this pattern of how
263 |  * to represent operations on elements of an object structure.
264 |  *
265 |  * Visitors
266 |  * --------
267 |  *
268 |  * The basic idea here is that we are going to create a “visitor” object that
269 |  * has methods that will accept different node types.
270 |  *
271 |  *   var visitor = {
272 |  *     NumberLiteral() {},
273 |  *     CallExpression() {}
274 |  *   };
275 |  *
276 |  * When we traverse our AST we will call the methods on this visitor whenever we
277 |  * encounter a node of a matching type.
278 |  *
279 |  * In order to make this useful we will also pass the node and a reference to
280 |  * the parent node.
281 |  *
282 |  *   var visitor = {
283 |  *     NumberLiteral(node, parent) {},
284 |  *     CallExpression(node, parent) {}
285 |  *   };
286 |  */
287 | 
288 | /**
289 |  * Code Generation
290 |  * ---------------
291 |  *
292 |  * The final phase of a compiler is code generation. Sometimes compilers will do
293 |  * things that overlap with transformation, but for the most part code
294 |  * generation just means take our AST and string-ify code back out.
295 |  *
296 |  * Code generators work several different ways, some compilers will reuse the
297 |  * tokens from earlier, others will have created a separate representation of
 * the code so that they can print nodes linearly, but from what I can tell most
299 |  * will use the same AST we just created, which is what we’re going to focus on.
300 |  *
301 |  * Effectively our code generator will know how to “print” all of the different
302 |  * node types of the AST, and it will recursively call itself to print nested
303 |  * nodes until everything is printed into one long string of code.
304 |  */
305 | 
306 | /**
307 |  * And that's it! That's all the different pieces of a compiler.
308 |  *
309 |  * Now that isn’t to say every compiler looks exactly like I described here.
310 |  * Compilers serve many different purposes, and they might need more steps than
311 |  * I have detailed.
312 |  *
313 |  * But now you should have a general high-level idea of what most compilers look
314 |  * like.
315 |  *
316 |  * Now that I’ve explained all of this, you’re all good to go write your own
317 |  * compilers right?
318 |  *
319 |  * Just kidding, that's what I'm here to help with :P
320 |  *
321 |  * So let's begin...
322 |  */
323 | 
324 | /**
325 |  * ============================================================================
326 |  *                                   (/^▽^)/
327 |  *                                THE TOKENIZER!
328 |  * ============================================================================
329 |  */
330 | 
331 | /**
332 |  * We're gonna start off with our first phase of parsing, lexical analysis, with
333 |  * the tokenizer.
334 |  *
335 |  * We're just going to take our string of code and break it down into an array
336 |  * of tokens.
337 |  *
338 |  *   (add 2 (subtract 4 2))   =>   [{ type: 'paren', value: '(' }, ...]
339 |  */
340 | 
/**
 * Lexical analysis: split a string of lisp-like source code into a flat array
 * of tokens.
 *
 *   (add 2 (subtract 4 2))   =>   [{ type: 'paren', value: '(' }, ...]
 *
 * Recognized tokens:
 *
 *   - `paren`  : a single `(` or `)`
 *   - `number` : a run of one or more digits 0-9 (the value stays a string)
 *   - `name`   : a run of one or more ASCII letters, upper or lower case
 *
 * Whitespace only separates tokens; it is never emitted as a token.
 *
 * @param {string} input - The source code to tokenize.
 * @returns {Array<{type: string, value: string}>} The tokens, in source order.
 * @throws {TypeError} If a character does not belong to any token type.
 */
function tokenizer(input) {

  // The character classes never change, so we build them once up front rather
  // than on every pass through the loop.
  var WHITESPACE = /\s/;
  var NUMBERS = /[0-9]/;
  var LETTERS = /[a-z]/i;

  // A `current` variable for tracking our position in the code like a cursor.
  var current = 0;

  // And a `tokens` array for pushing our tokens to.
  var tokens = [];

  // The character under the cursor, and the text of a multi-character token
  // while it is being collected.
  var char;
  var value;

  // `current` is advanced inside the loop body (not in the loop header) because
  // a single token can consume any number of characters.
  while (current < input.length) {
    char = input[current];

    // Parentheses are single-character tokens. They will later delimit
    // `CallExpressions`, but here we only care about the character itself.
    if (char === '(' || char === ')') {
      tokens.push({
        type: 'paren',
        value: char
      });
      current++;
      continue;
    }

    // Whitespace matters because it separates tokens, but there is no reason
    // to store it: we just step over it.
    if (WHITESPACE.test(char)) {
      current++;
      continue;
    }

    // A number is a whole run of digits captured as one token.
    //
    //   (add 123 456)
    //        ^^^ ^^^
    //        Only two separate tokens
    //
    if (NUMBERS.test(char)) {
      value = '';

      // The bounds check stops the scan at the end of the input instead of
      // testing the `undefined` that lies past the last character.
      while (current < input.length && NUMBERS.test(char)) {
        value += char;
        char = input[++current];
      }

      tokens.push({
        type: 'number',
        value: value
      });
      continue;
    }

    // A name is a whole run of letters: the function names of our lisp syntax.
    //
    //   (add 2 4)
    //    ^^^
    //    Name token
    //
    if (LETTERS.test(char)) {
      value = '';

      // The bounds check is essential here: `RegExp.prototype.test` converts
      // its argument to a string, so the `undefined` found past the end of the
      // input would be tested as "undefined", which is made of letters. Without
      // the check, any input ending in a name would loop forever.
      while (current < input.length && LETTERS.test(char)) {
        value += char;
        char = input[++current];
      }

      tokens.push({
        type: 'name',
        value: value
      });
      continue;
    }

    // Anything we have not matched by now is not part of our syntax.
    throw new TypeError('I dont know what this character is: ' + char);
  }

  // Then at the end of our `tokenizer` we simply return the tokens array.
  return tokens;
}
477 | 
478 | /**
479 |  * ============================================================================
480 |  *                                 ヽ/❀o ل͜ o\ﾉ
481 |  *                                THE PARSER!!!
482 |  * ============================================================================
483 |  */
484 | 
485 | /**
486 |  * For our parser we're going to take our array of tokens and turn it into an
487 |  * AST.
488 |  *
489 |  *   [{ type: 'paren', value: '(' }, ...]   =>   { type: 'Program', body: [...] }
490 |  */
491 | 
/**
 * Syntactic analysis: turn the flat array of tokens produced by the tokenizer
 * into an AST rooted at a `Program` node.
 *
 *   [{ type: 'paren', value: '(' }, ...]   =>   { type: 'Program', body: [...] }
 *
 * A `number` token becomes a `NumberLiteral` node. An opening parenthesis
 * starts a `CallExpression` whose `name` is the value of the token right after
 * the parenthesis and whose `params` are all the nodes up to the matching
 * closing parenthesis.
 *
 * @param {Array<{type: string, value: string}>} tokens - Tokens to parse.
 * @returns {{type: string, body: Array<Object>}} The `Program` node.
 * @throws {TypeError} If a token cannot start a node (the message is the token
 *   type), or if the tokens run out in the middle of a `CallExpression`.
 */
function parser(tokens) {

  // Again we keep a `current` variable that we will use as a cursor.
  var current = 0;

  // Returns the token under the cursor. Running out of tokens in the middle of
  // an expression is reported explicitly instead of surfacing as an opaque
  // "cannot read property of undefined" error.
  function peek() {
    if (current >= tokens.length) {
      throw new TypeError('Unexpected end of input');
    }
    return tokens[current];
  }

  // `walk` parses exactly one node starting at `current` and leaves `current`
  // just past it. Nested `CallExpressions` are handled by recursion.
  function walk() {
    var token = peek();

    // A `number` token maps directly onto a `NumberLiteral` node.
    if (token.type === 'number') {
      current++;

      return {
        type: 'NumberLiteral',
        value: token.value
      };
    }

    // An open parenthesis starts a `CallExpression`.
    if (
      token.type === 'paren' &&
      token.value === '('
    ) {

      // Skip the parenthesis itself: it does not appear in the AST.
      current++;

      // The token right after the open parenthesis names the function.
      token = peek();

      var node = {
        type: 'CallExpression',
        name: token.value,
        params: []
      };

      // Skip the name token.
      current++;
      token = peek();

      // Collect params until we reach a closing parenthesis. Each recursive
      // `walk` call moves `current` past one whole param, including any nested
      // `CallExpression` and its own closing parenthesis, so the parenthesis
      // we stop on is always the one that closes *this* call.
      //
      //   (add 2 (subtract 4 2))
      //
      while (token.type !== 'paren' || token.value !== ')') {
        node.params.push(walk());
        token = peek();
      }

      // Skip the closing parenthesis.
      current++;

      return node;
    }

    // Any other token (a bare name, a stray closing parenthesis) cannot start
    // a node.
    throw new TypeError(token.type);
  }

  // Our AST has a root which is a `Program` node.
  var ast = {
    type: 'Program',
    body: []
  };

  // We parse in a loop because a program can hold several top-level
  // expressions one after another instead of nested.
  //
  //   (add 2 2)
  //   (subtract 4 2)
  //
  while (current < tokens.length) {
    ast.body.push(walk());
  }

  // At the end of our parser we'll return the AST.
  return ast;
}
625 | 
626 | /**
627 |  * ============================================================================
628 |  *                                 ⌒(❀>◞౪◟<❀)⌒
629 |  *                               THE TRAVERSER!!!
630 |  * ============================================================================
631 |  */
632 | 
633 | /**
634 |  * So now we have our AST, and we want to be able to visit different nodes with
635 |  * a visitor. We need to be able to call the methods on the visitor whenever we
636 |  * encounter a node with a matching type.
637 |  *
638 |  *   traverse(ast, {
639 |  *     Program(node, parent) {
640 |  *       // ...
641 |  *     },
642 |  *
643 |  *     CallExpression(node, parent) {
644 |  *       // ...
645 |  *     },
646 |  *
647 |  *     NumberLiteral(node, parent) {
648 |  *       // ...
649 |  *     }
650 |  *   });
651 |  */
652 | 
/**
 * Walks an AST depth-first and, for every node, calls the method of `visitor`
 * whose name matches the node's `type` (when such a method exists).
 *
 * A node is visited before its children. `Program` nodes descend into `body`,
 * `CallExpression` nodes descend into `params`, and `NumberLiteral` nodes have
 * no children.
 *
 * @param {Object} ast - The root node to start from.
 * @param {Object} visitor - Maps node types to `function(node, parent)`
 *   callbacks. The root is visited with a `parent` of `null`.
 * @throws {TypeError} If a node has a type other than the three above (the
 *   message is the offending type).
 */
function traverser(ast, visitor) {

  // Visits a single node and then everything beneath it.
  function visit(node, parent) {

    // Let the visitor see the node first, if it is interested in this type.
    var handler = visitor[node.type];
    if (handler) {
      handler(node, parent);
    }

    // Then work out where (if anywhere) this kind of node keeps its children.
    var kind = node.type;

    if (kind === 'Program') {
      // A Program holds its child nodes in `body`.
      visitEach(node.body, node);
    } else if (kind === 'CallExpression') {
      // A CallExpression holds its child nodes in `params`.
      visitEach(node.params, node);
    } else if (kind !== 'NumberLiteral') {
      // NumberLiterals are leaves; anything else is a type we do not know.
      throw new TypeError(node.type);
    }
  }

  // Visits every node of `children`, all of which share the same `parent`.
  // Together with `visit` this is what makes the walk recursive.
  function visitEach(children, parent) {
    children.forEach(function(child) {
      visit(child, parent);
    });
  }

  // The top level of the AST has no parent, so we start with `null`.
  visit(ast, null);
}
712 | 
713 | /**
714 |  * ============================================================================
715 |  *                                   ⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽
716 |  *                              THE TRANSFORMER!!!
717 |  * ============================================================================
718 |  */
719 | 
720 | /**
721 |  * Next up, the transformer. Our transformer is going to take the AST that we
722 |  * have built and pass it to our traverser function with a visitor and will
723 |  * create a new ast.
724 |  *
725 |  * ----------------------------------------------------------------------------
726 |  *   Original AST                     |   Transformed AST
727 |  * ----------------------------------------------------------------------------
728 |  *   {                                |   {
729 |  *     type: 'Program',               |     type: 'Program',
730 |  *     body: [{                       |     body: [{
731 |  *       type: 'CallExpression',      |       type: 'ExpressionStatement',
732 |  *       name: 'add',                 |       expression: {
733 |  *       params: [{                   |         type: 'CallExpression',
734 |  *         type: 'NumberLiteral',     |         callee: {
735 |  *         value: '2'                 |           type: 'Identifier',
736 |  *       }, {                         |           name: 'add'
737 |  *         type: 'CallExpression',    |         },
738 |  *         name: 'subtract',          |         arguments: [{
739 |  *         params: [{                 |           type: 'NumberLiteral',
740 |  *           type: 'NumberLiteral',   |           value: '2'
741 |  *           value: '4'               |         }, {
742 |  *         }, {                       |           type: 'CallExpression',
743 |  *           type: 'NumberLiteral',   |           callee: {
744 |  *           value: '2'               |             type: 'Identifier',
745 |  *         }]                         |             name: 'subtract'
746 |  *       }]                           |           },
747 |  *     }]                             |           arguments: [{
748 |  *   }                                |             type: 'NumberLiteral',
749 |  *                                    |             value: '4'
750 |  * ---------------------------------- |           }, {
751 |  *                                    |             type: 'NumberLiteral',
752 |  *                                    |             value: '2'
753 |  *                                    |           }]
754 |  *  (sorry the other one is longer.)  |         }]
755 |  *                                    |       }
756 |  *                                    |     }]
757 |  *                                    |   }
758 |  * ----------------------------------------------------------------------------
759 |  */
760 | 
/**
 * Builds a new, C-like AST from the lisp AST produced by the parser.
 *
 * Every `CallExpression` is rebuilt with a `callee` identifier and an
 * `arguments` array, and a call whose parent is not itself a `CallExpression`
 * is wrapped in an `ExpressionStatement`. `NumberLiteral` nodes are copied.
 *
 * Note that this function writes a `_context` property onto the nodes of the
 * AST it is given: on the root and on every `CallExpression`.
 *
 * @param {Object} ast - The lisp AST (a `Program` node).
 * @returns {{type: string, body: Array<Object>}} The new `Program` node.
 */
function transformer(ast) {

  // The AST we are building. Like the one we were given, its root is a Program.
  var newAst = {
    type: 'Program',
    body: []
  };

  // The hack that ties the two trees together: each old parent node carries a
  // `_context` array, which is the place in the *new* tree where the
  // translations of its children belong. For the root that place is the new
  // program's body.
  ast._context = newAst.body;

  // Numbers are copied into whatever array their parent points at.
  function onNumberLiteral(node, parent) {
    parent._context.push({
      type: 'NumberLiteral',
      value: node.value
    });
  }

  // Calls are rebuilt in the target shape: an `Identifier` callee plus a list
  // of arguments.
  function onCallExpression(node, parent) {
    var call = {
      type: 'CallExpression',
      callee: {
        type: 'Identifier',
        name: node.name
      },
      arguments: []
    };

    // The children of this call will be visited next; point them at the
    // argument list we just created.
    node._context = call.arguments;

    // A call nested in another call is pushed as is. Any other call is wrapped
    // in an `ExpressionStatement`, because top level call expressions in
    // JavaScript are actually statements.
    var isNested = parent.type === 'CallExpression';

    parent._context.push(isNested ? call : {
      type: 'ExpressionStatement',
      expression: call
    });
  }

  // Walk the old tree, filling in the new one as we go.
  traverser(ast, {
    NumberLiteral: onNumberLiteral,
    CallExpression: onCallExpression
  });

  // And hand back the tree we just built.
  return newAst;
}
836 | 
837 | /**
838 |  * ============================================================================
839 |  *                               ヾ（〃＾∇＾）ﾉ♪
840 |  *                            THE CODE GENERATOR!!!!
841 |  * ============================================================================
842 |  */
843 | 
844 | /**
845 |  * Now let's move on to our last phase: The Code Generator.
846 |  *
847 |  * Our code generator is going to recursively call itself to print each node in
848 |  * the tree into one giant string.
849 |  */
850 | 
/**
 * Prints a node of the transformed AST, calling itself recursively for the
 * nodes nested inside it.
 *
 * @param {Object} node - A `Program`, `ExpressionStatement`, `CallExpression`,
 *   `Identifier` or `NumberLiteral` node.
 * @returns {*} The generated code. For `Identifier` and `NumberLiteral` nodes
 *   this is the node's `name` / `value` exactly as stored; for the other node
 *   types it is a string.
 * @throws {TypeError} If the node type is not one of the above (the message is
 *   the offending type).
 */
function codeGenerator(node) {
  var type = node.type;

  // A Program prints each node of its body on a line of its own.
  if (type === 'Program') {
    var lines = node.body.map(codeGenerator);
    return lines.join('\n');
  }

  // An ExpressionStatement is its expression followed by a semicolon.
  if (type === 'ExpressionStatement') {
    return codeGenerator(node.expression) + ';';
  }

  // A CallExpression is the callee followed by the comma separated arguments
  // in parentheses.
  if (type === 'CallExpression') {
    var opening = codeGenerator(node.callee) + '(';
    var args = node.arguments.map(codeGenerator).join(', ');
    return opening + args + ')';
  }

  // An Identifier prints as its name.
  if (type === 'Identifier') {
    return node.name;
  }

  // A NumberLiteral prints as its value.
  if (type === 'NumberLiteral') {
    return node.value;
  }

  // Anything else is a node we do not know how to print.
  throw new TypeError(node.type);
}
896 | 
897 | /**
898 |  * ============================================================================
899 |  *                                  (۶* ‘ヮ’)۶”
900 |  *                         !!!!!!!!THE COMPILER!!!!!!!!
901 |  * ============================================================================
902 |  */
903 | 
904 | /**
905 |  * FINALLY! We'll create our `compiler` function. Here we will link together
906 |  * every part of the pipeline.
907 |  *
908 |  *   1. input  => tokenizer   => tokens
909 |  *   2. tokens => parser      => ast
910 |  *   3. ast    => transformer => newAst
911 |  *   4. newAst => generator   => output
912 |  */
913 | 
/**
 * Runs the whole pipeline: each stage feeds its result to the next one.
 *
 *   input => tokenizer => parser => transformer => codeGenerator => output
 *
 * @param {string} input - The lisp-like source code.
 * @returns {*} Whatever `codeGenerator` returns for the transformed AST.
 */
function compiler(input) {
  // Read inside out: tokenize, parse, transform, and finally generate.
  return codeGenerator(transformer(parser(tokenizer(input))));
}
923 | 
924 | /**
925 |  * ============================================================================
926 |  *                                   (๑˃̵ᴗ˂̵)و
927 |  * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!YOU MADE IT!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
928 |  * ============================================================================
929 |  */
930 | 
// Publish every stage of the pipeline, along with the `compiler` that chains
// them, so each one can be required and exercised on its own.
module.exports = {};
module.exports.tokenizer = tokenizer;
module.exports.parser = parser;
module.exports.transformer = transformer;
module.exports.codeGenerator = codeGenerator;
module.exports.compiler = compiler;
939 | 


--------------------------------------------------------------------------------
/test.js:
--------------------------------------------------------------------------------
 1 | var superTinyCompiler = require('./super-tiny-compiler');
 2 | var assert            = require('assert');
 3 | 
 4 | var tokenizer     = superTinyCompiler.tokenizer;
 5 | var parser        = superTinyCompiler.parser;
 6 | var transformer   = superTinyCompiler.transformer;
 7 | var codeGenerator = superTinyCompiler.codeGenerator;
 8 | var compiler      = superTinyCompiler.compiler;
 9 | 
// The LISP-style source handed to the pipeline...
var input = '(add 2 (subtract 4 2))';
// ...and the C-style call syntax expected to come out of it.
var output = 'add(2, subtract(4, 2));';

// The flat token stream expected from tokenizing `input`. Each entry below is
// a [type, value] pair, expanded into a `{ type, value }` token object.
var tokens = [
  ['paren', '('],
  ['name', 'add'],
  ['number', '2'],
  ['paren', '('],
  ['name', 'subtract'],
  ['number', '4'],
  ['number', '2'],
  ['paren', ')'],
  ['paren', ')']
].map(function (pair) {
  return { type: pair[0], value: pair[1] };
});
24 | 
// The tree the parser is expected to build from `tokens`: every call node
// keeps its function `name` and its `params` directly on the node.
var ast = (function () {
  // Each helper returns a brand-new object, so no node is shared between
  // branches of the tree.
  function number(value) {
    return { type: 'NumberLiteral', value: value };
  }

  function call(name, params) {
    return { type: 'CallExpression', name: name, params: params };
  }

  return {
    type: 'Program',
    body: [
      call('add', [
        number('2'),
        call('subtract', [number('4'), number('2')])
      ])
    ]
  };
}());
46 | 
// The tree the transformer is expected to produce from `ast`, and the one the
// code generator is expected to print as `output`: calls carry an
// `Identifier` callee plus `arguments`, and the top-level call is wrapped in
// an `ExpressionStatement`.
var newAst = (function () {
  // Each helper returns a brand-new object, so no node is shared between
  // branches of the tree.
  function number(value) {
    return { type: 'NumberLiteral', value: value };
  }

  function identifier(name) {
    return { type: 'Identifier', name: name };
  }

  function call(calleeName, args) {
    return {
      type: 'CallExpression',
      callee: identifier(calleeName),
      arguments: args
    };
  }

  function statement(expression) {
    return { type: 'ExpressionStatement', expression: expression };
  }

  return {
    type: 'Program',
    body: [
      statement(
        call('add', [
          number('2'),
          call('subtract', [number('4'), number('2')])
        ])
      )
    ]
  };
}());
77 | 
// One row per check: the function under test, the fixture it is fed, the
// fixture it must produce, and the message reported if the two differ.
// Rows run top to bottom, following the order of the stages in the pipeline,
// with the all-in-one `compiler` checked last.
var checks = [
  [tokenizer, input, tokens, 'Tokenizer should turn `input` string into `tokens` array'],
  [parser, tokens, ast, 'Parser should turn `tokens` array into `ast`'],
  [transformer, ast, newAst, 'Transformer should turn `ast` into a `newAst`'],
  [codeGenerator, newAst, output, 'Code Generator should turn `newAst` into `output` string'],
  [compiler, input, output, 'Compiler should turn `input` into `output`']
];

checks.forEach(function (check) {
  // Pull the function out of the row first so it is invoked as a plain
  // function call rather than as a method of the row array.
  var stage = check[0];
  assert.deepStrictEqual(stage(check[1]), check[2], check[3]);
});

// Only reached when none of the assertions above threw.
console.log('All Passed!');
85 | 


--------------------------------------------------------------------------------