├── package.json ├── README.md ├── test.js ├── LICENSE └── the-super-tiny-compiler.js /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "the-super-tiny-compiler", 3 | "version": "1.0.0", 4 | "author": "James Kyle (thejameskyle.com)", 5 | "license": "CC-BY-4.0", 6 | "main": "./the-super-tiny-compiler.js" 7 | } 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![The Super Tiny Compiler](https://cloud.githubusercontent.com/assets/952783/21579290/5755288a-cf75-11e6-90e0-029529a44a38.png)](the-super-tiny-compiler.js) 2 | 3 | ***Welcome to The Super Tiny Compiler!*** 4 | 5 | This is an ultra-simplified example of all the major pieces of a modern compiler 6 | written in easy to read JavaScript. 7 | 8 | Reading through the guided code will help you learn about how *most* compilers 9 | work from end to end. 10 | 11 | ### [Want to jump into the code? Click here](the-super-tiny-compiler.js) 12 | 13 | ### [You can also check it out on Glitch](https://the-super-tiny-compiler.glitch.me/) 14 | 15 | --- 16 | 17 | ### Why should I care? 18 | 19 | That's fair, most people don't really have to think about compilers in their day 20 | jobs. However, compilers are all around you, tons of the tools you use are based 21 | on concepts borrowed from compilers. 22 | 23 | ### But compilers are scary! 24 | 25 | Yes, they are. But that's our fault (the people who write compilers), we've 26 | taken something that is reasonably straightforward and made it so scary that 27 | most think of it as this totally unapproachable thing that only the nerdiest of 28 | the nerds are able to understand. 29 | 30 | ### Okay so where do I begin? 31 | 32 | Awesome! Head on over to the [the-super-tiny-compiler.js](the-super-tiny-compiler.js) 33 | file. 
34 | 35 | ### I'm back, that didn't make sense 36 | 37 | Ouch, I'm really sorry. Let me know how it can be improved. 38 | 39 | ### Tests 40 | 41 | Run with `node test.js` 42 | 43 | --- 44 | 45 | [![cc-by-4.0](https://licensebuttons.net/l/by/4.0/80x15.png)](http://creativecommons.org/licenses/by/4.0/) 46 | -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | const { 2 | tokenizer, 3 | parser, 4 | transformer, 5 | codeGenerator, 6 | compiler, 7 | } = require('./the-super-tiny-compiler'); 8 | const assert = require('assert'); 9 | 10 | const input = '(add 2 (subtract 4 2))'; 11 | const output = 'add(2, subtract(4, 2));'; 12 | 13 | const tokens = [ 14 | { type: 'paren', value: '(' }, 15 | { type: 'name', value: 'add' }, 16 | { type: 'number', value: '2' }, 17 | { type: 'paren', value: '(' }, 18 | { type: 'name', value: 'subtract' }, 19 | { type: 'number', value: '4' }, 20 | { type: 'number', value: '2' }, 21 | { type: 'paren', value: ')' }, 22 | { type: 'paren', value: ')' } 23 | ]; 24 | 25 | const ast = { 26 | type: 'Program', 27 | body: [{ 28 | type: 'CallExpression', 29 | name: 'add', 30 | params: [{ 31 | type: 'NumberLiteral', 32 | value: '2' 33 | }, { 34 | type: 'CallExpression', 35 | name: 'subtract', 36 | params: [{ 37 | type: 'NumberLiteral', 38 | value: '4' 39 | }, { 40 | type: 'NumberLiteral', 41 | value: '2' 42 | }] 43 | }] 44 | }] 45 | }; 46 | 47 | const newAst = { 48 | type: 'Program', 49 | body: [{ 50 | type: 'ExpressionStatement', 51 | expression: { 52 | type: 'CallExpression', 53 | callee: { 54 | type: 'Identifier', 55 | name: 'add' 56 | }, 57 | arguments: [{ 58 | type: 'NumberLiteral', 59 | value: '2' 60 | }, { 61 | type: 'CallExpression', 62 | callee: { 63 | type: 'Identifier', 64 | name: 'subtract' 65 | }, 66 | arguments: [{ 67 | type: 'NumberLiteral', 68 | value: '4' 69 | }, { 70 | type: 'NumberLiteral', 71 | value: '2' 72 | }] 
73 | }] 74 | } 75 | }] 76 | }; 77 | 78 | assert.deepStrictEqual(tokenizer(input), tokens, 'Tokenizer should turn `input` string into `tokens` array'); 79 | assert.deepStrictEqual(parser(tokens), ast, 'Parser should turn `tokens` array into `ast`'); 80 | assert.deepStrictEqual(transformer(ast), newAst, 'Transformer should turn `ast` into a `newAst`'); 81 | assert.deepStrictEqual(codeGenerator(newAst), output, 'Code Generator should turn `newAst` into `output` string'); 82 | assert.deepStrictEqual(compiler(input), output, 'Compiler should turn `input` into `output`'); 83 | 84 | console.log('All Passed!'); 85 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Attribution 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. 
The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. 
More considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution 4.0 International Public License 58 | 59 | By exercising the Licensed Rights (defined below), You accept and agree 60 | to be bound by the terms and conditions of this Creative Commons 61 | Attribution 4.0 International Public License ("Public License"). To the 62 | extent this Public License may be interpreted as a contract, You are 63 | granted the Licensed Rights in consideration of Your acceptance of 64 | these terms and conditions, and the Licensor grants You such rights in 65 | consideration of benefits the Licensor receives from making the 66 | Licensed Material available under these terms and conditions. 67 | 68 | 69 | Section 1 -- Definitions. 70 | 71 | a. Adapted Material means material subject to Copyright and Similar 72 | Rights that is derived from or based upon the Licensed Material 73 | and in which the Licensed Material is translated, altered, 74 | arranged, transformed, or otherwise modified in a manner requiring 75 | permission under the Copyright and Similar Rights held by the 76 | Licensor. For purposes of this Public License, where the Licensed 77 | Material is a musical work, performance, or sound recording, 78 | Adapted Material is always produced where the Licensed Material is 79 | synched in timed relation with a moving image. 80 | 81 | b. Adapter's License means the license You apply to Your Copyright 82 | and Similar Rights in Your contributions to Adapted Material in 83 | accordance with the terms and conditions of this Public License. 84 | 85 | c.
Copyright and Similar Rights means copyright and/or similar rights 86 | closely related to copyright including, without limitation, 87 | performance, broadcast, sound recording, and Sui Generis Database 88 | Rights, without regard to how the rights are labeled or 89 | categorized. For purposes of this Public License, the rights 90 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 91 | Rights. 92 | 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. Share means to provide material to the public by any means or 116 | process that requires permission under the Licensed Rights, such 117 | as reproduction, public display, public performance, distribution, 118 | dissemination, communication, or importation, and to make material 119 | available to the public including in ways that members of the 120 | public may access the material from a place and at a time 121 | individually chosen by them. 122 | 123 | j. 
Sui Generis Database Rights means rights other than copyright 124 | resulting from Directive 96/9/EC of the European Parliament and of 125 | the Council of 11 March 1996 on the legal protection of databases, 126 | as amended and/or succeeded, as well as other essentially 127 | equivalent rights anywhere in the world. 128 | 129 | k. You means the individual or entity exercising the Licensed Rights 130 | under this Public License. Your has a corresponding meaning. 131 | 132 | 133 | Section 2 -- Scope. 134 | 135 | a. License grant. 136 | 137 | 1. Subject to the terms and conditions of this Public License, 138 | the Licensor hereby grants You a worldwide, royalty-free, 139 | non-sublicensable, non-exclusive, irrevocable license to 140 | exercise the Licensed Rights in the Licensed Material to: 141 | 142 | a. reproduce and Share the Licensed Material, in whole or 143 | in part; and 144 | 145 | b. produce, reproduce, and Share Adapted Material. 146 | 147 | 2. Exceptions and Limitations. For the avoidance of doubt, where 148 | Exceptions and Limitations apply to Your use, this Public 149 | License does not apply, and You do not need to comply with 150 | its terms and conditions. 151 | 152 | 3. Term. The term of this Public License is specified in Section 153 | 6(a). 154 | 155 | 4. Media and formats; technical modifications allowed. The 156 | Licensor authorizes You to exercise the Licensed Rights in 157 | all media and formats whether now known or hereafter created, 158 | and to make technical modifications necessary to do so. The 159 | Licensor waives and/or agrees not to assert any right or 160 | authority to forbid You from making technical modifications 161 | necessary to exercise the Licensed Rights, including 162 | technical modifications necessary to circumvent Effective 163 | Technological Measures. For purposes of this Public License, 164 | simply making modifications authorized by this Section 2(a) 165 | (4) never produces Adapted Material. 166 | 167 | 5. 
Downstream recipients. 168 | 169 | a. Offer from the Licensor -- Licensed Material. Every 170 | recipient of the Licensed Material automatically 171 | receives an offer from the Licensor to exercise the 172 | Licensed Rights under the terms and conditions of this 173 | Public License. 174 | 175 | b. No downstream restrictions. You may not offer or impose 176 | any additional or different terms or conditions on, or 177 | apply any Effective Technological Measures to, the 178 | Licensed Material if doing so restricts exercise of the 179 | Licensed Rights by any recipient of the Licensed 180 | Material. 181 | 182 | 6. No endorsement. Nothing in this Public License constitutes or 183 | may be construed as permission to assert or imply that You 184 | are, or that Your use of the Licensed Material is, connected 185 | with, or sponsored, endorsed, or granted official status by, 186 | the Licensor or others designated to receive attribution as 187 | provided in Section 3(a)(1)(A)(i). 188 | 189 | b. Other rights. 190 | 191 | 1. Moral rights, such as the right of integrity, are not 192 | licensed under this Public License, nor are publicity, 193 | privacy, and/or other similar personality rights; however, to 194 | the extent possible, the Licensor waives and/or agrees not to 195 | assert any such rights held by the Licensor to the limited 196 | extent necessary to allow You to exercise the Licensed 197 | Rights, but not otherwise. 198 | 199 | 2. Patent and trademark rights are not licensed under this 200 | Public License. 201 | 202 | 3. To the extent possible, the Licensor waives any right to 203 | collect royalties from You for the exercise of the Licensed 204 | Rights, whether directly or through a collecting society 205 | under any voluntary or waivable statutory or compulsory 206 | licensing scheme. In all other cases the Licensor expressly 207 | reserves any right to collect such royalties. 208 | 209 | 210 | Section 3 -- License Conditions. 
211 | 212 | Your exercise of the Licensed Rights is expressly made subject to the 213 | following conditions. 214 | 215 | a. Attribution. 216 | 217 | 1. If You Share the Licensed Material (including in modified 218 | form), You must: 219 | 220 | a. retain the following if it is supplied by the Licensor 221 | with the Licensed Material: 222 | 223 | i. identification of the creator(s) of the Licensed 224 | Material and any others designated to receive 225 | attribution, in any reasonable manner requested by 226 | the Licensor (including by pseudonym if 227 | designated); 228 | 229 | ii. a copyright notice; 230 | 231 | iii. a notice that refers to this Public License; 232 | 233 | iv. a notice that refers to the disclaimer of 234 | warranties; 235 | 236 | v. a URI or hyperlink to the Licensed Material to the 237 | extent reasonably practicable; 238 | 239 | b. indicate if You modified the Licensed Material and 240 | retain an indication of any previous modifications; and 241 | 242 | c. indicate the Licensed Material is licensed under this 243 | Public License, and include the text of, or the URI or 244 | hyperlink to, this Public License. 245 | 246 | 2. You may satisfy the conditions in Section 3(a)(1) in any 247 | reasonable manner based on the medium, means, and context in 248 | which You Share the Licensed Material. For example, it may be 249 | reasonable to satisfy the conditions by providing a URI or 250 | hyperlink to a resource that includes the required 251 | information. 252 | 253 | 3. If requested by the Licensor, You must remove any of the 254 | information required by Section 3(a)(1)(A) to the extent 255 | reasonably practicable. 256 | 257 | 4. If You Share Adapted Material You produce, the Adapter's 258 | License You apply must not prevent recipients of the Adapted 259 | Material from complying with this Public License. 260 | 261 | 262 | Section 4 -- Sui Generis Database Rights. 
263 | 264 | Where the Licensed Rights include Sui Generis Database Rights that 265 | apply to Your use of the Licensed Material: 266 | 267 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 268 | to extract, reuse, reproduce, and Share all or a substantial 269 | portion of the contents of the database; 270 | 271 | b. if You include all or a substantial portion of the database 272 | contents in a database in which You have Sui Generis Database 273 | Rights, then the database in which You have Sui Generis Database 274 | Rights (but not its individual contents) is Adapted Material; and 275 | 276 | c. You must comply with the conditions in Section 3(a) if You Share 277 | all or a substantial portion of the contents of the database. 278 | 279 | For the avoidance of doubt, this Section 4 supplements and does not 280 | replace Your obligations under this Public License where the Licensed 281 | Rights include other Copyright and Similar Rights. 282 | 283 | 284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 285 | 286 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 287 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 288 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 289 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 290 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 291 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 292 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 293 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 294 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 295 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 296 | 297 | b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 298 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 299 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 300 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 301 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 302 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 303 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 304 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 305 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 306 | 307 | c. The disclaimer of warranties and limitation of liability provided 308 | above shall be interpreted in a manner that, to the extent 309 | possible, most closely approximates an absolute disclaimer and 310 | waiver of all liability. 311 | 312 | 313 | Section 6 -- Term and Termination. 314 | 315 | a. This Public License applies for the term of the Copyright and 316 | Similar Rights licensed here. However, if You fail to comply with 317 | this Public License, then Your rights under this Public License 318 | terminate automatically. 319 | 320 | b. Where Your right to use the Licensed Material has terminated under 321 | Section 6(a), it reinstates: 322 | 323 | 1. automatically as of the date the violation is cured, provided 324 | it is cured within 30 days of Your discovery of the 325 | violation; or 326 | 327 | 2. upon express reinstatement by the Licensor. 328 | 329 | For the avoidance of doubt, this Section 6(b) does not affect any 330 | right the Licensor may have to seek remedies for Your violations 331 | of this Public License. 332 | 333 | c. For the avoidance of doubt, the Licensor may also offer the 334 | Licensed Material under separate terms or conditions or stop 335 | distributing the Licensed Material at any time; however, doing so 336 | will not terminate this Public License. 337 | 338 | d. 
Sections 1, 5, 6, 7, and 8 survive termination of this Public 339 | License. 340 | 341 | 342 | Section 7 -- Other Terms and Conditions. 343 | 344 | a. The Licensor shall not be bound by any additional or different 345 | terms or conditions communicated by You unless expressly agreed. 346 | 347 | b. Any arrangements, understandings, or agreements regarding the 348 | Licensed Material not stated herein are separate from and 349 | independent of the terms and conditions of this Public License. 350 | 351 | 352 | Section 8 -- Interpretation. 353 | 354 | a. For the avoidance of doubt, this Public License does not, and 355 | shall not be interpreted to, reduce, limit, restrict, or impose 356 | conditions on any use of the Licensed Material that could lawfully 357 | be made without permission under this Public License. 358 | 359 | b. To the extent possible, if any provision of this Public License is 360 | deemed unenforceable, it shall be automatically reformed to the 361 | minimum extent necessary to make it enforceable. If the provision 362 | cannot be reformed, it shall be severed from this Public License 363 | without affecting the enforceability of the remaining terms and 364 | conditions. 365 | 366 | c. No term or condition of this Public License will be waived and no 367 | failure to comply consented to unless expressly agreed to by the 368 | Licensor. 369 | 370 | d. Nothing in this Public License constitutes or may be interpreted 371 | as a limitation upon, or waiver of, any privileges and immunities 372 | that apply to the Licensor or You, including from the legal 373 | processes of any jurisdiction or authority. 374 | 375 | 376 | ======================================================================= 377 | 378 | Creative Commons is not a party to its public licenses. 379 | Notwithstanding, Creative Commons may elect to apply one of its public 380 | licenses to material it publishes and in those instances will be 381 | considered the "Licensor." 
Except for the limited purpose of indicating 382 | that material is shared under a Creative Commons public license or as 383 | otherwise permitted by the Creative Commons policies published at 384 | creativecommons.org/policies, Creative Commons does not authorize the 385 | use of the trademark "Creative Commons" or any other trademark or logo 386 | of Creative Commons without its prior written consent including, 387 | without limitation, in connection with any unauthorized modifications 388 | to any of its public licenses or any other arrangements, 389 | understandings, or agreements concerning use of licensed material. For 390 | the avoidance of doubt, this paragraph does not form part of the public 391 | licenses. 392 | 393 | Creative Commons may be contacted at creativecommons.org. 394 | -------------------------------------------------------------------------------- /the-super-tiny-compiler.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | /** 4 | * TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE 5 | * T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E 6 | * T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E 7 | * T:::::TT:::::::TT:::::THH::::::H H::::::HHEE::::::EEEEEEEEE::::E 8 | * TTTTTT T:::::T TTTTTT H:::::H H:::::H E:::::E EEEEEE 9 | * T:::::T H:::::H H:::::H E:::::E 10 | * T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE 11 | * T:::::T H:::::::::::::::::H E:::::::::::::::E 12 | * T:::::T H:::::::::::::::::H E:::::::::::::::E 13 | * T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE 14 | * T:::::T H:::::H H:::::H E:::::E 15 | * T:::::T H:::::H H:::::H E:::::E EEEEEE 16 | * TT:::::::TT HH::::::H H::::::HHEE::::::EEEEEEEE:::::E 17 | * T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E 18 | * T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E 19 | * TTTTTTTTTTT HHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE 20 | * 21 | * SSSSSSSSSSSSSSS UUUUUUUU 
UUUUUUUUPPPPPPPPPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR 22 | * SS:::::::::::::::SU::::::U U::::::UP::::::::::::::::P E::::::::::::::::::::ER::::::::::::::::R 23 | * S:::::SSSSSS::::::SU::::::U U::::::UP::::::PPPPPP:::::P E::::::::::::::::::::ER::::::RRRRRR:::::R 24 | * S:::::S SSSSSSSUU:::::U U:::::UUPP:::::P P:::::PEE::::::EEEEEEEEE::::ERR:::::R R:::::R 25 | * S:::::S U:::::U U:::::U P::::P P:::::P E:::::E EEEEEE R::::R R:::::R 26 | * S:::::S U:::::U U:::::U P::::P P:::::P E:::::E R::::R R:::::R 27 | * S::::SSSS U:::::U U:::::U P::::PPPPPP:::::P E::::::EEEEEEEEEE R::::RRRRRR:::::R 28 | * SS::::::SSSSS U:::::U U:::::U P:::::::::::::PP E:::::::::::::::E R:::::::::::::RR 29 | * SSS::::::::SS U:::::U U:::::U P::::PPPPPPPPP E:::::::::::::::E R::::RRRRRR:::::R 30 | * SSSSSS::::S U:::::U U:::::U P::::P E::::::EEEEEEEEEE R::::R R:::::R 31 | * S:::::S U:::::U U:::::U P::::P E:::::E R::::R R:::::R 32 | * S:::::S U::::::U U::::::U P::::P E:::::E EEEEEE R::::R R:::::R 33 | * SSSSSSS S:::::S U:::::::UUU:::::::U PP::::::PP EE::::::EEEEEEEE:::::ERR:::::R R:::::R 34 | * S::::::SSSSSS:::::S UU:::::::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R 35 | * S:::::::::::::::SS UU:::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R 36 | * SSSSSSSSSSSSSSS UUUUUUUUU PPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR 37 | * 38 | * TTTTTTTTTTTTTTTTTTTTTTTIIIIIIIIIINNNNNNNN NNNNNNNNYYYYYYY YYYYYYY 39 | * T:::::::::::::::::::::TI::::::::IN:::::::N N::::::NY:::::Y Y:::::Y 40 | * T:::::::::::::::::::::TI::::::::IN::::::::N N::::::NY:::::Y Y:::::Y 41 | * T:::::TT:::::::TT:::::TII::::::IIN:::::::::N N::::::NY::::::Y Y::::::Y 42 | * TTTTTT T:::::T TTTTTT I::::I N::::::::::N N::::::NYYY:::::Y Y:::::YYY 43 | * T:::::T I::::I N:::::::::::N N::::::N Y:::::Y Y:::::Y 44 | * T:::::T I::::I N:::::::N::::N N::::::N Y:::::Y:::::Y 45 | * T:::::T I::::I N::::::N N::::N N::::::N Y:::::::::Y 46 | * T:::::T I::::I N::::::N N::::N:::::::N Y:::::::Y 47 | * T:::::T I::::I N::::::N 
N:::::::::::N Y:::::Y 48 | * T:::::T I::::I N::::::N N::::::::::N Y:::::Y 49 | * T:::::T I::::I N::::::N N:::::::::N Y:::::Y 50 | * TT:::::::TT II::::::IIN::::::N N::::::::N Y:::::Y 51 | * T:::::::::T I::::::::IN::::::N N:::::::N YYYY:::::YYYY 52 | * T:::::::::T I::::::::IN::::::N N::::::N Y:::::::::::Y 53 | * TTTTTTTTTTT IIIIIIIIIINNNNNNNN NNNNNNN YYYYYYYYYYYYY 54 | * 55 | * CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPPPPPPPPP IIIIIIIIIILLLLLLLLLLL EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR 56 | * CCC::::::::::::C OO:::::::::OO M:::::::M M:::::::MP::::::::::::::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::::::::::::R 57 | * CC:::::::::::::::C OO:::::::::::::OO M::::::::M M::::::::MP::::::PPPPPP:::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::RRRRRR:::::R 58 | * C:::::CCCCCCCC::::CO:::::::OOO:::::::OM:::::::::M M:::::::::MPP:::::P P:::::PII::::::IILL:::::::LL EE::::::EEEEEEEEE::::ERR:::::R R:::::R 59 | * C:::::C CCCCCCO::::::O O::::::OM::::::::::M M::::::::::M P::::P P:::::P I::::I L:::::L E:::::E EEEEEE R::::R R:::::R 60 | * C:::::C O:::::O O:::::OM:::::::::::M M:::::::::::M P::::P P:::::P I::::I L:::::L E:::::E R::::R R:::::R 61 | * C:::::C O:::::O O:::::OM:::::::M::::M M::::M:::::::M P::::PPPPPP:::::P I::::I L:::::L E::::::EEEEEEEEEE R::::RRRRRR:::::R 62 | * C:::::C O:::::O O:::::OM::::::M M::::M M::::M M::::::M P:::::::::::::PP I::::I L:::::L E:::::::::::::::E R:::::::::::::RR 63 | * C:::::C O:::::O O:::::OM::::::M M::::M::::M M::::::M P::::PPPPPPPPP I::::I L:::::L E:::::::::::::::E R::::RRRRRR:::::R 64 | * C:::::C O:::::O O:::::OM::::::M M:::::::M M::::::M P::::P I::::I L:::::L E::::::EEEEEEEEEE R::::R R:::::R 65 | * C:::::C O:::::O O:::::OM::::::M M:::::M M::::::M P::::P I::::I L:::::L E:::::E R::::R R:::::R 66 | * C:::::C CCCCCCO::::::O O::::::OM::::::M MMMMM M::::::M P::::P I::::I L:::::L LLLLLL E:::::E EEEEEE R::::R R:::::R 67 | * C:::::CCCCCCCC::::CO:::::::OOO:::::::OM::::::M M::::::MPP::::::PP 
II::::::IILL:::::::LLLLLLLLL:::::LEE::::::EEEEEEEE:::::ERR:::::R R:::::R 68 | * CC:::::::::::::::C OO:::::::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R 69 | * CCC::::::::::::C OO:::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R 70 | * CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPP IIIIIIIIIILLLLLLLLLLLLLLLLLLLLLLLLEEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR 71 | * 72 | * ======================================================================================================================================================================= 73 | * ======================================================================================================================================================================= 74 | * ======================================================================================================================================================================= 75 | * ======================================================================================================================================================================= 76 | */ 77 | 78 | /** 79 | * Today we're going to write a compiler together. But not just any compiler... A 80 | * super duper teeny tiny compiler! A compiler that is so small that if you 81 | * remove all the comments this file would only be ~200 lines of actual code. 82 | * 83 | * We're going to compile some lisp-like function calls into some C-like 84 | * function calls. 85 | * 86 | * If you are not familiar with one or the other. I'll just give you a quick intro. 87 | * 88 | * If we had two functions `add` and `subtract` they would be written like this: 89 | * 90 | * LISP C 91 | * 92 | * 2 + 2 (add 2 2) add(2, 2) 93 | * 4 - 2 (subtract 4 2) subtract(4, 2) 94 | * 2 + (4 - 2) (add 2 (subtract 4 2)) add(2, subtract(4, 2)) 95 | * 96 | * Easy peezy right? 
97 | * 98 | * Well good, because this is exactly what we are going to compile. While this 99 | * is neither a complete LISP nor C syntax, it will be enough of the syntax to 100 | * demonstrate many of the major pieces of a modern compiler. 101 | */ 102 | 103 | /** 104 | * Most compilers break down into three primary stages: Parsing, Transformation, 105 | * and Code Generation. 106 | * 107 | * 1. *Parsing* is taking raw code and turning it into a more abstract 108 | * representation of the code. 109 | * 110 | * 2. *Transformation* takes this abstract representation and manipulates it to do 111 | * whatever the compiler wants it to. 112 | * 113 | * 3. *Code Generation* takes the transformed representation of the code and 114 | * turns it into new code. 115 | */ 116 | 117 | /** 118 | * Parsing 119 | * ------- 120 | * 121 | * Parsing typically gets broken down into two phases: Lexical Analysis and 122 | * Syntactic Analysis. 123 | * 124 | * 1. *Lexical Analysis* takes the raw code and splits it apart into these things 125 | * called tokens by a thing called a tokenizer (or lexer). 126 | * 127 | * Tokens are an array of tiny little objects that describe an isolated piece 128 | * of the syntax. They could be numbers, labels, punctuation, operators, 129 | * whatever. 130 | * 131 | * 2. *Syntactic Analysis* takes the tokens and reformats them into a 132 | * representation that describes each part of the syntax and their relation 133 | * to one another. This is known as an intermediate representation or 134 | * Abstract Syntax Tree. 135 | * 136 | * An Abstract Syntax Tree, or AST for short, is a deeply nested object that 137 | * represents code in a way that is both easy to work with and tells us a lot 138 | * of information.
139 | * 140 | * For the following syntax: 141 | * 142 | * (add 2 (subtract 4 2)) 143 | * 144 | * Tokens might look something like this: 145 | * 146 | * [ 147 | * { type: 'paren', value: '(' }, 148 | * { type: 'name', value: 'add' }, 149 | * { type: 'number', value: '2' }, 150 | * { type: 'paren', value: '(' }, 151 | * { type: 'name', value: 'subtract' }, 152 | * { type: 'number', value: '4' }, 153 | * { type: 'number', value: '2' }, 154 | * { type: 'paren', value: ')' }, 155 | * { type: 'paren', value: ')' }, 156 | * ] 157 | * 158 | * And an Abstract Syntax Tree (AST) might look like this: 159 | * 160 | * { 161 | * type: 'Program', 162 | * body: [{ 163 | * type: 'CallExpression', 164 | * name: 'add', 165 | * params: [{ 166 | * type: 'NumberLiteral', 167 | * value: '2', 168 | * }, { 169 | * type: 'CallExpression', 170 | * name: 'subtract', 171 | * params: [{ 172 | * type: 'NumberLiteral', 173 | * value: '4', 174 | * }, { 175 | * type: 'NumberLiteral', 176 | * value: '2', 177 | * }] 178 | * }] 179 | * }] 180 | * } 181 | */ 182 | 183 | /** 184 | * Transformation 185 | * -------------- 186 | * 187 | * The next type of stage for a compiler is transformation. Again, this just 188 | * takes the AST from the last step and makes changes to it. It can manipulate 189 | * the AST in the same language or it can translate it into an entirely new 190 | * language. 191 | * 192 | * Let’s look at how we would transform an AST. 193 | * 194 | * You might notice that our AST has elements within it that look very similar. 195 | * There are these objects with a type property. Each of these are known as an 196 | * AST Node. These nodes have defined properties on them that describe one 197 | * isolated part of the tree. 
198 | * 199 | * We can have a node for a "NumberLiteral": 200 | * 201 | * { 202 | * type: 'NumberLiteral', 203 | * value: '2', 204 | * } 205 | * 206 | * Or maybe a node for a "CallExpression": 207 | * 208 | * { 209 | * type: 'CallExpression', 210 | * name: 'subtract', 211 | * params: [...nested nodes go here...], 212 | * } 213 | * 214 | * When transforming the AST we can manipulate nodes by 215 | * adding/removing/replacing properties, we can add new nodes, remove nodes, or 216 | * we could leave the existing AST alone and create an entirely new one based 217 | * on it. 218 | * 219 | * Since we’re targeting a new language, we’re going to focus on creating an 220 | * entirely new AST that is specific to the target language. 221 | * 222 | * Traversal 223 | * --------- 224 | * 225 | * In order to navigate through all of these nodes, we need to be able to 226 | * traverse through them. This traversal process goes to each node in the AST 227 | * depth-first. 228 | * 229 | * { 230 | * type: 'Program', 231 | * body: [{ 232 | * type: 'CallExpression', 233 | * name: 'add', 234 | * params: [{ 235 | * type: 'NumberLiteral', 236 | * value: '2' 237 | * }, { 238 | * type: 'CallExpression', 239 | * name: 'subtract', 240 | * params: [{ 241 | * type: 'NumberLiteral', 242 | * value: '4' 243 | * }, { 244 | * type: 'NumberLiteral', 245 | * value: '2' 246 | * }] 247 | * }] 248 | * }] 249 | * } 250 | * 251 | * So for the above AST we would go: 252 | * 253 | * 1. Program - Starting at the top level of the AST 254 | * 2. CallExpression (add) - Moving to the first element of the Program's body 255 | * 3. NumberLiteral (2) - Moving to the first element of CallExpression's params 256 | * 4. CallExpression (subtract) - Moving to the second element of CallExpression's params 257 | * 5. NumberLiteral (4) - Moving to the first element of CallExpression's params 258 | * 6. 
NumberLiteral (2) - Moving to the second element of CallExpression's params 259 | * 260 | * If we were manipulating this AST directly, instead of creating a separate AST, 261 | * we would likely introduce all sorts of abstractions here. But just visiting 262 | * each node in the tree is enough for what we're trying to do. 263 | * 264 | * The reason I use the word "visiting" is because there is this pattern of how 265 | * to represent operations on elements of an object structure. 266 | * 267 | * Visitors 268 | * -------- 269 | * 270 | * The basic idea here is that we are going to create a “visitor” object that 271 | * has methods that will accept different node types. 272 | * 273 | * var visitor = { 274 | * NumberLiteral() {}, 275 | * CallExpression() {}, 276 | * }; 277 | * 278 | * When we traverse our AST, we will call the methods on this visitor whenever we 279 | * "enter" a node of a matching type. 280 | * 281 | * In order to make this useful we will also pass the node and a reference to 282 | * the parent node. 283 | * 284 | * var visitor = { 285 | * NumberLiteral(node, parent) {}, 286 | * CallExpression(node, parent) {}, 287 | * }; 288 | * 289 | * However, there also exists the possibility of calling things on "exit". Imagine 290 | * our tree structure from before in list form: 291 | * 292 | * - Program 293 | * - CallExpression 294 | * - NumberLiteral 295 | * - CallExpression 296 | * - NumberLiteral 297 | * - NumberLiteral 298 | * 299 | * As we traverse down, we're going to reach branches with dead ends. As we 300 | * finish each branch of the tree we "exit" it. So going down the tree we 301 | * "enter" each node, and going back up we "exit". 
302 | * 303 | * -> Program (enter) 304 | * -> CallExpression (enter) 305 | * -> Number Literal (enter) 306 | * <- Number Literal (exit) 307 | * -> Call Expression (enter) 308 | * -> Number Literal (enter) 309 | * <- Number Literal (exit) 310 | * -> Number Literal (enter) 311 | * <- Number Literal (exit) 312 | * <- CallExpression (exit) 313 | * <- CallExpression (exit) 314 | * <- Program (exit) 315 | * 316 | * In order to support that, the final form of our visitor will look like this: 317 | * 318 | * var visitor = { 319 | * NumberLiteral: { 320 | * enter(node, parent) {}, 321 | * exit(node, parent) {}, 322 | * } 323 | * }; 324 | */ 325 | 326 | /** 327 | * Code Generation 328 | * --------------- 329 | * 330 | * The final phase of a compiler is code generation. Sometimes compilers will do 331 | * things that overlap with transformation, but for the most part code 332 | * generation just means take our AST and string-ify code back out. 333 | * 334 | * Code generators work several different ways, some compilers will reuse the 335 | * tokens from earlier, others will have created a separate representation of 336 | * the code so that they can print nodes linearly, but from what I can tell most 337 | * will use the same AST we just created, which is what we’re going to focus on. 338 | * 339 | * Effectively our code generator will know how to “print” all of the different 340 | * node types of the AST, and it will recursively call itself to print nested 341 | * nodes until everything is printed into one long string of code. 342 | */ 343 | 344 | /** 345 | * And that's it! That's all the different pieces of a compiler. 346 | * 347 | * Now that isn’t to say every compiler looks exactly like I described here. 348 | * Compilers serve many different purposes, and they might need more steps than 349 | * I have detailed. 350 | * 351 | * But now you should have a general high-level idea of what most compilers look 352 | * like. 
/**
 * Now that I’ve explained all of this, you’re all good to go write your own
 * compilers right?
 *
 * Just kidding, that's what I'm here to help with :P
 *
 * So let's begin...
 */

/**
 * ============================================================================
 *                                   (/^▽^)/
 *                                THE TOKENIZER!
 * ============================================================================
 */

/**
 * We're gonna start off with our first phase of parsing, lexical analysis, with
 * the tokenizer.
 *
 * We're just going to take our string of code and break it down into an array
 * of tokens.
 *
 *   (add 2 (subtract 4 2))   =>   [{ type: 'paren', value: '(' }, ...]
 *
 * @param {string} input - Source code in our lisp-like syntax.
 * @returns {Array<{type: string, value: string}>} The tokens, in source order.
 * @throws {TypeError} If a character is not recognized, or if a string is
 *   opened with a double quote but never closed.
 */

// We start by accepting an input string of code, and we're gonna set up a few
// things...
function tokenizer(input) {

  // The character classes we know about. We create them once, up front,
  // instead of building new regular expressions on every trip around the loop.
  const WHITESPACE = /\s/;
  const NUMBERS = /[0-9]/;
  const LETTERS = /[a-z]/i;

  // A `current` variable for tracking our position in the code like a cursor.
  let current = 0;

  // And a `tokens` array for pushing our tokens to.
  const tokens = [];

  // We start by creating a `while` loop where we are setting up our `current`
  // variable to be incremented as much as we want `inside` the loop.
  //
  // We do this because we may want to increment `current` many times within a
  // single loop because our tokens can be any length.
  while (current < input.length) {

    // We're also going to store the `current` character in the `input`.
    let char = input[current];

    // The first thing we want to check for is an open parenthesis. This will
    // later be used for `CallExpression` but for now we only care about the
    // character.
    //
    // We check to see if we have an open parenthesis:
    if (char === '(') {

      // If we do, we push a new token with the type `paren` and set the value
      // to an open parenthesis.
      tokens.push({
        type: 'paren',
        value: '(',
      });

      // Then we increment `current`
      current++;

      // And we `continue` onto the next cycle of the loop.
      continue;
    }

    // Next we're going to check for a closing parenthesis. We do the same exact
    // thing as before: Check for a closing parenthesis, add a new token,
    // increment `current`, and `continue`.
    if (char === ')') {
      tokens.push({
        type: 'paren',
        value: ')',
      });
      current++;
      continue;
    }

    // Moving on, we're now going to check for whitespace. This is interesting
    // because we care that whitespace exists to separate characters, but it
    // isn't actually important for us to store as a token. We would only throw
    // it out later.
    //
    // So here we're just going to test for existence and if it does exist we're
    // going to just `continue` on.
    if (WHITESPACE.test(char)) {
      current++;
      continue;
    }

    // The next type of token is a number. This is different than what we have
    // seen before because a number could be any number of characters and we
    // want to capture the entire sequence of characters as one token.
    //
    //   (add 123 456)
    //        ^^^ ^^^
    //        Only two separate tokens
    //
    // So we start this off when we encounter the first number in a sequence.
    if (NUMBERS.test(char)) {

      // We're going to create a `value` string that we are going to push
      // characters to.
      let value = '';

      // Then we're going to loop through each character in the sequence until
      // we encounter a character that is not a number (or we run out of
      // input), pushing each character that is a number to our `value` and
      // incrementing `current` as we go.
      while (current < input.length && NUMBERS.test(char)) {
        value += char;
        char = input[++current];
      }

      // After that we push our `number` token to the `tokens` array.
      tokens.push({ type: 'number', value });

      // And we continue on.
      continue;
    }

    // We'll also add support for strings in our language which will be any
    // text surrounded by double quotes (").
    //
    //   (concat "foo" "bar")
    //            ^^^   ^^^ string tokens
    //
    // We'll start by checking for the opening quote:
    if (char === '"') {
      // Keep a `value` variable for building up our string token.
      let value = '';

      // We'll skip the opening double quote in our token.
      char = input[++current];

      // Then we'll iterate through each character until we reach another
      // double quote. We also stop at the end of the input: without that
      // check a missing closing quote would keep this loop spinning forever.
      while (current < input.length && char !== '"') {
        value += char;
        char = input[++current];
      }

      // If we ran off the end of the input, the string was never closed.
      if (current >= input.length) {
        throw new TypeError('Unterminated string: missing closing double quote');
      }

      // Skip the closing double quote.
      current++;

      // And add our `string` token to the `tokens` array.
      tokens.push({ type: 'string', value });

      continue;
    }

    // The last type of token will be a `name` token. This is a sequence of
    // letters instead of numbers, that are the names of functions in our lisp
    // syntax.
    //
    //   (add 2 4)
    //    ^^^
    //    Name token
    //
    if (LETTERS.test(char)) {
      let value = '';

      // Again we're just going to loop through all the letters pushing them to
      // a value.
      //
      // The bounds check matters here: past the end of the input `char` is
      // `undefined`, which `test` turns into the string "undefined"... and that
      // is made of letters! Without the check, input ending in a name would
      // never stop being scanned.
      while (current < input.length && LETTERS.test(char)) {
        value += char;
        char = input[++current];
      }

      // And pushing that value as a token with the type `name` and continuing.
      tokens.push({ type: 'name', value });

      continue;
    }

    // Finally if we have not matched a character by now, we're going to throw
    // an error and completely exit.
    throw new TypeError('I dont know what this character is: ' + char);
  }

  // Then at the end of our `tokenizer` we simply return the tokens array.
  return tokens;
}

/**
 * ============================================================================
 *                                 ヽ/❀o ل͜ o\ノ
 *                                THE PARSER!!!
 * ============================================================================
 */

/**
 * For our parser we're going to take our array of tokens and turn it into an
 * AST.
 *
 *   [{ type: 'paren', value: '(' }, ...]   =>   { type: 'Program', body: [...] }
 *
 * @param {Array<{type: string, value: string}>} tokens - Output of `tokenizer`.
 * @returns {{type: 'Program', body: Array<Object>}} The lisp-style AST.
 * @throws {TypeError} If the tokens end in the middle of a call, if an open
 *   parenthesis is not followed by a function name, or if a token shows up
 *   where no expression can start.
 */

// Okay, so we define a `parser` function that accepts our array of `tokens`.
function parser(tokens) {

  // Again we keep a `current` variable that we will use as a cursor.
  let current = 0;

  // But this time we're going to use recursion instead of a `while` loop. So we
  // define a `walk` function.
  function walk() {

    // Inside the walk function we start by grabbing the `current` token.
    const token = tokens[current];

    // If there is no token left, the input stopped while we were still
    // expecting an expression.
    if (token === undefined) {
      throw new TypeError('Unexpected end of input');
    }

    // We're going to split each type of token off into a different code path,
    // starting off with `number` tokens.
    //
    // We test to see if we have a `number` token.
    if (token.type === 'number') {

      // If we have one, we'll increment `current`.
      current++;

      // And we'll return a new AST node called `NumberLiteral` and setting its
      // value to the value of our token.
      return {
        type: 'NumberLiteral',
        value: token.value,
      };
    }

    // If we have a string we will do the same as number and create a
    // `StringLiteral` node.
    if (token.type === 'string') {
      current++;

      return {
        type: 'StringLiteral',
        value: token.value,
      };
    }

    // Next we're going to look for CallExpressions. We start this off when we
    // encounter an open parenthesis.
    if (
      token.type === 'paren' &&
      token.value === '('
    ) {

      // We'll increment `current` to skip the parenthesis since we don't care
      // about it in our AST.
      const nameToken = tokens[++current];

      // The next token after the open parenthesis has to be the name of the
      // function, so we make sure that it exists and that it really is a name.
      if (nameToken === undefined) {
        throw new TypeError('Unexpected end of input after "("');
      }
      if (nameToken.type !== 'name') {
        throw new TypeError(
          `Expected a function name after "(" but found ${nameToken.type} ` +
          `"${nameToken.value}"`
        );
      }

      // We create a base node with the type `CallExpression`, and we're going
      // to set the name as the name token's value.
      const node = {
        type: 'CallExpression',
        name: nameToken.value,
        params: [],
      };

      // We increment `current` *again* to skip the name token.
      let next = tokens[++current];

      // And now we want to loop through each token that will be the `params` of
      // our `CallExpression` until we encounter a closing parenthesis.
      //
      // Now this is where recursion comes in. Instead of trying to parse a
      // potentially infinitely nested set of nodes we're going to rely on
      // recursion to resolve things.
      //
      // To explain this, let's take our Lisp code. You can see that the
      // parameters of the `add` are a number and a nested `CallExpression` that
      // includes its own numbers.
      //
      //   (add 2 (subtract 4 2))
      //
      // You'll also notice that in our tokens array we have multiple closing
      // parenthesis.
      //
      //   [
      //     { type: 'paren',  value: '('        },
      //     { type: 'name',   value: 'add'      },
      //     { type: 'number', value: '2'        },
      //     { type: 'paren',  value: '('        },
      //     { type: 'name',   value: 'subtract' },
      //     { type: 'number', value: '4'        },
      //     { type: 'number', value: '2'        },
      //     { type: 'paren',  value: ')'        }, <<< Closing parenthesis
      //     { type: 'paren',  value: ')'        }, <<< Closing parenthesis
      //   ]
      //
      // We're going to rely on the nested `walk` function to increment our
      // `current` variable past any nested `CallExpression`.

      // So we create a `while` loop that will continue until it encounters a
      // token with a `type` of `'paren'` and a `value` of a closing
      // parenthesis (or until we run out of tokens).
      while (
        next !== undefined &&
        !(next.type === 'paren' && next.value === ')')
      ) {
        // we'll call the `walk` function which will return a `node` and we'll
        // push it into our `node.params`.
        node.params.push(walk());
        next = tokens[current];
      }

      // Running out of tokens means the call was never closed.
      if (next === undefined) {
        throw new TypeError(
          `Unexpected end of input: missing ")" to close the call to "${node.name}"`
        );
      }

      // Finally we will increment `current` one last time to skip the closing
      // parenthesis.
      current++;

      // And return the node.
      return node;
    }

    // Again, if we haven't recognized the token type by now we're going to
    // throw an error.
    throw new TypeError(token.type);
  }

  // Now, we're going to create our AST which will have a root which is a
  // `Program` node.
  const ast = {
    type: 'Program',
    body: [],
  };

  // And we're going to kickstart our `walk` function, pushing nodes to our
  // `ast.body` array.
  //
  // The reason we are doing this inside a loop is because our program can have
  // `CallExpression` after one another instead of being nested.
  //
  //   (add 2 2)
  //   (subtract 4 2)
  //
  while (current < tokens.length) {
    ast.body.push(walk());
  }

  // At the end of our parser we'll return the AST.
  return ast;
}

/**
 * ============================================================================
 *                                 ⌒(❀>◞౪◟<❀)⌒
 *                               THE TRAVERSER!!!
 * ============================================================================
 */

/**
 * So now we have our AST, and we want to be able to visit different nodes with
 * a visitor. We need to be able to call the methods on the visitor whenever we
 * encounter a node with a matching type.
 *
 *   traverse(ast, {
 *     Program: {
 *       enter(node, parent) {
 *         // ...
 *       },
 *       exit(node, parent) {
 *         // ...
 *       },
 *     },
 *
 *     CallExpression: {
 *       enter(node, parent) {
 *         // ...
 *       },
 *       exit(node, parent) {
 *         // ...
 *       },
 *     },
 *
 *     NumberLiteral: {
 *       enter(node, parent) {
 *         // ...
 *       },
 *       exit(node, parent) {
 *         // ...
 *       },
 *     },
 *   });
 *
 * @param {Object} ast - Root node of the tree to walk (depth-first).
 * @param {Object} visitor - Maps a node type to `{ enter, exit }` callbacks;
 *   both callbacks are optional and receive `(node, parent)`.
 * @throws {TypeError} If a node has a type the traverser does not know.
 */

// So we define a traverser function which accepts an AST and a
// visitor. Inside we're going to define two functions...
function traverser(ast, visitor) {

  // A `traverseArray` function that will allow us to iterate over an array and
  // call the next function that we will define: `traverseNode`.
  function traverseArray(array, parent) {
    array.forEach(child => {
      traverseNode(child, parent);
    });
  }

  // `traverseNode` will accept a `node` and its `parent` node. So that it can
  // pass both to our visitor methods.
  function traverseNode(node, parent) {

    // We start by testing for the existence of a method on the visitor with a
    // matching `type`.
    const methods = visitor[node.type];

    // If there is an `enter` method for this node type we'll call it with the
    // `node` and its `parent`.
    if (methods && methods.enter) {
      methods.enter(node, parent);
    }

    // Next we are going to split things up by the current node type.
    switch (node.type) {

      // We'll start with our top level `Program`. Since Program nodes have a
      // property named body that has an array of nodes, we will call
      // `traverseArray` to traverse down into them.
      //
      // (Remember that `traverseArray` will in turn call `traverseNode` so we
      // are causing the tree to be traversed recursively)
      case 'Program':
        traverseArray(node.body, node);
        break;

      // Next we do the same with `CallExpression` and traverse their `params`.
      case 'CallExpression':
        traverseArray(node.params, node);
        break;

      // In the cases of `NumberLiteral` and `StringLiteral` we don't have any
      // child nodes to visit, so we'll just break.
      case 'NumberLiteral':
      case 'StringLiteral':
        break;

      // And again, if we haven't recognized the node type then we'll throw an
      // error.
      default:
        throw new TypeError(node.type);
    }

    // If there is an `exit` method for this node type we'll call it with the
    // `node` and its `parent`.
    if (methods && methods.exit) {
      methods.exit(node, parent);
    }
  }

  // Finally we kickstart the traverser by calling `traverseNode` with our ast
  // with no `parent` because the top level of the AST doesn't have a parent.
  traverseNode(ast, null);
}

/**
 * ============================================================================
 *                                   ⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽
 *                              THE TRANSFORMER!!!
 * ============================================================================
 */

/**
 * Next up, the transformer. Our transformer is going to take the AST that we
 * have built and pass it to our traverser function with a visitor and will
 * create a new ast.
 *
 * ----------------------------------------------------------------------------
 *   Original AST                     |   Transformed AST
 * ----------------------------------------------------------------------------
 *   {                                |   {
 *     type: 'Program',               |     type: 'Program',
 *     body: [{                       |     body: [{
 *       type: 'CallExpression',      |       type: 'ExpressionStatement',
 *       name: 'add',                 |       expression: {
 *       params: [{                   |         type: 'CallExpression',
 *         type: 'NumberLiteral',     |         callee: {
 *         value: '2'                 |           type: 'Identifier',
 *       }, {                         |           name: 'add'
 *         type: 'CallExpression',    |         },
 *         name: 'subtract',          |         arguments: [{
 *         params: [{                 |           type: 'NumberLiteral',
 *           type: 'NumberLiteral',   |           value: '2'
 *           value: '4'               |         }, {
 *         }, {                       |           type: 'CallExpression',
 *           type: 'NumberLiteral',   |           callee: {
 *           value: '2'               |             type: 'Identifier',
 *         }]                         |             name: 'subtract'
 *       }]                           |           },
 *     }]                             |           arguments: [{
 *   }                                |             type: 'NumberLiteral',
 *                                    |             value: '4'
 * ---------------------------------- |           }, {
 *                                    |             type: 'NumberLiteral',
 *                                    |             value: '2'
 *                                    |           }]
 *  (sorry the other one is longer.)  |         }
 *                                    |       }
 *                                    |     }]
 *                                    |   }
 * ----------------------------------------------------------------------------
 *
 * @param {Object} ast - The lisp-style AST produced by `parser`. It is only
 *   read, never modified.
 * @returns {{type: 'Program', body: Array<Object>}} A new, C-style AST.
 */

// So we have our transformer function which will accept the lisp ast.
function transformer(ast) {

  // We'll create a `newAst` which like our previous AST will have a program
  // node.
  const newAst = {
    type: 'Program',
    body: [],
  };

  // Next we need a way for a node in the old ast to find the array in the new
  // ast that its children should be pushed to. We're going to call that array
  // the node's "context" and remember it in a `WeakMap` keyed by the old node.
  // Keeping this bookkeeping off to the side means we never have to scribble
  // extra properties onto the ast that was handed to us.
  //
  // Just take note that the context is a reference *from* the old ast *to* the
  // new ast.
  const contexts = new WeakMap();
  contexts.set(ast, newAst.body);

  // We'll start by calling the traverser function with our ast and a visitor.
  traverser(ast, {

    // The first visitor method accepts any `NumberLiteral`
    NumberLiteral: {
      // We'll visit them on enter.
      enter(node, parent) {
        // We'll create a new node also named `NumberLiteral` that we will push to
        // the parent context.
        contexts.get(parent).push({
          type: 'NumberLiteral',
          value: node.value,
        });
      },
    },

    // Next we have `StringLiteral`
    StringLiteral: {
      enter(node, parent) {
        contexts.get(parent).push({
          type: 'StringLiteral',
          value: node.value,
        });
      },
    },

    // Next up, `CallExpression`.
    CallExpression: {
      enter(node, parent) {

        // We start creating a new node `CallExpression` with a nested
        // `Identifier`.
        let expression = {
          type: 'CallExpression',
          callee: {
            type: 'Identifier',
            name: node.name,
          },
          arguments: [],
        };

        // Next we're going to register a context for the original
        // `CallExpression` node that will reference the `expression`'s
        // arguments so that we can push arguments.
        contexts.set(node, expression.arguments);

        // Then we're going to check if the parent node is a `CallExpression`.
        // If it is not...
        if (parent.type !== 'CallExpression') {

          // We're going to wrap our `CallExpression` node with an
          // `ExpressionStatement`. We do this because the top level
          // `CallExpression` in JavaScript are actually statements.
          expression = {
            type: 'ExpressionStatement',
            expression: expression,
          };
        }

        // Last, we push our (possibly wrapped) `CallExpression` to the `parent`'s
        // context.
        contexts.get(parent).push(expression);
      },
    }
  });

  // At the end of our transformer function we'll return the new ast that we
  // just created.
  return newAst;
}

/**
 * ============================================================================
 *                               ヾ(〃^∇^)ノ♪
 *                            THE CODE GENERATOR!!!!
 * ============================================================================
 */

/**
 * Now let's move onto our last phase: The Code Generator.
 *
 * Our code generator is going to recursively call itself to print each node in
 * the tree into one giant string.
 *
 * @param {Object} node - A node of the transformed (C-style) AST.
 * @returns {string} The code printed for `node` and everything beneath it.
 * @throws {TypeError} If the node has a type we do not know how to print.
 */

function codeGenerator(node) {

  // We'll break things down by the `type` of the `node`.
  switch (node.type) {

    // If we have a `Program` node. We will map through each node in the `body`
    // and run them through the code generator and join them with a newline.
    case 'Program':
      return node.body.map(codeGenerator)
        .join('\n');

    // For `ExpressionStatement` we'll call the code generator on the nested
    // expression and we'll add a semicolon...
    case 'ExpressionStatement':
      return (
        codeGenerator(node.expression) +
        ';' // << (...because we like to code the *correct* way)
      );

    // For `CallExpression` we will print the `callee`, add an open
    // parenthesis, we'll map through each node in the `arguments` array and run
    // them through the code generator, joining them with a comma, and then
    // we'll add a closing parenthesis.
    case 'CallExpression':
      return (
        codeGenerator(node.callee) +
        '(' +
        node.arguments.map(codeGenerator)
          .join(', ') +
        ')'
      );

    // For `Identifier` we'll just return the `node`'s name.
    case 'Identifier':
      return node.name;

    // For `NumberLiteral` we'll just return the `node`'s value.
    case 'NumberLiteral':
      return node.value;

    // For `StringLiteral` we'll add quotations around the `node`'s value.
    case 'StringLiteral':
      return '"' + node.value + '"';

    // And if we haven't recognized the node, we'll throw an error.
    default:
      throw new TypeError(node.type);
  }
}

/**
 * ============================================================================
 *                                  (۶* ‘ヮ’)۶”
 *                         !!!!!!!!THE COMPILER!!!!!!!!
 * ============================================================================
 */

/**
 * FINALLY! We'll create our `compiler` function. Here we will link together
 * every part of the pipeline.
 *
 *   1. input  => tokenizer   => tokens
 *   2. tokens => parser      => ast
 *   3. ast    => transformer => newAst
 *   4. newAst => generator   => output
 *
 * @param {string} input - Source code in our lisp-like syntax.
 * @returns {string} The equivalent C-like code.
 * @throws {TypeError} If any stage of the pipeline rejects the input.
 */

function compiler(input) {
  const tokens = tokenizer(input);
  const ast    = parser(tokens);
  const newAst = transformer(ast);
  const output = codeGenerator(newAst);

  // and simply return the output!
  return output;
}

/**
 * ============================================================================
 *                                   (๑˃̵ᴗ˂̵)و
 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!YOU MADE IT!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 * ============================================================================
 */

// Now I'm just exporting everything...
1046 | module.exports = { 1047 | tokenizer, 1048 | parser, 1049 | traverser, 1050 | transformer, 1051 | codeGenerator, 1052 | compiler, 1053 | }; 1054 | --------------------------------------------------------------------------------