├── .babelrc
├── index.js
├── lib
    ├── generation.js
    ├── parse.js
    ├── tokenize.js
    └── transform.js
├── test
    └── index.js
├── gulpfile.js
├── package.json
├── demo
    ├── tokenizer.html
    ├── parser.html
    └── transformer.html
└── README.md


/.babelrc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "presets": [
 3 |     ["latest", {
 4 |       "es2015": {
 5 |         "modules": false
 6 |       }
 7 |     }]
 8 |   ],
 9 |   "plugins": ["external-helpers"]
10 | }


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
 1 | const {tokenize} = require('../lib/tokenize');
 2 | const {parse} = require('../lib/parse');
 3 | const {transform} = require('../lib/transform');
 4 | const {generation} = require('../lib/generation');
 5 | 
 6 | module.exports = {
 7 |   tokenize, 
 8 |   parse,
 9 |   transform,
10 |   generation
11 | }


--------------------------------------------------------------------------------
/lib/generation.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @author Jan
 3 |  * @description 代码生成器
 4 |  */
 5 | function generation(node) {
 6 |   switch(node.type) {
 7 |     case 'program':
 8 |       return node.body.map(generation).join('\n');
 9 |     case 'arithExpression':
10 |       return '(' + node.arguments.map(generation).join(` ${node.name} `) + ')';
11 |     case 'number':
12 |       return node.value;
13 |     default:
14 |       throw new Error('节点类型无法被识别');
15 |   }
16 | }
17 | 
18 | exports.generation = generation;


--------------------------------------------------------------------------------
/test/index.js:
--------------------------------------------------------------------------------
 1 | const {tokenize} = require('../lib/tokenize');
 2 | const {parse} = require('../lib/parse');
 3 | const {transform} = require('../lib/transform');
 4 | const {generation} = require('../lib/generation');
 5 | 
 6 | const tokens = tokenize('(/ (- 72 10) (+ 4.5 2))');
 7 | 
 8 | console.log(tokens);
 9 | 
10 | const ast = parse(tokens, null, 2);
11 | 
12 | console.log(JSON.stringify(ast, null, 2));
13 | 
14 | const newAst = transform(ast);
15 | 
16 | console.log(JSON.stringify(newAst, null, 2));
17 | 
18 | console.log(generation(newAst));
19 | 


--------------------------------------------------------------------------------
/gulpfile.js:
--------------------------------------------------------------------------------
 1 | const gulp = require('gulp');
 2 | const rollup = require('rollup');
 3 | const commonjs = require('rollup-plugin-commonjs');
 4 | const resolve = require('rollup-plugin-node-resolve');
 5 | const babel = require('rollup-plugin-babel');
 6 | 
 7 | gulp.task('build', async function() {
 8 |   const index = await rollup.rollup({
 9 |     entry: `./index.js`,
10 |     plugins: [
11 |       commonjs(),
12 |       resolve(),
13 |       babel({
14 |         exclude: 'node_modules/**' // 只编译我们的源代码
15 |       })
16 |     ]
17 |   })
18 | 
19 |   await index.write({
20 |     dest: `./dist/tiny-compiler.js`,
21 |     format: 'umd',
22 |     moduleName: 'compiler',
23 |   })
24 | });


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "tiny-compiler",
 3 |   "version": "1.0.0",
 4 |   "description": "a tiny compiler",
 5 |   "main": "index.js",
 6 |   "scripts": {
 7 |     "test": "echo \"Error: no test specified\" && exit 1"
 8 |   },
 9 |   "repository": {
10 |     "type": "git",
11 |     "url": "git+https://github.com/jan-wong/tiny-compiler.git"
12 |   },
13 |   "keywords": [
14 |     "compiler",
15 |     "javascript"
16 |   ],
17 |   "author": "wolf",
18 |   "license": "ISC",
19 |   "bugs": {
20 |     "url": "https://github.com/jan-wong/tiny-compiler/issues"
21 |   },
22 |   "homepage": "https://github.com/jan-wong/tiny-compiler#readme",
23 |   "devDependencies": {
24 |     "esprima": "^4.0.0"
25 |   }
26 | }
27 | 


--------------------------------------------------------------------------------
/demo/tokenizer.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <title>tokenizer</title>
 6 | </head>
 7 | <style>
 8 |   td {
 9 |     padding: 0 20px;
10 |   }
11 | </style>
12 | <body>
13 | <ul>
14 |   <li><a href="tokenizer.html">词法分析器</a></li>
15 |   <li><a href="parser.html">语法分析器</a></li>
16 |   <li><a href="transformer.html">AST转换器</a></li>
17 | </ul>
18 | <table border="1px solid #ccc">
19 |   <tr>
20 |     <td><p>(add 2 (subtract 4 2))</p></td>
21 |     <td>=></td>
22 |     <td><pre></pre></td>
23 |   </tr>
24 | </table>
25 | <script src="../src/tokenizer.js"></script>
26 | <script>
27 | const str = '(add 2 (subtract 4 2))';
28 | const tokens = tokenizer(str);
29 | const json = JSON.stringify(tokens, null, 2);
30 | document.querySelector('pre').innerHTML = json;
31 | </script>
32 | </body>
33 | </html>


--------------------------------------------------------------------------------
/demo/parser.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <title>parser</title>
 6 | </head>
 7 | <style>
 8 |   td {
 9 |     padding: 0 20px;
10 |   }
11 | </style>
12 | <body>
13 | <ul>
14 |   <li><a href="tokenizer.html">词法分析器</a></li>
15 |   <li><a href="parser.html">语法分析器</a></li>
16 |   <li><a href="transformer.html">AST转换器</a></li>
17 | </ul>
18 | <table border="1px solid #ccc">
19 |   <tr>
20 |     <td><pre id="tokens"></pre></td>
21 |     <td>=></td>
22 |     <td><pre id="parser"></pre></td>
23 |   </tr>
24 | </table>
25 | <script src="../src/tokenizer.js"></script>
26 | <script src="../src/parser.js"></script>
27 | <script>
28 | const str = '(add 2 (subtract 4 2))';
29 | const tokens = tokenizer(str);
30 | const tokensJSON = JSON.stringify(tokens, null, 2);
31 | document.getElementById('tokens').innerHTML = tokensJSON;
32 | const ast = parser(tokens);
33 | const astJSON = JSON.stringify(ast, null, 2);
34 | document.getElementById('parser').innerHTML = astJSON;
35 | </script>
36 | </body>
37 | </html>


--------------------------------------------------------------------------------
/lib/parse.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @author Jan
 3 |  * @description 语法分析器
 4 |  */
 5 | function parse(tokens) {
 6 |   let i = 0; // tokens的下标
 7 | 
 8 |   function walk() {
 9 |     let token = tokens[i];
10 |     
11 |     // 数值类型
12 |     if (token.type === 'number') {
13 |       return {
14 |         type: 'number',
15 |         value: token.value
16 |       }
17 |     }
18 | 
19 |     // 处理'('  入口
20 |     if (token.type === 'parren' && token.value === '(') {
21 |       token = tokens[++i];
22 | 
23 |       // 在LISP语言中 运算操作符实际上是一个函数，数值为其中的参数
24 |       let node = {
25 |         type: 'arithCall',
26 |         name: token.value,
27 |         params: []
28 |       }
29 | 
30 |       token = tokens[++i];
31 |       while(token && token.value !== ')') {
32 |         node.params.push(walk());
33 |         token = tokens[++i];
34 |       }
35 | 
36 |       // 出口
37 |       if (token.type === 'parren' && token.value === ')') return node;
38 |     }
39 |   }
40 | 
41 |   const ast = {
42 |     type: 'program',
43 |     body: []
44 |   }
45 | 
46 |   ast.body.push(walk());
47 | 
48 |   return ast;
49 | }
50 | 
51 | exports.parse = parse;


--------------------------------------------------------------------------------
/demo/transformer.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <title>parser</title>
 6 | </head>
 7 | <style>
 8 |   td {
 9 |     padding: 0 20px;
10 |   }
11 | </style>
12 | <body>
13 | <ul>
14 |   <li><a href="tokenizer.html">词法分析器</a></li>
15 |   <li><a href="parser.html">语法分析器</a></li>
16 |   <li><a href="transformer.html">AST转换器</a></li>
17 | </ul>
18 | <table border="1px solid #ccc">
19 |   <tr>
20 |     <td><pre id="parser"></pre></td>
21 |     <td>=></td>
22 |     <td><pre id="transformer"></pre></td>
23 |   </tr>
24 | </table>
25 | <script src="../src/tokenizer.js"></script>
26 | <script src="../src/parser.js"></script>
27 | <script src="../src/traverser.js"></script>
28 | <script src="../src/transformer.js"></script>
29 | <script>
30 | const str = '(add 2 (subtract 4 2))';
31 | const tokens = tokenizer(str);
32 | const ast = parser(tokens);
33 | const astJSON = JSON.stringify(ast, null, 2);
34 | document.getElementById('parser').innerHTML = astJSON;
35 | const targetAST = transformer(ast);
36 | const targetASTJSON = JSON.stringify(targetAST, null, 2);
37 | document.getElementById('transformer').innerHTML = targetASTJSON;
38 | </script>
39 | </body>
40 | </html>


--------------------------------------------------------------------------------
/lib/tokenize.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @author Jan
 3 |  * @description 词法分析器
 4 |  * @param {String} input 
 5 |  */
 6 | function tokenize(input) {
 7 |   const tokens = [];  //包含tokens的数组
 8 |   let i = 0;  //input字符串序列的下标
 9 | 
10 |   while(i < input.length) {
11 |     let char = input[i];
12 | 
13 |     // 处理空白符
14 |     const whiteReg = /\s/;
15 |     if (whiteReg.test(char)) {
16 |       i++;
17 |       continue;
18 |     }
19 | 
20 |     // 处理小括号
21 |     if (char === '(' || char === ')') {
22 |       tokens.push({type: 'parren', value: input[i]});
23 |       i++;
24 |       continue;
25 |     }
26 | 
27 |     // 处理算数运算符
28 |     const arithReg = /[\+\-\*\/%]/;
29 |     if (arithReg.test(char)) {
30 |       tokens.push({type: 'arith', value: char});
31 |       i++;
32 |       continue;
33 |     }
34 | 
35 |     // 处理number
36 |     const numberReg = /[0-9\.]/;
37 |     if (numberReg.test(char)) {
38 |       let numberical = '';
39 | 
40 |       while(numberReg.test(char)) {
41 |         numberical += char;
42 |         i++;
43 |         char = input[i];
44 |       }
45 | 
46 |       tokens.push({type: 'number', value: numberical});
47 |       continue;
48 |     }
49 | 
50 |     throw new Error('不能被识别的token类型');
51 |   }
52 | 
53 |   return tokens;
54 | }
55 | 
56 | exports.tokenize = tokenize;


--------------------------------------------------------------------------------
/lib/transform.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @author Jan
 3 |  * @description 转换器
 4 |  */
 5 | 
 6 | // 访问者，包含处理对应ast节点类型的函数的对象
 7 | const visitor = {
 8 |   number(node, parent) {
 9 |     const childNode = {
10 |       type: 'number',
11 |       value: node.value
12 |     }
13 |     parent.push(childNode);
14 |   },
15 |   arithCall(node, parent) {
16 |     const expression = {
17 |       type: 'arithExpression',
18 |       name: node.name,
19 |       arguments: []
20 |     }
21 |     parent.push(expression);
22 | 
23 |     return expression.arguments;
24 |   }
25 | }
26 | 
/**
 * Walks `ast` depth-first and builds the new tree inside `newAst.body`.
 *
 * For every node, the visitor handler registered under the node's type (if
 * any) is called with the node and the array its counterpart should be
 * appended to. When the handler returns an array, the node's children are
 * appended to that array; otherwise they go into the same array as the node.
 *
 * @param {Object} ast root node of the source tree
 * @param {{body: Array}} newAst target program node; its `body` is filled in place
 * @param {Object} visitor handlers keyed by node type, called as (node, parentArray)
 * @throws {Error} when a node is missing or has a type other than
 *   `program`, `number` or `arithCall`
 */
function DFSwalk(ast, newAst, visitor) {

  function walkNodeArray(nodeArr, newNodeArr) {
    nodeArr.forEach(childNode => {
      walkNode(childNode, newNodeArr);
    });
  }

  function walkNode(node, newParentArr) {
    // A missing child would otherwise surface as an opaque
    // "cannot read property 'type'" TypeError.
    if (node === null || typeof node !== 'object') {
      throw new Error(`不能被识别的类型: ${node}`);
    }

    const type = node.type;
    const handler = visitor[type];

    // Children go into whatever array the handler hands back, falling back to
    // the array this node itself was appended to.
    let childrenTarget = newParentArr;
    if (typeof handler === 'function') {
      childrenTarget = handler(node, newParentArr) || newParentArr;
    }

    switch(type) {
      case 'program':
        walkNodeArray(node.body, childrenTarget);
        break;
      case 'number':
        break;
      case 'arithCall':
        walkNodeArray(node.params, childrenTarget);
        break;
      default:
        throw new Error(`不能被识别的类型: ${type}`);
    }
  }

  walkNode(ast, newAst.body);
}
59 | 
/**
 * Transformer: builds a fresh `program` AST from `ast` by walking it with
 * DFSwalk and the module's visitor, and returns the new tree.
 *
 * @param {Object} ast source AST
 * @returns {{type: String, body: Array}} the newly built AST
 */
function transform(ast) {
  const result = {type: 'program', body: []};

  DFSwalk(ast, result, visitor);

  return result;
}
71 | 
72 | exports.transform = transform;


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # tiny-compiler (一个简单的编译器)
  2 | 
  3 | ## 前言
  4 | 
  5 | 数值求值的栗子：
  6 | ```
  7 |         LISP                 Javascript
  8 | 
  9 |       (+ 3 2)                 (3 + 2)
 10 |       (- 7 1)                 (7 - 1)
 11 |  (/ (- 72 10) (+ 4.5 2))     ((72 - 10) / (4.5 + 2))
 12 | ```
 13 | 
 14 | 如果某一天我们脑洞大开，想使用LISP的语法形式去编写Javascript，或许我们会使用到编译原理，将LISP语法形式的代码编译成Javascript。就像我们编写ES6标准的代码，通过Babel编译成浏览器都支持的ES5标准一样，这非常有趣。那么编译原理对于Javascript开发者有用吗？其实编译原理在很多开源项目上都进行了使用，比如Babel、TypeScript、CoffeeScript、Flow等等，可见掌握一些编译原理对于理解它们是很有用的一件事情。如上是一个数值求值的例子，接下来将演示将如上LISP语法编译为Javascript语法的整个编译过程。源码见[https://github.com/jan-wong/tiny-compiler](https://github.com/jan-wong/tiny-compiler)
 15 | 
 16 | ## 整体实现
 17 | 
 18 | 一般而言，编译过程会被划分为三个阶段：解析(parsing，又分为词法分析(tokenize)和语法分析(parse))、转换(transform)、代码生成(code generation)。
 19 | 
 20 | ### 词法分析器(tokenizer)
 21 | 
 22 | 词法分析器的作用是将输入(input)的字符串序列划分成一个个标记对象(token)组成的数组，以便进行语法分析，标记对象包含标记类型和标记的字面值。标记是什么？标记是源代码的最小单位，一般用空格分开。编程语言的标记种类是有限的，比如有数据类型(字符串、数值、数组等)、操作符(算数操作符、比较操作符、逻辑操作符等)、分隔符(逗号、分号、括号等)、保留字、标识符等等。对'(/ (- 72 10) (+ 4.5 2))'进行词法分析可以得到如下, 源码见tokenize.js：
 23 | 
 24 | ```javascript
 25 |                                                     [ { type: 'parren', value: '(' },
 26 |                                                       { type: 'arith', value: '/' },
 27 |                                                       { type: 'parren', value: '(' },
 28 |                                                       { type: 'arith', value: '-' },
 29 |                                                       { type: 'number', value: '72' },
 30 |                              词法分析(tokenize)        { type: 'number', value: '10' },
 31 | '(/ (- 72 10) (+ 4.5 2))'   =================>>>      { type: 'parren', value: ')' },
 32 |                                                       { type: 'parren', value: '(' },
 33 |                                                       { type: 'arith', value: '+' },
 34 |                                                       { type: 'number', value: '4.5' },
 35 |                                                       { type: 'number', value: '2' },
 36 |                                                       { type: 'parren', value: ')' },
 37 |                                                       { type: 'parren', value: ')' } ]
 38 | ```
 39 | 
 40 | ### 语法分析器(parser)
 41 | 
 42 | 语法分析器的作用是将输入标记数组(tokens)重新格式化，让标记与标记之间形成关联，最后形成程序、语句或者表达式。我们会用一棵树来描述这种形成相互关系的程序，这棵树唤做抽象语法树(AST)。继续进行语法分析，过程如下，源码在parse.js：
 43 | 
 44 | ```javascript
 45 |                                                                   {"type": "program",
 46 |                                                                   "body": [
 47 |                                                                     {
 48 |                                                                       "type": "arithCall",
 49 |                                                                       "name": "/",
 50 | [ { type: 'parren', value: '(' },                                     "params": [
 51 | { type: 'arith', value: '/' },                                          {
 52 | { type: 'parren', value: '(' },                                           "type": "arithCall",
 53 | { type: 'arith', value: '-' },                                            "name": "-",
 54 | { type: 'number', value: '72' },                                          "params": [
 55 | { type: 'number', value: '10' },        语法分析(parse)                      { "type": "number", "value": "72" },
 56 | { type: 'parren', value: ')' },      ==================>>>                  { "type": "number", "value": "10" }
 57 | { type: 'parren', value: '(' },                                           ]
 58 | { type: 'arith', value: '+' },                                          },
 59 | { type: 'number', value: '4.5' },                                       {
 60 | { type: 'number', value: '2' },                                           "type": "arithCall",
 61 | { type: 'parren', value: ')' },                                           "name": "+",
 62 | { type: 'parren', value: ')' } ]                                          "params": [
 63 |                                                                             { "type": "number", "value": "4.5" },
 64 |                                                                             { "type": "number", "value": "2" }
 65 |                                                                           ]
 66 |                                                                         }
 67 |                                                                       ]
 68 |                                                                     }
 69 |                                                                   ]}
 70 | ```
 71 | 
 72 | ### 转换器(transform)
 73 | 转换器的作用是将符合LISP语法的语法树(AST)转换为符合Javascript语法的语法树。转换后如下：
 74 | 
 75 | ```javascript
 76 | {
 77 |   "type": "program",
 78 |   "body": [
 79 |     {
 80 |       "type": "arithExpression",
 81 |       "name": "/",
 82 |       "arguments": [
 83 |         {
 84 |           "type": "arithExpression",
 85 |           "name": "-",
 86 |           "arguments": [ 
 87 |             { "type": "number", "value": "72" },
 88 |             { "type": "number", "value": "10" }
 89 |           ]
 90 |         },
 91 |         {
 92 |           "type": "arithExpression",
 93 |           "name": "+",
 94 |           "arguments": [
 95 |             { "type": "number", "value": "4.5" },
 96 |             { "type": "number", "value": "2" }
 97 |           ]
 98 |         }
 99 |       ]
100 |     }
101 |   ]
102 | }
103 | ```
104 | ### 代码生成器(generator)
105 | 
106 | 代码生成器的作用是将newAst生成javascript语法形式的代码。具体实现参考generation.js，结果如下：
107 | 
108 | ```javascript
109 | ((72 - 10) / (4.5 + 2))
110 | ```
111 | 
112 | ### 参考
113 | 
114 | [https://github.com/thejameskyle/the-super-tiny-compiler](https://github.com/thejameskyle/the-super-tiny-compiler)
115 | 
116 | 
117 | 
118 | 
119 | 
120 | 
121 | 


--------------------------------------------------------------------------------