├── .idea
├── .gitignore
├── misc.xml
├── vcs.xml
├── the-super-tiny-compiler-cn.iml
└── modules.xml
├── my-super-tiny-compiler
├── TOKEN.json
├── AST-TYPE.json
├── generator.js
├── transform.js
├── parser.js
└── lexer.js
├── my-super-tiny-compiler.js
├── test.json
├── README.md
├── test.js
├── optimize-version.js
├── yarn.lock
├── LICENSE
├── super-tiny-compiler-chinese.js
└── super-tiny-compiler.js
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /workspace.xml
3 |
--------------------------------------------------------------------------------
/my-super-tiny-compiler/TOKEN.json:
--------------------------------------------------------------------------------
1 | {
2 | "NUMBER": "number",
3 | "PAREN": "paren",
4 | "NAME": "name",
5 | "SPACE": "space",
6 | "EOF": "EOF"
7 | }
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/my-super-tiny-compiler/AST-TYPE.json:
--------------------------------------------------------------------------------
1 | {
2 | "CALL_EXPRESSION": "CallExpression",
3 | "NUMBER_LITERAL": "NumberLiteral",
4 | "PROGRAM": "Program",
5 | "STATEMENT": "ExpressionStatement",
6 | "ID": "Identifier"
7 | }
--------------------------------------------------------------------------------
/.idea/the-super-tiny-compiler-cn.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/my-super-tiny-compiler.js:
--------------------------------------------------------------------------------
1 | let lexer = require("./my-super-tiny-compiler/lexer").lexer;
2 | let parser = require("./my-super-tiny-compiler/parser").parser;
3 | let transform = require("./my-super-tiny-compiler/transform").transform;
4 | let codeGenerator = require("./my-super-tiny-compiler/generator").generator;
/**
 * Runs the full pipeline (lexer -> parser -> transform -> code generator)
 * on `input`, logging the result of every stage to the console.
 *
 * Any error thrown by a stage is caught and logged rather than propagated,
 * so this function never throws and always returns undefined.
 *
 * @param {string} input - Lisp-style source text to compile.
 */
function compile(input) {
    try {
        const tokenList = lexer(input);
        console.log(tokenList);

        const lispAst = parser(tokenList);
        console.log(JSON.stringify(lispAst));

        const cAst = transform(lispAst);
        console.log(JSON.stringify(cAst));

        const generatedCode = codeGenerator(cAst);
        console.log(generatedCode);
    } catch (error) {
        console.log(error);
    }
}
18 |
// Demo driver: compile a nested sample expression as soon as the module loads.
const sampleProgram = '(add 2 (subtract (add 2 (add 3 4)) 2))';
compile(sampleProgram);
21 |
--------------------------------------------------------------------------------
/my-super-tiny-compiler/generator.js:
--------------------------------------------------------------------------------
1 | let AST_TYPE = require('./AST-TYPE');
2 |
/**
 * Recursively renders a C-style AST node as source text.
 *
 * - Program: every body entry is rendered and joined with newlines.
 * - ExpressionStatement: the wrapped expression followed by ';'.
 * - CallExpression: callee, then the comma-joined arguments in parentheses.
 * - NumberLiteral / Identifier: the stored value / name, returned as-is.
 *
 * @param {object} node - AST node carrying a `type` field.
 * @returns {*} The generated text (or the raw value/name for leaf nodes).
 * @throws {TypeError} When `node.type` is not one of the known node types.
 */
function codeGenerator(node) {
    const kind = node.type;

    if (kind === AST_TYPE.PROGRAM) {
        const statements = node.body.map(codeGenerator);
        return statements.join('\n');
    }
    if (kind === AST_TYPE.STATEMENT) {
        return codeGenerator(node.expression) + ';';
    }
    if (kind === AST_TYPE.CALL_EXPRESSION) {
        const calleeText = codeGenerator(node.callee);
        const argumentTexts = node.arguments.map(codeGenerator);
        return calleeText + '(' + argumentTexts.join(',') + ')';
    }
    if (kind === AST_TYPE.NUMBER_LITERAL) {
        return node.value;
    }
    if (kind === AST_TYPE.ID) {
        return node.name;
    }
    throw new TypeError(kind);
}
19 |
20 | exports.generator = function (node) {
21 | //c代码生成
22 | return codeGenerator(node);
23 | };
--------------------------------------------------------------------------------
/test.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "Program",
3 | "body": [
4 | {
5 | "type": "ExpressionStatement",
6 | "expression": {
7 | "type": "CallExpression",
8 | "callee": {
9 | "type": "Identifier",
10 | "name": "add"
11 | },
12 | "arguments": [
13 | {
14 | "type": "NumberLiteral",
15 | "value": "2"
16 | },
17 | {
18 | "type": "CallExpression",
19 | "callee": {
20 | "type": "Identifier",
21 | "name": "subtract"
22 | },
23 | "arguments": [
24 | {
25 | "type": "NumberLiteral",
26 | "value": "4"
27 | },
28 | {
29 | "type": "NumberLiteral",
30 | "value": "2"
31 | }
32 | ]
33 | }
34 | ]
35 | }
36 | }
37 | ]
38 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ***Welcome to The Super Tiny Compiler!***
4 |
5 | 这是一个超级简单的编译器的例子,包含了现代编译器的几个主要部分,用简单易读的 JavaScript 编写。
6 |
7 | 把这个读完将会有助于你了解*大多数*编译器从前端到后端是如何工作的。
8 |
9 | ### [想直接看代码?点这里](super-tiny-compiler-chinese.js)
10 |
11 | ### 或者... [看看演讲](https://www.youtube.com/watch?v=Tar4WgAfMr4)
12 |
13 | ---
14 |
15 | ### 为啥我要关心这个?
16 |
17 | 确实,大多数人在日常工作中没有必要了解编译器都是如何工作的。但是,编译器无处不在,你使用的很多
18 | 工具的底层原理都是从编译器那儿来的。
19 |
20 | ### 但是编译器太高大上了!
21 |
22 | 额,确实。但这是我们(写编译器的人)的过错,我们把一些本应该很通俗易懂的事情弄得太可怕了,
23 | 让很多人都认为编译器这种东西是可望而不可即的,甚至只有最书呆子的书呆子才能理解。
24 |
25 | ### 好吧,所以我该从哪儿开始?
26 |
27 | 太棒了!直接去看 [super-tiny-compiler-chinese.js](super-tiny-compiler-chinese.js) 这个文件吧!
28 |
29 | ### Tests
30 |
31 | 直接运行 `node test.js`
32 |
33 | ---
34 |
35 | [Creative Commons Attribution 4.0 International License](http://creativecommons.org/licenses/by/4.0/)
36 |
37 | ## PS
38 |
39 | - 该项目更改自 https://github.com/starkwang/the-super-tiny-compiler-cn
40 | - 该项目里面有多个版本的 super-tiny-compiler,包含原始版本,第三方中文翻译版本,第三方优化版本以及本人修改版。
41 | - 原始版本:[super-tiny-compiler.js](super-tiny-compiler.js) (地址:https://github.com/ayqy/the-super-tiny-compiler)
42 | - 第三方翻译版:[super-tiny-compiler-chinese.js](super-tiny-compiler-chinese.js) (地址:https://github.com/starkwang/the-super-tiny-compiler-cn)
43 | - 第三方优化版:[optimize-version.js](optimize-version.js)(该版本已经找不到来源了,如果原作者看到,请告知。)
44 | - 本人修改版:[my-super-tiny-compiler.js](my-super-tiny-compiler.js)
45 |
46 |
--------------------------------------------------------------------------------
/my-super-tiny-compiler/transform.js:
--------------------------------------------------------------------------------
1 | let AST_TYPE = require('./AST-TYPE');
2 |
/**
 * Recursively converts one node of the Lisp-style AST into the matching
 * node of the C-style AST.
 *
 * - NumberLiteral nodes are copied (type and value only).
 * - Program nodes become a Program whose body holds the converted children.
 * - CallExpression nodes become a CallExpression with an Identifier callee
 *   and converted arguments; when the parent is not itself a CallExpression
 *   (or there is no parent) the call is wrapped in an ExpressionStatement.
 *
 * @param {object} lispASTNode - Node of the source AST (must have `type`).
 * @param {object|null} parentNode - Parent of `lispASTNode`, or null for the root.
 * @returns {object} The converted node.
 * @throws {TypeError} When the node type is not recognised.
 */
function traverse(lispASTNode, parentNode) {
    if (lispASTNode.type === AST_TYPE.NUMBER_LITERAL) {
        return {
            type: AST_TYPE.NUMBER_LITERAL,
            value: lispASTNode.value
        };
    }

    let lispArray; // children of the source node
    let cNode;     // node returned to the caller
    let cArray;    // array that receives the converted children

    if (lispASTNode.type === AST_TYPE.PROGRAM) {
        lispArray = lispASTNode.body;
        cNode = {
            type: AST_TYPE.PROGRAM,
            body: []
        };
        cArray = cNode.body;
    } else if (lispASTNode.type === AST_TYPE.CALL_EXPRESSION) {
        lispArray = lispASTNode.params;
        cNode = {
            type: AST_TYPE.CALL_EXPRESSION,
            callee: {
                type: AST_TYPE.ID,
                name: lispASTNode.name,
            },
            arguments: [],
        };
        cArray = cNode.arguments;
        // A call whose parent is not another call is a top-level call and
        // therefore a statement: wrap it, but keep filling its arguments.
        if (parentNode === null || parentNode.type !== AST_TYPE.CALL_EXPRESSION) {
            cNode = {
                type: AST_TYPE.STATEMENT,
                expression: cNode
            };
            cArray = cNode.expression.arguments;
        }
    } else {
        // Report the node type: concatenating the node object itself would
        // only ever produce the useless text "[object Object]".
        throw new TypeError("Unrecognized ASTNode: " + lispASTNode.type);
    }

    for (let index = 0; index < lispArray.length; index++) {
        cArray.push(traverse(lispArray[index], lispASTNode));
    }
    return cNode;
}
49 |
50 | exports.transform = function (lispAST) {
51 | //将lisp的AST转换为c的AST
52 | return traverse(lispAST, null);
53 | };
--------------------------------------------------------------------------------
/my-super-tiny-compiler/parser.js:
--------------------------------------------------------------------------------
1 | const TOKEN = require('./TOKEN.json');
2 | const ASTType = require('./AST-TYPE.json');
3 | let curIndex = 0;//指向待处理的Token
4 | let tokens;//token列表
5 | let id = 0;
/**
 * Consumes one token: returns the token at the module-level cursor
 * `curIndex` and advances the cursor by one.
 *
 * @returns {*} The token at the previous cursor position (undefined past the end).
 */
function getNextToken() {
    const position = curIndex++;
    return tokens[position];
}
9 |
/**
 * Peeks at the token under the module-level cursor `curIndex`
 * without consuming it.
 *
 * @returns {*} The upcoming token (undefined past the end).
 */
function lookAhead() {
    const upcoming = tokens[curIndex];
    return upcoming;
}
13 |
/**
 * Tells whether the upcoming (not yet consumed) token has the given type.
 *
 * @param {string} tokenType - Token type to compare against.
 * @returns {boolean} True when the token at `curIndex` has type `tokenType`.
 */
function matchNextToken(tokenType) {
    const upcoming = tokens[curIndex];
    return upcoming.type === tokenType;
}
17 |
/**
 * Parses one expression starting at the current token and returns its
 * AST node, consuming every token that belongs to it.
 *
 * - A number token yields a NumberLiteral node.
 * - "(" followed by a name yields a CallExpression node whose params are
 *   parsed recursively (depth-first) until the matching ")" is consumed.
 *
 * Each node receives the next value of the module-level `id` counter.
 *
 * @returns {object} The parsed AST node.
 * @throws {TypeError} When the token stream ends prematurely or the
 *   current token cannot start an expression.
 */
function generateASTNode() {
    let token = getNextToken();
    if (token === undefined) {
        throw new TypeError("Unexpected end of input");
    }

    // Number literal.
    if (token.type === TOKEN.NUMBER) {
        return {
            id: id++,
            type: ASTType.NUMBER_LITERAL,
            value: token.value
        };
    }

    // Function call: "(" name params... ")".
    if (token.type === TOKEN.PAREN && token.value === '(') {
        token = getNextToken();
        if (token === undefined) {
            throw new TypeError("Unexpected end of input after '('");
        }
        if (token.type === TOKEN.NAME) {
            let astNode = {
                id: id++,
                type: ASTType.CALL_EXPRESSION,
                name: token.value,
                params: []
            };
            token = lookAhead();
            // Collect parameters depth-first until the closing parenthesis.
            while (token !== undefined && token.value !== ')') {
                astNode.params.push(generateASTNode());
                token = lookAhead();
            }
            if (token === undefined) {
                throw new TypeError("Unexpected end of input: missing ')'");
            }
            getNextToken(); // consume the closing parenthesis
            return astNode;
        }
    }

    // Describe the offending token by type and value; concatenating the
    // token object itself would only print "[object Object]".
    throw new TypeError(
        "Unrecognized token: " + token.type + " '" + token.value + "'"
    );
}
55 |
56 | exports.parser = function (outerTokens) {
57 | //语法分析器
58 | tokens = outerTokens;
59 | let ast = {
60 | id:id++,
61 | type: ASTType.PROGRAM,
62 | body: []
63 | };
64 |
65 | while (!matchNextToken(TOKEN.EOF)) {
66 | //一直读取token直到文件末尾
67 | ast.body.push(generateASTNode());
68 | }
69 | return ast;
70 | };
--------------------------------------------------------------------------------
/my-super-tiny-compiler/lexer.js:
--------------------------------------------------------------------------------
1 | let tokens = [];//存储最终的token
2 | let curIndex = 0;//input的指针,永远指向待读取的字符
3 | const TOKEN = require('./TOKEN.json');
4 | let input;
5 |
/**
 * Consumes one character: returns the character of the module-level `input`
 * at the cursor `curIndex` and advances the cursor by one.
 *
 * @returns {*} The character read (undefined past the end of the input).
 */
function getNextChar() {
    const position = curIndex++;
    return input[position];
}
10 |
/**
 * Peeks at the next unread character of the module-level `input`
 * without consuming it.
 *
 * @returns {*} The upcoming character (undefined past the end of the input).
 */
function lookAhead() {
    const upcoming = input[curIndex];
    return upcoming;
}
15 |
/**
 * Classifies a single character into a token category.
 *
 * @param {string|undefined|null} char - Character to classify; undefined/null
 *   stands for "no character" (a read past the end of the input).
 * @returns {string|undefined} TOKEN.PAREN for "(" or ")", TOKEN.NUMBER for a
 *   digit, TOKEN.SPACE for whitespace, TOKEN.NAME for an ASCII letter, and
 *   undefined for anything else (including a missing character).
 */
function testNext(char) {
    // RegExp.prototype.test coerces its argument to a string, so a missing
    // character would be tested as the text "undefined"/"null" and wrongly
    // reported as a NAME, letting a name at the end of the input be extended
    // forever. Treat "no character" as unclassifiable instead.
    if (char === undefined || char === null) {
        return undefined;
    }
    if (char === "(" || char === ")") {
        return TOKEN.PAREN;
    }
    let number = /\d/;
    let space = /\s/;
    let name = /[A-Za-z]/;
    if (number.test(char)) {
        return TOKEN.NUMBER;
    }
    if (space.test(char)) {
        return TOKEN.SPACE;
    }
    if (name.test(char)) {
        return TOKEN.NAME;
    }
    return undefined;
}
35 |
/**
 * Tells whether `testNext` classifies the character as a number.
 *
 * @param {*} char - Character to classify.
 * @returns {boolean} True when the category equals TOKEN.NUMBER.
 */
function isNumber(char) {
    const category = testNext(char);
    return category === TOKEN.NUMBER;
}
39 |
/**
 * Tells whether `testNext` classifies the character as part of a name.
 *
 * @param {*} char - Character to classify.
 * @returns {boolean} True when the category equals TOKEN.NAME.
 */
function isName(char) {
    const category = testNext(char);
    return category === TOKEN.NAME;
}
43 |
44 | exports.lexer = function (outerInput) {
45 | input = outerInput;
46 | //词法分析器
47 | let pushToken = (type, value) => {
48 | tokens.push({
49 | type: type,
50 | value: value
51 | })
52 | };
53 | while (curIndex < input.length) {
54 | let char = getNextChar(input);
55 | switch (testNext(char)) {
56 | case TOKEN.NAME:
57 | //循环识别name
58 | let nameValue = char;
59 | while (isName(lookAhead(input))) {
60 | //下一个是name,读取
61 | nameValue += getNextChar(input)
62 | }
63 | pushToken(TOKEN.NAME, nameValue);
64 | break;
65 | case TOKEN.PAREN:
66 | pushToken(TOKEN.PAREN, char);
67 | break;
68 | case TOKEN.NUMBER:
69 | let numberValue = char;
70 | while (isNumber(lookAhead(input))) {
71 | //下一个是number,读取
72 | numberValue += getNextChar(input)
73 | }
74 | pushToken(TOKEN.NUMBER, numberValue);
75 | break;
76 | case TOKEN.SPACE:
77 | //空格不处理
78 | break;
79 | default:
80 | throw new TypeError("Unrecognized token: " + char);
81 | }
82 |
83 | }
84 | pushToken(TOKEN.EOF, TOKEN.EOF);
85 | return tokens;
86 | };
87 |
88 |
--------------------------------------------------------------------------------
/test.js:
--------------------------------------------------------------------------------
1 | var superTinyCompiler = require('./super-tiny-compiler-chinese');
2 | // var assert = require('assert');
3 |
4 | var tokenizer = superTinyCompiler.tokenizer;
5 | var parser = superTinyCompiler.parser;
6 | var transformer = superTinyCompiler.transformer;
7 | var codeGenerator = superTinyCompiler.codeGenerator;
8 | var compiler = superTinyCompiler.compiler;
9 |
// Sample program and the code it is expected to compile to. The input must
// describe `add(2, subtract(4, 2))` so that it agrees with `output` and with
// the token/AST fixtures in this file.
var input = '(add 2 (subtract 4 2))';
var output = 'add(2, subtract(4, 2));';
12 |
13 | /*var tokens = [
14 | {type: 'paren', value: '('},
15 | {type: 'name', value: 'add'},
16 | {type: 'number', value: '2'},
17 | {type: 'paren', value: '('},
18 | {type: 'name', value: 'subtract'},
19 | {type: 'number', value: '4'},
20 | {type: 'number', value: '2'},
21 | {type: 'paren', value: ')'},
22 | {type: 'paren', value: ')'}
23 | ];
24 |
25 | var ast = {
26 | type: 'Program',
27 | body: [
28 | {
29 | type: 'CallExpression',
30 | name: 'add',
31 | params: [
32 | {
33 | type: 'NumberLiteral',
34 | value: '2'
35 | },
36 | {
37 | type: 'CallExpression',
38 | name: 'subtract',
39 | params: [
40 | {
41 | type: 'NumberLiteral',
42 | value: '4'
43 | },
44 | {
45 | type: 'NumberLiteral',
46 | value: '2'
47 | }]
48 | }]
49 | }]
50 | };
51 | */
52 | var newAst = {
53 | type: 'Program',
54 | body: [
55 | {
56 | type: 'ExpressionStatement',
57 | expression: {
58 | type: 'CallExpression',
59 | callee: {
60 | type: 'Identifier',
61 | name: 'add'
62 | },
63 | arguments: [
64 | {
65 | type: 'NumberLiteral',
66 | value: '2'
67 | },
68 | {
69 | type: 'CallExpression',
70 | callee: {
71 | type: 'Identifier',
72 | name: 'subtract'
73 | },
74 | arguments: [
75 | {
76 | type: 'NumberLiteral',
77 | value: '4'
78 | }, {
79 | type: 'NumberLiteral',
80 | value: '2'
81 | }]
82 | }]
83 | }
84 | }]
85 | };
86 |
87 | /*assert.deepStrictEqual(tokenizer(input), tokens, 'Tokenizer should turn `input` string into `tokens` array');
88 | assert.deepStrictEqual(parser(tokens), ast, 'Parser should turn `tokens` array into `ast`');
89 | assert.deepStrictEqual(transformer(ast), newAst, 'Transformer should turn `ast` into a `newAst`');
90 | assert.deepStrictEqual(codeGenerator(newAst), output, 'Code Generator should turn `newAst` into `output` string');
91 | assert.deepStrictEqual(compiler(input), output, 'Compiler should turn `input` into `output`');*/
// Run each compiler stage on `input` and print the intermediate results.
let tokens = tokenizer(input);
console.log(tokens);
let ast = parser(tokens);
console.log(ast);
// Must not be called `newAst`: that name is already declared with `var` for
// the expected-AST fixture in this file, and redeclaring it with `let` is a
// SyntaxError that prevents the whole script from loading.
let transformedAst = transformer(ast);
console.log(transformedAst);
// NOTE(review): nothing is asserted while the assert calls stay commented
// out, so this message only means that no stage threw.
console.log('All Passed!');
99 |
--------------------------------------------------------------------------------
/optimize-version.js:
--------------------------------------------------------------------------------
1 | //TODO 非本人编写!!!
2 |
3 | // 接受代码字符串input
/**
 * Splits source text into lexical tokens.
 *
 * Recognises parentheses, runs of digits, double-quoted strings (no escape
 * sequences) and runs of ASCII letters; whitespace is skipped.
 *
 * @param {string} input - Source text.
 * @returns {Array<{type: string, value: string}>} Tokens in source order.
 *   String tokens carry their content without the surrounding quotes.
 * @throws {TypeError} When the remaining text starts with an unknown character.
 */
function tokenizer(input) {
  // Text that still has to be consumed.
  let rest = input;
  const tokens = [];
  // One anchored pattern per lexeme kind.
  const REGEX = {
    PAREN: /^\(|^\)/,
    WHITESPACE: /^\s+/,
    NUMBERS: /^\d+/,
    STRING: /^"([^"]+)?"/,
    NAME: /^[a-z]+/i
  };

  while (rest.length > 0) {
    let type;
    let value;
    let matched;

    if ((matched = rest.match(REGEX.PAREN))) {
      type = 'paren';
    } else if ((matched = rest.match(REGEX.WHITESPACE))) {
      // Whitespace produces no token.
      rest = rest.slice(matched[0].length);
      continue;
    } else if ((matched = rest.match(REGEX.NUMBERS))) {
      type = 'number';
    } else if ((matched = rest.match(REGEX.STRING))) {
      type = 'string';
      // The capture group is undefined for the empty literal "".
      value = matched[1] === undefined ? '' : matched[1];
    } else if ((matched = rest.match(REGEX.NAME))) {
      type = 'name';
    } else {
      throw new TypeError('Unexpected character: ' + rest);
    }

    // Fall back to the matched text only when no value was set above. A
    // truthiness test here would replace the legitimate empty-string value
    // with the quoted lexeme `""`.
    if (value === undefined) {
      value = matched[0];
    }
    tokens.push({type, value});
    rest = rest.slice(matched[0].length);
  }

  return tokens;
}
59 |
/**
 * Builds the Lisp-style AST from a token list.
 *
 * @param {Array<{type: string, value: string}>} tokens - Tokens to parse.
 * @returns {{type: string, body: Array<object>}} Program node with one body
 *   entry per top-level expression.
 * @throws {TypeError} When a token cannot start an expression (the message
 *   is the token type) or when the tokens end inside a call expression.
 */
function parser(tokens) {
  // Index of the token currently being processed.
  let current = 0;

  // Parses one expression; recursive because calls can be nested.
  function walk() {
    let token = tokens[current];
    if (token === undefined) {
      throw new TypeError('Unexpected end of input');
    }

    if (token.type === 'number') {
      current++;
      return {
        type: 'NumberLiteral',
        value: token.value,
      };
    }

    if (token.type === 'string') {
      current++;
      return {
        type: 'StringLiteral',
        value: token.value,
      };
    }

    if (token.type === 'paren' && token.value === '(') {
      // Skip "(" and take the next token as the function name.
      token = tokens[++current];
      if (token === undefined) {
        throw new TypeError('Unexpected end of input after "("');
      }

      const node = {
        type: 'CallExpression',
        name: token.value,
        params: [],
      };

      token = tokens[++current];

      // Everything up to the closing ")" is a parameter. Running out of
      // tokens first means the call was never closed.
      while (
        token !== undefined &&
        !(token.type === 'paren' && token.value === ')')
      ) {
        node.params.push(walk());
        token = tokens[current];
      }
      if (token === undefined) {
        throw new TypeError('Unexpected end of input: missing ")"');
      }
      // Consume ")".
      current++;

      return node;
    }

    throw new TypeError(token.type);
  }

  const ast = {
    type: 'Program',
    body: [],
  };
  // Several top-level expressions are allowed, hence the loop.
  while (current < tokens.length) {
    ast.body.push(walk());
  }

  return ast;
}
136 |
/**
 * Walks the AST depth-first and notifies `visitor` about every node.
 *
 * For each node, `visitor[node.type].enter(node, parent)` is called (when
 * present) before the node's children are visited, and
 * `visitor[node.type].exit(node, parent)` (when present) afterwards.
 * The root is visited with a null parent.
 *
 * @param {object} ast - Root node of the tree to walk.
 * @param {object} visitor - Map from node type to `{enter, exit}` callbacks.
 * @throws {TypeError} When a node has an unknown type (message is the type).
 */
function traverser(ast, visitor) {
  // Visits every element of a child list with the same parent.
  function visitAll(children, parent) {
    children.forEach(function (child) {
      visit(child, parent);
    });
  }

  function visit(node, parent) {
    const handlers = visitor[node.type];
    if (handlers && handlers.enter) {
      handlers.enter(node, parent);
    }

    const kind = node.type;
    if (kind === 'Program') {
      visitAll(node.body, node);
    } else if (kind === 'CallExpression') {
      visitAll(node.params, node);
    } else if (kind !== 'NumberLiteral' && kind !== 'StringLiteral') {
      // Literals have no children; anything else is unknown.
      throw new TypeError(kind);
    }

    if (handlers && handlers.exit) {
      handlers.exit(node, parent);
    }
  }

  visit(ast, null);
}
181 |
182 | // 输入Lisp AST,输出C AST
/**
 * Converts the Lisp-style AST into the C-style AST.
 *
 * Literals are copied; every CallExpression becomes a CallExpression with an
 * Identifier callee and an `arguments` array, and is wrapped in an
 * ExpressionStatement when its parent is not another CallExpression.
 *
 * @param {object} ast - Program node of the Lisp-style AST.
 * @returns {{type: string, body: Array<object>}} Program node of the new AST.
 */
function transformer(ast) {
  const newAst = {
    type: 'Program',
    body: [],
  };

  // Stack of arrays in the new AST; the top one receives the node currently
  // being converted. Starts with the program body.
  const stack = [newAst.body];
  function peek() {
    return stack[stack.length - 1];
  }

  traverser(ast, {
    // Literals are copied into the current host array.
    NumberLiteral: {
      enter(node, parent) {
        peek().push({
          type: 'NumberLiteral',
          value: node.value,
        });
      }
    },
    StringLiteral: {
      enter(node, parent) {
        peek().push({
          type: 'StringLiteral',
          value: node.value,
        });
      }
    },
    CallExpression: {
      enter(node, parent) {
        const host = peek();
        let expression = {
          type: 'CallExpression',
          callee: {
            type: 'Identifier',
            name: node.name,
          },
          arguments: [],
        };

        // Children of this call go into its arguments from now on.
        stack.push(expression.arguments);

        // A call that is not nested in another call is a statement.
        if (parent.type !== 'CallExpression') {
          expression = {
            type: 'ExpressionStatement',
            expression: expression,
          };
        }

        host.push(expression);
      },
      // Must be named `exit`: that is the only hook besides `enter` that
      // `traverser` invokes. Under any other name the stack is never popped
      // and later siblings end up inside the wrong call's argument list.
      exit(node, parent) {
        stack.pop();
      }
    }
  });

  return newAst;
}
254 |
255 | // 递归遍历新AST,输出代码字符串
/**
 * Recursively renders a node of the new (C-style) AST as source text.
 *
 * - Program: body entries rendered and joined with newlines.
 * - ExpressionStatement: the expression followed by ';'.
 * - CallExpression: callee, then arguments joined by ', ' in parentheses.
 * - Identifier / NumberLiteral: the name / value, returned as-is.
 * - StringLiteral: the value wrapped in double quotes.
 *
 * @param {object} node - AST node carrying a `type` field.
 * @returns {*} The generated text (or the raw name/value for leaf nodes).
 * @throws {TypeError} When the node type is unknown (message is the type).
 */
function codeGenerator(node) {
  const kind = node.type;

  if (kind === 'Program') {
    const lines = node.body.map(codeGenerator);
    return lines.join('\n');
  }
  if (kind === 'ExpressionStatement') {
    return codeGenerator(node.expression) + ';';
  }
  if (kind === 'CallExpression') {
    const calleeText = codeGenerator(node.callee);
    const argumentTexts = node.arguments.map(codeGenerator);
    return calleeText + '(' + argumentTexts.join(', ') + ')';
  }
  if (kind === 'Identifier') {
    return node.name;
  }
  if (kind === 'NumberLiteral') {
    return node.value;
  }
  if (kind === 'StringLiteral') {
    return '"' + node.value + '"';
  }
  throw new TypeError(kind);
}
292 |
/**
 * Compiles source text by chaining the four stages:
 * tokenizer -> parser -> transformer -> codeGenerator.
 *
 * @param {string} input - Source text to compile.
 * @returns {*} Whatever `codeGenerator` produces for the transformed AST.
 */
function compiler(input) {
  return codeGenerator(transformer(parser(tokenizer(input))));
}
301 |
302 | // test
303 | // const input = '(add 2 (subtract 4 2))';
304 | // let output = compiler(input);
305 | // console.log(output);
306 |
307 | module.exports = {
308 | tokenizer,
309 | parser,
310 | traverser,
311 | transformer,
312 | codeGenerator,
313 | compiler,
314 | };
315 |
--------------------------------------------------------------------------------
/yarn.lock:
--------------------------------------------------------------------------------
1 | # THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
2 | # yarn lockfile v1
3 |
4 |
5 | assert@^2.0.0:
6 | version "2.0.0"
7 | resolved "https://registry.yarnpkg.com/assert/-/assert-2.0.0.tgz#95fc1c616d48713510680f2eaf2d10dd22e02d32"
8 | integrity sha512-se5Cd+js9dXJnu6Ag2JFc00t+HmHOen+8Q+L7O9zI0PqQXr20uk2J0XQqMxZEeo5U50o8Nvmmx7dZrl+Ufr35A==
9 | dependencies:
10 | es6-object-assign "^1.1.0"
11 | is-nan "^1.2.1"
12 | object-is "^1.0.1"
13 | util "^0.12.0"
14 |
15 | define-properties@^1.1.2, define-properties@^1.1.3:
16 | version "1.1.3"
17 | resolved "https://registry.yarnpkg.com/define-properties/-/define-properties-1.1.3.tgz#cf88da6cbee26fe6db7094f61d870cbd84cee9f1"
18 | integrity sha512-3MqfYKj2lLzdMSf8ZIZE/V+Zuy+BgD6f164e8K2w7dgnpKArBDerGYpM46IYYcjnkdPNMjPk9A6VFB8+3SKlXQ==
19 | dependencies:
20 | object-keys "^1.0.12"
21 |
22 | es-abstract@^1.17.0-next.1:
23 | version "1.17.4"
24 | resolved "https://registry.yarnpkg.com/es-abstract/-/es-abstract-1.17.4.tgz#e3aedf19706b20e7c2594c35fc0d57605a79e184"
25 | integrity sha512-Ae3um/gb8F0mui/jPL+QiqmglkUsaQf7FwBEHYIFkztkneosu9imhqHpBzQ3h1vit8t5iQ74t6PEVvphBZiuiQ==
26 | dependencies:
27 | es-to-primitive "^1.2.1"
28 | function-bind "^1.1.1"
29 | has "^1.0.3"
30 | has-symbols "^1.0.1"
31 | is-callable "^1.1.5"
32 | is-regex "^1.0.5"
33 | object-inspect "^1.7.0"
34 | object-keys "^1.1.1"
35 | object.assign "^4.1.0"
36 | string.prototype.trimleft "^2.1.1"
37 | string.prototype.trimright "^2.1.1"
38 |
39 | es-to-primitive@^1.2.1:
40 | version "1.2.1"
41 | resolved "https://registry.yarnpkg.com/es-to-primitive/-/es-to-primitive-1.2.1.tgz#e55cd4c9cdc188bcefb03b366c736323fc5c898a"
42 | integrity sha512-QCOllgZJtaUo9miYBcLChTUaHNjJF3PYs1VidD7AwiEj1kYxKeQTctLAezAOH5ZKRH0g2IgPn6KwB4IT8iRpvA==
43 | dependencies:
44 | is-callable "^1.1.4"
45 | is-date-object "^1.0.1"
46 | is-symbol "^1.0.2"
47 |
48 | es6-object-assign@^1.1.0:
49 | version "1.1.0"
50 | resolved "https://registry.yarnpkg.com/es6-object-assign/-/es6-object-assign-1.1.0.tgz#c2c3582656247c39ea107cb1e6652b6f9f24523c"
51 | integrity sha1-wsNYJlYkfDnqEHyx5mUrb58kUjw=
52 |
53 | fs@^0.0.1-security:
54 | version "0.0.1-security"
55 | resolved "https://registry.yarnpkg.com/fs/-/fs-0.0.1-security.tgz#8a7bd37186b6dddf3813f23858b57ecaaf5e41d4"
56 | integrity sha1-invTcYa23d84E/I4WLV+yq9eQdQ=
57 |
58 | function-bind@^1.1.1:
59 | version "1.1.1"
60 | resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d"
61 | integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==
62 |
63 | has-symbols@^1.0.0, has-symbols@^1.0.1:
64 | version "1.0.1"
65 | resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.0.1.tgz#9f5214758a44196c406d9bd76cebf81ec2dd31e8"
66 | integrity sha512-PLcsoqu++dmEIZB+6totNFKq/7Do+Z0u4oT0zKOJNl3lYK6vGwwu2hjHs+68OEZbTjiUE9bgOABXbP/GvrS0Kg==
67 |
68 | has@^1.0.3:
69 | version "1.0.3"
70 | resolved "https://registry.yarnpkg.com/has/-/has-1.0.3.tgz#722d7cbfc1f6aa8241f16dd814e011e1f41e8796"
71 | integrity sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==
72 | dependencies:
73 | function-bind "^1.1.1"
74 |
75 | inherits@^2.0.3:
76 | version "2.0.4"
77 | resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c"
78 | integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==
79 |
80 | is-arguments@^1.0.4:
81 | version "1.0.4"
82 | resolved "https://registry.yarnpkg.com/is-arguments/-/is-arguments-1.0.4.tgz#3faf966c7cba0ff437fb31f6250082fcf0448cf3"
83 | integrity sha512-xPh0Rmt8NE65sNzvyUmWgI1tz3mKq74lGA0mL8LYZcoIzKOzDh6HmrYm3d18k60nHerC8A9Km8kYu87zfSFnLA==
84 |
85 | is-callable@^1.1.4, is-callable@^1.1.5:
86 | version "1.1.5"
87 | resolved "https://registry.yarnpkg.com/is-callable/-/is-callable-1.1.5.tgz#f7e46b596890456db74e7f6e976cb3273d06faab"
88 | integrity sha512-ESKv5sMCJB2jnHTWZ3O5itG+O128Hsus4K4Qh1h2/cgn2vbgnLSVqfV46AeJA9D5EeeLa9w81KUXMtn34zhX+Q==
89 |
90 | is-date-object@^1.0.1:
91 | version "1.0.2"
92 | resolved "https://registry.yarnpkg.com/is-date-object/-/is-date-object-1.0.2.tgz#bda736f2cd8fd06d32844e7743bfa7494c3bfd7e"
93 | integrity sha512-USlDT524woQ08aoZFzh3/Z6ch9Y/EWXEHQ/AaRN0SkKq4t2Jw2R2339tSXmwuVoY7LLlBCbOIlx2myP/L5zk0g==
94 |
95 | is-generator-function@^1.0.7:
96 | version "1.0.7"
97 | resolved "https://registry.yarnpkg.com/is-generator-function/-/is-generator-function-1.0.7.tgz#d2132e529bb0000a7f80794d4bdf5cd5e5813522"
98 | integrity sha512-YZc5EwyO4f2kWCax7oegfuSr9mFz1ZvieNYBEjmukLxgXfBUbxAWGVF7GZf0zidYtoBl3WvC07YK0wT76a+Rtw==
99 |
100 | is-nan@^1.2.1:
101 | version "1.3.0"
102 | resolved "https://registry.yarnpkg.com/is-nan/-/is-nan-1.3.0.tgz#85d1f5482f7051c2019f5673ccebdb06f3b0db03"
103 | integrity sha512-z7bbREymOqt2CCaZVly8aC4ML3Xhfi0ekuOnjO2L8vKdl+CttdVoGZQhd4adMFAsxQ5VeRVwORs4tU8RH+HFtQ==
104 | dependencies:
105 | define-properties "^1.1.3"
106 |
107 | is-regex@^1.0.5:
108 | version "1.0.5"
109 | resolved "https://registry.yarnpkg.com/is-regex/-/is-regex-1.0.5.tgz#39d589a358bf18967f726967120b8fc1aed74eae"
110 | integrity sha512-vlKW17SNq44owv5AQR3Cq0bQPEb8+kF3UKZ2fiZNOWtztYE5i0CzCZxFDwO58qAOWtxdBRVO/V5Qin1wjCqFYQ==
111 | dependencies:
112 | has "^1.0.3"
113 |
114 | is-symbol@^1.0.2:
115 | version "1.0.3"
116 | resolved "https://registry.yarnpkg.com/is-symbol/-/is-symbol-1.0.3.tgz#38e1014b9e6329be0de9d24a414fd7441ec61937"
117 | integrity sha512-OwijhaRSgqvhm/0ZdAcXNZt9lYdKFpcRDT5ULUuYXPoT794UNOdU+gpT6Rzo7b4V2HUl/op6GqY894AZwv9faQ==
118 | dependencies:
119 | has-symbols "^1.0.1"
120 |
121 | lodash@^4.17.15:
122 | version "4.17.15"
123 | resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.15.tgz#b447f6670a0455bbfeedd11392eff330ea097548"
124 | integrity sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A==
125 |
126 | object-inspect@^1.7.0:
127 | version "1.7.0"
128 | resolved "https://registry.yarnpkg.com/object-inspect/-/object-inspect-1.7.0.tgz#f4f6bd181ad77f006b5ece60bd0b6f398ff74a67"
129 | integrity sha512-a7pEHdh1xKIAgTySUGgLMx/xwDZskN1Ud6egYYN3EdRW4ZMPNEDUTF+hwy2LUC+Bl+SyLXANnwz/jyh/qutKUw==
130 |
131 | object-is@^1.0.1:
132 | version "1.0.2"
133 | resolved "https://registry.yarnpkg.com/object-is/-/object-is-1.0.2.tgz#6b80eb84fe451498f65007982f035a5b445edec4"
134 | integrity sha512-Epah+btZd5wrrfjkJZq1AOB9O6OxUQto45hzFd7lXGrpHPGE0W1k+426yrZV+k6NJOzLNNW/nVsmZdIWsAqoOQ==
135 |
136 | object-keys@^1.0.11, object-keys@^1.0.12, object-keys@^1.1.1:
137 | version "1.1.1"
138 | resolved "https://registry.yarnpkg.com/object-keys/-/object-keys-1.1.1.tgz#1c47f272df277f3b1daf061677d9c82e2322c60e"
139 | integrity sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==
140 |
141 | object.assign@^4.1.0:
142 | version "4.1.0"
143 | resolved "https://registry.yarnpkg.com/object.assign/-/object.assign-4.1.0.tgz#968bf1100d7956bb3ca086f006f846b3bc4008da"
144 | integrity sha512-exHJeq6kBKj58mqGyTQ9DFvrZC/eR6OwxzoM9YRoGBqrXYonaFyGiFMuc9VZrXf7DarreEwMpurG3dd+CNyW5w==
145 | dependencies:
146 | define-properties "^1.1.2"
147 | function-bind "^1.1.1"
148 | has-symbols "^1.0.0"
149 | object-keys "^1.0.11"
150 |
151 | object.entries@^1.1.0:
152 | version "1.1.1"
153 | resolved "https://registry.yarnpkg.com/object.entries/-/object.entries-1.1.1.tgz#ee1cf04153de02bb093fec33683900f57ce5399b"
154 | integrity sha512-ilqR7BgdyZetJutmDPfXCDffGa0/Yzl2ivVNpbx/g4UeWrCdRnFDUBrKJGLhGieRHDATnyZXWBeCb29k9CJysQ==
155 | dependencies:
156 | define-properties "^1.1.3"
157 | es-abstract "^1.17.0-next.1"
158 | function-bind "^1.1.1"
159 | has "^1.0.3"
160 |
161 | safe-buffer@^5.1.2:
162 | version "5.2.0"
163 | resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.0.tgz#b74daec49b1148f88c64b68d49b1e815c1f2f519"
164 | integrity sha512-fZEwUGbVl7kouZs1jCdMLdt95hdIv0ZeHg6L7qPeciMZhZ+/gdesW4wgTARkrFWEpspjEATAzUGPG8N2jJiwbg==
165 |
166 | string.prototype.trimleft@^2.1.1:
167 | version "2.1.1"
168 | resolved "https://registry.yarnpkg.com/string.prototype.trimleft/-/string.prototype.trimleft-2.1.1.tgz#9bdb8ac6abd6d602b17a4ed321870d2f8dcefc74"
169 | integrity sha512-iu2AGd3PuP5Rp7x2kEZCrB2Nf41ehzh+goo8TV7z8/XDBbsvc6HQIlUl9RjkZ4oyrW1XM5UwlGl1oVEaDjg6Ag==
170 | dependencies:
171 | define-properties "^1.1.3"
172 | function-bind "^1.1.1"
173 |
174 | string.prototype.trimright@^2.1.1:
175 | version "2.1.1"
176 | resolved "https://registry.yarnpkg.com/string.prototype.trimright/-/string.prototype.trimright-2.1.1.tgz#440314b15996c866ce8a0341894d45186200c5d9"
177 | integrity sha512-qFvWL3/+QIgZXVmJBfpHmxLB7xsUXz6HsUmP8+5dRaC3Q7oKUv9Vo6aMCRZC1smrtyECFsIT30PqBJ1gTjAs+g==
178 | dependencies:
179 | define-properties "^1.1.3"
180 | function-bind "^1.1.1"
181 |
182 | util@^0.12.0:
183 | version "0.12.1"
184 | resolved "https://registry.yarnpkg.com/util/-/util-0.12.1.tgz#f908e7b633e7396c764e694dd14e716256ce8ade"
185 | integrity sha512-MREAtYOp+GTt9/+kwf00IYoHZyjM8VU4aVrkzUlejyqaIjd2GztVl5V9hGXKlvBKE3gENn/FMfHE5v6hElXGcQ==
186 | dependencies:
187 | inherits "^2.0.3"
188 | is-arguments "^1.0.4"
189 | is-generator-function "^1.0.7"
190 | object.entries "^1.1.0"
191 | safe-buffer "^5.1.2"
192 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Creative Commons Attribution 4.0 International
2 |
3 | =======================================================================
4 |
5 | Creative Commons Corporation ("Creative Commons") is not a law firm and
6 | does not provide legal services or legal advice. Distribution of
7 | Creative Commons public licenses does not create a lawyer-client or
8 | other relationship. Creative Commons makes its licenses and related
9 | information available on an "as-is" basis. Creative Commons gives no
10 | warranties regarding its licenses, any material licensed under their
11 | terms and conditions, or any related information. Creative Commons
12 | disclaims all liability for damages resulting from their use to the
13 | fullest extent possible.
14 |
15 | Using Creative Commons Public Licenses
16 |
17 | Creative Commons public licenses provide a standard set of terms and
18 | conditions that creators and other rights holders may use to share
19 | original works of authorship and other material subject to copyright
20 | and certain other rights specified in the public license below. The
21 | following considerations are for informational purposes only, are not
22 | exhaustive, and do not form part of our licenses.
23 |
24 | Considerations for licensors: Our public licenses are
25 | intended for use by those authorized to give the public
26 | permission to use material in ways otherwise restricted by
27 | copyright and certain other rights. Our licenses are
28 | irrevocable. Licensors should read and understand the terms
29 | and conditions of the license they choose before applying it.
30 | Licensors should also secure all rights necessary before
31 | applying our licenses so that the public can reuse the
32 | material as expected. Licensors should clearly mark any
33 | material not subject to the license. This includes other CC-
34 | licensed material, or material used under an exception or
35 | limitation to copyright. More considerations for licensors:
36 | wiki.creativecommons.org/Considerations_for_licensors
37 |
38 | Considerations for the public: By using one of our public
39 | licenses, a licensor grants the public permission to use the
40 | licensed material under specified terms and conditions. If
41 | the licensor's permission is not necessary for any reason--for
42 | example, because of any applicable exception or limitation to
43 | copyright--then that use is not regulated by the license. Our
44 | licenses grant only permissions under copyright and certain
45 | other rights that a licensor has authority to grant. Use of
46 | the licensed material may still be restricted for other
47 | reasons, including because others have copyright or other
48 | rights in the material. A licensor may make special requests,
49 | such as asking that all changes be marked or described.
50 | Although not required by our licenses, you are encouraged to
51 | respect those requests where reasonable. More_considerations
52 | for the public:
53 | wiki.creativecommons.org/Considerations_for_licensees
54 |
55 | =======================================================================
56 |
57 | Creative Commons Attribution 4.0 International Public License
58 |
59 | By exercising the Licensed Rights (defined below), You accept and agree
60 | to be bound by the terms and conditions of this Creative Commons
61 | Attribution 4.0 International Public License ("Public License"). To the
62 | extent this Public License may be interpreted as a contract, You are
63 | granted the Licensed Rights in consideration of Your acceptance of
64 | these terms and conditions, and the Licensor grants You such rights in
65 | consideration of benefits the Licensor receives from making the
66 | Licensed Material available under these terms and conditions.
67 |
68 |
69 | Section 1 -- Definitions.
70 |
71 | a. Adapted Material means material subject to Copyright and Similar
72 | Rights that is derived from or based upon the Licensed Material
73 | and in which the Licensed Material is translated, altered,
74 | arranged, transformed, or otherwise modified in a manner requiring
75 | permission under the Copyright and Similar Rights held by the
76 | Licensor. For purposes of this Public License, where the Licensed
77 | Material is a musical work, performance, or sound recording,
78 | Adapted Material is always produced where the Licensed Material is
79 | synched in timed relation with a moving image.
80 |
81 | b. Adapter's License means the license You apply to Your Copyright
82 | and Similar Rights in Your contributions to Adapted Material in
83 | accordance with the terms and conditions of this Public License.
84 |
85 | c. Copyright and Similar Rights means copyright and/or similar rights
86 | closely related to copyright including, without limitation,
87 | performance, broadcast, sound recording, and Sui Generis Database
88 | Rights, without regard to how the rights are labeled or
89 | categorized. For purposes of this Public License, the rights
90 | specified in Section 2(b)(1)-(2) are not Copyright and Similar
91 | Rights.
92 |
93 | d. Effective Technological Measures means those measures that, in the
94 | absence of proper authority, may not be circumvented under laws
95 | fulfilling obligations under Article 11 of the WIPO Copyright
96 | Treaty adopted on December 20, 1996, and/or similar international
97 | agreements.
98 |
99 | e. Exceptions and Limitations means fair use, fair dealing, and/or
100 | any other exception or limitation to Copyright and Similar Rights
101 | that applies to Your use of the Licensed Material.
102 |
103 | f. Licensed Material means the artistic or literary work, database,
104 | or other material to which the Licensor applied this Public
105 | License.
106 |
107 | g. Licensed Rights means the rights granted to You subject to the
108 | terms and conditions of this Public License, which are limited to
109 | all Copyright and Similar Rights that apply to Your use of the
110 | Licensed Material and that the Licensor has authority to license.
111 |
112 | h. Licensor means the individual(s) or entity(ies) granting rights
113 | under this Public License.
114 |
115 | i. Share means to provide material to the public by any means or
116 | process that requires permission under the Licensed Rights, such
117 | as reproduction, public display, public performance, distribution,
118 | dissemination, communication, or importation, and to make material
119 | available to the public including in ways that members of the
120 | public may access the material from a place and at a time
121 | individually chosen by them.
122 |
123 | j. Sui Generis Database Rights means rights other than copyright
124 | resulting from Directive 96/9/EC of the European Parliament and of
125 | the Council of 11 March 1996 on the legal protection of databases,
126 | as amended and/or succeeded, as well as other essentially
127 | equivalent rights anywhere in the world.
128 |
129 | k. You means the individual or entity exercising the Licensed Rights
130 | under this Public License. Your has a corresponding meaning.
131 |
132 |
133 | Section 2 -- Scope.
134 |
135 | a. License grant.
136 |
137 | 1. Subject to the terms and conditions of this Public License,
138 | the Licensor hereby grants You a worldwide, royalty-free,
139 | non-sublicensable, non-exclusive, irrevocable license to
140 | exercise the Licensed Rights in the Licensed Material to:
141 |
142 | a. reproduce and Share the Licensed Material, in whole or
143 | in part; and
144 |
145 | b. produce, reproduce, and Share Adapted Material.
146 |
147 | 2. Exceptions and Limitations. For the avoidance of doubt, where
148 | Exceptions and Limitations apply to Your use, this Public
149 | License does not apply, and You do not need to comply with
150 | its terms and conditions.
151 |
152 | 3. Term. The term of this Public License is specified in Section
153 | 6(a).
154 |
155 | 4. Media and formats; technical modifications allowed. The
156 | Licensor authorizes You to exercise the Licensed Rights in
157 | all media and formats whether now known or hereafter created,
158 | and to make technical modifications necessary to do so. The
159 | Licensor waives and/or agrees not to assert any right or
160 | authority to forbid You from making technical modifications
161 | necessary to exercise the Licensed Rights, including
162 | technical modifications necessary to circumvent Effective
163 | Technological Measures. For purposes of this Public License,
164 | simply making modifications authorized by this Section 2(a)
165 | (4) never produces Adapted Material.
166 |
167 | 5. Downstream recipients.
168 |
169 | a. Offer from the Licensor -- Licensed Material. Every
170 | recipient of the Licensed Material automatically
171 | receives an offer from the Licensor to exercise the
172 | Licensed Rights under the terms and conditions of this
173 | Public License.
174 |
175 | b. No downstream restrictions. You may not offer or impose
176 | any additional or different terms or conditions on, or
177 | apply any Effective Technological Measures to, the
178 | Licensed Material if doing so restricts exercise of the
179 | Licensed Rights by any recipient of the Licensed
180 | Material.
181 |
182 | 6. No endorsement. Nothing in this Public License constitutes or
183 | may be construed as permission to assert or imply that You
184 | are, or that Your use of the Licensed Material is, connected
185 | with, or sponsored, endorsed, or granted official status by,
186 | the Licensor or others designated to receive attribution as
187 | provided in Section 3(a)(1)(A)(i).
188 |
189 | b. Other rights.
190 |
191 | 1. Moral rights, such as the right of integrity, are not
192 | licensed under this Public License, nor are publicity,
193 | privacy, and/or other similar personality rights; however, to
194 | the extent possible, the Licensor waives and/or agrees not to
195 | assert any such rights held by the Licensor to the limited
196 | extent necessary to allow You to exercise the Licensed
197 | Rights, but not otherwise.
198 |
199 | 2. Patent and trademark rights are not licensed under this
200 | Public License.
201 |
202 | 3. To the extent possible, the Licensor waives any right to
203 | collect royalties from You for the exercise of the Licensed
204 | Rights, whether directly or through a collecting society
205 | under any voluntary or waivable statutory or compulsory
206 | licensing scheme. In all other cases the Licensor expressly
207 | reserves any right to collect such royalties.
208 |
209 |
210 | Section 3 -- License Conditions.
211 |
212 | Your exercise of the Licensed Rights is expressly made subject to the
213 | following conditions.
214 |
215 | a. Attribution.
216 |
217 | 1. If You Share the Licensed Material (including in modified
218 | form), You must:
219 |
220 | a. retain the following if it is supplied by the Licensor
221 | with the Licensed Material:
222 |
223 | i. identification of the creator(s) of the Licensed
224 | Material and any others designated to receive
225 | attribution, in any reasonable manner requested by
226 | the Licensor (including by pseudonym if
227 | designated);
228 |
229 | ii. a copyright notice;
230 |
231 | iii. a notice that refers to this Public License;
232 |
233 | iv. a notice that refers to the disclaimer of
234 | warranties;
235 |
236 | v. a URI or hyperlink to the Licensed Material to the
237 | extent reasonably practicable;
238 |
239 | b. indicate if You modified the Licensed Material and
240 | retain an indication of any previous modifications; and
241 |
242 | c. indicate the Licensed Material is licensed under this
243 | Public License, and include the text of, or the URI or
244 | hyperlink to, this Public License.
245 |
246 | 2. You may satisfy the conditions in Section 3(a)(1) in any
247 | reasonable manner based on the medium, means, and context in
248 | which You Share the Licensed Material. For example, it may be
249 | reasonable to satisfy the conditions by providing a URI or
250 | hyperlink to a resource that includes the required
251 | information.
252 |
253 | 3. If requested by the Licensor, You must remove any of the
254 | information required by Section 3(a)(1)(A) to the extent
255 | reasonably practicable.
256 |
257 | 4. If You Share Adapted Material You produce, the Adapter's
258 | License You apply must not prevent recipients of the Adapted
259 | Material from complying with this Public License.
260 |
261 |
262 | Section 4 -- Sui Generis Database Rights.
263 |
264 | Where the Licensed Rights include Sui Generis Database Rights that
265 | apply to Your use of the Licensed Material:
266 |
267 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right
268 | to extract, reuse, reproduce, and Share all or a substantial
269 | portion of the contents of the database;
270 |
271 | b. if You include all or a substantial portion of the database
272 | contents in a database in which You have Sui Generis Database
273 | Rights, then the database in which You have Sui Generis Database
274 | Rights (but not its individual contents) is Adapted Material; and
275 |
276 | c. You must comply with the conditions in Section 3(a) if You Share
277 | all or a substantial portion of the contents of the database.
278 |
279 | For the avoidance of doubt, this Section 4 supplements and does not
280 | replace Your obligations under this Public License where the Licensed
281 | Rights include other Copyright and Similar Rights.
282 |
283 |
284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
285 |
286 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
287 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
288 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
289 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
290 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
291 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
292 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
293 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
294 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
295 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
296 |
297 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
298 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
299 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
300 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
301 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
302 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
303 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
304 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
305 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
306 |
307 | c. The disclaimer of warranties and limitation of liability provided
308 | above shall be interpreted in a manner that, to the extent
309 | possible, most closely approximates an absolute disclaimer and
310 | waiver of all liability.
311 |
312 |
313 | Section 6 -- Term and Termination.
314 |
315 | a. This Public License applies for the term of the Copyright and
316 | Similar Rights licensed here. However, if You fail to comply with
317 | this Public License, then Your rights under this Public License
318 | terminate automatically.
319 |
320 | b. Where Your right to use the Licensed Material has terminated under
321 | Section 6(a), it reinstates:
322 |
323 | 1. automatically as of the date the violation is cured, provided
324 | it is cured within 30 days of Your discovery of the
325 | violation; or
326 |
327 | 2. upon express reinstatement by the Licensor.
328 |
329 | For the avoidance of doubt, this Section 6(b) does not affect any
330 | right the Licensor may have to seek remedies for Your violations
331 | of this Public License.
332 |
333 | c. For the avoidance of doubt, the Licensor may also offer the
334 | Licensed Material under separate terms or conditions or stop
335 | distributing the Licensed Material at any time; however, doing so
336 | will not terminate this Public License.
337 |
338 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
339 | License.
340 |
341 |
342 | Section 7 -- Other Terms and Conditions.
343 |
344 | a. The Licensor shall not be bound by any additional or different
345 | terms or conditions communicated by You unless expressly agreed.
346 |
347 | b. Any arrangements, understandings, or agreements regarding the
348 | Licensed Material not stated herein are separate from and
349 | independent of the terms and conditions of this Public License.
350 |
351 |
352 | Section 8 -- Interpretation.
353 |
354 | a. For the avoidance of doubt, this Public License does not, and
355 | shall not be interpreted to, reduce, limit, restrict, or impose
356 | conditions on any use of the Licensed Material that could lawfully
357 | be made without permission under this Public License.
358 |
359 | b. To the extent possible, if any provision of this Public License is
360 | deemed unenforceable, it shall be automatically reformed to the
361 | minimum extent necessary to make it enforceable. If the provision
362 | cannot be reformed, it shall be severed from this Public License
363 | without affecting the enforceability of the remaining terms and
364 | conditions.
365 |
366 | c. No term or condition of this Public License will be waived and no
367 | failure to comply consented to unless expressly agreed to by the
368 | Licensor.
369 |
370 | d. Nothing in this Public License constitutes or may be interpreted
371 | as a limitation upon, or waiver of, any privileges and immunities
372 | that apply to the Licensor or You, including from the legal
373 | processes of any jurisdiction or authority.
374 |
375 |
376 | =======================================================================
377 |
378 | Creative Commons is not a party to its public licenses.
379 | Notwithstanding, Creative Commons may elect to apply one of its public
380 | licenses to material it publishes and in those instances will be
381 | considered the "Licensor." Except for the limited purpose of indicating
382 | that material is shared under a Creative Commons public license or as
383 | otherwise permitted by the Creative Commons policies published at
384 | creativecommons.org/policies, Creative Commons does not authorize the
385 | use of the trademark "Creative Commons" or any other trademark or logo
386 | of Creative Commons without its prior written consent including,
387 | without limitation, in connection with any unauthorized modifications
388 | to any of its public licenses or any other arrangements,
389 | understandings, or agreements concerning use of licensed material. For
390 | the avoidance of doubt, this paragraph does not form part of the public
391 | licenses.
392 |
393 | Creative Commons may be contacted at creativecommons.org.
394 |
--------------------------------------------------------------------------------
/super-tiny-compiler-chinese.js:
--------------------------------------------------------------------------------
1 | /**
2 | * TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
3 | * T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E
4 | * T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E
5 | * T:::::TT:::::::TT:::::THH::::::H H::::::HHEE::::::EEEEEEEEE::::E
6 | * TTTTTT T:::::T TTTTTT H:::::H H:::::H E:::::E EEEEEE
7 | * T:::::T H:::::H H:::::H E:::::E
8 | * T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE
9 | * T:::::T H:::::::::::::::::H E:::::::::::::::E
10 | * T:::::T H:::::::::::::::::H E:::::::::::::::E
11 | * T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE
12 | * T:::::T H:::::H H:::::H E:::::E
13 | * T:::::T H:::::H H:::::H E:::::E EEEEEE
14 | * TT:::::::TT HH::::::H H::::::HHEE::::::EEEEEEEE:::::E
15 | * T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E
16 | * T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E
17 | * TTTTTTTTTTT HHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
18 | *
19 | * SSSSSSSSSSSSSSS UUUUUUUU UUUUUUUUPPPPPPPPPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR
20 | * SS:::::::::::::::SU::::::U U::::::UP::::::::::::::::P E::::::::::::::::::::ER::::::::::::::::R
21 | * S:::::SSSSSS::::::SU::::::U U::::::UP::::::PPPPPP:::::P E::::::::::::::::::::ER::::::RRRRRR:::::R
22 | * S:::::S SSSSSSSUU:::::U U:::::UUPP:::::P P:::::PEE::::::EEEEEEEEE::::ERR:::::R R:::::R
23 | * S:::::S U:::::U U:::::U P::::P P:::::P E:::::E EEEEEE R::::R R:::::R
24 | * S:::::S U:::::U U:::::U P::::P P:::::P E:::::E R::::R R:::::R
25 | * S::::SSSS U:::::U U:::::U P::::PPPPPP:::::P E::::::EEEEEEEEEE R::::RRRRRR:::::R
26 | * SS::::::SSSSS U:::::U U:::::U P:::::::::::::PP E:::::::::::::::E R:::::::::::::RR
27 | * SSS::::::::SS U:::::U U:::::U P::::PPPPPPPPP E:::::::::::::::E R::::RRRRRR:::::R
28 | * SSSSSS::::S U:::::U U:::::U P::::P E::::::EEEEEEEEEE R::::R R:::::R
29 | * S:::::S U:::::U U:::::U P::::P E:::::E R::::R R:::::R
30 | * S:::::S U::::::U U::::::U P::::P E:::::E EEEEEE R::::R R:::::R
31 | * SSSSSSS S:::::S U:::::::UUU:::::::U PP::::::PP EE::::::EEEEEEEE:::::ERR:::::R R:::::R
32 | * S::::::SSSSSS:::::S UU:::::::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R
33 | * S:::::::::::::::SS UU:::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R
34 | * SSSSSSSSSSSSSSS UUUUUUUUU PPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR
35 | *
36 | * TTTTTTTTTTTTTTTTTTTTTTTIIIIIIIIIINNNNNNNN NNNNNNNNYYYYYYY YYYYYYY
37 | * T:::::::::::::::::::::TI::::::::IN:::::::N N::::::NY:::::Y Y:::::Y
38 | * T:::::::::::::::::::::TI::::::::IN::::::::N N::::::NY:::::Y Y:::::Y
39 | * T:::::TT:::::::TT:::::TII::::::IIN:::::::::N N::::::NY::::::Y Y::::::Y
40 | * TTTTTT T:::::T TTTTTT I::::I N::::::::::N N::::::NYYY:::::Y Y:::::YYY
41 | * T:::::T I::::I N:::::::::::N N::::::N Y:::::Y Y:::::Y
42 | * T:::::T I::::I N:::::::N::::N N::::::N Y:::::Y:::::Y
43 | * T:::::T I::::I N::::::N N::::N N::::::N Y:::::::::Y
44 | * T:::::T I::::I N::::::N N::::N:::::::N Y:::::::Y
45 | * T:::::T I::::I N::::::N N:::::::::::N Y:::::Y
46 | * T:::::T I::::I N::::::N N::::::::::N Y:::::Y
47 | * T:::::T I::::I N::::::N N:::::::::N Y:::::Y
48 | * TT:::::::TT II::::::IIN::::::N N::::::::N Y:::::Y
49 | * T:::::::::T I::::::::IN::::::N N:::::::N YYYY:::::YYYY
50 | * T:::::::::T I::::::::IN::::::N N::::::N Y:::::::::::Y
51 | * TTTTTTTTTTT IIIIIIIIIINNNNNNNN NNNNNNN YYYYYYYYYYYYY
52 | *
53 | * CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPPPPPPPPP IIIIIIIIIILLLLLLLLLLL EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR
54 | * CCC::::::::::::C OO:::::::::OO M:::::::M M:::::::MP::::::::::::::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::::::::::::R
55 | * CC:::::::::::::::C OO:::::::::::::OO M::::::::M M::::::::MP::::::PPPPPP:::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::RRRRRR:::::R
56 | * C:::::CCCCCCCC::::CO:::::::OOO:::::::OM:::::::::M M:::::::::MPP:::::P P:::::PII::::::IILL:::::::LL EE::::::EEEEEEEEE::::ERR:::::R R:::::R
57 | * C:::::C CCCCCCO::::::O O::::::OM::::::::::M M::::::::::M P::::P P:::::P I::::I L:::::L E:::::E EEEEEE R::::R R:::::R
58 | * C:::::C O:::::O O:::::OM:::::::::::M M:::::::::::M P::::P P:::::P I::::I L:::::L E:::::E R::::R R:::::R
59 | * C:::::C O:::::O O:::::OM:::::::M::::M M::::M:::::::M P::::PPPPPP:::::P I::::I L:::::L E::::::EEEEEEEEEE R::::RRRRRR:::::R
60 | * C:::::C O:::::O O:::::OM::::::M M::::M M::::M M::::::M P:::::::::::::PP I::::I L:::::L E:::::::::::::::E R:::::::::::::RR
61 | * C:::::C O:::::O O:::::OM::::::M M::::M::::M M::::::M P::::PPPPPPPPP I::::I L:::::L E:::::::::::::::E R::::RRRRRR:::::R
62 | * C:::::C O:::::O O:::::OM::::::M M:::::::M M::::::M P::::P I::::I L:::::L E::::::EEEEEEEEEE R::::R R:::::R
63 | * C:::::C O:::::O O:::::OM::::::M M:::::M M::::::M P::::P I::::I L:::::L E:::::E R::::R R:::::R
64 | * C:::::C CCCCCCO::::::O O::::::OM::::::M MMMMM M::::::M P::::P I::::I L:::::L LLLLLL E:::::E EEEEEE R::::R R:::::R
65 | * C:::::CCCCCCCC::::CO:::::::OOO:::::::OM::::::M M::::::MPP::::::PP II::::::IILL:::::::LLLLLLLLL:::::LEE::::::EEEEEEEE:::::ERR:::::R R:::::R
66 | * CC:::::::::::::::C OO:::::::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R
67 | * CCC::::::::::::C OO:::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R
68 | * CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPP IIIIIIIIIILLLLLLLLLLLLLLLLLLLLLLLLEEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR
69 | *
70 | * =======================================================================================================================================================================
71 | * =======================================================================================================================================================================
72 | * =======================================================================================================================================================================
73 | * =======================================================================================================================================================================
74 | */
75 |
76 | /**
77 | * 今天让我们来写一个编译器,一个超级无敌小的编译器!它小到如果把所有注释删去的话,大概只剩
78 | * 200行左右的代码。
79 | *
80 | * 我们将会用它将 lisp 风格的函数调用转换为 C 风格。
81 | *
82 | * 如果你对这两种风格不是很熟悉,下面是一个简单的介绍。
83 | *
84 | * 假设我们有两个函数,`add` 和 `subtract`,那么它们的写法将会是下面这样:
85 | *
86 | * LISP C
87 | *
88 | * 2 + 2 (add 2 2) add(2, 2)
89 | * 4 - 2 (subtract 4 2) subtract(4, 2)
90 | * 2 + (4 - 2) (add 2 (subtract 4 2)) add(2, subtract(4, 2))
91 | *
92 | * 很简单对吧?
93 | *
94 | * 这个转换就是我们将要做的事情。虽然这并不包含 LISP 或者 C 的全部语法,但它足以向我们
95 | * 展示现代编译器很多要点。
96 | *
97 | */
98 |
99 | /**
100 | * 大多数编译器可以分成三个阶段:解析(Parsing),转换(Transformation)以及代码
101 | * 生成(Code Generation)
102 | *
103 | * 1. *解析*是将最初原始的代码转换为一种更加抽象的表示(译者注:即AST)。
104 | *
105 | * 2. *转换*将对这个抽象的表示做一些处理,让它能做到编译器期望
106 | * 它做到的事情。
107 | *
108 | * 3. *代码生成*接收处理之后的代码表示,然后把它转换成新的代码。
109 | */
110 |
111 | /**
112 | * 解析(Parsing)
113 | * -------
114 | *
115 | * 解析一般来说会分成两个阶段:词法分析(Lexical Analysis)和语法分析(Syntactic Analysis)。
116 | *
117 | * 1. *词法分析*接收原始代码,然后把它分割成一些被称为 Token 的东西,这个过程是在词法分析
118 | * 器(Tokenizer或者Lexer)中完成的。
119 | *
120 | * Token 是一个数组,由一些代码语句的碎片组成。它们可以是数字、标签、标点符号、运算符,
121 | * 或者其它任何东西。
122 | *
123 | * 2. *语法分析* 接收之前生成的 Token,把它们转换成一种抽象的表示,这种抽象的表示描述了代
124 | * 码语句中的每一个片段以及它们之间的关系。这被称为中间表示(intermediate representation)
125 | * 或抽象语法树(Abstract Syntax Tree, 缩写为AST)
126 | *
127 | * 抽象语法树是一个嵌套程度很深的对象,用一种更容易处理的方式代表了代码本身,也能给我们
128 | * 更多信息。
129 | *
130 | * 比如说对于下面这一行代码语句:
131 | *
132 | * (add 2 (subtract 4 2))
133 | *
134 | * 它产生的 Token 看起来或许是这样的:
135 | *
136 | * [
137 | * { type: 'paren', value: '(' },
138 | * { type: 'name', value: 'add' },
139 | * { type: 'number', value: '2' },
140 | * { type: 'paren', value: '(' },
141 | * { type: 'name', value: 'subtract' },
142 | * { type: 'number', value: '4' },
143 | * { type: 'number', value: '2' },
144 | * { type: 'paren', value: ')' },
145 | * { type: 'paren', value: ')' }
146 | * ]
147 | *
148 | * 它的抽象语法树(AST)看起来或许是这样的:
149 | *
150 | * {
151 | * type: 'Program',
152 | * body: [{
153 | * type: 'CallExpression',
154 | * name: 'add',
155 | * params: [{
156 | * type: 'NumberLiteral',
157 | * value: '2'
158 | * }, {
159 | * type: 'CallExpression',
160 | * name: 'subtract',
161 | * params: [{
162 | * type: 'NumberLiteral',
163 | * value: '4'
164 | * }, {
165 | * type: 'NumberLiteral',
166 | * value: '2'
167 | * }]
168 | * }]
169 | * }]
170 | * }
171 | */
172 |
173 | /**
174 | * 转换(Transformation)
175 | * --------------
176 | *
177 | * 编译器的下一步就是转换。它只是把 AST 拿过来然后对它做一些修改。它可以在同种语言下操
178 | * 作 AST,也可以把 AST 翻译成全新的语言。
179 | *
180 | * 下面我们来看看该如何转换 AST。
181 | *
182 | * 你或许注意到了我们的 AST 中有很多相似的元素,这些元素都有 type 属性,它们被称为 AST
183 | * 结点。这些结点含有若干属性,可以用于描述 AST 的部分信息。
184 | *
185 | * 比如下面是一个“NumberLiteral”结点:
186 | *
187 | * {
188 | * type: 'NumberLiteral',
189 | * value: '2'
190 | * }
191 | *
192 | * 又比如下面是一个“CallExpression”结点:
193 | *
194 | * {
195 | * type: 'CallExpression',
196 | * name: 'subtract',
197 | * params: [...nested nodes go here...]
198 | * }
199 | *
200 | * 当转换 AST 的时候我们可以添加、移动、替代这些结点,也可以根据现有的 AST 生成一个全新
201 | * 的 AST
202 | *
203 | * 既然我们编译器的目标是把输入的代码转换为一种新的语言,所以我们将会着重于产生一个针对
204 | * 新语言的全新的 AST。
205 | *
206 | *
207 | * 遍历(Traversal)
208 | * ---------
209 | *
210 | * 为了能处理所有的结点,我们需要遍历它们,使用的是深度优先遍历。
211 | *
212 | * {
213 | * type: 'Program',
214 | * body: [{
215 | * type: 'CallExpression',
216 | * name: 'add',
217 | * params: [{
218 | * type: 'NumberLiteral',
219 | * value: '2'
220 | * }, {
221 | * type: 'CallExpression',
222 | * name: 'subtract',
223 | * params: [{
224 | * type: 'NumberLiteral',
225 | * value: '4'
226 | * }, {
227 | * type: 'NumberLiteral',
228 | * value: '2'
229 | * }]
230 | * }]
231 | * }]
232 | * }
233 | *
234 | * 对于上面的 AST 的遍历流程是这样的:
235 | *
236 | * 1. Program - 从 AST 的顶部结点开始
237 | * 2. CallExpression (add) - Program 的第一个子元素
238 | * 3. NumberLiteral (2) - CallExpression (add) 的第一个子元素
239 | * 4. CallExpression (subtract) - CallExpression (add) 的第二个子元素
240 | * 5. NumberLiteral (4) - CallExpression (subtract) 的第一个子元素
241 | * 6. NumberLiteral (2) - CallExpression (subtract) 的第二个子元素
242 | *
243 | * 如果我们直接在 AST 内部操作,而不是产生一个新的 AST,那么就要在这里介绍所有种类的抽象,
244 | * 但是目前访问(visiting)所有结点的方法已经足够了。
245 | *
246 | * 使用“访问(visiting)”这个词是因为这是一种模式,代表在对象结构内对元素进行操作。
247 | *
248 | * 访问者(Visitors)
249 | * --------
250 | *
251 | * 我们最基础的想法是创建一个“访问者(visitor)”对象,这个对象中包含一些方法,可以接收不
252 | * 同的结点。
253 | *
254 | * var visitor = {
255 | * NumberLiteral() {},
256 | * CallExpression() {}
257 | * };
258 | *
259 | * 当我们遍历 AST 的时候,如果遇到了匹配 type 的结点,我们可以调用 visitor 中的方法。
260 | *
261 | * 一般情况下为了让这些方法可用性更好,我们会把父结点也作为参数传入。
262 | */
263 |
264 | /**
265 | * 代码生成(Code Generation)
266 | * ---------------
267 | *
268 | * 编译器的最后一个阶段是代码生成,这个阶段做的事情有时候会和转换(transformation)重叠,
269 | * 但是代码生成最主要的部分还是根据 AST 来输出代码。
270 | *
271 | * 代码生成有几种不同的工作方式,有些编译器将会重用之前生成的 token,有些会创建独立的代码
272 | * 表示,以便于线性地输出代码。但是接下来我们还是着重于使用之前生成好的 AST。
273 | *
274 | * 我们的代码生成器需要知道如何“打印”AST 中所有类型的结点,然后它会递归地调用自身,直到所
275 | * 有代码都被打印到一个很长的字符串中。
276 | *
277 | */
278 |
279 | /**
280 | * 好了!这就是编译器中所有的部分了。
281 | *
282 | * 当然不是说所有的编译器都像我说的这样。不同的编译器有不同的目的,所以也可能需要不同的步骤。
283 | *
284 | * 但你现在应该对编译器到底是个什么东西有个大概的认识了。
285 | *
286 | * 既然我全都解释一遍了,你应该能写一个属于自己的编译器了吧?
287 | *
288 | * 哈哈开个玩笑,接下来才是重点 :P
289 | *
290 | * 所以我们开始吧...
291 | */
292 |
293 | /**
294 | * ============================================================================
295 | * (/^▽^)/
296 | * 词法分析器(Tokenizer)!
297 | * ============================================================================
298 | */
299 |
300 | /**
301 | * 我们从第一个阶段开始,即词法分析,使用的是词法分析器(Tokenizer)。
302 | *
303 | * 我们只是接收代码组成的字符串,然后把它们分割成 token 组成的数组。
304 | *
305 | * (add 2 (subtract 4 2)) => [{ type: 'paren', value: '(' }, ...]
306 | */
307 |
308 | // 我们从接收一个字符串开始,首先设置两个变量。
/**
 * Lexical analysis: split a source string into a flat array of tokens.
 *
 *   (add 2 (subtract 4 2))   =>   [{ type: 'paren', value: '(' }, ...]
 *
 * Recognised tokens:
 *   - `paren`  : a single `(` or `)`
 *   - `number` : a maximal run of the digits 0-9
 *   - `name`   : a maximal run of ASCII letters (matched case-insensitively)
 * Whitespace only separates tokens and is never emitted.
 *
 * @param {string} input - Source code to tokenize.
 * @returns {Array<{type: string, value: string}>} Tokens in source order.
 * @throws {TypeError} If a character belongs to none of the classes above.
 */
function tokenizer(input) {
  // The character classes never change, so build the regexes once instead of
  // once per loop iteration.
  var WHITESPACE = /\s/;
  var NUMBERS = /[0-9]/;
  var LETTERS = /[a-z]/i;

  // `current` is a cursor into `input`; `tokens` collects the output.
  var current = 0;
  var tokens = [];

  // A `while` loop is used (rather than `for`) because a single iteration may
  // consume several characters when it reads a number or a name.
  while (current < input.length) {
    var char = input[current];

    // Opening parenthesis: emit a `paren` token and move on.
    if (char === '(') {
      tokens.push({
        type: 'paren',
        value: '('
      });
      current++;
      continue;
    }

    // Closing parenthesis: same handling as the opening one.
    if (char === ')') {
      tokens.push({
        type: 'paren',
        value: ')'
      });
      current++;
      continue;
    }

    // Whitespace separates tokens but is not stored.
    if (WHITESPACE.test(char)) {
      current++;
      continue;
    }

    // Numbers: consecutive digits form one token.
    //
    //   (add 123 456)
    //        ^^^ ^^^
    //        only two separate tokens
    if (NUMBERS.test(char)) {
      var digits = '';

      // Stop at the end of the input as well as at the first non-digit.
      while (current < input.length && NUMBERS.test(char)) {
        digits += char;
        char = input[++current];
      }

      tokens.push({
        type: 'number',
        value: digits
      });
      continue;
    }

    // Names: consecutive letters form one token (function names in our lisp).
    //
    //   (add 2 4)
    //    ^^^
    //    name token
    if (LETTERS.test(char)) {
      var letters = '';

      // The bounds check is essential here: past the end of the input `char`
      // is `undefined`, which `RegExp.prototype.test` coerces to the string
      // "undefined" -- and that matches /[a-z]/i, so without the check an
      // input ending in a name would keep appending "undefined" until the
      // engine gives up.
      while (current < input.length && LETTERS.test(char)) {
        letters += char;
        char = input[++current];
      }

      tokens.push({
        type: 'name',
        value: letters
      });
      continue;
    }

    // Nothing matched: the character is not part of the language.
    throw new TypeError('I dont know what this character is: ' + char);
  }

  return tokens;
}
431 |
432 | /**
433 | * ============================================================================
434 | * ヽ/❀o ل͜ o\ノ
435 | * 语法分析器(Parser)!!!
436 | * ============================================================================
437 | */
438 |
439 | /**
440 | * 语法分析器接受 token 数组,然后把它转化为 AST
441 | *
442 | * [{ type: 'paren', value: '(' }, ...] => { type: 'Program', body: [...] }
443 | */
444 |
445 | // 现在我们定义 parser 函数,接受 `tokens` 数组
/**
 * Syntactic analysis: turns the flat token array produced by the tokenizer
 * into an AST.
 *
 *   [{ type: 'paren', value: '(' }, ...]   =>   { type: 'Program', body: [...] }
 *
 * @param {Array<{type: string, value: string}>} tokens - Tokens to parse.
 * @returns {{type: string, body: Array<Object>}} Root `Program` node whose
 *   `body` holds one node per top-level expression.
 * @throws {TypeError} If a token of an unsupported type is met where an
 *   expression is expected, if `(` is not followed by a `name` token, or if
 *   the tokens run out before a call expression is closed by `)`.
 */
function parser(tokens) {

  // Cursor into `tokens`; shared by every (recursive) call of `walk`.
  var current = 0;

  // Parses exactly one expression starting at `tokens[current]`, advances
  // `current` past it and returns the resulting AST node. Callers only invoke
  // it while a token is available at `current`.
  function walk() {
    var token = tokens[current];

    // A `number` token becomes a `NumberLiteral` leaf.
    if (token.type === 'number') {
      current++;
      return {
        type: 'NumberLiteral',
        value: token.value
      };
    }

    // An opening parenthesis starts a `CallExpression`.
    if (token.type === 'paren' && token.value === '(') {

      // Skip the parenthesis itself; it carries no meaning in the AST.
      token = tokens[++current];

      // The token right after `(` must be the name of the called function.
      // Without this check `(2 3)` would silently become the call `2(3)` and
      // a trailing `(` would fail on an `undefined` token.
      if (!token || token.type !== 'name') {
        throw new TypeError(
          'Expected a function name after "(" but found ' +
          (token ? token.type + ' "' + token.value + '"' : 'end of input')
        );
      }

      var node = {
        type: 'CallExpression',
        name: token.value,
        params: []
      };

      // Skip the name token.
      token = tokens[++current];

      // Every expression up to the matching `)` is a parameter. Nested calls
      // are handled by the recursive `walk()`, which also moves `current`
      // forward, so the token is re-read after each parameter.
      //
      //   (add 2 (subtract 4 2))
      //
      // The `token &&` guard stops the loop when the input ends early instead
      // of dereferencing `undefined`.
      while (token && !(token.type === 'paren' && token.value === ')')) {
        node.params.push(walk());
        token = tokens[current];
      }

      // Running out of tokens here means the closing parenthesis is missing.
      if (!token) {
        throw new TypeError(
          'Unexpected end of input: missing ")" for call to "' + node.name + '"'
        );
      }

      // Skip the closing parenthesis.
      current++;

      return node;
    }

    // Anything else (a stray name, a stray `)`, an unknown type) is an error.
    throw new TypeError(token.type);
  }

  // The root of the AST is a `Program` node.
  var ast = {
    type: 'Program',
    body: []
  };

  // A program may hold several top-level expressions one after another, e.g.
  //
  //   (add 2 2)
  //   (subtract 4 2)
  //
  // so keep walking until every token has been consumed.
  while (current < tokens.length) {
    ast.body.push(walk());
  }

  return ast;
}
558 |
559 | /**
560 | * ============================================================================
561 | * ⌒(❀>◞౪◟<❀)⌒
562 | * 遍历器!!!
563 | * ============================================================================
564 | */
565 |
566 | /**
567 | * 现在我们有了 AST,我们需要一个 visitor 去遍历所有的结点。当遇到某个类型的结点时,我们
568 | * 需要调用 visitor 中对应类型的处理函数。
569 | *
570 | * traverse(ast, {
571 | * Program(node, parent) {
572 | * // ...
573 | * },
574 | *
575 | * CallExpression(node, parent) {
576 | * // ...
577 | * },
578 | *
579 | * NumberLiteral(node, parent) {
580 | * // ...
581 | * }
582 | * });
583 | */
584 |
585 | // 所以我们定义一个遍历器,它有两个参数,AST 和 visitor。在它的里面我们又定义了两个函数...
/**
 * Walks the AST depth-first and, for every node, calls the visitor method
 * whose key equals the node's `type` (when the visitor defines one).
 *
 * @param {Object} ast - Root node of the tree to walk.
 * @param {Object} visitor - Map from node type to `function (node, parent)`.
 * @throws {TypeError} If a node has a type other than `Program`,
 *   `CallExpression` or `NumberLiteral`.
 */
function traverser(ast, visitor) {

  // Visits a single node: the visitor callback runs first, then the node's
  // children (if its type has any) are walked with the node as their parent.
  function traverseNode(node, parent) {
    var handler = visitor[node.type];
    if (handler) {
      handler(node, parent);
    }

    var kind = node.type;

    if (kind === 'Program') {
      // A Program keeps its children in `body`.
      traverseArray(node.body, node);
    } else if (kind === 'CallExpression') {
      // A CallExpression keeps its children in `params`.
      traverseArray(node.params, node);
    } else if (kind !== 'NumberLiteral') {
      // NumberLiteral is a leaf; any other type is not understood.
      throw new TypeError(kind);
    }
  }

  // Visits each element of `array` in order, all sharing the same `parent`.
  function traverseArray(array, parent) {
    array.forEach(function (child) {
      traverseNode(child, parent);
    });
  }

  // The root node has no parent.
  traverseNode(ast, null);
}
636 |
637 | /**
638 | * ============================================================================
639 | * ⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽
640 | * 转换器!!!
641 | * ============================================================================
642 | */
643 |
644 | /**
645 | * 下面是转换器。转换器接收我们在之前构建好的 AST,然后把它和 visitor 传递进入我们的遍历
646 | * 器中 ,最后得到一个新的 AST。
647 | *
648 | * ----------------------------------------------------------------------------
649 | * 原始的 AST | 转换后的 AST
650 | * ----------------------------------------------------------------------------
651 | * { | {
652 | * type: 'Program', | type: 'Program',
653 | * body: [{ | body: [{
654 | * type: 'CallExpression', | type: 'ExpressionStatement',
655 | * name: 'add', | expression: {
656 | * params: [{ | type: 'CallExpression',
657 | * type: 'NumberLiteral', | callee: {
658 | * value: '2' | type: 'Identifier',
659 | * }, { | name: 'add'
660 | * type: 'CallExpression', | },
661 | * name: 'subtract', | arguments: [{
662 | * params: [{ | type: 'NumberLiteral',
663 | * type: 'NumberLiteral', | value: '2'
664 | * value: '4' | }, {
665 | * }, { | type: 'CallExpression',
666 | * type: 'NumberLiteral', | callee: {
667 | * value: '2' | type: 'Identifier',
668 | * }] | name: 'subtract'
669 | * }] | },
670 | * }] | arguments: [{
671 | * } | type: 'NumberLiteral',
672 | * | value: '4'
673 | * ---------------------------------- | }, {
674 | * | type: 'NumberLiteral',
675 | * | value: '2'
676 | * | }]
677 | * (那一边比较长/w\) | }]
678 | * | }
679 | * | }]
680 | * | }
681 | * ----------------------------------------------------------------------------
682 | */
683 |
684 | // 定义我们的转换器函数,接收 AST 作为参数
/**
 * Builds a new, JavaScript-flavoured AST from the lisp-flavoured one.
 *
 * While walking the old tree, each old node that can have children is given
 * a `_context` property pointing at the array in the NEW tree that should
 * receive the converted children. Note that this writes `_context` onto the
 * nodes of the AST passed in.
 *
 * @param {Object} ast - Root `Program` node produced by the parser.
 * @returns {Object} Root `Program` node of the new AST.
 */
function transformer(ast) {

  // Root of the tree being built.
  var newAst = {
    type: 'Program',
    body: []
  };

  // Children of the old root are collected into the new root's body.
  ast._context = newAst.body;

  var visitor = {

    // Number literals are copied over as they are.
    NumberLiteral: function (node, parent) {
      var literal = {
        type: 'NumberLiteral',
        value: node.value
      };
      parent._context.push(literal);
    },

    // Calls get a nested `Identifier` callee and an `arguments` array.
    CallExpression: function (node, parent) {
      var call = {
        type: 'CallExpression',
        callee: {
          type: 'Identifier',
          name: node.name
        },
        arguments: []
      };

      // Parameters of the old node land in the new node's `arguments`.
      node._context = call.arguments;

      // A call that is an argument of another call is stored bare; any other
      // call is wrapped in an `ExpressionStatement`.
      var nested = parent.type === 'CallExpression';
      var result = nested ? call : {
        type: 'ExpressionStatement',
        expression: call
      };

      parent._context.push(result);
    }
  };

  traverser(ast, visitor);

  return newAst;
}
752 |
753 | /**
754 | * ============================================================================
755 | * ヾ(〃^∇^)ノ♪
756 | * 代码生成器!!!!
757 | * ============================================================================
758 | */
759 |
760 | /**
761 | * 现在只剩最后一步啦:代码生成器。
762 | *
763 | * 我们的代码生成器会递归地调用它自己,把 AST 中的每个结点打印到一个很大的字符串中。
764 | */
765 |
/**
 * Recursively prints a node of the transformed AST as source text.
 *
 * @param {Object} node - Node to print.
 * @returns {string} Generated code for `node` and everything below it
 *   (a `NumberLiteral` yields its `value` untouched).
 * @throws {TypeError} If the node type is not one of `Program`,
 *   `ExpressionStatement`, `CallExpression`, `Identifier`, `NumberLiteral`.
 */
function codeGenerator(node) {
  var kind = node.type;

  // Program: one line per top-level statement.
  if (kind === 'Program') {
    var statements = node.body.map(codeGenerator);
    return statements.join('\n');
  }

  // ExpressionStatement: the wrapped expression followed by a semicolon.
  if (kind === 'ExpressionStatement') {
    return codeGenerator(node.expression) + ';';
  }

  // CallExpression: callee, then the comma-separated arguments in parentheses.
  if (kind === 'CallExpression') {
    var callee = codeGenerator(node.callee);
    var args = node.arguments.map(codeGenerator);
    return callee + '(' + args.join(', ') + ')';
  }

  // Identifier: just its name.
  if (kind === 'Identifier') {
    return node.name;
  }

  // NumberLiteral: just its value.
  if (kind === 'NumberLiteral') {
    return node.value;
  }

  // Anything else cannot be printed.
  throw new TypeError(kind);
}
809 |
810 | /**
811 | * ============================================================================
812 | * (۶* ‘ヮ’)۶”
813 | * !!!!!!!!!!!!编译器!!!!!!!!!!!
814 | * ============================================================================
815 | */
816 |
817 | /**
818 | * 最后!我们创建 `compiler` 函数,它只是把上面说到的那些函数连接到一起。
819 | *
820 | * 1. input => tokenizer => tokens
821 | * 2. tokens => parser => ast
822 | * 3. ast => transformer => newAst
823 | * 4. newAst => generator => output
824 | */
825 |
/**
 * Runs the whole pipeline on a source string:
 *
 *   input -> tokenizer -> parser -> transformer -> codeGenerator -> output
 *
 * @param {string} input - Source code handed to `tokenizer`.
 * @returns {string} The value returned by `codeGenerator` for the new AST.
 */
function compiler(input) {
  // Each stage consumes the result of the previous one.
  return codeGenerator(transformer(parser(tokenizer(input))));
}
835 |
836 | /**
837 | * ============================================================================
838 | * (๑˃̵ᴗ˂̵)و
839 | * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!你做到了!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
840 | * ============================================================================
841 | */
842 |
843 | // Export every stage of the pipeline, plus the `compiler` function that chains them.
844 | module.exports = {
845 | tokenizer: tokenizer,
846 | parser: parser,
847 | transformer: transformer,
848 | codeGenerator: codeGenerator,
849 | compiler: compiler
850 | };
851 |
--------------------------------------------------------------------------------
/super-tiny-compiler.js:
--------------------------------------------------------------------------------
1 | /**
2 | * TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
3 | * T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E
4 | * T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E
5 | * T:::::TT:::::::TT:::::THH::::::H H::::::HHEE::::::EEEEEEEEE::::E
6 | * TTTTTT T:::::T TTTTTT H:::::H H:::::H E:::::E EEEEEE
7 | * T:::::T H:::::H H:::::H E:::::E
8 | * T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE
9 | * T:::::T H:::::::::::::::::H E:::::::::::::::E
10 | * T:::::T H:::::::::::::::::H E:::::::::::::::E
11 | * T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE
12 | * T:::::T H:::::H H:::::H E:::::E
13 | * T:::::T H:::::H H:::::H E:::::E EEEEEE
14 | * TT:::::::TT HH::::::H H::::::HHEE::::::EEEEEEEE:::::E
15 | * T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E
16 | * T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E
17 | * TTTTTTTTTTT HHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
18 | *
19 | * SSSSSSSSSSSSSSS UUUUUUUU UUUUUUUUPPPPPPPPPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR
20 | * SS:::::::::::::::SU::::::U U::::::UP::::::::::::::::P E::::::::::::::::::::ER::::::::::::::::R
21 | * S:::::SSSSSS::::::SU::::::U U::::::UP::::::PPPPPP:::::P E::::::::::::::::::::ER::::::RRRRRR:::::R
22 | * S:::::S SSSSSSSUU:::::U U:::::UUPP:::::P P:::::PEE::::::EEEEEEEEE::::ERR:::::R R:::::R
23 | * S:::::S U:::::U U:::::U P::::P P:::::P E:::::E EEEEEE R::::R R:::::R
24 | * S:::::S U:::::U U:::::U P::::P P:::::P E:::::E R::::R R:::::R
25 | * S::::SSSS U:::::U U:::::U P::::PPPPPP:::::P E::::::EEEEEEEEEE R::::RRRRRR:::::R
26 | * SS::::::SSSSS U:::::U U:::::U P:::::::::::::PP E:::::::::::::::E R:::::::::::::RR
27 | * SSS::::::::SS U:::::U U:::::U P::::PPPPPPPPP E:::::::::::::::E R::::RRRRRR:::::R
28 | * SSSSSS::::S U:::::U U:::::U P::::P E::::::EEEEEEEEEE R::::R R:::::R
29 | * S:::::S U:::::U U:::::U P::::P E:::::E R::::R R:::::R
30 | * S:::::S U::::::U U::::::U P::::P E:::::E EEEEEE R::::R R:::::R
31 | * SSSSSSS S:::::S U:::::::UUU:::::::U PP::::::PP EE::::::EEEEEEEE:::::ERR:::::R R:::::R
32 | * S::::::SSSSSS:::::S UU:::::::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R
33 | * S:::::::::::::::SS UU:::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R
34 | * SSSSSSSSSSSSSSS UUUUUUUUU PPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR
35 | *
36 | * TTTTTTTTTTTTTTTTTTTTTTTIIIIIIIIIINNNNNNNN NNNNNNNNYYYYYYY YYYYYYY
37 | * T:::::::::::::::::::::TI::::::::IN:::::::N N::::::NY:::::Y Y:::::Y
38 | * T:::::::::::::::::::::TI::::::::IN::::::::N N::::::NY:::::Y Y:::::Y
39 | * T:::::TT:::::::TT:::::TII::::::IIN:::::::::N N::::::NY::::::Y Y::::::Y
40 | * TTTTTT T:::::T TTTTTT I::::I N::::::::::N N::::::NYYY:::::Y Y:::::YYY
41 | * T:::::T I::::I N:::::::::::N N::::::N Y:::::Y Y:::::Y
42 | * T:::::T I::::I N:::::::N::::N N::::::N Y:::::Y:::::Y
43 | * T:::::T I::::I N::::::N N::::N N::::::N Y:::::::::Y
44 | * T:::::T I::::I N::::::N N::::N:::::::N Y:::::::Y
45 | * T:::::T I::::I N::::::N N:::::::::::N Y:::::Y
46 | * T:::::T I::::I N::::::N N::::::::::N Y:::::Y
47 | * T:::::T I::::I N::::::N N:::::::::N Y:::::Y
48 | * TT:::::::TT II::::::IIN::::::N N::::::::N Y:::::Y
49 | * T:::::::::T I::::::::IN::::::N N:::::::N YYYY:::::YYYY
50 | * T:::::::::T I::::::::IN::::::N N::::::N Y:::::::::::Y
51 | * TTTTTTTTTTT IIIIIIIIIINNNNNNNN NNNNNNN YYYYYYYYYYYYY
52 | *
53 | * CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPPPPPPPPP IIIIIIIIIILLLLLLLLLLL EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR
54 | * CCC::::::::::::C OO:::::::::OO M:::::::M M:::::::MP::::::::::::::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::::::::::::R
55 | * CC:::::::::::::::C OO:::::::::::::OO M::::::::M M::::::::MP::::::PPPPPP:::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::RRRRRR:::::R
56 | * C:::::CCCCCCCC::::CO:::::::OOO:::::::OM:::::::::M M:::::::::MPP:::::P P:::::PII::::::IILL:::::::LL EE::::::EEEEEEEEE::::ERR:::::R R:::::R
57 | * C:::::C CCCCCCO::::::O O::::::OM::::::::::M M::::::::::M P::::P P:::::P I::::I L:::::L E:::::E EEEEEE R::::R R:::::R
58 | * C:::::C O:::::O O:::::OM:::::::::::M M:::::::::::M P::::P P:::::P I::::I L:::::L E:::::E R::::R R:::::R
59 | * C:::::C O:::::O O:::::OM:::::::M::::M M::::M:::::::M P::::PPPPPP:::::P I::::I L:::::L E::::::EEEEEEEEEE R::::RRRRRR:::::R
60 | * C:::::C O:::::O O:::::OM::::::M M::::M M::::M M::::::M P:::::::::::::PP I::::I L:::::L E:::::::::::::::E R:::::::::::::RR
61 | * C:::::C O:::::O O:::::OM::::::M M::::M::::M M::::::M P::::PPPPPPPPP I::::I L:::::L E:::::::::::::::E R::::RRRRRR:::::R
62 | * C:::::C O:::::O O:::::OM::::::M M:::::::M M::::::M P::::P I::::I L:::::L E::::::EEEEEEEEEE R::::R R:::::R
63 | * C:::::C O:::::O O:::::OM::::::M M:::::M M::::::M P::::P I::::I L:::::L E:::::E R::::R R:::::R
64 | * C:::::C CCCCCCO::::::O O::::::OM::::::M MMMMM M::::::M P::::P I::::I L:::::L LLLLLL E:::::E EEEEEE R::::R R:::::R
65 | * C:::::CCCCCCCC::::CO:::::::OOO:::::::OM::::::M M::::::MPP::::::PP II::::::IILL:::::::LLLLLLLLL:::::LEE::::::EEEEEEEE:::::ERR:::::R R:::::R
66 | * CC:::::::::::::::C OO:::::::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R
67 | * CCC::::::::::::C OO:::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R
68 | * CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPP IIIIIIIIIILLLLLLLLLLLLLLLLLLLLLLLLEEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR
69 | *
70 | * =======================================================================================================================================================================
71 | * =======================================================================================================================================================================
72 | * =======================================================================================================================================================================
73 | * =======================================================================================================================================================================
74 | */
75 |
76 | /**
77 | * Today we're going to write a compiler together. But not just any compiler... A
78 | * super duper teeny tiny compiler! A compiler that is so small that if you
79 | * remove all the comments this file would only be ~200 lines of actual code.
80 | *
81 | * We're going to compile some lisp-like function calls into some C-like
82 | * function calls.
83 | *
84 | * If you are not familiar with one or the other, I'll just give you a quick intro.
85 | *
86 | * If we had two functions `add` and `subtract` they would be written like this:
87 | *
88 | * LISP C
89 | *
90 | * 2 + 2 (add 2 2) add(2, 2)
91 | * 4 - 2 (subtract 4 2) subtract(4, 2)
92 | * 2 + (4 - 2) (add 2 (subtract 4 2)) add(2, subtract(4, 2))
93 | *
94 | * Easy peezy right?
95 | *
96 | * Well good, because this is exactly what we are going to compile. While this
97 | * is neither a complete LISP nor C syntax, it will be enough of the syntax to
98 | * demonstrate many of the major pieces of a modern compiler.
99 | */
100 |
101 | /**
102 | * Most compilers break down into three primary stages: Parsing, Transformation,
103 | * and Code Generation
104 | *
105 | * 1. *Parsing* is taking raw code and turning it into a more abstract
106 | * representation of the code.
107 | *
108 | * 2. *Transformation* takes this abstract representation and manipulates it to do
109 | * whatever the compiler wants it to.
110 | *
111 | * 3. *Code Generation* takes the transformed representation of the code and
112 | * turns it into new code.
113 | */
114 |
115 | /**
116 | * Parsing
117 | * -------
118 | *
119 | * Parsing typically gets broken down into two phases: Lexical Analysis and
120 | * Syntactic Analysis.
121 | *
122 | * 1. *Lexical Analysis* takes the raw code and splits it apart into these things
123 | * called tokens by a thing called a tokenizer (or lexer).
124 | *
125 | * Tokens are an array of tiny little objects that describe an isolated piece
126 | * of the syntax. They could be numbers, labels, punctuation, operators,
127 | * whatever.
128 | *
129 | * 2. *Syntactic Analysis* takes the tokens and reformats them into a
130 | * representation that describes each part of the syntax and their relation
131 | * to one another. This is known as an intermediate representation or
132 | * Abstract Syntax Tree.
133 | *
134 | * An Abstract Syntax Tree, or AST for short, is a deeply nested object that
135 | * represents code in a way that is both easy to work with and tells us a lot
136 | * of information.
137 | *
138 | * For the following syntax:
139 | *
140 | * (add 2 (subtract 4 2))
141 | *
142 | * Tokens might look something like this:
143 | *
144 | * [
145 | * { type: 'paren', value: '(' },
146 | * { type: 'name', value: 'add' },
147 | * { type: 'number', value: '2' },
148 | * { type: 'paren', value: '(' },
149 | * { type: 'name', value: 'subtract' },
150 | * { type: 'number', value: '4' },
151 | * { type: 'number', value: '2' },
152 | * { type: 'paren', value: ')' },
153 | * { type: 'paren', value: ')' }
154 | * ]
155 | *
156 | * And an Abstract Syntax Tree (AST) might look like this:
157 | *
158 | * {
159 | * type: 'Program',
160 | * body: [{
161 | * type: 'CallExpression',
162 | * name: 'add',
163 | * params: [{
164 | * type: 'NumberLiteral',
165 | * value: '2'
166 | * }, {
167 | * type: 'CallExpression',
168 | * name: 'subtract',
169 | * params: [{
170 | * type: 'NumberLiteral',
171 | * value: '4'
172 | * }, {
173 | * type: 'NumberLiteral',
174 | * value: '2'
175 | * }]
176 | * }]
177 | * }]
178 | * }
179 | */
180 |
181 | /**
182 | * Transformation
183 | * --------------
184 | *
185 | * The next type of stage for a compiler is transformation. Again, this just
186 | * takes the AST from the last step and makes changes to it. It can manipulate
187 | * the AST in the same language or it can translate it into an entirely new
188 | * language.
189 | *
190 | * Let’s look at how we would transform an AST.
191 | *
192 | * You might notice that our AST has elements within it that look very similar.
193 | * There are these objects with a type property. Each of these are known as an
194 | * AST Node. These nodes have defined properties on them that describe one
195 | * isolated part of the tree.
196 | *
197 | * We can have a node for a "NumberLiteral":
198 | *
199 | * {
200 | * type: 'NumberLiteral',
201 | * value: '2'
202 | * }
203 | *
204 | * Or maybe a node for a "CallExpression":
205 | *
206 | * {
207 | * type: 'CallExpression',
208 | * name: 'subtract',
209 | * params: [...nested nodes go here...]
210 | * }
211 | *
212 | * When transforming the AST we can manipulate nodes by
213 | * adding/removing/replacing properties, we can add new nodes, remove nodes, or
214 | * we could leave the existing AST alone and create an entirely new one based
215 | * on it.
216 | *
217 | * Since we’re targeting a new language, we’re going to focus on creating an
218 | * entirely new AST that is specific to the target language.
219 | *
220 | * Traversal
221 | * ---------
222 | *
223 | * In order to navigate through all of these nodes, we need to be able to
224 | * traverse through them. This traversal process goes to each node in the AST
225 | * depth-first.
226 | *
227 | * {
228 | * type: 'Program',
229 | * body: [{
230 | * type: 'CallExpression',
231 | * name: 'add',
232 | * params: [{
233 | * type: 'NumberLiteral',
234 | * value: '2'
235 | * }, {
236 | * type: 'CallExpression',
237 | * name: 'subtract',
238 | * params: [{
239 | * type: 'NumberLiteral',
240 | * value: '4'
241 | * }, {
242 | * type: 'NumberLiteral',
243 | * value: '2'
244 | * }]
245 | * }]
246 | * }]
247 | * }
248 | *
249 | * So for the above AST we would go:
250 | *
251 | * 1. Program - Starting at the top level of the AST
252 | * 2. CallExpression (add) - Moving to the first element of the Program's body
253 | * 3. NumberLiteral (2) - Moving to the first element of CallExpression's params
254 | * 4. CallExpression (subtract) - Moving to the second element of CallExpression's params
255 | * 5. NumberLiteral (4) - Moving to the first element of CallExpression's params
256 | * 6. NumberLiteral (2) - Moving to the second element of CallExpression's params
257 | *
258 | * If we were manipulating this AST directly, instead of creating a separate AST,
259 | * we would likely introduce all sorts of abstractions here. But just visiting
260 | * each node in the tree is enough.
261 | *
262 | * The reason I use the word “visiting” is because there is this pattern of how
263 | * to represent operations on elements of an object structure.
264 | *
265 | * Visitors
266 | * --------
267 | *
268 | * The basic idea here is that we are going to create a “visitor” object that
269 | * has methods that will accept different node types.
270 | *
271 | * var visitor = {
272 | * NumberLiteral() {},
273 | * CallExpression() {}
274 | * };
275 | *
276 | * When we traverse our AST we will call the methods on this visitor whenever we
277 | * encounter a node of a matching type.
278 | *
279 | * In order to make this useful we will also pass the node and a reference to
280 | * the parent node.
281 | *
282 | * var visitor = {
283 | * NumberLiteral(node, parent) {},
284 | * CallExpression(node, parent) {}
285 | * };
286 | */
287 |
288 | /**
289 | * Code Generation
290 | * ---------------
291 | *
292 | * The final phase of a compiler is code generation. Sometimes compilers will do
293 | * things that overlap with transformation, but for the most part code
294 | * generation just means take our AST and string-ify code back out.
295 | *
296 | * Code generators work several different ways, some compilers will reuse the
297 | * tokens from earlier, others will have created a separate representation of
298 | * the code so that they can print nodes linearly, but from what I can tell most
299 | * will use the same AST we just created, which is what we’re going to focus on.
300 | *
301 | * Effectively our code generator will know how to “print” all of the different
302 | * node types of the AST, and it will recursively call itself to print nested
303 | * nodes until everything is printed into one long string of code.
304 | */
305 |
306 | /**
307 | * And that's it! That's all the different pieces of a compiler.
308 | *
309 | * Now that isn’t to say every compiler looks exactly like I described here.
310 | * Compilers serve many different purposes, and they might need more steps than
311 | * I have detailed.
312 | *
313 | * But now you should have a general high-level idea of what most compilers look
314 | * like.
315 | *
316 | * Now that I’ve explained all of this, you’re all good to go write your own
317 | * compilers right?
318 | *
319 | * Just kidding, that's what I'm here to help with :P
320 | *
321 | * So let's begin...
322 | */
323 |
324 | /**
325 | * ============================================================================
326 | * (/^▽^)/
327 | * THE TOKENIZER!
328 | * ============================================================================
329 | */
330 |
331 | /**
332 | * We're gonna start off with our first phase of parsing, lexical analysis, with
333 | * the tokenizer.
334 | *
335 | * We're just going to take our string of code and break it down into an array
336 | * of tokens.
337 | *
338 | * (add 2 (subtract 4 2)) => [{ type: 'paren', value: '(' }, ...]
339 | */
340 |
341 | // We start by accepting an input string of code, and we're gonna set up two
342 | // things...
/**
 * Lexical analysis: splits a string of lisp-like code into a flat array of
 * tokens.
 *
 *   (add 2 (subtract 4 2))   =>   [{ type: 'paren', value: '(' }, ...]
 *
 * Recognised tokens:
 *   - `paren`:  a single `(` or `)`
 *   - `number`: a run of the digits 0-9
 *   - `name`:   a run of the letters a-z / A-Z
 * Whitespace separates tokens and is otherwise dropped.
 *
 * @param {string} input - Source code to tokenize.
 * @returns {Array<{type: string, value: string}>} Tokens in source order.
 * @throws {TypeError} If a character belongs to none of the classes above.
 */
function tokenizer(input) {

  // Character classes. They never change, so they are created once instead
  // of on every iteration of the main loop.
  var WHITESPACE = /\s/;
  var NUMBERS = /[0-9]/;
  var LETTERS = /[a-z]/i;

  // Cursor tracking our position in the code.
  var current = 0;

  // Collected tokens.
  var tokens = [];

  // Consumes the run of characters matching `pattern` that starts at
  // `current`, leaves `current` on the first character after the run and
  // returns the run.
  //
  // The `current < input.length` bound is essential: past the end of the
  // input `input[current]` is `undefined`, and `RegExp.prototype.test` turns
  // that into the string "undefined", which matches LETTERS. Without the
  // bound, any input ending in a letter would loop forever.
  function readWhile(pattern) {
    var value = '';
    while (current < input.length && pattern.test(input[current])) {
      value += input[current];
      current++;
    }
    return value;
  }

  // `current` may advance many times within one iteration, because tokens
  // can be any length.
  while (current < input.length) {
    var char = input[current];

    // Parentheses are single-character tokens.
    if (char === '(' || char === ')') {
      tokens.push({
        type: 'paren',
        value: char
      });
      current++;
      continue;
    }

    // Whitespace only separates tokens; it is not stored.
    if (WHITESPACE.test(char)) {
      current++;
      continue;
    }

    // A number is the whole sequence of consecutive digits.
    //
    //   (add 123 456)
    //        ^^^ ^^^
    //        Only two separate tokens
    if (NUMBERS.test(char)) {
      tokens.push({
        type: 'number',
        value: readWhile(NUMBERS)
      });
      continue;
    }

    // A name is the whole sequence of consecutive letters.
    //
    //   (add 2 4)
    //    ^^^
    //    Name token
    if (LETTERS.test(char)) {
      tokens.push({
        type: 'name',
        value: readWhile(LETTERS)
      });
      continue;
    }

    // Nothing matched: give up.
    throw new TypeError('I dont know what this character is: ' + char);
  }

  return tokens;
}
477 |
478 | /**
479 | * ============================================================================
480 | * ヽ/❀o ل͜ o\ノ
481 | * THE PARSER!!!
482 | * ============================================================================
483 | */
484 |
485 | /**
486 | * For our parser we're going to take our array of tokens and turn it into an
487 | * AST.
488 | *
489 | * [{ type: 'paren', value: '(' }, ...] => { type: 'Program', body: [...] }
490 | */
491 |
/**
 * Turn the flat array of tokens into the Lisp-style AST.
 *
 *   [{ type: 'paren', value: '(' }, ...]   =>   { type: 'Program', body: [...] }
 *
 * @param {Array<{type: string, value: string}>} tokens - `paren`, `name` and
 *   `number` tokens, in source order.
 * @returns {{type: string, body: Array<Object>}} A `Program` node whose `body`
 *   holds one node per top-level expression.
 * @throws {TypeError} When the input ends in the middle of a call expression,
 *   when an opening parenthesis is not followed by a `name` token, or when a
 *   token cannot start an expression (the message is then the token's type).
 */
function parser(tokens) {

  // Again we keep a `current` variable that we will use as a cursor. It is
  // shared by every (recursive) call to `walk`.
  var current = 0;

  // Small helper: hand back the token under the cursor, or fail with a clear
  // message when we have run off the end of the array. Without this check an
  // unterminated expression such as `(add 2` would blow up with an accidental
  // "cannot read property 'type' of undefined" error.
  function peek() {
    var token = tokens[current];

    if (!token) {
      throw new TypeError(
        'Unexpected end of input inside a call expression'
      );
    }

    return token;
  }

  // We use recursion instead of a `while` loop: `walk` parses exactly one
  // expression starting at `current` and leaves the cursor just past it.
  function walk() {
    var token = peek();

    // `number` tokens become `NumberLiteral` nodes.
    if (token.type === 'number') {
      current++;

      return {
        type: 'NumberLiteral',
        value: token.value
      };
    }

    // An open parenthesis starts a `CallExpression`.
    if (token.type === 'paren' && token.value === '(') {

      // Skip the parenthesis itself; we don't care about it in our AST.
      current++;
      token = peek();

      // The token right after `(` has to be the name of the function. Anything
      // else (`(2 3)`, `((add 1 2))`, `()`) is not a valid call.
      if (token.type !== 'name') {
        throw new TypeError(
          'Expected a function name after "(" but found: ' + token.type
        );
      }

      var node = {
        type: 'CallExpression',
        name: token.value,
        params: []
      };

      // Skip the name token.
      current++;
      token = peek();

      // Every expression up to the matching closing parenthesis is a param.
      // Nested calls are handled by the recursive `walk`, which moves
      // `current` past the nested closing parenthesis for us:
      //
      //   (add 2 (subtract 4 2))
      //
      while (!(token.type === 'paren' && token.value === ')')) {
        node.params.push(walk());
        token = peek();
      }

      // Skip our own closing parenthesis.
      current++;

      return node;
    }

    // If we haven't recognized the token type by now we throw an error.
    throw new TypeError(token.type);
  }

  // The root of our AST is a `Program` node.
  var ast = {
    type: 'Program',
    body: []
  };

  // We kickstart `walk` inside a loop because a program can have several
  // `CallExpressions` after one another instead of being nested.
  //
  //   (add 2 2)
  //   (subtract 4 2)
  //
  while (current < tokens.length) {
    ast.body.push(walk());
  }

  // At the end of our parser we'll return the AST.
  return ast;
}
625 |
626 | /**
627 | * ============================================================================
628 | * ⌒(❀>◞౪◟<❀)⌒
629 | * THE TRAVERSER!!!
630 | * ============================================================================
631 | */
632 |
633 | /**
634 | * So now we have our AST, and we want to be able to visit different nodes with
635 | * a visitor. We need to be able to call the methods on the visitor whenever we
636 | * encounter a node with a matching type.
637 | *
638 | * traverse(ast, {
639 | * Program(node, parent) {
640 | * // ...
641 | * },
642 | *
643 | * CallExpression(node, parent) {
644 | * // ...
645 | * },
646 | *
647 | * NumberLiteral(node, parent) {
648 | * // ...
649 | * }
650 | * });
651 | */
652 |
/**
 * Walk the AST depth-first and call the matching visitor method for every
 * node that is encountered.
 *
 * @param {Object} ast - Root node to start from; it is visited with a `null`
 *   parent.
 * @param {Object} visitor - Object keyed by node type. When a property named
 *   after the node's `type` is truthy it is called as `fn(node, parent)`
 *   before the node's children are visited.
 * @throws {TypeError} When a node's type is not `Program`, `CallExpression`
 *   or `NumberLiteral`; the message is the offending type.
 */
function traverser(ast, visitor) {

  // `visit` handles one node: first the visitor callback (if any), then the
  // children that belong to this kind of node.
  function visit(node, parent) {
    var handler = visitor[node.type];

    if (handler) {
      handler(node, parent);
    }

    // Work out which property holds the child nodes. The type is read *after*
    // the callback has run, exactly like the callback-then-dispatch order the
    // traversal has always had.
    var kind = node.type;
    var children;

    if (kind === 'Program') {
      // A `Program` keeps its children in `body`.
      children = node.body;
    } else if (kind === 'CallExpression') {
      // A `CallExpression` keeps its children in `params`.
      children = node.params;
    } else if (kind === 'NumberLiteral') {
      // `NumberLiterals` are leaves, there is nothing further to visit.
      return;
    } else {
      // Anything else is a node type we do not know how to traverse.
      throw new TypeError(node.type);
    }

    // Recurse into every child, passing the current node along as the parent.
    children.forEach(function(child) {
      visit(child, node);
    });
  }

  // The top level of the AST doesn't have a parent, so we start with `null`.
  visit(ast, null);
}
712 |
713 | /**
714 | * ============================================================================
715 | * ⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽
716 | * THE TRANSFORMER!!!
717 | * ============================================================================
718 | */
719 |
720 | /**
721 | * Next up, the transformer. Our transformer is going to take the AST that we
722 | * have built and pass it to our traverser function with a visitor and will
723 | * create a new ast.
724 | *
725 | * ----------------------------------------------------------------------------
726 | * Original AST | Transformed AST
727 | * ----------------------------------------------------------------------------
728 | * { | {
729 | * type: 'Program', | type: 'Program',
730 | * body: [{ | body: [{
731 | * type: 'CallExpression', | type: 'ExpressionStatement',
732 | * name: 'add', | expression: {
733 | * params: [{ | type: 'CallExpression',
734 | * type: 'NumberLiteral', | callee: {
735 | * value: '2' | type: 'Identifier',
736 | * }, { | name: 'add'
737 | * type: 'CallExpression', | },
738 | * name: 'subtract', | arguments: [{
739 | * params: [{ | type: 'NumberLiteral',
740 | * type: 'NumberLiteral', | value: '2'
741 | * value: '4' | }, {
742 | * }, { | type: 'CallExpression',
743 | * type: 'NumberLiteral', | callee: {
744 | * value: '2' | type: 'Identifier',
745 | * }] | name: 'subtract'
746 | * }] | },
747 | * }] | arguments: [{
748 | * } | type: 'NumberLiteral',
749 | * | value: '4'
750 | * ---------------------------------- | }, {
751 | * | type: 'NumberLiteral',
752 | * | value: '2'
753 | * | }]
754 | * (sorry the other one is longer.) | }]
755 | * | }
756 | * | }]
757 | * | }
758 | * ----------------------------------------------------------------------------
759 | */
760 |
/**
 * Build a new, JavaScript-flavoured AST out of the Lisp-style AST.
 *
 * Note that this sets a `_context` property on the nodes of the AST that is
 * passed in (on the root and on every `CallExpression`). `_context` is the
 * array in the *new* tree that the children of that node should be pushed to.
 *
 * @param {Object} ast - The `Program` node of the Lisp-style AST.
 * @returns {{type: string, body: Array<Object>}} The new `Program` node.
 */
function transformer(ast) {

  // The tree we are building. Like the old one it starts with a program node.
  var result = {
    type: 'Program',
    body: []
  };

  // The hack: a reference *from* the old ast *to* the new ast. Children of the
  // old root will be pushed straight into the body of the new root.
  ast._context = result.body;

  // `NumberLiterals` are copied over as a fresh node into the parent context.
  function onNumberLiteral(node, parent) {
    var literal = {
      type: 'NumberLiteral',
      value: node.value
    };

    parent._context.push(literal);
  }

  // `CallExpressions` get a nested `Identifier` as callee and an `arguments`
  // array that their own children will be pushed into.
  function onCallExpression(node, parent) {
    var call = {
      type: 'CallExpression',
      callee: {
        type: 'Identifier',
        name: node.name
      },
      arguments: []
    };

    // Let the params of this node find the new `arguments` array.
    node._context = call.arguments;

    // A call that is not nested inside another call is a statement in
    // JavaScript, so it gets wrapped in an `ExpressionStatement`.
    var isNested = parent.type === 'CallExpression';
    var emitted = isNested
      ? call
      : { type: 'ExpressionStatement', expression: call };

    parent._context.push(emitted);
  }

  // Walk the old tree with our two visitor methods.
  traverser(ast, {
    NumberLiteral: onNumberLiteral,
    CallExpression: onCallExpression
  });

  // And hand back the tree we just filled in.
  return result;
}
836 |
837 | /**
838 | * ============================================================================
839 | * ヾ(〃^∇^)ノ♪
840 | * THE CODE GENERATOR!!!!
841 | * ============================================================================
842 | */
843 |
844 | /**
845 | * Now let's move on to our last phase: The Code Generator.
846 | *
847 | * Our code generator is going to recursively call itself to print each node in
848 | * the tree into one giant string.
849 | */
850 |
/**
 * Recursively print a node of the transformed AST as JavaScript source.
 *
 * @param {Object} node - A `Program`, `ExpressionStatement`, `CallExpression`,
 *   `Identifier` or `NumberLiteral` node.
 * @returns {string} The code for `node` and everything below it.
 * @throws {TypeError} When the node type is not one of the above; the message
 *   is the offending type.
 */
function codeGenerator(node) {
  var kind = node.type;

  // A `Program` is every node of its `body`, one per line.
  if (kind === 'Program') {
    var statements = node.body.map(function(child) {
      return codeGenerator(child);
    });

    return statements.join('\n');
  }

  // An `ExpressionStatement` is its nested expression plus a semicolon
  // (...because we like to code the *correct* way).
  if (kind === 'ExpressionStatement') {
    return codeGenerator(node.expression) + ';';
  }

  // A `CallExpression` is the callee followed by the comma separated
  // arguments in between parentheses.
  if (kind === 'CallExpression') {
    var callee = codeGenerator(node.callee);
    var args = node.arguments.map(function(argument) {
      return codeGenerator(argument);
    });

    return callee + '(' + args.join(', ') + ')';
  }

  // An `Identifier` is just its name.
  if (kind === 'Identifier') {
    return node.name;
  }

  // A `NumberLiteral` is just its value.
  if (kind === 'NumberLiteral') {
    return node.value;
  }

  // Anything we haven't recognized is an error.
  throw new TypeError(node.type);
}
896 |
897 | /**
898 | * ============================================================================
899 | * (۶* ‘ヮ’)۶”
900 | * !!!!!!!!THE COMPILER!!!!!!!!
901 | * ============================================================================
902 | */
903 |
904 | /**
905 | * FINALLY! We'll create our `compiler` function. Here we will link together
906 | * every part of the pipeline.
907 | *
908 | * 1. input => tokenizer => tokens
909 | * 2. tokens => parser => ast
910 | * 3. ast => transformer => newAst
911 | * 4. newAst => generator => output
912 | */
913 |
/**
 * Run the whole pipeline on a piece of Lisp-style source code.
 *
 *   input => tokenizer => parser => transformer => codeGenerator => output
 *
 * @param {string} input - The source code to compile.
 * @returns {string} Whatever `codeGenerator` produces for the transformed AST.
 */
function compiler(input) {
  // Each stage feeds straight into the next one, innermost call first.
  return codeGenerator(
    transformer(
      parser(
        tokenizer(input)
      )
    )
  );
}
923 |
924 | /**
925 | * ============================================================================
926 | * (๑˃̵ᴗ˂̵)و
927 | * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!YOU MADE IT!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
928 | * ============================================================================
929 | */
930 |
931 | // Now I'm just exporting everything...
932 | module.exports = {
933 | tokenizer: tokenizer,
934 | parser: parser,
935 | transformer: transformer,
936 | codeGenerator: codeGenerator,
937 | compiler: compiler
938 | };
939 |
--------------------------------------------------------------------------------