├── .idea ├── .gitignore ├── misc.xml ├── vcs.xml ├── the-super-tiny-compiler-cn.iml └── modules.xml ├── my-super-tiny-compiler ├── TOKEN.json ├── AST-TYPE.json ├── generator.js ├── transform.js ├── parser.js └── lexer.js ├── my-super-tiny-compiler.js ├── test.json ├── README.md ├── test.js ├── optimize-version.js ├── yarn.lock ├── LICENSE ├── super-tiny-compiler-chinese.js └── super-tiny-compiler.js /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml 3 | -------------------------------------------------------------------------------- /my-super-tiny-compiler/TOKEN.json: -------------------------------------------------------------------------------- 1 | { 2 | "NUMBER": "number", 3 | "PAREN": "paren", 4 | "NAME": "name", 5 | "SPACE": "space", 6 | "EOF": "EOF" 7 | } -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /my-super-tiny-compiler/AST-TYPE.json: -------------------------------------------------------------------------------- 1 | { 2 | "CALL_EXPRESSION": "CallExpression", 3 | "NUMBER_LITERAL": "NumberLiteral", 4 | "PROGRAM": "Program", 5 | "STATEMENT": "ExpressionStatement", 6 | "ID": "Identifier" 7 | } -------------------------------------------------------------------------------- /.idea/the-super-tiny-compiler-cn.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /my-super-tiny-compiler.js: -------------------------------------------------------------------------------- 1 | let lexer = require("./my-super-tiny-compiler/lexer").lexer; 2 | let parser = require("./my-super-tiny-compiler/parser").parser; 3 | let transform = require("./my-super-tiny-compiler/transform").transform; 4 | let codeGenerator = require("./my-super-tiny-compiler/generator").generator; 5 | function compile(input) { 6 | try { 7 | let tokens = lexer(input); 8 | console.log(tokens); 9 | let ast = parser(tokens); 10 | console.log(JSON.stringify(ast)); 11 | let newAst = transform(ast); 12 | console.log(JSON.stringify(newAst)); 13 | console.log(codeGenerator(newAst)) 14 | } catch (e) { 15 | console.log(e) 16 | } 17 | } 18 | 19 | let input = '(add 2 (subtract (add 2 (add 3 4)) 2))'; 20 | compile(input); 21 | -------------------------------------------------------------------------------- /my-super-tiny-compiler/generator.js: -------------------------------------------------------------------------------- 1 | let AST_TYPE = require('./AST-TYPE'); 2 | 3 | function codeGenerator(node) { 4 | switch (node.type) { 5 | case AST_TYPE.PROGRAM: 6 | return node.body.map(codeGenerator).join('\n'); 7 | case AST_TYPE.STATEMENT: 8 | return (codeGenerator(node.expression) + ';'); 9 | case AST_TYPE.CALL_EXPRESSION: 10 | return (codeGenerator(node.callee) + '(' + node.arguments.map(codeGenerator).join(',') + ')'); 11 | case AST_TYPE.NUMBER_LITERAL: 12 | return node.value; 13 | case AST_TYPE.ID: 14 | return node.name; 15 | default: 16 | throw new TypeError(node.type); 17 | } 18 | } 19 | 20 | exports.generator = function (node) { 21 | //c代码生成 22 | return codeGenerator(node); 23 | }; -------------------------------------------------------------------------------- /test.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Program", 3 | "body": [ 4 | { 5 | "type": "ExpressionStatement", 6 | "expression": { 7 | "type": "CallExpression", 8 | "callee": { 9 | "type": "Identifier", 10 | "name": "add" 11 | }, 12 | "arguments": [ 13 | { 14 | "type": "NumberLiteral", 15 | "value": "2" 16 | }, 17 | { 18 | "type": "CallExpression", 19 | "callee": { 20 | "type": "Identifier", 21 | "name": "subtract" 22 | }, 23 | "arguments": [ 24 | { 25 | "type": "NumberLiteral", 26 | "value": "4" 27 | }, 28 | { 29 | "type": "NumberLiteral", 30 | "value": "2" 31 | } 32 | ] 33 | } 34 | ] 35 | } 36 | } 37 | ] 38 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | ***Welcome to The Super Tiny Compiler!*** 4 | 5 | 这是一个超级简单的编译器的例子，包含了现代编译器的几个主要部分，用简单易读的 JavaScript 编写。 6 | 7 | 把这个读完将会有助于你了解*大多数*编译器从前端到后端是如何工作的。 8 | 9 | ### [想直接看代码？点这里](super-tiny-compiler-chinese.js) 10 | 11 | ### 或者... [看看演讲](https://www.youtube.com/watch?v=Tar4WgAfMr4) 12 | 13 | --- 14 | 15 | ### 为啥我要关心这个？ 16 | 17 | 确实，大多数人在日常工作中没有必要了解编译器都是如何工作的。但是，编译器无处不在，你使用的很多 18 | 工具的底层原理都是从编译器那儿来的。 19 | 20 | ### 但是编译器太高大上了！ 21 | 22 | 额，确实。但这是我们（写编译器的人）的过错，我们把一些本应该很通俗易懂的事情弄得太可怕了， 23 | 让很多人都认为编译器这种东西是可望而不可即的，甚至只有最书呆子的书呆子才能理解。 24 | 25 | ### 好吧，所以我该从哪儿开始？ 26 | 27 | 太棒了！直接去看 [super-tiny-compiler-chinese.js](super-tiny-compiler-chinese.js) 这个文件吧！ 28 | 29 | ### Tests 30 | 31 | 直接运行 `node test.js` 32 | 33 | --- 34 | 35 | [![cc-by-4.0](https://licensebuttons.net/l/by/4.0/80x15.png)](http://creativecommons.org/licenses/by/4.0/) 36 | 37 | ## PS 38 | 39 | - 该项目更改自 https://github.com/starkwang/the-super-tiny-compiler-cn 40 | - 该项目里面有多个版本的 super-tiny-compiler，包含原始版本，第三方中文翻译版本，第三方优化版本以及本人修改版。 41 | - 原始版本：[super-tiny-compiler.js](super-tiny-compiler.js) (地址：https://github.com/ayqy/the-super-tiny-compiler) 42 | - 第三方翻译版：[super-tiny-compiler-chinese.js](super-tiny-compiler-chinese.js) (地址：https://github.com/starkwang/the-super-tiny-compiler-cn) 43 | - 第三方优化版：[optimize-version.js](optimize-version.js)（该版本已经找不到来源了，如果原作者看到，请告知。） 44 | - 本人修改版：[my-super-tiny-compiler.js](my-super-tiny-compiler.js) 45 | 46 | -------------------------------------------------------------------------------- /my-super-tiny-compiler/transform.js: -------------------------------------------------------------------------------- 1 | let AST_TYPE = require('./AST-TYPE'); 2 | 3 | function traverse(lispASTNode, parentNode) { 4 | //遍历一遍lispAST，并打印出来 5 | if (lispASTNode.type === AST_TYPE.NUMBER_LITERAL) { 6 | return { 7 | type: AST_TYPE.NUMBER_LITERAL, 8 | value: lispASTNode.value 9 | } 10 | } 11 | let lispArray; 12 | let cNode; 13 | let cArray; 14 | if (lispASTNode.type === AST_TYPE.PROGRAM) { 15 | lispArray = lispASTNode.body; 16 | cNode = { 17 | type: AST_TYPE.PROGRAM, 18 | body: [] 19 | }; 20 | cArray = cNode.body; 21 | } else if (lispASTNode.type === AST_TYPE.CALL_EXPRESSION) { 22 | lispArray = lispASTNode.params; 23 | cNode = { 24 | type: AST_TYPE.CALL_EXPRESSION, 25 | callee: { 26 | type: AST_TYPE.ID, 27 | name: lispASTNode.name, 28 | }, 29 | arguments: [], 30 | }; 31 | cArray = cNode.arguments; 32 | //需要判断父节点是否为函数调用 33 | if (parentNode === null || parentNode.type !== AST_TYPE.CALL_EXPRESSION) { 34 | //如果不是，则表明当前节点为顶层函数调用 35 | cNode = { 36 | type: AST_TYPE.STATEMENT, 37 | expression: cNode 38 | }; 39 | cArray = cNode.expression.arguments; 40 | } 41 | } else { 42 | throw new TypeError("Unrecognized ASTNode: " + lispASTNode); 43 | } 44 | for (let index = 0; index < lispArray.length; index++) { 45 | cArray.push(traverse(lispArray[index],lispASTNode)); 46 | } 47 | return cNode; 48 | } 49 | 50 | exports.transform = function (lispAST) { 51 | //将lisp的AST转换为c的AST 52 | return traverse(lispAST, null); 53 | }; -------------------------------------------------------------------------------- /my-super-tiny-compiler/parser.js: -------------------------------------------------------------------------------- 1 | const TOKEN = require('./TOKEN.json'); 2 | const ASTType = require('./AST-TYPE.json'); 3 | let curIndex = 0;//指向待处理的Token 4 | let tokens;//token列表 5 | let id = 0; 6 | function getNextToken() { 7 | return tokens[curIndex++]; 8 | } 9 | 10 | function lookAhead() { 11 | return tokens[curIndex]; 12 | } 13 | 14 | function matchNextToken(tokenType) { 15 | return tokens[curIndex].type === tokenType 16 | } 17 | 18 | function generateASTNode() { 19 | //生成AST节点 20 | let token = getNextToken(); 21 | //判断是否为数字 22 | if (token.type === TOKEN.NUMBER) { 23 | //token为数字 24 | return { 25 | id:id++, 26 | type: ASTType.NUMBER_LITERAL, 27 | value: token.value 28 | } 29 | } 30 | //判断是否为方法调用 31 | if (token.type === TOKEN.PAREN && token.value === '(') { 32 | //匹配左括号，继续匹配标识符 33 | token = getNextToken(); 34 | if (token.type === TOKEN.NAME) { 35 | //匹配标识符 36 | let astNode = { 37 | id:id++, 38 | type: ASTType.CALL_EXPRESSION, 39 | name: token.value, 40 | params: [] 41 | }; 42 | token = lookAhead(); 43 | //接下去使用DFS匹配列表参数，直到遇到右圆括号 44 | while (token.value !== ')') { 45 | astNode.params.push(generateASTNode()); 46 | token = lookAhead(); 47 | } 48 | getNextToken();//读取右圆括号 49 | return astNode; 50 | } 51 | 52 | } 53 | throw new TypeError("Unrecognized token: " + token); 54 | } 55 | 56 | exports.parser = function (outerTokens) { 57 | //语法分析器 58 | tokens = outerTokens; 59 | let ast = { 60 | id:id++, 61 | type: ASTType.PROGRAM, 62 | body: [] 63 | }; 64 | 65 | while (!matchNextToken(TOKEN.EOF)) { 66 | //一直读取token直到文件末尾 67 | ast.body.push(generateASTNode()); 68 | } 69 | return ast; 70 | }; -------------------------------------------------------------------------------- /my-super-tiny-compiler/lexer.js: -------------------------------------------------------------------------------- 1 | let tokens = [];//存储最终的token 2 | let curIndex = 0;//input的指针，永远指向待读取的字符 3 | const TOKEN = require('./TOKEN.json'); 4 | let input; 5 | 6 | function getNextChar() { 7 | //读取下一个字符 8 | return input[curIndex++]; 9 | } 10 | 11 | function lookAhead() { 12 | //查看下一个字符，不读取 13 | return input[curIndex]; 14 | } 15 | 16 | function testNext(char) { 17 | //测试下一个字符 18 | if (char === "(" || char === ")") { 19 | //处理括号 20 | return TOKEN.PAREN; 21 | } 22 | let number = /\d/; 23 | let space = /\s/; 24 | let name = /[A-Za-z]/; 25 | if (number.test(char)) { 26 | return TOKEN.NUMBER; 27 | } 28 | if (space.test(char)) { 29 | return TOKEN.SPACE 30 | } 31 | if (name.test(char)) { 32 | return TOKEN.NAME; 33 | } 34 | } 35 | 36 | function isNumber(char) { 37 | return testNext(char) === TOKEN.NUMBER; 38 | } 39 | 40 | function isName(char) { 41 | return testNext(char) === TOKEN.NAME; 42 | } 43 | 44 | exports.lexer = function (outerInput) { 45 | input = outerInput; 46 | //词法分析器 47 | let pushToken = (type, value) => { 48 | tokens.push({ 49 | type: type, 50 | value: value 51 | }) 52 | }; 53 | while (curIndex < input.length) { 54 | let char = getNextChar(input); 55 | switch (testNext(char)) { 56 | case TOKEN.NAME: 57 | //循环识别name 58 | let nameValue = char; 59 | while (isName(lookAhead(input))) { 60 | //下一个是name，读取 61 | nameValue += getNextChar(input) 62 | } 63 | pushToken(TOKEN.NAME, nameValue); 64 | break; 65 | case TOKEN.PAREN: 66 | pushToken(TOKEN.PAREN, char); 67 | break; 68 | case TOKEN.NUMBER: 69 | let numberValue = char; 70 | while (isNumber(lookAhead(input))) { 71 | //下一个是number，读取 72 | numberValue += getNextChar(input) 73 | } 74 | pushToken(TOKEN.NUMBER, numberValue); 75 | break; 76 | case TOKEN.SPACE: 77 | //空格不处理 78 | break; 79 | default: 80 | throw new TypeError("Unrecognized token: " + char); 81 | } 82 | 83 | } 84 | pushToken(TOKEN.EOF, TOKEN.EOF); 85 | return tokens; 86 | }; 87 | 88 | -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | var superTinyCompiler = require('./super-tiny-compiler-chinese'); 2 | // var assert = require('assert'); 3 | 4 | var tokenizer = superTinyCompiler.tokenizer; 5 | var parser = superTinyCompiler.parser; 6 | var transformer = superTinyCompiler.transformer; 7 | var codeGenerator = superTinyCompiler.codeGenerator; 8 | var compiler = superTinyCompiler.compiler; 9 | 10 | var input = '(add (subtract 4 2) 2)'; 11 | var output = 'add(2, subtract(4, 2));'; 12 | 13 | /*var tokens = [ 14 | {type: 'paren', value: '('}, 15 | {type: 'name', value: 'add'}, 16 | {type: 'number', value: '2'}, 17 | {type: 'paren', value: '('}, 18 | {type: 'name', value: 'subtract'}, 19 | {type: 'number', value: '4'}, 20 | {type: 'number', value: '2'}, 21 | {type: 'paren', value: ')'}, 22 | {type: 'paren', value: ')'} 23 | ]; 24 | 25 | var ast = { 26 | type: 'Program', 27 | body: [ 28 | { 29 | type: 'CallExpression', 30 | name: 'add', 31 | params: [ 32 | { 33 | type: 'NumberLiteral', 34 | value: '2' 35 | }, 36 | { 37 | type: 'CallExpression', 38 | name: 'subtract', 39 | params: [ 40 | { 41 | type: 'NumberLiteral', 42 | value: '4' 43 | }, 44 | { 45 | type: 'NumberLiteral', 46 | value: '2' 47 | }] 48 | }] 49 | }] 50 | }; 51 | */ 52 | var newAst = { 53 | type: 'Program', 54 | body: [ 55 | { 56 | type: 'ExpressionStatement', 57 | expression: { 58 | type: 'CallExpression', 59 | callee: { 60 | type: 'Identifier', 61 | name: 'add' 62 | }, 63 | arguments: [ 64 | { 65 | type: 'NumberLiteral', 66 | value: '2' 67 | }, 68 | { 69 | type: 'CallExpression', 70 | callee: { 71 | type: 'Identifier', 72 | name: 'subtract' 73 | }, 74 | arguments: [ 75 | { 76 | type: 'NumberLiteral', 77 | value: '4' 78 | }, { 79 | type: 'NumberLiteral', 80 | value: '2' 81 | }] 82 | }] 83 | } 84 | }] 85 | }; 86 | 87 | /*assert.deepStrictEqual(tokenizer(input), tokens, 'Tokenizer should turn `input` string into `tokens` array'); 88 | assert.deepStrictEqual(parser(tokens), ast, 'Parser should turn `tokens` array into `ast`'); 89 | assert.deepStrictEqual(transformer(ast), newAst, 'Transformer should turn `ast` into a `newAst`'); 90 | assert.deepStrictEqual(codeGenerator(newAst), output, 'Code Generator should turn `newAst` into `output` string'); 91 | assert.deepStrictEqual(compiler(input), output, 'Compiler should turn `input` into `output`');*/ 92 | let tokens = tokenizer(input); 93 | console.log(tokens); 94 | let ast = parser(tokens); 95 | console.log(ast); 96 | let newAst = transformer(ast); 97 | console.log(newAst); 98 | console.log('All Passed!'); 99 | -------------------------------------------------------------------------------- /optimize-version.js: -------------------------------------------------------------------------------- 1 | //TODO 非本人编写！！！ 2 | 3 | // 接受代码字符串input 4 | function tokenizer(input) { 5 | // 剩余待处理字符 6 | let rest = input; 7 | // 输出结果集合，存放词法单元 8 | let tokens = []; 9 | // 各词素对应的正则表达式 10 | const REGEX = { 11 | PAREN: /^\(|^\)/, 12 | WHITESPACE: /^\s+/, 13 | NUMBERS: /^\d+/, 14 | STRING: /^"([^"]+)?"/, 15 | NAME: /^[a-z]+/i 16 | }; 17 | 18 | // 遍历字符串，挑出词法单元 19 | while (rest.length > 0) { 20 | let type, value; 21 | // 匹配结果，本次匹配消费掉的串长度 22 | let matched, span; 23 | 24 | // 匹配左括号、右括号 25 | if (matched = rest.match(REGEX.PAREN)) { 26 | type = 'paren'; 27 | } 28 | // 跳过空白字符 29 | else if (matched = rest.match(REGEX.WHITESPACE)) { 30 | rest = rest.slice(matched[0].length); 31 | continue; 32 | } 33 | // 匹配数值 34 | else if (matched = rest.match(REGEX.NUMBERS)) { 35 | type = 'number'; 36 | } 37 | // 匹配形如"abc"的字符串 38 | else if (matched = rest.match(REGEX.STRING)) { 39 | type = 'string'; 40 | value = matched[1] || ''; 41 | span = matched[0].length; 42 | } 43 | // 匹配函数名，要求只含大小写字母 44 | else if (matched = rest.match(REGEX.NAME)) { 45 | type = 'name'; 46 | } 47 | // 无法识别的字符，报错 48 | else { 49 | throw new TypeError('Unexpected character: ' + rest); 50 | } 51 | 52 | value = value || matched[0]; 53 | tokens.push({type, value}); 54 | rest = rest.slice(span || matched[0].length); 55 | } 56 | 57 | return tokens; 58 | } 59 | 60 | function parser(tokens) { 61 | // 当前正在处理的token索引 62 | let current = 0; 63 | 64 | // 递归遍历（因为函数调用允许嵌套），把token转成AST节点 65 | function walk() { 66 | let token = tokens[current]; 67 | 68 | // 数值 69 | if (token.type === 'number') { 70 | current++; 71 | 72 | // 生成一个AST节点，表示数值字面量 73 | return { 74 | type: 'NumberLiteral', 75 | value: token.value, 76 | }; 77 | } 78 | 79 | // 字符串 80 | if (token.type === 'string') { 81 | current++; 82 | 83 | return { 84 | type: 'StringLiteral', 85 | value: token.value, 86 | }; 87 | } 88 | 89 | // 函数调用 90 | if ( 91 | token.type === 'paren' && 92 | token.value === '(' 93 | ) { 94 | // 丢掉左括号，取下一个token作为函数名 95 | token = tokens[++current]; 96 | 97 | let node = { 98 | type: 'CallExpression', 99 | name: token.value, 100 | params: [], 101 | }; 102 | 103 | // 看下一个token 104 | token = tokens[++current]; 105 | 106 | // 右括号之前的所有token解析完都是参数 107 | while ( 108 | (token.type !== 'paren') || 109 | (token.type === 'paren' && token.value !== ')') 110 | ) { 111 | node.params.push(walk()); 112 | token = tokens[current]; 113 | } 114 | // 吃掉右括号 115 | current++; 116 | 117 | return node; 118 | } 119 | 120 | // 无法识别的token，报错 121 | throw new TypeError(token.type); 122 | } 123 | 124 | // AST的根节点 125 | let ast = { 126 | type: 'Program', 127 | body: [], 128 | }; 129 | // 填充ast.body，允许多条语句，所以放循环里 130 | while (current < tokens.length) { 131 | ast.body.push(walk()); 132 | } 133 | 134 | return ast; 135 | } 136 | 137 | function traverser(ast, visitor) { 138 | // 遍历AST节点数组 139 | function traverseArray(array, parent) { 140 | array.forEach(child => { 141 | traverseNode(child, parent); 142 | }); 143 | } 144 | 145 | function traverseNode(node, parent) { 146 | // 从visitor取出对应的一组方法 147 | let methods = visitor[node.type]; 148 | // 通知visitor我们正在访问node 149 | if (methods && methods.enter) { 150 | methods.enter(node, parent); 151 | } 152 | 153 | switch (node.type) { 154 | // 根节点 155 | case 'Program': 156 | traverseArray(node.body, node); 157 | break; 158 | // 函数调用 159 | case 'CallExpression': 160 | traverseArray(node.params, node); 161 | break; 162 | // 数值和字符串，没孩子，不用处理 163 | case 'NumberLiteral': 164 | case 'StringLiteral': 165 | break; 166 | 167 | // 无法识别的AST节点，报错 168 | default: 169 | throw new TypeError(node.type); 170 | } 171 | 172 | // 通知visitor我们要离开node了 173 | if (methods && methods.exit) { 174 | methods.exit(node, parent); 175 | } 176 | } 177 | 178 | // 开始遍历 179 | traverseNode(ast, null); 180 | } 181 | 182 | // 输入Lisp AST，输出C AST 183 | function transformer(ast) { 184 | // 新AST的根节点 185 | let newAst = { 186 | type: 'Program', 187 | body: [], 188 | }; 189 | 190 | // 用额外的数据结构维持新旧AST的联系 191 | let stack = [newAst.body]; 192 | function peak() { 193 | return stack[stack.length - 1]; 194 | } 195 | 196 | // 创建vistor，开始遍历 197 | traverser(ast, 198 | { 199 | // 数值和字符串，直接原样插入新AST 200 | NumberLiteral: { 201 | enter(node, parent) { 202 | let newASTHost = peak(); 203 | newASTHost.push({ 204 | type: 'NumberLiteral', 205 | value: node.value, 206 | }); 207 | } 208 | }, 209 | StringLiteral: { 210 | enter(node, parent) { 211 | let newASTHost = peak(); 212 | newASTHost.push({ 213 | type: 'StringLiteral', 214 | value: node.value, 215 | }); 216 | } 217 | }, 218 | // 函数调用 219 | CallExpression: { 220 | enter(node, parent) { 221 | let newASTHost = peak(); 222 | // 创建不同的AST节点 223 | let expression = { 224 | type: 'CallExpression', 225 | callee: { 226 | type: 'Identifier', 227 | name: node.name, 228 | }, 229 | arguments: [], 230 | }; 231 | 232 | // 函数调用可以有孩子，建立节点对应关系，供子节点使用 233 | stack.push(expression.arguments); 234 | 235 | // 顶层函数调用算是语句，包装成特殊的AST节点 236 | if (parent.type !== 'CallExpression') { 237 | expression = { 238 | type: 'ExpressionStatement', 239 | expression: expression, 240 | }; 241 | } 242 | 243 | newASTHost.push(expression); 244 | }, 245 | leave(node, parent) { 246 | // 参数收集结束，回到上一层 247 | stack.pop(); 248 | } 249 | } 250 | }); 251 | 252 | return newAst; 253 | } 254 | 255 | // 递归遍历新AST，输出代码字符串 256 | function codeGenerator(node) { 257 | switch (node.type) { 258 | // 根节点，把body里的所有内容都生成一遍，按行输出 259 | case 'Program': 260 | return node.body.map(codeGenerator).join('\n'); 261 | 262 | // 表达式语句，处理其表达式内容，并添上分号 263 | case 'ExpressionStatement': 264 | return ( 265 | codeGenerator(node.expression) + ';' 266 | ); 267 | 268 | // 函数调用，添上括号，参数用逗号分隔 269 | case 'CallExpression': 270 | return ( 271 | codeGenerator(node.callee) + 272 | '(' + 273 | node.arguments.map(codeGenerator).join(', ') + 274 | ')' 275 | ); 276 | 277 | // 标识符，数值，原样输出 278 | case 'Identifier': 279 | return node.name; 280 | case 'NumberLiteral': 281 | return node.value; 282 | 283 | // 字符串，用双引号包起来再输出 284 | case 'StringLiteral': 285 | return '"' + node.value + '"'; 286 | 287 | // 无法识别的新AST节点，报错 288 | default: 289 | throw new TypeError(node.type); 290 | } 291 | } 292 | 293 | function compiler(input) { 294 | let tokens = tokenizer(input); 295 | let ast = parser(tokens); 296 | let newAst = transformer(ast); 297 | let output = codeGenerator(newAst); 298 | 299 | return output; 300 | } 301 | 302 | // test 303 | // const input = '(add 2 (subtract 4 2))'; 304 | // let output = compiler(input); 305 | // console.log(output); 306 | 307 | module.exports = { 308 | tokenizer, 309 | parser, 310 | traverser, 311 | transformer, 312 | codeGenerator, 313 | compiler, 314 | }; 315 | -------------------------------------------------------------------------------- /yarn.lock: -------------------------------------------------------------------------------- 1 | # THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. 2 | # yarn lockfile v1 3 | 4 | 5 | assert@^2.0.0: 6 | version "2.0.0" 7 | resolved "https://registry.yarnpkg.com/assert/-/assert-2.0.0.tgz#95fc1c616d48713510680f2eaf2d10dd22e02d32" 8 | integrity sha512-se5Cd+js9dXJnu6Ag2JFc00t+HmHOen+8Q+L7O9zI0PqQXr20uk2J0XQqMxZEeo5U50o8Nvmmx7dZrl+Ufr35A== 9 | dependencies: 10 | es6-object-assign "^1.1.0" 11 | is-nan "^1.2.1" 12 | object-is "^1.0.1" 13 | util "^0.12.0" 14 | 15 | define-properties@^1.1.2, define-properties@^1.1.3: 16 | version "1.1.3" 17 | resolved "https://registry.yarnpkg.com/define-properties/-/define-properties-1.1.3.tgz#cf88da6cbee26fe6db7094f61d870cbd84cee9f1" 18 | integrity sha512-3MqfYKj2lLzdMSf8ZIZE/V+Zuy+BgD6f164e8K2w7dgnpKArBDerGYpM46IYYcjnkdPNMjPk9A6VFB8+3SKlXQ== 19 | dependencies: 20 | object-keys "^1.0.12" 21 | 22 | es-abstract@^1.17.0-next.1: 23 | version "1.17.4" 24 | resolved "https://registry.yarnpkg.com/es-abstract/-/es-abstract-1.17.4.tgz#e3aedf19706b20e7c2594c35fc0d57605a79e184" 25 | integrity sha512-Ae3um/gb8F0mui/jPL+QiqmglkUsaQf7FwBEHYIFkztkneosu9imhqHpBzQ3h1vit8t5iQ74t6PEVvphBZiuiQ== 26 | dependencies: 27 | es-to-primitive "^1.2.1" 28 | function-bind "^1.1.1" 29 | has "^1.0.3" 30 | has-symbols "^1.0.1" 31 | is-callable "^1.1.5" 32 | is-regex "^1.0.5" 33 | object-inspect "^1.7.0" 34 | object-keys "^1.1.1" 35 | object.assign "^4.1.0" 36 | string.prototype.trimleft "^2.1.1" 37 | string.prototype.trimright "^2.1.1" 38 | 39 | es-to-primitive@^1.2.1: 40 | version "1.2.1" 41 | resolved "https://registry.yarnpkg.com/es-to-primitive/-/es-to-primitive-1.2.1.tgz#e55cd4c9cdc188bcefb03b366c736323fc5c898a" 42 | integrity sha512-QCOllgZJtaUo9miYBcLChTUaHNjJF3PYs1VidD7AwiEj1kYxKeQTctLAezAOH5ZKRH0g2IgPn6KwB4IT8iRpvA== 43 | dependencies: 44 | is-callable "^1.1.4" 45 | is-date-object "^1.0.1" 46 | is-symbol "^1.0.2" 47 | 48 | es6-object-assign@^1.1.0: 49 | version "1.1.0" 50 | resolved "https://registry.yarnpkg.com/es6-object-assign/-/es6-object-assign-1.1.0.tgz#c2c3582656247c39ea107cb1e6652b6f9f24523c" 51 | integrity sha1-wsNYJlYkfDnqEHyx5mUrb58kUjw= 52 | 53 | fs@^0.0.1-security: 54 | version "0.0.1-security" 55 | resolved "https://registry.yarnpkg.com/fs/-/fs-0.0.1-security.tgz#8a7bd37186b6dddf3813f23858b57ecaaf5e41d4" 56 | integrity sha1-invTcYa23d84E/I4WLV+yq9eQdQ= 57 | 58 | function-bind@^1.1.1: 59 | version "1.1.1" 60 | resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d" 61 | integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A== 62 | 63 | has-symbols@^1.0.0, has-symbols@^1.0.1: 64 | version "1.0.1" 65 | resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.0.1.tgz#9f5214758a44196c406d9bd76cebf81ec2dd31e8" 66 | integrity sha512-PLcsoqu++dmEIZB+6totNFKq/7Do+Z0u4oT0zKOJNl3lYK6vGwwu2hjHs+68OEZbTjiUE9bgOABXbP/GvrS0Kg== 67 | 68 | has@^1.0.3: 69 | version "1.0.3" 70 | resolved "https://registry.yarnpkg.com/has/-/has-1.0.3.tgz#722d7cbfc1f6aa8241f16dd814e011e1f41e8796" 71 | integrity sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw== 72 | dependencies: 73 | function-bind "^1.1.1" 74 | 75 | inherits@^2.0.3: 76 | version "2.0.4" 77 | resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c" 78 | integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ== 79 | 80 | is-arguments@^1.0.4: 81 | version "1.0.4" 82 | resolved "https://registry.yarnpkg.com/is-arguments/-/is-arguments-1.0.4.tgz#3faf966c7cba0ff437fb31f6250082fcf0448cf3" 83 | integrity sha512-xPh0Rmt8NE65sNzvyUmWgI1tz3mKq74lGA0mL8LYZcoIzKOzDh6HmrYm3d18k60nHerC8A9Km8kYu87zfSFnLA== 84 | 85 | is-callable@^1.1.4, is-callable@^1.1.5: 86 | version "1.1.5" 87 | resolved "https://registry.yarnpkg.com/is-callable/-/is-callable-1.1.5.tgz#f7e46b596890456db74e7f6e976cb3273d06faab" 88 | integrity sha512-ESKv5sMCJB2jnHTWZ3O5itG+O128Hsus4K4Qh1h2/cgn2vbgnLSVqfV46AeJA9D5EeeLa9w81KUXMtn34zhX+Q== 89 | 90 | is-date-object@^1.0.1: 91 | version "1.0.2" 92 | resolved "https://registry.yarnpkg.com/is-date-object/-/is-date-object-1.0.2.tgz#bda736f2cd8fd06d32844e7743bfa7494c3bfd7e" 93 | integrity sha512-USlDT524woQ08aoZFzh3/Z6ch9Y/EWXEHQ/AaRN0SkKq4t2Jw2R2339tSXmwuVoY7LLlBCbOIlx2myP/L5zk0g== 94 | 95 | is-generator-function@^1.0.7: 96 | version "1.0.7" 97 | resolved "https://registry.yarnpkg.com/is-generator-function/-/is-generator-function-1.0.7.tgz#d2132e529bb0000a7f80794d4bdf5cd5e5813522" 98 | integrity sha512-YZc5EwyO4f2kWCax7oegfuSr9mFz1ZvieNYBEjmukLxgXfBUbxAWGVF7GZf0zidYtoBl3WvC07YK0wT76a+Rtw== 99 | 100 | is-nan@^1.2.1: 101 | version "1.3.0" 102 | resolved "https://registry.yarnpkg.com/is-nan/-/is-nan-1.3.0.tgz#85d1f5482f7051c2019f5673ccebdb06f3b0db03" 103 | integrity sha512-z7bbREymOqt2CCaZVly8aC4ML3Xhfi0ekuOnjO2L8vKdl+CttdVoGZQhd4adMFAsxQ5VeRVwORs4tU8RH+HFtQ== 104 | dependencies: 105 | define-properties "^1.1.3" 106 | 107 | is-regex@^1.0.5: 108 | version "1.0.5" 109 | resolved "https://registry.yarnpkg.com/is-regex/-/is-regex-1.0.5.tgz#39d589a358bf18967f726967120b8fc1aed74eae" 110 | integrity sha512-vlKW17SNq44owv5AQR3Cq0bQPEb8+kF3UKZ2fiZNOWtztYE5i0CzCZxFDwO58qAOWtxdBRVO/V5Qin1wjCqFYQ== 111 | dependencies: 112 | has "^1.0.3" 113 | 114 | is-symbol@^1.0.2: 115 | version "1.0.3" 116 | resolved "https://registry.yarnpkg.com/is-symbol/-/is-symbol-1.0.3.tgz#38e1014b9e6329be0de9d24a414fd7441ec61937" 117 | integrity sha512-OwijhaRSgqvhm/0ZdAcXNZt9lYdKFpcRDT5ULUuYXPoT794UNOdU+gpT6Rzo7b4V2HUl/op6GqY894AZwv9faQ== 118 | dependencies: 119 | has-symbols "^1.0.1" 120 | 121 | lodash@^4.17.15: 122 | version "4.17.15" 123 | resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.15.tgz#b447f6670a0455bbfeedd11392eff330ea097548" 124 | integrity sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A== 125 | 126 | object-inspect@^1.7.0: 127 | version "1.7.0" 128 | resolved "https://registry.yarnpkg.com/object-inspect/-/object-inspect-1.7.0.tgz#f4f6bd181ad77f006b5ece60bd0b6f398ff74a67" 129 | integrity sha512-a7pEHdh1xKIAgTySUGgLMx/xwDZskN1Ud6egYYN3EdRW4ZMPNEDUTF+hwy2LUC+Bl+SyLXANnwz/jyh/qutKUw== 130 | 131 | object-is@^1.0.1: 132 | version "1.0.2" 133 | resolved "https://registry.yarnpkg.com/object-is/-/object-is-1.0.2.tgz#6b80eb84fe451498f65007982f035a5b445edec4" 134 | integrity sha512-Epah+btZd5wrrfjkJZq1AOB9O6OxUQto45hzFd7lXGrpHPGE0W1k+426yrZV+k6NJOzLNNW/nVsmZdIWsAqoOQ== 135 | 136 | object-keys@^1.0.11, object-keys@^1.0.12, object-keys@^1.1.1: 137 | version "1.1.1" 138 | resolved "https://registry.yarnpkg.com/object-keys/-/object-keys-1.1.1.tgz#1c47f272df277f3b1daf061677d9c82e2322c60e" 139 | integrity sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA== 140 | 141 | object.assign@^4.1.0: 142 | version "4.1.0" 143 | resolved "https://registry.yarnpkg.com/object.assign/-/object.assign-4.1.0.tgz#968bf1100d7956bb3ca086f006f846b3bc4008da" 144 | integrity sha512-exHJeq6kBKj58mqGyTQ9DFvrZC/eR6OwxzoM9YRoGBqrXYonaFyGiFMuc9VZrXf7DarreEwMpurG3dd+CNyW5w== 145 | dependencies: 146 | define-properties "^1.1.2" 147 | function-bind "^1.1.1" 148 | has-symbols "^1.0.0" 149 | object-keys "^1.0.11" 150 | 151 | object.entries@^1.1.0: 152 | version "1.1.1" 153 | resolved "https://registry.yarnpkg.com/object.entries/-/object.entries-1.1.1.tgz#ee1cf04153de02bb093fec33683900f57ce5399b" 154 | integrity sha512-ilqR7BgdyZetJutmDPfXCDffGa0/Yzl2ivVNpbx/g4UeWrCdRnFDUBrKJGLhGieRHDATnyZXWBeCb29k9CJysQ== 155 | dependencies: 156 | define-properties "^1.1.3" 157 | es-abstract "^1.17.0-next.1" 158 | function-bind "^1.1.1" 159 | has "^1.0.3" 160 | 161 | safe-buffer@^5.1.2: 162 | version "5.2.0" 163 | resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.0.tgz#b74daec49b1148f88c64b68d49b1e815c1f2f519" 164 | integrity sha512-fZEwUGbVl7kouZs1jCdMLdt95hdIv0ZeHg6L7qPeciMZhZ+/gdesW4wgTARkrFWEpspjEATAzUGPG8N2jJiwbg== 165 | 166 | string.prototype.trimleft@^2.1.1: 167 | version "2.1.1" 168 | resolved "https://registry.yarnpkg.com/string.prototype.trimleft/-/string.prototype.trimleft-2.1.1.tgz#9bdb8ac6abd6d602b17a4ed321870d2f8dcefc74" 169 | integrity sha512-iu2AGd3PuP5Rp7x2kEZCrB2Nf41ehzh+goo8TV7z8/XDBbsvc6HQIlUl9RjkZ4oyrW1XM5UwlGl1oVEaDjg6Ag== 170 | dependencies: 171 | define-properties "^1.1.3" 172 | function-bind "^1.1.1" 173 | 174 | string.prototype.trimright@^2.1.1: 175 | version "2.1.1" 176 | resolved "https://registry.yarnpkg.com/string.prototype.trimright/-/string.prototype.trimright-2.1.1.tgz#440314b15996c866ce8a0341894d45186200c5d9" 177 | integrity sha512-qFvWL3/+QIgZXVmJBfpHmxLB7xsUXz6HsUmP8+5dRaC3Q7oKUv9Vo6aMCRZC1smrtyECFsIT30PqBJ1gTjAs+g== 178 | dependencies: 179 | define-properties "^1.1.3" 180 | function-bind "^1.1.1" 181 | 182 | util@^0.12.0: 183 | version "0.12.1" 184 | resolved "https://registry.yarnpkg.com/util/-/util-0.12.1.tgz#f908e7b633e7396c764e694dd14e716256ce8ade" 185 | integrity sha512-MREAtYOp+GTt9/+kwf00IYoHZyjM8VU4aVrkzUlejyqaIjd2GztVl5V9hGXKlvBKE3gENn/FMfHE5v6hElXGcQ== 186 | dependencies: 187 | inherits "^2.0.3" 188 | is-arguments "^1.0.4" 189 | is-generator-function "^1.0.7" 190 | object.entries "^1.1.0" 191 | safe-buffer "^5.1.2" 192 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Attribution 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More_considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution 4.0 International Public License 58 | 59 | By exercising the Licensed Rights (defined below), You accept and agree 60 | to be bound by the terms and conditions of this Creative Commons 61 | Attribution 4.0 International Public License ("Public License"). To the 62 | extent this Public License may be interpreted as a contract, You are 63 | granted the Licensed Rights in consideration of Your acceptance of 64 | these terms and conditions, and the Licensor grants You such rights in 65 | consideration of benefits the Licensor receives from making the 66 | Licensed Material available under these terms and conditions. 67 | 68 | 69 | Section 1 -- Definitions. 70 | 71 | a. Adapted Material means material subject to Copyright and Similar 72 | Rights that is derived from or based upon the Licensed Material 73 | and in which the Licensed Material is translated, altered, 74 | arranged, transformed, or otherwise modified in a manner requiring 75 | permission under the Copyright and Similar Rights held by the 76 | Licensor. For purposes of this Public License, where the Licensed 77 | Material is a musical work, performance, or sound recording, 78 | Adapted Material is always produced where the Licensed Material is 79 | synched in timed relation with a moving image. 80 | 81 | b. Adapter's License means the license You apply to Your Copyright 82 | and Similar Rights in Your contributions to Adapted Material in 83 | accordance with the terms and conditions of this Public License. 84 | 85 | c. Copyright and Similar Rights means copyright and/or similar rights 86 | closely related to copyright including, without limitation, 87 | performance, broadcast, sound recording, and Sui Generis Database 88 | Rights, without regard to how the rights are labeled or 89 | categorized. For purposes of this Public License, the rights 90 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 91 | Rights. 92 | 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. Share means to provide material to the public by any means or 116 | process that requires permission under the Licensed Rights, such 117 | as reproduction, public display, public performance, distribution, 118 | dissemination, communication, or importation, and to make material 119 | available to the public including in ways that members of the 120 | public may access the material from a place and at a time 121 | individually chosen by them. 122 | 123 | j. Sui Generis Database Rights means rights other than copyright 124 | resulting from Directive 96/9/EC of the European Parliament and of 125 | the Council of 11 March 1996 on the legal protection of databases, 126 | as amended and/or succeeded, as well as other essentially 127 | equivalent rights anywhere in the world. 128 | 129 | k. You means the individual or entity exercising the Licensed Rights 130 | under this Public License. Your has a corresponding meaning. 131 | 132 | 133 | Section 2 -- Scope. 134 | 135 | a. License grant. 136 | 137 | 1. Subject to the terms and conditions of this Public License, 138 | the Licensor hereby grants You a worldwide, royalty-free, 139 | non-sublicensable, non-exclusive, irrevocable license to 140 | exercise the Licensed Rights in the Licensed Material to: 141 | 142 | a. reproduce and Share the Licensed Material, in whole or 143 | in part; and 144 | 145 | b. produce, reproduce, and Share Adapted Material. 146 | 147 | 2. Exceptions and Limitations. For the avoidance of doubt, where 148 | Exceptions and Limitations apply to Your use, this Public 149 | License does not apply, and You do not need to comply with 150 | its terms and conditions. 151 | 152 | 3. Term. The term of this Public License is specified in Section 153 | 6(a). 154 | 155 | 4. Media and formats; technical modifications allowed. The 156 | Licensor authorizes You to exercise the Licensed Rights in 157 | all media and formats whether now known or hereafter created, 158 | and to make technical modifications necessary to do so. The 159 | Licensor waives and/or agrees not to assert any right or 160 | authority to forbid You from making technical modifications 161 | necessary to exercise the Licensed Rights, including 162 | technical modifications necessary to circumvent Effective 163 | Technological Measures. For purposes of this Public License, 164 | simply making modifications authorized by this Section 2(a) 165 | (4) never produces Adapted Material. 166 | 167 | 5. Downstream recipients. 168 | 169 | a. Offer from the Licensor -- Licensed Material. Every 170 | recipient of the Licensed Material automatically 171 | receives an offer from the Licensor to exercise the 172 | Licensed Rights under the terms and conditions of this 173 | Public License. 174 | 175 | b. No downstream restrictions. You may not offer or impose 176 | any additional or different terms or conditions on, or 177 | apply any Effective Technological Measures to, the 178 | Licensed Material if doing so restricts exercise of the 179 | Licensed Rights by any recipient of the Licensed 180 | Material. 181 | 182 | 6. No endorsement. Nothing in this Public License constitutes or 183 | may be construed as permission to assert or imply that You 184 | are, or that Your use of the Licensed Material is, connected 185 | with, or sponsored, endorsed, or granted official status by, 186 | the Licensor or others designated to receive attribution as 187 | provided in Section 3(a)(1)(A)(i). 188 | 189 | b. Other rights. 190 | 191 | 1. Moral rights, such as the right of integrity, are not 192 | licensed under this Public License, nor are publicity, 193 | privacy, and/or other similar personality rights; however, to 194 | the extent possible, the Licensor waives and/or agrees not to 195 | assert any such rights held by the Licensor to the limited 196 | extent necessary to allow You to exercise the Licensed 197 | Rights, but not otherwise. 198 | 199 | 2. Patent and trademark rights are not licensed under this 200 | Public License. 201 | 202 | 3. To the extent possible, the Licensor waives any right to 203 | collect royalties from You for the exercise of the Licensed 204 | Rights, whether directly or through a collecting society 205 | under any voluntary or waivable statutory or compulsory 206 | licensing scheme. In all other cases the Licensor expressly 207 | reserves any right to collect such royalties. 208 | 209 | 210 | Section 3 -- License Conditions. 211 | 212 | Your exercise of the Licensed Rights is expressly made subject to the 213 | following conditions. 214 | 215 | a. Attribution. 216 | 217 | 1. If You Share the Licensed Material (including in modified 218 | form), You must: 219 | 220 | a. retain the following if it is supplied by the Licensor 221 | with the Licensed Material: 222 | 223 | i. identification of the creator(s) of the Licensed 224 | Material and any others designated to receive 225 | attribution, in any reasonable manner requested by 226 | the Licensor (including by pseudonym if 227 | designated); 228 | 229 | ii. a copyright notice; 230 | 231 | iii. a notice that refers to this Public License; 232 | 233 | iv. a notice that refers to the disclaimer of 234 | warranties; 235 | 236 | v. a URI or hyperlink to the Licensed Material to the 237 | extent reasonably practicable; 238 | 239 | b. indicate if You modified the Licensed Material and 240 | retain an indication of any previous modifications; and 241 | 242 | c. indicate the Licensed Material is licensed under this 243 | Public License, and include the text of, or the URI or 244 | hyperlink to, this Public License. 245 | 246 | 2. You may satisfy the conditions in Section 3(a)(1) in any 247 | reasonable manner based on the medium, means, and context in 248 | which You Share the Licensed Material. For example, it may be 249 | reasonable to satisfy the conditions by providing a URI or 250 | hyperlink to a resource that includes the required 251 | information. 252 | 253 | 3. If requested by the Licensor, You must remove any of the 254 | information required by Section 3(a)(1)(A) to the extent 255 | reasonably practicable. 256 | 257 | 4. If You Share Adapted Material You produce, the Adapter's 258 | License You apply must not prevent recipients of the Adapted 259 | Material from complying with this Public License. 260 | 261 | 262 | Section 4 -- Sui Generis Database Rights. 263 | 264 | Where the Licensed Rights include Sui Generis Database Rights that 265 | apply to Your use of the Licensed Material: 266 | 267 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 268 | to extract, reuse, reproduce, and Share all or a substantial 269 | portion of the contents of the database; 270 | 271 | b. if You include all or a substantial portion of the database 272 | contents in a database in which You have Sui Generis Database 273 | Rights, then the database in which You have Sui Generis Database 274 | Rights (but not its individual contents) is Adapted Material; and 275 | 276 | c. You must comply with the conditions in Section 3(a) if You Share 277 | all or a substantial portion of the contents of the database. 278 | 279 | For the avoidance of doubt, this Section 4 supplements and does not 280 | replace Your obligations under this Public License where the Licensed 281 | Rights include other Copyright and Similar Rights. 282 | 283 | 284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 285 | 286 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 287 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 288 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 289 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 290 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 291 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 292 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 293 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 294 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 295 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 296 | 297 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 298 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 299 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 300 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 301 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 302 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 303 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 304 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 305 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 306 | 307 | c. The disclaimer of warranties and limitation of liability provided 308 | above shall be interpreted in a manner that, to the extent 309 | possible, most closely approximates an absolute disclaimer and 310 | waiver of all liability. 311 | 312 | 313 | Section 6 -- Term and Termination. 314 | 315 | a. This Public License applies for the term of the Copyright and 316 | Similar Rights licensed here. However, if You fail to comply with 317 | this Public License, then Your rights under this Public License 318 | terminate automatically. 319 | 320 | b. Where Your right to use the Licensed Material has terminated under 321 | Section 6(a), it reinstates: 322 | 323 | 1. automatically as of the date the violation is cured, provided 324 | it is cured within 30 days of Your discovery of the 325 | violation; or 326 | 327 | 2. upon express reinstatement by the Licensor. 328 | 329 | For the avoidance of doubt, this Section 6(b) does not affect any 330 | right the Licensor may have to seek remedies for Your violations 331 | of this Public License. 332 | 333 | c. For the avoidance of doubt, the Licensor may also offer the 334 | Licensed Material under separate terms or conditions or stop 335 | distributing the Licensed Material at any time; however, doing so 336 | will not terminate this Public License. 337 | 338 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 339 | License. 340 | 341 | 342 | Section 7 -- Other Terms and Conditions. 343 | 344 | a. The Licensor shall not be bound by any additional or different 345 | terms or conditions communicated by You unless expressly agreed. 346 | 347 | b. Any arrangements, understandings, or agreements regarding the 348 | Licensed Material not stated herein are separate from and 349 | independent of the terms and conditions of this Public License. 350 | 351 | 352 | Section 8 -- Interpretation. 353 | 354 | a. For the avoidance of doubt, this Public License does not, and 355 | shall not be interpreted to, reduce, limit, restrict, or impose 356 | conditions on any use of the Licensed Material that could lawfully 357 | be made without permission under this Public License. 358 | 359 | b. To the extent possible, if any provision of this Public License is 360 | deemed unenforceable, it shall be automatically reformed to the 361 | minimum extent necessary to make it enforceable. If the provision 362 | cannot be reformed, it shall be severed from this Public License 363 | without affecting the enforceability of the remaining terms and 364 | conditions. 365 | 366 | c. No term or condition of this Public License will be waived and no 367 | failure to comply consented to unless expressly agreed to by the 368 | Licensor. 369 | 370 | d. Nothing in this Public License constitutes or may be interpreted 371 | as a limitation upon, or waiver of, any privileges and immunities 372 | that apply to the Licensor or You, including from the legal 373 | processes of any jurisdiction or authority. 374 | 375 | 376 | ======================================================================= 377 | 378 | Creative Commons is not a party to its public licenses. 379 | Notwithstanding, Creative Commons may elect to apply one of its public 380 | licenses to material it publishes and in those instances will be 381 | considered the "Licensor." Except for the limited purpose of indicating 382 | that material is shared under a Creative Commons public license or as 383 | otherwise permitted by the Creative Commons policies published at 384 | creativecommons.org/policies, Creative Commons does not authorize the 385 | use of the trademark "Creative Commons" or any other trademark or logo 386 | of Creative Commons without its prior written consent including, 387 | without limitation, in connection with any unauthorized modifications 388 | to any of its public licenses or any other arrangements, 389 | understandings, or agreements concerning use of licensed material. For 390 | the avoidance of doubt, this paragraph does not form part of the public 391 | licenses. 392 | 393 | Creative Commons may be contacted at creativecommons.org. 394 | -------------------------------------------------------------------------------- /super-tiny-compiler-chinese.js: -------------------------------------------------------------------------------- 1 | /** 2 | * TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE 3 | * T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E 4 | * T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E 5 | * T:::::TT:::::::TT:::::THH::::::H H::::::HHEE::::::EEEEEEEEE::::E 6 | * TTTTTT T:::::T TTTTTT H:::::H H:::::H E:::::E EEEEEE 7 | * T:::::T H:::::H H:::::H E:::::E 8 | * T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE 9 | * T:::::T H:::::::::::::::::H E:::::::::::::::E 10 | * T:::::T H:::::::::::::::::H E:::::::::::::::E 11 | * T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE 12 | * T:::::T H:::::H H:::::H E:::::E 13 | * T:::::T H:::::H H:::::H E:::::E EEEEEE 14 | * TT:::::::TT HH::::::H H::::::HHEE::::::EEEEEEEE:::::E 15 | * T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E 16 | * T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E 17 | * TTTTTTTTTTT HHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE 18 | * 19 | * SSSSSSSSSSSSSSS UUUUUUUU UUUUUUUUPPPPPPPPPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR 20 | * SS:::::::::::::::SU::::::U U::::::UP::::::::::::::::P E::::::::::::::::::::ER::::::::::::::::R 21 | * S:::::SSSSSS::::::SU::::::U U::::::UP::::::PPPPPP:::::P E::::::::::::::::::::ER::::::RRRRRR:::::R 22 | * S:::::S SSSSSSSUU:::::U U:::::UUPP:::::P P:::::PEE::::::EEEEEEEEE::::ERR:::::R R:::::R 23 | * S:::::S U:::::U U:::::U P::::P P:::::P E:::::E EEEEEE R::::R R:::::R 24 | * S:::::S U:::::U U:::::U P::::P P:::::P E:::::E R::::R R:::::R 25 | * S::::SSSS U:::::U U:::::U P::::PPPPPP:::::P E::::::EEEEEEEEEE R::::RRRRRR:::::R 26 | * SS::::::SSSSS U:::::U U:::::U P:::::::::::::PP E:::::::::::::::E R:::::::::::::RR 27 | * SSS::::::::SS U:::::U U:::::U P::::PPPPPPPPP E:::::::::::::::E R::::RRRRRR:::::R 28 | * SSSSSS::::S U:::::U U:::::U P::::P E::::::EEEEEEEEEE R::::R R:::::R 29 | * S:::::S U:::::U U:::::U P::::P E:::::E R::::R R:::::R 30 | * S:::::S U::::::U U::::::U P::::P E:::::E EEEEEE R::::R R:::::R 31 | * SSSSSSS S:::::S U:::::::UUU:::::::U PP::::::PP EE::::::EEEEEEEE:::::ERR:::::R R:::::R 32 | * S::::::SSSSSS:::::S UU:::::::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R 33 | * S:::::::::::::::SS UU:::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R 34 | * SSSSSSSSSSSSSSS UUUUUUUUU PPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR 35 | * 36 | * TTTTTTTTTTTTTTTTTTTTTTTIIIIIIIIIINNNNNNNN NNNNNNNNYYYYYYY YYYYYYY 37 | * T:::::::::::::::::::::TI::::::::IN:::::::N N::::::NY:::::Y Y:::::Y 38 | * T:::::::::::::::::::::TI::::::::IN::::::::N N::::::NY:::::Y Y:::::Y 39 | * T:::::TT:::::::TT:::::TII::::::IIN:::::::::N N::::::NY::::::Y Y::::::Y 40 | * TTTTTT T:::::T TTTTTT I::::I N::::::::::N N::::::NYYY:::::Y Y:::::YYY 41 | * T:::::T I::::I N:::::::::::N N::::::N Y:::::Y Y:::::Y 42 | * T:::::T I::::I N:::::::N::::N N::::::N Y:::::Y:::::Y 43 | * T:::::T I::::I N::::::N N::::N N::::::N Y:::::::::Y 44 | * T:::::T I::::I N::::::N N::::N:::::::N Y:::::::Y 45 | * T:::::T I::::I N::::::N N:::::::::::N Y:::::Y 46 | * T:::::T I::::I N::::::N N::::::::::N Y:::::Y 47 | * T:::::T I::::I N::::::N N:::::::::N Y:::::Y 48 | * TT:::::::TT II::::::IIN::::::N N::::::::N Y:::::Y 49 | * T:::::::::T I::::::::IN::::::N N:::::::N YYYY:::::YYYY 50 | * T:::::::::T I::::::::IN::::::N N::::::N Y:::::::::::Y 51 | * TTTTTTTTTTT IIIIIIIIIINNNNNNNN NNNNNNN YYYYYYYYYYYYY 52 | * 53 | * CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPPPPPPPPP IIIIIIIIIILLLLLLLLLLL EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR 54 | * CCC::::::::::::C OO:::::::::OO M:::::::M M:::::::MP::::::::::::::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::::::::::::R 55 | * CC:::::::::::::::C OO:::::::::::::OO M::::::::M M::::::::MP::::::PPPPPP:::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::RRRRRR:::::R 56 | * C:::::CCCCCCCC::::CO:::::::OOO:::::::OM:::::::::M M:::::::::MPP:::::P P:::::PII::::::IILL:::::::LL EE::::::EEEEEEEEE::::ERR:::::R R:::::R 57 | * C:::::C CCCCCCO::::::O O::::::OM::::::::::M M::::::::::M P::::P P:::::P I::::I L:::::L E:::::E EEEEEE R::::R R:::::R 58 | * C:::::C O:::::O O:::::OM:::::::::::M M:::::::::::M P::::P P:::::P I::::I L:::::L E:::::E R::::R R:::::R 59 | * C:::::C O:::::O O:::::OM:::::::M::::M M::::M:::::::M P::::PPPPPP:::::P I::::I L:::::L E::::::EEEEEEEEEE R::::RRRRRR:::::R 60 | * C:::::C O:::::O O:::::OM::::::M M::::M M::::M M::::::M P:::::::::::::PP I::::I L:::::L E:::::::::::::::E R:::::::::::::RR 61 | * C:::::C O:::::O O:::::OM::::::M M::::M::::M M::::::M P::::PPPPPPPPP I::::I L:::::L E:::::::::::::::E R::::RRRRRR:::::R 62 | * C:::::C O:::::O O:::::OM::::::M M:::::::M M::::::M P::::P I::::I L:::::L E::::::EEEEEEEEEE R::::R R:::::R 63 | * C:::::C O:::::O O:::::OM::::::M M:::::M M::::::M P::::P I::::I L:::::L E:::::E R::::R R:::::R 64 | * C:::::C CCCCCCO::::::O O::::::OM::::::M MMMMM M::::::M P::::P I::::I L:::::L LLLLLL E:::::E EEEEEE R::::R R:::::R 65 | * C:::::CCCCCCCC::::CO:::::::OOO:::::::OM::::::M M::::::MPP::::::PP II::::::IILL:::::::LLLLLLLLL:::::LEE::::::EEEEEEEE:::::ERR:::::R R:::::R 66 | * CC:::::::::::::::C OO:::::::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R 67 | * CCC::::::::::::C OO:::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R 68 | * CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPP IIIIIIIIIILLLLLLLLLLLLLLLLLLLLLLLLEEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR 69 | * 70 | * ======================================================================================================================================================================= 71 | * ======================================================================================================================================================================= 72 | * ======================================================================================================================================================================= 73 | * ======================================================================================================================================================================= 74 | */ 75 | 76 | /** 77 | * 今天让我们来写一个编译器，一个超级无敌小的编译器！它小到如果把所有注释删去的话，大概只剩 78 | * 200行左右的代码。 79 | * 80 | * 我们将会用它将 lisp 风格的函数调用转换为 C 风格。 81 | * 82 | * 如果你对这两种风格不是很熟悉，下面是一个简单的介绍。 83 | * 84 | * 假设我们有两个函数，`add` 和 `subtract`，那么它们的写法将会是下面这样： 85 | * 86 | * LISP C 87 | * 88 | * 2 + 2 (add 2 2) add(2, 2) 89 | * 4 - 2 (subtract 4 2) subtract(4, 2) 90 | * 2 + (4 - 2) (add 2 (subtract 4 2)) add(2, subtract(4, 2)) 91 | * 92 | * 很简单对吧？ 93 | * 94 | * 这个转换就是我们将要做的事情。虽然这并不包含 LISP 或者 C 的全部语法，但它足以向我们 95 | * 展示现代编译器很多要点。 96 | * 97 | */ 98 | 99 | /** 100 | * 大多数编译器可以分成三个阶段：解析（Parsing），转换（Transformation）以及代码 101 | * 生成（Code Generation） 102 | * 103 | * 1. *解析*是将最初原始的代码转换为一种更加抽象的表示（译者注：即AST）。* 104 | * 105 | * 2. *转换*将对这个抽象的表示做一些处理，让它能做到编译器期望 106 | * 它做到的事情。 107 | * 108 | * 3. *代码生成*接收处理之后的代码表示，然后把它转换成新的代码。 109 | */ 110 | 111 | /** 112 | * 解析（Parsing） 113 | * ------- 114 | * 115 | * 解析一般来说会分成两个阶段：词法分析（Lexical Analysis）和语法分析（Syntactic Analysis）。 116 | * 117 | * 1. *词法分析*接收原始代码，然后把它分割成一些被称为 Token 的东西，这个过程是在词法分析 118 | * 器（Tokenizer或者Lexer）中完成的。 119 | * 120 | * Token 是一个数组，由一些代码语句的碎片组成。它们可以是数字、标签、标点符号、运算符， 121 | * 或者其它任何东西。 122 | * 123 | * 2. *语法分析* 接收之前生成的 Token，把它们转换成一种抽象的表示，这种抽象的表示描述了代 124 | * 码语句中的每一个片段以及它们之间的关系。这被称为中间表示（intermediate representation） 125 | * 或抽象语法树（Abstract Syntax Tree，缩写为AST） 126 | * 127 | * 抽象语法树是一个嵌套程度很深的对象，用一种更容易处理的方式代表了代码本身，也能给我们 128 | * 更多信息。 129 | * 130 | * 比如说对于下面这一行代码语句： 131 | * 132 | * (add 2 (subtract 4 2)) 133 | * 134 | * 它产生的 Token 看起来或许是这样的： 135 | * 136 | * [ 137 | * { type: 'paren', value: '(' }, 138 | * { type: 'name', value: 'add' }, 139 | * { type: 'number', value: '2' }, 140 | * { type: 'paren', value: '(' }, 141 | * { type: 'name', value: 'subtract' }, 142 | * { type: 'number', value: '4' }, 143 | * { type: 'number', value: '2' }, 144 | * { type: 'paren', value: ')' }, 145 | * { type: 'paren', value: ')' } 146 | * ] 147 | * 148 | * 它的抽象语法树（AST）看起来或许是这样的： 149 | * 150 | * { 151 | * type: 'Program', 152 | * body: [{ 153 | * type: 'CallExpression', 154 | * name: 'add', 155 | * params: [{ 156 | * type: 'NumberLiteral', 157 | * value: '2' 158 | * }, { 159 | * type: 'CallExpression', 160 | * name: 'subtract', 161 | * params: [{ 162 | * type: 'NumberLiteral', 163 | * value: '4' 164 | * }, { 165 | * type: 'NumberLiteral', 166 | * value: '2' 167 | * }] 168 | * }] 169 | * }] 170 | * } 171 | */ 172 | 173 | /** 174 | * 转换（Transformation） 175 | * -------------- 176 | * 177 | * 编译器的下一步就是转换。它只是把 AST 拿过来然后对它做一些修改。它可以在同种语言下操 178 | * 作 AST，也可以把 AST 翻译成全新的语言。 179 | * 180 | * 下面我们来看看该如何转换 AST。 181 | * 182 | * 你或许注意到了我们的 AST 中有很多相似的元素，这些元素都有 type 属性，它们被称为 AST 183 | * 结点。这些结点含有若干属性，可以用于描述 AST 的部分信息。 184 | * 185 | * 比如下面是一个“NumberLiteral”结点： 186 | * 187 | * { 188 | * type: 'NumberLiteral', 189 | * value: '2' 190 | * } 191 | * 192 | * 又比如下面是一个“CallExpression”结点： 193 | * 194 | * { 195 | * type: 'CallExpression', 196 | * name: 'subtract', 197 | * params: [...nested nodes go here...] 198 | * } 199 | * 200 | * 当转换 AST 的时候我们可以添加、移动、替代这些结点，也可以根据现有的 AST 生成一个全新 201 | * 的 AST 202 | * 203 | * 既然我们编译器的目标是把输入的代码转换为一种新的语言，所以我们将会着重于产生一个针对 204 | * 新语言的全新的 AST。 205 | * 206 | * 207 | * 遍历（Traversal） 208 | * --------- 209 | * 210 | * 为了能处理所有的结点，我们需要遍历它们，使用的是深度优先遍历。 211 | * 212 | * { 213 | * type: 'Program', 214 | * body: [{ 215 | * type: 'CallExpression', 216 | * name: 'add', 217 | * params: [{ 218 | * type: 'NumberLiteral', 219 | * value: '2' 220 | * }, { 221 | * type: 'CallExpression', 222 | * name: 'subtract', 223 | * params: [{ 224 | * type: 'NumberLiteral', 225 | * value: '4' 226 | * }, { 227 | * type: 'NumberLiteral', 228 | * value: '2' 229 | * }] 230 | * }] 231 | * }] 232 | * } 233 | * 234 | * 对于上面的 AST 的遍历流程是这样的： 235 | * 236 | * 1. Program - 从 AST 的顶部结点开始 237 | * 2. CallExpression (add) - Program 的第一个子元素 238 | * 3. NumberLiteral (2) - CallExpression (add) 的第一个子元素 239 | * 4. CallExpression (subtract) - CallExpression (add) 的第二个子元素 240 | * 5. NumberLiteral (4) - CallExpression (subtract) 的第一个子元素 241 | * 6. NumberLiteral (2) - CallExpression (subtract) 的第二个子元素 242 | * 243 | * 如果我们直接在 AST 内部操作，而不是产生一个新的 AST，那么就要在这里介绍所有种类的抽象， 244 | * 但是目前访问（visiting）所有结点的方法已经足够了。 245 | * 246 | * 使用“访问（visiting）”这个词的是因为这是一种模式，代表在对象结构内对元素进行操作。 247 | * 248 | * 访问者（Visitors） 249 | * -------- 250 | * 251 | * 我们最基础的想法是创建一个“访问者（visitor）”对象，这个对象中包含一些方法，可以接收不 252 | * 同的结点。 253 | * 254 | * var visitor = { 255 | * NumberLiteral() {}, 256 | * CallExpression() {} 257 | * }; 258 | * 259 | * 当我们遍历 AST 的时候，如果遇到了匹配 type 的结点，我们可以调用 visitor 中的方法。 260 | * 261 | * 一般情况下为了让这些方法可用性更好，我们会把父结点也作为参数传入。 262 | */ 263 | 264 | /** 265 | * 代码生成（Code Generation） 266 | * --------------- 267 | * 268 | * 编译器的最后一个阶段是代码生成，这个阶段做的事情有时候会和转换（transformation）重叠， 269 | * 但是代码生成最主要的部分还是根据 AST 来输出代码。 270 | * 271 | * 代码生成有几种不同的工作方式，有些编译器将会重用之前生成的 token，有些会创建独立的代码 272 | * 表示，以便于线性地输出代码。但是接下来我们还是着重于使用之前生成好的 AST。 273 | * 274 | * 我们的代码生成器需要知道如何“打印”AST 中所有类型的结点，然后它会递归地调用自身，直到所 275 | * 有代码都被打印到一个很长的字符串中。 276 | * 277 | */ 278 | 279 | /** 280 | * 好了！这就是编译器中所有的部分了。 281 | * 282 | * 当然不是说所有的编译器都像我说的这样。不同的编译器有不同的目的，所以也可能需要不同的步骤。 283 | * 284 | * 但你现在应该对编译器到底是个什么东西有个大概的认识了。 285 | * 286 | * 既然我全都解释一遍了，你应该能写一个属于自己的编译器了吧？ 287 | * 288 | * 哈哈开个玩笑，接下来才是重点 :P 289 | * 290 | * 所以我们开始吧... 291 | */ 292 | 293 | /** 294 | * ============================================================================ 295 | * (/^▽^)/ 296 | * 词法分析器（Tokenizer）! 297 | * ============================================================================ 298 | */ 299 | 300 | /** 301 | * 我们从第一个阶段开始，即词法分析，使用的是词法分析器（Tokenizer）。 302 | * 303 | * 我们只是接收代码组成的字符串，然后把它们分割成 token 组成的数组。 304 | * 305 | * (add 2 (subtract 4 2)) => [{ type: 'paren', value: '(' }, ...] 306 | */ 307 | 308 | // 我们从接收一个字符串开始，首先设置两个变量。 309 | function tokenizer(input) { 310 | 311 | // `current`变量类似指针，用于记录我们在代码字符串中的位置。 312 | var current = 0; 313 | 314 | // `tokens`数组是我们放置 token 的地方 315 | var tokens = []; 316 | 317 | // 首先我们创建一个 `while` 循环， `current` 变量会在循环中自增。 318 | // 319 | // 我们这么做的原因是，由于 token 数组的长度是任意的，所以可能要在单个循环中多次 320 | // 增加 `current` 321 | while (current < input.length) { 322 | 323 | // 我们在这里储存了 `input` 中的当前字符 324 | var char = input[current]; 325 | 326 | // 要做的第一件事情就是检查是不是右圆括号。这在之后将会用在 `CallExpressions` 中， 327 | // 但是现在我们关心的只是字符本身。 328 | // 329 | // 检查一下是不是一个左圆括号。 330 | if (char === '(') { 331 | 332 | // 如果是，那么我们 push 一个 type 为 `paren`，value 为左圆括号的对象。 333 | tokens.push({ 334 | type: 'paren', 335 | value: '(' 336 | }); 337 | 338 | // 自增 `current` 339 | current++; 340 | 341 | // 结束本次循环，进入下一次循环 342 | continue; 343 | } 344 | 345 | // 然后我们检查是不是一个右圆括号。这里做的时候和之前一样：检查右圆括号、加入新的 token、 346 | // 自增 `current`，然后进入下一次循环。 347 | if (char === ')') { 348 | tokens.push({ 349 | type: 'paren', 350 | value: ')' 351 | }); 352 | current++; 353 | continue; 354 | } 355 | 356 | // 继续，我们现在检查是不是空格。有趣的是，我们想要空格的本意是分隔字符，但这现在 357 | // 对于我们储存 token 来说不那么重要。我们暂且搁置它。 358 | // 359 | // 所以我们只是简单地检查是不是空格，如果是，那么我们直接进入下一个循环。 360 | var WHITESPACE = /\s/; 361 | if (WHITESPACE.test(char)) { 362 | current++; 363 | continue; 364 | } 365 | 366 | // 下一个 token 的类型是数字。它和之前的 token 不同，因为数字可以由多个数字字符组成， 367 | // 但是我们只能把它们识别为一个 token。 368 | // 369 | // (add 123 456) 370 | // ^^^ ^^^ 371 | // Only two separate tokens 372 | // 这里只有两个 token 373 | // 374 | // 当我们遇到一个数字字符时，将会从这里开始。 375 | var NUMBERS = /[0-9]/; 376 | if (NUMBERS.test(char)) { 377 | 378 | // 创建一个 `value` 字符串，用于 push 字符。 379 | var value = ''; 380 | 381 | // 然后我们循环遍历接下来的字符，直到我们遇到的字符不再是数字字符为止，把遇到的每 382 | // 一个数字字符 push 进 `value` 中，然后自增 `current`。 383 | while (NUMBERS.test(char)) { 384 | value += char; 385 | char = input[++current]; 386 | } 387 | 388 | // 然后我们把类型为 `number` 的 token 放入 `tokens` 数组中。 389 | tokens.push({ 390 | type: 'number', 391 | value: value 392 | }); 393 | 394 | // 进入下一次循环。 395 | continue; 396 | } 397 | 398 | // 最后一种类型的 token 是 `name`。它由一系列的字母组成，这在我们的 lisp 语法中 399 | // 代表了函数。 400 | // 401 | // (add 2 4) 402 | // ^^^ 403 | // Name token 404 | // 405 | var LETTERS = /[a-z]/i; 406 | if (LETTERS.test(char)) { 407 | var value = ''; 408 | 409 | // 同样，我们用一个循环遍历所有的字母，把它们存入 value 中。 410 | while (LETTERS.test(char)) { 411 | value += char; 412 | char = input[++current]; 413 | } 414 | 415 | // 然后添加一个类型为 `name` 的 token，然后进入下一次循环。 416 | tokens.push({ 417 | type: 'name', 418 | value: value 419 | }); 420 | 421 | continue; 422 | } 423 | 424 | // 最后如果我们没有匹配上任何类型的 token，那么我们抛出一个错误。 425 | throw new TypeError('I dont know what this character is: ' + char); 426 | } 427 | 428 | // 词法分析器的最后我们返回 tokens 数组。 429 | return tokens; 430 | } 431 | 432 | /** 433 | * ============================================================================ 434 | * ヽ/❀o ل͜ o\ﾉ 435 | * 语法分析器（Parser）!!! 436 | * ============================================================================ 437 | */ 438 | 439 | /** 440 | * 语法分析器接受 token 数组，然后把它转化为 AST 441 | * 442 | * [{ type: 'paren', value: '(' }, ...] => { type: 'Program', body: [...] } 443 | */ 444 | 445 | // 现在我们定义 parser 函数，接受 `tokens` 数组 446 | function parser(tokens) { 447 | 448 | // 我们再次声明一个 `current` 变量作为指针。 449 | var current = 0; 450 | 451 | // 但是这次我们使用递归而不是 `while` 循环，所以我们定义一个 `walk` 函数。 452 | function walk() { 453 | 454 | // walk函数里，我们从当前token开始 455 | var token = tokens[current]; 456 | 457 | // 对于不同类型的结点，对应的处理方法也不同，我们从 `number` 类型的 token 开始。 458 | // 检查是不是 `number` 类型 459 | if (token.type === 'number') { 460 | // 如果是，`current` 自增。 461 | current++; 462 | // 然后我们会返回一个新的 AST 结点 `NumberLiteral`，并且把它的值设为 token 的值。 463 | return { 464 | type: 'NumberLiteral', 465 | value: token.value 466 | }; 467 | } 468 | 469 | // 接下来我们检查是不是 CallExpressions 类型，我们从左圆括号开始。 470 | if (token.type === 'paren' && token.value === '(') { 471 | 472 | // 我们会自增 `current` 来跳过这个括号，因为括号在 AST 中是不重要的。 473 | token = tokens[++current]; 474 | 475 | // 我们创建一个类型为 `CallExpression` 的根节点，然后把它的 name 属性设置为当前 476 | // token 的值，因为紧跟在左圆括号后面的 token 一定是调用的函数的名字。 477 | var node = { 478 | type: 'CallExpression', 479 | name: token.value, 480 | params: [] 481 | }; 482 | 483 | // 我们再次自增 `current` 变量，跳过当前的 token 484 | token = tokens[++current]; 485 | 486 | // 现在我们循环遍历接下来的每一个 token，直到我们遇到右圆括号，这些 token 将会 487 | // 是 `CallExpression` 的 `params`（参数） 488 | // 489 | // 这也是递归开始的地方，我们采用递归的方式来解决问题，而不是去尝试解析一个可能有无限 490 | // 层嵌套的结点。 491 | // 492 | // 为了更好地解释，我们来看看我们的 Lisp 代码。你会注意到 `add` 函数的参数有两个， 493 | // 一个是数字，另一个是一个嵌套的 `CallExpression`，这个 `CallExpression` 中 494 | // 包含了它自己的参数（两个数字） 495 | // 496 | // (add 2 (subtract 4 2)) 497 | // 498 | // 你也会注意到我们的 token 数组中有多个右圆括号。 499 | // 500 | // [ 501 | // { type: 'paren', value: '(' }, 502 | // { type: 'name', value: 'add' }, 503 | // { type: 'number', value: '2' }, 504 | // { type: 'paren', value: '(' }, 505 | // { type: 'name', value: 'subtract' }, 506 | // { type: 'number', value: '4' }, 507 | // { type: 'number', value: '2' }, 508 | // { type: 'paren', value: ')' }, <<< 右圆括号 509 | // { type: 'paren', value: ')' } <<< 右圆括号 510 | // ] 511 | // 512 | // 遇到嵌套的 `CallExpressions` 时，我们将会依赖嵌套的 `walk` 函数来 513 | // 增加 `current` 变量 514 | // 515 | // 所以我们创建一个 `while` 循环，直到遇到类型为 `'paren'`，值为右圆括号的 token。 516 | while ( 517 | (token.type !== 'paren') || 518 | (token.type === 'paren' && token.value !== ')') 519 | ) { 520 | // 我们调用 `walk` 函数，它将会返回一个结点，然后我们把这个节点 521 | // 放入 `node.params` 中。 522 | node.params.push(walk()); 523 | token = tokens[current]; 524 | } 525 | 526 | // 我们最后一次增加 `current`，跳过右圆括号。 527 | current++; 528 | 529 | // 返回结点。 530 | return node; 531 | } 532 | 533 | // 同样，如果我们遇到了一个类型未知的结点，就抛出一个错误。 534 | throw new TypeError(token.type); 535 | } 536 | 537 | // 现在，我们创建 AST，根结点是一个类型为 `Program` 的结点。 538 | var ast = { 539 | type: 'Program', 540 | body: [] 541 | }; 542 | 543 | // 现在我们开始 `walk` 函数，把结点放入 `ast.body` 中。 544 | // 545 | // 之所以在一个循环中处理，是因为我们的程序可能在 `CallExpressions` 后面包含连续的两个 546 | // 参数，而不是嵌套的。 547 | // 548 | // (add 2 2) 549 | // (subtract 4 2) 550 | // 551 | while (current < tokens.length) { 552 | ast.body.push(walk()); 553 | } 554 | 555 | // 最后我们的语法分析器返回 AST 556 | return ast; 557 | } 558 | 559 | /** 560 | * ============================================================================ 561 | * ⌒(❀>◞౪◟<❀)⌒ 562 | * 遍历器!!! 563 | * ============================================================================ 564 | */ 565 | 566 | /** 567 | * 现在我们有了 AST，我们需要一个 visitor 去遍历所有的结点。当遇到某个类型的结点时，我们 568 | * 需要调用 visitor 中对应类型的处理函数。 569 | * 570 | * traverse(ast, { 571 | * Program(node, parent) { 572 | * // ... 573 | * }, 574 | * 575 | * CallExpression(node, parent) { 576 | * // ... 577 | * }, 578 | * 579 | * NumberLiteral(node, parent) { 580 | * // ... 581 | * } 582 | * }); 583 | */ 584 | 585 | // 所以我们定义一个遍历器，它有两个参数，AST 和 vistor。在它的里面我们又定义了两个函数... 586 | function traverser(ast, visitor) { 587 | 588 | // `traverseArray` 函数允许我们对数组中的每一个元素调用 `traverseNode` 函数。 589 | function traverseArray(array, parent) { 590 | array.forEach(function (child) { 591 | traverseNode(child, parent); 592 | }); 593 | } 594 | 595 | // `traverseNode` 函数接受一个 `node` 和它的父结点 `parent` 作为参数，这个结点会被 596 | // 传入到 visitor 中相应的处理函数那里。 597 | function traverseNode(node, parent) { 598 | 599 | // 首先我们看看 visitor 中有没有对应 `type` 的处理函数。 600 | var method = visitor[node.type]; 601 | 602 | // 如果有，那么我们把 `node` 和 `parent` 都传入其中。 603 | if (method) { 604 | method(node, parent); 605 | } 606 | 607 | // 下面我们对每一个不同类型的结点分开处理。 608 | switch (node.type) { 609 | 610 | // 我们从顶层的 `Program` 开始，Program 结点中有一个 body 属性，它是一个由若干 611 | // 个结点组成的数组，所以我们对这个数组调用 `traverseArray`。 612 | // 613 | // （记住 `traverseArray` 会调用 `traverseNode`，所以我们会递归地遍历这棵树。） 614 | case 'Program': 615 | traverseArray(node.body, node); 616 | break; 617 | 618 | // 下面我们对 `CallExpressions` 做同样的事情，遍历它的 `params`。 619 | case 'CallExpression': 620 | traverseArray(node.params, node); 621 | break; 622 | 623 | // 如果是 `NumberLiterals`，那么就没有任何子结点了，所以我们直接 break 624 | case 'NumberLiteral': 625 | break; 626 | 627 | // 同样，如果我们不能识别当前的结点，那么就抛出一个错误。 628 | default: 629 | throw new TypeError(node.type); 630 | } 631 | } 632 | 633 | // 最后我们对 AST 调用 `traverseNode`，开始遍历。注意 AST 并没有父结点。 634 | traverseNode(ast, null); 635 | } 636 | 637 | /** 638 | * ============================================================================ 639 | * ⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽ 640 | * 转换器!!! 641 | * ============================================================================ 642 | */ 643 | 644 | /** 645 | * 下面是转换器。转换器接收我们在之前构建好的 AST，然后把它和 visitor 传递进入我们的遍历 646 | * 器中，最后得到一个新的 AST。 647 | * 648 | * ---------------------------------------------------------------------------- 649 | * 原始的 AST | 转换后的 AST 650 | * ---------------------------------------------------------------------------- 651 | * { | { 652 | * type: 'Program', | type: 'Program', 653 | * body: [{ | body: [{ 654 | * type: 'CallExpression', | type: 'ExpressionStatement', 655 | * name: 'add', | expression: { 656 | * params: [{ | type: 'CallExpression', 657 | * type: 'NumberLiteral', | callee: { 658 | * value: '2' | type: 'Identifier', 659 | * }, { | name: 'add' 660 | * type: 'CallExpression', | }, 661 | * name: 'subtract', | arguments: [{ 662 | * params: [{ | type: 'NumberLiteral', 663 | * type: 'NumberLiteral', | value: '2' 664 | * value: '4' | }, { 665 | * }, { | type: 'CallExpression', 666 | * type: 'NumberLiteral', | callee: { 667 | * value: '2' | type: 'Identifier', 668 | * }] | name: 'subtract' 669 | * }] | }, 670 | * }] | arguments: [{ 671 | * } | type: 'NumberLiteral', 672 | * | value: '4' 673 | * ---------------------------------- | }, { 674 | * | type: 'NumberLiteral', 675 | * | value: '2' 676 | * | }] 677 | * (那一边比较长/w\) | }] 678 | * | } 679 | * | }] 680 | * | } 681 | * ---------------------------------------------------------------------------- 682 | */ 683 | 684 | // 定义我们的转换器函数，接收 AST 作为参数 685 | function transformer(ast) { 686 | 687 | // 创建 `newAST`，它与我们之前的 AST 类似，有一个类型为 Program 的根节点。 688 | var newAst = { 689 | type: 'Program', 690 | body: [] 691 | }; 692 | 693 | // 下面的代码会有些奇技淫巧，我们在父结点上使用一个属性 `context`（上下文），这样我们就 694 | // 可以把结点放入他们父结点的 context 中。当然可能会有更好的做法，但是为了简单我们姑且 695 | // 这么做吧。 696 | // 697 | // 注意 context 是一个*引用*，从旧的 AST 到新的 AST。 698 | ast._context = newAst.body; 699 | 700 | // 我们把 AST 和 visitor 函数传入遍历器 701 | traverser(ast, { 702 | 703 | // 第一个 visitor 方法接收 `NumberLiterals`。 704 | NumberLiteral: function (node, parent) { 705 | // 我们创建一个新结点，名字叫 `NumberLiteral`，并把它放入父结点的 context 中。 706 | parent._context.push({ 707 | type: 'NumberLiteral', 708 | value: node.value 709 | }); 710 | }, 711 | 712 | // 下一个，`CallExpressions`。 713 | CallExpression: function (node, parent) { 714 | 715 | // 我们创建一个 `CallExpression` 结点，里面有一个嵌套的 `Identifier`。 716 | var expression = { 717 | type: 'CallExpression', 718 | callee: { 719 | type: 'Identifier', 720 | name: node.name 721 | }, 722 | arguments: [] 723 | }; 724 | 725 | // 下面我们在原来的 `CallExpression` 结点上定义一个新的 context，它是 expression 726 | // 中 arguments 这个数组的引用，我们可以向其中放入参数。 727 | node._context = expression.arguments; 728 | 729 | // 然后来看看父结点是不是一个 `CallExpression`，如果不是... 730 | if (parent.type !== 'CallExpression') { 731 | 732 | // 我们把 `CallExpression` 结点包在一个 `ExpressionStatement` 中，这么做是因为 733 | // 单独存在（原文为top level）的 `CallExpressions` 在 JavaScript 中也可以被当做 734 | // 是声明语句。 735 | // 736 | // 译者注：比如 `var a = foo()` 与 `foo()`，后者既可以当作表达式给某个变量赋值，也 737 | // 可以作为一个独立的语句存在。 738 | expression = { 739 | type: 'ExpressionStatement', 740 | expression: expression 741 | }; 742 | } 743 | 744 | // 最后我们把 `CallExpression`（可能是被包起来的）放入父结点的 context 中。 745 | parent._context.push(expression); 746 | } 747 | }); 748 | 749 | // 最后返回创建好的新 AST。 750 | return newAst; 751 | } 752 | 753 | /** 754 | * ============================================================================ 755 | * ヾ（〃＾∇＾）ﾉ♪ 756 | * 代码生成器!!!! 757 | * ============================================================================ 758 | */ 759 | 760 | /** 761 | * 现在只剩最后一步啦：代码生成器。 762 | * 763 | * 我们的代码生成器会递归地调用它自己，把 AST 中的每个结点打印到一个很大的字符串中。 764 | */ 765 | 766 | function codeGenerator(node) { 767 | 768 | // 对于不同 `type` 的结点分开处理。 769 | switch (node.type) { 770 | 771 | // 如果是 `Program` 结点，那么我们会遍历它的 `body` 属性中的每一个结点，并且递归地 772 | // 对这些结点再次调用 codeGenerator，再把结果打印进入新的一行中。 773 | case 'Program': 774 | return node.body.map(codeGenerator) 775 | .join('\n'); 776 | 777 | // 对于 `ExpressionStatements`,我们对它的 expression 属性递归调用，同时加入一个 778 | // 分号。 779 | case 'ExpressionStatement': 780 | return ( 781 | codeGenerator(node.expression) + 782 | ';' // << (...因为我们喜欢用*正确*的方式写代码) 783 | ); 784 | 785 | // 对于 `CallExpressions`，我们会打印出 `callee`，接着是一个左圆括号，然后对 786 | // arguments 递归调用 codeGenerator，并且在它们之间加一个逗号，最后加上右圆括号。 787 | case 'CallExpression': 788 | return ( 789 | codeGenerator(node.callee) + 790 | '(' + 791 | node.arguments.map(codeGenerator) 792 | .join(', ') + 793 | ')' 794 | ); 795 | 796 | // 对于 `Identifiers` 我们只是返回 `node` 的 name。 797 | case 'Identifier': 798 | return node.name; 799 | 800 | // 对于 `NumberLiterals` 我们只是返回 `node` 的 value 801 | case 'NumberLiteral': 802 | return node.value; 803 | 804 | // 如果我们不能识别这个结点，那么抛出一个错误。 805 | default: 806 | throw new TypeError(node.type); 807 | } 808 | } 809 | 810 | /** 811 | * ============================================================================ 812 | * (۶* ‘ヮ’)۶” 813 | * !!!!!!!!!!!!编译器!!!!!!!!!!! 814 | * ============================================================================ 815 | */ 816 | 817 | /** 818 | * 最后！我们创建 `compiler` 函数，它只是把上面说到的那些函数连接到一起。 819 | * 820 | * 1. input => tokenizer => tokens 821 | * 2. tokens => parser => ast 822 | * 3. ast => transformer => newAst 823 | * 4. newAst => generator => output 824 | */ 825 | 826 | function compiler(input) { 827 | var tokens = tokenizer(input); 828 | var ast = parser(tokens); 829 | var newAst = transformer(ast); 830 | var output = codeGenerator(newAst); 831 | 832 | // 然后返回输出! 833 | return output; 834 | } 835 | 836 | /** 837 | * ============================================================================ 838 | * (๑˃̵ᴗ˂̵)و 839 | * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!你做到了!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 840 | * ============================================================================ 841 | */ 842 | 843 | // 现在导出所有接口... 844 | module.exports = { 845 | tokenizer: tokenizer, 846 | parser: parser, 847 | transformer: transformer, 848 | codeGenerator: codeGenerator, 849 | compiler: compiler 850 | }; 851 | -------------------------------------------------------------------------------- /super-tiny-compiler.js: -------------------------------------------------------------------------------- 1 | /** 2 | * TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE 3 | * T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E 4 | * T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E 5 | * T:::::TT:::::::TT:::::THH::::::H H::::::HHEE::::::EEEEEEEEE::::E 6 | * TTTTTT T:::::T TTTTTT H:::::H H:::::H E:::::E EEEEEE 7 | * T:::::T H:::::H H:::::H E:::::E 8 | * T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE 9 | * T:::::T H:::::::::::::::::H E:::::::::::::::E 10 | * T:::::T H:::::::::::::::::H E:::::::::::::::E 11 | * T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE 12 | * T:::::T H:::::H H:::::H E:::::E 13 | * T:::::T H:::::H H:::::H E:::::E EEEEEE 14 | * TT:::::::TT HH::::::H H::::::HHEE::::::EEEEEEEE:::::E 15 | * T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E 16 | * T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E 17 | * TTTTTTTTTTT HHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE 18 | * 19 | * SSSSSSSSSSSSSSS UUUUUUUU UUUUUUUUPPPPPPPPPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR 20 | * SS:::::::::::::::SU::::::U U::::::UP::::::::::::::::P E::::::::::::::::::::ER::::::::::::::::R 21 | * S:::::SSSSSS::::::SU::::::U U::::::UP::::::PPPPPP:::::P E::::::::::::::::::::ER::::::RRRRRR:::::R 22 | * S:::::S SSSSSSSUU:::::U U:::::UUPP:::::P P:::::PEE::::::EEEEEEEEE::::ERR:::::R R:::::R 23 | * S:::::S U:::::U U:::::U P::::P P:::::P E:::::E EEEEEE R::::R R:::::R 24 | * S:::::S U:::::U U:::::U P::::P P:::::P E:::::E R::::R R:::::R 25 | * S::::SSSS U:::::U U:::::U P::::PPPPPP:::::P E::::::EEEEEEEEEE R::::RRRRRR:::::R 26 | * SS::::::SSSSS U:::::U U:::::U P:::::::::::::PP E:::::::::::::::E R:::::::::::::RR 27 | * SSS::::::::SS U:::::U U:::::U P::::PPPPPPPPP E:::::::::::::::E R::::RRRRRR:::::R 28 | * SSSSSS::::S U:::::U U:::::U P::::P E::::::EEEEEEEEEE R::::R R:::::R 29 | * S:::::S U:::::U U:::::U P::::P E:::::E R::::R R:::::R 30 | * S:::::S U::::::U U::::::U P::::P E:::::E EEEEEE R::::R R:::::R 31 | * SSSSSSS S:::::S U:::::::UUU:::::::U PP::::::PP EE::::::EEEEEEEE:::::ERR:::::R R:::::R 32 | * S::::::SSSSSS:::::S UU:::::::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R 33 | * S:::::::::::::::SS UU:::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R 34 | * SSSSSSSSSSSSSSS UUUUUUUUU PPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR 35 | * 36 | * TTTTTTTTTTTTTTTTTTTTTTTIIIIIIIIIINNNNNNNN NNNNNNNNYYYYYYY YYYYYYY 37 | * T:::::::::::::::::::::TI::::::::IN:::::::N N::::::NY:::::Y Y:::::Y 38 | * T:::::::::::::::::::::TI::::::::IN::::::::N N::::::NY:::::Y Y:::::Y 39 | * T:::::TT:::::::TT:::::TII::::::IIN:::::::::N N::::::NY::::::Y Y::::::Y 40 | * TTTTTT T:::::T TTTTTT I::::I N::::::::::N N::::::NYYY:::::Y Y:::::YYY 41 | * T:::::T I::::I N:::::::::::N N::::::N Y:::::Y Y:::::Y 42 | * T:::::T I::::I N:::::::N::::N N::::::N Y:::::Y:::::Y 43 | * T:::::T I::::I N::::::N N::::N N::::::N Y:::::::::Y 44 | * T:::::T I::::I N::::::N N::::N:::::::N Y:::::::Y 45 | * T:::::T I::::I N::::::N N:::::::::::N Y:::::Y 46 | * T:::::T I::::I N::::::N N::::::::::N Y:::::Y 47 | * T:::::T I::::I N::::::N N:::::::::N Y:::::Y 48 | * TT:::::::TT II::::::IIN::::::N N::::::::N Y:::::Y 49 | * T:::::::::T I::::::::IN::::::N N:::::::N YYYY:::::YYYY 50 | * T:::::::::T I::::::::IN::::::N N::::::N Y:::::::::::Y 51 | * TTTTTTTTTTT IIIIIIIIIINNNNNNNN NNNNNNN YYYYYYYYYYYYY 52 | * 53 | * CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPPPPPPPPP IIIIIIIIIILLLLLLLLLLL EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR 54 | * CCC::::::::::::C OO:::::::::OO M:::::::M M:::::::MP::::::::::::::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::::::::::::R 55 | * CC:::::::::::::::C OO:::::::::::::OO M::::::::M M::::::::MP::::::PPPPPP:::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::RRRRRR:::::R 56 | * C:::::CCCCCCCC::::CO:::::::OOO:::::::OM:::::::::M M:::::::::MPP:::::P P:::::PII::::::IILL:::::::LL EE::::::EEEEEEEEE::::ERR:::::R R:::::R 57 | * C:::::C CCCCCCO::::::O O::::::OM::::::::::M M::::::::::M P::::P P:::::P I::::I L:::::L E:::::E EEEEEE R::::R R:::::R 58 | * C:::::C O:::::O O:::::OM:::::::::::M M:::::::::::M P::::P P:::::P I::::I L:::::L E:::::E R::::R R:::::R 59 | * C:::::C O:::::O O:::::OM:::::::M::::M M::::M:::::::M P::::PPPPPP:::::P I::::I L:::::L E::::::EEEEEEEEEE R::::RRRRRR:::::R 60 | * C:::::C O:::::O O:::::OM::::::M M::::M M::::M M::::::M P:::::::::::::PP I::::I L:::::L E:::::::::::::::E R:::::::::::::RR 61 | * C:::::C O:::::O O:::::OM::::::M M::::M::::M M::::::M P::::PPPPPPPPP I::::I L:::::L E:::::::::::::::E R::::RRRRRR:::::R 62 | * C:::::C O:::::O O:::::OM::::::M M:::::::M M::::::M P::::P I::::I L:::::L E::::::EEEEEEEEEE R::::R R:::::R 63 | * C:::::C O:::::O O:::::OM::::::M M:::::M M::::::M P::::P I::::I L:::::L E:::::E R::::R R:::::R 64 | * C:::::C CCCCCCO::::::O O::::::OM::::::M MMMMM M::::::M P::::P I::::I L:::::L LLLLLL E:::::E EEEEEE R::::R R:::::R 65 | * C:::::CCCCCCCC::::CO:::::::OOO:::::::OM::::::M M::::::MPP::::::PP II::::::IILL:::::::LLLLLLLLL:::::LEE::::::EEEEEEEE:::::ERR:::::R R:::::R 66 | * CC:::::::::::::::C OO:::::::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R 67 | * CCC::::::::::::C OO:::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R 68 | * CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPP IIIIIIIIIILLLLLLLLLLLLLLLLLLLLLLLLEEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR 69 | * 70 | * ======================================================================================================================================================================= 71 | * ======================================================================================================================================================================= 72 | * ======================================================================================================================================================================= 73 | * ======================================================================================================================================================================= 74 | */ 75 | 76 | /** 77 | * Today we're going to write a compiler together. But not just any compiler... A 78 | * super duper teeny tiny compiler! A compiler that is so small that if you 79 | * remove all the comments this file would only be ~200 lines of actual code. 80 | * 81 | * We're going to compile some lisp-like function calls into some C-like 82 | * function calls. 83 | * 84 | * If you are not familiar with one or the other. I'll just give you a quick intro. 85 | * 86 | * If we had two functions `add` and `subtract` they would be written like this: 87 | * 88 | * LISP C 89 | * 90 | * 2 + 2 (add 2 2) add(2, 2) 91 | * 4 - 2 (subtract 4 2) subtract(4, 2) 92 | * 2 + (4 - 2) (add 2 (subtract 4 2)) add(2, subtract(4, 2)) 93 | * 94 | * Easy peezy right? 95 | * 96 | * Well good, because this is exactly what we are going to compile. While this 97 | * is neither a complete LISP or C syntax, it will be enough of the syntax to 98 | * demonstrate many of the major pieces of a modern compiler. 99 | */ 100 | 101 | /** 102 | * Most compilers break down into three primary stages: Parsing, Transformation, 103 | * and Code Generation 104 | * 105 | * 1. *Parsing* is taking raw code and turning it into a more abstract 106 | * representation of the code. 107 | * 108 | * 2. *Transformation* takes this abstract representation and manipulates to do 109 | * whatever the compiler wants it to. 110 | * 111 | * 3. *Code Generation* takes the transformed representation of the code and 112 | * turns it into new code. 113 | */ 114 | 115 | /** 116 | * Parsing 117 | * ------- 118 | * 119 | * Parsing typically gets broken down into two phases: Lexical Analysis and 120 | * Syntactic Analysis. 121 | * 122 | * 1. *Lexical Analysis* takes the raw code and splits it apart into these things 123 | * called tokens by a thing called a tokenizer (or lexer). 124 | * 125 | * Tokens are an array of tiny little objects that describe an isolated piece 126 | * of the syntax. They could be numbers, labels, punctuation, operators, 127 | * whatever. 128 | * 129 | * 2. *Syntactic Analysis* takes the tokens and reformats them into a 130 | * representation that describes each part of the syntax and their relation 131 | * to one another. This is known as an intermediate representation or 132 | * Abstract Syntax Tree. 133 | * 134 | * An Abstract Syntax Tree, or AST for short, is a deeply nested object that 135 | * represents code in a way that is both easy to work with and tells us a lot 136 | * of information. 137 | * 138 | * For the following syntax: 139 | * 140 | * (add 2 (subtract 4 2)) 141 | * 142 | * Tokens might look something like this: 143 | * 144 | * [ 145 | * { type: 'paren', value: '(' }, 146 | * { type: 'name', value: 'add' }, 147 | * { type: 'number', value: '2' }, 148 | * { type: 'paren', value: '(' }, 149 | * { type: 'name', value: 'subtract' }, 150 | * { type: 'number', value: '4' }, 151 | * { type: 'number', value: '2' }, 152 | * { type: 'paren', value: ')' }, 153 | * { type: 'paren', value: ')' } 154 | * ] 155 | * 156 | * And an Abstract Syntax Tree (AST) might look like this: 157 | * 158 | * { 159 | * type: 'Program', 160 | * body: [{ 161 | * type: 'CallExpression', 162 | * name: 'add', 163 | * params: [{ 164 | * type: 'NumberLiteral', 165 | * value: '2' 166 | * }, { 167 | * type: 'CallExpression', 168 | * name: 'subtract', 169 | * params: [{ 170 | * type: 'NumberLiteral', 171 | * value: '4' 172 | * }, { 173 | * type: 'NumberLiteral', 174 | * value: '2' 175 | * }] 176 | * }] 177 | * }] 178 | * } 179 | */ 180 | 181 | /** 182 | * Transformation 183 | * -------------- 184 | * 185 | * The next type of stage for a compiler is transformation. Again, this just 186 | * takes the AST from the last step and makes changes to it. It can manipulate 187 | * the AST in the same language or it can translate it into an entirely new 188 | * language. 189 | * 190 | * Let’s look at how we would transform an AST. 191 | * 192 | * You might notice that our AST has elements within it that look very similar. 193 | * There are these objects with a type property. Each of these are known as an 194 | * AST Node. These nodes have defined properties on them that describe one 195 | * isolated part of the tree. 196 | * 197 | * We can have a node for a "NumberLiteral": 198 | * 199 | * { 200 | * type: 'NumberLiteral', 201 | * value: '2' 202 | * } 203 | * 204 | * Or maybe a node for a "CallExpression": 205 | * 206 | * { 207 | * type: 'CallExpression', 208 | * name: 'subtract', 209 | * params: [...nested nodes go here...] 210 | * } 211 | * 212 | * When transforming the AST we can manipulate nodes by 213 | * adding/removing/replacing properties, we can add new nodes, remove nodes, or 214 | * we could leave the existing AST alone and create an entirely new one based 215 | * on it. 216 | * 217 | * Since we’re targeting a new language, we’re going to focus on creating an 218 | * entirely new AST that is specific to the target language. 219 | * 220 | * Traversal 221 | * --------- 222 | * 223 | * In order to navigate through all of these nodes, we need to be able to 224 | * traverse through them. This traversal process goes to each node in the AST 225 | * depth-first. 226 | * 227 | * { 228 | * type: 'Program', 229 | * body: [{ 230 | * type: 'CallExpression', 231 | * name: 'add', 232 | * params: [{ 233 | * type: 'NumberLiteral', 234 | * value: '2' 235 | * }, { 236 | * type: 'CallExpression', 237 | * name: 'subtract', 238 | * params: [{ 239 | * type: 'NumberLiteral', 240 | * value: '4' 241 | * }, { 242 | * type: 'NumberLiteral', 243 | * value: '2' 244 | * }] 245 | * }] 246 | * }] 247 | * } 248 | * 249 | * So for the above AST we would go: 250 | * 251 | * 1. Program - Starting at the top level of the AST 252 | * 2. CallExpression (add) - Moving to the first element of the Program's body 253 | * 3. NumberLiteral (2) - Moving to the first element of CallExpression's params 254 | * 4. CallExpression (subtract) - Moving to the second element of CallExpression's params 255 | * 5. NumberLiteral (4) - Moving to the first element of CallExpression's params 256 | * 6. NumberLiteral (2) - Moving to the second element of CallExpression's params 257 | * 258 | * If we were manipulating this AST directly, instead of creating a separate AST, 259 | * we would likely introduce all sorts of abstractions here. But just visiting 260 | * each node in the tree is enough. 261 | * 262 | * The reason I use the word “visiting” is because there is this pattern of how 263 | * to represent operations on elements of an object structure. 264 | * 265 | * Visitors 266 | * -------- 267 | * 268 | * The basic idea here is that we are going to create a “visitor” object that 269 | * has methods that will accept different node types. 270 | * 271 | * var visitor = { 272 | * NumberLiteral() {}, 273 | * CallExpression() {} 274 | * }; 275 | * 276 | * When we traverse our AST we will call the methods on this visitor whenever we 277 | * encounter a node of a matching type. 278 | * 279 | * In order to make this useful we will also pass the node and a reference to 280 | * the parent node. 281 | * 282 | * var visitor = { 283 | * NumberLiteral(node, parent) {}, 284 | * CallExpression(node, parent) {} 285 | * }; 286 | */ 287 | 288 | /** 289 | * Code Generation 290 | * --------------- 291 | * 292 | * The final phase of a compiler is code generation. Sometimes compilers will do 293 | * things that overlap with transformation, but for the most part code 294 | * generation just means take our AST and string-ify code back out. 295 | * 296 | * Code generators work several different ways, some compilers will reuse the 297 | * tokens from earlier, others will have created a separate representation of 298 | * the code so that they can print node linearly, but from what I can tell most 299 | * will use the same AST we just created, which is what we’re going to focus on. 300 | * 301 | * Effectively our code generator will know how to “print” all of the different 302 | * node types of the AST, and it will recursively call itself to print nested 303 | * nodes until everything is printed into one long string of code. 304 | */ 305 | 306 | /** 307 | * And that's it! That's all the different pieces of a compiler. 308 | * 309 | * Now that isn’t to say every compiler looks exactly like I described here. 310 | * Compilers serve many different purposes, and they might need more steps than 311 | * I have detailed. 312 | * 313 | * But now you should have a general high-level idea of what most compilers look 314 | * like. 315 | * 316 | * Now that I’ve explained all of this, you’re all good to go write your own 317 | * compilers right? 318 | * 319 | * Just kidding, that's what I'm here to help with :P 320 | * 321 | * So let's begin... 322 | */ 323 | 324 | /** 325 | * ============================================================================ 326 | * (/^▽^)/ 327 | * THE TOKENIZER! 328 | * ============================================================================ 329 | */ 330 | 331 | /** 332 | * We're gonna start off with our first phase of parsing, lexical analysis, with 333 | * the tokenizer. 334 | * 335 | * We're just going to take our string of code and break it down into an array 336 | * of tokens. 337 | * 338 | * (add 2 (subtract 4 2)) => [{ type: 'paren', value: '(' }, ...] 339 | */ 340 | 341 | // We start by accepting an input string of code, and we're gonna set up two 342 | // things... 343 | function tokenizer(input) { 344 | 345 | // A `current` variable for tracking our position in the code like a cursor. 346 | var current = 0; 347 | 348 | // And a `tokens` array for pushing our tokens to. 349 | var tokens = []; 350 | 351 | // We start by creating a `while` loop where we are setting up our `current` 352 | // variable to be incremented as much as we want `inside` the loop. 353 | // 354 | // We do this because we may want to increment `current` many times within a 355 | // single loop because our tokens can be any length. 356 | while (current < input.length) { 357 | 358 | // We're also going to store the `current` character in the `input`. 359 | var char = input[current]; 360 | 361 | // The first thing we want to check for is an open parenthesis. This will 362 | // later be used for `CallExpressions` but for now we only care about the 363 | // character. 364 | // 365 | // We check to see if we have an open parenthesis: 366 | if (char === '(') { 367 | 368 | // If we do, we push a new token with the type `paren` and set the value 369 | // to an open parenthesis. 370 | tokens.push({ 371 | type: 'paren', 372 | value: '(' 373 | }); 374 | 375 | // Then we increment `current` 376 | current++; 377 | 378 | // And we `continue` onto the next cycle of the loop. 379 | continue; 380 | } 381 | 382 | // Next we're going to check for a closing parenthesis. We do the same exact 383 | // thing as before: Check for a closing parenthesis, add a new token, 384 | // increment `current`, and `continue`. 385 | if (char === ')') { 386 | tokens.push({ 387 | type: 'paren', 388 | value: ')' 389 | }); 390 | current++; 391 | continue; 392 | } 393 | 394 | // Moving on, we're now going to check for whitespace. This is interesting 395 | // because we care that whitespace exists to separate characters, but it 396 | // isn't actually important for us to store as a token. We would only throw 397 | // it out later. 398 | // 399 | // So here we're just going to test for existence and if it does exist we're 400 | // going to just `continue` on. 401 | var WHITESPACE = /\s/; 402 | if (WHITESPACE.test(char)) { 403 | current++; 404 | continue; 405 | } 406 | 407 | // The next type of token is a number. This is different than what we have 408 | // seen before because a number could be any number of characters and we 409 | // want to capture the entire sequence of characters as one token. 410 | // 411 | // (add 123 456) 412 | // ^^^ ^^^ 413 | // Only two separate tokens 414 | // 415 | // So we start this off when we encounter the first number in a sequence. 416 | var NUMBERS = /[0-9]/; 417 | if (NUMBERS.test(char)) { 418 | 419 | // We're going to create a `value` string that we are going to push 420 | // characters to. 421 | var value = ''; 422 | 423 | // Then we're going to loop through each character in the sequence until 424 | // we encounter a character that is not a number, pushing each character 425 | // that is a number to our `value` and incrementing `current` as we go. 426 | while (NUMBERS.test(char)) { 427 | value += char; 428 | char = input[++current]; 429 | } 430 | 431 | // After that we push our `number` token to the `tokens` array. 432 | tokens.push({ 433 | type: 'number', 434 | value: value 435 | }); 436 | 437 | // And we continue on. 438 | continue; 439 | } 440 | 441 | // The last type of token will be a `name` token. This is a sequence of 442 | // letters instead of numbers, that are the names of functions in our lisp 443 | // syntax. 444 | // 445 | // (add 2 4) 446 | // ^^^ 447 | // Name token 448 | // 449 | var LETTERS = /[a-z]/i; 450 | if (LETTERS.test(char)) { 451 | var value = ''; 452 | 453 | // Again we're just going to loop through all the letters pushing them to 454 | // a value. 455 | while (LETTERS.test(char)) { 456 | value += char; 457 | char = input[++current]; 458 | } 459 | 460 | // And pushing that value as a token with the type `name` and continuing. 461 | tokens.push({ 462 | type: 'name', 463 | value: value 464 | }); 465 | 466 | continue; 467 | } 468 | 469 | // Finally if we have not matched a character by now, we're going to throw 470 | // an error and completely exit. 471 | throw new TypeError('I dont know what this character is: ' + char); 472 | } 473 | 474 | // Then at the end of our `tokenizer` we simply return the tokens array. 475 | return tokens; 476 | } 477 | 478 | /** 479 | * ============================================================================ 480 | * ヽ/❀o ل͜ o\ﾉ 481 | * THE PARSER!!! 482 | * ============================================================================ 483 | */ 484 | 485 | /** 486 | * For our parser we're going to take our array of tokens and turn it into an 487 | * AST. 488 | * 489 | * [{ type: 'paren', value: '(' }, ...] => { type: 'Program', body: [...] } 490 | */ 491 | 492 | // Okay, so we define a `parser` function that accepts our array of `tokens`. 493 | function parser(tokens) { 494 | 495 | // Again we keep a `current` variable that we will use as a cursor. 496 | var current = 0; 497 | 498 | // But this time we're going to use recursion instead of a `while` loop. So we 499 | // define a `walk` function. 500 | function walk() { 501 | 502 | // Inside the walk function we start by grabbing the `current` token. 503 | var token = tokens[current]; 504 | 505 | // We're going to split each type of token off into a different code path, 506 | // starting off with `number` tokens. 507 | // 508 | // We test to see if we have a `number` token. 509 | if (token.type === 'number') { 510 | 511 | // If we have one, we'll increment `current`. 512 | current++; 513 | 514 | // And we'll return a new AST node called `NumberLiteral` and setting its 515 | // value to the value of our token. 516 | return { 517 | type: 'NumberLiteral', 518 | value: token.value 519 | }; 520 | } 521 | 522 | // Next we're going to look for CallExpressions. We start this off when we 523 | // encounter an open parenthesis. 524 | if ( 525 | token.type === 'paren' && 526 | token.value === '(' 527 | ) { 528 | 529 | // We'll increment `current` to skip the parenthesis since we don't care 530 | // about it in our AST. 531 | token = tokens[++current]; 532 | 533 | // We create a base node with the type `CallExpression`, and we're going 534 | // to set the name as the current token's value since the next token after 535 | // the open parenthesis is the name of the function. 536 | var node = { 537 | type: 'CallExpression', 538 | name: token.value, 539 | params: [] 540 | }; 541 | 542 | // We increment `current` *again* to skip the name token. 543 | token = tokens[++current]; 544 | 545 | // And now we want to loop through each token that will be the `params` of 546 | // our `CallExpression` until we encounter a closing parenthesis. 547 | // 548 | // Now this is where recursion comes in. Instead of trying to parse a 549 | // potentially infinitely nested set of nodes we're going to rely on 550 | // recursion to resolve things. 551 | // 552 | // To explain this, let's take our Lisp code. You can see that the 553 | // parameters of the `add` are a number and a nested `CallExpression` that 554 | // includes its own numbers. 555 | // 556 | // (add 2 (subtract 4 2)) 557 | // 558 | // You'll also notice that in our tokens array we have multiple closing 559 | // parentheses. 560 | // 561 | // [ 562 | // { type: 'paren', value: '(' }, 563 | // { type: 'name', value: 'add' }, 564 | // { type: 'number', value: '2' }, 565 | // { type: 'paren', value: '(' }, 566 | // { type: 'name', value: 'subtract' }, 567 | // { type: 'number', value: '4' }, 568 | // { type: 'number', value: '2' }, 569 | // { type: 'paren', value: ')' }, <<< Closing parenthesis 570 | // { type: 'paren', value: ')' } <<< Closing parenthesis 571 | // ] 572 | // 573 | // We're going to rely on the nested `walk` function to increment our 574 | // `current` variable past any nested `CallExpressions`. 575 | 576 | // So we create a `while` loop that will continue until it encounters a 577 | // token with a `type` of `'paren'` and a `value` of a closing 578 | // parenthesis. 579 | while ( 580 | (token.type !== 'paren') || 581 | (token.type === 'paren' && token.value !== ')') 582 | ) { 583 | // we'll call the `walk` function which will return a `node` and we'll 584 | // push it into our `node.params`. 585 | node.params.push(walk()); 586 | token = tokens[current]; 587 | } 588 | 589 | // Finally we will increment `current` one last time to skip the closing 590 | // parenthesis. 591 | current++; 592 | 593 | // And return the node. 594 | return node; 595 | } 596 | 597 | // Again, if we haven't recognized the token type by now we're going to 598 | // throw an error. 599 | throw new TypeError(token.type); 600 | } 601 | 602 | // Now, we're going to create our AST which will have a root which is a 603 | // `Program` node. 604 | var ast = { 605 | type: 'Program', 606 | body: [] 607 | }; 608 | 609 | // And we're going to kickstart our `walk` function, pushing nodes to our 610 | // `ast.body` array. 611 | // 612 | // The reason we are doing this inside a loop is because our program can have 613 | // `CallExpressions` after one another instead of being nested. 614 | // 615 | // (add 2 2) 616 | // (subtract 4 2) 617 | // 618 | while (current < tokens.length) { 619 | ast.body.push(walk()); 620 | } 621 | 622 | // At the end of our parser we'll return the AST. 623 | return ast; 624 | } 625 | 626 | /** 627 | * ============================================================================ 628 | * ⌒(❀>◞౪◟<❀)⌒ 629 | * THE TRAVERSER!!! 630 | * ============================================================================ 631 | */ 632 | 633 | /** 634 | * So now we have our AST, and we want to be able to visit different nodes with 635 | * a visitor. We need to be able to call the methods on the visitor whenever we 636 | * encounter a node with a matching type. 637 | * 638 | * traverse(ast, { 639 | * Program(node, parent) { 640 | * // ... 641 | * }, 642 | * 643 | * CallExpression(node, parent) { 644 | * // ... 645 | * }, 646 | * 647 | * NumberLiteral(node, parent) { 648 | * // ... 649 | * } 650 | * }); 651 | */ 652 | 653 | // So we define a traverser function which accepts an AST and a 654 | // visitor. Inside we're going to define two functions... 655 | function traverser(ast, visitor) { 656 | 657 | // A `traverseArray` function that will allow us to iterate over an array and 658 | // call the next function that we will define: `traverseNode`. 659 | function traverseArray(array, parent) { 660 | array.forEach(function(child) { 661 | traverseNode(child, parent); 662 | }); 663 | } 664 | 665 | // `traverseNode` will accept a `node` and its `parent` node. So that it can 666 | // pass both to our visitor methods. 667 | function traverseNode(node, parent) { 668 | 669 | // We start by testing for the existence of a method on the visitor with a 670 | // matching `type`. 671 | var method = visitor[node.type]; 672 | 673 | // If it exists we'll call it with the `node` and its `parent`. 674 | if (method) { 675 | method(node, parent); 676 | } 677 | 678 | // Next we are going to split things up by the current node type. 679 | switch (node.type) { 680 | 681 | // We'll start with our top level `Program`. Since Program nodes have a 682 | // property named body that has an array of nodes, we will call 683 | // `traverseArray` to traverse down into them. 684 | // 685 | // (Remember that `traverseArray` will in turn call `traverseNode` so we 686 | // are causing the tree to be traversed recursively) 687 | case 'Program': 688 | traverseArray(node.body, node); 689 | break; 690 | 691 | // Next we do the same with `CallExpressions` and traverse their `params`. 692 | case 'CallExpression': 693 | traverseArray(node.params, node); 694 | break; 695 | 696 | // In the case of `NumberLiterals` we don't have any child nodes to visit, 697 | // so we'll just break. 698 | case 'NumberLiteral': 699 | break; 700 | 701 | // And again, if we haven't recognized the node type then we'll throw an 702 | // error. 703 | default: 704 | throw new TypeError(node.type); 705 | } 706 | } 707 | 708 | // Finally we kickstart the traverser by calling `traverseNode` with our ast 709 | // with no `parent` because the top level of the AST doesn't have a parent. 710 | traverseNode(ast, null); 711 | } 712 | 713 | /** 714 | * ============================================================================ 715 | * ⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽ 716 | * THE TRANSFORMER!!! 717 | * ============================================================================ 718 | */ 719 | 720 | /** 721 | * Next up, the transformer. Our transformer is going to take the AST that we 722 | * have built and pass it to our traverser function with a visitor and will 723 | * create a new ast. 724 | * 725 | * ---------------------------------------------------------------------------- 726 | * Original AST | Transformed AST 727 | * ---------------------------------------------------------------------------- 728 | * { | { 729 | * type: 'Program', | type: 'Program', 730 | * body: [{ | body: [{ 731 | * type: 'CallExpression', | type: 'ExpressionStatement', 732 | * name: 'add', | expression: { 733 | * params: [{ | type: 'CallExpression', 734 | * type: 'NumberLiteral', | callee: { 735 | * value: '2' | type: 'Identifier', 736 | * }, { | name: 'add' 737 | * type: 'CallExpression', | }, 738 | * name: 'subtract', | arguments: [{ 739 | * params: [{ | type: 'NumberLiteral', 740 | * type: 'NumberLiteral', | value: '2' 741 | * value: '4' | }, { 742 | * }, { | type: 'CallExpression', 743 | * type: 'NumberLiteral', | callee: { 744 | * value: '2' | type: 'Identifier', 745 | * }] | name: 'subtract' 746 | * }] | }, 747 | * }] | arguments: [{ 748 | * } | type: 'NumberLiteral', 749 | * | value: '4' 750 | * ---------------------------------- | }, { 751 | * | type: 'NumberLiteral', 752 | * | value: '2' 753 | * | }] 754 | * (sorry the other one is longer.) | }] 755 | * | } 756 | * | }] 757 | * | } 758 | * ---------------------------------------------------------------------------- 759 | */ 760 | 761 | // So we have our transformer function which will accept the lisp ast. 762 | function transformer(ast) { 763 | 764 | // We'll create a `newAst` which like our previous AST will have a program 765 | // node. 766 | var newAst = { 767 | type: 'Program', 768 | body: [] 769 | }; 770 | 771 | // Next I'm going to cheat a little and create a bit of a hack. We're going to 772 | // use a property named `context` on our parent nodes that we're going to use 773 | // to push nodes to their parents' `context`'s. Normally you would have a 774 | // better abstraction than this, but for our purposes this keeps things 775 | // simple. 776 | // 777 | // Just take note that the context is a reference *from* the old ast *to* the 778 | // new ast. 779 | ast._context = newAst.body; 780 | 781 | // We'll start by calling the traverser function with our ast and a visitor. 782 | traverser(ast, { 783 | 784 | // The first visitor method accepts `NumberLiterals` 785 | NumberLiteral: function(node, parent) { 786 | // We'll create a new node also named `NumberLiteral` that we will push to 787 | // the parent context. 788 | parent._context.push({ 789 | type: 'NumberLiteral', 790 | value: node.value 791 | }); 792 | }, 793 | 794 | // Next up, `CallExpressions`. 795 | CallExpression: function(node, parent) { 796 | 797 | // We start creating a new node `CallExpression` with a nested 798 | // `Identifier`. 799 | var expression = { 800 | type: 'CallExpression', 801 | callee: { 802 | type: 'Identifier', 803 | name: node.name 804 | }, 805 | arguments: [] 806 | }; 807 | 808 | // Next we're going to define a new context on the original 809 | // `CallExpression` node that will reference the `expression`'s arguments 810 | // so that we can push arguments. 811 | node._context = expression.arguments; 812 | 813 | // Then we're going to check if the parent node is a `CallExpression`. 814 | // If it is not... 815 | if (parent.type !== 'CallExpression') { 816 | 817 | // We're going to wrap our `CallExpression` node with an 818 | // `ExpressionStatement`. We do this because the top level 819 | // `CallExpressions` in JavaScript are actually statements. 820 | expression = { 821 | type: 'ExpressionStatement', 822 | expression: expression 823 | }; 824 | } 825 | 826 | // Last, we push our (possibly wrapped) `CallExpression` to the `parent`'s 827 | // `context`. 828 | parent._context.push(expression); 829 | } 830 | }); 831 | 832 | // At the end of our transformer function we'll return the new ast that we 833 | // just created. 834 | return newAst; 835 | } 836 | 837 | /** 838 | * ============================================================================ 839 | * ヾ（〃＾∇＾）ﾉ♪ 840 | * THE CODE GENERATOR!!!! 841 | * ============================================================================ 842 | */ 843 | 844 | /** 845 | * Now let's move on to our last phase: The Code Generator. 846 | * 847 | * Our code generator is going to recursively call itself to print each node in 848 | * the tree into one giant string. 849 | */ 850 | 851 | function codeGenerator(node) { 852 | 853 | // We'll break things down by the `type` of the `node`. 854 | switch (node.type) { 855 | 856 | // If we have a `Program` node. We will map through each node in the `body` 857 | // and run them through the code generator and join them with a newline. 858 | case 'Program': 859 | return node.body.map(codeGenerator) 860 | .join('\n'); 861 | 862 | // For `ExpressionStatements` we'll call the code generator on the nested 863 | // expression and we'll add a semicolon... 864 | case 'ExpressionStatement': 865 | return ( 866 | codeGenerator(node.expression) + 867 | ';' // << (...because we like to code the *correct* way) 868 | ); 869 | 870 | // For `CallExpressions` we will print the `callee`, add an open 871 | // parenthesis, we'll map through each node in the `arguments` array and run 872 | // them through the code generator, joining them with a comma, and then 873 | // we'll add a closing parenthesis. 874 | case 'CallExpression': 875 | return ( 876 | codeGenerator(node.callee) + 877 | '(' + 878 | node.arguments.map(codeGenerator) 879 | .join(', ') + 880 | ')' 881 | ); 882 | 883 | // For `Identifiers` we'll just return the `node`'s name. 884 | case 'Identifier': 885 | return node.name; 886 | 887 | // For `NumberLiterals` we'll just return the `node`'s value. 888 | case 'NumberLiteral': 889 | return node.value; 890 | 891 | // And if we haven't recognized the node, we'll throw an error. 892 | default: 893 | throw new TypeError(node.type); 894 | } 895 | } 896 | 897 | /** 898 | * ============================================================================ 899 | * (۶* ‘ヮ’)۶” 900 | * !!!!!!!!THE COMPILER!!!!!!!! 901 | * ============================================================================ 902 | */ 903 | 904 | /** 905 | * FINALLY! We'll create our `compiler` function. Here we will link together 906 | * every part of the pipeline. 907 | * 908 | * 1. input => tokenizer => tokens 909 | * 2. tokens => parser => ast 910 | * 3. ast => transformer => newAst 911 | * 4. newAst => generator => output 912 | */ 913 | 914 | function compiler(input) { 915 | var tokens = tokenizer(input); 916 | var ast = parser(tokens); 917 | var newAst = transformer(ast); 918 | var output = codeGenerator(newAst); 919 | 920 | // and simply return the output! 921 | return output; 922 | } 923 | 924 | /** 925 | * ============================================================================ 926 | * (๑˃̵ᴗ˂̵)و 927 | * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!YOU MADE IT!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 928 | * ============================================================================ 929 | */ 930 | 931 | // Now I'm just exporting everything... 932 | module.exports = { 933 | tokenizer: tokenizer, 934 | parser: parser, 935 | transformer: transformer, 936 | codeGenerator: codeGenerator, 937 | compiler: compiler 938 | }; 939 | --------------------------------------------------------------------------------