├── assets
│   └── tweet.png
├── README.md
└── tiny.js

/assets/tweet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mgechev/tiny-compiler/HEAD/assets/tweet.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Tiny Interpreter and Compiler

A tiny interpreter and compiler which shows the basics of compiler development.

For more details, see [the source](./tiny.js) or my blog post "[Implementing a Simple Compiler on 25 Lines of JavaScript](http://blog.mgechev.com/2017/09/16/developing-simple-interpreter-transpiler-compiler-tutorial/)".

Alongside the implementation you can find sample (and simple) implementations of:

- A lexer which produces a list of tokens (module for lexical analysis).
- A parser which produces an Abstract Syntax Tree (AST) (module for syntax analysis).
- An interpreter which traverses and evaluates the AST.
- An EBNF grammar.
- Recursive descent parsing.

## You want an even smaller compiler?

Here it is!

[![Compiler in a Tweet](/assets/tweet.png)](https://twitter.com/mgechev/status/955211214719602688)

## License

MIT

--------------------------------------------------------------------------------
/tiny.js:
--------------------------------------------------------------------------------
/*
# Lexer

The lexer is responsible for turning the input string into
a list of tokens. Usually a token looks the following way:

```javascript
{
  "type": Symbol("Operator"),
  "value": "-"
}
```

In our case we keep everything simplified and store
only the token's value. We can infer the type based on the
regular expression used by the parser below.

In short, `lex` will turn the following expression:

```
mul 3 sub 2 sum 1 3 4
```

into the following array:

```
["mul", "3", "sub", "2", "sum", "1", "3", "4"]
```
*/
const lex = str => str.split(' ').map(s => s.trim()).filter(s => s.length);

/*
# Parser

The parser is responsible for turning the list of tokens
into an AST, or Abstract Syntax Tree. In the example below
we use recursive descent parsing to produce the AST
from the input token array.

Visually, the parsing is a process which turns the array:

```javascript
const tokens = ["sub", "2", "sum", "1", "3", "4"];
```

into the following tree:

```
  sub
  / \
 2   sum
     /|\
    1 3 4
```

The parser uses the following grammar to parse the input token array:

```
num := 0-9+
op := sum | sub | div | mul
expr := num | op expr+
```

Translated to plain English, this means:

- `num` can be any sequence of digits between 0 and 9.
- `op` can be any of `sum`, `sub`, `div`, `mul`.
- `expr` can be either a number (i.e. `num`) or an operation followed by one or more `expr`s.

Notice that `expr` has a recursive definition.
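
For instance, applying `parse` to the token array above produces a tree of plain
objects along these lines (using the `Op` and `Num` node-type symbols defined
right after this comment; an operation node keeps its sub-expressions in `expr`):

```javascript
{
  type: Op,
  val: 'sub',
  expr: [
    { type: Num, val: 2 },
    {
      type: Op,
      val: 'sum',
      expr: [
        { type: Num, val: 1 },
        { type: Num, val: 3 },
        { type: Num, val: 4 }
      ]
    }
  ]
}
```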
*/

const Op = Symbol('op');
const Num = Symbol('num');

const parse = tokens => {

  let c = 0;

  const peek = () => tokens[c];
  const consume = () => tokens[c++];

  const parseNum = () => ({ val: parseInt(consume()), type: Num });

  const parseOp = () => {
    const node = { val: consume(), type: Op, expr: [] };
    while (peek()) node.expr.push(parseExpr());
    return node;
  };

  const parseExpr = () => /\d/.test(peek()) ? parseNum() : parseOp();

  return parseExpr();
};

/*
# Evaluator

Finally, this is our evaluator. In it we simply visit each node
of the tree with a post-order traversal and either:

- Return the corresponding value, in case the node is of type number.
- Perform the corresponding arithmetic operation, in case of an operation node.
*/
const evaluate = ast => {
  const opAcMap = {
    sum: args => args.reduce((a, b) => a + b, 0),
    sub: args => args.reduce((a, b) => a - b),
    div: args => args.reduce((a, b) => a / b),
    mul: args => args.reduce((a, b) => a * b, 1)
  };

  if (ast.type === Num) return ast.val;
  return opAcMap[ast.val](ast.expr.map(evaluate));
};

/*
# Code generator

Alternatively, instead of interpreting the AST, we can translate
it to another language. Here's how we can do that for JavaScript.
*/
const compile = ast => {
  const opMap = { sum: '+', mul: '*', sub: '-', div: '/' };
  const compileNum = ast => ast.val;
  const compileOp = ast => `(${ast.expr.map(compile).join(' ' + opMap[ast.val] + ' ')})`;
  // Note: this inner `compile` shadows the outer one and drives the recursion.
  const compile = ast => ast.type === Num ? compileNum(ast) : compileOp(ast);
  return compile(ast);
};

const program = 'mul 3 sub 2 sum 1 3 4';

/*
# Interpreter

In order to interpret the input program we feed the parser with the output
of the lexer and the evaluator with the output of the parser.
*/
console.log(evaluate(parse(lex(program))));

/*
# Compiler

In order to compile the expression to JavaScript, the only change we need to make
is to replace the outermost `evaluate` invocation with `compile`.
*/
console.log(compile(parse(lex(program))));
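
/*
# Sanity check

One possible way to double-check the code generator: the string it produces,
when evaluated as JavaScript, should yield the same result as the interpreter.
(The `interpreted`/`generated` names below are just for illustration, and
`eval` is used purely for demonstration.)
*/
const interpreted = evaluate(parse(lex(program))); // -18
const generated = compile(parse(lex(program)));    // '(3 * (2 - (1 + 3 + 4)))'
console.log(eval(generated) === interpreted);      // logs `true`

--------------------------------------------------------------------------------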