├── examples ├── simple-if.js ├── test-single-stmt-if.js ├── test-stmt-after-if.js ├── simple-if-no-else.js ├── simple-const-if.js ├── ackermann.js ├── gcd.js ├── factorial.js ├── test-const-arith.js ├── test-multiple.js ├── test-unreachable.js ├── test-two-funcs.js ├── const-test.js ├── optimization-demo.js ├── const-comprehensive.js └── comprehensive-optimizations.js ├── package.json ├── compiler.js ├── test ├── lexer.test.mjs ├── integration.test.mjs ├── const.test.mjs ├── optimizer.test.mjs ├── parser.test.mjs └── codegen.test.mjs ├── src ├── utils.mjs ├── lexer.mjs ├── optimizer.mjs ├── parser.mjs └── codegen.mjs └── README.md /examples/simple-if.js: -------------------------------------------------------------------------------- 1 | function test() { 2 | if (1 == 1) { 3 | return 5; 4 | } 5 | return 0; 6 | } 7 | 8 | test(); 9 | -------------------------------------------------------------------------------- /examples/test-single-stmt-if.js: -------------------------------------------------------------------------------- 1 | function test(n) { 2 | if (n < 0) return 0; 3 | let x = 5; 4 | return x; 5 | } 6 | 7 | test(5); 8 | -------------------------------------------------------------------------------- /examples/test-stmt-after-if.js: -------------------------------------------------------------------------------- 1 | function test(n) { 2 | if (n < 0) { 3 | return 0; 4 | } 5 | let x = 5; 6 | return x; 7 | } 8 | 9 | test(5); 10 | -------------------------------------------------------------------------------- /examples/simple-if-no-else.js: -------------------------------------------------------------------------------- 1 | // Simpler version 2 | function test(n) { 3 | if (n < 0) { 4 | return 0; 5 | } 6 | return 1; 7 | } 8 | 9 | test(5); 10 | -------------------------------------------------------------------------------- /examples/simple-const-if.js: -------------------------------------------------------------------------------- 1 | function simple(x) { 2 | if 
(5 > 3) { 3 | return x + 1; 4 | } else { 5 | return x - 1; 6 | } 7 | } 8 | 9 | simple(10); 10 | -------------------------------------------------------------------------------- /examples/ackermann.js: -------------------------------------------------------------------------------- 1 | function ack(m, n) { 2 | if (m == 0) return n + 1; 3 | if (n == 0) return ack(m - 1, 1); 4 | return ack(m - 1, ack(m, n - 1)); 5 | } 6 | 7 | ack(3, 4); // must return 125 8 | -------------------------------------------------------------------------------- /examples/gcd.js: -------------------------------------------------------------------------------- 1 | function gcd(a, b) { 2 | while (b != 0) { 3 | let t = b; 4 | b = a % b; 5 | a = t; 6 | } 7 | return a; 8 | } 9 | 10 | gcd(48, 18); // must return 6 11 | -------------------------------------------------------------------------------- /examples/factorial.js: -------------------------------------------------------------------------------- 1 | function fact(n) { 2 | let result = 1; 3 | while (n > 0) { 4 | result = result * n; 5 | n = n - 1; 6 | } 7 | return result; 8 | } 9 | 10 | fact(5); // must return 120 11 | -------------------------------------------------------------------------------- /examples/test-const-arith.js: -------------------------------------------------------------------------------- 1 | function constantArithmetic() { 2 | let a = 10 + 20; 3 | let b = 100 - 50; 4 | let c = 5 * 6; 5 | let d = 100 / 4; 6 | let e = 17 % 3; 7 | return a + b + c + d + e; 8 | } 9 | 10 | constantArithmetic(); 11 | -------------------------------------------------------------------------------- /examples/test-multiple.js: -------------------------------------------------------------------------------- 1 | function multipleOptimizations(x) { 2 | let a = 5 + 5; 3 | let b = a * 1; 4 | let unused = 999; 5 | 6 | if (2 == 2) { 7 | return b + x; 8 | } 9 | 10 | return 0; 11 | } 12 | 13 | multipleOptimizations(10); 14 | 
-------------------------------------------------------------------------------- /examples/test-unreachable.js: -------------------------------------------------------------------------------- 1 | function unreachableAfterReturn(n) { 2 | if (n < 0) { 3 | return 0; 4 | } 5 | 6 | let result = n * 2; 7 | return result; 8 | 9 | let unused1 = 100; 10 | let unused2 = 200; 11 | return unused1 + unused2; 12 | } 13 | 14 | unreachableAfterReturn(5); 15 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "humera-interview", 3 | "version": "1.0.0", 4 | "description": "WASM compiler", 5 | "main": "index.js", 6 | "type": "module", 7 | "scripts": { 8 | "test": "node --test test/*.test.*" 9 | }, 10 | "author": "Sankha Narayan Guria ", 11 | "license": "ISC" 12 | } 13 | -------------------------------------------------------------------------------- /examples/test-two-funcs.js: -------------------------------------------------------------------------------- 1 | function constantArithmetic() { 2 | let a = 10 + 20; 3 | let b = 100 - 50; 4 | let c = 5 * 6; 5 | let d = 100 / 4; 6 | let e = 17 % 3; 7 | return a + b + c + d + e; 8 | } 9 | 10 | function algebraicSimplification(x, y) { 11 | let a = x + 0; 12 | let b = y * 1; 13 | let c = x - 0; 14 | let d = y / 1; 15 | let e = x * 0; 16 | return a + b + c + d + e; 17 | } 18 | 19 | constantArithmetic(); 20 | -------------------------------------------------------------------------------- /examples/const-test.js: -------------------------------------------------------------------------------- 1 | // Test const vs let 2 | 3 | function testConst(x) { 4 | const PI = 3; 5 | const DOUBLED = PI * 2; 6 | let radius = x; 7 | let area = PI * radius * radius; 8 | return area + DOUBLED; 9 | } 10 | 11 | function testMixed(n) { 12 | const MAX = 100; 13 | let count = 0; 14 | 15 | while (count < n) { 16 | count = 
count + 1; 17 | } 18 | 19 | if (count > MAX) { 20 | return MAX; 21 | } 22 | 23 | return count; 24 | } 25 | 26 | testConst(5); 27 | -------------------------------------------------------------------------------- /examples/optimization-demo.js: -------------------------------------------------------------------------------- 1 | // Optimization demo: constant folding and dead code elimination 2 | 3 | function demo(x) { 4 | // Constant folding examples 5 | let a = 2 + 3; // -> 5 6 | let b = a * 1; // -> a (identity) 7 | let c = b + 0; // -> b (identity) 8 | let d = c * 2; // Should fold if c is constant 9 | 10 | // Dead code elimination examples 11 | let unused = 42; // Never used - should be eliminated 12 | 13 | if (1 == 1) { // Always true - else eliminated 14 | return d; 15 | } else { 16 | return 999; // Dead code 17 | } 18 | 19 | return 0; // Unreachable - eliminated 20 | } 21 | 22 | demo(10); 23 | -------------------------------------------------------------------------------- /compiler.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | // get cmd arg 4 | import fs from 'node:fs'; 5 | import path from 'node:path'; 6 | import { Lexer } from './src/lexer.mjs'; 7 | import { Parser } from './src/parser.mjs'; 8 | import { Optimizer } from './src/optimizer.mjs'; 9 | import { Codegen } from './src/codegen.mjs'; 10 | 11 | const args = process.argv.slice(2); 12 | 13 | if (args.length !== 1) { 14 | console.error('Usage: node compiler.js '); 15 | process.exit(1); 16 | } 17 | 18 | const sourceFile = args[0]; 19 | const sourceCode = fs.readFileSync(path.resolve(sourceFile), 'utf-8'); 20 | 21 | const lexer = new Lexer(sourceCode); 22 | const parser = new Parser(lexer); 23 | const ast = parser.parseProgram(); 24 | 25 | // Optimize AST 26 | const optimizer = new Optimizer(ast); 27 | const optimizedAst = optimizer.optimize(); 28 | 29 | const codegen = new Codegen(optimizedAst); 30 | const output = 
codegen.generate(); 31 | 32 | console.log(output); 33 | -------------------------------------------------------------------------------- /test/lexer.test.mjs: -------------------------------------------------------------------------------- 1 | import assert from 'node:assert/strict'; 2 | import { describe, it } from 'node:test'; 3 | 4 | import { Lexer, Token, TOKEN_TYPES } from '../src/lexer.mjs'; 5 | 6 | describe('Lexer', () => { 7 | it('should tokenize a simple expression', () => { 8 | const input = 'let x = 10 + 5;'; 9 | const lexer = new Lexer(input); 10 | 11 | const expectedTokens = [ 12 | new Token(TOKEN_TYPES.LET, 'let'), 13 | new Token(TOKEN_TYPES.IDENTIFIER, 'x'), 14 | new Token(TOKEN_TYPES.ASSIGN, '='), 15 | new Token(TOKEN_TYPES.NUMBER, '10'), 16 | new Token(TOKEN_TYPES.PLUS, '+'), 17 | new Token(TOKEN_TYPES.NUMBER, '5'), 18 | new Token(TOKEN_TYPES.SEMICOLON, ';'), 19 | new Token(TOKEN_TYPES.EOF, ''), 20 | ]; 21 | 22 | for (const expectedToken of expectedTokens) { 23 | const token = lexer.nextToken(); 24 | assert.deepEqual(token, expectedToken); 25 | } 26 | }); 27 | }); 28 | -------------------------------------------------------------------------------- /examples/const-comprehensive.js: -------------------------------------------------------------------------------- 1 | // Comprehensive const example demonstrating immutability and optimization 2 | 3 | function circleArea(radius) { 4 | const PI = 3; 5 | const TWO = 2; 6 | const DIAMETER = TWO * radius; 7 | return PI * radius * radius; 8 | } 9 | 10 | function fibonacci(n) { 11 | const ZERO = 0; 12 | const ONE = 1; 13 | const TWO = 2; 14 | 15 | if (n <= ZERO) { 16 | return ZERO; 17 | } 18 | if (n == ONE) { 19 | return ONE; 20 | } 21 | 22 | let prev = ZERO; 23 | let curr = ONE; 24 | let count = TWO; 25 | 26 | while (count <= n) { 27 | let next = prev + curr; 28 | prev = curr; 29 | curr = next; 30 | count = count + ONE; 31 | } 32 | 33 | return curr; 34 | } 35 | 36 | function clamp(value) { 37 | const MIN = 
/**
 * Fold a flat token list into nested arrays, one array per parenthesised group.
 *
 * @param {string[]} tokens - Tokens where '(' and ')' delimit groups and every
 *   other entry is an atom.
 * @returns {Array|string|undefined} The first top-level item (normally the
 *   outermost list), or `undefined` when there are no tokens. Groups that are
 *   still open when the tokens run out are returned as collected so far.
 * @throws {SyntaxError} When a ')' has no matching '('.
 */
function parse(tokens) {
  const root = [];
  const stack = [root];

  for (const token of tokens) {
    if (token === '(') {
      const list = [];
      stack.at(-1).push(list);
      stack.push(list);
    } else if (token === ')') {
      // Popping the root would make every later access fail with an
      // unrelated TypeError, so report the real problem instead.
      if (stack.length === 1) {
        throw new SyntaxError("Unbalanced ')' in WAT input");
      }
      stack.pop();
    } else {
      stack.at(-1).push(token);
    }
  }

  return root[0];
}

/**
 * Convert WebAssembly text (WAT) into a nested-array S-expression.
 *
 * Comments are skipped while scanning rather than stripped up front, so `;;`
 * and `(;` inside a string literal are kept, and a backslash escape such as
 * `\"` does not end the string. String literals are returned as single atoms
 * with their surrounding quotes.
 *
 * @param {string} watCode - WAT source text.
 * @returns {Array|string|undefined} Nested arrays of string atoms; see `parse`.
 * @throws {SyntaxError} When a ')' has no matching '('.
 */
function watToSexp(watCode) {
  const tokens = [];
  let current = '';
  let inString = false;

  // Emit the atom accumulated so far, if any.
  const flush = () => {
    const atom = current.trim();
    if (atom) {
      tokens.push(atom);
    }
    current = '';
  };

  let i = 0;
  while (i < watCode.length) {
    const char = watCode[i];
    const next = watCode[i + 1];

    if (inString) {
      current += char;
      if (char === '\\' && next !== undefined) {
        // Keep the escaped character verbatim; it can never close the string.
        current += next;
        i += 2;
        continue;
      }
      if (char === '"') {
        inString = false;
      }
      i += 1;
    } else if (char === '"') {
      inString = true;
      current += char;
      i += 1;
    } else if (char === ';' && next === ';') {
      // Line comment: skip up to (not including) the line terminator, which
      // is then handled as ordinary whitespace.
      while (i < watCode.length && watCode[i] !== '\n' && watCode[i] !== '\r') {
        i += 1;
      }
    } else if (char === '(' && next === ';') {
      // Block comment; track depth so nested `(; ... ;)` pairs are skipped
      // as a unit. An unterminated comment runs to the end of the input.
      let depth = 1;
      i += 2;
      while (i < watCode.length && depth > 0) {
        if (watCode[i] === '(' && watCode[i + 1] === ';') {
          depth += 1;
          i += 2;
        } else if (watCode[i] === ';' && watCode[i + 1] === ')') {
          depth -= 1;
          i += 2;
        } else {
          i += 1;
        }
      }
    } else if (char === '(' || char === ')') {
      flush();
      tokens.push(char);
      i += 1;
    } else if (/\s/.test(char)) {
      flush();
      i += 1;
    } else {
      current += char;
      i += 1;
    }
  }
  flush();

  return parse(tokens);
}

export { watToSexp };
folded to constants 5 | let a = 10 + 20; 6 | let b = 100 - 50; 7 | let c = 5 * 6; 8 | let d = 100 / 4; 9 | let e = 17 % 3; 10 | return a + b + c + d + e; 11 | } 12 | 13 | function algebraicSimplification(x, y) { 14 | // These will be simplified 15 | let a = x + 0; // -> x 16 | let b = y * 1; // -> y 17 | let c = x - 0; // -> x 18 | let d = y / 1; // -> y 19 | let e = x * 0; // -> 0 20 | return a + b + c + d + e; 21 | } 22 | 23 | function deadBranches(x) { 24 | // The false branch is eliminated 25 | if (5 > 3) { 26 | return x + 1; 27 | } else { 28 | return x - 1; // Dead code 29 | } 30 | } 31 | 32 | function unreachableAfterReturn(n) { 33 | if (n < 0) { 34 | return 0; 35 | } 36 | 37 | let result = n * 2; 38 | return result; 39 | 40 | // All of this is unreachable and will be eliminated 41 | let unused1 = 100; 42 | let unused2 = 200; 43 | return unused1 + unused2; 44 | } 45 | 46 | function multipleOptimizations(x) { 47 | // Combines constant folding, dead code, and unused variable elimination 48 | let a = 5 + 5; // -> 10 49 | let b = a * 1; // -> a 50 | let unused = 999; // Eliminated 51 | 52 | if (2 == 2) { // Condition -> 1 (true) 53 | return b + x; 54 | } 55 | 56 | return 0; // Unreachable 57 | } 58 | 59 | // Call all functions 60 | constantArithmetic(); 61 | -------------------------------------------------------------------------------- /test/integration.test.mjs: -------------------------------------------------------------------------------- 1 | import assert from 'node:assert/strict'; 2 | import { describe, it } from 'node:test'; 3 | import fs from 'node:fs'; 4 | const vm = await import('node:vm'); 5 | import { execSync } from 'child_process'; 6 | 7 | import { Lexer } from '../src/lexer.mjs'; 8 | import { Parser } from '../src/parser.mjs'; 9 | import { Codegen } from '../src/codegen.mjs'; 10 | import { Optimizer } from '../src/optimizer.mjs'; 11 | 12 | describe('WASM Compiler', () => { 13 | it('compiles all examples with correct semantics without 
optimizations', () => { 14 | const exampleFiles = fs.readdirSync('examples').filter(file => file.endsWith('.js')); 15 | 16 | for (const file of exampleFiles) { 17 | const input = fs.readFileSync(`examples/${file}`, 'utf-8'); 18 | const script = new vm.Script(input); 19 | const expectedValue = script.runInThisContext(); 20 | 21 | const lexer = new Lexer(input); 22 | const parser = new Parser(lexer); 23 | const ast = parser.parseProgram(); 24 | 25 | const codegen = new Codegen(ast); 26 | const output = codegen.generate(); 27 | 28 | fs.writeFileSync('temp.wat', output); 29 | 30 | // Run the file with `wasmtime temp.wat --invoke _start` to get the return value 31 | const stdout = execSync(`wasmtime temp.wat --invoke _start`); 32 | const lines = stdout.toString().trim().split('\n'); 33 | console.log(lines); 34 | const receivedValue = parseInt(lines[lines.length - 1], 10); 35 | 36 | assert.equal(receivedValue, expectedValue, `Failed on example: ${file}`); 37 | } 38 | }); 39 | 40 | it('compiles all examples with correct semantics with optimizations', () => { 41 | const exampleFiles = fs.readdirSync('examples').filter(file => file.endsWith('.js')); 42 | 43 | for (const file of exampleFiles) { 44 | const input = fs.readFileSync(`examples/${file}`, 'utf-8'); 45 | const script = new vm.Script(input); 46 | const expectedValue = script.runInThisContext(); 47 | 48 | const lexer = new Lexer(input); 49 | const parser = new Parser(lexer); 50 | const ast = parser.parseProgram(); 51 | const optimizer = new Optimizer(ast); 52 | const optimizedAst = optimizer.optimize(); 53 | 54 | const codegen = new Codegen(optimizedAst); 55 | const output = codegen.generate(); 56 | 57 | fs.writeFileSync('temp.wat', output); 58 | 59 | // Run the file with `wasmtime temp.wat --invoke _start` to get the return value 60 | const stdout = execSync(`wasmtime temp.wat --invoke _start`); 61 | const lines = stdout.toString().trim().split('\n'); 62 | console.log(lines); 63 | const receivedValue = 
parseInt(lines[lines.length - 1], 10); 64 | 65 | assert.equal(receivedValue, expectedValue, `Failed on example: ${file}`); 66 | } 67 | }); 68 | }); 69 | -------------------------------------------------------------------------------- /test/const.test.mjs: -------------------------------------------------------------------------------- 1 | import assert from 'node:assert/strict'; 2 | import { describe, it } from 'node:test'; 3 | 4 | import { Lexer } from '../src/lexer.mjs'; 5 | import { Parser } from '../src/parser.mjs'; 6 | import { Codegen } from '../src/codegen.mjs'; 7 | 8 | describe('Const Support', () => { 9 | it('should parse const declarations', () => { 10 | const input = ` 11 | function test() { 12 | const x = 5; 13 | return x; 14 | } 15 | test(); 16 | `; 17 | const lexer = new Lexer(input); 18 | const parser = new Parser(lexer); 19 | const ast = parser.parseProgram(); 20 | 21 | // Check that const is in the AST 22 | const func = ast[1]; 23 | const body = func[3]; 24 | const firstStmt = body[1][0]; 25 | 26 | assert.strictEqual(firstStmt[1][0], 'const'); 27 | }); 28 | 29 | it('should inline simple const values', () => { 30 | const input = ` 31 | function test() { 32 | const PI = 3; 33 | const radius = 5; 34 | return PI * radius; 35 | } 36 | test(); 37 | `; 38 | const lexer = new Lexer(input); 39 | const parser = new Parser(lexer); 40 | const codegen = new Codegen(parser.parseProgram()); 41 | const output = codegen.generate(); 42 | 43 | // Should inline both PI and radius 44 | assert.match(output, /i32\.const 3/); 45 | assert.match(output, /i32\.const 5/); 46 | // Should not have local variables for PI or radius 47 | assert.doesNotMatch(output, /local \$PI/); 48 | assert.doesNotMatch(output, /local \$radius/); 49 | }); 50 | 51 | it('should evaluate const expressions at compile time', () => { 52 | const input = ` 53 | function test() { 54 | const A = 10; 55 | const B = 20; 56 | const C = A + B; 57 | return C; 58 | } 59 | test(); 60 | `; 61 | const lexer = new 
Lexer(input); 62 | const parser = new Parser(lexer); 63 | const codegen = new Codegen(parser.parseProgram()); 64 | const output = codegen.generate(); 65 | 66 | // C should be evaluated to 30 at compile time 67 | assert.match(output, /i32\.const 30/); 68 | // Should not have local variables for A, B, or C 69 | assert.doesNotMatch(output, /local \$A/); 70 | assert.doesNotMatch(output, /local \$B/); 71 | assert.doesNotMatch(output, /local \$C/); 72 | }); 73 | 74 | it('should handle const with complex expressions', () => { 75 | const input = ` 76 | function test(x) { 77 | const MULTIPLIER = 2; 78 | const OFFSET = 5; 79 | const COMPUTED = MULTIPLIER * OFFSET; 80 | let result = x * MULTIPLIER + OFFSET + COMPUTED; 81 | return result; 82 | } 83 | test(10); 84 | `; 85 | const lexer = new Lexer(input); 86 | const parser = new Parser(lexer); 87 | const codegen = new Codegen(parser.parseProgram()); 88 | const output = codegen.generate(); 89 | 90 | // MULTIPLIER, OFFSET, and COMPUTED should all be inlined 91 | assert.match(output, /i32\.const 2/); 92 | assert.match(output, /i32\.const 5/); 93 | assert.match(output, /i32\.const 10/); // COMPUTED = 2 * 5 94 | // Only result should have a local 95 | assert.match(output, /local \$result/); 96 | assert.doesNotMatch(output, /local \$MULTIPLIER/); 97 | assert.doesNotMatch(output, /local \$OFFSET/); 98 | assert.doesNotMatch(output, /local \$COMPUTED/); 99 | }); 100 | 101 | it('should handle const in conditionals', () => { 102 | const input = ` 103 | function test(x) { 104 | const MAX = 100; 105 | if (x > MAX) { 106 | return MAX; 107 | } 108 | return x; 109 | } 110 | test(50); 111 | `; 112 | const lexer = new Lexer(input); 113 | const parser = new Parser(lexer); 114 | const codegen = new Codegen(parser.parseProgram()); 115 | const output = codegen.generate(); 116 | 117 | // MAX should be inlined 118 | assert.match(output, /i32\.const 100/); 119 | assert.doesNotMatch(output, /local \$MAX/); 120 | }); 121 | 122 | it('should remove unused 
const variables', () => { 123 | const input = ` 124 | function test(x) { 125 | const UNUSED = 42; 126 | const USED = 10; 127 | return x + USED; 128 | } 129 | test(5); 130 | `; 131 | const lexer = new Lexer(input); 132 | const parser = new Parser(lexer); 133 | const codegen = new Codegen(parser.parseProgram()); 134 | const output = codegen.generate(); 135 | 136 | // USED should be inlined, UNUSED should not appear 137 | assert.match(output, /i32\.const 10/); 138 | assert.doesNotMatch(output, /42/); 139 | assert.doesNotMatch(output, /UNUSED/); 140 | }); 141 | 142 | it('should handle mix of let and const', () => { 143 | const input = ` 144 | function test(x) { 145 | const MULTIPLIER = 3; 146 | let temp = x * MULTIPLIER; 147 | temp = temp + 1; 148 | return temp; 149 | } 150 | test(5); 151 | `; 152 | const lexer = new Lexer(input); 153 | const parser = new Parser(lexer); 154 | const codegen = new Codegen(parser.parseProgram()); 155 | const output = codegen.generate(); 156 | 157 | // MULTIPLIER should be inlined 158 | assert.match(output, /i32\.const 3/); 159 | // temp should have a local (it's mutable) 160 | assert.match(output, /local \$temp/); 161 | assert.doesNotMatch(output, /local \$MULTIPLIER/); 162 | }); 163 | }); 164 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JavaScript to WebAssembly Compiler 2 | 3 | A lightweight, optimizing compiler that transforms a subset of JavaScript into WebAssembly (WASM). 
4 | 5 | ## Features 6 | 7 | ### Core Compiler Pipeline 8 | 9 | - **Lexer** - Tokenizes JavaScript source code 10 | - **Parser** - Builds an Abstract Syntax Tree (AST) from tokens 11 | - **Optimizer** - Applies optimization passes to improve generated code 12 | - **Code Generator** - Emits WebAssembly Text (WAT) format 13 | 14 | ### Language Support 15 | 16 | The compiler supports a practical subset of JavaScript: 17 | 18 | - **Variables**: `let` and `const` declarations 19 | - **Functions**: Function declarations with parameters and return values 20 | - **Control Flow**: `if`/`else` statements and `while` loops 21 | - **Operators**: Arithmetic (`+`, `-`, `*`, `/`, `%`), comparisons (`==`, `!=`, `<`, `>`, `<=`, `>=`), unary (`!`, `-`) 22 | - **Function Calls**: Direct function invocation 23 | - **Data Types**: 32-bit integers (i32) 24 | 25 | ### Optimizations 26 | 27 | #### 1. Constant Folding 28 | Evaluates constant expressions at compile time: 29 | ```javascript 30 | let x = 2 + 3; // Compiled as: let x = 5; 31 | let y = 10 * (4 - 2); // Compiled as: let y = 20; 32 | ``` 33 | 34 | #### 2. Constant Inlining 35 | `const` variables are inlined at usage sites, eliminating storage overhead: 36 | ```javascript 37 | const PI = 3; 38 | const radius = 5; 39 | return PI * radius; // Compiled as: return 3 * 5; 40 | ``` 41 | 42 | #### 3. Dead Code Elimination 43 | Removes unreachable code after returns and constant-false branches: 44 | ```javascript 45 | if (1 == 0) { 46 | // This entire branch is eliminated 47 | } 48 | return 42; 49 | // Any code here is eliminated 50 | ``` 51 | 52 | #### 4. Tail Call Elimination (TCE) 53 | Transforms tail-recursive functions into loops to avoid stack growth: 54 | ```javascript 55 | function factorial(n, acc) { 56 | if (n <= 1) return acc; 57 | return factorial(n - 1, n * acc); // Tail call → loop 58 | } 59 | ``` 60 | 61 | #### 5. 
Algebraic Simplifications 62 | Applies mathematical identities: 63 | - `x + 0` → `x` 64 | - `x * 1` → `x` 65 | - `x * 0` → `0` 66 | - `x / 1` → `x` 67 | 68 | ## Usage 69 | 70 | ### Compile a JavaScript file to WebAssembly 71 | 72 | ```bash 73 | node compiler.js [source-file.js] > output.wat 74 | ``` 75 | 76 | This generates an `output.wat` file (WebAssembly Text format) that can be converted to binary WASM and executed. 77 | 78 | ### Example 79 | 80 | ```bash 81 | # Compile factorial example 82 | node compiler.js examples/factorial.js 83 | ``` 84 | 85 | ## Examples 86 | 87 | The `examples/` directory contains various test programs: 88 | 89 | - **factorial.js** - Iterative factorial with while loop 90 | - **gcd.js** - Greatest common divisor (Euclidean algorithm) 91 | - **ackermann.js** - Ackermann function (recursive) 92 | - **const-test.js** - Demonstrates const inlining 93 | - **optimization-demo.js** - Shows various optimizations 94 | - **comprehensive-optimizations.js** - Complex optimization scenarios 95 | 96 | ### Example: Factorial 97 | 98 | **Input** (`examples/factorial.js`): 99 | ```javascript 100 | function fact(n) { 101 | let result = 1; 102 | while (n > 0) { 103 | result = result * n; 104 | n = n - 1; 105 | } 106 | return result; 107 | } 108 | 109 | fact(5); // returns 120 110 | ``` 111 | 112 | **Output** (Generated WAT): 113 | ```wat 114 | (module 115 | (func $fact (param $n i32) (result i32) 116 | (local $result i32) 117 | (local.set $result (i32.const 1)) 118 | (block $break 119 | (loop $continue 120 | (br_if $break (i32.le_s (local.get $n) (i32.const 0))) 121 | (local.set $result (i32.mul (local.get $result) (local.get $n))) 122 | (local.set $n (i32.sub (local.get $n) (i32.const 1))) 123 | (br $continue) 124 | ) 125 | ) 126 | (local.get $result) 127 | ) 128 | (export "fact" (func $fact)) 129 | 130 | (func $_start (export "_start") (result i32) 131 | (call $fact (i32.const 5)) 132 | ) 133 | ) 134 | ``` 135 | 136 | ## Architecture 137 | 138 | ### 
Compiler Phases 139 | 140 | ``` 141 | JavaScript Source 142 | ↓ 143 | [Lexer] - Tokenization 144 | ↓ 145 | [Parser] - AST Construction 146 | ↓ 147 | [Optimizer] - Optimization Passes 148 | ↓ 149 | [Codegen] - WASM Emission 150 | ↓ 151 | WebAssembly Text (.wat) 152 | ``` 153 | 154 | ### AST Structure 155 | 156 | The compiler uses S-expression style ASTs: 157 | ```javascript 158 | ['program', 159 | ['function', ['ident', 'add'], 160 | [['param', ['ident', 'a']], ['param', ['ident', 'b']]], 161 | ['block', [ 162 | ['stmt', ['return', ['+', ['ident', 'a'], ['ident', 'b']]]] 163 | ]] 164 | ] 165 | ] 166 | ``` 167 | 168 | ### Code Generator Design 169 | 170 | The code generator follows WebAssembly stack machine semantics: 171 | 172 | - **`emit()`** - Handles statements (side effects, control flow) 173 | - **`emitExpression()`** - Handles expressions (produces values on stack) 174 | - Clear separation between result-producing and control-flow constructs 175 | - Proper handling of implicit returns (last statement value on stack) 176 | 177 | ## Testing 178 | 179 | The project includes comprehensive test coverage: 180 | 181 | ```bash 182 | npm test 183 | ``` 184 | 185 | Test suites cover: 186 | - **Lexer** - Token generation 187 | - **Parser** - AST construction 188 | - **Optimizer** - Constant folding, dead code elimination 189 | - **Codegen** - WAT generation for various constructs 190 | - **Integration** - End-to-end compilation and execution (requires `wasmtime` installed) 191 | - **Const Support** - Const inlining and immutability 192 | 193 | ## Project Structure 194 | 195 | ``` 196 | ├── compiler.js # Main compiler entry point 197 | ├── src/ 198 | │ ├── lexer.mjs # Tokenization 199 | │ ├── parser.mjs # AST construction 200 | │ ├── optimizer.mjs # Optimization passes 201 | │ ├── codegen.mjs # WebAssembly code generation 202 | │ └── utils.mjs # Utility functions 203 | ├── test/ # Test suites 204 | │ ├── lexer.test.mjs 205 | │ ├── parser.test.mjs 206 | │ ├── 
/**
 * Token kinds produced by the lexer. Every value equals its key so tokens
 * remain readable when logged or compared.
 */
const TOKEN_TYPES = {
  EOF: 'EOF',
  ILLEGAL: 'ILLEGAL',
  IDENTIFIER: 'IDENTIFIER',
  NUMBER: 'NUMBER',
  PLUS: 'PLUS',
  MINUS: 'MINUS',
  MULT: 'MULT',
  DIV: 'DIV',
  MOD: 'MOD',
  EQ: 'EQ',
  NEQ: 'NEQ',
  LT: 'LT',
  GT: 'GT',
  LTE: 'LTE',
  GTE: 'GTE',
  NOT: 'NOT',
  ASSIGN: 'ASSIGN',
  SEMICOLON: 'SEMICOLON',
  COMMA: 'COMMA',
  LPAREN: 'LPAREN',
  RPAREN: 'RPAREN',
  LBRACE: 'LBRACE',
  RBRACE: 'RBRACE',
  LET: 'LET',
  CONST: 'CONST',
  FUNCTION: 'FUNCTION',
  IF: 'IF',
  ELSE: 'ELSE',
  RETURN: 'RETURN',
  WHILE: 'WHILE',
};

// Reserved words. A Map (rather than a plain object) keeps identifiers such
// as `constructor` or `toString` from matching inherited properties.
const KEYWORDS = new Map([
  ['if', TOKEN_TYPES.IF],
  ['else', TOKEN_TYPES.ELSE],
  ['const', TOKEN_TYPES.CONST],
  ['let', TOKEN_TYPES.LET],
  ['function', TOKEN_TYPES.FUNCTION],
  ['return', TOKEN_TYPES.RETURN],
  ['while', TOKEN_TYPES.WHILE],
]);

// Characters that form a two-character operator when followed by '='.
const EQUALS_SUFFIXED = new Map([
  ['!', TOKEN_TYPES.NEQ],
  ['=', TOKEN_TYPES.EQ],
  ['<', TOKEN_TYPES.LTE],
  ['>', TOKEN_TYPES.GTE],
]);

// Single-character tokens (including the bare forms of the operators above).
const SINGLE_CHAR = new Map([
  ['+', TOKEN_TYPES.PLUS],
  ['-', TOKEN_TYPES.MINUS],
  ['*', TOKEN_TYPES.MULT],
  ['/', TOKEN_TYPES.DIV],
  ['%', TOKEN_TYPES.MOD],
  ['!', TOKEN_TYPES.NOT],
  ['=', TOKEN_TYPES.ASSIGN],
  ['<', TOKEN_TYPES.LT],
  ['>', TOKEN_TYPES.GT],
  [';', TOKEN_TYPES.SEMICOLON],
  [',', TOKEN_TYPES.COMMA],
  ['(', TOKEN_TYPES.LPAREN],
  [')', TOKEN_TYPES.RPAREN],
  ['{', TOKEN_TYPES.LBRACE],
  ['}', TOKEN_TYPES.RBRACE],
]);

/** A lexical token: its kind plus the exact source text it was read from. */
class Token {
  /**
   * @param {string} type - One of the TOKEN_TYPES values.
   * @param {string} literal - Source text of the token ('' for EOF).
   */
  constructor(type, literal) {
    this.type = type;
    this.literal = literal;
  }
}

/**
 * Hand-written scanner that turns source text into tokens, one per
 * `nextToken()` call.
 */
class Lexer {
  /**
   * @param {string} input - Source text to tokenize.
   */
  constructor(input) {
    this.input = input;
    this.position = 0; // Index of the current character
    this.readPosition = 0; // Index of the next character to read
    this.ch = ''; // Current character, or null once the input is exhausted
    this.readChar(); // Load the first character
  }

  /** Advance one character; `ch` becomes null at end of input. */
  readChar() {
    if (this.readPosition >= this.input.length) {
      this.ch = null; // EOF
    } else {
      this.ch = this.input[this.readPosition];
    }
    this.position = this.readPosition;
    this.readPosition += 1;
  }

  /**
   * Look at the next character without consuming it.
   *
   * @returns {string|null} The next character, or null at end of input.
   */
  peekChar() {
    if (this.readPosition >= this.input.length) {
      return null;
    }
    return this.input[this.readPosition];
  }

  /**
   * @param {string|null} char - A single character, or null for EOF.
   * @returns {boolean} True for an ASCII digit.
   */
  isDigit(char) {
    return typeof char === 'string' && /[0-9]/.test(char);
  }

  /**
   * @param {string|null} char - A single character, or null for EOF.
   * @returns {boolean} True for a character that may start an identifier
   *   (ASCII letter, `_` or `$`).
   */
  isLetter(char) {
    // The type guard matters: RegExp.prototype.test coerces null to the
    // string "null", which contains letters and would report EOF as a letter.
    return typeof char === 'string' && /[_a-zA-Z$]/.test(char);
  }

  /** Skip spaces, tabs and line terminators. */
  skipWhitespace() {
    while (this.ch === ' ' || this.ch === '\t' || this.ch === '\n' || this.ch === '\r') {
      this.readChar();
    }
  }

  /** Skip to the end of the current line (or end of input). */
  skipLineComment() {
    while (this.ch !== '\n' && this.ch !== '\r' && this.ch !== null) {
      this.readChar();
    }
  }

  /**
   * Read a run of digits, optionally followed by `.` and more digits.
   *
   * @returns {string} The numeric literal exactly as written.
   */
  readNumber() {
    const startPos = this.position;
    while (this.isDigit(this.ch)) {
      this.readChar();
    }
    // Only take the '.' when a digit follows, so "5." lexes as NUMBER "5".
    if (this.ch === '.' && this.isDigit(this.peekChar())) {
      this.readChar();
      while (this.isDigit(this.ch)) {
        this.readChar();
      }
    }
    return this.input.slice(startPos, this.position);
  }

  /**
   * Read an identifier or keyword starting at the current character.
   *
   * @returns {string} The identifier text.
   */
  readIdentifier() {
    const startPos = this.position;
    while (this.isLetter(this.ch) || this.isDigit(this.ch)) {
      this.readChar();
    }
    return this.input.slice(startPos, this.position);
  }

  /**
   * Scan and return the next token, skipping whitespace and `//` comments.
   *
   * @returns {Token} The next token. Once the input is exhausted every call
   *   returns an EOF token. Unrecognized characters yield an ILLEGAL token
   *   carrying that character.
   */
  nextToken() {
    this.skipWhitespace();
    while (this.ch === '/' && this.peekChar() === '/') {
      this.skipLineComment();
      this.skipWhitespace();
    }

    const ch = this.ch;

    if (ch === null) {
      // Do not advance past the end; EOF is returned repeatedly.
      return new Token(TOKEN_TYPES.EOF, '');
    }

    if (this.isLetter(ch)) {
      const literal = this.readIdentifier();
      return new Token(KEYWORDS.get(literal) ?? TOKEN_TYPES.IDENTIFIER, literal);
    }

    if (this.isDigit(ch)) {
      return new Token(TOKEN_TYPES.NUMBER, this.readNumber());
    }

    if (EQUALS_SUFFIXED.has(ch) && this.peekChar() === '=') {
      // Consume both characters of the operator.
      this.readChar();
      this.readChar();
      return new Token(EQUALS_SUFFIXED.get(ch), `${ch}=`);
    }

    this.readChar();
    return new Token(SINGLE_CHAR.get(ch) ?? TOKEN_TYPES.ILLEGAL, ch);
  }
}

export { Lexer, TOKEN_TYPES, Token };
43 | it('should fold comparison operations', () => { 44 | const expr1 = ['==', ['number', 5], ['number', 5]]; 45 | const optimizer1 = new Optimizer(expr1); 46 | assert.deepStrictEqual(optimizer1.constantFolding(expr1), ['number', 1]); 47 | 48 | const expr2 = ['<', ['number', 3], ['number', 7]]; 49 | const optimizer2 = new Optimizer(expr2); 50 | assert.deepStrictEqual(optimizer2.constantFolding(expr2), ['number', 1]); 51 | 52 | const expr3 = ['>', ['number', 3], ['number', 7]]; 53 | const optimizer3 = new Optimizer(expr3); 54 | assert.deepStrictEqual(optimizer3.constantFolding(expr3), ['number', 0]); 55 | }); 56 | 57 | it('should apply algebraic simplifications', () => { 58 | // x + 0 = x 59 | const expr1 = ['+', ['ident', 'x'], ['number', 0]]; 60 | const optimizer1 = new Optimizer(expr1); 61 | assert.deepStrictEqual(optimizer1.constantFolding(expr1), ['ident', 'x']); 62 | 63 | // x * 1 = x 64 | const expr2 = ['*', ['ident', 'y'], ['number', 1]]; 65 | const optimizer2 = new Optimizer(expr2); 66 | assert.deepStrictEqual(optimizer2.constantFolding(expr2), ['ident', 'y']); 67 | 68 | // x * 0 = 0 69 | const expr3 = ['*', ['ident', 'z'], ['number', 0]]; 70 | const optimizer3 = new Optimizer(expr3); 71 | assert.deepStrictEqual(optimizer3.constantFolding(expr3), ['number', 0]); 72 | }); 73 | 74 | it('should fold prefix operations', () => { 75 | // -5 = -5 76 | const expr1 = ['prefix', '-', ['number', 5]]; 77 | const optimizer1 = new Optimizer(expr1); 78 | assert.deepStrictEqual(optimizer1.constantFolding(expr1), ['number', -5]); 79 | 80 | // !0 = 1 81 | const expr2 = ['prefix', '!', ['number', 0]]; 82 | const optimizer2 = new Optimizer(expr2); 83 | assert.deepStrictEqual(optimizer2.constantFolding(expr2), ['number', 1]); 84 | 85 | // !5 = 0 86 | const expr3 = ['prefix', '!', ['number', 5]]; 87 | const optimizer3 = new Optimizer(expr3); 88 | assert.deepStrictEqual(optimizer3.constantFolding(expr3), ['number', 0]); 89 | }); 90 | }); 91 | 92 | describe('Dead Code 
Elimination', () => { 93 | it('should eliminate unreachable code after return', () => { 94 | const ast = [ 95 | 'function', 96 | ['ident', 'test'], 97 | [], 98 | ['block', [ 99 | ['stmt', ['return', ['number', 5]]], 100 | ['stmt', ['return', ['number', 10]]] // unreachable 101 | ]] 102 | ]; 103 | const optimizer = new Optimizer(ast); 104 | const optimized = optimizer.deadCodeElimination(ast); 105 | 106 | const expectedBody = ['block', [ 107 | ['stmt', ['return', ['number', 5]]] 108 | ]]; 109 | assert.deepStrictEqual(optimized[3], expectedBody); 110 | }); 111 | 112 | it('should eliminate constant false if branches', () => { 113 | const ast = [ 114 | 'if', 115 | ['number', 0], // constant false 116 | ['block', [['stmt', ['return', ['number', 1]]]]], 117 | ['block', [['stmt', ['return', ['number', 2]]]]] 118 | ]; 119 | const optimizer = new Optimizer(ast); 120 | const optimized = optimizer.deadCodeElimination(ast); 121 | 122 | // Should keep only the else branch 123 | const expected = ['block', [['stmt', ['return', ['number', 2]]]]]; 124 | assert.deepStrictEqual(optimized, expected); 125 | }); 126 | 127 | it('should eliminate constant true if branches', () => { 128 | const ast = [ 129 | 'if', 130 | ['number', 1], // constant true 131 | ['block', [['stmt', ['return', ['number', 1]]]]], 132 | ['block', [['stmt', ['return', ['number', 2]]]]] 133 | ]; 134 | const optimizer = new Optimizer(ast); 135 | const optimized = optimizer.deadCodeElimination(ast); 136 | 137 | // Should keep only the then branch 138 | const expected = ['block', [['stmt', ['return', ['number', 1]]]]]; 139 | assert.deepStrictEqual(optimized, expected); 140 | }); 141 | 142 | it('should eliminate while loops with constant false condition', () => { 143 | const ast = [ 144 | 'while', 145 | ['number', 0], // constant false 146 | ['block', [['stmt', ['return', ['number', 1]]]]] 147 | ]; 148 | const optimizer = new Optimizer(ast); 149 | const optimized = optimizer.deadCodeElimination(ast); 150 | 151 | // 
Should become empty block 152 | assert.deepStrictEqual(optimized, ['block', []]); 153 | }); 154 | 155 | it('should remove unused variables', () => { 156 | const ast = [ 157 | 'function', 158 | ['ident', 'test'], 159 | [['ident', 'x']], 160 | ['block', [ 161 | ['stmt', ['let', ['ident', 'unused'], ['number', 42]]], 162 | ['stmt', ['let', ['ident', 'used'], ['number', 5]]], 163 | ['stmt', ['return', ['ident', 'used']]] 164 | ]] 165 | ]; 166 | const optimizer = new Optimizer(ast); 167 | const optimized = optimizer.deadCodeElimination(ast); 168 | 169 | // Should only have 'used' variable 170 | const statements = optimized[3][1]; 171 | const letStatements = statements.filter(s => 172 | s[0] === 'stmt' && s[1][0] === 'let' 173 | ); 174 | assert.strictEqual(letStatements.length, 1); 175 | assert.strictEqual(letStatements[0][1][1][1], 'used'); 176 | }); 177 | }); 178 | 179 | describe('Integration - Full Optimization', () => { 180 | it('should apply both constant folding and dead code elimination', () => { 181 | const ast = [ 182 | 'program', 183 | [ 184 | 'function', 185 | ['ident', 'test'], 186 | [], 187 | ['block', [ 188 | ['stmt', ['let', ['ident', 'a'], ['+', ['number', 2], ['number', 3]]]], 189 | ['if', 190 | ['==', ['number', 1], ['number', 1]], 191 | ['block', [['stmt', ['return', ['ident', 'a']]]]], 192 | ['block', [['stmt', ['return', ['number', 0]]]]] 193 | ], 194 | ['stmt', ['return', ['number', 999]]] // unreachable 195 | ]] 196 | ] 197 | ]; 198 | 199 | const optimizer = new Optimizer(ast); 200 | const optimized = optimizer.optimize(); 201 | 202 | // Should fold 2+3 to 5, eliminate if (keeping then branch), remove unreachable code 203 | const functionBody = optimized[1][3][1]; 204 | 205 | // First statement should have constant 5 206 | assert.deepStrictEqual(functionBody[0][1][2], ['number', 5]); 207 | 208 | // Should only have 2 statements (let and return), no if 209 | assert.strictEqual(functionBody.length, 2); 210 | 211 | // Last statement should be the 
return from then branch 212 | assert.deepStrictEqual(functionBody[1][1][0], 'return'); 213 | }); 214 | }); 215 | }); 216 | -------------------------------------------------------------------------------- /test/parser.test.mjs: -------------------------------------------------------------------------------- 1 | import assert from 'node:assert/strict'; 2 | import { describe, it } from 'node:test'; 3 | import fs from 'node:fs'; 4 | 5 | import { Lexer } from '../src/lexer.mjs'; 6 | import { Parser } from '../src/parser.mjs'; 7 | 8 | describe('Parser', () => { 9 | it('should parse a simple expression', () => { 10 | const input = 'let x = 10 + 5;'; 11 | const lexer = new Lexer(input); 12 | const parser = new Parser(lexer); 13 | 14 | const expected = [ 15 | 'program', 16 | [ 17 | 'stmt', 18 | [ 19 | 'let', 20 | ['ident', 'x'], 21 | [ 22 | '+', 23 | ['number', 10], 24 | ['number', 5], 25 | ], 26 | ], 27 | ], 28 | ]; 29 | 30 | assert.deepEqual(expected, parser.parseProgram()); 31 | }); 32 | 33 | it('should parse the factorial program', () => { 34 | const input = fs.readFileSync('./examples/factorial.js', 'utf-8'); 35 | 36 | const lexer = new Lexer(input); 37 | const parser = new Parser(lexer); 38 | 39 | const expected = [ 40 | 'program', 41 | [ 42 | 'function', 43 | ['ident', 'fact'], 44 | [['ident', 'n']], 45 | [ 46 | 'block', 47 | [ 48 | [ 49 | 'stmt', 50 | [ 51 | 'let', 52 | ['ident', 'result'], 53 | ['number', 1], 54 | ], 55 | ], 56 | [ 57 | 'while', 58 | [ 59 | '>', 60 | ['ident', 'n'], 61 | ['number', 0], 62 | ], 63 | [ 64 | 'block', 65 | [ 66 | [ 67 | 'stmt', 68 | [ 69 | 'assign', 70 | ['ident', 'result'], 71 | ['*', ['ident', 'result'], ['ident', 'n']], 72 | ], 73 | ], 74 | [ 75 | 'stmt', 76 | [ 77 | 'assign', 78 | ['ident', 'n'], 79 | ['-', ['ident', 'n'], ['number', 1]], 80 | ], 81 | ], 82 | ], 83 | ], 84 | ], 85 | [ 86 | 'stmt', 87 | [ 88 | 'return', 89 | ['ident', 'result'], 90 | ], 91 | ], 92 | ], 93 | ], 94 | ], 95 | [ 96 | 'stmt', 97 | [ 98 | 'call', 99 | 
['ident', 'fact'], 100 | [['number', 5]], 101 | ], 102 | ], 103 | ]; 104 | 105 | assert.deepEqual(expected, parser.parseProgram()); 106 | }); 107 | 108 | it('should parse the gcd program', () => { 109 | const input = fs.readFileSync('./examples/gcd.js', 'utf-8'); 110 | 111 | const lexer = new Lexer(input); 112 | const parser = new Parser(lexer); 113 | 114 | const expected = [ 115 | 'program', 116 | [ 117 | 'function', 118 | ['ident', 'gcd'], 119 | [['ident', 'a'], ['ident', 'b']], 120 | [ 121 | 'block', 122 | [ 123 | [ 124 | 'while', 125 | [ 126 | '!=', 127 | ['ident', 'b'], 128 | ['number', 0], 129 | ], 130 | [ 131 | 'block', 132 | [ 133 | [ 134 | 'stmt', 135 | [ 136 | 'let', 137 | ['ident', 't'], 138 | ['ident', 'b'], 139 | ], 140 | ], 141 | [ 142 | 'stmt', 143 | [ 144 | 'assign', 145 | ['ident', 'b'], 146 | ['%', ['ident', 'a'], ['ident', 'b']], 147 | ], 148 | ], 149 | [ 150 | 'stmt', 151 | [ 152 | 'assign', 153 | ['ident', 'a'], 154 | ['ident', 't'], 155 | ], 156 | ], 157 | ], 158 | ], 159 | ], 160 | [ 161 | 'stmt', 162 | [ 163 | 'return', 164 | ['ident', 'a'], 165 | ], 166 | ], 167 | ], 168 | ], 169 | ], 170 | [ 171 | 'stmt', 172 | [ 173 | 'call', 174 | ['ident', 'gcd'], 175 | [['number', 48], ['number', 18]], 176 | ], 177 | ], 178 | ]; 179 | 180 | assert.deepEqual(expected, parser.parseProgram()); 181 | }); 182 | 183 | it('should parse the ackermann program', () => { 184 | const input = fs.readFileSync('./examples/ackermann.js', 'utf-8'); 185 | 186 | const lexer = new Lexer(input); 187 | const parser = new Parser(lexer); 188 | 189 | const expected = [ 190 | 'program', 191 | [ 192 | 'function', 193 | ['ident', 'ack'], 194 | [['ident', 'm'], ['ident', 'n']], 195 | [ 196 | 'block', 197 | [ 198 | [ 199 | 'if', 200 | [ 201 | '==', 202 | ['ident', 'm'], 203 | ['number', 0], 204 | ], 205 | [ 206 | 'stmt', 207 | [ 208 | 'return', 209 | [ 210 | '+', 211 | ['ident', 'n'], 212 | ['number', 1], 213 | ], 214 | ], 215 | ], 216 | ], 217 | [ 218 | 'if', 219 | [ 220 | '==', 
/**
 * AST optimizer implementing constant folding and dead code elimination.
 *
 * The AST is made of nested arrays whose first element is the node type,
 * e.g. ['number', 5], ['ident', 'x'], ['+', left, right],
 * ['prefix', op, operand], ['call', callee, args], ['assign', target, value],
 * ['stmt', inner], ['block', statements], ['if', cond, then, else?],
 * ['while', cond, body], ['function', name, params, body] and
 * ['program', ...statements]. Every pass returns new arrays and leaves its
 * input untouched.
 */
export class Optimizer {
  /**
   * @param {Array} ast - Root node that optimize() will work on.
   */
  constructor(ast) {
    this.ast = ast;
  }

  /**
   * Run constant folding followed by dead code elimination until the tree
   * stops changing, with a cap of 10 rounds as a safety net.
   * @returns {Array} The optimized AST (also stored on `this.ast`).
   */
  optimize() {
    const maxIterations = 10; // Prevent infinite loops
    for (let iteration = 0; iteration < maxIterations; iteration += 1) {
      const before = JSON.stringify(this.ast);

      this.ast = this.constantFolding(this.ast);
      this.ast = this.deadCodeElimination(this.ast);

      if (JSON.stringify(this.ast) === before) {
        break;
      }
    }
    return this.ast;
  }

  /**
   * Report whether evaluating `node` can do more than produce a value.
   * Calls and assignments anywhere in the subtree count as side effects.
   * @param {*} node - AST node (non-arrays are treated as pure).
   * @returns {boolean}
   */
  hasSideEffects(node) {
    if (!Array.isArray(node)) return false;
    if (node[0] === 'call' || node[0] === 'assign') return true;
    return node.some((child) => this.hasSideEffects(child));
  }

  /**
   * Constant folding: evaluate constant expressions at compile time and
   * apply simple algebraic identities. Children are folded before parents.
   * @param {*} node - AST node; non-arrays are returned unchanged.
   * @returns {*} A folded copy of the node.
   */
  constantFolding(node) {
    if (!Array.isArray(node)) return node;

    const type = node[0];
    const folded = node.map((child) =>
      (Array.isArray(child) ? this.constantFolding(child) : child));

    if (['+', '-', '*', '/', '%'].includes(type)) {
      return this.foldArithmetic(type, folded);
    }
    if (['==', '!=', '<', '>', '<=', '>='].includes(type)) {
      return this.foldComparison(type, folded);
    }
    if (type === 'prefix') {
      return this.foldPrefix(folded);
    }
    return folded;
  }

  /**
   * Fold one arithmetic node whose operands have already been folded.
   * @param {string} type - One of '+', '-', '*', '/', '%'.
   * @param {Array} folded - [type, left, right].
   * @returns {Array} The simplified node, or `folded` when nothing applies.
   */
  foldArithmetic(type, folded) {
    const left = folded[1];
    const right = folded[2];
    if (!Array.isArray(left) || !Array.isArray(right)) return folded;

    const isConst = (operand, value) => operand[0] === 'number' && operand[1] === value;

    if (left[0] === 'number' && right[0] === 'number') {
      const leftVal = left[1];
      const rightVal = right[1];
      switch (type) {
        case '+': return ['number', leftVal + rightVal];
        case '-': return ['number', leftVal - rightVal];
        case '*': return ['number', leftVal * rightVal];
        case '/':
          // Division by zero is left for run time.
          // NOTE(review): floor rounds toward -infinity while the '%' fold
          // below follows the dividend's sign; confirm against codegen's
          // division semantics for negative operands.
          return rightVal === 0 ? folded : ['number', Math.floor(leftVal / rightVal)];
        case '%':
          return rightVal === 0 ? folded : ['number', leftVal % rightVal];
        default:
          return folded;
      }
    }

    switch (type) {
      case '+':
        if (isConst(right, 0)) return left; // x + 0 = x
        if (isConst(left, 0)) return right; // 0 + x = x
        break;
      case '-':
        if (isConst(right, 0)) return left; // x - 0 = x
        // x - x = 0 (identifiers only)
        if (left[0] === 'ident' && right[0] === 'ident' && left[1] === right[1]) {
          return ['number', 0];
        }
        break;
      case '*':
        // x * 0 = 0, but only when dropping x cannot drop a call/assignment.
        if (isConst(right, 0) && !this.hasSideEffects(left)) return ['number', 0];
        if (isConst(left, 0) && !this.hasSideEffects(right)) return ['number', 0];
        if (isConst(right, 1)) return left; // x * 1 = x
        if (isConst(left, 1)) return right; // 1 * x = x
        break;
      case '/':
        if (isConst(right, 1)) return left; // x / 1 = x
        // 0 / x = 0 for a side-effect-free x that is not the constant 0.
        // NOTE(review): x may still be zero at run time; confirm this
        // rewrite is acceptable for the target's division-by-zero behavior.
        if (isConst(left, 0) && !isConst(right, 0) && !this.hasSideEffects(right)) {
          return ['number', 0];
        }
        break;
      default:
        break;
    }
    return folded;
  }

  /**
   * Fold one comparison node whose operands have already been folded.
   * Results are encoded as ['number', 1] (true) or ['number', 0] (false).
   * @param {string} type - One of '==', '!=', '<', '>', '<=', '>='.
   * @param {Array} folded - [type, left, right].
   * @returns {Array} The simplified node, or `folded` when nothing applies.
   */
  foldComparison(type, folded) {
    const left = folded[1];
    const right = folded[2];
    if (!Array.isArray(left) || !Array.isArray(right)) return folded;

    if (left[0] === 'number' && right[0] === 'number') {
      const leftVal = left[1];
      const rightVal = right[1];
      let result;
      switch (type) {
        case '==': result = leftVal === rightVal; break;
        case '!=': result = leftVal !== rightVal; break;
        case '<': result = leftVal < rightVal; break;
        case '>': result = leftVal > rightVal; break;
        case '<=': result = leftVal <= rightVal; break;
        default: result = leftVal >= rightVal; break; // '>='
      }
      return ['number', result ? 1 : 0];
    }

    // The same identifier on both sides compares equal to itself.
    if (left[0] === 'ident' && right[0] === 'ident' && left[1] === right[1]) {
      const holds = type === '==' || type === '<=' || type === '>=';
      return ['number', holds ? 1 : 0];
    }
    return folded;
  }

  /**
   * Fold one ['prefix', op, operand] node whose operand is already folded.
   * @param {Array} folded - The prefix node.
   * @returns {Array} The simplified node, or `folded` when nothing applies.
   */
  foldPrefix(folded) {
    const op = folded[1];
    const operand = folded[2];
    if (!Array.isArray(operand)) return folded;

    if (operand[0] === 'number') {
      if (op === '-') return ['number', -operand[1]];
      if (op === '!') return ['number', operand[1] === 0 ? 1 : 0];
    }

    // !!x is rewritten to the equivalent boolean test (x != 0).
    if (op === '!' && operand[0] === 'prefix' && operand[1] === '!') {
      return ['!=', operand[2], ['number', 0]];
    }
    return folded;
  }

  /**
   * Dead code elimination: drop statements after a return, resolve `if`
   * and `while` with constant conditions, and remove unused top-level
   * declarations from function bodies.
   * @param {*} node - AST node; non-arrays are returned unchanged.
   * @returns {*} A cleaned copy of the node.
   */
  deadCodeElimination(node) {
    if (!Array.isArray(node)) return node;

    const type = node[0];

    if (type === 'program') {
      return ['program', ...node.slice(1).map((stmt) => this.deadCodeElimination(stmt))];
    }

    if (type === 'function') {
      const name = node[1];
      const params = node[2];
      const optimizedBody = this.deadCodeElimination(node[3]);
      const paramNames = Array.isArray(params) ? params.map((p) => p[1]) : [];
      return ['function', name, params, this.removeUnusedVariables(optimizedBody, paramNames)];
    }

    if (type === 'block') {
      const statements = node[1];
      if (!Array.isArray(statements)) return node;

      const kept = [];
      for (const stmt of statements) {
        const optimizedStmt = this.deadCodeElimination(stmt);

        // A statement that collapsed into a block (e.g. a resolved `if`)
        // is spliced into the surrounding statement list.
        const produced =
          optimizedStmt[0] === 'block' && Array.isArray(optimizedStmt[1])
            ? optimizedStmt[1]
            : [optimizedStmt];

        let returned = false;
        for (const s of produced) {
          kept.push(s);
          if (this.isReturn(s)) {
            returned = true;
            break;
          }
        }
        // Everything after a return is unreachable.
        if (returned) break;
      }
      return ['block', kept];
    }

    if (type === 'if') {
      // Fold here as well so this pass works without a prior folding pass.
      const condition = this.constantFolding(node[1]);
      const consequence = node[2];
      const alternative = node.length > 3 ? node[3] : null;

      if (condition[0] === 'number') {
        if (condition[1] !== 0) {
          return this.deadCodeElimination(consequence);
        }
        return alternative ? this.deadCodeElimination(alternative) : ['block', []];
      }

      const optimizedConsequence = this.deadCodeElimination(consequence);
      const optimizedAlternative = alternative ? this.deadCodeElimination(alternative) : null;
      if (optimizedAlternative) {
        return ['if', condition, optimizedConsequence, optimizedAlternative];
      }
      return ['if', condition, optimizedConsequence];
    }

    if (type === 'while') {
      const condition = this.constantFolding(node[1]);

      // A loop whose condition is the constant 0 never runs.
      if (condition[0] === 'number' && condition[1] === 0) {
        return ['block', []];
      }
      return ['while', condition, this.deadCodeElimination(node[2])];
    }

    if (type === 'stmt') {
      return ['stmt', this.deadCodeElimination(node[1])];
    }

    return node.map((child) =>
      (Array.isArray(child) ? this.deadCodeElimination(child) : child));
  }

  /**
   * Remove `let`/`const` declarations at the top level of `body` whose
   * variable is never referenced. A declaration is kept when its
   * initializer contains a call or assignment, because deleting it would
   * also delete that side effect.
   * @param {Array} body - Expected to be ['block', statements]; anything
   *   else is returned unchanged.
   * @param {string[]} params - Parameter names, always treated as used.
   * @returns {Array} A new block without the unused declarations.
   */
  removeUnusedVariables(body, params) {
    if (body[0] !== 'block' || !Array.isArray(body[1])) {
      return body;
    }

    const statements = body[1];
    const usages = new Set(params);

    // Record every identifier that is read (or assigned to) in the body.
    const collectUsages = (node, excludeVar = null) => {
      if (!Array.isArray(node)) return;

      if (node[0] === 'ident' && node[1] !== excludeVar) {
        usages.add(node[1]);
      }

      // The declared name itself is not a use; only scan the initializer.
      if (node[0] === 'let' || node[0] === 'const') {
        collectUsages(node[2], node[1][1]);
        return;
      }

      // An assignment needs its target's declaration to survive.
      if (node[0] === 'assign') {
        usages.add(node[1][1]);
        collectUsages(node[2], node[1][1]);
        return;
      }

      node.forEach((child) => collectUsages(child, excludeVar));
    };
    statements.forEach((stmt) => collectUsages(stmt));

    const isDeclaration = (stmt) =>
      stmt[0] === 'stmt' && stmt[1] && (stmt[1][0] === 'let' || stmt[1][0] === 'const');

    const filteredStatements = statements.filter((stmt) => {
      if (!isDeclaration(stmt)) return true;
      const declaration = stmt[1];
      return usages.has(declaration[1][1]) || this.hasSideEffects(declaration[2]);
    });

    return ['block', filteredStatements];
  }

  /**
   * @param {*} node - AST node.
   * @returns {boolean} True for ['return', ...] or ['stmt', ['return', ...]].
   */
  isReturn(node) {
    if (!Array.isArray(node)) return false;
    if (node[0] === 'return') return true;
    return Boolean(node[0] === 'stmt' && node[1] && node[1][0] === 'return');
  }
}
this.parseIdentifier); 44 | this.registerPrefix(TOKEN_TYPES.NUMBER, this.parseNumberLiteral); 45 | this.registerPrefix(TOKEN_TYPES.MINUS, this.parsePrefixExpression); // Unary Minus 46 | this.registerPrefix(TOKEN_TYPES.NOT, this.parsePrefixExpression); 47 | this.registerPrefix(TOKEN_TYPES.LPAREN, this.parseGroupedExpression); // Parenthesized expressions 48 | 49 | this.registerInfix(TOKEN_TYPES.PLUS, this.parseInfixExpression); 50 | this.registerInfix(TOKEN_TYPES.MINUS, this.parseInfixExpression); 51 | this.registerInfix(TOKEN_TYPES.MULT, this.parseInfixExpression); 52 | this.registerInfix(TOKEN_TYPES.DIV, this.parseInfixExpression); 53 | this.registerInfix(TOKEN_TYPES.MOD, this.parseInfixExpression); 54 | this.registerInfix(TOKEN_TYPES.EQ, this.parseInfixExpression); 55 | this.registerInfix(TOKEN_TYPES.NEQ, this.parseInfixExpression); 56 | this.registerInfix(TOKEN_TYPES.LT, this.parseInfixExpression); 57 | this.registerInfix(TOKEN_TYPES.GT, this.parseInfixExpression); 58 | this.registerInfix(TOKEN_TYPES.LTE, this.parseInfixExpression); 59 | this.registerInfix(TOKEN_TYPES.GTE, this.parseInfixExpression); 60 | 61 | this.registerInfix(TOKEN_TYPES.ASSIGN, this.parseInfixExpression); 62 | this.registerInfix(TOKEN_TYPES.LPAREN, this.parseCallExpression); 63 | } 64 | 65 | nextToken() { 66 | this.currentToken = this.peekToken; 67 | this.peekToken = this.lexer.nextToken(); 68 | } 69 | 70 | // --- Token Helpers (Same as before) --- 71 | curTokenIs(type) { return this.currentToken && this.currentToken.type === type; } 72 | peekTokenIs(type) { return this.peekToken && this.peekToken.type === type; } 73 | peekError(type) { 74 | const actual = this.peekToken ? 
this.peekToken.type : 'null'; 75 | console.error(`ERROR: Expected next token to be ${type}, got ${actual} instead.`); 76 | } 77 | expectPeek(type) { 78 | if (this.peekTokenIs(type)) { 79 | this.nextToken(); 80 | return true; 81 | } else { 82 | this.peekError(type); 83 | return false; 84 | } 85 | } 86 | 87 | // --- Precedence Helpers (Same as before) --- 88 | peekPrecedence() { 89 | if (!this.peekToken) { 90 | return PRECEDENCES.LOWEST; 91 | } 92 | return precedences[this.peekToken.type] || PRECEDENCES.LOWEST; 93 | } 94 | curPrecedence() { 95 | if (!this.currentToken) { 96 | return PRECEDENCES.LOWEST; 97 | } 98 | return precedences[this.currentToken.type] || PRECEDENCES.LOWEST; 99 | } 100 | 101 | // --- Function Registration (Same as before) --- 102 | registerPrefix(tokenType, fn) { 103 | this.prefixParseFns[tokenType] = fn.bind(this); 104 | } 105 | registerInfix(tokenType, fn) { 106 | this.infixParseFns[tokenType] = fn.bind(this); 107 | } 108 | 109 | // --- Main Parsing Loop --- 110 | parseProgram() { 111 | const statements = []; 112 | 113 | while (!this.curTokenIs(TOKEN_TYPES.EOF)) { 114 | const statement = this.parseStatement(); 115 | if (statement) { 116 | statements.push(statement); 117 | } 118 | this.nextToken(); 119 | } 120 | 121 | // Return the whole program as an S-expression: ['program', ...statements] 122 | return ['program', ...statements]; 123 | } 124 | 125 | parseStatement() { 126 | if (this.curTokenIs(TOKEN_TYPES.LET)) { 127 | return this.parseLetStatement(); 128 | } 129 | if (this.curTokenIs(TOKEN_TYPES.CONST)) { 130 | return this.parseConstStatement(); 131 | } 132 | if (this.curTokenIs(TOKEN_TYPES.RETURN)) { 133 | return this.parseReturnStatement(); 134 | } 135 | if (this.curTokenIs(TOKEN_TYPES.FUNCTION)) { 136 | return this.parseFunctionStatement(); 137 | } 138 | if (this.curTokenIs(TOKEN_TYPES.WHILE)) { 139 | return this.parseWhileStatement(); 140 | } 141 | if (this.curTokenIs(TOKEN_TYPES.IF)) { 142 | return this.parseIfStatement(); 143 | } 144 | 
// We'll treat all expressions followed by a semicolon as expression statements 145 | return this.parseExpressionStatement(); 146 | } 147 | 148 | parseLetStatement() { 149 | if (!this.expectPeek(TOKEN_TYPES.IDENTIFIER)) { 150 | return null; 151 | } 152 | 153 | const identifier = ['ident', this.currentToken.literal]; 154 | 155 | if (!this.expectPeek(TOKEN_TYPES.ASSIGN)) { 156 | return null; 157 | } 158 | 159 | this.nextToken(); 160 | 161 | const value = this.parseExpression(PRECEDENCES.LOWEST); 162 | 163 | if (this.peekTokenIs(TOKEN_TYPES.SEMICOLON)) { 164 | this.nextToken(); 165 | } 166 | 167 | return ['stmt', ['let', identifier, value]]; 168 | } 169 | 170 | parseConstStatement() { 171 | if (!this.expectPeek(TOKEN_TYPES.IDENTIFIER)) { 172 | return null; 173 | } 174 | 175 | const identifier = ['ident', this.currentToken.literal]; 176 | 177 | if (!this.expectPeek(TOKEN_TYPES.ASSIGN)) { 178 | return null; 179 | } 180 | 181 | this.nextToken(); 182 | 183 | const value = this.parseExpression(PRECEDENCES.LOWEST); 184 | 185 | if (this.peekTokenIs(TOKEN_TYPES.SEMICOLON)) { 186 | this.nextToken(); 187 | } 188 | 189 | return ['stmt', ['const', identifier, value]]; 190 | } 191 | 192 | parseExpressionStatement() { 193 | // Return ['stmt', expression] 194 | const expression = this.parseExpression(PRECEDENCES.LOWEST); 195 | 196 | // Consume the semicolon if present 197 | if (this.peekTokenIs(TOKEN_TYPES.SEMICOLON)) { 198 | this.nextToken(); 199 | } 200 | 201 | return ['stmt', expression]; 202 | } 203 | 204 | parseReturnStatement() { 205 | this.nextToken(); 206 | 207 | const returnValue = this.parseExpression(PRECEDENCES.LOWEST); 208 | 209 | if (this.peekTokenIs(TOKEN_TYPES.SEMICOLON)) { 210 | this.nextToken(); 211 | } 212 | 213 | return ['stmt', ['return', returnValue]]; 214 | } 215 | 216 | parseFunctionStatement() { 217 | if (!this.expectPeek(TOKEN_TYPES.IDENTIFIER)) { 218 | return null; 219 | } 220 | 221 | const name = ['ident', this.currentToken.literal]; 222 | 223 | if 
(!this.expectPeek(TOKEN_TYPES.LPAREN)) { 224 | return null; 225 | } 226 | 227 | const parameters = this.parseFunctionParameters(); 228 | 229 | if (!this.expectPeek(TOKEN_TYPES.LBRACE)) { 230 | return null; 231 | } 232 | 233 | const body = this.parseBlock(); 234 | 235 | return ['function', name, parameters, body]; 236 | } 237 | 238 | parseWhileStatement() { 239 | if (!this.expectPeek(TOKEN_TYPES.LPAREN)) { 240 | return null; 241 | } 242 | 243 | this.nextToken(); 244 | 245 | const condition = this.parseExpression(PRECEDENCES.LOWEST); 246 | 247 | if (!this.expectPeek(TOKEN_TYPES.RPAREN)) { 248 | return null; 249 | } 250 | 251 | if (!this.expectPeek(TOKEN_TYPES.LBRACE)) { 252 | return null; 253 | } 254 | 255 | const body = this.parseBlock(); 256 | 257 | return ['while', condition, body]; 258 | } 259 | 260 | parseBlock() { 261 | const statements = []; 262 | 263 | this.nextToken(); 264 | 265 | while (!this.curTokenIs(TOKEN_TYPES.RBRACE) && !this.curTokenIs(TOKEN_TYPES.EOF)) { 266 | const stmt = this.parseStatement(); 267 | if (stmt) { 268 | statements.push(stmt); 269 | } 270 | this.nextToken(); 271 | } 272 | 273 | return ['block', statements]; 274 | } 275 | 276 | parseFunctionParameters() { 277 | const identifiers = []; 278 | 279 | if (this.peekTokenIs(TOKEN_TYPES.RPAREN)) { 280 | this.nextToken(); 281 | return identifiers; 282 | } 283 | 284 | this.nextToken(); 285 | identifiers.push(['ident', this.currentToken.literal]); 286 | 287 | while (this.peekTokenIs(TOKEN_TYPES.COMMA)) { 288 | this.nextToken(); 289 | this.nextToken(); 290 | identifiers.push(['ident', this.currentToken.literal]); 291 | } 292 | 293 | if (!this.expectPeek(TOKEN_TYPES.RPAREN)) { 294 | return null; 295 | } 296 | 297 | return identifiers; 298 | } 299 | 300 | // --- Expression Parsing (The Core Logic - Unchanged logic, changed return value) --- 301 | parseExpression(precedence) { 302 | // 1. 
Prefix (Unary) Parsing 303 | const prefixFn = this.prefixParseFns[this.currentToken.type]; 304 | if (!prefixFn) { 305 | console.error(`No prefix parsing function for ${this.currentToken.type}`); 306 | return null; 307 | } 308 | 309 | let leftExpression = prefixFn(); 310 | 311 | // 2. Infix Parsing (Loop for precedence) 312 | while (this.peekToken && !this.peekTokenIs(TOKEN_TYPES.SEMICOLON) && precedence < this.peekPrecedence()) { 313 | const infixFn = this.infixParseFns[this.peekToken.type]; 314 | if (!infixFn) { 315 | return leftExpression; 316 | } 317 | 318 | this.nextToken(); // Advance to the operator token 319 | 320 | leftExpression = infixFn(leftExpression); 321 | } 322 | 323 | return leftExpression; 324 | } 325 | 326 | // --- S-Expression Parsing Functions --- 327 | parseIdentifier() { 328 | return ['ident', this.currentToken.literal]; 329 | } 330 | 331 | parseNumberLiteral() { 332 | const value = parseFloat(this.currentToken.literal); 333 | if (isNaN(value)) { 334 | console.error(`Could not parse ${this.currentToken.literal} as number.`); 335 | return null; 336 | } 337 | // Returns ['number', value] 338 | return ['number', value]; 339 | } 340 | 341 | parsePrefixExpression() { 342 | const operator = this.currentToken.literal; 343 | this.nextToken(); 344 | const right = this.parseExpression(PRECEDENCES.PREFIX); 345 | // Returns ['prefix', operator, right_expression] 346 | return ['prefix', operator, right]; 347 | } 348 | 349 | parseGroupedExpression() { 350 | this.nextToken(); // Consume the '(' 351 | const exp = this.parseExpression(PRECEDENCES.LOWEST); 352 | 353 | if (!this.expectPeek(TOKEN_TYPES.RPAREN)) { 354 | return null; // Handle error: missing ')' 355 | } 356 | return exp; // The grouped expression itself 357 | } 358 | 359 | parseInfixExpression(left) { 360 | const operator = this.currentToken.literal; 361 | const operatorType = this.currentToken.type; 362 | const precedence = this.curPrecedence(); 363 | 364 | this.nextToken(); // Move past the 
operator 365 | 366 | const right = this.parseExpression(precedence); 367 | 368 | if (operatorType === TOKEN_TYPES.ASSIGN) { 369 | return ['assign', left, right]; 370 | } 371 | 372 | return [operator, left, right]; 373 | } 374 | 375 | parseCallExpression(func) { 376 | const args = this.parseExpressionList(TOKEN_TYPES.RPAREN); 377 | return ['call', func, args]; 378 | } 379 | 380 | parseExpressionList(endToken) { 381 | const list = []; 382 | 383 | if (this.peekTokenIs(endToken)) { 384 | this.nextToken(); 385 | return list; 386 | } 387 | 388 | this.nextToken(); 389 | list.push(this.parseExpression(PRECEDENCES.LOWEST)); 390 | 391 | while (this.peekTokenIs(TOKEN_TYPES.COMMA)) { 392 | this.nextToken(); 393 | this.nextToken(); 394 | list.push(this.parseExpression(PRECEDENCES.LOWEST)); 395 | } 396 | 397 | if (!this.expectPeek(endToken)) { 398 | return list; 399 | } 400 | 401 | return list; 402 | } 403 | 404 | parseIfStatement() { 405 | if (!this.expectPeek(TOKEN_TYPES.LPAREN)) { 406 | return null; 407 | } 408 | 409 | this.nextToken(); 410 | const condition = this.parseExpression(PRECEDENCES.LOWEST); 411 | 412 | if (!this.expectPeek(TOKEN_TYPES.RPAREN)) { 413 | return null; 414 | } 415 | 416 | if (this.peekTokenIs(TOKEN_TYPES.LBRACE)) { 417 | if (!this.expectPeek(TOKEN_TYPES.LBRACE)) { 418 | return null; 419 | } 420 | const consequence = this.parseBlock(); 421 | let alternative = null; 422 | 423 | if (this.peekTokenIs(TOKEN_TYPES.ELSE)) { 424 | this.nextToken(); // current token is ELSE 425 | if (this.peekTokenIs(TOKEN_TYPES.IF)) { 426 | this.nextToken(); // advance to IF 427 | alternative = this.parseIfStatement(); 428 | } else if (this.peekTokenIs(TOKEN_TYPES.LBRACE)) { 429 | this.expectPeek(TOKEN_TYPES.LBRACE); 430 | alternative = this.parseBlock(); 431 | } else { 432 | this.nextToken(); 433 | alternative = this.parseStatement(); 434 | } 435 | } 436 | 437 | const result = ['if', condition, consequence]; 438 | if (alternative !== null) { 439 | result.push(alternative); 440 
| } 441 | return result; 442 | } 443 | 444 | this.nextToken(); 445 | const consequence = this.parseStatement(); 446 | 447 | let alternative = null; 448 | if (this.peekTokenIs(TOKEN_TYPES.ELSE)) { 449 | this.nextToken(); // current ELSE 450 | if (this.peekTokenIs(TOKEN_TYPES.IF)) { 451 | this.nextToken(); // advance to IF 452 | alternative = this.parseIfStatement(); 453 | } else { 454 | this.nextToken(); 455 | alternative = this.parseStatement(); 456 | } 457 | } 458 | 459 | const result = ['if', condition, consequence]; 460 | if (alternative !== null) { 461 | result.push(alternative); 462 | } 463 | return result; 464 | } 465 | } 466 | 467 | export { Parser }; 468 | -------------------------------------------------------------------------------- /test/codegen.test.mjs: -------------------------------------------------------------------------------- 1 | import assert from 'node:assert/strict'; 2 | import { describe, it } from 'node:test'; 3 | import fs from 'node:fs'; 4 | 5 | import { Lexer } from '../src/lexer.mjs'; 6 | import { Parser } from '../src/parser.mjs'; 7 | import { Codegen } from '../src/codegen.mjs'; 8 | import { watToSexp } from '../src/utils.mjs'; 9 | 10 | describe('Codegen', () => { 11 | it('generates code for factorial', () => { 12 | const input = fs.readFileSync('./examples/factorial.js', 'utf-8'); 13 | 14 | const output = ` 15 | (module 16 | (func $fact (param $n i32) (result i32) 17 | (local $result i32) 18 | (local.set $result (i32.const 1)) 19 | (block $break 20 | (loop $continue 21 | (br_if $break (i32.le_s (local.get $n) (i32.const 0))) 22 | (local.set $result (i32.mul (local.get $result) (local.get $n))) 23 | (local.set $n (i32.sub (local.get $n) (i32.const 1))) 24 | (br $continue) 25 | ) 26 | ) 27 | (local.get $result) 28 | ) 29 | 30 | (func $_start (export "_start") (result i32) 31 | (call $fact (i32.const 5)) 32 | ) 33 | 34 | (export "fact" (func $fact)) 35 | ) 36 | `; 37 | 38 | const lexer = new Lexer(input); 39 | const parser = new 
Parser(lexer); 40 | const codegen = new Codegen(parser.parseProgram()); 41 | 42 | assert.deepEqual(watToSexp(output), watToSexp(codegen.generate())); 43 | }); 44 | 45 | it('generates code for gcd', () => { 46 | const input = fs.readFileSync('./examples/gcd.js', 'utf-8'); 47 | 48 | const output = ` 49 | (module 50 | (func $gcd (param $a i32) (param $b i32) (result i32) 51 | (local $t i32) 52 | (block $break 53 | (loop $continue 54 | (br_if $break (i32.eq (local.get $b) (i32.const 0))) 55 | (local.set $t (local.get $b)) 56 | (local.set $b (i32.rem_s (local.get $a) (local.get $b))) 57 | (local.set $a (local.get $t)) 58 | (br $continue) 59 | ) 60 | ) 61 | (local.get $a) 62 | ) 63 | 64 | (func $_start (export "_start") (result i32) 65 | (call $gcd (i32.const 48) (i32.const 18)) 66 | ) 67 | 68 | (export "gcd" (func $gcd)) 69 | ) 70 | `; 71 | 72 | const lexer = new Lexer(input); 73 | const parser = new Parser(lexer); 74 | const codegen = new Codegen(parser.parseProgram()); 75 | 76 | assert.deepEqual(watToSexp(output), watToSexp(codegen.generate())); 77 | }); 78 | 79 | it('generates code for ackermann', () => { 80 | const input = fs.readFileSync('./examples/ackermann.js', 'utf-8'); 81 | 82 | const output = ` 83 | (module 84 | (func $ack (param $m i32) (param $n i32) (result i32) 85 | (if (result i32) 86 | (i32.eq (local.get $m) (i32.const 0)) 87 | (then 88 | (i32.add (local.get $n) (i32.const 1)) 89 | ) 90 | (else 91 | (if (result i32) 92 | (i32.eq (local.get $n) (i32.const 0)) 93 | (then 94 | (call $ack 95 | (i32.sub (local.get $m) (i32.const 1)) 96 | (i32.const 1) 97 | ) 98 | ) 99 | (else 100 | (call $ack 101 | (i32.sub (local.get $m) (i32.const 1)) 102 | (call $ack 103 | (local.get $m) 104 | (i32.sub (local.get $n) (i32.const 1)) 105 | ) 106 | ) 107 | ) 108 | ) 109 | ) 110 | ) 111 | ) 112 | 113 | (func $_start (export "_start") (result i32) 114 | (call $ack (i32.const 3) (i32.const 4)) 115 | ) 116 | 117 | (export "ack" (func $ack)) 118 | ) 119 | `; 120 | 121 | const 
lexer = new Lexer(input); 122 | const parser = new Parser(lexer); 123 | const codegen = new Codegen(parser.parseProgram()); 124 | 125 | assert.deepEqual(watToSexp(output), watToSexp(codegen.generate())); 126 | }); 127 | 128 | it('generates code for simple arithmetic', () => { 129 | const input = ` 130 | function add(a, b) { 131 | return a + b; 132 | } 133 | 134 | add(10, 20); 135 | `; 136 | 137 | const output = ` 138 | (module 139 | (func $add (param $a i32) (param $b i32) (result i32) 140 | (i32.add (local.get $a) (local.get $b)) 141 | ) 142 | 143 | (func $_start (export "_start") (result i32) 144 | (call $add (i32.const 10) (i32.const 20)) 145 | ) 146 | 147 | (export "add" (func $add)) 148 | ) 149 | `; 150 | 151 | const lexer = new Lexer(input); 152 | const parser = new Parser(lexer); 153 | const codegen = new Codegen(parser.parseProgram()); 154 | 155 | assert.deepEqual(watToSexp(output), watToSexp(codegen.generate())); 156 | }); 157 | 158 | it('generates code for multiple arithmetic operations', () => { 159 | const input = ` 160 | function calc(x, y, z) { 161 | return (x + y) * z - x / y; 162 | } 163 | 164 | calc(5, 3, 2); 165 | `; 166 | 167 | const output = ` 168 | (module 169 | (func $calc (param $x i32) (param $y i32) (param $z i32) (result i32) 170 | (i32.sub (i32.mul (i32.add (local.get $x) (local.get $y)) (local.get $z)) (i32.div_s (local.get $x) (local.get $y))) 171 | ) 172 | 173 | (func $_start (export "_start") (result i32) 174 | (call $calc (i32.const 5) (i32.const 3) (i32.const 2)) 175 | ) 176 | 177 | (export "calc" (func $calc)) 178 | ) 179 | `; 180 | 181 | const lexer = new Lexer(input); 182 | const parser = new Parser(lexer); 183 | const codegen = new Codegen(parser.parseProgram()); 184 | 185 | assert.deepEqual(watToSexp(output), watToSexp(codegen.generate())); 186 | }); 187 | 188 | it('generates code for simple if-else', () => { 189 | const input = ` 190 | function max(a, b) { 191 | if (a > b) { 192 | return a; 193 | } else { 194 | return b; 195 | 
} 196 | } 197 | 198 | max(15, 7); 199 | `; 200 | 201 | const output = ` 202 | (module 203 | (func $max (param $a i32) (param $b i32) (result i32) 204 | (if (result i32) 205 | (i32.gt_s (local.get $a) (local.get $b)) 206 | (then 207 | (local.get $a) 208 | ) 209 | (else 210 | (local.get $b) 211 | ) 212 | ) 213 | ) 214 | 215 | (func $_start (export "_start") (result i32) 216 | (call $max (i32.const 15) (i32.const 7)) 217 | ) 218 | 219 | (export "max" (func $max)) 220 | ) 221 | `; 222 | 223 | const lexer = new Lexer(input); 224 | const parser = new Parser(lexer); 225 | const codegen = new Codegen(parser.parseProgram()); 226 | 227 | assert.deepEqual(watToSexp(output), watToSexp(codegen.generate())); 228 | }); 229 | 230 | it('generates code for function with local variable', () => { 231 | const input = ` 232 | function square(n) { 233 | let result = n * n; 234 | return result; 235 | } 236 | 237 | square(7); 238 | `; 239 | 240 | const output = ` 241 | (module 242 | (func $square (param $n i32) (result i32) 243 | (local $result i32) 244 | (local.set $result (i32.mul (local.get $n) (local.get $n))) 245 | (local.get $result) 246 | ) 247 | 248 | (func $_start (export "_start") (result i32) 249 | (call $square (i32.const 7)) 250 | ) 251 | 252 | (export "square" (func $square)) 253 | ) 254 | `; 255 | 256 | const lexer = new Lexer(input); 257 | const parser = new Parser(lexer); 258 | const codegen = new Codegen(parser.parseProgram()); 259 | 260 | assert.deepEqual(watToSexp(output), watToSexp(codegen.generate())); 261 | }); 262 | 263 | it('generates code for comparison operations', () => { 264 | const input = ` 265 | function isEqual(a, b) { 266 | if (a == b) return 1; 267 | return 0; 268 | } 269 | 270 | isEqual(5, 5); 271 | `; 272 | 273 | const output = ` 274 | (module 275 | (func $isEqual (param $a i32) (param $b i32) (result i32) 276 | (if (result i32) 277 | (i32.eq (local.get $a) (local.get $b)) 278 | (then 279 | (i32.const 1) 280 | ) 281 | (else 282 | (i32.const 0) 283 | ) 
284 | ) 285 | ) 286 | 287 | (func $_start (export "_start") (result i32) 288 | (call $isEqual (i32.const 5) (i32.const 5)) 289 | ) 290 | 291 | (export "isEqual" (func $isEqual)) 292 | ) 293 | `; 294 | 295 | const lexer = new Lexer(input); 296 | const parser = new Parser(lexer); 297 | const codegen = new Codegen(parser.parseProgram()); 298 | 299 | assert.deepEqual(watToSexp(output), watToSexp(codegen.generate())); 300 | }); 301 | 302 | it('generates code for fibonacci', () => { 303 | const input = ` 304 | function fib(n) { 305 | if (n <= 1) return n; 306 | return fib(n - 1) + fib(n - 2); 307 | } 308 | 309 | fib(10); 310 | `; 311 | 312 | const output = ` 313 | (module 314 | (func $fib (param $n i32) (result i32) 315 | (if (result i32) 316 | (i32.le_s (local.get $n) (i32.const 1)) 317 | (then 318 | (local.get $n) 319 | ) 320 | (else 321 | (i32.add 322 | (call $fib 323 | (i32.sub (local.get $n) (i32.const 1)) 324 | ) 325 | (call $fib 326 | (i32.sub (local.get $n) (i32.const 2)) 327 | ) 328 | ) 329 | ) 330 | ) 331 | ) 332 | 333 | (func $_start (export "_start") (result i32) 334 | (call $fib (i32.const 10)) 335 | ) 336 | 337 | (export "fib" (func $fib)) 338 | ) 339 | `; 340 | 341 | const lexer = new Lexer(input); 342 | const parser = new Parser(lexer); 343 | const codegen = new Codegen(parser.parseProgram()); 344 | 345 | assert.deepEqual(watToSexp(output), watToSexp(codegen.generate())); 346 | }); 347 | 348 | it('generates code for modulo and division', () => { 349 | const input = ` 350 | function isDivisible(n, d) { 351 | if (n % d == 0) return 1; 352 | return 0; 353 | } 354 | 355 | isDivisible(20, 5); 356 | `; 357 | 358 | const output = ` 359 | (module 360 | (func $isDivisible (param $n i32) (param $d i32) (result i32) 361 | (if (result i32) 362 | (i32.eq (i32.rem_s (local.get $n) (local.get $d)) (i32.const 0)) 363 | (then 364 | (i32.const 1) 365 | ) 366 | (else 367 | (i32.const 0) 368 | ) 369 | ) 370 | ) 371 | 372 | (func $_start (export "_start") (result i32) 373 | 
(call $isDivisible (i32.const 20) (i32.const 5)) 374 | ) 375 | 376 | (export "isDivisible" (func $isDivisible)) 377 | ) 378 | `; 379 | 380 | const lexer = new Lexer(input); 381 | const parser = new Parser(lexer); 382 | const codegen = new Codegen(parser.parseProgram()); 383 | 384 | assert.deepEqual(watToSexp(output), watToSexp(codegen.generate())); 385 | }); 386 | 387 | it('generates code for nested if-else-if chain', () => { 388 | const input = ` 389 | function sign(n) { 390 | if (n > 0) return 1; 391 | if (n < 0) return -1; 392 | return 0; 393 | } 394 | 395 | sign(5); 396 | `; 397 | 398 | const output = ` 399 | (module 400 | (func $sign (param $n i32) (result i32) 401 | (if (result i32) 402 | (i32.gt_s (local.get $n) (i32.const 0)) 403 | (then 404 | (i32.const 1) 405 | ) 406 | (else 407 | (if (result i32) 408 | (i32.lt_s (local.get $n) (i32.const 0)) 409 | (then 410 | (i32.sub (i32.const 0) (i32.const 1)) 411 | ) 412 | (else 413 | (i32.const 0) 414 | ) 415 | ) 416 | ) 417 | ) 418 | ) 419 | 420 | (func $_start (export "_start") (result i32) 421 | (call $sign (i32.const 5)) 422 | ) 423 | 424 | (export "sign" (func $sign)) 425 | ) 426 | `; 427 | 428 | const lexer = new Lexer(input); 429 | const parser = new Parser(lexer); 430 | const codegen = new Codegen(parser.parseProgram()); 431 | 432 | assert.deepEqual(watToSexp(output), watToSexp(codegen.generate())); 433 | }); 434 | 435 | it('generates code for power function with loop', () => { 436 | const input = ` 437 | function power(base, exp) { 438 | let result = 1; 439 | while (exp > 0) { 440 | result = result * base; 441 | exp = exp - 1; 442 | } 443 | return result; 444 | } 445 | 446 | power(2, 8); 447 | `; 448 | 449 | const output = ` 450 | (module 451 | (func $power (param $base i32) (param $exp i32) (result i32) 452 | (local $result i32) 453 | (local.set $result (i32.const 1)) 454 | (block $break 455 | (loop $continue 456 | (br_if $break (i32.le_s (local.get $exp) (i32.const 0))) 457 | (local.set $result (i32.mul 
(local.get $result) (local.get $base))) 458 | (local.set $exp (i32.sub (local.get $exp) (i32.const 1))) 459 | (br $continue) 460 | ) 461 | ) 462 | (local.get $result) 463 | ) 464 | 465 | (func $_start (export "_start") (result i32) 466 | (call $power (i32.const 2) (i32.const 8)) 467 | ) 468 | 469 | (export "power" (func $power)) 470 | ) 471 | `; 472 | 473 | const lexer = new Lexer(input); 474 | const parser = new Parser(lexer); 475 | const codegen = new Codegen(parser.parseProgram()); 476 | 477 | assert.deepEqual(watToSexp(output), watToSexp(codegen.generate())); 478 | }); 479 | 480 | it('applies tail call elimination to simple recursive function', () => { 481 | const input = ` 482 | function sum(n, acc) { 483 | if (n == 0) return acc; 484 | return sum(n - 1, acc + n); 485 | } 486 | 487 | sum(10, 0); 488 | `; 489 | 490 | const output = ` 491 | (module 492 | (func $sum (param $n i32) (param $acc i32) (result i32) 493 | (local $_n_next i32) 494 | (local $_acc_next i32) 495 | (local.set $_n_next (local.get $n)) 496 | (local.set $_acc_next (local.get $acc)) 497 | (block $tail_loop (result i32) 498 | (loop $tail_continue 499 | (if 500 | (i32.eq (local.get $n) (i32.const 0)) 501 | (then 502 | (br $tail_loop (local.get $acc)) 503 | ) 504 | (else 505 | (local.set $_n_next (i32.sub (local.get $n) (i32.const 1))) 506 | (local.set $_acc_next (i32.add (local.get $acc) (local.get $n))) 507 | (local.set $n (local.get $_n_next)) 508 | (local.set $acc (local.get $_acc_next)) 509 | (br $tail_continue) 510 | ) 511 | ) 512 | ) 513 | (unreachable) 514 | ) 515 | ) 516 | 517 | (func $_start (export "_start") (result i32) 518 | (call $sum (i32.const 10) (i32.const 0)) 519 | ) 520 | 521 | (export "sum" (func $sum)) 522 | ) 523 | `; 524 | 525 | const lexer = new Lexer(input); 526 | const parser = new Parser(lexer); 527 | const codegen = new Codegen(parser.parseProgram()); 528 | 529 | assert.deepEqual(watToSexp(output), watToSexp(codegen.generate())); 530 | }); 531 | 532 | it('applies tail 
call elimination to countdown function', () => { 533 | const input = ` 534 | function countdown(n) { 535 | if (n <= 0) return 0; 536 | return countdown(n - 1); 537 | } 538 | 539 | countdown(100); 540 | `; 541 | 542 | const output = ` 543 | (module 544 | (func $countdown (param $n i32) (result i32) 545 | (local $_n_next i32) 546 | (local.set $_n_next (local.get $n)) 547 | (block $tail_loop (result i32) 548 | (loop $tail_continue 549 | (if 550 | (i32.le_s (local.get $n) (i32.const 0)) 551 | (then 552 | (br $tail_loop (i32.const 0)) 553 | ) 554 | (else 555 | (local.set $_n_next (i32.sub (local.get $n) (i32.const 1))) 556 | (local.set $n (local.get $_n_next)) 557 | (br $tail_continue) 558 | ) 559 | ) 560 | ) 561 | (unreachable) 562 | ) 563 | ) 564 | 565 | (func $_start (export "_start") (result i32) 566 | (call $countdown (i32.const 100)) 567 | ) 568 | 569 | (export "countdown" (func $countdown)) 570 | ) 571 | `; 572 | 573 | const lexer = new Lexer(input); 574 | const parser = new Parser(lexer); 575 | const codegen = new Codegen(parser.parseProgram()); 576 | 577 | assert.deepEqual(watToSexp(output), watToSexp(codegen.generate())); 578 | }); 579 | 580 | it('does not apply tail call elimination to non-tail recursive functions', () => { 581 | const input = ` 582 | function fib(n) { 583 | if (n <= 1) return n; 584 | return fib(n - 1) + fib(n - 2); 585 | } 586 | 587 | fib(10); 588 | `; 589 | 590 | // Should generate normal recursive calls, not tail-call optimized 591 | const output = ` 592 | (module 593 | (func $fib (param $n i32) (result i32) 594 | (if (result i32) 595 | (i32.le_s (local.get $n) (i32.const 1)) 596 | (then 597 | (local.get $n) 598 | ) 599 | (else 600 | (i32.add 601 | (call $fib 602 | (i32.sub (local.get $n) (i32.const 1)) 603 | ) 604 | (call $fib 605 | (i32.sub (local.get $n) (i32.const 2)) 606 | ) 607 | ) 608 | ) 609 | ) 610 | ) 611 | 612 | (func $_start (export "_start") (result i32) 613 | (call $fib (i32.const 10)) 614 | ) 615 | 616 | (export "fib" (func 
$fib)) 617 | ) 618 | `; 619 | 620 | const lexer = new Lexer(input); 621 | const parser = new Parser(lexer); 622 | const codegen = new Codegen(parser.parseProgram()); 623 | 624 | assert.deepEqual(watToSexp(output), watToSexp(codegen.generate())); 625 | }); 626 | }); 627 | 628 | -------------------------------------------------------------------------------- /src/codegen.mjs: -------------------------------------------------------------------------------- 1 | export class Codegen { 2 | constructor(ast) { 3 | this.ast = ast; 4 | this.functions = []; 5 | this.startStatements = []; 6 | this.output = []; 7 | this.constValues = new Map(); // Track const variable values per function 8 | } 9 | 10 | // ===== Public API ===== 11 | // Generates WebAssembly Text (WAT) format from AST 12 | 13 | generate() { 14 | const program = this.ast; 15 | if (program[0] !== 'program') throw new Error("Invalid AST"); 16 | 17 | const statements = program.slice(1); 18 | 19 | for (const stmt of statements) { 20 | if (stmt[0] === 'function') { 21 | this.functions.push(stmt); 22 | } else { 23 | this.startStatements.push(stmt); 24 | } 25 | } 26 | 27 | let wat = '\n(module'; 28 | 29 | for (const func of this.functions) { 30 | wat += this.generateFunction(func); 31 | } 32 | 33 | wat += this.generateStart(); 34 | 35 | // Exports 36 | wat += '\n'; 37 | for (const func of this.functions) { 38 | const name = func[1][1]; 39 | wat += `\n (export "${name}" (func $${name}))`; 40 | } 41 | 42 | wat += '\n)\n'; 43 | return wat; 44 | } 45 | 46 | generateFunction(node) { 47 | const name = node[1][1]; 48 | const params = node[2].map(p => p[1]); 49 | const body = node[3]; // ['block', statements] 50 | 51 | // Reset const values for this function 52 | this.constValues = new Map(); 53 | 54 | // Apply tail call elimination 55 | const optimizedBody = this.eliminateTailCalls(body, name, params); 56 | 57 | let wat = `\n (func $${name}`; 58 | params.forEach(p => { 59 | wat += ` (param $${p} i32)`; 60 | }); 61 | wat += ` 
(result i32)`; 62 | 63 | // Collect const values before collecting locals 64 | this.collectConstValues(optimizedBody); 65 | 66 | const locals = this.collectLocals(optimizedBody, params); 67 | locals.forEach(l => { 68 | wat += `\n (local $${l} i32)`; 69 | }); 70 | 71 | // Body statements 72 | // body is ['block', [stmts...]] 73 | const stmts = optimizedBody[1]; 74 | 75 | // Check if TCE was applied (body contains tce_loop or tce_tail_call nodes) 76 | const hasTCE = this.containsTCENodes(optimizedBody); 77 | 78 | // Transform to if-else chain if: 79 | // 1. TCE is applied (for tail recursive functions), OR 80 | // 2. All statements are if/return (expression-style functions) 81 | const allStmtsAreIfOrReturn = stmts.every(s => 82 | s[0] === 'if' || 83 | (s[0] === 'stmt' && s[1] && s[1][0] === 'return') || 84 | s[0] === 'return' 85 | ); 86 | 87 | const hasMultipleIfs = stmts.filter(s => s[0] === 'if').length > 1; 88 | const firstIsIf = stmts.length > 0 && stmts[0][0] === 'if'; 89 | 90 | if ((hasTCE || allStmtsAreIfOrReturn) && (hasMultipleIfs || (firstIsIf && stmts.length > 1))) { 91 | // Transform statements to handle if-else chains 92 | const transformedStmt = this.transformStatementsToIfElseChain(stmts); 93 | wat += '\n' + this.emit(transformedStmt, true); // Mark as last statement 94 | } else { 95 | // Regular emission 96 | for (let i = 0; i < stmts.length; i++) { 97 | const stmt = stmts[i]; 98 | const isLast = i === stmts.length - 1; 99 | wat += '\n' + this.emit(stmt, isLast); 100 | } 101 | } 102 | 103 | wat += `\n )`; 104 | return wat; 105 | } 106 | 107 | generateStart() { 108 | let wat = `\n\n (func $_start (export "_start") (result i32)`; 109 | 110 | // Locals for start? 
111 | const locals = this.collectLocals(['block', this.startStatements], []); 112 | locals.forEach(l => { 113 | wat += `\n (local $${l} i32)`; 114 | }); 115 | 116 | for (const stmt of this.startStatements) { 117 | wat += '\n' + this.emit(stmt); 118 | } 119 | 120 | wat += `\n )`; 121 | return wat; 122 | } 123 | 124 | eliminateTailCalls(body, funcName, params) { 125 | // Check if function has ONLY tail recursive calls (no non-tail recursive calls) 126 | if (!this.hasOnlyTailRecursion(body, funcName)) { 127 | return body; 128 | } 129 | 130 | // Transform tail recursive function into iterative version 131 | // Create shadow parameters for tail call elimination 132 | const shadowParams = params.map(p => `_${p}_next`); 133 | 134 | // Transform the body to replace tail calls with parameter updates 135 | const transformedStmts = this.transformTailCallsToLoop(body[1], funcName, params, shadowParams); 136 | 137 | return ['block', transformedStmts]; 138 | } 139 | 140 | hasOnlyTailRecursion(body, funcName) { 141 | // Check that the function has tail recursive calls AND no non-tail recursive calls 142 | const hasTail = this.hasTailRecursiveCalls(body[1], funcName); 143 | const hasNonTail = this.hasNonTailRecursiveCalls(body[1], funcName); 144 | return hasTail && !hasNonTail; 145 | } 146 | 147 | hasNonTailRecursiveCalls(stmts, funcName) { 148 | if (!Array.isArray(stmts)) return false; 149 | 150 | for (const stmt of stmts) { 151 | if (this.containsNonTailRecursiveCall(stmt, funcName)) { 152 | return true; 153 | } 154 | } 155 | return false; 156 | } 157 | 158 | containsNonTailRecursiveCall(node, funcName) { 159 | if (!Array.isArray(node)) return false; 160 | 161 | // Check return statements 162 | if (node[0] === 'stmt' && node[1] && node[1][0] === 'return') { 163 | const returnExpr = node[1][1]; 164 | // If it's not a tail call but contains a recursive call, it's a non-tail call 165 | if (!this.isTailCall(returnExpr, funcName) && this.containsRecursiveCall(returnExpr, funcName)) { 
166 | return true; 167 | } 168 | } 169 | 170 | // Check if statements 171 | if (node[0] === 'if') { 172 | const consequence = node[2]; 173 | const alternative = node.length > 3 ? node[3] : null; 174 | return this.containsNonTailRecursiveCall(consequence, funcName) || 175 | (alternative && this.containsNonTailRecursiveCall(alternative, funcName)); 176 | } 177 | 178 | // Check blocks 179 | if (node[0] === 'block' && Array.isArray(node[1])) { 180 | return node[1].some(child => this.containsNonTailRecursiveCall(child, funcName)); 181 | } 182 | 183 | return false; 184 | } 185 | 186 | hasTailRecursiveCalls(stmts, funcName) { 187 | if (!Array.isArray(stmts)) return false; 188 | 189 | for (const stmt of stmts) { 190 | if (this.containsTailCall(stmt, funcName)) { 191 | return true; 192 | } 193 | } 194 | return false; 195 | } 196 | 197 | containsTailCall(node, funcName) { 198 | if (!Array.isArray(node)) return false; 199 | 200 | // Check return statements - only direct calls are tail calls 201 | if (node[0] === 'stmt' && node[1] && node[1][0] === 'return') { 202 | const returnExpr = node[1][1]; 203 | // Only a direct call in tail position is a tail call 204 | // Calls within expressions (like arguments to other calls) are NOT tail calls 205 | return this.isTailCall(returnExpr, funcName); 206 | } 207 | 208 | // Check if statements (need to check both branches) 209 | if (node[0] === 'if') { 210 | const consequence = node[2]; 211 | const alternative = node.length > 3 ? 
node[3] : null; 212 | return this.containsTailCall(consequence, funcName) || 213 | (alternative && this.containsTailCall(alternative, funcName)); 214 | } 215 | 216 | // Check blocks 217 | if (node[0] === 'block' && Array.isArray(node[1])) { 218 | return node[1].some(child => this.containsTailCall(child, funcName)); 219 | } 220 | 221 | return false; 222 | } 223 | 224 | isTailCall(expr, funcName) { 225 | if (!Array.isArray(expr)) return false; 226 | // Only a direct call to the function is a tail call 227 | // Nested calls (e.g., as arguments) are not tail calls 228 | if (expr[0] === 'call' && expr[1][1] === funcName) { 229 | // Check if any of the arguments contain recursive calls 230 | // If so, this is NOT a tail call 231 | const args = expr[2] || []; 232 | for (const arg of args) { 233 | if (this.containsRecursiveCall(arg, funcName)) { 234 | return false; 235 | } 236 | } 237 | return true; 238 | } 239 | return false; 240 | } 241 | 242 | containsRecursiveCall(expr, funcName) { 243 | if (!Array.isArray(expr)) return false; 244 | if (expr[0] === 'call' && expr[1][1] === funcName) { 245 | return true; 246 | } 247 | // Recursively check all sub-expressions 248 | return expr.some(child => 249 | Array.isArray(child) && this.containsRecursiveCall(child, funcName) 250 | ); 251 | } 252 | 253 | transformTailCallsToLoop(stmts, funcName, params, shadowParams) { 254 | // Check if we need to transform to if-else chain first 255 | const hasMultipleIfs = stmts.filter(s => s[0] === 'if').length > 1; 256 | const firstIsIf = stmts.length > 0 && stmts[0][0] === 'if'; 257 | 258 | let bodyStmt; 259 | if (hasMultipleIfs || (firstIsIf && stmts.length > 1)) { 260 | bodyStmt = this.transformStatementsToIfElseChain(stmts); 261 | } else if (stmts.length === 1) { 262 | bodyStmt = stmts[0]; 263 | } else { 264 | bodyStmt = ['block', stmts]; 265 | } 266 | 267 | // Transform the body to replace tail calls 268 | const transformedBody = this.replaceTailCallWithUpdate(bodyStmt, funcName, params, 
shadowParams); 269 | 270 | // Create the loop wrapper with shadow parameters 271 | const result = []; 272 | 273 | // Add shadow parameter initialization 274 | for (let i = 0; i < params.length; i++) { 275 | result.push(['stmt', ['let', ['ident', shadowParams[i]], ['ident', params[i]]]]); 276 | } 277 | 278 | // Add the loop 279 | result.push(['stmt', ['tce_loop', transformedBody]]); 280 | 281 | return result; 282 | } 283 | 284 | replaceTailCallWithUpdate(node, funcName, params, shadowParams) { 285 | if (!Array.isArray(node)) return node; 286 | 287 | // Handle return statement with tail call 288 | if (node[0] === 'stmt' && node[1] && node[1][0] === 'return') { 289 | const returnExpr = node[1][1]; 290 | if (this.isTailCall(returnExpr, funcName)) { 291 | // Replace with parameter updates and continue 292 | const callArgs = returnExpr[2]; 293 | const updates = []; 294 | for (let i = 0; i < params.length; i++) { 295 | updates.push(['assign', ['ident', shadowParams[i]], callArgs[i]]); 296 | } 297 | for (let i = 0; i < params.length; i++) { 298 | updates.push(['assign', ['ident', params[i]], ['ident', shadowParams[i]]]); 299 | } 300 | return ['stmt', ['tce_tail_call', updates]]; 301 | } 302 | // Non-tail call return - keep as is 303 | return node; 304 | } 305 | 306 | // Handle if statement 307 | if (node[0] === 'if') { 308 | const condition = node[1]; 309 | const consequence = this.replaceTailCallWithUpdate(node[2], funcName, params, shadowParams); 310 | const alternative = node.length > 3 ? 
311 | this.replaceTailCallWithUpdate(node[3], funcName, params, shadowParams) : null; 312 | 313 | if (alternative) { 314 | return ['if', condition, consequence, alternative]; 315 | } 316 | return ['if', condition, consequence]; 317 | } 318 | 319 | // Handle block 320 | if (node[0] === 'block' && Array.isArray(node[1])) { 321 | const transformedStmts = node[1].map(stmt => 322 | this.replaceTailCallWithUpdate(stmt, funcName, params, shadowParams) 323 | ); 324 | return ['block', transformedStmts]; 325 | } 326 | 327 | // Recursively transform arrays 328 | if (Array.isArray(node)) { 329 | return node.map(child => 330 | Array.isArray(child) ? this.replaceTailCallWithUpdate(child, funcName, params, shadowParams) : child 331 | ); 332 | } 333 | 334 | return node; 335 | } 336 | 337 | containsTCENodes(node) { 338 | if (!Array.isArray(node)) return false; 339 | if (node[0] === 'tce_loop' || node[0] === 'tce_tail_call' || node[0] === 'tce_block' || node[0] === 'tce_continue') { 340 | return true; 341 | } 342 | return node.some(child => Array.isArray(child) && this.containsTCENodes(child)); 343 | } 344 | 345 | // ===== Const Value Analysis & Inlining ===== 346 | // Tracks compile-time constant values for inlining 347 | 348 | collectConstValues(node) { 349 | const traverse = (n) => { 350 | if (!Array.isArray(n)) return; 351 | if (n[0] === 'stmt' && n[1] && n[1][0] === 'const') { 352 | const varName = n[1][1][1]; 353 | const value = n[1][2]; 354 | // Try to evaluate the expression with known const values 355 | const evaluatedValue = this.tryEvaluateWithConsts(value); 356 | if (this.isConstantExpression(evaluatedValue)) { 357 | this.constValues.set(varName, evaluatedValue); 358 | } 359 | } 360 | n.forEach(child => { 361 | if (Array.isArray(child)) traverse(child); 362 | }); 363 | }; 364 | traverse(node); 365 | } 366 | 367 | tryEvaluateWithConsts(node) { 368 | if (!Array.isArray(node)) return node; 369 | 370 | // If it's an identifier, try to substitute with const value 371 | if 
(node[0] === 'ident') { 372 | const varName = node[1]; 373 | if (this.constValues.has(varName)) { 374 | return this.constValues.get(varName); 375 | } 376 | return node; 377 | } 378 | 379 | // Recursively evaluate sub-expressions 380 | if (['+', '-', '*', '/', '%'].includes(node[0])) { 381 | const left = this.tryEvaluateWithConsts(node[1]); 382 | const right = this.tryEvaluateWithConsts(node[2]); 383 | 384 | // If both are numbers, compute the result 385 | if (left[0] === 'number' && right[0] === 'number') { 386 | const leftVal = left[1]; 387 | const rightVal = right[1]; 388 | let result; 389 | 390 | switch (node[0]) { 391 | case '+': result = leftVal + rightVal; break; 392 | case '-': result = leftVal - rightVal; break; 393 | case '*': result = leftVal * rightVal; break; 394 | case '/': 395 | if (rightVal === 0) return node; 396 | result = Math.floor(leftVal / rightVal); 397 | break; 398 | case '%': 399 | if (rightVal === 0) return node; 400 | result = leftVal % rightVal; 401 | break; 402 | } 403 | 404 | return ['number', result]; 405 | } 406 | 407 | return [node[0], left, right]; 408 | } 409 | 410 | if (node[0] === 'prefix') { 411 | const operand = this.tryEvaluateWithConsts(node[2]); 412 | if (operand[0] === 'number') { 413 | if (node[1] === '-') { 414 | return ['number', -operand[1]]; 415 | } else if (node[1] === '!') { 416 | return ['number', operand[1] === 0 ? 
1 : 0]; 417 | } 418 | } 419 | return [node[0], node[1], operand]; 420 | } 421 | 422 | return node; 423 | } 424 | 425 | isConstantExpression(node) { 426 | if (!Array.isArray(node)) return false; 427 | if (node[0] === 'number') return true; 428 | // Could be extended to handle constant arithmetic expressions 429 | if (['+', '-', '*', '/', '%'].includes(node[0])) { 430 | return this.isConstantExpression(node[1]) && this.isConstantExpression(node[2]); 431 | } 432 | if (node[0] === 'prefix') { 433 | return this.isConstantExpression(node[2]); 434 | } 435 | return false; 436 | } 437 | 438 | // ===== Local Variable Collection ===== 439 | 440 | collectLocals(node, existingParams) { 441 | const locals = new Set(); 442 | 443 | const traverse = (n) => { 444 | if (!Array.isArray(n)) return; 445 | if (n[0] === 'stmt' && n[1] && n[1][0] === 'let') { 446 | const varName = n[1][1][1]; 447 | if (!existingParams.includes(varName)) { 448 | locals.add(varName); 449 | } 450 | } 451 | // Const variables that aren't inlined need locals too 452 | if (n[0] === 'stmt' && n[1] && n[1][0] === 'const') { 453 | const varName = n[1][1][1]; 454 | if (!existingParams.includes(varName) && !this.constValues.has(varName)) { 455 | locals.add(varName); 456 | } 457 | } 458 | n.forEach(child => { 459 | if (Array.isArray(child)) traverse(child); 460 | }); 461 | }; 462 | 463 | traverse(node); 464 | return Array.from(locals); 465 | } 466 | 467 | // ===== WASM Code Emission ===== 468 | // emit() handles statements (side effects, control flow) 469 | // emitExpression() handles expressions (produces values on stack) 470 | 471 | emit(node, isLastStatement = false, options = {}) { 472 | if (!Array.isArray(node)) return ''; 473 | const type = node[0]; 474 | 475 | switch (type) { 476 | case 'block': 477 | // Blocks inside expressions (like if/while bodies) 478 | return node[1].map(s => this.emit(s, false, options)).join('\n'); 479 | 480 | case 'stmt': 481 | return this.emit(node[1], isLastStatement, options); 482 | 
483 | case 'let': 484 | return `(local.set $${node[1][1]} ${this.emitExpression(node[2])})`; 485 | 486 | case 'const': 487 | // For const with constant values, we inline them, so no need to set 488 | // For const with non-constant values, treat like let but track for immutability checking 489 | const constVar = node[1][1]; 490 | if (this.constValues.has(constVar)) { 491 | // Constant value - no need to emit, will be inlined 492 | return ''; 493 | } 494 | // Non-constant const - emit like let (but we won't allow reassignment) 495 | return `(local.set $${constVar} ${this.emitExpression(node[2])})`; 496 | 497 | case 'assign': 498 | return `(local.set $${node[1][1]} ${this.emitExpression(node[2])})`; 499 | 500 | case 'return': 501 | return this.emitReturn(node[1], isLastStatement, options); 502 | 503 | case 'while': 504 | return this.emitWhileLoop(node); 505 | 506 | case 'if': 507 | return this.emitIfStatement(node, isLastStatement, options); 508 | 509 | case 'br': 510 | // Branch instruction for tail call elimination 511 | return `(br ${node[1]})`; 512 | 513 | case 'tce_loop': 514 | return this.emitTCELoop(node, options); 515 | 516 | case 'tce_tail_call': 517 | return this.emitTCETailCall(node); 518 | 519 | case 'tce_block': 520 | // Block for tail call updates 521 | const tceStmts = node[1]; 522 | return tceStmts.map(s => this.emit(s)).join('\n'); 523 | 524 | case 'tce_continue': 525 | // Continue in tail call loop 526 | return `(br $tail_continue)`; 527 | 528 | case 'loop': 529 | // ['loop', [label], body] 530 | const loopLabel = node[1]; 531 | const loopBody = node[2]; 532 | let loopWat = `(loop ${loopLabel}\n`; 533 | if (loopBody[0] === 'block') { 534 | loopWat += loopBody[1].map(s => this.emit(s)).join('\n') + '\n'; 535 | } else { 536 | loopWat += this.emit(loopBody) + '\n'; 537 | } 538 | loopWat += `)`; 539 | return loopWat; 540 | 541 | case 'block': 542 | // ['block', [label], body] or ['block', stmts] 543 | if (typeof node[1] === 'string') { 544 | // Labeled 
block 545 | const blockLabel = node[1]; 546 | const blockBody = node[2]; 547 | let blockWat = `(block ${blockLabel}\n`; 548 | if (Array.isArray(blockBody)) { 549 | if (blockBody[0] === 'loop') { 550 | blockWat += this.emit(blockBody) + '\n'; 551 | } else if (Array.isArray(blockBody[0])) { 552 | blockWat += blockBody.map(s => this.emit(s)).join('\n') + '\n'; 553 | } else { 554 | blockWat += this.emit(blockBody) + '\n'; 555 | } 556 | } 557 | blockWat += `)`; 558 | return blockWat; 559 | } 560 | // Fall through to default for unlabeled blocks 561 | break; 562 | 563 | case 'call': 564 | // Call as a statement 565 | return `${this.emitExpression(node)}`; 566 | 567 | default: 568 | // Fallback for expressions used as statements (e.g. return value) 569 | return `${this.emitExpression(node)}`; 570 | } 571 | } 572 | 573 | // ===== Statement Emission Helpers ===== 574 | // These methods handle specific statement types with stack semantics in mind 575 | 576 | emitReturn(expr, isLastStatement, options) { 577 | // WASM stack semantics: 578 | // - Last statement in function: value left on stack is implicit return 579 | // - Inside TCE loop: branch out of loop with value 580 | // - Otherwise: explicit return instruction 581 | if (options.insideTCELoop) { 582 | return `(br $tail_loop ${this.emitExpression(expr)})`; 583 | } else if (isLastStatement) { 584 | return `${this.emitExpression(expr)}`; 585 | } else { 586 | return `(return ${this.emitExpression(expr)})`; 587 | } 588 | } 589 | 590 | emitWhileLoop(node) { 591 | // ['while', condition, body] 592 | // WASM pattern: (block (loop (br_if exit inverted-cond) body (br loop))) 593 | const cond = node[1]; 594 | const inverted = this.invertCondition(cond); 595 | const body = node[2]; 596 | 597 | let loop = `(block $break\n`; 598 | loop += `(loop $continue\n`; 599 | loop += `(br_if $break ${this.emitExpression(inverted)})\n`; 600 | 601 | if (body[0] === 'block') { 602 | loop += body[1].map(s => this.emit(s)).join('\n') + '\n'; 603 | } 
else { 604 | loop += this.emit(body) + '\n'; 605 | } 606 | 607 | loop += `(br $continue)\n`; 608 | loop += `)\n`; 609 | loop += `)`; 610 | return loop; 611 | } 612 | 613 | emitTCELoop(node, options) { 614 | // Wraps tail-recursive body in a loop structure 615 | const tceBody = node[1]; 616 | let tceWat = `(block $tail_loop (result i32)\n`; 617 | tceWat += `(loop $tail_continue\n`; 618 | tceWat += this.emit(tceBody, true, { insideTCELoop: true }) + '\n'; 619 | tceWat += `)\n`; 620 | tceWat += `(unreachable)\n`; 621 | tceWat += `)`; 622 | return tceWat; 623 | } 624 | 625 | emitTCETailCall(node) { 626 | // Emits parameter updates and continues the loop (instead of recursive call) 627 | const updates = node[1]; 628 | let updateWat = ''; 629 | for (const update of updates) { 630 | updateWat += `(local.set $${update[1][1]} ${this.emitExpression(update[2])})\n`; 631 | } 632 | updateWat += `(br $tail_continue)`; 633 | return updateWat; 634 | } 635 | 636 | emitIfStatement(node, isLastStatement = false, options = {}) { 637 | const condition = node[1]; 638 | const consequence = node[2]; 639 | const alternative = node.length > 3 ? node[3] : null; 640 | 641 | // Determine if we need a result-producing if (stack-oriented) 642 | const hasTailCall = this.hasTailCall(consequence) || 643 | (alternative && this.hasTailCall(alternative)); 644 | const hasReturn = this.hasReturn(consequence); 645 | const hasAlternative = alternative !== null && alternative !== undefined; 646 | 647 | // Use result-producing if only when: 648 | // 1. No tail calls (tail calls use br, not stack values) 649 | // 2. Has returns in both branches 650 | // 3. 
Is the last statement (implicit return via stack) 651 | const useResultIf = !hasTailCall && hasReturn && hasAlternative && isLastStatement; 652 | 653 | if (useResultIf) { 654 | return this.emitResultProducingIf(node); 655 | } else { 656 | return this.emitControlFlowIf(node, isLastStatement, options); 657 | } 658 | } 659 | 660 | emitResultProducingIf(node) { 661 | // Emits if as an expression that leaves a value on the stack 662 | const condition = node[1]; 663 | const consequence = node[2]; 664 | const alternative = node[3]; 665 | 666 | let result = `(if (result i32)\n`; 667 | result += `${this.emitExpression(condition)}\n`; 668 | result += `(then\n`; 669 | const returnValue = this.extractReturnValue(consequence); 670 | result += this.emitExpression(returnValue) + '\n'; 671 | result += `)\n`; 672 | result += `(else\n`; 673 | 674 | if (Array.isArray(alternative) && alternative[0] === 'if') { 675 | result += this.emitIfStatement(alternative, true) + '\n'; 676 | } else { 677 | const altReturnValue = this.extractReturnValue(alternative); 678 | result += this.emitExpression(altReturnValue) + '\n'; 679 | } 680 | 681 | result += `)\n`; 682 | result += `)`; 683 | return result; 684 | } 685 | 686 | emitControlFlowIf(node, isLastStatement, options) { 687 | // Emits if as control flow (no stack value produced) 688 | const condition = node[1]; 689 | const consequence = node[2]; 690 | const alternative = node.length > 3 ? 
node[3] : null; 691 | const hasReturn = this.hasReturn(consequence); 692 | const hasTailCall = this.hasTailCall(consequence) || 693 | (alternative && this.hasTailCall(alternative)); 694 | 695 | let result = `(if\n`; 696 | result += `${this.emitExpression(condition)}\n`; 697 | result += `(then\n`; 698 | 699 | // Special handling for explicit returns when not last statement 700 | if (hasReturn && !alternative && !hasTailCall) { 701 | const returnValue = this.extractReturnValue(consequence); 702 | result += `(return ${this.emitExpression(returnValue)})\n`; 703 | } else if (hasReturn && alternative && !isLastStatement && !hasTailCall) { 704 | const returnValue = this.extractReturnValue(consequence); 705 | result += `(return ${this.emitExpression(returnValue)})\n`; 706 | } else if (consequence[0] === 'block') { 707 | result += consequence[1].map(s => this.emit(s, false, options)).join('\n') + '\n'; 708 | } else { 709 | result += this.emit(consequence, false, options) + '\n'; 710 | } 711 | 712 | result += `)\n`; 713 | 714 | if (alternative) { 715 | result += `(else\n`; 716 | if (Array.isArray(alternative) && alternative[0] === 'if') { 717 | result += this.emitIfStatement(alternative, isLastStatement, options) + '\n'; 718 | } else if (hasReturn && !isLastStatement && !hasTailCall) { 719 | const altReturnValue = this.extractReturnValue(alternative); 720 | result += `(return ${this.emitExpression(altReturnValue)})\n`; 721 | } else if (alternative[0] === 'block') { 722 | result += alternative[1].map(s => this.emit(s, false, options)).join('\n') + '\n'; 723 | } else { 724 | result += this.emit(alternative, false, options) + '\n'; 725 | } 726 | result += `)\n`; 727 | } 728 | 729 | result += `)`; 730 | return result; 731 | } 732 | 733 | // ===== AST Analysis Helpers ===== 734 | 735 | transformStatementsToIfElseChain(stmts) { 736 | if (stmts.length === 0) return null; 737 | if (stmts.length === 1) return stmts[0]; 738 | 739 | const firstStmt = stmts[0]; 740 | if (firstStmt[0] 
=== 'if' && firstStmt.length === 3) { 741 | const rest = this.transformStatementsToIfElseChain(stmts.slice(1)); 742 | return ['if', firstStmt[1], firstStmt[2], rest]; 743 | } 744 | 745 | return firstStmt; 746 | } 747 | 748 | hasReturn(node) { 749 | if (!Array.isArray(node)) return false; 750 | if (node[0] === 'stmt' && node[1] && node[1][0] === 'return') return true; 751 | if (node[0] === 'stmt' && node[1] && node[1][0] === 'tce_tail_call') return false; 752 | if (node[0] === 'return') return true; 753 | if (node[0] === 'block' && Array.isArray(node[1])) { 754 | return node[1].some(stmt => this.hasReturn(stmt)); 755 | } 756 | return false; 757 | } 758 | 759 | hasTailCall(node) { 760 | if (!Array.isArray(node)) return false; 761 | if (node[0] === 'stmt' && node[1] && node[1][0] === 'tce_tail_call') return true; 762 | if (node[0] === 'block' && Array.isArray(node[1])) { 763 | return node[1].some(stmt => this.hasTailCall(stmt)); 764 | } 765 | if (node[0] === 'if') { 766 | return this.hasTailCall(node[2]) || (node.length > 3 && this.hasTailCall(node[3])); 767 | } 768 | return false; 769 | } 770 | 771 | extractReturnValue(node) { 772 | if (!Array.isArray(node)) return node; 773 | if (node[0] === 'stmt' && node[1] && node[1][0] === 'return') { 774 | return node[1][1]; 775 | } 776 | if (node[0] === 'return') { 777 | return node[1]; 778 | } 779 | if (node[0] === 'block' && Array.isArray(node[1])) { 780 | for (const stmt of node[1]) { 781 | if (this.hasReturn(stmt)) { 782 | return this.extractReturnValue(stmt); 783 | } 784 | } 785 | } 786 | return node; 787 | } 788 | 789 | invertCondition(node) { 790 | const map = { 791 | '>': '<=', 792 | '<': '>=', 793 | '>=': '<', 794 | '<=': '>', 795 | '==': '!=', 796 | '!=': '==' 797 | }; 798 | if (Array.isArray(node) && map[node[0]]) { 799 | return [map[node[0]], node[1], node[2]]; 800 | } 801 | return ['prefix', '!', node]; 802 | } 803 | 804 | // ===== Expression Emission ===== 805 | // Expressions always produce a value on the WASM 
stack 806 | 807 | emitExpression(node) { 808 | if (!Array.isArray(node)) return ''; 809 | const type = node[0]; 810 | 811 | switch (type) { 812 | // Load value onto stack 813 | case 'ident': 814 | const varName = node[1]; 815 | if (this.constValues.has(varName)) { 816 | return this.emitExpression(this.constValues.get(varName)); 817 | } 818 | return `(local.get $${varName})`; 819 | 820 | case 'number': 821 | return `(i32.const ${node[1]})`; 822 | 823 | // Function call - pushes return value onto stack 824 | case 'call': 825 | const args = node[2].map(a => this.emitExpression(a)).join(' '); 826 | return `(call $${node[1][1]} ${args})`; 827 | 828 | // Binary arithmetic - pops 2, pushes 1 829 | case '+': 830 | return `(i32.add ${this.emitExpression(node[1])} ${this.emitExpression(node[2])})`; 831 | case '-': 832 | return `(i32.sub ${this.emitExpression(node[1])} ${this.emitExpression(node[2])})`; 833 | case '*': 834 | return `(i32.mul ${this.emitExpression(node[1])} ${this.emitExpression(node[2])})`; 835 | case '/': 836 | return `(i32.div_s ${this.emitExpression(node[1])} ${this.emitExpression(node[2])})`; 837 | case '%': 838 | return `(i32.rem_s ${this.emitExpression(node[1])} ${this.emitExpression(node[2])})`; 839 | 840 | // Comparisons - pops 2, pushes 1 (boolean as i32) 841 | case '==': 842 | return `(i32.eq ${this.emitExpression(node[1])} ${this.emitExpression(node[2])})`; 843 | case '!=': 844 | return `(i32.ne ${this.emitExpression(node[1])} ${this.emitExpression(node[2])})`; 845 | case '<': 846 | return `(i32.lt_s ${this.emitExpression(node[1])} ${this.emitExpression(node[2])})`; 847 | case '>': 848 | return `(i32.gt_s ${this.emitExpression(node[1])} ${this.emitExpression(node[2])})`; 849 | case '<=': 850 | return `(i32.le_s ${this.emitExpression(node[1])} ${this.emitExpression(node[2])})`; 851 | case '>=': 852 | return `(i32.ge_s ${this.emitExpression(node[1])} ${this.emitExpression(node[2])})`; 853 | 854 | // Unary operations - pops 1, pushes 1 855 | case 
'prefix': 856 | if (node[1] === '!') { 857 | return `(i32.eqz ${this.emitExpression(node[2])})`; 858 | } 859 | if (node[1] === '-') { 860 | return `(i32.sub (i32.const 0) ${this.emitExpression(node[2])})`; 861 | } 862 | return ''; 863 | 864 | case 'if': 865 | // If used as expression - must produce value on stack 866 | return this.emitIfExpression(node); 867 | 868 | default: 869 | throw new Error(`Unknown expression type: ${type}`); 870 | } 871 | } 872 | 873 | emitIfExpression(node) { 874 | // If as expression - pops condition, pushes result value 875 | const ifCond = node[1]; 876 | const ifCons = node[2]; 877 | const ifAlt = node.length > 3 ? node[3] : null; 878 | 879 | let ifExpr = `(if (result i32)\n`; 880 | ifExpr += ` ${this.emitExpression(ifCond)}\n`; 881 | ifExpr += ` (then\n`; 882 | 883 | const consValue = this.extractReturnValue(ifCons); 884 | ifExpr += ` ${this.emitExpression(consValue)}\n`; 885 | ifExpr += ` )\n`; 886 | 887 | if (ifAlt) { 888 | ifExpr += ` (else\n`; 889 | if (Array.isArray(ifAlt) && ifAlt[0] === 'if') { 890 | const nestedIf = this.emitExpression(ifAlt); 891 | ifExpr += ` ${nestedIf}\n`; 892 | } else { 893 | const altValue = this.extractReturnValue(ifAlt); 894 | ifExpr += ` ${this.emitExpression(altValue)}\n`; 895 | } 896 | ifExpr += ` )\n`; 897 | } 898 | 899 | ifExpr += ` )`; 900 | return ifExpr; 901 | } 902 | 903 | // ===== Formatting Helpers ===== 904 | 905 | emitExpressionIndented(node, indentLevel) { 906 | if (!Array.isArray(node)) return ''; 907 | const indent = ' '.repeat(indentLevel); 908 | const type = node[0]; 909 | 910 | switch (type) { 911 | case 'call': 912 | // For calls, check if we should use multi-line format 913 | // Use multi-line if: any argument is a call, OR there are 2+ args and any is non-trivial 914 | const hasCallArgs = node[2].some(arg => 915 | Array.isArray(arg) && arg[0] === 'call' 916 | ); 917 | 918 | const hasMultipleArgs = node[2].length >= 2; 919 | const hasComplexArgs = node[2].some(arg => 920 | 
Array.isArray(arg) && (arg[0] === 'call' || arg[0] === '+' || arg[0] === '-' || arg[0] === '*' || arg[0] === '/') 921 | ); 922 | 923 | if (hasCallArgs || (hasMultipleArgs && hasComplexArgs)) { 924 | // Multi-line format 925 | let result = `${indent}(call $${node[1][1]}`; 926 | for (const arg of node[2]) { 927 | result += '\n' + this.emitExpressionIndented(arg, indentLevel + 2); 928 | } 929 | result += '\n' + indent + ')'; 930 | return result; 931 | } else { 932 | // Single-line format 933 | const args = node[2].map(a => this.emitExpression(a)).join(' '); 934 | return `${indent}(call $${node[1][1]} ${args})`; 935 | } 936 | 937 | default: 938 | // For other expressions, use the regular emit with indent prefix 939 | return `${indent}${this.emitExpression(node)}`; 940 | } 941 | } 942 | } 943 | --------------------------------------------------------------------------------