├── .nvmrc ├── .husky ├── .gitignore └── pre-commit ├── .github ├── FUNDING.yml └── workflows │ └── push-ci.yml ├── src ├── lexer │ ├── index.ts │ ├── lexer.ts │ └── tests │ │ └── lexer.test.ts ├── evaluator │ ├── index.ts │ ├── tests │ │ └── evaluator.test.ts │ └── evaluator.ts ├── index.ts ├── parser │ ├── index.ts │ ├── tests │ │ ├── checkParserErrors.ts │ │ ├── parse.ts │ │ ├── errors.test.ts │ │ ├── indexExpression.test.ts │ │ ├── integerLiteralExpression.test.ts │ │ ├── identifierExpression.test.ts │ │ ├── returnStatement.test.ts │ │ ├── stringLiteralExpression.test.ts │ │ ├── arrayLiteral.test.ts │ │ ├── booleanExpression.test.ts │ │ ├── hashLiteral.test.ts │ │ ├── prefixExpression.test.ts │ │ ├── callExpression.test.ts │ │ ├── letStatement.test.ts │ │ ├── ifExpression.test.ts │ │ ├── operatorPrecedence.test.ts │ │ ├── functionLiteral.test.ts │ │ └── infixExpression.test.ts │ └── parser.ts ├── token │ ├── index.ts │ └── token.ts ├── object │ ├── index.ts │ ├── environment.ts │ ├── tests │ │ └── object.test.ts │ └── object.ts ├── ast │ ├── Identifier.ts │ ├── Boolean.ts │ ├── StringLiteral.ts │ ├── IntegerLiteral.ts │ ├── ExpressionStatement.ts │ ├── Program.ts │ ├── ArrayLiteral.ts │ ├── PrefixExpression.ts │ ├── IndexExpression.ts │ ├── BlockStatement.ts │ ├── InfixExpression.ts │ ├── ReturnStatement.ts │ ├── CallExpression.ts │ ├── HashLiteral.ts │ ├── IfExpression.ts │ ├── LetStatement.ts │ ├── FunctionLiteral.ts │ ├── tests │ │ └── ast.test.ts │ ├── index.ts │ └── base.ts └── repl │ └── repl.ts ├── .prettierrc.json ├── nodemon.json ├── vite.config.ts ├── jest.config.ts ├── .eslintrc ├── README.md ├── tsconfig.json ├── LICENSE ├── .gitignore ├── series ├── README.md ├── error-handling.md ├── evaluation.md ├── parser-part-2.md ├── the-REPL.md ├── lexical-analysis-part-3.md ├── lexical-analysis-part-1.md ├── lexical-analysis-part-2.md └── parser-part-1.md └── package.json /.nvmrc: 
-------------------------------------------------------------------------------- 1 | v16.1.0 2 | -------------------------------------------------------------------------------- /.husky/.gitignore: -------------------------------------------------------------------------------- 1 | _ 2 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [imteekay] 2 | -------------------------------------------------------------------------------- /src/lexer/index.ts: -------------------------------------------------------------------------------- 1 | export { Lexer } from 'lexer/lexer'; 2 | -------------------------------------------------------------------------------- /src/evaluator/index.ts: -------------------------------------------------------------------------------- 1 | export { Evaluator } from 'evaluator/evaluator'; 2 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | import { startRepl } from 'repl/repl'; 2 | 3 | startRepl(); 4 | -------------------------------------------------------------------------------- /src/parser/index.ts: -------------------------------------------------------------------------------- 1 | export { Parser, ParserError } from 'parser/parser'; 2 | -------------------------------------------------------------------------------- /.husky/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | . 
"$(dirname "$0")/_/husky.sh" 3 | 4 | npx lint-staged 5 | -------------------------------------------------------------------------------- /src/token/index.ts: -------------------------------------------------------------------------------- 1 | export { Tokens, Token, TokenType, lookupIdent } from 'token/token'; 2 | -------------------------------------------------------------------------------- /.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "trailingComma": "es5", 3 | "tabWidth": 2, 4 | "semi": true, 5 | "singleQuote": true 6 | } 7 | -------------------------------------------------------------------------------- /nodemon.json: -------------------------------------------------------------------------------- 1 | { 2 | "watch": ["src"], 3 | "ext": ".ts,.js", 4 | "ignore": ["**/*.test.ts"], 5 | "exec": "ts-node -r tsconfig-paths/register ./src/index.ts" 6 | } 7 | -------------------------------------------------------------------------------- /vite.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vitest/config'; 2 | import tsconfigPaths from 'vite-tsconfig-paths'; 3 | 4 | export default defineConfig({ 5 | plugins: [tsconfigPaths()], 6 | }); 7 | -------------------------------------------------------------------------------- /src/parser/tests/checkParserErrors.ts: -------------------------------------------------------------------------------- 1 | import { ParserError } from 'parser'; 2 | 3 | export function checkParserErrors(errors: ParserError[]) { 4 | if (errors.length > 0) { 5 | throw new Error(`The input data has parse errors: ${errors}`); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/object/index.ts: -------------------------------------------------------------------------------- 1 | export { 2 | EvalObject, 3 | Integer, 4 | BooleanLiteral, 5 | Null, 6 | ObjectTypes, 7 | 
ReturnValue, 8 | ErrorObject, 9 | FunctionObject, 10 | StringObject, 11 | Builtin, 12 | ArrayObject, 13 | Hash, 14 | HashKey, 15 | HashPair, 16 | } from 'object/object'; 17 | 18 | export { Environment } from 'object/environment'; 19 | -------------------------------------------------------------------------------- /jest.config.ts: -------------------------------------------------------------------------------- 1 | import { resolve } from 'path'; 2 | 3 | const root = resolve(__dirname); 4 | 5 | module.exports = { 6 | rootDir: root, 7 | displayName: 'root-tests', 8 | testMatch: ['/src/**/*.test.ts'], 9 | testEnvironment: 'node', 10 | clearMocks: true, 11 | preset: 'ts-jest', 12 | moduleNameMapper: { 13 | 'src/(.*)': '/src/$1', 14 | }, 15 | moduleDirectories: ['node_modules', 'src'], 16 | }; 17 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "jest": true 4 | }, 5 | "root": true, 6 | "parser": "@typescript-eslint/parser", 7 | "plugins": ["@typescript-eslint", "prettier"], 8 | "extends": [ 9 | "eslint:recommended", 10 | "plugin:@typescript-eslint/eslint-recommended", 11 | "plugin:@typescript-eslint/recommended" 12 | ], 13 | "rules": { 14 | "prettier/prettier": "error", 15 | "@typescript-eslint/explicit-module-boundary-types": ["off"], 16 | "@typescript-eslint/no-empty-interface": ["off"] 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/ast/Identifier.ts: -------------------------------------------------------------------------------- 1 | import { Token } from 'token'; 2 | import { BaseExpression, ExpressionKind } from 'ast/base'; 3 | 4 | export class Identifier implements BaseExpression { 5 | token: Token; 6 | value: string; 7 | kind: ExpressionKind.Identifier; 8 | 9 | constructor(token: Token, value: string) { 10 | this.token = token; 11 | this.value = value; 12 | 
/**
 * AST expression node that carries a boolean value together with the token
 * it was created from.
 */
export class BooleanExpression implements BaseExpression {
  kind: ExpressionKind.Boolean;

  /**
   * @param token the token this node was built from
   * @param value the boolean value carried by the node
   */
  constructor(public token: Token, public value: boolean) {
    this.kind = ExpressionKind.Boolean;
  }

  /** Returns the literal text of the underlying token. */
  tokenLiteral() {
    const { literal } = this.token;
    return literal;
  }

  /** Renders the node as text; this is the literal of the underlying token. */
  string() {
    const { literal } = this.token;
    return literal;
  }
}
/**
 * Statement node that wraps a single expression.
 *
 * The constructor only receives the token; `expression` is assigned
 * afterwards by whoever builds the node.
 */
export class ExpressionStatement implements BaseStatement {
  token: Token;
  expression: Expression;
  kind: StatementKind.Expression;

  /**
   * @param token the token this statement was built from
   */
  constructor(token: Token) {
    this.token = token;
    this.kind = StatementKind.Expression;
  }

  /** Returns the literal text of the underlying token. */
  tokenLiteral(): string {
    return this.token.literal;
  }

  /**
   * Renders the wrapped expression as text.
   *
   * @returns the expression's string form, or `''` when no expression has
   *   been attached
   */
  string(): string {
    // `expression` is not set by the constructor, so an unattached node holds
    // `undefined`, not `null`; `== null` covers both cases.
    return this.expression == null ? '' : this.expression.string();
  }
}
/**
 * Statement node that groups a list of statements under one token.
 * The list starts empty and is filled in after construction.
 */
export class BlockStatement implements BaseStatement {
  statements: Statement[] = [];
  token: Token;
  kind: StatementKind.Block;

  /**
   * @param token the token this block was built from
   */
  constructor(token: Token) {
    this.token = token;
    this.kind = StatementKind.Block;
  }

  /** Returns the literal text of the underlying token. */
  tokenLiteral() {
    const { literal } = this.token;
    return literal;
  }

  /**
   * Renders the block by concatenating the string form of every statement,
   * in order, with no separator.
   */
  string() {
    let rendered = '';

    for (const statement of this.statements) {
      rendered += statement.string();
    }

    return rendered;
  }
}
/**
 * AST expression node for a binary operation: a left operand, an operator
 * and a right operand. The right operand is assigned after construction.
 */
export class InfixExpression implements BaseExpression {
  right: Expression;
  kind: ExpressionKind.Infix;

  /**
   * @param token the token this node was built from
   * @param operator the operator text placed between the operands
   * @param left the left-hand operand
   */
  constructor(
    public token: Token,
    public operator: string,
    public left: Expression
  ) {
    this.kind = ExpressionKind.Infix;
  }

  /** Returns the literal text of the underlying token. */
  tokenLiteral() {
    const { literal } = this.token;
    return literal;
  }

  /**
   * Renders the operation as `(left operator right)`, wrapped in
   * parentheses with single spaces around the operator.
   */
  string(): string {
    const lhs = this.left.string();
    const rhs = this.right.string();
    const inner = [lhs, this.operator, rhs].join(' ');

    return '(' + inner + ')';
  }
}
/**
 * AST expression node for a hash literal: an insertion-ordered collection of
 * key/value expression pairs. The map starts empty and is filled in after
 * construction.
 */
export class HashLiteral implements BaseExpression {
  token: Token;
  // Both keys and values are expression nodes; `string()` is called on each.
  pairs: Map<Expression, Expression>;
  kind: ExpressionKind.HashLiteral;

  /**
   * @param token the token this node was built from
   */
  constructor(token: Token) {
    this.token = token;
    this.kind = ExpressionKind.HashLiteral;
    this.pairs = new Map();
  }

  /** Returns the literal text of the underlying token. */
  tokenLiteral(): string {
    return this.token.literal;
  }

  /**
   * Renders the literal as `{key:value, key:value}`, in the map's insertion
   * order.
   */
  string(): string {
    const pairs = Array.from(
      this.pairs,
      ([key, value]) => `${key.string()}:${value.string()}`
    );

    return `{${pairs.join(', ')}}`;
  }
}
/**
 * AST expression node for a function literal: a parameter list and a body
 * block. Both are assigned after construction.
 */
export class FunctionLiteral implements BaseExpression {
  token: Token;
  parameters: Identifier[];
  body: BlockStatement;
  kind: ExpressionKind.FunctionLiteral;

  /**
   * @param token the token this node was built from
   */
  constructor(token: Token) {
    this.token = token;
    this.kind = ExpressionKind.FunctionLiteral;
  }

  /** Returns the literal text of the underlying token. */
  tokenLiteral(): string {
    return this.token.literal;
  }

  /**
   * Renders the literal as `<token literal>(<params>) <body>`, with the
   * parameters separated by `, `.
   */
  string(): string {
    // Parameters are node objects: joining them directly would print
    // "[object Object]", so each one is rendered through its own `string()`.
    const parameters = this.parameters
      .map((parameter) => parameter.string())
      .join(', ');

    return `${this.tokenLiteral()}(${parameters}) ${this.body.string()}`;
  }
}
# Runs type checking, formatting, linting and the test suite on every push.
name: Push CI

on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Read .nvmrc
        # Step outputs are written to the $GITHUB_OUTPUT file; the
        # `set-output` workflow command is deprecated.
        run: echo "NVMRC=$(cat .nvmrc)" >> "$GITHUB_OUTPUT"
        id: nvm

      - name: Use Node.js (.nvmrc)
        uses: actions/setup-node@v2
        with:
          node-version: '${{ steps.nvm.outputs.NVMRC }}'

      - name: Install dependencies
        run: yarn

      - name: Type check code w/ TypeScript
        run: yarn typecheck

      - name: Check code format w/ Prettier
        run: yarn prettier:check

      - name: Analyze code w/ ESLint
        run: yarn lint

      - name: Run Jest tests
        run: yarn test
describe('Parser', () => {
  describe('parseProgram', () => {
    // Feeds two malformed `let` statements to the parser and checks the
    // error messages it collects.
    it('parses an input with error', () => {
      const input = `
        let 123;
        let a;
      `;

      const lexer = new Lexer(input);
      const parser = new Parser(lexer);

      parser.parseProgram();

      const errors = parser.getErrors();
      const expectedErrors = [
        'expected next token to be IDENT, got INT instead',
        'expected next token to be =, got ; instead',
        'no prefix parse function for ; found',
      ];

      // Compare the whole list at once so that missing or extra errors fail
      // the test; a per-element loop passes vacuously on an empty list.
      expect(errors).toEqual(expectedErrors);
    });
  });
});
type EnvironmentTypeKey = string;
type EnvironmentTypeValue = EvalObject | null | undefined;
type EnvironmentType = Map<EnvironmentTypeKey, EnvironmentTypeValue>;

/**
 * A scope of name-to-value bindings with an optional enclosing scope.
 *
 * Lookups that miss locally are delegated to the enclosing scope, which in
 * turn delegates to its own enclosing scope, and so on. Writes always go to
 * this scope's own store.
 */
export class Environment {
  store: EnvironmentType;
  outer?: Environment;

  /**
   * @param outer the enclosing scope, if any
   */
  constructor(outer?: Environment) {
    this.store = new Map();
    this.outer = outer;
  }

  /**
   * Looks a name up in this scope and then in every enclosing scope.
   *
   * @param name the binding to look up
   * @returns `has` tells whether the name is bound in any scope of the
   *   chain; `value` is the bound value from the innermost scope that
   *   defines it (`undefined` when `has` is false)
   */
  get(name: EnvironmentTypeKey): {
    has: boolean;
    value: EnvironmentTypeValue;
  } {
    if (this.store.has(name)) {
      return { has: true, value: this.store.get(name) };
    }

    // Delegate to `outer.get` rather than reading `outer.store` directly so
    // the search continues through the whole chain, not just one level up.
    if (this.outer) {
      return this.outer.get(name);
    }

    return { has: false, value: undefined };
  }

  /**
   * Binds a name in this scope, replacing any existing local binding.
   *
   * @param name the binding to create or overwrite
   * @param value the value to bind
   * @returns the value that was stored
   */
  set(name: EnvironmentTypeKey, value: EnvironmentTypeValue) {
    this.store.set(name, value);
    return value;
  }
}
export { StringLiteral } from 'ast/StringLiteral'; 22 | export { ArrayLiteral } from 'ast/ArrayLiteral'; 23 | export { IndexExpression } from 'ast/IndexExpression'; 24 | export { HashLiteral } from 'ast/HashLiteral'; 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 TK 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | yarn-debug.log* 5 | yarn-error.log* 6 | 7 | # Diagnostic reports (https://nodejs.org/api/report.html) 8 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 9 | 10 | # Runtime data 11 | pids 12 | *.pid 13 | *.seed 14 | *.pid.lock 15 | dist 16 | 17 | # Directory for instrumented libs generated by jscoverage/JSCover 18 | lib-cov 19 | 20 | # Coverage directory used by tools like istanbul 21 | coverage 22 | *.lcov 23 | 24 | # node-waf configuration 25 | .lock-wscript 26 | 27 | # Compiled binary addons (https://nodejs.org/api/addons.html) 28 | build/Release 29 | 30 | # Dependency directories 31 | node_modules/ 32 | 33 | # TypeScript v1 declaration files 34 | typings/ 35 | 36 | # TypeScript cache 37 | *.tsbuildinfo 38 | 39 | # Optional npm cache directory 40 | .npm 41 | 42 | # Optional eslint cache 43 | .eslintcache 44 | 45 | # Optional REPL history 46 | .node_repl_history 47 | 48 | # Output of 'npm pack' 49 | *.tgz 50 | 51 | # Yarn Integrity file 52 | .yarn-integrity 53 | 54 | # dotenv environment variables file 55 | .env.test 56 | 57 | # parcel-bundler cache (https://parceljs.org/) 58 | .cache 59 | .DS_Store 60 | -------------------------------------------------------------------------------- /series/README.md: -------------------------------------------------------------------------------- 1 | # Building an Interpreter Series 2 | 3 | After rumbling a lot about learning Programming Language Theory (PLT), I finally decided to commit and learn this field that I care about. 4 | 5 | I wanted to start simple, with a friendly resource to just get started. After collecting a lot of resources, most of them were heavy books and papers, I got a copy of the `Writing an interpreter in Go` book. 
It seemed to be the simplest resource to get started in the field and it also let me be very practical in terms of engineering a compiler. 6 | 7 | But instead of Go, I wanted to use the language I'm most familiar with nowadays. So TypeScript it is. 8 | 9 | This book creates an interpreter for the Monkey programming language and we'll see in each chapter of this series the building blocks of the interpreter. 10 | 11 | If you want to see the code, I have a public Github repository for this implementation: [crafting-an-interpreter](https://github.com/imteekay/crafting-an-interpreter), a programming language's compiler written in TypeScript. 12 | 13 | - [Lexical Analysis - Part 1](lexical-analysis-part-1.md) 14 | - [Lexical Analysis - Part 2](lexical-analysis-part-2.md) 15 | - [Lexical Analysis - Part 3](lexical-analysis-part-3.md) 16 | - [The REPL](the-REPL.md) 17 | - [Parser - Part 1](parser-part-1.md) 18 | -------------------------------------------------------------------------------- /src/parser/tests/indexExpression.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from 'vitest'; 2 | import { 3 | ExpressionStatement, 4 | InfixExpression, 5 | IntegerLiteral, 6 | Identifier, 7 | IndexExpression, 8 | } from 'ast'; 9 | import { Token, Tokens } from 'token'; 10 | import { parse } from './parse'; 11 | 12 | describe('Parser', () => { 13 | describe('parseProgram', () => { 14 | it('validates ast after parsing', () => { 15 | const input = 'myArray[1 + 1];'; 16 | const { statements } = parse(input); 17 | 18 | const statement = new ExpressionStatement( 19 | new Token(Tokens.IDENT, 'myArray') 20 | ); 21 | 22 | const leftExpression = new Identifier( 23 | new Token(Tokens.IDENT, 'myArray'), 24 | 'myArray' 25 | ); 26 | 27 | const indexExpression = new IndexExpression( 28 | new Token(Tokens.LBRACKET, '['), 29 | leftExpression 30 | ); 31 | 32 | const infixExpression = new InfixExpression( 33 | new 
// Parser tests for integer literal expression statements.
describe('Parser', () => {
  describe('parseProgram', () => {
    it('parses an integer literal expression', () => {
      const input = '10;';
      const { statements } = parse(input);
      const statement = statements[0];

      // Fail loudly when the parser yields an unexpected node kind. Wrapping
      // the assertions in an `if` would let the test pass without checking
      // anything. The guard also narrows the types for the assertions below.
      if (
        statement.kind !== StatementKind.Expression ||
        statement.expression.kind !== ExpressionKind.IntegerLiteral
      ) {
        throw new Error(
          'expected an expression statement holding an integer literal'
        );
      }

      expect(statements.length).toEqual(1);

      const expression = statement.expression;

      expect(expression.value).toEqual(10);
      expect(expression.tokenLiteral()).toEqual('10');
    });

    it('validates ast after parsing', () => {
      const input = '10;';
      const { statements } = parse(input);
      const integerToken = new Token(Tokens.INT, '10');
      const expressionStatement = new ExpressionStatement(integerToken);
      expressionStatement.expression = new IntegerLiteral(integerToken, 10);
      expect(statements).toEqual([expressionStatement]);
    });
  });
});
/** Commands that terminate the REPL session. */
const ScannerClose = {
  exit: 'exit',
  quit: 'quit',
};

const exits = [ScannerClose.exit, ScannerClose.quit];

/**
 * Writes every parser error to stderr, each followed by a blank line.
 *
 * @param errors - error messages collected by the parser
 */
function printParserErrors(errors: string[]) {
  for (const error of errors) {
    console.error(error, '\n');
  }
}

/**
 * Starts an interactive read-eval-print loop on stdin/stdout.
 *
 * Each input line is lexed, parsed and evaluated against a single
 * `Environment` that is shared by all prompts of the session. Typing `exit`
 * or `quit` closes the readline interface.
 */
export function startRepl() {
  const scanner = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  // Created once, outside the loop, so every prompt evaluates against it.
  const env = new Environment();

  function repl() {
    scanner.question('> ', (input) => {
      if (exits.includes(input)) return scanner.close();

      const lexer = new Lexer(input);
      const parser = new Parser(lexer);
      const program = parser.parseProgram();
      const errors = parser.getErrors();

      if (errors.length > 0) {
        printParserErrors(errors);
        // Re-prompt and stop here. Without the early return the invalid
        // program would still be evaluated and `repl()` would be called a
        // second time, queueing a duplicate prompt.
        repl();
        return;
      }

      const evaluator = new Evaluator();
      const evaluated = evaluator.evaluate(program, env);

      if (evaluated) {
        console.log(evaluated.inspect());
      }

      repl();
    });
  }

  console.log('Welcome');
  repl();
}
// Parser tests for `return` statements.
describe('Parser', () => {
  describe('parseProgram', () => {
    it('parses the return statement', () => {
      const input = `
        return 5;
        return 10;
        return 10000;
      `;

      const { statements } = parse(input);
      const tests = [
        { tokenLiteral: 'return' },
        { tokenLiteral: 'return' },
        { tokenLiteral: 'return' },
      ];

      tests.forEach(({ tokenLiteral }, index) => {
        const statement = statements[index];

        // A non-return statement must fail the test rather than skip the
        // assertion, which is what an `if` wrapper around `expect` would do.
        if (statement.kind !== StatementKind.Return) {
          throw new Error(
            `expected statement ${index} to be a return statement`
          );
        }

        expect(statement.tokenLiteral()).toEqual(tokenLiteral);
      });
    });

    it('validates ast after parsing', () => {
      const input = 'return 10;';
      const { statements } = parse(input);

      const returnStatement = new ReturnStatement(
        new Token(Tokens.RETURN, 'return')
      );

      returnStatement.returnValue = new IntegerLiteral(
        new Token(Tokens.INT, '10'),
        10
      );

      expect(statements).toEqual([returnStatement]);
    });
  });
});
/** A token type is represented by its string tag (see `Tokens`). */
export type TokenType = string;

/** A lexical token: its type tag plus the literal text it stands for. */
export class Token {
  type: TokenType;
  literal: string;

  constructor(type: TokenType, literal: string) {
    this.type = type;
    this.literal = literal;
  }
}

/** Every token type the lexer can produce. */
export enum Tokens {
  ILLEGAL = 'ILLEGAL',
  EOF = 'EOF',

  // Identifiers + literals
  IDENT = 'IDENT', // add, foobar, x, y, ...
  INT = 'INT', // 1343456
  STRING = 'STRING',

  // Operators
  ASSIGN = '=',
  PLUS = '+',
  MINUS = '-',
  BANG = '!',
  ASTERISK = '*',
  SLASH = '/',
  LESS_THAN = '<',
  GREATER_THAN = '>',

  // Delimiters
  COMMA = ',',
  COLON = ':',
  SEMICOLON = ';',
  LPAREN = '(',
  RPAREN = ')',
  LBRACE = '{',
  RBRACE = '}',
  LBRACKET = '[',
  RBRACKET = ']',

  // Keywords
  FUNCTION = 'FUNCTION',
  LET = 'LET',
  TRUE = 'TRUE',
  FALSE = 'FALSE',
  IF = 'IF',
  ELSE = 'ELSE',
  RETURN = 'RETURN',
  EQUAL = '==',
  NOT_EQUAL = '!=',
}

interface KeywordsType {
  [key: string]: string;
}

/** Maps reserved words, as written in source code, to their token type. */
const Keywords: KeywordsType = {
  fn: Tokens.FUNCTION,
  let: Tokens.LET,
  true: Tokens.TRUE,
  false: Tokens.FALSE,
  if: Tokens.IF,
  else: Tokens.ELSE,
  return: Tokens.RETURN,
};

/**
 * Resolves the token type for a scanned word.
 *
 * @param ident - the word read by the lexer
 * @returns the keyword token type when `ident` is a reserved word, otherwise
 *   `Tokens.IDENT`
 */
export function lookupIdent(ident: string): TokenType {
  // Only own properties count. The `in` operator also matches members
  // inherited from Object.prototype, so identifiers such as `toString` or
  // `constructor` would be treated as keywords and yield a function instead
  // of a token type.
  return Object.prototype.hasOwnProperty.call(Keywords, ident)
    ? Keywords[ident]
    : Tokens.IDENT;
}
4 | - the same way as handling return statements: errors and return statements both stop the evaluation of a series of statements. 5 | 6 | ## Final words & Resources 7 | 8 | If you didn't have the opportunity, take a look at the posts from the [Building an Interpreter series](https://leandrotk.github.io/series/building-an-interpreter/): 9 | 10 | - [Building an Interpreter: Lexical Analysis - Part 1](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-1.html) 11 | - [Building an Interpreter: Lexical Analysis - Part 2](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-2.html) 12 | - [Building an Interpreter: Lexical Analysis - Part 3](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-3.html) 13 | - [Building an Interpreter: REPL](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-repl.html) 14 | 15 | These are the resources I'm using to learn more about this field: 16 | 17 | - [monkey-ts](https://github.com/leandrotk/monkey-ts): the open-source project of the compiler for the TypeScript version of the Monkey programming language. 18 | - [Programming Language Theory](https://github.com/leandrotk/programming-language-theory): a bunch of resources about my studies on Programming Language Theory & Applied PLT. 19 | - [Writing an Interpreter in Go](https://www.goodreads.com/book/show/32681092-writing-an-interpreter-in-go): the book I'm reading to learn and implement the Monkey compiler. 
20 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "crafting-an-interpreter", 3 | "version": "1.0.0", 4 | "description": "Crafting an interpreter/compiler with TypeScript", 5 | "main": "src/index.ts", 6 | "scripts": { 7 | "dev": "nodemon", 8 | "test": "vitest", 9 | "lint": "eslint ./src --ext .ts", 10 | "lint:fix": "eslint ./src --ext .ts --fix", 11 | "prettier:check": "prettier --check 'src/**/*.ts'", 12 | "prettier:fix": "prettier --write 'src/**/*.ts'", 13 | "typecheck": "tsc", 14 | "prepare": "husky install" 15 | }, 16 | "repository": { 17 | "type": "git", 18 | "url": "git+ssh://git@github.com/imteekay/crafting-an-interpreter.git" 19 | }, 20 | "keywords": [ 21 | "compiler", 22 | "programming-language", 23 | "compiler-design" 24 | ], 25 | "author": "=", 26 | "license": "MIT", 27 | "bugs": { 28 | "url": "https://github.com/imteekay/crafting-an-interpreter/issues" 29 | }, 30 | "homepage": "https://github.com/imteekay/crafting-an-interpreter", 31 | "devDependencies": { 32 | "@types/node": "15.3.0", 33 | "@typescript-eslint/eslint-plugin": "4.23.0", 34 | "@typescript-eslint/parser": "5.1.0", 35 | "ansi-regex": "6.0.1", 36 | "eslint": "7.26.0", 37 | "eslint-plugin-prettier": "3.4.0", 38 | "husky": ">=6", 39 | "lint-staged": ">=10", 40 | "nodemon": "2.0.13", 41 | "prettier": "2.3.0", 42 | "ts-node": "9.1.1", 43 | "tsconfig-paths": "3.11.0", 44 | "typescript": "4.6.3", 45 | "vite-tsconfig-paths": "3.4.1", 46 | "vitest": "0.9.3" 47 | }, 48 | "lint-staged": { 49 | "*.ts": [ 50 | "yarn lint", 51 | "yarn prettier:check" 52 | ] 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/parser/tests/arrayLiteral.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from 'vitest'; 2 | import { 3 |
/**
 * Builds the expected AST node for `<integerLiteral1> <op> <integerLiteral2>`.
 *
 * @param integerLiteral1 - value of the left-hand integer literal
 * @param integerLiteral2 - value of the right-hand integer literal
 * @param operator - token type and source text of the infix operator
 * @returns the infix expression with both operands attached
 */
function buildInfix(
  integerLiteral1: number,
  integerLiteral2: number,
  operator: {
    token: Tokens;
    op: string;
  }
) {
  const toIntegerLiteral = (value: number) =>
    new IntegerLiteral(new Token(Tokens.INT, value.toString()), value);

  const { token, op } = operator;
  const left = toIntegerLiteral(integerLiteral1);
  const infix = new InfixExpression(new Token(token, op), op, left);

  infix.right = toIntegerLiteral(integerLiteral2);

  return infix;
}

// Parser test for array literals whose elements are arbitrary expressions.
describe('Parser', () => {
  describe('parseProgram', () => {
    it('validates ast after parsing', () => {
      const { statements } = parse('[1, 2 * 2, 3 + 3];');

      // The statement and the literal each carry their own `[` token.
      const openBracket = () => new Token(Tokens.LBRACKET, '[');

      const expectedStatement = new ExpressionStatement(openBracket());
      const expectedArray = new ArrayLiteral(openBracket());

      expectedArray.elements = [
        new IntegerLiteral(new Token(Tokens.INT, '1'), 1),
        buildInfix(2, 2, { token: Tokens.ASTERISK, op: '*' }),
        buildInfix(3, 3, { token: Tokens.PLUS, op: '+' }),
      ];

      expectedStatement.expression = expectedArray;

      expect(statements).toEqual([expectedStatement]);
    });
  });
});
// Parser tests for hash literals: empty, literal values, expression values.
describe('Parser', () => {
  describe('parseProgram', () => {
    it('validates empty hash', () => {
      const input = '{};';
      const { statements } = parse(input);
      const hashLiteral = (statements[0] as ExpressionStatement)
        .expression as HashLiteral;

      expect(hashLiteral.pairs.size).toEqual(0);
    });

    it('validates ast after parsing', () => {
      const input = '{"one": 1, "two": 2, "three": 3};';
      const { statements } = parse(input);
      const hashLiteral = (statements[0] as ExpressionStatement)
        .expression as HashLiteral;

      expect(hashLiteral.pairs.size).toEqual(3);

      // Expected printed value for each printed key. `Record` needs both
      // type arguments; without them the file does not compile.
      const expectedKeyValues: Record<string, string> = {
        one: '1',
        two: '2',
        three: '3',
      };

      for (const [key, value] of hashLiteral.pairs.entries()) {
        expect(expectedKeyValues[key.string()]).toEqual(value.string());
      }
    });

    it('validates hash with expressions', () => {
      const input = '{"one": 0 + 1, "two": 10 - 8, "three": 15 / 5};';
      const { statements } = parse(input);
      const hashLiteral = (statements[0] as ExpressionStatement)
        .expression as HashLiteral;

      expect(hashLiteral.pairs.size).toEqual(3);

      // Values are compared through their printed, parenthesised form.
      const expectedKeyValues: Record<string, string> = {
        one: '(0 + 1)',
        two: '(10 - 8)',
        three: '(15 / 5)',
      };

      for (const [key, value] of hashLiteral.pairs.entries()) {
        expect(expectedKeyValues[key.string()]).toEqual(value.string());
      }
    });
  });
});
/**
 * Builds the expected AST node for `<integerLiteral1> <op> <integerLiteral2>`.
 *
 * @param integerLiteral1 - value of the left-hand integer literal
 * @param integerLiteral2 - value of the right-hand integer literal
 * @param operator - token type and source text of the infix operator
 * @returns the infix expression with both operands attached
 */
function buildInfix(
  integerLiteral1: number,
  integerLiteral2: number,
  operator: {
    token: Tokens;
    op: string;
  }
) {
  const integerLiteral = new IntegerLiteral(
    new Token(Tokens.INT, integerLiteral1.toString()),
    integerLiteral1
  );

  const infixExpression = new InfixExpression(
    new Token(operator.token, operator.op),
    operator.op,
    integerLiteral
  );

  infixExpression.right = new IntegerLiteral(
    new Token(Tokens.INT, integerLiteral2.toString()),
    integerLiteral2
  );

  return infixExpression;
}

// Parser test for call expressions with literal and infix arguments.
describe('Parser', () => {
  describe('parseProgram', () => {
    it('validates ast after parsing', () => {
      const input = 'add(1, 2 * 3, 4 + 5);';
      const { statements } = parse(input);

      const statement = new ExpressionStatement(new Token(Tokens.IDENT, 'add'));
      const callExpression = new CallExpression(
        new Token(Tokens.LPAREN, '('),
        new Identifier(new Token(Tokens.IDENT, 'add'), 'add')
      );

      // The infix arguments come from `buildInfix`. The hand-built
      // `2 * 3` node that used to live here was never referenced.
      callExpression.arguments = [
        new IntegerLiteral(new Token(Tokens.INT, '1'), 1),
        buildInfix(2, 3, { token: Tokens.ASTERISK, op: '*' }),
        buildInfix(4, 5, { token: Tokens.PLUS, op: '+' }),
      ];

      statement.expression = callExpression;

      expect(statements).toEqual([statement]);
    });
  });
});
/** Discriminant tag for the root program node. */
export enum ProgramKind {
  program = 'program',
}

/** Discriminant tags for statement nodes. */
export enum StatementKind {
  Let = 'let',
  Return = 'return',
  Expression = 'expression',
  Block = 'block',
}

/** Discriminant tags for expression nodes. */
export enum ExpressionKind {
  Identifier = 'identifier',
  IntegerLiteral = 'integerLiteral',
  StringLiteral = 'stringLiteral',
  Prefix = 'prefix',
  Infix = 'infix',
  Boolean = 'boolean',
  If = 'if',
  FunctionLiteral = 'functionLiteral',
  Call = 'call',
  ArrayLiteral = 'arrayLiteral',
  IndexExpression = 'indexExpression',
  HashLiteral = 'hashLiteral',
}

// Any tag a node's `kind` field may hold.
type NodeKind = ProgramKind | StatementKind | ExpressionKind;

/**
 * Contract shared by every AST node: a `kind` tag and two methods that
 * return strings.
 */
export interface Node {
  kind: NodeKind;
  // presumably the literal text of the node's token — confirm in the node classes
  tokenLiteral: () => string;
  // presumably the printed source form of the node — confirm in the node classes
  string: () => string;
}

/** A node whose `kind` is restricted to the statement tags. */
export interface BaseStatement extends Node {
  kind: StatementKind;
}

/** A node whose `kind` is restricted to the expression tags. */
export interface BaseExpression extends Node {
  kind: ExpressionKind;
}

// NOTE(review): `StatementKind.Block` has no corresponding member in this
// union — confirm whether block statements are excluded on purpose.
export type Statement = LetStatement | ReturnStatement | ExpressionStatement;
/** Union of every concrete expression node class. */
export type Expression =
  | Identifier
  | IntegerLiteral
  | PrefixExpression
  | InfixExpression
  | BooleanExpression
  | IfExpression
  | FunctionLiteral
  | CallExpression
  | StringLiteral
  | ArrayLiteral
  | IndexExpression
  | HashLiteral;
// Parser tests for `let` statements.
describe('Parser', () => {
  describe('parseProgram', () => {
    it('parses the let statement', () => {
      const input = `
        let x = 5;
        let y = 10;
        let foobar = 10000;
      `;

      const { statements } = parse(input);
      const tests = [
        { identifier: 'x' },
        { identifier: 'y' },
        { identifier: 'foobar' },
      ];

      tests.forEach(({ identifier }, index) => {
        const statement = statements[index];

        // A non-let statement must fail the test rather than skip the
        // assertions, which is what an `if` wrapper around them would do.
        if (statement.kind !== StatementKind.Let) {
          throw new Error(`expected statement ${index} to be a let statement`);
        }

        expect(statement.tokenLiteral()).toEqual('let');
        expect(statement.name.value).toEqual(identifier);
        expect(statement.name.tokenLiteral()).toEqual(identifier);
      });
    });

    it('validates ast after parsing', () => {
      const input = `
        let x = 5;
        let y = 10;
        let foobar = 10000;
      `;

      const { statements } = parse(input);

      const xStatement = new LetStatement(new Token(Tokens.LET, 'let'));
      xStatement.name = new Identifier(new Token(Tokens.IDENT, 'x'), 'x');
      xStatement.value = new IntegerLiteral(new Token(Tokens.INT, '5'), 5);

      const yStatement = new LetStatement(new Token(Tokens.LET, 'let'));
      yStatement.name = new Identifier(new Token(Tokens.IDENT, 'y'), 'y');
      yStatement.value = new IntegerLiteral(new Token(Tokens.INT, '10'), 10);

      const foobarStatement = new LetStatement(new Token(Tokens.LET, 'let'));
      foobarStatement.name = new Identifier(
        new Token(Tokens.IDENT, 'foobar'),
        'foobar'
      );

      foobarStatement.value = new IntegerLiteral(
        new Token(Tokens.INT, '10000'),
        10000
      );

      expect(statements).toEqual([xStatement, yStatement, foobarStatement]);
    });
  });
});
// Checks that parsing and re-printing a program makes operator precedence
// explicit through parentheses.
describe('Parser', () => {
  describe('parseProgram', () => {
    it('parses operator precedence', () => {
      // Each case is [source input, expected printed program].
      const cases: Array<[string, string]> = [
        ['true', 'true'],
        ['false', 'false'],
        ['3 > 5 == false', '((3 > 5) == false)'],
        ['3 < 5 == true', '((3 < 5) == true)'],
        ['-a * b', '((-a) * b)'],
        ['!-a', '(!(-a))'],
        ['a + b + c', '((a + b) + c)'],
        ['a + b - c', '((a + b) - c)'],
        ['a * b * c', '((a * b) * c)'],
        ['a * b / c', '((a * b) / c)'],
        ['a + b / c', '(a + (b / c))'],
        ['a + b * c + d / e - f', '(((a + (b * c)) + (d / e)) - f)'],
        ['3 + 4; -5 * 5', '(3 + 4)((-5) * 5)'],
        ['5 > 4 == 3 < 4', '((5 > 4) == (3 < 4))'],
        ['5 < 4 != 3 > 4', '((5 < 4) != (3 > 4))'],
        [
          '3 + 4 * 5 == 3 * 1 + 4 * 5',
          '((3 + (4 * 5)) == ((3 * 1) + (4 * 5)))',
        ],
        ['1 + (2 + 3) + 4', '((1 + (2 + 3)) + 4)'],
        ['(5 + 5) * 2', '((5 + 5) * 2)'],
        ['2 / (5 + 5)', '(2 / (5 + 5))'],
        ['-(5 + 5)', '(-(5 + 5))'],
        ['!(true == true)', '(!(true == true))'],
        ['a + add(b * c) + d', '((a + add((b * c))) + d)'],
        [
          'add(a, b, 1, 2 * 3, 4 + 5, add(6, 7 * 8))',
          'add(a, b, 1, (2 * 3), (4 + 5), add(6, (7 * 8)))',
        ],
        [
          'add(a + b + c * d / f + g)',
          'add((((a + b) + ((c * d) / f)) + g))',
        ],
        [
          'a * [1, 2, 3, 4][b * c] * d',
          '((a * ([1, 2, 3, 4][(b * c)])) * d)',
        ],
        [
          'add(a * b[2], b[1], 2 * [1, 2][1])',
          'add((a * (b[2])), (b[1]), (2 * ([1, 2][1])))',
        ],
      ];

      cases.forEach(([source, printed]) => {
        const parsed = parse(source);
        expect(parsed.programString).equal(printed);
      });
    });
  });
});
10 | import { parse } from './parse'; 11 | 12 | describe('Parser', () => { 13 | describe('parseProgram', () => { 14 | it('validates ast after parsing function without parameters', () => { 15 | const input = 'fn() {}'; 16 | const { statements } = parse(input); 17 | 18 | const statement = new ExpressionStatement( 19 | new Token(Tokens.FUNCTION, 'fn') 20 | ); 21 | 22 | const functionLiteralExpression = new FunctionLiteral( 23 | new Token(Tokens.FUNCTION, 'fn') 24 | ); 25 | 26 | functionLiteralExpression.parameters = []; 27 | functionLiteralExpression.body = new BlockStatement( 28 | new Token(Tokens.LBRACE, '{') 29 | ); 30 | 31 | statement.expression = functionLiteralExpression; 32 | 33 | expect(statements).toEqual([statement]); 34 | }); 35 | 36 | it('validates ast after parsing function with one parameter', () => { 37 | const input = 'fn(x) { x; }'; 38 | const { statements } = parse(input); 39 | 40 | const statement = new ExpressionStatement( 41 | new Token(Tokens.FUNCTION, 'fn') 42 | ); 43 | 44 | const functionLiteralExpression = new FunctionLiteral( 45 | new Token(Tokens.FUNCTION, 'fn') 46 | ); 47 | 48 | functionLiteralExpression.parameters = [ 49 | new Identifier(new Token(Tokens.IDENT, 'x'), 'x'), 50 | ]; 51 | 52 | const body = new BlockStatement(new Token(Tokens.LBRACE, '{')); 53 | 54 | const bodyStatement = new ExpressionStatement( 55 | new Token(Tokens.IDENT, 'x') 56 | ); 57 | 58 | bodyStatement.expression = new Identifier( 59 | new Token(Tokens.IDENT, 'x'), 60 | 'x' 61 | ); 62 | 63 | body.statements.push(bodyStatement); 64 | functionLiteralExpression.body = body; 65 | statement.expression = functionLiteralExpression; 66 | 67 | expect(statements).toEqual([statement]); 68 | }); 69 | 70 | it('validates ast after parsing function with multiple parameters', () => { 71 | const input = 'fn(x, y) { x + y; }'; 72 | const { statements } = parse(input); 73 | 74 | const statement = new ExpressionStatement( 75 | new Token(Tokens.FUNCTION, 'fn') 76 | ); 77 | 78 | const 
functionLiteralExpression = new FunctionLiteral( 79 | new Token(Tokens.FUNCTION, 'fn') 80 | ); 81 | 82 | functionLiteralExpression.parameters = [ 83 | new Identifier(new Token(Tokens.IDENT, 'x'), 'x'), 84 | new Identifier(new Token(Tokens.IDENT, 'y'), 'y'), 85 | ]; 86 | 87 | const body = new BlockStatement(new Token(Tokens.LBRACE, '{')); 88 | 89 | const bodyStatement = new ExpressionStatement( 90 | new Token(Tokens.IDENT, 'x') 91 | ); 92 | 93 | const infixExpression = new InfixExpression( 94 | new Token(Tokens.PLUS, '+'), 95 | '+', 96 | new Identifier(new Token(Tokens.IDENT, 'x'), 'x') 97 | ); 98 | 99 | infixExpression.right = new Identifier(new Token(Tokens.IDENT, 'y'), 'y'); 100 | bodyStatement.expression = infixExpression; 101 | body.statements.push(bodyStatement); 102 | functionLiteralExpression.body = body; 103 | statement.expression = functionLiteralExpression; 104 | 105 | expect(statements).toEqual([statement]); 106 | }); 107 | }); 108 | }); 109 | -------------------------------------------------------------------------------- /src/parser/tests/infixExpression.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from 'vitest'; 2 | import { 3 | ExpressionStatement, 4 | InfixExpression, 5 | IntegerLiteral, 6 | StatementKind, 7 | } from 'ast'; 8 | import { ExpressionKind } from 'ast/base'; 9 | import { parse } from './parse'; 10 | import { Token, Tokens } from 'token'; 11 | 12 | describe('Parser', () => { 13 | describe('parseProgram', () => { 14 | it('parses infix expressions', () => { 15 | const tests = [ 16 | { input: '5 + 5;', leftValue: 5, operator: '+', rightValue: 5 }, 17 | { input: '5 - 5;', leftValue: 5, operator: '-', rightValue: 5 }, 18 | { input: '5 * 5;', leftValue: 5, operator: '*', rightValue: 5 }, 19 | { input: '5 / 5;', leftValue: 5, operator: '/', rightValue: 5 }, 20 | { input: '5 > 5;', leftValue: 5, operator: '>', rightValue: 5 }, 21 | { input: '5 < 5;', leftValue: 5, 
operator: '<', rightValue: 5 }, 22 | { input: '5 == 5;', leftValue: 5, operator: '==', rightValue: 5 }, 23 | { input: '5 != 5;', leftValue: 5, operator: '!=', rightValue: 5 }, 24 | ]; 25 | 26 | tests.forEach((test) => { 27 | const { statements } = parse(test.input); 28 | const statement = statements[0]; 29 | 30 | if (statements.length !== 1) { 31 | throw new Error( 32 | `program does not contain 1 statement. got ${statements.length}` 33 | ); 34 | } 35 | 36 | if ( 37 | statement.kind === StatementKind.Expression && 38 | statement.expression.kind === ExpressionKind.Infix 39 | ) { 40 | const { expression } = statement; 41 | const { operator } = expression; 42 | 43 | const left = expression.left as IntegerLiteral; 44 | const right = expression.right as IntegerLiteral; 45 | 46 | expect(left.value).toEqual(test.leftValue); 47 | expect(left.tokenLiteral()).toEqual(test.leftValue.toString()); 48 | 49 | expect(operator).toEqual(test.operator); 50 | 51 | expect(right.value).toEqual(test.rightValue); 52 | expect(right.tokenLiteral()).toEqual(test.rightValue.toString()); 53 | } 54 | }); 55 | }); 56 | }); 57 | 58 | it('parses two infix expressions', () => { 59 | const input = '1 + 2 + 3'; 60 | const { statements } = parse(input); 61 | const plusToken = new Token(Tokens.PLUS, '+'); 62 | const expressionStatement = new ExpressionStatement( 63 | new Token(Tokens.INT, '1') 64 | ); 65 | 66 | const nestedInfixExpression = new InfixExpression( 67 | plusToken, 68 | '+', 69 | new IntegerLiteral(new Token(Tokens.INT, '1'), 1) 70 | ); 71 | 72 | nestedInfixExpression.right = new IntegerLiteral( 73 | new Token(Tokens.INT, '2'), 74 | 2 75 | ); 76 | 77 | const infixExpression = new InfixExpression( 78 | plusToken, 79 | '+', 80 | nestedInfixExpression 81 | ); 82 | 83 | infixExpression.right = new IntegerLiteral(new Token(Tokens.INT, '3'), 3); 84 | expressionStatement.expression = infixExpression; 85 | 86 | expect(statements).toEqual([expressionStatement]); 87 | }); 88 | 89 | it('parses two 
infix expressions with different precedences', () => { 90 | const input = '1 + 2 * 3'; 91 | const { statements } = parse(input); 92 | const expressionStatement = new ExpressionStatement( 93 | new Token(Tokens.INT, '1') 94 | ); 95 | 96 | const nestedInfixExpression = new InfixExpression( 97 | new Token(Tokens.ASTERISK, '*'), 98 | '*', 99 | new IntegerLiteral(new Token(Tokens.INT, '2'), 2) 100 | ); 101 | 102 | nestedInfixExpression.right = new IntegerLiteral( 103 | new Token(Tokens.INT, '3'), 104 | 3 105 | ); 106 | 107 | const infixExpression = new InfixExpression( 108 | new Token(Tokens.PLUS, '+'), 109 | '+', 110 | new IntegerLiteral(new Token(Tokens.INT, '1'), 1) 111 | ); 112 | 113 | infixExpression.right = nestedInfixExpression; 114 | expressionStatement.expression = infixExpression; 115 | 116 | expect(statements).toEqual([expressionStatement]); 117 | }); 118 | }); 119 | -------------------------------------------------------------------------------- /series/evaluation.md: -------------------------------------------------------------------------------- 1 | # Evaluation 2 | 3 | Evaluation defined the semantics of the language. 4 | 5 | This code: 6 | 7 | ``` 8 | let num = 5; 9 | 10 | if (num) { 11 | return a; 12 | } else { 13 | return b; 14 | } 15 | ``` 16 | 17 | with the evaluation process, we know if it will return `a` or `b`. 18 | 19 | - different implementations of interpreters 20 | - “tree-walking interpreters” 21 | - the evaluation step can be preceded by small optimizations that rewrite the AST (e.g. remove unused variable bindings) or convert it into another intermediate representation (IR) that’s more suitable for recursive and repeated evaluation. 
22 | - convert to bytecode (another IR of the AST) 23 | - it would be run in a virtual machine that knows how to interpret bytecode (bytecode cannot be interpreted by the CPU) 24 | - [difference between bytecode and machine code](https://www.geeksforgeeks.org/difference-between-byte-code-and-machine-code) 25 | - JIT (for “just in time”) interpreter/compiler: the virtual machine compiles the bytecode to native machine code, right before its execution - just in time 26 | - evaluating expressions 27 | - `func Eval(node ast.Node) object.Object`: it takes a node (every AST node fulfills the node interface) and returns an object 28 | - this structure will help us recursively call eval, evaluate part of the AST node and call it again to evaluate the rest 29 | - self-evaluating expressions: integers and booleans: they evaluate to themselves. If you type `10`, it will evaluate to `10`. If you type `true`, it will evaluate to `true`. 30 | - operator expressions 31 | - the operator `!` converts the operand into a boolean and then negates it 32 | - if not a boolean, the value will be treated as a truthy or falsy value. e.g. `10`. `10` is truthy, so `!10` will be converted into `!true` and then negated to `false`. 33 | - `evaluateBangOperatorExpression`: if it's not true, false, or null, it means it's a truthy value and it should be evaluated as false (converting to boolean and negating it) 34 | - infix: 35 | - integer infix expressions: `1 + 1` should evaluate to `2`. 36 | - boolean infix expressions: `true == true` should evaluate to `true` 37 | - conditionals 38 | - When evaluating this if-else-expression the important thing is to only evaluate the correct branch. 
39 | - if the condition is met: evaluate the if branch only (true or a truthy value — not null and not false) 40 | - if not: evaluate the else branch 41 | - functions 42 | - an IR object: body and parameters to evaluate the function and environment to hold internal values and enable closure (That allows for closures, which “close over” the environment they’re defined in and can later access it.) 43 | - evaluation: evaluate the function (call expression) 44 | - evaluate parameters/arguments (identifiers/expressions) 45 | - evaluate the function body (block statement): preserve the environment by extending it 46 | - don't overwrite the environment 47 | - extend it: create a new instance of Environment and point it to the current environment 48 | 49 | ## Final words & Resources 50 | 51 | If you didn't have the opportunity, take a look at the posts from the [Building an Interpreter series](https://leandrotk.github.io/series/building-an-interpreter/): 52 | 53 | - [Building an Interpreter: Lexical Analysis - Part 1](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-1.html) 54 | - [Building an Interpreter: Lexical Analysis - Part 2](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-2.html) 55 | - [Building an Interpreter: Lexical Analysis - Part 3](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-3.html) 56 | - [Building an Interpreter: REPL](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-repl.html) 57 | 58 | These are the resources I'm using to learn more about this field: 59 | 60 | - [monkey-ts](https://github.com/leandrotk/monkey-ts): the open-source project of the compiler for the TypeScript version of the Monkey programming language. 
61 | - [Programming Language Theory](https://github.com/leandrotk/programming-language-theory): a bunch of resources about my studies on Programming Language Theory & Applied PLT. 62 | - [Writing an Interpreter in Go](https://www.goodreads.com/book/show/32681092-writing-an-interpreter-in-go): the book I'm reading to learn and implement the Monkey compiler. 63 | -------------------------------------------------------------------------------- /src/lexer/lexer.ts: -------------------------------------------------------------------------------- 1 | import { Tokens, Token, TokenType, lookupIdent } from 'token'; 2 | 3 | export class Lexer { 4 | input: string; 5 | position: number; 6 | readPosition: number; 7 | char: string; 8 | 9 | INITIAL_POSITION = 0; 10 | EMPTY_CHAR = ''; 11 | 12 | constructor(input: string) { 13 | this.input = input; 14 | this.setUpInitialState(); 15 | this.readChar(); 16 | } 17 | 18 | nextToken(): Token { 19 | const token = this.getToken(); 20 | return token; 21 | } 22 | 23 | private setUpInitialState() { 24 | this.position = this.INITIAL_POSITION; 25 | this.readPosition = this.INITIAL_POSITION; 26 | this.char = this.EMPTY_CHAR; 27 | } 28 | 29 | private readChar() { 30 | if (this.readPosition >= this.input.length) { 31 | this.char = ''; 32 | } else { 33 | this.char = this.input[this.readPosition]; 34 | } 35 | 36 | this.position = this.readPosition; 37 | this.readPosition += 1; 38 | } 39 | 40 | private getToken(): Token { 41 | this.skipWhitespace(); 42 | 43 | switch (this.char) { 44 | case '=': 45 | if (this.peekChar() === '=') { 46 | this.readChar(); 47 | return this.buildToken(Tokens.EQUAL, '=='); 48 | } else { 49 | return this.buildToken(Tokens.ASSIGN, '='); 50 | } 51 | case ';': 52 | return this.buildToken(Tokens.SEMICOLON, ';'); 53 | case ':': 54 | return this.buildToken(Tokens.COLON, ':'); 55 | case '(': 56 | return this.buildToken(Tokens.LPAREN, '('); 57 | case ')': 58 | return this.buildToken(Tokens.RPAREN, ')'); 59 | case ',': 60 | return 
this.buildToken(Tokens.COMMA, ','); 61 | case '+': 62 | return this.buildToken(Tokens.PLUS, '+'); 63 | case '{': 64 | return this.buildToken(Tokens.LBRACE, '{'); 65 | case '}': 66 | return this.buildToken(Tokens.RBRACE, '}'); 67 | case '[': 68 | return this.buildToken(Tokens.LBRACKET, '['); 69 | case ']': 70 | return this.buildToken(Tokens.RBRACKET, ']'); 71 | case '!': 72 | if (this.peekChar() === '=') { 73 | this.readChar(); 74 | return this.buildToken(Tokens.NOT_EQUAL, '!='); 75 | } else { 76 | return this.buildToken(Tokens.BANG, '!'); 77 | } 78 | case '-': 79 | return this.buildToken(Tokens.MINUS, '-'); 80 | case '/': 81 | return this.buildToken(Tokens.SLASH, '/'); 82 | case '*': 83 | return this.buildToken(Tokens.ASTERISK, '*'); 84 | case '<': 85 | return this.buildToken(Tokens.LESS_THAN, '<'); 86 | case '>': 87 | return this.buildToken(Tokens.GREATER_THAN, '>'); 88 | case '': 89 | return this.buildToken(Tokens.EOF, ''); 90 | case '"': 91 | return this.buildToken(Tokens.STRING, this.readString()); 92 | default: 93 | if (this.isLetter(this.char)) { 94 | const tokenLiteral = this.readIdentifier(); 95 | const tokenType = lookupIdent(tokenLiteral); 96 | return new Token(tokenType, tokenLiteral); 97 | } 98 | 99 | if (this.isDigit(this.char)) { 100 | const tokenLiteral = this.readNumber(); 101 | return new Token(Tokens.INT, tokenLiteral); 102 | } 103 | 104 | return new Token(Tokens.ILLEGAL, this.char); 105 | } 106 | } 107 | 108 | private buildToken(type: TokenType, literal: string) { 109 | this.readChar(); 110 | return new Token(type, literal); 111 | } 112 | 113 | private readIdentifier() { 114 | const initialCharPosition = this.position; 115 | 116 | while (this.isLetter(this.char)) { 117 | this.readChar(); 118 | } 119 | 120 | return this.input.substring(initialCharPosition, this.position); 121 | } 122 | 123 | private readNumber() { 124 | const initialIntPosition = this.position; 125 | 126 | while (this.isDigit(this.char)) { 127 | this.readChar(); 128 | } 129 | 130 
| return this.input.substring(initialIntPosition, this.position); 131 | } 132 | 133 | private readString() { 134 | const position = this.position + 1; 135 | this.readChar(); 136 | 137 | while (this.char && this.char !== '"') { 138 | this.readChar(); 139 | } 140 | 141 | return this.input.slice(position, this.position); 142 | } 143 | 144 | private isLetter(char: string) { 145 | return ( 146 | ('a' <= char && char <= 'z') || 147 | ('A' <= char && char <= 'Z') || 148 | char === '_' 149 | ); 150 | } 151 | 152 | private isDigit(char: string) { 153 | return '0' <= char && char <= '9'; 154 | } 155 | 156 | private skipWhitespace() { 157 | while ( 158 | this.char == ' ' || 159 | this.char == '\t' || 160 | this.char == '\n' || 161 | this.char == '\r' 162 | ) { 163 | this.readChar(); 164 | } 165 | } 166 | 167 | private peekChar() { 168 | if (this.readPosition >= this.input.length) { 169 | return ''; 170 | } else { 171 | return this.input[this.readPosition]; 172 | } 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /series/parser-part-2.md: -------------------------------------------------------------------------------- 1 | # Parser - Part 2: 2 | 3 | - everything besides let and return statements is an expression. 4 | - prefix operators: `-5, !true, !false` 5 | - infix operators: `5+5, 5-5, 5/5, 5*5` 6 | - arithmetic operators: `foo == bar, foo != bar, foo < bar, foo > bar` 7 | - parenthesis to influence order of evaluation: `5 * (5 + 5), ((5 + 5) * 5) * 5` 8 | - call expressions: `add(2, 3), add(add(2, 3), add(5, 10)), max(5, add(5, (5 * 5)))` 9 | - identifiers are expressions: `foo * bar / foobar, add(foo, bar)` 10 | - functions are expressions: `(fn(x) { return x }(5) + 10 ) * 10` 11 | - if expressions: `let result = if (10 > 5) { true } else { false }; result // => true` 12 | 13 | terminology for expressions 14 | 15 | - A prefix operator is an operator “in front of” its operand. 
Example: `--5` 16 | - A postfix operator is an operator “after” its operand. Example: `foobar++` 17 | - An infix operator sits between its operands, like this: `5*8`. Infix operators appear in binary expressions - where the operator has two operands. 18 | - operator precedence or order of operations: which priority different operators have. 19 | 20 | ## Implementing the Pratt parser 21 | 22 | Association of parsing functions (which Pratt calls “semantic code”) with token types. 23 | 24 | - semicolons are optional: easier to type 5 + 5 into the REPL 25 | - precedence: using an enum starting with the value `1` — we can see the order of operations in the enum. The bigger the value, the higher the precedence 26 | - prefix operators: `<prefix operator><expression>;` 27 | - e.g. `!isGreaterThanZero(10);` 28 | - operator: the prefix operator 29 | - right: the expression after the operator 30 | - infix operators: `<expression> <infix operator> <expression>` 31 | - because of the two expressions, they are also called binary expressions 32 | - e.g. 33 | - `5 + 5;` 34 | - `5 - 5;` 35 | - `5 * 5;` 36 | - `5 / 5;` 37 | - `5 > 5;` 38 | - `5 < 5;` 39 | - `5 == 5;` 40 | - `5 != 5;` 41 | - expressions with higher precedence operators should be deeper in the tree than expressions with lower precedence operators. 42 | - boolean literals 43 | - e.g. 44 | - `true;` 45 | - `false;` 46 | - `let foobar = true;` 47 | - `let barfoo = false;` 48 | - grouped expressions 49 | - e.g. 50 | - `1 + (2 + 3) + 4` 51 | - `(5 + 5) * 2` 52 | - If expressions 53 | - `let value = if (x > y) { x } else { y };`: this if-else expression will return a value, which is assigned to the `value` variable. 54 | - The structure of an if-else expression: `if (<condition>) <consequence> else <alternative>` 55 | - `IfExpression` AST: `condition` holds the condition, which can be any expression, and `consequence` and `alternative` point to the consequence and alternative of the conditional 56 | - Function literals expression 57 | - e.g. 
`fn(x, y) { return x + y; }` 58 | - function literals are expressions 59 | - abstract structure: `fn <parameters> <block statement>` 60 | - two main parts of function literals 61 | - parameters are just a list of identifiers: `(<identifier>, <identifier>, <identifier>, ...)` 62 | - function's body as the block statement 63 | - different usages of function literals 64 | - the list of parameters can be empty: `fn() { return foobar + barfoo; }` 65 | - function literal as the expression in a let statement: `let myFunction = fn(x, y) { return x + y; }` 66 | - function literal as the expression in a return statement: `fn() { return fn(x, y) { return x > y; }; }` 67 | - function literal as an argument when calling another function: `myFunc(x, y, fn(x, y) { return x > y; });` 68 | - Call expressions 69 | - structure: `<expression>(<comma-separated expressions>)` 70 | - e.g. 71 | - simple integer literal expressions as arguments: `add(2, 3)` 72 | - infix expressions as arguments: `add(2 + 2, 3 * 3 * 3)` 73 | - call directly from the function literal: `fn(x, y) { x + y; }(2, 3)` 74 | - function literal as argument: `callsFunction(2, 3, fn(x, y) { x + y; });` 75 | 76 | ## Final words & Resources 77 | 78 | If you didn't have the opportunity, take a look at the posts from the [Building an Interpreter series](https://leandrotk.github.io/series/building-an-interpreter/): 79 | 80 | - [Building an Interpreter: Lexical Analysis - Part 1](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-1.html) 81 | - [Building an Interpreter: Lexical Analysis - Part 2](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-2.html) 82 | - [Building an Interpreter: Lexical Analysis - Part 3](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-3.html) 83 | - [Building an Interpreter: REPL](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-repl.html) 84 | 85 | These are the resources I'm using to learn more about this 
field: 86 | 87 | - [monkey-ts](https://github.com/leandrotk/monkey-ts): the open-source project of the compiler for the TypeScript version of the Monkey programming language. 88 | - [Programming Language Theory](https://github.com/leandrotk/programming-language-theory): a bunch of resources about my studies on Programming Language Theory & Applied PLT. 89 | - [Writing an Interpreter in Go](https://www.goodreads.com/book/show/32681092-writing-an-interpreter-in-go): the book I'm reading to learn and implement the Monkey compiler. 90 | -------------------------------------------------------------------------------- /src/object/object.ts: -------------------------------------------------------------------------------- 1 | import { BlockStatement, Identifier } from 'ast'; 2 | import { Environment } from 'object/environment'; 3 | 4 | type ObjectType = string; 5 | type type = () => ObjectType; 6 | type inspect = (error?: ErrorObject) => string; 7 | 8 | export interface EvalObject { 9 | type: type; 10 | inspect: inspect; 11 | } 12 | 13 | export enum ObjectTypes { 14 | INTEGER = 'INTEGER', 15 | BOOLEAN = 'BOOLEAN', 16 | NULL = 'NULL', 17 | RETURN_VALUE = 'RETURN_VALUE', 18 | ERROR = 'ERROR', 19 | FUNCTION = 'FUNCTION', 20 | STRING = 'STRING', 21 | BUILTIN = 'BUILTIN', 22 | ARRAY = 'ARRAY', 23 | HASH = 'HASH', 24 | } 25 | 26 | export class Integer implements EvalObject { 27 | value: number; 28 | 29 | constructor(value: number) { 30 | this.value = value; 31 | } 32 | 33 | type() { 34 | return ObjectTypes.INTEGER; 35 | } 36 | 37 | inspect() { 38 | return this.value.toString(); 39 | } 40 | 41 | hashKey() { 42 | return JSON.stringify(new HashKey(this.type(), this.value)); 43 | } 44 | } 45 | 46 | export class BooleanLiteral implements EvalObject { 47 | value: boolean; 48 | 49 | constructor(value: boolean) { 50 | this.value = value; 51 | } 52 | 53 | type() { 54 | return ObjectTypes.BOOLEAN; 55 | } 56 | 57 | inspect() { 58 | return this.value.toString(); 59 | } 60 | 61 | hashKey() { 
62 | return JSON.stringify(new HashKey(this.type(), this.value ? 1 : 0)); 63 | } 64 | } 65 | 66 | export class Null implements EvalObject { 67 | value: null = null; 68 | 69 | type() { 70 | return ObjectTypes.NULL; 71 | } 72 | 73 | inspect() { 74 | return 'null'; 75 | } 76 | } 77 | 78 | export class ReturnValue implements EvalObject { 79 | value: EvalObject; 80 | 81 | constructor(value: EvalObject) { 82 | this.value = value; 83 | } 84 | 85 | type() { 86 | return ObjectTypes.RETURN_VALUE; 87 | } 88 | 89 | inspect() { 90 | return this.value.inspect(); 91 | } 92 | } 93 | 94 | export class ErrorObject implements EvalObject { 95 | message: string; 96 | 97 | constructor(message: string) { 98 | this.message = message; 99 | } 100 | 101 | type() { 102 | return ObjectTypes.ERROR; 103 | } 104 | 105 | inspect() { 106 | return `ERROR: ${this.message}`; 107 | } 108 | } 109 | 110 | export class FunctionObject implements EvalObject { 111 | parameters: Identifier[]; 112 | body: BlockStatement; 113 | env: Environment; 114 | 115 | constructor( 116 | parameters: Identifier[], 117 | body: BlockStatement, 118 | env: Environment 119 | ) { 120 | this.parameters = parameters; 121 | this.body = body; 122 | this.env = env; 123 | } 124 | 125 | type() { 126 | return ObjectTypes.FUNCTION; 127 | } 128 | 129 | inspect() { 130 | return ` 131 | fn(${this.parameters.join(', ')}) { 132 | ${this.body.string()} 133 | } 134 | `; 135 | } 136 | } 137 | 138 | export class StringObject implements EvalObject { 139 | value: string; 140 | 141 | constructor(value: string) { 142 | this.value = value; 143 | } 144 | 145 | type() { 146 | return ObjectTypes.STRING; 147 | } 148 | 149 | inspect() { 150 | return this.value; 151 | } 152 | 153 | hashKey() { 154 | return JSON.stringify(new HashKey(this.type(), this.hashCode(this.value))); 155 | } 156 | 157 | private hashCode(str: string) { 158 | let hash = 0; 159 | let chr; 160 | 161 | if (str.length === 0) return hash; 162 | 163 | for (let i = 0; i < str.length; i++) { 
164 | chr = str.charCodeAt(i); 165 | hash = (hash << 5) - hash + chr; 166 | hash |= 0; 167 | } 168 | 169 | return hash; 170 | } 171 | } 172 | 173 | type BuiltingFunction = (...args: EvalObject[]) => EvalObject; 174 | 175 | export class Builtin implements EvalObject { 176 | fn: BuiltingFunction; 177 | 178 | constructor(fn: BuiltingFunction) { 179 | this.fn = fn; 180 | } 181 | 182 | type() { 183 | return ObjectTypes.BUILTIN; 184 | } 185 | 186 | inspect() { 187 | return 'builting function'; 188 | } 189 | } 190 | 191 | export class ArrayObject implements EvalObject { 192 | elements: EvalObject[]; 193 | 194 | constructor(elements: EvalObject[]) { 195 | this.elements = elements; 196 | } 197 | 198 | type() { 199 | return ObjectTypes.ARRAY; 200 | } 201 | 202 | inspect() { 203 | return `[${this.elements.map((element) => element.inspect()).join(', ')}]`; 204 | } 205 | } 206 | 207 | export class HashKey { 208 | type: ObjectType; 209 | value: number; 210 | 211 | constructor(type: ObjectType, value: number) { 212 | this.type = type; 213 | this.value = value; 214 | } 215 | } 216 | 217 | export class HashPair { 218 | key: EvalObject; 219 | value: EvalObject; 220 | 221 | constructor(key: EvalObject, value: EvalObject) { 222 | this.key = key; 223 | this.value = value; 224 | } 225 | } 226 | 227 | export class Hash implements EvalObject { 228 | pairs: Map; 229 | 230 | constructor(pairs: Map) { 231 | this.pairs = pairs; 232 | } 233 | 234 | type() { 235 | return ObjectTypes.HASH; 236 | } 237 | 238 | inspect() { 239 | const pairs = []; 240 | 241 | for (const [_, pair] of this.pairs.entries()) { 242 | pairs.push(`${pair.key.inspect()}:${pair.value.inspect()}`); 243 | } 244 | 245 | return `{${pairs.join(', ')}}`; 246 | } 247 | } 248 | -------------------------------------------------------------------------------- /src/lexer/tests/lexer.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from 'vitest'; 2 | import { 
Tokens, Token } from 'token'; 3 | import { Lexer } from 'lexer'; 4 | 5 | describe('Lexer', () => { 6 | it('verifies tokens', () => { 7 | const input = ` 8 | let five = 5; 9 | let ten = 10; 10 | 11 | let add = fn(x, y) { 12 | x + y; 13 | }; 14 | 15 | let result = add(five, ten); 16 | !-/*5; 17 | 5 < 10 > 5; 18 | 19 | if (5 < 10) { 20 | return true; 21 | } else { 22 | return false; 23 | } 24 | 25 | 10 == 10; 26 | 10 != 9; 27 | 28 | "foobar"; 29 | "foo bar"; 30 | [9, 9]; 31 | 32 | {"foo": "bar"} 33 | `; 34 | 35 | const tokens: Token[] = [ 36 | { type: Tokens.LET, literal: 'let' }, 37 | { type: Tokens.IDENT, literal: 'five' }, 38 | { type: Tokens.ASSIGN, literal: '=' }, 39 | { type: Tokens.INT, literal: '5' }, 40 | { type: Tokens.SEMICOLON, literal: ';' }, 41 | { type: Tokens.LET, literal: 'let' }, 42 | { type: Tokens.IDENT, literal: 'ten' }, 43 | { type: Tokens.ASSIGN, literal: '=' }, 44 | { type: Tokens.INT, literal: '10' }, 45 | { type: Tokens.SEMICOLON, literal: ';' }, 46 | { type: Tokens.LET, literal: 'let' }, 47 | { type: Tokens.IDENT, literal: 'add' }, 48 | { type: Tokens.ASSIGN, literal: '=' }, 49 | { type: Tokens.FUNCTION, literal: 'fn' }, 50 | { type: Tokens.LPAREN, literal: '(' }, 51 | { type: Tokens.IDENT, literal: 'x' }, 52 | { type: Tokens.COMMA, literal: ',' }, 53 | { type: Tokens.IDENT, literal: 'y' }, 54 | { type: Tokens.RPAREN, literal: ')' }, 55 | { type: Tokens.LBRACE, literal: '{' }, 56 | { type: Tokens.IDENT, literal: 'x' }, 57 | { type: Tokens.PLUS, literal: '+' }, 58 | { type: Tokens.IDENT, literal: 'y' }, 59 | { type: Tokens.SEMICOLON, literal: ';' }, 60 | { type: Tokens.RBRACE, literal: '}' }, 61 | { type: Tokens.SEMICOLON, literal: ';' }, 62 | { type: Tokens.LET, literal: 'let' }, 63 | { type: Tokens.IDENT, literal: 'result' }, 64 | { type: Tokens.ASSIGN, literal: '=' }, 65 | { type: Tokens.IDENT, literal: 'add' }, 66 | { type: Tokens.LPAREN, literal: '(' }, 67 | { type: Tokens.IDENT, literal: 'five' }, 68 | { type: Tokens.COMMA, literal: ',' 
}, 69 | { type: Tokens.IDENT, literal: 'ten' }, 70 | { type: Tokens.RPAREN, literal: ')' }, 71 | { type: Tokens.SEMICOLON, literal: ';' }, 72 | { type: Tokens.BANG, literal: '!' }, 73 | { type: Tokens.MINUS, literal: '-' }, 74 | { type: Tokens.SLASH, literal: '/' }, 75 | { type: Tokens.ASTERISK, literal: '*' }, 76 | { type: Tokens.INT, literal: '5' }, 77 | { type: Tokens.SEMICOLON, literal: ';' }, 78 | { type: Tokens.INT, literal: '5' }, 79 | { type: Tokens.LESS_THAN, literal: '<' }, 80 | { type: Tokens.INT, literal: '10' }, 81 | { type: Tokens.GREATER_THAN, literal: '>' }, 82 | { type: Tokens.INT, literal: '5' }, 83 | { type: Tokens.SEMICOLON, literal: ';' }, 84 | { type: Tokens.IF, literal: 'if' }, 85 | { type: Tokens.LPAREN, literal: '(' }, 86 | { type: Tokens.INT, literal: '5' }, 87 | { type: Tokens.LESS_THAN, literal: '<' }, 88 | { type: Tokens.INT, literal: '10' }, 89 | { type: Tokens.RPAREN, literal: ')' }, 90 | { type: Tokens.LBRACE, literal: '{' }, 91 | { type: Tokens.RETURN, literal: 'return' }, 92 | { type: Tokens.TRUE, literal: 'true' }, 93 | { type: Tokens.SEMICOLON, literal: ';' }, 94 | { type: Tokens.RBRACE, literal: '}' }, 95 | { type: Tokens.ELSE, literal: 'else' }, 96 | { type: Tokens.LBRACE, literal: '{' }, 97 | { type: Tokens.RETURN, literal: 'return' }, 98 | { type: Tokens.FALSE, literal: 'false' }, 99 | { type: Tokens.SEMICOLON, literal: ';' }, 100 | { type: Tokens.RBRACE, literal: '}' }, 101 | { type: Tokens.INT, literal: '10' }, 102 | { type: Tokens.EQUAL, literal: '==' }, 103 | { type: Tokens.INT, literal: '10' }, 104 | { type: Tokens.SEMICOLON, literal: ';' }, 105 | { type: Tokens.INT, literal: '10' }, 106 | { type: Tokens.NOT_EQUAL, literal: '!=' }, 107 | { type: Tokens.INT, literal: '9' }, 108 | { type: Tokens.SEMICOLON, literal: ';' }, 109 | { type: Tokens.STRING, literal: 'foobar' }, 110 | { type: Tokens.SEMICOLON, literal: ';' }, 111 | { type: Tokens.STRING, literal: 'foo bar' }, 112 | { type: Tokens.SEMICOLON, literal: ';' }, 113 | { 
type: Tokens.LBRACKET, literal: '[' }, 114 | { type: Tokens.INT, literal: '9' }, 115 | { type: Tokens.COMMA, literal: ',' }, 116 | { type: Tokens.INT, literal: '9' }, 117 | { type: Tokens.RBRACKET, literal: ']' }, 118 | { type: Tokens.SEMICOLON, literal: ';' }, 119 | { type: Tokens.LBRACE, literal: '{' }, 120 | { type: Tokens.STRING, literal: 'foo' }, 121 | { type: Tokens.COLON, literal: ':' }, 122 | { type: Tokens.STRING, literal: 'bar' }, 123 | { type: Tokens.RBRACE, literal: '}' }, 124 | { type: Tokens.EOF, literal: '' }, 125 | ]; 126 | 127 | const lexer = new Lexer(input); 128 | 129 | tokens.forEach(({ type, literal }) => { 130 | const inputToken = lexer.nextToken(); 131 | 132 | expect(inputToken.type).toEqual(type); 133 | expect(inputToken.literal).toEqual(literal); 134 | }); 135 | }); 136 | }); 137 | -------------------------------------------------------------------------------- /series/the-REPL.md: -------------------------------------------------------------------------------- 1 | # The REPL 2 | 3 | This post is part of a series called [Building an Interpreter](https://leandrotk.github.io/series/building-an-interpreter/). 4 | 5 | Now that we implemented the [first steps of our lexer](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-1.html), [more complex tokens](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-2.html), and [extended the token set with special characters](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-3.html), we want to take a step back and implement a REPL and print the tokens using our lexer. 6 | 7 | A REPL stands for Read-Eval-Print-Loop, and it is an interactive environment that "reads" the input, "evaluates" and "prints" it. And then do it all over again (loop). 8 | 9 | As we only have the token yet, we'll just print tokens related to the user input. 
10 | 11 | An example would be if we type this in the REPL: 12 | 13 | ```bash 14 | > let a = 1; 15 | ``` 16 | 17 | We'll get the tokens related to this input: 18 | 19 | ```bash 20 | Token { type: 'LET', literal: 'let' } 21 | Token { type: 'IDENT', literal: 'a' } 22 | Token { type: '=', literal: '=' } 23 | Token { type: 'INT', literal: '1' } 24 | Token { type: ';', literal: ';' } 25 | ``` 26 | 27 | Nice, let's implement it! 28 | 29 | ## Building the REPL 30 | 31 | To build the REPL, I listed some ideas behind it: 32 | 33 | - We need to share a prompt to read the user input 34 | - When the user types code and presses Enter, we should print the tokens related to the input 35 | - After printing the tokens, we need to share a prompt again 36 | - If the user types "exit" or "quit", we want to close the REPL 37 | 38 | These are the building blocks. 39 | 40 | To share the prompt and read the user input, we can use the `readline` from Node's API. 41 | 42 | ```jsx 43 | import readline from 'readline'; 44 | 45 | const scanner = readline.createInterface({ 46 | input: process.stdin, 47 | output: process.stdout, 48 | }); 49 | 50 | scanner.question('> ', (input) => { 51 | // do stuff 52 | }); 53 | ``` 54 | 55 | With this code, we can share the prompt with `>` and read the user input. 56 | 57 | Now that we have the user input, we need to check whether the input is `"quit"` or `"exit"`. If it is, just close the REPL. 58 | 59 | ```jsx 60 | const ScannerClose = { 61 | exit: 'exit', 62 | quit: 'quit', 63 | }; 64 | 65 | const exits = [ScannerClose.exit, ScannerClose.quit]; 66 | 67 | if (exits.includes(input)) return scanner.close(); 68 | ``` 69 | 70 | We build an object and an array to hold all the possible ways to exit the REPL, and verify whether the input is included in these possible exits. If it is, close the REPL. If it isn't, we are able to print the tokens. 
71 | 72 | To print the tokens, we need to instantiate our Lexer class with the input, and print token by token until it gets an `EOF` token type. 73 | 74 | ```jsx 75 | import { Tokens } from '../token'; 76 | import { Lexer } from '../lexer'; 77 | 78 | const lexer = new Lexer(input); 79 | 80 | for ( 81 | let token = lexer.nextToken(); 82 | token.type !== Tokens.EOF; 83 | token = lexer.nextToken() 84 | ) { 85 | console.log(token); 86 | } 87 | ``` 88 | 89 | After printing the token, we want to share the prompt again for the user to type more code. We can do this with a recursive approach. 90 | 91 | The idea is to wrap all this code into a function and call itself in the end like this: 92 | 93 | ```jsx 94 | function repl() { 95 | scanner.question('> ', (input) => { 96 | if (exits.includes(input)) return scanner.close(); 97 | 98 | const lexer = new Lexer(input); 99 | 100 | for ( 101 | let token = lexer.nextToken(); 102 | token.type !== Tokens.EOF; 103 | token = lexer.nextToken() 104 | ) { 105 | console.log(token); 106 | } 107 | 108 | repl(); 109 | }); 110 | } 111 | ``` 112 | 113 | To finish the REPL, I wanted to wrap this code into a `startRepl` function with a "Welcome to monkey.ts" print before letting the user type code. 
114 | 115 | ```jsx 116 | import readline from 'readline'; 117 | import { Tokens } from '../token'; 118 | import { Lexer } from '../lexer'; 119 | 120 | const ScannerClose = { 121 | exit: 'exit', 122 | quit: 'quit', 123 | }; 124 | 125 | const exits = [ScannerClose.exit, ScannerClose.quit]; 126 | 127 | export function startRepl() { 128 | const scanner = readline.createInterface({ 129 | input: process.stdin, 130 | output: process.stdout, 131 | }); 132 | 133 | function repl() { 134 | scanner.question('> ', (input) => { 135 | if (exits.includes(input)) return scanner.close(); 136 | 137 | const lexer = new Lexer(input); 138 | 139 | for ( 140 | let token = lexer.nextToken(); 141 | token.type !== Tokens.EOF; 142 | token = lexer.nextToken() 143 | ) { 144 | console.log(token); 145 | } 146 | 147 | repl(); 148 | }); 149 | } 150 | 151 | console.log('Welcome to monkey.ts'); 152 | repl(); 153 | } 154 | ``` 155 | 156 | And now we can call it anywhere in our code to start the REPL. 157 | 158 | ## **Final words & Resources** 159 | 160 | If you didn't have the opportunity, take a look at the posts from the [Building an Interpreter series](https://leandrotk.github.io/series/building-an-interpreter/): 161 | 162 | - [Building an Interpreter: Lexical Analysis - Part 1](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-1.html) 163 | - [Building an Interpreter: Lexical Analysis - Part 2](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-2.html) 164 | - [Building an Interpreter: Lexical Analysis - Part 3](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-3.html) 165 | 166 | These are the resources I'm using to learn more about this field: 167 | 168 | - [Crafting an Interpreter](https://github.com/imteekay/crafting-an-interpreter): the open-source project of the compiler for the TypeScript version of the Monkey programming 
language. 169 | - [Crafting an Interpreter - REPL PR](https://github.com/imteekay/crafting-an-interpreter/pull/4) 170 | - [Programming Language Theory](https://github.com/leandrotk/programming-language-theory): a bunch of resources about my studies on Programming Language Theory & Applied PLT. 171 | - [Writing an Interpreter in Go](https://www.goodreads.com/book/show/32681092-writing-an-interpreter-in-go): the book I'm reading to learn and implement the Monkey compiler. 172 | -------------------------------------------------------------------------------- /series/lexical-analysis-part-3.md: -------------------------------------------------------------------------------- 1 | # Building an Interpreter: Lexical Analysis - Part 3 2 | 3 | This post is part of a series called [Building an Interpreter](https://leandrotk.github.io/series/building-an-interpreter/). After implementing a [basic lexer](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-1.html) and [building more tokens](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-2.html), we'll extend the token set to work with special characters, new keywords, and the "equal" and "not equal" symbols. 4 | 5 | Let's add support for `==`, `!`, `!=`, `-`, `/`, `*`, `<`, `>`, and the keywords `true`, `false`, `if`, `else`, and `return`. 6 | 7 | ## Single characters as tokens 8 | 9 | First, the single characters, as they are the easiest ones to handle in the lexer. 10 | 11 | We add this to the test: 12 | 13 | ```jsx 14 | const input = ` 15 | !-/*5; 16 | 5 < 10 > 5; 17 | `; 18 | ``` 19 | 20 | Add new tokens: 21 | 22 | ```jsx 23 | export enum Tokens { 24 | // ... 25 | MINUS = '-', 26 | BANG = '!', 27 | ASTERISK = '*', 28 | SLASH = '/', 29 | LESS_THAN = '<', 30 | GREATER_THAN = '>', 31 | // ... 
32 | } 33 | ``` 34 | 35 | And finally, add the expectations in the test: 36 | 37 | ```jsx 38 | const tokens: Token[] = [ 39 | // ... 40 | { type: Tokens.BANG, literal: '!' }, 41 | { type: Tokens.MINUS, literal: '-' }, 42 | { type: Tokens.SLASH, literal: '/' }, 43 | { type: Tokens.ASTERISK, literal: '*' }, 44 | { type: Tokens.INT, literal: '5' }, 45 | { type: Tokens.SEMICOLON, literal: ';' }, 46 | { type: Tokens.INT, literal: '5' }, 47 | { type: Tokens.LESS_THAN, literal: '<' }, 48 | { type: Tokens.INT, literal: '10' }, 49 | { type: Tokens.GREATER_THAN, literal: '>' }, 50 | { type: Tokens.INT, literal: '5' }, 51 | { type: Tokens.SEMICOLON, literal: ';' }, 52 | // ... 53 | ]; 54 | ``` 55 | 56 | Now we just need to implement the lexer part to generate these token based on the source code: 57 | 58 | ```jsx 59 | private getToken(): Token { 60 | this.skipWhitespace(); 61 | 62 | switch (this.char) { 63 | // ... 64 | case '!': 65 | return this.buildToken(Tokens.BANG, '!'); 66 | case '-': 67 | return this.buildToken(Tokens.MINUS, '-'); 68 | case '/': 69 | return this.buildToken(Tokens.SLASH, '/'); 70 | case '*': 71 | return this.buildToken(Tokens.ASTERISK, '*'); 72 | case '<': 73 | return this.buildToken(Tokens.LESS_THAN, '<'); 74 | case '>': 75 | return this.buildToken(Tokens.GREATER_THAN, '>'); 76 | // ... 77 | } 78 | } 79 | ``` 80 | 81 | If we run the tests again, we make all green and passing. 82 | 83 | ## Building new keywords as tokens 84 | 85 | The process of building the tokens for the new keyword is pretty similar to the single characters. 86 | 87 | Add the input to the test: 88 | 89 | ```jsx 90 | const input = ` 91 | if (5 < 10) { 92 | return true; 93 | } else { 94 | return false; 95 | } 96 | `; 97 | ``` 98 | 99 | Now add the expected tokens in the test: 100 | 101 | ```jsx 102 | const tokens: Token[] = [ 103 | // ... 
104 | { type: Tokens.IF, literal: 'if' }, 105 | { type: Tokens.LPAREN, literal: '(' }, 106 | { type: Tokens.INT, literal: '5' }, 107 | { type: Tokens.LESS_THAN, literal: '<' }, 108 | { type: Tokens.INT, literal: '10' }, 109 | { type: Tokens.RPAREN, literal: ')' }, 110 | { type: Tokens.LBRACE, literal: '{' }, 111 | { type: Tokens.RETURN, literal: 'return' }, 112 | { type: Tokens.TRUE, literal: 'true' }, 113 | { type: Tokens.SEMICOLON, literal: ';' }, 114 | { type: Tokens.RBRACE, literal: '}' }, 115 | { type: Tokens.ELSE, literal: 'else' }, 116 | { type: Tokens.LBRACE, literal: '{' }, 117 | { type: Tokens.RETURN, literal: 'return' }, 118 | { type: Tokens.FALSE, literal: 'false' }, 119 | { type: Tokens.SEMICOLON, literal: ';' }, 120 | { type: Tokens.RBRACE, literal: '}' }, 121 | // ... 122 | ]; 123 | ``` 124 | 125 | And the new tokens: 126 | 127 | ```jsx 128 | export enum Tokens { 129 | // ... 130 | TRUE = 'TRUE', 131 | FALSE = 'FALSE', 132 | IF = 'IF', 133 | ELSE = 'ELSE', 134 | RETURN = 'RETURN', 135 | } 136 | ``` 137 | 138 | But the difference is that we also need to update the `Keywords` object to having the new tokens and be used in the `lookupIdent` function: 139 | 140 | ```jsx 141 | const Keywords: KeywordsType = { 142 | // ... 143 | true: Tokens.TRUE, 144 | false: Tokens.FALSE, 145 | if: Tokens.IF, 146 | else: Tokens.ELSE, 147 | return: Tokens.RETURN, 148 | }; 149 | ``` 150 | 151 | Running the tests again, we get all green and passing. 152 | 153 | ## The Equal and Not Equal operators 154 | 155 | What we want to do now is to build the Equal and Not Equal tokens. We start adding the source code we need to handle: 156 | 157 | ```jsx 158 | const input = ` 159 | 10 == 10; 160 | 10 != 9; 161 | `; 162 | ``` 163 | 164 | The expected tokens are: 165 | 166 | ```jsx 167 | const tokens: Token[] = [ 168 | // ... 
169 | { type: Tokens.INT, literal: '10' }, 170 | { type: Tokens.EQUAL, literal: '==' }, 171 | { type: Tokens.INT, literal: '10' }, 172 | { type: Tokens.SEMICOLON, literal: ';' }, 173 | { type: Tokens.INT, literal: '10' }, 174 | { type: Tokens.NOT_EQUAL, literal: '!=' }, 175 | { type: Tokens.INT, literal: '9' }, 176 | { type: Tokens.SEMICOLON, literal: ';' }, 177 | // ... 178 | ]; 179 | ``` 180 | 181 | Then we need to add the new tokens: 182 | 183 | ```jsx 184 | export enum Tokens { 185 | // ... 186 | EQUAL = '==', 187 | NOT_EQUAL = '!=', 188 | // ... 189 | } 190 | ``` 191 | 192 | Now we are ready to implement the lexer part for these new tokens. 193 | 194 | First, the `==`. Every time we get the character `=`, we need to be aware of if the next character is a `=` char. If it is, we return the token type `Equal`. If it's not, we just return the token type `Assign`. 195 | 196 | To search for the next character, let's build a new method to handle that for us: `peekChar`. 197 | 198 | ```jsx 199 | private peekChar() { 200 | if (this.readPosition >= this.input.length) { 201 | return ''; 202 | } else { 203 | return this.input[this.readPosition]; 204 | } 205 | } 206 | ``` 207 | 208 | It's a simple method: if we get to the end of the source code, we return an empty string. Otherwise, it returns the next character. 209 | 210 | Now it becomes very easy to implement the lexer algorithm for the `==` token: 211 | 212 | ```jsx 213 | switch (this.char) { 214 | // ... 215 | case '=': 216 | if (this.peekChar() === '=') { 217 | this.readChar(); 218 | return this.buildToken(Tokens.EQUAL, '=='); 219 | } else { 220 | return this.buildToken(Tokens.ASSIGN, '='); 221 | } 222 | // ... 223 | } 224 | ``` 225 | 226 | Inside the case of a `=` character, we see if the next character is also `=` with the help of our new method `peekChar`. 227 | 228 | If it is, read the next character to update the `position` and the `readPosition`'s states and return the new token type `EQUAL`. 
229 | 230 | If it is not, just return the already implemented token type `ASSIGN`. 231 | 232 | We actually do this same implementation for the `NOT_EQUAL` token type: 233 | 234 | ```jsx 235 | switch (this.char) { 236 | // ... 237 | case '!': 238 | if (this.peekChar() === '=') { 239 | this.readChar(); 240 | return this.buildToken(Tokens.NOT_EQUAL, '!='); 241 | } else { 242 | return this.buildToken(Tokens.BANG, '!'); 243 | } 244 | // ... 245 | } 246 | ``` 247 | 248 | But now we are looking at the `!` character. 249 | 250 | ## **Final words & Resources** 251 | 252 | If you didn't have the opportunity, take a look at the [first](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-1.html) and the [second part of the Lexical Analysis](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-2.html). This is the third post about my journey learning compilers and studying programming language theory. And part of the [Building an Interpreter series](https://leandrotk.github.io/series/building-an-interpreter/). 253 | 254 | These are the resources I'm using to learn more about this field: 255 | 256 | - [Crafting an Interpreter](https://github.com/imteekay/crafting-an-interpreter): the open source project of the compiler for the TypeScript version of the Monkey programming language. 257 | - [Programming Language Theory](https://github.com/leandrotk/programming-language-theory): a bunch of resources about my studies on Programming Language Theory & Applied PLT. 258 | - [Writing an Interpreter in Go](https://www.goodreads.com/book/show/32681092-writing-an-interpreter-in-go): the book I'm reading to learn and implement the Monkey compiler. 
259 | -------------------------------------------------------------------------------- /series/lexical-analysis-part-1.md: -------------------------------------------------------------------------------- 1 | # Building an Interpreter: Lexical Analysis - Part 1 2 | 3 | Lexical Analysis is the process of transforming the source code into tokens. Tokens are an accessible form to ease the way we interpret the programming language. 4 | 5 | ![Glasses on top of a notebook](https://leandrotk.github.io/series/building-an-interpreter/assets/analysis.jpg) 6 | 7 | The book `Writing an Interpreter in Go` shows a very simple example to illustrate how lexing works. Imagine this source code: 8 | 9 | ```jsx 10 | let x = 5 + 5; 11 | ``` 12 | 13 | We read this source code and generate tokens like this: 14 | 15 | ```jsx 16 | [ 17 | LET, 18 | IDENTIFIER('x'), 19 | EQUAL_SIGN, 20 | INTEGER(5), 21 | PLUS_SIGN, 22 | INTEGER(5), 23 | SEMICOLON, 24 | ]; 25 | ``` 26 | 27 | - `let` will be the `LET` token 28 | - `x` will be the `IDENTIFIER` token with literal `"x"` 29 | - `=` will be the `EQUAL_SIGN` token 30 | - `5` will be the `INTEGER` token with literal `5` 31 | - `+` will be the `PLUS_SIGN` token 32 | - `5` will be the `INTEGER` token with literal `5` again 33 | - `;` will be the `SEMICOLON` token 34 | 35 | Note that we don't count "spaces" as tokens. 36 | 37 | ## Defining tokens 38 | 39 | To define tokens, I created a class to represent and to create tokens when we start to analyze our source code. 40 | 41 | ```tsx 42 | export type TokenType = string; 43 | 44 | export class Token { 45 | type: TokenType; 46 | literal: string; 47 | 48 | constructor(type: TokenType, literal: string) { 49 | this.type = type; 50 | this.literal = literal; 51 | } 52 | } 53 | ``` 54 | 55 | The implementation is very simple. It contains the token type and the literal value. A simple example would be a token like the basic `+` operator. 
We create it like this: 56 | 57 | ```tsx 58 | const plusToken = new Token('PLUS', '+'); 59 | ``` 60 | 61 | It has the type `PLUS` and the literal value `+`. 62 | 63 | Now let's define all the possible token types for the Monkey language. 64 | 65 | ```tsx 66 | export enum Tokens { 67 | ILLEGAL = 'ILLEGAL', 68 | EOF = 'EOF', 69 | IDENT = 'IDENT', 70 | INT = 'INT', 71 | ASSIGN = '=', 72 | PLUS = '+', 73 | COMMA = ',', 74 | SEMICOLON = ';', 75 | LPAREN = '(', 76 | RPAREN = ')', 77 | LBRACE = '{', 78 | RBRACE = '}', 79 | FUNCTION = 'FUNCTION', 80 | LET = 'LET', 81 | } 82 | ``` 83 | 84 | Now we can use the defined tokens instead of a random string. Let's see the `+` example again: 85 | 86 | ```tsx 87 | const plusToken = new Token(Tokens.PLUS, '+'); 88 | ``` 89 | 90 | Nice! 91 | 92 | ## Lexer 93 | 94 | As we saw earlier, the lexer receives the source code and outputs tokens, which are a more accessible representation of the source code. 95 | 96 | Our lexer will receive the source code input and it'll have a method called `nextToken` to output each token while reading the source code. 97 | 98 | To validate our `Lexer` code, let's add tests to match tokens. 
99 | 100 | ```tsx 101 | import { Tokens, Token } from 'src/token'; 102 | import { Lexer } from '../lexer'; 103 | 104 | describe('Lexer', () => { 105 | it('matches each token', () => { 106 | const input = '=+(){},;'; 107 | const tokens: Token[] = [ 108 | { type: Tokens.ASSIGN, literal: '=' }, 109 | { type: Tokens.PLUS, literal: '+' }, 110 | { type: Tokens.LPAREN, literal: '(' }, 111 | { type: Tokens.RPAREN, literal: ')' }, 112 | { type: Tokens.LBRACE, literal: '{' }, 113 | { type: Tokens.RBRACE, literal: '}' }, 114 | { type: Tokens.COMMA, literal: ',' }, 115 | { type: Tokens.SEMICOLON, literal: ';' }, 116 | { type: Tokens.EOF, literal: '' }, 117 | ]; 118 | 119 | const lexer = new Lexer(input); 120 | 121 | tokens.forEach(({ type, literal }) => { 122 | const inputToken = lexer.nextToken(); 123 | expect(inputToken.type).toEqual(type); 124 | expect(inputToken.literal).toEqual(literal); 125 | }); 126 | }); 127 | }); 128 | ``` 129 | 130 | Ok, let's break it down! 131 | 132 | - The `input` is our source code. It'll be transformed into tokens. 133 | - The `tokens` is a list of tokens we expect to match the source code. 134 | - The `Lexer` is a class to be implemented. 135 | - It receives an input as source code. 136 | - And have a `nextToken` method to output the next token. 137 | - For each token in the list of tokens, we want to test if they match the "next token" from our lexer. 138 | 139 | Running our test, we get an error as we didn't implement our Lexer yet. So let's do it! 140 | 141 | To help analyze the source code, we will have 4 different variable helpers: 142 | 143 | - `input`: this is the actual source code. 144 | - `position`: the current position of the current char we are reading. 145 | - `readPosition`: the position we are about to read the next char. 146 | - `char`: the character of the source code we are reading. 147 | 148 | With these four parameters, we can build a simple class representing the `Lexer`. 
149 | 150 | ```tsx 151 | export class Lexer { 152 | input: string; 153 | position: number; 154 | readPosition: number; 155 | char: string; 156 | 157 | constructor(input: string) { 158 | this.input = input; 159 | } 160 | } 161 | ``` 162 | 163 | Running our test again, we fix the lexer instantiation. But now we got another issue. When reading each token, we expect that the `Lexer` instance has a `nextToken` method. But in our current lexer implementation, we don't do much. We just let it be instantiated. Let's implement the `nextToken` method. 164 | 165 | To get started, we first need to make sure that the lexer starts with its variables in the correct state. We do this in the constructor. 166 | 167 | ```tsx 168 | INITIAL_POSITION = 0; 169 | EMPTY_CHAR = ''; 170 | 171 | constructor(input: string) { 172 | this.input = input; 173 | this.setUpInitialState(); 174 | } 175 | 176 | private setUpInitialState() { 177 | this.position = this.INITIAL_POSITION; 178 | this.readPosition = this.INITIAL_POSITION; 179 | this.char = this.EMPTY_CHAR; 180 | } 181 | ``` 182 | 183 | The initial state for the positions is the index `0` and the `char` starts with the empty character (`''`) state. 184 | 185 | The `nextToken` algorithm is very simple in this first implementation. We just need to: 186 | 187 | - read the next character 188 | - transform this character into a token 189 | - return this new token 190 | 191 | "read the next character" is basically the idea of updating the current state of the `position`, the `readPosition`, and the `char` variables. 192 | 193 | ```tsx 194 | private readChar() { 195 | if (this.readPosition >= this.input.length) { 196 | this.char = ''; 197 | } else { 198 | this.char = this.input[this.readPosition]; 199 | } 200 | 201 | this.position = this.readPosition; 202 | this.readPosition += 1; 203 | } 204 | ``` 205 | 206 | We start verifying the `readPosition` to make sure that we didn't finish reading the entire source code. 
If we finish reading the source code, we just update the `char` with its initial state (empty string). 207 | 208 | To get the next character, we just access the input with the next position index and update the `char`. 209 | 210 | After that, we always need to update the indices: 211 | 212 | - `position` becomes the `readPosition` 213 | - `readPosition` increments by one 214 | 215 | Now that we read the next character, we can generate the token based on this new current state. Here it's very simple. We just need to map the current `char` to its own `Token`. We build this with a simple switch case. 216 | 217 | ```tsx 218 | private getToken(): Token { 219 | switch (this.char) { 220 | case '=': 221 | return new Token(Tokens.ASSIGN, '='); 222 | case ';': 223 | return new Token(Tokens.SEMICOLON, ';'); 224 | case '(': 225 | return new Token(Tokens.LPAREN, '('); 226 | case ')': 227 | return new Token(Tokens.RPAREN, ')'); 228 | case ',': 229 | return new Token(Tokens.COMMA, ','); 230 | case '+': 231 | return new Token(Tokens.PLUS, '+'); 232 | case '{': 233 | return new Token(Tokens.LBRACE, '{'); 234 | case '}': 235 | return new Token(Tokens.RBRACE, '}'); 236 | case '': 237 | return new Token(Tokens.EOF, ''); 238 | } 239 | } 240 | ``` 241 | 242 | So let's get everything together now. We need to set up the lexer with the appropriate state and then start reading the source code. The constructor looks like this now: 243 | 244 | ```tsx 245 | constructor(input: string) { 246 | this.input = input; 247 | this.setUpInitialState(); 248 | this.readChar(); 249 | } 250 | ``` 251 | 252 | and the `nextToken` looks like this: 253 | 254 | ```tsx 255 | nextToken(): Token { 256 | const token = this.getToken(); 257 | this.readChar(); 258 | return token; 259 | } 260 | ``` 261 | 262 | As we read the next character in the constructor of the `Lexer`, we can start by getting the token, read the next character and return the created token. 
263 | 264 | Running our test again, we fixed all the issues and it is passing now. 265 | 266 | ## Final words & Resources 267 | 268 | I'm very happy to share with you the first post about my journey learning compilers and studying programming language theory. 269 | 270 | This is the first part of the Lexical Analysis posts and part of the [Building an Interpreter series](https://leandrotk.github.io/series/building-an-interpreter/). 271 | 272 | These are the resources I'm using to learn more about this field: 273 | 274 | - [Crafting an Interpreter](https://github.com/imteekay/crafting-an-interpreter): the open source project of the compiler for the TypeScript version of the Monkey programming language. 275 | - [Programming Language Theory](https://github.com/leandrotk/programming-language-theory): a bunch of resources about my studies on Programming Language Theory & Applied PLT. 276 | - [Writing an Interpreter in Go](https://www.goodreads.com/book/show/32681092-writing-an-interpreter-in-go): the book I'm reading to learn and implement the Monkey compiler. 277 | -------------------------------------------------------------------------------- /series/lexical-analysis-part-2.md: -------------------------------------------------------------------------------- 1 | # Lexical Analysis - Part 2 2 | 3 | This post is part of a series called [Building an Interpreter](https://leandrotk.github.io/series/building-an-interpreter/index.html). The [first part of the Lexical Analysis](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-1.html) post illustrated a basic lexer creating tokens from a simple source code. 4 | 5 | In this post we'll extend the tests and improve the `Lexer` to work with new tokens. The source code was this basic one-liner `"=+(){},;"`. 
But now we want a more complex source code: 6 | 7 | ```tsx 8 | const input = ` 9 | let five = 5; 10 | let ten = 10; 11 | 12 | let add = fn(x, y) { 13 | x + y; 14 | }; 15 | 16 | let result = add(five, ten); 17 | `; 18 | ``` 19 | 20 | With a new source code, we need more tokens to represent it. These are the tokens that we need to make the source code matches: 21 | 22 | ```tsx 23 | const tokens: Token[] = [ 24 | { type: Tokens.LET, literal: 'let' }, 25 | { type: Tokens.IDENT, literal: 'five' }, 26 | { type: Tokens.ASSIGN, literal: '=' }, 27 | { type: Tokens.INT, literal: '5' }, 28 | { type: Tokens.SEMICOLON, literal: ';' }, 29 | { type: Tokens.LET, literal: 'let' }, 30 | { type: Tokens.IDENT, literal: 'ten' }, 31 | { type: Tokens.ASSIGN, literal: '=' }, 32 | { type: Tokens.INT, literal: '10' }, 33 | { type: Tokens.SEMICOLON, literal: ';' }, 34 | { type: Tokens.LET, literal: 'let' }, 35 | { type: Tokens.IDENT, literal: 'add' }, 36 | { type: Tokens.ASSIGN, literal: '=' }, 37 | { type: Tokens.FUNCTION, literal: 'fn' }, 38 | { type: Tokens.LPAREN, literal: '(' }, 39 | { type: Tokens.IDENT, literal: 'x' }, 40 | { type: Tokens.COMMA, literal: ',' }, 41 | { type: Tokens.IDENT, literal: 'y' }, 42 | { type: Tokens.RPAREN, literal: ')' }, 43 | { type: Tokens.LBRACE, literal: '{' }, 44 | { type: Tokens.IDENT, literal: 'x' }, 45 | { type: Tokens.PLUS, literal: '+' }, 46 | { type: Tokens.IDENT, literal: 'y' }, 47 | { type: Tokens.SEMICOLON, literal: ';' }, 48 | { type: Tokens.RBRACE, literal: '}' }, 49 | { type: Tokens.SEMICOLON, literal: ';' }, 50 | { type: Tokens.LET, literal: 'let' }, 51 | { type: Tokens.IDENT, literal: 'result' }, 52 | { type: Tokens.ASSIGN, literal: '=' }, 53 | { type: Tokens.IDENT, literal: 'add' }, 54 | { type: Tokens.LPAREN, literal: '(' }, 55 | { type: Tokens.IDENT, literal: 'five' }, 56 | { type: Tokens.COMMA, literal: ',' }, 57 | { type: Tokens.IDENT, literal: 'ten' }, 58 | { type: Tokens.RPAREN, literal: ')' }, 59 | { type: Tokens.SEMICOLON, literal: 
';' }, 60 | { type: Tokens.EOF, literal: '' }, 61 | ]; 62 | ``` 63 | 64 | The test keeps the same, only the data changes. 65 | 66 | ```tsx 67 | const lexer = new Lexer(input); 68 | 69 | tokens.forEach(({ type, literal }) => { 70 | const inputToken = lexer.nextToken(); 71 | 72 | expect(inputToken.type).toEqual(type); 73 | expect(inputToken.literal).toEqual(literal); 74 | }); 75 | ``` 76 | 77 | Running this test, we start getting new errors related to the new tokens that don't match with the next generated token by our lexer. 78 | 79 | Also, the new tokens are a bit different now. They are not a "single character" token, they are a bit more complex and should be handled in a different way. 80 | 81 | The simplest example is the integer tokens. In the test's source code, we have integer `5` (single character), but we also have integer `10` (multiple characters). 82 | 83 | As they can be multiple characters tokens, we'll add the default case in our `Lexer`'s switch case. Starting with integers, we need to make sure that the current character is a digit, read the number to get the whole token literal, in this case, the whole integer. As we know that it's an integer and we have the integer value, we just create a new token and return it. It looks like this: 84 | 85 | ```tsx 86 | if (this.isDigit(this.char)) { 87 | const tokenLiteral = this.readNumber(); 88 | return new Token(Tokens.INT, tokenLiteral); 89 | } 90 | ``` 91 | 92 | Two parts are missing: 93 | 94 | - `isDigit`: verifies that a given character is a digit. 95 | - `readNumber`: read the whole number, independently if it's a single digit number or bigger. 96 | 97 | Lets start with the easier one: `isDigit`. To simplify the idea of a digit, we'll just do a verification if the character is between `'0'` and `'9'`. 98 | 99 | ```tsx 100 | private isDigit(char: string) { 101 | return '0' <= char && char <= '9'; 102 | } 103 | ``` 104 | 105 | Now about the `readNumber`. 
The algorithm would be: 106 | 107 | - get the initial position of the number 108 | - read the next character while it's still a digit 109 | - now we have the initial position and the last position 110 | - return the slice of the source code: the whole number 111 | 112 | ```tsx 113 | private readNumber() { 114 | const initialIntPosition = this.position; 115 | 116 | while (this.isDigit(this.char)) { 117 | this.readChar(); 118 | } 119 | 120 | return this.input.substring(initialIntPosition, this.position); 121 | } 122 | ``` 123 | 124 | Reading the next character, we update the current state of the main variables (`position`, `char`, and `readPosition`). 125 | 126 | We use the string's `substring` method to get the slice of the source code that represents the whole number. 127 | 128 | This is a very simplistic way to handle numbers as we are just handling integers but not floating-point numbers. 129 | 130 | Running the tests again, we don't have the integer token problem anymore. But we still have work to do and more tokens to build. 131 | 132 | Now we start to generate the other tokens: identifiers and keywords. The main difference between identifiers and keywords is that keywords are part of the language "grammar", the language's syntax. In the test's source code, we saw keywords like `fn` and `let` for example. Identifiers, on the other hand, are not part of the language's syntax; they are user-defined names. 133 | 134 | To identify whether the next token is an identifier or a keyword, we need to verify if the current character is a letter, read the next characters until it is not a letter anymore, and decide if the token is an identifier or a keyword by looking at its value. 135 | 136 | We add this code to the default part of the switch case as we did for the number tokens. 
137 | 138 | ```tsx 139 | if (this.isLetter(this.char)) { 140 | const tokenLiteral = this.readIdentifier(); 141 | const tokenType = lookupIdent(tokenLiteral); 142 | return new Token(tokenType, tokenLiteral); 143 | } 144 | ``` 145 | 146 | Let's break it down: 147 | 148 | - `isLetter`: just a method to verify if the current character is a letter. 149 | - `readIdentifier`: reads characters until the current one is no longer part of the identifier/keyword, and returns what it read. 150 | - `lookupIdent`: returns the token type (`FUNCTION`, `LET`, or `IDENT`) based on the token literal we got from the `readIdentifier`. 151 | - And finally it returns the newly generated token. 152 | 153 | The `isLetter` is pretty basic: 154 | 155 | ```tsx 156 | private isLetter(char: string) { 157 | return ( 158 | ('a' <= char && char <= 'z') || 159 | ('A' <= char && char <= 'Z') || 160 | char === '_' 161 | ); 162 | } 163 | ``` 164 | 165 | The Monkey programming language accepts `_` as part of the identifiers. It's very similar to Ruby and Python. And the main part of this verification is the idea that the `char` should be between `'a'` and `'z'` (lower case characters) or between `'A'` and `'Z'` (upper case characters). 166 | 167 | The `readIdentifier` is pretty similar to the `readNumber` that we implemented earlier. 168 | 169 | ```tsx 170 | private readIdentifier() { 171 | const initialCharPosition = this.position; 172 | 173 | while (this.isLetter(this.char)) { 174 | this.readChar(); 175 | } 176 | 177 | return this.input.substring(initialCharPosition, this.position); 178 | } 179 | ``` 180 | 181 | - We get the initial char position 182 | - Read the next char while it is still a letter 183 | - With the initial position and the last position of the identifier, we can get the slice of the source code and return it. 184 | 185 | And finally, `lookupIdent`, which we decided to implement in the `Token` module because it belongs to that domain. 
186 | 187 | ```tsx 188 | interface KeywordsType { 189 | [key: string]: string; 190 | } 191 | 192 | const Keywords: KeywordsType = { 193 | fn: Tokens.FUNCTION, 194 | let: Tokens.LET, 195 | }; 196 | 197 | export function lookupIdent(ident: string) { 198 | return ident in Keywords ? Keywords[ident] : Tokens.IDENT; 199 | } 200 | ``` 201 | 202 | It receives the identifier string and checks whether it is in the `Keywords` object: if it is, it returns the matching token type; otherwise, it returns `IDENT` as the token type. 203 | 204 | Running the tests again, we see more tokens passing the test. But some still fail. It turns out that we are not handling the white spaces between characters. Let's handle that issue! 205 | 206 | ```tsx 207 | private skipWhitespace() { 208 | while ( 209 | this.char == ' ' || 210 | this.char == '\t' || 211 | this.char == '\n' || 212 | this.char == '\r' 213 | ) { 214 | this.readChar(); 215 | } 216 | } 217 | ``` 218 | 219 | To skip the white spaces, we need to keep reading the next character until it is no longer white space. 220 | 221 | - `' '`: white space 222 | - `'\t'`: tab 223 | - `'\n'`: new line 224 | - `'\r'`: carriage return 225 | 226 | Calling `readChar` we update the state of the `position` and `char` variables. With this new implementation, we just need to add the `skipWhitespace` to the `getToken` method before generating any token: 227 | 228 | ```tsx 229 | private getToken(): Token { 230 | this.skipWhitespace(); 231 | ``` 232 | 233 | The only adjustment we need to make now is to update the `nextToken`. It was like this before: 234 | 235 | ```tsx 236 | nextToken(): Token { 237 | const token = this.getToken(); 238 | this.readChar(); 239 | return token; 240 | } 241 | ``` 242 | 243 | But as we read the next char for identifiers, keywords, and integers, we need to remove this line: 244 | 245 | ```tsx 246 | nextToken(): Token { 247 | const token = this.getToken(); 248 | return token; 249 | } 250 | ``` 251 | 252 | ...and add the `readChar` call only for the other tokens. 
253 | 254 | ```tsx 255 | case '=': 256 | this.readChar(); 257 | return new Token(Tokens.ASSIGN, '='); 258 | ``` 259 | 260 | But as we need to repeat this same step for almost all tokens, I created a private method to handle that. 261 | 262 | ```tsx 263 | private buildToken(type: TokenType, literal: string) { 264 | this.readChar(); 265 | return new Token(type, literal); 266 | } 267 | ``` 268 | 269 | Using it is very straightforward. 270 | 271 | ```tsx 272 | switch (this.char) { 273 | case '=': 274 | return this.buildToken(Tokens.ASSIGN, '='); 275 | case ';': 276 | return this.buildToken(Tokens.SEMICOLON, ';'); 277 | case '(': 278 | return this.buildToken(Tokens.LPAREN, '('); 279 | case ')': 280 | return this.buildToken(Tokens.RPAREN, ')'); 281 | case ',': 282 | return this.buildToken(Tokens.COMMA, ','); 283 | case '+': 284 | return this.buildToken(Tokens.PLUS, '+'); 285 | case '{': 286 | return this.buildToken(Tokens.LBRACE, '{'); 287 | case '}': 288 | return this.buildToken(Tokens.RBRACE, '}'); 289 | case '': 290 | return this.buildToken(Tokens.EOF, ''); 291 | ``` 292 | 293 | Now we have the tests passing and an improved lexer. Our language is taking shape. The source code is a bit more complex and all the tokens were generated. That's pretty nice! 294 | 295 | ## **Final words & Resources** 296 | 297 | If you haven't had the opportunity yet, take a look at the [first part of the Lexical Analysis](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-1.html). This is the second post about my journey learning compilers and studying programming language theory. It is also part of the [Building an Interpreter series](https://leandrotk.github.io/series/building-an-interpreter/). 
298 | 299 | These are the resources I'm using to learn more about this field: 300 | 301 | - [Crafting an Interpreter](https://github.com/imteekay/crafting-an-interpreter): the open source project of the compiler for the TypeScript version of the Monkey programming language. 302 | - [Programming Language Theory](https://github.com/leandrotk/programming-language-theory): a bunch of resources about my studies on Programming Language Theory & Applied PLT. 303 | - [Writing an Interpreter in Go](https://www.goodreads.com/book/show/32681092-writing-an-interpreter-in-go): the book I'm reading to learn and implement the Monkey compiler. 304 | -------------------------------------------------------------------------------- /src/parser/parser.ts: --------------------------------------------------------------------------------
import { Lexer } from 'lexer';
import { Token, Tokens, TokenType } from 'token';
import {
  Program,
  LetStatement,
  Identifier,
  IntegerLiteral,
  ReturnStatement,
  ExpressionStatement,
  Expression,
  InfixExpression,
  PrefixExpression,
  BooleanExpression,
  BlockStatement,
  IfExpression,
  FunctionLiteral,
  CallExpression,
  StringLiteral,
  ArrayLiteral,
  IndexExpression,
  HashLiteral,
} from 'ast';

/** A human-readable message describing one problem found while parsing. */
export type ParserError = string;

/** Parses an expression that begins at the parser's current token. */
type prefixParseFn = () => Expression | null;

/** Parses the rest of an expression whose left-hand side is already parsed. */
type infixParseFn = (expression: Expression) => Expression | null;

/**
 * Binding power of operators, from weakest to strongest. `parseExpression`
 * compares these values to decide whether the upcoming operator takes the
 * expression parsed so far as its left operand.
 */
enum Precedence {
  LOWEST = 1,
  EQUALS, // ==
  LESSGREATER, // > or <
  SUM, // +
  PRODUCT, // *
  PREFIX, // -X or !X
  CALL, // myFunction(X)
  INDEX, // [][1]
}

/** Precedence of each token that is looked up in infix position. */
const precedences = new Map([
  [Tokens.EQUAL, Precedence.EQUALS],
  [Tokens.NOT_EQUAL, Precedence.EQUALS],
  [Tokens.LESS_THAN, Precedence.LESSGREATER],
  [Tokens.GREATER_THAN, Precedence.LESSGREATER],
  [Tokens.PLUS, Precedence.SUM],
  [Tokens.MINUS, Precedence.SUM],
  [Tokens.SLASH, Precedence.PRODUCT],
  [Tokens.ASTERISK, Precedence.PRODUCT],
  [Tokens.LPAREN, Precedence.CALL],
  [Tokens.LBRACKET, Precedence.INDEX],
]);

/**
 * Turns the token stream produced by a `Lexer` into a `Program` AST.
 *
 * The parser keeps a two-token window (`currentToken` and `peekToken`) and
 * dispatches expression parsing through two tables keyed by token type: one
 * for tokens that start an expression (prefix) and one for tokens that
 * continue an expression (infix). Problems are collected as messages and
 * exposed through `getErrors()`; parsing methods return `null` for
 * constructs they could not parse instead of throwing.
 */
export class Parser {
  private lexer: Lexer;
  private currentToken: Token;
  private peekToken: Token;
  private errors: ParserError[];
  private prefixParseFns: { [key: TokenType]: prefixParseFn } = {};
  private infixParseFns: { [key: TokenType]: infixParseFn } = {};

  /**
   * @param lexer source of tokens; two tokens are read immediately so that
   *   both `currentToken` and `peekToken` are populated.
   */
  constructor(lexer: Lexer) {
    this.lexer = lexer;
    this.errors = [];
    this.nextToken();
    this.nextToken();

    // Parsing prefix expressions
    this.registerPrefix(Tokens.IDENT, this.parseIdentifier.bind(this));
    this.registerPrefix(Tokens.INT, this.parseIntegerLiteral.bind(this));
    this.registerPrefix(Tokens.BANG, this.parsePrefixExpression.bind(this));
    this.registerPrefix(Tokens.MINUS, this.parsePrefixExpression.bind(this));
    this.registerPrefix(Tokens.TRUE, this.parseBoolean.bind(this));
    this.registerPrefix(Tokens.FALSE, this.parseBoolean.bind(this));
    this.registerPrefix(Tokens.LPAREN, this.parseGroupedExpression.bind(this));
    this.registerPrefix(Tokens.IF, this.parseIfExpression.bind(this));
    this.registerPrefix(Tokens.FUNCTION, this.parseFunctionLiteral.bind(this));
    this.registerPrefix(Tokens.STRING, this.parseStringLiteral.bind(this));
    this.registerPrefix(Tokens.LBRACKET, this.parseArrayLiteral.bind(this));
    this.registerPrefix(Tokens.LBRACE, this.parseHashLiteral.bind(this));

    // Parsing infix expressions: every binary operator shares one handler
    const binaryOperators = [
      Tokens.PLUS,
      Tokens.MINUS,
      Tokens.SLASH,
      Tokens.ASTERISK,
      Tokens.EQUAL,
      Tokens.NOT_EQUAL,
      Tokens.LESS_THAN,
      Tokens.GREATER_THAN,
    ];

    for (const operator of binaryOperators) {
      this.registerInfix(operator, this.parseInfixExpression.bind(this));
    }

    // `(` and `[` in infix position mean "call" and "index"
    this.registerInfix(Tokens.LPAREN, this.parseCallExpression.bind(this));
    this.registerInfix(Tokens.LBRACKET, this.parseIndexExpression.bind(this));
  }

  /** Slides the two-token window forward by one token. */
  nextToken() {
    this.currentToken = this.peekToken;
    // peekToken is always pointing to the next token
    this.peekToken = this.lexer.nextToken();
  }

  /**
   * Parses statements until the current token is `EOF`.
   *
   * @returns the program containing every statement that parsed; statements
   *   that produced `null` are left out (see `getErrors()`).
   */
  parseProgram() {
    const program = new Program();

    while (this.currentToken.type !== Tokens.EOF) {
      const statement = this.parseStatement();

      if (statement) {
        program.statements.push(statement);
      }

      this.nextToken();
    }

    return program;
  }

  /** @returns the error messages collected so far (the internal array). */
  getErrors() {
    return this.errors;
  }

  /** === Parsing Statements === */

  /** Dispatches on the current token: `let`, `return`, or an expression. */
  private parseStatement() {
    switch (this.currentToken.type) {
      case Tokens.LET:
        return this.parseLetStatement();
      case Tokens.RETURN:
        return this.parseReturnStatement();
      default:
        return this.parseExpressionStatement();
    }
  }

  /**
   * Parses `let <identifier> = <expression>;` starting at the `let` token.
   *
   * @returns the statement, or `null` (with an error recorded) when the
   *   identifier or the `=` is missing.
   */
  private parseLetStatement() {
    const statement = new LetStatement(this.currentToken);

    // We expect that after the let statement, we have the identifier
    if (!this.expectPeek(Tokens.IDENT)) {
      return null;
    }

    statement.name = new Identifier(
      this.currentToken,
      this.currentToken.literal
    );

    if (!this.expectPeek(Tokens.ASSIGN)) {
      return null;
    }

    this.nextToken();

    const valueExpression = this.parseExpression(Precedence.LOWEST);

    if (valueExpression) {
      statement.value = valueExpression;
    }

    this.skipToStatementEnd();

    return statement;
  }

  /** Parses `return <expression>;` starting at the `return` token. */
  private parseReturnStatement() {
    const statement = new ReturnStatement(this.currentToken);
    this.nextToken();
    const returnValue = this.parseExpression(Precedence.LOWEST);

    if (returnValue) {
      statement.returnValue = returnValue;
    }

    this.skipToStatementEnd();

    return statement;
  }

  /**
   * Advances until the current token is the `;` that ends a `let`/`return`
   * statement, discarding any tokens in between.
   *
   * The `EOF` check guards input whose last statement has no `;`: without
   * it the loop would keep requesting tokens past the end of the input
   * (assumes the lexer keeps answering `EOF` once exhausted).
   */
  private skipToStatementEnd() {
    while (
      !this.currentTokenIs(Tokens.SEMICOLON) &&
      !this.currentTokenIs(Tokens.EOF)
    ) {
      this.nextToken();
    }
  }

  /**
   * Parses a bare expression used as a statement; a trailing `;` is optional.
   *
   * @returns the statement, or `null` when the expression could not be parsed.
   */
  private parseExpressionStatement() {
    const statement = new ExpressionStatement(this.currentToken);
    const expression = this.parseExpression(Precedence.LOWEST);

    if (!expression) {
      return null;
    }

    statement.expression = expression;

    if (this.peekTokenIs(Tokens.SEMICOLON)) {
      this.nextToken();
    }

    return statement;
  }
  /** === Parsing Statements === */

  /** === Parsing Expressions === */

  /**
   * Parses one expression starting at the current token.
   *
   * The prefix function registered for the current token produces the
   * initial expression. While the upcoming token is not `;` and binds more
   * tightly than `precedence`, its infix function wraps the expression
   * parsed so far.
   *
   * @param precedence binding power of the context the expression sits in.
   * @returns the parsed expression, or `null` when no prefix function exists
   *   for the current token (an error is recorded) or a sub-parse failed.
   */
  private parseExpression(precedence: Precedence) {
    const getPrefix = this.prefixParseFns[this.currentToken.type];

    if (!getPrefix) {
      this.noPrefixParseFnError(this.currentToken.type);
      return null;
    }

    let leftExpression = getPrefix();

    while (
      !this.peekTokenIs(Tokens.SEMICOLON) &&
      precedence < this.peekPrecedence()
    ) {
      const getInfix = this.infixParseFns[this.peekToken.type];

      if (!getInfix) {
        return leftExpression;
      }

      this.nextToken();

      if (leftExpression) {
        leftExpression = getInfix(leftExpression);
      }
    }

    return leftExpression;
  }
  /** === Parsing Expressions === */

  /** === Token Handler === */

  /** @returns whether the current token has the given type. */
  private currentTokenIs(token: TokenType) {
    return this.currentToken.type === token;
  }

  /** @returns whether the upcoming token has the given type. */
  private peekTokenIs(token: TokenType) {
    return this.peekToken.type === token;
  }

  /**
   * Advances only if the upcoming token has the expected type.
   *
   * @returns `true` after advancing; otherwise records an error, leaves the
   *   window untouched and returns `false`.
   */
  private expectPeek(token: TokenType) {
    if (this.peekTokenIs(token)) {
      this.nextToken();
      return true;
    }

    this.peekError(token);
    return false;
  }
  /** === Token Handler === */

  /** === Parsing Functions === */

  /** Builds an identifier node from the current token. */
  private parseIdentifier() {
    return new Identifier(this.currentToken, this.currentToken.literal);
  }

  /**
   * Builds an integer node from the current token's literal.
   *
   * @returns the node, or `null` (with an error recorded) when the literal
   *   is not a base-10 integer.
   */
  private parseIntegerLiteral() {
    const { literal } = this.currentToken;
    // Explicit radix: the literal is always meant to be read as decimal.
    const value = Number.parseInt(literal, 10);

    if (Number.isNaN(value)) {
      const msg = `could not parse ${literal} as integer`;
      this.errors.push(msg);
      return null;
    }

    return new IntegerLiteral(this.currentToken, value);
  }

  /** Builds a boolean node; its value is `true` iff the token is `TRUE`. */
  private parseBoolean() {
    return new BooleanExpression(
      this.currentToken,
      this.currentTokenIs(Tokens.TRUE)
    );
  }

  /** Parses `<operator><operand>` for the registered prefix operators. */
  private parsePrefixExpression() {
    const expression = new PrefixExpression(
      this.currentToken,
      this.currentToken.literal
    );

    this.nextToken();

    const rightExpression = this.parseExpression(Precedence.PREFIX);

    if (rightExpression) {
      expression.right = rightExpression;
    }

    return expression;
  }

  /**
   * Parses `<left> <operator> <right>` with the operator as current token.
   *
   * The right operand is parsed with the operator's own precedence, so a
   * following operator of equal precedence does not become part of it.
   */
  private parseInfixExpression(left: Expression) {
    const expression = new InfixExpression(
      this.currentToken,
      this.currentToken.literal,
      left
    );

    const precedence = this.currentPrecedence();
    this.nextToken();
    const right = this.parseExpression(precedence);

    if (right) {
      expression.right = right;
    }

    return expression;
  }

  /** Parses `( <expression> )`; returns `null` when `)` is missing. */
  private parseGroupedExpression() {
    this.nextToken();

    const expression = this.parseExpression(Precedence.LOWEST);

    if (!this.expectPeek(Tokens.RPAREN)) {
      return null;
    }

    return expression;
  }

  /**
   * Parses `if (<condition>) { ... }` with an optional `else { ... }`.
   *
   * @returns the node, or `null` when a required `(`, `)` or `{` is missing.
   */
  private parseIfExpression() {
    const expression = new IfExpression(this.currentToken);

    if (!this.expectPeek(Tokens.LPAREN)) {
      return null;
    }

    this.nextToken();

    const condition = this.parseExpression(Precedence.LOWEST);

    if (condition) {
      expression.condition = condition;
    }

    if (!this.expectPeek(Tokens.RPAREN)) {
      return null;
    }

    if (!this.expectPeek(Tokens.LBRACE)) {
      return null;
    }

    const consequence = this.parseBlockStatement();

    if (consequence) {
      expression.consequence = consequence;
    }

    if (this.peekTokenIs(Tokens.ELSE)) {
      this.nextToken();

      if (!this.expectPeek(Tokens.LBRACE)) {
        return null;
      }

      const alternative = this.parseBlockStatement();

      if (alternative) {
        expression.alternative = alternative;
      }
    }

    return expression;
  }

  /**
   * Parses the statements of a `{ ... }` block; the current token is the
   * opening `{`. Stops at the closing `}` or at `EOF`.
   */
  private parseBlockStatement() {
    const block = new BlockStatement(this.currentToken);
    block.statements = [];

    this.nextToken();

    while (
      !this.currentTokenIs(Tokens.RBRACE) &&
      !this.currentTokenIs(Tokens.EOF)
    ) {
      const statement = this.parseStatement();

      if (statement) {
        block.statements.push(statement);
      }

      this.nextToken();
    }

    return block;
  }

  /**
   * Parses `fn(<parameters>) { ... }`.
   *
   * @returns the node, or `null` when `(` or `{` is missing.
   */
  private parseFunctionLiteral() {
    const functionLiteral = new FunctionLiteral(this.currentToken);

    if (!this.expectPeek(Tokens.LPAREN)) {
      return null;
    }

    const functionParameters = this.parseFunctionParameters();

    if (functionParameters) {
      functionLiteral.parameters = functionParameters;
    }

    if (!this.expectPeek(Tokens.LBRACE)) {
      return null;
    }

    functionLiteral.body = this.parseBlockStatement();

    return functionLiteral;
  }

  /** Builds a string node from the current token. */
  private parseStringLiteral() {
    return new StringLiteral(this.currentToken, this.currentToken.literal);
  }

  /** Parses `[<expression>, ...]`; the current token is the opening `[`. */
  private parseArrayLiteral() {
    const array = new ArrayLiteral(this.currentToken);
    const elements = this.parseExpressionList(Tokens.RBRACKET);

    if (elements) {
      array.elements = elements;
    }

    return array;
  }

  /**
   * Parses a comma-separated list of expressions up to `endToken`.
   *
   * @param endToken the token type that closes the list.
   * @returns the expressions (empty when the list closes immediately), or
   *   `null` when the closing token is missing.
   */
  private parseExpressionList(endToken: Tokens) {
    const list: Expression[] = [];

    // if function call has no arguments
    if (this.peekTokenIs(endToken)) {
      this.nextToken();
      return list;
    }

    this.nextToken();

    const listItem = this.parseExpression(Precedence.LOWEST);

    if (listItem) {
      list.push(listItem);
    }

    while (this.peekTokenIs(Tokens.COMMA)) {
      // step over the comma, then onto the first token of the next item
      this.nextToken();
      this.nextToken();

      const listItem = this.parseExpression(Precedence.LOWEST);

      if (listItem) {
        list.push(listItem);
      }
    }

    if (!this.expectPeek(endToken)) {
      return null;
    }

    return list;
  }

  /**
   * Parses `{ <key>: <value>, ... }`; the current token is the opening `{`.
   *
   * @returns the node, or `null` when a `:`, `,` or the closing `}` is missing.
   */
  private parseHashLiteral() {
    const hash = new HashLiteral(this.currentToken);

    while (!this.peekTokenIs(Tokens.RBRACE)) {
      this.nextToken();
      const key = this.parseExpression(Precedence.LOWEST);

      if (!this.expectPeek(Tokens.COLON)) {
        return null;
      }

      this.nextToken();
      const value = this.parseExpression(Precedence.LOWEST);

      if (key && value) {
        hash.pairs.set(key, value);
      }

      // a pair is followed either by the closing brace or by a comma
      if (!this.peekTokenIs(Tokens.RBRACE) && !this.expectPeek(Tokens.COMMA)) {
        return null;
      }
    }

    if (!this.expectPeek(Tokens.RBRACE)) {
      return null;
    }

    return hash;
  }

  /**
   * Parses the parameter list of a function literal; the current token is
   * the opening `(`. Each parameter token becomes an `Identifier` without
   * its token type being checked.
   *
   * @returns the identifiers, or `null` when the closing `)` is missing.
   */
  private parseFunctionParameters() {
    const identifiers = [] as Identifier[];

    if (this.peekTokenIs(Tokens.RPAREN)) {
      this.nextToken();
      return identifiers;
    }

    this.nextToken();

    const identifier = new Identifier(
      this.currentToken,
      this.currentToken.literal
    );

    identifiers.push(identifier);

    while (this.peekTokenIs(Tokens.COMMA)) {
      this.nextToken();
      this.nextToken();

      const identifier = new Identifier(
        this.currentToken,
        this.currentToken.literal
      );

      identifiers.push(identifier);
    }

    if (!this.expectPeek(Tokens.RPAREN)) {
      return null;
    }

    return identifiers;
  }

  /** Parses `<fn>(<arguments>)`; the current token is the opening `(`. */
  private parseCallExpression(fn: Expression) {
    const callExpression = new CallExpression(this.currentToken, fn);
    const args = this.parseExpressionList(Tokens.RPAREN);

    if (args) {
      callExpression.arguments = args;
    }

    return callExpression;
  }

  /**
   * Parses `<left>[<index>]`; the current token is the opening `[`.
   *
   * @returns the node, or `null` when the closing `]` is missing.
   */
  private parseIndexExpression(left: Expression) {
    const indexExpression = new IndexExpression(this.currentToken, left);

    this.nextToken();
    const index = this.parseExpression(Precedence.LOWEST);

    if (index) {
      indexExpression.index = index;
    }

    if (!this.expectPeek(Tokens.RBRACKET)) {
      return null;
    }

    return indexExpression;
  }
  /** === Parsing Functions === */

  /** === Registering parsing functions === */

  /** Associates a token type with the function that parses it as a prefix. */
  private registerPrefix(tokenType: TokenType, fn: prefixParseFn) {
    this.prefixParseFns[tokenType] = fn;
  }

  /** Associates a token type with the function that parses it as an infix. */
  private registerInfix(tokenType: TokenType, fn: infixParseFn) {
    this.infixParseFns[tokenType] = fn;
  }
  /** === Registering parsing functions === */

  /** === Precedence Handlers === */

  /** @returns the precedence of the current token, `LOWEST` if it has none. */
  private currentPrecedence() {
    return precedences.get(this.currentToken.type) ?? Precedence.LOWEST;
  }

  /** @returns the precedence of the upcoming token, `LOWEST` if it has none. */
  private peekPrecedence() {
    return precedences.get(this.peekToken.type) ?? Precedence.LOWEST;
  }
  /** === Precedence Handlers === */

  /** === Error Handling === */

  /** Records that the upcoming token was not of the expected type. */
  private peekError(token: TokenType) {
    const msg = `expected next token to be ${token}, got ${this.peekToken.type} instead`;
    this.errors.push(msg);
  }

  /** Records that no prefix parse function is registered for a token type. */
  private noPrefixParseFnError(tokenType: TokenType) {
    const msg = `no prefix parse function for ${tokenType} found`;
    this.errors.push(msg);
  }
  /** === Error Handling === */
}
-------------------------------------------------------------------------------- /src/evaluator/tests/evaluator.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, expect, it } from 'vitest'; 2 | import { Lexer } from 'lexer'; 3 | import { Parser } from 'parser'; 4 | import { Evaluator } from 'evaluator'; 5 | import { 6 | ArrayObject, 7 | BooleanLiteral, 8 | Environment, 9 | ErrorObject, 10 | FunctionObject, 11 | Hash, 12 | Integer, 13 | Null, 14 | StringObject, 15 | } from 'object'; 16 | 17 | function evaluate(input: string) { 18 | const lexer = new Lexer(input); 19 | const parser = new Parser(lexer); 20 | const evaluator = new Evaluator(); 21 | const env = new Environment(); 22 | const program = parser.parseProgram(); 23 | return evaluator.evaluate(program, env); 24 | } 25 | 26 | describe('Evaluator', () => { 27 | it('evaluates integer literals', () => { 28 | const tests = [ 29 | { input: '5', expected: 5 }, 30 | { input: '10', expected: 10 }, 31 | ]; 32 | 33 | for (const { input, expected } of tests) { 34 | const evaluatedProgram = evaluate(input); 35 | expect(evaluatedProgram).toEqual(new Integer(expected)); 36 | } 37 | }); 38 | 39 | it('evaluates boolean
literals', () => { 40 | const tests = [ 41 | { input: 'true', expected: true }, 42 | { input: 'false', expected: false }, 43 | ]; 44 | 45 | for (const { input, expected } of tests) { 46 | const evaluatedProgram = evaluate(input); 47 | expect(evaluatedProgram).toEqual(new BooleanLiteral(expected)); 48 | } 49 | }); 50 |

  // A string literal evaluates to a StringObject holding the unquoted text.
  it('evaluates string literals', () => {
    const tests = [{ input: '"Hello World"', expected: 'Hello World' }];

    for (const { input, expected } of tests) {
      const evaluatedProgram = evaluate(input);
      expect(evaluatedProgram).toEqual(new StringObject(expected));
    }
  });

  // `+` between strings concatenates them.
  it('evaluates string concatenation', () => {
    const tests = [
      { input: '"Hello" + " " + "World"', expected: 'Hello World' },
    ];

    for (const { input, expected } of tests) {
      const evaluatedProgram = evaluate(input);
      expect(evaluatedProgram).toEqual(new StringObject(expected));
    }
  });

  describe('evaluates operator expressions', () => {
    // `!` negates booleans; the integer 10 is expected to behave as truthy.
    it('evaluates bang operators', () => {
      const tests = [
        { input: '!true', expected: false },
        { input: '!false', expected: true },
        { input: '!!true', expected: true },
        { input: '!!false', expected: false },
        { input: '!10', expected: false },
        { input: '!!10', expected: true },
      ];

      for (const { input, expected } of tests) {
        const evaluatedProgram = evaluate(input);
        expect(evaluatedProgram).toEqual(new BooleanLiteral(expected));
      }
    });

    // Unary minus on integers (title previously duplicated the bang test).
    it('evaluates minus prefix operators', () => {
      const tests = [
        { input: '-10', expected: -10 },
        { input: '-1', expected: -1 },
      ];

      for (const { input, expected } of tests) {
        const evaluatedProgram = evaluate(input);
        expect(evaluatedProgram).toEqual(new Integer(expected));
      }
    });

    // Arithmetic, including precedence and grouping with parentheses.
    it('evaluates integer infix operators', () => {
      const tests = [
        { input: '1 + 1', expected: 2 },
        { input: '5 + 5 + 5 + 5 - 10', expected: 10 },
        { input: '2 * 2 * 2 * 2 * 2', expected: 32 },
        { input: '-50 + 100 + -50', expected: 0 },
        { input: '5 * 2 + 10', expected: 20 },
        { input: '5 + 2 * 10', expected: 25 },
        { input: '20 + 2 * -10', expected: 0 },
        { input: '50 / 2 * 2 + 10', expected: 60 },
        { input: '2 * (5 + 10)', expected: 30 },
        { input: '3 * 3 * 3 + 10', expected: 37 },
        { input: '3 * (3 * 3) + 10', expected: 37 },
        { input: '(5 + 10 * 2 + 15 / 3) * 2 + -10', expected: 50 },
      ];

      for (const { input, expected } of tests) {
        const evaluatedProgram = evaluate(input);
        expect(evaluatedProgram).toEqual(new Integer(expected));
      }
    });

    // Boolean literals and integer comparisons (title previously duplicated
    // the arithmetic test above).
    it('evaluates boolean literals and integer comparison operators', () => {
      const tests = [
        { input: 'true', expected: true },
        { input: 'false', expected: false },
        { input: '1 < 2', expected: true },
        { input: '1 > 2', expected: false },
        { input: '1 < 1', expected: false },
        { input: '1 > 1', expected: false },
        { input: '1 == 1', expected: true },
        { input: '1 != 1', expected: false },
        { input: '1 == 2', expected: false },
        { input: '1 != 2', expected: true },
      ];

      for (const { input, expected } of tests) {
        const evaluatedProgram = evaluate(input);
        expect(evaluatedProgram).toEqual(new BooleanLiteral(expected));
      }
    });

    // `==` / `!=` between booleans and between comparison results.
    it('evaluates boolean infix operators', () => {
      const tests = [
        { input: 'true == true', expected: true },
        { input: 'false == false', expected: true },
        { input: 'true == false', expected: false },
        { input: 'true != false', expected: true },
        { input: 'false != true', expected: true },
        { input: '(1 < 2) == true', expected: true },
        { input: '(1 < 2) == false', expected: false },
        { input: '(1 > 2) == true', expected: false },
        { input: '(1 > 2) == false', expected: true },
      ];

      for (const { input, expected } of tests) {
        const evaluatedProgram = evaluate(input);
        expect(evaluatedProgram).toEqual(new BooleanLiteral(expected));
      }
    });

    // An `if` whose branch is not taken and has no `else` yields Null.
    it('evaluates if else expressions', () => {
      const tests = [
        { input: 'if (true) { 10 }', expected: 10 },
        { input: 'if (false) { 10 }', expected: null },
        { input: 'if (1) { 10 }', expected: 10 },
        { input: 'if (1 < 2) { 10 }', expected: 10 },
        { input: 'if (1 > 2) { 10 }', expected: null },
        { input: 'if (1 > 2) { 10 } else { 20 }', expected: 20 },
        { input: 'if (1 < 2) { 10 } else { 20 }', expected: 10 },
      ];

      for (const { input, expected } of tests) {
        const evaluatedProgram = evaluate(input);

        if (typeof expected === 'number') {
          expect(evaluatedProgram).toEqual(new Integer(expected));
        } else {
          expect(evaluatedProgram).toEqual(new Null());
        }
      }
    });

    // `return` stops evaluation, also from inside nested blocks (title
    // previously duplicated the if/else test above).
    it('evaluates return statements', () => {
      const tests = [
        { input: 'return 10;', expected: 10 },
        { input: 'return 10; 9;', expected: 10 },
        { input: 'return 2 * 5; 9;', expected: 10 },
        { input: '9; return 2 * 5; 9;', expected: 10 },
        {
          input: `if (10 > 1) {
            if (10 > 1) {
              return 10;
            }
            return 1; }`,
          expected: 10,
        },
      ];

      for (const { input, expected } of tests) {
        const evaluatedProgram = evaluate(input);
        expect(evaluatedProgram).toEqual(new Integer(expected));
      }
    });

    // Evaluation errors surface as ErrorObject values carrying a message.
    it('handles errors', () => {
      const tests = [
        {
          input: '5 + true;',
          expected: 'type mismatch: INTEGER + BOOLEAN',
        },
        {
          input: '5 + true; 5;',
          expected: 'type mismatch: INTEGER + BOOLEAN',
        },
        {
          input: '-true',
          expected: 'unknown operator: -BOOLEAN',
        },
        {
          input: 'true + false;',
          expected: 'unknown operator: BOOLEAN + BOOLEAN',
        },
        {
          input: 'if (10 > 1) { true + false; }',
          expected: 'unknown operator: BOOLEAN + BOOLEAN',
        },
        {
          input: `if (10 > 1) {
            if (10 > 1) {
              return true + false;
            }
            return 1; }`,
          expected: 'unknown operator: BOOLEAN + BOOLEAN',
        },
        {
          input: '5; true + false; 5',
          expected: 'unknown operator: BOOLEAN + BOOLEAN',
        },
        {
          input: 'foo',
          expected: 'identifier not found: foo',
        },
        {
          input: '"Hello" - "World"',
          expected: 'unknown operator: STRING - STRING',
        },
        {
          input: '{"name": "Monkey"}[fn(x) { x }];',
          expected: 'unusable as hash key: FUNCTION',
        },
      ];

      for (const { input, expected } of tests) {
        const evaluatedProgram = evaluate(input);
        expect(evaluatedProgram).toEqual(new ErrorObject(expected));
      }
    });
  });

  // Bindings created by `let` are visible to later statements.
  it('evaluates let statements', () => {
    const tests = [
      { input: 'let a = 5; a;', expected: 5 },
      { input: 'let a = 5 * 5; a;', expected: 25 },
      { input: 'let a = 5; let b = a; b;', expected: 5 },
      {
        input: 'let a = 5; let b = a; let c = a + b + 5; c;',
        expected: 15,
      },
    ];

    for (const { input, expected } of tests) {
      const evaluatedProgram = evaluate(input);
      expect(evaluatedProgram).toEqual(new Integer(expected));
    }
  });

  describe('evaluates functions', () => {
    // A function literal evaluates to an object exposing parameters and body.
    it('evaluates to function object', () => {
      const input = 'fn(n) { n + 1 };';
      const evaluatedProgram = evaluate(input);

      expect((evaluatedProgram as FunctionObject).parameters.length).toEqual(1);
      expect(
        (evaluatedProgram as FunctionObject).parameters[0].string()
      ).toEqual('n');

      expect((evaluatedProgram as FunctionObject).body.string()).toEqual(
        '(n + 1)'
      );
    });

    // Calls through bindings, nested calls and immediately-invoked literals.
    it('evaluates function application', () => {
      const tests = [
        { input: 'let identity = fn(x) { x; }; identity(5);', expected: 5 },
        {
          input: 'let identity = fn(x) { return x; }; identity(5);',
          expected: 5,
        },
        { input: 'let double = fn(x) { x * 2; }; double(5);', expected: 10 },
        { input: 'let add = fn(x, y) { x + y; }; add(5, 5);', expected: 10 },
        {
          input: 'let add = fn(x, y) { x + y; }; add(5 + 5, add(5, 5));',
          expected: 20,
        },
        { input: 'fn(x) { x; }(5)', expected: 5 },
        { input: 'fn() { 5; }()', expected: 5 },
      ];

      for (const { input, expected } of tests) {
        const evaluatedProgram = evaluate(input);
        expect(evaluatedProgram).toEqual(new Integer(expected));
      }
    });

    // Calling a function with an empty body yields `undefined`.
    it('evaluates function application to undefined', () => {
      const evaluatedProgram = evaluate('fn() {}()');
      expect(evaluatedProgram).toEqual(undefined);
    });

    // The inner function still sees `x` after `newAdder` has returned.
    it('evaluates function application with closures', () => {
      const evaluatedProgram = evaluate(`
        let newAdder = fn(x) {
          fn(y) { x + y };
        };
        let addTwo = newAdder(2);
        addTwo(2);
      `);

      expect(evaluatedProgram).toEqual(new Integer(4));
    });
  });

  describe('evaluates builtin functions', () => {
    // `len` of a string is its number of characters.
    it('evaluates function application of builtin functions', () => {
      const tests = [
        { input: `len("")`, expected: 0 },
        { input: `len("four")`, expected: 4 },
        { input: `len("hello world")`, expected: 11 },
      ];

      for (const { input, expected } of tests) {
        const evaluatedProgram = evaluate(input);
        expect(evaluatedProgram).toEqual(new Integer(expected));
      }
    });

    // Wrong argument type or count is reported as an ErrorObject.
    it('handles unsupported arguments', () => {
      const tests = [
        {
          input: `len(1)`,
          expected: 'argument to "len" not supported, got INTEGER',
        },
        {
          input: `len()`,
          expected: 'wrong number of arguments. got=0, want=1',
        },
        {
          input: `len("one", "two")`,
          expected: 'wrong number of arguments. got=2, want=1',
        },
      ];

      for (const { input, expected } of tests) {
        const evaluatedProgram = evaluate(input);
        expect(evaluatedProgram).toEqual(new ErrorObject(expected));
      }
    });
  });

  describe('evaluates arrays', () => {
    // Each element expression is evaluated before being stored.
    it('evaluates array literals', () => {
      const tests = [
        { input: '[1, 2 * 2, 3 + 3]', expected: [1, 4, 6] },
        { input: '[1 + 1, 2 * 2, 3 - 3, 4 / 2]', expected: [2, 4, 0, 2] },
      ];

      for (const { input, expected } of tests) {
        const evaluatedProgram = evaluate(input);
        const elements = expected.map((int) => new Integer(int));
        expect(evaluatedProgram).toEqual(new ArrayObject(elements));
      }
    });

    // Indexing with literals, expressions and bindings; the last two cases
    // index outside the array.
    it('evaluates index expressions', () => {
      const tests = [
        {
          input: '[1, 2, 3][0]',
          expected: 1,
        },
        {
          input: '[1, 2, 3][1]',
          expected: 2,
        },
        {
          input: '[1, 2, 3][2]',
          expected: 3,
        },
        {
          input: 'let i = 0; [1][i];',
          expected: 1,
        },
        {
          input: '[1, 2, 3][1 + 1];',
          expected: 3,
        },
        {
          input: 'let myArray = [1, 2, 3]; myArray[2];',
          expected: 3,
        },
        {
          input:
            'let myArray = [1, 2, 3]; myArray[0] + myArray[1] + myArray[2];',
          expected: 6,
        },
        {
          input: 'let myArray = [1, 2, 3]; let i = myArray[0]; myArray[i]',
          expected: 2,
        },
        {
          input: '[1, 2, 3][3]',
          expected: null,
        },
        {
          input: '[1, 2, 3][-1]',
          expected: null,
        },
      ];

      for (const { input, expected } of tests) {
        const evaluatedProgram = evaluate(input);

        // NOTE(review): this branches on the evaluated result rather than on
        // `expected`, so the `null` rows may never reach the Null assertion.
        // Confirm against the evaluator before switching to
        // `expected !== null`.
        if (evaluatedProgram) {
          expect(evaluatedProgram).toEqual(new Integer(expected as number));
        } else {
          expect(evaluatedProgram).toEqual(new Null());
        }
      }
    });
  });

  describe('evaluates hash literals', () => {
    // Keys may be strings, integers, booleans or computed expressions; every
    // stored value is compared through its `inspect()` text.
    it('evaluates a hashmap', () => {
      const input = `let two = "two";
      {
        "one": 10 - 9,
        "two": 1 + 1,
        "thr" + "ee": 6 / 2,
        4: 4,
        true: 5,
        false: 6,
      }`;

      const evaluatedProgram = evaluate(input);
      expect(evaluatedProgram instanceof Hash).toEqual(true);

      const expectedHashMap = new Map();
      expectedHashMap.set(new StringObject('one').hashKey(), 1);
      expectedHashMap.set(new StringObject('two').hashKey(), 2);
      expectedHashMap.set(new StringObject('three').hashKey(), 3);
      expectedHashMap.set(new Integer(4).hashKey(), 4);
      expectedHashMap.set(new BooleanLiteral(true).hashKey(), 5);
      expectedHashMap.set(new BooleanLiteral(false).hashKey(), 6);

      if (evaluatedProgram instanceof Hash) {
        for (const [key, hashPair] of evaluatedProgram.pairs.entries()) {
          const expectedHashPair = expectedHashMap.get(key);
          expect(expectedHashPair.toString()).toEqual(hashPair.value.inspect());
        }
      }
    });

476 | it('evaluates hash index expressions', () => { 477 | const tests = [ 478 | { 479 | input: `{"foo": 5}["foo"]`, 480 | expected: 5, 481 | }, 482 | 483 | { 484 | input: `{"foo": 5}["bar"]`, 485 | expected: null, 486 | }, 487 | { 488 | input: `let key = "foo"; {"foo": 5}[key]`, 489 | expected: 5, 490 | }, 491 | { 492 | input: `{}["foo"]`, 493 | expected: null, 494 | }, 495 | { 496 | input: `{5: 5}[5]`, 497 | expected: 5, 498 | }, 499 | { 500 | input: `{true: 5}[true]`, 501 | expected: 5, 502 | }, 503 | { 504 | input: `{false: 5}[false]`, 505 | expected: 5, 506 | }, 507 | ]; 508 | 509 | for (const { input, expected } of tests) { 510 | const evaluated = evaluate(input); 511 | 512 | if (expected) { 513 | expect(evaluated).toEqual(new Integer(expected));
import {
  ArrayObject,
  BooleanLiteral,
  Builtin,
  Environment,
  ErrorObject,
  EvalObject,
  FunctionObject,
  Hash,
  HashPair,
  Integer,
  Null,
  ObjectTypes,
  ReturnValue,
  StringObject,
} from 'object';

import {
  BlockStatement,
  BooleanExpression,
  ExpressionStatement,
  Identifier,
  IfExpression,
  InfixExpression,
  IntegerLiteral,
  LetStatement,
  PrefixExpression,
  Program,
  ReturnStatement,
  FunctionLiteral,
  CallExpression,
  StringLiteral,
  ArrayLiteral,
  IndexExpression,
  HashLiteral,
} from 'ast';

import {
  Expression,
  ExpressionKind,
  Node,
  ProgramKind,
  Statement,
  StatementKind,
} from 'ast/base';

// Shared singletons: every null / boolean result produced by the evaluator
// should be one of these three objects.
const NULL = new Null();
const TRUE = new BooleanLiteral(true);
const FALSE = new BooleanLiteral(false);

/** Builtin functions, looked up by the identifier used in source code. */
type Builtins = Record<string, Builtin>;

/**
 * Tree-walking evaluator: turns AST nodes into runtime objects.
 *
 * Failures are reported as `ErrorObject` values that are returned (never
 * thrown) and propagated upwards by every evaluation step.
 */
export class Evaluator {
  /**
   * Builtin functions. `evaluateIdentifier` consults this table only after the
   * environment lookup fails, so user bindings shadow builtins.
   */
  builtins: Builtins = {
    // Logs the `inspect()` text of each argument; always evaluates to NULL.
    print: new Builtin((...args: EvalObject[]) => {
      for (const arg of args) {
        console.log(arg.inspect());
      }

      return NULL;
    }),
    // Length of a string or of an array.
    len: new Builtin((...args: EvalObject[]) => {
      if (args.length !== 1) {
        return this.newError(
          `wrong number of arguments. got=${args.length}, want=1`
        );
      }

      const arg = args[0];

      if (arg instanceof StringObject) {
        return new Integer(arg.value.length);
      }

      if (arg instanceof ArrayObject) {
        return new Integer(arg.elements.length);
      }

      return this.newError(
        `argument to "len" not supported, got ${arg.type()}`
      );
    }),
    // First element of an array, or NULL when the array is empty.
    first: new Builtin((...args: EvalObject[]) => {
      if (args.length !== 1) {
        return this.newError(
          `wrong number of arguments. got=${args.length}, want=1`
        );
      }

      if (args[0].type() !== ObjectTypes.ARRAY) {
        return this.newError(
          `argument to "first" must be ARRAY, got ${args[0].type()}`
        );
      }

      const array = args[0] as ArrayObject;

      if (array.elements.length > 0) {
        return array.elements[0];
      }

      return NULL;
    }),
    // Last element of an array, or NULL when the array is empty.
    last: new Builtin((...args: EvalObject[]) => {
      if (args.length !== 1) {
        return this.newError(
          `wrong number of arguments. got=${args.length}, want=1`
        );
      }

      if (args[0].type() !== ObjectTypes.ARRAY) {
        return this.newError(
          `argument to "last" must be ARRAY, got ${args[0].type()}`
        );
      }

      const array = args[0] as ArrayObject;
      const elementsLength = array.elements.length;

      if (elementsLength > 0) {
        return array.elements[elementsLength - 1];
      }

      return NULL;
    }),
    // New array holding every element but the first; NULL for an empty array.
    rest: new Builtin((...args: EvalObject[]) => {
      if (args.length !== 1) {
        return this.newError(
          `wrong number of arguments. got=${args.length}, want=1`
        );
      }

      if (args[0].type() !== ObjectTypes.ARRAY) {
        return this.newError(
          `argument to "rest" must be ARRAY, got ${args[0].type()}`
        );
      }

      const array = args[0] as ArrayObject;

      if (array.elements.length > 0) {
        return new ArrayObject(array.elements.slice(1));
      }

      return NULL;
    }),
    // New array with the second argument appended; the input is not mutated.
    push: new Builtin((...args: EvalObject[]) => {
      if (args.length !== 2) {
        return this.newError(
          `wrong number of arguments. got=${args.length}, want=2`
        );
      }

      if (args[0].type() !== ObjectTypes.ARRAY) {
        return this.newError(
          `argument to "push" must be ARRAY, got ${args[0].type()}`
        );
      }

      const array = args[0] as ArrayObject;

      return new ArrayObject([...array.elements, args[1]]);
    }),
  };

  /**
   * Evaluates a single AST node.
   *
   * @param node - The node to evaluate; dispatch is on `node.kind`.
   * @param env - Environment used to resolve and store identifiers.
   * @returns The resulting object, an `ErrorObject` on failure, or
   * `null`/`undefined` when the node produces no value (for example a `let`
   * statement, an empty block, or an unknown node kind).
   */
  evaluate(node: Node, env: Environment): EvalObject | null | undefined {
    switch (node.kind) {
      case ProgramKind.program:
        return this.evaluateProgram((node as Program).statements, env);
      case StatementKind.Expression:
        return this.evaluate((node as ExpressionStatement).expression, env);
      case ExpressionKind.IntegerLiteral:
        return new Integer((node as IntegerLiteral).value);
      case ExpressionKind.Boolean:
        return this.toBooleanLiteral((node as BooleanExpression).value);
      case ExpressionKind.StringLiteral:
        return new StringObject((node as StringLiteral).value);
      case ExpressionKind.Prefix: {
        const prefix = node as PrefixExpression;
        const right = this.evaluate(prefix.right, env);

        // Errors and "no value" results are passed through untouched.
        if (this.isError(right) || !right) {
          return right;
        }

        return this.evaluatePrefixExpression(prefix.operator, right);
      }
      case ExpressionKind.Infix: {
        const infix = node as InfixExpression;
        const left = this.evaluate(infix.left, env);

        if (this.isError(left)) {
          return left;
        }

        const right = this.evaluate(infix.right, env);

        if (this.isError(right)) {
          return right;
        }

        if (left && right) {
          return this.evaluateInfixExpression(infix.operator, left, right);
        }

        return null;
      }
      case StatementKind.Block:
        return this.evaluateBlockStatement(node as BlockStatement, env);
      case ExpressionKind.If:
        return this.evaluateIfExpression(node as IfExpression, env);
      case StatementKind.Return: {
        const value = this.evaluate((node as ReturnStatement).returnValue, env);

        if (this.isError(value)) {
          return value;
        }

        // Wrapping marks the value so enclosing blocks stop evaluating.
        return value ? new ReturnValue(value) : null;
      }
      case StatementKind.Let: {
        const letStatement = node as LetStatement;
        const value = this.evaluate(letStatement.value, env);

        if (this.isError(value)) {
          return value;
        }

        if (value) {
          env.set(letStatement.name.value, value);
        }

        // A binding itself produces no value.
        return null;
      }
      case ExpressionKind.Identifier:
        return this.evaluateIdentifier(node as Identifier, env);
      case ExpressionKind.FunctionLiteral: {
        const literal = node as FunctionLiteral;

        // The defining environment is captured, which is what makes closures work.
        return new FunctionObject(literal.parameters, literal.body, env);
      }
      case ExpressionKind.Call: {
        const call = node as CallExpression;
        const fn = this.evaluate(call.function, env);

        if (this.isError(fn) || !fn) {
          return fn;
        }

        const args = this.evaluateExpressions(call.arguments, env);

        // `evaluateExpressions` signals failure with a one-element error list.
        if (args.length === 1 && this.isError(args[0])) {
          return args[0];
        }

        return this.applyFunction(fn, args);
      }
      case ExpressionKind.ArrayLiteral: {
        const elements = this.evaluateExpressions(
          (node as ArrayLiteral).elements,
          env
        );

        if (elements.length === 1 && this.isError(elements[0])) {
          return elements[0];
        }

        return new ArrayObject(elements as EvalObject[]);
      }
      case ExpressionKind.IndexExpression: {
        const indexExpression = node as IndexExpression;
        const left = this.evaluate(indexExpression.left, env);

        if (this.isError(left)) {
          return left;
        }

        const index = this.evaluate(indexExpression.index, env);

        if (this.isError(index)) {
          return index;
        }

        // Either side may have produced no value (e.g. `fn() {}()[0]`);
        // mirror the infix case instead of calling `.type()` on null.
        if (!left || !index) {
          return null;
        }

        return this.evaluateIndexExpression(left, index);
      }
      case ExpressionKind.HashLiteral:
        return this.evaluateHashLiteral(node as HashLiteral, env);
      default:
        return null;
    }
  }

  /**
   * Evaluates top-level statements in order. Stops at the first return value
   * (which is unwrapped) or error; otherwise yields the last statement's result.
   */
  private evaluateProgram(
    statements: Statement[],
    env: Environment
  ): EvalObject | null | undefined {
    let result: EvalObject | null | undefined;

    for (const statement of statements) {
      result = this.evaluate(statement, env);

      if (result?.type() === ObjectTypes.RETURN_VALUE) {
        return (result as ReturnValue).value;
      }

      if (result?.type() === ObjectTypes.ERROR) {
        return result;
      }
    }

    return result;
  }

  /** Maps a native boolean onto the shared TRUE / FALSE singletons. */
  private toBooleanLiteral(value: boolean) {
    return value ? TRUE : FALSE;
  }

  /**
   * Evaluates expressions left to right. On the first error the result is a
   * one-element list holding just that error.
   */
  private evaluateExpressions(expressions: Expression[], env: Environment) {
    const result: (EvalObject | null | undefined)[] = [];

    for (const expression of expressions) {
      const evaluatedExpression = this.evaluate(expression, env);

      if (this.isError(evaluatedExpression)) {
        // TODO: fix this, should return an object
        return [evaluatedExpression] as EvalObject[];
      }

      result.push(evaluatedExpression);
    }

    return result;
  }

  /**
   * Calls a user-defined function or a builtin with already-evaluated
   * arguments; any other callee yields a "not a function" error.
   */
  private applyFunction(
    fn: EvalObject,
    args: (EvalObject | null | undefined)[]
  ) {
    if (fn instanceof FunctionObject) {
      const extendedEnv = this.extendFunctionEnv(fn, args);
      const evaluatedBody = this.evaluate(fn.body, extendedEnv);
      return this.unwrapReturnValue(evaluatedBody);
    }

    if (fn instanceof Builtin) {
      return fn.fn(...(args as EvalObject[]));
    }

    return this.newError(`not a function: ${fn.type()}`);
  }

  /** Dispatches the `!` and `-` prefix operators. */
  private evaluatePrefixExpression(operator: string, operand: EvalObject) {
    switch (operator) {
      case '!':
        return this.evaluateBangOperatorExpression(operand);
      case '-':
        return this.evaluateMinusOperatorExpression(operand);
      default:
        return this.newError(`unknown operator: ${operator}${operand.type()}`);
    }
  }

  /**
   * Logical negation: booleans are inverted, null negates to TRUE and every
   * other object negates to FALSE.
   *
   * Decided by type and value rather than object identity, so a boolean that
   * is not one of the shared singletons is still negated correctly.
   */
  private evaluateBangOperatorExpression(operand: EvalObject) {
    if (operand instanceof BooleanLiteral) {
      return this.toBooleanLiteral(!operand.value);
    }

    if (operand instanceof Null) {
      return TRUE;
    }

    return FALSE;
  }

  /** Arithmetic negation; only defined for integers. */
  private evaluateMinusOperatorExpression(operand: EvalObject) {
    if (operand.type() !== ObjectTypes.INTEGER) {
      return this.newError(`unknown operator: -${operand.type()}`);
    }

    return new Integer(-(operand as Integer).value);
  }

  /**
   * Dispatches an infix operator on the operand types. Mixed types give a
   * "type mismatch" error, unsupported same-type pairs an "unknown operator"
   * error.
   */
  private evaluateInfixExpression(
    operator: string,
    left: EvalObject,
    right: EvalObject
  ) {
    const leftType = left.type();
    const rightType = right.type();

    if (leftType === ObjectTypes.INTEGER && rightType === ObjectTypes.INTEGER) {
      return this.evaluateIntegerInfixExpression(
        operator,
        left as Integer,
        right as Integer
      );
    }

    if (leftType === ObjectTypes.BOOLEAN && rightType === ObjectTypes.BOOLEAN) {
      return this.evaluateBooleanInfixExpression(
        operator,
        left as BooleanLiteral,
        right as BooleanLiteral
      );
    }

    if (leftType === ObjectTypes.STRING && rightType === ObjectTypes.STRING) {
      return this.evaluateStringInfixExpression(
        operator,
        left as StringObject,
        right as StringObject
      );
    }

    if (leftType !== rightType) {
      return this.newError(`type mismatch: ${leftType} ${operator} ${rightType}`);
    }

    return this.newError(
      `unknown operator: ${leftType} ${operator} ${rightType}`
    );
  }

  /**
   * Integer arithmetic and comparison. Comparisons return the shared
   * TRUE / FALSE singletons, consistent with boolean literals and boolean
   * comparisons.
   */
  private evaluateIntegerInfixExpression(
    operator: string,
    left: Integer,
    right: Integer
  ) {
    const leftValue = left.value;
    const rightValue = right.value;

    switch (operator) {
      case '+':
        return new Integer(leftValue + rightValue);
      case '-':
        return new Integer(leftValue - rightValue);
      case '*':
        return new Integer(leftValue * rightValue);
      case '/':
        // NOTE(review): plain JS division, so `5 / 2` yields 2.5 and `1 / 0`
        // yields Infinity; confirm whether truncation or an error is wanted.
        return new Integer(leftValue / rightValue);
      case '<':
        return this.toBooleanLiteral(leftValue < rightValue);
      case '>':
        return this.toBooleanLiteral(leftValue > rightValue);
      case '==':
        return this.toBooleanLiteral(leftValue === rightValue);
      case '!=':
        return this.toBooleanLiteral(leftValue !== rightValue);
      default:
        return this.newError(
          `unknown operator: ${left.type()} ${operator} ${right.type()}`
        );
    }
  }

  /** Boolean equality / inequality; any other operator is an error. */
  private evaluateBooleanInfixExpression(
    operator: string,
    left: BooleanLiteral,
    right: BooleanLiteral
  ) {
    const leftValue = left.value;
    const rightValue = right.value;

    switch (operator) {
      case '==':
        return this.toBooleanLiteral(leftValue === rightValue);
      case '!=':
        return this.toBooleanLiteral(leftValue !== rightValue);
      default:
        return this.newError(
          `unknown operator: ${left.type()} ${operator} ${right.type()}`
        );
    }
  }

  /** String concatenation (`+`); any other operator is an error. */
  private evaluateStringInfixExpression(
    operator: string,
    left: StringObject,
    right: StringObject
  ) {
    if (operator !== '+') {
      return this.newError(
        `unknown operator: ${left.type()} ${operator} ${right.type()}`
      );
    }

    return new StringObject(left.value + right.value);
  }

  /**
   * Evaluates a block. A return value is handed back still wrapped so that
   * enclosing blocks stop as well; only the program or the function call
   * unwraps it.
   */
  private evaluateBlockStatement(node: BlockStatement, env: Environment) {
    let result: EvalObject | null | undefined;

    for (const statement of node.statements) {
      result = this.evaluate(statement, env);

      const type = result?.type();

      if (type === ObjectTypes.RETURN_VALUE || type === ObjectTypes.ERROR) {
        return result;
      }
    }

    return result;
  }

  /**
   * Evaluates `if (condition) { ... } else { ... }`. Without an alternative a
   * falsy condition yields NULL. An error raised by the condition is returned
   * as-is instead of being treated as a truthy value.
   */
  private evaluateIfExpression(node: IfExpression, env: Environment) {
    const condition = this.evaluate(node.condition, env);

    if (this.isError(condition)) {
      return condition;
    }

    if (this.isTruthy(condition)) {
      return this.evaluate(node.consequence, env);
    }

    if (node.alternative) {
      return this.evaluate(node.alternative, env);
    }

    return NULL;
  }

  /** Resolves an identifier: environment first, then builtins, else an error. */
  private evaluateIdentifier(node: Identifier, env: Environment) {
    const { has, value } = env.get(node.value);

    if (has) {
      return value;
    }

    const builtin = this.builtins[node.value];

    if (builtin) {
      return builtin;
    }

    return this.newError(`identifier not found: ${node.value}`);
  }

  /** Indexes an array (integer index) or a hash; anything else is an error. */
  private evaluateIndexExpression(left: EvalObject, index: EvalObject) {
    if (
      left.type() === ObjectTypes.ARRAY &&
      index.type() === ObjectTypes.INTEGER
    ) {
      return this.evaluateArrayIndexExpression(left, index);
    }

    if (left instanceof Hash) {
      return this.evaluateHashIndexExpression(left, index);
    }

    return this.newError(`index operator not supported: ${left.type()}`);
  }

  /** Array element lookup; an out-of-range index yields NULL. */
  private evaluateArrayIndexExpression(array: EvalObject, index: EvalObject) {
    const { elements } = array as ArrayObject;
    const indexValue = (index as Integer).value;

    if (indexValue < 0 || indexValue > elements.length - 1) {
      return NULL;
    }

    return elements[indexValue];
  }

  /**
   * Hash lookup. Only integers, booleans and strings can be keys; a missing
   * key yields NULL.
   */
  private evaluateHashIndexExpression(hash: Hash, index: EvalObject) {
    if (
      !(
        index instanceof Integer ||
        index instanceof BooleanLiteral ||
        index instanceof StringObject
      )
    ) {
      return this.newError(`unusable as hash key: ${index?.type()}`);
    }

    const pair = hash.pairs.get(index.hashKey());

    return pair ? pair.value : NULL;
  }

  /**
   * Builds a hash from a literal, evaluating each key before its value. The
   * first error, unusable key or value-less entry aborts the construction.
   */
  private evaluateHashLiteral(node: HashLiteral, env: Environment) {
    const pairs = new Map();

    for (const [nodeKey, nodeValue] of node.pairs.entries()) {
      const key = this.evaluate(nodeKey, env);

      if (this.isError(key)) {
        return key;
      }

      if (
        !(
          key instanceof Integer ||
          key instanceof BooleanLiteral ||
          key instanceof StringObject
        )
      ) {
        return this.newError(`unusable as hash key: ${key?.type()}`);
      }

      const value = this.evaluate(nodeValue, env);

      if (this.isError(value) || !value) {
        return value;
      }

      pairs.set(key.hashKey(), new HashPair(key, value));
    }

    return new Hash(pairs);
  }

  /**
   * Creates the environment for a call: a new scope enclosing the function's
   * captured environment, with each parameter bound to the argument at the
   * same position.
   */
  private extendFunctionEnv(
    fn: FunctionObject,
    args: (EvalObject | null | undefined)[]
  ) {
    const env = new Environment(fn.env);

    for (const [index, identifier] of fn.parameters.entries()) {
      env.set(identifier.value, args[index]);
    }

    return env;
  }

  /** Strips the `ReturnValue` wrapper so a `return` ends only its own function. */
  private unwrapReturnValue(evaluatedBody: EvalObject | null | undefined) {
    if (evaluatedBody instanceof ReturnValue) {
      return evaluatedBody.value;
    }

    return evaluatedBody;
  }

  /**
   * Truthiness used by `if`: a missing value, null and `false` are falsy,
   * everything else is truthy.
   *
   * Always returns a native boolean. Returning the NULL object here would be
   * truthy in JavaScript and would make a value-less condition run the
   * consequence.
   */
  private isTruthy(condition: EvalObject | null | undefined): boolean {
    if (!condition || condition instanceof Null) {
      return false;
    }

    if (condition instanceof BooleanLiteral) {
      return condition.value;
    }

    return true;
  }

  /** Creates an error object carrying `message`. */
  private newError(message: string) {
    return new ErrorObject(message);
  }

  /** True when the value exists and reports the ERROR object type. */
  private isError(evalObject: EvalObject | null | undefined): boolean {
    return evalObject?.type() === ObjectTypes.ERROR;
  }
}
syntax in the process. [...] The parser is often preceded by a separate lexical analyser, which creates tokens from the sequence of input characters." - [Wikipedia](https://en.wikipedia.org/wiki/Parsing) 4 | 5 | So the idea of the parser will be to receive input data, which can be text or tokens, and to produce a new data structure that represents this input data. 6 | 7 | The data structure the parsers produce is commonly called AST, or abstract syntax tree. It's called "abstract" because this data structure omits visible details of the source code like semicolons, newlines, whitespace, and so on. 8 | 9 | In the process of building this data structure, it also analyzes the source code, checking whether the generated tokens form a meaningful expression. This process is called _Syntactic Analysis_. 10 | 11 | Take parsing JSON strings as an example. It transforms the string input into a data structure (JavaScript Object). 12 | 13 | ```jsx 14 | const input = '{"name": "TK", "age": 25}'; 15 | const output = JSON.parse(input); 16 | 17 | output; 18 | => { name: 'TK', age: 25 } 19 | 20 | output.name; 21 | => 'TK' 22 | 23 | output.age; 24 | => 25 25 | ``` 26 | 27 | It also has the "syntactic analysis". When passed "wrong" input data, it will throw a syntax error (`SyntaxError`): 28 | 29 | ```jsx 30 | const input = '{"name": "TK", 25}'; 31 | const output = JSON.parse(input); 32 | => Uncaught SyntaxError: Unexpected number in JSON at position 15 33 | ``` 34 | 35 | In this case, position 15 is the value `25`, which is missing its attribute name. 36 | 37 | ## Fundamental AST 38 | 39 | To parse the `Let Statement`, let's first understand its syntax. 40 | 41 | ```jsx 42 | let x = 10; 43 | let y = 15; 44 | 45 | let add = fn(a, b) { 46 | return a + b; 47 | }; 48 | ``` 49 | 50 | Taking a closer look at this example, we can see a pattern here. 
All three statements have this same form: 51 | 52 | ```jsx 53 | let <identifier> = <expression>; 54 | ``` 55 | 56 | So, in this case, it's easy to understand that `10`, `15`, and `fn` are expressions. The difference between statements and expressions is simple in this PL: expressions produce values, statements don't. 57 | 58 | And they are a fundamental part of the AST. Everything is a node in the tree, which can be either a statement or an expression. 59 | 60 | ```jsx 61 | interface Node { 62 | tokenLiteral: () => string; 63 | } 64 | ``` 65 | 66 | The first step is to create this `Node` interface. Every node has to implement this `tokenLiteral` function. Meaning: every node has a token literal associated with it. 67 | 68 | ```jsx 69 | export interface Statement extends Node {} 70 | 71 | export interface Expression extends Node {} 72 | ``` 73 | 74 | The `Statement` and the `Expression` interfaces are built on top of the `Node` interface. Let's make it very simple now and improve later. 75 | 76 | The other fundamental part of the AST is the `Program`. It's the root node of the AST and it contains a list of statements. 77 | 78 | ```jsx 79 | export class Program { 80 | statements: Statement[] = []; 81 | } 82 | ``` 83 | 84 | ## Parsing the let statement 85 | 86 | Now that we have the foundation for our AST, we can build more specific statements. And we'll start with the `LetStatement`. 87 | 88 | ![Screen Shot 2021-10-16 at 21.52.02.png](https://s3-us-west-2.amazonaws.com/secure.notion-static.com/ede20de3-879f-49b1-be3d-b89bce7652f5/Screen_Shot_2021-10-16_at_21.52.02.png) 89 | 90 | This is how I'm visualizing the relationship between the `LetStatement` and the other interfaces. 91 | 92 | We start with "everything is a node". Then we have the two main interfaces: `Expression` and `Statement`. In this case, `LetStatement` implements the `Statement` interface. 93 | 94 | The `LetStatement` has these attributes: 95 | 96 | - `token`: it has the type of the `Token` class we defined when we built the lexer. 
(just to refresh our memories, the `Token` has two attributes: the `type` (string) and `literal` (string). The `type` is any token we defined in the enum and the `literal` is the literal value of the token) 97 | - `value`: it's an `Expression`. But we'll see more about this only in the next part of this series. 98 | - `name`: it's an `Identifier` that has a `token` and a `value` as the attributes. 99 | 100 | To illustrate how it works in the code, let's see this example 101 | 102 | ```tsx 103 | let x = 1; 104 | ``` 105 | 106 | The representation of the `LetStatement`'s AST would be: 107 | 108 | ```tsx 109 | LetStatement { 110 | token: Token { type: 'LET', literal: 'let' }, 111 | name: Identifier { 112 | token: Token { type: 'IDENT', literal: 'x' }, 113 | value: 'x' 114 | } 115 | } 116 | ``` 117 | 118 | We have the `token` that's the `LET` token and the `name` that's an `Identifier` with a token `IDENT` and the `value` as `'x'`. We won't cover the `value` attribute because we'll see this `Expression` in the next part of this series. 119 | 120 | With this in mind, we can create our `LetStatement` class: 121 | 122 | ```tsx 123 | class LetStatement implements Statement { 124 | token: Token; 125 | name: Identifier; 126 | value: Expression; 127 | 128 | constructor(token: Token) { 129 | this.token = token; 130 | } 131 | 132 | tokenLiteral() { 133 | return this.token.literal; 134 | } 135 | } 136 | ``` 137 | 138 | It has everything we already discussed, but we are missing the `Identifier` implementation: 139 | 140 | ```tsx 141 | class Identifier implements Expression { 142 | token: Token; 143 | value: string; 144 | 145 | constructor(token: Token, value: string) { 146 | this.token = token; 147 | this.value = value; 148 | } 149 | 150 | tokenLiteral() { 151 | return this.token.literal; 152 | } 153 | } 154 | ``` 155 | 156 | We also discussed this structure and what it should have: the `token` and the `value`. 
157 | 158 | Now we have all the necessary AST nodes to start to implement the parser. But before we build the parser and start parsing the `LetStatement`, we'll add tests to cover this implementation first. 159 | 160 | ```tsx 161 | describe('Parser', () => { 162 | describe('parseProgram', () => { 163 | it('parses the let statement', () => { 164 | const input = ` 165 | let x = 5; 166 | let y = 10; 167 | let foobar = 10000; 168 | `; 169 | 170 | const lexer = new Lexer(input); 171 | const parser = new Parser(lexer); 172 | const program = parser.parseProgram(); 173 | 174 | const tests = [ 175 | { identifier: 'x' }, 176 | { identifier: 'y' }, 177 | { identifier: 'foobar' }, 178 | ]; 179 | 180 | tests.forEach(({ identifier }, index) => { 181 | const statement = program.statements[index]; 182 | 183 | expect(statement.tokenLiteral()).toEqual('let'); 184 | expect(statement.name.value).toEqual(identifier); 185 | expect(statement.name.tokenLiteral()).toEqual(identifier); 186 | }); 187 | }); 188 | }); 189 | }); 190 | ``` 191 | 192 | We want the parser to parse this input: 193 | 194 | ```tsx 195 | let x = 5; 196 | let y = 10; 197 | let foobar = 10000; 198 | ``` 199 | 200 | First, we pass the `input` to the `Lexer` and then the `lexer` to the `Parser`. And now we can call the `parseProgram`. It'll return the `program` with all the `statements` related to the `input` data. 201 | 202 | What are we testing here? 203 | 204 | - the statement token literal should be `'let'`. 205 | - the identifier's value should be the `identifier` we have in the `tests` array. 206 | - and the token literal of the identifier should also be the `identifier`. 207 | 208 | Now we can start implementing the `Parser` and pass the tests. We start with the basic definition of the `Parser` class. 
209 | 210 | ```tsx 211 | class Parser { 212 | private lexer: Lexer; 213 | private currentToken: Token; 214 | private peekToken: Token; 215 | 216 | constructor(lexer: Lexer) { 217 | this.lexer = lexer; 218 | } 219 | } 220 | ``` 221 | 222 | We need to make sure that the `lexer` is passed as a parameter and the parser should also have the: 223 | 224 | - `currentToken`: it is the token under examination 225 | - `peekToken`: it is the next token that helps decide what to do next 226 | 227 | We'll also have a method called `nextToken` to update the `currentToken` and the `peekToken`: 228 | 229 | ```tsx 230 | nextToken() { 231 | this.currentToken = this.peekToken; 232 | this.peekToken = this.lexer.nextToken(); 233 | } 234 | ``` 235 | 236 | And to initialize these two states, we can call this method two times in the constructor. Calling two times will set the correct state for the current and the next tokens. It looks like this: 237 | 238 | ```tsx 239 | constructor(lexer: Lexer) { 240 | this.lexer = lexer; 241 | this.nextToken(); 242 | this.nextToken(); 243 | } 244 | ``` 245 | 246 | Now the `parseProgram`. The idea of this method is to create a program and parse each statement based on the tokens and add all the statements to the statements list in the program. 247 | 248 | ```tsx 249 | parseProgram() { 250 | const program = new Program(); 251 | 252 | while (this.currentToken.type !== Tokens.EOF) { 253 | const statement = this.parseStatement(); 254 | 255 | if (statement !== null) { 256 | program.statements.push(statement); 257 | } 258 | 259 | this.nextToken(); 260 | } 261 | 262 | return program; 263 | } 264 | ``` 265 | 266 | It iterates through all the tokens from the lexer, for each token, it'll parse the statement and add it to the statements list. And in the end, it'll just return the program. 267 | 268 | From this code, we need to implement this `parseStatement` method. 
269 | 270 | ```tsx 271 | private parseLetStatement() { 272 | const statement = new LetStatement(this.currentToken); 273 | 274 | if (!this.expectPeek(Tokens.IDENT)) { 275 | return null; 276 | } 277 | 278 | const identifier = new Identifier( 279 | this.currentToken, 280 | this.currentToken.literal 281 | ); 282 | 283 | statement.name = identifier; 284 | 285 | if (!this.expectPeek(Tokens.ASSIGN)) { 286 | return null; 287 | } 288 | 289 | while (!this.currentTokenIs(Tokens.SEMICOLON)) { 290 | this.nextToken(); 291 | } 292 | 293 | return statement; 294 | } 295 | ``` 296 | 297 | I think this first implementation is not the best one, but we can refactor it later (I have some ideas in mind that I want to try later). 298 | 299 | - The first thing is to create a new `LetStatement` based on the current token 300 | - Then we need to confirm that the next token is an `Identifier`. If it is, we call `nextToken` to update the state of the `currentToken` and the `peekToken`. If not, we just return `null` (just to simplify for now). 301 | - Then we move to create the identifier. We just pass the current token and the current token's literal and update the statement name with this new identifier. 302 | - We expect that the next token is an `=` token (`ASSIGN`). 303 | - **TODO**: implement the expression/value in the next post of this series 304 | - After that, we just go through all the tokens until we find the `;` token and return the new statement. 305 | 306 | This new statement will be added to the statements list in the program. But we are missing two important methods here: `expectPeek` and `currentTokenIs`. Let's implement them. 307 | 308 | ```tsx 309 | private currentTokenIs(token: TokenType) { 310 | return this.currentToken.type === token; 311 | } 312 | ``` 313 | 314 | `currentTokenIs` is a simple method to verify whether the current token has the token type we expect.
315 | 316 | ```tsx 317 | private peekTokenIs(token: TokenType) { 318 | return this.peekToken.type === token; 319 | } 320 | 321 | private expectPeek(token: TokenType) { 322 | if (this.peekTokenIs(token)) { 323 | this.nextToken(); 324 | return true; 325 | } 326 | 327 | return false; 328 | } 329 | ``` 330 | 331 | The `expectPeek` method will use the `peekTokenIs` (that's very similar to the `currentTokenIs`, but for the `peekToken`) to verify if the token is the expected one. If it is, we update the current and the next token and return true. If not, we just return false (we'll also add error handling soon). 332 | 333 | Now we have the parser, the program, and we can parse let statements, making the tests pass. 334 | 335 | ## Handling errors 336 | 337 | For this input data `let x = 5;`, we don't have any syntax problem, so we won't have any error to handle. But imagine the PL user types this: 338 | 339 | ```tsx 340 | let 123; 341 | let a; 342 | ``` 343 | 344 | Both lines have a syntax error. In the first one, for a let statement, we expect an identifier after the `let` token, not a value/expression. And in the second one, after the identifier, the parser expects an `=` token. 345 | 346 | We could output something like this to the user: 347 | 348 | ```tsx 349 | let 123; 350 | => 'expected next token to be IDENT, got INT instead' 351 | 352 | let a; 353 | => 'expected next token to be =, got ; instead' 354 | ``` 355 | 356 | Errors are ways to communicate to the users about what they are writing and help them get to the final goal (a "working software"). 357 | 358 | But how do we do that? Let's start with the tests as we always do.
359 | 360 | ```tsx 361 | it('parses an input with error', () => { 362 | const input = ` 363 | let 123; 364 | let a; 365 | `; 366 | 367 | const lexer = new Lexer(input); 368 | const parser = new Parser(lexer); 369 | 370 | parser.parseProgram(); 371 | 372 | const errors = parser.getErrors(); 373 | const expectedErrors = [ 374 | 'expected next token to be IDENT, got INT instead', 375 | 'expected next token to be =, got ; instead', 376 | ]; 377 | 378 | errors.forEach((error, index) => { 379 | expect(error).toEqual(expectedErrors[index]); 380 | }); 381 | }); 382 | ``` 383 | 384 | Here we have the input data that we talked about. But now we'll also have a `getErrors` method to get all the possible errors the parser had while parsing the input. 385 | 386 | And we expect that the parser has these two errors we already discussed. 387 | 388 | Ok, to have all the errors, let's create a list of errors in the parser. 389 | 390 | ```tsx 391 | type Error = string; 392 | 393 | class Parser { 394 | private lexer: Lexer; 395 | private currentToken: Token; 396 | private peekToken: Token; 397 | private errors: Error[]; 398 | 399 | constructor(lexer: Lexer) { 400 | this.lexer = lexer; 401 | this.errors = []; 402 | this.nextToken(); 403 | this.nextToken(); 404 | } 405 | } 406 | ``` 407 | 408 | It's very simple: it's just a private attribute of the `Parser` class, it has the type `Error[]` (a list of `Error`, where `Error` is a `string`), and we initialize it with an empty list. 409 | 410 | The `getErrors` method will only return the errors attribute: 411 | 412 | ```tsx 413 | getErrors() { 414 | return this.errors; 415 | } 416 | ``` 417 | 418 | And now what we need to do is to add an error message when the parser gets an error. 419 | 420 | In this case, we'll add an error to the list when the next token is not the expected one, so the place we do that is in the `expectPeek` method.
421 | 422 | ```tsx 423 | private expectPeek(token: TokenType) { 424 | if (this.peekTokenIs(token)) { 425 | this.nextToken(); 426 | return true; 427 | } 428 | 429 | this.peekError(token); 430 | return false; 431 | } 432 | ``` 433 | 434 | When the next token is not the expected one, we call the `peekError` method passing the expected token to it. Now let's implement this new method: 435 | 436 | ```tsx 437 | private peekError(token: TokenType) { 438 | const msg = `expected next token to be ${token}, got ${this.peekToken.type} instead`; 439 | this.errors.push(msg); 440 | } 441 | ``` 442 | 443 | It's very simple: we just need to add an error message to the `errors` list. 444 | 445 | And the syntax error message is also simple: `expected next token to be ${token}, got ${this.peekToken.type} instead`. We expected one thing and got another. 446 | 447 | Running the tests again, they pass, and we have a way to communicate to the user about the program she/he is creating. 448 | 449 | ## Parsing the return statement 450 | 451 | The `return` is also a `Statement`. Let's illustrate it: 452 | 453 | ![Screen Shot 2021-10-17 at 12.44.43.png](https://s3-us-west-2.amazonaws.com/secure.notion-static.com/286d43b5-f06f-413f-bfe6-e793ce935b8e/Screen_Shot_2021-10-17_at_12.44.43.png) 454 | 455 | The `ReturnStatement` will implement this `Statement` interface and it has these two attributes: `token` and `returnValue`. 456 | 457 | This is actually very similar to the `LetStatement`: 458 | 459 | ```tsx 460 | class ReturnStatement implements Statement { 461 | token: Token; 462 | returnValue: Expression; 463 | 464 | constructor(token: Token) { 465 | this.token = token; 466 | } 467 | 468 | tokenLiteral() { 469 | return this.token.literal; 470 | } 471 | } 472 | ``` 473 | 474 | The `token` is a `Token` and the `returnValue` is an `Expression`. When instantiating the `ReturnStatement`, we initialize it with the `token`, and we also provide a `tokenLiteral` method.
475 | 476 | Let's create a new test to verify this new statement: 477 | 478 | ```tsx 479 | it('parses the return statement', () => { 480 | const input = ` 481 | return 5; 482 | return 10; 483 | return 10000; 484 | `; 485 | 486 | const lexer = new Lexer(input); 487 | const parser = new Parser(lexer); 488 | const program = parser.parseProgram(); 489 | 490 | const tests = [ 491 | { tokenLiteral: 'return' }, 492 | { tokenLiteral: 'return' }, 493 | { tokenLiteral: 'return' }, 494 | ]; 495 | 496 | tests.forEach(({ tokenLiteral }, index) => { 497 | const statement = program.statements[index]; 498 | 499 | expect(statement.tokenLiteral()).toEqual(tokenLiteral); 500 | }); 501 | }); 502 | ``` 503 | 504 | The input data has only correct return statements, and we expect that the statement token literal should be `return`. 505 | 506 | So far, when the program parses statements, we only handle the let statement. But we want to handle return statements as well. 507 | 508 | ```tsx 509 | private parseStatement() { 510 | switch (this.currentToken.type) { 511 | case Tokens.LET: 512 | return this.parseLetStatement(); 513 | case Tokens.RETURN: 514 | return this.parseReturnStatement(); 515 | default: 516 | return null; 517 | } 518 | } 519 | ``` 520 | 521 | If the current token is a `RETURN` token, we call the `parseReturnStatement` method. 522 | 523 | This method is also similar to the let statement, but simpler. 524 | 525 | ```tsx 526 | private parseReturnStatement() { 527 | const statement = new ReturnStatement(this.currentToken); 528 | 529 | while (!this.currentTokenIs(Tokens.SEMICOLON)) { 530 | this.nextToken(); 531 | } 532 | 533 | return statement; 534 | } 535 | ``` 536 | 537 | It creates the `ReturnStatement` passing the current token, goes to the end of the statement (semicolon - we'll talk about the `returnValue` expression in another part of this series), and returns the new statement. 538 | 539 | This new statement is added to the `statements` list.
540 | 541 | One small thing that I realized when implementing this new statement is that it also implements the `Statement` and the `statements` list doesn't know if it is a `LetStatement` or a `ReturnStatement`. One way to solve this in TypeScript is to add a [tagged union](https://mariusschulz.com/blog/tagged-union-types-in-typescript). 542 | 543 | Let's add the `kind` attribute to these two statements. 544 | 545 | - `LetStatement`: 546 | 547 | ```tsx 548 | class LetStatement implements BaseStatement { 549 | token: Token; 550 | name: Identifier; 551 | value: Expression; 552 | kind: StatementKind.Let; 553 | 554 | constructor(token: Token) { 555 | this.token = token; 556 | this.kind = StatementKind.Let; 557 | } 558 | 559 | tokenLiteral() { 560 | return this.token.literal; 561 | } 562 | } 563 | ``` 564 | 565 | - `ReturnStatement`: 566 | 567 | ```tsx 568 | class ReturnStatement implements BaseStatement { 569 | token: Token; 570 | kind: StatementKind.Return; 571 | returnValue: Expression; 572 | 573 | constructor(token: Token) { 574 | this.token = token; 575 | this.kind = StatementKind.Return; 576 | } 577 | 578 | tokenLiteral() { 579 | return this.token.literal; 580 | } 581 | } 582 | ``` 583 | 584 | The two statements have a new attribute called `kind` and we initialize it with the expected statement kind. 585 | 586 | ```tsx 587 | enum StatementKind { 588 | Let = 'let', 589 | Return = 'return', 590 | } 591 | ``` 592 | 593 | It's basically an enum to support this new attribute. 
594 | 595 | But we also need to update some things here: 596 | 597 | ```tsx 598 | type StatementKindType = StatementKind.Let | StatementKind.Return; 599 | type Statement = LetStatement | ReturnStatement; 600 | 601 | interface BaseStatement extends Node { 602 | kind: StatementKindType; 603 | } 604 | 605 | class Program { 606 | statements: Statement[] = []; 607 | } 608 | ``` 609 | 610 | - The `StatementKindType` is the union of all possible kinds 611 | - The `Statement` is the union of all possible statements 612 | - The `BaseStatement` is the interface that `LetStatement` and `ReturnStatement` implement 613 | - The `Program` still has the `Statement` type for the list of the statements 614 | 615 | That's all for this post. In the next one, we'll talk about expressions. I'm excited about it. 616 | 617 | ## **Final words & Resources** 618 | 619 | If you didn't have the opportunity, take a look at the posts from the [Building an Interpreter series](https://leandrotk.github.io/series/building-an-interpreter/): 620 | 621 | - [Building an Interpreter: Lexical Analysis - Part 1](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-1.html) 622 | - [Building an Interpreter: Lexical Analysis - Part 2](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-2.html) 623 | - [Building an Interpreter: Lexical Analysis - Part 3](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-lexical-analysis-part-3.html) 624 | - [Building an Interpreter: REPL](https://leandrotk.github.io/series/building-an-interpreter/building-an-interpreter-repl.html) 625 | 626 | These are the resources I'm using to learn more about this field: 627 | 628 | - [Crafting an Interpreter](https://github.com/imteekay/crafting-an-interpreter): the open-source project of the compiler for the TypeScript version of the Monkey programming language.
629 | - [Programming Language Theory](https://github.com/leandrotk/programming-language-theory): a bunch of resources about my studies on Programming Language Theory & Applied PLT. 630 | - [Writing an Interpreter in Go](https://www.goodreads.com/book/show/32681092-writing-an-interpreter-in-go): the book I'm reading to learn and implement the Monkey compiler. 631 | --------------------------------------------------------------------------------