├── .gitattributes ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .tours ├── lexer.tour ├── overview.tour └── parser.tour ├── LICENSE ├── README.md ├── baselines └── reference │ ├── basicLex.lex.baseline │ ├── firstLex.lex.baseline │ ├── newlineLex.lex.baseline │ ├── redeclare.errors.baseline │ ├── redeclare.js.baseline │ ├── redeclare.tree.baseline │ ├── semicolonLex.lex.baseline │ ├── singleIdentifier.errors.baseline │ ├── singleIdentifier.js.baseline │ ├── singleIdentifier.tree.baseline │ ├── singleTypedVar.errors.baseline │ ├── singleTypedVar.js.baseline │ ├── singleTypedVar.tree.baseline │ ├── singleVar.errors.baseline │ ├── singleVar.js.baseline │ ├── singleVar.tree.baseline │ ├── twoStatements.errors.baseline │ ├── twoStatements.js.baseline │ ├── twoStatements.tree.baseline │ ├── twoTypedStatements.errors.baseline │ ├── twoTypedStatements.js.baseline │ ├── twoTypedStatements.tree.baseline │ ├── typeAlias.errors.baseline │ ├── typeAlias.js.baseline │ ├── typeAlias.tree.baseline │ ├── underscoreLex.lex.baseline │ └── varLex.lex.baseline ├── package-lock.json ├── package.json ├── src ├── bind.ts ├── check.ts ├── compile.ts ├── emit.ts ├── error.ts ├── index.ts ├── lex.ts ├── parse.ts ├── test.ts ├── transform.ts └── types.ts ├── tests ├── redeclare.ts ├── singleIdentifier.ts ├── singleTypedVar.ts ├── singleVar.ts ├── twoStatements.ts ├── twoTypedStatements.ts └── typeAlias.ts └── tsconfig.json /.gitattributes: -------------------------------------------------------------------------------- 1 | /baselines/reference/* eol=lf -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | strategy: 15 | matrix: 16 | node-version: [15.x] 17 | 18 | steps: 19 | - 
uses: actions/checkout@v2 20 | - name: Use Node.js ${{ matrix.node-version }} 21 | uses: actions/setup-node@v1 22 | with: 23 | node-version: ${{ matrix.node-version }} 24 | - run: npm ci 25 | - run: npm run build 26 | - run: npm test -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | baselines/local/ 3 | *.js -------------------------------------------------------------------------------- /.tours/lexer.tour: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://aka.ms/codetour-schema", 3 | "title": "Lexer", 4 | "steps": [ 5 | { 6 | "title": "Introduction", 7 | "description": "The lexer is the first phase of the compiler. It prepares the program text for the parser by dividing the characters into categories called *tokens*. For example, `=` is a different token from `==` and `+` is a different token from `++` — the lexer takes care of figuring out which is which in a tricky expression like `x+++x`." 8 | }, 9 | { 10 | "file": "src/types.ts", 11 | "description": "Our first stop is the Token type. You can see that there are basically 4 categories of tokens:\n\n1. Keywords like `function`, `var`, `return`.\n2. 'words' like Literal (`'hi'` and `123`), Whitespace (`' ', '\\t', '\\n'`) and Identifier (`x, y, aLongIdentifier`).\n3. Single characters like `=`, `;`. This would include *digraphs* like `==` and `>=` if mini-typescript supported them, but it doesn't.\n4. Abstract characters like Unknown, BOF (Beginning of File) and EOF (End of File).", 12 | "line": 1 13 | }, 14 | { 15 | "file": "src/types.ts", 16 | "description": "The Lexer type exposes two categories of functions:\n\n1. `scan` advances to the next token.\n2. 
`token`, `pos`, `text` return the respective parts of the lexer's state.\n\nTypically, the parser calls `token` to decide what to do based on the current token. It might collect additional information with `pos` and `text`, then advances to the next token with `scan`.", 17 | "line": 17 18 | }, 19 | { 20 | "file": "src/types.ts", 21 | "description": "`text` returns the text of word-like tokens -- that is, `'hi'` for strings, `x` for identifiers, `1` for numbers. The parser should only call this function when the *current* token is known to be word-like, because the lexer just keeps the text around from the last word-like token.", 22 | "line": 21 23 | }, 24 | { 25 | "file": "src/lex.ts", 26 | "description": "All the complexity of the lexer is in `scan`. `token`, `pos` and `text` just expose the state of the lexer.", 27 | "line": 13 28 | }, 29 | { 30 | "file": "src/lex.ts", 31 | "description": "`scan` first skips all whitespace by calling the `scanForward` utility.\n\nThe real Typescript lexer retains some information about this step to help with ASI, and it also saves the position before scanning forward.", 32 | "line": 19 33 | }, 34 | { 35 | "file": "src/lex.ts", 36 | "description": "`scanForward` is a `while` loop that increments its position as long as the predicate function is true.\n\nI can't remember if the real Typescript lexer works like this. I don't think it does.", 37 | "line": 44 38 | }, 39 | { 40 | "file": "src/lex.ts", 41 | "description": "mini-typescript's lexer first checks whether it's at the end of the source text and sets `token` to `EOF` if so. That's the signal for the parser to stop parsing.\n\nNotice that `scan` doesn't *return*: it sets `token` instead. 
mini-typescript is simple enough that it can just run to the end of the function, but Typescript is not.", 42 | "line": 21 43 | }, 44 | { 45 | "file": "src/lex.ts", 46 | "description": "Then it checks for numbers -- mini-typescript only supports integers, so the regexes are simple.\nNote that `scan` sets both `text` and `token` for numbers.\nAlso, mini-typescript doesn't support string, boolean or bigint literals, so it just calls the number literal token `Literal`.", 47 | "line": 24 48 | }, 49 | { 50 | "file": "src/lex.ts", 51 | "description": "Third, the scanner checks for identifiers. This is basically the same as lexing a number.\n\nThe real Typescript lexer uses Unicode classes for identifiers.", 52 | "line": 29 53 | }, 54 | { 55 | "file": "src/lex.ts", 56 | "description": "Identifiers get turned into keywords if they're found as keys in the `keywords` object.", 57 | "line": 32 58 | }, 59 | { 60 | "file": "src/lex.ts", 61 | "description": "Finally, single characters fall through a giant switch statement with all the punctuation tokens.\n\nThe real Typescript lexer handles digraphs like `==` by checking for another `=` in the `=` case. But mini-typescript doesn't support any digraphs.", 62 | "line": 36 63 | }, 64 | { 65 | "file": "src/lex.ts", 66 | "description": "`lexAll` is a convenience function used only for testing: it turns a string into an *array* of tokens instead of an object that lets you iterate a *stream* of tokens. It makes a good, simple example of how to use the lexer.", 67 | "line": 48 68 | }, 69 | { 70 | "file": "src/lex.ts", 71 | "description": "First, create a lexer (plus an array to hold the tokens and a variable to hold the current token).", 72 | "line": 49 73 | }, 74 | { 75 | "file": "src/lex.ts", 76 | "description": "1. Advance to the next token.", 77 | "line": 53 78 | }, 79 | { 80 | "file": "src/lex.ts", 81 | "description": "2. Save the token.", 82 | "line": 54 83 | }, 84 | { 85 | "file": "src/lex.ts", 86 | "description": "3. 
Quit if the token is `EOF`.", 87 | "line": 56 88 | }, 89 | { 90 | "file": "src/lex.ts", 91 | "description": "4. For word-like tokens, save both the token and its text. Otherwise just save the token.", 92 | "line": 58 93 | } 94 | ] 95 | } -------------------------------------------------------------------------------- /.tours/overview.tour: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://aka.ms/codetour-schema", 3 | "title": "Overview", 4 | "steps": [ 5 | { 6 | "file": "src/index.ts", 7 | "description": "`mini-typescript` is a miniature Typescript compiler intended to teach you how the real Typescript compiler works. It only compiles a tiny slice of the Typescript language, but for that tiny slice, it does almost everything that the real compiler does. (It doesn't cover the language service -- mini-typescript is just a batch compiler.)\n\nThis overview covers all the compiler phases that mini-typescript has. But first, let's look at how the batch compiler `mtsc` works.", 8 | "line": 2 9 | }, 10 | { 11 | "file": "src/index.ts", 12 | "description": "First of all `mtsc` processes arguments. The only allowed syntax is `mtsc file.ts`, so the code is pretty simple.", 13 | "line": 4 14 | }, 15 | { 16 | "file": "src/index.ts", 17 | "description": "Then it reads the file and passes it to `compile`. `compile` returns the parse tree, a list of errors and the output JS file. If mini-typescript had an API, it would return the parse tree, but `mtsc` just ignores it.\n\nLet's look at `compile` next.", 18 | "line": 14 19 | }, 20 | { 21 | "file": "src/compile.ts", 22 | "description": "`mini-typescript` is a pipeline of the following phases:\n\n- Lex\n- Parse\n- Bind\n- Check\n- Transform\n- Emit\n\nThese phases are common to almost all compilers, although they may be combined.\n\nOne important phase that `mini-typescript` omits is module resolution. 
And both Typescript and `mini-typescript` have relatively simple transform and emit phases, since their target is Javascript, a language that is very close to Typescript.\n\nI'll briefly explain each phase now.", 23 | "line": 10 24 | }, 25 | { 26 | "file": "src/compile.ts", 27 | "description": "The lexer breaks source text down into *tokens*, which represent words and punctuation. Lexing makes it easier for the parser by distinguishing `=` from `==` or `+` from `++`, for example. Some more examples:\n\n* Keywords: `Function`, `Var`, `Let`\n* Punctuation: `Equals`, `EqualsEquals`, `LeftBrace`, `RightBrace`\n* Names: `Identifier`", 28 | "line": 3 29 | }, 30 | { 31 | "file": "src/compile.ts", 32 | "description": "Using the tokens that the lexer produces, the parser builds a *tree* to represent the structure of the program. For example, a function has a name, a list of parameters and a body. The body in turn contains a list of statements. So when a parser sees the Function keyword, it knows the next token should be Identifier, followed by a list of parameters and then a function body. From those 3 parts it produces something like this object:\n\n```ts\n{\n name: \"foo\",\n parameters: [...],\n body: { statements: [...] }\n}\n```\n\nAll the phases after the parser work by visiting each node in the parse tree. That is, for the example function above, there is code to check the function itself, plus code that checks the parameters and the body recursively.", 33 | "line": 4 34 | }, 35 | { 36 | "file": "src/compile.ts", 37 | "description": "The binder produces an *environment*, which is a table that maps names to where they're declared. For example, if you declare `var x = 1`, the binder will record that `x` is declared at `var x = 1`. This lets the checker *resolve* a name to a declaration so it can figure out what its type is.\n\nIn a full language, many constructs have their own environments because they introduce a new *scope*, which is an area where names are valid. 
For example, functions have parameters and local variables that are only valid inside their body, and classes have properties that behave similarly. In mini-typescript, only modules have an environment because there are no functions or classes.", 38 | "line": 5 39 | }, 40 | { 41 | "file": "src/compile.ts", 42 | "description": "The checker checks each node in the parse tree and issues an error whenever it finds something wrong.\n\nAlthough the checker *mainly* checks types, it can check lots of other things too. Even in mini-typescript, it issues an error for an undeclared identifier. But Typescript's checker has errors for incorrect use of modifiers like `public` and `override`, errors for incorrect imports and even errors for complex incorrect syntax, among many others.", 43 | "line": 6 44 | }, 45 | { 46 | "file": "src/compile.ts", 47 | "description": "The transformer converts the Typescript parse tree into a Javascript parse tree. For mini-typescript, this just means removing type annotations. The same is true for the full Typescript compiler when you target ES Next, but when you target old ES versions like ES2015, the compiler converts new features like object spread into `Object.assign` calls.\n\nNotably, the transformer works with no information from the binder or checker. It just uses the parse tree. In the full Typescript compiler, this is important for fast emit of single files as you edit.", 48 | "line": 7 49 | }, 50 | { 51 | "file": "src/compile.ts", 52 | "description": "The emitter converts a Javascript parse tree into a string. It's basically a giant `toString`.\n\nNotably, the emitter can emit a Typescript parse tree too, since the only difference is that TS has type annotations.", 53 | "line": 8 54 | }, 55 | { 56 | "file": "src/compile.ts", 57 | "description": "That leaves 3 more pieces of the compiler to cover:\n\n1. Types\n2. Errors\n3. 
Tests", 58 | "line": 2 59 | }, 60 | { 61 | "file": "src/types.ts", 62 | "description": "Typescript puts all of its types into a single file named `types.ts`. They're all here, for every single phase.", 63 | "line": 2 64 | }, 65 | { 66 | "file": "src/error.ts", 67 | "description": "`mini-typescript` maintains a global array of errors that any phase can add to. To avoid dupes and follow-on errors, only the first error at a position is shown. Typescript works similarly, except that it distinguishes between syntax errors (from the parser), semantic errors (from the binder+checker), and suggestions (only shown in the editor).", 68 | "line": 2 69 | }, 70 | { 71 | "file": "src/test.ts", 72 | "description": "`mini-typescript` uses *baselines* for its tests, kind of like Jest snapshots. So does Typescript. Basically, you write a .ts file, and the tests make sure that the compiler's tree, javascript and errors match the ones from the baseline.", 73 | "line": 6 74 | } 75 | ] 76 | } -------------------------------------------------------------------------------- /.tours/parser.tour: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://aka.ms/codetour-schema", 3 | "title": "Parser", 4 | "steps": [ 5 | { 6 | "title": "Introduction", 7 | "description": "The parser builds a *tree* to represent the structure of the program. For example, a function has a name, a list of parameters and a body. The body in turn contains a list of statements. So when a parser sees the Function keyword, it knows the next token should be Identifier, followed by a list of parameters and then a function body. From those 3 parts it produces something like this object:\n\n```ts\n{\n name: \"foo\",\n parameters: [...],\n body: { statements: [...] }\n}\n```\n\nAll the phases after the parser work by visiting each node in the parse tree. For example, to emit a function, the emitter writes \"function\", followed by the function name. 
Then it recurs on the function's parameters and body." 8 | }, 9 | { 10 | "file": "src/types.ts", 11 | "description": "The `Node` enum is used as the `kind` discriminant for node types. Conveniently, this acts as an index of all the node types that the parser can construct.\n\nYou can see that mini-typescript parses only a few different kinds of nodes.", 12 | "line": 23 13 | }, 14 | { 15 | "file": "src/types.ts", 16 | "description": "`Expression` is a union of all the expression types. For mini-typescript, that's just `Identifier`, `Literal` and `Assignment`.\n\nThe real Typescript compiler doesn't have unions for all its types -- for many it uses interface inheritance instead. That's mostly for historical reasons, because discriminated unions weren't available until Typescript 2.0.", 17 | "line": 38 18 | }, 19 | { 20 | "file": "src/types.ts", 21 | "description": "Similarly, mini-typescript's node types use intersection to add `Location` for error reporting. Typescript uses interface inheritance.", 22 | "line": 39 23 | }, 24 | { 25 | "file": "src/types.ts", 26 | "description": "`Statement` consists of `ExpressionStatement`, `Var` and `TypeAlias`. `ExpressionStatement` is an expression that is used for its side-effects, like `Assignment`: `x = 1`.", 27 | "line": 52 28 | }, 29 | { 30 | "file": "src/types.ts", 31 | "description": "The parser returns a `Module`, which is really just a file. It ambitiously assumes that everyone will soon be writing nothing but modules in mini-typescript.\n\nThe parser only fills in `statements`; the binder fills in `locals`.", 32 | "line": 74 33 | }, 34 | { 35 | "file": "src/parse.ts", 36 | "description": "A module consists of statements, separated by semicolons. 
`parseSeparated` alternates calling `parseStatement` and `tryParseToken(Token.Semicolon)` until the latter returns false.\nThe final call to `parseExpected(Token.EOF)` logs an error if there's stray text at the end of the file after the last statement.", 37 | "line": 8 38 | }, 39 | { 40 | "file": "src/parse.ts", 41 | "description": "`tryParseToken` shows off basic lexer usage: first it checks whether the lexer's current token is the one expected by the caller. If it is, it advances to the next token. Otherwise it stays put. This lets the caller inspect the lexer's current token and report errors on it.", 42 | "line": 57 43 | }, 44 | { 45 | "file": "src/parse.ts", 46 | "description": "Both mini-typescript and Typescript use a hand-written recursive descent parser. These parsers are collections of functions that recursively call each other. For example `parseStatement` recursively calls `parseIdentifier` and `parseExpression`.", 47 | "line": 40 48 | }, 49 | { 50 | "file": "src/parse.ts", 51 | "description": " Then `parseExpression` recursively calls itself for assignments.", 52 | "line": 16 53 | }, 54 | { 55 | "file": "src/parse.ts", 56 | "description": "The easiest grammar rules to parse have unambiguous start tokens. 
Here, statements that start with a `var` keyword are `Var`, those that start with `type` are `TypeAlias`, and all others are `ExpressionStatements`.\n\nThe original rules are obscured by the code in a recursive descent grammar, but if you work you can see them:\n\n```\nStatement -> Expression | 'var' Identifier [':' Identifier] '=' Expression | 'type' Identifier '=' Identifier\nExpression -> *literal* | Identifier ['=' Expression]\n```", 57 | "line": 42 58 | } 59 | ] 60 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Nathan Shively-Sanders 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mini-typescript 2 | A miniature model of the Typescript compiler, intended to teach the structure of the real Typescript compiler 3 | 4 | This project contains two models of the compiler: micro-typescript and centi-typescript. 5 | 6 | micro-typescript started when I started reading [Modern Compiler Implementation in ML](https://www.cs.princeton.edu/~appel/modern/ml/) because I wanted to learn more about compiler backends. When I started building the example compiler I found I disagreed with the implementation of nearly everything in the *frontend*. So I wrote my own, and found that I had just written [a small Typescript](https://github.com/sandersn/minits). 7 | 8 | I realised a small Typescript would be useful to others who want to learn how the Typescript compiler works. So I rewrote it in Typescript and added some exercises to let you practise with it. micro-typescript is the smallest compiler I can imagine, implementing just a tiny slice of Typescript: `var` declarations, assignments and numeric literals. The only two types are `string` and `number`. 9 | 10 | So that's micro-typescript: a textbook compiler that implements a tiny bit of Typescript in a way that's a tiny bit like the Typescript compiler. [centi-typescript](https://github.com/sandersn/mini-typescript/tree/centi-typescript), on the other hand, is a 1/100 scale model of the Typescript compiler. It's intended as a reference in code for people who want to see how the Typescript compiler actually works, without the clutter caused by real-life compatibility and requirements. Currently centi-typescript is most complete in the checker, because most of Typescript's complexity is there. 11 | 12 | ### To get set up 13 | 14 | ```sh 15 | git clone https://github.com/sandersn/mini-typescript 16 | cd mini-typescript 17 | code . 
18 | 19 | # Get set up 20 | npm i 21 | npm run build 22 | 23 | # Or have your changes instantly happen 24 | npm run build -- --watch 25 | 26 | # Run the compiler: 27 | npm run mtsc ./tests/singleVar.ts 28 | ``` 29 | 30 | ### To switch to centi-typescript 31 | 32 | ```sh 33 | git checkout centi-typescript 34 | npm run build 35 | ``` 36 | 37 | ## Limitations 38 | 39 | 1. This is an example of the way that Typescript's compiler does things. A compiler textbook will help you learn *compilers*. This project will help you learn *Typescript's code*. 40 | 2. This is only a tiny slice of the language, also unlike a textbook. Often I only put in one instance of a thing, like nodes that introduce a scope, to keep the code size small. 41 | 3. There is no laziness, caching or node reuse, so the checker and transformer code do not teach you those aspects of the design. 42 | 4. There's no surrounding infrastructure, like a language service or a program builder. This is just a model of tsc. 43 | 44 | ## Exercises 45 | 46 | - Add EmptyStatement. 47 | - Make semicolon a statement ender, not statement separator. 48 | - Hint: You'll need a predicate to peek at the next token and decide if it's the start of an element. 49 | - Bonus: Switch from semicolon to newline as statement ender. 50 | - Add string literals. 51 | - Add `let`. 52 | - Make sure the binder resolves variables declared with `var` and `let` the same way. The simplest way is to add a `kind` property to `Symbol`. 53 | - Add use-before-declaration errors in the checker. 54 | - Finally, add an ES2015 -> ES5 transform that transforms `let` to `var`. 55 | - Allow var to have multiple declarations. 56 | - Check that all declarations have the same type. 57 | - Add objects and object types. 58 | - `Type` will need to become more complicated. 59 | - Add `interface`. 60 | - Make sure the binder resolves types declared with `type` and `interface` the same way. 
61 | - After the basics are working, allow interface to have multiple declarations. 62 | - Interfaces should have an object type, but that object type should combine the properties from every declaration. 63 | - Add an ES5 transformer that converts `let` -> `var`. 64 | - Add function declarations and function calls. 65 | - Add arrow functions with an appropriate transform in ES5. 66 | -------------------------------------------------------------------------------- /baselines/reference/basicLex.lex.baseline: -------------------------------------------------------------------------------- 1 | [ 2 | [ 3 | "Identifier", 4 | "x" 5 | ] 6 | ] -------------------------------------------------------------------------------- /baselines/reference/firstLex.lex.baseline: -------------------------------------------------------------------------------- 1 | [ 2 | [ 3 | "Literal", 4 | "1200" 5 | ], 6 | [ 7 | "Identifier", 8 | "Hello" 9 | ], 10 | [ 11 | "Identifier", 12 | "World1" 13 | ], 14 | [ 15 | "Unknown" 16 | ], 17 | [ 18 | "Literal", 19 | "14" 20 | ], 21 | [ 22 | "Identifier", 23 | "d" 24 | ] 25 | ] -------------------------------------------------------------------------------- /baselines/reference/newlineLex.lex.baseline: -------------------------------------------------------------------------------- 1 | [ 2 | [ 3 | "Identifier", 4 | "x" 5 | ], 6 | [ 7 | "Identifier", 8 | "y" 9 | ] 10 | ] -------------------------------------------------------------------------------- /baselines/reference/redeclare.errors.baseline: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pos": 14, 4 | "message": "Cannot redeclare x; first declared at 3" 5 | } 6 | ] -------------------------------------------------------------------------------- /baselines/reference/redeclare.js.baseline: -------------------------------------------------------------------------------- 1 | "var x = 1;\nvar x = 2" 
-------------------------------------------------------------------------------- /baselines/reference/redeclare.tree.baseline: -------------------------------------------------------------------------------- 1 | { 2 | "locals": { 3 | "x": [ 4 | { 5 | "kind": "Var", 6 | "pos": 3 7 | } 8 | ] 9 | }, 10 | "statements": [ 11 | { 12 | "kind": "Var", 13 | "name": { 14 | "kind": "Identifier", 15 | "text": "x" 16 | }, 17 | "init": { 18 | "kind": "Literal", 19 | "value": 1 20 | } 21 | }, 22 | { 23 | "kind": "Var", 24 | "name": { 25 | "kind": "Identifier", 26 | "text": "x" 27 | }, 28 | "init": { 29 | "kind": "Literal", 30 | "value": 2 31 | } 32 | } 33 | ] 34 | } -------------------------------------------------------------------------------- /baselines/reference/semicolonLex.lex.baseline: -------------------------------------------------------------------------------- 1 | [ 2 | [ 3 | "Identifier", 4 | "x" 5 | ], 6 | [ 7 | "Semicolon" 8 | ], 9 | [ 10 | "Identifier", 11 | "y" 12 | ] 13 | ] -------------------------------------------------------------------------------- /baselines/reference/singleIdentifier.errors.baseline: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pos": 1, 4 | "message": "Could not resolve x" 5 | } 6 | ] -------------------------------------------------------------------------------- /baselines/reference/singleIdentifier.js.baseline: -------------------------------------------------------------------------------- 1 | "x" -------------------------------------------------------------------------------- /baselines/reference/singleIdentifier.tree.baseline: -------------------------------------------------------------------------------- 1 | { 2 | "locals": {}, 3 | "statements": [ 4 | { 5 | "kind": "ExpressionStatement", 6 | "expr": { 7 | "kind": "Identifier", 8 | "text": "x" 9 | } 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- 
/baselines/reference/singleTypedVar.errors.baseline: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pos": 17, 4 | "message": "Cannot assign initialiser of type 'number' to variable with declared type 'string'." 5 | } 6 | ] -------------------------------------------------------------------------------- /baselines/reference/singleTypedVar.js.baseline: -------------------------------------------------------------------------------- 1 | "var s = 1" -------------------------------------------------------------------------------- /baselines/reference/singleTypedVar.tree.baseline: -------------------------------------------------------------------------------- 1 | { 2 | "locals": { 3 | "s": [ 4 | { 5 | "kind": "Var", 6 | "pos": 3 7 | } 8 | ] 9 | }, 10 | "statements": [ 11 | { 12 | "kind": "Var", 13 | "name": { 14 | "kind": "Identifier", 15 | "text": "s" 16 | }, 17 | "typename": { 18 | "kind": "Identifier", 19 | "text": "string" 20 | }, 21 | "init": { 22 | "kind": "Literal", 23 | "value": 1 24 | } 25 | } 26 | ] 27 | } -------------------------------------------------------------------------------- /baselines/reference/singleVar.errors.baseline: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- /baselines/reference/singleVar.js.baseline: -------------------------------------------------------------------------------- 1 | "var singleDeclaration = 1" -------------------------------------------------------------------------------- /baselines/reference/singleVar.tree.baseline: -------------------------------------------------------------------------------- 1 | { 2 | "locals": { 3 | "singleDeclaration": [ 4 | { 5 | "kind": "Var", 6 | "pos": 3 7 | } 8 | ] 9 | }, 10 | "statements": [ 11 | { 12 | "kind": "Var", 13 | "name": { 14 | "kind": "Identifier", 15 | "text": "singleDeclaration" 16 | }, 17 | "init": { 18 | 
"kind": "Literal", 19 | "value": 1 20 | } 21 | } 22 | ] 23 | } -------------------------------------------------------------------------------- /baselines/reference/twoStatements.errors.baseline: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- /baselines/reference/twoStatements.js.baseline: -------------------------------------------------------------------------------- 1 | "var arthurTwoShedsJackson = 1;\narthurTwoShedsJackson = 2" -------------------------------------------------------------------------------- /baselines/reference/twoStatements.tree.baseline: -------------------------------------------------------------------------------- 1 | { 2 | "locals": { 3 | "arthurTwoShedsJackson": [ 4 | { 5 | "kind": "Var", 6 | "pos": 3 7 | } 8 | ] 9 | }, 10 | "statements": [ 11 | { 12 | "kind": "Var", 13 | "name": { 14 | "kind": "Identifier", 15 | "text": "arthurTwoShedsJackson" 16 | }, 17 | "init": { 18 | "kind": "Literal", 19 | "value": 1 20 | } 21 | }, 22 | { 23 | "kind": "ExpressionStatement", 24 | "expr": { 25 | "kind": "Assignment", 26 | "name": { 27 | "kind": "Identifier", 28 | "text": "arthurTwoShedsJackson" 29 | }, 30 | "value": { 31 | "kind": "Literal", 32 | "value": 2 33 | } 34 | } 35 | } 36 | ] 37 | } -------------------------------------------------------------------------------- /baselines/reference/twoTypedStatements.errors.baseline: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pos": 17, 4 | "message": "Cannot assign initialiser of type 'number' to variable with declared type 'string'." 5 | }, 6 | { 7 | "pos": 24, 8 | "message": "Cannot assign value of type 'number' to variable of type 'string'." 
9 | } 10 | ] -------------------------------------------------------------------------------- /baselines/reference/twoTypedStatements.js.baseline: -------------------------------------------------------------------------------- 1 | "var s = 1;\ns = 2" -------------------------------------------------------------------------------- /baselines/reference/twoTypedStatements.tree.baseline: -------------------------------------------------------------------------------- 1 | { 2 | "locals": { 3 | "s": [ 4 | { 5 | "kind": "Var", 6 | "pos": 3 7 | } 8 | ] 9 | }, 10 | "statements": [ 11 | { 12 | "kind": "Var", 13 | "name": { 14 | "kind": "Identifier", 15 | "text": "s" 16 | }, 17 | "typename": { 18 | "kind": "Identifier", 19 | "text": "string" 20 | }, 21 | "init": { 22 | "kind": "Literal", 23 | "value": 1 24 | } 25 | }, 26 | { 27 | "kind": "ExpressionStatement", 28 | "expr": { 29 | "kind": "Assignment", 30 | "name": { 31 | "kind": "Identifier", 32 | "text": "s" 33 | }, 34 | "value": { 35 | "kind": "Literal", 36 | "value": 2 37 | } 38 | } 39 | } 40 | ] 41 | } -------------------------------------------------------------------------------- /baselines/reference/typeAlias.errors.baseline: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pos": 76, 4 | "message": "Cannot redeclare Not; first declared at 57" 5 | }, 6 | { 7 | "pos": 124, 8 | "message": "Cannot assign initialiser of type 'number' to variable with declared type 'string'." 
9 | }, 10 | { 11 | "pos": 153, 12 | "message": "Could not resolve type Nut" 13 | }, 14 | { 15 | "pos": 192, 16 | "message": "Could not resolve Net" 17 | } 18 | ] -------------------------------------------------------------------------------- /baselines/reference/typeAlias.js.baseline: -------------------------------------------------------------------------------- 1 | "var Nat = 12;\nvar nat = 13;\nvar nit = 14;\nvar Nut = 15;\nvar nut = 16;\nvar net = Net" -------------------------------------------------------------------------------- /baselines/reference/typeAlias.tree.baseline: -------------------------------------------------------------------------------- 1 | { 2 | "locals": { 3 | "Nat": [ 4 | { 5 | "kind": "TypeAlias", 6 | "pos": 4 7 | }, 8 | { 9 | "kind": "Var", 10 | "pos": 22 11 | } 12 | ], 13 | "nat": [ 14 | { 15 | "kind": "Var", 16 | "pos": 36 17 | } 18 | ], 19 | "Not": [ 20 | { 21 | "kind": "TypeAlias", 22 | "pos": 57 23 | } 24 | ], 25 | "Nit": [ 26 | { 27 | "kind": "TypeAlias", 28 | "pos": 95 29 | } 30 | ], 31 | "nit": [ 32 | { 33 | "kind": "Var", 34 | "pos": 110 35 | } 36 | ], 37 | "Nut": [ 38 | { 39 | "kind": "Var", 40 | "pos": 130 41 | } 42 | ], 43 | "nut": [ 44 | { 45 | "kind": "Var", 46 | "pos": 144 47 | } 48 | ], 49 | "Net": [ 50 | { 51 | "kind": "TypeAlias", 52 | "pos": 164 53 | } 54 | ], 55 | "net": [ 56 | { 57 | "kind": "Var", 58 | "pos": 182 59 | } 60 | ] 61 | }, 62 | "statements": [ 63 | { 64 | "kind": "TypeAlias", 65 | "name": { 66 | "kind": "Identifier", 67 | "text": "Nat" 68 | }, 69 | "typename": { 70 | "kind": "Identifier", 71 | "text": "number" 72 | } 73 | }, 74 | { 75 | "kind": "Var", 76 | "name": { 77 | "kind": "Identifier", 78 | "text": "Nat" 79 | }, 80 | "init": { 81 | "kind": "Literal", 82 | "value": 12 83 | } 84 | }, 85 | { 86 | "kind": "Var", 87 | "name": { 88 | "kind": "Identifier", 89 | "text": "nat" 90 | }, 91 | "typename": { 92 | "kind": "Identifier", 93 | "text": "Nat" 94 | }, 95 | "init": { 96 | "kind": "Literal", 97 | 
"value": 13 98 | } 99 | }, 100 | { 101 | "kind": "TypeAlias", 102 | "name": { 103 | "kind": "Identifier", 104 | "text": "Not" 105 | }, 106 | "typename": { 107 | "kind": "Identifier", 108 | "text": "string" 109 | } 110 | }, 111 | { 112 | "kind": "TypeAlias", 113 | "name": { 114 | "kind": "Identifier", 115 | "text": "Not" 116 | }, 117 | "typename": { 118 | "kind": "Identifier", 119 | "text": "number" 120 | } 121 | }, 122 | { 123 | "kind": "TypeAlias", 124 | "name": { 125 | "kind": "Identifier", 126 | "text": "Nit" 127 | }, 128 | "typename": { 129 | "kind": "Identifier", 130 | "text": "Not" 131 | } 132 | }, 133 | { 134 | "kind": "Var", 135 | "name": { 136 | "kind": "Identifier", 137 | "text": "nit" 138 | }, 139 | "typename": { 140 | "kind": "Identifier", 141 | "text": "Not" 142 | }, 143 | "init": { 144 | "kind": "Literal", 145 | "value": 14 146 | } 147 | }, 148 | { 149 | "kind": "Var", 150 | "name": { 151 | "kind": "Identifier", 152 | "text": "Nut" 153 | }, 154 | "init": { 155 | "kind": "Literal", 156 | "value": 15 157 | } 158 | }, 159 | { 160 | "kind": "Var", 161 | "name": { 162 | "kind": "Identifier", 163 | "text": "nut" 164 | }, 165 | "typename": { 166 | "kind": "Identifier", 167 | "text": "Nut" 168 | }, 169 | "init": { 170 | "kind": "Literal", 171 | "value": 16 172 | } 173 | }, 174 | { 175 | "kind": "TypeAlias", 176 | "name": { 177 | "kind": "Identifier", 178 | "text": "Net" 179 | }, 180 | "typename": { 181 | "kind": "Identifier", 182 | "text": "number" 183 | } 184 | }, 185 | { 186 | "kind": "Var", 187 | "name": { 188 | "kind": "Identifier", 189 | "text": "net" 190 | }, 191 | "init": { 192 | "kind": "Identifier", 193 | "text": "Net" 194 | } 195 | } 196 | ] 197 | } -------------------------------------------------------------------------------- /baselines/reference/underscoreLex.lex.baseline: -------------------------------------------------------------------------------- 1 | [ 2 | [ 3 | "Identifier", 4 | "x_y" 5 | ], 6 | [ 7 | "Identifier", 8 | "is" 9 | ], 10 | [ 
11 | "Identifier", 12 | "_aSingle" 13 | ], 14 | [ 15 | "Identifier", 16 | "Identifier_" 17 | ] 18 | ] -------------------------------------------------------------------------------- /baselines/reference/varLex.lex.baseline: -------------------------------------------------------------------------------- 1 | [ 2 | [ 3 | "Var" 4 | ], 5 | [ 6 | "Identifier", 7 | "x" 8 | ], 9 | [ 10 | "Equals" 11 | ], 12 | [ 13 | "Literal", 14 | "1" 15 | ] 16 | ] -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mini-typescript", 3 | "version": "1.0.0", 4 | "lockfileVersion": 2, 5 | "requires": true, 6 | "packages": { 7 | "": { 8 | "name": "mini-typescript", 9 | "version": "1.0.0", 10 | "license": "MIT", 11 | "devDependencies": { 12 | "@types/node": "^20.2.5", 13 | "typescript": "latest" 14 | } 15 | }, 16 | "node_modules/@types/node": { 17 | "version": "20.2.5", 18 | "resolved": "https://registry.npmjs.org/@types/node/-/node-20.2.5.tgz", 19 | "integrity": "sha512-JJulVEQXmiY9Px5axXHeYGLSjhkZEnD+MDPDGbCbIAbMslkKwmygtZFy1X6s/075Yo94sf8GuSlFfPzysQrWZQ==", 20 | "dev": true 21 | }, 22 | "node_modules/typescript": { 23 | "version": "4.3.2", 24 | "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.3.2.tgz", 25 | "integrity": "sha512-zZ4hShnmnoVnAHpVHWpTcxdv7dWP60S2FsydQLV8V5PbS3FifjWFFRiHSWpDJahly88PRyV5teTSLoq4eG7mKw==", 26 | "dev": true, 27 | "bin": { 28 | "tsc": "bin/tsc", 29 | "tsserver": "bin/tsserver" 30 | }, 31 | "engines": { 32 | "node": ">=4.2.0" 33 | } 34 | } 35 | }, 36 | "dependencies": { 37 | "@types/node": { 38 | "version": "20.2.5", 39 | "resolved": "https://registry.npmjs.org/@types/node/-/node-20.2.5.tgz", 40 | "integrity": "sha512-JJulVEQXmiY9Px5axXHeYGLSjhkZEnD+MDPDGbCbIAbMslkKwmygtZFy1X6s/075Yo94sf8GuSlFfPzysQrWZQ==", 41 | "dev": true 42 | }, 43 | "typescript": { 44 | "version": "4.3.2", 45 | "resolved": 
"https://registry.npmjs.org/typescript/-/typescript-4.3.2.tgz", 46 | "integrity": "sha512-zZ4hShnmnoVnAHpVHWpTcxdv7dWP60S2FsydQLV8V5PbS3FifjWFFRiHSWpDJahly88PRyV5teTSLoq4eG7mKw==", 47 | "dev": true 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mini-typescript", 3 | "version": "1.0.0", 4 | "description": "A miniature model of the TypeScript compiler", 5 | "main": "compiler/index.js", 6 | "scripts": { 7 | "build": "tsc", 8 | "test": "rm baselines/local/*; tsc && node built/test.js", 9 | "accept": "mv baselines/local/* baselines/reference/", 10 | "mtsc": "node built/index.js" 11 | }, 12 | "repository": { 13 | "type": "git", 14 | "url": "git+https://github.com/sandersn/mini-typescript.git" 15 | }, 16 | "keywords": [ 17 | "TypeScript", 18 | "compiler", 19 | "miniature", 20 | "model", 21 | "example", 22 | "teaching" 23 | ], 24 | "author": "Nathan Shively-Sanders", 25 | "license": "MIT", 26 | "bugs": { 27 | "url": "https://github.com/sandersn/mini-typescript/issues" 28 | }, 29 | "homepage": "https://github.com/sandersn/mini-typescript#readme", 30 | "devDependencies": { 31 | "@types/node": "^20.2.5", 32 | "typescript": "latest" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/bind.ts: -------------------------------------------------------------------------------- 1 | import { Module, Node, Statement, Table } from './types' 2 | import { error } from './error' 3 | export function bind(m: Module) { 4 | for (const statement of m.statements) { 5 | bindStatement(m.locals, statement) 6 | } 7 | 8 | function bindStatement(locals: Table, statement: Statement) { 9 | if (statement.kind === Node.Var || statement.kind === Node.TypeAlias) { 10 | const symbol = locals.get(statement.name.text) 11 | if (symbol) { 12 | const other = symbol.declarations.find(d => 
const stringType: Type = { id: "string" }
const numberType: Type = { id: "number" }
/** Sentinel returned once a diagnostic has been reported for an expression or type. */
const errorType: Type = { id: "error" }
/** Renders a type for use in diagnostics. */
function typeToString(type: Type) {
    return type.id
}
/**
 * Type-checks every statement of `module`, reporting problems through `error`.
 *
 * @param module a parsed module whose `locals` table has already been filled in by the binder.
 * @returns the type computed for each top-level statement, in source order.
 */
export function check(module: Module) {
    // Var declarations whose initialiser is currently being checked. Used to
    // stop `var x = x` from recursing forever.
    const varsInProgress = new Set<Statement>()
    // Type aliases currently being expanded. Used to stop `type A = B; type B = A`
    // from recursing forever.
    const aliasesInProgress = new Set<TypeAlias>()
    return module.statements.map(statement => checkStatement(statement))

    function checkStatement(statement: Statement): Type {
        switch (statement.kind) {
            case Node.ExpressionStatement:
                return checkExpression(statement.expr)
            case Node.Var: {
                let i: Type
                varsInProgress.add(statement)
                try {
                    i = checkExpression(statement.init)
                }
                finally {
                    varsInProgress.delete(statement)
                }
                if (!statement.typename) {
                    return i
                }
                const t = checkType(statement.typename)
                // errorType means a diagnostic was already reported; don't pile a second one on top.
                if (t !== i && t !== errorType && i !== errorType)
                    error(statement.init.pos, `Cannot assign initialiser of type '${typeToString(i)}' to variable with declared type '${typeToString(t)}'.`)
                return t
            }
            case Node.TypeAlias:
                return checkType(statement.typename)
        }
    }
    function checkExpression(expression: Expression): Type {
        switch (expression.kind) {
            case Node.Identifier: {
                const symbol = resolve(module.locals, expression.text, Node.Var)
                const declaration = symbol?.valueDeclaration
                if (!declaration) {
                    error(expression.pos, "Could not resolve " + expression.text)
                    return errorType
                }
                if (varsInProgress.has(declaration)) {
                    error(expression.pos, `'${expression.text}' is referenced directly or indirectly in its own initialiser.`)
                    return errorType
                }
                // The type of a variable reference is the type of its declaration.
                return checkStatement(declaration)
            }
            case Node.Literal:
                // Every literal node carries a numeric value.
                return numberType
            case Node.Assignment: {
                const v = checkExpression(expression.value)
                const t = checkExpression(expression.name)
                if (t !== v && t !== errorType && v !== errorType)
                    error(expression.value.pos, `Cannot assign value of type '${typeToString(v)}' to variable of type '${typeToString(t)}'.`)
                return t
            }
        }
    }
    function checkType(name: Identifier): Type {
        switch (name.text) {
            case "string":
                return stringType
            case "number":
                return numberType
            default: {
                const symbol = resolve(module.locals, name.text, Node.TypeAlias)
                const alias = symbol?.declarations.find((d): d is TypeAlias => d.kind === Node.TypeAlias)
                if (!alias) {
                    error(name.pos, "Could not resolve type " + name.text)
                    return errorType
                }
                if (aliasesInProgress.has(alias)) {
                    error(name.pos, `Type alias '${name.text}' circularly references itself.`)
                    return errorType
                }
                aliasesInProgress.add(alias)
                try {
                    // An alias has the type of whatever it names.
                    return checkType(alias.typename)
                }
                finally {
                    aliasesInProgress.delete(alias)
                }
            }
        }
    }
}
/**
 * Prints a list of statements as source text.
 *
 * @param statements the statements to print, in order.
 * @returns the statements joined by `";\n"`; no separator follows the last one.
 */
export function emit(statements: Statement[]) {
    return statements.map(statement => emitStatement(statement)).join(";\n")
}
/** Prints one statement, without a trailing semicolon. */
function emitStatement(statement: Statement): string {
    switch (statement.kind) {
        case Node.ExpressionStatement:
            return emitExpression(statement.expr)
        case Node.Var: {
            // Print the annotation's text. Concatenating the `name` node itself
            // would stringify an object as "[object Object]".
            const typestring = statement.typename ? ": " + statement.typename.text : ""
            return `var ${statement.name.text}${typestring} = ${emitExpression(statement.init)}`
        }
        case Node.TypeAlias:
            return `type ${statement.name.text} = ${statement.typename.text}`
    }
}
/** Prints one expression. */
function emitExpression(expression: Expression): string {
    switch (expression.kind) {
        case Node.Identifier:
            return expression.text
        case Node.Literal:
            return "" + expression.value
        case Node.Assignment:
            return `${expression.name.text} = ${emitExpression(expression.value)}`
    }
}
/** Reserved words and the token each one lexes to. */
const keywords = {
    "function": Token.Function,
    "var": Token.Var,
    "type": Token.Type,
    "return": Token.Return,
}
// Patterns are hoisted so they are built once rather than once per character.
// Inside a character class `\b` is a backspace (U+0008), not a word boundary.
// `\r` is included so CRLF line endings are skipped like any other whitespace.
const whitespacePattern = /[ \t\b\r\n]/
const digitPattern = /[0-9]/
const identifierStartPattern = /[_a-zA-Z]/
const identifierPartPattern = /[_a-zA-Z0-9]/

/**
 * Own-property keyword test. A plain `in` check also matches names inherited
 * from Object.prototype ("toString", "constructor", ...), which would turn
 * those identifiers into bogus keyword tokens.
 */
function isKeyword(word: string): word is keyof typeof keywords {
    return Object.prototype.hasOwnProperty.call(keywords, word)
}

/**
 * Creates a lexer over `s`.
 *
 * The lexer starts at `Token.BOF`; each `scan()` skips whitespace and advances
 * to the next token. `pos()` is the offset just past the most recently scanned
 * token. `text()` is only updated for literals, identifiers and keywords, so
 * after punctuation or EOF it still holds the previous word.
 */
export function lex(s: string): Lexer {
    let pos = 0
    let text = ""
    let token = Token.BOF
    return {
        scan,
        token: () => token,
        pos: () => pos,
        text: () => text,
    }
    function scan() {
        scanForward(c => whitespacePattern.test(c))
        const start = pos
        if (pos === s.length) {
            token = Token.EOF
        }
        else if (digitPattern.test(s.charAt(pos))) {
            scanForward(c => digitPattern.test(c))
            text = s.slice(start, pos)
            token = Token.Literal
        }
        else if (identifierStartPattern.test(s.charAt(pos))) {
            scanForward(c => identifierPartPattern.test(c))
            text = s.slice(start, pos)
            token = isKeyword(text) ? keywords[text] : Token.Identifier
        }
        else {
            // Single-character tokens: consume the character, then classify it.
            pos++
            switch (s.charAt(pos - 1)) {
                case '=': token = Token.Equals; break
                case ';': token = Token.Semicolon; break
                case ":": token = Token.Colon; break
                default: token = Token.Unknown; break
            }
        }
    }
    /** Advances `pos` while the current character satisfies `pred`. */
    function scanForward(pred: (x: string) => boolean) {
        while (pos < s.length && pred(s.charAt(pos))) pos++
    }
}
/**
 * Lexes all of `s` and returns its tokens, excluding the final EOF.
 * Identifier and literal entries also carry their source text.
 */
export function lexAll(s: string) {
    const lexer = lex(s)
    const tokens: { token: Token, text?: string }[] = []
    while (true) {
        lexer.scan()
        const t = lexer.token()
        switch (t) {
            case Token.EOF:
                return tokens
            case Token.Identifier:
            case Token.Literal:
                tokens.push({ token: t, text: lexer.text() })
                break
            default:
                tokens.push({ token: t })
                break
        }
    }
}
/**
 * Parses one or more elements separated by `separator`.
 *
 * `element` is always called at least once; after that, each truthy result
 * from `separator` is followed by exactly one more `element` call.
 *
 * @param element parses and returns a single list element.
 * @param separator returns a truthy value when it consumed a separator.
 * @returns the parsed elements in order; never empty.
 */
function parseSeparated<T>(element: () => T, separator: () => unknown): T[] {
    const list = [element()]
    while (separator()) {
        list.push(element())
    }
    return list
}
/** Adds up `ns` from left to right; an empty array sums to 0. */
function sum(ns: number[]) {
    return ns.reduce((runningTotal, value) => runningTotal + value, 0)
}
[Token[t.token], t.text] : [Token[t.token]])))) 56 | let compileResult = sum(fs.readdirSync("tests").map(file => { 57 | const [tree, errors, js] = compile(fs.readFileSync("tests/" + file, 'utf8')) 58 | const name = file.slice(0, file.length - 3) 59 | return test("tree", name, displayModule(tree)) 60 | + test("errors", name, errors) 61 | + test("js", name, js) 62 | })) 63 | function displayModule(m: Module) { 64 | return { locals: displayTable(m.locals), statements: m.statements.map(display) } 65 | } 66 | function displayTable(table: Table) { 67 | const o = {} as any 68 | for (const [k,v] of table) { 69 | o[k] = v.declarations.map(({ kind, pos }) => ({ kind: Node[kind], pos })) 70 | } 71 | return o 72 | } 73 | function display(o: any) { 74 | const o2 = {} as any 75 | for (const k in o) { 76 | if (k === 'pos') continue 77 | else if (k === 'kind') o2[k] = Node[o.kind] 78 | else if (typeof o[k] === 'object') o2[k] = display(o[k]) 79 | else o2[k] = o[k] 80 | } 81 | return o2 82 | } 83 | 84 | let result = lexResult + compileResult 85 | if (result === 0) { 86 | strong("All tests passed") 87 | } 88 | else { 89 | console.log(result, "tests failed.") 90 | } 91 | console.log("") 92 | process.exit(result) 93 | -------------------------------------------------------------------------------- /src/transform.ts: -------------------------------------------------------------------------------- 1 | import { Statement, Node } from './types' 2 | export function transform(statements: Statement[]) { 3 | return typescript(statements) 4 | } 5 | /** Convert TS to JS: remove type annotations and declarations */ 6 | function typescript(statements: Statement[]) { 7 | return statements.flatMap(transformStatement) 8 | 9 | function transformStatement(statement: Statement): Statement[] { 10 | switch (statement.kind) { 11 | case Node.ExpressionStatement: 12 | return [statement] 13 | case Node.Var: 14 | return [{ ...statement, typename: undefined }] 15 | case Node.TypeAlias: 16 | return [] 17 | } 18 
/** Token kinds produced by the lexer. Numeric values are used for reverse lookup (`Token[t]`), so the order matters. */
export enum Token {
    Function,
    Var,
    Type,
    Return,
    Equals,
    Literal,
    Identifier,
    Newline,
    Semicolon,
    Colon,
    Whitespace,
    Unknown,
    BOF,
    EOF,
}
/** Cursor over the program text: `scan` advances, the other members read the current state. */
export type Lexer = {
    scan(): void
    token(): Token
    pos(): number
    text(): string
}
/** Syntax node kinds. Numeric values are used for reverse lookup (`Node[kind]`), so the order matters. */
export enum Node {
    Identifier,
    Literal,
    Assignment,
    ExpressionStatement,
    Var,
    TypeAlias,
}
/** A diagnostic: a source position plus a message. */
export type Error = {
    pos: number
    message: string
}
/** Mixed into every node to record its source position. */
export interface Location {
    pos: number
}
export type Expression = Identifier | Literal | Assignment
export type Identifier = Location & {
    kind: Node.Identifier
    text: string
}
export type Literal = Location & {
    kind: Node.Literal
    value: number
}
export type Assignment = Location & {
    kind: Node.Assignment
    name: Identifier
    value: Expression
}
export type Statement = ExpressionStatement | Var | TypeAlias
export type ExpressionStatement = Location & {
    kind: Node.ExpressionStatement
    expr: Expression
}
export type Var = Location & {
    kind: Node.Var
    name: Identifier
    typename?: Identifier | undefined
    init: Expression
}
export type TypeAlias = Location & {
    kind: Node.TypeAlias
    name: Identifier
    typename: Identifier
}
export type Declaration = Var | TypeAlias // plus others, like function
/** Everything declared under one name; `valueDeclaration` is the `Var` declaration, if any. */
export type Symbol = {
    valueDeclaration: Declaration | undefined
    declarations: Declaration[]
}
/** Symbol table keyed by declared name. `Map` is generic and must be given its key and value types. */
export type Table = Map<string, Symbol>
export type Module = {
    locals: Table
    statements: Statement[]
}
export type Type = { id: string }
-------------------------------------------------------------------------------- /tests/redeclare.ts: -------------------------------------------------------------------------------- 1 | var x = 1; 2 | var x = 2 3 | -------------------------------------------------------------------------------- /tests/singleIdentifier.ts: -------------------------------------------------------------------------------- 1 | x 2 | -------------------------------------------------------------------------------- /tests/singleTypedVar.ts: -------------------------------------------------------------------------------- 1 | var s: string = 1 2 | -------------------------------------------------------------------------------- /tests/singleVar.ts: -------------------------------------------------------------------------------- 1 | var singleDeclaration = 1 -------------------------------------------------------------------------------- /tests/twoStatements.ts: -------------------------------------------------------------------------------- 1 | var arthurTwoShedsJackson = 1; 2 | arthurTwoShedsJackson = 2 3 | -------------------------------------------------------------------------------- /tests/twoTypedStatements.ts: -------------------------------------------------------------------------------- 1 | var s: string = 1; 2 | s = 2 3 | -------------------------------------------------------------------------------- /tests/typeAlias.ts: -------------------------------------------------------------------------------- 1 | type Nat = number; 2 | var Nat = 12; 3 | var nat: Nat = 13; 4 | 5 | type Not = string; 6 | type Not = number; 7 | type Nit = Not; 8 | var nit: Not = 14; 9 | 10 | var Nut = 15; 11 | var nut: Nut = 16; 12 | type Net = number; 13 | var net = Net -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | 
"strict": true, 5 | "target": "esnext", 6 | "lib": ["es2020", "dom"], 7 | "outDir": "built" 8 | }, 9 | "include": [ 10 | "src" 11 | ] 12 | } 13 | --------------------------------------------------------------------------------