├── .eslintrc.js ├── .github └── workflows │ ├── main.yml │ └── size.yml ├── .gitignore ├── LICENSE ├── notes.md ├── package-lock.json ├── package.json ├── readme.md ├── src ├── core.test.ts ├── core.ts ├── examples │ ├── data-expressions.ts │ ├── itself.ts │ ├── range.ts │ ├── regex.ts │ └── vanilla.ts ├── index.ts ├── lang.test.ts ├── lang.ts ├── lexer.test.ts ├── lexer.ts ├── operator.test.ts ├── operator.ts ├── parser-combinators.ts ├── parser-ll.test.ts ├── parser-ll.ts ├── simplifier.test.ts ├── simplifier.ts ├── tag.test.ts ├── tag.ts ├── util.test.ts └── util.ts └── tsconfig.json /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | env: { 3 | browser: true, 4 | es2020: true, 5 | node: true, 6 | }, 7 | extends: ['eslint:recommended', 'plugin:@typescript-eslint/recommended'], 8 | parser: '@typescript-eslint/parser', 9 | parserOptions: { 10 | ecmaVersion: 12, 11 | sourceType: 'module', 12 | }, 13 | plugins: ['@typescript-eslint'], 14 | rules: { 15 | 'prettier/prettier': ['off'], 16 | '@typescript-eslint/no-unused-vars': ['error', { argsIgnorePattern: '^_' }], 17 | '@typescript-eslint/no-non-null-assertion': ['off'], 18 | '@typescript-eslint/explicit-function-return-type': ['off'], 19 | '@typescript-eslint/no-explicit-any': ['off'], 20 | '@typescript-eslint/explicit-module-boundary-types': ['off'], 21 | }, 22 | }; 23 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: [push] 3 | jobs: 4 | build: 5 | name: Build, lint, and test on Node ${{ matrix.node }} and ${{ matrix.os }} 6 | 7 | runs-on: ${{ matrix.os }} 8 | strategy: 9 | matrix: 10 | node: ['10.x', '12.x', '14.x'] 11 | os: [ubuntu-latest, windows-latest, macOS-latest] 12 | 13 | steps: 14 | - name: Checkout repo 15 | uses: actions/checkout@v2 16 | 17 | - name: Use Node ${{ 
matrix.node }} 18 | uses: actions/setup-node@v1 19 | with: 20 | node-version: ${{ matrix.node }} 21 | 22 | - name: Install deps and build (with cache) 23 | uses: bahmutov/npm-install@v1 24 | 25 | - name: Lint 26 | run: yarn lint 27 | 28 | - name: Test 29 | run: yarn test --ci --coverage --maxWorkers=2 30 | 31 | - name: Build 32 | run: yarn build 33 | -------------------------------------------------------------------------------- /.github/workflows/size.yml: -------------------------------------------------------------------------------- 1 | name: size 2 | on: [pull_request] 3 | jobs: 4 | size: 5 | runs-on: ubuntu-latest 6 | env: 7 | CI_JOB_NUMBER: 1 8 | steps: 9 | - uses: actions/checkout@v1 10 | - uses: andresz1/size-limit-action@v1 11 | with: 12 | github_token: ${{ secrets.GITHUB_TOKEN }} 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | .DS_Store 3 | node_modules 4 | dist 5 | coverage -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | ISC License (ISC) 2 | Copyright 2018 Justin Falcone 3 | 4 | Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. 5 | 6 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
-------------------------------------------------------------------------------- /notes.md: -------------------------------------------------------------------------------- 1 | # notes 2 | 3 | - use structure & sepBy for error handling (ie. on parse error, drop tokens until next comma/semi/closing brace) 4 | 5 | - errors that show the unexpected text 6 | - documentation (see https://nearley.js.org/docs/how-to-grammar-good for examples) 7 | - 'language workbench' tool -- design language grammar, parse and diagram expressions in language 8 | - preprocessing via https://github.com/kentcdodds/babel-plugin-macros 9 | - vscode syntax highlight extension 10 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "zebu", 3 | "author": "Justin Falcone", 4 | "version": "0.1.0", 5 | "license": "ISC", 6 | "main": "dist/index.js", 7 | "typings": "dist/index.d.ts", 8 | "files": [ 9 | "dist", 10 | "src" 11 | ], 12 | "engines": { 13 | "node": ">=10" 14 | }, 15 | "scripts": { 16 | "start": "tsdx watch", 17 | "build": "tsdx build", 18 | "test": "tsdx test", 19 | "lint": "tsdx lint src", 20 | "prepare": "tsdx build", 21 | "size": "size-limit", 22 | "analyze": "size-limit --why" 23 | }, 24 | "peerDependencies": {}, 25 | "husky": { 26 | "hooks": { 27 | "pre-commit": "npm run lint" 28 | } 29 | }, 30 | "prettier": { 31 | "printWidth": 80, 32 | "semi": true, 33 | "singleQuote": true, 34 | "trailingComma": "es5", 35 | "tabWidth": 2, 36 | "useTabs": false 37 | }, 38 | "jest": { 39 | "collectCoverage": true, 40 | "coveragePathIgnorePatterns": ["/node_modules/", "/src/examples/"] 41 | }, 42 | "module": "dist/zebu.esm.js", 43 | "size-limit": [ 44 | { 45 | "path": "dist/zebu.cjs.production.min.js", 46 | "limit": "10 KB" 47 | }, 48 | { 49 | "path": "dist/zebu.esm.js", 50 | "limit": "10 KB" 51 | } 52 | ], 53 | "resolutions": { 54 | "**/typescript": "^4.0.5", 
55 | "**/@typescript-eslint/eslint-plugin": "^4.6.1", 56 | "**/@typescript-eslint/parser": "^4.6.1" 57 | }, 58 | "devDependencies": { 59 | "@size-limit/preset-small-lib": "^4.7.0", 60 | "@typescript-eslint/eslint-plugin": "^4.7.0", 61 | "@typescript-eslint/parser": "^4.7.0", 62 | "husky": "^4.3.0", 63 | "size-limit": "^4.7.0", 64 | "tsdx": "^0.14.1", 65 | "tslib": "^2.0.3", 66 | "typescript": "^4.0.5" 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Zebu 2 | 3 | ## What is this? 4 | 5 | Zebu is a JavaScript library for building [little languages](http://staff.um.edu.mt/afra1/seminar/little-languages.pdf) with [tagged template literals](http://2ality.com/2016/11/computing-tag-functions.html). 6 | 7 | ## Why would I want to do that? 8 | 9 | When we work with code, we don't just care about performance; we also care about human-centric concerns like convenience, elegance, and readability. In most cases, the human-centric concerns take priority: any time you have chosen to use a framework over "vanilla JS", or a high-level language over a low-level one, you have implicitly chosen developer experience over performance. Everyone has different thresholds for when to make this trade-off, but in general we recognize that the programmer's time is more valuable than the computer's. 10 | 11 | While most programmers are comfortable writing code for other programmers to use, whether that's via the open source ecosystem or just in your project's `utils.js` file, very few are comfortable designing or contributing to a programming language itself. Most programmers have opinions about programming languages, and may even have ideas for features they'd like in a programming language, but few even consider that they could make these ideas a reality. 
Implementing a programming language seems like it belongs to the category of software that's beyond the reach of ordinary programmers, alongside databases and operating systems. 12 | 13 | Implementing a general-purpose, high-performance programming language is, indeed, a lot of work. But there is a huge spectrum of possibility between "library that is pleasant to use" and "industrial strength programming language". Many interesting and useful languages are (relatively) simple and are more like _features_ of a language than a language in and of themselves -- examples include regular expressions, DOM selectors and date format strings. (You may have invented a "little language" like this without even realizing it -- any function that takes a string and does something based on the contents of that string is, in some sense, an interpreter for a programming language.) These languages don't need to implement many of the features we take for granted in programming languages -- variables, functions, etc -- because all of that is already implemented by the "host" language; they can focus on doing a single specialized task using an appropriately specialized syntax. 14 | 15 | Zebu is a toolkit for building these little languages that handles the boring and error-prone parts (i.e. turning a string into structured data) so you can focus on providing a great developer experience. 16 | 17 | ## How is Zebu different from similar tools? 18 | 19 | Zebu is a parser generator that broadly resembles tools like Yacc, Bison or ANTLR. It works with LL(k) grammars, though like ANTLR it can also handle direct left recursion. Like the aforementioned tools, but unlike PEG and parser combinator libraries, Zebu has separate lexing and parsing phases. 20 | 21 | The major difference between Zebu and other parser generators is that Zebu applies the principle of "convention over configuration" to parsing. 
Specifically, all languages created with Zebu use these lexing rules: 22 | 23 | - whitespace (including newlines) and JavaScript-styled comments (`//` and `/* */`) are ignored 24 | - numbers, strings, and identifiers are tokenized with the same syntactic rules as JavaScript 25 | 26 | In other words: Zebu is not a general-purpose parser generator; languages created with Zebu will necessarily have a strong family resemblance to JavaScript, even if they are structured very differently. However, we believe what you give up in terms of expressivity is more than made up for by improved developer and user experience -- so many irrelevant but potentially confusing differences can be avoided completely. 27 | 28 | ## How does Zebu work? 29 | 30 | Let's walk through a simple example first, to cover the core principles, and then we can get to more useful examples. Here's how you create a language: 31 | 32 | ```js 33 | import { lang } from 'zebu'; 34 | 35 | const add = lang` 36 | Expr = Expr "+" value : ${(left, _, right) => left + right} 37 | | value; 38 | `; 39 | ``` 40 | 41 | And then you use the language like this: 42 | 43 | ```js 44 | const result = add`1 + ${2} + 3`; 45 | console.log(result); // 6 46 | ``` 47 | 48 | So what is actually happening here? `lang` is a function being used as a tagged template literal. 49 | 50 | ```js 51 | lang` 52 | Expr = Expr "+" value : ${(left, _, right) => left + right} 53 | | value; 54 | `; 55 | // is equivalent to 56 | lang( 57 | ['\n Expr = Expr "+" value : ', '\n | value;\n'], 58 | (left, _, right) => left + right 59 | ); 60 | 61 | ``` 62 | 63 | Likewise: 64 | 65 | ```js 66 | add`1 + ${2} + 3`; 67 | // is equivalent to 68 | add(['1 + ', ' + 3'], 2); 69 | ``` 70 | 71 | ### Tokenizing 72 | 73 | Now, what is happening inside `add`? (We'll cover what's happening in `lang` in the next section.) First, the strings and interpolations passed into `add` are turned into an array of tokens. 
This process interleaves the strings and interpolations back together, and strips out any whitespace or comments: 74 | 75 | ```js 76 | [ 77 | { type: 'value', value: 1 }, 78 | { type: 'literal', value: '+' }, 79 | { type: 'value', value: 2 }, 80 | { type: 'literal', value: '+' }, 81 | { type: 'value', value: 3 }, 82 | ]; 83 | ``` 84 | 85 | - `value` - Numbers (decimal, hexadecimal, octal or binary), quoted strings (single or double quote), and interpolated values (of any type) are wrapped in `value` tokens. 86 | - `literal` - Values that are explicitly enumerated in the definition of a language will be wrapped in `literal` tokens. For example, the definition of `add` includes a `"+"`, so `+` is matched as a `literal` token. These are typically used for keywords, operators, or other 'punctuation' in programming languages. 87 | - `identifier` - Values that match the identifier rules for JavaScript, and which are _not_ explicitly enumerated in the language's definition, are matched as `identifier` tokens. 88 | 89 | Any text that is not ignored as whitespace or comments and does not match any of the above token types will throw an error. For example, in `` add`1 - 2` ``, `"-"` is not in `add`'s definition and therefore is not identified as a literal, so this will throw. 90 | 91 | ### Parsing 92 | 93 | Now that we've converted the input into tokens, let's match the tokens to the grammar in our language definition. Let's look at that again: 94 | 95 | ```js 96 | lang` 97 | Expr = Expr "+" value : ${(left, _, right) => left + right} 98 | | value; 99 | `; 100 | ``` 101 | 102 | In this grammar, we have a single _rule_, labelled `Expr`; more complex grammars can have many rules, separated by semicolons. The `Expr` rule has two branches, separated by the `|` operator. The first branch matches a sequence -- first, it recursively matches itself, then it matches the token `{ type: 'literal', value: '+' }`, and then it matches any token of the type `value`. 
The results of each of these are passed into the function to the right, the result of which becomes the overall result of the expression. The second branch matches a single `value` token, and the result is the value in that token. 103 | 104 | When we apply this grammar to `` add`1 + ${2} + 3` ``, it parses as: 105 | 106 | 107 | 108 | ``` 109 | 1 + 2 + 3 110 | value : 1 111 | Expr "+" value : 1 + 2 112 | Expr "+" value : (1 + 2) + 3 113 | ``` 114 | 115 | And returns the result `6`. 116 | 117 | Unlike in regular expressions, parsing must match the whole input; `` add`1 +` `` or `` add` + 2` `` would both fail, as would `` add`1 + + 2 3` ``. 118 | 119 | ## A more complicated example 120 | 121 | TODO: something with a couple of rules, more language features, notes on technique (e.g. precedence climbing) 122 | 123 | ### Operator grammars 124 | 125 | TODO 126 | 127 | ### Tag helper 128 | 129 | ## Zebu language reference 130 | 131 | ### Literals 132 | 133 | A quoted string in a Zebu grammar matches the value of that string and returns that value. 134 | 135 | ```js 136 | const lit = lang`Main = "foo"`; 137 | 138 | equal(lit`foo`, 'foo'); 139 | 140 | throws(() => lit`bar`); 141 | throws(() => lit`"foo"`); 142 | ``` 143 | 144 | ### Rules 145 | 146 | Zebu grammars are composed from a list of rules, separated by semicolons. Rules do not have to be defined in any particular order (a rule can reference rules either above or below it), but the top rule is the rule for the whole grammar. 147 | 148 | Identifiers in a grammar will match the rule with that name. Zebu will raise an error if there is no rule by that name. 
149 | 150 | ```js 151 | const math = lang` 152 | Neg = "-" Expr : ${(_, value) => -value} 153 | | Expr; 154 | Expr = #( Neg ) 155 | | value; 156 | `; 157 | 158 | equal(math`123`, 123); 159 | equal(math`-123`, -123); 160 | equal(math`(123)`, 123); 161 | equal(math`-(-(123))`, 123); 162 | ``` 163 | 164 | ### `value` 165 | 166 | The `value` keyword matches a number, a quoted string, or an interpolation, and returns that value. 167 | 168 | ```js 169 | const val = lang`Main = value`; 170 | 171 | equal(val`"hello, world!"`, 'hello, world!'); 172 | equal(val`'string with \'escaped quotes\''`, "string with 'escaped quotes'"); 173 | equal(val`-123.45e6`, -123.45e6); 174 | equal(val`0xDEADBEEF`, 0xdeadbeef); 175 | 176 | equal(val`${1}`, 1); 177 | equal(val`${'hello, world!'}`, 'hello, world!'); 178 | const object = {}; 179 | equal(val`${object}`, object); 180 | 181 | equal(val`"foo${'bar'}baz"`, 'foobarbaz'); 182 | ``` 183 | 184 | ### `identifier` 185 | 186 | The `identifier` keyword matches a JavaScript identifier which is not used as a literal in the grammar, and returns that value. 187 | 188 | ```js 189 | const id = lang` 190 | Main = identifier; 191 | Reserved = "class" | "function" | "if" | "else"; 192 | `; 193 | 194 | equal(id`foo`, 'foo'); 195 | equal(id`$bar`, '$bar'); 196 | equal(id`_0`, '_0'); 197 | 198 | throws(() => id`class`); 199 | ``` 200 | 201 | 202 | 203 | ### `keyword` 204 | 205 | The `keyword` keyword (!) matches a literal value that is in the format of a JavaScript identifier and returns that value. 206 | 207 | ```js 208 | const kw = lang` 209 | Main = identifier "." 
(identifier | keyword) 210 | : ${(left, _, right) => [left, right]}; 211 | Reserved = "class" | "function" | "if" | "else"; 212 | `; 213 | 214 | equal(kw`foo.bar`, ['foo', 'bar']); 215 | equal(kw`foo.class`, ['foo', 'class']); 216 | 217 | throws(() => kw`class.foo`); 218 | ``` 219 | 220 | ### `operator` 221 | 222 | The `operator` keyword matches a literal value that is _not_ in the format of a JavaScript identifier. I have no idea why you would use this, but it is included for symmetry with `keyword`. 223 | 224 | ### `nil` 225 | 226 | The `nil` keyword matches nothing and returns `null`. 227 | 228 | ```js 229 | const opt = lang`Main = identifier | nil`; 230 | 231 | equal(opt`foo`, 'foo'); 232 | equal(opt``, null); 233 | ``` 234 | 235 | ### `include` 236 | 237 | The `include` keyword allows you to embed one grammar in another. If you use this, it will probably be for embedding an operator grammar. 238 | 239 | ```js 240 | const prog = lang` 241 | Program = Statement ** ";"; 242 | Statement = "print" Expr : ${(_, expr) => console.log(expr)} 243 | | Expr; 244 | Expr = include ${op` 245 | left "+" : ${(l, r) => l + r} 246 | "-" : ${(l, r) => l - r} 247 | left "*" : ${(l, r) => l * r} 248 | "/" : ${(l, r) => l / r} 249 | "%" : ${(l, r) => l % r} 250 | right "**" : ${(l, r) => l ** r} 251 | pre "-" : ${x => -x} 252 | root BaseExpr 253 | `}; 254 | BaseExpr = value; 255 | `; 256 | ``` 257 | 258 | ### Sequence 259 | 260 | A sequence of expressions followed by a colon and an interpolated function matches that sequence and passes the results of each expression into that function, returning the result. 
261 | 262 | ```js 263 | const seq = lang`identifier "=" value : ${(name, _, value) => ({ 264 | [name]: value, 265 | })}`; 266 | equal(seq`foo = 1`, { foo: 1 }); 267 | equal(seq`bar = "bar"`, { bar: 'bar' }); 268 | 269 | throws(() => seq`foo`); 270 | throws(() => seq`foo = 1 bar`); 271 | throws(() => seq`1 = foo`); 272 | ``` 273 | 274 | ### Alternation 275 | 276 | The pipe character, `|`, like in regular expressions, is the alternation operator, and (Foo | Bar) matches either of the rules Foo or Bar. 277 | 278 | ```js 279 | const alts = lang` 280 | Main = "foo" | "bar" | value; 281 | `; 282 | 283 | equal(alts`foo`, 'foo'); 284 | equal(alts`bar`, 'bar'); 285 | equal(alts`123.45`, 123.45); 286 | ``` 287 | 288 | #### A note about parsing strategy 289 | 290 | The behavior of this operator happens to be one of the major differences between traditional CFG parsers (including Zebu) and PEG parsers. In Zebu, the order of branches doesn't matter -- the parser looks ahead at the next tokens and chooses the branch based on that. However, this means that each branch must not overlap; if they do, Zebu will raise a "first/first conflict" error. 291 | On the other hand, PEG parsers try each branch in order, and backtrack if one doesn't succeed. This means that branches _can_ overlap, though more often than not overlapping branches are an indication of a bug, not a desirable feature. 292 | 293 | ### Repetition 294 | 295 | The operators `*`, `+` and `?` work similarly to how they work in regular expressions: 296 | 297 | - `Expr*` matches a sequence of 0 or more `Expr`s and returns an array, 298 | - `Expr+` matches a sequence of 1 or more `Expr`s and returns an array, 299 | - `Expr?` optionally matches `Expr`, and returns null if it doesn't match. `Expr?` is equivalent to `(Expr | nil)`. 
300 | 301 | ```js 302 | const repeat0 = lang`value*`; 303 | equal(repeat0``, []); 304 | equal(repeat0`"foo"`, ['foo']); 305 | equal(repeat0`"foo" "bar"`, ['foo', 'bar']); 306 | 307 | const repeat1 = lang`value+`; 308 | throws(() => repeat1``); 309 | equal(repeat1`"foo"`, ['foo']); 310 | equal(repeat1`"foo" "bar"`, ['foo', 'bar']); 311 | 312 | const maybe = lang`value?`; 313 | equal(maybe``, null); 314 | equal(maybe`"foo"`, 'foo'); 315 | ``` 316 | 317 | ### Parentheses, brackets and braces 318 | 319 | Zebu includes syntactic sugar for matching expressions wrapped in punctuation: 320 | 321 | - `#{ Expr }` matches `Expr` wrapped in curly braces and returns the result of `Expr`, and is equivalent to `("{" Expr "}" : ${(_, result) => result})` 322 | - `#[ Expr ]` as above, for square brackets 323 | - `#( Expr )` as above, for parentheses. 324 | 325 | ### Separated sequences 326 | 327 | The operators `++` and `**` are used for matching sequences with separators, e.g. function arguments separated by commas, or statements separated by semicolons, and return an array of the matched expression (ignoring the separators). The `++` operator matches one or more elements, while the `**` operator matches zero or more. Both allow optional trailing separators. 
328 | 329 | ```js 330 | const sepBy0 = lang`value ** ","`; 331 | equal(sepBy0``, []); 332 | equal(sepBy0`1`, [1]); 333 | equal(sepBy0`1,`, [1]); 334 | equal(sepBy0`1, 2`, [1, 2]); 335 | 336 | const sepBy1 = lang`value ++ ","`; 337 | throws(() => sepBy1``); 338 | equal(sepBy1`1`, [1]); 339 | equal(sepBy1`1,`, [1]); 340 | equal(sepBy1`1, 2`, [1, 2]); 341 | ``` 342 | 343 | If you explicitly do _not_ want trailing separators to be valid, use something like: 344 | 345 | ```js 346 | const noTrailing1 = lang` 347 | Main = value Rest* : ${(first, rest) => [first, ...rest]}; 348 | Rest = "," value : ${(_, value) => value}; 349 | `; 350 | 351 | throws(() => noTrailing1``); 352 | equal(noTrailing1`1`, [1]); 353 | throws(() => noTrailing1`1,`); 354 | equal(noTrailing1`1, 2`, [1, 2]); 355 | ``` 356 | -------------------------------------------------------------------------------- /src/core.test.ts: -------------------------------------------------------------------------------- 1 | import { coreAST, print, builders } from './core'; 2 | 3 | test('pretty-printer', () => { 4 | expect(print(coreAST)).toEqual(` 5 | Grammar = Rule ++ ";" 6 | Rule = identifier "=" AltExpr 7 | AltExpr = "|"? SeqExpr ("|" SeqExpr)* 8 | SeqExpr = SepExpr (SepExpr* ":" value)? 9 | SepExpr = SepExpr "**" RepExpr 10 | | SepExpr "++" RepExpr 11 | | RepExpr 12 | RepExpr = Expr "*" 13 | | Expr "+" 14 | | Expr "?" 
15 | | Expr 16 | Expr = #( AltExpr ) 17 | | "#" #( AltExpr ) 18 | | "#" #[ AltExpr ] 19 | | "#" #{ AltExpr } 20 | | "include" value 21 | | identifier 22 | | "identifier" 23 | | "operator" 24 | | "keyword" 25 | | "value" 26 | | "nil" 27 | | value`); 28 | 29 | const { error, repeat1, sepBy0, lit, seq, alt } = builders; 30 | 31 | const grammarWithOtherTokens = repeat1(sepBy0(error('message'), lit('foo'))); 32 | expect(print(grammarWithOtherTokens)).toEqual(`( ** "foo")+`); 33 | 34 | const grammarWithInnerAlt = seq( 35 | () => null, 36 | alt(lit('foo'), lit('bar')), 37 | lit('baz') 38 | ); 39 | expect(print(grammarWithInnerAlt)).toEqual(`("foo" | "bar") "baz"`); 40 | 41 | const grammarWithEmptySeq = seq(() => null); 42 | expect(print(grammarWithEmptySeq)).toEqual(`nil`); 43 | }); 44 | -------------------------------------------------------------------------------- /src/core.ts: -------------------------------------------------------------------------------- 1 | import { identifierOrOperator } from './lexer'; 2 | import { assertUnreachable } from './util'; 3 | 4 | type TerminalType = 'keyword' | 'operator' | 'identifier' | 'value'; 5 | export type AST = 6 | | { type: 'error'; message: string } 7 | | { type: 'literal'; value: string } 8 | | { type: 'terminal'; value: TerminalType } 9 | | { type: 'identifier'; value: string } 10 | | { type: 'structure'; startToken: string; endToken: string; expr: AST } 11 | | { type: 'maybe'; expr: AST } 12 | | { type: 'repeat0'; expr: AST } 13 | | { type: 'repeat1'; expr: AST } 14 | | { type: 'sepBy0'; expr: AST; separator: AST } 15 | | { type: 'sepBy1'; expr: AST; separator: AST } 16 | | { type: 'seq'; exprs: AST[]; fn: SeqFn } 17 | | { type: 'alt'; exprs: AST[] } 18 | | { type: 'ruleset'; rules: Array<{ name: string; expr: AST }> }; 19 | 20 | const spaces = (n: number) => 21 | Array(n) 22 | .fill(' ') 23 | .join(''); 24 | const wrapIf = (cond: boolean, str: string) => (cond ? 
`(${str})` : str); 25 | export function print(node: AST, indent = 0, prec = 0): string { 26 | switch (node.type) { 27 | case 'error': 28 | return ``; 29 | case 'literal': 30 | return `"${node.value}"`; 31 | case 'terminal': 32 | return node.value; 33 | case 'identifier': 34 | return node.value; 35 | case 'structure': 36 | return `#${node.startToken} ${print(node.expr, indent)} ${node.endToken}`; 37 | case 'maybe': 38 | return wrapIf(prec > 3, `${print(node.expr, indent, 4)}?`); 39 | case 'repeat0': 40 | return wrapIf(prec > 3, `${print(node.expr, indent, 4)}*`); 41 | case 'repeat1': 42 | return wrapIf(prec > 3, `${print(node.expr, indent, 4)}+`); 43 | case 'sepBy0': 44 | return wrapIf( 45 | prec > 2, 46 | `${print(node.expr, indent, 3)} ** ${print(node.separator, indent, 3)}` 47 | ); 48 | case 'sepBy1': 49 | return wrapIf( 50 | prec > 2, 51 | `${print(node.expr, indent, 3)} ++ ${print(node.separator, indent, 3)}` 52 | ); 53 | case 'seq': 54 | return ( 55 | wrapIf( 56 | prec > 1, 57 | node.exprs.map(expr => print(expr, indent, 2)).join(' ') 58 | ) || 'nil' 59 | ); 60 | case 'alt': 61 | return wrapIf( 62 | prec > 0, 63 | node.exprs.map(expr => print(expr, indent, 1)).join(` | `) 64 | ); 65 | case 'ruleset': 66 | return node.rules 67 | .map(rule => 68 | rule.expr.type === 'alt' 69 | ? 
`\n${spaces(indent)}${rule.name} = ${rule.expr.exprs 70 | .map(expr => print(expr, indent)) 71 | .join(`\n${spaces(indent + rule.name.length)} | `)}` 72 | : `\n${spaces(indent)}${rule.name} = ${print(rule.expr, indent)}` 73 | ) 74 | .join(''); 75 | // istanbul ignore next 76 | default: 77 | assertUnreachable(node); 78 | } 79 | } 80 | 81 | export function checkLit(x: string): AST { 82 | if (typeof x !== 'string' || !identifierOrOperator.test(x)) { 83 | return { type: 'error', message: 'invalid pattern' }; 84 | } 85 | return { type: 'literal', value: x }; 86 | } 87 | 88 | type SeqFn = (...xs: any[]) => unknown; 89 | 90 | const error = (message: string): AST => ({ type: 'error', message }); 91 | const ident = (value: string): AST => ({ type: 'identifier', value }); 92 | const lit = (value: string): AST => ({ type: 'literal', value }); 93 | const terminal = (value: TerminalType): AST => ({ type: 'terminal', value }); 94 | const alt = (...exprs: AST[]): AST => ({ type: 'alt', exprs }); 95 | const seq = (fn: SeqFn, ...exprs: AST[]): AST => ({ 96 | type: 'seq', 97 | fn, 98 | exprs, 99 | }); 100 | const rule = (name: string, expr: AST): { name: string; expr: AST } => ({ 101 | name, 102 | expr, 103 | }); 104 | const ruleset = (...rules: Array<{ name: string; expr: AST }>): AST => ({ 105 | type: 'ruleset', 106 | rules, 107 | }); 108 | const repeat0 = (expr: AST): AST => ({ type: 'repeat0', expr }); 109 | const repeat1 = (expr: AST): AST => ({ type: 'repeat1', expr }); 110 | const maybe = (expr: AST): AST => ({ type: 'maybe', expr }); 111 | const sepBy0 = (expr: AST, separator: AST): AST => ({ 112 | type: 'sepBy0', 113 | expr, 114 | separator, 115 | }); 116 | const sepBy1 = (expr: AST, separator: AST): AST => ({ 117 | type: 'sepBy1', 118 | expr, 119 | separator, 120 | }); 121 | const structure = (startToken: string, expr: AST, endToken: string): AST => ({ 122 | type: 'structure', 123 | expr, 124 | startToken, 125 | endToken, 126 | }); 127 | 128 | // prettier-ignore 129 | 
export const builders = {
  error, ident, lit, terminal,
  alt, seq, repeat0, repeat1, maybe, sepBy0, sepBy1, structure,
  rule, ruleset,
}

/**
 * The grammar of the grammar language itself, written directly with the AST
 * builders. Each rule's semantic action constructs the AST node for the
 * construct it recognises.
 */
// prettier-ignore
export const coreAST = ((): AST => {
  // An `AltExpr` enclosed between the given start and end tokens.
  const bracketed = (open: string, close: string): AST =>
    structure(open, ident('AltExpr'), close);

  // `#` followed by a bracketed `AltExpr`; yields a `structure` node that
  // uses the same pair of tokens around the inner expression.
  const hashBracketed = (open: string, close: string): AST =>
    seq(
      (_hash, inner) => structure(open, inner, close),
      lit('#'), bracketed(open, close)
    );

  // The literal word `kind`, yielding the terminal node of that kind.
  const terminalWord = (kind: TerminalType): AST =>
    seq(() => terminal(kind), lit(kind));

  return ruleset(
    rule('Grammar', seq(
      (rules) => ruleset(...rules),
      sepBy1(ident('Rule'), lit(';'))
    )),
    rule('Rule', seq(
      (name, _eq, body) => rule(name, body),
      terminal('identifier'), lit('='), ident('AltExpr')
    )),
    rule('AltExpr', seq(
      // A single alternative is returned as-is instead of being wrapped.
      (_bar, head, tail) => (tail.length ? alt(head, ...tail) : head),
      maybe(lit('|')), ident('SeqExpr'),
      repeat0(seq((_bar, next) => next, lit('|'), ident('SeqExpr')))
    )),
    rule('SeqExpr', seq(
      (head, finish) => (finish ? finish(head) : head),
      ident('SepExpr'),
      maybe(seq(
        // Yields a function that completes the sequence once the head is known.
        (tail, _colon, action) => (head) => seq(action, head, ...tail),
        repeat0(ident('SepExpr')), lit(':'), terminal('value')
      ))
    )),
    rule('SepExpr', alt(
      seq(
        (item, _op, separator) => sepBy0(item, separator),
        ident('SepExpr'), lit('**'), ident('RepExpr')
      ),
      seq(
        (item, _op, separator) => sepBy1(item, separator),
        ident('SepExpr'), lit('++'), ident('RepExpr')
      ),
      ident('RepExpr')
    )),
    rule('RepExpr', alt(
      seq(repeat0, ident('Expr'), lit('*')),
      seq(repeat1, ident('Expr'), lit('+')),
      seq(maybe, ident('Expr'), lit('?')),
      ident('Expr')
    )),
    rule('Expr', alt(
      bracketed('(', ')'),
      hashBracketed('(', ')'),
      hashBracketed('[', ']'),
      hashBracketed('{', '}'),
      seq(
        (_include, included) =>
          (included && included.ast) || error('expected a language or AST'),
        lit('include'), terminal('value')
      ),
      seq(ident, terminal('identifier')),
      terminalWord('identifier'),
      terminalWord('operator'),
      terminalWord('keyword'),
      terminalWord('value'),
      seq(() => seq(() => null), lit('nil')),
      seq(checkLit, terminal('value'))
    ))
  );
})();
Pattern : ${(_, x) => x}; 39 | `; 40 | 41 | type TestFn = (x: unknown) => boolean; 42 | type UpdateFn = (x: unknown) => unknown; 43 | 44 | abstract class Dx { 45 | test(subject) { 46 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 47 | for (const _ of this.matches(subject)) { 48 | return true; 49 | } 50 | return false; 51 | } 52 | match(subject) { 53 | for (const match of this.matches(subject)) { 54 | return match; 55 | } 56 | return undefined; 57 | } 58 | abstract matches(subject: unknown): Generator; 59 | abstract replace(subject: unknown, fn: UpdateFn): unknown; 60 | } 61 | 62 | class Nil extends Dx { 63 | *matches(_subject) { 64 | // noop 65 | } 66 | replace(subject, _fn: UpdateFn) { 67 | return subject; 68 | } 69 | } 70 | 71 | class Identity extends Dx { 72 | *matches(subject) { 73 | yield subject; 74 | } 75 | replace(subject, fn: UpdateFn) { 76 | return fn(subject); 77 | } 78 | } 79 | 80 | // TODO: make this work on object, array, map etc 81 | class Field extends Dx { 82 | constructor( 83 | private readonly fieldName: string, 84 | private readonly optional: boolean 85 | ) { 86 | super(); 87 | } 88 | *matches(subject) { 89 | if (this.fieldName in subject) { 90 | yield subject[this.fieldName]; 91 | } else if (this.optional) { 92 | yield null; 93 | } 94 | } 95 | replace(subject, fn: UpdateFn) { 96 | if (this.fieldName in subject || this.optional) { 97 | return { ...subject, [this.fieldName]: fn(subject[this.fieldName]) }; 98 | } else { 99 | return subject; 100 | } 101 | } 102 | } 103 | 104 | function toTest(pattern: unknown): TestFn { 105 | if (typeof pattern === "function") return pattern as TestFn; 106 | if (pattern && typeof pattern === "object") { 107 | return (value: any) => { 108 | for (const key in pattern) { 109 | if (!value || !(key in value) || !toTest(pattern[key])(value[key])) 110 | return false; 111 | } 112 | return true; 113 | }; 114 | } 115 | return (value) => value === pattern; 116 | } 117 | 118 | function fromPairs(pairs: 
Array<[string, unknown]>) { 119 | const res = {}; 120 | for (const [key, value] of pairs) { 121 | res[key] = value; 122 | } 123 | return res; 124 | } 125 | 126 | function arrayTest(heads: TestFn[], tail?: TestFn): TestFn { 127 | return (value: unknown[]) => { 128 | if (!Array.isArray(value)) return false; 129 | for (const [i, head] of heads.entries()) { 130 | if (value.length <= i) return false; 131 | if (!head(value[i])) return false; 132 | } 133 | if (tail) { 134 | for (let i = heads.length; i < value.length; i++) { 135 | if (!tail(value[i])) return false; 136 | } 137 | } 138 | return true; 139 | }; 140 | } 141 | 142 | class Test extends Dx { 143 | constructor(private readonly testFn: (x: unknown) => boolean) { 144 | super(); 145 | } 146 | *matches(subject) { 147 | if (this.testFn(subject)) { 148 | yield subject; 149 | } 150 | } 151 | replace(subject, fn: UpdateFn) { 152 | if (this.testFn(subject)) { 153 | return fn(subject); 154 | } else { 155 | return subject; 156 | } 157 | } 158 | } 159 | 160 | class Select extends Dx { 161 | constructor(private readonly dx: Dx) { 162 | super(); 163 | } 164 | *matches(subject) { 165 | for (const result of this.dx.matches(subject)) { 166 | if (result) { 167 | yield subject; 168 | return; 169 | } 170 | } 171 | } 172 | replace(subject, fn: UpdateFn) { 173 | for (const result of this.dx.matches(subject)) { 174 | if (result) { 175 | return fn(subject); 176 | } 177 | } 178 | return subject; 179 | } 180 | } 181 | 182 | class Seq extends Dx { 183 | constructor(private readonly left: Dx, private readonly right: Dx) { 184 | super(); 185 | } 186 | *matches(subject): Generator { 187 | for (const next of this.left.matches(subject)) { 188 | yield* this.right.matches(next); 189 | } 190 | } 191 | replace(subject, fn: UpdateFn) { 192 | return this.left.replace(subject, (value) => this.right.replace(value, fn)); 193 | } 194 | } 195 | 196 | class Comma extends Dx { 197 | constructor(private readonly left: Dx, private readonly right: Dx) { 198 | 
super(); 199 | } 200 | *matches(subject) { 201 | yield* this.left.matches(subject); 202 | yield* this.right.matches(subject); 203 | } 204 | replace(subject, fn: UpdateFn) { 205 | const next = this.left.replace(subject, fn); 206 | return this.right.replace(next, fn); 207 | } 208 | } 209 | 210 | class Alt extends Dx { 211 | constructor(private readonly left: Dx, private readonly right: Dx) { 212 | super(); 213 | } 214 | *matches(subject) { 215 | let didMatch = false; 216 | for (const match of this.left.matches(subject)) { 217 | didMatch = true; 218 | yield match; 219 | } 220 | if (!didMatch) { 221 | yield* this.right.matches(subject); 222 | } 223 | } 224 | replace(subject, fn: UpdateFn) { 225 | let didMatch = false; 226 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 227 | for (const _ of this.left.matches(subject)) { 228 | didMatch = true; 229 | break; 230 | } 231 | if (didMatch) { 232 | return this.left.replace(subject, fn); 233 | } else { 234 | return this.right.replace(subject, fn); 235 | } 236 | } 237 | } 238 | 239 | class Slice extends Dx { 240 | *matches(subject: any) { 241 | yield* subject.values(); 242 | } 243 | replace(subject: any, fn: UpdateFn) { 244 | return subject.map(fn); 245 | } 246 | } 247 | 248 | class Recur extends Dx { 249 | *matches(rootSubject: any) { 250 | const visited = new WeakSet(); 251 | function* inner(subject) { 252 | if (visited.has(subject)) return; 253 | yield subject; 254 | 255 | if (subject && typeof subject === "object") { 256 | visited.add(subject); 257 | for (const key in subject) { 258 | yield* inner(subject[key]); 259 | } 260 | } 261 | } 262 | 263 | yield* inner(rootSubject); 264 | } 265 | replace(initSubject: any, fn: UpdateFn) { 266 | const cache = new WeakMap(); 267 | function inner(subject) { 268 | if (cache.has(subject)) return cache.get(subject)!; 269 | 270 | if (subject && typeof subject === "object") { 271 | const result = { ...subject }; 272 | for (const key in subject) { 273 | result[key] = 
/**
 * The grammar language described in itself: parsing a grammar with `zebu`
 * yields plain AST objects (`ruleset`, `alt`, `seq`, `repeat0`, ...).
 *
 * `AltRest*` always produces an array, so the `AltExpr` action checks its
 * length: a lone alternative is returned unwrapped instead of becoming a
 * one-element `alt` node.
 */
// prettier-ignore
export const zebu = lang`
  Program = Rule ++ ";" : ${tag`ruleset rules`};
  Rule = identifier "=" AltExpr : ${(name, _, expr) => ({ name, expr })};

  AltExpr = "|"? SeqExpr AltRest*
    : ${(_, head, tail) => tail.length ? ({ type: 'alt', exprs: [head, ...tail] }) : head};
  AltRest = "|" SeqExpr : ${(_, expr) => expr};

  SeqExpr = SepExpr SeqTail? : ${(head, tail) => tail ? tail(head) : head};
  SeqTail = SepExpr* ":" value
    : ${(tail, _,fn) => (head) => ({ type: 'seq', fn, exprs: [head, ...tail] })};

  SepExpr = SepExpr "++" RepExpr : ${tag`sepBy1 expr _ separator`}
    | SepExpr "**" RepExpr : ${tag`sepBy0 expr _ separator`}
    | RepExpr;

  RepExpr = BaseExpr "+" : ${tag`repeat1 expr`}
    | BaseExpr "*" : ${tag`repeat0 expr`}
    | BaseExpr "?" : ${tag`maybe expr`}
    | BaseExpr;

  BaseExpr =
    | #( AltExpr )
    | "#" #( AltExpr ) : ${tag`structure _ expr startToken="(" endToken=")"`}
    | "#" #[ AltExpr ] : ${tag`structure _ expr startToken="[" endToken="]"`}
    | "#" #{ AltExpr } : ${tag`structure _ expr startToken="{" endToken="}"`}
    | "include" AST : ${tag`include _ ast`}
    | "value" : ${tag`terminal value="value"`}
    | "identifier" : ${tag`terminal value="identifier"`}
    | "keyword" : ${tag`terminal value="keyword"`}
    | "operator" : ${tag`terminal value="operator"`}
    | "nil" : ${tag`seq exprs=${[]} fn=${() => null}`}
    | identifier : ${tag`identifier value`}
    | value : ${tag`literal value`};

  AST = value : ${(value) => value.ast};
`;
: ${(l, _, r) => Span.fromArgs(l, r)} 15 | | value : ${(x) => Value.from(x)} 16 | | "nil" : ${() => Nil}; 17 | 18 | SpanEnd 19 | = value "," value : ${(start, _, next) => ({ start, next })}; 20 | | value : ${(start) => ({ start, next: null})}; 21 | | nil 22 | EndRange = Excluding value : ${(excluding, end) => ({ excluding, end })}; 23 | Excluding = "including" | "excluding" | nil : ${(x) => x === "excluding"}; 24 | `; 25 | 26 | abstract class Range { 27 | abstract has(value: T): boolean; 28 | abstract [Symbol.iterator](): Generator; 29 | } 30 | 31 | const Nil: Range = { 32 | has(_value: unknown) { 33 | return false; 34 | }, 35 | // eslint-disable-next-line @typescript-eslint/no-empty-function 36 | *[Symbol.iterator]() {}, 37 | }; 38 | 39 | class Value extends Range { 40 | constructor(private readonly value: T) { 41 | super(); 42 | } 43 | static from(x: T): Range { 44 | if (x instanceof Range) { 45 | return x as Range; 46 | } 47 | if (Symbol.iterator in x) { 48 | return new IterRange(x[Symbol.iterator]); 49 | } 50 | return new Value(x); 51 | } 52 | has(value: T) { 53 | return value === this.value; 54 | } 55 | *[Symbol.iterator]() { 56 | yield this.value; 57 | } 58 | } 59 | 60 | class IterRange extends Range { 61 | cache = new Set(); 62 | constructor(private readonly value: Iterable) { 63 | super(); 64 | this.cache = new Set(value); 65 | } 66 | has(value: T) { 67 | return this.cache.has(value); 68 | } 69 | *[Symbol.iterator]() { 70 | yield* this.value; 71 | } 72 | } 73 | 74 | interface SpanInner { 75 | first: T; 76 | next(prev: T): T; 77 | beforeEnd(value: T): boolean; 78 | afterBeginning(value: T): boolean; 79 | } 80 | 81 | type SpanEnd = 82 | | { type: "open" } 83 | | { type: "value"; value: number } 84 | | { type: "interval"; first: number; next: number }; 85 | 86 | function agreeOrDefault(xs: T[], defaultValue: T, error: Error) { 87 | if (!xs.length) return defaultValue; 88 | return xs.reduce((l, r) => { 89 | if (l !== r) { 90 | throw error; 91 | } 92 | return l; 
93 | }); 94 | } 95 | 96 | class Span extends Range { 97 | static fromArgs( 98 | start: SpanEnd, 99 | end: SpanEnd, 100 | includeStart: boolean, 101 | includeEnd: boolean 102 | ) { 103 | const isPositiveArr: boolean[] = []; 104 | const intervalArr: number[] = []; 105 | if (start.type === "interval") { 106 | isPositiveArr.push(start.next > start.first); 107 | intervalArr.push(start.next - start.first); 108 | } 109 | if (end.type === "interval") { 110 | isPositiveArr.push(end.next > end.first); 111 | intervalArr.push(end.next - end.first); 112 | } 113 | if (start.type !== "open" && end.type !== "open") { 114 | isPositiveArr.push(end.value > start.value); 115 | } 116 | const isPositive = agreeOrDefault( 117 | isPositiveArr, 118 | true, 119 | new Error("inconsistent iteration direction") 120 | ); 121 | 122 | const interval = agreeOrDefault( 123 | intervalArr, 124 | isPositive ? 1 : -1, 125 | new Error("inconsistent interval") 126 | ); 127 | } 128 | constructor(private inner: SpanInner) { 129 | super(); 130 | } 131 | has(value: T) { 132 | return this.inner.beforeEnd(value) && this.inner.afterBeginning(value); 133 | } 134 | *[Symbol.iterator]() { 135 | let current = this.inner.first; 136 | while (this.inner.beforeEnd(current)) { 137 | yield current; 138 | current = this.inner.next(current); 139 | } 140 | } 141 | } 142 | 143 | class NumberRange implements SpanInner { 144 | constructor( 145 | public readonly first: number, 146 | private readonly last: number, 147 | private readonly interval: number, 148 | private readonly excludeEnd: boolean 149 | ) {} 150 | beforeEnd(value: number) { 151 | if (this.interval > 0) { 152 | if (this.excludeEnd) { 153 | return value < this.last; 154 | } else { 155 | return value <= this.last; 156 | } 157 | } else { 158 | if (this.excludeEnd) { 159 | return value > this.last; 160 | } else { 161 | return value >= this.last; 162 | } 163 | } 164 | } 165 | afterBeginning(value: number) { 166 | if (this.interval > 0) { 167 | return this.first <= 
value; 168 | } else { 169 | return this.first >= value; 170 | } 171 | } 172 | next(value: number) { 173 | return value + this.interval; 174 | } 175 | } 176 | 177 | class Concat extends Range { 178 | constructor( 179 | private readonly left: Range, 180 | private readonly right: Range 181 | ) { 182 | super(); 183 | } 184 | has(value: T) { 185 | return this.left.has(value) || this.right.has(value); 186 | } 187 | *[Symbol.iterator]() { 188 | yield* this.left; 189 | yield* this.right; 190 | } 191 | } 192 | 193 | class Subset extends Range { 194 | constructor( 195 | private readonly base: Range, 196 | private readonly filter: Range 197 | ) { 198 | super(); 199 | } 200 | has(value: T) { 201 | return this.base.has(value) && this.filter.has(value); 202 | } 203 | *[Symbol.iterator]() { 204 | for (const item of this.base) { 205 | if (this.filter.has(item)) { 206 | yield item; 207 | } 208 | } 209 | } 210 | } 211 | */ 212 | -------------------------------------------------------------------------------- /src/examples/regex.ts: -------------------------------------------------------------------------------- 1 | import { lang } from '../lang'; 2 | 3 | export const re = lang` 4 | Prog = Expr : ${expr => new RegExp(expr)}; 5 | Expr = "?" identifier Alt : ${(_, name, expr) => `(?<${name}>${expr})`} 6 | | Alt; 7 | Alt = Seq "|" Alt :${(left, _, right) => `${left}|${right}`} 8 | | Seq; 9 | Seq = Repeat Seq : ${(left, right) => `${left}${right}`} 10 | | Repeat; 11 | Repeat = Seq "*" : ${expr => `${expr}*`} 12 | | Seq "+" : ${expr => `${expr}+`} 13 | | Seq "?" 
/**
 * Turn an interpolated value into regular-expression source text.
 *
 * A `RegExp` contributes its own `source` untouched; a string has every
 * character matched by the pattern below prefixed with a backslash.
 */
function escape(value: string | RegExp) {
  return value instanceof RegExp
    ? value.source
    : value.replace(/[/\\^$*+?.()|[\]{}]/g, '\\$&');
}
/** A table of bindings with an optional enclosing scope. */
class Scope {
  private values = new Map();
  constructor(private readonly parentScope: Scope | null) {}

  /** Look `key` up here, then in enclosing scopes; throws if nothing binds it. */
  get(key: string): unknown {
    if (this.values.has(key)) return this.values.get(key)!;
    if (this.parentScope) return this.parentScope.get(key);
    throw new Error(`ReferenceError: unknown identifier ${key}`);
  }

  /** Bind `key` in this scope, shadowing any binding in an enclosing scope. */
  set(key: string, value: unknown): void {
    this.values.set(key, value);
  }

  /**
   * Update the nearest scope that already binds `key`. If no scope binds it,
   * the binding is created in this scope.
   */
  assign(key: string, value: unknown): void {
    if (!this.rebind(key, value)) {
      this.values.set(key, value);
    }
  }

  /** Overwrite the nearest existing binding of `key`; false if there is none. */
  private rebind(key: string, value: unknown): boolean {
    if (this.values.has(key)) {
      this.values.set(key, value);
      return true;
    }
    return this.parentScope ? this.parentScope.rebind(key, value) : false;
  }
}

/** Thrown by `return`; caught by the enclosing function call. */
class ReturnInterrupt {
  constructor(public readonly value: unknown) {}
}

/** Thrown by `throw` and by runtime errors; records the nodes it passed through. */
class ThrowInterrupt {
  private nodeStack: Stmt[] = [];
  constructor(public readonly value: unknown, node: Stmt) {
    this.nodeStack.push(node);
  }
  pushStack(node: Stmt) {
    this.nodeStack.push(node);
  }
}

/** Value carried by a `ThrowInterrupt` raised by the interpreter itself. */
class VanillaException {
  constructor(public readonly message: string) {}
}

/** Thrown by `break`; caught by the enclosing loop. */
class BreakInterrupt {}
/** Thrown by `continue`; caught by the enclosing loop iteration. */
class ContinueInterrupt {}

/** Runtime representation of a dict literal. */
class Dict extends Map {}

/**
 * Tree-walking evaluator. Each instance owns one `Scope`; nested blocks and
 * function calls run in a child interpreter whose scope encloses this one.
 * Non-local control flow (`return`, `throw`, `break`, `continue`) is
 * implemented by throwing the interrupt classes above.
 */
class Interpreter {
  constructor(private readonly scope: Scope = new Scope(null)) {}

  /**
   * Evaluate one statement or expression node.
   *
   * @returns The value of an expression node, or `null` for statements.
   * @throws ReturnInterrupt | ThrowInterrupt | BreakInterrupt | ContinueInterrupt
   *         to unwind to the construct that handles them.
   */
  interpret(node: Stmt) {
    const assertBool = arg => assertBoolBound(arg, node);
    const assertNum = arg => assertNumBound(arg, node);
    switch (node.type) {
      case 'let':
        this.scope.set(node.binding, this.interpret(node.expr));
        return null;
      case 'if': {
        for (const branch of node.branches) {
          if (assertBool(this.interpret(branch.cond))) {
            this.runBlock(branch.body);
            return null;
          }
        }
        if (node.else.length) {
          this.runBlock(node.else);
        }
        return null;
      }
      case 'try': {
        try {
          this.runBlock(node.body);
        } catch (interrupt) {
          if (interrupt instanceof ThrowInterrupt && node.catch) {
            const ctx = this.createContext();
            ctx.scope.set(node.catch.binding, interrupt.value);
            for (const stmt of node.catch.body) {
              ctx.interpret(stmt);
            }
          } else {
            throw interrupt;
          }
        } finally {
          if (node.finally) {
            this.runBlock(node.finally);
          }
        }
        return null;
      }
      case 'while': {
        try {
          while (assertBool(this.interpret(node.expr))) {
            try {
              this.runBlock(node.body);
            } catch (interrupt) {
              if (interrupt instanceof ContinueInterrupt) {
                // continue
              } else {
                throw interrupt;
              }
            }
          }
        } catch (interrupt) {
          if (interrupt instanceof BreakInterrupt) {
            // break
          } else {
            throw interrupt;
          }
        }
        return null;
      }
      case 'for': {
        const expr = this.interpret(node.expr);
        // Iterating a dict visits its keys.
        const iter = expr instanceof Dict ? expr.keys() : expr;
        try {
          for (const value of iter) {
            const ctx = this.createContext();
            ctx.scope.set(node.binding, value);
            try {
              for (const stmt of node.body) {
                ctx.interpret(stmt);
              }
            } catch (interrupt) {
              if (interrupt instanceof ContinueInterrupt) {
                // continue
              } else {
                throw interrupt;
              }
            }
          }
        } catch (interrupt) {
          if (interrupt instanceof BreakInterrupt) {
            // break
          } else {
            throw interrupt;
          }
        }
        return null;
      }
      case 'return':
        throw new ReturnInterrupt(node.expr ? this.interpret(node.expr) : null);
      case 'throw':
        throw new ThrowInterrupt(this.interpret(node.expr), node);
      case 'break':
        throw new BreakInterrupt();
      case 'continue':
        throw new ContinueInterrupt();
      case 'identAssign': {
        const right = this.interpret(node.right);
        // Blocks run in child scopes, so an assignment must reach the scope
        // that declared the variable rather than shadow it locally.
        this.scope.assign(node.binding, right);
        return right;
      }
      case 'keyAssign': {
        const left = this.interpret(node.left);
        const right = this.interpret(node.right);
        const key = this.interpret(node.arg);
        if (left instanceof Dict) {
          left.set(key, right);
        } else {
          left[key] = right;
        }
        return right;
      }
      case 'or':
        if (assertBool(this.interpret(node.left))) return true;
        return assertBool(this.interpret(node.right));
      case 'and':
        if (!assertBool(this.interpret(node.left))) return false;
        return assertBool(this.interpret(node.right));
      case 'eq':
        return isEqual(this.interpret(node.left), this.interpret(node.right));
      case 'neq':
        return !isEqual(this.interpret(node.left), this.interpret(node.right));
      case 'gt':
        return (
          assertNum(this.interpret(node.left)) >
          assertNum(this.interpret(node.right))
        );
      case 'lt':
        return (
          assertNum(this.interpret(node.left)) <
          assertNum(this.interpret(node.right))
        );
      case 'gte':
        return (
          assertNum(this.interpret(node.left)) >=
          assertNum(this.interpret(node.right))
        );
      case 'lte':
        return (
          assertNum(this.interpret(node.left)) <=
          assertNum(this.interpret(node.right))
        );
      case 'add':
        return (
          assertNum(this.interpret(node.left)) +
          assertNum(this.interpret(node.right))
        );
      case 'sub':
        return (
          assertNum(this.interpret(node.left)) -
          assertNum(this.interpret(node.right))
        );
      case 'mul':
        return (
          assertNum(this.interpret(node.left)) *
          assertNum(this.interpret(node.right))
        );
      case 'div':
        return (
          assertNum(this.interpret(node.left)) /
          assertNum(this.interpret(node.right))
        );
      case 'mod':
        return (
          assertNum(this.interpret(node.left)) %
          assertNum(this.interpret(node.right))
        );
      case 'pow':
        return (
          assertNum(this.interpret(node.left)) **
          assertNum(this.interpret(node.right))
        );
      case 'not':
        return !assertBool(this.interpret(node.expr));
      case 'neg':
        return -assertNum(this.interpret(node.expr));
      case 'call': {
        const target = this.interpret(node.expr);
        // TODO: check target length
        if (typeof target === 'function') {
          return target(...node.args.map(arg => this.interpret(arg)));
        }
        throw new ThrowInterrupt(
          new VanillaException(`TypeError: target is not a function`),
          node
        );
      }
      case 'get': {
        const target = this.interpret(node.expr);
        const arg = this.interpret(node.arg);
        const result = target instanceof Dict ? target.get(arg) : target[arg];
        if (result === undefined) {
          throw new ThrowInterrupt(
            new VanillaException(
              `RangeError: target does not have property ${arg}`
            ),
            node
          );
        }
        return result;
      }
      case 'dict': {
        return new Dict(
          node.pairs.map(({ key, value }) => [
            this.interpret(key),
            this.interpret(value),
          ])
        );
      }
      case 'list': {
        return node.exprs.map(expr => this.interpret(expr));
      }
      case 'func': {
        return (...args: unknown[]) => {
          // A fresh scope per invocation: sharing one scope across calls
          // would let recursive or repeated calls overwrite each other's
          // parameters and leak locals into closures.
          const ctx = this.createContext();
          for (const [i, arg] of args.entries()) {
            ctx.scope.set(node.params[i], arg);
          }
          try {
            for (const stmt of node.body) {
              ctx.interpret(stmt);
            }
            return null;
          } catch (interrupt) {
            if (interrupt instanceof ReturnInterrupt) {
              return interrupt.value;
            } else if (interrupt instanceof ThrowInterrupt) {
              interrupt.pushStack(node);
              throw interrupt;
            } else {
              throw interrupt;
            }
          }
        };
      }

      case 'ident':
        return this.scope.get(node.value);
      case 'value':
        return node.value;
      default:
        assertUnreachable(node);
    }
  }

  /** Run `stmts` in order inside a new child scope. */
  private runBlock(stmts: Stmt[]): void {
    const ctx = this.createContext();
    for (const stmt of stmts) {
      ctx.interpret(stmt);
    }
  }

  /** Create an interpreter whose scope is enclosed by this one's. */
  private createContext() {
    return new Interpreter(new Scope(this.scope));
  }
}
( 381 | Array.isArray(left) && 382 | Array.isArray(right) && 383 | left.length === right.length 384 | ) { 385 | for (let i = 0; i < left.length; i++) { 386 | if (!isEqual(left[i], right[i])) return false; 387 | } 388 | return true; 389 | } 390 | if ( 391 | left instanceof Dict && 392 | right instanceof Dict && 393 | left.size === right.size 394 | ) { 395 | for (const [key, leftValue] of left) { 396 | if (!isEqual(leftValue, right.get(key))) return false; 397 | } 398 | return true; 399 | } 400 | return false; 401 | } 402 | 403 | type BlockContext = { 404 | inLoop: boolean; 405 | inFunc: boolean; 406 | inFinally: boolean; 407 | }; 408 | 409 | function assertBlockContext( 410 | node: Stmt, 411 | context: BlockContext = { inLoop: false, inFunc: false, inFinally: false } 412 | ): void { 413 | switch (node.type) { 414 | case 'if': { 415 | for (const branch of node.branches) { 416 | for (const stmt of branch.body) { 417 | assertBlockContext(stmt, context); 418 | } 419 | } 420 | for (const stmt of node.else) { 421 | assertBlockContext(stmt, context); 422 | } 423 | return; 424 | } 425 | case 'try': { 426 | for (const stmt of node.body) { 427 | assertBlockContext(stmt, context); 428 | } 429 | if (node.catch) { 430 | for (const stmt of node.catch.body) { 431 | assertBlockContext(stmt, context); 432 | } 433 | } 434 | if (node.finally) { 435 | const innerContext = { ...context, inFinally: true }; 436 | for (const stmt of node.finally) { 437 | assertBlockContext(stmt, innerContext); 438 | } 439 | } 440 | return; 441 | } 442 | case 'while': 443 | case 'for': { 444 | const innerContext = { ...context, inLoop: true }; 445 | for (const stmt of node.body) { 446 | assertBlockContext(stmt, innerContext); 447 | } 448 | return; 449 | } 450 | case 'func': { 451 | const innerContext = { 452 | inFunc: true, 453 | inLoop: false, 454 | inFinally: false, 455 | }; 456 | for (const stmt of node.body) { 457 | assertBlockContext(stmt, innerContext); 458 | } 459 | return; 460 | } 461 | case 
/**
 * Grammar for the "vanilla" example language. Parsing a program builds
 * `Stmt` nodes and passes them to `interpret`.
 *
 * The field names produced by the semantic actions must match what the rest
 * of this file reads:
 *  - `IfStatement` yields `cond`, which is the field `foldIfStatement` copies
 *    into each branch.
 *  - `TryStatement` yields `catch` and `finally` (null when absent), the
 *    fields of the `try` variant of `Stmt`.
 *  - `Key` attaches its action to both alternatives, because `:` binds to a
 *    single alternative of `|`; a property name is therefore always a
 *    `value` node.
 */
export const vanilla = lang`
  Program = Statement ** ";" : ${interpret};
  Block = #{ Statement ** ";" };
  Statement =
    | "let" Binding "=" Expression : ${tag`let _ binding _ expr`}
    | "while" Expression Block : ${tag`while _ expr body`}
    | "for" Binding "of" Expression Block : ${tag`for _ binding _ expr body`}
    | "return" Expression? : ${tag`return _ expr`}
    | "throw" Expression : ${tag`throw _ expr`}
    | "break" : ${tag`break`}
    | "continue" : ${tag`continue`}
    | IfStatement : ${foldIfStatement}
    | TryStatement
    | Expression;

  IfStatement = "if" Expression Block ElseBranch? : ${tag`if _ cond body next`};
  ElseBranch = "else" IfStatement : ${(_, node) => node}
    | "else" Block : ${tag`else _ body`};

  TryStatement = "try" Block Catch? Finally?
    : ${(_, body, handler, finalizer) => ({
      type: 'try',
      body,
      catch: handler || null,
      finally: finalizer || null,
    })};
  Catch = "catch" Binding Block : ${(_, binding, body) => ({ binding, body })};
  Finally = "finally" Block : ${(_, body) => body};

  Expression = include ${op`
    right "=" : ${parseAssignment}
    left "||" : ${tag`or left right`}
    left "&&" : ${tag`and left right`}
    left "==" : ${tag`eq left right`}
         "!=" : ${tag`neq left right`}
    left ">" : ${tag`gt left right`}
         "<" : ${tag`lt left right`}
         ">=" : ${tag`gte left right`}
         "<=" : ${tag`lte left right`}
    left "+" : ${tag`add left right`}
         "-" : ${tag`sub left right`}
    left "*" : ${tag`mul left right`}
         "/" : ${tag`div left right`}
         "%" : ${tag`mod left right`}
    right "**" : ${tag`pow left right`}
    pre "!" : ${tag`not expr`}
        "-" : ${tag`neg expr`}
    root RootExpression
  `};

  RootExpression =
    | #( Expression )
    | RootExpression #( Expression ** "," ) : ${tag`call expr args`}
    | RootExpression #[ Expression ] : ${tag`get expr arg`}
    | RootExpression "." Key : ${tag`get expr _ arg`}
    | "func" #( Binding ** "," ) Block : ${tag`func _ params body`}
    | #{ Pair ** "," } : ${tag`dict pairs`}
    | #[ Expression ** "," ] : ${tag`list exprs`}
    | identifier : ${tag`ident value`}
    | value : ${tag`value value`}
    | "true" : ${tag`value value=${true}`}
    | "false" : ${tag`value value=${false}`}
    | "null" : ${tag`value value=${null}`};

  Pair = Expression ":" Expression : ${(key, _, value) => ({ key, value })};
  Key = identifier : ${tag`value value`}
    | keyword : ${tag`value value`};
  Binding = identifier;
`;
expect(math`-123`).toEqual(-123); 28 | expect(math`(123)`).toEqual(123); 29 | expect(math`-(-(123))`).toEqual(123); 30 | }); 31 | 32 | test('undefined rules', () => { 33 | expect(() => lang`Rule = Expr`.compile()).toThrow(CompileError); 34 | }); 35 | 36 | test('self-recursion', () => { 37 | const right = lang` 38 | Term = Expr "**" Term : ${(l, _, r) => l ** r} 39 | | Expr; 40 | Expr = value; 41 | `; 42 | expect(right`2 ** 3 ** 4`).toEqual(2 ** (3 ** 4)); 43 | const left = lang` 44 | Term = Term "-" Expr: ${(l, _, r) => l - r} 45 | | Expr; 46 | Expr = value; 47 | `; 48 | expect(left`2 - 3 - 4`).toEqual(2 - 3 - 4); 49 | }); 50 | 51 | test('repeaters', () => { 52 | const list = lang` 53 | Expr = #( Expr* ) 54 | | identifier 55 | `; 56 | expect(list`(foo bar (baz quux) xyzzy)`).toEqual([ 57 | 'foo', 58 | 'bar', 59 | ['baz', 'quux'], 60 | 'xyzzy', 61 | ]); 62 | 63 | const nonEmptyList = lang` 64 | Expr = #( Expr+ ) 65 | | identifier 66 | `; 67 | expect(nonEmptyList`(foo bar (baz quux) xyzzy)`).toEqual([ 68 | 'foo', 69 | 'bar', 70 | ['baz', 'quux'], 71 | 'xyzzy', 72 | ]); 73 | expect(() => nonEmptyList`()`).toThrow(ParseError); 74 | 75 | expect(() => lang`Main = (value?)*`.compile()).toThrow(CompileError); 76 | }); 77 | 78 | test('if else', () => { 79 | const ifElse = lang` 80 | IfElse = "if" Block Else? 
81 | : ${(_, ifBlock, elseBlock) => ({ ifBlock, elseBlock })}; 82 | Else = "else" (IfElse | Block) : ${(_, x) => x}; 83 | Block = value; 84 | `; 85 | ifElse`if "foo"`; 86 | ifElse`if "foo" else "bar"`; 87 | ifElse`if "foo" else if "bar"`; 88 | ifElse`if "foo" else if "bar" else if "baz" else "quux"`; 89 | }); 90 | 91 | test('interpolated parser', () => { 92 | const list = lang`Main = (include ${lang`Main = value`})+`; 93 | expect(list`1 2 3`).toEqual([1, 2, 3]); 94 | 95 | expect(() => lang`Main = include ${null}`.compile()).toThrow(CompileError); 96 | }); 97 | 98 | test('keyword and operator terminals', () => { 99 | const noop = () => { 100 | return; 101 | }; 102 | const it = lang` 103 | Statement = "print" Expr : ${noop}; 104 | Expr = DotExpr ("+" Expr : ${noop})* : ${noop}; 105 | DotExpr = RootExpr ("." (identifier | keyword) : ${noop})* : ${noop}; 106 | RootExpr = #( operator ) 107 | | identifier 108 | | value; 109 | `; 110 | expect(() => { 111 | it`print (+)`; 112 | it`print foo.bar.print`; 113 | }).not.toThrow(); 114 | expect(() => { 115 | it`print print.foo`; 116 | }).toThrow(ParseError); 117 | }); 118 | 119 | test('separators', () => { 120 | const list1 = lang`Rule = identifier ++ ","`; 121 | expect(list1`foo`).toEqual(['foo']); 122 | expect(list1`foo,`).toEqual(['foo']); 123 | expect(list1`foo, bar, baz`).toEqual(['foo', 'bar', 'baz']); 124 | 125 | const list0 = lang`Rule = (identifier ** ",")`; 126 | expect(list0``).toEqual([]); 127 | expect(list0`foo`).toEqual(['foo']); 128 | expect(list0`foo,`).toEqual(['foo']); 129 | expect(list0`foo, bar, baz`).toEqual(['foo', 'bar', 'baz']); 130 | }); 131 | 132 | test('structures', () => { 133 | const fromPairs = pairs => pairs.reduce((l, r) => Object.assign(l, r), {}); 134 | const json = lang` 135 | Expr = 136 | | #{ Pair ** "," : ${fromPairs} } 137 | | #[ Expr ** "," ] 138 | | "true" : ${() => true} 139 | | "false" : ${() => false} 140 | | "null" : ${() => null} 141 | | value; 142 | Pair = value ":" Expr : ${(k, _, 
/**
 * Wraps a grammar AST in a template-tag parser that is compiled lazily.
 *
 * The returned function also carries two properties:
 * - `ast`: the grammar AST it was created from;
 * - `compile()`: builds the underlying parser via `createParser(ast)`. It can
 *   be called eagerly; otherwise it runs automatically on the first parse.
 *   Each call rebuilds the parser.
 *
 * @typeParam T Type of the value produced by parsing.
 * @param ast Grammar AST describing the language.
 * @returns A template-tag function producing `T`.
 */
export function createLanguage<T>(ast: AST): TemplateStringParser<T> {
  // Stays null until `compile()` has run at least once.
  let parser: TemplateStringParser<T> | null = null;
  function wrappedParser(strs: TemplateStringsArray, ...xs: unknown[]): T {
    if (!parser) {
      wrappedParser.compile();
    }
    return parser!(strs, ...xs);
  }
  wrappedParser.ast = ast;
  wrappedParser.compile = () => {
    parser = createParser(ast);
  };
  return wrappedParser;
}
/**
 * Test helper: reduces a token to its `type` and `value`, dropping position
 * fields so expectations can be written compactly. `value` is `undefined`
 * for tokens that carry no `value` property.
 */
function strip(token: Token) {
  const { type } = token;
  const value = 'value' in token ? token.value : undefined;
  return { type, value };
}
/**
 * Base class for failures raised while tokenizing.
 *
 * Subclasses supply the human-readable `message`; `pos` records the token
 * position the failure refers to. `Lexer.run` catches instances of this class
 * and rethrows them as a `ParseError` built from `message` and `pos`.
 *
 * Note that this class does not extend the built-in `Error`, so instances
 * carry no stack trace of their own.
 */
abstract class LexerError {
  // Assigned by each concrete subclass.
  message: string;
  constructor(public pos: TokenPosition) {}
}
/**
 * Tokenizer for template-literal source: an array of raw string segments with
 * interpolated values between them.
 *
 * Produces three kinds of tokens: `literal` (keywords and operators),
 * `identifier`, and `value` (numbers, quoted strings and interpolations).
 * Whitespace and comments are skipped.
 */
export class Lexer {
  lexerState: LexerState;
  mainPattern: RegExp;

  /**
   * @param keywords  Identifiers that are emitted as `literal` tokens.
   * @param operators Operator strings recognized as `literal` tokens.
   */
  constructor(private readonly keywords: Set<string>, operators: Set<string>) {
    const operatorsPattern = matchOperators(Array.from(operators));

    // Each alternative is wrapped in its own capture group, so the index of
    // the group that matched (1-9, in the order below) identifies the token
    // class in `mainState`.
    this.mainPattern = new RegExp(
      [
        /[ \t\n]+/, // 1: whitespace
        /\/\//, // 2: line comment
        /\/\*/, // 3: block comment
        /0x[0-9A-Fa-f_]+/, // 4-7: numbers
        /0o[0-7_]+/,
        /0b[0-1_]+/,
        // The exponent accepts one or more digits, so `1e10` is one token.
        /-?[0-9][0-9_]*(?:\.[0-9_]*)?(?:[eE]-?[0-9_]+)?/,
        identifierPattern, // 8: identifier
        operatorsPattern, // 9: operators
      ]
        .map(re => `(${re.source})`)
        .join('|'),
      'uy'
    );
  }

  /**
   * Tokenizes a template literal.
   *
   * @param strs    Raw string segments of the template.
   * @param interps Interpolated values; `interps[i]` sits between `strs[i]`
   *                and `strs[i + 1]`.
   * @returns The tokens in source order.
   * @throws ParseError when the input cannot be tokenized.
   */
  run(strs: readonly string[], interps: unknown[]): Token[] {
    this.lexerState = new LexerState(strs, interps);
    try {
      this.mainState();
    } catch (e) {
      // istanbul ignore else
      if (e instanceof LexerError) {
        throw new ParseError(e.message, strs, e.pos);
      } else {
        throw e;
      }
    }
    return this.lexerState.getTokens();
  }

  /** Top-level lexing loop: alternates string segments and interpolations. */
  private mainState() {
    const lexerState = this.lexerState;
    let ch: string | undefined;
    while (lexerState.hasStrings()) {
      while ((ch = lexerState.nextChar())) {
        // TODO: is this actually better than using regex?
        switch (ch) {
          case `'`:
            lexerState.index++;
            this.quote(singleQuotePattern);
            continue;
          case `"`:
            lexerState.index++;
            this.quote(doubleQuotePattern);
            continue;
        }

        const lastIndex = lexerState.index;
        const match = lexerState.matchPattern(this.mainPattern);
        // A zero-length match consumes nothing and would spin this loop
        // forever, so it is reported the same way as no match at all.
        if (!match || match[0].length === 0) {
          throw new NoTokenMatchError({
            index: lexerState.index,
            outerIndex: lexerState.outerIndex,
            length: 1,
          });
        }
        const matchedString = match[0];

        if (match[2]) {
          this.comment(lineCommentPattern);
        } else if (match[3]) {
          this.comment(blockCommentPattern);
        } else if (match[4] || match[5] || match[6] || match[7]) {
          // `Number()` rejects `_` digit separators (it yields NaN), so they
          // are removed before conversion.
          const value = Number(matchedString.replace(/_/g, ''));
          lexerState.push({
            type: 'value',
            value,
            index: lastIndex,
            outerIndex: lexerState.outerIndex,
            length: matchedString.length,
          });
        } else if (match[8]) {
          lexerState.push({
            type: this.keywords.has(matchedString) ? 'literal' : 'identifier',
            value: matchedString,
            index: lastIndex,
            outerIndex: lexerState.outerIndex,
            length: matchedString.length,
          });
        } else if (match[9]) {
          lexerState.push({
            type: 'literal',
            value: matchedString,
            index: lastIndex,
            outerIndex: lexerState.outerIndex,
            length: matchedString.length,
          });
        }
      }

      lexerState.push(lexerState.getInterpolation());
    }
  }

  /**
   * Lexes the remainder of a quoted string; the opening quote has already
   * been consumed. Values interpolated inside the quotes are stringified and
   * appended to the string's value.
   */
  private quote(pattern: RegExp) {
    const lexerState = this.lexerState;
    const token: Token = {
      type: 'value',
      value: '',
      index: lexerState.index - 1, // -1 for initial quote
      outerIndex: lexerState.outerIndex,
      length: 1,
    };
    while (lexerState.hasStrings()) {
      while (lexerState.nextChar()) {
        const match = lexerState.matchPattern(pattern);
        // TODO: what could this be besides a newline? Why _shouldn't_ a newline be allowed?
        if (!match) throw new StringNewlineError(token);

        if (match[1]) {
          // quote body
          token.value += match[1];
          token.length += match[1].length;
        } else {
          // end quote
          token.length++; // add 1 for end quote
          lexerState.push(token);
          return;
        }
      }
      // if interpolating mid-string, interpolate the value _into_ the string
      const interpolatedToken = lexerState.getInterpolation();
      if (interpolatedToken) {
        token.value += String(interpolatedToken.value);
      }
    }
    throw new StringIncompleteError(token);
  }

  /**
   * Skips a comment body. `pattern` matches either a run of comment text
   * (group 1) or the terminator (group 2). Interpolations inside the comment
   * are discarded.
   */
  private comment(pattern: RegExp) {
    while (this.lexerState.hasStrings()) {
      while (this.lexerState.nextChar()) {
        const match = this.lexerState.matchPattern(pattern);
        // istanbul ignore next
        if (!match || match[2]) return;
      }
      this.lexerState.getInterpolation();
    }
  }
}
/**
 * Builds a regular expression matching any one of the given operators.
 *
 * Duplicates are removed and longer operators are tried first, so `==` wins
 * over `=` when both are present.
 *
 * With no usable operators the result is a pattern that never matches.
 * Joining zero alternatives would give `(?:)`, which matches the empty
 * string; inside the lexer's sticky main pattern that is a successful match
 * consuming no input, so lexing would never advance.
 *
 * @param operators Operator strings; empty strings are ignored.
 * @returns A non-global RegExp whose source is a single non-capturing group
 *          (or `(?!)` when there is nothing to match).
 */
function matchOperators(operators: string[]) {
  const longestFirst = Array.from(new Set(operators))
    .filter(operator => operator.length > 0)
    .sort((a, b) => b.length - a.length);
  if (longestFirst.length === 0) {
    // Empty negative lookahead: fails at every position.
    return /(?!)/;
  }
  return new RegExp(`(?:${longestFirst.map(reEscape).join('|')})`);
}
/**
 * Turns an operator table into a grammar AST.
 *
 * Rule `i` of the table becomes grammar rule `"i"`, whose expression is
 * "an application of one of this level's operators, or else rule `i + 1`".
 * Operand placement depends on fixity:
 * - `left`:  self  op  next
 * - `right`: next  op  self
 * - `pre`:   op  self
 * - `post`:  self  op
 *
 * The final rule (named after the last `next`) accepts either a
 * parenthesized rule `"0"` or the root expression.
 *
 * @param rules    Operator rules; each gets its own grammar level, in order.
 * @param rootExpr Name of the rule used for operands; when falsy, a plain
 *                 `value` terminal is used instead.
 * @returns A `ruleset` AST whose first rule is the entry point.
 */
export function buildAST(rules: Rule[], rootExpr: string): AST {
  const ruleset: AST = { type: 'ruleset', rules: [] };
  // Reducers receive the operator's function in the operator position.
  const applyInfix = (l: any, op: any, r: any) => op(l, r);
  const applyPrefix = (op: any, r: any) => op(r);
  const applyPostfix = (l: any, op: any) => op(l);

  let next = ident('0');
  for (let i = 0; i < rules.length; i++) {
    const { fixity, operators } = rules[i];
    const name = String(i);
    const self = ident(name);
    next = ident(String(i + 1));

    // Every operator parses to its own handler function.
    const opAlts = alt(
      ...operators.map(({ pattern, fn }) =>
        seq(() => fn, ...pattern.map(checkLit))
      )
    );

    // eslint-disable-next-line no-loop-func
    const addRule = (fn: any, ...exprs: AST[]) => {
      ruleset.rules.push({ name, expr: alt(seq(fn, ...exprs), next) });
    };

    switch (fixity) {
      case 'left':
        addRule(applyInfix, self, opAlts, next);
        break;
      case 'right':
        addRule(applyInfix, next, opAlts, self);
        break;
      case 'pre':
        addRule(applyPrefix, opAlts, self);
        break;
      case 'post':
        addRule(applyPostfix, self, opAlts);
        break;
      // istanbul ignore next
      default:
        assertUnreachable(fixity);
    }
  }

  const rootASTNode: AST = rootExpr
    ? ident(rootExpr)
    : builders.terminal('value');
  const parenthesized = builders.structure('(', ident('0'), ')');

  ruleset.rules.push({
    name: (next as AST & { type: 'terminal' }).value,
    expr: alt(parenthesized, rootASTNode),
  });

  return ruleset;
}
/**
 * Mutable state threaded through the parser combinators: a cursor over the
 * token list plus a stack of intermediate results.
 */
export class ParseState {
  private index = 0;
  constructor(private readonly tokens: Token[]) {}
  private results: unknown[] = [];

  /** Consumes and returns the current token, or an EOF token at the end. */
  next(): Token | EofToken {
    return this.tokens[this.index++] || this.eofToken();
  }

  /** Returns the current token (or an EOF token) without consuming it. */
  peek(): Token | EofToken {
    return this.tokens[this.index] || this.eofToken();
  }

  /** Pushes a value onto the result stack. */
  push(x: unknown): void {
    this.results.push(x);
  }

  /**
   * Replaces the top `arity` results with `fn(...thoseResults)`, passing them
   * in the order they were pushed.
   */
  reduce(arity: number, fn: (...xs: unknown[]) => unknown): void {
    // Fill from the back so each popped value lands in place directly,
    // instead of shifting the whole array on every step.
    const args: unknown[] = new Array(arity);
    for (let i = arity - 1; i >= 0; i--) {
      args[i] = this.results.pop();
    }
    this.results.push(fn(...args));
  }

  /**
   * Finishes the parse.
   *
   * @returns The value on top of the result stack.
   * @throws MatchError when the cursor is not exactly at the end of input.
   */
  done(): unknown {
    if (this.index === this.tokens.length) {
      return this.results.pop();
    } else {
      throw new MatchError('end of input', this.peek());
    }
  }

  /**
   * Builds a zero-length EOF token positioned immediately after the last
   * real token (or at the origin when there are no tokens), so that
   * "unexpected end of input" errors point past the final token rather than
   * into it.
   */
  private eofToken(): EofToken {
    const lastToken = this.tokens[this.tokens.length - 1];
    return lastToken
      ? {
          type: 'eof',
          index: lastToken.index + lastToken.length,
          outerIndex: lastToken.outerIndex,
          length: 0,
        }
      : { type: 'eof', index: 0, outerIndex: 0, length: 0 };
  }
}
parserMap: Map) {} 148 | parse(state: ParseState): void { 149 | const token = state.peek(); 150 | let parser = this.parserMap.get(brandToken(token)); 151 | if (!parser) { 152 | parser = this.parserMap.get(brandEof); 153 | } 154 | if (!parser) { 155 | throw new MatchError( 156 | 'one of ' + [...this.parserMap.keys()].join(), 157 | token 158 | ); 159 | } 160 | parser.parse(state); 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /src/parser-ll.test.ts: -------------------------------------------------------------------------------- 1 | import { AST, builders, coreAST } from './core'; 2 | import { createParser } from './parser-ll'; 3 | 4 | const { 5 | ruleset, 6 | rule, 7 | seq, 8 | alt, 9 | lit, 10 | ident, 11 | terminal, 12 | sepBy0, 13 | structure, 14 | } = builders; 15 | 16 | test('json', () => { 17 | // prettier-ignore 18 | const grammar = ruleset( 19 | rule('expr', alt( 20 | seq((xs) => xs.reduce((l, r) => Object.assign(l, r), {}), 21 | structure('{', sepBy0(ident('pair'), lit(',')), '}'), 22 | ), 23 | structure('[', sepBy0(ident('expr'), lit(',')), ']'), 24 | seq(() => null, lit('null')), 25 | seq(() => true, lit('true')), 26 | seq(() => false, lit('false')), 27 | terminal('value') 28 | )), 29 | rule('pair', seq( 30 | (k, _, v) => ({ [k]: v }), 31 | terminal('value'), lit(':'), ident('expr') 32 | )) 33 | ) 34 | const json = createParser(grammar); 35 | 36 | expect(json`"foo"`).toEqual('foo'); 37 | expect(json`true`).toEqual(true); 38 | 39 | expect(json`{ "foo": 123 }`).toEqual({ foo: 123 }); 40 | }); 41 | 42 | test('core', () => { 43 | const lang = createParser(coreAST); 44 | const returnLangAST = lang`Main = "return" value : ${(_, x) => x}` as AST; 45 | const returnLang = createParser(returnLangAST); 46 | expect(returnLang`return 123`).toEqual(123); 47 | }); 48 | -------------------------------------------------------------------------------- /src/parser-ll.ts: 
-------------------------------------------------------------------------------- 1 | import { AST } from './core'; 2 | import { Lexer } from './lexer'; 3 | import { 4 | Alt, 5 | MatchLiteral, 6 | MatchRule, 7 | MatchType, 8 | Parser, 9 | Reduce, 10 | Seq, 11 | ParseState, 12 | InternalParseError, 13 | brandEof, 14 | brandLiteral, 15 | brandType, 16 | Terminal, 17 | } from './parser-combinators'; 18 | import { 19 | assertUnreachable, 20 | ParseError, 21 | CompileError, 22 | intersection, 23 | union, 24 | } from './util'; 25 | import { SimpleAST, SimpleASTAlt, ASTSimplifier } from './simplifier'; 26 | 27 | class FirstSetBuilder { 28 | cache = new Map>(); 29 | constructor(private rules: Map) {} 30 | get(node: SimpleAST, recurSet: Set = new Set()) { 31 | if (this.cache.has(node)) return this.cache.get(node)!; 32 | const res = this.getInner(node, recurSet); 33 | this.cache.set(node, res); 34 | return res; 35 | } 36 | private getInner(node: SimpleAST, recurSet: Set): Set { 37 | switch (node.type) { 38 | case 'reduce': 39 | return new Set([brandEof]); 40 | case 'literal': 41 | return new Set([brandLiteral(node.value)]); 42 | case 'identifier': 43 | return new Set([brandType('identifier')]); 44 | case 'value': 45 | return new Set([brandType('value')]); 46 | case 'nonterminal': { 47 | if (recurSet.has(node.value)) { 48 | throw new CompileError(`left recursion on ${node.value.description}`); 49 | } 50 | const next = this.rules.get(node.value)!; 51 | return this.get(next, new Set([...recurSet, node.value])); 52 | } 53 | case 'seq': { 54 | let set = new Set([brandEof]); 55 | for (const expr of node.exprs) { 56 | set.delete(brandEof); 57 | set = union(set, this.get(expr, recurSet)); 58 | if (!set.has(brandEof)) break; 59 | } 60 | return set; 61 | } 62 | case 'alt': { 63 | const set: Set = new Set(); 64 | for (const expr of node.exprs) { 65 | for (const terminal of this.get(expr, recurSet)) { 66 | if (set.has(terminal)) { 67 | throw new CompileError(`first/first conflict on 
/**
 * Compiles simplified grammar ASTs into executable parser objects.
 *
 * Alternations become lookup tables keyed by the first terminal each branch
 * can start with, so the resulting parsers choose a branch from a single
 * token of lookahead.
 */
export class ParserCompiler {
  // Shared with every `MatchRule`, which looks its target up at parse time;
  // this lets rules reference each other before all of them are compiled.
  compiledRules = new Map<symbol, Parser>();
  firstSet: FirstSetBuilder;

  constructor(ruleASTMap: Map<symbol, SimpleASTAlt>) {
    this.firstSet = new FirstSetBuilder(ruleASTMap);
  }

  /**
   * Compiles every rule of a grammar, then checks follow sets.
   *
   * @param ruleASTMap Simplified rule bodies keyed by rule name.
   * @returns Compiled parsers keyed by the same rule names.
   * @throws CompileError propagated from the first-set computation or from
   *         `checkFollowSets`.
   */
  static compileRuleset(
    ruleASTMap: Map<symbol, SimpleASTAlt>
  ): Map<symbol, Parser> {
    const compiler = new ParserCompiler(ruleASTMap);
    for (const [name, node] of ruleASTMap) {
      compiler.compiledRules.set(name, compiler.compile(node));
    }
    checkFollowSets(ruleASTMap, compiler.firstSet);
    return compiler.compiledRules;
  }

  /** Compiles a single AST node (and, recursively, its children). */
  compile(node: SimpleAST): Parser {
    switch (node.type) {
      case 'reduce':
        return new Reduce(node.arity, node.fn);
      case 'literal':
        return new MatchLiteral(node.value);
      case 'identifier':
        return new MatchType('identifier');
      case 'value':
        return new MatchType('value');
      case 'nonterminal':
        return new MatchRule(this.compiledRules, node.value);
      case 'seq':
        // Computed for its validation side effects.
        this.firstSet.get(node);
        return new Seq(node.exprs.map(expr => this.compile(expr)));
      case 'alt': {
        if (node.exprs.length === 1) return this.compile(node.exprs[0]);

        // Validates the alternation (throws on conflicting branches).
        this.firstSet.get(node);
        const parserMap = new Map<Terminal, Parser>();
        for (const expr of node.exprs) {
          // Compile each branch once and share it between all of its first
          // terminals; recompiling per terminal repeats the work for the
          // whole subtree at every nesting level.
          const branchParser = this.compile(expr);
          for (const terminal of this.firstSet.get(expr)) {
            parserMap.set(terminal, branchParser);
          }
        }
        return new Alt(parserMap);
      }
      // istanbul ignore next
      default:
        assertUnreachable(node);
    }
  }
}
// TODO: this catches `Rule = Foo? Foo` but not `Rule = Foo* Foo`
/**
 * Reject sequences in which an element that may match nothing is followed by
 * an element that can start with one of the same terminals (a first/follow
 * conflict), since one token of lookahead cannot choose between them.
 *
 * Throws `CompileError` naming the conflicting terminals.
 */
function checkFollowSets(
  ruleMap: Map<symbol, SimpleASTAlt>,
  firstSet: FirstSetBuilder
) {
  for (const rule of ruleMap.values()) {
    for (const branch of rule.exprs) {
      let workingSet = new Set<Terminal>();
      for (const expr of branch.exprs) {
        const exprSet = firstSet.get(expr);

        if (workingSet.has(brandEof)) {
          // everything so far is optional: the next element must not be
          // able to start with anything the optional part can start with
          workingSet.delete(brandEof);
          const conflicts = intersection(workingSet, exprSet);
          if (conflicts.size) {
            throw new CompileError(
              `first/follow conflict on ${[...conflicts].join()}`
            );
          }
          workingSet = union(workingSet, exprSet);
        } else {
          // Copy rather than alias: `exprSet` is the instance memoized by
          // FirstSetBuilder (and shared by every reference to the same
          // rule), and the `delete` above would otherwise strip `brandEof`
          // from the cache and hide conflicts in rules checked later.
          workingSet = new Set(exprSet);
        }
      }
    }
  }
}

/**
 * Build a tagged-template parser from a grammar AST.
 *
 * The grammar is simplified, a lexer is created from the literals it uses,
 * and the rules are compiled once. The returned function lexes and parses a
 * template each time it is called and returns the value left on the parse
 * state.
 *
 * Throws `CompileError` (from simplification/compilation) when the grammar is
 * invalid; the returned function throws `ParseError` when the input does not
 * match.
 */
export function createParser<T>(ast: AST) {
  const { startRule, rules, keywords, operators } = ASTSimplifier.simplifyAll(
    ast
  );
  const lexer = new Lexer(keywords, operators);
  const parserMap = ParserCompiler.compileRuleset(rules);
  const parser = parserMap.get(startRule)!;

  return (strs: readonly string[], ...xs: unknown[]): T => {
    const tokens = lexer.run(strs, xs);
    const parseState = new ParseState(tokens);
    try {
      parser.parse(parseState);
      return parseState.done() as T;
    } catch (e) {
      // Internal errors only know a token position; re-throw them with the
      // offending source shown in context. Anything else passes through.
      // istanbul ignore else
      if (e instanceof InternalParseError) {
        throw new ParseError(e.message, strs, e.pos);
      } else {
        throw e;
      }
    }
  };
}
| type: 'seq', 15 | exprs, 16 | }); 17 | const nt = (value: symbol): SimpleASTNode => ({ type: 'nonterminal', value }); 18 | const lit = (value: string): SimpleASTNode => ({ type: 'literal', value }); 19 | 20 | test('factorLeft', () => { 21 | const root = Symbol('root'); 22 | const rules = new Map([ 23 | [ 24 | root, 25 | alt( 26 | seq(lit('x'), lit('a')), 27 | seq(lit('x'), lit('b')), 28 | seq(lit('x')), 29 | seq(lit('c')) 30 | ), 31 | ], 32 | ]); 33 | factorLeft(rules); 34 | const next = (rules.get(root)!.exprs[0].exprs[1] as SimpleASTNode & { 35 | type: 'nonterminal'; 36 | }).value; 37 | expect(rules).toEqual( 38 | new Map([ 39 | [root, alt(seq(lit('x'), nt(next)), seq(lit('c')))], 40 | [next, alt(seq(lit('a')), seq(lit('b')), seq())], 41 | ]) 42 | ); 43 | }); 44 | 45 | test('fixLeftRecursion', () => { 46 | const Expr = Symbol('Expr'); 47 | const Factor = Symbol('Factor'); 48 | const Term = Symbol('Term'); 49 | // prettier-ignore 50 | const rules = new Map([ 51 | [Expr, alt( 52 | seq(nt(Expr), lit('+'), nt(Factor)), 53 | seq(nt(Expr), lit('-'), nt(Factor)), 54 | seq(nt(Factor)) 55 | )], 56 | [Factor, alt( 57 | seq(nt(Factor), lit('*'), nt(Term)), 58 | seq(nt(Factor), lit('/'), nt(Term)), 59 | seq(nt(Term)) 60 | )], 61 | [Term, alt( 62 | seq(lit('('), nt(Expr), lit(')')), 63 | seq({ type: "value" }) 64 | )] 65 | ]); 66 | fixLeftRecursion(rules); 67 | const ExprNext = (rules.get(Expr)!.exprs[0].exprs[1] as SimpleASTNode & { 68 | type: 'nonterminal'; 69 | }).value; 70 | const FactorNext = (rules.get(Factor)!.exprs[0].exprs[1] as SimpleASTNode & { 71 | type: 'nonterminal'; 72 | }).value; 73 | // prettier-ignore 74 | expect(rules).toEqual(new Map([ 75 | [Expr, alt(seq(nt(Factor), nt(ExprNext)))], 76 | [ExprNext, alt( 77 | seq(lit('+'), nt(Factor), nt(ExprNext)), 78 | seq(lit('-'), nt(Factor), nt(ExprNext)), 79 | seq() 80 | )], 81 | [Factor, alt(seq(nt(Term), nt(FactorNext)))], 82 | [FactorNext, alt( 83 | seq(lit('*'), nt(Term), nt(FactorNext)), 84 | seq(lit('/'), 
/**
 * factor out direct left recursion, using the algorithm:
 * ```
 * A = A "+" B | B
 * -->
 * A  = B A_
 * A_ = "+" B A_ | nil
 * ```
 *
 * Mutates `rules` in place. Only *direct* recursion (a branch whose first
 * element is the rule itself) is rewritten.
 */
export function fixLeftRecursion(rules: Map<symbol, SimpleASTAlt>): void {
  for (const [ruleName, rule] of rules) {
    const [leftRecursiveBranches, safeBranches] = partition(
      rule.exprs,
      branch => {
        const head = branch.exprs[0];
        return head && head.type === 'nonterminal' && head.value === ruleName;
      }
    );
    if (leftRecursiveBranches.length === 0) continue;

    const tailRule = Symbol();
    // a fresh node object per use, so no two sequences share a node
    const tailRef = (): SimpleASTNode => ({
      type: 'nonterminal',
      value: tailRule,
    });

    // make a self-recursive rule here (instead of using * repeater)
    // so that you don't have to transform the reduce fns
    const tailBranches = leftRecursiveBranches.map(
      (branch): SimpleASTSeq => ({
        type: 'seq',
        exprs: [...branch.exprs.slice(1), tailRef()],
      })
    );
    tailBranches.push({ type: 'seq', exprs: [] }); // the `nil` alternative
    rules.set(tailRule, { type: 'alt', exprs: tailBranches });

    rules.set(ruleName, {
      type: 'alt',
      exprs: safeBranches.map(
        (branch): SimpleASTSeq => ({
          type: 'seq',
          exprs: [...branch.exprs, tailRef()],
        })
      ),
    });
  }
}

// Branches of one rule grouped by their first element; the `null` key holds
// the remainder of empty branches.
type PrefixGroups = Map<SimpleASTNode | null, SimpleASTAlt>;

/**
 * factor out shared left prefixes, using the algorithm:
 * ```
 * A = a X | a Y
 * -->
 * A  = a A_
 * A_ = X | Y
 * ```
 *
 * Mutates `rules` in place and repeats until no rule has two branches with
 * the same first element (newly created rules are factored as well).
 */
export function factorLeft(rules: Map<symbol, SimpleASTAlt>): void {
  for (;;) {
    // collect first, rewrite afterwards, so the map is not modified while
    // it is being iterated
    const pending: Array<[symbol, PrefixGroups]> = [];
    for (const [ruleName, rule] of rules) {
      const groups = groupByPrefix(rule);
      if (groups) pending.push([ruleName, groups]);
    }
    if (pending.length === 0) return;

    for (const [ruleName, groups] of pending) {
      const branches: SimpleASTSeq[] = [];
      for (const [prefix, rest] of groups) {
        const head: SimpleASTNode[] = prefix ? [prefix] : [];
        const hasRemainder = rest.exprs.some(seq => seq.exprs.length > 0);
        if (hasRemainder) {
          // move whatever follows the prefix into its own rule
          const key = Symbol();
          rules.set(key, rest);
          branches.push({
            type: 'seq',
            exprs: [...head, { type: 'nonterminal', value: key }],
          });
        } else {
          branches.push({ type: 'seq', exprs: head });
        }
      }
      rules.set(ruleName, { type: 'alt', exprs: branches });
    }
  }
}

// a X | a Y | b  ->  { a -> X | Y, b -> nil }
// Returns null when no two branches share a first element, i.e. when there
// is nothing to factor.
function groupByPrefix(rule: SimpleASTAlt): PrefixGroups | null {
  const groups: PrefixGroups = new Map();
  let needsFactoring = false;
  for (const branch of rule.exprs) {
    const [prefix, ...rest] = branch.exprs;
    const remainder: SimpleASTSeq = { type: 'seq', exprs: rest };

    let bucket: SimpleASTAlt | undefined;
    for (const [key, alt] of groups) {
      if (isEqual(prefix, key)) {
        bucket = alt;
        break;
      }
    }

    if (bucket) {
      bucket.exprs.push(remainder);
      needsFactoring = true;
    } else {
      groups.set(prefix || null, { type: 'alt', exprs: [remainder] });
    }
  }

  return needsFactoring ? groups : null;
}

// Structural equality for leaf nodes: same type and same payload.
// TODO: coverage?
// istanbul ignore next
function isEqual(l: SimpleASTNode | null, rIn: SimpleASTNode | null) {
  if (l === rIn) return true;
  if (!l || !rIn) return false;
  if (l.type !== rIn.type) return false;
  const r = rIn as any; // :(
  switch (l.type) {
    case 'identifier':
    case 'value':
      return true;
    case 'literal':
    case 'nonterminal':
      return l.value === r.value;
    case 'reduce':
      return l.arity === r.arity && l.fn === r.fn;
    // istanbul ignore next
    default:
      assertUnreachable(l);
  }
}
/**
 * Flatten nested grammars into a single grammar, using the rules of lexical
 * scope, and replacing string identifiers with symbols.
 */
class ScopeManager {
  // innermost scope last; each scope maps a rule name to its unique symbol
  private stack: Array<Map<string, symbol>> = [];

  /**
   * Resolve a rule name to its symbol, searching from the innermost scope
   * outwards. Throws `CompileError` when no enclosing scope declares it.
   */
  public lookup(value: string): symbol {
    for (let depth = this.stack.length - 1; depth >= 0; depth--) {
      const found = this.stack[depth].get(value);
      if (found !== undefined) return found;
    }
    throw new CompileError(`unknown identifier ${value}`);
  }

  /**
   * Register a ruleset as a new scope and hand each rule to `addRule` while
   * that scope is active, so rule bodies can refer to any rule of the same
   * ruleset (including ones declared later) and to enclosing scopes.
   *
   * Returns a nonterminal node referring to the first rule of the ruleset.
   */
  public compileRuleset(
    rules: Array<{ name: string; expr: AST }>,
    addRule: (name: symbol, expr: AST) => void
  ): SimpleASTNode {
    // istanbul ignore next
    if (!rules.length) {
      throw new CompileError('should be unreachable');
    }

    // build scope lookup: every name gets its symbol before any body is
    // visited
    const nextScope = new Map<string, symbol>();
    const mappedRules = rules.map(({ name, expr }) => {
      const symName = Symbol(name);
      nextScope.set(name, symName);
      return { name: symName, expr };
    });

    // build rules in scope
    this.stack.push(nextScope);
    for (const { name, expr } of mappedRules) {
      addRule(name, expr);
    }
    this.stack.pop();

    // return first rule as identifier
    return { type: 'nonterminal', value: nextScope.get(rules[0].name)! };
  }
}
/**
 * Track the literals used in the grammar so they can be correctly tokenized
 * by the lexer, and convert references to the 'keyword' and 'operator' token
 * with rules that match all keywords or operators.
 */
class LiteralManager {
  private keywords = new Set<string>();
  private operators = new Set<string>();
  private keywordRule = Symbol('keyword');
  private operatorRule = Symbol('operator');
  // the catch-all rules are only emitted when the grammar refers to them
  private usedKeywordRule = false;
  private usedOperatorRule = false;

  /**
   * Record a literal (as a keyword when it matches `identifierPattern`,
   * otherwise as an operator) and return the node that matches it.
   */
  public add(literal: string): SimpleASTNode {
    const bucket = literal.match(identifierPattern)
      ? this.keywords
      : this.operators;
    bucket.add(literal);
    return { type: 'literal', value: literal };
  }

  /**
   * Convert a `terminal` AST node. `keyword` / `operator` become references
   * to the catch-all rules; any other terminal kind is passed through as a
   * node of that type.
   */
  public terminal(node: AST & { type: 'terminal' }): SimpleASTNode {
    if (node.value === 'keyword') {
      this.usedKeywordRule = true;
      return { type: 'nonterminal', value: this.keywordRule };
    }
    if (node.value === 'operator') {
      this.usedOperatorRule = true;
      return { type: 'nonterminal', value: this.operatorRule };
    }
    return { type: node.value };
  }

  /**
   * Add the catch-all rules that were referenced to `map` and return the
   * collected literal sets for the lexer.
   */
  public compile(map: Map<symbol, SimpleASTAlt>) {
    if (this.usedKeywordRule) {
      map.set(this.keywordRule, createAlts(this.keywords));
    }
    if (this.usedOperatorRule) {
      map.set(this.operatorRule, createAlts(this.operators));
    }
    return { keywords: this.keywords, operators: this.operators };
  }
}

// Build a rule with one single-literal branch per entry of `lits`.
function createAlts(lits: Set<string>): SimpleASTAlt {
  return {
    type: 'alt',
    exprs: [...lits].map(
      (value): SimpleASTSeq => ({
        type: 'seq',
        exprs: [{ type: 'literal', value }],
      })
    ),
  };
}
/**
 * Transform a Zebu AST into a flat grammar, using only alternation, sequence, and recursion.
 *
 * The result is a map from rule symbol to an `alt` of `seq`s of leaf nodes.
 * Nested constructs are hoisted into anonymous rules and referenced through
 * `nonterminal` nodes.
 */
export class ASTSimplifier {
  // every rule produced so far, keyed by its symbol
  rules = new Map();
  scope = new ScopeManager();
  literals = new LiteralManager();
  /** Simplify `node` with a fresh simplifier instance. */
  static simplifyAll(node: AST) {
    return new ASTSimplifier().simplifyAll(node);
  }
  /**
   * Register `node` as the start rule, apply the left-recursion and
   * left-factoring rewrites to all rules, then let the literal manager add
   * its keyword/operator rules.
   *
   * Returns the start symbol, the rule map, and the keyword/operator sets.
   */
  private simplifyAll(node: AST) {
    const startRule = Symbol('start');
    this.rules.set(startRule, this.simplifyAlt(node));
    fixLeftRecursion(this.rules);
    factorLeft(this.rules);

    // done after the rewrites above, so the rules added here are not
    // processed by them
    const { keywords, operators } = this.literals.compile(this.rules);
    return {
      startRule,
      rules: this.rules,
      keywords,
      operators,
    };
  }
  /**
   * Simplify `node` into an alternation. Node types that are not
   * alternations themselves become an `alt` with a single branch.
   */
  private simplifyAlt(node: AST): SimpleASTAlt {
    switch (node.type) {
      case 'alt':
        return {
          type: 'alt',
          exprs: node.exprs.map(expr => this.simplifySeq(expr)),
        };
      case 'maybe':
        // Expr?  --->  Expr | (push null)
        return {
          type: 'alt',
          exprs: [this.simplifySeq(node.expr), pushNull],
        };
      case 'sepBy0':
        // zero-or-more separated items: the sepBy1 form, or (push [])
        return {
          type: 'alt',
          exprs: [
            {
              type: 'seq',
              exprs: [
                this.simplifyNode({
                  type: 'sepBy1',
                  expr: node.expr,
                  separator: node.separator,
                }),
              ],
            },
            pushArr,
          ],
        };
      default:
        return { type: 'alt', exprs: [this.simplifySeq(node)] };
    }
  }
  /**
   * Simplify `node` into a sequence. Node types that are not sequences
   * themselves become a `seq` with a single element.
   */
  private simplifySeq(node: AST): SimpleASTSeq {
    switch (node.type) {
      case 'structure':
        // start token, body, end token; the reduce keeps the second of the
        // three values (the body)
        return {
          type: 'seq',
          exprs: [
            this.literals.add(node.startToken),
            this.simplifyNode(node.expr),
            this.literals.add(node.endToken),
            { type: 'reduce', arity: 3, fn: _2 },
          ],
        };
      case 'seq':
        // the elements, followed by a reduce that applies the sequence's
        // own function to as many values as there are elements
        return {
          type: 'seq',
          exprs: node.exprs
            .map(expr => this.simplifyNode(expr))
            .concat([
              { type: 'reduce', arity: node.exprs.length, fn: node.fn },
            ]),
        };
      case 'repeat1':
        // Expr+  --->  Expr Expr*, with the first item consed onto the rest
        return {
          type: 'seq',
          exprs: [
            this.simplifyNode(node.expr),
            this.simplifyNode({ type: 'repeat0', expr: node.expr }),
            { type: 'reduce', arity: 2, fn: cons },
          ],
        };
      default:
        return { type: 'seq', exprs: [this.simplifyNode(node)] };
    }
  }
  /**
   * Simplify `node` into a single leaf node. Anything that is not already a
   * leaf is registered as a rule of its own and replaced by a reference to
   * that rule. Throws `CompileError` for `error` nodes.
   */
  private simplifyNode(node: AST): SimpleASTNode {
    switch (node.type) {
      case 'error':
        throw new CompileError(node.message);
      case 'repeat1':
      case 'structure':
      case 'seq':
      case 'alt':
      case 'sepBy0':
      case 'maybe': {
        // hoist into an anonymous rule
        const ruleName = Symbol();
        this.rules.set(ruleName, this.simplifyAlt(node));
        return { type: 'nonterminal', value: ruleName };
      }
      case 'ruleset':
        // each named rule is registered under the symbol the scope manager
        // assigns to it; the node returned refers to the first rule
        return this.scope.compileRuleset(node.rules, (name, expr) => {
          this.rules.set(name, this.simplifyAlt(expr));
        });
      case 'literal':
        return this.literals.add(node.value);
      case 'terminal':
        return this.literals.terminal(node);
      case 'identifier':
        return { type: 'nonterminal', value: this.scope.lookup(node.value) };
      case 'repeat0': {
        // A = Expr* ---> A = Expr A | nil
        const recur: SimpleASTNode = {
          type: 'nonterminal',
          value: Symbol('Repeat'),
        };
        this.rules.set(recur.value, {
          type: 'alt',
          exprs: [
            {
              type: 'seq',
              exprs: [
                this.simplifyNode(node.expr),
                recur,
                { type: 'reduce', arity: 2, fn: cons },
              ],
            },
            pushArr,
          ],
        });
        return recur;
      }
      case 'sepBy1': {
        // A = Expr (Sep Expr)* Sep?
        // --->
        // A = Expr B
        // B = Sep C | nil
        // C = A | nil
        const A: SimpleASTNode = { type: 'nonterminal', value: Symbol() };
        const B: SimpleASTNode = { type: 'nonterminal', value: Symbol() };
        const C: SimpleASTNode = { type: 'nonterminal', value: Symbol() };
        const expr = this.simplifyNode(node.expr);
        const sep = this.simplifyNode(node.separator);

        // A: one item consed onto whatever B produces
        this.rules.set(A.value, {
          type: 'alt',
          exprs: [
            {
              type: 'seq',
              exprs: [expr, B, { type: 'reduce', arity: 2, fn: cons }],
            },
          ],
        });
        // B: separator then C, keeping only C's value; or an empty array
        this.rules.set(B.value, {
          type: 'alt',
          exprs: [
            {
              type: 'seq',
              exprs: [sep, C, { type: 'reduce', arity: 2, fn: _2 }],
            },
            pushArr,
          ],
        });
        // C: more items, or an empty array (i.e. a trailing separator)
        this.rules.set(C.value, {
          type: 'alt',
          exprs: [{ type: 'seq', exprs: [A] }, pushArr],
        });

        return A;
      }
      // istanbul ignore next
      default:
        assertUnreachable(node);
    }
  }
}
/**
 * Build the constructor function for a tagged node.
 *
 * The returned function receives one positional result per field and
 * produces `{ type, ...fields }`:
 * - `field` copies the result found at the field's own position,
 * - `fieldWithValue` ignores the results and uses the fixed value,
 * - `skip` contributes nothing.
 */
const parse = (type: string, fields: Field[]) => (...results: unknown[]) => {
  const node: any = { type };
  fields.forEach((field, position) => {
    if (field.type === 'field') {
      node[field.field] = results[position];
    } else if (field.type === 'fieldWithValue') {
      node[field.field] = field.value;
    }
    // 'skip' entries only occupy a position
  });
  return node;
};
/**
 * Exhaustiveness guard for `switch` statements: only type-checks when every
 * case has been handled, and throws if it is reached at runtime anyway.
 */
// istanbul ignore next
export function assertUnreachable(value: never): never {
  console.error("shouldnt have gotten (", value, ")");
  throw new Error(`unreachable`);
}

/** Return a new set holding every member of `left` and of `right`. */
export function union<T>(left: Set<T>, right: Set<T>): Set<T> {
  const out = new Set<T>(left);
  for (const item of right) out.add(item);
  return out;
}

/**
 * Return a new set holding the members of `left` that are also in `right`
 * (in `left`'s iteration order).
 */
export function intersection<T>(left: Set<T>, right: Set<T>): Set<T> {
  return new Set<T>([...left].filter(item => right.has(item)));
}

/**
 * Split `xs` into `[matching, nonMatching]` according to `fn`, keeping the
 * original order within each half.
 */
export function partition<T>(xs: T[], fn: (x: T) => boolean): [T[], T[]] {
  const trues: T[] = [];
  const falses: T[] = [];
  for (const x of xs) {
    (fn(x) ? trues : falses).push(x);
  }
  return [trues, falses];
}
/*
 * outerIndex: index of string in interpolation
 * index: position in string
 * length: length of matched pattern
 * NOTE: an interpolation is treated as if it is at index 0 and has a length of 0.
 * If a token spans across an interpolation (e.g. it's a quoted string with an interpolation),
 * the length will be the sum of the string parts, e.g.
 * the length of "foo${x}bar", which is parsed as [`"foo`, x, bar"`], is 8
 */
export type TokenPosition = {
  index: number;
  outerIndex: number;
  length: number;
};

// Upper bound on how many characters of context are collected on either side
// of a token when no line break is found first.
const MAX_OFFSET = 100;

// Stand-in printed where an interpolated value sits in the template.
// eslint-disable-next-line no-template-curly-in-string
const INTERPOLATION_PLACEHOLDER = "${...}";

/**
 * Walk the template character by character, starting at `pos` and moving by
 * `direction` (+1 / -1), crossing from one string part to the next.
 *
 * `fn` is called with each character and the current walk state *before*
 * that character is counted; returning true stops the walk. The returned
 * state's `length` is the number of characters stepped over. If the walk
 * runs off either end of the template, the returned `outerIndex` is out of
 * range (`-1` or `strs.length`).
 */
function scan(
  strs: readonly string[],
  pos: TokenPosition,
  direction: number,
  fn: (ch: string, pos: TokenPosition) => boolean
) {
  const state = { length: 0, index: pos.index, outerIndex: pos.outerIndex };
  while (state.outerIndex >= 0 && state.outerIndex < strs.length) {
    while (state.index >= 0 && state.index < strs[state.outerIndex].length) {
      const ch = strs[state.outerIndex][state.index];
      if (fn(ch, state)) return state;
      state.length++;
      state.index += direction;
    }
    // continue in the neighbouring string part: at its last character when
    // walking backwards, at its first when walking forwards
    state.outerIndex += direction;
    if (direction < 0) {
      state.index = (strs[state.outerIndex] || "").length - 1;
    } else {
      state.index = 0;
    }
  }

  return state;
}

// Walk from `pos` to the nearest line break in `direction`, giving up after
// MAX_OFFSET characters.
function findOffset(
  strs: readonly string[],
  pos: TokenPosition,
  direction: number
): TokenPosition {
  return scan(
    strs,
    pos,
    direction,
    (ch, newPos) => ch === "\n" || newPos.length >= MAX_OFFSET
  );
}

// Position just past the last character of the token at `pos`.
function toEnd(strs: readonly string[], pos: TokenPosition): TokenPosition {
  return scan(strs, pos, 1, (_, nextPos) => nextPos.length === pos.length);
}

/**
 * Render the line(s) of the template that contain the token at `pos`, each
 * followed by a line of `^` marks underneath the token. Interpolated values
 * are shown as `${...}`. Lines that are blank (including underline rows with
 * no marks) are dropped.
 */
export function showInContext(
  strs: readonly string[],
  pos: TokenPosition
): string {
  const lines: string[] = [];
  let strInContext = "";
  let underline = "";

  const scannedStart = findOffset(strs, pos, -1);
  // When the token sits on the first line of the template, the backward
  // walk runs off the front and reports an out-of-range position; scanning
  // forward from there would visit nothing and produce an empty result.
  // Start at the very first character instead, with `length` set to the
  // number of characters that precede the token.
  const startPos: TokenPosition =
    scannedStart.outerIndex < 0
      ? {
          index: 0,
          outerIndex: 0,
          length: strs
            .slice(0, pos.outerIndex)
            .reduce((total, part) => total + part.length, pos.index),
        }
      : scannedStart;
  const endPos = findOffset(strs, toEnd(strs, pos), 1);

  // 1-based distance into the token: characters of the token see values
  // 1..pos.length, everything before it sees values <= 0.
  let offset = -startPos.length;
  const insideToken = () => offset > 0 && offset <= pos.length;

  scan(strs, startPos, 1, (ch, { index, outerIndex }) => {
    if (outerIndex > 0 && index === 0) {
      // first character after an interpolation: show the placeholder and
      // keep the underline row the same width
      strInContext += INTERPOLATION_PLACEHOLDER;
      underline += (insideToken() ? "^" : " ").repeat(
        INTERPOLATION_PLACEHOLDER.length
      );
    }

    offset++;

    if (ch === "\n") {
      lines.push(strInContext, underline);
      strInContext = "";
      underline = "";
    } else {
      strInContext += ch;
      underline += insideToken() ? "^" : " ";
    }

    // keep going until the end of the token's last line has been passed
    return !(outerIndex < endPos.outerIndex || index <= endPos.index);
  });

  lines.push(strInContext, underline);
  return lines.filter((line) => line.trimEnd().length > 0).join("\n");
}

/** A parse failure, with the offending source shown below the message. */
export class ParseError extends Error {
  constructor(message: string, strs: readonly string[], pos: TokenPosition) {
    super(`${message}\n${showInContext(strs, pos)}`);
  }
}

// TODO: add token position & show in context
// will probably need to include source tokens in AST nodes
/** A grammar that cannot be compiled. */
export class CompileError extends Error {}
dist/index instead of dist/src/index 14 | "rootDir": "./src", 15 | 16 | // stricter type-checking for stronger correctness. Recommended by TS 17 | "strict": false /* Enable all strict type-checking options. */, 18 | // "noImplicitAny": true, /* Raise error on expressions and declarations with an implied 'any' type. */ 19 | "strictNullChecks": true /* Enable strict null checks. */, 20 | // "strictFunctionTypes": true, /* Enable strict checking of function types. */ 21 | // "strictBindCallApply": true, /* Enable strict 'bind', 'call', and 'apply' methods on functions. */ 22 | // "strictPropertyInitialization": true, /* Enable strict checking of property initialization in classes. */ 23 | // "noImplicitThis": true, /* Raise error on 'this' expressions with an implied 'any' type. */ 24 | // "alwaysStrict": true, /* Parse in strict mode and emit "use strict" for each source file. */ 25 | 26 | // linter checks for common issues 27 | "noImplicitReturns": true, 28 | "noFallthroughCasesInSwitch": true, 29 | // noUnused* overlap with @typescript-eslint/no-unused-vars, can disable if duplicative 30 | "noUnusedLocals": true, 31 | "noUnusedParameters": true, 32 | // use Node's module resolution algorithm, instead of the legacy TS one 33 | "moduleResolution": "node", 34 | // transpile JSX to React.createElement 35 | "jsx": "react", 36 | // interop between ESM and CJS modules. Recommended by TS 37 | "esModuleInterop": true, 38 | // significant perf increase by skipping checking .d.ts files, particularly those in node_modules. Recommended by TS 39 | "skipLibCheck": true, 40 | // error out if import and file system have a casing mismatch. Recommended by TS 41 | "forceConsistentCasingInFileNames": true, 42 | // `tsdx build` ignores this option, but it is commonly used when type-checking separately with `tsc` 43 | "noEmit": true, 44 | } 45 | } 46 | --------------------------------------------------------------------------------