├── .npmignore ├── .gitignore ├── tests ├── fixtures │ └── tokens1.txt ├── whitespace-token-ava.mjs ├── identifier-token-ava.mjs ├── line-comment-token-ava.mjs ├── keyword-token-ava.mjs ├── calculator-grammar-ava.mjs ├── json-parser-ava.mjs ├── mini-lang-ava.mjs ├── tokenizer-ava.mjs └── token-kitchen-sink-ava-node.mjs ├── .github ├── workflows │ ├── pr_labeler.yml │ ├── auto_approve.yml │ ├── codeql_analysis.yml │ ├── update_readme_api.yml │ ├── update_package_lock.yml │ └── ci.yml ├── dependabot.yml └── pr_labeler.yml ├── .vscode └── launch.json ├── LICENSE ├── examples ├── sql.js ├── tns.mjs └── calculator.mjs ├── src ├── parser.mjs ├── tokenizer.mjs └── known-tokens.mjs ├── package.json └── README.md /.npmignore: -------------------------------------------------------------------------------- 1 | build 2 | tests 3 | doc 4 | *.log 5 | *.tmp 6 | .vscode 7 | examples 8 | .github 9 | tsconfig.json 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bun.lockb 2 | build 3 | /node_modules/** 4 | *.log 5 | *.dump 6 | types 7 | *.node 8 | .DS_Store 9 | .test 10 | .tmp 11 | -------------------------------------------------------------------------------- /tests/fixtures/tokens1.txt: -------------------------------------------------------------------------------- 1 | 4711 0.23 12345.0 12.4E+20 0.4E-7 2 | "str2""str3" "\\\b\f\n\r\t\"\'\u0041" 'str4''str5' 3 | name1 name_2 _name3 4 | n 5 | + 6 | - 7 | * 8 | / 9 | () 10 | {} 11 | [] 12 | :,;. 13 | < ===> !=== 14 | <= 15 | >= 16 | = 17 | 2 + (3 * 17) 18 | -------------------------------------------------------------------------------- /tests/whitespace-token-ava.mjs: -------------------------------------------------------------------------------- 1 | import test from "ava"; 2 | import { WhiteSpaceToken } from "pratt-parser"; 3 | 4 | test("whitespace", t => { 5 | const pp = { 6 | chunk: "x \n A", 7 | offset: 1, 8 | lineNumber: 1 9 | }; 10 | 11 | const token = WhiteSpaceToken.parse(pp); 12 | 13 | //t.is(token.value, undefined); 14 | t.is(pp.offset, 7); 15 | t.is(pp.lineNumber, 2); 16 | }); 17 | -------------------------------------------------------------------------------- /.github/workflows/pr_labeler.yml: -------------------------------------------------------------------------------- 1 | name: Pull Request Labeler 2 | on: pull_request_target 3 | jobs: 4 | triage: 5 | permissions: 6 | contents: read 7 | pull-requests: write 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/labeler@v6 11 | with: 12 | repo-token: ${{ secrets.GITHUB_TOKEN }} 13 | configuration-path: .github/pr_labeler.yml 14 | sync-labels: false 15 | -------------------------------------------------------------------------------- /tests/identifier-token-ava.mjs: -------------------------------------------------------------------------------- 1 | import test from "ava"; 2 | import { IdentifierToken } from "pratt-parser"; 3 | 4 | test("identifier tokens", t => { 5 | const pp = { 6 | chunk: " abc \n A", 7 | offset: 1, 8 | lineNumber: 1, 9 | get properties() { 10 | return {}; 11 | } 12 | }; 13 | 14 | const token = IdentifierToken.parse(pp); 15 | 16 | t.is(token.value, "abc"); 17 | t.is(pp.offset, 4); 18 | }); 19 | -------------------------------------------------------------------------------- /tests/line-comment-token-ava.mjs: -------------------------------------------------------------------------------- 1 | import test from "ava"; 2 | import { 
LineCommentToken } from "pratt-parser"; 3 | 4 | test("line comment", t => { 5 | const tokenizer = {}; 6 | const pp = { 7 | chunk: "x# \n A", 8 | offset: 1, 9 | lineNumber: 1 10 | }; 11 | 12 | const token = LineCommentToken.parse(pp); 13 | 14 | //t.is(token.value, undefined); 15 | t.is(pp.offset, 5); 16 | t.is(pp.lineNumber, 2); 17 | }); 18 | -------------------------------------------------------------------------------- /.github/workflows/auto_approve.yml: -------------------------------------------------------------------------------- 1 | name: Auto approve 2 | on: pull_request_target 3 | jobs: 4 | auto-approve: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: hmarr/auto-approve-action@v4.0.0 8 | if: github.actor == 'dependabot[bot]' 9 | with: 10 | github-token: ${{ secrets.GITHUB_TOKEN }} 11 | review-message: Auto approved automated PR 12 | permissions: 13 | pull-requests: write 14 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: npm 4 | directory: / 5 | schedule: 6 | interval: daily 7 | commit-message: 8 | prefix: "fix(deps):" 9 | prefix-development: "chore(deps):" 10 | labels: 11 | - npm dependencies 12 | - package-ecosystem: github-actions 13 | directory: / 14 | schedule: 15 | interval: daily 16 | commit-message: 17 | prefix: "chore(action):" 18 | labels: 19 | - dependencies 20 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "type": "node", 6 | "request": "launch", 7 | "name": "Debug AVA", 8 | "runtimeExecutable": "${workspaceFolder}/node_modules/.bin/ava", 9 | "runtimeArgs": [ 10 | "--serial", 11 | "${file}" 12 | ], 13 | "outputCapture": "std", 14 | "skipFiles": [ 15 | "/**/*" 16 | ] 17 | } 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2016-2025 by arlac77 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted. 5 | 6 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 7 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 8 | AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 9 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 10 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 11 | OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 12 | PERFORMANCE OF THIS SOFTWARE. 
-------------------------------------------------------------------------------- /tests/keyword-token-ava.mjs: -------------------------------------------------------------------------------- 1 | import test from "ava"; 2 | import { KeywordToken, Tokenizer } from "pratt-parser"; 3 | 4 | test("keyword", t => { 5 | const keywords = Object.create(KeywordToken, { 6 | keywords: { 7 | value: { 8 | CREATE: {}, 9 | TABLE: {} 10 | } 11 | } 12 | }); 13 | 14 | const tokenizer = new Tokenizer({}); 15 | 16 | keywords.registerWithinTokenizer(tokenizer); 17 | 18 | const pp = { 19 | chunk: " CREATE TABLE X ", 20 | offset: 1, 21 | lineNumber: 1, 22 | get properties() { 23 | return {}; 24 | } 25 | }; 26 | 27 | const k1 = keywords.parse(pp); 28 | t.is(k1.value, "CREATE"); 29 | t.is(pp.offset, 8); 30 | t.is(pp.lineNumber, 1); 31 | 32 | const k2 = keywords.parse(pp); 33 | t.is(k2.value, "TABLE"); 34 | t.is(pp.offset, 14); 35 | t.is(pp.lineNumber, 1); 36 | }); 37 | -------------------------------------------------------------------------------- /.github/workflows/codeql_analysis.yml: -------------------------------------------------------------------------------- 1 | name: CodeQL 2 | on: 3 | push: 4 | branches: 5 | - master 6 | - next 7 | pull_request: 8 | branches: 9 | - master 10 | - next 11 | jobs: 12 | analyze: 13 | name: Analyze 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: checkout 17 | uses: actions/checkout@v6.0.1 18 | - name: git checkout 19 | run: git checkout 20 | if: ${{ github.event_name == 'pull_request' }} 21 | - name: prepare node 22 | uses: actions/setup-node@v6.1.0 23 | with: 24 | node-version: 24.12.0 25 | - name: prepare CodeQL 26 | uses: github/codeql-action/init@v4 27 | with: 28 | languages: javascript 29 | - name: install 30 | run: npm ci 31 | - name: perform CodeQL analysis 32 | uses: github/codeql-action/analyze@v4 33 | -------------------------------------------------------------------------------- /.github/workflows/update_readme_api.yml: -------------------------------------------------------------------------------- 1 | name: API to readme 2 | on: 3 | schedule: 4 | - cron: 31 14 * * 6 5 | push: 6 | paths: 7 | - src/* 8 | jobs: 9 | update_readme_api: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: checkout 13 | uses: actions/checkout@v6.0.1 14 | - name: prepare node 15 | uses: actions/setup-node@v6.1.0 16 | with: 17 | node-version: 24.12.0 18 | - name: install 19 | run: npm ci 20 | - name: run docs 21 | run: npm run docs 22 | - uses: gr2m/create-or-update-pull-request-action@v1.10.1 23 | env: 24 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 25 | with: 26 | path: README.md 27 | title: Sync API into README 28 | branch: readme-api 29 | commit-message: "docs(README): sync API" 30 | permissions: 31 | contents: write 32 | pull-requests: write 33 | -------------------------------------------------------------------------------- /examples/sql.js: -------------------------------------------------------------------------------- 1 | /* jslint node: true, esnext: true */ 2 | 3 | /* WIP does not work for now!!! 
*/ 4 | 5 | 'use strict'; 6 | 7 | const { 8 | Parser, WhiteSpaceToken, StringToken, NumberToken, KeywordToken, IdentifierToken 9 | } = require('../dist/parser'); 10 | 11 | function Value(value) { 12 | return Object.create(null, { 13 | value: { 14 | value: value 15 | } 16 | }); 17 | } 18 | 19 | const sqlGrammar = new Parser({ 20 | tokens: [ 21 | WhiteSpaceToken, 22 | NumberToken, 23 | StringToken, 24 | Object.create(KeywordToken, { 25 | keywords: { 26 | value: { 27 | CREATE: {}, 28 | TABLE: {}, 29 | CHAR: {}, 30 | NUMBER: {}, 31 | NOT: {}, 32 | NULL: {} 33 | } 34 | } 35 | }), 36 | IdentifierToken 37 | ], 38 | infix: { 39 | '(': {}, 40 | ')': {} 41 | } 42 | }); 43 | 44 | console.log(sqlGrammar.parse('CREATE TABLE t1(a1 CHAR(10),a2 NUMBER NOT NULL)').value); 45 | -------------------------------------------------------------------------------- /.github/workflows/update_package_lock.yml: -------------------------------------------------------------------------------- 1 | name: Update package lock 2 | on: 3 | push: 4 | paths: 5 | - package.json 6 | schedule: 7 | - cron: 13 8 * * 6 8 | jobs: 9 | update_package_lock: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: checkout 13 | uses: actions/checkout@v6.0.1 14 | - name: prepare node 15 | uses: actions/setup-node@v6.1.0 16 | with: 17 | node-version: 24.12.0 18 | - name: remove lock 19 | run: rm package-lock.json 20 | - name: install playwright 21 | run: npx playwright install --with-deps 22 | - name: test 23 | run: npm install-test 24 | env: 25 | BROWSER: chrome 26 | - name: create pull request 27 | uses: gr2m/create-or-update-pull-request-action@v1.10.1 28 | env: 29 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 30 | with: 31 | path: package-lock.json 32 | title: Regenerate package lock 33 | body: bring lock in sync 34 | branch: package-lock 35 | labels: npm 36 | commit-message: "chore(deps): lock" 37 | -------------------------------------------------------------------------------- /examples/tns.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /* WIP does not work for now!!! 
*/ 3 | 4 | import { 5 | Parser, 6 | WhiteSpaceToken, 7 | NumberToken, 8 | IdentifierToken 9 | } from "pratt-parser"; 10 | 11 | function value(value) { 12 | return Object.create(null, { 13 | value: { 14 | value 15 | } 16 | }); 17 | } 18 | 19 | const tnsGrammar = new Parser({ 20 | tokens: [WhiteSpaceToken, NumberToken, IdentifierToken], 21 | 22 | prefix: { 23 | "(": { 24 | nud(grammar) { 25 | const e = grammar.expression(0); 26 | grammar.advance(")"); 27 | console.log( 28 | "EXPRESSION", 29 | e.map(e => e.value) 30 | ); 31 | return e; 32 | } 33 | } 34 | }, 35 | infixr: { 36 | }, 37 | infix: { 38 | ")": { 39 | }, 40 | "=": { 41 | precedence: 50, 42 | combine: (left, right) => [left, right] 43 | } 44 | } 45 | }); 46 | 47 | console.log( 48 | tnsGrammar.parse(`(ADDRESS_LIST= 49 | (FAILOVER=ON) 50 | (LOAD_BALANCE=off) 51 | (ADDRESS=(PROTOCOL=tcp)(HOST=host2a)(PORT=1630)) 52 | (ADDRESS=(PROTOCOL=tcp)(HOST=host2b)(PORT=1630))`).value 53 | ); 54 | -------------------------------------------------------------------------------- /examples/calculator.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import { WhiteSpaceToken, NumberToken, Parser } from "pratt-parser"; 3 | import { argv } from "node:process"; 4 | 5 | function value(value) { 6 | return Object.create(null, { 7 | value: { 8 | value 9 | } 10 | }); 11 | } 12 | 13 | const Calculator = new Parser({ 14 | tokens: [WhiteSpaceToken, NumberToken], 15 | prefix: { 16 | "(": { 17 | nud(grammar) { 18 | const e = grammar.expression(0); 19 | grammar.advance(")"); 20 | return e; 21 | } 22 | } 23 | }, 24 | infix: { 25 | ")": {}, 26 | "+": { 27 | precedence: 50, 28 | combine: (left, right) => value(left.value + right.value) 29 | }, 30 | "-": { 31 | precedence: 50, 32 | combine: (left, right) => value(left.value - right.value) 33 | }, 34 | "*": { 35 | precedence: 60, 36 | combine: (left, right) => value(left.value * right.value) 37 | }, 38 | "/": { 39 | precedence: 60, 40 | combine: (left, right) => value(left.value / right.value) 41 | } 42 | } 43 | }); 44 | 45 | const input = argv.slice(2).join(" "); 46 | console.log(Calculator.parse(input).value); 47 | -------------------------------------------------------------------------------- /.github/pr_labeler.yml: -------------------------------------------------------------------------------- 1 | ci: 2 | - changed-files: 3 | - any-glob-to-any-file: 4 | - .github/workflows/* 5 | - .travis.yml 6 | dependencies: 7 | - changed-files: 8 | - any-glob-to-any-file: 9 | - package-lock.json 10 | - yarn.lockfile 11 | deployment: 12 | - changed-files: 13 | - any-glob-to-any-file: 14 | - netlify.toml 15 | - _redirects 16 | documentation: 17 | - changed-files: 18 | - any-glob-to-any-file: 19 | - docs/**/* 20 | - "**/*.md" 21 | github: 22 | - changed-files: 23 | - any-glob-to-any-file: 24 | - .github/**/* 25 | git: 26 | - changed-files: 27 | - any-glob-to-any-file: 28 | - .gitignore 29 | labels: 30 | - changed-files: 31 | - any-glob-to-any-file: 32 | - .github/pr_labeler.yml 33 | lint: 34 | - changed-files: 35 | - any-glob-to-any-file: 36 | - .stylelintrc.json 37 | npm: 38 | - changed-files: 39 | - any-glob-to-any-file: 40 | - .npmignore 41 | - package.json 42 | - package-lock.json 43 | openapi: 44 | - changed-files: 45 | - any-glob-to-any-file: 46 | - openapi/* 47 | rollup: 48 | - changed-files: 49 | - any-glob-to-any-file: 50 | - rollup.config.mjs 51 | styling: 52 | - changed-files: 53 | - any-glob-to-any-file: 54 | - "**/*.css" 55 | test: 56 | - changed-files: 57 
|       - any-glob-to-any-file: tests/**/*
58 | vscode:
59 |   - changed-files:
60 |       - any-glob-to-any-file:
61 |           - .vscode/*
62 | 
-------------------------------------------------------------------------------- /tests/calculator-grammar-ava.mjs: --------------------------------------------------------------------------------
1 | import test from "ava";
2 | import { WhiteSpaceToken, NumberToken, Parser } from "pratt-parser";
3 | 
4 | function value(value) {
5 |   return Object.create(null, {
6 |     value: {
7 |       value
8 |     }
9 |   });
10 | }
11 | 
12 | const Calculator = new Parser({
13 |   tokens: [WhiteSpaceToken, NumberToken],
14 |   prefix: {
15 |     "(": {
16 |       nud(grammar) {
17 |         const e = grammar.expression(0);
18 |         grammar.advance(")");
19 |         return e;
20 |       }
21 |     }
22 |   },
23 |   infix: {
24 |     ")": {},
25 |     "+": {
26 |       precedence: 50,
27 |       combine: (left, right) => value(left.value + right.value)
28 |     },
29 |     "-": {
30 |       precedence: 50,
31 |       combine: (left, right) => value(left.value - right.value)
32 |     },
33 |     "*": {
34 |       precedence: 60,
35 |       combine: (left, right) => value(left.value * right.value)
36 |     },
37 |     "/": {
38 |       precedence: 60,
39 |       combine: (left, right) => value(left.value / right.value)
40 |     }
41 |   }
42 | });
43 | 
44 | test("calculator simple", t => {
45 |   t.is(Calculator.parse("1 + 41 * 3 - 2").value, 122);
46 | });
47 | 
48 | test("calculator braces", t => {
49 |   t.is(Calculator.parse("(1 + 41)").value, 42);
50 |   t.is(Calculator.parse("(1 + 41) * 2").value, 84);
51 |   t.is(Calculator.parse("(1 + (1 + 4 * 3)) * (2 + 1)").value, 42);
52 | });
53 | 
54 | test("calculator unexpected token", t => {
55 |   function doit() {
56 |     Calculator.parse("(1 + %");
57 |   }
58 | 
59 |   const error = t.throws(doit);
60 |   t.is(error.message, '1,6: Unknown char "%"');
61 | });
-------------------------------------------------------------------------------- /src/parser.mjs: --------------------------------------------------------------------------------
1 | import { EOFToken } from "./known-tokens.mjs";
2 | import { Tokenizer } from "./tokenizer.mjs";
3 | export * from "./known-tokens.mjs";
4 | export { Tokenizer };
5 | 
6 | /**
7 |  * Creates a grammar for later parsing
8 |  * @param {Object} grammar definition of the grammar with operators...
9 |  */
10 | export class Parser {
11 |   tokenizer;
12 |   context;
13 | 
14 |   /**
15 |    *
16 |    * @param {any} grammar
17 |    * @param {Object} [options]
18 |    * @param {Tokenizer} options.tokenizer
19 |    */
20 |   constructor(grammar, options) {
21 |     this.tokenizer = options?.tokenizer || new Tokenizer(grammar);
22 |   }
23 | 
24 |   /**
25 |    * Forwards error to the tokenizer
26 |    * @return {Object} error
27 |    */
28 |   error(...args) {
29 |     // @ts-ignore
30 |     return this.tokenizer.error(...args);
31 |   }
32 | 
33 |   /**
34 |    * Parses the input and delivers the outermost expression.
35 |    * @param {string} chunk input text
36 |    * @param {Object} context object transparently passed to tokenizer
37 |    * @return {Object} evaluated input
38 |    */
39 |   parse(chunk, context) {
40 |     this.context = context;
41 | 
42 |     const tokens = this.tokenizer.tokens(chunk, context);
43 | 
44 |     this.advance = id => {
45 |       if (
46 |         id !== undefined &&
47 |         this.token.value !== undefined &&
48 |         this.token.value !== id
49 |       ) {
50 |         this.error(`Got '${this.token.value}' expected '${id}'`, this.token);
51 |       }
52 | 
53 |       const next = tokens.next();
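54 |       // When the token stream is exhausted, substitute the shared EOFToken
55 |       // (precedence 0) so that the expression loop below always terminates.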
56 |       this.token = next.done ? EOFToken : next.value;
57 | 
58 |       return this.token;
59 |     };
60 | 
61 |     this.advance();
62 | 
63 |     this.expression = precedence => {
64 |       let token = this.token;
65 |       this.advance();
66 |       let left = token.nud(this);
67 | 
68 |       // Pratt loop: while the upcoming token binds more tightly than the
69 |       // current right binding power, let it consume the left value via led().
70 |       while (precedence < this.token.precedence) {
71 |         token = this.token;
72 |         this.advance();
73 |         left = token.led(this, left);
74 |       }
75 | 
76 |       return left;
77 |     };
78 | 
79 |     return this.expression(this.token.precedence);
80 |   }
81 | }
82 | 
-------------------------------------------------------------------------------- /package.json: --------------------------------------------------------------------------------
1 | {
2 |   "name": "pratt-parser",
3 |   "version": "0.0.0-semantic-release",
4 |   "publishConfig": {
5 |     "access": "public",
6 |     "provenance": true
7 |   },
8 |   "packageManager": "npm@11.6.4+sha512.1118cab46a05a50aee6bff5b1b4fa1df18afff89d57465620a3518035026955db87c5bdf9d207b07b7487d99f2490d450cb774655ad63ec2cba7bf1d0ad25d45",
9 |   "types": "./types/parser.d.mts",
10 |   "exports": {
11 |     ".": {
12 |       "types": "./types/parser.d.mts",
13 |       "default": "./src/parser.mjs"
14 |     }
15 |   },
16 |   "description": "TDOP parser",
17 |   "keywords": [
18 |     "PRATT",
19 |     "TDOP",
20 |     "parser",
21 |     "tokenizer"
22 |   ],
23 |   "contributors": [
24 |     {
25 |       "name": "Markus Felten",
26 |       "email": "markus.felten@gmx.de"
27 |     }
28 |   ],
29 |   "license": "0BSD",
30 |   "scripts": {
31 |     "prepare": "node --run prepare:typescript",
32 |     "prepare:typescript": "tsc --allowJs --declaration --emitDeclarationOnly --declarationDir types --resolveJsonModule --target esnext -m esnext --module nodenext --moduleResolution nodenext --rootDir src ./src**/*.mjs",
33 |     "test": "node --run test:browser-ava && node --run test:ava",
34 |     "test:ava": "ava --timeout 4m tests/*-ava.mjs tests/*-ava-node.mjs",
35 |     "test:browser-ava": "browser-ava --headless --no-keep-open tests/*-ava.mjs tests/*-ava-browser.mjs",
36 |     "cover": "c8 -x 'tests/**/*' --temp-directory build/tmp ava --timeout 4m tests/*-ava.mjs tests/*-ava-node.mjs && c8 report -r lcov -o build/coverage --temp-directory build/tmp",
37 |     "docs": "documentation readme --section=API ./src**/*.mjs",
38 |     "lint": "node --run lint:docs && node --run lint:typescript",
39 |     "lint:docs": "documentation lint ./src**/*.mjs",
40 |     "lint:typescript": "tsc --allowJs --checkJs --noEmit --resolveJsonModule --target esnext -m esnext --module nodenext --moduleResolution nodenext ./src**/*.mjs"
41 |   },
42 |   "devDependencies": {
43 |     "ava": "^6.4.1",
44 |     "browser-ava": "^2.3.47",
45 |     "c8": "^10.1.3",
46 |     "documentation": "^14.0.3",
47 |     "semantic-release": "^25.0.2",
48 |     "typescript": "^5.9.3"
49 |   },
50 |   "engines": {
51 |     "node": ">=22.15.0"
52 |   },
53 |   "repository": {
54 |     "type": "git",
55 |     "url": "git+https://github.com/arlac77/pratt-parser.git"
56 |   },
57 |   "bugs": {
58 |     "url": "https://github.com/arlac77/pratt-parser/issues"
59 |   },
60 |   "homepage": "",
61 |   "template": {
62 |     "inheritFrom": [
63 |       "arlac77/template-arlac77-github",
64 |       "arlac77/template-browser-ava",
65 |       "arlac77/template-javascript-component",
66 |       "arlac77/template-typescript"
67 |     ]
68 |   }
69 | }
70 | 
-------------------------------------------------------------------------------- /.github/workflows/ci.yml: --------------------------------------------------------------------------------
1 | name: CI
2 | on:
3 |   push:
4 |     branches:
5 |       - master
6 |       - next
7 |   pull_request:
8 |     branches:
9 |       - master
10 |       - next
11 | permissions:
12 |   contents: read
13 |   id-token: write
14 | jobs:
15 |   test-node:
16 | runs-on: ubuntu-latest 17 | strategy: 18 | matrix: 19 | node-version: 20 | - 24.12.0 21 | steps: 22 | - name: checkout 23 | uses: actions/checkout@v6.0.1 24 | - name: prepare node 25 | uses: actions/setup-node@v6.1.0 26 | with: 27 | node-version: ${{ matrix.node-version }} 28 | - name: install 29 | run: npm ci 30 | - name: test and coverage 31 | run: npm run cover --if-present 32 | - name: coveralls 33 | uses: coverallsapp/github-action@v2 34 | continue-on-error: true 35 | with: 36 | github-token: ${{ secrets.GITHUB_TOKEN }} 37 | flag-name: run-${{ matrix.test_number }} 38 | path-to-lcov: build/coverage/lcov.info 39 | parallel: true 40 | test-browser: 41 | runs-on: ubuntu-latest 42 | strategy: 43 | matrix: 44 | browser: 45 | - chrome 46 | - firefox 47 | steps: 48 | - name: checkout 49 | uses: actions/checkout@v6.0.1 50 | - name: prepare node 51 | uses: actions/setup-node@v6.1.0 52 | with: 53 | node-version: 24.12.0 54 | - name: install 55 | run: npm ci 56 | - name: install playwright 57 | run: npx playwright install 58 | - name: test browser 59 | run: npm run test:browser-ava --if-present 60 | env: 61 | BROWSER: ${{ matrix.browser }} 62 | release: 63 | needs: 64 | - test-browser 65 | - test-node 66 | runs-on: ubuntu-latest 67 | permissions: 68 | contents: write 69 | issues: write 70 | pull-requests: write 71 | id-token: write 72 | steps: 73 | - name: checkout 74 | uses: actions/checkout@v6.0.1 75 | - name: prepare node 76 | uses: actions/setup-node@v6.1.0 77 | with: 78 | node-version: 24.12.0 79 | - name: install 80 | run: npm ci 81 | - name: release 82 | run: npx semantic-release 83 | env: 84 | CI: "true" 85 | NPM_TOKEN: ${{ secrets.NPM_TOKEN }} 86 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 87 | finish: 88 | needs: 89 | - test-node 90 | runs-on: ubuntu-latest 91 | steps: 92 | - name: finish coveralls 93 | uses: coverallsapp/github-action@v2 94 | with: 95 | github-token: ${{ secrets.GITHUB_TOKEN }} 96 | path-to-lcov: build/coverage/lcov.info 97 | parallel-finished: true 98 | -------------------------------------------------------------------------------- /tests/json-parser-ava.mjs: -------------------------------------------------------------------------------- 1 | import test from "ava"; 2 | import { 3 | Parser, 4 | IdentifierToken, 5 | WhiteSpaceToken, 6 | NumberToken, 7 | StringToken 8 | } from "pratt-parser"; 9 | 10 | function Value(value) { 11 | return Object.create(null, { 12 | value: { 13 | value: value 14 | } 15 | }); 16 | } 17 | 18 | const g = { 19 | tokens: [ 20 | WhiteSpaceToken, 21 | NumberToken, 22 | StringToken, 23 | 24 | /*Object.create(KeywordToken, { 25 | keywords: { 26 | values: ['true', 'false'] 27 | } 28 | }), */ 29 | Object.create(IdentifierToken, { 30 | parse: { 31 | value(pp) { 32 | let i = pp.offset + 1; 33 | for (;;) { 34 | const c = pp.chunk[i]; 35 | if ( 36 | (c >= "a" && c <= "z") || 37 | (c >= "A" && c <= "Z") || 38 | (c >= "0" && c <= "9") || 39 | c === "_" 40 | ) { 41 | i += 1; 42 | } else { 43 | break; 44 | } 45 | } 46 | const value = pp.chunk.substring(pp.offset, i); 47 | const properties = pp.properties; 48 | if (value === "true") { 49 | properties.value = { 50 | value: true 51 | }; 52 | } else if (value === "false") { 53 | properties.value = { 54 | value: false 55 | }; 56 | } else { 57 | properties.value = { 58 | value: value 59 | }; 60 | } 61 | 62 | pp.offset = i; 63 | return Object.create(this, properties); 64 | } 65 | } 66 | }) 67 | ], 68 | 69 | prefix: { 70 | "[": { 71 | nud(grammar, left) { 72 | const values = []; 73 | 74 | if 
(grammar.token.value !== "]") { 75 | while (true) { 76 | values.push(grammar.expression(0).value); 77 | 78 | if (grammar.token.value !== ",") { 79 | break; 80 | } 81 | grammar.advance(","); 82 | } 83 | } 84 | grammar.advance("]"); 85 | return Value(values); 86 | } 87 | }, 88 | "{": { 89 | nud(grammar, left) { 90 | const object = {}; 91 | 92 | if (grammar.token.value !== "}") { 93 | while (true) { 94 | const key = grammar.expression(0).value; 95 | 96 | if (grammar.token.value !== ":") { 97 | break; 98 | } 99 | grammar.advance(":"); 100 | 101 | const value = grammar.expression(0).value; 102 | object[key] = value; 103 | if (grammar.token.value === "}") { 104 | break; 105 | } 106 | grammar.advance(","); 107 | } 108 | } 109 | grammar.advance("}"); 110 | return Value(object); 111 | } 112 | } 113 | }, 114 | infix: { 115 | ",": {}, 116 | ":": {}, 117 | "}": {}, 118 | "]": {} 119 | } 120 | }; 121 | 122 | test("json simple array", t => { 123 | const myGrammar = new Parser(g); 124 | t.deepEqual( 125 | myGrammar.parse('[1,"b",[4],{ "c" : 5, "d" : true, "e": false}]').value, 126 | [ 127 | 1, 128 | "b", 129 | [4], 130 | { 131 | c: 5, 132 | d: true, 133 | e: false 134 | } 135 | ] 136 | ); 137 | }); 138 | -------------------------------------------------------------------------------- /src/tokenizer.mjs: -------------------------------------------------------------------------------- 1 | import { EOFToken, OperatorToken } from "./known-tokens.mjs"; 2 | 3 | const rootPP = { 4 | chunk: undefined, 5 | context: {}, 6 | firstCharInLine: 0, 7 | lineNumber: 1, 8 | offset: 0, 9 | get positionInLine() { 10 | return this.offset - this.firstCharInLine; 11 | }, 12 | get properties() { 13 | return { 14 | lineNumber: { 15 | value: this.lineNumber 16 | }, 17 | positionInLine: { 18 | value: this.positionInLine 19 | } 20 | }; 21 | } 22 | }; 23 | 24 | /** 25 | * Creates a tokenizer for later parsing. 26 | * @param {Object} grammar definition of the grammar with operators... 27 | */ 28 | export class Tokenizer { 29 | maxTokenLengthForFirstChar = {}; 30 | registeredTokens = {}; 31 | 32 | constructor(grammar) { 33 | const operatorTypes = { 34 | prefix: { 35 | token: OperatorToken, 36 | 37 | properties: { 38 | nud: { 39 | value(grammar, left) { 40 | return this.combine(left, grammar.expression(this.precedence)); 41 | }, 42 | writable: true 43 | } 44 | } 45 | }, 46 | infix: { 47 | token: OperatorToken, 48 | 49 | properties: { 50 | led: { 51 | value(grammar, left) { 52 | return this.combine(left, grammar.expression(this.precedence)); 53 | }, 54 | writable: true 55 | } 56 | } 57 | }, 58 | infixr: { 59 | token: OperatorToken, 60 | 61 | properties: { 62 | led: { 63 | value(grammar, left) { 64 | return this.combine( 65 | left, 66 | grammar.expression(this.precedence - 1) 67 | ); 68 | }, 69 | writable: true 70 | } 71 | } 72 | } 73 | }; 74 | 75 | for (const operatorTypeName in operatorTypes) { 76 | const ops = grammar[operatorTypeName]; 77 | const operatorType = operatorTypes[operatorTypeName]; 78 | 79 | for (const c in ops) { 80 | operatorType.properties.value = { 81 | value: c 82 | }; 83 | 84 | Object.assign( 85 | Object.create(operatorType.token, operatorType.properties), 86 | ops[c] 87 | ).registerWithinTokenizer(this); 88 | } 89 | } 90 | 91 | grammar.tokens?.forEach(token => token.registerWithinTokenizer(this)); 92 | } 93 | 94 | /** 95 | * Delivers tokens from the input. 
96 | * @param {string} chunk the input to be processed 97 | * @param {Object} context additional info to be used by the actual token types 98 | */ 99 | *tokens(chunk, context) { 100 | const pp = Object.create(rootPP); 101 | pp.context = context; 102 | pp.chunk = chunk; 103 | pp.tokenizer = this; 104 | 105 | while (true) { 106 | const c = pp.chunk[pp.offset]; 107 | let tokenLength = this.maxTokenLengthForFirstChar[c]; 108 | 109 | if (tokenLength > 0) { 110 | do { 111 | const token = 112 | this.registeredTokens[ 113 | pp.chunk.slice(pp.offset, pp.offset + tokenLength) 114 | ]; 115 | if (token !== undefined) { 116 | const rt = token.parse(pp); 117 | 118 | if (rt !== undefined) { 119 | yield rt; 120 | } 121 | break; 122 | } 123 | } while (tokenLength-- > 1); 124 | } else { 125 | if (c === undefined) { 126 | yield Object.create(EOFToken, pp.properties); 127 | return; 128 | } else { 129 | pp.offset += 1; 130 | this.error("Unknown char", pp, c); 131 | } 132 | } 133 | } 134 | } 135 | 136 | /** 137 | * @param {string} message 138 | * @param {Object} context token initiating the error 139 | * @param {Object} [value] 140 | * @return {Object} error 141 | */ 142 | error(message, context, value) { 143 | message = `${context.lineNumber},${context.positionInLine}: ${message} "${value}"`; 144 | throw new Error(message); 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /tests/mini-lang-ava.mjs: -------------------------------------------------------------------------------- 1 | import test from "ava"; 2 | import { 3 | Parser, 4 | IdentifierToken, 5 | WhiteSpaceToken, 6 | NumberToken, 7 | StringToken 8 | } from "pratt-parser"; 9 | 10 | function Value(value) { 11 | return Object.create(null, { 12 | value: { 13 | value: value 14 | } 15 | }); 16 | } 17 | 18 | const identifiers = { 19 | array: [1, 2, 3, 4, 5, 6, 7] 20 | }; 21 | 22 | const functions = { 23 | concat: args => Value(args.map(a => a.value).join("")), 24 | noargs: args => Value("-- no args --"), 25 | onearg: args => args[0] 26 | }; 27 | 28 | const g = { 29 | tokens: [ 30 | WhiteSpaceToken, 31 | NumberToken, 32 | StringToken, 33 | 34 | Object.create(IdentifierToken, { 35 | parse: { 36 | value(pp) { 37 | let i = pp.offset + 1; 38 | for (;;) { 39 | const c = pp.chunk[i]; 40 | if ( 41 | (c >= "a" && c <= "z") || 42 | (c >= "A" && c <= "Z") || 43 | (c >= "0" && c <= "9") || 44 | c === "_" 45 | ) { 46 | i += 1; 47 | } else { 48 | break; 49 | } 50 | } 51 | const value = pp.chunk.substring(pp.offset, i); 52 | 53 | const properties = pp.properties; 54 | 55 | if (functions[value]) { 56 | properties.value = { 57 | value: functions[value] 58 | }; 59 | } else if (identifiers[value]) { 60 | properties.value = { 61 | value: identifiers[value] 62 | }; 63 | } else { 64 | properties.value = { 65 | value: value 66 | }; 67 | } 68 | 69 | pp.offset = i; 70 | return Object.create(this, properties); 71 | } 72 | } 73 | }) 74 | ], 75 | prefix: { 76 | "(": { 77 | precedence: 80, 78 | led(grammar, left) { 79 | if (left.type === "identifier") { 80 | const args = []; 81 | 82 | if (grammar.token.value !== ")") { 83 | while (true) { 84 | args.push(grammar.expression(0)); 85 | 86 | if (grammar.token.value !== ",") { 87 | break; 88 | } 89 | grammar.advance(","); 90 | } 91 | } 92 | 93 | grammar.advance(")"); 94 | 95 | return left.value(args); 96 | } else { 97 | const e = grammar.expression(0); 98 | grammar.advance(")"); 99 | return e; 100 | } 101 | } 102 | } 103 | }, 104 | infix: { 105 | ",": {}, 106 | ")": {}, 107 | "]": {}, 108 | 
"[": { 109 | precedence: 40, 110 | led(grammar, left) { 111 | const right = grammar.expression(0); 112 | grammar.advance("]"); 113 | return Value(left.value[right.value]); 114 | } 115 | }, 116 | "+": { 117 | precedence: 50, 118 | combine: (left, right) => Value(left.value + right.value) 119 | }, 120 | "-": { 121 | precedence: 50, 122 | combine: (left, right) => Value(left.value - right.value) 123 | }, 124 | "*": { 125 | precedence: 60, 126 | combine: (left, right) => Value(left.value * right.value) 127 | }, 128 | "/": { 129 | precedence: 60, 130 | combine: (left, right) => Value(left.value / right.value) 131 | } 132 | } 133 | }; 134 | 135 | test("mini_lang noargs", t => { 136 | const myGrammar = new Parser(g); 137 | t.is(myGrammar.parse("noargs()").value, "-- no args --"); 138 | }); 139 | 140 | test("mini_lang onearg", t => { 141 | const myGrammar = new Parser(g); 142 | t.is(myGrammar.parse('onearg("the arg")').value, "the arg"); 143 | }); 144 | 145 | test("mini_lang concat", t => { 146 | const myGrammar = new Parser(g); 147 | t.is(myGrammar.parse('concat("A","B")').value, "AB"); 148 | t.is(myGrammar.parse('concat(concat("A","B"),"C")').value, "ABC"); 149 | }); 150 | 151 | test("mini_lang array", t => { 152 | const myGrammar = new Parser(g); 153 | t.is(myGrammar.parse("array[3 * 2] + 2").value, 9); 154 | }); 155 | -------------------------------------------------------------------------------- /tests/tokenizer-ava.mjs: -------------------------------------------------------------------------------- 1 | import test from "ava"; 2 | import { 3 | Tokenizer, 4 | WhiteSpaceToken, 5 | NumberToken, 6 | StringToken, 7 | IdentifierToken 8 | } from "pratt-parser"; 9 | 10 | test.only("tokens trailing space", t => { 11 | const tokenizer = new Tokenizer({ 12 | tokens: [WhiteSpaceToken, NumberToken, StringToken, IdentifierToken], 13 | 14 | infix: { 15 | "=": { 16 | precedence: 77 17 | }, 18 | "+": {}, 19 | "-": {}, 20 | "*": { 21 | precedence: 42 22 | }, 23 | "/": {}, 24 | "(": {}, 25 | ")": {}, 26 | "[": {}, 27 | "]": {}, 28 | "{": {}, 29 | "}": {}, 30 | ":": {}, 31 | "<": {}, 32 | ">": {}, 33 | ".": {}, 34 | ",": {}, 35 | ";": {}, 36 | "<=": {}, 37 | ">=": {}, 38 | "=>": {}, 39 | "===": {}, 40 | "!===": {} 41 | } 42 | }); 43 | 44 | const tokens = [ 45 | { 46 | type: "identifier", 47 | value: "A" 48 | }, 49 | { 50 | type: "EOF" 51 | } 52 | ]; 53 | 54 | let i = 0; 55 | 56 | for (const token of tokenizer.tokens("A ")) { 57 | const refToken = tokens[i]; 58 | t.is(token.type, refToken.type, `${i}:`); 59 | t.is(token.id, refToken.id, `${i}:`); 60 | i++; 61 | } 62 | }); 63 | 64 | /* 65 | describe('trailing number', function() { 66 | const tokens = [ 67 | { 68 | type: 'number', 69 | value: 123 70 | } 71 | ]; 72 | 73 | let i = 0; 74 | 75 | for (const token of tokenizer.tokens('123')) { 76 | const refToken = tokens[i]; 77 | 78 | it(`tokens ${refToken.type}`, () => { 79 | assert.equal(token.type, refToken.type, 'type: ' + refToken.type); 80 | assert.equal(token.value, refToken.value, 'value: ' + refToken.value); 81 | }); 82 | } 83 | }); 84 | 85 | describe('trailing string', () => { 86 | const tokens = [ 87 | { 88 | type: 'string', 89 | value: 'ABC' 90 | } 91 | ]; 92 | 93 | let i = 0; 94 | 95 | for (const token of tokenizer.tokens('"ABC"')) { 96 | const refToken = tokens[i]; 97 | 98 | it(`tokens ${refToken.type}`, () => { 99 | assert.equal(token.type, refToken.type, 'type: ' + refToken.type); 100 | assert.equal(token.value, refToken.value, 'value: ' + refToken.value); 101 | }); 102 | } 103 | }); 104 | 105 | 
describe('trailing identifier', () => { 106 | const tokens = [ 107 | { 108 | type: 'identifier', 109 | value: 'ABC' 110 | } 111 | ]; 112 | 113 | let i = 0; 114 | 115 | for (const token of tokenizer.tokens('ABC')) { 116 | const refToken = tokens[i]; 117 | 118 | it(`tokens ${refToken.type}`, () => { 119 | assert.equal(token.type, refToken.type, 'type: ' + refToken.type); 120 | assert.equal(token.value, refToken.value, 'value: ' + refToken.value); 121 | }); 122 | } 123 | }); 124 | 125 | describe('unknown char', () => { 126 | it('thows', () => { 127 | try { 128 | for (const token of tokenizer.tokens('%')) { 129 | console.log(token); 130 | } 131 | assert.ok(false); 132 | } catch (e) { 133 | if (e.message !== '1,1: Unknown char "%"') { 134 | throw e; 135 | } 136 | } 137 | }); 138 | }); 139 | 140 | describe('unterminated string', () => { 141 | it('thows', () => { 142 | try { 143 | for (const token of tokenizer.tokens('"abc')) { 144 | console.log(token); 145 | } 146 | assert.ok(false); 147 | } catch (e) { 148 | if (e.message !== '1,0: Unterminated string "abc"') { 149 | throw e; 150 | } 151 | } 152 | }); 153 | 154 | it('thows when in \\u', () => { 155 | try { 156 | for (const token of tokenizer.tokens('"\\u"')) { 157 | console.log(token); 158 | } 159 | assert.ok(false); 160 | } catch (e) { 161 | if (e.message !== '1,0: Unterminated string ""') { 162 | throw e; 163 | } 164 | } 165 | }); 166 | }); 167 | }); 168 | 169 | */ 170 | -------------------------------------------------------------------------------- /tests/token-kitchen-sink-ava-node.mjs: -------------------------------------------------------------------------------- 1 | import test from "ava"; 2 | import { readFileSync } from "node:fs"; 3 | import { 4 | Tokenizer, 5 | WhiteSpaceToken, 6 | NumberToken, 7 | StringToken, 8 | IdentifierToken 9 | } from "pratt-parser"; 10 | 11 | const tokenizer = new Tokenizer({ 12 | tokens: [WhiteSpaceToken, NumberToken, StringToken, IdentifierToken], 13 | infix: { 14 | "=": { 15 | precedence: 77 16 | }, 17 | "+": {}, 18 | "-": {}, 19 | "*": { 20 | precedence: 42 21 | }, 22 | "/": {}, 23 | "(": {}, 24 | ")": {}, 25 | "[": {}, 26 | "]": {}, 27 | "{": {}, 28 | "}": {}, 29 | ":": {}, 30 | "<": {}, 31 | ">": {}, 32 | ".": {}, 33 | ",": {}, 34 | ";": {}, 35 | "<=": {}, 36 | ">=": {}, 37 | "=>": {}, 38 | "===": {}, 39 | "!===": {} 40 | } 41 | }); 42 | 43 | test("Kitchen sink", t => { 44 | const tokens = [ 45 | { 46 | type: "number", 47 | value: 4711, 48 | line: 1, 49 | pos: 0 50 | }, 51 | { 52 | type: "number", 53 | value: 0.23, 54 | line: 1, 55 | pos: 5 56 | }, 57 | { 58 | type: "number", 59 | value: 12345.0, 60 | line: 1, 61 | pos: 10 62 | }, 63 | { 64 | type: "number", 65 | value: 12.4e20, 66 | line: 1, 67 | pos: 18 68 | }, 69 | { 70 | type: "number", 71 | value: 0.4e-7, 72 | line: 1, 73 | pos: 27 74 | }, 75 | { 76 | type: "string", 77 | value: "str2", 78 | line: 2, 79 | pos: 1 80 | }, 81 | { 82 | type: "string", 83 | value: "str3", 84 | line: 2, 85 | pos: 7 86 | }, 87 | { 88 | type: "string", 89 | value: "\\\b\f\n\r\t\"'A", 90 | line: 2 91 | }, 92 | { 93 | type: "string", 94 | value: "str4", 95 | line: 2 96 | }, 97 | { 98 | type: "string", 99 | value: "str5", 100 | line: 2 101 | }, 102 | { 103 | type: "identifier", 104 | value: "name1", 105 | line: 3 106 | }, 107 | { 108 | type: "identifier", 109 | value: "name_2", 110 | line: 3 111 | }, 112 | { 113 | type: "identifier", 114 | value: "_name3", 115 | line: 3 116 | }, 117 | { 118 | type: "identifier", 119 | value: "n", 120 | line: 4 121 | }, 122 | { 123 | type: 
"operator", 124 | value: "+", 125 | line: 5 126 | }, 127 | { 128 | type: "operator", 129 | value: "-", 130 | line: 6 131 | }, 132 | { 133 | type: "operator", 134 | value: "*", 135 | line: 7, 136 | precedence: 42 137 | }, 138 | { 139 | type: "operator", 140 | value: "/", 141 | line: 8 142 | }, 143 | { 144 | type: "operator", 145 | value: "(", 146 | line: 9 147 | }, 148 | { 149 | type: "operator", 150 | value: ")", 151 | line: 9 152 | }, 153 | { 154 | type: "operator", 155 | value: "{", 156 | line: 10 157 | }, 158 | { 159 | type: "operator", 160 | value: "}", 161 | line: 10 162 | }, 163 | { 164 | type: "operator", 165 | value: "[", 166 | line: 11 167 | }, 168 | { 169 | type: "operator", 170 | value: "]", 171 | line: 11 172 | }, 173 | { 174 | type: "operator", 175 | value: ":", 176 | line: 12 177 | }, 178 | { 179 | type: "operator", 180 | value: ",", 181 | line: 12 182 | }, 183 | { 184 | type: "operator", 185 | value: ";", 186 | line: 12 187 | }, 188 | { 189 | type: "operator", 190 | value: ".", 191 | line: 12 192 | }, 193 | { 194 | type: "operator", 195 | value: "<", 196 | line: 13 197 | }, 198 | { 199 | type: "operator", 200 | value: "===", 201 | line: 13 202 | }, 203 | { 204 | type: "operator", 205 | value: "!===", 206 | line: 13 207 | // pos: 22 208 | }, 209 | { 210 | type: "operator", 211 | value: ">", 212 | line: 13 213 | }, 214 | { 215 | type: "operator", 216 | value: "<=", 217 | line: 14 218 | }, 219 | { 220 | type: "operator", 221 | value: ">=", 222 | line: 15 223 | }, 224 | { 225 | type: "operator", 226 | value: "=", 227 | line: 16, 228 | precedence: 77 229 | }, 230 | { 231 | type: "number", 232 | value: 2, 233 | line: 17 234 | }, 235 | { 236 | type: "operator", 237 | value: "+", 238 | line: 17 239 | }, 240 | { 241 | type: "operator", 242 | value: "(", 243 | line: 17 244 | }, 245 | { 246 | type: "number", 247 | value: 3, 248 | line: 17 249 | }, 250 | { 251 | type: "operator", 252 | value: "*", 253 | line: 17 254 | }, 255 | { 256 | type: "number", 257 | value: 17, 258 | line: 17 259 | }, 260 | { 261 | type: "operator", 262 | value: ")", 263 | line: 17 264 | }, 265 | { 266 | type: "EOF", 267 | line: 18 268 | } 269 | ]; 270 | 271 | const s = readFileSync( 272 | new URL("fixtures/tokens1.txt", import.meta.url).pathname, 273 | { 274 | encoding: "utf8" 275 | } 276 | ); 277 | 278 | let i = 0; 279 | 280 | for (const token of tokenizer.tokens(s)) { 281 | const refToken = tokens[i]; 282 | 283 | t.is(token.type, refToken.type); 284 | t.is(token.id, refToken.id); 285 | t.is(token.lineNumber, refToken.line); 286 | 287 | if (refToken.pos !== undefined) { 288 | t.is(token.positionInLine, refToken.pos); 289 | } 290 | if (refToken.precedence !== undefined) { 291 | t.is(token.precedence, refToken.precedence); 292 | } 293 | i++; 294 | } 295 | }); 296 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![npm](https://img.shields.io/npm/v/pratt-parser.svg)](https://www.npmjs.com/package/pratt-parser) 2 | [![License](https://img.shields.io/badge/License-0BSD-blue.svg)](https://spdx.org/licenses/0BSD.html) 3 | [![Typed with TypeScript](https://flat.badgen.net/badge/icon/Typed?icon=typescript\&label\&labelColor=blue\&color=555555)](https://typescriptlang.org) 4 | [![bundlejs](https://deno.bundlejs.com/?q=pratt-parser\&badge=detailed)](https://bundlejs.com/?q=pratt-parser) 5 | 
[![downloads](http://img.shields.io/npm/dm/pratt-parser.svg?style=flat-square)](https://npmjs.org/package/pratt-parser) 6 | [![GitHub Issues](https://img.shields.io/github/issues/arlac77/pratt-parser.svg?style=flat-square)](https://github.com/arlac77/pratt-parser/issues) 7 | [![Build Status](https://img.shields.io/endpoint.svg?url=https%3A%2F%2Factions-badge.atrox.dev%2Farlac77%2Fpratt-parser%2Fbadge\&style=flat)](https://actions-badge.atrox.dev/arlac77/pratt-parser/goto) 8 | [![Styled with prettier](https://img.shields.io/badge/styled_with-prettier-ff69b4.svg)](https://github.com/prettier/prettier) 9 | [![Commitizen friendly](https://img.shields.io/badge/commitizen-friendly-brightgreen.svg)](http://commitizen.github.io/cz-cli/) 10 | [![Known Vulnerabilities](https://snyk.io/test/github/arlac77/pratt-parser/badge.svg)](https://snyk.io/test/github/arlac77/pratt-parser) 11 | 12 | # pratt-parser 13 | 14 | Pratt Parser 15 | 16 | Based on 17 | [Top Down Operator Precedence](https://tdop.github.io) and 18 | [Douglas Crockford TDOP](https://github.com/douglascrockford/TDOP) 19 | 20 | 21 | 22 | ```javascript 23 | import { Parser, WhiteSpaceToken, NumberToken } from "pratt-parser"; 24 | 25 | function Value(value) { 26 | return Object.create(null, { 27 | value: { 28 | value: value 29 | } 30 | }); 31 | } 32 | 33 | const myGrammar = new Parser({ 34 | tokens: [WhiteSpaceToken, NumberToken], 35 | prefix: { 36 | "(": { 37 | nud(grammar) { 38 | const e = grammar.expression(0); 39 | grammar.advance(")"); 40 | return e; 41 | } 42 | } 43 | }, 44 | infix: { 45 | ")": {}, 46 | "+": { 47 | precedence: 50, 48 | combine: (left, right) => Value(left.value + right.value) 49 | }, 50 | "-": { 51 | precedence: 50, 52 | combine: (left, right) => Value(left.value - right.value) 53 | }, 54 | "*": { 55 | precedence: 60, 56 | combine: (left, right) => Value(left.value * right.value) 57 | }, 58 | "/": { 59 | precedence: 60, 60 | combine: (left, right) => Value(left.value / right.value) 61 | } 62 | } 63 | }); 64 | 65 | console.log(myGrammar.parse("(1 + (1 + 4 * 3)) * (2 + 1)").value); 66 | ``` 67 | 68 | # API 69 | 70 | 71 | 72 | ### Table of Contents 73 | 74 | * [ParsePosition](#parseposition) 75 | * [Properties](#properties) 76 | * [Token](#token) 77 | * [Properties](#properties-1) 78 | * [RootToken](#roottoken) 79 | * [parse](#parse) 80 | * [Parameters](#parameters) 81 | * [WhiteSpaceToken](#whitespacetoken) 82 | * [LineCommentToken](#linecommenttoken) 83 | * [EOFToken](#eoftoken) 84 | * [Parser](#parser) 85 | * [Parameters](#parameters-1) 86 | * [error](#error) 87 | * [Parameters](#parameters-2) 88 | * [parse](#parse-1) 89 | * [Parameters](#parameters-3) 90 | * [Tokenizer](#tokenizer) 91 | * [Parameters](#parameters-4) 92 | * [tokens](#tokens) 93 | * [Parameters](#parameters-5) 94 | * [error](#error-1) 95 | * [Parameters](#parameters-6) 96 | 97 | ## ParsePosition 98 | 99 | Type: [Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object) 100 | 101 | ### Properties 102 | 103 | * `offset` **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** 104 | * `chunk` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** 105 | 106 | ## Token 107 | 108 | Type: [Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object) 109 | 110 | ### Properties 111 | 112 | * `type` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** 113 | * `precedence` 
**[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)**
114 | * `value` **any**
115 | 
116 | ## RootToken
117 | 
118 | Base object for all tokens
119 | 
120 | ### parse
121 | 
122 | Parses from the chunk at the current ParsePosition and delivers the next token.
123 | Modifies the ParsePosition so that it points behind the detected token.
124 | 
125 | #### Parameters
126 | 
127 | * `pp` **[ParsePosition](#parseposition)**
128 | 
129 | Returns **[Token](#token)**
130 | 
131 | ## WhiteSpaceToken
132 | 
133 | skip white space
134 | 
135 | ## LineCommentToken
136 | 
137 | skips until end of line
138 | 
139 | ## EOFToken
140 | 
141 | Token representing 'end of file'
142 | 
143 | ## Parser
144 | 
145 | Creates a grammar for later parsing
146 | 
147 | ### Parameters
148 | 
149 | * `grammar` **any**
150 | * `options` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)?**
151 | 
152 | ### error
153 | 
154 | Forwards error to the tokenizer
155 | 
156 | #### Parameters
157 | 
158 | * `args` **...any**
159 | 
160 | Returns **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** error
161 | 
162 | ### parse
163 | 
164 | Parses the input and delivers the outermost expression.
165 | 
166 | #### Parameters
167 | 
168 | * `chunk` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** input text
169 | * `context` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** object transparently passed to tokenizer
170 | 
171 | Returns **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** evaluated input
172 | 
173 | ## Tokenizer
174 | 
175 | Creates a tokenizer for later parsing.
176 | 
177 | ### Parameters
178 | 
179 | * `grammar` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** definition of the grammar with operators...
180 | 
181 | ### tokens
182 | 
183 | Delivers tokens from the input.
184 | 
185 | #### Parameters
186 | 
187 | * `chunk` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** the input to be processed
188 | * `context` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** additional info to be used by the actual token types
189 | 
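190 | For example, the token stream of a small grammar can be inspected directly.
191 | A minimal sketch in the style of the test suite (whitespace tokens are
192 | skipped, and a trailing EOF token is delivered):
193 | 
194 | ```javascript
195 | import { Tokenizer, WhiteSpaceToken, NumberToken } from "pratt-parser";
196 | 
197 | const tokenizer = new Tokenizer({
198 |   tokens: [WhiteSpaceToken, NumberToken],
199 |   infix: { "+": {} }
200 | });
201 | 
202 | for (const token of tokenizer.tokens("1 + 2")) {
203 |   console.log(token.type, token.value); // number 1, operator +, number 2, EOF undefined
204 | }
205 | ```
206 | 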
207 | ### error
208 | 
209 | #### Parameters
210 | 
211 | * `message` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**
212 | * `context` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** token initiating the error
213 | * `value` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)?**
214 | 
215 | Returns **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** error
216 | 
217 | # install
218 | 
219 | With [npm](https://npmjs.org) do:
220 | 
221 | ```shell
222 | npm install pratt-parser
223 | ```
224 | 
225 | # license
226 | 
227 | 0BSD
228 | 
-------------------------------------------------------------------------------- /src/known-tokens.mjs: --------------------------------------------------------------------------------
1 | /**
2 |  * @typedef {Object} ParsePosition
3 |  * @property {number} offset
4 |  * @property {string} chunk
5 |  */
6 | 
7 | /**
8 |  * @typedef {Object} Token
9 |  * @property {string} type
10 |  * @property {number} precedence
11 |  * @property {any} value
12 |  */
13 | 
14 | /**
15 |  * Base object for all tokens
16 |  */
17 | export const RootToken = {
18 |   precedence: 0,
19 |   get type() {
20 |     return "unknown";
21 |   },
22 | 
23 |   registerWithinTokenizer(tokenizer) {},
24 | 
25 |   /**
26 |    * Parses from the chunk at the current ParsePosition and delivers the next token.
27 |    * Modifies the ParsePosition so that it points behind the detected token.
28 | * @param {ParsePosition} pp 29 | * @return {Token} 30 | */ 31 | parse(pp) { 32 | return EOFToken; 33 | }, 34 | toString() { 35 | return `${this.type}: ${this.value} [${this.precedence}]`; 36 | }, 37 | led(grammar, left) { 38 | return left; 39 | }, 40 | nud(grammar) { 41 | return this; 42 | }, 43 | combine() { 44 | return 0; 45 | } 46 | }; 47 | 48 | export const IdentifierToken = Object.create(RootToken, { 49 | firstChar: { 50 | value: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_" 51 | }, 52 | registerWithinTokenizer: { 53 | value(tokenizer) { 54 | for (const c of this.firstChar) { 55 | tokenizer.maxTokenLengthForFirstChar[c] = 1; 56 | tokenizer.registeredTokens[c] = this; 57 | } 58 | } 59 | }, 60 | parse: { 61 | value(pp) { 62 | let i = pp.offset + 1; 63 | for (;;) { 64 | const c = pp.chunk[i]; 65 | if ( 66 | (c >= "a" && c <= "z") || 67 | (c >= "A" && c <= "Z") || 68 | (c >= "0" && c <= "9") || 69 | c === "_" 70 | ) { 71 | i += 1; 72 | } else { 73 | break; 74 | } 75 | } 76 | 77 | const properties = pp.properties; 78 | properties.value = { 79 | value: pp.chunk.slice(pp.offset, i) 80 | }; 81 | pp.offset = i; 82 | return Object.create(this, properties); 83 | } 84 | }, 85 | type: { 86 | value: "identifier" 87 | } 88 | }); 89 | 90 | export const KeywordToken = Object.create(IdentifierToken, { 91 | keywords: { 92 | value: {} 93 | }, 94 | registerWithinTokenizer: { 95 | value(tokenizer) { 96 | Object.keys(this.keywords).forEach(k => { 97 | tokenizer.maxTokenLengthForFirstChar[k] = 1; 98 | tokenizer.registeredTokens[k] = this; 99 | }); 100 | } 101 | }, 102 | parse: { 103 | value(pp) { 104 | const start = pp.offset; 105 | 106 | for (let i = start + 1; i < pp.chunk.length; i++) { 107 | const c = pp.chunk[i]; 108 | if (!((c >= "A" && c <= "Z") || (c >= "a" && c <= "z"))) { 109 | pp.offset = i + 1; 110 | return Object.create(this, { 111 | value: { 112 | value: pp.chunk.slice(start, i) 113 | }, 114 | ...pp.properties 115 | }); 116 | } 117 | } 118 | } 119 | }, 120 | type: { 121 | value: "keyword" 122 | } 123 | }); 124 | 125 | export const StringToken = Object.create(RootToken, { 126 | registerWithinTokenizer: { 127 | value(tokenizer) { 128 | for (const c of "\"'") { 129 | tokenizer.maxTokenLengthForFirstChar[c] = 1; 130 | tokenizer.registeredTokens[c] = this; 131 | } 132 | } 133 | }, 134 | parse: { 135 | value(pp) { 136 | const properties = pp.properties; 137 | const tc = pp.chunk[pp.offset]; 138 | let str = ""; 139 | let i = pp.offset + 1; 140 | let c; 141 | for (; i < pp.chunk.length; ) { 142 | c = pp.chunk[i]; 143 | if (c === tc) { 144 | pp.offset = i + 1; 145 | return Object.create(this, { 146 | value: { 147 | value: str 148 | }, 149 | ...properties 150 | }); 151 | } else if (c === "\\") { 152 | i += 1; 153 | c = pp.chunk[i]; 154 | switch (c) { 155 | case "b": 156 | c = "\b"; 157 | break; 158 | case "f": 159 | c = "\f"; 160 | break; 161 | case "n": 162 | c = "\n"; 163 | break; 164 | case "r": 165 | c = "\r"; 166 | break; 167 | case "t": 168 | c = "\t"; 169 | break; 170 | case "\\": 171 | c = "\\"; 172 | break; 173 | case "u": 174 | c = parseInt(pp.chunk.substr(i + 1, 4), 16); 175 | if (!isFinite(c) || c < 0) { 176 | pp.tokenizer.error("Unterminated string", pp, str); 177 | } 178 | c = String.fromCharCode(c); 179 | i += 4; 180 | break; 181 | } 182 | str += c; 183 | i += 1; 184 | } else { 185 | str += c; 186 | i += 1; 187 | } 188 | } 189 | if (i === pp.chunk.length && c !== tc) { 190 | pp.tokenizer.error("Unterminated string", pp, str); 191 | } 192 | } 193 | }, 194 | type: { 195 | 
value: "string"
196 |   }
197 | });
198 | 
199 | export const NumberToken = Object.create(RootToken, {
200 |   registerWithinTokenizer: {
201 |     value(tokenizer) {
202 |       for (const c of "0123456789") {
203 |         tokenizer.maxTokenLengthForFirstChar[c] = 1;
204 |         tokenizer.registeredTokens[c] = this;
205 |       }
206 |     }
207 |   },
208 |   parse: {
209 |     value(pp) {
210 |       const properties = pp.properties;
211 |       let str = pp.chunk[pp.offset];
212 |       pp.offset += 1;
213 |       for (; pp.offset < pp.chunk.length; ) {
214 |         const c = pp.chunk[pp.offset];
215 |         if (
216 |           (c < "0" || c > "9") &&
217 |           c !== "." &&
218 |           c !== "e" &&
219 |           c !== "E" &&
220 |           c !== "-" &&
221 |           c !== "+"
222 |         ) {
223 |           break;
224 |         }
225 |         pp.offset += 1;
226 |         str += c;
227 |       }
228 |       return Object.create(this, {
229 |         ...properties,
230 |         value: {
231 |           value: +str
232 |         }
233 |       });
234 |     }
235 |   },
236 |   type: {
237 |     value: "number"
238 |   }
239 | });
240 | 
241 | export const OperatorToken = Object.create(RootToken, {
242 |   registerWithinTokenizer: {
243 |     value(tokenizer) {
244 |       const c = this.value;
245 |       const firstChar = c[0];
246 |       const maxLength = tokenizer.maxTokenLengthForFirstChar[firstChar] || 0;
247 | 
248 |       if (maxLength < c.length) {
249 |         tokenizer.maxTokenLengthForFirstChar[firstChar] = c.length;
250 |       }
251 | 
252 |       const p = tokenizer.registeredTokens[c];
253 |       if (p) {
254 |         // TODO dirty hack how to merge nud() and led() tokens
255 |         //console.log(`Token already defined ${c} ${this.nud} <> ${p.nud}`);
256 |         this.nud = p.nud;
257 |         //tokenizer.registeredTokens[c] = Object.assign(this,p);
258 |         tokenizer.registeredTokens[c] = this;
259 |       } else {
260 |         tokenizer.registeredTokens[c] = this;
261 |       }
262 |     }
263 |   },
264 |   parse: {
265 |     value(pp) {
266 |       pp.offset += this.value.length;
267 |       return Object.create(this, pp.properties);
268 |     }
269 |   },
270 |   type: {
271 |     value: "operator"
272 |   }
273 | });
274 | 
275 | /**
276 |  * skip white space
277 |  */
278 | export const WhiteSpaceToken = Object.create(RootToken, {
279 |   registerWithinTokenizer: {
280 |     value(tokenizer) {
281 |       for (const c of " \f\t\b\r\n") {
282 |         tokenizer.maxTokenLengthForFirstChar[c] = 1;
283 |         tokenizer.registeredTokens[c] = this;
284 |       }
285 |     }
286 |   },
287 |   parse: {
288 |     value(pp) {
289 |       while (pp.chunk[pp.offset] <= " ") {
290 |         if (pp.chunk[pp.offset] === "\n") {
291 |           pp.lineNumber += 1;
292 |           pp.firstCharInLine = pp.offset;
293 |         }
294 |         pp.offset += 1;
295 |       }
296 |     }
297 |   },
298 |   type: {
299 |     value: "space"
300 |   }
301 | });
302 | 
303 | /**
304 |  * skips until end of line
305 |  */
306 | export const LineCommentToken = Object.create(RootToken, {
307 |   parse: {
308 |     value(pp) {
309 |       while (
310 |         pp.chunk[pp.offset] !== "\n" &&
311 |         pp.chunk[pp.offset] !== undefined
312 |       ) {
313 |         pp.offset += 1;
314 |       }
315 | 
316 |       pp.lineNumber += 1;
317 |       pp.firstCharInLine = pp.offset;
318 |     }
319 |   },
320 |   type: {
321 |     value: "comment"
322 |   }
323 | });
324 | 
325 | /**
326 |  * Token representing 'end of file'
327 |  */
328 | export const EOFToken = Object.create(RootToken, {
329 |   type: {
330 |     value: "EOF"
331 |   }
332 | });
333 | 
--------------------------------------------------------------------------------