├── src ├── Grammars │ ├── index.ts │ ├── types.ts │ ├── BNF.ts │ ├── W3CEBNF.ts │ └── Custom.ts ├── index.ts ├── SemanticHelpers.ts ├── TokenError.ts ├── bin.ts └── Parser.ts ├── .travis.yml ├── .editorconfig ├── tsconfig-test.json ├── .vscode ├── tasks.json └── launch.json ├── tsconfig.json ├── .npmignore ├── .github └── workflows │ └── push.yml ├── .gitignore ├── LICENSE ├── test ├── EOF.spec.ts ├── W3CEBNF.spec.ts ├── TestHelpers.ts ├── StringLiteral.spec.ts ├── JSON2.spec.ts ├── JSON.spec.ts ├── WS.spec.ts ├── BNF.spec.ts ├── ATL.spec.ts ├── JSONRecovery.spec.ts ├── Lookahead.spec.ts └── NewLang.spec.ts ├── tslint.json ├── package.json └── README.md /src/Grammars/index.ts: -------------------------------------------------------------------------------- 1 | export { default as BNF } from './BNF'; 2 | export { default as W3C } from './W3CEBNF'; 3 | export { default as Custom } from './Custom'; 4 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | export { Parser, IToken, IRule } from './Parser'; 2 | export { TokenError } from './TokenError'; 3 | export import Grammars = require('./Grammars'); 4 | -------------------------------------------------------------------------------- /src/Grammars/types.ts: -------------------------------------------------------------------------------- 1 | import { IParserOptions } from "../Parser"; 2 | 3 | export interface IGrammarParserOptions extends IParserOptions { 4 | debugRulesParser: boolean; 5 | } -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - 8 4 | script: npm run-script test-travis 5 | after_script: cat ./coverage/lcov.info | ./node_modules/coveralls/bin/coveralls.js 6 | 
-------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: http://EditorConfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_size = 2 7 | indent_style = space 8 | end_of_line = lf 9 | charset = utf-8 10 | trim_trailing_whitespace = true 11 | insert_final_newline = true -------------------------------------------------------------------------------- /src/SemanticHelpers.ts: -------------------------------------------------------------------------------- 1 | import { IToken } from './Parser'; 2 | 3 | /** 4 | * Finds all the direct children of a specified type 5 | */ 6 | export function findChildrenByType(token: IToken, type: string) { 7 | return token.children ? token.children.filter(x => x.type == type) : []; 8 | } 9 | -------------------------------------------------------------------------------- /src/TokenError.ts: -------------------------------------------------------------------------------- 1 | import { IToken } from './Parser'; 2 | 3 | export class TokenError extends Error { 4 | constructor(public message: string, public token: IToken) { 5 | super(message); 6 | if (token && token.errors) token.errors.push(this); 7 | else throw this; 8 | } 9 | 10 | inspect() { 11 | return 'SyntaxError: ' + this.message; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /tsconfig-test.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "target": "es2017", 5 | "diagnostics": true, 6 | "emitDecoratorMetadata": true, 7 | "experimentalDecorators": true, 8 | "inlineSourceMap": false, 9 | "inlineSources": false, 10 | "sourceMap": false, 11 | "types": [], 12 | "lib": ["es2017", "dom"] 13 | }, 14 | "exclude": ["../src"], 15 | "include": ["test/**/*"] 16 | } 17 | 
-------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | // See https://go.microsoft.com/fwlink/?LinkId=733558 3 | // for the documentation about the tasks.json format 4 | "version": "0.1.0", 5 | "command": "npm", 6 | "isShellCommand": true, 7 | "showOutput": "always", 8 | "suppressTaskName": true, 9 | "tasks": [ 10 | { 11 | "taskName": "build", 12 | "args": ["run", "build"], 13 | "isBuildCommand": true 14 | }, 15 | { 16 | "taskName": "update", 17 | "args": ["update"] 18 | }, 19 | { 20 | "taskName": "test", 21 | "args": ["run", "test"] 22 | } 23 | ] 24 | } -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "target": "es2017", 5 | "diagnostics": true, 6 | "emitDecoratorMetadata": true, 7 | "experimentalDecorators": true, 8 | "inlineSourceMap": false, 9 | "inlineSources": false, 10 | "sourceMap": true, 11 | "declaration": true, 12 | "outDir": "dist", 13 | "moduleResolution": "node", 14 | "stripInternal": true, 15 | "pretty": true, 16 | "forceConsistentCasingInFileNames": true, 17 | "noUnusedParameters": true, 18 | "noUnusedLocals": true, 19 | "types": [], 20 | "lib": ["es2017", "dom"] 21 | }, 22 | "exclude": ["node_modules", "test", "dist"], 23 | "include": ["src/**/*"] 24 | } 25 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | # Logs 3 | logs 4 | *.log 5 | npm-debug.log* 6 | 7 | # Runtime data 8 | pids 9 | *.pid 10 | *.seed 11 | **/*.js.map 12 | # Directory for instrumented libs generated by jscoverage/JSCover 13 | lib-cov 14 | 15 | # Coverage directory used by tools like istanbul 16 | coverage 17 | 18 | 
# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 19 | .grunt 20 | 21 | # node-waf configuration 22 | .lock-wscript 23 | 24 | # Compiled binary addons (http://nodejs.org/api/addons.html) 25 | build/Release 26 | 27 | # Dependency directory 28 | node_modules 29 | 30 | # Optional npm cache directory 31 | .npm 32 | 33 | # Optional REPL history 34 | .node_repl_history 35 | 36 | # Idea IDE 37 | .idea 38 | 39 | # Sublime 40 | terminal.glue 41 | 42 | *.pem 43 | *.zip 44 | 45 | .alm 46 | 47 | src 48 | test 49 | 50 | .npmrc -------------------------------------------------------------------------------- /.github/workflows/push.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push 3 | 4 | name: Test and release 5 | jobs: 6 | install: 7 | runs-on: ubuntu-latest 8 | permissions: 9 | contents: read 10 | id-token: write 11 | steps: 12 | - uses: actions/checkout@master 13 | - name: Use Node.js 12.x 14 | uses: actions/setup-node@v1 15 | with: 16 | node-version: 12.x 17 | - name: npm install 18 | run: npm install 19 | - name: build 20 | run: npm run build 21 | - name: test 22 | run: npm run test 23 | - name: coverage 24 | run: npm run coverage 25 | - name: lint 26 | run: npm run lint 27 | - name: Publish 28 | uses: menduz/oddish-action@master 29 | with: 30 | provenance: true 31 | env: 32 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | # Logs 3 | logs 4 | *.log 5 | npm-debug.log* 6 | 7 | # Runtime data 8 | pids 9 | *.pid 10 | *.seed 11 | **/*.js.map 12 | # Directory for instrumented libs generated by jscoverage/JSCover 13 | lib-cov 14 | 15 | # Coverage directory used by tools like istanbul 16 | coverage 17 | 18 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 19 | .grunt 
20 | 21 | # node-waf configuration 22 | .lock-wscript 23 | 24 | # Compiled binary addons (http://nodejs.org/api/addons.html) 25 | build/Release 26 | 27 | # Dependency directory 28 | node_modules 29 | 30 | # Optional npm cache directory 31 | .npm 32 | 33 | # Optional REPL history 34 | .node_repl_history 35 | 36 | # Idea IDE 37 | .idea 38 | 39 | # Sublime 40 | terminal.glue 41 | 42 | *.pem 43 | *.zip 44 | dist 45 | .alm 46 | test/*.js 47 | 48 | src/typings 49 | test/typings 50 | 51 | test/DW.* 52 | -------------------------------------------------------------------------------- /src/bin.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | console.log(`/* AUTO GENERATED CODE USING ebnf NPM MODULE ${new Date().toISOString()}`); 4 | 5 | function printUsage() { 6 | console.error(`Usage: 7 | ebnf Grammar.ebnf >> myFile.js 8 | ^^^^^^^^^^^^ Source file`); 9 | } 10 | 11 | declare var process, require; 12 | 13 | const path = require('path'); 14 | const fs = require('fs'); 15 | const util = require('util'); 16 | 17 | import { Grammars } from '.'; 18 | 19 | let source: string = process.argv[2]; 20 | 21 | if (!source || source.length == 0) { 22 | printUsage(); 23 | throw new Error('You must provide a source file'); 24 | } 25 | 26 | source = path.resolve(process.cwd(), source); 27 | 28 | let sourceCode = fs.readFileSync(source).toString() + '\n'; 29 | 30 | let RULES = Grammars.Custom.getRules(sourceCode); 31 | 32 | console.log(`*/ 33 | 34 | module.exports = ${util.inspect(RULES, { depth: 20, maxArrayLength: null })};`); 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Agustin Mendez 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 
| in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /test/EOF.spec.ts: -------------------------------------------------------------------------------- 1 | declare var describe, it, require; 2 | 3 | import { Grammars, Parser, IToken } from '../dist'; 4 | import { testParseTokenFailsafe, describeTree, printBNF, testParseToken } from './TestHelpers'; 5 | 6 | let inspect = require('util').inspect; 7 | let expect = require('expect'); 8 | 9 | describe('EOF', function() { 10 | let parser = new Grammars.Custom.Parser( 11 | ` 12 | Rule ::= Item* EOF 13 | Item ::= Space? 
Rules {recoverUntil=Space, fragment=true} 14 | Rules ::= "true" | "false" 15 | Space ::= " "+ | EOF 16 | `, 17 | {} 18 | ); 19 | 20 | testParseTokenFailsafe(parser, 'true', null, doc => { 21 | expect(doc.errors.length).toEqual(0); 22 | }); 23 | 24 | testParseTokenFailsafe(parser, 'true false true', null, doc => { 25 | expect(doc.errors.length).toEqual(0); 26 | }); 27 | }); 28 | 29 | describe('EOF1', function() { 30 | let parser = new Grammars.Custom.Parser( 31 | ` 32 | Rule ::= Rules EOF {pin=1} 33 | Rules ::= "true" | "false" 34 | `, 35 | {} 36 | ); 37 | 38 | testParseTokenFailsafe(parser, 'true', null, doc => { 39 | expect(doc.errors.length).toEqual(0); 40 | }); 41 | 42 | testParseTokenFailsafe(parser, 'true false true', null, doc => { 43 | expect(doc.errors.length).toEqual(1); 44 | }); 45 | }); 46 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Launch", 6 | "type": "node", 7 | "request": "launch", 8 | "program": "${workspaceRoot}/node_modules/.bin/_mocha", 9 | "stopOnEntry": false, 10 | "args": [ 11 | "test/Lookahead.spec.js", 12 | "--ui", 13 | "bdd", 14 | "--reporter", 15 | "spec" 16 | ], 17 | "cwd": "${workspaceRoot}", 18 | "preLaunchTask": "build", 19 | "runtimeExecutable": null, 20 | "runtimeArgs": [ 21 | 22 | ], 23 | "env": { 24 | "NODE_ENV": "development" 25 | }, 26 | "console": "internalConsole", 27 | "sourceMaps": true, 28 | "outDir": null 29 | }, 30 | { 31 | "name": "Attach", 32 | "type": "node", 33 | "request": "attach", 34 | "port": 5858, 35 | "address": "localhost", 36 | "restart": false, 37 | "sourceMaps": false, 38 | "outDir": null, 39 | "localRoot": "${workspaceRoot}", 40 | "remoteRoot": null 41 | }, 42 | { 43 | "name": "Attach to Process", 44 | "type": "node", 45 | "request": "attach", 46 | "processId": "${command.PickProcess}", 47 | 
"port": 5858, 48 | "sourceMaps": false, 49 | "outDir": null 50 | } 51 | ] 52 | } -------------------------------------------------------------------------------- /tslint.json: -------------------------------------------------------------------------------- 1 | { 2 | "rules": { 3 | "class-name": true, 4 | "comment-format": [ 5 | true, 6 | "check-space" 7 | ], 8 | "indent": [ 9 | true, 10 | "spaces" 11 | ], 12 | "no-duplicate-variable": true, 13 | "no-eval": true, 14 | "no-internal-module": true, 15 | "no-trailing-whitespace": true, 16 | "no-var-keyword": true, 17 | "one-line": [ 18 | true, 19 | "check-open-brace", 20 | "check-whitespace" 21 | ], 22 | "quotemark": [ 23 | false, 24 | "double" 25 | ], 26 | "semicolon": [ 27 | true, 28 | "always" 29 | ], 30 | "triple-equals": [ 31 | false, 32 | "allow-null-check" 33 | ], 34 | "typedef-whitespace": [ 35 | true, 36 | { 37 | "call-signature": "nospace", 38 | "index-signature": "nospace", 39 | "parameter": "nospace", 40 | "property-declaration": "nospace", 41 | "variable-declaration": "nospace" 42 | } 43 | ], 44 | "variable-name": [ 45 | true, 46 | "ban-keywords" 47 | ], 48 | "whitespace": [ 49 | true, 50 | "check-branch", 51 | "check-decl", 52 | "check-operator", 53 | "check-separator", 54 | "check-type" 55 | ] 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ebnf", 3 | "version": "1.9.0", 4 | "description": "Creates an AST parser from a [E]BNF file", 5 | "main": "dist/index.js", 6 | "typings": "dist/index.d.ts", 7 | "bin": "dist/bin.js", 8 | "scripts": { 9 | "test": "mocha --reporter spec", 10 | "test-travis": "npm run lint && npm run build && npm run coverage && npm run test", 11 | "preversion": "npm run build && npm test", 12 | "coverage": "(`npm bin`/istanbul cover `npm bin`/_mocha || true)", 13 | "lint": "tslint -t msbuild src/**/*.ts", 14 | "lint-fix": 
"tslint -t msbuild src/**/*.ts --fix", 15 | "build": "npm run lint-fix && rm -rf dist && tsc -p tsconfig.json && tsc -p tsconfig-test.json && chmod +x dist/bin.js", 16 | "watch": "npm run build && chmod +x dist/bin.js && (tsc -p tsconfig.json --watch & tsc -p tsconfig-test.json --watch)" 17 | }, 18 | "repository": { 19 | "type": "git", 20 | "url": "git+https://github.com/menduz/node-ebnf.git" 21 | }, 22 | "keywords": [ 23 | "EBNF", 24 | "AST", 25 | "Parser", 26 | "Grammar", 27 | "Lexer", 28 | "Syntax" 29 | ], 30 | "author": { 31 | "name": "Agustin Mendez @menduz", 32 | "email": "menduz@protonmail.com", 33 | "url": "https://menduz.com" 34 | }, 35 | "license": "MIT", 36 | "bugs": { 37 | "url": "https://github.com/menduz/node-ebnf/issues" 38 | }, 39 | "homepage": "https://github.com/menduz/node-ebnf#readme", 40 | "devDependencies": { 41 | "@types/node": "^13.13.16", 42 | "child_process": "^1.0.2", 43 | "coveralls": "^3.1.0", 44 | "expect": "^24.9.0", 45 | "git-rev-sync": "^2.1.0", 46 | "istanbul": "^0.4.5", 47 | "mocha": "^6.2.3", 48 | "node-fetch": "^2.6.0", 49 | "semver": "^7.3.2", 50 | "ts-node": "^8.10.2", 51 | "tslint": "^5.20.1", 52 | "typescript": "^3.9.7" 53 | }, 54 | "dependencies": {}, 55 | "prettier": { 56 | "semi": true, 57 | "singleQuote": true, 58 | "printWidth": 120 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /test/W3CEBNF.spec.ts: -------------------------------------------------------------------------------- 1 | declare var describe, it, require; 2 | 3 | import { Grammars, Parser, IToken } from '../dist'; 4 | import { testParseToken, describeTree, printBNF } from './TestHelpers'; 5 | 6 | let inspect = require('util').inspect; 7 | 8 | let grammar = ` 9 | Grammar ::= RULE_S* (Production RULE_S*)* EOF 10 | Production ::= NCName RULE_S* "::=" RULE_WHITESPACE* Choice RULE_WHITESPACE* RULE_EOL+ RULE_S* 11 | NCName ::= [a-zA-Z][a-zA-Z_0-9]* 12 | Choice ::= SequenceOrDifference (RULE_WHITESPACE* "|" 
RULE_WHITESPACE* SequenceOrDifference)* 13 | SequenceOrDifference ::= Item RULE_WHITESPACE* (Minus Item | Item*)? 14 | Minus ::= "-" 15 | Item ::= RULE_WHITESPACE* (NCName | StringLiteral | CharCode | CharClass | SubItem) PrimaryDecoration? 16 | PrimaryDecoration ::= "?" | "*" | "+" 17 | DecorationName ::= "ebnf://" [^#x5D#]+ 18 | SubItem ::= "(" RULE_WHITESPACE* Choice RULE_WHITESPACE* ")" 19 | StringLiteral ::= '"' [^"]* '"' | "'" [^']* "'" 20 | CharCode ::= "#x" [0-9a-zA-Z]+ 21 | CharClass ::= '[' '^'? (CharCodeRange | CharRange | CharCode | RULE_Char)+ "]" 22 | RULE_Char ::= #x09 | #x0A | #x0D | [#x20-#x5c] | [#x5e-#xD7FF] | [#xE000-#xFFFD] 23 | CharRange ::= RULE_Char "-" RULE_Char 24 | CharCodeRange ::= CharCode "-" CharCode 25 | RULE_WHITESPACE ::= (#x09 | #x20)* | Comment RULE_WHITESPACE* 26 | RULE_S ::= RULE_WHITESPACE RULE_S* | RULE_EOL RULE_S* 27 | Comment ::= "/*" ( [^*])* "*/" 28 | RULE_EOL ::= #x0D #x0A | #x0A | #x0D 29 | Link ::= '[' Url ']' 30 | Url ::= [^#x5D:\/?#] "://" [^#x5D#]+ ("#" NCName)? 
31 | `; 32 | 33 | describe('Parse W3CEBNF', () => { 34 | let parser: Parser; 35 | 36 | it('create parser', () => { 37 | parser = new Parser(Grammars.W3C.RULES, {}); 38 | testParseToken(parser, grammar); 39 | console.log('W3C PARSER', Grammars.W3C.emit(parser)); 40 | printBNF(parser); 41 | }); 42 | }); 43 | 44 | describe('Grammars.W3C parses itself', function() { 45 | let RULES = Grammars.W3C.getRules(grammar); 46 | let parser = new Parser(RULES, {}); 47 | 48 | testParseToken(parser, grammar); 49 | }); 50 | -------------------------------------------------------------------------------- /test/TestHelpers.ts: -------------------------------------------------------------------------------- 1 | import { IToken, Parser, Grammars } from '../dist'; 2 | 3 | declare var require, it; 4 | 5 | export const printBNF = (parser: Parser) => console.log(parser.emitSource()); 6 | 7 | let inspect = require('util').inspect; 8 | 9 | export function testParseToken(parser: Parser, txt: string, target?: string, customTest?: (document: IToken) => void) { 10 | testParseTokenFailsafe(parser, txt, target, (doc: IToken) => { 11 | if (doc.errors.length) throw doc.errors[0]; 12 | 13 | if (doc.rest.length != 0) throw new Error('Got rest: ' + doc.rest); 14 | 15 | customTest && customTest(doc); 16 | }); 17 | } 18 | 19 | export function testParseTokenFailsafe( 20 | parser: Parser, 21 | txt: string, 22 | target?: string, 23 | customTest?: (document: IToken) => void 24 | ) { 25 | it(inspect(txt, false, 1, true) + ' must resolve into ' + (target || '(FIRST RULE)'), () => { 26 | console.log(' ---------------------------------------------------'); 27 | 28 | let result; 29 | 30 | try { 31 | result = parser.getAST(txt, target); 32 | 33 | if (!result) throw new Error('Did not resolve'); 34 | 35 | if (target && result.type != target) throw new Error("Type doesn't match. 
Got: " + result.type); 36 | 37 | if (result.text.length == 0) throw new Error('Empty text result'); 38 | 39 | if (customTest) customTest(result); 40 | } catch (e) { 41 | console.error(e); 42 | // parser.debug = true; 43 | // try { 44 | // // result = parser.getAST(txt, target); 45 | // console.log(txt + '\n' + inspect(result, false, 20, true)); 46 | // } catch (ee) { 47 | // console.(ee); 48 | // } 49 | // parser.debug = false; 50 | describeTree(result); 51 | throw e; 52 | } 53 | 54 | describeTree(result); 55 | }); 56 | } 57 | 58 | function printAST(token: IToken, level = 0) { 59 | console.log( 60 | ' ' + ' '.repeat(level) + `|-${token.type}${token.children.length == 0 ? '=' + token.text : ''}` 61 | ); 62 | token.children && 63 | token.children.forEach(c => { 64 | printAST(c, level + 1); 65 | }); 66 | } 67 | 68 | export function describeTree(token: IToken) { 69 | printAST(token); 70 | } 71 | -------------------------------------------------------------------------------- /test/StringLiteral.spec.ts: -------------------------------------------------------------------------------- 1 | declare var describe, it, require; 2 | 3 | import { Grammars, Parser } from '../dist'; 4 | import { findRuleByName } from '../dist/Parser'; 5 | import { testParseToken, testParseTokenFailsafe } from './TestHelpers'; 6 | 7 | let inspect = require('util').inspect; 8 | let expect = require('expect'); 9 | 10 | let grammar = ` 11 | 12 | Document ::= Keyword1 | Keyword2 13 | 14 | Keyword1 ::= 'And' | 'Or' 15 | Keyword2 ::= 'Not' | 'Is' {ignoreCase=true} 16 | 17 | `; 18 | 19 | describe('String Literals', () => { 20 | describe('Parse JSON', () => { 21 | let parser: Parser; 22 | 23 | it('create parser', () => { 24 | parser = new Parser(Grammars.Custom.RULES, {}); 25 | testParseToken(parser, grammar); 26 | }); 27 | }); 28 | 29 | describe('Grammars.Custom parses JSON grammar', function() { 30 | let RULES = Grammars.Custom.getRules(grammar); 31 | console.log('JSON:\n' + inspect(RULES, false, 20, 
true)); 32 | let parser = new Parser(RULES, {}); 33 | 34 | it('string literal case sensitive rule', () => { 35 | let rule = findRuleByName("Keyword1", parser); 36 | 37 | console.log(rule.bnf[0]); 38 | 39 | expect(rule.bnf[0][0]).toEqual(RegExp('A')); 40 | expect(rule.bnf[0][1]).toEqual(RegExp('n')); 41 | expect(rule.bnf[0][2]).toEqual(RegExp('d')); 42 | 43 | console.log(rule.bnf[1]); 44 | 45 | expect(rule.bnf[1][0]).toEqual(RegExp('O')); 46 | expect(rule.bnf[1][1]).toEqual(RegExp('r')); 47 | }); 48 | 49 | testParseTokenFailsafe(parser, 'And'); 50 | testParseTokenFailsafe(parser, 'Or'); 51 | 52 | it('string literal case sensitive rule - OR', () => { 53 | expect(parser.getAST('OR')).toEqual(null); 54 | }); 55 | 56 | it('string literal case insensitive rule', () => { 57 | let rule = findRuleByName("Keyword2", parser); 58 | 59 | console.log(rule.bnf[0]); 60 | 61 | expect(rule.bnf[0][0]).toEqual(RegExp('[Nn]')); 62 | expect(rule.bnf[0][1]).toEqual(RegExp('[Oo]')); 63 | expect(rule.bnf[0][2]).toEqual(RegExp('[Tt]')); 64 | 65 | console.log(rule.bnf[1]); 66 | 67 | expect(rule.bnf[1][0]).toEqual(RegExp('[Ii]')); 68 | expect(rule.bnf[1][1]).toEqual(RegExp('[Ss]')); 69 | }); 70 | 71 | testParseTokenFailsafe(parser, 'is'); 72 | testParseTokenFailsafe(parser, 'IS'); 73 | testParseTokenFailsafe(parser, 'NoT'); 74 | testParseTokenFailsafe(parser, 'not'); 75 | testParseTokenFailsafe(parser, 'NOT'); 76 | }); 77 | }); 78 | -------------------------------------------------------------------------------- /test/JSON2.spec.ts: -------------------------------------------------------------------------------- 1 | declare var describe, it, require; 2 | 3 | import { Grammars, Parser, IToken } from '../dist'; 4 | import { testParseToken, describeTree, printBNF } from './TestHelpers'; 5 | 6 | let inspect = require('util').inspect; 7 | 8 | let grammar = ` 9 | /* https://www.ietf.org/rfc/rfc4627.txt */ 10 | value ::= false | null | true | object | array | number | string 11 | BEGIN_ARRAY ::= WS* 
#x5B WS* /* [ left square bracket */ 12 | BEGIN_OBJECT ::= WS* #x7B WS* /* { left curly bracket */ 13 | END_ARRAY ::= WS* #x5D WS* /* ] right square bracket */ 14 | END_OBJECT ::= WS* #x7D WS* /* } right curly bracket */ 15 | NAME_SEPARATOR ::= WS* #x3A WS* /* : colon */ 16 | VALUE_SEPARATOR ::= WS* #x2C WS* /* , comma */ 17 | WS ::= [#x20#x09#x0A#x0D]+ /* Space | Tab | \n | \r */ 18 | false ::= "false" 19 | null ::= "null" 20 | true ::= "true" 21 | object ::= BEGIN_OBJECT (member (VALUE_SEPARATOR member)*)? END_OBJECT 22 | member ::= string NAME_SEPARATOR value 23 | array ::= BEGIN_ARRAY (value (VALUE_SEPARATOR value)*)? END_ARRAY 24 | 25 | number ::= "-"? ("0" | [1-9] [0-9]*) ("." [0-9]+)? (("e" | "E") ( "-" | "+" )? ("0" | [1-9] [0-9]*))? 26 | 27 | /* STRINGS */ 28 | 29 | string ::= '"' (([#x20-#x21] | [#x23-#x5B] | [#x5D-#xFFFF]) | #x5C (#x22 | #x5C | #x2F | #x62 | #x66 | #x6E | #x72 | #x74 | #x75 HEXDIG HEXDIG HEXDIG HEXDIG))* '"' 30 | HEXDIG ::= [a-fA-F0-9] 31 | `; 32 | 33 | describe('JSON 2', () => { 34 | describe('Parse JSON', () => { 35 | let parser: Parser; 36 | 37 | it('create parser', () => { 38 | parser = new Parser(Grammars.W3C.RULES, {}); 39 | testParseToken(parser, grammar); 40 | }); 41 | }); 42 | 43 | describe('Grammars.W3C parses JSON grammar', function() { 44 | let RULES = Grammars.W3C.getRules(grammar); 45 | // console.log('JSON:\n' + inspect(RULES, false, 20, true)); 46 | let parser = new Parser(RULES, {}); 47 | 48 | // printBNF(parser); 49 | 50 | testParseToken(parser, JSON.stringify(true)); 51 | testParseToken(parser, JSON.stringify(false)); 52 | testParseToken(parser, JSON.stringify(null)); 53 | testParseToken(parser, JSON.stringify('')); 54 | testParseToken(parser, JSON.stringify('"')); 55 | testParseToken(parser, JSON.stringify('"{}')); 56 | testParseToken(parser, JSON.stringify(10)); 57 | testParseToken(parser, JSON.stringify(-10)); 58 | testParseToken(parser, JSON.stringify(-10.1)); 59 | 60 | testParseToken(parser, 
JSON.stringify(10.1e123)); 61 | 62 | testParseToken(parser, JSON.stringify({})); 63 | testParseToken(parser, JSON.stringify({ a: true })); 64 | testParseToken(parser, JSON.stringify({ a: false })); 65 | 66 | testParseToken( 67 | parser, 68 | JSON.stringify({ 69 | a: false, 70 | b: `asd 71 | asd `, 72 | list: [1, 2, 3, true] 73 | }) 74 | ); 75 | 76 | testParseToken(parser, JSON.stringify([])); 77 | testParseToken(parser, JSON.stringify([{}])); 78 | testParseToken(parser, JSON.stringify([null, false])); 79 | }); 80 | }); 81 | -------------------------------------------------------------------------------- /test/JSON.spec.ts: -------------------------------------------------------------------------------- 1 | declare var describe, it, require; 2 | 3 | import { Grammars, Parser, IToken } from '../dist'; 4 | import { testParseToken, describeTree, printBNF } from './TestHelpers'; 5 | 6 | let inspect = require('util').inspect; 7 | 8 | let grammar = ` 9 | /* https://www.ietf.org/rfc/rfc4627.txt */ 10 | value ::= false | null | true | object | array | number | string 11 | BEGIN_ARRAY ::= WS* #x5B WS* /* [ left square bracket */ 12 | BEGIN_OBJECT ::= WS* #x7B WS* /* { left curly bracket */ 13 | END_ARRAY ::= WS* #x5D WS* /* ] right square bracket */ 14 | END_OBJECT ::= WS* #x7D WS* /* } right curly bracket */ 15 | NAME_SEPARATOR ::= WS* #x3A WS* /* : colon */ 16 | VALUE_SEPARATOR ::= WS* #x2C WS* /* , comma */ 17 | WS ::= [#x20#x09#x0A#x0D]+ /* Space | Tab | \n | \r */ 18 | false ::= "false" 19 | null ::= "null" 20 | true ::= "true" 21 | object ::= BEGIN_OBJECT (member (VALUE_SEPARATOR member)*)? END_OBJECT 22 | member ::= string NAME_SEPARATOR value 23 | array ::= BEGIN_ARRAY (value (VALUE_SEPARATOR value)*)? END_ARRAY 24 | 25 | /* NUMBERS */ 26 | 27 | number ::= "-"? ("0" | [1-9] DIGIT*) ("." [0-9]+)? EXP? 28 | DIGIT ::= [0-9] 29 | EXP ::= ("e" | "E") ( "-" | "+" )? 
("0" | [1-9] [0-9]*) 30 | 31 | /* STRINGS */ 32 | 33 | string ::= '"' CHAR* '"' 34 | ESCAPE ::= #x5C /* \ */ 35 | HEXDIG ::= [a-fA-F0-9] 36 | ESCAPABLE ::= #x22 | #x5C | #x2F | #x62 | #x66 | #x6E | #x72 | #x74 | #x75 HEXDIG HEXDIG HEXDIG HEXDIG 37 | CHAR ::= UNESCAPED | ESCAPE ESCAPABLE 38 | UNESCAPED ::= [#x20-#x21] | [#x23-#x5B] | [#x5D-#xFFFF] 39 | `; 40 | 41 | describe('JSON', () => { 42 | describe('Parse JSON', () => { 43 | let parser: Parser; 44 | 45 | it('create parser', () => { 46 | parser = new Parser(Grammars.W3C.RULES, {}); 47 | testParseToken(parser, grammar); 48 | }); 49 | }); 50 | 51 | describe('Grammars.W3C parses JSON grammar', function() { 52 | let RULES = Grammars.W3C.getRules(grammar); 53 | 54 | let parser = new Parser(RULES, {debug: true}); 55 | 56 | testParseToken(parser, JSON.stringify(true)); 57 | testParseToken(parser, JSON.stringify(false)); 58 | testParseToken(parser, JSON.stringify(null)); 59 | testParseToken(parser, JSON.stringify('')); 60 | testParseToken(parser, JSON.stringify('"')); 61 | testParseToken(parser, JSON.stringify('"{}')); 62 | testParseToken(parser, JSON.stringify(10)); 63 | testParseToken(parser, JSON.stringify(-10)); 64 | testParseToken(parser, JSON.stringify(-10.1)); 65 | 66 | testParseToken(parser, JSON.stringify(10.1e123)); 67 | 68 | testParseToken(parser, JSON.stringify({})); 69 | testParseToken(parser, JSON.stringify({ a: true })); 70 | testParseToken(parser, JSON.stringify({ a: false })); 71 | 72 | testParseToken( 73 | parser, 74 | JSON.stringify({ 75 | a: false, 76 | b: `asd 77 | asd `, 78 | list: [1, 2, 3, true] 79 | }) 80 | ); 81 | 82 | testParseToken(parser, JSON.stringify([])); 83 | testParseToken(parser, JSON.stringify([{}])); 84 | testParseToken(parser, JSON.stringify([null, false])); 85 | }); 86 | }); 87 | -------------------------------------------------------------------------------- /test/WS.spec.ts: -------------------------------------------------------------------------------- 1 | declare var 
describe, it, require; 2 | 3 | import { Grammars, Parser, IToken } from '../dist'; 4 | import { testParseToken, describeTree, printBNF } from './TestHelpers'; 5 | 6 | let inspect = require('util').inspect; 7 | 8 | let grammar = ` 9 | { ws=implicit } 10 | /* https://www.ietf.org/rfc/rfc4627.txt */ 11 | value ::= false | null | true | object | array | number | string 12 | BEGIN_ARRAY ::= #x5B /* [ left square bracket */ 13 | BEGIN_OBJECT ::= #x7B /* { left curly bracket */ 14 | END_ARRAY ::= #x5D /* ] right square bracket */ 15 | END_OBJECT ::= #x7D /* } right curly bracket */ 16 | NAME_SEPARATOR ::= #x3A /* : colon */ 17 | VALUE_SEPARATOR ::= #x2C /* , comma */ 18 | WS ::= [#x20#x09#x0A#x0D]+ /* Space | Tab | \n | \r */ 19 | false ::= "false" 20 | null ::= "null" 21 | true ::= "true" 22 | object ::= BEGIN_OBJECT (member (VALUE_SEPARATOR member)*)? END_OBJECT 23 | member ::= string NAME_SEPARATOR value 24 | array ::= BEGIN_ARRAY (value (VALUE_SEPARATOR value)*)? END_ARRAY 25 | 26 | number ::= "-"? ("0" | [1-9] [0-9]*) ("." [0-9]+)? (("e" | "E") ( "-" | "+" )? ("0" | [1-9] [0-9]*))? 
{ws=explicit} 27 | 28 | /* STRINGS */ 29 | 30 | string ::= '"' (([#x20-#x21] | [#x23-#x5B] | [#x5D-#xFFFF]) | #x5C (#x22 | #x5C | #x2F | #x62 | #x66 | #x6E | #x72 | #x74 | #x75 HEXDIG HEXDIG HEXDIG HEXDIG))* '"' {ws=explicit} 31 | HEXDIG ::= [a-fA-F0-9] {ws=explicit} 32 | `; 33 | 34 | describe('WS', () => { 35 | describe('Parse JSON', () => { 36 | let parser: Parser; 37 | 38 | it('create parser', () => { 39 | parser = new Parser(Grammars.Custom.RULES, {}); 40 | testParseToken(parser, grammar); 41 | }); 42 | }); 43 | 44 | describe('Grammars.Custom parses JSON grammar', function() { 45 | let RULES = Grammars.Custom.getRules(grammar); 46 | // console.log('JSON:\n' + inspect(RULES, false, 20, true)); 47 | let parser = new Parser(RULES, {}); 48 | 49 | // printBNF(parser); 50 | 51 | testParseToken(parser, JSON.stringify(true, null, 2)); 52 | testParseToken(parser, JSON.stringify(false, null, 2)); 53 | testParseToken(parser, JSON.stringify(null, null, 2)); 54 | testParseToken(parser, JSON.stringify('', null, 2)); 55 | testParseToken(parser, JSON.stringify('"', null, 2)); 56 | testParseToken(parser, JSON.stringify('"{}', null, 2)); 57 | testParseToken(parser, JSON.stringify(10, null, 2)); 58 | testParseToken(parser, JSON.stringify(-10, null, 2)); 59 | testParseToken(parser, JSON.stringify(-10.1, null, 2)); 60 | 61 | testParseToken(parser, JSON.stringify(10.1e123, null, 2)); 62 | 63 | testParseToken(parser, JSON.stringify({}, null, 2)); 64 | testParseToken(parser, '{ "a": true }'); 65 | testParseToken(parser, JSON.stringify({ a: false }, null, 2)); 66 | 67 | testParseToken( 68 | parser, 69 | JSON.stringify( 70 | { 71 | a: false, 72 | b: `asd 73 | asd `, 74 | list: [1, 2, 3, true] 75 | }, 76 | null, 77 | 2 78 | ) 79 | ); 80 | 81 | testParseToken(parser, JSON.stringify([])); 82 | testParseToken(parser, JSON.stringify([{}], null, 2)); 83 | testParseToken(parser, JSON.stringify([null, false], null, 2)); 84 | }); 85 | }); 86 | 
-------------------------------------------------------------------------------- /test/BNF.spec.ts: -------------------------------------------------------------------------------- 1 | declare var describe, it, require; 2 | 3 | import { Grammars, Parser, IToken } from '../dist'; 4 | import { testParseToken, describeTree, printBNF } from './TestHelpers'; 5 | 6 | let inspect = require('util').inspect; 7 | 8 | let lexer = Grammars.BNF.RULES; 9 | let parser = new Parser(Grammars.BNF.RULES, {}); 10 | 11 | describe('Parse BNF', () => { 12 | let lisp = ` 13 | ::= | | | 14 | ::= | "(" "." ")" | 15 | ::= | 16 | ::= | "" 17 | ::= " " | | " " | 18 | ::= "(" ")" 19 | ::= "+" | "*" | "=" | "/" | | 20 | ::= | | | 21 | ::= "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" 22 | ::= "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" | "0" 23 | `; 24 | 25 | let lispParser: Parser; 26 | 27 | it('creates a LISP parser', () => { 28 | lispParser = new Grammars.BNF.Parser(lisp); 29 | 30 | printBNF(lispParser); 31 | }); 32 | 33 | lispParser = new Grammars.BNF.Parser(lisp); 34 | 35 | testParseToken(lispParser, 'test'); 36 | testParseToken(lispParser, '(test a)'); 37 | }); 38 | 39 | describe('Parse custom calculator', () => { 40 | let calc = ` 41 | ::= 42 | ::= | 43 | ::= "(" ")" | "(" ")" | 44 | ::= 45 | 46 | ::= | | 47 | ::= "+" | "-" | "*" | "/" | "^" 48 | 49 | ::= | 50 | ::= "-" 51 | ::= "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 52 | ::= "0" | 53 | ::= | "" 54 | ::= " " | | " " | 55 | `; 56 | 57 | let calcuParser: Parser; 58 | 59 | it('creates a calculator parser', () => { 60 | calcuParser = new Grammars.BNF.Parser(calc, {debug: false}); 61 | 62 | printBNF(calcuParser); 63 | }); 64 | 65 | calcuParser = new Grammars.BNF.Parser(calc, {debug: false}); 66 | 67 | testParseToken(calcuParser, '1'); 68 | testParseToken(calcuParser, '0'); 69 | testParseToken(calcuParser, '(1)'); 70 
| testParseToken(calcuParser, '-122 + 2'); 71 | testParseToken(calcuParser, '(2 + 212312)'); 72 | testParseToken(calcuParser, '(2123 + 23332) * 11312'); 73 | testParseToken(calcuParser, '(2 + 2) * (5)'); 74 | testParseToken(calcuParser, '(2 + (2 * -123)) * 5332'); 75 | }); 76 | -------------------------------------------------------------------------------- /test/ATL.spec.ts: -------------------------------------------------------------------------------- 1 | declare var describe, it, require; 2 | 3 | import { Grammars, Parser, IToken } from '../dist'; 4 | import { testParseToken, describeTree, printBNF } from './TestHelpers'; 5 | 6 | let inspect = require('util').inspect; 7 | 8 | let grammar = ` 9 | { ws=explicit } 10 | Expression ::= FunctionCall | Literal | Selector | Interpolation {ws=implicit} 11 | Literal ::= Boolean | Null | String | Number | Integer | Date | Regex {ws=implicit} 12 | FunctionCall ::= Identifier FunctionArguments {ws=implicit} 13 | FunctionArguments ::= "(" ( Expression (WS* "," WS* Expression)* )? ")" { pin=1, ws=implicit } 14 | Selector ::= Identifier 15 | 16 | Identifier ::= RULE_UNQUOTED_STRING 17 | 18 | STRING_CONTENT ::= "\\\\$" | [^$"#x0000-#x001F] 19 | JSON_STRING ::= '\\"' | [^#x0000-#x001F"] 20 | RULE_UNQUOTED_STRING ::= [A-Za-z_][A-Za-z0-9_]* 21 | RULE_ANY_REGEX ::= "/" ( '\\/' | [^/] )* "/" 22 | NOT_PIPE ::= !"|" STRING_CONTENT 23 | 24 | Date ::= '|' NOT_PIPE+ '|' {pin=1} 25 | 26 | Regex ::= RULE_ANY_REGEX [gim]* 27 | 28 | WS ::= [#x20#x09#x0A#x0D]+ 29 | 30 | Interpolation ::= '"' InterpolationPart* '"' 31 | InterpolationPart ::= InterpolationExpression | InterpolationVariable | InterpolationLiteral {fragment=true} 32 | InterpolationLiteral ::= STRING_CONTENT* 33 | InterpolationVariable ::= "$" Identifier {pin=1,fragment=true} 34 | InterpolationExpression ::= "$(" Expression ")" {pin=1,fragment=true} 35 | 36 | Boolean ::= ("true" | "false") !RULE_UNQUOTED_STRING 37 | Null ::= "null" !RULE_UNQUOTED_STRING 38 | Number ::= "-"? 
("0" | [1-9] [0-9]*) ("." [0-9]+)? (("e" | "E") ( "-" | "+" )? ("0" | [1-9] [0-9]*))? 39 | Integer ::= "0" | [1-9] [0-9]* 40 | 41 | String ::= '"' CHAR* '"' 42 | ESCAPE ::= #x5C /* \\ */ 43 | HEXDIG ::= [a-fA-F0-9] 44 | ESCAPABLE ::= #x22 | #x5C | #x2F | #x62 | #x66 | #x6E | #x72 | #x74 | #x75 HEXDIG HEXDIG HEXDIG HEXDIG 45 | CHAR ::= !"$" UNESCAPED | ESCAPE ESCAPABLE 46 | UNESCAPED ::= [#x20-#x21] | [#x23-#x5B] | [#x5D-#xFFFF] 47 | `; 48 | 49 | describe('ATL', () => { 50 | describe('Grammars.Custom parses ATL Expressions', function() { 51 | let RULES = Grammars.Custom.getRules(grammar); 52 | // console.log('JSON:\n' + inspect(RULES, false, 20, true)); 53 | let parser = new Parser(RULES, {}); 54 | 55 | // printBNF(parser); 56 | 57 | testParseToken(parser, JSON.stringify(true, null, 2)); 58 | 59 | testParseToken(parser, '"$(var)"', 'Interpolation'); 60 | testParseToken(parser, '"$var"', 'Interpolation'); 61 | testParseToken(parser, '"asd"', 'Interpolation'); 62 | testParseToken(parser, '"asd$var"', 'Interpolation'); 63 | testParseToken(parser, '"asd$(true)"', 'Interpolation'); 64 | 65 | testParseToken(parser, 'true', 'Literal'); 66 | testParseToken(parser, '123', 'Literal'); 67 | testParseToken(parser, '"asd"', 'Literal'); 68 | testParseToken(parser, 'null', 'Literal'); 69 | testParseToken(parser, '|2016-01-01|', 'Literal'); 70 | 71 | testParseToken(parser, 'true', 'Expression'); 72 | testParseToken(parser, '123', 'Expression'); 73 | testParseToken(parser, '"asd"', 'Expression'); 74 | testParseToken(parser, 'null', 'Expression'); 75 | testParseToken(parser, '|2016-01-01|', 'Expression'); 76 | 77 | testParseToken(parser, 'teta()', 'FunctionCall'); 78 | testParseToken(parser, 'teta(a)', 'FunctionCall'); 79 | testParseToken(parser, 'teta(a, b)', 'FunctionCall'); 80 | testParseToken(parser, 'teta(a, b)', 'Expression'); 81 | testParseToken( 82 | parser, 83 | 'teta( a , b, "text", |2016-10-01|, null, false, "agus$interpolation", "string", "str\\$ing")', 84 | 'Expression' 
85 | ); 86 | testParseToken(parser, 'teta(a(b()))', 'Expression'); 87 | testParseToken(parser, 'teta(a(b()))', 'FunctionCall'); 88 | testParseToken(parser, '"agus"', 'Expression'); 89 | testParseToken(parser, '"agus$interpolation"', 'Expression'); 90 | testParseToken(parser, '"$interpolation"', 'Expression'); 91 | testParseToken(parser, '"agus$interpolation"', 'Interpolation'); 92 | testParseToken(parser, '"$interpolation"', 'Interpolation'); 93 | testParseToken(parser, '"agus\\$interpolation"', 'Expression'); 94 | testParseToken(parser, '"\\$interpolation"', 'Expression'); 95 | 96 | testParseToken(parser, '"$(var)"'); 97 | testParseToken(parser, '"$var"'); 98 | testParseToken(parser, '"asd"'); 99 | testParseToken(parser, '"asd$var"'); 100 | testParseToken(parser, '"asd$(true)"'); 101 | 102 | testParseToken(parser, 'true'); 103 | testParseToken(parser, '123'); 104 | testParseToken(parser, '"asd"'); 105 | testParseToken(parser, 'null'); 106 | testParseToken(parser, '|2016-01-01|'); 107 | 108 | testParseToken(parser, 'true'); 109 | testParseToken(parser, '123'); 110 | testParseToken(parser, '"asd"'); 111 | testParseToken(parser, 'null'); 112 | testParseToken(parser, '|2016-01-01|'); 113 | 114 | testParseToken(parser, 'teta()'); 115 | testParseToken(parser, 'teta(a)'); 116 | testParseToken(parser, 'teta(a, b)'); 117 | testParseToken(parser, 'teta(a, b)'); 118 | testParseToken( 119 | parser, 120 | 'teta( a , b, "text", |2016-10-01|, null, false, "agus$interpolation", "string", "str\\$ing")' 121 | ); 122 | testParseToken(parser, 'teta(a(b()))'); 123 | testParseToken(parser, 'teta(a(b()))'); 124 | testParseToken(parser, '"agus"'); 125 | testParseToken(parser, '"agus$interpolation"'); 126 | testParseToken(parser, '"$interpolation"'); 127 | testParseToken(parser, '"agus$interpolation"'); 128 | testParseToken(parser, '"$interpolation"'); 129 | testParseToken(parser, '"agus\\$interpolation"'); 130 | testParseToken( 131 | parser, 132 | 'teta("hola $(false) $dia $(a(a))", a , 
b, "t$ext", |2016-10-01|, null, false, "agus$interpolation", "string", "str$ing")' 133 | ); 134 | }); 135 | }); 136 | -------------------------------------------------------------------------------- /test/JSONRecovery.spec.ts: -------------------------------------------------------------------------------- 1 | declare var describe, it, require; 2 | 3 | import { Grammars, Parser, IToken } from '../dist'; 4 | import { testParseTokenFailsafe, describeTree, printBNF, testParseToken } from './TestHelpers'; 5 | 6 | let inspect = require('util').inspect; 7 | let expect = require('expect'); 8 | 9 | let grammar = ` 10 | /* https://www.ietf.org/rfc/rfc4627.txt */ 11 | value ::= false | null | true | object | number | string | array 12 | BEGIN_ARRAY ::= WS* #x5B WS* /* [ left square bracket */ 13 | BEGIN_OBJECT ::= WS* #x7B WS* /* { left curly bracket */ 14 | END_ARRAY ::= WS* #x5D WS* /* ] right square bracket */ 15 | END_OBJECT ::= WS* #x7D WS* /* } right curly bracket */ 16 | NAME_SEPARATOR ::= WS* #x3A WS* /* : colon */ 17 | VALUE_SEPARATOR ::= WS* #x2C WS* /* , comma */ 18 | WS ::= [#x20#x09#x0A#x0D]+ /* Space | Tab | \n | \r */ 19 | false ::= "false" 20 | null ::= "null" 21 | true ::= "true" 22 | object ::= BEGIN_OBJECT object_content? END_OBJECT { pin=1 } 23 | object_content ::= (member (object_n)*) { recoverUntil=OBJECT_RECOVERY } 24 | object_n ::= VALUE_SEPARATOR member { recoverUntil=OBJECT_RECOVERY,fragment=true } 25 | Key ::= &(WS* '"') string { recoverUntil=VALUE_SEPARATOR, pin=1 } 26 | OBJECT_RECOVERY ::= END_OBJECT | VALUE_SEPARATOR 27 | ARRAY_RECOVERY ::= END_ARRAY | VALUE_SEPARATOR 28 | MEMBER_RECOVERY ::= '"' | NAME_SEPARATOR | OBJECT_RECOVERY | VALUE_SEPARATOR 29 | member ::= Key NAME_SEPARATOR value { recoverUntil=MEMBER_RECOVERY, pin=2 } 30 | array ::= BEGIN_ARRAY array_content? 
END_ARRAY { pin=1 } 31 | array_content ::= array_value (VALUE_SEPARATOR array_value)* { recoverUntil=ARRAY_RECOVERY,fragment=true } 32 | array_value ::= value { recoverUntil=ARRAY_RECOVERY, fragment=true } 33 | 34 | number ::= "-"? ("0" | [1-9] [0-9]*) ("." [0-9]+)? (("e" | "E") ( "-" | "+" )? ("0" | [1-9] [0-9]*))? { pin=2 } 35 | 36 | /* STRINGS */ 37 | 38 | string ::= ~'"' (([#x20-#x21] | [#x23-#x5B] | [#x5D-#xFFFF]) | #x5C (#x22 | #x5C | #x2F | #x62 | #x66 | #x6E | #x72 | #x74 | #x75 HEXDIG HEXDIG HEXDIG HEXDIG))* '"' 39 | HEXDIG ::= [a-fA-F0-9] 40 | `; 41 | 42 | describe('JSON 2', () => { 43 | describe('Parse JSON', () => { 44 | let parser: Parser; 45 | 46 | it('create parser', () => { 47 | printBNF(Grammars.Custom.defaultParser); 48 | // console.dir(Grammars.Custom.getRules(grammar)); 49 | }); 50 | }); 51 | 52 | describe('Grammars.Custom parses JSON grammar', function() { 53 | let parser = new Grammars.Custom.Parser(grammar, {}); 54 | 55 | // printBNF(parser); 56 | 57 | testParseTokenFailsafe(parser, '{"b": ZZZZ}', null, doc => { 58 | expect(doc.errors.length).toEqual(1); 59 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 60 | expect(doc.errors[0].token.text).toEqual('ZZZZ'); 61 | }); 62 | 63 | testParseTokenFailsafe(parser, '[ZZZZ]', null, doc => { 64 | expect(doc.errors.length).toEqual(1); 65 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 66 | expect(doc.errors[0].token.text).toEqual('ZZZZ'); 67 | }); 68 | 69 | testParseTokenFailsafe(parser, '[ZZZZ', null, doc => { 70 | expect(doc.errors.length).toEqual(1); 71 | expect(doc.children[0].type).toEqual('array'); 72 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 73 | expect(doc.errors[0].token.text).toEqual('ZZZZ'); 74 | }); 75 | 76 | testParseTokenFailsafe(parser, '{"s": true', null, doc => { 77 | expect(doc.errors.length).toEqual(1); 78 | expect(doc.children[0].type).toEqual('object'); 79 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 80 | 
expect(doc.errors[0].token.text).toEqual(''); 81 | }); 82 | 83 | testParseTokenFailsafe(parser, '{"s": true, ZZZZ', null, doc => { 84 | expect(doc.errors.length).toEqual(1); 85 | expect(doc.children[0].type).toEqual('object'); 86 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 87 | expect(doc.errors[0].token.text).toEqual(', ZZZZ'); 88 | }); 89 | 90 | testParseTokenFailsafe(parser, '{"s": true, ZZZZ, "b": false', null, doc => { 91 | expect(doc.errors.length).toEqual(2); 92 | expect(doc.children[0].type).toEqual('object'); 93 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 94 | expect(doc.errors[0].token.text).toEqual('ZZZZ'); 95 | }); 96 | 97 | testParseTokenFailsafe(parser, '[1, ZZZZ]', null, doc => { 98 | expect(doc.errors.length).toEqual(1); 99 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 100 | expect(doc.errors[0].token.text).toEqual('ZZZZ'); 101 | }); 102 | 103 | testParseTokenFailsafe(parser, '[1, ZZZZ, 1]', null, doc => { 104 | expect(doc.errors.length).toEqual(1); 105 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 106 | expect(doc.errors[0].token.text).toEqual('ZZZZ'); 107 | }); 108 | 109 | testParseTokenFailsafe(parser, '[ZZZZ, 1]', null, doc => { 110 | expect(doc.errors.length).toEqual(1); 111 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 112 | expect(doc.errors[0].token.text).toEqual('ZZZZ'); 113 | }); 114 | 115 | testParseTokenFailsafe(parser, '{"b": ZZZZ, "c": true}', null, doc => { 116 | expect(doc.errors.length).toEqual(1); 117 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 118 | expect(doc.errors[0].token.text).toEqual('ZZZZ'); 119 | }); 120 | 121 | testParseTokenFailsafe(parser, '{"a":false,"b": ZZZZ,"list":[1,2,3,true]}', null, doc => { 122 | expect(doc.errors.length).toEqual(1); 123 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 124 | expect(doc.errors[0].token.text).toEqual('ZZZZ'); 125 | }); 126 | }); 127 | }); 128 | 
-------------------------------------------------------------------------------- /test/Lookahead.spec.ts: -------------------------------------------------------------------------------- 1 | declare var describe, it, require; 2 | 3 | import { Grammars, Parser, IToken } from '../dist'; 4 | import { testParseToken, describeTree, printBNF } from './TestHelpers'; 5 | 6 | let inspect = require('util').inspect; 7 | let expect = require('expect'); 8 | 9 | describe('Lookahead Negative', () => { 10 | let parser = new Grammars.Custom.Parser( 11 | ` 12 | Document ::= ((Boolean | IdentifieR) " "*)+ 13 | IdentifieR ::= [a-zA-Z]+ 14 | Boolean ::= ("true" | "false") !IdentifieR 15 | `, 16 | {} 17 | ); 18 | 19 | // printBNF(parser); 20 | 21 | testParseToken(parser, 'true', 'Boolean', doc => { 22 | expect(doc.type).toEqual('Boolean'); 23 | }); 24 | testParseToken(parser, 'false', 'Boolean', doc => { 25 | expect(doc.type).toEqual('Boolean'); 26 | }); 27 | 28 | testParseToken(parser, 'keyword', null, doc => { 29 | expect(doc.children[0].type).toEqual('IdentifieR'); 30 | }); 31 | 32 | testParseToken(parser, 'true', null, doc => { 33 | expect(doc.children[0].type).toEqual('Boolean'); 34 | }); 35 | testParseToken(parser, 'false', null, doc => { 36 | expect(doc.children[0].type).toEqual('Boolean'); 37 | }); 38 | testParseToken(parser, 'trueAAA', null, doc => { 39 | expect(doc.children[0].type).toEqual('IdentifieR'); 40 | }); 41 | testParseToken(parser, 'falseaAAA', null, doc => { 42 | expect(doc.children[0].type).toEqual('IdentifieR'); 43 | }); 44 | 45 | testParseToken(parser, 'keyword a', null, doc => { 46 | expect(doc.children[0].type).toEqual('IdentifieR'); 47 | expect(doc.children[1].type).toEqual('IdentifieR'); 48 | }); 49 | testParseToken(parser, 'true a', null, doc => { 50 | expect(doc.children[0].type).toEqual('Boolean'); 51 | expect(doc.children[1].type).toEqual('IdentifieR'); 52 | }); 53 | testParseToken(parser, 'false a', null, doc => { 54 | 
expect(doc.children[0].type).toEqual('Boolean'); 55 | expect(doc.children[1].type).toEqual('IdentifieR'); 56 | }); 57 | testParseToken(parser, 'trueAAA a', null, doc => { 58 | expect(doc.children[0].type).toEqual('IdentifieR'); 59 | expect(doc.children[1].type).toEqual('IdentifieR'); 60 | }); 61 | testParseToken(parser, 'falseaAAA a', null, doc => { 62 | expect(doc.children[0].type).toEqual('IdentifieR'); 63 | expect(doc.children[1].type).toEqual('IdentifieR'); 64 | }); 65 | testParseToken(parser, 'falseaAAA a', null, doc => { 66 | expect(doc.children[0].type).toEqual('IdentifieR'); 67 | expect(doc.children[1].type).toEqual('IdentifieR'); 68 | }); 69 | }); 70 | 71 | describe('Lookahead Positive', () => { 72 | let parser = new Grammars.Custom.Parser( 73 | ` 74 | Document ::= ((Boolean | IdentifieR) " "*)+ 75 | IdentifieR ::= [a-zA-Z]+ 76 | Boolean ::= ("true" | "false") &Eol 77 | Eol ::= " " | Eof 78 | Eof ::= EOF 79 | `, 80 | {} 81 | ); 82 | 83 | // printBNF(parser); 84 | 85 | testParseToken(parser, 'true', 'Boolean', doc => { 86 | expect(doc.type).toEqual('Boolean'); 87 | }); 88 | testParseToken(parser, 'false', 'Boolean', doc => { 89 | expect(doc.type).toEqual('Boolean'); 90 | }); 91 | 92 | testParseToken(parser, 'keyword', null, doc => { 93 | expect(doc.children[0].type).toEqual('IdentifieR'); 94 | }); 95 | 96 | testParseToken(parser, 'true', null, doc => { 97 | expect(doc.children[0].type).toEqual('Boolean'); 98 | }); 99 | testParseToken(parser, 'false', null, doc => { 100 | expect(doc.children[0].type).toEqual('Boolean'); 101 | }); 102 | testParseToken(parser, 'trueAAA', null, doc => { 103 | expect(doc.children[0].type).toEqual('IdentifieR'); 104 | }); 105 | testParseToken(parser, 'falseaAAA', null, doc => { 106 | expect(doc.children[0].type).toEqual('IdentifieR'); 107 | }); 108 | 109 | testParseToken(parser, 'keyword a', null, doc => { 110 | expect(doc.children[0].type).toEqual('IdentifieR'); 111 | expect(doc.children[1].type).toEqual('IdentifieR'); 112 | }); 
113 | testParseToken(parser, 'true a', null, doc => { 114 | expect(doc.children[0].type).toEqual('Boolean'); 115 | expect(doc.children[1].type).toEqual('IdentifieR'); 116 | }); 117 | testParseToken(parser, 'false a', null, doc => { 118 | expect(doc.children[0].type).toEqual('Boolean'); 119 | expect(doc.children[1].type).toEqual('IdentifieR'); 120 | }); 121 | testParseToken(parser, 'trueAAA a', null, doc => { 122 | expect(doc.children[0].type).toEqual('IdentifieR'); 123 | expect(doc.children[1].type).toEqual('IdentifieR'); 124 | }); 125 | testParseToken(parser, 'falseaAAA a', null, doc => { 126 | expect(doc.children[0].type).toEqual('IdentifieR'); 127 | expect(doc.children[1].type).toEqual('IdentifieR'); 128 | }); 129 | testParseToken(parser, 'falseaAAA a', null, doc => { 130 | expect(doc.children[0].type).toEqual('IdentifieR'); 131 | expect(doc.children[1].type).toEqual('IdentifieR'); 132 | }); 133 | }); 134 | 135 | describe('Lookahead Positive', () => { 136 | let parser = new Grammars.Custom.Parser( 137 | ` 138 | Document ::= ((FunctionName | Identifier | Parenthesis) " "*)+ 139 | Identifier ::= [a-zA-Z_]+ 140 | FunctionName ::= Identifier &"(" 141 | Parenthesis ::= "(" ( !")" [.])* ")" 142 | `, 143 | {} 144 | ); 145 | 146 | testParseToken(parser, '()', null, doc => { 147 | expect(doc.children[0].type).toEqual('Parenthesis'); 148 | }); 149 | testParseToken(parser, 'hola', null, doc => { 150 | expect(doc.children[0].type).toEqual('Identifier'); 151 | }); 152 | testParseToken(parser, 'hola()', null, doc => { 153 | expect(doc.children[0].type).toEqual('FunctionName'); 154 | expect(doc.children[1].type).toEqual('Parenthesis'); 155 | }); 156 | }); 157 | 158 | // describe('Empty', () => { 159 | // let parser = new Grammars.W3C.Parser(` 160 | // TextAndEmpty ::= "hi" "" 161 | // `, {}); 162 | 163 | // console.log('TextAndEmpty:\n' + inspect(parser.grammarRules, false, 20, true)); 164 | 165 | // printBNF(parser); 166 | 167 | // parser.debug = true; 168 | 169 | // 
testParseToken(parser, 'hi'); 170 | // }); 171 | -------------------------------------------------------------------------------- /src/Grammars/BNF.ts: -------------------------------------------------------------------------------- 1 | // https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form 2 | 3 | /* 4 | syntax ::= RULE_EOL* rule+ 5 | rule ::= " "* "<" rule-name ">" " "* "::=" firstExpression otherExpression* " "* RULE_EOL+ " "* 6 | firstExpression ::= " "* list 7 | otherExpression ::= " "* "|" " "* list 8 | RULE_EOL ::= "\r" | "\n" 9 | list ::= term " "* list | term 10 | term ::= literal | "<" rule-name ">" 11 | literal ::= '"' RULE_CHARACTER1* '"' | "'" RULE_CHARACTER2* "'" 12 | RULE_CHARACTER ::= " " | RULE_LETTER | RULE_DIGIT | RULE_SYMBOL 13 | RULE_LETTER ::= "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" 14 | RULE_DIGIT ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 15 | RULE_SYMBOL ::= "-" | "_" | "!" | "#" | "$" | "%" | "&" | "(" | ")" | "*" | "+" | "," | "-" | "." | "/" | ":" | ";" | "<" | "=" | ">" | "?" 
| "@" | "[" | "\" | "]" | "^" | "_" | "`" | "{" | "|" | "}" | "~" 16 | RULE_CHARACTER1 ::= RULE_CHARACTER | "'" 17 | RULE_CHARACTER2 ::= RULE_CHARACTER | '"' 18 | rule-name ::= RULE_LETTER RULE_CHAR* 19 | RULE_CHAR ::= RULE_LETTER | RULE_DIGIT | "_" | "-" 20 | */ 21 | 22 | import { findChildrenByType } from '../SemanticHelpers'; 23 | 24 | import { IRule, Parser as _Parser, IToken } from '../Parser'; 25 | import { IGrammarParserOptions } from './types'; 26 | 27 | namespace BNF { 28 | export const RULES: IRule[] = [ 29 | { 30 | name: 'syntax', 31 | bnf: [['RULE_EOL*', 'rule+']] 32 | }, 33 | { 34 | name: 'rule', 35 | bnf: [ 36 | [ 37 | '" "*', 38 | '"<"', 39 | 'rule-name', 40 | '">"', 41 | '" "*', 42 | '"::="', 43 | 'firstExpression', 44 | 'otherExpression*', 45 | '" "*', 46 | 'RULE_EOL+', 47 | '" "*' 48 | ] 49 | ] 50 | }, 51 | { 52 | name: 'firstExpression', 53 | bnf: [['" "*', 'list']] 54 | }, 55 | { 56 | name: 'otherExpression', 57 | bnf: [['" "*', '"|"', '" "*', 'list']] 58 | }, 59 | { 60 | name: 'RULE_EOL', 61 | bnf: [['"\\r"'], ['"\\n"']] 62 | }, 63 | { 64 | name: 'list', 65 | bnf: [['term', '" "*', 'list'], ['term']] 66 | }, 67 | { 68 | name: 'term', 69 | bnf: [['literal'], ['"<"', 'rule-name', '">"']] 70 | }, 71 | { 72 | name: 'literal', 73 | bnf: [[`'"'`, 'RULE_CHARACTER1*', `'"'`], [`"'"`, 'RULE_CHARACTER2*', `"'"`]] 74 | }, 75 | { 76 | name: 'RULE_CHARACTER', 77 | bnf: [['" "'], ['RULE_LETTER'], ['RULE_DIGIT'], ['RULE_SYMBOL']] 78 | }, 79 | { 80 | name: 'RULE_LETTER', 81 | bnf: [ 82 | ['"A"'], 83 | ['"B"'], 84 | ['"C"'], 85 | ['"D"'], 86 | ['"E"'], 87 | ['"F"'], 88 | ['"G"'], 89 | ['"H"'], 90 | ['"I"'], 91 | ['"J"'], 92 | ['"K"'], 93 | ['"L"'], 94 | ['"M"'], 95 | ['"N"'], 96 | ['"O"'], 97 | ['"P"'], 98 | ['"Q"'], 99 | ['"R"'], 100 | ['"S"'], 101 | ['"T"'], 102 | ['"U"'], 103 | ['"V"'], 104 | ['"W"'], 105 | ['"X"'], 106 | ['"Y"'], 107 | ['"Z"'], 108 | ['"a"'], 109 | ['"b"'], 110 | ['"c"'], 111 | ['"d"'], 112 | ['"e"'], 113 | ['"f"'], 114 | ['"g"'], 115 | 
['"h"'], 116 | ['"i"'], 117 | ['"j"'], 118 | ['"k"'], 119 | ['"l"'], 120 | ['"m"'], 121 | ['"n"'], 122 | ['"o"'], 123 | ['"p"'], 124 | ['"q"'], 125 | ['"r"'], 126 | ['"s"'], 127 | ['"t"'], 128 | ['"u"'], 129 | ['"v"'], 130 | ['"w"'], 131 | ['"x"'], 132 | ['"y"'], 133 | ['"z"'] 134 | ] 135 | }, 136 | { 137 | name: 'RULE_DIGIT', 138 | bnf: [['"0"'], ['"1"'], ['"2"'], ['"3"'], ['"4"'], ['"5"'], ['"6"'], ['"7"'], ['"8"'], ['"9"']] 139 | }, 140 | { 141 | name: 'RULE_SYMBOL', 142 | bnf: [ 143 | ['"-"'], 144 | ['"_"'], 145 | ['"!"'], 146 | ['"#"'], 147 | ['"$"'], 148 | ['"%"'], 149 | ['"&"'], 150 | ['"("'], 151 | ['")"'], 152 | ['"*"'], 153 | ['"+"'], 154 | ['","'], 155 | ['"-"'], 156 | ['"."'], 157 | ['"/"'], 158 | ['":"'], 159 | ['";"'], 160 | ['"<"'], 161 | ['"="'], 162 | ['">"'], 163 | ['"?"'], 164 | ['"@"'], 165 | ['"["'], 166 | ['"\\"'], 167 | ['"]"'], 168 | ['"^"'], 169 | ['"_"'], 170 | ['"`"'], 171 | ['"{"'], 172 | ['"|"'], 173 | ['"}"'], 174 | ['"~"'] 175 | ] 176 | }, 177 | { 178 | name: 'RULE_CHARACTER1', 179 | bnf: [['RULE_CHARACTER'], [`"'"`]] 180 | }, 181 | { 182 | name: 'RULE_CHARACTER2', 183 | bnf: [['RULE_CHARACTER'], [`'"'`]] 184 | }, 185 | { 186 | name: 'rule-name', 187 | bnf: [['RULE_LETTER', 'RULE_CHAR*']] 188 | }, 189 | { 190 | name: 'RULE_CHAR', 191 | bnf: [['RULE_LETTER'], ['RULE_DIGIT'], ['"_"'], ['"-"']] 192 | } 193 | ]; 194 | 195 | export const defaultParser = new _Parser(RULES, { debug: false }); 196 | 197 | function getAllTerms(expr: IToken): string[] { 198 | let terms = findChildrenByType(expr, 'term').map(term => { 199 | return findChildrenByType(term, 'literal').concat(findChildrenByType(term, 'rule-name'))[0].text; 200 | }); 201 | 202 | findChildrenByType(expr, 'list').forEach(expr => { 203 | terms = terms.concat(getAllTerms(expr)); 204 | }); 205 | 206 | return terms; 207 | } 208 | 209 | export function getRules(source: string, parser: _Parser = defaultParser): IRule[] { 210 | let ast = parser.getAST(source); 211 | 212 | if (!ast) throw new 
Error('Could not parse ' + source); 213 | 214 | if (ast.errors && ast.errors.length) { 215 | throw ast.errors[0]; 216 | } 217 | 218 | let rules = findChildrenByType(ast, 'rule'); 219 | 220 | let ret = rules.map((rule): IRule => { 221 | let name = findChildrenByType(rule, 'rule-name')[0].text; 222 | 223 | let expressions = findChildrenByType(rule, 'firstExpression').concat(findChildrenByType(rule, 'otherExpression')); 224 | 225 | let bnf = []; 226 | 227 | expressions.forEach(expr => { 228 | bnf.push(getAllTerms(expr)); 229 | }); 230 | 231 | return { 232 | name: name, 233 | bnf 234 | }; 235 | }); 236 | 237 | if (!ret.some(x => x.name == 'EOL')) { 238 | ret.push({ 239 | name: 'EOL', 240 | bnf: [['"\\r\\n"', '"\\r"', '"\\n"']] 241 | }); 242 | } 243 | 244 | return ret; 245 | } 246 | 247 | export function Transform(source: TemplateStringsArray, subParser: _Parser = defaultParser): IRule[] { 248 | return getRules(source.join(''), subParser); 249 | } 250 | 251 | export class Parser extends _Parser { 252 | private readonly source: string; 253 | constructor(source: string, options?: Partial) { 254 | const subParser = options && options.debugRulesParser === true ? new _Parser(BNF.RULES, { debug: true }) : defaultParser; 255 | super(getRules(source, subParser), options); 256 | this.source = source; 257 | } 258 | 259 | emitSource(): string { 260 | return this.source; 261 | } 262 | } 263 | } 264 | 265 | export default BNF; 266 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Coverage Status](https://coveralls.io/repos/github/menduz/node-ebnf/badge.svg?branch=master)](https://coveralls.io/github/menduz/node-ebnf?branch=master) 2 | [![Build Status](https://travis-ci.org/lys-lang/node-ebnf.svg?branch=master)](https://travis-ci.org/lys-lang/node-ebnf) 3 | 4 | # What is this? 5 | 6 | It parses a formal grammar and returns a parser instance. 
That parser returns ASTs based on your grammar. [**SEE IT IN ACTION HERE**](http://menduz.com/ebnf-highlighter/) [GitHub source](https://github.com/menduz/ebnf-highlighter) 7 | 8 | # Install 9 | 10 | `npm i --save ebnf` (It's compatible with WebPack, and Browserify) 11 | 12 | # Usage 13 | 14 | By the moment we only accept two grammars. [BNF](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form) and [W3C EBNF](http://www.w3.org/TR/xquery/#EBNFNotation) (compatible with [Railroad Diagram Generator](http://www.bottlecaps.de/rr/ui)) 15 | 16 | ## Create a parser 17 | 18 | ```typescript 19 | import { Grammars } from 'ebnf'; 20 | 21 | let bnfParser = new Grammars.BNF.Parser(bnfGrammar); 22 | let w3cParser = new Grammars.W3C.Parser(railRoadGeneratorGrammar); 23 | ``` 24 | 25 | [**Check out the test folder for more examples**](https://github.com/menduz/node-ebnf/tree/master/test) 26 | 27 | ## BNF Equation example 28 | 29 | In this example we use plain BNF to create a simple integer formula parser 30 | 31 | Grammar: 32 | 33 | ```ebnf 34 | ::= | 35 | ::= "(" ")" | "(" ")" | 36 | ::= 37 | 38 | ::= | | 39 | ::= "+" | "-" | "*" | "/" | "^" 40 | 41 | ::= | 42 | ::= "-" 43 | ::= "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 44 | ::= "0" | 45 | ::= | "" 46 | ::= " " | | " " | 47 | ``` 48 | 49 | ```typescript 50 | import { Grammars } from 'ebnf'; 51 | let parser = new Grammars.BNF.Parser(grammar); 52 | 53 | parser.getAST('-122 + 2'); 54 | /* -122 + 2 Equation 55 | -122 + 2 BinaryOperation 56 | -122 Term 57 | -122 Number 58 | + Operator 59 | 2 Term 60 | 2 Number 61 | */ 62 | parser.getAST( '(2 + (2 * -123)) * 5332'); 63 | /* (2 + (2 * -123)) * 5332 Equation 64 | (2 + (2 * -123)) * 5332 BinaryOperation 65 | (2 + (2 * -123)) Term 66 | 2 + (2 * -123) Equation 67 | 2 + (2 * -123) BinaryOperation 68 | 2 Term 69 | 2 Number 70 | + Operator 71 | (2 * -123) Term 72 | 2 * -123 Equation 73 | 2 * -123 BinaryOperation 74 | 2 Term 75 | 2 Number 76 | * Operator 77 | -123 Term 78 | -123 
Number 79 | * Operator 80 | 5332 Term 81 | 5332 Number 82 | */ 83 | ``` 84 | 85 | ## JSON example 86 | 87 | ```wbnf 88 | /* https://www.ietf.org/rfc/rfc4627.txt */ 89 | value ::= false | null | true | object | array | number | string 90 | BEGIN_ARRAY ::= WS* #x5B WS* /* [ left square bracket */ 91 | BEGIN_OBJECT ::= WS* #x7B WS* /* { left curly bracket */ 92 | END_ARRAY ::= WS* #x5D WS* /* ] right square bracket */ 93 | END_OBJECT ::= WS* #x7D WS* /* } right curly bracket */ 94 | NAME_SEPARATOR ::= WS* #x3A WS* /* : colon */ 95 | VALUE_SEPARATOR ::= WS* #x2C WS* /* , comma */ 96 | WS ::= [#x20#x09#x0A#x0D]+ /* Space | Tab | \n | \r */ 97 | false ::= "false" 98 | null ::= "null" 99 | true ::= "true" 100 | object ::= BEGIN_OBJECT (member (VALUE_SEPARATOR member)*)? END_OBJECT 101 | member ::= string NAME_SEPARATOR value 102 | array ::= BEGIN_ARRAY (value (VALUE_SEPARATOR value)*)? END_ARRAY 103 | 104 | number ::= "-"? ("0" | [1-9] [0-9]*) ("." [0-9]+)? (("e" | "E") ( "-" | "+" )? ("0" | [1-9] [0-9]*))? 
105 | 106 | /* STRINGS */ 107 | 108 | string ::= '"' (([#x20-#x21] | [#x23-#x5B] | [#x5D-#xFFFF]) | #x5C (#x22 | #x5C | #x2F | #x62 | #x66 | #x6E | #x72 | #x74 | #x75 HEXDIG HEXDIG HEXDIG HEXDIG))* '"' 109 | HEXDIG ::= [a-fA-F0-9] 110 | ``` 111 | 112 | ```typescript 113 | import { Grammars } from 'ebnf'; 114 | let parser = new Grammars.W3C.Parser(grammar); 115 | 116 | parser.getAST( '{"a":false,"b":"asd\\n asd ","list":[1,2,3,true]}'); 117 | /* {"a":false,"b":"asd\n asd ","list":[1,2,3,true]} value 118 | {"a":false,"b":"asd\n asd ","list":[1,2,3,true]} object 119 | "a":false member 120 | "a" string 121 | false value 122 | false false 123 | "b":"asd\n asd " member 124 | "b" string 125 | "asd\n asd " value 126 | "asd\n asd " string 127 | "list":[1,2,3,true] member 128 | "list" string 129 | [1,2,3,true] value 130 | [1,2,3,true] array 131 | 1 value 132 | 1 number 133 | 2 value 134 | 2 number 135 | 3 value 136 | 3 number 137 | true value 138 | true true 139 | */ 140 | ``` 141 | 142 | ## AST 143 | 144 | Every AST node has the following interface 145 | 146 | ```typescript 147 | interface IToken { 148 | type: string; // Rule name 149 | text: string; // Inner text 150 | children: IToken[]; // Children nodes 151 | start: number; // Start position of the input string 152 | end: number; // End position 153 | errors: TokenError[]; // List of Errors 154 | } 155 | ``` 156 | 157 | ## Conventions 158 | 159 | We try to keep this tool as much unopinionated and free of conventions as possible. However, we have some conventions: 160 | - All `UPPER_AND_SNAKE_CASE` rules are not emitted on the AST. This option can be deactivated by setting the flag `keepUpperRules: true`. 
161 | -------------------------------------------------------------------------------- /src/Grammars/W3CEBNF.ts: -------------------------------------------------------------------------------- 1 | // https://www.w3.org/TR/REC-xml/#NT-Name 2 | // http://www.bottlecaps.de/rr/ui 3 | 4 | // Grammar ::= Production* 5 | // Production ::= NCName '::=' Choice 6 | // NCName ::= [http://www.w3.org/TR/xml-names/#NT-NCName] 7 | // Choice ::= SequenceOrDifference ( '|' SequenceOrDifference )* 8 | // SequenceOrDifference ::= (Item ( '-' Item | Item* ))? 9 | // Item ::= Primary ( '?' | '*' | '+' )? 10 | // Primary ::= NCName | StringLiteral | CharCode | CharClass | '(' Choice ')' 11 | // StringLiteral ::= '"' [^"]* '"' | "'" [^']* "'" 12 | // CharCode ::= '#x' [0-9a-fA-F]+ 13 | // CharClass ::= '[' '^'? ( RULE_Char | CharCode | CharRange | CharCodeRange )+ ']' 14 | // RULE_Char ::= [http://www.w3.org/TR/xml#NT-RULE_Char] 15 | // CharRange ::= RULE_Char '-' ( RULE_Char - ']' ) 16 | // CharCodeRange ::= CharCode '-' CharCode 17 | // RULE_WHITESPACE ::= RULE_S | Comment 18 | // RULE_S ::= #x9 | #xA | #xD | #x20 19 | // Comment ::= '/*' ( [^*] | '*'+ [^*/] )* '*'* '*/' 20 | 21 | import { IRule, Parser as _Parser, IToken, findRuleByName } from '../Parser'; 22 | import { IGrammarParserOptions } from './types'; 23 | 24 | namespace BNF { 25 | export const RULES: IRule[] = [ 26 | { 27 | name: 'Grammar', 28 | bnf: [['RULE_S*', '%Atomic*', 'EOF']] 29 | }, 30 | { 31 | name: '%Atomic', 32 | bnf: [['Production', 'RULE_S*']], 33 | fragment: true 34 | }, 35 | { 36 | name: 'Production', 37 | bnf: [['NCName', 'RULE_S*', '"::="', 'RULE_WHITESPACE*', 'Choice', 'RULE_WHITESPACE*', 'RULE_EOL+', 'RULE_S*']] 38 | }, 39 | { 40 | name: 'NCName', 41 | bnf: [[/[a-zA-Z][a-zA-Z_0-9]*/]] 42 | }, 43 | { 44 | name: 'Choice', 45 | bnf: [['SequenceOrDifference', '%_Choice_1*']], 46 | fragment: true 47 | }, 48 | { 49 | name: '%_Choice_1', 50 | bnf: [['RULE_WHITESPACE*', '"|"', 'RULE_WHITESPACE*', 
'SequenceOrDifference']], 51 | fragment: true 52 | }, 53 | { 54 | name: 'SequenceOrDifference', 55 | bnf: [['Item', 'RULE_WHITESPACE*', '%_Item_1?']] 56 | }, 57 | { 58 | name: '%_Item_1', 59 | bnf: [['Minus', 'Item'], ['Item*']], 60 | fragment: true 61 | }, 62 | { 63 | name: 'Minus', 64 | bnf: [['"-"']] 65 | }, 66 | { 67 | name: 'Item', 68 | bnf: [['RULE_WHITESPACE*', '%Primary', 'PrimaryDecoration?']], 69 | fragment: true 70 | }, 71 | { 72 | name: 'PrimaryDecoration', 73 | bnf: [['"?"'], ['"*"'], ['"+"']] 74 | }, 75 | { 76 | name: 'DecorationName', 77 | bnf: [['"ebnf://"', /[^\x5D#]+/]] 78 | }, 79 | { 80 | name: '%Primary', 81 | bnf: [['NCName'], ['StringLiteral'], ['CharCode'], ['CharClass'], ['SubItem']], 82 | fragment: true 83 | }, 84 | { 85 | name: 'SubItem', 86 | bnf: [['"("', 'RULE_WHITESPACE*', 'Choice', 'RULE_WHITESPACE*', '")"']] 87 | }, 88 | { 89 | name: 'StringLiteral', 90 | bnf: [[`'"'`, /[^"]*/, `'"'`], [`"'"`, /[^']*/, `"'"`]], 91 | pinned: 1 92 | }, 93 | { 94 | name: 'CharCode', 95 | bnf: [['"#x"', /[0-9a-zA-Z]+/]] 96 | }, 97 | { 98 | name: 'CharClass', 99 | bnf: [["'['", "'^'?", '%RULE_CharClass_1+', '"]"']] 100 | }, 101 | { 102 | name: '%RULE_CharClass_1', 103 | bnf: [['CharCodeRange'], ['CharRange'], ['CharCode'], ['RULE_Char']], 104 | fragment: true 105 | }, 106 | { 107 | name: 'RULE_Char', 108 | bnf: [[/\x09/], [/\x0A/], [/\x0D/], [/[\x20-\x5c]/], [/[\x5e-\uD7FF]/], [/[\uE000-\uFFFD]/]] 109 | }, 110 | { 111 | name: 'CharRange', 112 | bnf: [['RULE_Char', '"-"', 'RULE_Char']] 113 | }, 114 | { 115 | name: 'CharCodeRange', 116 | bnf: [['CharCode', '"-"', 'CharCode']] 117 | }, 118 | { 119 | name: 'RULE_WHITESPACE', 120 | bnf: [['%RULE_WHITESPACE_CHAR*'], ['Comment', 'RULE_WHITESPACE*']] 121 | }, 122 | { 123 | name: 'RULE_S', 124 | bnf: [['RULE_WHITESPACE', 'RULE_S*'], ['RULE_EOL', 'RULE_S*']] 125 | }, 126 | { 127 | name: '%RULE_WHITESPACE_CHAR', 128 | bnf: [[/\x09/], [/\x20/]], 129 | fragment: true 130 | }, 131 | { 132 | name: 'Comment', 133 | bnf: 
[['"/*"', '%RULE_Comment_Body*', '"*/"']] 134 | }, 135 | { 136 | name: '%RULE_Comment_Body', 137 | bnf: [['!"*/"', /[^*]/]], 138 | fragment: true 139 | }, 140 | { 141 | name: 'RULE_EOL', 142 | bnf: [[/\x0D/, /\x0A/], [/\x0A/], [/\x0D/]] 143 | }, 144 | { 145 | name: 'Link', 146 | bnf: [["'['", 'Url', "']'"]] 147 | }, 148 | { 149 | name: 'Url', 150 | bnf: [[/[^\x5D:/?#]/, '"://"', /[^\x5D#]+/, '%Url1?']] 151 | }, 152 | { 153 | name: '%Url1', 154 | bnf: [['"#"', 'NCName']], 155 | fragment: true 156 | } 157 | ]; 158 | 159 | export const defaultParser = new _Parser(RULES, { debug: false }); 160 | 161 | const preDecorationRE = /^(!|&)/; 162 | const decorationRE = /(\?|\+|\*)$/; 163 | const subExpressionRE = /^%/; 164 | 165 | function getBNFRule(name: string | RegExp, parser: Parser): string { 166 | if (typeof name == 'string') { 167 | if (preDecorationRE.test(name)) return ''; 168 | 169 | let subexpression = subExpressionRE.test(name); 170 | 171 | if (subexpression) { 172 | let decoration = decorationRE.exec(name); 173 | let decorationText = decoration ? 
decoration[0] + ' ' : '';
        // "Lonely" rules (a single literal/regex body) don't need wrapping parentheses.
        let lonely = isLonelyRule(name, parser);

        if (lonely) return getBNFBody(name, parser) + decorationText;

        return '(' + getBNFBody(name, parser) + ')' + decorationText;
      }

      return name;
    } else {
      // RegExp terminal: convert JS escapes (\xNN / \uNNNN) back into W3C #xNN notation,
      // both for bare codes and for [a-b] character-class ranges.
      return name.source
        .replace(/\\(?:x|u)([a-zA-Z0-9]+)/g, '#x$1')
        .replace(/\[\\(?:x|u)([a-zA-Z0-9]+)-\\(?:x|u)([a-zA-Z0-9]+)\]/g, '[#x$1-#x$2]');
    }
  }

  /// Returns true if the rule is a string literal or regular expression without a descendant tree
  function isLonelyRule(name: string, parser: Parser) {
    let rule = findRuleByName(name, parser);
    return (
      rule &&
      rule.bnf.length == 1 &&
      rule.bnf[0].length == 1 &&
      (rule.bnf[0][0] instanceof RegExp || rule.bnf[0][0][0] == '"' || rule.bnf[0][0][0] == "'")
    );
  }

  /** Serializes one BNF sequence (an array of terms) as space-separated W3C EBNF text. */
  function getBNFChoice(rules, parser: Parser) {
    return rules.map(x => getBNFRule(x, parser)).join(' ');
  }

  /**
   * Serializes every alternative of the named rule, joined with '|'.
   * Emits a visible RULE_NOT_FOUND marker instead of throwing when the rule is unknown.
   */
  function getBNFBody(name: string, parser: Parser): string {
    let rule = findRuleByName(name, parser);

    if (rule) return rule.bnf.map(x => getBNFChoice(x, parser)).join(' | ');

    return 'RULE_NOT_FOUND {' + name + '}';
  }

  /** Re-emits the parser's whole grammar as W3C EBNF source, skipping internal '%'-prefixed fragment rules. */
  export function emit(parser: Parser): string {
    let acumulator: string[] = [];

    parser.grammarRules.forEach(l => {
      if (!/^%/.test(l.name)) {
        let recover = l.recover ?
' /* { recoverUntil=' + l.recover + ' } */' : ''; 217 | 218 | acumulator.push(l.name + ' ::= ' + getBNFBody(l.name, parser) + recover); 219 | } 220 | }); 221 | 222 | return acumulator.join('\n'); 223 | } 224 | 225 | let subitems = 0; 226 | 227 | function restar(total, resta) { 228 | console.log('reberia restar ' + resta + ' a ' + total); 229 | throw new Error('Difference not supported yet'); 230 | } 231 | 232 | function convertRegex(txt: string): RegExp { 233 | return new RegExp( 234 | txt 235 | .replace(/#x([a-zA-Z0-9]{4})/g, '\\u$1') 236 | .replace(/#x([a-zA-Z0-9]{3})/g, '\\u0$1') 237 | .replace(/#x([a-zA-Z0-9]{2})/g, '\\x$1') 238 | .replace(/#x([a-zA-Z0-9]{1})/g, '\\x0$1') 239 | ); 240 | } 241 | 242 | function getSubItems(tmpRules, seq: IToken, parentName: string) { 243 | let anterior = null; 244 | let bnfSeq = []; 245 | 246 | seq.children.forEach((x, i) => { 247 | if (x.type == 'Minus') { 248 | restar(anterior, x); 249 | } else { 250 | } 251 | 252 | let decoration: any = seq.children[i + 1]; 253 | decoration = (decoration && decoration.type == 'PrimaryDecoration' && decoration.text) || ''; 254 | 255 | let preDecoration = ''; 256 | 257 | switch (x.type) { 258 | case 'SubItem': 259 | let name = '%' + (parentName + subitems++); 260 | 261 | createRule(tmpRules, x, name); 262 | 263 | bnfSeq.push(preDecoration + name + decoration); 264 | break; 265 | case 'NCName': 266 | case 'StringLiteral': 267 | bnfSeq.push(preDecoration + x.text + decoration); 268 | break; 269 | case 'CharCode': 270 | case 'CharClass': 271 | if (decoration || preDecoration) { 272 | let newRule = { 273 | name: '%' + (parentName + subitems++), 274 | bnf: [[convertRegex(x.text)]] 275 | }; 276 | 277 | tmpRules.push(newRule); 278 | 279 | bnfSeq.push(preDecoration + newRule.name + decoration); 280 | } else { 281 | bnfSeq.push(convertRegex(x.text)); 282 | } 283 | break; 284 | case 'PrimaryDecoration': 285 | break; 286 | default: 287 | throw new Error(' HOW SHOULD I PARSE THIS? 
' + x.type + ' -> ' + JSON.stringify(x.text)); 288 | } 289 | 290 | anterior = x; 291 | }); 292 | 293 | return bnfSeq; 294 | } 295 | 296 | function createRule(tmpRules: IRule[], token: IToken, name: string) { 297 | let bnf = token.children.filter(x => x.type == 'SequenceOrDifference').map(s => getSubItems(tmpRules, s, name)); 298 | 299 | let rule: IRule = { 300 | name, 301 | bnf 302 | }; 303 | 304 | let recover: string = null; 305 | 306 | bnf.forEach(x => { 307 | recover = recover || x['recover']; 308 | delete x['recover']; 309 | }); 310 | 311 | if (name.indexOf('%') == 0) rule.fragment = true; 312 | 313 | if (recover) rule.recover = recover; 314 | 315 | tmpRules.push(rule); 316 | } 317 | 318 | export function getRules(source: string, parser: _Parser = defaultParser): IRule[] { 319 | let ast = parser.getAST(source); 320 | 321 | if (!ast) throw new Error('Could not parse ' + source); 322 | 323 | if (ast.errors && ast.errors.length) { 324 | throw ast.errors[0]; 325 | } 326 | 327 | let tmpRules: IRule[] = []; 328 | 329 | ast.children.filter(x => x.type == 'Production').map((x: any) => { 330 | let name = x.children.filter(x => x.type == 'NCName')[0].text; 331 | createRule(tmpRules, x, name); 332 | }); 333 | 334 | return tmpRules; 335 | } 336 | 337 | export function Transform(source: TemplateStringsArray, subParser: _Parser = defaultParser): IRule[] { 338 | return getRules(source.join(''), subParser); 339 | } 340 | 341 | export class Parser extends _Parser { 342 | constructor(source: string, options?: Partial) { 343 | const subParser = options && options.debugRulesParser === true ? 
new _Parser(BNF.RULES, { debug: true }) : defaultParser; 344 | super(getRules(source, subParser), options); 345 | } 346 | 347 | emitSource(): string { 348 | return emit(this); 349 | } 350 | } 351 | } 352 | 353 | export default BNF; 354 | -------------------------------------------------------------------------------- /test/NewLang.spec.ts: -------------------------------------------------------------------------------- 1 | declare var describe, it, require; 2 | 3 | import { Grammars, Parser, IToken } from '../dist'; 4 | import { testParseToken, describeTree, printBNF, testParseTokenFailsafe } from './TestHelpers'; 5 | 6 | let inspect = require('util').inspect; 7 | let expect = require('expect'); 8 | 9 | let grammar = ` 10 | 11 | 12 | {ws=explicit} 13 | 14 | Document ::= Directives EOF? {ws=implicit} 15 | Directives ::= Directive Directives? {pin=1,ws=implicit,recoverUntil=DIRECTIVE_RECOVERY,fragment=true} 16 | Directive ::= FunctionDirective | ValDirective | VarDirective | StructDirective {fragment=true} 17 | 18 | FunctionDirective ::= EXPORT_KEYWORD? FUN_KEYWORD NameIdentifier FunctionparamList OfType? WS* AssignExpression {pin=2} 19 | ValDirective ::= EXPORT_KEYWORD? VAL_KEYWORD NameIdentifier OfType? WS* AssignExpression {pin=2} 20 | VarDirective ::= EXPORT_KEYWORD? VAR_KEYWORD NameIdentifier OfType? WS* AssignExpression {pin=2} 21 | StructDirective ::= EXPORT_KEYWORD? STRUCT_KEYWORD NameIdentifier {pin=2} 22 | 23 | AssignExpression ::= '=' WS* Expression {pin=2,fragment=true} 24 | OfType ::= COLON WS* Type WS* {pin=2,recoverUntil=NEXT_ARG_RECOVERY} 25 | 26 | FunctionparamList ::= OPEN_PAREN WS* ParameterList? WS* CLOSE_PAREN {pin=1,recoverUntil=CLOSE_PAREN} 27 | ParameterList ::= Parameter NthParameter* {fragment=true} 28 | NthParameter ::= ',' WS* Parameter WS* {pin=1,recoverUntil=NEXT_ARG_RECOVERY} 29 | Parameter ::= NameIdentifier WS* OfType {pin=1,recoverUntil=NEXT_ARG_RECOVERY} 30 | 31 | Type ::= WS* NameIdentifier IsPointer* IsArray? 
32 | IsPointer ::= '*' 33 | IsArray ::= '[]' 34 | 35 | Expression ::= OrExpression WS* (MatchExpression | BinaryExpression)* {simplifyWhenOneChildren=true} 36 | 37 | MatchExpression ::= MATCH_KEYWORD WS* MatchBody WS* {pin=1} 38 | BinaryExpression ::= NameIdentifier WS* OrExpression WS* {pin=1} 39 | 40 | OrExpression ::= AndExpression (WS+ 'or' WS+ AndExpression)? {simplifyWhenOneChildren=true} 41 | AndExpression ::= EqExpression (WS+ 'and' WS+ EqExpression)? {simplifyWhenOneChildren=true} 42 | EqExpression ::= RelExpression (WS* ('==' | '!=') WS* RelExpression)? {simplifyWhenOneChildren=true} 43 | RelExpression ::= ShiftExpression (WS* ('>=' | '<=' | '>' | '<') WS* ShiftExpression)? {simplifyWhenOneChildren=true} 44 | ShiftExpression ::= AddExpression (WS* ('>>' | '<<' | '>>>') WS* AddExpression)? {simplifyWhenOneChildren=true} 45 | AddExpression ::= MulExpression (WS* ('+' | '-') WS* MulExpression)? {simplifyWhenOneChildren=true} 46 | MulExpression ::= UnaryExpression (WS* ('*' | '/' | '%') WS* UnaryExpression)? {simplifyWhenOneChildren=true} 47 | UnaryExpression ::= NegExpression | UnaryMinus | IfExpression | FunctionCallExpression {simplifyWhenOneChildren=true} 48 | 49 | NegExpression ::= '!' OrExpression {pin=1} 50 | UnaryMinus ::= !NumberLiteral '-' OrExpression {pin=2} 51 | 52 | RefPointerOperator::= '*' | '&' 53 | RefExpression ::= RefPointerOperator VariableReference 54 | 55 | FunctionCallExpression 56 | ::= Value WS* (&'(' CallArguments)? 
{simplifyWhenOneChildren=true} 57 | 58 | Value ::= Literal | RefExpression | VariableReference | ParenExpression {fragment=true} 59 | ParenExpression ::= '(' WS* Expression WS* ')' {pin=3,recoverUntil=CLOSE_PAREN} 60 | 61 | IfExpression ::= 'if' 62 | 63 | /* Pattern matching */ 64 | MatchBody ::= '{' WS* MatchElements* '}' {pin=1,recoverUntil=MATCH_RECOVERY} 65 | 66 | MatchElements ::= (CaseCondition | CaseLiteral | CaseElse) WS* {fragment=true} 67 | 68 | CaseCondition ::= CASE_KEYWORD WS+ NameIdentifier WS+ IF_KEYWORD WS* Expression '->' WS* Expression {pin=5} 69 | CaseLiteral ::= CASE_KEYWORD WS+ Literal WS* '->' WS* Expression {pin=3} 70 | CaseElse ::= ELSE_KEYWORD WS* '->' WS* Expression {pin=3} 71 | 72 | /* Function call */ 73 | CallArguments ::= OPEN_PAREN Arguments? CLOSE_PAREN {pin=1,recoverUntil=PAREN_RECOVERY} 74 | Arguments ::= WS* Expression WS* NthArgument* {fragment=true} 75 | NthArgument ::= ',' WS* Expression WS* {pin=1,fragment=true,recoverUntil=NEXT_ARG_RECOVERY} 76 | 77 | 78 | VariableReference ::= NameIdentifier 79 | 80 | BooleanLiteral ::= TRUE_KEYWORD | FALSE_KEYWORD 81 | NullLiteral ::= NULL_KEYWORD 82 | NumberLiteral ::= "-"? ("0" | [1-9] [0-9]*) ("." [0-9]+)? (("e" | "E") ( "-" | "+" )? ("0" | [1-9] [0-9]*))? 
{pin=2} 83 | StringLiteral ::= '"' (!'"' [#x20-#xFFFF])* '"' 84 | Literal ::= ( StringLiteral 85 | | NumberLiteral 86 | | BooleanLiteral 87 | | NullLiteral 88 | ) {fragment=true} 89 | 90 | NameIdentifier ::= !KEYWORD [A-Za-z_]([A-Za-z0-9_])* 91 | 92 | /* Keywords */ 93 | 94 | KEYWORD ::= TRUE_KEYWORD | FALSE_KEYWORD | NULL_KEYWORD | IF_KEYWORD | ELSE_KEYWORD | CASE_KEYWORD | VAR_KEYWORD | VAL_KEYWORD | FUN_KEYWORD | STRUCT_KEYWORD | EXPORT_KEYWORD | MATCH_KEYWORD | RESERVED_WORDS 95 | 96 | FUN_KEYWORD ::= 'fun' WS+ 97 | VAL_KEYWORD ::= 'val' WS+ 98 | VAR_KEYWORD ::= 'var' WS+ 99 | STRUCT_KEYWORD ::= 'struct' WS+ 100 | EXPORT_KEYWORD ::= 'export' WS+ 101 | 102 | RESERVED_WORDS ::= ( 'async' | 'await' | 'defer' 103 | | 'package' | 'declare' 104 | | 'using' 105 | | 'delete' 106 | | 'break' | 'continue' 107 | | 'let' | 'const' | 'void' 108 | | 'class' | 'private' | 'public' | 'protected' | 'extends' 109 | | 'import' | 'from' | 'abstract' 110 | | 'finally' | 'new' | 'native' | 'enum' | 'type' 111 | | 'yield' | 'for' | 'do' | 'while' | 'try' 112 | ) WS+ 113 | 114 | TRUE_KEYWORD ::= 'true' ![A-Za-z0-9_] 115 | FALSE_KEYWORD ::= 'false' ![A-Za-z0-9_] 116 | NULL_KEYWORD ::= 'null' ![A-Za-z0-9_] 117 | IF_KEYWORD ::= 'if' ![A-Za-z0-9_] 118 | ELSE_KEYWORD ::= 'else' ![A-Za-z0-9_] 119 | CASE_KEYWORD ::= 'case' ![A-Za-z0-9_] 120 | MATCH_KEYWORD ::= 'match' ![A-Za-z0-9_] 121 | 122 | 123 | 124 | /* Tokens */ 125 | 126 | DIRECTIVE_RECOVERY::= &(FUN_KEYWORD | VAL_KEYWORD | VAR_KEYWORD | STRUCT_KEYWORD | EXPORT_KEYWORD | RESERVED_WORDS) 127 | NEXT_ARG_RECOVERY ::= &(',' | ')') 128 | PAREN_RECOVERY ::= &(')') 129 | MATCH_RECOVERY ::= &('}' | 'case' | 'else') 130 | OPEN_PAREN ::= '(' 131 | CLOSE_PAREN ::= ')' 132 | COLON ::= ':' 133 | OPEN_DOC_COMMENT ::= '/*' 134 | CLOSE_DOC_COMMENT ::= '*/' 135 | DOC_COMMENT ::= !CLOSE_DOC_COMMENT [#x00-#xFFFF] 136 | 137 | Comment ::= '//' (![#x0A#x0D] [#x00-#xFFFF])* EOL 138 | MultiLineComment ::= OPEN_DOC_COMMENT DOC_COMMENT* CLOSE_DOC_COMMENT 
{pin=1} 139 | WS ::= Comment | MultiLineComment | [#x20#x09#x0A#x0D]+ {fragment=true} 140 | EOL ::= [#x0A#x0D]+|EOF 141 | 142 | `; 143 | 144 | describe('New lang', () => { 145 | describe('Parse JSON', () => { 146 | let parser: Parser; 147 | 148 | it('create parser', () => { 149 | parser = new Parser(Grammars.Custom.RULES, {}); 150 | testParseToken(parser, grammar); 151 | }); 152 | }); 153 | 154 | describe('Grammars.Custom parses JSON grammar', function() { 155 | let RULES = Grammars.Custom.getRules(grammar); 156 | console.log('JSON:\n' + inspect(RULES, false, 20, true)); 157 | let parser = new Parser(RULES, {}); 158 | 159 | printBNF(parser); 160 | 161 | function test(literals, ...placeholders) { 162 | let result = ''; 163 | 164 | // interleave the literals with the placeholders 165 | for (let i = 0; i < placeholders.length; i++) { 166 | result += literals[i]; 167 | result += placeholders[i]; 168 | } 169 | 170 | // add the last literal 171 | result += literals[literals.length - 1]; 172 | testParseToken(parser, result); 173 | } 174 | 175 | test`fun test() = 1`; 176 | 177 | test`fun test( a: MBER, b : NumBer) = 1`; 178 | 179 | test`export fun test() = 2`; 180 | 181 | test`var test: Double = 1`; 182 | test`var test = 1`; 183 | test`export var test = 1`; 184 | 185 | test`val test: Number = 1`; 186 | test`val test = 1`; 187 | 188 | test`val test = 1 * 1 - 2 / 4 and 1 == 3 or 4 <= 4`; 189 | 190 | test`val test = 1`; 191 | 192 | test`val test = 1 mul 4`; 193 | 194 | test`val floatingNumber: Number = 1.0`; 195 | test`val floatingNumber: Number = 0.0`; 196 | 197 | test`export val test = 1`; 198 | test`val test = true`; 199 | test`val test = false`; 200 | test`val test = null`; 201 | 202 | test`fun test(): Number = 1`; 203 | 204 | test`fun test(): Number = /*asd*/ 1`; 205 | test`fun test(): Number = /**/ 1`; 206 | 207 | test`export fun test(a: Number) = 2`; 208 | test`export fun test(a: Number, b: Type) = 2`; 209 | 210 | test`val test = 1 + (4 + 1)`; 211 | test`val test = (1 
+ 4) + 1`; 212 | 213 | test` 214 | export var test = 1 215 | var test2 = 1 216 | val test2 = 1 217 | `; 218 | 219 | test` 220 | var test = 1 221 | fun getTest() = test 222 | `; 223 | 224 | test`var test = 1 fun pointerOfTest() = &test `; 225 | 226 | test`var test: Entity* = 1 fun valueOfTest() = *test`; 227 | 228 | test`var test: Struct* = 1`; 229 | test`var test: Struct**** = 1`; 230 | 231 | test`var test: Struct[] = 1`; 232 | test`var test: Struct*[] = 1`; 233 | test`var test: Int64**[] = 1`; 234 | 235 | // test` 236 | // export struct Entity { 237 | // a: Number, 238 | // b: Entity*, 239 | // c: Number*[] 240 | // } 241 | // export var entities: Entity* = 1 242 | // export fun getTest() = test 243 | // `; 244 | 245 | test`val test = 1 match {}`; 246 | test`val test = 1 match { else -> 1 }`; 247 | test` 248 | val test = 1 match { 249 | case 2 -> true 250 | else -> false 251 | } 252 | `; 253 | 254 | test`val test = 1 match { case 2 -> true else -> false }`; 255 | 256 | test` 257 | val test = 1 match { 258 | case 2->true 259 | else->false 260 | } 261 | `; 262 | 263 | test` 264 | val test = 1 match { 265 | case 2 -> true 266 | else -> false 267 | } 268 | `; 269 | 270 | test` 271 | val test = 1 match { 272 | case x if true -> true 273 | case x if x < 1 and x < 10 -> true 274 | case 2 -> true 275 | else -> false 276 | } 277 | `; 278 | 279 | test`val test = 1 match { case x if x < 1 and x < 10 -> true }`; 280 | test`var a = x match { else -> 1 } map 1 * 2`; 281 | 282 | test`var a = !x()`; 283 | test`var a = x()`; 284 | 285 | testParseTokenFailsafe(parser, `export fun test(a: ) = 2`, null, doc => { 286 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 287 | expect(doc.errors[0].token.text).toEqual(') = 2'); 288 | }); 289 | testParseTokenFailsafe(parser, `export struct Entity asd val x = 1`, null, doc => { 290 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 291 | expect(doc.errors[0].token.text).toEqual('asd '); 292 | }); 293 | 
testParseTokenFailsafe(parser, `export struct Entity asd`, null, doc => { 294 | expect(doc.errors[0].message).toEqual('Unexpected end of input: \nasd'); 295 | }); 296 | testParseTokenFailsafe(parser, `struct Entity asd val x = 1`, null, doc => { 297 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 298 | expect(doc.errors[0].token.text).toEqual('asd '); 299 | }); 300 | testParseTokenFailsafe(parser, `struct Entity asd`, null, doc => { 301 | expect(doc.errors[0].message).toEqual('Unexpected end of input: \nasd'); 302 | }); 303 | 304 | testParseTokenFailsafe(parser, `export fun test(a: ,b: AType) = 2`, null, doc => { 305 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 306 | expect(doc.errors[0].token.text).toEqual(',b: AType) = 2'); 307 | }); 308 | 309 | testParseTokenFailsafe(parser, `export fun test() = 2 /*`, null, doc => { 310 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 311 | expect(doc.errors[0].token.text).toEqual(''); 312 | }); 313 | 314 | testParseTokenFailsafe(parser, `export fun test(a: 1) = 2`, null, doc => { 315 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 316 | expect(doc.errors[0].message).toEqual('Unexpected input: "1" Expecting: OfType'); 317 | expect(doc.errors[0].token.text).toEqual('1'); 318 | }); 319 | 320 | testParseTokenFailsafe(parser, 'export fun () = 1', null, doc => { 321 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 322 | expect(doc.errors[0].token.text).toEqual('() = 1'); 323 | }); 324 | 325 | testParseTokenFailsafe(parser, 'var a = .0', null, doc => { 326 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 327 | expect(doc.errors[0].token.text).toEqual('.0'); 328 | }); 329 | 330 | testParseTokenFailsafe(parser, 'var a = x match { else } map 1', null, doc => { 331 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 332 | expect(doc.errors[0].token.text).toEqual('else } map 1'); 333 | }); 334 | 335 | testParseTokenFailsafe(parser, 'var a = x match { else -> } map 
1', null, doc => { 336 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 337 | expect(doc.errors[0].token.text).toEqual('} map 1'); 338 | }); 339 | 340 | testParseTokenFailsafe(parser, 'var a = match', null, doc => { 341 | expect(doc.errors[0].token.type).toEqual('SyntaxError'); 342 | expect(doc.errors[0].token.text).toEqual('match'); 343 | }); 344 | 345 | test`val test = 1 map 1 map 2 map 3`; 346 | test`val test = x(1)`; 347 | test`val test = x(1,2)`; 348 | test`val test = (x)(1,2)`; 349 | test`val test = (x())(1,2)`; 350 | test`val test = x( 1 , 2 /* sdgf */)`; 351 | }); 352 | }); 353 | -------------------------------------------------------------------------------- /src/Grammars/Custom.ts: -------------------------------------------------------------------------------- 1 | // https://www.w3.org/TR/REC-xml/#NT-Name 2 | // http://www.bottlecaps.de/rr/ui 3 | 4 | // Grammar ::= Production* 5 | // Production ::= NCName '::=' Choice 6 | // NCName ::= [http://www.w3.org/TR/xml-names/#NT-NCName] 7 | // Choice ::= SequenceOrDifference ( '|' SequenceOrDifference )* 8 | // SequenceOrDifference ::= (Item ( '-' Item | Item* ))? 9 | // Item ::= Primary ( '?' | '*' | '+' )? 10 | // Primary ::= NCName | StringLiteral | CharCode | CharClass | '(' Choice ')' 11 | // StringLiteral ::= '"' [^"]* '"' | "'" [^']* "'" 12 | // CharCode ::= '#x' [0-9a-fA-F]+ 13 | // CharClass ::= '[' '^'? 
( RULE_Char | CharCode | CharRange | CharCodeRange )+ ']' 14 | // RULE_Char ::= [http://www.w3.org/TR/xml#NT-RULE_Char] 15 | // CharRange ::= RULE_Char '-' ( RULE_Char - ']' ) 16 | // CharCodeRange ::= CharCode '-' CharCode 17 | // RULE_WHITESPACE ::= RULE_S | Comment 18 | // RULE_S ::= #x9 | #xA | #xD | #x20 19 | // Comment ::= '/*' ( [^*] | '*'+ [^*/] )* '*'* '*/' 20 | 21 | import { TokenError } from '../TokenError'; 22 | import { IRule, Parser as _Parser, IToken, escapeRegExp, findRuleByName } from '../Parser'; 23 | import { IGrammarParserOptions } from './types'; 24 | 25 | namespace BNF { 26 | export const RULES: IRule[] = [ 27 | { 28 | name: 'Grammar', 29 | bnf: [['RULE_S*', 'Attributes?', 'RULE_S*', '%Atomic*', 'EOF']] 30 | }, 31 | { 32 | name: '%Atomic', 33 | bnf: [['Production', 'RULE_S*']], 34 | fragment: true 35 | }, 36 | { 37 | name: 'Production', 38 | bnf: [ 39 | [ 40 | 'NCName', 41 | 'RULE_S*', 42 | '"::="', 43 | 'RULE_WHITESPACE*', 44 | '%Choice', 45 | 'RULE_WHITESPACE*', 46 | 'Attributes?', 47 | 'RULE_EOL+', 48 | 'RULE_S*' 49 | ] 50 | ] 51 | }, 52 | { 53 | name: 'NCName', 54 | bnf: [[/[a-zA-Z][a-zA-Z_0-9]*/]] 55 | }, 56 | { 57 | name: 'Attributes', 58 | bnf: [['"{"', 'Attribute', '%Attributes*', 'RULE_S*', '"}"']] 59 | }, 60 | { 61 | name: '%Attributes', 62 | bnf: [['RULE_S*', '","', 'Attribute']], 63 | fragment: true 64 | }, 65 | { 66 | name: 'Attribute', 67 | bnf: [['RULE_S*', 'NCName', 'RULE_WHITESPACE*', '"="', 'RULE_WHITESPACE*', 'AttributeValue']] 68 | }, 69 | { 70 | name: 'AttributeValue', 71 | bnf: [['NCName'], [/[1-9][0-9]*/]] 72 | }, 73 | { 74 | name: '%Choice', 75 | bnf: [['SequenceOrDifference', '%_Choice_1*']], 76 | fragment: true 77 | }, 78 | { 79 | name: '%_Choice_1', 80 | bnf: [['RULE_S*', '"|"', 'RULE_S*', 'SequenceOrDifference']], 81 | fragment: true 82 | }, 83 | { 84 | name: 'SequenceOrDifference', 85 | bnf: [['%Item', 'RULE_WHITESPACE*', '%_Item_1?']] 86 | }, 87 | { 88 | name: '%_Item_1', 89 | bnf: [['Minus', '%Item'], 
['%Item*']], 90 | fragment: true 91 | }, 92 | { 93 | name: 'Minus', 94 | bnf: [['"-"']] 95 | }, 96 | { 97 | name: '%Item', 98 | bnf: [['RULE_WHITESPACE*', 'PrimaryPreDecoration?', '%Primary', 'PrimaryDecoration?']], 99 | fragment: true 100 | }, 101 | { 102 | name: 'PrimaryDecoration', 103 | bnf: [['"?"'], ['"*"'], ['"+"']] 104 | }, 105 | { 106 | name: 'PrimaryPreDecoration', 107 | bnf: [['"&"'], ['"!"'], ['"~"']] 108 | }, 109 | { 110 | name: '%Primary', 111 | bnf: [['NCName'], ['StringLiteral'], ['CharCode'], ['CharClass'], ['SubItem']], 112 | fragment: true 113 | }, 114 | { 115 | name: 'SubItem', 116 | bnf: [['"("', 'RULE_S*', '%Choice', 'RULE_S*', '")"']] 117 | }, 118 | { 119 | name: 'StringLiteral', 120 | bnf: [[`'"'`, /[^"]*/, `'"'`], [`"'"`, /[^']*/, `"'"`]] 121 | }, 122 | { 123 | name: 'CharCode', 124 | bnf: [['"#x"', /[0-9a-zA-Z]+/]] 125 | }, 126 | { 127 | name: 'CharClass', 128 | bnf: [["'['", "'^'?", '%RULE_CharClass_1+', '"]"']] 129 | }, 130 | { 131 | name: '%RULE_CharClass_1', 132 | bnf: [['CharCodeRange'], ['CharRange'], ['CharCode'], ['RULE_Char']], 133 | fragment: true 134 | }, 135 | { 136 | name: 'RULE_Char', 137 | bnf: [[/\x09/], [/\x0A/], [/\x0D/], [/[\x20-\x5c]/], [/[\x5e-\uD7FF]/], [/[\uE000-\uFFFD]/]] 138 | }, 139 | { 140 | name: 'CharRange', 141 | bnf: [['RULE_Char', '"-"', 'RULE_Char']] 142 | }, 143 | { 144 | name: 'CharCodeRange', 145 | bnf: [['CharCode', '"-"', 'CharCode']] 146 | }, 147 | { 148 | name: 'RULE_WHITESPACE', 149 | bnf: [['%RULE_WHITESPACE_CHAR*'], ['Comment', 'RULE_WHITESPACE*']] 150 | }, 151 | { 152 | name: 'RULE_S', 153 | bnf: [['RULE_WHITESPACE', 'RULE_S*'], ['RULE_EOL', 'RULE_S*']] 154 | }, 155 | { 156 | name: '%RULE_WHITESPACE_CHAR', 157 | bnf: [[/\x09/], [/\x20/]], 158 | fragment: true 159 | }, 160 | { 161 | name: 'Comment', 162 | bnf: [['"/*"', '%RULE_Comment_Body*', '"*/"']] 163 | }, 164 | { 165 | name: '%RULE_Comment_Body', 166 | bnf: [[/[^*]/], ['"*"+', /[^/]*/]], 167 | fragment: true 168 | }, 169 | { 170 | name: 
'RULE_EOL', 171 | bnf: [[/\x0D/, /\x0A/], [/\x0A/], [/\x0D/]] 172 | }, 173 | { 174 | name: 'Link', 175 | bnf: [["'['", 'Url', "']'"]] 176 | }, 177 | { 178 | name: 'Url', 179 | bnf: [[/[^\x5D:/?#]/, '"://"', /[^\x5D#]+/, '%Url1?']] 180 | }, 181 | { 182 | name: '%Url1', 183 | bnf: [['"#"', 'NCName']], 184 | fragment: true 185 | } 186 | ]; 187 | 188 | export const defaultParser = new _Parser(RULES, { debug: false }); 189 | 190 | const preDecorationRE = /^(!|&)/; 191 | const decorationRE = /(\?|\+|\*)$/; 192 | const subExpressionRE = /^%/; 193 | 194 | function getBNFRule(name: string | RegExp, parser: Parser): string { 195 | if (typeof name == 'string') { 196 | let decoration = decorationRE.exec(name); 197 | let preDecoration = preDecorationRE.exec(name); 198 | 199 | let preDecorationText = preDecoration ? preDecoration[0] : ''; 200 | let decorationText = decoration ? decoration[0] + ' ' : ''; 201 | 202 | let subexpression = subExpressionRE.test(name); 203 | 204 | if (subexpression) { 205 | let lonely = isLonelyRule(name, parser); 206 | 207 | if (lonely) return preDecorationText + getBNFBody(name, parser) + decorationText; 208 | 209 | return preDecorationText + '(' + getBNFBody(name, parser) + ')' + decorationText; 210 | } 211 | 212 | return name.replace(preDecorationRE, preDecorationText); 213 | } else { 214 | return name.source 215 | .replace(/\\(?:x|u)([a-zA-Z0-9]+)/g, '#x$1') 216 | .replace(/\[\\(?:x|u)([a-zA-Z0-9]+)-\\(?:x|u)([a-zA-Z0-9]+)\]/g, '[#x$1-#x$2]'); 217 | } 218 | } 219 | 220 | /// Returns true if the rule is a string literal or regular expression without a descendant tree 221 | function isLonelyRule(name: string, parser: Parser) { 222 | let rule = findRuleByName(name, parser); 223 | return ( 224 | rule && 225 | rule.bnf.length == 1 && 226 | rule.bnf[0].length == 1 && 227 | (rule.bnf[0][0] instanceof RegExp || rule.bnf[0][0][0] == '"' || rule.bnf[0][0][0] == "'") 228 | ); 229 | } 230 | 231 | function getBNFChoice(rules, parser: Parser) { 232 | return 
rules.map(x => getBNFRule(x, parser)).join(' '); 233 | } 234 | 235 | function getBNFBody(name: string, parser: Parser): string { 236 | let rule = findRuleByName(name, parser); 237 | 238 | if (rule) return rule.bnf.map(x => getBNFChoice(x, parser)).join(' | '); 239 | 240 | return 'RULE_NOT_FOUND {' + name + '}'; 241 | } 242 | 243 | export function emit(parser: Parser): string { 244 | let acumulator: string[] = []; 245 | 246 | parser.grammarRules.forEach(l => { 247 | if (!/^%/.test(l.name)) { 248 | let recover = l.recover ? ' { recoverUntil=' + l.recover + ' }' : ''; 249 | 250 | acumulator.push(l.name + ' ::= ' + getBNFBody(l.name, parser) + recover); 251 | } 252 | }); 253 | 254 | return acumulator.join('\n'); 255 | } 256 | 257 | let subitems = 0; 258 | 259 | function restar(total, resta) { 260 | console.log('reberia restar ' + resta + ' a ' + total); 261 | throw new Error('Difference not supported yet'); 262 | } 263 | 264 | function convertRegex(txt: string): RegExp { 265 | return new RegExp( 266 | txt 267 | .replace(/#x([a-zA-Z0-9]{4})/g, '\\u$1') 268 | .replace(/#x([a-zA-Z0-9]{3})/g, '\\u0$1') 269 | .replace(/#x([a-zA-Z0-9]{2})/g, '\\x$1') 270 | .replace(/#x([a-zA-Z0-9]{1})/g, '\\x0$1') 271 | ); 272 | } 273 | 274 | function getSubItems(tmpRules: IRule[], seq: IToken, parentName: string, parentAttributes: any) { 275 | let anterior = null; 276 | let bnfSeq = []; 277 | 278 | seq.children.forEach((x, i) => { 279 | if (x.type == 'Minus') { 280 | restar(anterior, x); 281 | } else { 282 | } 283 | 284 | let decoration: any = seq.children[i + 1]; 285 | decoration = (decoration && decoration.type == 'PrimaryDecoration' && decoration.text) || ''; 286 | 287 | let preDecoration = ''; 288 | 289 | if (anterior && anterior.type == 'PrimaryPreDecoration') { 290 | preDecoration = anterior.text; 291 | } 292 | 293 | let pinned = preDecoration == '~' ? 
1 : undefined; 294 | 295 | if (pinned) { 296 | preDecoration = ''; 297 | } 298 | 299 | switch (x.type) { 300 | case 'SubItem': 301 | let name = '%' + (parentName + subitems++); 302 | 303 | createRule(tmpRules, x, name, parentAttributes); 304 | 305 | bnfSeq.push(preDecoration + name + decoration); 306 | break; 307 | case 'NCName': 308 | bnfSeq.push(preDecoration + x.text + decoration); 309 | break; 310 | case 'StringLiteral': 311 | if (decoration || preDecoration || !/^['"/()a-zA-Z0-9&_.:=,+*\-\^\\]+$/.test(x.text)) { 312 | bnfSeq.push(preDecoration + x.text + decoration); 313 | } else { 314 | for (const c of x.text.slice(1, -1)) { 315 | if (parentAttributes && parentAttributes["ignoreCase"] == "true" && /[a-zA-Z]/.test(c)) { 316 | bnfSeq.push(new RegExp("[" + c.toUpperCase() + c.toLowerCase() + "]")); 317 | } 318 | else { 319 | bnfSeq.push(new RegExp(escapeRegExp(c))); 320 | } 321 | } 322 | } 323 | break; 324 | case 'CharCode': 325 | case 'CharClass': 326 | if (decoration || preDecoration) { 327 | let newRule: IRule = { 328 | name: '%' + (parentName + subitems++), 329 | bnf: [[convertRegex(x.text)]], 330 | pinned 331 | }; 332 | 333 | tmpRules.push(newRule); 334 | 335 | bnfSeq.push(preDecoration + newRule.name + decoration); 336 | } else { 337 | bnfSeq.push(convertRegex(x.text)); 338 | } 339 | break; 340 | case 'PrimaryPreDecoration': 341 | case 'PrimaryDecoration': 342 | break; 343 | default: 344 | throw new Error(' HOW SHOULD I PARSE THIS? 
' + x.type + ' -> ' + JSON.stringify(x.text)); 345 | } 346 | 347 | anterior = x; 348 | }); 349 | 350 | return bnfSeq; 351 | } 352 | 353 | function createRule(tmpRules: IRule[], token: IToken, name: string, parentAttributes: any = undefined) { 354 | let attrNode = token.children.filter(x => x.type == 'Attributes')[0]; 355 | 356 | let attributes: any = {}; 357 | 358 | if (attrNode) { 359 | attrNode.children.forEach(x => { 360 | let name = x.children.filter(x => x.type == 'NCName')[0].text; 361 | if (name in attributes) { 362 | throw new TokenError('Duplicated attribute ' + name, x); 363 | } else { 364 | attributes[name] = x.children.filter(x => x.type == 'AttributeValue')[0].text; 365 | } 366 | }); 367 | } 368 | 369 | let bnf = token.children.filter(x => x.type == 'SequenceOrDifference').map(s => getSubItems(tmpRules, s, name, parentAttributes ? parentAttributes : attributes)); 370 | 371 | let rule: IRule = { 372 | name, 373 | bnf 374 | }; 375 | 376 | if (name.indexOf('%') == 0) rule.fragment = true; 377 | 378 | if (attributes['recoverUntil']) { 379 | rule.recover = attributes['recoverUntil']; 380 | if (rule.bnf.length > 1) 381 | throw new TokenError('only one-option productions are suitable for error recovering', token); 382 | } 383 | 384 | if ('pin' in attributes) { 385 | let num = parseInt(attributes['pin']); 386 | if (!isNaN(num)) { 387 | rule.pinned = num; 388 | } 389 | if (rule.bnf.length > 1) throw new TokenError('only one-option productions are suitable for pinning', token); 390 | } 391 | 392 | if ('ws' in attributes) { 393 | rule.implicitWs = attributes['ws'] != 'explicit'; 394 | } else { 395 | rule.implicitWs = null; 396 | } 397 | 398 | rule.fragment = rule.fragment || attributes['fragment'] == 'true'; 399 | 400 | rule.simplifyWhenOneChildren = attributes['simplifyWhenOneChildren'] == 'true'; 401 | 402 | tmpRules.push(rule); 403 | } 404 | 405 | export function getRules(source: string, parser: _Parser = defaultParser): IRule[] { 406 | let ast = 
parser.getAST(source); 407 | 408 | if (!ast) throw new Error('Could not parse ' + source); 409 | 410 | if (ast.errors && ast.errors.length) { 411 | throw ast.errors[0]; 412 | } 413 | 414 | let implicitWs = null; 415 | 416 | let attrNode = ast.children.filter(x => x.type == 'Attributes')[0]; 417 | 418 | let attributes: any = {}; 419 | 420 | if (attrNode) { 421 | attrNode.children.forEach(x => { 422 | let name = x.children.filter(x => x.type == 'NCName')[0].text; 423 | if (name in attributes) { 424 | throw new TokenError('Duplicated attribute ' + name, x); 425 | } else { 426 | attributes[name] = x.children.filter(x => x.type == 'AttributeValue')[0].text; 427 | } 428 | }); 429 | } 430 | 431 | implicitWs = attributes['ws'] == 'implicit'; 432 | 433 | let tmpRules: IRule[] = []; 434 | 435 | ast.children.filter(x => x.type == 'Production').map((x: any) => { 436 | let name = x.children.filter(x => x.type == 'NCName')[0].text; 437 | createRule(tmpRules, x, name); 438 | }); 439 | 440 | tmpRules.forEach(rule => { 441 | if (rule.implicitWs === null) rule.implicitWs = implicitWs; 442 | }); 443 | 444 | return tmpRules; 445 | } 446 | 447 | export function Transform(source: TemplateStringsArray, subParser: _Parser = defaultParser): IRule[] { 448 | return getRules(source.join(''), subParser); 449 | } 450 | 451 | export class Parser extends _Parser { 452 | constructor(source: string, options?: Partial) { 453 | const subParser = options && options.debugRulesParser === true ? 
new _Parser(BNF.RULES, { debug: true }) : defaultParser; 454 | super(getRules(source, subParser), options); 455 | } 456 | emitSource(): string { 457 | return emit(this); 458 | } 459 | } 460 | } 461 | 462 | export default BNF; 463 | -------------------------------------------------------------------------------- /src/Parser.ts: -------------------------------------------------------------------------------- 1 | // https://www.ics.uci.edu/~pattis/ICS-33/lectures/ebnf.pdf 2 | 3 | const UPPER_SNAKE_RE = /^[A-Z0-9_]+$/; 4 | const decorationRE = /(\?|\+|\*)$/; 5 | const preDecorationRE = /^(@|&|!)/; 6 | const WS_RULE = 'WS'; 7 | 8 | import { TokenError } from './TokenError'; 9 | 10 | export type RulePrimary = string | RegExp; 11 | 12 | export interface IRule { 13 | name: string; 14 | bnf: RulePrimary[][]; 15 | recover?: string; 16 | fragment?: boolean; 17 | pinned?: number; 18 | implicitWs?: boolean; 19 | simplifyWhenOneChildren?: boolean; 20 | } 21 | 22 | export interface IToken { 23 | type: string; 24 | text: string; 25 | start: number; 26 | end: number; 27 | children: IToken[]; 28 | parent: IToken; 29 | fullText: string; 30 | errors: TokenError[]; 31 | rest: string; 32 | fragment?: boolean; 33 | lookup?: boolean; 34 | } 35 | 36 | export function readToken(txt: string, expr: RegExp): IToken { 37 | let result = expr.exec(txt); 38 | 39 | if (result && result.index == 0) { 40 | if (result[0].length == 0 && expr.source.length > 0) return null; 41 | return { 42 | type: null, 43 | text: result[0], 44 | rest: txt.substr(result[0].length), 45 | start: 0, 46 | end: result[0].length - 1, 47 | fullText: result[0], 48 | errors: [], 49 | children: [], 50 | parent: null 51 | }; 52 | } 53 | 54 | return null; 55 | } 56 | 57 | export function escapeRegExp(str) { 58 | return str.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, '\\$&'); 59 | } 60 | 61 | function fixRest(token: IToken) { 62 | token.rest = ''; 63 | token.children && token.children.forEach(c => fixRest(c)); 64 | } 65 | 66 | 
/** Shifts a token subtree's start/end offsets by the parent's absolute start. */
function fixPositions(token: IToken, start: number) {
  token.start += start;
  token.end += start;
  token.children && token.children.forEach(c => fixPositions(c, token.start));
}

/** Collects every TokenError found anywhere in the token tree into `errors`. */
function agregateErrors(errors: any[], token: IToken) {
  if (token.errors && token.errors.length) token.errors.forEach(err => errors.push(err));

  token.children && token.children.forEach(tok => agregateErrors(errors, tok));
}

/**
 * Decomposes a decorated rule reference (e.g. "@Name", "Rule*", "!'x'") into
 * its bare name plus flags describing the pre/post decorations.
 */
export function parseRuleName(name: string) {
  let postDecoration = decorationRE.exec(name);
  let preDecoration = preDecorationRE.exec(name);

  let postDecorationText = (postDecoration && postDecoration[0]) || '';
  let preDecorationText = (preDecoration && preDecoration[0]) || '';

  let out = {
    raw: name,
    name: name.replace(decorationRE, '').replace(preDecorationRE, ''),
    isOptional: postDecorationText == '?' || postDecorationText == '*',
    allowRepetition: postDecorationText == '+' || postDecorationText == '*',
    atLeastOne: postDecorationText == '+',
    lookupPositive: preDecorationText == '&',
    lookupNegative: preDecorationText == '!',
    pinned: preDecorationText == '@',
    lookup: false,
    isLiteral: false
  };

  out.isLiteral = out.name[0] == "'" || out.name[0] == '"';
  out.lookup = out.lookupNegative || out.lookupPositive;

  return out;
}

/** Looks up a rule by its bare (undecorated) name; null when absent. */
export function findRuleByName(name: string, parser: Parser): IRule {
  let parsed = parseRuleName(name);

  return parser.cachedRules[parsed.name] || null;
}

/// Removes all the nodes starting with 'RULE_'
function stripRules(token: IToken, re: RegExp) {
  if (token.children) {
    let localRules = token.children.filter(x => x.type && re.test(x.type));
    for (let i = 0; i < localRules.length; i++) {
      let indexOnChildren = token.children.indexOf(localRules[i]);
      if (indexOnChildren != -1) {
        token.children.splice(indexOnChildren, 1);
      }
    }

    token.children.forEach(c => stripRules(c, re));
  }
}

// NOTE(review): restored the `<T>` type parameter — it was stripped in
// transit; the index signature below references T and cannot compile without it.
export interface IDictionary<T> {
  [s: string]: T;
}
export interface IParserOptions {
  keepUpperRules: boolean;
  debug: boolean;
}

// Rules that may be referenced without being declared (EOF is built-in).
const ignoreMissingRules = ['EOF'];

/**
 * Recursive-descent parser driven by a list of IRules.
 *
 * Construction validates the grammar eagerly and throws a single Error
 * listing every problem found: duplicated rules, empty rules, direct left
 * recursion, and references to missing rules.
 */
export class Parser {
  private readonly debug: boolean;

  cachedRules: IDictionary<IRule> = {};
  constructor(public grammarRules: IRule[], public options?: Partial<IParserOptions>) {
    this.debug = options ? options.debug === true : false;
    let errors: string[] = [];

    let neededRules: string[] = [];

    grammarRules.forEach(rule => {
      let parsedName = parseRuleName(rule.name);

      if (parsedName.name in this.cachedRules) {
        errors.push('Duplicated rule ' + parsedName.name);
        return;
      } else {
        this.cachedRules[parsedName.name] = rule;
      }

      if (!rule.bnf || !rule.bnf.length) {
        let error = 'Missing rule content, rule: ' + rule.name;

        if (errors.indexOf(error) == -1) errors.push(error);
      } else {
        rule.bnf.forEach(options => {
          // Only direct left recursion (rule appearing first in one of its
          // own options) is detected here.
          if (typeof options[0] === 'string') {
            let parsed = parseRuleName(options[0] as string);
            if (parsed.name == rule.name) {
              let error = 'Left recursion is not allowed, rule: ' + rule.name;

              if (errors.indexOf(error) == -1) errors.push(error);
            }
          }

          options.forEach(option => {
            if (typeof option == 'string') {
              let name = parseRuleName(option);
              if (
                !name.isLiteral &&
                neededRules.indexOf(name.name) == -1 &&
                ignoreMissingRules.indexOf(name.name) == -1
              )
                neededRules.push(name.name);
            }
          });
        });
      }

      // The WS rule itself must never consume implicit whitespace.
      if (WS_RULE == rule.name) rule.implicitWs = false;

      if (rule.implicitWs) {
        if (neededRules.indexOf(WS_RULE) == -1) neededRules.push(WS_RULE);
      }

      if (rule.recover) {
        if (neededRules.indexOf(rule.recover) == -1) neededRules.push(rule.recover);
      }
    });

    neededRules.forEach(ruleName => {
      if (!(ruleName in this.cachedRules)) {
        errors.push('Missing rule ' + ruleName);
      }
    });

    if (errors.length) throw new Error(errors.join('\n'));
  }

  /**
   * Parses `txt` into an AST rooted at `target` (default: the first
   * non-fragment, non-'%' rule). Post-processing: collects all errors on the
   * root, makes positions absolute, strips synthetic '%' rules and (unless
   * keepUpperRules) UPPER_SNAKE rules, and records unconsumed input both as
   * `result.rest` and as a TokenError on the root.
   */
  getAST(txt: string, target?: string) {
    if (!target) {
      target = this.grammarRules.filter(x => !x.fragment && x.name.indexOf('%') != 0)[0].name;
    }

    let result = this.parse(txt, target);

    if (result) {
      agregateErrors(result.errors, result);
      fixPositions(result, 0);

      // REMOVE ALL THE TAGS MATCHING /^%/
      stripRules(result, /^%/);

      if (!this.options || !this.options.keepUpperRules) stripRules(result, UPPER_SNAKE_RE);

      let rest = result.rest;

      if (rest) {
        // TokenError's constructor attaches itself to result.errors.
        new TokenError('Unexpected end of input: \n' + rest, result);
      }

      fixRest(result);

      result.rest = rest;
    }

    return result;
  }

  emitSource(): string {
    return 'CANNOT EMIT SOURCE FROM BASE Parser';
  }

  /**
   * Core recursive-descent step: tries to match `target` (a rule name or a
   * quoted literal) at the start of `txt`. Returns the matched token or null.
   *
   * Per option (ordered alternative) of the rule, each phase is attempted in
   * sequence, honoring decorations parsed by parseRuleName: optional/repeat
   * suffixes, positive/negative lookahead (matched text is not consumed), and
   * pinning — once the pinned phase matches, failure of a later phase yields
   * a SyntaxError token instead of rejecting the whole option. Implicit
   * whitespace (WS rule) is consumed between phases when the rule allows it.
   */
  parse(txt: string, target: string, recursion = 0): IToken {
    let out: IToken = null;

    let type = parseRuleName(target);

    let expr: RegExp;

    let printable = this.debug && /*!isLiteral &*/ !UPPER_SNAKE_RE.test(type.name);

    printable &&
      console.log(
        new Array(recursion).join('│ ') + 'Trying to get ' + target + ' from ' + JSON.stringify(txt.split('\n')[0])
      );

    let realType = type.name;

    let targetLex = findRuleByName(type.name, this);

    // EOF is built-in: matches only the empty remainder.
    if (type.name == 'EOF') {
      if (txt.length) {
        return null;
      } else if (txt.length == 0) {
        return {
          type: 'EOF',
          text: '',
          rest: '',
          start: 0,
          end: 0,
          fullText: '',
          errors: [],
          children: [],
          parent: null
        };
      }
    }

    try {
      if (!targetLex && type.isLiteral) {
        let src: string = type.name.trim();

        // Unquote: double-quoted literals are JSON strings; single-quoted
        // literals only unescape \'.
        if (src.startsWith('"')) {
          src = JSON.parse(src);
        } else if (src.startsWith("'")) {
          src = src.replace(/^'(.+)'$/, '$1').replace(/\\'/g, "'");
        }

        if (src === '') {
          return {
            type: '%%EMPTY%%',
            text: '',
            rest: txt,
            start: 0,
            end: 0,
            fullText: '',
            errors: [],
            children: [],
            parent: null
          };
        }

        expr = new RegExp(escapeRegExp(src));
        realType = null;
      }
    } catch (e) {
      if (e instanceof ReferenceError) {
        console.error(e);
      }
      return null;
    }

    if (expr) {
      let result = readToken(txt, expr);

      if (result) {
        result.type = realType;
        return result;
      }
    } else {
      let options = targetLex.bnf;

      if (options instanceof Array) {
        options.forEach(phases => {
          // First option that matches wins; skip the rest.
          if (out) return;

          let pinned: IToken = null;

          let tmp: IToken = {
            type: type.name,
            text: '',
            children: [],
            end: 0,
            errors: [],
            fullText: '',
            parent: null,
            start: 0,
            rest: txt
          };

          if (targetLex.fragment) tmp.fragment = true;

          let tmpTxt = txt;
          let position = 0;

          // NOTE(review): allOptional is maintained but never read afterwards.
          let allOptional = phases.length > 0;
          let foundSomething = false;

          for (let i = 0; i < phases.length; i++) {
            if (typeof phases[i] == 'string') {
              let localTarget = parseRuleName(phases[i] as string);

              allOptional = allOptional && localTarget.isOptional;

              let got: IToken;

              let foundAtLeastOne = false;

              do {
                got = null;

                if (targetLex.implicitWs) {
                  got = this.parse(tmpTxt, localTarget.name, recursion + 1);

                  // No direct match: consume WS tokens, then retry below.
                  if (!got) {
                    let WS: IToken;

                    do {
                      WS = this.parse(tmpTxt, WS_RULE, recursion + 1);

                      if (WS) {
                        tmp.text = tmp.text + WS.text;
                        tmp.end = tmp.text.length;

                        WS.parent = tmp;
                        tmp.children.push(WS);

                        tmpTxt = tmpTxt.substr(WS.text.length);
                        position += WS.text.length;
                      } else {
                        break;
                      }
                    } while (WS && WS.text.length);
                  }
                }

                got = got || this.parse(tmpTxt, localTarget.name, recursion + 1);

                // rule ::= "true" ![a-zA-Z]
                // negative lookup, if it does not match, we should continue
                if (localTarget.lookupNegative) {
                  if (got) return /* cancel this path */;
                  break;
                }

                if (localTarget.lookupPositive) {
                  if (!got) return;
                }

                if (!got) {
                  if (localTarget.isOptional) break;
                  if (localTarget.atLeastOne && foundAtLeastOne) break;
                }

                if (got && targetLex.pinned == i + 1) {
                  pinned = got;
                  printable && console.log(new Array(recursion + 1).join('│ ') + '└─ ' + got.type + ' PINNED');
                }

                if (!got) got = this.parseRecovery(targetLex, tmpTxt, recursion + 1);

                if (!got) {
                  if (pinned) {
                    // A pinned phase already matched: commit this option and
                    // emit the rest of the input as a SyntaxError token.
                    out = tmp;
                    got = {
                      type: 'SyntaxError',
                      text: tmpTxt,
                      children: [],
                      end: tmpTxt.length,
                      errors: [],
                      fullText: '',
                      parent: null,
                      start: 0,
                      rest: ''
                    };
                    if (tmpTxt.length) {
                      new TokenError(`Unexpected end of input. Expecting ${localTarget.name} Got: ${tmpTxt}`, got);
                    } else {
                      new TokenError(`Unexpected end of input. Missing ${localTarget.name}`, got);
                    }
                    printable &&
                      console.log(
                        new Array(recursion + 1).join('│ ') + '└─ ' + got.type + ' ' + JSON.stringify(got.text)
                      );
                  } else {
                    return;
                  }
                }

                foundAtLeastOne = true;
                foundSomething = true;

                if (got.type == '%%EMPTY%%') {
                  break;
                }

                got.start += position;
                got.end += position;

                if (!localTarget.lookupPositive && got.type) {
                  if (got.fragment) {
                    // Fragments dissolve: adopt their children directly.
                    got.children &&
                      got.children.forEach(x => {
                        x.start += position;
                        x.end += position;
                        x.parent = tmp;
                        tmp.children.push(x);
                      });
                  } else {
                    got.parent = tmp;
                    tmp.children.push(got);
                  }
                }

                if (localTarget.lookup) got.lookup = true;

                printable &&
                  console.log(new Array(recursion + 1).join('│ ') + '└─ ' + got.type + ' ' + JSON.stringify(got.text));

                // Eat it from the input stream, only if it is not a lookup
                if (!localTarget.lookup && !got.lookup) {
                  tmp.text = tmp.text + got.text;
                  tmp.end = tmp.text.length;

                  tmpTxt = tmpTxt.substr(got.text.length);
                  position += got.text.length;
                }

                tmp.rest = tmpTxt;
              } while (got && localTarget.allowRepetition && tmpTxt.length && !got.lookup);
            } /* IS A REGEXP */ else {
              let got = readToken(tmpTxt, phases[i] as RegExp);

              if (!got) {
                return;
              }

              printable &&
                console.log(
                  new Array(recursion + 1).join('│ ') + '└> ' + JSON.stringify(got.text) + (phases[i] as RegExp).source
                );

              foundSomething = true;

              got.start += position;
              got.end += position;

              tmp.text = tmp.text + got.text;
              tmp.end = tmp.text.length;

              tmpTxt = tmpTxt.substr(got.text.length);
              position += got.text.length;

              tmp.rest = tmpTxt;
            }
          }

          if (foundSomething) {
            out = tmp;

            printable &&
              console.log(
                new Array(recursion).join('│ ') + '├<─┴< PUSHING ' + out.type + ' ' + JSON.stringify(out.text)
              );
          }
        });
      }

      if (out && targetLex.simplifyWhenOneChildren && out.children.length == 1) {
        out = out.children[0];
      }
    }

    if (!out) {
      printable && console.log(target + ' NOT RESOLVED FROM ' + txt);
    }

    return out;
  }

  /**
   * Error recovery for rules with a `recoverUntil` attribute: skips input one
   * character at a time until the recovery rule matches, returning the skipped
   * text as a SyntaxError token (with an attached TokenError), or null when
   * recovery is not configured or never succeeds.
   */
  private parseRecovery(recoverableToken: IRule, tmpTxt: string, recursion: number): IToken {
    if (recoverableToken.recover && tmpTxt.length) {
      let printable = this.debug;

      printable &&
        console.log(
          new Array(recursion + 1).join('│ ') +
            'Trying to recover until token ' +
            recoverableToken.recover +
            ' from ' +
            JSON.stringify(tmpTxt.split('\n')[0] + tmpTxt.split('\n')[1])
        );

      let tmp: IToken = {
        type: 'SyntaxError',
        text: '',
        children: [],
        end: 0,
        errors: [],
        fullText: '',
        parent: null,
        start: 0,
        rest: ''
      };

      let got: IToken;

      do {
        got = this.parse(tmpTxt, recoverableToken.recover, recursion + 1);

        if (got) {
          new TokenError('Unexpected input: "' + tmp.text + `" Expecting: ${recoverableToken.name}`, tmp);
          break;
        } else {
          tmp.text = tmp.text + tmpTxt[0];
          tmp.end = tmp.text.length;
          tmpTxt = tmpTxt.substr(1);
        }
      } while (!got && tmpTxt.length > 0);

      if (tmp.text.length > 0 && got) {
        printable && console.log(new Array(recursion + 1).join('│ ') + 'Recovered text: ' + JSON.stringify(tmp.text));
        return tmp;
      }
    }
    return null;
  }
}

export default Parser;
--------------------------------------------------------------------------------