├── .gitattributes ├── babel.js ├── macro.js ├── docs └── reghex-logo.png ├── .gitignore ├── src ├── babel │ ├── macro.js │ ├── plugin.js │ ├── plugin.test.js │ ├── transform.js │ └── __snapshots__ │ │ └── plugin.test.js.snap ├── parser.js ├── core.js ├── parser.test.js ├── codegen.js └── core.test.js ├── .github └── workflows │ └── mirror.yml ├── LICENSE.md ├── scripts └── simplify-jstags-plugin.js ├── rollup.config.js ├── package.json └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | -------------------------------------------------------------------------------- /babel.js: -------------------------------------------------------------------------------- 1 | module.exports = require('./dist/reghex-babel.js'); 2 | -------------------------------------------------------------------------------- /macro.js: -------------------------------------------------------------------------------- 1 | module.exports = require('./dist/reghex-macro.js').default; 2 | -------------------------------------------------------------------------------- /docs/reghex-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0no-co/reghex/HEAD/docs/reghex-logo.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.vscode 2 | *.log 3 | .rts2_cache* 4 | dist/ 5 | node_modules/ 6 | package-lock.json 7 | .DS_Store 8 | .next 9 | -------------------------------------------------------------------------------- /src/babel/macro.js: -------------------------------------------------------------------------------- 1 | import { createMacro } from 'babel-plugin-macros'; 2 | import { makeHelpers } from './transform'; 3 | 4 | function reghexMacro({ references, babel }) { 5 | const helpers = makeHelpers(babel); 6 | const defaultRefs = references.default || []; 7 | 8 | defaultRefs.forEach((ref) => { 9 | if (!t.isCallExpression(ref.parentPath.node)) return; 10 | const path = ref.parentPath.parentPath; 11 | if (!helpers.isMatch(path)) return; 12 | 13 | const importPath = helpers.getMatchImport(path); 14 | if (!importPath) return; 15 | 16 | helpers.updateImport(importPath); 17 | helpers.transformMatch(path); 18 | }); 19 | 20 | return { 21 | keepImports: true, 22 | }; 23 | } 24 | 25 | export default createMacro(reghexMacro); 26 | -------------------------------------------------------------------------------- /src/babel/plugin.js: -------------------------------------------------------------------------------- 1 | import { makeHelpers } from './transform'; 2 | 3 | export default function reghexPlugin(babel, opts = {}) { 4 | let helpers; 5 | 6 | return { 7 | name: 'reghex', 8 | visitor: { 9 | Program() { 10 | helpers = makeHelpers(babel); 11 | }, 12 | ImportDeclaration(path) { 13 | if (opts.codegen === false) return; 14 | helpers.updateImport(path); 15 | }, 16 | TaggedTemplateExpression(path) { 17 | if (helpers.isMatch(path) && helpers.getMatchImport(path)) { 18 | if (opts.codegen === false) { 19 | helpers.minifyMatch(path); 20 | } else { 21 | helpers.transformMatch(path); 22 | } 23 | } 24 | }, 25 | }, 26 | }; 27 | } 28 | -------------------------------------------------------------------------------- /.github/workflows/mirror.yml: -------------------------------------------------------------------------------- 1 | # Mirrors to https://tangled.sh/@kitten.sh (knot.kitten.sh) 2 | name: Mirror (Git Backup) 3 | on: 4 | push: 5 | branches: 6 | - main 7 | jobs: 8 | mirror: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout repository 12 | uses: actions/checkout@v4 13 | with: 14 | fetch-depth: 0 15 | fetch-tags: true 16 | - name: Mirror 17 | env: 18 | MIRROR_SSH_KEY: ${{ secrets.MIRROR_SSH_KEY }} 19 | GIT_SSH_COMMAND: 'ssh -o StrictHostKeyChecking=yes' 20 | run: | 21 | mkdir -p ~/.ssh 22 | echo "$MIRROR_SSH_KEY" > ~/.ssh/id_rsa 23 | chmod 600 ~/.ssh/id_rsa 24 | ssh-keyscan -H knot.kitten.sh >> ~/.ssh/known_hosts 25 | git remote add mirror "git@knot.kitten.sh:kitten.sh/${GITHUB_REPOSITORY#*/}" 26 | git push --mirror mirror 27 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Phil Plückthun 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /scripts/simplify-jstags-plugin.js: -------------------------------------------------------------------------------- 1 | import { transformSync as transform } from '@babel/core'; 2 | import { createFilter } from '@rollup/pluginutils'; 3 | 4 | import transformTemplateLiterals from '@babel/plugin-transform-template-literals'; 5 | import eliminateClosures from 'babel-plugin-closure-elimination'; 6 | 7 | const simplifyJSTags = ({ types: t }) => ({ 8 | visitor: { 9 | TaggedTemplateExpression(path) { 10 | if (path.node.tag.name !== 'js') return; 11 | 12 | const expressions = path.node.quasi.expressions; 13 | 14 | const quasis = path.node.quasi.quasis.map((x) => 15 | x.value.cooked 16 | .replace(/\s*[=(){},;:!]\s*/g, (x) => x.trim()) 17 | .replace(/\s+/g, ' ') 18 | .replace(/^\s+$/g, '') 19 | ); 20 | 21 | const concat = expressions.reduceRight( 22 | (prev, node, i) => 23 | t.binaryExpression( 24 | '+', 25 | t.stringLiteral(quasis[i]), 26 | t.binaryExpression('+', node, prev) 27 | ), 28 | t.stringLiteral(quasis[quasis.length - 1]) 29 | ); 30 | 31 | path.replaceWith(concat); 32 | }, 33 | }, 34 | }); 35 | 36 | function simplifyJSTagsPlugin(opts = {}) { 37 | const filter = createFilter(opts.include, opts.exclude, { 38 | resolve: false, 39 | }); 40 | 41 | return { 42 | name: 'cleanup', 43 | 44 | renderChunk(code, chunk) { 45 | if (!filter(chunk.fileName)) { 46 | return null; 47 | } 48 | 49 | return transform(code, { 50 | plugins: [ 51 | simplifyJSTags, 52 | [transformTemplateLiterals, { loose: true }], 53 | eliminateClosures, 54 | ], 55 | babelrc: false, 56 | }); 57 | }, 58 | }; 59 | } 60 | 61 | export default simplifyJSTagsPlugin; 62 | -------------------------------------------------------------------------------- /rollup.config.js: -------------------------------------------------------------------------------- 1 | import commonjs from '@rollup/plugin-commonjs'; 2 | import resolve from '@rollup/plugin-node-resolve'; 3 | import buble from '@rollup/plugin-buble'; 4 | import compiler from '@ampproject/rollup-plugin-closure-compiler'; 5 | 6 | import simplifyJSTags from './scripts/simplify-jstags-plugin.js'; 7 | 8 | const plugins = [ 9 | commonjs({ 10 | ignoreGlobal: true, 11 | include: ['*', '**'], 12 | extensions: ['.js', '.ts', '.tsx'], 13 | }), 14 | resolve({ 15 | mainFields: ['module', 'jsnext', 'main'], 16 | extensions: ['.js', '.ts', '.tsx'], 17 | browser: true, 18 | }), 19 | buble({ 20 | transforms: { 21 | unicodeRegExp: false, 22 | dangerousForOf: true, 23 | templateString: false, 24 | }, 25 | exclude: 'node_modules/**', 26 | }), 27 | ]; 28 | 29 | const output = (format = 'cjs', ext = '.js') => ({ 30 | chunkFileNames: '[hash]' + ext, 31 | entryFileNames: 'reghex-[name]' + ext, 32 | dir: './dist', 33 | exports: 'named', 34 | externalLiveBindings: false, 35 | sourcemap: true, 36 | esModule: false, 37 | indent: false, 38 | freeze: false, 39 | strict: false, 40 | format, 41 | plugins: [ 42 | simplifyJSTags(), 43 | compiler({ 44 | formatting: 'PRETTY_PRINT', 45 | compilation_level: 'SIMPLE_OPTIMIZATIONS', 46 | }), 47 | ], 48 | }); 49 | 50 | const base = { 51 | onwarn: () => {}, 52 | external: () => false, 53 | treeshake: { 54 | propertyReadSideEffects: false, 55 | }, 56 | plugins, 57 | output: [output('cjs', '.js'), output('esm', '.mjs')], 58 | }; 59 | 60 | export default [ 61 | { 62 | ...base, 63 | input: { 64 | core: './src/core.js', 65 | }, 66 | }, 67 | { 68 | ...base, 69 | output: { 70 | ...output('cjs', '.js'), 71 | exports: 'default', 72 | }, 73 | input: { 74 | babel: './src/babel/plugin.js', 75 | macro: './src/babel/macro.js', 76 | }, 77 | }, 78 | ]; 79 | -------------------------------------------------------------------------------- /src/parser.js: -------------------------------------------------------------------------------- 1 | const syntaxError = (char) => { 2 | throw new SyntaxError('Unexpected token "' + char + '"'); 3 | }; 4 | 5 | export const parse = (quasis, expressions) => { 6 | let quasiIndex = 0; 7 | let stackIndex = 0; 8 | 9 | const sequenceStack = []; 10 | const rootSequence = []; 11 | 12 | let currentGroup = null; 13 | let lastMatch; 14 | let currentSequence = rootSequence; 15 | let capture; 16 | 17 | for ( 18 | let quasiIndex = 0, stackIndex = 0; 19 | stackIndex < quasis.length + expressions.length; 20 | stackIndex++ 21 | ) { 22 | if (stackIndex % 2 !== 0) { 23 | const expression = expressions[stackIndex++ >> 1]; 24 | currentSequence.push({ expression, capture }); 25 | capture = undefined; 26 | } 27 | 28 | const quasi = quasis[stackIndex >> 1]; 29 | for (quasiIndex = 0; quasiIndex < quasi.length; ) { 30 | const char = quasi[quasiIndex++]; 31 | if (char === ' ' || char === '\t' || char === '\r' || char === '\n') { 32 | } else if (char === '|' && currentSequence.length) { 33 | currentSequence = currentSequence.alternation = []; 34 | } else if (char === ')' && currentSequence.length) { 35 | currentGroup = null; 36 | currentSequence = sequenceStack.pop(); 37 | if (!currentSequence) syntaxError(char); 38 | } else if (char === '(') { 39 | sequenceStack.push(currentSequence); 40 | currentSequence.push((currentGroup = { sequence: [], capture })); 41 | currentSequence = currentGroup.sequence; 42 | capture = undefined; 43 | } else if (char === ':' || char === '=' || char === '!') { 44 | capture = char; 45 | const nextChar = quasi[quasiIndex]; 46 | if (quasi[quasiIndex] && quasi[quasiIndex] !== '(') syntaxError(char); 47 | } else if (char === '?' && !currentSequence.length && currentGroup) { 48 | capture = quasi[quasiIndex++]; 49 | if (capture === ':' || capture === '=' || capture === '!') { 50 | currentGroup.capture = capture; 51 | capture = undefined; 52 | } else { 53 | syntaxError(char); 54 | } 55 | } else if ( 56 | (char === '?' || char === '+' || char === '*') && 57 | (lastMatch = currentSequence[currentSequence.length - 1]) 58 | ) { 59 | lastMatch.quantifier = char; 60 | } else { 61 | syntaxError(char); 62 | } 63 | } 64 | } 65 | 66 | return rootSequence; 67 | }; 68 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "reghex", 3 | "version": "3.0.2", 4 | "description": "The magical sticky regex-based parser generator 🧙", 5 | "author": "Phil Pluckthun ", 6 | "license": "MIT", 7 | "main": "dist/reghex-core", 8 | "module": "dist/reghex-core.mjs", 9 | "source": "src/core.js", 10 | "sideEffects": false, 11 | "files": [ 12 | "README.md", 13 | "LICENSE.md", 14 | "dist", 15 | "src", 16 | "babel.js", 17 | "macro.js" 18 | ], 19 | "exports": { 20 | ".": { 21 | "import": "./dist/reghex-core.mjs", 22 | "require": "./dist/reghex-core.js" 23 | }, 24 | "./babel": { 25 | "require": "./dist/reghex-babel.js" 26 | }, 27 | "./macro": { 28 | "require": "./dist/reghex-macro.js" 29 | }, 30 | "./package.json": "./package.json" 31 | }, 32 | "scripts": { 33 | "prepublishOnly": "run-s clean build test", 34 | "clean": "rimraf dist ./node_modules/.cache", 35 | "build": "rollup -c rollup.config.js", 36 | "test": "jest" 37 | }, 38 | "keywords": [ 39 | "regex", 40 | "sticky regex", 41 | "parser", 42 | "parser generator", 43 | "babel" 44 | ], 45 | "repository": "https://github.com/kitten/reghex", 46 | "bugs": { 47 | "url": "https://github.com/kitten/reghex/issues" 48 | }, 49 | "devDependencies": { 50 | "@ampproject/rollup-plugin-closure-compiler": "^0.27.0", 51 | "@babel/core": "7.15.0", 52 | "@babel/plugin-transform-modules-commonjs": "^7.15.0", 53 | "@babel/plugin-transform-template-literals": "^7.14.5", 54 | "@rollup/plugin-buble": "^0.21.3", 55 | "@rollup/plugin-commonjs": "^20.0.0", 56 | "@rollup/plugin-node-resolve": "^13.0.4", 57 | "@rollup/pluginutils": "^4.1.1", 58 | "@sucrase/jest-plugin": "^2.1.1", 59 | "babel-jest": "^27.1.0", 60 | "babel-plugin-closure-elimination": "^1.3.2", 61 | "husky-v4": "^4.3.8", 62 | "jest": "^27.1.0", 63 | "lint-staged": "^11.1.2", 64 | "npm-run-all": "^4.1.5", 65 | "prettier": "^2.3.2", 66 | "rimraf": "^3.0.2", 67 | "rollup": "^2.56.3" 68 | }, 69 | "prettier": { 70 | "singleQuote": true 71 | }, 72 | "lint-staged": { 73 | "*.{js,jsx,json,md}": "prettier --write" 74 | }, 75 | "husky": { 76 | "hooks": { 77 | "pre-commit": "lint-staged --quiet --relative" 78 | } 79 | }, 80 | "jest": { 81 | "testEnvironment": "node", 82 | "transform": { 83 | "\\.js$": "@sucrase/jest-plugin" 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/core.js: -------------------------------------------------------------------------------- 1 | import { astRoot } from './codegen'; 2 | import { parse as parseDSL } from './parser'; 3 | 4 | const isStickySupported = typeof /./g.sticky === 'boolean'; 5 | 6 | const execLambda = (pattern) => { 7 | if (pattern.length) return pattern; 8 | return (state) => pattern()(state); 9 | }; 10 | 11 | const execString = (pattern) => { 12 | return (state) => { 13 | if (state.x < state.quasis.length) { 14 | const input = state.quasis[state.x]; 15 | for (let i = 0, l = pattern.length; i < l; i++) 16 | if (input.charCodeAt(state.y + i) !== pattern.charCodeAt(i)) 17 | return null; 18 | state.y += pattern.length; 19 | return pattern; 20 | } 21 | }; 22 | }; 23 | 24 | const execRegex = (pattern) => { 25 | pattern = isStickySupported 26 | ? new RegExp(pattern.source, 'y') 27 | : new RegExp(pattern.source + '|()', 'g'); 28 | return (state) => { 29 | if (state.x < state.quasis.length) { 30 | const input = state.quasis[state.x]; 31 | pattern.lastIndex = state.y; 32 | let match; 33 | if (isStickySupported) { 34 | if (pattern.test(input)) 35 | match = input.slice(state.y, pattern.lastIndex); 36 | } else { 37 | const x = pattern.exec(input); 38 | if (x[1] == null) match = x[0]; 39 | } 40 | 41 | state.y = pattern.lastIndex; 42 | return match; 43 | } 44 | }; 45 | }; 46 | 47 | export const __pattern = (input) => { 48 | if (typeof input === 'function') { 49 | return execLambda(input); 50 | } else if (typeof input === 'string') { 51 | return execString(input); 52 | } else { 53 | return execRegex(input); 54 | } 55 | }; 56 | 57 | export const interpolation = (predicate) => (state) => { 58 | let match; 59 | 60 | if ( 61 | state.x < state.expressions.length && 62 | state.y >= state.quasis[state.x].length 63 | ) { 64 | state.y = 0; 65 | match = state.expressions[state.x++]; 66 | if (predicate && match) match = predicate(match); 67 | } 68 | 69 | return match; 70 | }; 71 | 72 | export const parse = (matcher) => (quasis, ...expressions) => { 73 | if (typeof quasis === 'string') quasis = [quasis]; 74 | const state = { quasis, expressions, x: 0, y: 0 }; 75 | return matcher(state); 76 | }; 77 | 78 | export const match = (name, transform) => (quasis, ...expressions) => { 79 | const ast = parseDSL( 80 | quasis, 81 | expressions.map((_, i) => ({ id: `_${i}` })) 82 | ); 83 | return new Function( 84 | '_n,_t,' + expressions.map((_expression, i) => `_${i}`).join(','), 85 | 'return ' + astRoot(ast, '_n', transform ? '_t' : null) 86 | )(name, transform, ...expressions.map(__pattern)); 87 | }; 88 | -------------------------------------------------------------------------------- /src/babel/plugin.test.js: -------------------------------------------------------------------------------- 1 | import { transform } from '@babel/core'; 2 | import reghexPlugin from './plugin'; 3 | 4 | it('works with standard features', () => { 5 | const code = ` 6 | import { match } from 'reghex/macro'; 7 | 8 | const node = match('node')\` 9 | \${1}+ | \${2}+ (\${3} ( \${4}? \${5} ) )* 10 | \`; 11 | `; 12 | 13 | expect( 14 | transform(code, { babelrc: false, presets: [], plugins: [reghexPlugin] }) 15 | .code 16 | ).toMatchSnapshot(); 17 | }); 18 | 19 | it('works with nameless matchers', () => { 20 | const code = ` 21 | import { match } from 'reghex/macro'; 22 | 23 | const node = match()\` 24 | \${1}+ | \${2}+ (\${3} ( \${4}? \${5} ) )* 25 | \`; 26 | `; 27 | 28 | expect( 29 | transform(code, { babelrc: false, presets: [], plugins: [reghexPlugin] }) 30 | .code 31 | ).toMatchSnapshot(); 32 | }); 33 | 34 | it('works while only minifying', () => { 35 | const code = ` 36 | import { match } from 'reghex/macro'; 37 | 38 | const node = match('node')\` 39 | \${1}+ | \${2}+ (\${3} ( \${4}? \${5} ) )* 40 | \`; 41 | `; 42 | 43 | expect( 44 | transform(code, { 45 | babelrc: false, 46 | presets: [], 47 | plugins: [[reghexPlugin, { codegen: false }]], 48 | }).code 49 | ).toMatchSnapshot(); 50 | }); 51 | 52 | it('deduplicates hoisted expressions', () => { 53 | const code = ` 54 | import { match } from 'reghex/macro'; 55 | 56 | const re = /1/; 57 | const str = '1'; 58 | 59 | const a = match('a')\` 60 | \${re} 61 | \${str} 62 | \`; 63 | 64 | const b = match('b')\` 65 | \${re} 66 | \${'2'} 67 | \`; 68 | `; 69 | 70 | expect( 71 | transform(code, { babelrc: false, presets: [], plugins: [reghexPlugin] }) 72 | .code 73 | ).toMatchSnapshot(); 74 | }); 75 | 76 | it('works with local recursion', () => { 77 | // NOTE: A different default name is allowed 78 | const code = ` 79 | import { match as m, tag } from 'reghex'; 80 | 81 | const inner = m('inner')\` 82 | \${/inner/} 83 | \`; 84 | 85 | const node = m('node')\` 86 | \${inner} 87 | \`; 88 | `; 89 | 90 | expect( 91 | transform(code, { babelrc: false, presets: [], plugins: [reghexPlugin] }) 92 | .code 93 | ).toMatchSnapshot(); 94 | }); 95 | 96 | it('works with self-referential thunks', () => { 97 | const code = ` 98 | import { match, tag } from 'reghex'; 99 | 100 | const inner = match('inner')\` 101 | \${() => node} 102 | \`; 103 | 104 | const node = match('node')\` 105 | \${inner} 106 | \`; 107 | `; 108 | 109 | expect( 110 | transform(code, { babelrc: false, presets: [], plugins: [reghexPlugin] }) 111 | .code 112 | ).toMatchSnapshot(); 113 | }); 114 | 115 | it('works with transform functions', () => { 116 | const code = ` 117 | import { match } from 'reghex'; 118 | 119 | const first = match('inner', x => x)\`\`; 120 | 121 | const transform = x => x; 122 | const second = match('node', transform)\`\`; 123 | `; 124 | 125 | expect( 126 | transform(code, { babelrc: false, presets: [], plugins: [reghexPlugin] }) 127 | .code 128 | ).toMatchSnapshot(); 129 | }); 130 | 131 | it('works with non-capturing groups', () => { 132 | const code = ` 133 | import { match } from 'reghex'; 134 | 135 | const node = match('node')\` 136 | \${1} (\${2} | (?: \${3})+) 137 | \`; 138 | `; 139 | 140 | expect( 141 | transform(code, { babelrc: false, presets: [], plugins: [reghexPlugin] }) 142 | .code 143 | ).toMatchSnapshot(); 144 | }); 145 | 146 | it('works together with @babel/plugin-transform-modules-commonjs', () => { 147 | const code = ` 148 | import { match } from 'reghex'; 149 | 150 | const node = match('node')\` 151 | \${1} \${2} 152 | \`; 153 | `; 154 | 155 | expect( 156 | transform(code, { 157 | babelrc: false, 158 | presets: [], 159 | plugins: [ 160 | reghexPlugin, 161 | [ 162 | '@babel/plugin-transform-modules-commonjs', 163 | { 164 | noInterop: true, 165 | loose: true, 166 | }, 167 | ], 168 | ], 169 | }).code 170 | ).toMatchSnapshot(); 171 | }); 172 | -------------------------------------------------------------------------------- /src/parser.test.js: -------------------------------------------------------------------------------- 1 | import { parse } from './parser'; 2 | 3 | const parseTag = (quasis, ...expressions) => parse(quasis, expressions); 4 | 5 | it('supports parsing expressions with quantifiers', () => { 6 | let ast; 7 | 8 | ast = parseTag`${1}?`; 9 | expect(ast).toHaveProperty('0.quantifier', '?'); 10 | 11 | ast = parseTag`${1}+`; 12 | expect(ast).toHaveProperty('0.quantifier', '+'); 13 | 14 | ast = parseTag`${1}*`; 15 | expect(ast).toHaveProperty('0.quantifier', '*'); 16 | }); 17 | 18 | it('supports top-level alternations', () => { 19 | let ast; 20 | 21 | ast = parseTag`${1} | ${2}`; 22 | expect(ast).toHaveProperty('length', 1); 23 | expect(ast).toHaveProperty('0.expression', 1); 24 | expect(ast).toHaveProperty('alternation.0.expression', 2); 25 | 26 | ast = parseTag`${1}? | ${2}?`; 27 | expect(ast).toHaveProperty('0.quantifier', '?'); 28 | }); 29 | 30 | it('supports groups with quantifiers', () => { 31 | let ast; 32 | 33 | ast = parseTag`(${1} ${2})`; 34 | expect(ast).toHaveProperty('length', 1); 35 | expect(ast).toHaveProperty('0.sequence.length', 2); 36 | expect(ast).toHaveProperty('0.sequence.0.expression', 1); 37 | expect(ast).toHaveProperty('0.sequence.1.expression', 2); 38 | 39 | ast = parseTag`(${1} ${2}?)?`; 40 | expect(ast).toHaveProperty('length', 1); 41 | expect(ast).toHaveProperty('0.quantifier', '?'); 42 | expect(ast).toHaveProperty('0.sequence.0.quantifier', undefined); 43 | }); 44 | 45 | describe('non-capturing syntax', () => { 46 | it('supports regex-like syntax', () => { 47 | const ast = parseTag`(?: ${1})`; 48 | expect(ast).toHaveProperty('length', 1); 49 | expect(ast).toHaveProperty('0.capture', ':'); 50 | expect(ast).toHaveProperty('0.sequence.length', 1); 51 | }); 52 | 53 | it('supports shorthand', () => { 54 | let ast = parseTag`:${1}`; 55 | expect(ast).toHaveProperty('length', 1); 56 | expect(ast).toHaveProperty('0.capture', ':'); 57 | expect(ast).toHaveProperty('0.expression', 1); 58 | ast = parseTag`:(${1})`; 59 | expect(ast).toHaveProperty('length', 1); 60 | expect(ast).toHaveProperty('0.capture', ':'); 61 | expect(ast).toHaveProperty('0.sequence.length', 1); 62 | }); 63 | 64 | it('fails on invalid usage', () => { 65 | expect(() => parseTag`${1} : ${2}`).toThrow(); 66 | expect(() => parseTag`${1} :|${2}`).toThrow(); 67 | }); 68 | }); 69 | 70 | describe('positive lookaheads syntax', () => { 71 | it('supports regex-like syntax', () => { 72 | const ast = parseTag`(?= ${1})`; 73 | expect(ast).toHaveProperty('length', 1); 74 | expect(ast).toHaveProperty('0.capture', '='); 75 | expect(ast).toHaveProperty('0.sequence.length', 1); 76 | }); 77 | 78 | it('supports shorthand', () => { 79 | let ast = parseTag`=${1}`; 80 | expect(ast).toHaveProperty('length', 1); 81 | expect(ast).toHaveProperty('0.capture', '='); 82 | expect(ast).toHaveProperty('0.expression', 1); 83 | ast = parseTag`=(${1})`; 84 | expect(ast).toHaveProperty('length', 1); 85 | expect(ast).toHaveProperty('0.capture', '='); 86 | expect(ast).toHaveProperty('0.sequence.length', 1); 87 | }); 88 | }); 89 | 90 | describe('negative lookaheads syntax', () => { 91 | it('supports regex-like syntax', () => { 92 | const ast = parseTag`(?! ${1})`; 93 | expect(ast).toHaveProperty('length', 1); 94 | expect(ast).toHaveProperty('0.capture', '!'); 95 | expect(ast).toHaveProperty('0.sequence.length', 1); 96 | }); 97 | 98 | it('supports shorthand', () => { 99 | let ast = parseTag`!${1}`; 100 | expect(ast).toHaveProperty('length', 1); 101 | expect(ast).toHaveProperty('0.capture', '!'); 102 | expect(ast).toHaveProperty('0.expression', 1); 103 | ast = parseTag`!(${1})`; 104 | expect(ast).toHaveProperty('length', 1); 105 | expect(ast).toHaveProperty('0.capture', '!'); 106 | expect(ast).toHaveProperty('0.sequence.length', 1); 107 | }); 108 | }); 109 | 110 | it('supports groups with alternates', () => { 111 | expect(parseTag`(${1} | ${2}) ${3}`).toMatchInlineSnapshot(` 112 | Array [ 113 | Object { 114 | "capture": undefined, 115 | "sequence": Array [ 116 | Object { 117 | "capture": undefined, 118 | "expression": 1, 119 | }, 120 | ], 121 | }, 122 | Object { 123 | "capture": undefined, 124 | "expression": 3, 125 | }, 126 | ] 127 | `); 128 | }); 129 | -------------------------------------------------------------------------------- /src/codegen.js: -------------------------------------------------------------------------------- 1 | const _state = 'state'; 2 | const _node = 'node'; 3 | const _match = 'x'; 4 | 5 | function js(/* arguments */) { 6 | let body = arguments[0][0]; 7 | for (let i = 1; i < arguments.length; i++) 8 | body = body + arguments[i] + arguments[0][i]; 9 | return body.trim(); 10 | } 11 | 12 | const copy = (prev) => { 13 | const next = {}; 14 | for (const key in prev) next[key] = prev[key]; 15 | return next; 16 | }; 17 | 18 | const assignIndex = (depth) => js` 19 | var y${depth} = ${_state}.y, 20 | x${depth} = ${_state}.x; 21 | `; 22 | 23 | const restoreIndex = (depth) => js` 24 | ${_state}.y = y${depth}; 25 | ${_state}.x = x${depth}; 26 | `; 27 | 28 | const astExpression = (ast, depth, opts) => { 29 | const capture = !!opts.capture && !ast.capture; 30 | const restoreLength = 31 | (opts.length && opts.abort && js`${_node}.length = ln${opts.length};`) || 32 | ''; 33 | const condition = `(${_match} = ${ast.expression.id}(${_state})) ${ 34 | capture ? '!=' : '==' 35 | } null`; 36 | return js` 37 | if (${condition}) ${ 38 | capture 39 | ? js`{ 40 | ${_node}.push(${_match}); 41 | } else ` 42 | : '' 43 | }{ 44 | ${restoreIndex(opts.index)} 45 | ${restoreLength} 46 | ${opts.abort} 47 | } 48 | `; 49 | }; 50 | 51 | const astGroup = (ast, depth, opts) => { 52 | const capture = !!opts.capture && !ast.capture; 53 | 54 | opts = copy(opts); 55 | opts.capture = capture; 56 | 57 | if (!opts.length && capture) { 58 | opts.length = depth; 59 | return js` 60 | ${js`var ln${depth} = ${_node}.length;`} 61 | ${astSequence(ast.sequence, depth + 1, opts)} 62 | `; 63 | } 64 | 65 | return astSequence(ast.sequence, depth + 1, opts); 66 | }; 67 | 68 | const astChild = (ast, depth, opts) => 69 | ast.expression ? astExpression(ast, depth, opts) : astGroup(ast, depth, opts); 70 | 71 | const astQuantifier = (ast, depth, opts) => { 72 | const { index, abort } = opts; 73 | const invert = `inv_${depth}`; 74 | const group = `group_${depth}`; 75 | 76 | opts = copy(opts); 77 | if (ast.capture === '!') { 78 | opts.index = depth; 79 | opts.abort = js`break ${invert}`; 80 | } 81 | 82 | let child; 83 | if (ast.quantifier === '+') { 84 | const starAst = copy(ast); 85 | starAst.quantifier = '*'; 86 | child = js` 87 | ${astChild(ast, depth, opts)} 88 | ${astQuantifier(starAst, depth, opts)} 89 | `; 90 | } else if (ast.quantifier === '*') { 91 | opts.length = 0; 92 | opts.index = depth; 93 | opts.abort = js`break ${group};`; 94 | 95 | child = js` 96 | ${group}: for (;;) { 97 | ${assignIndex(depth)} 98 | ${astChild(ast, depth, opts)} 99 | } 100 | `; 101 | } else if (ast.quantifier === '?' && ast.expression) { 102 | opts.index = depth; 103 | opts.abort = ''; 104 | 105 | child = js` 106 | ${assignIndex(depth)} 107 | ${astChild(ast, depth, opts)} 108 | `; 109 | } else if (ast.quantifier === '?') { 110 | opts.index = depth; 111 | opts.abort = js`break ${group}`; 112 | 113 | child = js` 114 | ${group}: { 115 | ${assignIndex(depth)} 116 | ${astChild(ast, depth, opts)} 117 | } 118 | `; 119 | } else { 120 | child = astChild(ast, depth, opts); 121 | } 122 | 123 | if (ast.capture === '!') { 124 | return js` 125 | ${invert}: { 126 | ${assignIndex(depth)} 127 | ${child} 128 | ${restoreIndex(index)} 129 | ${abort} 130 | } 131 | `; 132 | } else if (ast.capture === '=') { 133 | return js` 134 | ${assignIndex(depth)} 135 | ${child} 136 | ${restoreIndex(depth)} 137 | `; 138 | } else { 139 | return child; 140 | } 141 | }; 142 | 143 | const astSequence = (ast, depth, opts) => { 144 | const alternation = ast.alternation ? `alt_${depth}` : ''; 145 | 146 | let body = ''; 147 | for (; ast; ast = ast.alternation) { 148 | const block = `block_${depth}`; 149 | 150 | let childOpts = opts; 151 | if (ast.alternation) { 152 | childOpts = copy(opts); 153 | childOpts.index = depth; 154 | childOpts.abort = js`break ${block};`; 155 | } 156 | 157 | let sequence = ''; 158 | for (let i = 0; i < ast.length; i++) 159 | sequence += astQuantifier(ast[i], depth, childOpts); 160 | 161 | if (!ast.alternation) { 162 | body += sequence; 163 | } else { 164 | body += js` 165 | ${block}: { 166 | ${assignIndex(depth)} 167 | ${sequence} 168 | break ${alternation}; 169 | } 170 | `; 171 | } 172 | } 173 | 174 | if (!alternation) return body; 175 | 176 | return js` 177 | ${alternation}: { 178 | ${body} 179 | } 180 | `; 181 | }; 182 | 183 | const astRoot = (ast, name, transform) => { 184 | return js` 185 | (function (${_state}) { 186 | ${assignIndex(1)} 187 | var ${_node} = []; 188 | var ${_match}; 189 | 190 | ${astSequence(ast, 2, { 191 | index: 1, 192 | length: 0, 193 | abort: js`return;`, 194 | capture: true, 195 | })} 196 | 197 | if (${name}) ${_node}.tag = ${name}; 198 | return ${transform ? js`(${transform})(${_node})` : _node}; 199 | }) 200 | `; 201 | }; 202 | 203 | export { astRoot }; 204 | -------------------------------------------------------------------------------- /src/babel/transform.js: -------------------------------------------------------------------------------- 1 | import { astRoot } from '../codegen'; 2 | import { parse } from '../parser'; 3 | 4 | export function makeHelpers({ types: t, template }) { 5 | const regexPatternsRe = /^[()\[\]|.+?*]|[^\\][()\[\]|.+?*$^]|\\[wdsWDS]/; 6 | const importSourceRe = /reghex$|^reghex\/macro/; 7 | const importName = 'reghex'; 8 | 9 | let _hasUpdatedImport = false; 10 | let _matchId = t.identifier('match'); 11 | let _patternId = t.identifier('__pattern'); 12 | 13 | const _hoistedExpressions = new Map(); 14 | 15 | return { 16 | /** Adds the reghex import declaration to the Program scope */ 17 | updateImport(path) { 18 | if (_hasUpdatedImport) return; 19 | if (!importSourceRe.test(path.node.source.value)) return; 20 | _hasUpdatedImport = true; 21 | 22 | if (path.node.source.value !== importName) { 23 | path.node.source = t.stringLiteral(importName); 24 | } 25 | 26 | _patternId = path.scope.generateUidIdentifier('_pattern'); 27 | path.node.specifiers.push( 28 | t.importSpecifier(_patternId, t.identifier('__pattern')) 29 | ); 30 | 31 | const tagImport = path.node.specifiers.find((node) => { 32 | return t.isImportSpecifier(node) && node.imported.name === 'match'; 33 | }); 34 | 35 | if (!tagImport) { 36 | path.node.specifiers.push( 37 | t.importSpecifier( 38 | (_matchId = path.scope.generateUidIdentifier('match')), 39 | t.identifier('match') 40 | ) 41 | ); 42 | } else { 43 | _matchId = tagImport.imported; 44 | } 45 | }, 46 | 47 | /** Determines whether the given tagged template expression is a reghex match */ 48 | isMatch(path) { 49 | if ( 50 | t.isTaggedTemplateExpression(path.node) && 51 | t.isCallExpression(path.node.tag) && 52 | t.isIdentifier(path.node.tag.callee) && 53 | path.scope.hasBinding(path.node.tag.callee.name) 54 | ) { 55 | if (t.isVariableDeclarator(path.parentPath)) 56 | path.parentPath._isMatch = true; 57 | return true; 58 | } 59 | 60 | return ( 61 | t.isVariableDeclarator(path.parentPath) && path.parentPath._isMatch 62 | ); 63 | }, 64 | 65 | /** Given a reghex match, returns the path to reghex's match import declaration */ 66 | getMatchImport(path) { 67 | t.assertTaggedTemplateExpression(path.node); 68 | const binding = path.scope.getBinding(path.node.tag.callee.name); 69 | 70 | if ( 71 | binding.kind !== 'module' || 72 | !t.isImportDeclaration(binding.path.parent) || 73 | !importSourceRe.test(binding.path.parent.source.value) || 74 | !t.isImportSpecifier(binding.path.node) 75 | ) { 76 | return null; 77 | } 78 | 79 | return binding.path.parentPath; 80 | }, 81 | 82 | /** Given a match, returns an evaluated name or a best guess */ 83 | getMatchName(path) { 84 | t.assertTaggedTemplateExpression(path.node); 85 | const nameArgumentPath = path.get('tag.arguments.0'); 86 | if (nameArgumentPath) { 87 | const { confident, value } = nameArgumentPath.evaluate(); 88 | if (!confident && t.isIdentifier(nameArgumentPath.node)) { 89 | return nameArgumentPath.node.name; 90 | } else if (confident && typeof value === 'string') { 91 | return value; 92 | } 93 | } 94 | 95 | return path.scope.generateUidIdentifierBasedOnNode(path.node); 96 | }, 97 | 98 | /** Given a match, hoists its expressions in front of the match's statement */ 99 | _prepareExpressions(path) { 100 | t.assertTaggedTemplateExpression(path.node); 101 | 102 | const variableDeclarators = []; 103 | const matchName = this.getMatchName(path); 104 | 105 | const hoistedExpressions = path.node.quasi.expressions.map( 106 | (expression, i) => { 107 | if ( 108 | t.isArrowFunctionExpression(expression) && 109 | t.isIdentifier(expression.body) 110 | ) { 111 | expression = expression.body; 112 | } else if ( 113 | (t.isFunctionExpression(expression) || 114 | t.isArrowFunctionExpression(expression)) && 115 | t.isBlockStatement(expression.body) && 116 | expression.body.body.length === 1 && 117 | t.isReturnStatement(expression.body.body[0]) && 118 | t.isIdentifier(expression.body.body[0].argument) 119 | ) { 120 | expression = expression.body.body[0].argument; 121 | } 122 | 123 | const isBindingExpression = 124 | t.isIdentifier(expression) && 125 | path.scope.hasBinding(expression.name); 126 | if (isBindingExpression) { 127 | const binding = path.scope.getBinding(expression.name); 128 | if (t.isVariableDeclarator(binding.path.node)) { 129 | const matchPath = binding.path.get('init'); 130 | if (this.isMatch(matchPath)) { 131 | return expression; 132 | } else if (_hoistedExpressions.has(expression.name)) { 133 | return t.identifier(_hoistedExpressions.get(expression.name)); 134 | } 135 | } 136 | } 137 | 138 | const id = path.scope.generateUidIdentifier( 139 | isBindingExpression 140 | ? `${expression.name}_expression` 141 | : `${matchName}_expression` 142 | ); 143 | 144 | variableDeclarators.push( 145 | t.variableDeclarator( 146 | id, 147 | t.callExpression(t.identifier(_patternId.name), [expression]) 148 | ) 149 | ); 150 | 151 | if (t.isIdentifier(expression)) { 152 | _hoistedExpressions.set(expression.name, id.name); 153 | } 154 | 155 | return id; 156 | } 157 | ); 158 | 159 | if (variableDeclarators.length) { 160 | path 161 | .getStatementParent() 162 | .insertBefore(t.variableDeclaration('var', variableDeclarators)); 163 | } 164 | 165 | return hoistedExpressions.map((id) => { 166 | const binding = path.scope.getBinding(id.name); 167 | if (binding && t.isVariableDeclarator(binding.path.node)) { 168 | const matchPath = binding.path.get('init'); 169 | if (this.isMatch(matchPath)) { 170 | return { fn: true, id: id.name }; 171 | } 172 | } 173 | 174 | const input = t.isStringLiteral(id) 175 | ? JSON.stringify(id.value) 176 | : id.name; 177 | return { fn: false, id: input }; 178 | }); 179 | }, 180 | 181 | _prepareTransform(path) { 182 | const transformNode = path.node.tag.arguments[1]; 183 | 184 | if (!transformNode) return null; 185 | if (t.isIdentifier(transformNode)) return transformNode.name; 186 | 187 | const matchName = this.getMatchName(path); 188 | const id = path.scope.generateUidIdentifier(`${matchName}_transform`); 189 | const declarator = t.variableDeclarator(id, transformNode); 190 | 191 | path 192 | .getStatementParent() 193 | .insertBefore(t.variableDeclaration('var', [declarator])); 194 | 195 | return id.name; 196 | }, 197 | 198 | minifyMatch(path) { 199 | const quasis = path.node.quasi.quasis.map((x) => 200 | t.stringLiteral(x.value.cooked.replace(/\s*/g, '')) 201 | ); 202 | const expressions = path.node.quasi.expressions; 203 | const transform = this._prepareTransform(path); 204 | 205 | path.replaceWith( 206 | t.callExpression(path.node.tag, [ 207 | t.arrayExpression(quasis), 208 | ...expressions, 209 | ]) 210 | ); 211 | }, 212 | 213 | transformMatch(path) { 214 | let name = path.node.tag.arguments[0]; 215 | if (!name) { 216 | name = t.nullLiteral(); 217 | } 218 | 219 | const quasis = path.node.quasi.quasis.map((x) => x.value.cooked); 220 | 221 | const expressions = this._prepareExpressions(path); 222 | const transform = this._prepareTransform(path); 223 | 224 | let ast; 225 | try { 226 | ast = parse(quasis, expressions); 227 | } catch (error) { 228 | if (error.name !== 'SyntaxError') throw error; 229 | throw path.get('quasi').buildCodeFrameError(error.message); 230 | } 231 | 232 | const code = astRoot(ast, '%%name%%', transform && '%%transform%%'); 233 | 234 | path.replaceWith( 235 | template.expression(code)(transform ? { name, transform } : { name }) 236 | ); 237 | }, 238 | }; 239 | } 240 | -------------------------------------------------------------------------------- /src/babel/__snapshots__/plugin.test.js.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`deduplicates hoisted expressions 1`] = ` 4 | "import { match, __pattern as _pattern } from \\"reghex\\"; 5 | const re = /1/; 6 | const str = '1'; 7 | 8 | var _re_expression = _pattern(re), 9 | _str_expression = _pattern(str); 10 | 11 | const a = function (state) { 12 | var y1 = state.y, 13 | x1 = state.x; 14 | var node = []; 15 | var x; 16 | 17 | if ((x = _re_expression(state)) != null) { 18 | node.push(x); 19 | } else { 20 | state.y = y1; 21 | state.x = x1; 22 | return; 23 | } 24 | 25 | if ((x = _str_expression(state)) != null) { 26 | node.push(x); 27 | } else { 28 | state.y = y1; 29 | state.x = x1; 30 | return; 31 | } 32 | 33 | if ('a') node.tag = 'a'; 34 | return node; 35 | }; 36 | 37 | var _b_expression = _pattern('2'); 38 | 39 | const b = function (state) { 40 | var y1 = state.y, 41 | x1 = state.x; 42 | var node = []; 43 | var x; 44 | 45 | if ((x = _re_expression(state)) != null) { 46 | node.push(x); 47 | } else { 48 | state.y = y1; 49 | state.x = x1; 50 | return; 51 | } 52 | 53 | if ((x = _b_expression(state)) != null) { 54 | node.push(x); 55 | } else { 56 | state.y = y1; 57 | state.x = x1; 58 | return; 59 | } 60 | 61 | if ('b') node.tag = 'b'; 62 | return node; 63 | };" 64 | `; 65 | 66 | exports[`works together with @babel/plugin-transform-modules-commonjs 1`] = ` 67 | "\\"use strict\\"; 68 | 69 | var _reghex = require(\\"reghex\\"); 70 | 71 | var _node_expression = (0, _reghex.__pattern)(1), 72 | _node_expression2 = (0, _reghex.__pattern)(2); 73 | 74 | const node = function (state) { 75 | var y1 = state.y, 76 | x1 = state.x; 77 | var node = []; 78 | var x; 79 | 80 | if ((x = _node_expression(state)) != null) { 81 | node.push(x); 82 | } else { 83 | state.y = y1; 84 | state.x = x1; 85 | return; 86 | } 87 | 88 | if ((x = _node_expression2(state)) != null) { 89 | node.push(x); 90 | } else { 91 | state.y = y1; 92 | state.x = x1; 93 | return; 94 | } 95 | 96 | if ('node') node.tag = 'node'; 97 | return node; 98 | };" 99 | `; 100 | 101 | exports[`works while only minifying 1`] = ` 102 | "import { match } from 'reghex/macro'; 103 | const node = match('node')([\\"\\", \\"+|\\", \\"+(\\", \\"(\\", \\"?\\", \\"))*\\"], 1, 2, 3, 4, 5);" 104 | `; 105 | 106 | exports[`works with local recursion 1`] = ` 107 | "import { match as m, tag, __pattern as _pattern } from 'reghex'; 108 | 109 | var _inner_expression = _pattern(/inner/); 110 | 111 | const inner = function (state) { 112 | var y1 = state.y, 113 | x1 = state.x; 114 | var node = []; 115 | var x; 116 | 117 | if ((x = _inner_expression(state)) != null) { 118 | node.push(x); 119 | } else { 120 | state.y = y1; 121 | state.x = x1; 122 | return; 123 | } 124 | 125 | if ('inner') node.tag = 'inner'; 126 | return node; 127 | }; 128 | 129 | const node = function (state) { 130 | var y1 = state.y, 131 | x1 = state.x; 132 | var node = []; 133 | var x; 134 | 135 | if ((x = inner(state)) != null) { 136 | node.push(x); 137 | } else { 138 | state.y = y1; 139 | state.x = x1; 140 | return; 141 | } 142 | 143 | if ('node') node.tag = 'node'; 144 | return node; 145 | };" 146 | `; 147 | 148 | exports[`works with nameless matchers 1`] = ` 149 | "import { match, __pattern as _pattern } from \\"reghex\\"; 150 | 151 | var _objectObject_expression = _pattern(1), 152 | _objectObject_expression2 = _pattern(2), 153 | _objectObject_expression3 = _pattern(3), 154 | _objectObject_expression4 = _pattern(4), 155 | _objectObject_expression5 = _pattern(5); 156 | 157 | const node = function (state) { 158 | var y1 = state.y, 159 | x1 = state.x; 160 | var node = []; 161 | var x; 162 | 163 | alt_2: { 164 | block_2: { 165 | var y2 = state.y, 166 | x2 = state.x; 167 | 168 | if ((x = _objectObject_expression(state)) != null) { 169 | node.push(x); 170 | } else { 171 | state.y = y2; 172 | state.x = x2; 173 | break block_2; 174 | } 175 | 176 | group_2: for (;;) { 177 | var y2 = state.y, 178 | x2 = state.x; 179 | 180 | if ((x = _objectObject_expression(state)) != null) { 181 | node.push(x); 182 | } else { 183 | state.y = y2; 184 | state.x = x2; 185 | break group_2; 186 | } 187 | } 188 | 189 | break alt_2; 190 | } 191 | 192 | if ((x = _objectObject_expression2(state)) != null) { 193 | node.push(x); 194 | } else { 195 | state.y = y1; 196 | state.x = x1; 197 | return; 198 | } 199 | 200 | group_2: for (;;) { 201 | var y2 = state.y, 202 | x2 = state.x; 203 | 204 | if ((x = _objectObject_expression2(state)) != null) { 205 | node.push(x); 206 | } else { 207 | state.y = y2; 208 | state.x = x2; 209 | break group_2; 210 | } 211 | } 212 | 213 | group_2: for (;;) { 214 | var y2 = state.y, 215 | x2 = state.x; 216 | var ln2 = node.length; 217 | 218 | if ((x = _objectObject_expression3(state)) != null) { 219 | node.push(x); 220 | } else { 221 | state.y = y2; 222 | state.x = x2; 223 | node.length = ln2; 224 | break group_2; 225 | } 226 | 227 | var y4 = state.y, 228 | x4 = state.x; 229 | 230 | if ((x = _objectObject_expression4(state)) != null) { 231 | node.push(x); 232 | } else { 233 | state.y = y4; 234 | state.x = x4; 235 | } 236 | 237 | if ((x = _objectObject_expression5(state)) != null) { 238 | node.push(x); 239 | } else { 240 | state.y = y2; 241 | state.x = x2; 242 | node.length = ln2; 243 | break group_2; 244 | } 245 | } 246 | } 247 | 248 | if (null) node.tag = null; 249 | return node; 250 | };" 251 | `; 252 | 253 | exports[`works with non-capturing groups 1`] = ` 254 | "import { match, __pattern as _pattern } from 'reghex'; 255 | 256 | var _node_expression = _pattern(1), 257 | _node_expression2 = _pattern(2), 258 | _node_expression3 = _pattern(3); 259 | 260 | const node = function (state) { 261 | var y1 = state.y, 262 | x1 = state.x; 263 | var node = []; 264 | var x; 265 | 266 | if ((x = _node_expression(state)) != null) { 267 | node.push(x); 268 | } else { 269 | state.y = y1; 270 | state.x = x1; 271 | return; 272 | } 273 | 274 | var ln2 = node.length; 275 | 276 | alt_3: { 277 | block_3: { 278 | var y3 = state.y, 279 | x3 = state.x; 280 | 281 | if ((x = _node_expression2(state)) != null) { 282 | node.push(x); 283 | } else { 284 | state.y = y3; 285 | state.x = x3; 286 | node.length = ln2; 287 | break block_3; 288 | } 289 | 290 | break alt_3; 291 | } 292 | 293 | if ((x = _node_expression3(state)) == null) { 294 | state.y = y1; 295 | state.x = x1; 296 | node.length = ln2; 297 | return; 298 | } 299 | 300 | group_3: for (;;) { 301 | var y3 = state.y, 302 | x3 = state.x; 303 | 304 | if ((x = _node_expression3(state)) == null) { 305 | state.y = y3; 306 | state.x = x3; 307 | break group_3; 308 | } 309 | } 310 | } 311 | 312 | if ('node') node.tag = 'node'; 313 | return node; 314 | };" 315 | `; 316 | 317 | exports[`works with self-referential thunks 1`] = ` 318 | "import { match, tag, __pattern as _pattern } from 'reghex'; 319 | 320 | const inner = function (state) { 321 | var y1 = state.y, 322 | x1 = state.x; 323 | var node = []; 324 | var x; 325 | 326 | if ((x = node(state)) != null) { 327 | node.push(x); 328 | } else { 329 | state.y = y1; 330 | state.x = x1; 331 | return; 332 | } 333 | 334 | if ('inner') node.tag = 'inner'; 335 | return node; 336 | }; 337 | 338 | const node = function (state) { 339 | var y1 = state.y, 340 | x1 = state.x; 341 | var node = []; 342 | var x; 343 | 344 | if ((x = inner(state)) != null) { 345 | node.push(x); 346 | } else { 347 | state.y = y1; 348 | state.x = x1; 349 | return; 350 | } 351 | 352 | if ('node') node.tag = 'node'; 353 | return node; 354 | };" 355 | `; 356 | 357 | exports[`works with standard features 1`] = ` 358 | "import { match, __pattern as _pattern } from \\"reghex\\"; 359 | 360 | var _node_expression = _pattern(1), 361 | _node_expression2 = _pattern(2), 362 | _node_expression3 = _pattern(3), 363 | _node_expression4 = _pattern(4), 364 | _node_expression5 = _pattern(5); 365 | 366 | const node = function (state) { 367 | var y1 = state.y, 368 | x1 = state.x; 369 | var node = []; 370 | var x; 371 | 372 | alt_2: { 373 | block_2: { 374 | var y2 = state.y, 375 | x2 = state.x; 376 | 377 | if ((x = _node_expression(state)) != null) { 378 | node.push(x); 379 | } else { 380 | state.y = y2; 381 | state.x = x2; 382 | break block_2; 383 | } 384 | 385 | group_2: for (;;) { 386 | var y2 = state.y, 387 | x2 = state.x; 388 | 389 | if ((x = _node_expression(state)) != null) { 390 | node.push(x); 391 | } else { 392 | state.y = y2; 393 | state.x = x2; 394 | break group_2; 395 | } 396 | } 397 | 398 | break alt_2; 399 | } 400 | 401 | if ((x = _node_expression2(state)) != null) { 402 | node.push(x); 403 | } else { 404 | state.y = y1; 405 | state.x = x1; 406 | return; 407 | } 408 | 409 | group_2: for (;;) { 410 | var y2 = state.y, 411 | x2 = state.x; 412 | 413 | if ((x = _node_expression2(state)) != null) { 414 | node.push(x); 415 | } else { 416 | state.y = y2; 417 | state.x = x2; 418 | break group_2; 419 | } 420 | } 421 | 422 | group_2: for (;;) { 423 | var y2 = state.y, 424 | x2 = state.x; 425 | var ln2 = node.length; 426 | 427 | if ((x = _node_expression3(state)) != null) { 428 | node.push(x); 429 | } else { 430 | state.y = y2; 431 | state.x = x2; 432 | node.length = ln2; 433 | break group_2; 434 | } 435 | 436 | var y4 = state.y, 437 | x4 = state.x; 438 | 439 | if ((x = _node_expression4(state)) != null) { 440 | node.push(x); 441 | } else { 442 | state.y = y4; 443 | state.x = x4; 444 | } 445 | 446 | if ((x = _node_expression5(state)) != null) { 447 | node.push(x); 448 | } else { 449 | state.y = y2; 450 | state.x = x2; 451 | node.length = ln2; 452 | break group_2; 453 | } 454 | } 455 | } 456 | 457 | if ('node') node.tag = 'node'; 458 | return node; 459 | };" 460 | `; 461 | 462 | exports[`works with transform functions 1`] = ` 463 | "import { match, __pattern as _pattern } from 'reghex'; 464 | 465 | var _inner_transform = x => x; 466 | 467 | const first = function (state) { 468 | var y1 = state.y, 469 | x1 = state.x; 470 | var node = []; 471 | var x; 472 | if ('inner') node.tag = 'inner'; 473 | return _inner_transform(node); 474 | }; 475 | 476 | const transform = x => x; 477 | 478 | const second = function (state) { 479 | var y1 = state.y, 480 | x1 = state.x; 481 | var node = []; 482 | var x; 483 | if ('node') node.tag = 'node'; 484 | return transform(node); 485 | };" 486 | `; 487 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | reghex 3 |
4 |
5 | 6 | The magical sticky regex-based parser generator 7 | 8 |
9 |
10 |
11 |
12 | 13 | Leveraging the power of sticky regexes and JS code generation, `reghex` allows 14 | you to code parsers quickly, by surrounding regular expressions with a regex-like 15 | [DSL](https://en.wikipedia.org/wiki/Domain-specific_language). 16 | 17 | With `reghex` you can generate a parser from a tagged template literal, which is 18 | quick to prototype and generates reasonably compact and performant code. 19 | 20 | _This project is still in its early stages and is experimental. Its API may still 21 | change and some issues may need to be ironed out._ 22 | 23 | ## Quick Start 24 | 25 | ##### 1. Install with yarn or npm 26 | 27 | ```sh 28 | yarn add reghex 29 | # or 30 | npm install --save reghex 31 | ``` 32 | 33 | ##### 2. Add the plugin to your Babel configuration _(optional)_ 34 | 35 | In your `.babelrc`, `babel.config.js`, or `package.json:babel` add: 36 | 37 | ```json 38 | { 39 | "plugins": ["reghex/babel"] 40 | } 41 | ``` 42 | 43 | Alternatively, you can set up [`babel-plugin-macros`](https://github.com/kentcdodds/babel-plugin-macros) and 44 | import `reghex` from `"reghex/macro"` instead. 45 | 46 | This step is **optional**. `reghex` can also generate its optimised JS code during runtime. 47 | This will only incur a tiny parsing cost on initialisation, but due to the JIT of modern 48 | JS engines there won't be any difference in performance between pre-compiled and compiled 49 | versions otherwise. 50 | 51 | Since the `reghex` runtime is rather small, for larger grammars it may even make sense not 52 | to precompile the matchers at all. For this case you may pass the `{ "codegen": false }` 53 | option to the Babel plugin, which will minify the `reghex` matcher templates without 54 | precompiling them. 55 | 56 | ##### 3. Have fun writing parsers! 57 | 58 | ```js 59 | import { match, parse } from 'reghex'; 60 | 61 | const name = match('name')` 62 | ${/\w+/} 63 | `; 64 | 65 | parse(name)('hello'); 66 | // [ "hello", .tag = "name" ] 67 | ``` 68 | 69 | ## Concepts 70 | 71 | The fundamental concept of `reghex` are regexes, specifically 72 | [sticky regexes](https://www.loganfranken.com/blog/831/es6-everyday-sticky-regex-matches/)! 73 | These are regular expressions that don't search a target string, but instead match at the 74 | specific position they're at. The flag for sticky regexes is `y` and hence 75 | they can be created using `/phrase/y` or `new RegExp('phrase', 'y')`. 76 | 77 | **Sticky Regexes** are the perfect foundation for a parsing framework in JavaScript! 78 | Because they only match at a single position they can be used to match patterns 79 | continuously, as a parser would. Like global regexes, we can then manipulate where 80 | they should be matched by setting `regex.lastIndex = index;` and after matching 81 | read back their updated `regex.lastIndex`. 82 | 83 | > **Note:** Sticky Regexes aren't natively 84 | > [supported in any versions of Internet Explorer](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/sticky#Browser_compatibility). `reghex` works around this by imitating its behaviour, which may decrease performance on IE11. 85 | 86 | This primitive allows us to build up a parser from regexes that you pass when 87 | authoring a parser function, also called a "matcher" in `reghex`. When `reghex` compiles 88 | to parser code, this code is just a sequence and combination of sticky regexes that 89 | are executed in order! 90 | 91 | ```js 92 | let input = 'phrases should be parsed...'; 93 | let lastIndex = 0; 94 | 95 | const regex = /phrase/y; 96 | function matcher() { 97 | let match; 98 | // Before matching we set the current index on the RegExp 99 | regex.lastIndex = lastIndex; 100 | // Then we match and store the result 101 | if ((match = regex.exec(input))) { 102 | // If the RegExp matches successfully, we update our lastIndex 103 | lastIndex = regex.lastIndex; 104 | } 105 | } 106 | ``` 107 | 108 | This mechanism is used in all matcher functions that `reghex` generates. 109 | Internally `reghex` keeps track of the input string and the current index on 110 | that string, and the matcher functions execute regexes against this state. 111 | 112 | ## Authoring Guide 113 | 114 | You can write "matchers" by importing the `match` import from `reghex` and 115 | using it to write a matcher expression. 116 | 117 | ```js 118 | import { match } from 'reghex'; 119 | 120 | const name = match('name')` 121 | ${/\w+/} 122 | `; 123 | ``` 124 | 125 | As can be seen above, the `match` function, is called with a "node name" and 126 | is then called as a tagged template. This template is our **parsing definition**. 127 | 128 | `reghex` functions only with its Babel plugin, which will detect `match('name')` 129 | and replace the entire tag with a parsing function, which may then look like 130 | the following in your transpiled code: 131 | 132 | ```js 133 | import { _pattern /* ... */ } from 'reghex'; 134 | 135 | var _name_expression = _pattern(/\w+/); 136 | var name = function name() { 137 | /* ... */ 138 | }; 139 | ``` 140 | 141 | We've now successfully created a matcher, which matches a single regex, which 142 | is a pattern of one or more letters. We can execute this matcher by calling 143 | it with the curried `parse` utility: 144 | 145 | ```js 146 | import { parse } from 'reghex'; 147 | 148 | const result = parse(name)('Tim'); 149 | 150 | console.log(result); // [ "Tim", .tag = "name" ] 151 | console.log(result.tag); // "name" 152 | ``` 153 | 154 | If the string (Here: "Tim") was parsed successfully by the matcher, it will 155 | return an array that contains the result of the regex. The array is special 156 | in that it will also have a `tag` property set to the matcher's name, here 157 | `"name"`, which we determined when we defined the matcher as `match('name')`. 158 | 159 | ```js 160 | import { parse } from 'reghex'; 161 | parse(name)('42'); // undefined 162 | ``` 163 | 164 | Similarly, if the matcher does not parse an input string successfully, it will 165 | return `undefined` instead. 166 | 167 | ### Nested matchers 168 | 169 | This on its own is nice, but a parser must be able to traverse a string and 170 | turn it into an [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree). 171 | To introduce nesting to `reghex` matchers, we can refer to one matcher in another! 172 | Let's extend our original example; 173 | 174 | ```js 175 | import { match } from 'reghex'; 176 | 177 | const name = match('name')` 178 | ${/\w+/} 179 | `; 180 | 181 | const hello = match('hello')` 182 | ${/hello /} ${name} 183 | `; 184 | ``` 185 | 186 | The new `hello` matcher is set to match `/hello /` and then attempts to match 187 | the `name` matcher afterwards. If either of these matchers fail, it will return 188 | `undefined` as well and roll back its changes. Using this matcher will give us 189 | **nested abstract output**. 190 | 191 | We can also see in this example that _outside_ of the regex interpolations, 192 | whitespace and newlines don't matter. 193 | 194 | ```js 195 | import { parse } from 'reghex'; 196 | 197 | parse(hello)('hello tim'); 198 | /* 199 | [ 200 | "hello", 201 | ["tim", .tag = "name"], 202 | .tag = "hello" 203 | ] 204 | */ 205 | ``` 206 | 207 | Furthermore, interpolations don't have to just be RegHex matchers. They can 208 | also be functions returning matchers or completely custom matching functions. 209 | This is useful when your DSL becomes _self-referential_, i.e. when one matchers 210 | start referencing each other forming a loop. To fix this we can create a 211 | function that returns our root matcher: 212 | 213 | ```js 214 | import { match } from 'reghex'; 215 | 216 | const value = match('value')` 217 | (${/\w+/} | ${() => root})+ 218 | `; 219 | 220 | const root = match('root')` 221 | ${/root/}+ ${value} 222 | `; 223 | ``` 224 | 225 | ### Regex-like DSL 226 | 227 | We've seen in the previous examples that matchers are authored using tagged 228 | template literals, where interpolations can either be filled using regexes, 229 | `${/pattern/}`, or with other matchers `${name}`. 230 | 231 | The tagged template syntax supports more ways to match these interpolations, 232 | using a regex-like Domain Specific Language. Unlike in regexes, whitespace 233 | and newlines don't matter, which makes it easier to format and read matchers. 234 | 235 | We can create **sequences** of matchers by adding multiple expressions in 236 | a row. A matcher using `${/1/} ${/2/}` will attempt to match `1` and then `2` 237 | in the parsed string. This is just one feature of the regex-like DSL. The 238 | available operators are the following: 239 | 240 | | Operator | Example | Description | 241 | | -------- | ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 242 | | `?` | `${/1/}?` | An **optional** may be used to make an interpolation optional. This means that the interpolation may or may not match. | 243 | | `*` | `${/1/}*` | A **star** can be used to match an arbitrary amount of interpolation or none at all. This means that the interpolation may repeat itself or may not be matched at all. | 244 | | `+` | `${/1/}+` | A **plus** is used like `*` and must match one or more times. When the matcher doesn't match, that's considered a failing case, since the match isn't optional. | 245 | | `\|` | `${/1/} \| ${/2/}` | An **alternation** can be used to match either one thing or another, falling back when the first interpolation fails. | 246 | | `()` | `(${/1/} ${/2/})+` | A **group** can be used to apply one of the other operators to an entire group of interpolations. | 247 | | `(?: )` | `(?: ${/1/})` | A **non-capturing group** is like a regular group, but the interpolations matched inside it don't appear in the parser's output. | 248 | | `(?= )` | `(?= ${/1/})` | A **positive lookahead** checks whether interpolations match, and if so continues the matcher without changing the input. If it matches, it's essentially ignored. | 249 | | `(?! )` | `(?! ${/1/})` | A **negative lookahead** checks whether interpolations _don't_ match, and if so continues the matcher without changing the input. If the interpolations do match the matcher is aborted. | 250 | 251 | A couple of operators also support "short hands" that allow you to write 252 | lookaheads or non-capturing groups a little quicker. 253 | 254 | | Shorthand | Example | Description | 255 | | --------- | --------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 256 | | `:` | `:${/1/}` | A **non-capturing group** is like a regular group, but the interpolations matched inside it don't appear in the parser's output. | 257 | | `=` | `=${/1/}` | A **positive lookahead** checks whether interpolations match, and if so continues the matcher without changing the input. If it matches, it's essentially ignored. | 258 | | `!` | `!${/1/}` | A **negative lookahead** checks whether interpolations _don't_ match, and if so continues the matcher without changing the input. If the interpolations do match the matcher is aborted. | 259 | 260 | We can combine and compose these operators to create more complex matchers. 261 | For instance, we can extend the original example to only allow a specific set 262 | of names by using the `|` operator: 263 | 264 | ```js 265 | const name = match('name')` 266 | ${/tim/} | ${/tom/} | ${/tam/} 267 | `; 268 | 269 | parse(name)('tim'); // [ "tim", .tag = "name" ] 270 | parse(name)('tom'); // [ "tom", .tag = "name" ] 271 | parse(name)('patrick'); // undefined 272 | ``` 273 | 274 | The above will now only match specific name strings. When one pattern in this 275 | chain of **alternations** does not match, it will try the next one. 276 | 277 | We can also use **groups** to add more matchers around the alternations themselves, 278 | by surrounding the alternations with `(` and `)` 279 | 280 | ```js 281 | const name = match('name')` 282 | (${/tim/} | ${/tom/}) ${/!/} 283 | `; 284 | 285 | parse(name)('tim!'); // [ "tim", "!", .tag = "name" ] 286 | parse(name)('tom!'); // [ "tom", "!", .tag = "name" ] 287 | parse(name)('tim'); // undefined 288 | ``` 289 | 290 | Maybe we're also not that interested in the `"!"` showing up in the output node. 291 | If we want to get rid of it, we can use a **non-capturing group** to hide it, 292 | while still requiring it. 293 | 294 | ```js 295 | const name = match('name')` 296 | (${/tim/} | ${/tom/}) (?: ${/!/}) 297 | `; 298 | 299 | parse(name)('tim!'); // [ "tim", .tag = "name" ] 300 | parse(name)('tim'); // undefined 301 | ``` 302 | 303 | Lastly, like with regexes, `?`, `*`, and `+` may be used as "quantifiers". The first two 304 | may also be optional and _not_ match their patterns without the matcher failing. 305 | The `+` operator is used to match an interpolation _one or more_ times, while the 306 | `*` operators may match _zero or more_ times. Let's use this to allow the `"!"` 307 | to repeat. 308 | 309 | ```js 310 | const name = match('name')` 311 | (${/tim/} | ${/tom/})+ (?: ${/!/})* 312 | `; 313 | 314 | parse(name)('tim!'); // [ "tim", .tag = "name" ] 315 | parse(name)('tim!!!!'); // [ "tim", .tag = "name" ] 316 | parse(name)('tim'); // [ "tim", .tag = "name" ] 317 | parse(name)('timtim'); // [ "tim", tim", .tag = "name" ] 318 | ``` 319 | 320 | As we can see from the above, like in regexes, quantifiers can be combined with groups, 321 | non-capturing groups, or other groups. 322 | 323 | ### Transforming as we match 324 | 325 | In the previous sections, we've seen that the **nodes** that `reghex` outputs are arrays containing 326 | match strings or other nodes and have a special `tag` property with the node's type. 327 | We can **change this output** while we're parsing by passing a function to our matcher definition. 328 | 329 | ```js 330 | const name = match('name', (x) => x[0])` 331 | (${/tim/} | ${/tom/}) ${/!/} 332 | `; 333 | 334 | parse(name)('tim'); // "tim" 335 | ``` 336 | 337 | In the above example, we're passing a small function, `x => x[0]` to the matcher as a 338 | second argument. This will change the matcher's output, which causes the parser to 339 | now return a new output for this matcher. 340 | 341 | We can use this function creatively by outputting full AST nodes, maybe even like the 342 | ones that resemble Babel's output: 343 | 344 | ```js 345 | const identifier = match('identifier', (x) => ({ 346 | type: 'Identifier', 347 | name: x[0], 348 | }))` 349 | ${/[\w_][\w\d_]+/} 350 | `; 351 | 352 | parse(name)('var_name'); // { type: "Identifier", name: "var_name" } 353 | ``` 354 | 355 | We've now entirely changed the output of the parser for this matcher. Given that each 356 | matcher can change its output, we're free to change the parser's output entirely. 357 | By returning `null` or `undefined` in this matcher, we can also change the matcher 358 | to not have matched, which would cause other matchers to treat it like a mismatch! 359 | 360 | ```js 361 | import { match, parse } from 'reghex'; 362 | 363 | const name = match('name')((x) => { 364 | return x[0] !== 'tim' ? x : undefined; 365 | })` 366 | ${/\w+/} 367 | `; 368 | 369 | const hello = match('hello')` 370 | ${/hello /} ${name} 371 | `; 372 | 373 | parse(name)('tom'); // ["hello", ["tom", .tag = "name"], .tag = "hello"] 374 | parse(name)('tim'); // undefined 375 | ``` 376 | 377 | Lastly, if we need to create these special array nodes ourselves, we can use `reghex`'s 378 | `tag` export for this purpose. 379 | 380 | ```js 381 | import { tag } from 'reghex'; 382 | 383 | tag(['test'], 'node_name'); 384 | // ["test", .tag = "node_name"] 385 | ``` 386 | 387 | ### Tagged Template Parsing 388 | 389 | Any grammar in RegHex can also be used to parse a tagged template literal. 390 | A tagged template literal consists of a list of literals alternating with 391 | a list of "interpolations". 392 | 393 | In RegHex we can add an `interpolation` matcher to our grammars to allow it 394 | to parse interpolations in a template literal. 395 | 396 | ```js 397 | import { interpolation } from 'reghex'; 398 | 399 | const anyNumber = interpolation((x) => typeof x === 'number'); 400 | 401 | const num = match('num')` 402 | ${/[+-]?/} ${anyNumber} 403 | `; 404 | 405 | parse(num)`+${42}`; 406 | // ["+", 42, .tag = "num"] 407 | ``` 408 | 409 | This grammar now allows us to match arbitrary values if they're input into the 410 | parser. We can now call our grammar using a tagged template literal themselves 411 | to parse this. 412 | 413 | **That's it! May the RegExp be ever in your favor.** 414 | -------------------------------------------------------------------------------- /src/core.test.js: -------------------------------------------------------------------------------- 1 | import { parse, match, interpolation } from './core'; 2 | 3 | const expectToParse = (node, input, result, lastIndex = 0) => { 4 | const state = { quasis: [input], expressions: [], x: 0, y: 0 }; 5 | if (result) result.tag = 'node'; 6 | expect(node(state)).toEqual(result); 7 | 8 | // NOTE: After parsing we expect the current index to exactly match the 9 | // sum amount of matched characters 10 | if (result === undefined) { 11 | expect(state.y).toBe(0); 12 | } else { 13 | const index = lastIndex || result.reduce((acc, x) => acc + x.length, 0); 14 | expect(state.y).toBe(index); 15 | } 16 | }; 17 | 18 | describe('can create nameless matchers', () => { 19 | it('matches without tagging', () => { 20 | const state = { quasis: ['1'], expressions: [], x: 0, y: 0 }; 21 | const node = match(null)`${/1/}`; 22 | expect(node(state)).toEqual(['1']); 23 | }); 24 | }); 25 | 26 | describe('required matcher', () => { 27 | const node = match('node')`${/1/}`; 28 | it.each` 29 | input | result 30 | ${'1'} | ${['1']} 31 | ${''} | ${undefined} 32 | `('should return $result when $input is passed', ({ input, result }) => { 33 | expectToParse(node, input, result); 34 | }); 35 | 36 | it('matches empty regex patterns', () => { 37 | const node = match('node')`${/[ ]*/}`; 38 | expectToParse(node, '', ['']); 39 | }); 40 | }); 41 | 42 | describe('optional matcher', () => { 43 | const node = match('node')`${/1/}?`; 44 | it.each` 45 | input | result 46 | ${'1'} | ${['1']} 47 | ${'_'} | ${[]} 48 | ${''} | ${[]} 49 | `('should return $result when $input is passed', ({ input, result }) => { 50 | expectToParse(node, input, result); 51 | }); 52 | }); 53 | 54 | describe('star matcher', () => { 55 | const node = match('node')`${/1/}*`; 56 | it.each` 57 | input | result 58 | ${'1'} | ${['1']} 59 | ${'11'} | ${['1', '1']} 60 | ${'111'} | ${['1', '1', '1']} 61 | ${'_'} | ${[]} 62 | ${''} | ${[]} 63 | `('should return $result when "$input" is passed', ({ input, result }) => { 64 | expectToParse(node, input, result); 65 | }); 66 | }); 67 | 68 | describe('plus matcher', () => { 69 | const node = match('node')`${/1/}+`; 70 | it.each` 71 | input | result 72 | ${'1'} | ${['1']} 73 | ${'11'} | ${['1', '1']} 74 | ${'111'} | ${['1', '1', '1']} 75 | ${'_'} | ${undefined} 76 | ${''} | ${undefined} 77 | `('should return $result when "$input" is passed', ({ input, result }) => { 78 | expectToParse(node, input, result); 79 | }); 80 | }); 81 | 82 | describe('optional then required matcher', () => { 83 | const node = match('node')`${/1/}? ${/2/}`; 84 | it.each` 85 | input | result 86 | ${'12'} | ${['1', '2']} 87 | ${'2'} | ${['2']} 88 | ${''} | ${undefined} 89 | `('should return $result when $input is passed', ({ input, result }) => { 90 | expectToParse(node, input, result); 91 | }); 92 | }); 93 | 94 | describe('star then required matcher', () => { 95 | const node = match('node')`${/1/}* ${/2/}`; 96 | it.each` 97 | input | result 98 | ${'12'} | ${['1', '2']} 99 | ${'112'} | ${['1', '1', '2']} 100 | ${'2'} | ${['2']} 101 | ${''} | ${undefined} 102 | `('should return $result when $input is passed', ({ input, result }) => { 103 | expectToParse(node, input, result); 104 | }); 105 | }); 106 | 107 | describe('plus then required matcher', () => { 108 | const node = match('node')`${/1/}+ ${/2/}`; 109 | it.each` 110 | input | result 111 | ${'12'} | ${['1', '2']} 112 | ${'112'} | ${['1', '1', '2']} 113 | ${'2'} | ${undefined} 114 | ${''} | ${undefined} 115 | `('should return $result when $input is passed', ({ input, result }) => { 116 | expectToParse(node, input, result); 117 | }); 118 | }); 119 | 120 | describe('optional group then required matcher', () => { 121 | const node = match('node')`(${/1/} ${/2/})? ${/3/}`; 122 | it.each` 123 | input | result 124 | ${'123'} | ${['1', '2', '3']} 125 | ${'3'} | ${['3']} 126 | ${'23'} | ${undefined} 127 | ${'_'} | ${undefined} 128 | `('should return $result when $input is passed', ({ input, result }) => { 129 | expectToParse(node, input, result); 130 | }); 131 | }); 132 | 133 | describe('star group then required matcher', () => { 134 | const node = match('node')`(${/1/} ${/2/})* ${/3/}`; 135 | it.each` 136 | input | result 137 | ${'123'} | ${['1', '2', '3']} 138 | ${'12123'} | ${['1', '2', '1', '2', '3']} 139 | ${'3'} | ${['3']} 140 | ${'23'} | ${undefined} 141 | ${'13'} | ${undefined} 142 | ${'_'} | ${undefined} 143 | `('should return $result when $input is passed', ({ input, result }) => { 144 | expectToParse(node, input, result); 145 | }); 146 | }); 147 | 148 | describe('plus group then required matcher', () => { 149 | const node = match('node')`(${/1/} ${/2/})+ ${/3/}`; 150 | it.each` 151 | input | result 152 | ${'123'} | ${['1', '2', '3']} 153 | ${'12123'} | ${['1', '2', '1', '2', '3']} 154 | ${'23'} | ${undefined} 155 | ${'3'} | ${undefined} 156 | ${'13'} | ${undefined} 157 | ${'_'} | ${undefined} 158 | `('should return $result when $input is passed', ({ input, result }) => { 159 | expectToParse(node, input, result); 160 | }); 161 | }); 162 | 163 | describe('optional group with nested optional matcher, then required matcher', () => { 164 | const node = match('node')`(${/1/}? ${/2/})? ${/3/}`; 165 | it.each` 166 | input | result 167 | ${'123'} | ${['1', '2', '3']} 168 | ${'23'} | ${['2', '3']} 169 | ${'3'} | ${['3']} 170 | ${'13'} | ${undefined} 171 | ${'_'} | ${undefined} 172 | `('should return $result when $input is passed', ({ input, result }) => { 173 | expectToParse(node, input, result); 174 | }); 175 | }); 176 | 177 | describe('star group with nested optional matcher, then required matcher', () => { 178 | const node = match('node')`(${/1/}? ${/2/})* ${/3/}`; 179 | it.each` 180 | input | result 181 | ${'123'} | ${['1', '2', '3']} 182 | ${'23'} | ${['2', '3']} 183 | ${'223'} | ${['2', '2', '3']} 184 | ${'2123'} | ${['2', '1', '2', '3']} 185 | ${'3'} | ${['3']} 186 | ${'13'} | ${undefined} 187 | ${'_'} | ${undefined} 188 | `('should return $result when $input is passed', ({ input, result }) => { 189 | expectToParse(node, input, result); 190 | }); 191 | }); 192 | 193 | describe('plus group with nested optional matcher, then required matcher', () => { 194 | const node = match('node')`(${/1/}? ${/2/})+ ${/3/}`; 195 | it.each` 196 | input | result 197 | ${'123'} | ${['1', '2', '3']} 198 | ${'23'} | ${['2', '3']} 199 | ${'223'} | ${['2', '2', '3']} 200 | ${'2123'} | ${['2', '1', '2', '3']} 201 | ${'3'} | ${undefined} 202 | ${'13'} | ${undefined} 203 | ${'_'} | ${undefined} 204 | `('should return $result when $input is passed', ({ input, result }) => { 205 | expectToParse(node, input, result); 206 | }); 207 | }); 208 | 209 | describe('plus group with nested plus matcher, then required matcher', () => { 210 | const node = match('node')`(${/1/}+ ${/2/})+ ${/3/}`; 211 | it.each` 212 | input | result 213 | ${'123'} | ${['1', '2', '3']} 214 | ${'1123'} | ${['1', '1', '2', '3']} 215 | ${'12123'} | ${['1', '2', '1', '2', '3']} 216 | ${'121123'} | ${['1', '2', '1', '1', '2', '3']} 217 | ${'3'} | ${undefined} 218 | ${'23'} | ${undefined} 219 | ${'13'} | ${undefined} 220 | ${'_'} | ${undefined} 221 | `('should return $result when $input is passed', ({ input, result }) => { 222 | expectToParse(node, input, result); 223 | }); 224 | }); 225 | 226 | describe('plus group with nested required and plus matcher, then required matcher', () => { 227 | const node = match('node')`(${/1/} ${/2/}+)+ ${/3/}`; 228 | it.each` 229 | input | result 230 | ${'123'} | ${['1', '2', '3']} 231 | ${'1223'} | ${['1', '2', '2', '3']} 232 | ${'122123'} | ${['1', '2', '2', '1', '2', '3']} 233 | ${'13'} | ${undefined} 234 | ${'_'} | ${undefined} 235 | `('should return $result when $input is passed', ({ input, result }) => { 236 | expectToParse(node, input, result); 237 | }); 238 | }); 239 | 240 | describe('nested plus group with nested required and plus matcher, then required matcher or alternate', () => { 241 | const node = match('node')`(${/1/} ${/2/}+)+ ${/3/} | ${/1/}`; 242 | it.each` 243 | input | result 244 | ${'123'} | ${['1', '2', '3']} 245 | ${'1223'} | ${['1', '2', '2', '3']} 246 | ${'122123'} | ${['1', '2', '2', '1', '2', '3']} 247 | ${'1'} | ${['1']} 248 | ${'13'} | ${['1']} 249 | ${'_'} | ${undefined} 250 | `('should return $result when $input is passed', ({ input, result }) => { 251 | expectToParse(node, input, result); 252 | }); 253 | }); 254 | 255 | describe('nested plus group with nested required and plus matcher, then alternate', () => { 256 | const node = match('node')`(${/1/} ${/2/}+)+ (${/3/} | ${/4/})`; 257 | it.each` 258 | input | result 259 | ${'123'} | ${['1', '2', '3']} 260 | ${'124'} | ${['1', '2', '4']} 261 | ${'1223'} | ${['1', '2', '2', '3']} 262 | ${'1224'} | ${['1', '2', '2', '4']} 263 | ${'1'} | ${undefined} 264 | ${'13'} | ${undefined} 265 | ${'_'} | ${undefined} 266 | `('should return $result when $input is passed', ({ input, result }) => { 267 | expectToParse(node, input, result); 268 | }); 269 | }); 270 | 271 | describe('regular alternate', () => { 272 | const node = match('node')`${/1/} | ${/2/} | ${/3/} | ${/4/}`; 273 | it.each` 274 | input | result 275 | ${'1'} | ${['1']} 276 | ${'2'} | ${['2']} 277 | ${'3'} | ${['3']} 278 | ${'4'} | ${['4']} 279 | ${'_'} | ${undefined} 280 | `('should return $result when $input is passed', ({ input, result }) => { 281 | expectToParse(node, input, result); 282 | }); 283 | }); 284 | 285 | describe('nested alternate in nested alternate in alternate', () => { 286 | const node = match('node')`((${/1/} | ${/2/}) | ${/3/}) | ${/4/}`; 287 | it.each` 288 | input | result 289 | ${'1'} | ${['1']} 290 | ${'2'} | ${['2']} 291 | ${'3'} | ${['3']} 292 | ${'4'} | ${['4']} 293 | ${'_'} | ${undefined} 294 | `('should return $result when $input is passed', ({ input, result }) => { 295 | expectToParse(node, input, result); 296 | }); 297 | }); 298 | 299 | describe('alternate after required matcher', () => { 300 | const node = match('node')`${/1/} (${/2/} | ${/3/})`; 301 | it.each` 302 | input | result 303 | ${'12'} | ${['1', '2']} 304 | ${'13'} | ${['1', '3']} 305 | ${'14'} | ${undefined} 306 | ${'3'} | ${undefined} 307 | ${'_'} | ${undefined} 308 | `('should return $result when $input is passed', ({ input, result }) => { 309 | expectToParse(node, input, result); 310 | }); 311 | }); 312 | 313 | describe('alternate with star group and required matcher after required matcher', () => { 314 | const node = match('node')`${/1/} (${/2/}* ${/3/} | ${/4/})`; 315 | it.each` 316 | input | result 317 | ${'123'} | ${['1', '2', '3']} 318 | ${'1223'} | ${['1', '2', '2', '3']} 319 | ${'13'} | ${['1', '3']} 320 | ${'14'} | ${['1', '4']} 321 | ${'12'} | ${undefined} 322 | ${'15'} | ${undefined} 323 | ${'_'} | ${undefined} 324 | `('should return $result when $input is passed', ({ input, result }) => { 325 | expectToParse(node, input, result); 326 | }); 327 | }); 328 | 329 | describe('alternate with plus group and required matcher after required matcher', () => { 330 | const node = match('node')`${/1/} (${/2/}+ ${/3/} | ${/4/})`; 331 | it.each` 332 | input | result 333 | ${'123'} | ${['1', '2', '3']} 334 | ${'1223'} | ${['1', '2', '2', '3']} 335 | ${'14'} | ${['1', '4']} 336 | ${'13'} | ${undefined} 337 | ${'12'} | ${undefined} 338 | ${'15'} | ${undefined} 339 | ${'_'} | ${undefined} 340 | `('should return $result when $input is passed', ({ input, result }) => { 341 | expectToParse(node, input, result); 342 | }); 343 | }); 344 | 345 | describe('alternate with optional and required matcher after required matcher', () => { 346 | const node = match('node')`${/1/} (${/2/}? ${/3/} | ${/4/})`; 347 | it.each` 348 | input | result 349 | ${'123'} | ${['1', '2', '3']} 350 | ${'13'} | ${['1', '3']} 351 | ${'14'} | ${['1', '4']} 352 | ${'12'} | ${undefined} 353 | ${'15'} | ${undefined} 354 | ${'_'} | ${undefined} 355 | `('should return $result when $input is passed', ({ input, result }) => { 356 | expectToParse(node, input, result); 357 | }); 358 | }); 359 | 360 | describe('non-capturing group', () => { 361 | const node = match('node')`${/1/} (?: ${/2/}+)`; 362 | it.each` 363 | input | result | lastIndex 364 | ${'12'} | ${['1']} | ${2} 365 | ${'122'} | ${['1']} | ${3} 366 | ${'13'} | ${undefined} | ${0} 367 | ${'1'} | ${undefined} | ${0} 368 | ${'_'} | ${undefined} | ${0} 369 | `( 370 | 'should return $result when $input is passed', 371 | ({ input, result, lastIndex }) => { 372 | expectToParse(node, input, result, lastIndex); 373 | } 374 | ); 375 | }); 376 | 377 | describe('non-capturing shorthand', () => { 378 | const node = match('node')`${/1/} :${/2/}+`; 379 | it.each` 380 | input | result | lastIndex 381 | ${'12'} | ${['1']} | ${2} 382 | ${'122'} | ${['1']} | ${3} 383 | ${'13'} | ${undefined} | ${0} 384 | ${'1'} | ${undefined} | ${0} 385 | ${'_'} | ${undefined} | ${0} 386 | `( 387 | 'should return $result when $input is passed', 388 | ({ input, result, lastIndex }) => { 389 | expectToParse(node, input, result, lastIndex); 390 | } 391 | ); 392 | }); 393 | 394 | describe('non-capturing group with plus matcher, then required matcher', () => { 395 | const node = match('node')`(?: ${/1/}+) ${/2/}`; 396 | it.each` 397 | input | result | lastIndex 398 | ${'12'} | ${['2']} | ${2} 399 | ${'112'} | ${['2']} | ${3} 400 | ${'1'} | ${undefined} | ${0} 401 | ${'13'} | ${undefined} | ${0} 402 | ${'2'} | ${undefined} | ${0} 403 | ${'_'} | ${undefined} | ${0} 404 | `( 405 | 'should return $result when $input is passed', 406 | ({ input, result, lastIndex }) => { 407 | expectToParse(node, input, result, lastIndex); 408 | } 409 | ); 410 | }); 411 | 412 | describe('non-capturing group with star group and required matcher, then required matcher', () => { 413 | const node = match('node')`(?: ${/1/}* ${/2/}) ${/3/}`; 414 | it.each` 415 | input | result | lastIndex 416 | ${'123'} | ${['3']} | ${3} 417 | ${'1123'} | ${['3']} | ${4} 418 | ${'23'} | ${['3']} | ${2} 419 | ${'13'} | ${undefined} | ${0} 420 | ${'2'} | ${undefined} | ${0} 421 | ${'_'} | ${undefined} | ${0} 422 | `( 423 | 'should return $result when $input is passed', 424 | ({ input, result, lastIndex }) => { 425 | expectToParse(node, input, result, lastIndex); 426 | } 427 | ); 428 | }); 429 | 430 | describe('non-capturing group with plus group and required matcher, then required matcher', () => { 431 | const node = match('node')`(?: ${/1/}+ ${/2/}) ${/3/}`; 432 | it.each` 433 | input | result | lastIndex 434 | ${'123'} | ${['3']} | ${3} 435 | ${'1123'} | ${['3']} | ${4} 436 | ${'23'} | ${undefined} | ${0} 437 | ${'13'} | ${undefined} | ${0} 438 | ${'2'} | ${undefined} | ${0} 439 | ${'_'} | ${undefined} | ${0} 440 | `( 441 | 'should return $result when $input is passed', 442 | ({ input, result, lastIndex }) => { 443 | expectToParse(node, input, result, lastIndex); 444 | } 445 | ); 446 | }); 447 | 448 | describe('non-capturing group with optional and required matcher, then required matcher', () => { 449 | const node = match('node')`(?: ${/1/}? ${/2/}) ${/3/}`; 450 | it.each` 451 | input | result | lastIndex 452 | ${'123'} | ${['3']} | ${3} 453 | ${'23'} | ${['3']} | ${2} 454 | ${'13'} | ${undefined} | ${0} 455 | ${'2'} | ${undefined} | ${0} 456 | ${'_'} | ${undefined} | ${0} 457 | `( 458 | 'should return $result when $input is passed', 459 | ({ input, result, lastIndex }) => { 460 | expectToParse(node, input, result, lastIndex); 461 | } 462 | ); 463 | }); 464 | 465 | describe('positive lookahead group', () => { 466 | const node = match('node')`(?= ${/1/}) ${/\d/}`; 467 | it.each` 468 | input | result | lastIndex 469 | ${'1'} | ${['1']} | ${1} 470 | ${'13'} | ${['1']} | ${1} 471 | ${'2'} | ${undefined} | ${0} 472 | ${'_'} | ${undefined} | ${0} 473 | `( 474 | 'should return $result when $input is passed', 475 | ({ input, result, lastIndex }) => { 476 | expectToParse(node, input, result, lastIndex); 477 | } 478 | ); 479 | }); 480 | 481 | describe('positive lookahead shorthand', () => { 482 | const node = match('node')`=${/1/} ${/\d/}`; 483 | it.each` 484 | input | result | lastIndex 485 | ${'1'} | ${['1']} | ${1} 486 | ${'13'} | ${['1']} | ${1} 487 | ${'2'} | ${undefined} | ${0} 488 | ${'_'} | ${undefined} | ${0} 489 | `( 490 | 'should return $result when $input is passed', 491 | ({ input, result, lastIndex }) => { 492 | expectToParse(node, input, result, lastIndex); 493 | } 494 | ); 495 | }); 496 | 497 | describe('positive lookahead group with plus matcher', () => { 498 | const node = match('node')`(?= ${/1/}+) ${/\d/}`; 499 | it.each` 500 | input | result | lastIndex 501 | ${'1'} | ${['1']} | ${1} 502 | ${'11'} | ${['1']} | ${1} 503 | ${'12'} | ${['1']} | ${1} 504 | ${'22'} | ${undefined} | ${0} 505 | ${'2'} | ${undefined} | ${0} 506 | ${'_'} | ${undefined} | ${0} 507 | `( 508 | 'should return $result when $input is passed', 509 | ({ input, result, lastIndex }) => { 510 | expectToParse(node, input, result, lastIndex); 511 | } 512 | ); 513 | }); 514 | 515 | describe('positive lookahead group with plus group and required matcher', () => { 516 | const node = match('node')`(?= ${/1/}+ ${/2/}) ${/\d/}`; 517 | it.each` 518 | input | result | lastIndex 519 | ${'12'} | ${['1']} | ${1} 520 | ${'112'} | ${['1']} | ${1} 521 | ${'1123'} | ${['1']} | ${1} 522 | ${'2'} | ${undefined} | ${0} 523 | ${'1'} | ${undefined} | ${0} 524 | ${'2'} | ${undefined} | ${0} 525 | ${'_'} | ${undefined} | ${0} 526 | `( 527 | 'should return $result when $input is passed', 528 | ({ input, result, lastIndex }) => { 529 | expectToParse(node, input, result, lastIndex); 530 | } 531 | ); 532 | }); 533 | 534 | describe('negative lookahead group', () => { 535 | const node = match('node')`(?! ${/1/}) ${/\d/}`; 536 | it.each` 537 | input | result | lastIndex 538 | ${'2'} | ${['2']} | ${1} 539 | ${'23'} | ${['2']} | ${1} 540 | ${'1'} | ${undefined} | ${0} 541 | ${'1'} | ${undefined} | ${0} 542 | ${'_'} | ${undefined} | ${0} 543 | `( 544 | 'should return $result when $input is passed', 545 | ({ input, result, lastIndex }) => { 546 | expectToParse(node, input, result, lastIndex); 547 | } 548 | ); 549 | }); 550 | 551 | describe('negative lookahead shorthand', () => { 552 | const node = match('node')`!${/1/} ${/\d/}`; 553 | it.each` 554 | input | result | lastIndex 555 | ${'2'} | ${['2']} | ${1} 556 | ${'23'} | ${['2']} | ${1} 557 | ${'1'} | ${undefined} | ${0} 558 | ${'1'} | ${undefined} | ${0} 559 | ${'_'} | ${undefined} | ${0} 560 | `( 561 | 'should return $result when $input is passed', 562 | ({ input, result, lastIndex }) => { 563 | expectToParse(node, input, result, lastIndex); 564 | } 565 | ); 566 | }); 567 | 568 | describe('longer negative lookahead group', () => { 569 | const node = match('node')`${/1/} (?! ${/2/} ${/3/}) ${/\d/} ${/\d/}`; 570 | it.each` 571 | input | result | lastIndex 572 | ${'145'} | ${['1', '4', '5']} | ${3} 573 | ${'124'} | ${['1', '2', '4']} | ${3} 574 | ${'123'} | ${undefined} | ${0} 575 | ${'2'} | ${undefined} | ${0} 576 | ${'_'} | ${undefined} | ${0} 577 | `( 578 | 'should return $result when $input is passed', 579 | ({ input, result, lastIndex }) => { 580 | expectToParse(node, input, result, lastIndex); 581 | } 582 | ); 583 | }); 584 | 585 | describe('negative lookahead group with plus matcher', () => { 586 | const node = match('node')`(?! ${/1/}+) ${/\d/}`; 587 | it.each` 588 | input | result | lastIndex 589 | ${'2'} | ${['2']} | ${1} 590 | ${'21'} | ${['2']} | ${1} 591 | ${'22'} | ${['2']} | ${1} 592 | ${'11'} | ${undefined} | ${0} 593 | ${'1'} | ${undefined} | ${0} 594 | ${'_'} | ${undefined} | ${0} 595 | `( 596 | 'should return $result when $input is passed', 597 | ({ input, result, lastIndex }) => { 598 | expectToParse(node, input, result, lastIndex); 599 | } 600 | ); 601 | }); 602 | 603 | describe('negative lookahead group with plus group and required matcher', () => { 604 | const node = match('node')`(?! ${/1/}+ ${/2/}) ${/\d/}`; 605 | it.each` 606 | input | result | lastIndex 607 | ${'21'} | ${['2']} | ${1} 608 | ${'211'} | ${['2']} | ${1} 609 | ${'113'} | ${['1']} | ${1} 610 | ${'1'} | ${['1']} | ${1} 611 | ${'112'} | ${undefined} | ${0} 612 | ${'12'} | ${undefined} | ${0} 613 | ${'_'} | ${undefined} | ${0} 614 | `( 615 | 'should return $result when $input is passed', 616 | ({ input, result, lastIndex }) => { 617 | expectToParse(node, input, result, lastIndex); 618 | } 619 | ); 620 | }); 621 | 622 | describe('interpolation parsing', () => { 623 | const node = match('node')` 624 | ${/1/} 625 | ${interpolation((x) => (x > 1 ? x : null))} 626 | ${/3/} 627 | `; 628 | 629 | it('matches interpolations', () => { 630 | const expected = ['1', 2, '3']; 631 | expected.tag = 'node'; 632 | expect(parse(node)`1${2}3`).toEqual(expected); 633 | }); 634 | 635 | it('does not match invalid inputs', () => { 636 | expect(parse(node)`13`).toBe(undefined); 637 | expect(parse(node)`13${2}`).toBe(undefined); 638 | expect(parse(node)`${2}13`).toBe(undefined); 639 | expect(parse(node)`1${1}3`).toBe(undefined); 640 | }); 641 | }); 642 | 643 | describe('string matching', () => { 644 | const node = match('node')` 645 | ${'1'} 646 | ${'2'} 647 | `; 648 | 649 | it('matches strings', () => { 650 | const expected = ['1', '2']; 651 | expected.tag = 'node'; 652 | expect(parse(node)('12')).toEqual(expected); 653 | expect(parse(node)('13')).toBe(undefined); 654 | }); 655 | }); 656 | --------------------------------------------------------------------------------