├── .eslintignore
├── .eslintrc.js
├── .github
    └── workflows
    │   ├── benchmark.yml
    │   ├── release.yml
    │   └── test.yml
├── .gitignore
├── .prettierignore
├── .releaserc
├── .vscode
    └── launch.json
├── LICENSE
├── README.md
├── as-pect.config.js
├── asconfig.empty.json
├── asconfig.json
├── assembly
    ├── __spec_tests__
    │   └── generated.spec.ts
    ├── __tests__
    │   ├── alterations.spec.ts
    │   ├── as-pect.d.ts
    │   ├── boundary-assertions.spec.ts
    │   ├── capture-group.spec.ts
    │   ├── character-classes.spec.ts
    │   ├── character-sets.spec.ts
    │   ├── characters.ts
    │   ├── empty.ts
    │   ├── empty.wat
    │   ├── quantifiers.spec.ts
    │   ├── range-quantifiers.spec.ts
    │   ├── regex.spec.ts
    │   └── utils.ts
    ├── char.ts
    ├── env.ts
    ├── index.ts
    ├── nfa
    │   ├── matcher.ts
    │   ├── nfa.ts
    │   ├── types.ts
    │   └── walker.ts
    ├── parser
    │   ├── node.ts
    │   ├── parser.ts
    │   ├── string-iterator.ts
    │   └── walker.ts
    ├── regexp.ts
    ├── tsconfig.json
    └── util.ts
├── benchmark
    └── benchmark.js
├── package-lock.json
├── package.json
├── spec
    ├── pcre-1.dat
    ├── test-generator.js
    └── test.dat
└── ts
    ├── index.ts
    └── tsconfig.json


/.eslintignore:
--------------------------------------------------------------------------------
1 | node_modules/


--------------------------------------------------------------------------------
/.eslintrc.js:
--------------------------------------------------------------------------------
  1 | module.exports = {
  2 |   root: true,
  3 |   parser: "@typescript-eslint/parser",
  4 |   plugins: ["@typescript-eslint"],
  5 |   extends: [
  6 |     "eslint:recommended",
  7 |     "plugin:@typescript-eslint/eslint-recommended",
  8 |     "plugin:@typescript-eslint/recommended",
  9 |   ],
 10 |   parserOptions: {
 11 |     ecmaVersion: 2020,
 12 |     sourceType: "module",
 13 |     ecmaFeatures: {},
 14 |   },
 15 |   ignorePatterns: ["node_modules/**/*"],
 16 |   // === General rules =========================================================
 17 | 
 18 |   rules: {
 19 |     // Omitted semicolons are hugely popular, yet within the compiler it makes
 20 |     // sense to be better safe than sorry.
 21 |     semi: "error",
 22 | 
 23 |     // Our code base uses 2 spaces for indentation, and we enforce it here so
 24 |     // files don't mix spaces, tabs or different indentation levels.
 25 |     indent: [
 26 |       "error",
 27 |       2,
 28 |       {
 29 |         SwitchCase: 1,
 30 |         VariableDeclarator: "first",
 31 |         offsetTernaryExpressions: true,
 32 |         ignoredNodes: [
 33 |           // FIXME: something's odd here
 34 |           "ConditionalExpression > *",
 35 |           "ConditionalExpression > * > *",
 36 |           "ConditionalExpression > * > * > *",
 37 |         ],
 38 |       },
 39 |     ],
 40 | 
 41 |     // This is mostly visual style, making comments look uniform.
 42 |     "spaced-comment": [
 43 |       "error",
 44 |       "always",
 45 |       {
 46 |         markers: ["/"], // triple-slash
 47 |         exceptions: ["/"], // all slashes
 48 |       },
 49 |     ],
 50 | 
 51 |     // This tends to be annoying as it encourages developers to make everything
 52 |     // that is never reassigned a 'const', sometimes semantically incorrect so,
 53 |     // typically leading to huge diffs in follow-up PRs modifying affected code.
 54 |     "prefer-const": "off",
 55 | 
 56 |     // It is perfectly fine to declare top-level variables with `var`, yet this
 57 |     // rule doesn't provide configuration options that would help.
 58 |     "no-var": "off",
 59 | 
 60 |     // Quite often, dealing with multiple related cases at once or otherwise
 61 |     // falling through is exactly the point of using a switch.
 62 |     "no-fallthrough": "off",
 63 | 
 64 |     // Typical false-positives here are `do { ... } while (true)` statements or
 65 |     // similar, but the only option provided here is not checking any loops.
 66 |     "no-constant-condition": ["error", { checkLoops: false }],
 67 | 
 68 |     // Functions are nested in blocks occasionally, and there haven't been any
 69 |     // problems with this so far, so turning the check off.
 70 |     "no-inner-declarations": "off",
 71 | 
 72 |     // Quite common in scenarios where an iteration starts at `current = this`.
 73 |     "@typescript-eslint/no-this-alias": "off",
 74 | 
 75 |     // Disabled here, but enabled again for JavaScript files.
 76 |     "no-unused-vars": "off",
 77 | 
 78 |     // Disabled here, but enabled again for TypeScript files.
 79 |     "@typescript-eslint/no-unused-vars": "off",
 80 |   },
 81 |   overrides: [
 82 |     // === TypeScript rules ====================================================
 83 | 
 84 |     {
 85 |       files: ["**/assembly/**/*.ts"],
 86 |       rules: {
 87 |         // Enforcing to remove function parameters on stubs makes code less
 88 |         // maintainable, so we instead allow unused function parameters.
 89 |         "@typescript-eslint/no-unused-vars": [
 90 |           "warn",
 91 |           {
 92 |             vars: "local",
 93 |             varsIgnorePattern: "^_|^[A-Z](?:From|To)?$", // ignore type params
 94 |             args: "none",
 95 |             ignoreRestSiblings: false,
 96 |           },
 97 |         ],
 98 | 
 99 |         // Namespaces are quite useful in AssemblyScript
100 |         "@typescript-eslint/no-namespace": "off",
101 | 
102 |         // There is actually codegen difference here
103 |         "@typescript-eslint/no-array-constructor": "off",
104 | 
105 |         // Sometimes it can't be avoided to add a @ts-ignore
106 |         "@typescript-eslint/ban-ts-comment": "off",
107 | 
108 |         // Utilized to achieve portability in some cases
109 |         "@typescript-eslint/no-non-null-assertion": "off",
110 |       },
111 |     },
112 | 
113 |     // === Compiler rules (extends AssemblyScript rules) =======================
114 | 
115 |     {
116 |       files: ["**/assembly/**/*.ts"],
117 |       rules: {
118 |         // There is an actual codegen difference here - TODO: revisit
119 |         "no-cond-assign": "off",
120 | 
121 |         // Not all types can be omitted in AS yet - TODO: revisit
122 |         "@typescript-eslint/no-inferrable-types": "off",
123 | 
124 |         // Used rarely to reference internals that are not user-visible
125 |         "@typescript-eslint/triple-slash-reference": "off",
126 | 
127 |         // The compiler has its own `Function` class for example
128 |         "no-shadow-restricted-names": "off",
129 |         "@typescript-eslint/ban-types": "off",
130 |       },
131 |     },
132 | 
133 |     // === Standard Library rules (extends AssemblyScript rules) ===============
134 | 
135 |     {
136 |       files: ["**/assembly/**/*.ts"],
137 |       rules: {
138 |         // We are implementing with --noLib, so we shadow all the time
139 |         "no-shadow-restricted-names": "off",
140 | 
141 |         // Similarly, sometimes we need the return type to be String, not string
142 |         "@typescript-eslint/ban-types": "off",
143 |       },
144 |     },
145 | 
146 |     // === Standard Definition rules (extends TypeScript rules) ================
147 | 
148 |     {
149 |       files: ["**/assembly/**/*.d.ts"],
150 |       rules: {
151 |         // Often required to achieve compatibility with TypeScript
152 |         "@typescript-eslint/no-explicit-any": "off",
153 | 
154 |         // Interfaces can be stubs here, i.e. not yet fully implemented
155 |         "@typescript-eslint/no-empty-interface": "off",
156 | 
157 |         // Definitions make use of `object` to model rather unusual constraints
158 |         "@typescript-eslint/ban-types": "off",
159 |       },
160 |     },
161 | 
162 |     // === Test rules (extends TypeScript rules) ===============================
163 | 
164 |     {
165 |       files: ["**/assembly/__tests__/**/*.ts"],
166 |       rules: {
167 |         // Tests typically include unusual code patterns on purpose. This is
168 |         // very likely not an extensive list, but covers what's there so far.
169 |         "no-empty": "off",
170 |         "no-cond-assign": "off",
171 |         "no-compare-neg-zero": "off",
172 |         "no-inner-declarations": "off",
173 |         "no-constant-condition": "off",
174 |         "use-isnan": "off",
175 |         "@typescript-eslint/no-namespace": "off",
176 |         "@typescript-eslint/no-unused-vars": "off",
177 |         "@typescript-eslint/no-empty-function": "off",
178 |         "@typescript-eslint/no-non-null-assertion": "off",
179 |         "@typescript-eslint/no-extra-semi": "off",
180 |         "@typescript-eslint/no-inferrable-types": "off",
181 |         "@typescript-eslint/ban-types": "off",
182 |         "@typescript-eslint/triple-slash-reference": "off",
183 |         "@typescript-eslint/ban-ts-comment": "off",
184 |         "@typescript-eslint/no-extra-non-null-assertion": "off",
185 |         "@typescript-eslint/no-empty-interface": "off",
186 |       },
187 |     },
188 |   ],
189 | };
190 | 


--------------------------------------------------------------------------------
/.github/workflows/benchmark.yml:
--------------------------------------------------------------------------------
 1 | name: Performance benchmark
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 | 
 7 | jobs:
 8 |   benchmark:
 9 |     name: Performance regression check
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - uses: actions/checkout@v2
13 |       - name: Setup Node.js
14 |         uses: actions/setup-node@v1
15 |         with:
16 |           node-version: 15
17 |       - run: npm ci
18 |       - run: npm run asbuild
19 |       # Run the benchmark with `npm run benchmark` and store the output in a file
20 |       - name: Run benchmark
21 |         run: npm run benchmark | tee benchmark/output.txt
22 |       # Download previous benchmark result from cache (if exists)
23 |       - name: Download previous benchmark data
24 |         uses: actions/cache@v1
25 |         with:
26 |           path: ./cache
27 |           key: ${{ runner.os }}-benchmark
28 |       # Run `github-action-benchmark` action
29 |       - name: Store benchmark result
30 |         uses: rhysd/github-action-benchmark@v1
31 |         with:
32 |           # What benchmark tool the output.txt came from
33 |           tool: "benchmarkjs"
34 |           # Where the output from the benchmark tool is stored
35 |           output-file-path: benchmark/output.txt
36 |           # Personal access token to deploy GitHub Pages branch
37 |           github-token: ${{ secrets.PERSONAL_GITHUB_TOKEN }}
38 |           # Enable alert commit comment
39 |           comment-on-alert: true
40 |           # Mention @colineberhardt in the commit comment
41 |           alert-comment-cc-users: "@colineberhardt"
42 |           # Push and deploy GitHub pages branch automatically
43 |           auto-push: true
44 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | # This workflow does a clean install of node dependencies, runs the tests and then publishes a release with semantic-release
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions
 3 | 
 4 | name: Release
 5 | 
 6 | on:
 7 |   push:
 8 |     branches:
 9 |       - main
10 | jobs:
11 |   release:
12 |     name: Release
13 |     runs-on: ubuntu-18.04
14 |     steps:
15 |       - name: Checkout
16 |         uses: actions/checkout@v1
17 |       - name: Setup Node.js
18 |         uses: actions/setup-node@v1
19 |         with:
20 |           node-version: 15
21 |       - name: Install dependencies
22 |         run: npm ci
23 |       - name: Run tests
24 |         run: npm test
25 |       - name: Release
26 |         env:
27 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
28 |           NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
29 |         run: npx semantic-release
30 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will do a clean install of node dependencies, build the source code and run tests across different versions of node
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions
 3 | 
 4 | name: Test
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: [main]
 9 |   pull_request:
10 |     branches: [main]
11 | 
12 | jobs:
13 |   build:
14 |     runs-on: ubuntu-latest
15 | 
16 |     strategy:
17 |       matrix:
18 |         node-version: [15.x]
19 |         # See supported Node.js release schedule at https://nodejs.org/en/about/releases/
20 | 
21 |     steps:
22 |       - uses: actions/checkout@v2
23 |       - name: Use Node.js ${{ matrix.node-version }}
24 |         uses: actions/setup-node@v1
25 |         with:
26 |           node-version: ${{ matrix.node-version }}
27 |       - run: npm ci
28 |       - run: npm test
29 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | build/
3 | .history
4 | .vscode
5 | .idea
6 | npm-debug.*
7 | assembly/__tests__/index.spec.wat
8 | assembly/__tests__/spec.spec.wat


--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | # prettier doesn't support decorators on functions :-(
2 | assembly/char.ts
3 | assembly/nfa/types.ts


--------------------------------------------------------------------------------
/.releaserc:
--------------------------------------------------------------------------------
1 | {
2 |   "branches": ["main"]
3 | }


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   // Use IntelliSense to learn about possible attributes.
 3 |   // Hover to view descriptions of existing attributes.
 4 |   // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
 5 |   "version": "0.2.0",
 6 |   "configurations": [
 7 |     {
 8 |       "type": "node",
 9 |       "request": "launch",
10 |       "name": "Launch Program",
11 |       "runtimeArgs": ["-r", "ts-node/register"],
12 |       "args": ["${workspaceFolder}/ts/index.ts"],
13 |       "env": { "TS_NODE_PROJECT": "${workspaceFolder}/ts/tsconfig.json" }
14 |     },
15 |     {
16 |       "type": "node",
17 |       "request": "launch",
18 |       "name": "Launch Test Gen",
19 |       "args": ["${workspaceFolder}/spec/test-generator.js"]
20 |     }
21 |   ]
22 | }
23 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Colin Eberhardt
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # assemblyscript-regex
  2 | 
  3 | A regex engine for AssemblyScript.
  4 | 
  5 | [AssemblyScript](https://www.assemblyscript.org/) is a new language, based on TypeScript, that runs on WebAssembly. AssemblyScript has a lightweight standard library, but lacks support for regular expressions. This project fills that gap!
  6 | 
  7 | This project exposes an API that mirrors the JavaScript [RegExp](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp) class:
  8 | 
  9 | ```typescript
 10 | const regex = new RegExp("fo*", "g");
 11 | const str = "table football, foul";
 12 | 
 13 | let match: Match | null = regex.exec(str);
 14 | while (match != null) {
 15 |   // first iteration
 16 |   //   match.index = 6
 17 |   //   match.matches[0] = "foo"
 18 | 
 19 |   // second iteration
 20 |   //   match.index = 16
 21 |   //   match.matches[0] = "fo"
 22 |   match = regex.exec(str);
 23 | }
 24 | ```
 25 | 
 26 | ## Project status
 27 | 
 28 | The initial focus of this implementation has been feature support and functionality over performance. It currently supports a sufficient number of regex features to be considered useful, including most character classes, common assertions, groups, alternations, capturing groups and quantifiers.
 29 | 
 30 | The next phase of development will be focussed on more extensive testing and performance. The project currently has reasonable unit test coverage, focussed on positive and negative test cases on a per-feature basis. It also includes a more exhaustive test suite with test cases borrowed from another regex library.
 31 | 
 32 | ### Feature support
 33 | 
 34 | Based on the classification within the [MDN cheatsheet](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet)
 35 | 
 36 | **Character sets**
 37 | 
 38 | - [x] .
 39 | - [x] \d
 40 | - [x] \D
 41 | - [x] \w
 42 | - [x] \W
 43 | - [x] \s
 44 | - [x] \S
 45 | - [x] \t
 46 | - [x] \r
 47 | - [x] \n
 48 | - [x] \v
 49 | - [x] \f
 50 | - [ ] [\b]
 51 | - [ ] \0
 52 | - [ ] \cX
 53 | - [x] \xhh
 54 | - [x] \uhhhh
 55 | - [ ] \u{hhhh} or \u{hhhhh}
 56 | - [x] \
 57 | 
 58 | **Assertions**
 59 | 
 60 | - [x] ^
 61 | - [x] $
 62 | - [ ] \b
 63 | - [ ] \B
 64 | 
 65 | **Other assertions**
 66 | 
 67 | - [ ] x(?=y) Lookahead assertion
 68 | - [ ] x(?!y) Negative lookahead assertion
 69 | - [ ] (?<=y)x Lookbehind assertion
 70 | - [ ] (?<!y)x Negative lookbehind assertion
 71 | 
 72 | **Groups and ranges**
 73 | 
 74 | - [x] x|y
 75 | - [x] [xyz][a-c]
 76 | - [x] [^xyz][^a-c]
 77 | - [x] (x) capturing group
 78 | - [ ] \n back reference
 79 | - [ ] (?<Name>x) named capturing group
 80 | - [x] (?:x) Non-capturing group
 81 | 
 82 | **Quantifiers**
 83 | 
 84 | - [x] x\*
 85 | - [x] x+
 86 | - [x] x?
 87 | - [x] x{n}
 88 | - [x] x{n,}
 89 | - [x] x{n,m}
 90 | - [ ] x\*? / x+? / ...
 91 | 
 92 | **RegExp**
 93 | 
 94 | - [x] global
 95 | - [ ] sticky
 96 | - [x] case insensitive
 97 | - [x] multiline
 98 | - [x] dotAll
 99 | - [ ] unicode
100 | 
101 | ### Development
102 | 
103 | This project is open source, MIT licenced and your contributions are very much welcomed.
104 | 
105 | To get started, check out the repository and install dependencies:
106 | 
107 | ```
108 | $ npm install
109 | ```
110 | 
111 | A few general points about the tools and processes this project uses:
112 | 
113 | - This project uses prettier for code formatting and eslint to provide additional syntactic checks. These are both run on `npm test` and as part of the CI build.
114 | - The unit tests are executed using [as-pect](https://github.com/jtenner/as-pect) - a native AssemblyScript test runner
115 | - The specification tests are within the `spec` folder. The `npm run test:generate` target transforms these tests into as-pect tests which execute as part of the standard build / test cycle
116 | - In order to support improved debugging you can execute this library as TypeScript (rather than WebAssembly), via the `npm run tsrun` target.
117 | 


--------------------------------------------------------------------------------
/as-pect.config.js:
--------------------------------------------------------------------------------
 1 | module.exports = {
 2 |   /**
 3 |    * A set of globs passed to the glob package that qualify typescript files for testing.
 4 |    */
 5 |   include: [
 6 |     "assembly/__tests__/**/*.spec.ts",
 7 |     "assembly/__spec_tests__/**/*.spec.ts",
 8 |   ],
 9 |   /**
10 |    * A set of globs passed to the glob package that qualify files to be added to each test.
11 |    */
12 |   add: ["assembly/__tests__/**/*.include.ts"],
13 |   /**
14 |    * All the compiler flags needed for this test suite. Make sure that a binary file is output.
15 |    */
16 |   flags: {
17 |     /** To output a wat file, uncomment the following line. */
18 |     // "--textFile": ["output.wat"],
19 |     /** A runtime must be provided here. */
20 |     "--runtime": ["stub"], // Acceptable values are: full, half, stub (arena), and none
21 |     "--target": "test",
22 |   },
23 |   /**
24 |    * A set of regexp that will disclude source files from testing.
25 |    */
26 |   disclude: [/node_modules/],
27 |   /**
28 |    * Add your required AssemblyScript imports here.
29 |    */
30 |   imports(memory, createImports, instantiateSync, binary) {
31 |     let instance; // declared before myImports so import callbacks can close over it
32 |     const myImports = {
33 |       // put your WebAssembly imports here; none are defined by default
34 |     };
35 |     instance = instantiateSync(binary, createImports(myImports)); // instantiate the binary with the wrapped imports
36 |     return instance; // the instantiated module is returned to the caller; `memory` is unused here
37 |   },
38 |   /**
39 |    * Add a custom reporter here if you want one. The following example is in typescript.
40 |    *
41 |    * @example
42 |    * import { TestReporter, TestGroup, TestResult, TestContext } from "as-pect";
43 |    *
44 |    * export class CustomReporter extends TestReporter {
45 |    *   // implement each abstract method here
46 |    *   public abstract onStart(suite: TestContext): void;
47 |    *   public abstract onGroupStart(group: TestGroup): void;
48 |    *   public abstract onGroupFinish(group: TestGroup): void;
49 |    *   public abstract onTestStart(group: TestGroup, result: TestResult): void;
50 |    *   public abstract onTestFinish(group: TestGroup, result: TestResult): void;
51 |    *   public abstract onFinish(suite: TestContext): void;
52 |    * }
53 |    */
54 |   // reporter: new CustomReporter(),
55 |   /**
56 |    * Specify if the binary wasm file should be written to the file system.
57 |    */
58 |   outputBinary: false,
59 | };
60 | 


--------------------------------------------------------------------------------
/asconfig.empty.json:
--------------------------------------------------------------------------------
1 | {
2 |   "options": {
3 |     "runtime": "stub",
4 |     "textFile": "build/empty.wat",
5 |     "debug": true
6 |   },
7 |   "entries": ["assembly/__tests__/empty.ts"]
8 | }
9 | 


--------------------------------------------------------------------------------
/asconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "targets": {
 3 |     "debug": {
 4 |       "binaryFile": "build/untouched.wasm",
 5 |       "textFile": "build/untouched.wat",
 6 |       "sourceMap": true,
 7 |       "debug": true
 8 |     },
 9 |     "release": {
10 |       "binaryFile": "build/optimized.wasm",
11 |       "textFile": "build/optimized.wat",
12 |       "sourceMap": false,
13 |       "optimizeLevel": 3,
14 |       "shrinkLevel": 0,
15 |       "converge": true,
16 |       "noAssert": true
17 |     },
18 |     "test": {
19 |       "debug": true
20 |     }
21 |   },
22 |   "options": {
23 |     "transform": [],
24 |     "exportRuntime": true
25 |   }
26 | }
27 | 


--------------------------------------------------------------------------------
/assembly/__tests__/alterations.spec.ts:
--------------------------------------------------------------------------------
 1 | import { expectMatch, expectNotMatch } from "./utils";
 2 | // Alternation (`|`) tests. expectMatch / expectNotMatch come from ./utils; presumably they assert that every listed input does / does not match the pattern — confirm there.
 3 | it("or", () => {
 4 |   expectMatch("a|b", ["b", "a"]);
 5 |   expectNotMatch("a|b", ["c"]);
 6 |   expectMatch("a|br", ["br", "a"]); // the right-hand branch is the whole sequence "br", not just "b"
 7 |   expectNotMatch("a|br", ["b", "c"]);
 8 | });
 9 | 
10 | it("or multi-term", () => {
11 |   expectMatch("a|b|c", ["b", "a", "c"]);
12 |   expectNotMatch("a|b|c", ["d"]);
13 |   expectMatch("a|br|pc", ["br", "a", "pc"]); // three branches, two of them multi-character
14 |   expectNotMatch("a|br|pc", ["b", "pr"]); // partial or mixed branches must not match
15 | });
16 | 


--------------------------------------------------------------------------------
/assembly/__tests__/as-pect.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="@as-pect/assembly/types/as-pect" />
2 | // Ambient declaration so calls to includeBytes() type-check; the implementation is not part of this file.
3 | declare function includeBytes(filename: string): StaticArray<u8>;
4 | 


--------------------------------------------------------------------------------
/assembly/__tests__/boundary-assertions.spec.ts:
--------------------------------------------------------------------------------
 1 | import { expectMatch, expectNotMatch, exec } from "./utils";
 2 | // Tests for the `^` and `$` anchors, including their escaped (literal) forms.
 3 | it("matches end of string", () => {
 4 |   const match = exec("a$", "ba");
 5 |   expect(match.index).toBe(1); // the match is expected at the final character, not at index 0
 6 |   expect(match.matches[0]).toBe("a");
 7 |   expectNotMatch("a$", ["ab"]); // "a" is not at the end of the input
 8 | });
 9 | 
10 | it("matches start of string", () => {
11 |   expectMatch("^a", ["a"]);
12 |   expectNotMatch("^a", ["ba"]); // "a" is not at the start of the input
13 | });
14 | 
15 | it("handles escaped boundaries", () => {
16 |   expectMatch("\\^a", ["^a"]); // an escaped ^ is expected to match a literal caret
17 |   expectMatch("a\\$", ["a$"]); // an escaped $ is expected to match a literal dollar sign
18 | });
19 | 


--------------------------------------------------------------------------------
/assembly/__tests__/capture-group.spec.ts:
--------------------------------------------------------------------------------
 1 | import { expectMatch, expectNotMatch, exec } from "./utils";
 2 | // Capture group tests: per the assertions below, matches[0] holds the full match and matches[1..] the groups in order.
 3 | it("supports capture groups", () => {
 4 |   let match = exec("a(\\d)a", "a3a");
 5 |   expect(match.index).toBe(0);
 6 |   expect(match.input).toBe("a3a");
 7 |   expect(match.matches[0]).toBe("a3a");
 8 |   expect(match.matches[1]).toBe("3");
 9 | 
10 |   match = exec("a(\\d)a", "  a3a"); // same pattern, but the match starts after two leading spaces
11 |   expect(match.index).toBe(2);
12 |   expect(match.input).toBe("  a3a");
13 |   expect(match.matches[0]).toBe("a3a");
14 |   expect(match.matches[1]).toBe("3");
15 | 
16 |   match = exec("a(\\d*)a", "a3456a"); // a quantifier inside the group captures the whole digit run
17 |   expect(match.index).toBe(0);
18 |   expect(match.input).toBe("a3456a");
19 |   expect(match.matches[0]).toBe("a3456a");
20 |   expect(match.matches[1]).toBe("3456");
21 | 
22 |   match = exec("a*(\\d*)(a*)", "aaa456aaa"); // two groups, reported in order
23 |   expect(match.index).toBe(0);
24 |   expect(match.input).toBe("aaa456aaa");
25 |   expect(match.matches[0]).toBe("aaa456aaa");
26 |   expect(match.matches[1]).toBe("456");
27 |   expect(match.matches[2]).toBe("aaa");
28 | });
29 | 
30 | it("should not return captured values for non-matching alternations", () => {
31 |   const match = exec("(a|b)c|a(b|c)", "ab");
32 |   expect(match.matches[0]).toBe("ab");
33 |   expect(match.matches[1]).toBe(""); // group 1 sits in the branch that did not match, so it is expected to be empty
34 |   expect(match.matches[2]).toBe("b");
35 | });
36 | 
37 | it("repeated capture groups should return the last match", () => {
38 |   const match = exec("([a-c])+", "ac");
39 |   expect(match.matches[0]).toBe("ac");
40 |   expect(match.matches[1]).toBe("c"); // last iteration of the repeated group
41 | });
42 | 
43 | it("range repitition capture groups should return the last match", () => {
44 |   const match = exec("([a-c]){2}", "ac");
45 |   expect(match.matches[0]).toBe("ac");
46 |   expect(match.matches[1]).toBe("c"); // last iteration of the {2} repetition
47 | });
48 | 
49 | it("non-capturing groups should not capture", () => {
50 |   const match = exec("(?:foo)bar(baz)", "foobarbaz");
51 |   expect(match.matches[0]).toBe("foobarbaz");
52 |   expect(match.matches[1]).toBe("baz"); // (?:foo) takes no slot, so the first capture is "baz"
53 | });
54 | 


--------------------------------------------------------------------------------
/assembly/__tests__/character-classes.spec.ts:
--------------------------------------------------------------------------------
 1 | import { expectMatch, expectNotMatch } from "./utils";
 2 | // Bracket expression tests ([...] and [^...]): discrete characters, ranges, negation, literal `-` / `]`, and the "i" flag.
 3 | it("throws an error if no closing bracket is found", () => { // TODO: the only assertion is commented out, so this test currently passes vacuously
 4 |   // expect(() => new RegExp("[abce")).toThrow();
 5 | });
 6 | 
 7 | it("matches discrete characters", () => {
 8 |   expectMatch("[abce]", ["a", "b", "c", "e"]);
 9 |   expectNotMatch("[abce]", ["", "f", "h"]); // the empty string must not match either
10 | });
11 | 
12 | it("matches character ranges", () => {
13 |   expectMatch("[a-c]", ["a", "b", "c"]);
14 |   expectNotMatch("[a-c]", ["d", "e", ""]);
15 |   expectMatch("[K-M]", ["K", "L", "M"]);
16 |   expectNotMatch("[K-M]", ["9", "J"]);
17 |   expectMatch("[0-9]", ["0", "9"]);
18 |   expectNotMatch("[0-9]", ["a", "A"]);
19 | });
20 | 
21 | it("matches multiple ranges", () => {
22 |   expectMatch("[a-ce-f]", ["a", "b", "c", "e", "f"]);
23 |   expectNotMatch("[a-ce-f]", ["d"]); // "d" falls in the gap between the two ranges
24 | });
25 | 
26 | it("supports closing brackets", () => {
27 |   expectMatch("[]a]", ["]", "a"]); // a `]` directly after `[` is expected to be a literal member
28 | });
29 | 
30 | it("supports negated sets", () => {
31 |   expectNotMatch("[^a-c]", ["a", "b", "c"]);
32 |   expectMatch("[^a-c]", ["d", "e"]);
33 |   expectNotMatch("[^a-ce-f]", ["a", "b", "c", "e", "f"]);
34 |   expectMatch("[^a-ce-f]", ["d"]);
35 | });
36 | 
37 | it("treats - as a literal", () => {
38 |   expectMatch("[-abc]", ["-", "a", "b", "c"]); // leading `-`
39 |   expectMatch("[abc-]", ["-", "a", "b", "c"]); // trailing `-`
40 | });
41 | 
42 | it("treats - as a literal in negated sets", () => {
43 |   expectNotMatch("[^-abc]", ["-", "a", "b", "c"]);
44 |   expectMatch("[^-abc]", ["1", "A"]);
45 | });
46 | 
47 | it("supports case insensitive matching", () => {
48 |   // simple ranges
49 |   expectMatch("[a-c]", ["A", "C", "a", "c"], "i");
50 |   expectNotMatch("[a-c]", ["D", "d"], "i");
51 |   // complex: the range runs from upper-case W to lower-case c
52 |   expectMatch("[W-c]", ["W", "w", "C", "c"], "i");
53 |   expectNotMatch("[W-c]", ["V", "v", "D", "d"], "i");
54 | });
55 | 


--------------------------------------------------------------------------------
/assembly/__tests__/character-sets.spec.ts:
--------------------------------------------------------------------------------
 1 | import { RegExp } from "..";
 2 | import { expectMatch, expectNotMatch, exec } from "./utils";
 3 | // Tests for `.`, the escape classes (\d \D \w \W \s \S), control-character escapes and escaped literals.
 4 | it("dot", () => {
 5 |   expectMatch(".", [" ", "B", "|", "9"]);
 6 |   expectNotMatch(".", ["", "\n"]); // with no flags passed, "." is expected not to match a line feed
 7 | });
 8 | 
 9 | it("digit", () => {
10 |   expectMatch("\\d", ["0", "9"]);
11 |   expectNotMatch("\\d", ["", "b"]);
12 | });
13 | 
14 | it("non-digit", () => {
15 |   expectNotMatch("\\D", ["0", "9", ""]); // a negated class still needs a character, so "" must not match
16 |   expectMatch("\\D", ["b", "|"]);
17 | });
18 | 
19 | it("word", () => {
20 |   expectMatch("\\w", ["A", "a", "Z", "z", "0", "9", "_"]);
21 |   expectNotMatch("\\w", ["", "$"]);
22 | });
23 | 
24 | it("not word", () => {
25 |   expectNotMatch("\\W", ["A", "a", "Z", "z", "0", "9", "_", ""]);
26 |   expectMatch("\\W", ["&", "$"]);
27 | });
28 | 
29 | it("whitespace", () => {
30 |   expectMatch("\\s", ["\f", "\n", "\r", "\t", "\v"]);
31 |   expectNotMatch("\\s", ["", "a", "0"]);
32 | });
33 | 
34 | it("not whitespace", () => {
35 |   expectNotMatch("\\S", ["", "\f", "\n", "\r", "\t", "\v"]);
36 |   expectMatch("\\S", ["a", "0"]);
37 | });
38 | 
39 | it("tab, cr, lf, vt, ff", () => {
40 |   expectMatch("\\t", ["\t"]);
41 |   expectMatch("\\r", ["\r"]);
42 |   expectMatch("\\n", ["\n"]);
43 |   expectMatch("\\v", ["\v"]);
44 |   expectMatch("\\f", ["\f"]);
45 |   expectNotMatch("\\t", ["a", " ", ""]); // a plain space is not a tab
46 | });
47 | 
48 | it("escaped dot", () => {
49 |   expectMatch("\\.", ["."]);
50 |   expectNotMatch("\\.", ["", "a"]); // escaped, so it is no longer a wildcard
51 | });
52 | 
53 | it("unrecognised character classes are treated as characters", () => {
54 |   expectMatch("\\g\\m", ["gm"]); // \g and \m are expected to match the literal letters
55 | });
56 | 


--------------------------------------------------------------------------------
/assembly/__tests__/characters.ts:
--------------------------------------------------------------------------------
 1 | import { expectMatch, expectNotMatch } from "./utils";
 2 | // Specs for literal characters and their concatenation.
 3 | it("single character", () => {
 4 |   expectMatch("a", ["a"]);
 5 |   expectNotMatch("a", ["fish", ""]);
 6 | });
 7 | 
 8 | it("concatenation", () => {
 9 |   expectMatch("ab", ["ab"]);
10 |   expectNotMatch("ab", ["aac", "aa", ""]); // "a" followed by anything other than "b" is rejected
11 | });
12 | 


--------------------------------------------------------------------------------
/assembly/__tests__/empty.ts:
--------------------------------------------------------------------------------
1 | import * as regex from "..";
2 | 


--------------------------------------------------------------------------------
/assembly/__tests__/empty.wat:
--------------------------------------------------------------------------------
1 | (module
2 |  (memory $0 0)
3 |  (table $0 1 funcref)
4 |  (export "memory" (memory $0))
5 | )
6 | 


--------------------------------------------------------------------------------
/assembly/__tests__/quantifiers.spec.ts:
--------------------------------------------------------------------------------
 1 | import { expectMatch, expectNotMatch, exec } from "./utils";
 2 | // Specs for the ?, + and * quantifiers, including their non-greedy (trailing "?") forms.
 3 | it("matches empty strings", () => {
 4 |   expectMatch("a?", [""]);
 5 |   expectMatch("a*", [""]);
 6 | });
 7 | 
 8 | it("zero or one", () => {
 9 |   expectMatch("a?", ["a"]);
10 |   let match = exec("a?", "bc"); // no "a" in the input, so the match is the empty string
11 |   expect(match).not.toBeNull();
12 |   expect(match.matches[0]).toStrictEqual("");
13 | });
14 | 
15 | it("one or more", () => {
16 |   expectMatch("a+", ["a", "aa"]);
17 |   expectNotMatch("a+", [""]);
18 | });
19 | 
20 | it("zero or more", () => {
21 |   expectMatch("a*", ["aa", "aaaa"]);
22 | });
23 | 
24 | it("multiple rules", () => {
25 |   expectMatch("a*b", ["b", "ab", "aaaab"]);
26 |   expectNotMatch("a*b", ["aaaad"]);
27 | });
28 | 
29 | it("zero or more is greedy", () => {
30 |   let match = exec("a*", "aaaaa");
31 |   expect(match).not.toBeNull();
32 |   expect(match.matches[0]).toStrictEqual("aaaaa"); // consumes the whole run of "a"
33 | });
34 | 
35 | it("one or more is greedy", () => {
36 |   let match = exec("a+", "aaaaa");
37 |   expect(match).not.toBeNull();
38 |   expect(match.matches[0]).toStrictEqual("aaaaa");
39 | });
40 | 
41 | describe("non-greedy", () => {
42 |   it("one or more supports non-greedy mode", () => {
43 |     let match = exec("[a-c]+?b", "abb");
44 |     expect(match).not.toBeNull();
45 |     expect(match.matches[0]).toStrictEqual("ab"); // stops at the first "b" (greedy would give "abb")
46 |   });
47 | 
48 |   it("zero or more supports non-greedy mode", () => {
49 |     let match = exec("[a-c]*?b", "abb");
50 |     expect(match).not.toBeNull();
51 |     expect(match.matches[0]).toStrictEqual("ab");
52 |   });
53 |   // NOTE(review): the spec below is disabled; presumably non-greedy "??" is not supported yet — confirm
54 |   // it("zero or one supports non-greedy mode", () => {
55 |   //   expectMatch("a?", ["a"]);
56 |   //   let match = exec("a??", "bc");
57 |   //   expect(match).not.toBeNull();
58 |   //   expect(match.matches[0]).toStrictEqual("");
59 |   // });
60 | });
61 | 


--------------------------------------------------------------------------------
/assembly/__tests__/range-quantifiers.spec.ts:
--------------------------------------------------------------------------------
 1 | /* eslint-disable no-invalid-regexp */
 2 | import { RegExp } from "..";
 3 | import { expectMatch, expectNotMatch, exec } from "./utils";
 4 | // Specs for the {n}, {n,} and {n,m} range quantifiers.
 5 | it("handles single quantifier", () => {
 6 |   expectMatch("a{2}", ["aa"]);
 7 |   expectMatch("ba{2}", ["baa"]);
 8 |   expectMatch("ba{1}b", ["bab"]);
 9 | });
10 | 
11 | it("handles open upper bound quantifiers", () => {
12 |   expectMatch("a{2,}", ["aa", "aaaaa"]);
13 |   expectMatch("ba{2,}", ["baa", "baaaaaaa"]);
14 |   expectMatch("ba{1,}b", ["bab", "baaaaaab"]);
15 | });
16 | 
17 | it("handles explicit upper bound quantifiers", () => {
18 |   const match = exec("a{2,4}", "aaaaaaaaaa");
19 |   expect(match.matches[0]).toBe("aaaa"); // greedy: takes the maximum of four
20 | });
21 | 
22 | it("handles zero value quantifier", () => {
23 |   expectMatch("ba{0}b", ["bb"]);
24 | });
25 | 
26 | it("handles quantifiers within alternates", () => {
27 |   expectMatch("a{2}|b{2}", ["bb", "aa"]);
28 |   expectNotMatch("a{2}|b{2}", ["cc"]);
29 | });
30 | 
31 | it("handles imcomplete quantifier ", () => {
32 |   expectMatch("a{2", ["a{2"]); // a malformed "{...}" is matched as literal text
33 |   expectMatch("a{2,", ["a{2,"]);
34 |   expectMatch("a{2,3", ["a{2,3"]);
35 |   expectMatch("a{2,3a", ["a{2,3a"]);
36 |   expectMatch("a{2,3a}", ["a{2,3a}"]);
37 | });
38 | 
39 | it("handles nested quantifiers", () => {
40 |   expectMatch("(a{3}){2}", ["aaaaaa"]); // three "a"s, twice
41 | });
42 | 
43 | it("handles nongreedy quantifiers", () => {
44 |   const match = exec("a{2,4}?", "aaaaaaaaaa");
45 |   expect(match.matches[0]).toBe("aa"); // non-greedy: takes the minimum of two
46 | });
47 | 
48 | it("throws if quantifying a quantifier!", () => {
49 |   expect(() => {
50 |     let foo = new RegExp("a{3}{2}"); // the constructor itself is expected to throw
51 |   }).toThrow();
52 | });
53 | 


--------------------------------------------------------------------------------
/assembly/__tests__/regex.spec.ts:
--------------------------------------------------------------------------------
  1 | import { RegExp } from "..";
  2 | import { exec, expectNotMatch, expectMatch } from "./utils";
  3 | // A match exposes the matched text together with its index and the original input.
  4 | it("match returns correct substring", () => {
  5 |   const match = exec("\\d", "asd123asd");
  6 |   expect(match.index).toBe(3); // the first digit sits at offset 3
  7 |   expect(match.input).toStrictEqual("asd123asd");
  8 |   expect(match.matches[0]).toStrictEqual("1");
  9 | });
 10 | // The "s" flag: exposed as `dotAll` and lets "." consume line terminators.
 11 | describe("dotAll mode", () => {
 12 |   it("sets the dotAll flag", () => {
 13 |     expect(new RegExp("foo", "s").dotAll).toBeTruthy();
 14 |     expect(new RegExp("foo", "").dotAll).toBeFalsy();
 15 |   });
 16 | 
 17 |   it("allows dot to match any character", () => {
 18 |     const regex = new RegExp("^12.34", "s");
 19 |     const match = exec(regex, "12\n34"); // the "." has to consume the "\n"
 20 |     expect(match.matches[0]).toBe("12\n34");
 21 |   });
 22 | });
 23 | // The "i" flag: exposed as `ignoreCase`; applies to literal characters and to ranges.
 24 | describe("case insensitive mode", () => {
 25 |   it("supports characters", () => {
 26 |     const regex = new RegExp("AbC", "i");
 27 |     const match = exec(regex, "aBc"); // every letter differs in case from the pattern
 28 |     expect(match.matches[0]).toBe("aBc");
 29 |   });
 30 | 
 31 |   it("supports character ranges", () => {
 32 |     const regex = new RegExp("[a-c][A-C]", "i");
 33 |     const match = exec(regex, "Ac");
 34 |     expect(match.matches[0]).toBe("Ac");
 35 |   });
 36 | 
 37 |   it("sets ignoreCase flag", () => {
 38 |     expect(new RegExp("\\d+", "i").ignoreCase).toBeTruthy();
 39 |     expect(new RegExp("\\d+", "g").ignoreCase).toBeFalsy();
 40 |   });
 41 | });
 42 | // The "g" flag: exec() resumes from `lastIndex`, which is advanced past each match.
 43 | describe("global mode", () => {
 44 |   it("sets global flag", () => {
 45 |     expect(new RegExp("\\d+", "g").global).toBeTruthy();
 46 |     expect(new RegExp("\\d+", "").global).toBeFalsy();
 47 |   });
 48 | 
 49 |   it("increments lastIndex", () => {
 50 |     const regex = new RegExp("\\d+", "g");
 51 |     const match = exec(regex, "dog 23 fish 45 cat");
 52 |     expect(match.matches[0]).toStrictEqual("23");
 53 |     expect(regex.lastIndex).toStrictEqual(6); // offset just past "23"
 54 |   });
 55 | 
 56 |   it("uses lastIndex to support multiple matches", () => {
 57 |     const regex = new RegExp("\\d+", "g");
 58 |     let match = exec(regex, "dog 23 fish 45 cat");
 59 |     expect(match.matches[0]).toBe("23");
 60 |     expect(regex.lastIndex).toBe(6);
 61 | 
 62 |     match = exec(regex, "dog 23 fish 45 cat");
 63 |     expect(match.matches[0]).toBe("45");
 64 |     expect(regex.lastIndex).toBe(14); // offset just past "45"
 65 | 
 66 |     let empty_match = regex.exec("dog 23 fish 45 cat"); // called directly: the exec() helper would fail on null
 67 |     expect(empty_match).toBeNull();
 68 |     expect(regex.lastIndex).toBe(0); // a failed search rewinds lastIndex
 69 |   });
 70 | });
 71 | // The "m" flag: "^" and "$" also match at line boundaries inside the input.
 72 | describe("multi-line mode", () => {
 73 |   it("sets multi-line flag", () => {
 74 |     expect(new RegExp("\\d+", "m").multiline).toBeTruthy();
 75 |     expect(new RegExp("\\d+", "").multiline).toBeFalsy();
 76 |   });
 77 | 
 78 |   it("matches across multiple lines", () => {
 79 |     const match = exec("^f\\d{1}$", "f1\nbar\nbaz\nf2", "m");
 80 |     expect(match.matches.length).toBe(1);
 81 |     expect(match.matches[0]).toBe("f1");
 82 |   });
 83 | 
 84 |   it("matches across multiple lines with global mode", () => {
 85 |     const regex = new RegExp("^f\\d{1}$", "gm");
 86 | 
 87 |     let match = regex.exec("f1\nbar\nbaz\nf2");
 88 |     expect(match!.matches[0]).toBe("f1");
 89 | 
 90 |     match = regex.exec("f1\nbar\nbaz\nf2");
 91 |     expect(match!.matches[0]).toBe("f2");
 92 | 
 93 |     match = regex.exec("f1\nbar\nbaz\nf2"); // no third whole-line match exists
 94 |     expect(match).toBeNull();
 95 |   });
 96 |   // titles below are distinct so that a failure report identifies the spec
 97 |   it("matches a line-start anchored range on each line with global mode", () => {
 98 |     const regex = new RegExp("^[a-c]", "gm");
 99 | 
100 |     let match = regex.exec("a1\nd2\nc3\n");
101 |     expect(match!.matches[0]).toBe("a");
102 | 
103 |     match = regex.exec("a1\nd2\nc3\n"); // the "d2" line is skipped: "d" is outside a-c
104 |     expect(match!.matches[0]).toBe("c");
105 | 
106 |     match = regex.exec("a1\nd2\nc3\n");
107 |     expect(match).toBeNull();
108 |   });
109 | 
110 |   it("matches a line-end anchored range on each line with global mode", () => {
111 |     const regex = new RegExp("[a-c]$", "gm");
112 | 
113 |     let match = regex.exec("1a\n2d\n3c\n");
114 |     expect(match!.matches[0]).toBe("a");
115 | 
116 |     match = regex.exec("1a\n2d\n3c\n");
117 |     expect(match!.matches[0]).toBe("c");
118 | 
119 |     match = regex.exec("1a\n2d\n3c\n");
120 |     expect(match).toBeNull();
121 |   });
122 | });
123 | // Without the "g" flag every exec() starts from the beginning and leaves lastIndex at 0.
124 | describe("non-global mode", () => {
125 |   it("doesn't increment lastIndex", () => {
126 |     const regex = new RegExp("\\d+");
127 |     let match = exec(regex, "dog 23 fish 45 cat");
128 |     expect(match.matches[0]).toBe("23");
129 |     expect(regex.lastIndex).toBe(0);
130 | 
131 |     match = exec(regex, "dog 23 fish 45 cat"); // same first match again, not "45"
132 |     expect(match.matches[0]).toBe("23");
133 |     expect(regex.lastIndex).toBe(0);
134 |   });
135 | });
136 | // Larger patterns that combine classes, quantifiers and capture groups.
137 | describe("use cases", () => {
138 |   it("matches combinations", () => {
139 |     expectMatch("\\s\\w*", [" bar"]);
140 |     expectMatch("\\S\\w*", ["foo"]);
141 |   });
142 | 
143 |   it("email", () => {
144 |     const regex = ".+@.+\\..+";
145 |     expect(exec(regex, "colin@gmail.com")).toBeTruthy();
146 |     expectNotMatch(regex, ["gmail"]);
147 | 
148 |     const capturingRegex = "(.+)@(.+)\\.(.+)";
149 |     expect(exec(capturingRegex, "colin@gmail.com")).toBeTruthy();
150 | 
151 |     const match = exec(capturingRegex, "colin@gmail.com");
152 |     expect(match.matches[0]).toBe("colin@gmail.com"); // index 0 holds the whole match
153 |     expect(match.matches[1]).toBe("colin"); // indices 1..3 hold the groups in pattern order
154 |     expect(match.matches[2]).toBe("gmail");
155 |     expect(match.matches[3]).toBe("com");
156 |   });
157 | });
158 | // Invalid patterns must be rejected by the constructor with a descriptive message.
159 | describe("error cases", () => {
160 |   it("throws an explicit error when there is nothing to repeat", () => {
161 |     expect(() => {
162 |       let foo = new RegExp("*m", ""); // eslint-disable-line no-invalid-regexp
163 |     }).toThrow("Invalid regular expression: Nothing to repeat");
164 |   });
165 | });
166 | 


--------------------------------------------------------------------------------
/assembly/__tests__/utils.ts:
--------------------------------------------------------------------------------
 1 | import { RegExp, Match } from "..";
 2 | /** Asserts that `regex` (compiled once with `flags`) matches each string of `arr` in full. */
 3 | export function expectMatch(
 4 |   regex: string,
 5 |   arr: string[],
 6 |   flags: string = ""
 7 | ): void {
 8 |   const compiled = new RegExp(regex, flags);
 9 |   const count = arr.length;
10 |   for (let idx = 0; idx < count; idx++) {
11 |     const expected = arr[idx];
12 |     expect(exec(compiled, expected).matches[0]).toStrictEqual(expected);
13 |   }
14 | }
15 | /** Asserts that `regex` (compiled once with `flags`) finds no match in any string of `arr`. */
16 | export function expectNotMatch(
17 |   regex: string,
18 |   arr: string[],
19 |   flags: string = ""
20 | ): void {
21 |   const compiled = new RegExp(regex, flags);
22 |   for (let idx = 0, count = arr.length; idx < count; idx++) {
23 |     const candidate = arr[idx];
24 |     const message =
25 |       "string: " + candidate + " should not match regex: " + regex;
26 |     expect(compiled.exec(candidate)).toBeNull(message);
27 |   }
28 | }
29 | /** Runs `regex` (a RegExp, or a pattern string compiled with `flags`) on `value`, asserts a match and returns it. */
30 | export function exec<T = string>(
31 |   regex: T,
32 |   value: string,
33 |   flags: string = "" // only used when `regex` is a pattern string
34 | ): Match {
35 |   let regexp: RegExp;
36 |   if (regex instanceof RegExp) {
37 |     regexp = regex; // already compiled: used as is, so its lastIndex state carries over
38 |   } else if (isString<T>()) {
39 |     // @ts-ignore
40 |     regexp = new RegExp(<string>regex, flags);
41 |   } else {
42 |     ERROR("Only RegExp and string are valid types"); // NOTE(review): presumably the AssemblyScript compile-time ERROR builtin — confirm
43 |   }
44 |   // @ts-ignore
45 |   let res = regexp.exec(value);
46 |   // @ts-ignore
47 |   expect(res).not.toBe(
48 |     null,
49 |     // @ts-ignore
50 |     "string: " + value + " should match regex: " + regexp.toString()
51 |   );
52 |   return <Match>res; // cast drops the null case asserted against above
53 | }
54 | 


--------------------------------------------------------------------------------
/assembly/char.ts:
--------------------------------------------------------------------------------
  1 | // @ts-ignore
  2 | @lazy
  3 | export const enum Char { // named character codes referenced by the matchers and the NFA builder
  4 |   None = -1, // not a valid character code
  5 |   HorizontalTab = 0x09,
  6 |   LineFeed = 0x0a,
  7 |   VerticalTab = 0x0b,
  8 |   FormFeed = 0x0c,
  9 |   CarriageReturn = 0x0d,
 10 |   Space = 0x20,
 11 |   Dollar = 0x24, // "$"
 12 |   LeftParenthesis = 0x28, // "("
 13 |   RightParenthesis = 0x29, // ")"
 14 |   Asterisk = 0x2a, // "*"
 15 |   Plus = 0x2b, // "+"
 16 |   Comma = 0x2c, // ","
 17 |   Minus = 0x2d, // "-"
 18 |   Dot = 0x2e, // "."
 19 |   Zero = 0x30,
 20 |   Nine = 0x39,
 21 |   Colon = 0x3a, // ":"
 22 |   Question = 0x3f, // "?"
 23 |   A = 0x41,
 24 |   D = 0x44,
 25 |   S = 0x53,
 26 |   W = 0x57,
 27 |   Z = 0x5a,
 28 |   LeftSquareBracket = 0x5b, // "["
 29 |   Backslash = 0x5c, // "\"
 30 |   RightSquareBracket = 0x5d, // "]"
 31 |   Caret = 0x5e, // "^"
 32 |   Underscore = 0x5f, // "_"
 33 |   a = 0x61,
 34 |   d = 0x64,
 35 |   f = 0x66,
 36 |   n = 0x6e,
 37 |   r = 0x72,
 38 |   s = 0x73,
 39 |   t = 0x74,
 40 |   u = 0x75,
 41 |   v = 0x76,
 42 |   w = 0x77,
 43 |   x = 0x78,
 44 |   z = 0x7a,
 45 |   LeftCurlyBrace = 0x7b /* { */,
 46 |   VerticalBar = 0x7c /* | */,
 47 |   RightCurlyBrace = 0x7d /* } */,
 48 |   NonBreakingSpace = 0xa0,
 49 | }
 50 | /** Inclusive range test: true when from <= value <= to (assumes from <= to). */
 51 | // @ts-ignore 
 52 | @inline
 53 | function inRange(value: u32, from: u32, to: u32): bool {
 54 |   if (ASC_TARGET == 1) {
 55 |     // makes use of unsigned integer operations, making this
 56 |     // approach a little faster when compiled to WASM
 57 |     return value - from < (to - from + 1); // a value below `from` wraps to a huge u32, so one compare covers both bounds
 58 |   } else {
 59 |     return value >= from && value <= to;
 60 |   }
 61 | }
 62 | /** True when `code` is one of the ASCII digits "0".."9". */
 63 | export function isDigit(code: u32): bool {
 64 |   return inRange(code, Char.Zero, Char.Nine);
 65 | }
 66 | /** True when `code` is an ASCII hexadecimal digit: "0".."9", "a".."f" or "A".."F". */
 67 | export function isHexadecimalDigit(code: u32): bool {
 68 |   return isDigit(code) || inRange(code | 0x20, Char.a, Char.f); // | 0x20 folds "A".."F" onto "a".."f"
 69 | }
 70 | /** True when `code` is an ASCII lower-case letter "a".."z". */
 71 | export function isLowercaseAlpha(code: u32): bool {
 72 |   return inRange(code, Char.a, Char.z);
 73 | }
 74 | /** True when `code` is an ASCII upper-case letter "A".."Z". */
 75 | export function isUppercaseAlpha(code: u32): bool {
 76 |   return inRange(code, Char.A, Char.Z);
 77 | }
 78 | /** True when `code` is an ASCII letter of either case. */
 79 | export function isAlpha(code: u32): bool {
 80 |   if (ASC_TARGET == 1) {
 81 |     return (code | 32) - Char.a < 26; // | 32 folds upper onto lower case; presumably relies on unsigned wrap for codes below "a"
 82 |   } else {
 83 |     return inRange(code, Char.a, Char.z) || inRange(code, Char.A, Char.Z);
 84 |   }
 85 | }
 86 | /** True when `code` is one of the white space or line terminator codes listed below. */
 87 | export function isWhitespace(code: u32): bool {
 88 |   switch (code) {
 89 |     case Char.Space:
 90 |     case Char.HorizontalTab:
 91 |     case Char.VerticalTab:
 92 |     case Char.FormFeed:
 93 |     case Char.LineFeed:
 94 |     case Char.CarriageReturn:
 95 |     case Char.NonBreakingSpace:
 96 |     case 0x1680: // OGHAM SPACE MARK
 97 |     case 0x2028: // <LS> LINE SEPARATOR
 98 |     case 0x2029: // <PS> PARAGRAPH SEPARATOR
 99 |     case 0x202f: // <NNBSP> NARROW NO-BREAK SPACE
100 |     case 0x205f: // <MMSP> MEDIUM MATHEMATICAL SPACE
101 |     case 0x3000: // IDEOGRAPHIC SPACE
102 |     case 0xfeff: // <ZWNBSP> ZERO WIDTH NO-BREAK SPACE
103 |       return true;
104 |   }
105 |   if (inRange(code, 0x2000, 0x200a)) { // EN QUAD .. HAIR SPACE
106 |     return true;
107 |   }
108 |   return false;
109 | }
110 | 


--------------------------------------------------------------------------------
/assembly/env.ts:
--------------------------------------------------------------------------------
1 | export declare function log(str: string): void;
2 | 


--------------------------------------------------------------------------------
/assembly/index.ts:
--------------------------------------------------------------------------------
1 | export * from "./regexp";
2 | 


--------------------------------------------------------------------------------
/assembly/nfa/matcher.ts:
--------------------------------------------------------------------------------
  1 | import { isDigit, isAlpha, isWhitespace, Char } from "../char";
  2 | 
  3 | import {
  4 |   CharacterNode,
  5 |   CharacterClassNode,
  6 |   CharacterSetNode,
  7 |   CharacterRangeNode,
  8 |   NodeType,
  9 | } from "../parser/node";
 10 | import { Flags } from "../regexp";
 11 | import { Range } from "../util";
 12 | // Type tag stored on every Matcher; CharacterClassMatcher.matches switches on it to pick the concrete class.
 13 | const enum MatcherType {
 14 |   Character,
 15 |   CharacterRange,
 16 |   CharacterSet,
 17 |   CharacterClass,
 18 | }
 19 | export class Matcher { // base class: tests one character code; also hosts the node-to-matcher factories
 20 |   @lazy static _flags!: Flags; // scratch slot read by the map callback in fromCharacterSetNode
 21 | 
 22 |   constructor(readonly type: MatcherType) {}
 23 |   // default implementation accepts nothing; the subclasses define their own matches()
 24 |   matches(code: u32): bool {
 25 |     return false;
 26 |   }
 27 |   // NOTE: despite its name this builds a CharacterSetMatcher from a CharacterSetNode
 28 |   static fromCharacterClassNode(
 29 |     node: CharacterSetNode,
 30 |     flags: Flags
 31 |   ): CharacterSetMatcher {
 32 |     return new CharacterSetMatcher(node.charClass, flags.dotAll);
 33 |   }
 34 |   // builds a matcher for the range node.from..node.to, honouring the ignoreCase flag
 35 |   static fromCharacterRangeNode(
 36 |     node: CharacterRangeNode,
 37 |     flags: Flags
 38 |   ): CharacterRangeMatcher {
 39 |     return new CharacterRangeMatcher(
 40 |       new Range(node.from, node.to),
 41 |       flags.ignoreCase
 42 |     );
 43 |   }
 44 |   // NOTE: despite its name this builds a CharacterClassMatcher from a CharacterClassNode
 45 |   static fromCharacterSetNode(
 46 |     node: CharacterClassNode,
 47 |     flags: Flags
 48 |   ): CharacterClassMatcher {
 49 |     Matcher._flags = flags; // stashed in a static; presumably because the callback cannot capture `flags` in AssemblyScript — confirm
 50 |     const matchers = node.expressions.map<Matcher>((exp) => {
 51 |       switch (exp.type) { // one member matcher per expression inside the class
 52 |         case NodeType.CharacterRange:
 53 |           return Matcher.fromCharacterRangeNode(
 54 |             exp as CharacterRangeNode,
 55 |             Matcher._flags
 56 |           );
 57 |         case NodeType.Character:
 58 |           return Matcher.fromCharacterNode(
 59 |             exp as CharacterNode,
 60 |             Matcher._flags
 61 |           );
 62 |         case NodeType.CharacterSet:
 63 |           return Matcher.fromCharacterClassNode(
 64 |             exp as CharacterSetNode,
 65 |             Matcher._flags
 66 |           );
 67 |         default:
 68 |           throw new Error("unsupported node type within character set");
 69 |       }
 70 |     });
 71 |     return new CharacterClassMatcher(matchers, node.negated);
 72 |   }
 73 |   // builds an exact-character matcher, honouring the ignoreCase flag
 74 |   static fromCharacterNode(
 75 |     node: CharacterNode,
 76 |     flags: Flags
 77 |   ): CharacterMatcher {
 78 |     return new CharacterMatcher(node.char, flags.ignoreCase);
 79 |   }
 80 | }
 81 | /** Matches exactly one character code; with ignoreCase an ASCII letter matches in either case. */
 82 | export class CharacterMatcher extends Matcher {
 83 |   constructor(private character: Char, private ignoreCase: bool) {
 84 |     super(MatcherType.Character);
 85 |     // OR-ing 0x20 only folds case for A-Z/a-z; on other codes it conflates "@" with "`".
 86 |     this.ignoreCase = ignoreCase && isAlpha(<u32>character);
 87 |     if (this.ignoreCase) this.character |= 0x20;
 88 |   }
 89 | 
 90 |   matches(code: u32): bool {
 91 |     if (this.ignoreCase) {
 92 |       code |= 0x20; // the stored letter is lower case, so fold the input the same way
 93 |     }
 94 |     return this.character == code;
 95 |   }
 96 | }
 97 | // Letter ranges and the distance between the two cases, used by CharacterRangeMatcher under ignoreCase.
 98 | // @ts-ignore
 99 | @lazy const LOWERCASE_LETTERS = new Range(Char.a, Char.z);
100 | // @ts-ignore
101 | @lazy const UPPERCASE_LETTERS = new Range(Char.A, Char.Z);
102 | // @ts-ignore
103 | @lazy const UPPER_LOWER_OFFSET = Char.a - Char.A; // 0x61 - 0x41 = 0x20
104 | /** Matches codes inside a range; with ignoreCase the letters covered by the range also match in the other case. */
105 | export class CharacterRangeMatcher extends Matcher {
106 |   private ranges: Range[]; // the original range plus any case-shifted copies
107 | 
108 |   constructor(private range: Range, ignoreCase: bool) {
109 |     super(MatcherType.CharacterRange);
110 |     this.ranges = [range];
111 | 
112 |     if (ignoreCase) {
113 |       const lowerIntersect = range.intersection(LOWERCASE_LETTERS); // presumably null when there is no overlap — confirm in util.ts
114 |       if (lowerIntersect) {
115 |         this.ranges.push(lowerIntersect.offset(-UPPER_LOWER_OFFSET)); // lower-case part shifted onto upper case
116 |       }
117 |       const upperIntersect = range.intersection(UPPERCASE_LETTERS);
118 |       if (upperIntersect) {
119 |         this.ranges.push(upperIntersect.offset(UPPER_LOWER_OFFSET)); // upper-case part shifted onto lower case
120 |       }
121 |     }
122 |   }
123 | 
124 |   matches(code: u32): bool {
125 |     for (let i = 0, len = this.ranges.length; i < len; i++) {
126 |       if (this.ranges[i].contains(code)) {
127 |         return true; // any one range containing the code is enough
128 |       }
129 |     }
130 |     return false;
131 |   }
132 | }
133 | /** Matcher for "." and the escapes d D w W s S t r n v f; word characters are ASCII letters, digits and "_". */
134 | export class CharacterSetMatcher extends Matcher {
135 |   constructor(public charClass: Char, private dotAll: bool) {
136 |     super(MatcherType.CharacterSet);
137 |   }
138 | 
139 |   matches(code: u32): bool {
140 |     const kind = this.charClass;
141 |     if (kind == Char.Dot) {
142 |       // dotAll accepts every code; otherwise CR, LF, U+2028 and U+2029 are rejected
143 |       if (this.dotAll) return true;
144 |       return !(
145 |         code == Char.CarriageReturn ||
146 |         code == Char.LineFeed ||
147 |         code == 0x2028 ||
148 |         code == 0x2029
149 |       );
150 |     }
151 |     switch (kind) {
152 |       case Char.d:
153 |         return isDigit(code);
154 |       case Char.D:
155 |         return !isDigit(code);
156 |       case Char.w:
157 |       case Char.W: {
158 |         const wordChar =
159 |           isAlpha(code) || isDigit(code) || code == Char.Underscore;
160 |         return kind == Char.w ? wordChar : !wordChar;
161 |       }
162 |       case Char.s:
163 |         return isWhitespace(code);
164 |       case Char.S:
165 |         return !isWhitespace(code);
166 |       case Char.t: return code == Char.HorizontalTab;
167 |       case Char.r: return code == Char.CarriageReturn;
168 |       case Char.n: return code == Char.LineFeed;
169 |       case Char.v: return code == Char.VerticalTab;
170 |       case Char.f: return code == Char.FormFeed;
171 |       default:
172 |         throw new Error(
173 |           "unsupported character class - " + String.fromCharCode(this.charClass)
174 |         );
175 |     }
176 |   }
177 | }
178 | /** Matcher over a list of member matchers: accepts a code when any member does, inverted when `negated`. */
179 | export class CharacterClassMatcher extends Matcher {
180 |   constructor(public matchers: Matcher[], public negated: bool) {
181 |     super(MatcherType.CharacterClass);
182 |   }
183 | 
184 |   matches(code: u32): bool {
185 |     let match: bool = false;
186 |     for (let i = 0, len = this.matchers.length; i < len; i++) {
187 |       let matcher = this.matchers[i];
188 |       switch (matcher.type) { // explicit cast per type tag; NOTE(review): presumably avoids relying on virtual dispatch — confirm before simplifying
189 |         case MatcherType.Character:
190 |           match = (matcher as CharacterMatcher).matches(code);
191 |           break;
192 | 
193 |         case MatcherType.CharacterRange:
194 |           match = (matcher as CharacterRangeMatcher).matches(code);
195 |           break;
196 | 
197 |         case MatcherType.CharacterSet:
198 |           match = (matcher as CharacterSetMatcher).matches(code);
199 |           break;
200 | 
201 |         case MatcherType.CharacterClass:
202 |           match = (matcher as CharacterClassMatcher).matches(code);
203 |           break;
204 |       }
205 |       if (match) break; // the first accepting member decides
206 |     }
207 |     return this.negated ? !match : match;
208 |   }
209 | }
210 | 


--------------------------------------------------------------------------------
/assembly/nfa/nfa.ts:
--------------------------------------------------------------------------------
  1 | import {
  2 |   AST,
  3 |   CharacterNode,
  4 |   Node,
  5 |   ConcatenationNode,
  6 |   RepetitionNode,
  7 |   AlternationNode,
  8 |   CharacterClassNode,
  9 |   CharacterSetNode,
 10 |   GroupNode,
 11 |   NodeType,
 12 | } from "../parser/node";
 13 | 
 14 | import { Char } from "../char";
 15 | import { Matcher } from "./matcher";
 16 | import { Flags } from "../regexp";
 17 | import { MatchResult } from "./types";
 18 | // Base NFA state: holds outgoing transitions and an id; a plain State performs no match of its own.
 19 | /* eslint @typescript-eslint/no-empty-function: ["error", { "allow": ["constructors", "methods"] }] */
 20 | export class State {
 21 |   @lazy static _stateId: u32 = 0; // counter that supplies the default id of each new State
 22 |   constructor(
 23 |     public transitions: State[] = [],
 24 |     public id: u32 = State._stateId++
 25 |   ) {}
 26 | 
 27 |   matches(input: string, position: u32): MatchResult {
 28 |     return MatchResult.Ignore; // subclasses return Match/Fail or record information instead
 29 |   }
 30 | }
 31 | /** Marks the start of a group: remembers the input position at which the group was entered. */
 32 | export class GroupStartMarkerState extends State {
 33 |   location: i32 = -1; // position stored by matches(); -1 until it has been called
 34 |   // a bit yucky - storing transient state in the state machine!
 35 |   capture: string = "";
 36 |   // captures from the path through the NFA that reaches the end are flagged
 37 |   flagged: bool = false;
 38 | 
 39 |   constructor(next: State, public capturing: bool, public groupId: i32) {
 40 |     super();
 41 |     this.transitions.push(next); // single outgoing transition into the group body
 42 |   }
 43 | 
 44 |   matches(input: string, position: u32): MatchResult {
 45 |     this.location = position;
 46 |     return MatchResult.Ignore; // records only; reports neither Match nor Fail
 47 |   }
 48 | }
 49 | /** Marks the end of a group: for a capturing group it stores the matched text on the start marker. */
 50 | export class GroupEndMarkerState extends State {
 51 |   constructor(next: State, public startMarker: GroupStartMarkerState) {
 52 |     super();
 53 |     this.transitions.push(next);
 54 |   }
 55 | 
 56 |   matches(input: string, position: u32): MatchResult {
 57 |     if (this.startMarker.capturing) {
 58 |       this.startMarker.capture = input.substring( // text from the recorded group start up to the current position
 59 |         this.startMarker.location,
 60 |         position
 61 |       );
 62 |     }
 63 |     return MatchResult.Ignore;
 64 |   }
 65 | }
 66 | /** State that tests one character: asks its matcher whether the code at `position` is accepted. */
 67 | export class MatcherState<T extends Matcher> extends State {
 68 |   ignoreCase: bool = false; // NOTE(review): neither read nor written inside this class — possibly unused
 69 | 
 70 |   constructor(private matcher: T, next: State) {
 71 |     super();
 72 |     this.transitions.push(next);
 73 |   }
 74 | 
 75 |   matches(input: string, position: u32): MatchResult {
 76 |     return this.matcher.matches(input.charCodeAt(position)) // NOTE(review): `position` is not bounds-checked here
 77 |       ? MatchResult.Match
 78 |       : MatchResult.Fail;
 79 |   }
 80 | }
 81 | /** An NFA fragment, identified by its start state and its end state. */
 82 | export class Automata {
 83 |   static toNFA(ast: AST, flags: Flags): Automata { // builds the automaton for the body of a parsed regex
 84 |     return new AutomataFactor(flags).automataForNode(ast.body);
 85 |   }
 86 | 
 87 |   static fromEpsilon(): Automata { // two plain states joined by a single transition
 88 |     const start = new State();
 89 |     const end = new State();
 90 |     start.transitions.push(end);
 91 |     return new Automata(start, end);
 92 |   }
 93 | 
 94 |   static fromMatcher<T extends Matcher>(matcher: T): Automata { // one MatcherState leading to a plain end state
 95 |     const end = new State();
 96 |     const start = new MatcherState<T>(matcher, end);
 97 |     return new Automata(start, end);
 98 |   }
 99 | 
100 |   constructor(public start: State, public end: State) {}
101 | }
102 | /** Sequence: links the end of `first` to the start of `second` (mutates `first.end`). */
103 | function concat(first: Automata, second: Automata): Automata {
104 |   first.end.transitions.push(second.start);
105 |   return new Automata(first.start, second.end);
106 | }
107 | /** Alternation: a new start state forks into both automata and a new end state joins them. */
108 | function union(first: Automata, second: Automata): Automata {
109 |   const fork = new State();
110 |   const join = new State();
111 |   fork.transitions.push(first.start); // `first` is listed ahead of `second`
112 |   fork.transitions.push(second.start);
113 |   first.end.transitions.push(join);
114 |   second.end.transitions.push(join);
115 |   return new Automata(fork, join);
116 | }
117 | /** Star: wraps `nfa` between a new start and end state so that it may run zero or more times. */
118 | function closure(nfa: Automata, greedy: bool): Automata {
119 |   const entry = new State();
120 |   const exit = new State();
121 |   // Both the entry state and the wrapped automaton's end state offer the same
122 |   // two choices: run `nfa` (again) or leave. Only the order differs - a greedy
123 |   // closure lists the repeat first, a non-greedy one lists the exit first.
124 |   const first = greedy ? nfa.start : exit;
125 |   const second = greedy ? exit : nfa.start;
126 |   const loopBack = nfa.end.transitions;
127 |   loopBack.push(first);
128 |   loopBack.push(second);
129 |   const fromEntry = entry.transitions;
130 |   fromEntry.push(first);
131 |   fromEntry.push(second);
132 |   return new Automata(entry, exit);
133 | }
134 | /** Optional: wraps `nfa` between a new start and end state so that it runs once or is skipped. */
135 | function zeroOrOne(nfa: Automata, greedy: bool): Automata {
136 |   const entry = new State();
137 |   const exit = new State();
138 |   // after one pass through `nfa` the only way on is out
139 |   nfa.end.transitions.push(exit);
140 |   // the entry offers two choices, "run nfa" and "skip it"; a greedy
141 |   // quantifier lists running it first, a non-greedy one lists the
142 |   // skip first
143 |   const choices = entry.transitions;
144 |   choices.push(greedy ? nfa.start : exit);
145 |   choices.push(greedy ? exit : nfa.start);
146 |   return new Automata(entry, exit);
147 | }
148 | /** Plus: wraps `nfa` between a new start and end state so that it runs at least once and may repeat. */
149 | function oneOrMore(nfa: Automata, greedy: bool): Automata {
150 |   const entry = new State();
151 |   const exit = new State();
152 |   // the only way in is through `nfa`, which forces the first pass
153 |   entry.transitions.push(nfa.start);
154 |   // after each pass there are two choices, "repeat" and "leave"; a
155 |   // greedy quantifier lists the repeat first, a non-greedy one lists
156 |   // the exit first
157 |   const afterPass = nfa.end.transitions;
158 |   afterPass.push(greedy ? nfa.start : exit);
159 |   afterPass.push(greedy ? exit : nfa.start);
160 |   return new Automata(entry, exit);
161 | }
162 | /** Group: wraps `nfa` between a start marker and an end marker that record what the group matched. */
163 | function group(nfa: Automata, capturing: bool, id: i32): Automata {
164 |   // the start marker remembers where the group was entered; the end
165 |   // marker keeps a reference to it and, for capturing groups, stores
166 |   // the matched text on it
167 |   const opening = new GroupStartMarkerState(nfa.start, capturing, id);
168 |   const exit = new State();
169 |   nfa.end.transitions.push(new GroupEndMarkerState(exit, opening));
170 |   return new Automata(opening, exit);
171 | }
172 | /** Builds automata from AST nodes, handing the regex flags to every matcher it creates. */
173 | class AutomataFactor {
174 |   constructor(private flags: Flags) {}
175 | 
176 |   // recursively builds an automata for the given AST
177 |   automataForNode(expression: Node | null): Automata {
178 |     if (expression == null) {
179 |       return Automata.fromEpsilon(); // no node: an automaton with no matcher state
180 |     }
181 | 
182 |     switch (expression.type) {
183 |       case NodeType.Repetition: {
184 |         const node = expression as RepetitionNode;
185 |         const automata = this.automataForNode(node.expression); // the repeated sub-expression is built first
186 |         const quantifier = node.quantifier;
187 |         if (quantifier == Char.Question) {
188 |           return zeroOrOne(automata, node.greedy);
189 |         } else if (quantifier == Char.Plus) {
190 |           return oneOrMore(automata, node.greedy);
191 |         } else if (quantifier == Char.Asterisk) {
192 |           return closure(automata, node.greedy);
193 |         } else {
194 |           throw new Error( // only ?, + and * are handled; NOTE(review): {n,m} ranges are presumably rewritten before this point — confirm
195 |             "unsupported quantifier - " + String.fromCharCode(quantifier)
196 |           );
197 |         }
198 |       }
199 |       case NodeType.Character:
200 |         return Automata.fromMatcher(
201 |           Matcher.fromCharacterNode(expression as CharacterNode, this.flags)
202 |         );
203 |       case NodeType.Concatenation: {
204 |         const expressions = (expression as ConcatenationNode).expressions;
205 |         if (expressions.length == 0) {
206 |           return Automata.fromEpsilon();
207 |         }
208 |         let automata = this.automataForNode(expressions[0]);
209 |         for (let i = 1, len = expressions.length; i < len; i++) {
210 |           automata = concat(automata, this.automataForNode(expressions[i])); // chained left to right
211 |         }
212 |         return automata;
213 |       }
214 |       case NodeType.Alternation: {
215 |         const node = expression as AlternationNode;
216 |         return union(
217 |           this.automataForNode(node.left),
218 |           this.automataForNode(node.right)
219 |         );
220 |       }
221 |       case NodeType.CharacterClass:
222 |         return Automata.fromMatcher(
223 |           Matcher.fromCharacterSetNode( // (sic) the factory name says Set but it takes a CharacterClassNode
224 |             expression as CharacterClassNode,
225 |             this.flags
226 |           )
227 |         );
228 |       case NodeType.CharacterSet:
229 |         return Automata.fromMatcher(
230 |           Matcher.fromCharacterClassNode( // (sic) the factory name says Class but it takes a CharacterSetNode
231 |             expression as CharacterSetNode,
232 |             this.flags
233 |           )
234 |         );
235 |       case NodeType.Group: {
236 |         const node = expression as GroupNode;
237 |         return group(
238 |           this.automataForNode(node.expression),
239 |           node.capturing,
240 |           node.id
241 |         );
242 |       }
243 |       case NodeType.Assertion:
244 |         return Automata.fromEpsilon(); // no matcher state is added; NOTE(review): assertions are presumably enforced elsewhere — confirm
245 |       default:
246 |         throw new Error("un-recognised AST node");
247 |     }
248 |   }
249 | }
250 | 
251 | export { MatchResult } from "./types";
252 | 


--------------------------------------------------------------------------------
/assembly/nfa/types.ts:
--------------------------------------------------------------------------------
/** Result returned by a state's `matches` check against the input. */
// @ts-ignore
@lazy
export enum MatchResult {
  // a match has occurred - which is a signal to consume a character
  Match,
  // a match failed, abort this regex
  Fail,
  // this state doesn't perform a match
  Ignore,
}
11 | 


--------------------------------------------------------------------------------
/assembly/nfa/walker.ts:
--------------------------------------------------------------------------------
 1 | import { State } from "./nfa";
 2 | 
 3 | export function walker(
 4 |   state: State,
 5 |   visitor: (state: State) => void,
 6 |   visited: State[] = []
 7 | ): void {
 8 |   if (visited.includes(state)) return;
 9 |   visitor(state);
10 |   visited.push(state);
11 |   const nextStates = state.transitions;
12 |   for (let i = 0, len = nextStates.length; i < len; i++) {
13 |     walker(nextStates[i], visitor, visited);
14 |   }
15 | }
16 | 


--------------------------------------------------------------------------------
/assembly/parser/node.ts:
--------------------------------------------------------------------------------
  1 | import { Char } from "../char";
  2 | import { replaceAtIndex } from "../util";
  3 | 
/**
 * Tags identifying the kind of an AST node. Each node class in this file
 * passes its own tag to the `Node` constructor, where it is stored as `type`.
 */
export const enum NodeType {
  AST,
  Assertion,
  Alternation,
  Concatenation,
  Character,
  CharacterClass,
  CharacterSet,
  CharacterRange,
  Repetition,
  RangeRepetition,
  Group,
}
 17 | 
/** Base class shared by every AST node; `type` identifies the concrete kind. */
export abstract class Node {
  // shared empty list returned by nodes that have no children
  @lazy static readonly emptyArray: Node[] = new Array<Node>();
  constructor(public type: NodeType) {}

  /** Child nodes of this node; the base implementation returns the shared empty array. */
  children(): Node[] {
    return Node.emptyArray;
  }

  /** Creates a copy of this node. */
  abstract clone(): Node;

  /**
   * Swaps the child `node` for `replacement`. Node types that do not
   * override this method throw.
   */
  replace(node: Node, replacement: Node): void {
    throw new Error("replace not implemented for this node type");
  }
}
 32 | 
 33 | export class AST extends Node {
 34 |   constructor(public body: Node | null) {
 35 |     super(NodeType.AST);
 36 |   }
 37 | 
 38 |   children(): Node[] {
 39 |     return this.body != null ? [this.body as Node] : Node.emptyArray;
 40 |   }
 41 | 
 42 |   clone(): Node {
 43 |     const body = this.body;
 44 |     return new AST(body != null ? body.clone() : null);
 45 |   }
 46 | 
 47 |   replace(node: Node, replacement: Node): void {
 48 |     this.body = replacement;
 49 |   }
 50 | }
 51 | 
 52 | export class ConcatenationNode extends Node {
 53 |   constructor(public expressions: Node[]) {
 54 |     super(NodeType.Concatenation);
 55 |   }
 56 | 
 57 |   children(): Node[] {
 58 |     return this.expressions;
 59 |   }
 60 | 
 61 |   clone(): Node {
 62 |     return new ConcatenationNode(
 63 |       this.expressions.slice(0).map<Node>((s) => s.clone())
 64 |     );
 65 |   }
 66 | 
 67 |   replace(node: Node, replacement: Node): void {
 68 |     const expressions = this.expressions;
 69 |     const index = expressions.indexOf(node);
 70 |     this.expressions = replaceAtIndex(expressions, index, replacement);
 71 |   }
 72 | }
 73 | 
 74 | export class CharacterClassNode extends Node {
 75 |   constructor(public expressions: Node[], public negated: bool) {
 76 |     super(NodeType.CharacterClass);
 77 |   }
 78 | 
 79 |   clone(): Node {
 80 |     return new CharacterClassNode(
 81 |       this.expressions.slice(0).map<Node>((s) => s.clone()),
 82 |       this.negated
 83 |     );
 84 |   }
 85 | }
 86 | 
/** A range inside a bracketed class, e.g. `a-z`; `from` and `to` are char codes. */
export class CharacterRangeNode extends Node {
  constructor(public from: u32, public to: u32) {
    super(NodeType.CharacterRange);
  }

  /** True when `node` is tagged as a character range. */
  static is(node: Node): bool {
    return node.type == NodeType.CharacterRange;
  }

  /** Returns a new range with the same bounds. */
  clone(): Node {
    return new CharacterRangeNode(this.from, this.to);
  }
}
100 | 
/** A single literal character, stored as its char code. */
export class CharacterNode extends Node {
  constructor(public char: u32) {
    super(NodeType.Character);
  }

  /** Returns a new node for the same char code. */
  clone(): Node {
    return new CharacterNode(this.char);
  }
}
110 | 
/**
 * A boundary assertion. `kind` is the char code of the assertion token
 * (the parser creates these for `^` and `$`).
 */
export class AssertionNode extends Node {
  constructor(public kind: Char) {
    super(NodeType.Assertion);
  }

  /**
   * True when `node` is an assertion. When `kind` is given (anything other
   * than `Char.None`) the assertion must also be of that kind.
   */
  static is(node: Node, kind: Char = Char.None): bool {
    if (node.type != NodeType.Assertion) {
      return false;
    }
    return kind == Char.None || (node as AssertionNode).kind == kind;
  }

  /** Returns a new assertion of the same kind. */
  clone(): Node {
    return new AssertionNode(this.kind);
  }
}
127 | 
/**
 * A predefined character set. `charClass` is the char code that selects
 * the set: the parser passes `Char.Dot` for `.` and the escaped letter for
 * sequences such as `\d` or `\w`.
 */
export class CharacterSetNode extends Node {
  constructor(public charClass: Char) {
    super(NodeType.CharacterSet);
  }

  /** Returns a new node for the same set. */
  clone(): Node {
    return new CharacterSetNode(this.charClass);
  }
}
137 | 
/**
 * A `?`, `+` or `*` quantifier applied to `expression`.
 *
 * `quantifier` is the char code of the quantifier token and `greedy` is
 * false when the quantifier was followed by `?` (lazy matching).
 */
export class RepetitionNode extends Node {
  constructor(
    public expression: Node,
    public quantifier: Char,
    public greedy: bool = true
  ) {
    super(NodeType.Repetition);
  }

  /**
   * Returns a deep copy. The `greedy` flag is carried over so that clones
   * of lazy quantifiers (created when range repetitions are expanded) stay
   * lazy instead of silently reverting to the greedy default.
   */
  clone(): Node {
    return new RepetitionNode(
      this.expression.clone(),
      this.quantifier,
      this.greedy
    );
  }

  /** The repetition has a single child, so `replacement` always becomes it. */
  replace(node: Node, replacement: Node): void {
    this.expression = replacement;
  }

  /** Returns the repeated expression as a single-element list. */
  children(): Node[] {
    return [this.expression];
  }
}
159 | 
/**
 * A `{from}`, `{from,}` or `{from,to}` quantifier applied to `expression`.
 *
 * `to` is -1 for an open-ended range (the parser reports a missing upper
 * bound as -1). `greedy` is false when the quantifier was followed by `?`.
 * Constructing a range repetition directly over another range repetition
 * throws.
 */
export class RangeRepetitionNode extends Node {
  constructor(
    public expression: Node,
    public from: i32,
    public to: i32,
    public greedy: bool = true
  ) {
    super(NodeType.RangeRepetition);
    if (expression.type == NodeType.RangeRepetition) {
      throw new Error("The preceding token is not quantifiable");
    }
  }

  /**
   * Returns a deep copy. The `greedy` flag is carried over so that a cloned
   * lazy range (e.g. the inner part of `(a{1,2}?){2}`) stays lazy instead of
   * reverting to the greedy default.
   */
  clone(): Node {
    return new RangeRepetitionNode(
      this.expression.clone(),
      this.from,
      this.to,
      this.greedy
    );
  }

  /** The repetition has a single child, so `replacement` always becomes it. */
  replace(node: Node, replacement: Node): void {
    this.expression = replacement;
  }

  /** Returns the repeated expression as a single-element list. */
  children(): Node[] {
    return [this.expression];
  }
}
185 | 
/** A choice between two alternatives, `left|right`. */
export class AlternationNode extends Node {
  constructor(public left: Node, public right: Node) {
    super(NodeType.Alternation);
  }

  /** True when `node` is tagged as an alternation. */
  static is(node: Node): bool {
    return node.type == NodeType.Alternation;
  }

  /** Both alternatives, left first. */
  children(): Node[] {
    const branches: Node[] = [this.left, this.right];
    return branches;
  }

  /** Builds a new alternation from clones of both alternatives. */
  clone(): Node {
    const leftCopy = this.left.clone();
    const rightCopy = this.right.clone();
    return new AlternationNode(leftCopy, rightCopy);
  }

  /**
   * Replaces the left alternative when `node` is the left child; in every
   * other case the right alternative is replaced.
   */
  replace(node: Node, replacement: Node): void {
    if (this.left !== node) {
      this.right = replacement;
      return;
    }
    this.left = replacement;
  }
}
211 | 
/**
 * A parenthesised group. `capturing` is false for `(?:...)` groups and `id`
 * identifies the group.
 */
export class GroupNode extends Node {
  // counter supplying ids for groups created without an explicit id;
  // it is static, so it keeps counting across every group ever created
  @lazy static _id: i32 = 0;

  constructor(
    public expression: Node,
    public capturing: bool,
    public id: i32 = -1
  ) {
    super(NodeType.Group);
    // -1 means "no id supplied": take the next one from the counter
    if (id == -1) {
      this.id = GroupNode._id++;
    }
  }

  /** Returns the grouped expression as a single-element list. */
  children(): Node[] {
    return [this.expression];
  }

  /** Returns a deep copy that keeps this group's id rather than allocating a new one. */
  clone(): Node {
    return new GroupNode(this.expression.clone(), this.capturing, this.id);
  }

  /** The group has a single child, so `replacement` always becomes it. */
  replace(node: Node, replacement: Node): void {
    this.expression = replacement;
  }
}
238 | 


--------------------------------------------------------------------------------
/assembly/parser/parser.ts:
--------------------------------------------------------------------------------
  1 | import { isDigit, Char, isHexadecimalDigit } from "../char";
  2 | import { StringIterator } from "./string-iterator";
  3 | import {
  4 |   AST,
  5 |   RangeRepetitionNode,
  6 |   GroupNode,
  7 |   AssertionNode,
  8 |   CharacterSetNode,
  9 |   CharacterNode,
 10 |   Node,
 11 |   AlternationNode,
 12 |   ConcatenationNode,
 13 |   RepetitionNode,
 14 |   CharacterClassNode,
 15 |   CharacterRangeNode,
 16 | } from "./node";
 17 | 
 18 | function isQuantifier(code: Char): bool {
 19 |   return code == Char.Question || code == Char.Plus || code == Char.Asterisk;
 20 | }
 21 | 
 22 | // characters which have special meaning within character sets
 23 | function isCharacterSetSpecialChar(code: Char): bool {
 24 |   return (
 25 |     code == Char.Caret ||
 26 |     code == Char.Minus ||
 27 |     code == Char.RightSquareBracket ||
 28 |     code == Char.Backslash
 29 |   );
 30 | }
 31 | 
 32 | function isCharacterClass(code: u32): bool {
 33 |   switch (code) {
 34 |     case Char.d:
 35 |     case Char.D:
 36 |     case Char.Dot:
 37 |     case Char.w:
 38 |     case Char.W:
 39 |     case Char.s:
 40 |     case Char.S:
 41 |     case Char.t:
 42 |     case Char.r:
 43 |     case Char.n:
 44 |     case Char.v:
 45 |     case Char.f:
 46 |       return true;
 47 |   }
 48 |   return false;
 49 | }
 50 | 
 51 | function isAssertion(code: u32): bool {
 52 |   return code == Char.Dollar || code == Char.Caret; // "$" or "^"
 53 | }
 54 | 
 55 | function isSpecialCharacter(code: u32): bool {
 56 |   switch (code) {
 57 |     case Char.Dollar:
 58 |     case Char.LeftParenthesis:
 59 |     case Char.RightParenthesis:
 60 |     case Char.Asterisk:
 61 |     case Char.Plus:
 62 |     case Char.Dot:
 63 |     case Char.Question:
 64 |     case Char.Backslash:
 65 |     case Char.LeftSquareBracket:
 66 |     case Char.RightSquareBracket:
 67 |     case Char.Caret:
 68 |     case Char.VerticalBar:
 69 |     case Char.LeftCurlyBrace:
 70 |     case Char.RightCurlyBrace:
 71 |       return true;
 72 |   }
 73 |   return false;
 74 | }
 75 | 
// bounds of a parsed range quantifier; the parser stores -1 in `to` when
// the upper bound is omitted (e.g. "{2,}")
class Range {
  constructor(public from: i32, public to: i32) {}
}
 79 | 
 80 | export class Parser {
 81 |   iterator: StringIterator;
 82 | 
 83 |   private constructor(input: string) {
 84 |     this.iterator = new StringIterator(input);
 85 |   }
 86 | 
 87 |   static toAST(input: string): AST {
 88 |     return new Parser(input).toAST();
 89 |   }
 90 | 
 91 |   private eatToken(value: u32 = -1): u32 {
 92 |     const currentToken = this.iterator.current;
 93 |     if (value != -1 && this.iterator.current != value) {
 94 |       throw new Error("invalid token");
 95 |     }
 96 |     this.iterator.next();
 97 |     return currentToken;
 98 |   }
 99 | 
100 |   private toAST(): AST {
101 |     if (!this.iterator.more()) {
102 |       return new AST(null);
103 |     } else {
104 |       return new AST(this.parseSequence());
105 |     }
106 |   }
107 | 
108 |   private parseCharacterCode(code: u32): Node {
109 |     const length = code == Char.x ? 2 : 4;
110 |     // check whether we have the correct number of digits ahead
111 |     for (let i = 0; i < length; i++) {
112 |       if (!isHexadecimalDigit(this.iterator.lookahead(i + 1))) {
113 |         return new CharacterNode(this.eatToken());
114 |       }
115 |     }
116 |     // if so, parse the hex string
117 |     this.eatToken(code);
118 |     let value = "";
119 |     for (let i = 0; i < length; i++) {
120 |       value += this.iterator.currentAsString();
121 |       this.eatToken();
122 |     }
123 |     return new CharacterNode(u32(parseInt(value, 16)));
124 |   }
125 | 
126 |   private parseCharacter(): Node {
127 |     let token = this.iterator.current;
128 |     if (this.iterator.current == Char.Backslash) {
129 |       this.eatToken(Char.Backslash);
130 |       token = this.iterator.current;
131 |       if (isSpecialCharacter(token)) {
132 |         this.eatToken();
133 |         return new CharacterNode(token);
134 |       } else if (isAssertion(token)) {
135 |         return new CharacterNode(this.eatToken());
136 |       } else if (token == Char.x) {
137 |         return this.parseCharacterCode(Char.x);
138 |       } else if (token == Char.u) {
139 |         return this.parseCharacterCode(Char.u);
140 |       } else if (isCharacterClass(token)) {
141 |         return new CharacterSetNode(this.eatToken());
142 |       } else {
143 |         return new CharacterNode(this.eatToken());
144 |       }
145 |     }
146 | 
147 |     if (isAssertion(token)) {
148 |       return new AssertionNode(this.eatToken());
149 |     }
150 | 
151 |     if (token == Char.Dot) {
152 |       this.eatToken(Char.Dot);
153 |       return new CharacterSetNode(Char.Dot);
154 |     }
155 | 
156 |     return new CharacterNode(this.eatToken());
157 |   }
158 | 
159 |   private maybeParseDigit(): i32 {
160 |     let digitStr = "";
161 |     while (this.iterator.more()) {
162 |       const token = this.iterator.current;
163 |       if (isDigit(token)) {
164 |         digitStr += this.iterator.currentAsString();
165 |       } else {
166 |         return digitStr == "" ? -1 : <i32>parseInt(digitStr);
167 |       }
168 |       this.eatToken();
169 |     }
170 |     return digitStr == "" ? -1 : <i32>parseInt(digitStr);
171 |   }
172 | 
173 |   private maybeParseRepetitionRange(): Range | null {
174 |     // snapshot
175 |     const iteratorCopy = this.iterator.copy();
176 |     this.eatToken(Char.LeftCurlyBrace);
177 | 
178 |     const from = this.maybeParseDigit();
179 |     if (from == -1) {
180 |       return null;
181 |     }
182 |     if (this.iterator.current == Char.RightCurlyBrace) {
183 |       this.eatToken();
184 |       return new Range(from, from);
185 |     } else if (this.iterator.current == Char.Comma) {
186 |       this.eatToken();
187 |       const to = this.maybeParseDigit();
188 |       // @ts-ignore
189 |       if (this.iterator.current == Char.RightCurlyBrace) {
190 |         this.eatToken();
191 |         return new Range(from, to);
192 |       }
193 |     }
194 | 
195 |     this.iterator = iteratorCopy;
196 |     return null;
197 |   }
198 | 
199 |   private isGreedy(): bool {
200 |     if (this.iterator.current == Char.Question) {
201 |       this.eatToken();
202 |       return false;
203 |     }
204 |     return true;
205 |   }
206 | 
207 |   private isCapturing(): bool {
208 |     if (
209 |       this.iterator.current == Char.Question &&
210 |       this.iterator.lookahead(1) == Char.Colon
211 |     ) {
212 |       this.eatToken(Char.Question);
213 |       this.eatToken(Char.Colon);
214 |       return false;
215 |     }
216 |     return true;
217 |   }
218 | 
219 |   // parses a sequence of chars
220 |   private parseSequence(): Node {
221 |     let nodes = new Array<Node>();
222 |     while (this.iterator.more()) {
223 |       const token = this.iterator.current;
224 |       if (token == Char.RightParenthesis) break;
225 |       // @ts-ignore
226 |       if (token == Char.VerticalBar) {
227 |         this.eatToken(Char.VerticalBar);
228 |         const left = nodes.length > 1 ? new ConcatenationNode(nodes) : nodes[0];
229 |         nodes = [new AlternationNode(left, this.parseSequence())];
230 |         // @ts-ignore
231 |       } else if (token == Char.LeftParenthesis) {
232 |         this.eatToken(Char.LeftParenthesis);
233 |         const capturing = this.isCapturing();
234 |         nodes.push(new GroupNode(this.parseSequence(), capturing));
235 |         this.eatToken(Char.RightParenthesis);
236 |         // @ts-ignore
237 |       } else if (token == Char.LeftCurlyBrace) {
238 |         const range = this.maybeParseRepetitionRange();
239 |         if (range != null) {
240 |           const expression = nodes.pop();
241 |           nodes.push(
242 |             new RangeRepetitionNode(
243 |               expression,
244 |               range.from,
245 |               range.to,
246 |               this.isGreedy()
247 |             )
248 |           );
249 |         } else {
250 |           // this is not the start of a repetition, it's just a char!
251 |           nodes.push(this.parseCharacter());
252 |         }
253 |       } else if (isQuantifier(token)) {
254 |         if (nodes.length === 0) {
255 |           throw new Error("Invalid regular expression: Nothing to repeat");
256 |         }
257 | 
258 |         const expression = nodes.pop();
259 |         const quantifier = this.eatToken();
260 |         nodes.push(new RepetitionNode(expression, quantifier, this.isGreedy()));
261 |         // @ts-ignore
262 |       } else if (token == Char.LeftSquareBracket) {
263 |         nodes.push(this.parseCharacterClass());
264 |       } else {
265 |         nodes.push(this.parseCharacter());
266 |       }
267 |     }
268 | 
269 |     return nodes.length > 1 ? new ConcatenationNode(nodes) : nodes[0];
270 |   }
271 | 
272 |   private parseCharacterRange(): Node {
273 |     const from = this.eatToken();
274 |     this.eatToken(Char.Minus);
275 |     const to = this.eatToken();
276 |     return new CharacterRangeNode(from, to);
277 |   }
278 | 
279 |   private parseCharacterClass(): CharacterClassNode {
280 |     this.eatToken(Char.LeftSquareBracket);
281 | 
282 |     const negated = this.iterator.current == Char.Caret;
283 |     if (negated) {
284 |       this.eatToken(Char.Caret);
285 |     }
286 | 
287 |     const nodes = new Array<Node>();
288 |     while (
289 |       this.iterator.current != Char.RightSquareBracket ||
290 |       nodes.length == 0
291 |     ) {
292 |       // lookahead for character range
293 |       if (
294 |         this.iterator.current != Char.Backslash &&
295 |         this.iterator.lookahead(1) == Char.Minus &&
296 |         this.iterator.lookahead(2) != Char.RightSquareBracket
297 |       ) {
298 |         nodes.push(this.parseCharacterRange());
299 |       } else {
300 |         // have we encountered a backslash?
301 |         if (this.iterator.current == Char.Backslash) {
302 |           this.eatToken();
303 |           if (isCharacterSetSpecialChar(this.iterator.current)) {
304 |             // if it was a backslashed special char, treat as a regular char
305 |             nodes.push(new CharacterNode(this.eatToken()));
306 |           } else {
307 |             // otherwise this is a character class
308 |             nodes.push(new CharacterSetNode(this.eatToken()));
309 |           }
310 |         } else {
311 |           nodes.push(new CharacterNode(this.eatToken()));
312 |         }
313 |       }
314 | 
315 |       if (!this.iterator.more()) {
316 |         throw new SyntaxError("Unterminated character class");
317 |       }
318 |     }
319 |     this.eatToken(Char.RightSquareBracket);
320 |     return new CharacterClassNode(nodes, negated);
321 |   }
322 | }
323 | 


--------------------------------------------------------------------------------
/assembly/parser/string-iterator.ts:
--------------------------------------------------------------------------------
 1 | export class StringIterator {
 2 |   current: u32;
 3 |   cursor: u32 = 0;
 4 | 
 5 |   constructor(private sourceString: string) {
 6 |     this.current = this.sourceString.charCodeAt(0);
 7 |   }
 8 | 
 9 |   lookahead(distance: u32): u32 {
10 |     return this.sourceString.charCodeAt(this.cursor + distance);
11 |   }
12 | 
13 |   next(): bool {
14 |     this.cursor++;
15 |     if (this.cursor >= u32(this.sourceString.length)) {
16 |       this.current = -1;
17 |       return false;
18 |     }
19 |     this.current = this.sourceString.charCodeAt(this.cursor);
20 |     return true;
21 |   }
22 | 
23 |   currentAsString(): string {
24 |     return String.fromCharCode(this.current);
25 |   }
26 | 
27 |   more(): bool {
28 |     return this.cursor < u32(this.sourceString.length);
29 |   }
30 | 
31 |   copy(): StringIterator {
32 |     const iterator = new StringIterator(this.sourceString);
33 |     iterator.cursor = this.cursor;
34 |     iterator.current = this.current;
35 |     return iterator;
36 |   }
37 | }
38 | 


--------------------------------------------------------------------------------
/assembly/parser/walker.ts:
--------------------------------------------------------------------------------
  1 | import { Char } from "../char";
  2 | import {
  3 |   AST,
  4 |   ConcatenationNode,
  5 |   Node,
  6 |   NodeType,
  7 |   RangeRepetitionNode,
  8 |   RepetitionNode,
  9 | } from "./node";
 10 | 
// pairs a visited node with the node it was reached from, which is what
// allows a visitor to replace the node within its parent
export class NodeVisitor {
  constructor(public node: Node, public parentNode: Node) {}
}
 14 | 
 15 | function walkNode(
 16 |   node: Node,
 17 |   parentNode: Node,
 18 |   visitor: (node: NodeVisitor) => void
 19 | ): void {
 20 |   const children = node.children();
 21 |   for (let i = children.length - 1; i >= 0; i--) {
 22 |     walkNode(children[i], node, visitor);
 23 |   }
 24 | 
 25 |   const nodeVisitor = new NodeVisitor(node, parentNode);
 26 |   visitor(nodeVisitor);
 27 | }
 28 | 
 29 | // depth first, right-left walker
 30 | export function walker(ast: AST, visitor: (node: NodeVisitor) => void): void {
 31 |   let node = ast.body;
 32 |   if (node != null) {
 33 |     walkNode(node, ast, visitor);
 34 |   }
 35 | }
 36 | 
/**
 * Range quantifiers are implemented via 'expansion', which significantly
 * increases the size of the AST. This imposes a hard limit to prevent
 * memory-related issues.
 */
// @ts-ignore
@lazy const QUANTIFIER_LIMIT = 1000;
 44 | 
 45 | function parentAsConcatNode(visitor: NodeVisitor): ConcatenationNode {
 46 |   let concatNode: ConcatenationNode | null = null;
 47 |   let parentNode = visitor.parentNode;
 48 |   if (parentNode.type != NodeType.Concatenation) {
 49 |     let node = visitor.node;
 50 |     concatNode = new ConcatenationNode([node]);
 51 |     parentNode.replace(node, concatNode);
 52 |     return concatNode;
 53 |   }
 54 |   return parentNode as ConcatenationNode;
 55 | }
 56 | 
 57 | // take each range repetition and replace with a concatenation
 58 | // of cloned nodes, e.g. a{2} becomes aa
 59 | export function expandRepetitions(visitor: NodeVisitor): void {
 60 |   let node = visitor.node;
 61 |   if (node.type == NodeType.RangeRepetition) {
 62 |     // find the parent
 63 |     const rangeRepNode = node as RangeRepetitionNode;
 64 | 
 65 |     if (rangeRepNode.to > QUANTIFIER_LIMIT) {
 66 |       throw new Error(
 67 |         "Cannot handle range quantifiers > " + QUANTIFIER_LIMIT.toString()
 68 |       );
 69 |     }
 70 |     const concatNode = parentAsConcatNode(visitor);
 71 |     const expressions = concatNode.expressions;
 72 | 
 73 |     // locate the original index
 74 |     const index = expressions.indexOf(rangeRepNode);
 75 | 
 76 |     const from = rangeRepNode.from;
 77 |     const expression = rangeRepNode.expression;
 78 |     // create multiple clones
 79 |     const clones = new Array<Node>(from);
 80 |     // a{4} => aaaa
 81 |     if (from > 0) {
 82 |       clones[0] = expression;
 83 |       for (let i = 1; i < from; i++) {
 84 |         clones[i] = expression.clone();
 85 |       }
 86 |     }
 87 | 
 88 |     if (rangeRepNode.to == -1) {
 89 |       // a{4,} => aaaaa*
 90 |       clones.push(
 91 |         new RepetitionNode(
 92 |           expression.clone(),
 93 |           Char.Asterisk,
 94 |           rangeRepNode.greedy
 95 |         )
 96 |       );
 97 |     } else {
 98 |       // a{4,6} => aaaaa?a?
 99 |       const count = rangeRepNode.to - rangeRepNode.from;
100 |       for (let i = 0; i < count; i++) {
101 |         clones.push(
102 |           new RepetitionNode(
103 |             expression.clone(),
104 |             Char.Question,
105 |             rangeRepNode.greedy
106 |           )
107 |         );
108 |       }
109 |     }
110 | 
111 |     // replace the rangeRepNode with the clones
112 |     concatNode.expressions = expressions
113 |       .slice(0, index)
114 |       .concat(clones)
115 |       .concat(expressions.slice(index + 1));
116 |   }
117 | }
118 | 


--------------------------------------------------------------------------------
/assembly/regexp.ts:
--------------------------------------------------------------------------------
  1 | import { State, Automata, GroupStartMarkerState, MatchResult } from "./nfa/nfa";
  2 | import { walker as nfaWalker } from "./nfa/walker";
  3 | import { ConcatenationNode, AssertionNode, NodeType } from "./parser/node";
  4 | import { Char } from "./char";
  5 | import { Parser } from "./parser/parser";
  6 | import { first, last } from "./util";
  7 | import { walker as astWalker, expandRepetitions } from "./parser/walker";
  8 | 
/**
 * Depth-first search through the NFA starting at `state`, trying each
 * transition in order and backtracking when one fails.
 *
 * @param state the state to test next
 * @param input the string being matched (matching starts at its beginning)
 * @param visited ids of the states seen since the last character was consumed
 * @param position index into `input` of the next character to consume
 * @returns the matched prefix of `input`, or null when no path from `state`
 *   reaches a state without transitions
 */
function recursiveBacktrackingSearch(
  state: State,
  input: string,
  visited: u32[] = [],
  position: i32 = 0
): string | null {
  // prevent endless loops when following epsilon transitions
  for (let i = 0, len = visited.length; i < len; i++) {
    if (visited[i] == state.id) {
      return null;
    }
  }
  visited.push(state.id);

  const matches = state.matches(input, position);
  if (matches == MatchResult.Match) {
    // a match occurred
    if (position == input.length) {
      // we've reached the end of the string
      return null;
    }
    // a character is consumed: start a fresh visited list (the caller's
    // array is left as it was) and move on to the next character
    visited = [];
    position++;
  } else if (matches == MatchResult.Fail) {
    return null;
  }

  const transitions = state.transitions;
  if (transitions.length == 0) {
    // we've reached the end, so return the matched string
    return input.substring(0, position);
  }

  // try each transition in order; the first one that leads to a match wins
  for (let i = 0, len = transitions.length; i < len; i++) {
    const match = recursiveBacktrackingSearch(
      transitions[i],
      input,
      visited,
      position
    );
    if (match != null) {
      // when unwinding the stack after a successful match, flag the captured values
      if (state instanceof GroupStartMarkerState) {
        (state as GroupStartMarkerState).flagged = true;
      }
      return match;
    }
  }
  return null;
}
 59 | 
 60 | export class Match {
 61 |   constructor(
 62 |     public matches: string[],
 63 |     public index: i32,
 64 |     public input: string
 65 |   ) {}
 66 | 
 67 |   static fromMatch(match: string, index: i32, input: string): Match {
 68 |     return new Match([match], index, input);
 69 |   }
 70 | }
 71 | 
 72 | export class Flags {
 73 |   global: bool = false;
 74 |   ignoreCase: bool = false;
 75 |   dotAll: bool = false;
 76 |   multiline: bool = false;
 77 | 
 78 |   constructor(flagString: string | null) {
 79 |     if (flagString) {
 80 |       this.global = flagString.includes("g");
 81 |       this.ignoreCase = flagString.includes("i");
 82 |       this.dotAll = flagString.includes("s");
 83 |       this.multiline = flagString.includes("m");
 84 |     }
 85 |   }
 86 | }
 87 | 
 88 | // capture groups are implemented as GroupStart / GroupEnd states that record (capture)
 89 | // the value of the current state of the string being matched.
 90 | // Repeated capture groups, via range repetitions (e.g. {2,3}) share the same 'id'. The
 91 | // returned regex should only return the value of the final repetition.
 92 | function lastCapturesForGroup(groupMarkers: GroupStartMarkerState[]): string[] {
 93 |   if (!groupMarkers.length) {
 94 |     return [];
 95 |   }
 96 |   const values = [first(groupMarkers).capture];
 97 |   let currrentId = first(groupMarkers).groupId;
 98 |   for (let i = 0; i < groupMarkers.length; i++) {
 99 |     const gm = groupMarkers[i];
100 |     if (gm.groupId != currrentId) {
101 |       currrentId = gm.groupId;
102 |       values.push(gm.capture);
103 |     } else {
104 |       if (gm.flagged) {
105 |         values[values.length - 1] = gm.capture;
106 |       }
107 |     }
108 |   }
109 |   return values;
110 | }
111 | 
112 | export class RegExp {
113 |   @lazy static gm: GroupStartMarkerState[] = new Array<GroupStartMarkerState>();
114 |   lastIndex: i32 = 0;
115 |   private flags: Flags;
116 |   private nfa: Automata;
117 |   private endOfInput: bool = false;
118 |   private startOfInput: bool = false;
119 |   private groupMarkers: GroupStartMarkerState[];
120 | 
121 |   constructor(private regex: string, public flagsString: string | null = null) {
122 |     const ast = Parser.toAST(regex);
123 |     const flags = new Flags(flagsString);
124 | 
125 |     // look for start / end assertions
126 |     const body = ast.body;
127 |     if (body != null && body.type == NodeType.Concatenation) {
128 |       const expressions = (ast.body as ConcatenationNode).expressions;
129 |       this.startOfInput = AssertionNode.is(first(expressions), Char.Caret);
130 |       this.endOfInput = AssertionNode.is(last(expressions), Char.Dollar);
131 |     }
132 | 
133 |     astWalker(ast, expandRepetitions);
134 | 
135 |     const nfa = Automata.toNFA(ast, flags);
136 | 
137 |     // find all the group marker states
138 |     RegExp.gm = new Array<GroupStartMarkerState>();
139 |     nfaWalker(nfa.start, (state) => {
140 |       if (state instanceof GroupStartMarkerState) {
141 |         const startMarker = state as GroupStartMarkerState;
142 |         if (startMarker.capturing) {
143 |           RegExp.gm.push(state as GroupStartMarkerState);
144 |         }
145 |       }
146 |     });
147 |     this.nfa = nfa;
148 |     this.groupMarkers = RegExp.gm;
149 | 
150 |     this.flags = flags;
151 |   }
152 | 
153 |   exec(str: string): Match | null {
154 |     let groupMarkers = this.groupMarkers;
155 |     // remove all previous group marker results
156 |     for (let i = 0, len = groupMarkers.length; i < len; i++) {
157 |       groupMarkers[i].capture = "";
158 |     }
159 | 
160 |     let len = str.length;
161 |     if (!len) {
162 |       const matchStr = recursiveBacktrackingSearch(this.nfa.start, "");
163 |       return matchStr != null ? new Match([matchStr], 0, str) : null;
164 |     }
165 | 
166 |     // search for a match at each index within the string
167 | 
168 |     for (
169 |       let matchIndex = this.lastIndex;
170 |       matchIndex < (this.startOfInput && !this.multiline ? 1 : len);
171 |       matchIndex++
172 |     ) {
173 |       // search for a match in this substring
174 |       const matchStr = recursiveBacktrackingSearch(
175 |         this.nfa.start,
176 |         str.substr(matchIndex)
177 |       );
178 | 
179 |       // we have found a match
180 |       if (matchStr != null) {
181 |         // remove any non-flagged captures
182 |         groupMarkers.forEach((gm) => {
183 |           gm.capture = gm.flagged ? gm.capture : "";
184 |         });
185 | 
186 |         const matches: string[] = [matchStr];
187 |         const match = new Match(
188 |           matches.concat(lastCapturesForGroup(groupMarkers)),
189 |           matchIndex,
190 |           str
191 |         );
192 | 
193 |         // return this match (checking end of input condition)
194 |         const matchEndIndex = match.index + match.matches[0].length;
195 | 
196 |         // has the start of input criteria been met?
197 |         if (this.startOfInput) {
198 |           if (this.flags.multiline && matchIndex != 0) {
199 |             if (str.charCodeAt(matchIndex - 1) != Char.LineFeed) continue;
200 |           } else if (matchIndex != 0) {
201 |             continue;
202 |           }
203 |         }
204 | 
205 |         // has the enf of input criteria been met?
206 |         if (this.endOfInput) {
207 |           if (this.flags.multiline && matchEndIndex != len) {
208 |             if (str.charCodeAt(matchEndIndex) != Char.LineFeed) continue;
209 |           } else if (matchEndIndex != len) {
210 |             continue;
211 |           }
212 |         }
213 | 
214 |         if (this.global) {
215 |           this.lastIndex = matchEndIndex;
216 |         }
217 |         return match;
218 |       }
219 |     }
220 | 
221 |     this.lastIndex = 0;
222 |     return null;
223 |   }
224 | 
225 |   test(str: string): bool {
226 |     return this.exec(str) != null;
227 |   }
228 | 
  /**
   * Returns the `regex` field of this instance (presumably the pattern source
   * it was constructed from; the constructor is outside this view).
   */
  toString(): string {
    return this.regex;
  }
232 | 
  /**
   * Whether the global flag is set. When it is, `exec` stores the end offset
   * of each match in `lastIndex` so the next call resumes from there.
   */
  get global(): bool {
    return this.flags.global;
  }
236 | 
  /** The `ignoreCase` setting of the flags this expression was given. */
  get ignoreCase(): bool {
    return this.flags.ignoreCase;
  }
240 | 
  /** The `dotAll` setting of the flags this expression was given. */
  get dotAll(): bool {
    return this.flags.dotAll;
  }
244 | 
  /**
   * Whether the multiline flag is set. When it is, `exec` also accepts
   * start/end-of-input anchors next to a line feed.
   */
  get multiline(): bool {
    return this.flags.multiline;
  }
248 | }
249 | 
// TODO: do we need this factory function, or can we invoke
// the constructor via the loader?
/**
 * Builds a `RegExp` from a pattern and a flags string.
 *
 * @param regex the pattern source
 * @param flags the flag characters to apply
 * @returns the newly constructed expression
 */
export function createRegExp(regex: string, flags: string): RegExp {
  const instance = new RegExp(regex, flags);
  return instance;
}
255 | 
/**
 * Benchmarking entry point: compiles `regexStr` with the global flag and
 * executes it against `value`.
 *
 * @param regexStr the pattern source
 * @param value the text to search
 * @param iterations how many times to call `exec`; a negative value means
 *                   "keep calling until `exec` returns null"
 */
export function executeRegExp(
  regexStr: string,
  value: string,
  iterations: i32
): void {
  const regex = new RegExp(regexStr, "g");

  if (iterations >= 0) {
    // fixed number of executions
    let remaining = iterations;
    while (remaining > 0) {
      regex.exec(value);
      remaining--;
    }
    return;
  }

  // negative count: drain every match
  while (regex.exec(value) != null) {
    // the call itself does all the work
  }
}
272 | 


--------------------------------------------------------------------------------
/assembly/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "assemblyscript/std/assembly.json",
3 |   "include": ["./**/*.ts"]
4 | }
5 | 


--------------------------------------------------------------------------------
/assembly/util.ts:
--------------------------------------------------------------------------------
 1 | export function last<T>(arr: T[]): T {
 2 |   return unchecked(arr[arr.length - 1]);
 3 | }
 4 | 
 5 | export function first<T>(arr: T[]): T {
 6 |   return unchecked(arr[0]);
 7 | }
 8 | 
 9 | export function replaceAtIndex<T>(arr: T[], index: u32, item: T): T[] {
10 |   let res = arr.slice(0);
11 |   unchecked((res[index] = item));
12 |   return res;
13 | }
14 | 
/**
 * An integer interval. Both `from` and `to` are inclusive bounds, as
 * defined by `contains`.
 */
export class Range {
  constructor(public from: i32, public to: i32) {}

  /**
   * Computes the overlap between this range and `other`.
   *
   * @param other the range to intersect with
   * @returns the overlapping range, or `null` when the two ranges share no
   *          value
   */
  intersection(other: Range): Range | null {
    // plain integer comparisons; no need to round-trip through float min/max
    const lower = this.from > other.from ? this.from : other.from;
    const upper = this.to < other.to ? this.to : other.to;
    // the bounds are inclusive, so ranges that meet at a single value
    // (lower == upper) still overlap
    return lower <= upper ? new Range(lower, upper) : null;
  }

  /**
   * Returns a new range with both bounds shifted by `value`.
   */
  offset(value: i32): Range {
    return new Range(this.from + value, this.to + value);
  }

  /**
   * Reports whether `value` lies within the range, bounds included.
   */
  contains(value: i32): bool {
    return value >= this.from && value <= this.to;
  }
}
32 | 


--------------------------------------------------------------------------------
/benchmark/benchmark.js:
--------------------------------------------------------------------------------
 1 | global.TextDecoder = require("text-encoding").TextDecoder;
 2 | const fs = require("fs");
 3 | const loader = require("@assemblyscript/loader");
 4 | 
 5 | const Benchmark = require("benchmark");
 6 | const suite = new Benchmark.Suite();
 7 | 
 8 | wasmModule = loader.instantiateSync(fs.readFileSync("./build/optimized.wasm"), {
 9 |   env: {
10 |     log: () => {
11 |       const { __getString } = wasmModule.exports;
12 |       console.log(__getString(strPtr));
13 |     },
14 |   },
15 | });
16 | 
// Runs `regexStr` against `valueStr` through the module's exported
// `executeRegExp` function. With `untilNull` set, a negative iteration count
// is passed so the regex is executed until it stops matching; otherwise it is
// executed a fixed five times.
function executeRegex(regexStr, valueStr, untilNull = false) {
  const { executeRegExp, __newString, __pin, __unpin } = wasmModule.exports;

  // pin the regex string so that allocating the value string cannot collect it
  const regexPtr = __pin(__newString(regexStr));
  try {
    const strPtr = __newString(valueStr);
    executeRegExp(regexPtr, strPtr, untilNull ? -1 : 5);
  } finally {
    // release the pin even if the wasm call throws
    __unpin(regexPtr);
  }
}
27 | 
// Register the benchmark cases and start the run. Every case goes through
// executeRegex; passing `true` as its third argument keeps executing the
// regex until it stops matching, otherwise it runs a fixed number of times.
// add tests
suite
  .add("baseline", () => {
    // this test primarily measures the overhead in the wasm / JS interop
    executeRegex("", "");
  })
  .add("character class", () => {
    executeRegex("[a-zA-C0-9J]", "J"); // match char
    executeRegex("[a-zA-C0-9J]", "a"); // match char in range
  })
  .add("concatenation", () => {
    executeRegex("this is a long string", "this is a long string");
  })
  .add("quantifiers", () => {
    executeRegex("a*", "aaaaa");
    executeRegex("a+", "aaaaa");
    executeRegex("a?", "a");
  })
  .add("range quantifiers", () => {
    executeRegex("a{20,30}", "a".repeat(25));
  })
  .add("alternation", () => {
    executeRegex("a|b|c|d|e|f|g", "d");
  })
  .add("multiple regex matches", () => {
    // walks every three-letter run of [a-l] in the text
    const text =
      "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";
    executeRegex("[a-l]{3}", text, true);
  })
  .add("complex regex", () => {
    // a long pattern with many capture groups run against an HTML table row
    const text =
      "<TR BGCOLOR='#DBE9E9'><TD align=left valign=top>43.<a href='joblist.cfm?JobID=94 6735&Keyword='>Word Processor<BR>(N-1286)</a></TD><TD align=left valign=top>Lega lstaff.com</TD><TD align=left valign=top>CA - Statewide</TD></TR>";
    const regex =
      "<tr([\\w\\W\\s\\d][^<>]{0,})><TD([\\w\\W\\s\\d][^<>]{0,})>([\\d]{0,}\\.)(.*)((<BR>([\\w\\W\\s\\d][^<>]{0,})|[\\s]{0,}))<\\/a><\\/TD><TD([\\w\\W\\s\\d][^<>]{0,})>([\\w\\W\\s\\d][^<>]{0,})<\\/TD><TD([\\w\\W\\s\\d][^<>]{0,})>([\\w\\W\\s\\d][^<>]{0,})<\\/TD><\\/TR>";
    executeRegex(regex, text, true);
  })
  // add listeners
  .on("cycle", (event) => {
    // print the result line of each case as it completes
    console.log(String(event.target));
  })
  // run async
  .run({ async: true });
70 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "assemblyscript-regex",
 3 |   "version": "0.1.0",
 4 |   "description": "A regex engine built with AssemblyScript",
 5 |   "ascMain": "assembly/index.ts",
 6 |   "main": "assembly/index.ts",
 7 |   "scripts": {
 8 |     "pretest": "npm run test:generate && npm run asbuild:untouched && npm run asbuild:empty",
 9 |     "test": "npm run prettier && npm run eslint && npm run asp && npm run test:empty",
10 |     "test:empty": "diff build/empty.wat assembly/__tests__/empty.wat",
11 |     "test:generate": "node spec/test-generator.js",
12 |     "asp": "asp --verbose --nologo",
13 |     "asp:ci": "asp --nologo",
14 |     "prettier": "prettier --check .",
15 |     "prettier:write": "prettier --write .",
16 |     "eslint:write": "npm run eslint -- --fix ",
17 |     "asbuild:untouched": "asc assembly/index.ts --target debug",
18 |     "asbuild:optimized": "asc assembly/index.ts --target release",
19 |     "asbuild": "npm run asbuild:untouched && npm run asbuild:optimized",
20 |     "asbuild:empty": "asc --config asconfig.empty.json",
21 |     "tsrun": "ts-node ts/index.ts",
22 |     "benchmark": "node benchmark/benchmark.js",
23 |     "eslint": "eslint --max-warnings 0 --ext ts \"assembly/**/*.ts\""
24 |   },
25 |   "author": "colin.eberhardt@gmail.com",
26 |   "license": "MIT",
27 |   "repository": {
28 |     "type": "git",
29 |     "url": "git+https://github.com/ColinEberhardt/assemblyscript-regex"
30 |   },
31 |   "devDependencies": {
32 |     "@as-pect/cli": "^8.0.1",
33 |     "@assemblyscript/loader": "^0.27.0",
34 |     "@types/node": "^14.14.13",
35 |     "@typescript-eslint/eslint-plugin": "^4.14.1",
36 |     "@typescript-eslint/parser": "^4.14.1",
37 |     "assemblyscript": "^0.25.0",
38 |     "benchmark": "^2.1.4",
39 |     "eslint": "^7.18.0",
40 |     "husky": "^4.2.5",
41 |     "prettier": "^2.2.1",
42 |     "semantic-release": "^17.3.7",
43 |     "text-encoding": "^0.7.0",
44 |     "ts-node": "^9.1.1",
45 |     "typescript": "^4.1.3"
46 |   },
47 |   "husky": {
48 |     "hooks": {
49 |       "pre-commit": "npm run prettier && npm run eslint"
50 |     }
51 |   }
52 | }
53 | 


--------------------------------------------------------------------------------
/spec/pcre-1.dat:
--------------------------------------------------------------------------------
   1 | E	the quick brown fox	the quick brown fox	(0,19)
   2 | E	SAME	The quick brown FOX	NOMATCH
   3 | E	SAME	What do you know about the quick brown fox?	(23,42)
   4 | E	SAME	What do you know about THE QUICK BROWN FOX?	NOMATCH
   5 | Ei	The quick brown fox	the quick brown fox	(0,19)
   6 | Ei	SAME	The quick brown FOX	(0,19)
   7 | Ei	SAME	What do you know about the quick brown fox?	(23,42)
   8 | Ei	SAME	What do you know about THE QUICK BROWN FOX?	(23,42)
   9 | E$	abcd\\t\\n\\r\\f\\a\\e\\071\\x3b\\$\\\\\\?caxyz	abcd\t\n\r\f\a\e9;$\\?caxyz	(0,20)
  10 | E	a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz	abxyzpqrrrabbxyyyypqAzz	(0,23)
  11 | E	SAME	abxyzpqrrrabbxyyyypqAzz	(0,23)
  12 | E	SAME	aabxyzpqrrrabbxyyyypqAzz	(0,24)
  13 | E	SAME	aaabxyzpqrrrabbxyyyypqAzz	(0,25)
  14 | E	SAME	aaaabxyzpqrrrabbxyyyypqAzz	(0,26)
  15 | E	SAME	abcxyzpqrrrabbxyyyypqAzz	(0,24)
  16 | E	SAME	aabcxyzpqrrrabbxyyyypqAzz	(0,25)
  17 | E	SAME	aaabcxyzpqrrrabbxyyyypAzz	(0,25)
  18 | E	SAME	aaabcxyzpqrrrabbxyyyypqAzz	(0,26)
  19 | E	SAME	aaabcxyzpqrrrabbxyyyypqqAzz	(0,27)
  20 | E	SAME	aaabcxyzpqrrrabbxyyyypqqqAzz	(0,28)
  21 | E	SAME	aaabcxyzpqrrrabbxyyyypqqqqAzz	(0,29)
  22 | E	SAME	aaabcxyzpqrrrabbxyyyypqqqqqAzz	(0,30)
  23 | E	SAME	aaabcxyzpqrrrabbxyyyypqqqqqqAzz	(0,31)
  24 | E	SAME	aaaabcxyzpqrrrabbxyyyypqAzz	(0,27)
  25 | E	SAME	abxyzzpqrrrabbxyyyypqAzz	(0,24)
  26 | E	SAME	aabxyzzzpqrrrabbxyyyypqAzz	(0,26)
  27 | E	SAME	aaabxyzzzzpqrrrabbxyyyypqAzz	(0,28)
  28 | E	SAME	aaaabxyzzzzpqrrrabbxyyyypqAzz	(0,29)
  29 | E	SAME	abcxyzzpqrrrabbxyyyypqAzz	(0,25)
  30 | E	SAME	aabcxyzzzpqrrrabbxyyyypqAzz	(0,27)
  31 | E	SAME	aaabcxyzzzzpqrrrabbxyyyypqAzz	(0,29)
  32 | E	SAME	aaaabcxyzzzzpqrrrabbxyyyypqAzz	(0,30)
  33 | E	SAME	aaaabcxyzzzzpqrrrabbbxyyyypqAzz	(0,31)
  34 | E	SAME	aaaabcxyzzzzpqrrrabbbxyyyyypqAzz	(0,32)
  35 | E	SAME	aaabcxyzpqrrrabbxyyyypABzz	(0,26)
  36 | E	SAME	aaabcxyzpqrrrabbxyyyypABBzz	(0,27)
  37 | E	SAME	>>>aaabxyzpqrrrabbxyyyypqAzz	(3,28)
  38 | E	SAME	>aaaabxyzpqrrrabbxyyyypqAzz	(1,27)
  39 | E	SAME	>>>>abcxyzpqrrrabbxyyyypqAzz	(4,28)
  40 | E	SAME	abxyzpqrrabbxyyyypqAzz	NOMATCH
  41 | E	SAME	abxyzpqrrrrabbxyyyypqAzz	NOMATCH
  42 | E	SAME	abxyzpqrrrabxyyyypqAzz	NOMATCH
  43 | E	SAME	aaaabcxyzzzzpqrrrabbbxyyyyyypqAzz	NOMATCH
  44 | E	SAME	aaaabcxyzzzzpqrrrabbbxyyypqAzz	NOMATCH
  45 | E	SAME	aaabcxyzpqrrrabbxyyyypqqqqqqqAzz	NOMATCH
  46 | E	^(abc){1,2}zz	abczz	(0,5)(0,3)
  47 | E	SAME	abcabczz	(0,8)(3,6)
  48 | E	SAME	zz	NOMATCH
  49 | E	SAME	abcabcabczz	NOMATCH
  50 | E	SAME	>>abczz	NOMATCH
  51 | E	^(b+?|a){1,2}?c	bc	(0,2)(0,1)
  52 | E	SAME	bbc	(0,3)(1,2)
  53 | E	SAME	bbbc	(0,4)(1,3)
  54 | E	SAME	bac	(0,3)(1,2)
  55 | E	SAME	bbac	(0,4)(2,3)
  56 | E	SAME	aac	(0,3)(1,2)
  57 | E	SAME	abbbbbbbbbbbc	(0,13)(1,12)
  58 | E	SAME	bbbbbbbbbbbac	(0,13)(11,12)
  59 | E	SAME	aaac	NOMATCH
  60 | E	SAME	abbbbbbbbbbbac	NOMATCH
  61 | E	^(b+|a){1,2}c	bc	(0,2)(0,1)
  62 | E	SAME	bbc	(0,3)(0,2)
  63 | E	SAME	bbbc	(0,4)(0,3)
  64 | E	SAME	bac	(0,3)(1,2)
  65 | E	SAME	bbac	(0,4)(2,3)
  66 | E	SAME	aac	(0,3)(1,2)
  67 | E	SAME	abbbbbbbbbbbc	(0,13)(1,12)
  68 | E	SAME	bbbbbbbbbbbac	(0,13)(11,12)
  69 | E	SAME	aaac	NOMATCH
  70 | E	SAME	abbbbbbbbbbbac	NOMATCH
  71 | E	^(b+|a){1,2}?bc	bbc	(0,3)(0,1)
  72 | E	^(b*|ba){1,2}?bc	babc	(0,4)(0,2)
  73 | E	SAME	bbabc	(0,5)(1,3)
  74 | E	SAME	bababc	(0,6)(2,4)
  75 | E	SAME	bababbc	NOMATCH
  76 | E	SAME	babababc	NOMATCH
  77 | E	^(ba|b*){1,2}?bc	babc	(0,4)(0,2)
  78 | E	SAME	bbabc	(0,5)(1,3)
  79 | E	SAME	bababc	(0,6)(2,4)
  80 | E	SAME	bababbc	NOMATCH
  81 | E	SAME	babababc	NOMATCH
  82 | E$	^\\ca\\cA\\c[\\c{\\c:	\x01\x01\e;z	(0,5)
  83 | E$kv	^[ab\\]cde]	athing	(0,1)
  84 | E$kv	SAME	bthing	(0,1)
  85 | E$kv	SAME	]thing	(0,1)
  86 | E$kv	SAME	cthing	(0,1)
  87 | E$kv	SAME	dthing	(0,1)
  88 | E$kv	SAME	ething	(0,1)
  89 | E$kv	SAME	fthing	NOMATCH
  90 | E$kv	SAME	[thing	NOMATCH
  91 | E$kv	SAME	\\thing	NOMATCH
  92 | E	^[]cde]	]thing	(0,1)
  93 | E	SAME	cthing	(0,1)
  94 | E	SAME	dthing	(0,1)
  95 | E	SAME	ething	(0,1)
  96 | E	SAME	athing	NOMATCH
  97 | E	SAME	fthing	NOMATCH
  98 | E$kv	^[^ab\\]cde]	fthing	(0,1)
  99 | E$kv	SAME	[thing	(0,1)
 100 | E$kv	SAME	\\thing	(0,1)
 101 | E$kv	SAME	athing	NOMATCH
 102 | E$kv	SAME	bthing	NOMATCH
 103 | E$kv	SAME	]thing	NOMATCH
 104 | E$kv	SAME	cthing	NOMATCH
 105 | E$kv	SAME	dthing	NOMATCH
 106 | E$kv	SAME	ething	NOMATCH
 107 | E	^[^]cde]	athing	(0,1)
 108 | E	SAME	fthing	(0,1)
 109 | E	SAME	]thing	NOMATCH
 110 | E	SAME	cthing	NOMATCH
 111 | E	SAME	dthing	NOMATCH
 112 | E	SAME	ething	NOMATCH
 113 | E$x	^\\�	�	(0,1)
 114 | E	^�	�	(0,1)
 115 | E	^[0-9]+$	0	(0,1)
 116 | E	SAME	1	(0,1)
 117 | E	SAME	2	(0,1)
 118 | E	SAME	3	(0,1)
 119 | E	SAME	4	(0,1)
 120 | E	SAME	5	(0,1)
 121 | E	SAME	6	(0,1)
 122 | E	SAME	7	(0,1)
 123 | E	SAME	8	(0,1)
 124 | E	SAME	9	(0,1)
 125 | E	SAME	10	(0,2)
 126 | E	SAME	100	(0,3)
 127 | E	SAME	abc	NOMATCH
 128 | E	^.*nter	enter	(0,5)
 129 | E	SAME	inter	(0,5)
 130 | E	SAME	uponter	(0,7)
 131 | E	^xxx[0-9]+$	xxx0	(0,4)
 132 | E	SAME	xxx1234	(0,7)
 133 | E	SAME	xxx	NOMATCH
 134 | E	^.+[0-9][0-9][0-9]$	x123	(0,4)
 135 | E	SAME	xx123	(0,5)
 136 | E	SAME	123456	(0,6)
 137 | E	SAME	123	NOMATCH
 138 | E	SAME	x1234	(0,5)
 139 | E	^.+?[0-9][0-9][0-9]$	x123	(0,4)
 140 | E	SAME	xx123	(0,5)
 141 | E	SAME	123456	(0,6)
 142 | E	SAME	123	NOMATCH
 143 | E	SAME	x1234	(0,5)
 144 | E$	^([^!]+)!(.+)=apquxz\\.ixr\\.zzz\\.ac\\.uk$	abc!pqr=apquxz.ixr.zzz.ac.uk	(0,28)(0,3)(4,7)
 145 | E$	SAME	!pqr=apquxz.ixr.zzz.ac.uk	NOMATCH
 146 | E$	SAME	abc!=apquxz.ixr.zzz.ac.uk	NOMATCH
 147 | E$	SAME	abc!pqr=apquxz:ixr.zzz.ac.uk	NOMATCH
 148 | E$	SAME	abc!pqr=apquxz.ixr.zzz.ac.ukk	NOMATCH
 149 | E	:	Well, we need a colon: somewhere	(21,22)
 150 | E	SAME	*** Fail if we don't	NOMATCH
 151 | E$iv	([\\da-f:]+)$	0abc	(0,4)(0,4)
 152 | E$iv	SAME	abc	(0,3)(0,3)
 153 | E$iv	SAME	fed	(0,3)(0,3)
 154 | E$iv	SAME	E	(0,1)(0,1)
 155 | E$iv	SAME	::	(0,2)(0,2)
 156 | E$iv	SAME	5f03:12C0::932e	(0,15)(0,15)
 157 | E$iv	SAME	fed def	(4,7)(4,7)
 158 | E$iv	SAME	Any old stuff	(11,13)(11,13)
 159 | E$iv	SAME	0zzz	NOMATCH
 160 | E$iv	SAME	gzzz	NOMATCH
 161 | E$iv	SAME	fed\x20	NOMATCH
 162 | E$iv	SAME	Any old rubbish	NOMATCH
 163 | E$	^.*\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$	.1.2.3	(0,6)(1,2)(3,4)(5,6)
 164 | E$	SAME	A.12.123.0	(0,10)(2,4)(5,8)(9,10)
 165 | E$	SAME	.1.2.3333	NOMATCH
 166 | E$	SAME	1.2.3	NOMATCH
 167 | E$	SAME	1234.2.3	NOMATCH
 168 | E$	^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$	1 IN SOA non-sp1 non-sp2(	(0,25)(0,1)(9,16)(17,24)
 169 | E$	SAME	1    IN    SOA    non-sp1    non-sp2   (	(0,40)(0,1)(18,25)(29,36)
 170 | E$	SAME	1IN SOA non-sp1 non-sp2(	NOMATCH
 171 | E$kv	^[a-zA-Z\\d][a-zA-Z\\d\\-]*(\\.[a-zA-Z\\d][a-zA-z\\d\\-]*)*\\.$	a.	(0,2)
 172 | E$kv	SAME	Z.	(0,2)
 173 | E$kv	SAME	2.	(0,2)
 174 | E$kv	SAME	ab-c.pq-r.	(0,10)(4,9)
 175 | E$kv	SAME	sxk.zzz.ac.uk.	(0,14)(10,13)
 176 | E$kv	SAME	x-.y-.	(0,6)(2,5)
 177 | E$kv	SAME	-abc.peq.	NOMATCH
 178 | E$kv	^\\*\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?(\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?)*$	*.a	(0,3)
 179 | E$kv	SAME	*.b0-a	(0,6)(3,6)
 180 | E$kv	SAME	*.c3-b.c	(0,8)(3,6)(6,8)
 181 | E$kv	SAME	*.c-a.b-c	(0,9)(3,5)(5,9)(7,9)
 182 | E$kv	SAME	*.0	NOMATCH
 183 | E$kv	SAME	*.a-	NOMATCH
 184 | E$kv	SAME	*.a-b.c-	NOMATCH
 185 | E$kv	SAME	*.c-a.0-c	NOMATCH
 186 | E	^(?=ab(de))(abd)(e)	abde	(0,4)(2,4)(0,3)(3,4)
 187 | E	^(?!(ab)de|x)(abd)(f)	abdf	(0,4)(?,?)(0,3)(3,4)
 188 | E	^(?=(ab(cd)))(ab)	abcd	(0,2)(0,4)(2,4)(0,2)
 189 | E$iv	^[\\da-f](\\.[\\da-f])*$	a.b.c.d	(0,7)(5,7)
 190 | E$iv	SAME	A.B.C.D	(0,7)(5,7)
 191 | E$iv	SAME	a.b.c.1.2.3.C	(0,13)(11,13)
 192 | E$	^".*"\\s*(;.*)?$	"1234"	(0,6)
 193 | E$	SAME	"abcd" ;	(0,8)(7,8)
 194 | E$	SAME	"" ; rhubarb	(0,12)(3,12)
 195 | E$	SAME	"1234" : things	NOMATCH
 196 | E$	^$	NULL	(0,0)
 197 | E$c	   ^    a   (?# begins with a)  b\\sc (?# then b c) $ (?# then end)	ab c	(0,4)
 198 | E$c	SAME	abc	NOMATCH
 199 | E$c	SAME	ab cde	NOMATCH
 200 | E$	(?x)   ^    a   (?# begins with a)  b\\sc (?# then b c) $ (?# then end)	ab c	(0,4)
 201 | E$	SAME	abc	NOMATCH
 202 | E$	SAME	ab cde	NOMATCH
 203 | E$c	^   a\\ b[c ]d       $	a bcd	(0,5)
 204 | E$c	SAME	a b d	(0,5)
 205 | E$c	SAME	abcd	NOMATCH
 206 | E$c	SAME	ab d	NOMATCH
 207 | E	^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$	abcdefhijklm	(0,12)(0,3)(1,3)(2,3)(3,6)(4,6)(5,6)(6,9)(7,9)(8,9)(9,12)(10,12)(11,12)
 208 | E	^(?:a(b(c)))(?:d(e(f)))(?:h(i(j)))(?:k(l(m)))$	abcdefhijklm	(0,12)(1,3)(2,3)(4,6)(5,6)(7,9)(8,9)(10,12)(11,12)
 209 | E$v	^[\\w][\\W][\\s][\\S][\\d][\\D][\\f][\\n][\\c]][\\022]	a+ Z0+\x0c\n\x1d\x12	(0,10)
 210 | E$	^[.^$|()*+?{,}]+	.^$(*+)|{?,?}	(0,13)
 211 | E$	^a*\\w	z	(0,1)
 212 | E$	SAME	az	(0,2)
 213 | E$	SAME	aaaz	(0,4)
 214 | E$	SAME	a	(0,1)
 215 | E$	SAME	aa	(0,2)
 216 | E$	SAME	aaaa	(0,4)
 217 | E$	SAME	a+	(0,1)
 218 | E$	SAME	aa+	(0,2)
 219 | E$	^a*?\\w	z	(0,1)
 220 | E$	SAME	az	(0,1)
 221 | E$	SAME	aaaz	(0,1)
 222 | E$	SAME	a	(0,1)
 223 | E$	SAME	aa	(0,1)
 224 | E$	SAME	aaaa	(0,1)
 225 | E$	SAME	a+	(0,1)
 226 | E$	SAME	aa+	(0,1)
 227 | E$	^a+\\w	az	(0,2)
 228 | E$	SAME	aaaz	(0,4)
 229 | E$	SAME	aa	(0,2)
 230 | E$	SAME	aaaa	(0,4)
 231 | E$	SAME	aa+	(0,2)
 232 | E$	^a+?\\w	az	(0,2)
 233 | E$	SAME	aaaz	(0,2)
 234 | E$	SAME	aa	(0,2)
 235 | E$	SAME	aaaa	(0,2)
 236 | E$	SAME	aa+	(0,2)
 237 | E$	^\\d{8}\\w{2,}	1234567890	(0,10)
 238 | E$	SAME	12345678ab	(0,10)
 239 | E$	SAME	12345678__	(0,10)
 240 | E$	SAME	1234567	NOMATCH
 241 | E$v	^[aeiou\\d]{4,5}$	uoie	(0,4)
 242 | E$v	SAME	1234	(0,4)
 243 | E$v	SAME	12345	(0,5)
 244 | E$v	SAME	aaaaa	(0,5)
 245 | E$v	SAME	123456	NOMATCH
 246 | E$v	^[aeiou\\d]{4,5}?	uoie	(0,4)
 247 | E$v	SAME	1234	(0,4)
 248 | E$v	SAME	12345	(0,4)
 249 | E$v	SAME	aaaaa	(0,4)
 250 | E$v	SAME	123456	(0,4)
 251 | E$	\\A(abc|def)=(\\1){2,3}\\Z	abc=abcabc	(0,10)(0,3)(7,10)
 252 | E$	SAME	def=defdefdef	(0,13)(0,3)(10,13)
 253 | E$	SAME	abc=defdef	NOMATCH
 254 | E$h	^(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11*(\\3\\4)\\1(?#)2$	abcdefghijkcda2	(0,15)(0,1)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)(10,11)(11,13)
 255 | E$h	SAME	abcdefghijkkkkcda2	(0,18)(0,1)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)(10,11)(14,16)
 256 | E$	^(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(?11)*((?3)(?4))(?1)(?#)2$	abcdefghijkcda2	(0,15)(0,1)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)(10,11)(11,13)
 257 | E$	SAME	abcdefghijkkkkcda2	(0,18)(0,1)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)(10,11)(14,16)
 258 | E$z	(cat(a(ract|tonic)|erpillar)) \\1()2(3)	cataract cataract23	(0,19)(0,8)(3,8)(4,8)(17,17)(18,19)
 259 | E$z	SAME	catatonic catatonic23	(0,21)(0,9)(3,9)(4,9)(19,19)(20,21)
 260 | E$z	SAME	caterpillar caterpillar23	(0,25)(0,11)(3,11)(?,?)(23,23)(24,25)
 261 | E	^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]	From abcd  Mon Sep 01 12:33:02 1997	(0,27)(5,9)
 262 | E$	^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d	From abcd  Mon Sep 01 12:33:02 1997	(0,27)(15,19)
 263 | E$	SAME	From abcd  Mon Sep  1 12:33:02 1997	(0,27)(15,20)
 264 | E$	SAME	From abcd  Sep 01 12:33:02 1997	NOMATCH
 265 | Ej$	^12.34	12\n34	(0,5)
 266 | Ej$	SAME	12\r34	(0,5)
 267 | E$	\\w+(?=\\t)	the quick brown\t fox	(10,15)
 268 | E	foo(?!bar)(.*)	foobar is foolish see?	(10,22)(13,22)
 269 | E	(?:(?!foo)...|^.{0,2})bar(.*)	foobar crowbar etc	(8,18)(14,18)
 270 | E	SAME	barrel	(0,6)(3,6)
 271 | E	SAME	2barrel	(0,7)(4,7)
 272 | E	SAME	A barrel	(0,8)(5,8)
 273 | E$	^(\\D*)(?=\\d)(?!123)	abc456	(0,3)(0,3)
 274 | E$	SAME	abc123	NOMATCH
 275 | E$	^1234(?# test newlines\n\
 276 |   inside)	1234	(0,4)
 277 | E$c	^1234 #comment in extended re\n\
 278 |   	1234	(0,4)
 279 | E$c	#rhubarb\n\
 280 |   abcd	abcd	(0,4)
 281 | Ec	^abcd#rhubarb	abcd	(0,4)
 282 | E$	^(a)\\1{2,3}(.)	aaab	(0,4)(0,1)(3,4)
 283 | E$	SAME	aaaab	(0,5)(0,1)(4,5)
 284 | E$	SAME	aaaaab	(0,5)(0,1)(4,5)
 285 | E$	SAME	aaaaaab	(0,5)(0,1)(4,5)
 286 | E	(?!^)abc	the abc	(4,7)
 287 | E	SAME	abc	NOMATCH
 288 | E	(?=^)abc	abc	(0,3)
 289 | E	SAME	the abc	NOMATCH
 290 | E	^[ab]{1,3}(ab*|b)	aabbbbb	(0,7)(1,7)
 291 | E	^[ab]{1,3}?(ab*|b)	aabbbbb	(0,7)(1,7)
 292 | E	^[ab]{1,3}?(ab*?|b)	aabbbbb	(0,2)(1,2)
 293 | E	^[ab]{1,3}(ab*?|b)	aabbbbb	(0,4)(3,4)
 294 | E$ckv	  (?: [\\040\\t] |  \\(\n\
 295 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 296 | \\)  )*                          # optional leading comment\n\
 297 | (?:    (?:\n\
 298 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 299 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 300 | |\n\
 301 | " (?:                      # opening quote...\n\
 302 | [^\\\\\\x80-\\xff\\n\\015"]                #   Anything except backslash and quote\n\
 303 | |                     #    or\n\
 304 | \\\\ [^\\x80-\\xff]           #   Escaped something (something != CR)\n\
 305 | )* "  # closing quote\n\
 306 | )                    # initial word\n\
 307 | (?:  (?: [\\040\\t] |  \\(\n\
 308 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 309 | \\)  )*  \\.  (?: [\\040\\t] |  \\(\n\
 310 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 311 | \\)  )*   (?:\n\
 312 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 313 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 314 | |\n\
 315 | " (?:                      # opening quote...\n\
 316 | [^\\\\\\x80-\\xff\\n\\015"]                #   Anything except backslash and quote\n\
 317 | |                     #    or\n\
 318 | \\\\ [^\\x80-\\xff]           #   Escaped something (something != CR)\n\
 319 | )* "  # closing quote\n\
 320 | )  )* # further okay, if led by a period\n\
 321 | (?: [\\040\\t] |  \\(\n\
 322 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 323 | \\)  )*  @  (?: [\\040\\t] |  \\(\n\
 324 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 325 | \\)  )*    (?:\n\
 326 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 327 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 328 | |   \\[                         # [\n\
 329 | (?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] |  \\\\ [^\\x80-\\xff]  )*    #    stuff\n\
 330 | \\]                        #           ]\n\
 331 | )                           # initial subdomain\n\
 332 | (?:                                  #\n\
 333 | (?: [\\040\\t] |  \\(\n\
 334 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 335 | \\)  )*  \\.                        # if led by a period...\n\
 336 | (?: [\\040\\t] |  \\(\n\
 337 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 338 | \\)  )*   (?:\n\
 339 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 340 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 341 | |   \\[                         # [\n\
 342 | (?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] |  \\\\ [^\\x80-\\xff]  )*    #    stuff\n\
 343 | \\]                        #           ]\n\
 344 | )                     #   ...further okay\n\
 345 | )*\n\
 346 | # address\n\
 347 | |                     #  or\n\
 348 | (?:\n\
 349 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 350 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 351 | |\n\
 352 | " (?:                      # opening quote...\n\
 353 | [^\\\\\\x80-\\xff\\n\\015"]                #   Anything except backslash and quote\n\
 354 | |                     #    or\n\
 355 | \\\\ [^\\x80-\\xff]           #   Escaped something (something != CR)\n\
 356 | )* "  # closing quote\n\
 357 | )             # one word, optionally followed by....\n\
 358 | (?:\n\
 359 | [^()<>@,;:".\\\\\\[\\]\\x80-\\xff\\000-\\010\\012-\\037]  |  # atom and space parts, or...\n\
 360 | \\(\n\
 361 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 362 | \\)       |  # comments, or...\n\
 363 | \n\
 364 | " (?:                      # opening quote...\n\
 365 | [^\\\\\\x80-\\xff\\n\\015"]                #   Anything except backslash and quote\n\
 366 | |                     #    or\n\
 367 | \\\\ [^\\x80-\\xff]           #   Escaped something (something != CR)\n\
 368 | )* "  # closing quote\n\
 369 | # quoted strings\n\
 370 | )*\n\
 371 | <  (?: [\\040\\t] |  \\(\n\
 372 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 373 | \\)  )*                     # leading <\n\
 374 | (?:  @  (?: [\\040\\t] |  \\(\n\
 375 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 376 | \\)  )*    (?:\n\
 377 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 378 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 379 | |   \\[                         # [\n\
 380 | (?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] |  \\\\ [^\\x80-\\xff]  )*    #    stuff\n\
 381 | \\]                        #           ]\n\
 382 | )                           # initial subdomain\n\
 383 | (?:                                  #\n\
 384 | (?: [\\040\\t] |  \\(\n\
 385 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 386 | \\)  )*  \\.                        # if led by a period...\n\
 387 | (?: [\\040\\t] |  \\(\n\
 388 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 389 | \\)  )*   (?:\n\
 390 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 391 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 392 | |   \\[                         # [\n\
 393 | (?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] |  \\\\ [^\\x80-\\xff]  )*    #    stuff\n\
 394 | \\]                        #           ]\n\
 395 | )                     #   ...further okay\n\
 396 | )*\n\
 397 | \n\
 398 | (?:  (?: [\\040\\t] |  \\(\n\
 399 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 400 | \\)  )*  ,  (?: [\\040\\t] |  \\(\n\
 401 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 402 | \\)  )*  @  (?: [\\040\\t] |  \\(\n\
 403 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 404 | \\)  )*    (?:\n\
 405 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 406 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 407 | |   \\[                         # [\n\
 408 | (?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] |  \\\\ [^\\x80-\\xff]  )*    #    stuff\n\
 409 | \\]                        #           ]\n\
 410 | )                           # initial subdomain\n\
 411 | (?:                                  #\n\
 412 | (?: [\\040\\t] |  \\(\n\
 413 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 414 | \\)  )*  \\.                        # if led by a period...\n\
 415 | (?: [\\040\\t] |  \\(\n\
 416 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 417 | \\)  )*   (?:\n\
 418 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 419 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 420 | |   \\[                         # [\n\
 421 | (?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] |  \\\\ [^\\x80-\\xff]  )*    #    stuff\n\
 422 | \\]                        #           ]\n\
 423 | )                     #   ...further okay\n\
 424 | )*\n\
 425 | )* # further okay, if led by comma\n\
 426 | :                                # closing colon\n\
 427 | (?: [\\040\\t] |  \\(\n\
 428 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 429 | \\)  )*  )? #       optional route\n\
 430 | (?:\n\
 431 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 432 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 433 | |\n\
 434 | " (?:                      # opening quote...\n\
 435 | [^\\\\\\x80-\\xff\\n\\015"]                #   Anything except backslash and quote\n\
 436 | |                     #    or\n\
 437 | \\\\ [^\\x80-\\xff]           #   Escaped something (something != CR)\n\
 438 | )* "  # closing quote\n\
 439 | )                    # initial word\n\
 440 | (?:  (?: [\\040\\t] |  \\(\n\
 441 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 442 | \\)  )*  \\.  (?: [\\040\\t] |  \\(\n\
 443 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 444 | \\)  )*   (?:\n\
 445 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 446 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 447 | |\n\
 448 | " (?:                      # opening quote...\n\
 449 | [^\\\\\\x80-\\xff\\n\\015"]                #   Anything except backslash and quote\n\
 450 | |                     #    or\n\
 451 | \\\\ [^\\x80-\\xff]           #   Escaped something (something != CR)\n\
 452 | )* "  # closing quote\n\
 453 | )  )* # further okay, if led by a period\n\
 454 | (?: [\\040\\t] |  \\(\n\
 455 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 456 | \\)  )*  @  (?: [\\040\\t] |  \\(\n\
 457 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 458 | \\)  )*    (?:\n\
 459 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 460 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 461 | |   \\[                         # [\n\
 462 | (?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] |  \\\\ [^\\x80-\\xff]  )*    #    stuff\n\
 463 | \\]                        #           ]\n\
 464 | )                           # initial subdomain\n\
 465 | (?:                                  #\n\
 466 | (?: [\\040\\t] |  \\(\n\
 467 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 468 | \\)  )*  \\.                        # if led by a period...\n\
 469 | (?: [\\040\\t] |  \\(\n\
 470 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 471 | \\)  )*   (?:\n\
 472 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 473 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 474 | |   \\[                         # [\n\
 475 | (?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] |  \\\\ [^\\x80-\\xff]  )*    #    stuff\n\
 476 | \\]                        #           ]\n\
 477 | )                     #   ...further okay\n\
 478 | )*\n\
 479 | #       address spec\n\
 480 | (?: [\\040\\t] |  \\(\n\
 481 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 482 | \\)  )*  > #                  trailing >\n\
 483 | # name and address\n\
 484 | )  (?: [\\040\\t] |  \\(\n\^[ab]{1,3}(ab*?|b)
 485 | (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  |  \\( (?:  [^\\\\\\x80-\\xff\\n\\015()]  |  \\\\ [^\\x80-\\xff]  )* \\)  )*\n\
 486 | \\)  )*                       # optional trailing comment\n\
 487 | 	Alan Other <user@dom.ain>	(0,25)
 488 | E$ckv	SAME	<user@dom.ain>	(1,13)
 489 | E$ckv	SAME	user@dom.ain	(0,12)
 490 | E$ckv	SAME	"A. Other" <user.1234@dom.ain> (a comment)	(0,42)
 491 | E$ckv	SAME	A. Other <user.1234@dom.ain> (a comment)	(2,40)
 492 | E$ckv	SAME	"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay	(0,61)
 493 | E$ckv	SAME	A missing angle <user@some.where	(17,32)
 494 | E$ckv	SAME	The quick brown fox	NOMATCH
 495 | E$ckv	[\\040\\t]*                    # Nab whitespace.\n\
 496 | (?:\n\
 497 | \\(                              #  (\n\
 498 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 499 | (?:                                 #       (\n\
 500 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 501 | \\(                            #  (\n\
 502 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 503 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 504 | \\)                           #                       )\n\
 505 | )    #         special\n\
 506 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 507 | )*                                  #            )*\n\
 508 | \\)                             #                )\n\
 509 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 510 | # optional leading comment\n\
 511 | (?:\n\
 512 | (?:\n\
 513 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 514 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 515 | # Atom\n\
 516 | |                       #  or\n\
 517 | "                                     # "\n\
 518 | [^\\\\\\x80-\\xff\\n\\015"] *                            #   normal\n\
 519 | (?:  \\\\ [^\\x80-\\xff]  [^\\\\\\x80-\\xff\\n\\015"] * )*        #   ( special normal* )*\n\
 520 | "                                     #        "\n\
 521 | # Quoted string\n\
 522 | )\n\
 523 | [\\040\\t]*                    # Nab whitespace.\n\
 524 | (?:\n\
 525 | \\(                              #  (\n\
 526 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 527 | (?:                                 #       (\n\
 528 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 529 | \\(                            #  (\n\
 530 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 531 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 532 | \\)                           #                       )\n\
 533 | )    #         special\n\
 534 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 535 | )*                                  #            )*\n\
 536 | \\)                             #                )\n\
 537 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 538 | (?:\n\
 539 | \\.\n\
 540 | [\\040\\t]*                    # Nab whitespace.\n\
 541 | (?:\n\
 542 | \\(                              #  (\n\
 543 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 544 | (?:                                 #       (\n\
 545 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 546 | \\(                            #  (\n\
 547 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 548 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 549 | \\)                           #                       )\n\
 550 | )    #         special\n\
 551 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 552 | )*                                  #            )*\n\
 553 | \\)                             #                )\n\
 554 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 555 | (?:\n\
 556 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 557 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 558 | # Atom\n\
 559 | |                       #  or\n\
 560 | "                                     # "\n\
 561 | [^\\\\\\x80-\\xff\\n\\015"] *                            #   normal\n\
 562 | (?:  \\\\ [^\\x80-\\xff]  [^\\\\\\x80-\\xff\\n\\015"] * )*        #   ( special normal* )*\n\
 563 | "                                     #        "\n\
 564 | # Quoted string\n\
 565 | )\n\
 566 | [\\040\\t]*                    # Nab whitespace.\n\
 567 | (?:\n\
 568 | \\(                              #  (\n\
 569 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 570 | (?:                                 #       (\n\
 571 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 572 | \\(                            #  (\n\
 573 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 574 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 575 | \\)                           #                       )\n\
 576 | )    #         special\n\
 577 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 578 | )*                                  #            )*\n\
 579 | \\)                             #                )\n\
 580 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 581 | # additional words\n\
 582 | )*\n\
 583 | @\n\
 584 | [\\040\\t]*                    # Nab whitespace.\n\
 585 | (?:\n\
 586 | \\(                              #  (\n\
 587 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 588 | (?:                                 #       (\n\
 589 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 590 | \\(                            #  (\n\
 591 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 592 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 593 | \\)                           #                       )\n\
 594 | )    #         special\n\
 595 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 596 | )*                                  #            )*\n\
 597 | \\)                             #                )\n\
 598 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 599 | (?:\n\
 600 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 601 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 602 | |\n\
 603 | \\[                            # [\n\
 604 | (?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] |  \\\\ [^\\x80-\\xff]  )*     #    stuff\n\
 605 | \\]                           #           ]\n\
 606 | )\n\
 607 | [\\040\\t]*                    # Nab whitespace.\n\
 608 | (?:\n\
 609 | \\(                              #  (\n\
 610 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 611 | (?:                                 #       (\n\
 612 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 613 | \\(                            #  (\n\
 614 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 615 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 616 | \\)                           #                       )\n\
 617 | )    #         special\n\
 618 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 619 | )*                                  #            )*\n\
 620 | \\)                             #                )\n\
 621 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 622 | # optional trailing comments\n\
 623 | (?:\n\
 624 | \\.\n\
 625 | [\\040\\t]*                    # Nab whitespace.\n\
 626 | (?:\n\
 627 | \\(                              #  (\n\
 628 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 629 | (?:                                 #       (\n\
 630 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 631 | \\(                            #  (\n\
 632 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 633 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 634 | \\)                           #                       )\n\
 635 | )    #         special\n\
 636 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 637 | )*                                  #            )*\n\
 638 | \\)                             #                )\n\
 639 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 640 | (?:\n\
 641 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 642 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 643 | |\n\
 644 | \\[                            # [\n\
 645 | (?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] |  \\\\ [^\\x80-\\xff]  )*     #    stuff\n\
 646 | \\]                           #           ]\n\
 647 | )\n\
 648 | [\\040\\t]*                    # Nab whitespace.\n\
 649 | (?:\n\
 650 | \\(                              #  (\n\
 651 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 652 | (?:                                 #       (\n\
 653 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 654 | \\(                            #  (\n\
 655 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 656 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 657 | \\)                           #                       )\n\
 658 | )    #         special\n\
 659 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 660 | )*                                  #            )*\n\
 661 | \\)                             #                )\n\
 662 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 663 | # optional trailing comments\n\
 664 | )*\n\
 665 | # address\n\
 666 | |                             #  or\n\
 667 | (?:\n\
 668 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 669 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 670 | # Atom\n\
 671 | |                       #  or\n\
 672 | "                                     # "\n\
 673 | [^\\\\\\x80-\\xff\\n\\015"] *                            #   normal\n\
 674 | (?:  \\\\ [^\\x80-\\xff]  [^\\\\\\x80-\\xff\\n\\015"] * )*        #   ( special normal* )*\n\
 675 | "                                     #        "\n\
 676 | # Quoted string\n\
 677 | )\n\
 678 | # leading word\n\
 679 | [^()<>@,;:".\\\\\\[\\]\\x80-\\xff\\000-\\010\\012-\\037] *               # "normal" atoms and or spaces\n\
 680 | (?:\n\
 681 | (?:\n\
 682 | \\(                              #  (\n\
 683 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 684 | (?:                                 #       (\n\
 685 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 686 | \\(                            #  (\n\
 687 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 688 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 689 | \\)                           #                       )\n\
 690 | )    #         special\n\
 691 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 692 | )*                                  #            )*\n\
 693 | \\)                             #                )\n\
 694 | |\n\
 695 | "                                     # "\n\
 696 | [^\\\\\\x80-\\xff\\n\\015"] *                            #   normal\n\
 697 | (?:  \\\\ [^\\x80-\\xff]  [^\\\\\\x80-\\xff\\n\\015"] * )*        #   ( special normal* )*\n\
 698 | "                                     #        "\n\
 699 | ) # "special" comment or quoted string\n\
 700 | [^()<>@,;:".\\\\\\[\\]\\x80-\\xff\\000-\\010\\012-\\037] *            #  more "normal"\n\
 701 | )*\n\
 702 | <\n\
 703 | [\\040\\t]*                    # Nab whitespace.\n\
 704 | (?:\n\
 705 | \\(                              #  (\n\
 706 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 707 | (?:                                 #       (\n\
 708 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 709 | \\(                            #  (\n\
 710 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 711 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 712 | \\)                           #                       )\n\
 713 | )    #         special\n\
 714 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 715 | )*                                  #            )*\n\
 716 | \\)                             #                )\n\
 717 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 718 | # <\n\
 719 | (?:\n\
 720 | @\n\
 721 | [\\040\\t]*                    # Nab whitespace.\n\
 722 | (?:\n\
 723 | \\(                              #  (\n\
 724 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 725 | (?:                                 #       (\n\
 726 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 727 | \\(                            #  (\n\
 728 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 729 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 730 | \\)                           #                       )\n\
 731 | )    #         special\n\
 732 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 733 | )*                                  #            )*\n\
 734 | \\)                             #                )\n\
 735 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 736 | (?:\n\
 737 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 738 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 739 | |\n\
 740 | \\[                            # [\n\
 741 | (?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] |  \\\\ [^\\x80-\\xff]  )*     #    stuff\n\
 742 | \\]                           #           ]\n\
 743 | )\n\
 744 | [\\040\\t]*                    # Nab whitespace.\n\
 745 | (?:\n\
 746 | \\(                              #  (\n\
 747 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 748 | (?:                                 #       (\n\
 749 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 750 | \\(                            #  (\n\
 751 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 752 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 753 | \\)                           #                       )\n\
 754 | )    #         special\n\
 755 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 756 | )*                                  #            )*\n\
 757 | \\)                             #                )\n\
 758 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 759 | # optional trailing comments\n\
 760 | (?:\n\
 761 | \\.\n\
 762 | [\\040\\t]*                    # Nab whitespace.\n\
 763 | (?:\n\
 764 | \\(                              #  (\n\
 765 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 766 | (?:                                 #       (\n\
 767 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 768 | \\(                            #  (\n\
 769 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 770 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 771 | \\)                           #                       )\n\
 772 | )    #         special\n\
 773 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 774 | )*                                  #            )*\n\
 775 | \\)                             #                )\n\
 776 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 777 | (?:\n\
 778 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 779 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 780 | |\n\
 781 | \\[                            # [\n\
 782 | (?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] |  \\\\ [^\\x80-\\xff]  )*     #    stuff\n\
 783 | \\]                           #           ]\n\
 784 | )\n\
 785 | [\\040\\t]*                    # Nab whitespace.\n\
 786 | (?:\n\
 787 | \\(                              #  (\n\
 788 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 789 | (?:                                 #       (\n\
 790 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 791 | \\(                            #  (\n\
 792 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 793 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 794 | \\)                           #                       )\n\
 795 | )    #         special\n\
 796 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 797 | )*                                  #            )*\n\
 798 | \\)                             #                )\n\
 799 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 800 | # optional trailing comments\n\
 801 | )*\n\
 802 | (?: ,\n\
 803 | [\\040\\t]*                    # Nab whitespace.\n\
 804 | (?:\n\
 805 | \\(                              #  (\n\
 806 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 807 | (?:                                 #       (\n\
 808 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 809 | \\(                            #  (\n\
 810 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 811 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 812 | \\)                           #                       )\n\
 813 | )    #         special\n\
 814 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 815 | )*                                  #            )*\n\
 816 | \\)                             #                )\n\
 817 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 818 | @\n\
 819 | [\\040\\t]*                    # Nab whitespace.\n\
 820 | (?:\n\
 821 | \\(                              #  (\n\
 822 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 823 | (?:                                 #       (\n\
 824 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 825 | \\(                            #  (\n\
 826 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 827 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 828 | \\)                           #                       )\n\
 829 | )    #         special\n\
 830 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 831 | )*                                  #            )*\n\
 832 | \\)                             #                )\n\
 833 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 834 | (?:\n\
 835 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 836 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 837 | |\n\
 838 | \\[                            # [\n\
 839 | (?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] |  \\\\ [^\\x80-\\xff]  )*     #    stuff\n\
 840 | \\]                           #           ]\n\
 841 | )\n\
 842 | [\\040\\t]*                    # Nab whitespace.\n\
 843 | (?:\n\
 844 | \\(                              #  (\n\
 845 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 846 | (?:                                 #       (\n\
 847 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 848 | \\(                            #  (\n\
 849 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 850 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 851 | \\)                           #                       )\n\
 852 | )    #         special\n\
 853 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 854 | )*                                  #            )*\n\
 855 | \\)                             #                )\n\
 856 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 857 | # optional trailing comments\n\
 858 | (?:\n\
 859 | \\.\n\
 860 | [\\040\\t]*                    # Nab whitespace.\n\
 861 | (?:\n\
 862 | \\(                              #  (\n\
 863 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 864 | (?:                                 #       (\n\
 865 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 866 | \\(                            #  (\n\
 867 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 868 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 869 | \\)                           #                       )\n\
 870 | )    #         special\n\
 871 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 872 | )*                                  #            )*\n\
 873 | \\)                             #                )\n\
 874 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 875 | (?:\n\
 876 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 877 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 878 | |\n\
 879 | \\[                            # [\n\
 880 | (?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] |  \\\\ [^\\x80-\\xff]  )*     #    stuff\n\
 881 | \\]                           #           ]\n\
 882 | )\n\
 883 | [\\040\\t]*                    # Nab whitespace.\n\
 884 | (?:\n\
 885 | \\(                              #  (\n\
 886 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 887 | (?:                                 #       (\n\
 888 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 889 | \\(                            #  (\n\
 890 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 891 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 892 | \\)                           #                       )\n\
 893 | )    #         special\n\
 894 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 895 | )*                                  #            )*\n\
 896 | \\)                             #                )\n\
 897 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 898 | # optional trailing comments\n\
 899 | )*\n\
 900 | )*  # additional domains\n\
 901 | :\n\
 902 | [\\040\\t]*                    # Nab whitespace.\n\
 903 | (?:\n\
 904 | \\(                              #  (\n\
 905 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 906 | (?:                                 #       (\n\
 907 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 908 | \\(                            #  (\n\
 909 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 910 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 911 | \\)                           #                       )\n\
 912 | )    #         special\n\
 913 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 914 | )*                                  #            )*\n\
 915 | \\)                             #                )\n\
 916 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 917 | # optional trailing comments\n\
 918 | )?     #       optional route\n\
 919 | (?:\n\
 920 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 921 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 922 | # Atom\n\
 923 | |                       #  or\n\
 924 | "                                     # "\n\
 925 | [^\\\\\\x80-\\xff\\n\\015"] *                            #   normal\n\
 926 | (?:  \\\\ [^\\x80-\\xff]  [^\\\\\\x80-\\xff\\n\\015"] * )*        #   ( special normal* )*\n\
 927 | "                                     #        "\n\
 928 | # Quoted string\n\
 929 | )\n\
 930 | [\\040\\t]*                    # Nab whitespace.\n\
 931 | (?:\n\
 932 | \\(                              #  (\n\
 933 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 934 | (?:                                 #       (\n\
 935 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 936 | \\(                            #  (\n\
 937 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 938 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 939 | \\)                           #                       )\n\
 940 | )    #         special\n\
 941 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 942 | )*                                  #            )*\n\
 943 | \\)                             #                )\n\
 944 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 945 | (?:\n\
 946 | \\.\n\
 947 | [\\040\\t]*                    # Nab whitespace.\n\
 948 | (?:\n\
 949 | \\(                              #  (\n\
 950 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 951 | (?:                                 #       (\n\
 952 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 953 | \\(                            #  (\n\
 954 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 955 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 956 | \\)                           #                       )\n\
 957 | )    #         special\n\
 958 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 959 | )*                                  #            )*\n\
 960 | \\)                             #                )\n\
 961 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 962 | (?:\n\
 963 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
 964 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
 965 | # Atom\n\
 966 | |                       #  or\n\
 967 | "                                     # "\n\
 968 | [^\\\\\\x80-\\xff\\n\\015"] *                            #   normal\n\
 969 | (?:  \\\\ [^\\x80-\\xff]  [^\\\\\\x80-\\xff\\n\\015"] * )*        #   ( special normal* )*\n\
 970 | "                                     #        "\n\
 971 | # Quoted string\n\
 972 | )\n\
 973 | [\\040\\t]*                    # Nab whitespace.\n\
 974 | (?:\n\
 975 | \\(                              #  (\n\
 976 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 977 | (?:                                 #       (\n\
 978 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 979 | \\(                            #  (\n\
 980 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 981 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
 982 | \\)                           #                       )\n\
 983 | )    #         special\n\
 984 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
 985 | )*                                  #            )*\n\
 986 | \\)                             #                )\n\
 987 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
 988 | # additional words\n\
 989 | )*\n\
 990 | @\n\
 991 | [\\040\\t]*                    # Nab whitespace.\n\
 992 | (?:\n\
 993 | \\(                              #  (\n\
 994 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
 995 | (?:                                 #       (\n\
 996 | (?:  \\\\ [^\\x80-\\xff]  |\n\
 997 | \\(                            #  (\n\
 998 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
 999 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
1000 | \\)                           #                       )\n\
1001 | )    #         special\n\
1002 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
1003 | )*                                  #            )*\n\
1004 | \\)                             #                )\n\
1005 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
1006 | (?:\n\
1007 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
1008 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
1009 | |\n\
1010 | \\[                            # [\n\
1011 | (?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] |  \\\\ [^\\x80-\\xff]  )*     #    stuff\n\
1012 | \\]                           #           ]\n\
1013 | )\n\
1014 | [\\040\\t]*                    # Nab whitespace.\n\
1015 | (?:\n\
1016 | \\(                              #  (\n\
1017 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
1018 | (?:                                 #       (\n\
1019 | (?:  \\\\ [^\\x80-\\xff]  |\n\
1020 | \\(                            #  (\n\
1021 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
1022 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
1023 | \\)                           #                       )\n\
1024 | )    #         special\n\
1025 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
1026 | )*                                  #            )*\n\
1027 | \\)                             #                )\n\
1028 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
1029 | # optional trailing comments\n\
1030 | (?:\n\
1031 | \\.\n\
1032 | [\\040\\t]*                    # Nab whitespace.\n\
1033 | (?:\n\
1034 | \\(                              #  (\n\
1035 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
1036 | (?:                                 #       (\n\
1037 | (?:  \\\\ [^\\x80-\\xff]  |\n\
1038 | \\(                            #  (\n\
1039 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
1040 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
1041 | \\)                           #                       )\n\
1042 | )    #         special\n\
1043 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
1044 | )*                                  #            )*\n\
1045 | \\)                             #                )\n\
1046 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
1047 | (?:\n\
1048 | [^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]+    # some number of atom characters...\n\
1049 | (?![^(\\040)<>@,;:".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n\
1050 | |\n\
1051 | \\[                            # [\n\
1052 | (?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] |  \\\\ [^\\x80-\\xff]  )*     #    stuff\n\
1053 | \\]                           #           ]\n\
1054 | )\n\
1055 | [\\040\\t]*                    # Nab whitespace.\n\
1056 | (?:\n\
1057 | \\(                              #  (\n\
1058 | [^\\\\\\x80-\\xff\\n\\015()] *                             #     normal*\n\
1059 | (?:                                 #       (\n\
1060 | (?:  \\\\ [^\\x80-\\xff]  |\n\
1061 | \\(                            #  (\n\
1062 | [^\\\\\\x80-\\xff\\n\\015()] *                            #     normal*\n\
1063 | (?:  \\\\ [^\\x80-\\xff]   [^\\\\\\x80-\\xff\\n\\015()] * )*        #     (special normal*)*\n\
1064 | \\)                           #                       )\n\
1065 | )    #         special\n\
1066 | [^\\\\\\x80-\\xff\\n\\015()] *                         #         normal*\n\
1067 | )*                                  #            )*\n\
1068 | \\)                             #                )\n\
1069 | [\\040\\t]* )*    # If comment found, allow more spaces.\n\
1070 | # optional trailing comments\n\
1071 | )*\n\
1072 | #       address spec\n\
1073 | >                    #                 >\n\
1074 | # name and address\n\
1075 | )\n\
1076 | 	Alan Other <user@dom.ain>	(0,25)
1077 | E$ckv	SAME	<user@dom.ain>	(1,13)
1078 | E$ckv	SAME	user@dom.ain	(0,12)
1079 | E$ckv	SAME	"A. Other" <user.1234@dom.ain> (a comment)	(0,30)
1080 | E$ckv	SAME	A. Other <user.1234@dom.ain> (a comment)	(2,28)
1081 | E$ckv	SAME	"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay	(0,61)
1082 | E$ckv	SAME	A missing angle <user@some.where	(17,32)
1083 | E$ckv	SAME	The quick brown fox	NOMATCH
1084 | E$hx	abc\\0def\\00pqr\\000xyz\\0000AB	abc\0def\00pqr\000xyz\0000AB	(0,19)
1085 | E$hx	SAME	abc456 abc\0def\00pqr\000xyz\0000ABCDE	(7,26)
1086 | E$	abc\\x{0d}ef\\x00pqr\\x{00}0xyz\\x{00}00AB	abc\x{0d}ef\x{00}pqr\x{00}0xyz\x{00}00AB	(0,20)
1087 | E$	abc\\x000d\\x00pqr\\x000xyz\\x0000AB	abc\x000d\x00pqr\x000xyz\x0000AB	(0,13)
1088 | Ex$	SAME	abc\x000d\x00pqr\x000xyz\x0000AB		(0,13)
1089 | #E$	abc\\x0def\\x00pqr\\x000xyz\\x0000AB	abc456 abc\x0def\x00pqr\x000xyz\x0000ABCDE	(7,27)
1090 | #Ex$	abc\\x0def\\x00pqr\\x000xyz\\x0000AB	abc456 abc\x0def\x00pqr\x000xyz\x0000ABCDE	(7,27)
1091 | E$v	^[\\000-\\037]	\0A	(0,1)
1092 | E$v	SAME	\01B	(0,1)
1093 | E$v	SAME	\037C	(0,1)
1094 | E$x	\\0*	\0\0\0\0	(0,4)
1095 | E$	A\\x0{2,3}Z	The A\x0\x0Z	(4,8)
1096 | E$	SAME	An A\0\x0\0Z	(3,8)
1097 | E$	SAME	A\0Z	NOMATCH
1098 | E$	SAME	A\0\x0\0\x0Z	NOMATCH
1099 | E$z	^(cow|)\\1(bell)	cowcowbell	(0,10)(0,3)(6,10)
1100 | E$z	SAME	bell		(0,4)(0,0)(0,4)
1101 | E$z	SAME	cowbell		NOMATCH
1102 | E$	^\\s	\040abc		(0,1)
1103 | E$	SAME	\x{0c}abc	(0,1)
1104 | E$	SAME	\x0cxyz		(0,1)
1105 | E$	SAME	\fabc	(0,1)
1106 | E$	SAME	\nabc	(0,1)
1107 | E$	SAME	\rabc	(0,1)
1108 | E$	SAME	\tabc	(0,1)
1109 | E$	SAME	abc	NOMATCH
1110 | E$c	^a\tb\n\
1111 |   
1112 |     c	abc	(0,3)
1113 | E$z	^(a|)\\1*b	ab	(0,2)(0,1)
1114 | E$z	SAME	aaaab	(0,5)(0,1)
1115 | E$z	SAME	b	(0,1)(0,0)
1116 | E$z	SAME	acb	NOMATCH
1117 | E$z	^(a|)\\1+b	aab	(0,3)(0,1)
1118 | E$z	SAME	aaaab	(0,5)(0,1)
1119 | E$z	SAME	b	(0,1)(0,0)
1120 | E$z	SAME	ab	NOMATCH
1121 | E$z	^(a|)\\1?b	ab	(0,2)(0,1)
1122 | E$z	SAME	aab	(0,3)(0,1)
1123 | E$z	SAME	b	(0,1)(0,0)
1124 | E$z	SAME	acb	NOMATCH
1125 | E$z	^(a|)\\1{2}b	aaab	(0,4)(0,1)
1126 | E$z	SAME	b	(0,1)(0,0)
1127 | E$z	SAME	ab	NOMATCH
1128 | E$z	SAME	aab	NOMATCH
1129 | E$z	SAME	aaaab	NOMATCH
1130 | E$z	^(a|)\\1{2,3}b	aaab	(0,4)(0,1)
1131 | E$z	SAME	aaaab	(0,5)(0,1)
1132 | E$z	SAME	b	(0,1)(0,0)
1133 | E$z	SAME	ab	NOMATCH
1134 | E$z	SAME	aab	NOMATCH
1135 | E$z	SAME	aaaaab	NOMATCH
1136 | E	ab{1,3}bc	abbbbc	(0,6)
1137 | E	SAME	abbbc	(0,5)
1138 | E	SAME	abbc	(0,4)
1139 | E	SAME	abc	NOMATCH
1140 | E	SAME	abbbbbc	NOMATCH
1141 | E$	([^.]*)\\.([^:]*):[T ]+(.*)	track1.title:TBlah blah blah	(0,28)(0,6)(7,12)(14,28)
1142 | E$i	([^.]*)\\.([^:]*):[T ]+(.*)	track1.title:TBlah blah blah	(0,28)(0,6)(7,12)(14,28)
1143 | E$i	([^.]*)\\.([^:]*):[t ]+(.*)	track1.title:TBlah blah blah	(0,28)(0,6)(7,12)(14,28)
1144 | E	^[W-c]+$	WXY_^abc	(0,8)
1145 | E	SAME	wxy	NOMATCH
1146 | Ei	^[W-c]+$	WXY_^abc	(0,8)
1147 | Ei	SAME	wxy_^ABC	(0,8)
1148 | E$iv	^[\\x3f-\\x5F]+$	WXY_^abc	(0,8)
1149 | E$iv	SAME	wxy_^ABC	(0,8)
1150 | En	^abc$	abc	(0,3)
1151 | En$	SAME	qqq\nabc	(4,7)
1152 | En$	SAME	abc\nzzz	(0,3)
1153 | En$	SAME	qqq\nabc\nzzz	(4,7)
1154 | E	^abc$	abc	(0,3)
1155 | E$	SAME	qqq\nabc	NOMATCH
1156 | E$	SAME	abc\nzzz	NOMATCH
1157 | E$	SAME	qqq\nabc\nzzz	NOMATCH
1158 | E$n	\\Aabc\\Z	abc	(0,3)
1159 | E$n	SAME	abc\n 	NOMATCH
1160 | E$n	SAME	qqq\nabc	NOMATCH
1161 | E$n	SAME	abc\nzzz	NOMATCH
1162 | E$n	SAME	qqq\nabc\nzzz	NOMATCH
1163 | E$j	\\A(.)*\\Z	abc\ndef	(0,7)(6,7)
1164 | E$n	\\A(.)*\\Z	abc\ndef	NOMATCH
1165 | E	(?:b)|(?::+)	b::c	(0,1)
1166 | E	SAME	c::b	(1,3)
1167 | E	[-az]+	az-	(0,3)
1168 | E	SAME	b	NOMATCH
1169 | E	[az-]+	za-	(0,3)
1170 | E	SAME	b	NOMATCH
1171 | E$kv	[a\\-z]+	a-z	(0,3)
1172 | E$kv	SAME	b	NOMATCH
1173 | E	[a-z]+	abcdxyz	(0,7)
1174 | E$v	[\\d-]+	12-34	(0,5)
1175 | E$v	SAME	aaa	NOMATCH
1176 | E$v	[\\d-z]+	12-34z	ERANGE
1177 | E$v	SAME	aaa	ERANGE
1178 | E$	\\x5c	\\	(0,1)
1179 | E$	\\x20Z	the Zoo	(3,5)
1180 | E$	SAME	Zulu	NOMATCH
1181 | E$i	(abc)\\1	abcabc	(0,6)(0,3)
1182 | E$i	SAME	ABCabc	(0,6)(0,3)
1183 | E$i	SAME	abcABC	(0,6)(0,3)
1184 | E	(main(O)?)+	mainmain	(0,8)(4,8)
1185 | E	SAME	mainOmain	(0,9)(5,9)
1186 | E	ab{3cd	ab{3cd	BADBR
1187 | E	ab{3,cd	ab{3,cd	BADBR
1188 | E	ab{3,4a}cd	ab{3,4a}cd	BADBR
1189 | E	{4,5a}bc	{4,5a}bc	BADBR
1190 | E$	^a.b	a\rb	(0,3)
1191 | E$	SAME	a\nb	(0,3)
1192 | E	abc$	abc	(0,3)
1193 | E$	SAME	abc\n	NOMATCH
1194 | E$n	SAME	abc\n	(0,3)
1195 | E$	SAME	abc\ndef	NOMATCH
1196 | E$n	SAME	abc\ndef	(0,3)
1197 | E$	(abc)\\123	abc\x53		(0,4)(0,3)
1198 | E$	(abc)\\223	abc\x93		(0,4)(0,3)
1199 | E$	(abc)\\323	abc\xd3		(0,4)(0,3)
1200 | E$h	(abc)\\500	abc\x40		ESUBREG
1201 | E$h	SAME		abc\100		ESUBREG
1202 | E$h	(abc)\\1000	abc\x{40}0	(0,5)(0,3)
1203 | E$h	SAME		abc\x40\x30	(0,5)(0,3)
1204 | E$h	SAME		abc\1000	(0,5)(0,3)
1205 | E$h	SAME		abc\100\x30	(0,5)(0,3)
1206 | E$h	SAME		abc\100\060	(0,5)(0,3)
1207 | E$h	SAME		abc\100\60	(0,5)(0,3)
1208 | E$h	(abc)\\100z	abc\x40z	(0,5)(0,3)
1209 | E$	abc\\71	abc\071	ESUBREG
1210 | E$h	abc\\71	abc\071	(0,4)
1211 | E$	abc\\81	abc\081	ESUBREG
1212 | E$h	abc\\81	abc\081	NOMATCH
1213 | E$h	SAME	abc\0\x38\x31	NOMATCH
1214 | E$	abc\\091	abc\091	BADESC
1215 | E$x	abc\\091	abc\091	(0,6)
1216 | E$x	SAME	abc\0\x39\x31	(0,6)
1217 | E$	(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\12\\123	abcdefghijkllS	NOMATCH
1218 | E$	(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\12\\123	abcdefghijk\12S	NOMATCH
1219 | E$h	(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\12\\123	abcdefghijkllS	(0,14)(0,1)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)(10,11)(11,12)
1220 | E$h	(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\12\\123	abcdefghijk\12S	(0,13)(0,1)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)(10,11)
1221 | E$	(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(?12)\\123	abcdefghijkllS	(0,14)(0,1)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)(10,11)(11,12)
1222 | E$	ab\\gdef	abgdef	BADESC
1223 | E$x	ab\\gdef	abgdef	(0,6)
1224 | E	a{0}bc	bc	(0,2)
1225 | E	(a|(bc)){0,0}?xyz	xyz	(0,3)
1226 | E$v	abc[\\10]de	abc\010de	(0,6)
1227 | E$v	abc[\\1]de	abc\1de	(0,6)
1228 | E$kv	abc[\\1]de	abc\1de	(0,6)
1229 | E$v	(abc)[\\1]de	abc\1de	(0,6)(0,3)
1230 | E$kv	(abc)[\\1]de	abc\1de	(0,6)(0,3)
1231 | E$	a.b(?s)	a\nb	(0,3)
1232 | E$v	^([^a])([^\\b])([^c]*)([^d]{3,4})	baNOTccccd	(0,9)(0,1)(1,2)(2,5)(5,9)
1233 | E$v	SAME	baNOTcccd	(0,8)(0,1)(1,2)(2,5)(5,8)
1234 | E$v	SAME	baNOTccd	(0,7)(0,1)(1,2)(2,4)(4,7)
1235 | E$v	SAME	bacccd	(0,5)(0,1)(1,2)(2,2)(2,5)
1236 | E$v	SAME	anything	NOMATCH
1237 | E$v	SAME	b\bc...	NOMATCH
1238 | E$v	SAME	baccd	NOMATCH
1239 | E$v	^([^a])([^\\f])([^c]*)([^d]{3,4})	b\fc...	NOMATCH
1240 | E	[^a]	Abc	(0,1)
1241 | Ei	[^a]	Abc 	(1,2)
1242 | E	[^a]+	AAAaAbc	(0,3)
1243 | Ei	[^a]+	AAAaAbc 	(5,8)
1244 | E$	[^a]+	bbb\nccc	(0,7)
1245 | E	[^k]$	abc	(2,3)
1246 | E	SAME	abk   	(5,6)
1247 | E	[^k]{2,3}$	abc	(0,3)
1248 | E	SAME	kbc	(1,3)
1249 | E	SAME	kabc 	(2,5)
1250 | E	SAME	abk	NOMATCH
1251 | E	SAME	akb	NOMATCH
1252 | E	SAME	akk 	NOMATCH
1253 | E$	^\\d{8,}\\@.+[^k]$	12345678@a.b.c.d	(0,16)
1254 | E$	SAME	123456789@x.y.z	(0,15)
1255 | E$	SAME	12345678@x.y.uk	NOMATCH
1256 | E$	SAME	1234567@a.b.c.d       	NOMATCH
1257 | E$	(a)\\1{8,}	aaaaaaaaa	(0,9)(0,1)
1258 | E$	SAME	aaaaaaaaaa	(0,10)(0,1)
1259 | E$	SAME	aaaaaaa   	NOMATCH
1260 | E	[^a]	aaaabcd	(4,5)
1261 | E	SAME	aaAabcd 	(2,3)
1262 | Ei	[^a]	aaaabcd	(4,5)
1263 | Ei	SAME	aaAabcd 	(4,5)
1264 | E	[^az]	aaaabcd	(4,5)
1265 | E	SAME	aaAabcd 	(2,3)
1266 | Ei	[^az]	aaaabcd	(4,5)
1267 | Ei	SAME	aaAabcd 	(4,5)
1268 | E$	\\000\\001\\002\\003\\004\\005\\006\\007\\010\\011\\012\\013\\014\\015\\016\\017\\020\\021\\022\\023\\024\\025\\026\\027\\030\\031\\032\\033\\034\\035\\036\\037\\040\\041\\042\\043\\044\\045\\046\\047\\050\\051\\052\\053\\054\\055\\056\\057\\060\\061\\062\\063\\064\\065\\066\\067\\070\\071\\072\\073\\074\\075\\076\\077\\100\\101\\102\\103\\104\\105\\106\\107\\110\\111\\112\\113\\114\\115\\116\\117\\120\\121\\122\\123\\124\\125\\126\\127\\130\\131\\132\\133\\134\\135\\136\\137\\140\\141\\142\\143\\144\\145\\146\\147\\150\\151\\152\\153\\154\\155\\156\\157\\160\\161\\162\\163\\164\\165\\166\\167\\170\\171\\172\\173\\174\\175\\176\\177\\200\\201\\202\\203\\204\\205\\206\\207\\210\\211\\212\\213\\214\\215\\216\\217\\220\\221\\222\\223\\224\\225\\226\\227\\230\\231\\232\\233\\234\\235\\236\\237\\240\\241\\242\\243\\244\\245\\246\\247\\250\\251\\252\\253\\254\\255\\256\\257\\260\\261\\262\\263\\264\\265\\266\\267\\270\\271\\272\\273\\274\\275\\276\\277\\300\\301\\302\\303\\304\\305\\306\\307\\310\\311\\312\\313\\314\\315\\316\\317\\320\\321\\322\\323\\324\\325\\326\\327\\330\\331\\332\\333\\334\\335\\336\\337\\340\\341\\342\\343\\344\\345\\346\\347\\350\\351\\352\\353\\354\\355\\356\\357\\360\\361\\362\\363\\364\\365\\366\\367\\370\\371\\372\\373\\374\\375\\376\\377	
\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\040\041\042\043\044\045\046\047\050\051\052\053\054\055\056\057\060\061\062\063\064\065\066\067\070\071\072\073\074\075\076\077\100\101\102\103\104\105\106\107\110\111\112\113\114\115\116\117\120\121\122\123\124\125\126\127\130\131\132\133\134\135\136\137\140\141\142\143\144\145\146\147\150\151\152\153\154\155\156\157\160\161\162\163\164\165\166\167\170\171\172\173\174\175\176\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377	(0,256)
1269 | E	P[^*]TAIRE[^*]{1,6}?LL	xxxxxxxxxxxPSTAIREISLLxxxxxxxxx	(11,22)
1270 | E	P[^*]TAIRE[^*]{1,}?LL	xxxxxxxxxxxPSTAIREISLLxxxxxxxxx	(11,22)
1271 | E$	(\\.\\d\\d[1-9]?)\\d+	1.230003938	(1,11)(1,4)
1272 | E$	SAME	1.875000282   	(1,11)(1,5)
1273 | E$	SAME	1.235  	(1,5)(1,4)
1274 | E$	(\\.\\d\\d((?=0)|[1-9](?=\\d)))	1.230003938      	(1,4)(1,4)(4,4)
1275 | E$	(\\.\\d\\d((?=0)|\\d(?=\\d)))	1.230003938      	(1,5)(1,5)(4,5)
1276 | E$	SAME	1.875000282	(1,5)(1,5)(4,5)
1277 | E$	SAME	1.235 	NOMATCH
1278 | E	a(?)b	ab 	(0,2)
1279 | E	a(?#)b	ab 	(0,2)
1280 | E$i	\\b(foo)\\s+(\\w+)	Food is on the foo table	(15,24)(15,18)(19,24)
1281 | E	foo(.*)bar	The food is under the bar in the barn.	(4,36)(7,33)
1282 | E	foo(.*?)bar	The food is under the bar in the barn.	(4,25)(7,22)
1283 | E$	(.*)(\\d*)	I have 2 numbers: 53147	(0,23)(0,23)(23,23)
1284 | E$	(.*)(\\d+)	I have 2 numbers: 53147	(0,23)(0,22)(22,23)
1285 | E$	(.*?)(\\d*)	I have 2 numbers: 53147	(0,0)(0,0)(0,0)
1286 | E$	(.*?)(\\d+)	I have 2 numbers: 53147	(0,8)(0,7)(7,8)
1287 | E$	(.*)(\\d+)$	I have 2 numbers: 53147	(0,23)(0,22)(22,23)
1288 | E$	(.*?)(\\d+)$	I have 2 numbers: 53147	(0,23)(0,18)(18,23)
1289 | E$	(.*)\\b(\\d+)$	I have 2 numbers: 53147	(0,23)(0,18)(18,23)
1290 | E$	(.*\\D)(\\d+)$	I have 2 numbers: 53147	(0,23)(0,18)(18,23)
1291 | E$	^\\D*(?!123)	ABC123	(0,2)
1292 | E$	^(\\D*)(?=\\d)(?!123)	ABC445	(0,3)(0,3)
1293 | E$	SAME	ABC123	NOMATCH
1294 | E	^[W-]46]	W46]789 	(0,4)
1295 | E	SAME	-46]789	(0,4)
1296 | E	SAME	Wall	NOMATCH
1297 | E	SAME	Zebra	NOMATCH
1298 | E	SAME	42	NOMATCH
1299 | E	SAME	[abcd] 	NOMATCH
1300 | E	SAME	]abcd[	NOMATCH
1301 | E$kv	^[W-\\]46]	W46]789 	(0,1)
1302 | E$kv	SAME	Wall	(0,1)
1303 | E$kv	SAME	Zebra	(0,1)
1304 | E$kv	SAME	Xylophone  	(0,1)
1305 | E$kv	SAME	42	(0,1)
1306 | E$kv	SAME	[abcd] 	(0,1)
1307 | E$kv	SAME	]abcd[	(0,1)
1308 | E$kv	SAME	\\backslash 	(0,1)
1309 | E$kv	SAME	-46]789	NOMATCH
1310 | E$kv	SAME	well	NOMATCH
1311 | E$	\\d\\d/\\d\\d/\\d\\d\\d\\d	01/01/2000	(0,10)
1312 | E	word (?:[a-zA-Z0-9]+ ){0,10}otherword	word cat dog elephant mussel cow horse canary baboon snake shark otherword	(0,74)
1313 | E	SAME	word cat dog elephant mussel cow horse canary baboon snake shark	NOMATCH
1314 | E	word (?:[a-zA-Z0-9]+ ){0,300}otherword	word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope	NOMATCH
1315 | E	^(a){0,0}	bcd	(0,0)
1316 | E	SAME	abc	(0,0)
1317 | E	SAME	aab     	(0,0)
1318 | E	^(a){0,1}	bcd	(0,0)
1319 | E	SAME	abc	(0,1)(0,1)
1320 | E	SAME	aab  	(0,1)(0,1)
1321 | E	^(a){0,2}	bcd	(0,0)
1322 | E	SAME	abc	(0,1)(0,1)
1323 | E	SAME	aab  	(0,2)(1,2)
1324 | E	^(a){0,3}	bcd	(0,0)
1325 | E	SAME	abc	(0,1)(0,1)
1326 | E	SAME	aab	(0,2)(1,2)
1327 | E	SAME	aaa   	(0,3)(2,3)
1328 | E	^(a){0,}	bcd	(0,0)
1329 | E	SAME	abc	(0,1)(0,1)
1330 | E	SAME	aab	(0,2)(1,2)
1331 | E	SAME	aaa	(0,3)(2,3)
1332 | E	SAME	aaaaaaaa    	(0,8)(7,8)
1333 | E	^(a){1,1}	bcd	NOMATCH
1334 | E	SAME	abc	(0,1)(0,1)
1335 | E	SAME	aab  	(0,1)(0,1)
1336 | E	^(a){1,2}	bcd	NOMATCH
1337 | E	SAME	abc	(0,1)(0,1)
1338 | E	SAME	aab  	(0,2)(1,2)
1339 | E	^(a){1,3}	bcd	NOMATCH
1340 | E	SAME	abc	(0,1)(0,1)
1341 | E	SAME	aab	(0,2)(1,2)
1342 | E	SAME	aaa   	(0,3)(2,3)
1343 | E	^(a){1,}	bcd	NOMATCH
1344 | E	SAME	abc	(0,1)(0,1)
1345 | E	SAME	aab	(0,2)(1,2)
1346 | E	SAME	aaa	(0,3)(2,3)
1347 | E	SAME	aaaaaaaa    	(0,8)(7,8)
1348 | E$	.{0,}\\.gif	borfle\nbib.gif\nno	(0,14)
1349 | E$	.*\\.gif	borfle\nbib.gif\nno	(0,14)
1350 | E$n	SAME		borfle\nbib.gif\nno	(7,14)
1351 | E$j	SAME		borfle\nbib.gif\nno	(0,14)
1352 | E$nj	SAME		borfle\nbib.gif\nno	(0,14)
1353 | E$	.*$		borfle\nbib.gif\nno	(0,17)
1354 | En$	SAME		borfle\nbib.gif\nno	(0,6)
1355 | Ej$	SAME		borfle\nbib.gif\nno	(0,17)
1356 | Enj$	SAME		borfle\nbib.gif\nno	(0,17)
1357 | E$	SAME		borfle\nbib.gif\nno\n	(0,18)
1358 | En$	SAME		borfle\nbib.gif\nno\n	(0,6)
1359 | Ej$	SAME		borfle\nbib.gif\nno\n	(0,18)
1360 | Enj$	SAME		borfle\nbib.gif\nno\n	(0,18)
1361 | E$	(.*X|^B)	abcde\n1234Xyz	(0,11)(0,11)
1362 | E$	SAME	BarFoo 	(0,1)(0,1)
1363 | E$	SAME	abcde\nBar  	NOMATCH
1364 | En$	(.*X|^B)	abcde\n1234Xyz	(6,11)(6,11)
1365 | En$	SAME	BarFoo 	(0,1)(0,1)
1366 | En$	SAME	abcde\nBar  	(6,7)(6,7)
1367 | Ej$	(.*X|^B)	abcde\n1234Xyz	(0,11)(0,11)
1368 | Ej$	SAME	BarFoo 	(0,1)(0,1)
1369 | Ej$	SAME	abcde\nBar  	NOMATCH
1370 | Enj$	(.*X|^B)	abcde\n1234Xyz	(0,11)(0,11)
1371 | Enj$	SAME	BarFoo 	(0,1)(0,1)
1372 | Enj$	SAME	abcde\nBar  	(6,7)(6,7)
1373 | E$	(?s)(.*X|^B)	abcde\n1234Xyz	(0,11)(0,11)
1374 | E$	SAME	BarFoo 	(0,1)(0,1)
1375 | E$	SAME	abcde\nBar  	NOMATCH
1376 | E$	(?s:.*X|^B)	abcde\n1234Xyz	(0,11)
1377 | E$	SAME	BarFoo 	(0,1)
1378 | E$	SAME	abcde\nBar  	NOMATCH
1379 | E$	^.*B	abc\nB	(0,5)
1380 | E$n	^.*B	abc\nB	(4,5)
1381 | E$	(?s)^.*B	abc\nB	(0,5)
1382 | E$	(?m)^.*B	abc\nB	(4,5)
1383 | E$	(?ms)^.*B	abc\nB	(0,5)
1384 | E$	(?ms)^B	abc\nB	(4,5)
1385 | E$	(?m)B$	B\n	(0,1)
1386 | E$	(?s)B$	B\n	NOMATCH
1387 | E	^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]	123456654321	(0,12)
1388 | E$	^\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d	123456654321 	(0,12)
1389 | E$v	^[\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d]	123456654321	(0,12)
1390 | E	^[abc]{12}	abcabcabcabc	(0,12)
1391 | E	^[a-c]{12}	abcabcabcabc	(0,12)
1392 | E	^(a|b|c){12}	abcabcabcabc 	(0,12)(11,12)
1393 | E	^[abcdefghijklmnopqrstuvwxy0123456789]	n	(0,1)
1394 | E	SAME	z 	NOMATCH
1395 | E	abcde{0,0}	abcd	(0,4)
1396 | E	SAME	abce  	NOMATCH
1397 | E	ab[cd]{0,0}e	abe	(0,3)
1398 | E	SAME	abcde 	NOMATCH
1399 | E	ab(c){0,0}d	abd	(0,3)
1400 | E	SAME	abcd   	NOMATCH
1401 | E	a(b*)	a	(0,1)(1,1)
1402 | E	SAME	ab	(0,2)(1,2)
1403 | E	SAME	abbbb	(0,5)(1,5)
1404 | E	SAME	bbbbb    	NOMATCH
1405 | E$	ab\\d{0}e	abe	(0,3)
1406 | E$	SAME	ab1e   	NOMATCH
1407 | E$v	"([^\\\\"]+|\\\\.)*"	the "quick" brown fox	(4,11)(5,10)
1408 | #E$v	SAME	"the \\"quick\\" brown fox" 	(0,25)(14,24)
1409 | E	.*?	abc	(0,0)
1410 | E$	\\b	abc 	(0,0)
1411 | E$	\\b	abc 	(0,0)
1412 | Ez	NULL	abc	(0,0)
1413 | E$ijv	<tr([\\w\\W\\s\\d][^<>]{0,})><TD([\\w\\W\\s\\d][^<>]{0,})>([\\d]{0,}\\.)(.*)((<BR>([\\w\\W\\s\\d][^<>]{0,})|[\\s]{0,}))</a></TD><TD([\\w\\W\\s\\d][^<>]{0,})>([\\w\\W\\s\\d][^<>]{0,})</TD><TD([\\w\\W\\s\\d][^<>]{0,})>([\\w\\W\\s\\d][^<>]{0,})</TD></TR>	<TR BGCOLOR='#DBE9E9'><TD align=left valign=top>43.<a href='joblist.cfm?JobID=94 6735&Keyword='>Word Processor<BR>(N-1286)</a></TD><TD align=left valign=top>Lega lstaff.com</TD><TD align=left valign=top>CA - Statewide</TD></TR>	(0,227)(3,21)(25,47)(48,51)(51,122)(122,122)(122,122)(?,?)(134,156)(157,172)(180,202)(203,217)
1414 | E	a[^a]b	acb	(0,3)
1415 | E$	SAME	a\nb	(0,3)
1416 | E	a.b	acb	(0,3)
1417 | E$	SAME	a\nb   	(0,3)
1418 | Ej	a[^a]b	acb	(0,3)
1419 | Ej$	SAME	a\nb  	(0,3)
1420 | Ej	a.b	acb	(0,3)
1421 | Ej$	SAME	a\nb  	(0,3)


--------------------------------------------------------------------------------
/spec/test-generator.js:
--------------------------------------------------------------------------------
  1 | const fs = require("fs");
  2 | const data = fs.readFileSync("./spec/pcre-1.dat", "utf8");
  3 | const lines = data.split("\n");
  4 | const prettier = require("prettier");
  5 | 
  6 | const escapeQuote = (str) => str.replaceAll('"', '\\"');
  7 | 
  8 | const range = (from, to) =>
  9 |   Array.from({ length: to - from + 1 }, (_, i) => i + from);
 10 | 
// Cases from ./spec/pcre-1.dat that are generated as skipped (`xit`) tests.
// Each key is a human-readable reason; each value lists the affected lines
// using the 1-based line numbers of the data file. The reason text becomes
// part of the title of the skipped test, so editing a key changes the
// generated spec.
const knownIssues = {
  /* ------- features  not yet implemented ------- */
  "does not support start of string quantified within an alternation": [
    1363,
    1369,
  ],
  "does not support hex notification in character sets": [...range(1147, 1149)],
  "does nto support escaped characters in character ranges": [
    ...range(1301, 1308),
  ],
  "lazy quantifiers should still yield the longest overall regex match": [
    ...range(141, 143),
    1288,
  ],
  "peformance issue": [1313, 1314],

  /* -------- issues with the tests ------------  */
  "test appears to be incorrect?": [203, 204],
  "issue with parsing the test itself": [
    1103,
    ...range(1095, 1098),
    ...range(487, 494),
    ...range(1077, 1082),
  ],
  "test contains an octal escape sequence": [1102],
  // the test results measure captured groups using character length / locations
  // see: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/length
  // this is tricky to reproduce
  "test requires a substring function": [1087, 1088],

  /* -------- differences between PCRE and JS regex ------------  */
  "test indicates a malformed regex, whereas it appears OK in JS": [
    1189,
    ...range(1186, 1188),
  ],
  "JS does not support the \\A \\Z syntax for start and end of string": [
    1163,
    1164,
  ],
  "test regex contains syntax not supported in JS": [82, 1158, 281],
  "the test behaviour differs between PCRE and JS": [290, 1278],
};
 53 | 
 54 | const hasKnownIssue = (index) => {
 55 |   for (const issue in knownIssues) {
 56 |     if (knownIssues[issue].includes(index)) {
 57 |       return issue;
 58 |     }
 59 |   }
 60 |   return null;
 61 | };
 62 | 
// Source text of the generated spec file. It starts with the lint
// suppressions and imports below; the code generated for each data line is
// appended to it afterwards.
let testCase = `
/* eslint-disable no-useless-escape */
/* eslint-disable @typescript-eslint/no-empty-function */
/* eslint-disable @typescript-eslint/no-unused-vars */
import { RegExp, Match } from "..";
import { expectMatch, expectNotMatch, exec} from "../__tests__/utils";

`;
 71 | 
// Pattern of the most recently parsed data line. It lives outside the loop
// because a line whose pattern column is `SAME` reuses it.
let regex = "";
// Turn every line of the data file into either a runnable `it(...)` test or a
// skipped `xit(...)` placeholder, appending the generated source to `testCase`.
lines.forEach((line, index) => {
  // switch to 1-based line numbers, as used by `knownIssues` and the test titles
  index += 1;

  let nextCase = "";

  const knownIssue = hasKnownIssue(index);
  // lines that cannot be parsed at all are skipped before any parsing is attempted
  if (knownIssue == "issue with parsing the test itself") {
    testCase += `xit("line ${index} - issue with parsing the test itself", () => {});`;
    return;
  }

  try {
    // Columns are tab separated and may be padded with extra tabs, so empty
    // fields are dropped. The columns are used as:
    // parts[0] flags, parts[1] pattern, parts[2] subject, parts[3] expected result.
    const parts = line.split("\t").filter((f) => f !== "");
    if (parts.length < 4) {
      // TODO - these should probably be listed as known issues
      // NOTE(review): returning here leaves `regex` untouched, so a later
      // `SAME` line reuses the last pattern that was parsed successfully.
      return;
    }

    // `SAME` keeps the previous pattern and `NULL` stands for the empty
    // pattern. Double quotes are escaped, and every `/` is replaced by two
    // backslashes followed by `/` in the generated source.
    regex =
      parts[1] == "SAME"
        ? regex
        : escapeQuote(parts[1] == "NULL" ? "" : parts[1]).replaceAll(
            "/",
            "\\\\/"
          );
    // subject text; `NULL` stands for the empty string
    let str = parts[2] !== "NULL" ? escapeQuote(parts[2]) : "";
    // Flags handed to `exec` in the generated test: always "m", plus "i" when
    // the flag column contains an "i", plus "s" unless the flag column is
    // exactly "En$" or "E$n".
    let flags = "m" + (parts[0].includes("i") ? "i" : "");
    flags += parts[0] !== "En$" && parts[0] !== "E$n" ? "s" : "";

    // The checks below run in order; the first one that applies decides the
    // title of the skipped test, ahead of any entry in `knownIssues`.

    // pattern text contains a backslash followed by `b`
    if (regex.includes("\\b")) {
      testCase += `xit("line: ${index} - word boundary class not supported yet!", () => { });`;
      return;
    }

    // subject text contains a `\x{` escape
    if (str.includes("\\x{")) {
      testCase += `xit("line: ${index} - test encoding issue", () => { });`;
      return;
    }

    if (["(?!", "(?="].some((f) => regex.includes(f))) {
      testCase += `xit("line: ${index} - lookaheads not supported", () => {});`;
      return;
    }

    if (["(?m", "(?s", "(?ms"].some((f) => regex.includes(f))) {
      testCase += `xit("line: ${index} - JS regex does not support mode modifiers", () => {});`;
      return;
    }

    if (["(?#"].some((f) => regex.includes(f))) {
      testCase += `xit("line: ${index} - JS regex does not support comments", () => {});`;
      return;
    }

    // two backslashes followed by a digit in the pattern text
    if (regex.match(/\\\\\d{1}/)) {
      testCase += `xit("line: ${index} - back references are not supported", () => {});`;
      return;
    }

    // remaining known issues are skipped using their reason as the title
    if (knownIssue) {
      testCase += `xit("line: ${index} - ${knownIssue}", () => {});`;
      return;
    }

    nextCase += `it("line: ${index} - matches ${regex} against '${str}'", () => {
      `;
    if (parts[3] == "BADBR") {
      // expected result BADBR: constructing the RegExp must throw
      nextCase += ` expect(() => { let foo = new RegExp("${regex}") }).toThrow();`;
    } else if (parts[3] == "NOMATCH") {
      nextCase += ` expectNotMatch("${regex}", ["${str}"]);`;
    } else {
      nextCase += ` const match = exec("${regex}", "${str}", "${flags}");`;

      // create an expect for each capture group
      // The expected result is a run of `(start,end)` pairs, where `(?,?)`
      // marks a group without an expected span. If it contains no such pair,
      // `match` returns null and the TypeError raised by the next line is
      // reported by the `catch` below, so no test is emitted for the line.
      const captures = parts[3].match(/\((\d{1,3}|\?),(\d{1,3}|\?)\)+/g);
      // this inner `index` shadows the line number: it is the position of the
      // pair, used as the index into `match.matches`
      captures.forEach((capture, index) => {
        const digits = capture.match(/\((\d{1,3}|\?),(\d{1,3}|\?)\)/);
        if (digits[1] !== "?") {
          nextCase += `expect(match.matches[${index}]).toBe("${str}".substring(${digits[1]}, ${digits[2]}));`;
        }
      });
    }

    nextCase += `});
    `;

    testCase += nextCase;
  } catch {
    // any failure while building the case drops it and reports the line number
    console.error("could not parse test case", index);
  }
});
164 | 
// Format the generated source with prettier (babel parser) and write it to
// the spec file, replacing any previous content.
fs.writeFileSync(
  "./assembly/__spec_tests__/generated.spec.ts",
  // testCase
  prettier.format(testCase, { parser: "babel" })
);
170 | 


--------------------------------------------------------------------------------
/spec/test.dat:
--------------------------------------------------------------------------------
  1 | BE	abracadabra$	abracadabracadabra	(7,18)
  2 | BE	a...b		abababbb		(2,7)
  3 | BE	XXXXXX		..XXXXXX		(2,8)
  4 | E	\)		()	(1,2)
  5 | BE	a]		a]a	(0,2)
  6 | B	}		}	(0,1)
  7 | E	\}		}	(0,1)
  8 | BE	\]		]	(0,1)
  9 | B	]		]	(0,1)
 10 | E	]		]	(0,1)
 11 | B	{		{	(0,1)
 12 | B	}		}	(0,1)
 13 | BE	^a		ax	(0,1)
 14 | BE	\^a		a^a	(1,3)
 15 | BE	a\^		a^	(0,2)
 16 | BE	a$		aa	(1,2)
 17 | BE	a\$		a$	(0,2)
 18 | BE	^$		NULL	(0,0)
 19 | E	$^		NULL	(0,0)
 20 | E	a($)		aa	(1,2)(2,2)
 21 | E	a*(^a)		aa	(0,1)(0,1)
 22 | E	(..)*(...)*		a	(0,0)
 23 | E	(..)*(...)*		abcd	(0,4)(2,4)
 24 | E	(ab|a)(bc|c)		abc	(0,3)(0,2)(2,3)
 25 | E	(ab)c|abc		abc	(0,3)(0,2)
 26 | E	a{0}b		ab			(1,2)
 27 | E	(a*)(b?)(b+)b{3}	aaabbbbbbb	(0,10)(0,3)(3,4)(4,7)
 28 | E	(a*)(b{0,1})(b{1,})b{3}	aaabbbbbbb	(0,10)(0,3)(3,4)(4,7)
 29 | E	a{9876543210}	NULL	BADBR
 30 | E	((a|a)|a)			a	(0,1)(0,1)(0,1)
 31 | E	(a*)(a|aa)			aaaa	(0,4)(0,3)(3,4)
 32 | E	a*(a.|aa)			aaaa	(0,4)(2,4)
 33 | E	a(b)|c(d)|a(e)f			aef	(0,3)(?,?)(?,?)(1,2)
 34 | E	(a|b)?.*			b	(0,1)(0,1)
 35 | E	(a|b)c|a(b|c)			ac	(0,2)(0,1)
 36 | E	(a|b)c|a(b|c)			ab	(0,2)(?,?)(1,2)
 37 | E	(a|b)*c|(a|ab)*c		abc	(0,3)(1,2)
 38 | E	(a|b)*c|(a|ab)*c		xc	(1,2)
 39 | E	(.a|.b).*|.*(.a|.b)		xa	(0,2)(0,2)
 40 | E	a?(ab|ba)ab			abab	(0,4)(0,2)
 41 | E	a?(ac{0}b|ba)ab			abab	(0,4)(0,2)
 42 | E	ab|abab				abbabab	(0,2)
 43 | E	aba|bab|bba			baaabbbaba	(5,8)
 44 | E	aba|bab				baaabbbaba	(6,9)
 45 | E	(aa|aaa)*|(a|aaaaa)		aa	(0,2)(0,2)
 46 | E	(a.|.a.)*|(a|.a...)		aa	(0,2)(0,2)
 47 | E	ab|a				xabc	(1,3)
 48 | E	ab|a				xxabc	(2,4)
 49 | Ei	(Ab|cD)*			aBcD	(0,4)(2,4)
 50 | BE	[^-]			--a		(2,3)
 51 | BE	[a-]*			--a		(0,3)
 52 | BE	[a-m-]*			--amoma--	(0,4)
 53 | E	:::1:::0:|:::1:1:0:	:::0:::1:::1:::0:	(8,17)
 54 | E	:::1:::0:|:::1:1:1:	:::0:::1:::1:::0:	(8,17)
 55 | {E	[[:upper:]]		A		(0,1)	[[<element>]] not supported
 56 | E	[[:lower:]]+		`az{		(1,3)
 57 | E	[[:upper:]]+		@AZ[		(1,3)
 58 | BE	[[-]]			[[-]]		(2,4)
 59 | BE	[[.NIL.]]	NULL	ECOLLATE
 60 | BE	[[=aleph=]]	NULL	ECOLLATE
 61 | }
 62 | BE$	\n		\n	(0,1)
 63 | BEn$	\n		\n	(0,1)
 64 | BE$	[^a]		\n	(0,1)
 65 | BE$	\na		\na	(0,2)
 66 | E	(a)(b)(c)	abc	(0,3)(0,1)(1,2)(2,3)
 67 | BE	xxx		xxx	(0,3)
 68 | E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	feb 6,	(0,6)
 69 | E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	2/7	(0,3)
 70 | E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	feb 1,Feb 6	(5,11)
 71 | E3	((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))	x	(0,1)(0,1)(0,1)
 72 | E3	((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*	xx	(0,2)(1,2)(1,2)
 73 | E	a?(ab|ba)*	ababababababababababababababababababababababababababababababababababababababababa	(0,81)(79,81)
 74 | E	abaa|abbaa|abbbaa|abbbbaa	ababbabbbabbbabbbbabbbbaa	(18,25)
 75 | E	abaa|abbaa|abbbaa|abbbbaa	ababbabbbabbbabbbbabaa	(18,22)
 76 | E	aaac|aabc|abac|abbc|baac|babc|bbac|bbbc	baaabbbabac	(7,11)
 77 | BE$	.*			\x01\xff	(0,2)
 78 | E	aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll		XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa	(53,57)
 79 | L	aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll		XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa	NOMATCH
 80 | E	a*a*a*a*a*b		aaaaaaaaab	(0,10)
 81 | BE	^			NULL		(0,0)
 82 | BE	$			NULL		(0,0)
 83 | BE	^$			NULL		(0,0)
 84 | BE	^a$			a		(0,1)
 85 | BE	abc			abc		(0,3)
 86 | BE	abc			xabcy		(1,4)
 87 | BE	abc			ababc		(2,5)
 88 | BE	ab*c			abc		(0,3)
 89 | BE	ab*bc			abc		(0,3)
 90 | BE	ab*bc			abbc		(0,4)
 91 | BE	ab*bc			abbbbc		(0,6)
 92 | E	ab+bc			abbc		(0,4)
 93 | E	ab+bc			abbbbc		(0,6)
 94 | E	ab?bc			abbc		(0,4)
 95 | E	ab?bc			abc		(0,3)
 96 | E	ab?c			abc		(0,3)
 97 | BE	^abc$			abc		(0,3)
 98 | BE	^abc			abcc		(0,3)
 99 | BE	abc$			aabc		(1,4)
100 | BE	^			abc		(0,0)
101 | BE	$			abc		(3,3)
102 | BE	a.c			abc		(0,3)
103 | BE	a.c			axc		(0,3)
104 | BE	a.*c			axyzc		(0,5)
105 | BE	a[bc]d			abd		(0,3)
106 | BE	a[b-d]e			ace		(0,3)
107 | BE	a[b-d]			aac		(1,3)
108 | BE	a[-b]			a-		(0,2)
109 | BE	a[b-]			a-		(0,2)
110 | BE	a]			a]		(0,2)
111 | BE	a[]]b			a]b		(0,3)
112 | BE	a[^bc]d			aed		(0,3)
113 | BE	a[^-b]c			adc		(0,3)
114 | BE	a[^]b]c			adc		(0,3)
115 | E	ab|cd			abc		(0,2)
116 | E	ab|cd			abcd		(0,2)
117 | E	a\(b			a(b		(0,3)
118 | E	a\(*b			ab		(0,2)
119 | E	a\(*b			a((b		(0,4)
120 | E	((a))			abc		(0,1)(0,1)(0,1)
121 | E	(a)b(c)			abc		(0,3)(0,1)(2,3)
122 | E	a+b+c			aabbabc		(4,7)
123 | E	a*			aaa		(0,3)
124 | E	(a*)*			-		(0,0)(0,0)
125 | E	(a*)+			-		(0,0)(0,0)
126 | E	(a*|b)*			-		(0,0)(0,0)
127 | E	(a+|b)*			ab		(0,2)(1,2)
128 | E	(a+|b)+			ab		(0,2)(1,2)
129 | E	(a+|b)?			ab		(0,1)(0,1)
130 | BE	[^ab]*			cde		(0,3)
131 | E	(^)*			-		(0,0)(0,0)
132 | BE	a*			NULL		(0,0)
133 | E	([abc])*d		abbbcd		(0,6)(4,5)
134 | E	([abc])*bcd		abcd		(0,4)(0,1)
135 | E	a|b|c|d|e		e		(0,1)
136 | E	(a|b|c|d|e)f		ef		(0,2)(0,1)
137 | E	((a*|b))*		-		(0,0)(0,0)(0,0)
138 | BE	abcd*efg		abcdefg		(0,7)
139 | BE	ab*			xabyabbbz	(1,3)
140 | BE	ab*			xayabbbz	(1,2)
141 | E	(ab|cd)e		abcde		(2,5)(2,4)
142 | BE	[abhgefdc]ij		hij		(0,3)
143 | E	(a|b)c*d		abcd		(1,4)(1,2)
144 | E	(ab|ab*)bc		abc		(0,3)(0,1)
145 | E	a([bc]*)c*		abc		(0,3)(1,3)
146 | E	a([bc]*)(c*d)		abcd		(0,4)(1,3)(3,4)
147 | E	a([bc]+)(c*d)		abcd		(0,4)(1,3)(3,4)
148 | E	a([bc]*)(c+d)		abcd		(0,4)(1,2)(2,4)
149 | E	a[bcd]*dcdcde		adcdcde		(0,7)
150 | E	(ab|a)b*c		abc		(0,3)(0,2)
151 | E	((a)(b)c)(d)		abcd		(0,4)(0,3)(0,1)(1,2)(3,4)
152 | BE	[A-Za-z_][A-Za-z0-9_]*	alpha		(0,5)
153 | E	^a(bc+|b[eh])g|.h$	abh		(1,3)
154 | E	(bc+d$|ef*g.|h?i(j|k))	effgz		(0,5)(0,5)
155 | E	(bc+d$|ef*g.|h?i(j|k))	ij		(0,2)(0,2)(1,2)
156 | E	(bc+d$|ef*g.|h?i(j|k))	reffgz		(1,6)(1,6)
157 | E	(((((((((a)))))))))	a		(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
158 | BE	multiple words		multiple words yeah	(0,14)
159 | E	(.*)c(.*)		abcde		(0,5)(0,2)(3,5)
160 | BE	abcd			abcd		(0,4)
161 | E	a(bc)d			abcd		(0,4)(1,3)
162 | E	a[-]?c		ac		(0,3)
163 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Qaddafi	(0,15)(?,?)(10,12)
164 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mo'ammar Gadhafi	(0,16)(?,?)(11,13)
165 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Kaddafi	(0,15)(?,?)(10,12)
166 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Qadhafi	(0,15)(?,?)(10,12)
167 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Gadafi	(0,14)(?,?)(10,11)
168 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mu'ammar Qadafi	(0,15)(?,?)(11,12)
169 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moamar Gaddafi	(0,14)(?,?)(9,11)
170 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mu'ammar Qadhdhafi	(0,18)(?,?)(13,15)
171 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Khaddafi	(0,16)(?,?)(11,13)
172 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghaddafy	(0,16)(?,?)(11,13)
173 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghadafi	(0,15)(?,?)(11,12)
174 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghaddafi	(0,16)(?,?)(11,13)
175 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muamar Kaddafi	(0,14)(?,?)(9,11)
176 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Quathafi	(0,16)(?,?)(11,13)
177 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Gheddafi	(0,16)(?,?)(11,13)
178 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moammar Khadafy	(0,15)(?,?)(11,12)
179 | E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moammar Qudhafi	(0,15)(?,?)(10,12)
180 | E	a+(b|c)*d+		aabcdd			(0,6)(3,4)
181 | E	^.+$			vivi			(0,4)
182 | E	^(.+)$			vivi			(0,4)(0,4)
183 | E	^([^!.]+).att.com!(.+)$	gryphon.att.com!eby	(0,19)(0,7)(16,19)
184 | E	^([^!]+!)?([^!]+)$	bas			(0,3)(?,?)(0,3)
185 | E	^([^!]+!)?([^!]+)$	bar!bas			(0,7)(0,4)(4,7)
186 | E	^([^!]+!)?([^!]+)$	foo!bas			(0,7)(0,4)(4,7)
187 | E	^.+!([^!]+!)([^!]+)$	foo!bar!bas		(0,11)(4,8)(8,11)
188 | E	((foo)|(bar))!bas	bar!bas			(0,7)(0,3)(?,?)(0,3)
189 | E	((foo)|(bar))!bas	foo!bar!bas		(4,11)(4,7)(?,?)(4,7)
190 | E	((foo)|(bar))!bas	foo!bas			(0,7)(0,3)(0,3)
191 | E	((foo)|bar)!bas		bar!bas			(0,7)(0,3)
192 | E	((foo)|bar)!bas		foo!bar!bas		(4,11)(4,7)
193 | E	((foo)|bar)!bas		foo!bas			(0,7)(0,3)(0,3)
194 | E	(foo|(bar))!bas		bar!bas			(0,7)(0,3)(0,3)
195 | E	(foo|(bar))!bas		foo!bar!bas		(4,11)(4,7)(4,7)
196 | E	(foo|(bar))!bas		foo!bas			(0,7)(0,3)
197 | E	(foo|bar)!bas		bar!bas			(0,7)(0,3)
198 | E	(foo|bar)!bas		foo!bar!bas		(4,11)(4,7)
199 | E	(foo|bar)!bas		foo!bas			(0,7)(0,3)
200 | E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bar!bas	(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
201 | E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	bas		(0,3)(?,?)(0,3)
202 | E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	bar!bas		(0,7)(0,4)(4,7)
203 | E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	foo!bar!bas	(0,11)(?,?)(?,?)(4,8)(8,11)
204 | E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	foo!bas		(0,7)(0,4)(4,7)
205 | E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	bas		(0,3)(0,3)(?,?)(0,3)
206 | E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	bar!bas		(0,7)(0,7)(0,4)(4,7)
207 | E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bar!bas	(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
208 | E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bas		(0,7)(0,7)(0,4)(4,7)
209 | E	.*(/XXX).*			/XXX			(0,4)(0,4)
210 | E	.*(\\XXX).*			\XXX			(0,4)(0,4)
211 | E	\\XXX				\XXX			(0,4)
212 | E	.*(/000).*			/000			(0,4)(0,4)
213 | E	.*(\\000).*			\000			(0,4)(0,4)
214 | E	\\000				\000			(0,4)
215 | 


--------------------------------------------------------------------------------
/ts/index.ts:
--------------------------------------------------------------------------------
 1 | import "assemblyscript/std/portable/index"; // side-effect-only import (binds no names)
 2 | // Expose console.log as a global `log`; presumably called by the assembly sources — TODO confirm.
 3 | const globalAny: any = global;
 4 | globalAny.log = console.log;
 5 | // RegExp implementation taken from the AssemblyScript sources in ../assembly.
 6 | import { RegExp } from "../assembly/regexp";
 7 | // Bounded repetition ({0,300}) of a word group, with empty flags; the input below never contains "otherword".
 8 | const regexObj = new RegExp("word (?:[a-zA-Z0-9]+ ){0,300}otherword", "");
 9 | const match = regexObj.exec(
10 |   "word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope"
11 | );
12 | console.log(JSON.stringify(match, null, 2)); // print the exec() result as JSON, 2-space indented
13 | 


--------------------------------------------------------------------------------
/ts/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "assemblyscript/std/portable.json",
3 |   "include": ["./**/*.ts"],
4 |   "compilerOptions": {
5 |     "types": ["node"],
6 |     "strictNullChecks": false
7 |   }
8 | }
9 | 


--------------------------------------------------------------------------------